From c4ebe916694d6365c081c3b5ad2d62745a8fb4d6 Mon Sep 17 00:00:00 2001
From: Kenneth
Date: Wed, 27 May 2026 00:13:19 +0100
Subject: [PATCH] feat: add file tree helpers

---
 src/util/file.rs       | 116 ++++++++++++++++++++++++++-
 src/util/file_tests.rs | 170 +++++++++++++++++++++++++++++++++++++++++
 src/util/str.rs        |  14 +++
 3 files changed, 299 insertions(+), 1 deletion(-)
 create mode 100644 src/util/file_tests.rs

diff --git a/src/util/file.rs b/src/util/file.rs
index 2cf2c17..405c232 100644
--- a/src/util/file.rs
+++ b/src/util/file.rs
@@ -1,4 +1,4 @@
-use std::path::Path;
+use std::{ops::Deref, path::Path, sync::Arc};
 
 use memchr::{memchr, memchr2_iter};
 
@@ -15,6 +15,21 @@ pub(crate) enum FileType {
     Unknown,
 }
 
+/// A list of items in the order produced by [`sort_by_path`].
+///
+/// The inner `Vec` is module-private, so other modules can only get one from [`sort_by_path`].
+pub(crate) struct SortedByPath<T>(Vec<T>);
+
+/// One row of the flattened file tree built by [`build_file_tree_from_sorted_paths`].
+pub(crate) struct FileTreeItem {
+    /// `/`-separated path of the row, starting at the root.
+    pub(crate) full_path: Arc<str>,
+    /// Display label: the part of `full_path` below the directory of the enclosing row.
+    pub(crate) name: Arc<str>,
+    /// Zero-based depth of the first path segment shown in `name`.
+    pub(crate) level: usize,
+}
+
 pub(crate) fn classify_content(content: &[u8]) -> ContentType {
     if content.is_empty() {
         ContentType::Text
@@ -76,3 +91,102 @@ pub(crate) fn line_ranges(content: &[u8]) -> Vec> {
 
     ranges
 }
+
+/// Sorts `items` by the `/`-separated path that `key` returns for each item.
+///
+/// Paths that live in a directory come first, ordered component by component (a path sorts
+/// before any longer path that it is a prefix of). Paths without a `/` (root-level files) are
+/// placed after all of them.
+pub(crate) fn sort_by_path<T>(mut items: Vec<T>, key: impl Fn(&T) -> &str) -> SortedByPath<T> {
+    use std::cmp::Ordering;
+
+    items.sort_by(|a, b| {
+        let (a_path, b_path) = (key(a), key(b));
+        match (a_path.contains('/'), b_path.contains('/')) {
+            // exactly one of the two is a root-level file: it goes to the bottom
+            | (false, true) => Ordering::Greater,
+            | (true, false) => Ordering::Less,
+            // same kind: lexicographic comparison of the path components
+            | _ => a_path.split('/').cmp(b_path.split('/')),
+        }
+    });
+    SortedByPath(items)
+}
+
+/// Flattens sorted paths into the rows of a file tree, in display order.
+///
+/// Every directory that contains an input path gets a row of its own, emitted once and directly
+/// before its contents. A directory whose only child is another directory is folded into that
+/// child's row (`src` + `api` become one row named `src/api`). `level` is the zero-based depth of
+/// the first path segment shown in the row's `name`, so rows below a folded directory skip
+/// levels.
+pub(crate) fn build_file_tree_from_sorted_paths<T>(paths: &SortedByPath<T>) -> Vec<FileTreeItem>
+where
+    T: AsRef<str>,
+{
+    /// Emits the rows for `entries`, all of which start with the same `offset` bytes (the path
+    /// of their common directory plus a trailing `/`, or nothing at the root).
+    fn emit_dir(entries: &[&str], offset: usize, level: usize, items: &mut Vec<FileTreeItem>) {
+        let mut idx = 0;
+        while idx < entries.len() {
+            let entry = entries[idx];
+            let Some(seg_len) = entry[offset..].find('/') else {
+                // no further separator: a file directly inside the current directory
+                items.push(FileTreeItem {
+                    full_path: entry.into(),
+                    name: entry[offset..].into(),
+                    level,
+                });
+                idx += 1;
+                continue;
+            };
+
+            // `dir_end` is the length of the directory prefix including its trailing '/', so
+            // that "src/" is never mistaken for a prefix of "srcs/main.rs"
+            let mut dir_end = offset + seg_len + 1;
+            let group_len = entries[idx..]
+                .iter()
+                .take_while(|other| other.starts_with(&entry[..dir_end]))
+                .count();
+            let group = &entries[idx..idx + group_len];
+
+            // fold single-child directory chains: keep extending the prefix for as long as
+            // every entry of the group lives below the same sub-directory
+            let mut folded = 1;
+            while let Some(next_len) = entry[dir_end..].find('/') {
+                let candidate = &entry[..dir_end + next_len + 1];
+                if !group.iter().all(|other| other.starts_with(candidate)) {
+                    break;
+                }
+                dir_end += next_len + 1;
+                folded += 1;
+            }
+
+            items.push(FileTreeItem {
+                full_path: entry[..dir_end - 1].into(),
+                name: entry[offset..dir_end - 1].into(),
+                level,
+            });
+            emit_dir(group, dir_end, level + folded, items);
+            idx += group_len;
+        }
+    }
+
+    let entries: Vec<&str> = paths.0.iter().map(|path| path.as_ref()).collect();
+    let mut items = Vec::with_capacity(entries.len());
+    emit_dir(&entries, 0, 0, &mut items);
+    items
+}
+
+impl<T> Deref for SortedByPath<T> {
+    type Target = [T];
+
+    /// Exposes the sorted items as a read-only slice.
+    fn deref(&self) -> &[T] {
+        self.0.as_slice()
+    }
+}
+
+#[cfg(test)]
+#[path = "file_tests.rs"]
+mod tests;
diff --git a/src/util/file_tests.rs b/src/util/file_tests.rs
new file mode 100644
index 0000000..e4c7b70
--- /dev/null
+++ b/src/util/file_tests.rs
@@ -0,0 +1,170 @@
+use super::*;
+
+/// Checks that `paths` (which must already be in `sort_by_path` order) produce exactly the
+/// `(full_path, name, level)` rows in `expected`.
+fn assert_tree(paths: &[&str], expected: &[(&str, &str, usize)]) {
+    let sorted_paths = sort_by_path(paths.to_vec(), |path| *path);
+    assert_eq!(
+        sorted_paths.0.as_slice(),
+        paths,
+        "test inputs must already be sorted by sort_by_path",
+    );
+
+    let actual = build_file_tree_from_sorted_paths(&sorted_paths)
+        .into_iter()
+        .map(|item| {
+            (
+                item.full_path.to_string(),
+                item.name.to_string(),
+                item.level,
+            )
+        })
+        .collect::<Vec<_>>();
+
+    let expected = expected
+        .iter()
+        .map(|(full_path, name, level)| ((*full_path).to_string(), (*name).to_string(), *level))
+        .collect::<Vec<_>>();
+
+    assert_eq!(actual, expected);
+}
+
+#[test]
+fn sorts_paths_by_components_with_root_files_at_bottom() {
+    let sorted_paths = sort_by_path(
+        vec![
+            "tests/integration.rs",
+            "src/api/repos.rs",
+            "README.md",
+            "Cargo.toml",
+            "src/api/issues.rs",
+            "src/libs.rs",
+        ],
+        |path| *path,
+    );
+
+    assert_eq!(
+        sorted_paths.0,
+        vec![
+            "src/api/issues.rs",
+            "src/api/repos.rs",
+            "src/libs.rs",
+            "tests/integration.rs",
+            "Cargo.toml",
+            "README.md",
+        ],
+    );
+}
+
+#[test]
+fn builds_empty_tree_for_empty_paths() {
+    assert_tree(&[], &[]);
+}
+
+#[test]
+fn emits_top_level_files_as_level_zero_items() {
+    assert_tree(
+        &["Cargo.toml", "README.md"],
+        &[
+            ("Cargo.toml", "Cargo.toml", 0),
+            ("README.md", "README.md", 0),
+        ],
+    );
+}
+
+#[test]
+fn groups_files_that_share_the_same_parent() {
+    assert_tree(
+        &["src/api/issues.rs", "src/api/repos.rs"],
+        &[
+            ("src/api", "src/api", 0),
+            ("src/api/issues.rs", "issues.rs", 2),
+            ("src/api/repos.rs", "repos.rs", 2),
+        ],
+    );
+}
+
+#[test]
+fn emits_shared_parent_once_for_sibling_singleton_dirs() {
+    // "src/c" is a generated directory row for display, not an input path.
+    assert_tree(
+        &["src/a/b", "src/c/d"],
+        &[
+            ("src", "src", 0),
+            ("src/a", "a", 1),
+            ("src/a/b", "b", 2),
+            ("src/c", "c", 1),
+            ("src/c/d", "d", 2),
+        ],
+    );
+}
+
+#[test]
+fn expands_unrelated_single_child_dirs() {
+    assert_tree(
+        &["src/libs.rs", "tests/integration.rs"],
+        &[
+            ("src", "src", 0),
+            ("src/libs.rs", "libs.rs", 1),
+            ("tests", "tests", 0),
+            ("tests/integration.rs", "integration.rs", 1),
+        ],
+    );
+}
+
+#[test]
+fn flushes_pending_dir_before_later_top_level_file() {
+    assert_tree(
+        &["lib/a.rs", "src/a.rs", "z.txt"],
+        &[
+            ("lib", "lib", 0),
+            ("lib/a.rs", "a.rs", 1),
+            ("src", "src", 0),
+            ("src/a.rs", "a.rs", 1),
+            ("z.txt", "z.txt", 0),
+        ],
+    );
+}
+
+#[test]
+fn keeps_emitted_parent_for_mixed_multi_file_and_singleton_branches() {
+    assert_tree(
+        &["src/a/b", "src/a/c", "src/d/e"],
+        &[
+            ("src", "src", 0),
+            ("src/a", "a", 1),
+            ("src/a/b", "b", 2),
+            ("src/a/c", "c", 2),
+            ("src/d", "d", 1),
+            ("src/d/e", "e", 2),
+        ],
+    );
+}
+
+#[test]
+fn emits_file_after_its_sibling_directory() {
+    // regression: used to panic with an out-of-range slice index
+    assert_tree(
+        &["src/api/issues.rs", "src/libs.rs"],
+        &[
+            ("src", "src", 0),
+            ("src/api", "api", 1),
+            ("src/api/issues.rs", "issues.rs", 2),
+            ("src/libs.rs", "libs.rs", 1),
+        ],
+    );
+}
+
+#[test]
+fn keeps_file_out_of_a_later_sibling_directory() {
+    // regression: "a/x" used to be listed below the "a/y" row
+    assert_tree(
+        &["a/x", "a/y/z"],
+        &[
+            ("a", "a", 0),
+            ("a/x", "x", 1),
+            ("a/y", "y", 1),
+            ("a/y/z", "z", 2),
+        ],
+    );
+}
diff --git a/src/util/str.rs b/src/util/str.rs
index 3391892..f685734 100644
--- a/src/util/str.rs
+++ b/src/util/str.rs
@@ -18,3 +18,17 @@ impl Into for api::issues::Id {
         gpui::ElementId::Name(gpui::SharedString::new(Arc::clone(&self.0)))
     }
 }
+
+/// Returns the longest prefix of `a` that is also a prefix of `b`.
+///
+/// The comparison is made `char` by `char` and the result is cut at a byte offset of `a` that
+/// is always a character boundary, so non-ASCII input is handled without panicking.
+fn overlap<'a>(a: &'a str, b: &str) -> &'a str {
+    let end = a
+        .char_indices()
+        .zip(b.chars())
+        .find(|((_, a_char), b_char)| a_char != b_char)
+        // no mismatch: the shorter string is a prefix of the longer one
+        .map_or_else(|| a.len().min(b.len()), |((idx, _), _)| idx);
+    &a[..end]
+}