fix: file tree construction logic

This commit is contained in:
2026-06-06 14:03:58 +01:00
parent 74be11bf42
commit 6a127aab9e
2 changed files with 452 additions and 190 deletions

View File

@@ -1,4 +1,4 @@
use std::{ops::Deref, path::Path, sync::Arc};
use std::{collections::VecDeque, ops::Deref, path::Path, sync::Arc};
use memchr::{memchr, memchr2_iter};
@@ -29,6 +29,13 @@ pub(crate) struct FileTreeItem {
pub(crate) level: usize,
}
pub(crate) struct FileTreeNode<'a> {
segment: &'a str,
child_ptrs: Option<(usize, usize)>,
child_count: usize,
next_sibling: Option<usize>,
}
#[derive(Clone, Copy, PartialEq, Eq)]
pub(crate) enum FileTreeItemKind {
File,
@@ -138,163 +145,211 @@ pub(crate) fn sort_by_path<T>(mut items: Vec<T>, key: impl Fn(&T) -> &str) -> So
}
pub(crate) fn build_file_tree<T>(
paths: &SortedByPath<T>,
items: &SortedByPath<T>,
key: impl Fn(&T) -> &str,
) -> Vec<FileTreeItem> {
let mut stack: Vec<&str> = Vec::with_capacity(50);
let mut leafs: Vec<&str> = Vec::with_capacity(50);
let mut nodes: Vec<FileTreeNode> = Vec::new();
nodes.push(FileTreeNode {
segment: "",
child_ptrs: None,
child_count: 0,
next_sibling: None,
});
for item in items.iter() {
let path = key(item);
let mut ptr = 0;
for seg in path.split('/') {
let node = &nodes[ptr];
match node.child_ptrs {
| Some((start_child_i, end_child_i)) => {
let mut child_ptr = start_child_i;
let mut found = false;
loop {
let child_node = &nodes[child_ptr];
if child_node.segment == seg {
found = true;
break;
}
match child_node.next_sibling {
| Some(next_i) => {
child_ptr = next_i;
}
| None => {
break;
}
}
}
if found {
// child with matching segment found, go to the child next
ptr = child_ptr;
} else {
// no matching child under this node
// push segment under this node
let pushed_i = nodes.len();
nodes.push(FileTreeNode {
segment: seg,
child_ptrs: None,
child_count: 0,
next_sibling: None,
});
// link pushed node to last child of current node
nodes[end_child_i].next_sibling = Some(pushed_i);
// update end child ptr of current node
{
let node = &mut nodes[ptr];
node.child_ptrs = Some((start_child_i, pushed_i));
node.child_count += 1;
}
// go to the pushed node next
ptr = pushed_i;
}
}
| None => {
// this node has no child, push
let pushed_i = nodes.len();
nodes.push(FileTreeNode {
segment: seg,
child_ptrs: None,
child_count: 0,
next_sibling: None,
});
{
let node = &mut nodes[ptr];
node.child_ptrs = Some((pushed_i, pushed_i));
node.child_count += 1;
}
ptr = pushed_i;
}
}
}
}
struct StackItem<'a> {
node: &'a FileTreeNode<'a>,
level: usize,
path_len: usize,
name_start: usize,
name_end: usize,
}
let mut items: Vec<FileTreeItem> = Vec::new();
let mut stack: Vec<StackItem> = Vec::new();
fn strip_path_prefix<'a>(path: &'a str, prefix: &str) -> &'a str {
path.strip_prefix(prefix)
.map(|it| it.strip_prefix('/').unwrap_or(it))
.unwrap_or(path)
}
fn flush_leafs<'a>(
leafs: &mut Vec<&'a str>,
stack: &[&str],
items: &mut Vec<FileTreeItem>,
emitted_depth: usize,
base_depth: usize,
) -> bool {
let mut base_dir_created = false;
if leafs.is_empty() && stack.is_empty() {
return false;
}
let stack_dir_path = Arc::<str>::from(stack.join("/"));
let (common_dir_path, stack_dir_name) =
if (base_depth == 0 || base_depth == stack.len()) && emitted_depth == 0 {
(None, Some(Arc::clone(&stack_dir_path)))
} else {
let common_dir_path = if base_depth == stack.len() || emitted_depth == stack.len() {
Arc::<str>::from(stack[..emitted_depth].join("/"))
} else {
Arc::<str>::from(stack[..base_depth].join("/"))
};
let stack_dir_name =
Arc::<str>::from(strip_path_prefix(&stack_dir_path, &common_dir_path));
(
Some(common_dir_path),
if stack_dir_name.len() == 0 {
None
} else {
Some(stack_dir_name)
},
)
};
let stack_dir_depth = if let Some(common_dir_path) = common_dir_path
&& emitted_depth == 0
{
fn emit_dir_item(level: usize, full_path: &str, name: &str, items: &mut Vec<FileTreeItem>) {
let dir_path = Arc::<str>::from(full_path.trim_end_matches('/'));
if full_path == name {
items.push(FileTreeItem {
kind: FileTreeItemKind::Directory,
full_path: Arc::clone(&common_dir_path),
name: common_dir_path,
level: base_depth.saturating_sub(1),
full_path: Arc::clone(&dir_path),
name: dir_path,
level: level,
});
base_dir_created = true;
base_depth
} else {
emitted_depth
};
if let Some(stack_dir_name) = stack_dir_name {
items.push(FileTreeItem {
kind: FileTreeItemKind::Directory,
full_path: Arc::clone(&stack_dir_path),
name: stack_dir_name,
level: stack_dir_depth,
full_path: dir_path,
name: Arc::<str>::from(name.trim_end_matches('/')),
level: level,
});
}
for leaf in leafs.drain(..) {
items.push(FileTreeItem {
kind: FileTreeItemKind::File,
full_path: Arc::<str>::from(leaf),
name: strip_path_prefix(&leaf, &stack_dir_path).into(),
level: stack.len(),
});
}
base_dir_created
}
let mut base_depth = 0;
let mut emitted_depth = 0;
stack.push(StackItem {
node: &nodes[0],
level: 0,
path_len: 0,
name_start: 0,
name_end: 0,
});
for path in paths.0.iter() {
let path = key(path);
match path.rsplit_once('/') {
| None => {
flush_leafs(&mut leafs, &stack, &mut items, emitted_depth, base_depth);
stack.clear();
// top level file
items.push(FileTreeItem {
kind: FileTreeItemKind::File,
full_path: path.into(),
name: path.into(),
level: 0,
});
}
let mut dir_path = String::new();
| Some((parent, _)) => {
let mut common_depth = 0;
while let Some(item) = stack.pop() {
let StackItem {
node,
level,
path_len,
name_start,
name_end,
} = item;
for (i, seg) in parent.split('/').enumerate() {
let stack_item = stack.get(i);
if stack_item.is_none() {
// segment is unseen, push to stack
stack.push(seg);
common_depth += 1;
} else if Some(&seg) == stack.get(i) {
// segment matches stack, continue comparison
common_depth += 1;
} else {
// segment differs from stack, stop comparison
break;
}
}
dir_path.truncate(path_len);
if common_depth == stack.len() {
// current path is in same directory as stack, add to leafs
leafs.push(path);
base_depth = common_depth;
let next_level: usize;
let mut next_name_start = name_start;
let mut next_name_end = name_end;
if node.segment.is_empty() {
next_level = level + 1;
} else {
dir_path.push_str(node.segment);
dir_path.push('/');
if node.child_count == 0 {
items.push(FileTreeItem {
kind: FileTreeItemKind::File,
full_path: Arc::<str>::from(&dir_path[..dir_path.len() - 1]),
name: Arc::<str>::from(node.segment),
level: level - 1,
});
next_level = level;
} else if node.child_count == 1 {
next_name_end = dir_path.len();
next_level = level;
} else {
// e.g. stack = ["a", "b", "c"], path = ["a", "c"]
// common dir path = "a/", stack dir path = "a/b/c", common count = 1
// push common dir a to items
// also push stack dir a/b/c to items but strip a from name so that it becomes "b/c" with level equal to common_count
// finally push any leaf under a/b/c
let base_dir_created =
flush_leafs(&mut leafs, &stack, &mut items, emitted_depth, common_depth);
// pop top of stack minus common dir
stack.truncate(common_depth);
if base_dir_created {
emitted_depth = common_depth;
} else {
emitted_depth = 0;
}
for seg in parent.split('/').skip(common_depth) {
stack.push(seg);
}
leafs.push(path);
emit_dir_item(
level - 1,
&dir_path,
&dir_path[name_start..name_end + node.segment.len()],
&mut items,
);
next_name_start = dir_path.len();
next_name_end = next_name_start + 1;
next_level = level + 1;
}
}
if let Some(sib_i) = node.next_sibling {
let n = &nodes[sib_i];
stack.push(StackItem {
level,
path_len,
name_start,
name_end,
node: n,
});
}
if let Some((child_i, _)) = node.child_ptrs {
let n = &nodes[child_i];
if n.child_count == 0 && !dir_path.is_empty() && next_name_start < dir_path.len() {
emit_dir_item(
level - 1,
&dir_path,
&dir_path[next_name_start..next_name_end],
&mut items,
);
stack.push(StackItem {
level: next_level + 1,
node: n,
path_len: dir_path.len(),
name_start: next_name_start,
name_end: next_name_end,
});
} else {
stack.push(StackItem {
level: next_level,
node: n,
path_len: dir_path.len(),
name_start: next_name_start,
name_end: next_name_end,
});
}
}
}
flush_leafs(&mut leafs, &stack, &mut items, emitted_depth, base_depth);
items
}

View File

@@ -1,7 +1,46 @@
use super::*;
use serde_json::Value;
fn assert_tree(paths: &[&str], expected: &[(&str, &str, usize)]) {
const DIR: &str = "dir";
const FILE: &str = "file";
#[derive(Debug, PartialEq, Eq)]
struct TreeRow {
full_path: String,
name: String,
level: usize,
kind: &'static str,
}
fn kind_name(kind: FileTreeItemKind) -> &'static str {
match kind {
| FileTreeItemKind::Directory => DIR,
| FileTreeItemKind::File => FILE,
}
}
fn row_from_item(item: FileTreeItem) -> TreeRow {
TreeRow {
full_path: item.full_path.to_string(),
name: item.name.to_string(),
level: item.level,
kind: kind_name(item.kind),
}
}
fn expected_rows(expected: &[(&str, &str, usize, &'static str)]) -> Vec<TreeRow> {
expected
.iter()
.map(|(full_path, name, level, kind)| TreeRow {
full_path: (*full_path).to_string(),
name: (*name).to_string(),
level: *level,
kind,
})
.collect()
}
fn assert_tree(paths: &[&str], expected: &[(&str, &str, usize, &'static str)]) {
let sorted_paths = sort_by_path(paths.to_vec(), |path| *path);
assert_eq!(
sorted_paths.0.as_slice(),
@@ -11,21 +50,94 @@ fn assert_tree(paths: &[&str], expected: &[(&str, &str, usize)]) {
let actual = build_file_tree(&sorted_paths, |path| *path)
.into_iter()
.map(|item| {
(
item.full_path.to_string(),
item.name.to_string(),
item.level,
)
})
.map(row_from_item)
.collect::<Vec<_>>();
let expected = expected
.iter()
.map(|(full_path, name, level)| ((*full_path).to_string(), (*name).to_string(), *level))
.collect::<Vec<_>>();
assert_tree_rows(paths, actual, expected_rows(expected));
}
assert_eq!(actual, expected);
fn assert_tree_rows(paths: &[&str], actual: Vec<TreeRow>, expected: Vec<TreeRow>) {
if actual != expected {
panic!("{}", format_tree_failure(paths, &expected, &actual));
}
}
fn format_tree_failure(paths: &[&str], expected: &[TreeRow], actual: &[TreeRow]) -> String {
let mut output = String::new();
output.push_str("file tree mismatch\n\n");
output.push_str("input paths:\n");
if paths.is_empty() {
output.push_str(" <empty>\n");
} else {
for (i, path) in paths.iter().enumerate() {
output.push_str(&format!(" {i:>2}: {path}\n"));
}
}
output.push('\n');
output.push_str("expected tree:\n");
format_rows(&mut output, expected);
output.push('\n');
output.push_str("actual tree:\n");
format_rows(&mut output, actual);
output.push('\n');
output.push_str("first mismatch:\n");
match first_mismatch(expected, actual) {
| Some(i) => {
output.push_str(&format!(" row {i}\n"));
output.push_str(" expected: ");
format_row_inline(&mut output, expected.get(i));
output.push('\n');
output.push_str(" actual: ");
format_row_inline(&mut output, actual.get(i));
output.push('\n');
}
| None => output.push_str(" <none>\n"),
}
output
}
fn first_mismatch(expected: &[TreeRow], actual: &[TreeRow]) -> Option<usize> {
let max_len = expected.len().max(actual.len());
(0..max_len).find(|&i| expected.get(i) != actual.get(i))
}
fn format_rows(output: &mut String, rows: &[TreeRow]) {
if rows.is_empty() {
output.push_str(" <empty>\n");
return;
}
output.push_str(" # kind lvl tree full_path\n");
output.push_str(" -- ----- ---- ----------------------------------------- -----------------------------------------\n");
for (i, row) in rows.iter().enumerate() {
let indent = " ".repeat(row.level);
let tree_name = format!("{indent}{}", row.name);
output.push_str(&format!(
" {i:>2} {:<5} {:>4} {:<41} {}\n",
row.kind, row.level, tree_name, row.full_path,
));
}
}
fn format_row_inline(output: &mut String, row: Option<&TreeRow>) {
match row {
| Some(row) => {
let indent = " ".repeat(row.level);
output.push_str(&format!(
"{} level={} name={}{} full_path={}",
row.kind, row.level, indent, row.name, row.full_path
));
}
| None => output.push_str("<missing>"),
}
}
#[test]
@@ -78,8 +190,8 @@ fn emits_top_level_files_as_level_zero_items() {
assert_tree(
&["Cargo.toml", "README.md"],
&[
("Cargo.toml", "Cargo.toml", 0),
("README.md", "README.md", 0),
("Cargo.toml", "Cargo.toml", 0, FILE),
("README.md", "README.md", 0, FILE),
],
);
}
@@ -89,9 +201,9 @@ fn groups_files_that_share_the_same_parent() {
assert_tree(
&["src/api/issues.rs", "src/api/repos.rs"],
&[
("src/api", "src/api", 0),
("src/api/issues.rs", "issues.rs", 2),
("src/api/repos.rs", "repos.rs", 2),
("src/api", "src/api", 0, DIR),
("src/api/issues.rs", "issues.rs", 1, FILE),
("src/api/repos.rs", "repos.rs", 1, FILE),
],
);
}
@@ -102,11 +214,11 @@ fn emits_shared_parent_once_for_sibling_singleton_dirs() {
assert_tree(
&["src/a/b", "src/c/d"],
&[
("src", "src", 0),
("src/a", "a", 1),
("src/a/b", "b", 2),
("src/c", "c", 1),
("src/c/d", "d", 2),
("src", "src", 0, DIR),
("src/a", "a", 1, DIR),
("src/a/b", "b", 2, FILE),
("src/c", "c", 1, DIR),
("src/c/d", "d", 2, FILE),
],
);
}
@@ -116,10 +228,10 @@ fn expands_unrelated_single_child_dirs() {
assert_tree(
&["src/libs.rs", "tests/integration.rs"],
&[
("src", "src", 0),
("src/libs.rs", "libs.rs", 1),
("tests", "tests", 0),
("tests/integration.rs", "integration.rs", 1),
("src", "src", 0, DIR),
("src/libs.rs", "libs.rs", 1, FILE),
("tests", "tests", 0, DIR),
("tests/integration.rs", "integration.rs", 1, FILE),
],
);
}
@@ -129,11 +241,11 @@ fn flushes_pending_dir_before_later_top_level_file() {
assert_tree(
&["lib/a.rs", "src/a.rs", "z.txt"],
&[
("lib", "lib", 0),
("lib/a.rs", "a.rs", 1),
("src", "src", 0),
("src/a.rs", "a.rs", 1),
("z.txt", "z.txt", 0),
("lib", "lib", 0, DIR),
("lib/a.rs", "a.rs", 1, FILE),
("src", "src", 0, DIR),
("src/a.rs", "a.rs", 1, FILE),
("z.txt", "z.txt", 0, FILE),
],
);
}
@@ -143,12 +255,12 @@ fn keeps_emitted_parent_for_mixed_multi_file_and_singleton_branches() {
assert_tree(
&["src/a/b", "src/a/c", "src/d/e"],
&[
("src", "src", 0),
("src/a", "a", 1),
("src/a/b", "b", 2),
("src/a/c", "c", 2),
("src/d", "d", 1),
("src/d/e", "e", 2),
("src", "src", 0, DIR),
("src/a", "a", 1, DIR),
("src/a/b", "b", 2, FILE),
("src/a/c", "c", 2, FILE),
("src/d", "d", 1, DIR),
("src/d/e", "e", 2, FILE),
],
);
}
@@ -180,30 +292,125 @@ fn builds_tree_for_pull_request_fixture_with_root_and_nested_file() {
let actual = build_file_tree(&sorted_paths, |path| *path)
.into_iter()
.map(|item| {
(
item.full_path.to_string(),
item.name.to_string(),
item.level,
)
})
.map(row_from_item)
.collect::<Vec<_>>();
assert_eq!(
assert_tree_rows(
sorted_paths.0.as_slice(),
actual,
vec![
("src".to_string(), "src".to_string(), 0),
expected_rows(&[
("src", "src", 0, DIR),
("src/screen/dashboard", "screen/dashboard", 1, DIR),
(
"src/screen/dashboard".to_string(),
"screen/dashboard".to_string(),
1,
"src/screen/dashboard/issue_list.rs",
"issue_list.rs",
2,
FILE,
),
(
"src/screen/dashboard/issue_list.rs".to_string(),
"issue_list.rs".to_string(),
3,
),
("src/query.rs".to_string(), "query.rs".to_string(), 1),
("src/query.rs", "query.rs", 1, FILE),
]),
);
}
#[test]
fn keeps_single_nested_file_one_level_under_compacted_directory() {
assert_tree(
&["src/screen/dashboard/issue_list.rs"],
&[
("src/screen/dashboard", "src/screen/dashboard", 0, DIR),
("src/screen/dashboard/issue_list.rs", "issue_list.rs", 1, FILE),
],
);
}
#[test]
fn keeps_sibling_files_one_level_under_compacted_parent() {
assert_tree(
&["app/controllers/home.rs", "app/controllers/users.rs"],
&[
("app/controllers", "app/controllers", 0, DIR),
("app/controllers/home.rs", "home.rs", 1, FILE),
("app/controllers/users.rs", "users.rs", 1, FILE),
],
);
}
#[test]
fn keeps_compacted_branch_child_under_emitted_parent() {
assert_tree(
&["src/screen/dashboard/issue_list.rs", "src/query.rs"],
&[
("src", "src", 0, DIR),
("src/screen/dashboard", "screen/dashboard", 1, DIR),
("src/screen/dashboard/issue_list.rs", "issue_list.rs", 2, FILE),
("src/query.rs", "query.rs", 1, FILE),
],
);
}
#[test]
fn emits_shared_multi_segment_prefix_at_root_level() {
assert_tree(
&[
"app/features/auth/login.rs",
"app/features/billing/invoice.rs",
],
&[
("app/features", "app/features", 0, DIR),
("app/features/auth", "auth", 1, DIR),
("app/features/auth/login.rs", "login.rs", 2, FILE),
("app/features/billing", "billing", 1, DIR),
("app/features/billing/invoice.rs", "invoice.rs", 2, FILE),
],
);
}
#[test]
fn keeps_files_under_sibling_branches_at_visible_depth() {
assert_tree(
&[
"app/features/auth/login.rs",
"app/features/auth/logout.rs",
"app/features/billing/invoice.rs",
"app/features/billing/refund.rs",
],
&[
("app/features", "app/features", 0, DIR),
("app/features/auth", "auth", 1, DIR),
("app/features/auth/login.rs", "login.rs", 2, FILE),
("app/features/auth/logout.rs", "logout.rs", 2, FILE),
("app/features/billing", "billing", 1, DIR),
("app/features/billing/invoice.rs", "invoice.rs", 2, FILE),
("app/features/billing/refund.rs", "refund.rs", 2, FILE),
],
);
}
#[test]
fn resets_visible_depth_after_compacted_branch_before_root_file() {
assert_tree(
&["lib/core/runtime/mod.rs", "README.md"],
&[
("lib/core/runtime", "lib/core/runtime", 0, DIR),
("lib/core/runtime/mod.rs", "mod.rs", 1, FILE),
("README.md", "README.md", 0, FILE),
],
);
}
#[test]
fn keeps_cascading_shared_prefixes_as_expanded_ancestors() {
assert_tree(
&["src/a/b/c/d", "src/a/b/d", "src/a/d", "src/d"],
&[
("src", "src", 0, DIR),
("src/a", "a", 1, DIR),
("src/a/b", "b", 2, DIR),
("src/a/b/c", "c", 3, DIR),
("src/a/b/c/d", "d", 4, FILE),
("src/a/b/d", "d", 3, FILE),
("src/a/d", "d", 2, FILE),
("src/d", "d", 1, FILE),
],
);
}