From 6a127aab9e3dc5485154ebbf8b87eb0612af3168 Mon Sep 17 00:00:00 2001 From: Kenneth Date: Sat, 6 Jun 2026 14:03:58 +0100 Subject: [PATCH] fix: file tree construction logic --- src/util/file.rs | 321 ++++++++++++++++++++++++----------------- src/util/file_tests.rs | 321 +++++++++++++++++++++++++++++++++-------- 2 files changed, 452 insertions(+), 190 deletions(-) diff --git a/src/util/file.rs b/src/util/file.rs index 497632d..0442f72 100644 --- a/src/util/file.rs +++ b/src/util/file.rs @@ -1,4 +1,4 @@ -use std::{ops::Deref, path::Path, sync::Arc}; +use std::{collections::VecDeque, ops::Deref, path::Path, sync::Arc}; use memchr::{memchr, memchr2_iter}; @@ -29,6 +29,13 @@ pub(crate) struct FileTreeItem { pub(crate) level: usize, } +pub(crate) struct FileTreeNode<'a> { + segment: &'a str, + child_ptrs: Option<(usize, usize)>, + child_count: usize, + next_sibling: Option, +} + #[derive(Clone, Copy, PartialEq, Eq)] pub(crate) enum FileTreeItemKind { File, @@ -138,163 +145,211 @@ pub(crate) fn sort_by_path(mut items: Vec, key: impl Fn(&T) -> &str) -> So } pub(crate) fn build_file_tree( - paths: &SortedByPath, + items: &SortedByPath, key: impl Fn(&T) -> &str, ) -> Vec { - let mut stack: Vec<&str> = Vec::with_capacity(50); - let mut leafs: Vec<&str> = Vec::with_capacity(50); + let mut nodes: Vec = Vec::new(); + + nodes.push(FileTreeNode { + segment: "", + child_ptrs: None, + child_count: 0, + next_sibling: None, + }); + + for item in items.iter() { + let path = key(item); + let mut ptr = 0; + for seg in path.split('/') { + let node = &nodes[ptr]; + match node.child_ptrs { + | Some((start_child_i, end_child_i)) => { + let mut child_ptr = start_child_i; + let mut found = false; + loop { + let child_node = &nodes[child_ptr]; + if child_node.segment == seg { + found = true; + break; + } + match child_node.next_sibling { + | Some(next_i) => { + child_ptr = next_i; + } + | None => { + break; + } + } + } + if found { + // child with matching segment found, go to the child next + ptr = child_ptr; + } else { + // no matching child under this node + // push segment under this node + let pushed_i = nodes.len(); + nodes.push(FileTreeNode { + segment: seg, + child_ptrs: None, + child_count: 0, + next_sibling: None, + }); + // link pushed node to last child of current node + nodes[end_child_i].next_sibling = Some(pushed_i); + // update end child ptr of current node + { + let node = &mut nodes[ptr]; + node.child_ptrs = Some((start_child_i, pushed_i)); + node.child_count += 1; + } + // go to the pushed node next + ptr = pushed_i; + } + } + + | None => { + // this node has no child, push + let pushed_i = nodes.len(); + nodes.push(FileTreeNode { + segment: seg, + child_ptrs: None, + child_count: 0, + next_sibling: None, + }); + { + let node = &mut nodes[ptr]; + node.child_ptrs = Some((pushed_i, pushed_i)); + node.child_count += 1; + } + ptr = pushed_i; + } + } + } + } + + struct StackItem<'a> { + node: &'a FileTreeNode<'a>, + level: usize, + path_len: usize, + name_start: usize, + name_end: usize, + } let mut items: Vec = Vec::new(); + let mut stack: Vec = Vec::new(); - fn strip_path_prefix<'a>(path: &'a str, prefix: &str) -> &'a str { - path.strip_prefix(prefix) - .map(|it| it.strip_prefix('/').unwrap_or(it)) - .unwrap_or(path) - } - - fn flush_leafs<'a>( - leafs: &mut Vec<&'a str>, - stack: &[&str], - items: &mut Vec, - emitted_depth: usize, - base_depth: usize, - ) -> bool { - let mut base_dir_created = false; - - if leafs.is_empty() && stack.is_empty() { - return false; - } - - let stack_dir_path = Arc::::from(stack.join("/")); - - let (common_dir_path, stack_dir_name) = - if (base_depth == 0 || base_depth == stack.len()) && emitted_depth == 0 { - (None, Some(Arc::clone(&stack_dir_path))) - } else { - let common_dir_path = if base_depth == stack.len() || emitted_depth == stack.len() { - Arc::::from(stack[..emitted_depth].join("/")) - } else { - Arc::::from(stack[..base_depth].join("/")) - }; - let stack_dir_name = - Arc::::from(strip_path_prefix(&stack_dir_path, &common_dir_path)); - ( - Some(common_dir_path), - if stack_dir_name.len() == 0 { - None - } else { - Some(stack_dir_name) - }, - ) - }; - - let stack_dir_depth = if let Some(common_dir_path) = common_dir_path - && emitted_depth == 0 - { + fn emit_dir_item(level: usize, full_path: &str, name: &str, items: &mut Vec) { + let dir_path = Arc::::from(full_path.trim_end_matches('/')); + if full_path == name { items.push(FileTreeItem { kind: FileTreeItemKind::Directory, - full_path: Arc::clone(&common_dir_path), - name: common_dir_path, - level: base_depth.saturating_sub(1), + full_path: Arc::clone(&dir_path), + name: dir_path, + level: level, }); - base_dir_created = true; - base_depth } else { - emitted_depth - }; - - if let Some(stack_dir_name) = stack_dir_name { items.push(FileTreeItem { kind: FileTreeItemKind::Directory, - full_path: Arc::clone(&stack_dir_path), - name: stack_dir_name, - level: stack_dir_depth, + full_path: dir_path, + name: Arc::::from(name.trim_end_matches('/')), + level: level, }); } - - for leaf in leafs.drain(..) { - items.push(FileTreeItem { - kind: FileTreeItemKind::File, - full_path: Arc::::from(leaf), - name: strip_path_prefix(&leaf, &stack_dir_path).into(), - level: stack.len(), - }); - } - - base_dir_created } - let mut base_depth = 0; - let mut emitted_depth = 0; + stack.push(StackItem { + node: &nodes[0], + level: 0, + path_len: 0, + name_start: 0, + name_end: 0, + }); - for path in paths.0.iter() { - let path = key(path); - match path.rsplit_once('/') { - | None => { - flush_leafs(&mut leafs, &stack, &mut items, emitted_depth, base_depth); - stack.clear(); - // top level file - items.push(FileTreeItem { - kind: FileTreeItemKind::File, - full_path: path.into(), - name: path.into(), - level: 0, - }); - } + let mut dir_path = String::new(); - | Some((parent, _)) => { - let mut common_depth = 0; + while let Some(item) = stack.pop() { + let StackItem { + node, + level, + path_len, + name_start, + name_end, + } = item; - for (i, seg) in parent.split('/').enumerate() { - let stack_item = stack.get(i); - if stack_item.is_none() { - // segment is unseen, push to stack - stack.push(seg); - common_depth += 1; - } else if Some(&seg) == stack.get(i) { - // segment matches stack, continue comparison - common_depth += 1; - } else { - // segment differs from stack, stop comparison - break; - } - } + dir_path.truncate(path_len); - if common_depth == stack.len() { - // current path is in same directory as stack, add to leafs - leafs.push(path); - base_depth = common_depth; + let next_level: usize; + let mut next_name_start = name_start; + let mut next_name_end = name_end; + if node.segment.is_empty() { + next_level = level + 1; + } else { + dir_path.push_str(node.segment); + dir_path.push('/'); + if node.child_count == 0 { + items.push(FileTreeItem { + kind: FileTreeItemKind::File, + full_path: Arc::::from(&dir_path[..dir_path.len() - 1]), + name: Arc::::from(node.segment), + level: level - 1, + }); + next_level = level; + } else if node.child_count == 1 { + next_name_end = dir_path.len(); + next_level = level; } else { - // e.g. stack = ["a", "b", "c"], path = ["a", "c"] - // common dir path = "a/", stack dir path = "a/b/c", common count = 1 - // push common dir a to items - // also push stack dir a/b/c to items but strip a from name so that it becomes "b/c" with level equal to common_count - // finally push any leaf under a/b/c - - let base_dir_created = - flush_leafs(&mut leafs, &stack, &mut items, emitted_depth, common_depth); - - // pop top of stack minus common dir - stack.truncate(common_depth); - - if base_dir_created { - emitted_depth = common_depth; - } else { - emitted_depth = 0; - } - - for seg in parent.split('/').skip(common_depth) { - stack.push(seg); - } - - leafs.push(path); + emit_dir_item( + level - 1, + &dir_path, + &dir_path[name_start..name_end + node.segment.len()], + &mut items, + ); + next_name_start = dir_path.len(); + next_name_end = next_name_start + 1; + next_level = level + 1; } } + + if let Some(sib_i) = node.next_sibling { + let n = &nodes[sib_i]; + stack.push(StackItem { + level, + path_len, + name_start, + name_end, + node: n, + }); + } + + if let Some((child_i, _)) = node.child_ptrs { + let n = &nodes[child_i]; + + if n.child_count == 0 && !dir_path.is_empty() && next_name_start < dir_path.len() { + emit_dir_item( + level - 1, + &dir_path, + &dir_path[next_name_start..next_name_end], + &mut items, + ); + stack.push(StackItem { + level: next_level + 1, + node: n, + path_len: dir_path.len(), + name_start: next_name_start, + name_end: next_name_end, + }); + } else { + stack.push(StackItem { + level: next_level, + node: n, + path_len: dir_path.len(), + name_start: next_name_start, + name_end: next_name_end, + }); + } } } - flush_leafs(&mut leafs, &stack, &mut items, emitted_depth, base_depth); - items } diff --git a/src/util/file_tests.rs b/src/util/file_tests.rs index 4750630..e82928d 100644 --- a/src/util/file_tests.rs +++ b/src/util/file_tests.rs @@ -1,7 +1,46 @@ use super::*; use serde_json::Value; -fn assert_tree(paths: &[&str], expected: &[(&str, &str, usize)]) { +const DIR: &str = "dir"; +const FILE: &str = "file"; + +#[derive(Debug, PartialEq, Eq)] +struct TreeRow { + full_path: String, + name: String, + level: usize, + kind: &'static str, +} + +fn kind_name(kind: FileTreeItemKind) -> &'static str { + match kind { + | FileTreeItemKind::Directory => DIR, + | FileTreeItemKind::File => FILE, + } +} + +fn row_from_item(item: FileTreeItem) -> TreeRow { + TreeRow { + full_path: item.full_path.to_string(), + name: item.name.to_string(), + level: item.level, + kind: kind_name(item.kind), + } +} + +fn expected_rows(expected: &[(&str, &str, usize, &'static str)]) -> Vec { + expected + .iter() + .map(|(full_path, name, level, kind)| TreeRow { + full_path: (*full_path).to_string(), + name: (*name).to_string(), + level: *level, + kind, + }) + .collect() +} + +fn assert_tree(paths: &[&str], expected: &[(&str, &str, usize, &'static str)]) { let sorted_paths = sort_by_path(paths.to_vec(), |path| *path); assert_eq!( sorted_paths.0.as_slice(), @@ -11,21 +50,94 @@ fn assert_tree(paths: &[&str], expected: &[(&str, &str, usize)]) { let actual = build_file_tree(&sorted_paths, |path| *path) .into_iter() - .map(|item| { - ( - item.full_path.to_string(), - item.name.to_string(), - item.level, - ) - }) + .map(row_from_item) .collect::>(); - let expected = expected - .iter() - .map(|(full_path, name, level)| ((*full_path).to_string(), (*name).to_string(), *level)) - .collect::>(); + assert_tree_rows(paths, actual, expected_rows(expected)); +} - assert_eq!(actual, expected); +fn assert_tree_rows(paths: &[&str], actual: Vec, expected: Vec) { + if actual != expected { + panic!("{}", format_tree_failure(paths, &expected, &actual)); + } +} + +fn format_tree_failure(paths: &[&str], expected: &[TreeRow], actual: &[TreeRow]) -> String { + let mut output = String::new(); + + output.push_str("file tree mismatch\n\n"); + + output.push_str("input paths:\n"); + if paths.is_empty() { + output.push_str(" \n"); + } else { + for (i, path) in paths.iter().enumerate() { + output.push_str(&format!(" {i:>2}: {path}\n")); + } + } + + output.push('\n'); + output.push_str("expected tree:\n"); + format_rows(&mut output, expected); + + output.push('\n'); + output.push_str("actual tree:\n"); + format_rows(&mut output, actual); + + output.push('\n'); + output.push_str("first mismatch:\n"); + match first_mismatch(expected, actual) { + | Some(i) => { + output.push_str(&format!(" row {i}\n")); + output.push_str(" expected: "); + format_row_inline(&mut output, expected.get(i)); + output.push('\n'); + output.push_str(" actual: "); + format_row_inline(&mut output, actual.get(i)); + output.push('\n'); + } + | None => output.push_str(" \n"), + } + + output +} + +fn first_mismatch(expected: &[TreeRow], actual: &[TreeRow]) -> Option { + let max_len = expected.len().max(actual.len()); + (0..max_len).find(|&i| expected.get(i) != actual.get(i)) +} + +fn format_rows(output: &mut String, rows: &[TreeRow]) { + if rows.is_empty() { + output.push_str(" \n"); + return; + } + + output.push_str(" # kind lvl tree full_path\n"); + output.push_str(" -- ----- ---- ----------------------------------------- -----------------------------------------\n"); + + for (i, row) in rows.iter().enumerate() { + let indent = " ".repeat(row.level); + let tree_name = format!("{indent}{}", row.name); + output.push_str(&format!( + " {i:>2} {:<5} {:>4} {:<41} {}\n", + row.kind, row.level, tree_name, row.full_path, + )); + } +} + +fn format_row_inline(output: &mut String, row: Option<&TreeRow>) { + match row { + | Some(row) => { + let indent = " ".repeat(row.level); + output.push_str(&format!( + "{} level={} name={}{} full_path={}", + row.kind, row.level, indent, row.name, row.full_path + )); + } + + | None => output.push_str(""), + } } #[test] @@ -78,8 +190,8 @@ fn emits_top_level_files_as_level_zero_items() { assert_tree( &["Cargo.toml", "README.md"], &[ - ("Cargo.toml", "Cargo.toml", 0), - ("README.md", "README.md", 0), + ("Cargo.toml", "Cargo.toml", 0, FILE), + ("README.md", "README.md", 0, FILE), ], ); } @@ -89,9 +201,9 @@ fn groups_files_that_share_the_same_parent() { assert_tree( &["src/api/issues.rs", "src/api/repos.rs"], &[ - ("src/api", "src/api", 0), - ("src/api/issues.rs", "issues.rs", 2), - ("src/api/repos.rs", "repos.rs", 2), + ("src/api", "src/api", 0, DIR), + ("src/api/issues.rs", "issues.rs", 1, FILE), + ("src/api/repos.rs", "repos.rs", 1, FILE), ], ); } @@ -102,11 +214,11 @@ fn emits_shared_parent_once_for_sibling_singleton_dirs() { assert_tree( &["src/a/b", "src/c/d"], &[ - ("src", "src", 0), - ("src/a", "a", 1), - ("src/a/b", "b", 2), - ("src/c", "c", 1), - ("src/c/d", "d", 2), + ("src", "src", 0, DIR), + ("src/a", "a", 1, DIR), + ("src/a/b", "b", 2, FILE), + ("src/c", "c", 1, DIR), + ("src/c/d", "d", 2, FILE), ], ); } @@ -116,10 +228,10 @@ fn expands_unrelated_single_child_dirs() { assert_tree( &["src/libs.rs", "tests/integration.rs"], &[ - ("src", "src", 0), - ("src/libs.rs", "libs.rs", 1), - ("tests", "tests", 0), - ("tests/integration.rs", "integration.rs", 1), + ("src", "src", 0, DIR), + ("src/libs.rs", "libs.rs", 1, FILE), + ("tests", "tests", 0, DIR), + ("tests/integration.rs", "integration.rs", 1, FILE), ], ); } @@ -129,11 +241,11 @@ fn flushes_pending_dir_before_later_top_level_file() { assert_tree( &["lib/a.rs", "src/a.rs", "z.txt"], &[ - ("lib", "lib", 0), - ("lib/a.rs", "a.rs", 1), - ("src", "src", 0), - ("src/a.rs", "a.rs", 1), - ("z.txt", "z.txt", 0), + ("lib", "lib", 0, DIR), + ("lib/a.rs", "a.rs", 1, FILE), + ("src", "src", 0, DIR), + ("src/a.rs", "a.rs", 1, FILE), + ("z.txt", "z.txt", 0, FILE), ], ); } @@ -143,12 +255,12 @@ fn keeps_emitted_parent_for_mixed_multi_file_and_singleton_branches() { assert_tree( &["src/a/b", "src/a/c", "src/d/e"], &[ - ("src", "src", 0), - ("src/a", "a", 1), - ("src/a/b", "b", 2), - ("src/a/c", "c", 2), - ("src/d", "d", 1), - ("src/d/e", "e", 2), + ("src", "src", 0, DIR), + ("src/a", "a", 1, DIR), + ("src/a/b", "b", 2, FILE), + ("src/a/c", "c", 2, FILE), + ("src/d", "d", 1, DIR), + ("src/d/e", "e", 2, FILE), ], ); } @@ -180,30 +292,125 @@ fn builds_tree_for_pull_request_fixture_with_root_and_nested_file() { let actual = build_file_tree(&sorted_paths, |path| *path) .into_iter() - .map(|item| { - ( - item.full_path.to_string(), - item.name.to_string(), - item.level, - ) - }) + .map(row_from_item) .collect::>(); - assert_eq!( + assert_tree_rows( + sorted_paths.0.as_slice(), actual, - vec![ - ("src".to_string(), "src".to_string(), 0), + expected_rows(&[ + ("src", "src", 0, DIR), + ("src/screen/dashboard", "screen/dashboard", 1, DIR), ( - "src/screen/dashboard".to_string(), - "screen/dashboard".to_string(), - 1, + "src/screen/dashboard/issue_list.rs", + "issue_list.rs", + 2, + FILE, ), - ( - "src/screen/dashboard/issue_list.rs".to_string(), - "issue_list.rs".to_string(), - 3, - ), - ("src/query.rs".to_string(), "query.rs".to_string(), 1), + ("src/query.rs", "query.rs", 1, FILE), + ]), + ); +} + +#[test] +fn keeps_single_nested_file_one_level_under_compacted_directory() { + assert_tree( + &["src/screen/dashboard/issue_list.rs"], + &[ + ("src/screen/dashboard", "src/screen/dashboard", 0, DIR), + ("src/screen/dashboard/issue_list.rs", "issue_list.rs", 1, FILE), + ], + ); +} + +#[test] +fn keeps_sibling_files_one_level_under_compacted_parent() { + assert_tree( + &["app/controllers/home.rs", "app/controllers/users.rs"], + &[ + ("app/controllers", "app/controllers", 0, DIR), + ("app/controllers/home.rs", "home.rs", 1, FILE), + ("app/controllers/users.rs", "users.rs", 1, FILE), + ], + ); +} + +#[test] +fn keeps_compacted_branch_child_under_emitted_parent() { + assert_tree( + &["src/screen/dashboard/issue_list.rs", "src/query.rs"], + &[ + ("src", "src", 0, DIR), + ("src/screen/dashboard", "screen/dashboard", 1, DIR), + ("src/screen/dashboard/issue_list.rs", "issue_list.rs", 2, FILE), + ("src/query.rs", "query.rs", 1, FILE), + ], + ); +} + +#[test] +fn emits_shared_multi_segment_prefix_at_root_level() { + assert_tree( + &[ + "app/features/auth/login.rs", + "app/features/billing/invoice.rs", + ], + &[ + ("app/features", "app/features", 0, DIR), + ("app/features/auth", "auth", 1, DIR), + ("app/features/auth/login.rs", "login.rs", 2, FILE), + ("app/features/billing", "billing", 1, DIR), + ("app/features/billing/invoice.rs", "invoice.rs", 2, FILE), + ], + ); +} + +#[test] +fn keeps_files_under_sibling_branches_at_visible_depth() { + assert_tree( + &[ + "app/features/auth/login.rs", + "app/features/auth/logout.rs", + "app/features/billing/invoice.rs", + "app/features/billing/refund.rs", + ], + &[ + ("app/features", "app/features", 0, DIR), + ("app/features/auth", "auth", 1, DIR), + ("app/features/auth/login.rs", "login.rs", 2, FILE), + ("app/features/auth/logout.rs", "logout.rs", 2, FILE), + ("app/features/billing", "billing", 1, DIR), + ("app/features/billing/invoice.rs", "invoice.rs", 2, FILE), + ("app/features/billing/refund.rs", "refund.rs", 2, FILE), + ], + ); +} + +#[test] +fn resets_visible_depth_after_compacted_branch_before_root_file() { + assert_tree( + &["lib/core/runtime/mod.rs", "README.md"], + &[ + ("lib/core/runtime", "lib/core/runtime", 0, DIR), + ("lib/core/runtime/mod.rs", "mod.rs", 1, FILE), + ("README.md", "README.md", 0, FILE), + ], + ); +} + +#[test] +fn keeps_cascading_shared_prefixes_as_expanded_ancestors() { + assert_tree( + &["src/a/b/c/d", "src/a/b/d", "src/a/d", "src/d"], + &[ + ("src", "src", 0, DIR), + ("src/a", "a", 1, DIR), + ("src/a/b", "b", 2, DIR), + ("src/a/b/c", "c", 3, DIR), + ("src/a/b/c/d", "d", 4, FILE), + ("src/a/b/d", "d", 3, FILE), + ("src/a/d", "d", 2, FILE), + ("src/d", "d", 1, FILE), ], ); }