diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 852f361feb3..21db6b6374e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -391,7 +391,7 @@ jobs: - name: features of gix-features run: | set +x - for feature in progress fs-walkdir-parallel parallel io-pipe crc32 zlib zlib-rust-backend fast-sha1 rustsha1 cache-efficiency-debug; do + for feature in progress parallel io-pipe crc32 zlib zlib-rust-backend fast-sha1 rustsha1 cache-efficiency-debug; do (cd gix-features && cargo build --features "$feature" --target "$TARGET") done - name: crates with 'wasm' feature diff --git a/Cargo.lock b/Cargo.lock index 097826a9048..4bd5b1712ad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1849,7 +1849,6 @@ dependencies = [ "gix-hash 0.16.0", "gix-trace 0.1.12", "gix-utils 0.1.14", - "jwalk", "libc", "once_cell", "parking_lot", @@ -2520,6 +2519,7 @@ dependencies = [ "gix-ref 0.50.0", "gix-testtools", "gix-validate 0.9.3", + "insta", ] [[package]] diff --git a/gix-features/Cargo.toml b/gix-features/Cargo.toml index 7a5d598bc6f..67983ee4f34 100644 --- a/gix-features/Cargo.toml +++ b/gix-features/Cargo.toml @@ -26,9 +26,6 @@ progress-unit-human-numbers = ["prodash?/unit-human"] ## Provide human readable byte units for progress bars. progress-unit-bytes = ["dep:bytesize", "prodash?/unit-bytes"] -## If set, walkdir iterators will be multi-threaded. -fs-walkdir-parallel = ["dep:jwalk", "dep:gix-utils"] - ## Provide utilities suitable for working with the `std::fs::read_dir()`. fs-read-dir = ["dep:gix-utils"] @@ -131,7 +128,6 @@ gix-utils = { version = "^0.1.14", path = "../gix-utils", optional = true } crossbeam-channel = { version = "0.5.0", optional = true } parking_lot = { version = "0.12.0", default-features = false, optional = true } -jwalk = { version = "0.8.1", optional = true } walkdir = { version = "2.3.2", optional = true } # used when parallel is off # hashing and 'fast-sha1' feature diff --git a/gix-features/src/fs.rs b/gix-features/src/fs.rs index 6f9d2191d67..5cf5b7e1afa 100644 --- a/gix-features/src/fs.rs +++ b/gix-features/src/fs.rs @@ -7,7 +7,7 @@ //! * [`jwalk::WalkDir`](https://docs.rs/jwalk/0.5.1/jwalk/type.WalkDir.html) if `parallel` feature is enabled //! * [walkdir::WalkDir](https://docs.rs/walkdir/2.3.1/walkdir/struct.WalkDir.html) otherwise -#[cfg(any(feature = "walkdir", feature = "fs-walkdir-parallel"))] +#[cfg(feature = "walkdir")] mod shared { /// The desired level of parallelism. pub enum Parallelism { @@ -21,7 +21,7 @@ mod shared { } } -#[cfg(any(feature = "walkdir", feature = "fs-walkdir-parallel", feature = "fs-read-dir"))] +#[cfg(any(feature = "walkdir", feature = "fs-read-dir"))] mod walkdir_precompose { use std::borrow::Cow; use std::ffi::OsStr; @@ -83,13 +83,13 @@ mod walkdir_precompose { /// A platform over entries in a directory, which may or may not precompose unicode after retrieving /// paths from the file system. - #[cfg(any(feature = "walkdir", feature = "fs-walkdir-parallel"))] + #[cfg(feature = "walkdir")] pub struct WalkDir { pub(crate) inner: Option, pub(crate) precompose_unicode: bool, } - #[cfg(any(feature = "walkdir", feature = "fs-walkdir-parallel"))] + #[cfg(feature = "walkdir")] pub struct WalkDirIter where T: Iterator>, @@ -99,7 +99,7 @@ mod walkdir_precompose { pub(crate) precompose_unicode: bool, } - #[cfg(any(feature = "walkdir", feature = "fs-walkdir-parallel"))] + #[cfg(feature = "walkdir")] impl Iterator for WalkDirIter where T: Iterator>, @@ -142,128 +142,7 @@ pub mod read_dir { } /// -#[cfg(feature = "fs-walkdir-parallel")] -pub mod walkdir { - use std::borrow::Cow; - use std::ffi::OsStr; - use std::fs::FileType; - use std::path::Path; - - use jwalk::WalkDir as WalkDirImpl; - pub use jwalk::{DirEntry as DirEntryGeneric, DirEntryIter as DirEntryIterGeneric, Error}; - - pub use super::shared::Parallelism; - - type DirEntryImpl = DirEntryGeneric<((), ())>; - - /// A directory entry returned by [DirEntryIter]. - pub type DirEntry = super::walkdir_precompose::DirEntry; - /// A platform to create a [DirEntryIter] from. - pub type WalkDir = super::walkdir_precompose::WalkDir; - - impl super::walkdir_precompose::DirEntryApi for DirEntryImpl { - fn path(&self) -> Cow<'_, Path> { - self.path().into() - } - - fn file_name(&self) -> Cow<'_, OsStr> { - self.file_name().into() - } - - fn file_type(&self) -> std::io::Result { - Ok(self.file_type()) - } - } - - impl IntoIterator for WalkDir { - type Item = Result; - type IntoIter = DirEntryIter; - - fn into_iter(self) -> Self::IntoIter { - DirEntryIter { - inner: self.inner.expect("always set (builder fix)").into_iter(), - precompose_unicode: self.precompose_unicode, - } - } - } - - impl WalkDir { - /// Set the minimum component depth of paths of entries. - pub fn min_depth(mut self, min: usize) -> Self { - self.inner = Some(self.inner.take().expect("always set").min_depth(min)); - self - } - /// Set the maximum component depth of paths of entries. - pub fn max_depth(mut self, max: usize) -> Self { - self.inner = Some(self.inner.take().expect("always set").max_depth(max)); - self - } - /// Follow symbolic links. - pub fn follow_links(mut self, toggle: bool) -> Self { - self.inner = Some(self.inner.take().expect("always set").follow_links(toggle)); - self - } - } - - impl From for jwalk::Parallelism { - fn from(v: Parallelism) -> Self { - match v { - Parallelism::Serial => jwalk::Parallelism::Serial, - Parallelism::ThreadPoolPerTraversal { thread_name } => std::thread::available_parallelism() - .map_or_else( - |_| Parallelism::Serial.into(), - |threads| { - let pool = jwalk::rayon::ThreadPoolBuilder::new() - .num_threads(threads.get().min(16)) - .stack_size(128 * 1024) - .thread_name(move |idx| format!("{thread_name} {idx}")) - .build() - .expect("we only set options that can't cause a build failure"); - jwalk::Parallelism::RayonExistingPool { - pool: pool.into(), - busy_timeout: None, - } - }, - ), - } - } - } - - /// Instantiate a new directory iterator which will not skip hidden files, with the given level of `parallelism`. - /// - /// Use `precompose_unicode` to represent the `core.precomposeUnicode` configuration option. - pub fn walkdir_new(root: &Path, parallelism: Parallelism, precompose_unicode: bool) -> WalkDir { - WalkDir { - inner: WalkDirImpl::new(root) - .skip_hidden(false) - .parallelism(parallelism.into()) - .into(), - precompose_unicode, - } - } - - /// Instantiate a new directory iterator which will not skip hidden files and is sorted - /// - /// Use `precompose_unicode` to represent the `core.precomposeUnicode` configuration option. - pub fn walkdir_sorted_new(root: &Path, parallelism: Parallelism, precompose_unicode: bool) -> WalkDir { - WalkDir { - inner: WalkDirImpl::new(root) - .skip_hidden(false) - .sort(true) - .parallelism(parallelism.into()) - .into(), - precompose_unicode, - } - } - - type DirEntryIterImpl = DirEntryIterGeneric<((), ())>; - - /// The Iterator yielding directory items - pub type DirEntryIter = super::walkdir_precompose::WalkDirIter; -} - -/// -#[cfg(all(feature = "walkdir", not(feature = "fs-walkdir-parallel")))] +#[cfg(feature = "walkdir")] pub mod walkdir { use std::borrow::Cow; use std::ffi::OsStr; @@ -338,8 +217,21 @@ pub mod walkdir { /// /// Use `precompose_unicode` to represent the `core.precomposeUnicode` configuration option. pub fn walkdir_sorted_new(root: &Path, _: Parallelism, precompose_unicode: bool) -> WalkDir { + fn ft_to_number(ft: std::fs::FileType) -> usize { + if ft.is_file() { + 1 + } else { + 2 + } + } WalkDir { - inner: WalkDirImpl::new(root).sort_by_file_name().into(), + inner: WalkDirImpl::new(root) + .sort_by(|a, b| { + ft_to_number(a.file_type()) + .cmp(&ft_to_number(b.file_type())) + .then_with(|| a.file_name().cmp(b.file_name())) + }) + .into(), precompose_unicode, } } @@ -348,7 +240,7 @@ pub mod walkdir { pub type DirEntryIter = super::walkdir_precompose::WalkDirIter; } -#[cfg(any(feature = "walkdir", feature = "fs-walkdir-parallel"))] +#[cfg(feature = "walkdir")] pub use self::walkdir::{walkdir_new, walkdir_sorted_new, WalkDir}; /// Prepare open options which won't follow symlinks when the file is opened. diff --git a/gix-ref/tests/Cargo.toml b/gix-ref/tests/Cargo.toml index 0b0345bd0eb..9ec1578b984 100644 --- a/gix-ref/tests/Cargo.toml +++ b/gix-ref/tests/Cargo.toml @@ -32,3 +32,4 @@ gix-hash = { path = "../../gix-hash" } gix-validate = { path = "../../gix-validate" } gix-lock = { path = "../../gix-lock" } gix-object = { path = "../../gix-object" } +insta = "1.42.1" diff --git a/gix-ref/tests/fixtures/generated-archives/make_repo_for_1850_repro.tar b/gix-ref/tests/fixtures/generated-archives/make_repo_for_1850_repro.tar new file mode 100644 index 00000000000..9e5178f7397 Binary files /dev/null and b/gix-ref/tests/fixtures/generated-archives/make_repo_for_1850_repro.tar differ diff --git a/gix-ref/tests/fixtures/make_repo_for_1850_repro.sh b/gix-ref/tests/fixtures/make_repo_for_1850_repro.sh new file mode 100755 index 00000000000..fee6ec02e81 --- /dev/null +++ b/gix-ref/tests/fixtures/make_repo_for_1850_repro.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +set -eu -o pipefail + +git init -q + +cat <.git/packed-refs +# pack-refs with: peeled fully-peeled sorted +17dad46c0ce3be4d4b6d45def031437ab2e40666 refs/heads/ig-branch-remote +83a70366fcc1255d35a00102138293bac673b331 refs/heads/ig-inttest +21b57230833a1733f6685e14eabe936a09689a1b refs/heads/ig-pr4021 +d773228d0ee0012fcca53fffe581b0fce0b1dc56 refs/heads/ig/aliases +ba37abe04f91fec76a6b9a817d40ee2daec47207 refs/heads/ig/cifail +EOF + +mkdir -p .git/refs/heads/ig/pr +echo d22f46f3d7d2504d56c573b5fe54919bd16be48a >.git/refs/heads/ig/push-name +echo 4dec145966c546402c5a9e28b932e7c8c939e01e >.git/refs/heads/ig-pr4021 diff --git a/gix-ref/tests/refs/file/store/iter.rs b/gix-ref/tests/refs/file/store/iter.rs index bcd80ded6cb..1d32428f07e 100644 --- a/gix-ref/tests/refs/file/store/iter.rs +++ b/gix-ref/tests/refs/file/store/iter.rs @@ -31,8 +31,8 @@ mod with_namespace { .map(|r: gix_ref::Reference| r.name) .collect::>(); let expected_namespaced_refs = vec![ - "refs/namespaces/bar/refs/heads/multi-link-target1", "refs/namespaces/bar/refs/multi-link", + "refs/namespaces/bar/refs/heads/multi-link-target1", "refs/namespaces/bar/refs/remotes/origin/multi-link-target3", "refs/namespaces/bar/refs/tags/multi-link-target2", ]; @@ -50,8 +50,8 @@ mod with_namespace { .map(|r| r.name.into_inner()) .collect::>(), [ - "refs/namespaces/bar/refs/heads/multi-link-target1", "refs/namespaces/bar/refs/multi-link", + "refs/namespaces/bar/refs/heads/multi-link-target1", "refs/namespaces/bar/refs/tags/multi-link-target2" ] ); @@ -149,8 +149,8 @@ mod with_namespace { let packed = ns_store.open_packed_buffer()?; let expected_refs = vec![ - "refs/heads/multi-link-target1", "refs/multi-link", + "refs/heads/multi-link-target1", "refs/remotes/origin/multi-link-target3", "refs/tags/multi-link-target2", ]; @@ -198,8 +198,8 @@ mod with_namespace { .map(|r| r.name.into_inner()) .collect::>(), [ - "refs/heads/multi-link-target1", "refs/multi-link", + "refs/heads/multi-link-target1", "refs/tags/multi-link-target2", ], "loose iterators have namespace support as well" @@ -214,8 +214,8 @@ mod with_namespace { .map(|r| r.name.into_inner()) .collect::>(), [ - "refs/namespaces/bar/refs/heads/multi-link-target1", "refs/namespaces/bar/refs/multi-link", + "refs/namespaces/bar/refs/heads/multi-link-target1", "refs/namespaces/bar/refs/tags/multi-link-target2", "refs/namespaces/foo/refs/remotes/origin/HEAD" ], @@ -291,14 +291,14 @@ fn loose_iter_with_broken_refs() -> crate::Result { ref_paths, vec![ "d1", + "loop-a", + "loop-b", + "multi-link", "heads/A", "heads/d1", "heads/dt1", "heads/main", "heads/multi-link-target1", - "loop-a", - "loop-b", - "multi-link", "remotes/origin/HEAD", "remotes/origin/main", "remotes/origin/multi-link-target3", @@ -413,6 +413,57 @@ fn overlay_iter() -> crate::Result { Ok(()) } +#[test] +fn overlay_iter_reproduce_1850() -> crate::Result { + let store = store_at("make_repo_for_1850_repro.sh")?; + let ref_names = store + .iter()? + .all()? + .map(|r| r.map(|r| (r.name.as_bstr().to_owned(), r.target))) + .collect::, _>>()?; + insta::assert_debug_snapshot!(ref_names, @r#" + [ + ( + "refs/heads/ig-branch-remote", + Object( + Sha1(17dad46c0ce3be4d4b6d45def031437ab2e40666), + ), + ), + ( + "refs/heads/ig-inttest", + Object( + Sha1(83a70366fcc1255d35a00102138293bac673b331), + ), + ), + ( + "refs/heads/ig-pr4021", + Object( + Sha1(4dec145966c546402c5a9e28b932e7c8c939e01e), + ), + ), + ( + "refs/heads/ig/aliases", + Object( + Sha1(d773228d0ee0012fcca53fffe581b0fce0b1dc56), + ), + ), + ( + "refs/heads/ig/cifail", + Object( + Sha1(ba37abe04f91fec76a6b9a817d40ee2daec47207), + ), + ), + ( + "refs/heads/ig/push-name", + Object( + Sha1(d22f46f3d7d2504d56c573b5fe54919bd16be48a), + ), + ), + ] + "#); + Ok(()) +} + #[test] fn overlay_iter_with_prefix_wont_allow_absolute_paths() -> crate::Result { let store = store_with_packed_refs()?; diff --git a/gix/Cargo.toml b/gix/Cargo.toml index 8da874b8d6f..5e0a0d2a31a 100644 --- a/gix/Cargo.toml +++ b/gix/Cargo.toml @@ -230,11 +230,6 @@ max-control = ["parallel", "pack-cache-lru-static", "pack-cache-lru-dynamic"] ## No C toolchain is involved. max-performance-safe = ["max-control"] -## If set, walkdir iterators will be multi-threaded which affects the listing of loose objects and references. -## Note, however, that this will use `rayon` under the hood and spawn threads for each traversal to avoid a global rayon thread pool. -## Thus this option is more interesting to one-off client applications, rather than the server. -parallel-walkdir = ["gix-features/fs-walkdir-parallel"] - ## The tempfile registry uses a better implementation of a thread-safe hashmap, relying on an external crate. ## This may be useful when tempfiles are created and accessed in a massively parallel fashion and you know that this is indeed faster than ## the simpler implementation that is the default. diff --git a/gix/tests/gix/repository/reference.rs b/gix/tests/gix/repository/reference.rs index 94e9e8574d2..acea4e016ed 100644 --- a/gix/tests/gix/repository/reference.rs +++ b/gix/tests/gix/repository/reference.rs @@ -102,10 +102,10 @@ mod iter_references { "refs/heads/d1", "refs/heads/dt1", "refs/heads/main", - "refs/heads/multi-link-target1", "refs/loop-a", "refs/loop-b", "refs/multi-link", + "refs/heads/multi-link-target1", "refs/remotes/origin/HEAD", "refs/remotes/origin/main", "refs/remotes/origin/multi-link-target3", diff --git a/justfile b/justfile index 76d209aa577..da837e97e13 100755 --- a/justfile +++ b/justfile @@ -98,7 +98,6 @@ check: cargo check -p gix-status --all-features cargo check -p gix-features --all-features cargo check -p gix-features --features parallel - cargo check -p gix-features --features fs-walkdir-parallel cargo check -p gix-features --features fs-read-dir cargo check -p gix-features --features rustsha1 cargo check -p gix-features --features fast-sha1