diff --git a/Cargo.lock b/Cargo.lock
index 966af438..81166bb1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -155,7 +155,7 @@ dependencies = [
  "js-sys",
  "num-traits",
  "wasm-bindgen",
- "windows-link 0.2.1",
+ "windows-link",
 ]
 
 [[package]]
@@ -311,10 +311,10 @@ dependencies = [
  "portable-atomic",
  "rayon",
  "regex",
+ "rustc-hash",
  "serde",
  "serde_json",
  "stfu8",
- "sysinfo",
  "tempfile",
  "terminal_size",
  "thousands",
@@ -389,7 +389,7 @@ dependencies = [
  "js-sys",
  "log",
  "wasm-bindgen",
- "windows-core 0.62.2",
+ "windows-core",
 ]
 
 [[package]]
@@ -469,15 +469,6 @@ dependencies = [
  "libc",
 ]
 
-[[package]]
-name = "ntapi"
-version = "0.4.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c70f219e21142367c70c0b30c6a9e3a14d55b4d12a204d897fbec83a0363f081"
-dependencies = [
- "winapi",
-]
-
 [[package]]
 name = "nu-ansi-term"
 version = "0.50.3"
@@ -505,31 +496,12 @@ dependencies = [
  "objc2-encode",
 ]
 
-[[package]]
-name = "objc2-core-foundation"
-version = "0.3.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536"
-dependencies = [
- "bitflags",
-]
-
 [[package]]
 name = "objc2-encode"
 version = "4.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ef25abbcd74fb2609453eb695bd2f860d389e457f67dc17cafc8b8cbc89d0c33"
 
-[[package]]
-name = "objc2-io-kit"
-version = "0.3.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "33fafba39597d6dc1fb709123dfa8289d39406734be322956a69f0931c73bb15"
-dependencies = [
- "libc",
- "objc2-core-foundation",
-]
-
 [[package]]
 name = "once_cell"
 version = "1.21.3"
@@ -654,6 +626,12 @@ version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "88f8660c1ff60292143c98d08fc6e2f654d722db50410e3f3797d40baaf9d8f3"
 
+[[package]]
+name = "rustc-hash"
+version = "2.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe"
+
 [[package]]
 name = "rustix"
 version = "1.1.3"
@@ -745,20 +723,6 @@ dependencies = [
  "unicode-ident",
 ]
 
-[[package]]
-name = "sysinfo"
-version = "0.37.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "16607d5caffd1c07ce073528f9ed972d88db15dd44023fa57142963be3feb11f"
-dependencies = [
- "libc",
- "memchr",
- "ntapi",
- "objc2-core-foundation",
- "objc2-io-kit",
- "windows",
-]
-
 [[package]]
 name = "tempfile"
 version = "3.24.0"
@@ -935,41 +899,6 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
 
-[[package]]
-name = "windows"
-version = "0.61.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893"
-dependencies = [
- "windows-collections",
- "windows-core 0.61.2",
- "windows-future",
- "windows-link 0.1.3",
- "windows-numerics",
-]
-
-[[package]]
-name = "windows-collections"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8"
-dependencies = [
- "windows-core 0.61.2",
-]
-
-[[package]]
-name = "windows-core"
-version = "0.61.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3"
-dependencies = [
- "windows-implement",
- "windows-interface",
- "windows-link 0.1.3",
- "windows-result 0.3.4",
- "windows-strings 0.4.2",
-]
-
 [[package]]
 name = "windows-core"
 version = "0.62.2"
@@ -978,20 +907,9 @@ checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb"
 dependencies = [
  "windows-implement",
  "windows-interface",
- "windows-link 0.2.1",
- "windows-result 0.4.1",
- "windows-strings 0.5.1",
-]
-
-[[package]]
-name = "windows-future"
-version = "0.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e"
-dependencies = [
- "windows-core 0.61.2",
- "windows-link 0.1.3",
- "windows-threading",
+ "windows-link",
+ "windows-result",
+ "windows-strings",
 ]
 
 [[package]]
@@ -1016,53 +934,19 @@ dependencies = [
  "syn",
 ]
 
-[[package]]
-name = "windows-link"
-version = "0.1.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a"
-
 [[package]]
 name = "windows-link"
 version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
 
-[[package]]
-name = "windows-numerics"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1"
-dependencies = [
- "windows-core 0.61.2",
- "windows-link 0.1.3",
-]
-
-[[package]]
-name = "windows-result"
-version = "0.3.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6"
-dependencies = [
- "windows-link 0.1.3",
-]
-
 [[package]]
 name = "windows-result"
 version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5"
 dependencies = [
- "windows-link 0.2.1",
-]
-
-[[package]]
-name = "windows-strings"
-version = "0.4.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57"
-dependencies = [
- "windows-link 0.1.3",
+ "windows-link",
 ]
 
 [[package]]
@@ -1071,7 +955,7 @@ version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091"
 dependencies = [
- "windows-link 0.2.1",
+ "windows-link",
 ]
 
 [[package]]
@@ -1089,7 +973,7 @@ version = "0.61.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
 dependencies = [
- "windows-link 0.2.1",
+ "windows-link",
 ]
 
 [[package]]
@@ -1098,7 +982,7 @@ version = "0.53.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3"
 dependencies = [
- "windows-link 0.2.1",
+ "windows-link",
  "windows_aarch64_gnullvm",
  "windows_aarch64_msvc",
  "windows_i686_gnu",
@@ -1109,15 +993,6 @@ dependencies = [
  "windows_x86_64_msvc",
 ]
 
-[[package]]
-name = "windows-threading"
-version = "0.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6"
-dependencies = [
- "windows-link 0.1.3",
-]
-
 [[package]]
 name = "windows_aarch64_gnullvm"
 version = "0.53.1"
diff --git a/Cargo.toml b/Cargo.toml
index c03ccdce..f556a905 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -39,9 +39,9 @@ regex = "1"
 config-file = "0.2"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
-sysinfo = "0.37"
 ctrlc = "3"
 chrono = "0.4"
+rustc-hash = "2"
 
 [target.'cfg(not(target_has_atomic = "64"))'.dependencies]
 portable-atomic = "1.4"
diff --git a/README.md b/README.md
index d46b87b0..b61cfd22 100644
--- a/README.md
+++ b/README.md
@@ -117,8 +117,8 @@ Usage: dust -e regex (Only include files matching this regex (eg dust -e "\.png$
 Usage: dust -v regex (Exclude files matching this regex (eg dust -v "\.png$" would ignore png files))
 Usage: dust -L (dereference-links - Treat sym links as directories and go into them)
 Usage: dust -P (Disable the progress indicator)
+Usage: dust -T (Set the number of walker threads. Default is the CPU count. For high-latency storage like NFS or remote mounts, a higher count can speed up the walk by overlapping more concurrent stat calls.)
 Usage: dust -R (For screen readers. Removes bars/symbols. Adds new column: depth level. (May want to use -p for full path too))
-Usage: dust -S (Custom Stack size - Use if you see: 'fatal runtime error: stack overflow' (default allocation: low memory=1048576, high memory=1073741824)"),
 Usage: dust --skip-total (No total row will be displayed)
 Usage: dust -z 40000/30MB/20kib (Exclude output files/directories below size 40000 bytes / 30MB / 20KiB)
 Usage: dust -j (Prints JSON representation of directories, try: dust -j  | jq)
diff --git a/completions/_dust b/completions/_dust
index 28483922..c4da5df6 100644
--- a/completions/_dust
+++ b/completions/_dust
@@ -54,8 +54,8 @@ kb\:"kilobyte (kB)"
 mb\:"megabyte (MB)"
 gb\:"gigabyte (GB)"
 tb\:"terabyte (TB)"))' \
-'-S+[Specify memory to use as stack size - use if you see\: '\''fatal runtime error\: stack overflow'\'' (default low memory=1048576, high memory=1073741824)]:STACK_SIZE:_default' \
-'--stack-size=[Specify memory to use as stack size - use if you see\: '\''fatal runtime error\: stack overflow'\'' (default low memory=1048576, high memory=1073741824)]:STACK_SIZE:_default' \
+'-S+[Deprecated. The walker no longer recurses so a custom stack size is unnecessary. Accepted for compatibility but the value is ignored]:STACK_SIZE:_default' \
+'--stack-size=[Deprecated. The walker no longer recurses so a custom stack size is unnecessary. Accepted for compatibility but the value is ignored]:STACK_SIZE:_default' \
 '-M+[+/-n matches files modified more/less than n days ago , and n matches files modified exactly n days ago, days are rounded down.That is +n => (−∞, curr−(n+1)), n => \[curr−(n+1), curr−n), and -n => (𝑐𝑢𝑟𝑟−𝑛, +∞)]:MTIME:_default' \
 '--mtime=[+/-n matches files modified more/less than n days ago , and n matches files modified exactly n days ago, days are rounded down.That is +n => (−∞, curr−(n+1)), n => \[curr−(n+1), curr−n), and -n => (𝑐𝑢𝑟𝑟−𝑛, +∞)]:MTIME:_default' \
 '-A+[just like -mtime, but based on file access time]:ATIME:_default' \
diff --git a/completions/_dust.ps1 b/completions/_dust.ps1
index 9129d190..4513c655 100644
--- a/completions/_dust.ps1
+++ b/completions/_dust.ps1
@@ -42,8 +42,8 @@ Register-ArgumentCompleter -Native -CommandName 'dust' -ScriptBlock {
             [CompletionResult]::new('--terminal-width', '--terminal-width', [CompletionResultType]::ParameterName, 'Specify width of output overriding the auto detection of terminal width')
             [CompletionResult]::new('-o', '-o', [CompletionResultType]::ParameterName, 'Changes output display size. si will print sizes in powers of 1000. b k m g t kb mb gb tb will print the whole tree in that size')
             [CompletionResult]::new('--output-format', '--output-format', [CompletionResultType]::ParameterName, 'Changes output display size. si will print sizes in powers of 1000. b k m g t kb mb gb tb will print the whole tree in that size')
-            [CompletionResult]::new('-S', '-S ', [CompletionResultType]::ParameterName, 'Specify memory to use as stack size - use if you see: ''fatal runtime error: stack overflow'' (default low memory=1048576, high memory=1073741824)')
-            [CompletionResult]::new('--stack-size', '--stack-size', [CompletionResultType]::ParameterName, 'Specify memory to use as stack size - use if you see: ''fatal runtime error: stack overflow'' (default low memory=1048576, high memory=1073741824)')
+            [CompletionResult]::new('-S', '-S ', [CompletionResultType]::ParameterName, 'Deprecated. The walker no longer recurses so a custom stack size is unnecessary. Accepted for compatibility but the value is ignored')
+            [CompletionResult]::new('--stack-size', '--stack-size', [CompletionResultType]::ParameterName, 'Deprecated. The walker no longer recurses so a custom stack size is unnecessary. Accepted for compatibility but the value is ignored')
             [CompletionResult]::new('-M', '-M ', [CompletionResultType]::ParameterName, '+/-n matches files modified more/less than n days ago , and n matches files modified exactly n days ago, days are rounded down.That is +n => (−∞, curr−(n+1)), n => [curr−(n+1), curr−n), and -n => (𝑐𝑢𝑟𝑟−𝑛, +∞)')
             [CompletionResult]::new('--mtime', '--mtime', [CompletionResultType]::ParameterName, '+/-n matches files modified more/less than n days ago , and n matches files modified exactly n days ago, days are rounded down.That is +n => (−∞, curr−(n+1)), n => [curr−(n+1), curr−n), and -n => (𝑐𝑢𝑟𝑟−𝑛, +∞)')
             [CompletionResult]::new('-A', '-A ', [CompletionResultType]::ParameterName, 'just like -mtime, but based on file access time')
diff --git a/completions/dust.elv b/completions/dust.elv
index f7561401..17c87e65 100644
--- a/completions/dust.elv
+++ b/completions/dust.elv
@@ -39,8 +39,8 @@ set edit:completion:arg-completer[dust] = {|@words|
             cand --terminal-width 'Specify width of output overriding the auto detection of terminal width'
             cand -o 'Changes output display size. si will print sizes in powers of 1000. b k m g t kb mb gb tb will print the whole tree in that size'
             cand --output-format 'Changes output display size. si will print sizes in powers of 1000. b k m g t kb mb gb tb will print the whole tree in that size'
-            cand -S 'Specify memory to use as stack size - use if you see: ''fatal runtime error: stack overflow'' (default low memory=1048576, high memory=1073741824)'
-            cand --stack-size 'Specify memory to use as stack size - use if you see: ''fatal runtime error: stack overflow'' (default low memory=1048576, high memory=1073741824)'
+            cand -S 'Deprecated. The walker no longer recurses so a custom stack size is unnecessary. Accepted for compatibility but the value is ignored'
+            cand --stack-size 'Deprecated. The walker no longer recurses so a custom stack size is unnecessary. Accepted for compatibility but the value is ignored'
             cand -M '+/-n matches files modified more/less than n days ago , and n matches files modified exactly n days ago, days are rounded down.That is +n => (−∞, curr−(n+1)), n => [curr−(n+1), curr−n), and -n => (𝑐𝑢𝑟𝑟−𝑛, +∞)'
             cand --mtime '+/-n matches files modified more/less than n days ago , and n matches files modified exactly n days ago, days are rounded down.That is +n => (−∞, curr−(n+1)), n => [curr−(n+1), curr−n), and -n => (𝑐𝑢𝑟𝑟−𝑛, +∞)'
             cand -A 'just like -mtime, but based on file access time'
diff --git a/completions/dust.fish b/completions/dust.fish
index 8b52c6a7..e41a4cde 100644
--- a/completions/dust.fish
+++ b/completions/dust.fish
@@ -18,7 +18,7 @@ kb\t'kilobyte (kB)'
 mb\t'megabyte (MB)'
 gb\t'gigabyte (GB)'
 tb\t'terabyte (TB)'"
-complete -c dust -s S -l stack-size -d 'Specify memory to use as stack size - use if you see: \'fatal runtime error: stack overflow\' (default low memory=1048576, high memory=1073741824)' -r
+complete -c dust -s S -l stack-size -d 'Deprecated. The walker no longer recurses so a custom stack size is unnecessary. Accepted for compatibility but the value is ignored' -r
 complete -c dust -s M -l mtime -d '+/-n matches files modified more/less than n days ago , and n matches files modified exactly n days ago, days are rounded down.That is +n => (−∞, curr−(n+1)), n => [curr−(n+1), curr−n), and -n => (𝑐𝑢𝑟𝑟−𝑛, +∞)' -r
 complete -c dust -s A -l atime -d 'just like -mtime, but based on file access time' -r
 complete -c dust -s y -l ctime -d 'just like -mtime, but based on file change time' -r
diff --git a/man-page/dust.1 b/man-page/dust.1
index 5de50488..53b85a45 100644
--- a/man-page/dust.1
+++ b/man-page/dust.1
@@ -123,7 +123,7 @@ tb: terabyte (TB)
 .RE
 .TP
 \fB\-S\fR, \fB\-\-stack\-size\fR \fI<STACK_SIZE>\fR
-Specify memory to use as stack size \- use if you see: \*(Aqfatal runtime error: stack overflow\*(Aq (default low memory=1048576, high memory=1073741824)
+Deprecated. The walker no longer recurses so a custom stack size is unnecessary. Accepted for compatibility but the value is ignored
 .TP
 \fB\-j\fR, \fB\-\-output\-json\fR
 Output the directory tree as json to the current directory
diff --git a/src/cli.rs b/src/cli.rs
index bf9f37f4..ffe8acb6 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -144,9 +144,8 @@ pub struct Cli {
     #[arg(short, long, value_enum, value_name("FORMAT"), ignore_case(true))]
     pub output_format: Option<OutputFormat>,
 
-    /// Specify memory to use as stack size - use if you see: 'fatal runtime
-    /// error: stack overflow' (default low memory=1048576, high
-    /// memory=1073741824)
+    /// Deprecated. The walker no longer recurses so a custom stack size is
+    /// unnecessary. Accepted for compatibility but the value is ignored.
     #[arg(short('S'), long)]
     pub stack_size: Option<usize>,
 
diff --git a/src/config.rs b/src/config.rs
index 141500e3..07d95740 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -31,7 +31,6 @@ pub struct Config {
     pub disable_progress: Option<bool>,
     pub depth: Option<usize>,
     pub bars_on_right: Option<bool>,
-    pub stack_size: Option<usize>,
     pub threads: Option<usize>,
     pub output_json: Option<bool>,
     pub print_errors: Option<bool>,
@@ -138,14 +137,6 @@ impl Config {
     pub fn get_bars_on_right(&self, options: &Cli) -> bool {
         Some(true) == self.bars_on_right || options.bars_on_right
     }
-    pub fn get_custom_stack_size(&self, options: &Cli) -> Option<usize> {
-        let from_cmd_line = options.stack_size;
-        if from_cmd_line.is_none() {
-            self.stack_size
-        } else {
-            from_cmd_line
-        }
-    }
     pub fn get_threads(&self, options: &Cli) -> Option<usize> {
         let from_cmd_line = options.threads;
         if from_cmd_line.is_none() {
diff --git a/src/dir_walker.rs b/src/dir_walker.rs
index c56baa85..50fcad0d 100644
--- a/src/dir_walker.rs
+++ b/src/dir_walker.rs
@@ -3,6 +3,8 @@ use std::fs;
 use std::io::Error;
 use std::sync::Arc;
 use std::sync::Mutex;
+use std::sync::OnceLock;
+use std::sync::atomic::{AtomicUsize, Ordering as AtomicOrdering};
 
 use crate::node::Node;
 use crate::progress::ORDERING;
@@ -12,19 +14,20 @@ use crate::progress::RuntimeErrors;
 use crate::utils::is_filtered_out_due_to_file_time;
 use crate::utils::is_filtered_out_due_to_invert_regex;
 use crate::utils::is_filtered_out_due_to_regex;
-use rayon::iter::ParallelBridge;
-use rayon::prelude::ParallelIterator;
+use rayon::iter::{IntoParallelIterator, ParallelIterator};
 use regex::Regex;
 use std::path::Path;
 use std::path::PathBuf;
 
 use std::collections::HashSet;
 
+use rustc_hash::FxHashSet;
+
 use crate::node::build_node;
 use std::fs::DirEntry;
 
 use crate::node::FileTime;
-use crate::platform::get_metadata;
+use crate::platform::{MetadataTuple, get_metadata, tuple_from_metadata};
 
 #[derive(Debug)]
 pub enum Operator {
@@ -48,27 +51,105 @@ pub struct WalkData<'a> {
     pub follow_links: bool,
     pub progress_data: Arc<PAtomicInfo>,
     pub errors: Arc<Mutex<RuntimeErrors>>,
+    // True iff any of the filter-style WalkData fields (ignore_directories,
+    // allowed_filesystems, filter_*_time, filter_regex, invert_filter_regex)
+    // would do work in `ignore_file`. Computed once in main.rs so the hot
+    // path in `process_entry` can skip the function call entirely when no
+    // filter flags are set.
+    pub has_any_filter: bool,
 }
 
-pub fn walk_it(dirs: HashSet<PathBuf>, walk_data: &WalkData) -> Vec<Node> {
-    let mut inodes = HashSet::new();
-    let top_level_nodes: Vec<_> = dirs
-        .into_iter()
-        .filter_map(|d| {
-            let prog_data = &walk_data.progress_data;
-            prog_data.clear_state(&d);
-            let node = walk(d, walk_data, 0)?;
-
-            prog_data.state.store(Operation::PREPARING, ORDERING);
+// Per-directory bookkeeping used during the parallel walk. Each directory gets
+// one `PendingDir`. Subdirectory tasks hold an `Arc` back to their parent so
+// they can push their finished `Node` into the parent's `children` and
+// decrement `pending`. When `pending` reaches zero the directory is ready to
+// be built and handed up to its own parent.
+struct PendingDir {
+    dir: PathBuf,
+    depth: usize,
+    is_symlink: bool,
+    parent: Option<Arc<PendingDir>>,
+    // Starts at 1 for the directory itself; incremented per spawned
+    // subdirectory task. Each completion decrements by 1. Reaching 0
+    // means this directory and all descendants are done.
+    pending: AtomicUsize,
+    children: Mutex<Vec<Node>>,
+    // Cached stat for this directory, set once at `walk_dir` entry and
+    // consumed at `finalize_chain`. Avoids a second stat per directory
+    // (one for the is_dir/is_file branching, one to build the Node).
+    // We cache the parsed `MetadataTuple` rather than `std::fs::Metadata`
+    // Just the info we need, ~120 B/dir smaller, and `Copy`.
+    // `None` means the stat failed (broken symlink, raced deletion, ...).
+    cached_metadata: OnceLock<Option<MetadataTuple>>,
+}
 
-            clean_inodes(node, &mut inodes, walk_data)
-        })
-        .collect();
+pub fn walk_it(dirs: HashSet<PathBuf>, walk_data: &WalkData) -> Vec<Node> {
+    // FxHash is substantially faster than std's default SipHash on small
+    // primitive keys. DoS resistance is irrelevant here; the keys are
+    // (inode, device) pairs from the filesystem, not user input.
+    let mut inodes: FxHashSet<(u64, u64)> = FxHashSet::default();
+    let mut top_level_nodes: Vec<Node> = Vec::new();
+
+    for d in dirs {
+        walk_data.progress_data.clear_state(&d);
+
+        // A root passed on the command line that *is* a symlink-to-dir gets
+        // followed regardless of `follow_links`. This preserves the existing
+        // behavior.
+        let root_is_symlink = fs::symlink_metadata(&d)
+            .map(|m| m.file_type().is_symlink())
+            .unwrap_or(false);
+
+        // Synthetic outer parent above the root. Lets `finalize_chain` build
+        // the root's Node via the same code path as every other directory: it
+        // pushes the finished root Node into `outer.children`, then bubbles
+        // one more time and stops at outer's `parent: None` early-return
+        // before any further build_node call. We drain `outer.children`
+        // afterwards.
+        let outer = Arc::new(PendingDir {
+            dir: PathBuf::new(),
+            depth: 0,
+            is_symlink: false,
+            parent: None,
+            pending: AtomicUsize::new(1),
+            children: Mutex::new(Vec::new()),
+            cached_metadata: OnceLock::new(),
+        });
+        let root = Arc::new(PendingDir {
+            dir: d,
+            depth: 0,
+            is_symlink: root_is_symlink,
+            parent: Some(outer.clone()),
+            // Sentinel +1: ensures subdirectory tasks can't bubble through
+            // finalize_chain until the root's own scan is done.
+            pending: AtomicUsize::new(1),
+            children: Mutex::new(Vec::new()),
+            cached_metadata: OnceLock::new(),
+        });
+
+        // Single scope per root: all descendant work runs as flat tasks inside
+        // it, so stack depth is O(1) regardless of tree depth.
+        rayon::scope(|s| {
+            s.spawn(move |s| walk_dir(s, root, walk_data));
+        });
+
+        walk_data
+            .progress_data
+            .state
+            .store(Operation::PREPARING, ORDERING);
+
+        let mut outer_children = std::mem::take(&mut *outer.children.lock().unwrap());
+        if let Some(node) = outer_children.pop()
+            && let Some(cleaned) = clean_inodes(node, &mut inodes, walk_data)
+        {
+            top_level_nodes.push(cleaned);
+        }
+    }
     top_level_nodes
 }
 
 // Remove files which have the same inode, we don't want to double count them.
-fn clean_inodes(x: Node, inodes: &mut HashSet<(u64, u64)>, walk_data: &WalkData) -> Option<Node> {
+fn clean_inodes(x: Node, inodes: &mut FxHashSet<(u64, u64)>, walk_data: &WalkData) -> Option<Node> {
     if !walk_data.use_apparent_size
         && let Some(id) = x.inode_device
         && !inodes.insert(id)
@@ -130,14 +211,19 @@ fn is_ignored_path(path: &Path, walk_data: &WalkData) -> bool {
         return true;
     }
 
-    // Entry is inside an ignored absolute path
-    // Absolute paths should be canonicalized before being added to `WalkData.ignore_directories`
+    // Entry is inside an ignored absolute path.
+    // Absolute paths should be canonicalized before being added to
+    // `WalkData.ignore_directories`. Canonicalize `path` at most once
+    // (and only if there is at least one absolute ignore path), instead
+    // of re-canonicalizing inside the loop per ignored entry.
+    let mut absolute_entry_path: Option<PathBuf> = None;
     for ignored_path in walk_data.ignore_directories.iter() {
         if !ignored_path.is_absolute() {
             continue;
         }
-        let absolute_entry_path = std::fs::canonicalize(path).unwrap_or_default();
-        if absolute_entry_path.starts_with(ignored_path) {
+        let canon = absolute_entry_path
+            .get_or_insert_with(|| std::fs::canonicalize(path).unwrap_or_default());
+        if canon.starts_with(ignored_path) {
             return true;
         }
     }
@@ -145,54 +231,79 @@ fn is_ignored_path(path: &Path, walk_data: &WalkData) -> bool {
     false
 }
 
-fn ignore_file(entry: &DirEntry, walk_data: &WalkData) -> bool {
-    if is_ignored_path(&entry.path(), walk_data) {
+// Predicate for whether `ignore_file`'s filter checks would consult any
+// `MetadataTuple` field (dev or m/a/c times). Path-only filters (regex,
+// `--ignore-directory`) don't need a stat.
+fn filter_needs_metadata(walk_data: &WalkData) -> bool {
+    !walk_data.allowed_filesystems.is_empty()
+        || walk_data.filter_accessed_time.is_some()
+        || walk_data.filter_modified_time.is_some()
+        || walk_data.filter_changed_time.is_some()
+}
+
+// `metadata` is the entry's pre-fetched stat tuple (or `None` if nothing
+// here would have needed it). The caller fetches it once and threads it
+// through to `build_node` afterwards, so we never stat the same file
+// twice on a filter-active walk.
+fn ignore_file(
+    entry: &DirEntry,
+    path: &Path,
+    file_type: std::fs::FileType,
+    metadata: Option<&MetadataTuple>,
+    walk_data: &WalkData,
+) -> bool {
+    // `is_ignored_path` is a no-op when no ignore dirs are configured, but the
+    // guard still pays off: it skips the HashSet hash+probe on every entry.
+    if !walk_data.ignore_directories.is_empty() && is_ignored_path(path, walk_data) {
         return true;
     }
 
     let is_dot_file = entry.file_name().to_str().unwrap_or("").starts_with('.');
-    let follow_links = walk_data.follow_links && entry.file_type().is_ok_and(|ft| ft.is_symlink());
 
-    if !walk_data.allowed_filesystems.is_empty() {
-        let size_inode_device = get_metadata(entry.path(), false, follow_links);
-        if let Some((_size, Some((_id, dev)), _gunk)) = size_inode_device
-            && !walk_data.allowed_filesystems.contains(&dev)
-        {
-            return true;
-        }
+    if !walk_data.allowed_filesystems.is_empty()
+        && let Some((_size, Some((_id, dev)), _gunk)) = metadata
+        && !walk_data.allowed_filesystems.contains(dev)
+    {
+        return true;
     }
-    if walk_data.filter_accessed_time.is_some()
+
+    let has_time_filter = walk_data.filter_accessed_time.is_some()
         || walk_data.filter_modified_time.is_some()
-        || walk_data.filter_changed_time.is_some()
+        || walk_data.filter_changed_time.is_some();
+
+    // `file_type` from the d_type-based DirEntry::file_type already tells
+    // us whether this is a regular file. For symlinks we still need one
+    // `path.is_file()` syscall (metadata follows the link) to match the
+    // previous behavior so do it at most once and cache it.
+    let is_file_for_filter = file_type.is_file() || (file_type.is_symlink() && path.is_file());
+
+    if has_time_filter
+        && let Some((_, _, (modified_time, accessed_time, changed_time))) = metadata
+        && is_file_for_filter
+        && [
+            (&walk_data.filter_modified_time, *modified_time),
+            (&walk_data.filter_accessed_time, *accessed_time),
+            (&walk_data.filter_changed_time, *changed_time),
+        ]
+        .iter()
+        .any(|(filter_time, actual_time)| {
+            is_filtered_out_due_to_file_time(filter_time, *actual_time)
+        })
     {
-        let size_inode_device = get_metadata(entry.path(), false, follow_links);
-        if let Some((_, _, (modified_time, accessed_time, changed_time))) = size_inode_device
-            && entry.path().is_file()
-            && [
-                (&walk_data.filter_modified_time, modified_time),
-                (&walk_data.filter_accessed_time, accessed_time),
-                (&walk_data.filter_changed_time, changed_time),
-            ]
-            .iter()
-            .any(|(filter_time, actual_time)| {
-                is_filtered_out_due_to_file_time(filter_time, *actual_time)
-            })
-        {
-            return true;
-        }
+        return true;
     }
 
     // Keeping `walk_data.filter_regex.is_empty()` is important for performance reasons, it stops unnecessary work
     if !walk_data.filter_regex.is_empty()
-        && entry.path().is_file()
-        && is_filtered_out_due_to_regex(walk_data.filter_regex, &entry.path())
+        && is_file_for_filter
+        && is_filtered_out_due_to_regex(walk_data.filter_regex, path)
     {
         return true;
     }
 
     if !walk_data.invert_filter_regex.is_empty()
-        && entry.path().is_file()
-        && is_filtered_out_due_to_invert_regex(walk_data.invert_filter_regex, &entry.path())
+        && is_file_for_filter
+        && is_filtered_out_due_to_invert_regex(walk_data.invert_filter_regex, path)
     {
         return true;
     }
@@ -200,98 +311,300 @@ fn ignore_file(entry: &DirEntry, walk_data: &WalkData) -> bool {
     is_dot_file && walk_data.ignore_hidden
 }
 
-fn walk(dir: PathBuf, walk_data: &WalkData, depth: usize) -> Option<Node> {
-    let prog_data = &walk_data.progress_data;
-    let errors = &walk_data.errors;
-
-    let children = if dir.is_dir() {
-        let read_dir = fs::read_dir(&dir);
-        match read_dir {
-            Ok(entries) => {
-                entries
-                    .into_iter()
-                    .par_bridge()
-                    .filter_map(|entry| {
-                        match entry {
-                            Ok(ref entry) => {
-                                // uncommenting the below line gives simpler code but
-                                // rayon doesn't parallelize as well giving a 3X performance drop
-                                // hence we unravel the recursion a bit
-
-                                // return walk(entry.path(), walk_data, depth)
-
-                                if !ignore_file(entry, walk_data)
-                                    && let Ok(data) = entry.file_type()
-                                {
-                                    if data.is_dir()
-                                        || (walk_data.follow_links && data.is_symlink())
-                                    {
-                                        return walk(entry.path(), walk_data, depth + 1);
-                                    }
-
-                                    let node = build_node(
-                                        entry.path(),
-                                        vec![],
-                                        data.is_symlink(),
-                                        data.is_file(),
-                                        depth,
-                                        walk_data,
-                                    );
-
-                                    prog_data.num_files.fetch_add(1, ORDERING);
-                                    if let Some(ref file) = node {
-                                        prog_data.total_file_size.fetch_add(file.size, ORDERING);
-                                    }
-
-                                    return node;
-                                }
-                            }
-                            Err(ref failed) => {
-                                if handle_error_and_retry(failed, &dir, walk_data) {
-                                    return walk(dir.clone(), walk_data, depth);
-                                }
-                            }
-                        }
-                        None
-                    })
-                    .collect()
-            }
-            Err(failed) => {
-                if handle_error_and_retry(&failed, &dir, walk_data) {
-                    return walk(dir, walk_data, depth);
-                } else {
-                    vec![]
+fn walk_dir<'scope>(
+    scope: &rayon::Scope<'scope>,
+    pending: Arc<PendingDir>,
+    walk_data: &'scope WalkData<'scope>,
+) {
+    let md_result = if pending.is_symlink {
+        fs::metadata(&pending.dir)
+    } else {
+        fs::symlink_metadata(&pending.dir)
+    };
+    let (is_dir_path, is_file_path, tuple) = match &md_result {
+        Ok(m) => (
+            m.is_dir(),
+            m.is_file(),
+            tuple_from_metadata(m, walk_data.use_apparent_size),
+        ),
+        Err(_) => (false, false, None),
+    };
+    let _ = pending.cached_metadata.set(tuple);
+
+    if is_dir_path {
+        // EINTR is the only retryable error. Looping iteratively (rather than
+        // recursing on retry, like the old code) keeps stack depth O(1).
+        loop {
+            let entries = match fs::read_dir(&pending.dir) {
+                Ok(entries) => entries,
+                Err(ref failed) => {
+                    record_error(failed, &pending.dir, walk_data);
+                    if is_retryable(failed) {
+                        continue;
+                    }
+                    break;
                 }
+            };
+
+            // Drain into a Vec before doing anything observable on `pending`.
+            // This is the load-bearing structural choice for retry safety: if
+            // we decide to retry, we throw the Vec away and re-list, with no
+            // spawned subdir tasks or pushed file nodes to roll back.
+            let collected: Vec<_> = entries.collect();
+
+            // If any entry yielded a retryable error, throw the Vec away and
+            // re-list. We record only that one error (which bumps the EINTR
+            // counter and trips a panic threshold if retries are runaway);
+            // other errors aren't recorded yet because they'll resurface on
+            // retry if they're real, and recording them now would log
+            // phantoms when the retry succeeds cleanly.
+            if let Some(failed) = collected
+                .iter()
+                .filter_map(|r| r.as_ref().err())
+                .find(|e| is_retryable(e))
+            {
+                record_error(failed, &pending.dir, walk_data);
+                continue;
             }
+
+            // Commit point: from here on we mutate `pending`. File nodes
+            // are accumulated thread-locally by rayon's collect (no lock
+            // contention in the hot loop) and merged with one extend.
+            // Subdirs spawn from inside process_entry and bubble their own
+            // Node in via finalize_chain later; those still take the lock,
+            // but at most once per subdir.
+            //
+            // Pre-reserve children capacity to `collected.len()` (an upper
+            // bound: every entry contributes at most one child Node, either
+            // as a file via the extend below or as a subdir via bubble-up
+            // in finalize_chain).
+            {
+                let mut children = pending.children.lock().unwrap();
+                children.reserve(collected.len());
+            }
+
+            let file_nodes: Vec<Node> = collected
+                .into_par_iter()
+                .filter_map(|r| match r {
+                    Ok(entry) => process_entry(scope, &pending, &entry, walk_data),
+                    Err(failed) => {
+                        record_error(&failed, &pending.dir, walk_data);
+                        None
+                    }
+                })
+                .collect();
+
+            if !file_nodes.is_empty() {
+                pending.children.lock().unwrap().extend(file_nodes);
+            }
+            break;
         }
-    } else {
-        if !dir.is_file() {
-            let mut editable_error = errors.lock().unwrap();
-            let bad_file = dir.as_os_str().to_string_lossy().into();
-            editable_error.file_not_found.insert(bad_file);
+    } else if !is_file_path {
+        let mut editable_error = walk_data.errors.lock().unwrap();
+        let bad_file = pending.dir.as_os_str().to_string_lossy().into();
+        editable_error.file_not_found.insert(bad_file);
+    }
+
+    finalize_chain(pending, walk_data);
+}
+
+// Returns the file's Node when the entry is a file (so the caller can
+// gather it via rayon's collect). Returns None for ignored entries and
+// for subdirectories. Subdirs spawn a walk task and contribute their
+// Node later via finalize_chain instead.
+fn process_entry<'scope>(
+    scope: &rayon::Scope<'scope>,
+    pending: &Arc<PendingDir>,
+    entry: &DirEntry,
+    walk_data: &'scope WalkData<'scope>,
+) -> Option<Node> {
+    // Compute path + file_type once per entry and thread them through.
+    // `entry.path()` allocates a PathBuf; `entry.file_type()` can require a
+    // stat on filesystems without d_type support. Previously each was called
+    // up to 3 times per entry.
+    let path = entry.path();
+    let file_type = entry.file_type().ok()?;
+    // Fetch metadata at most once per entry. Without filters, the
+    // per-file stat lives inside `build_node` as before. With filters,
+    // we used to stat twice — once in `ignore_file` for the filter
+    // check, once in `build_node` to actually build the Node. Now we
+    // fetch once and thread the tuple through.
+    //
+    // Use the user's `use_apparent_size` flag at fetch time so the
+    // tuple is already in the form `build_node` wants. `ignore_file`
+    // discards the size field, so this is harmless for the filter
+    // logic but means the tuple can be reused unchanged below.
+    let mut prefetched: Option<MetadataTuple> = None;
+    // Fast path: no filters means `ignore_file` has nothing to do. On a
+    // default walk this avoids a function call, a HashSet probe, and an
+    // OsString allocation for `file_name` per entry. We still need to honour
+    // `ignore_hidden` separately when no other filters are set.
+    if walk_data.has_any_filter {
+        if filter_needs_metadata(walk_data) {
+            let follow_links = walk_data.follow_links && file_type.is_symlink();
+            prefetched = get_metadata(&path, walk_data.use_apparent_size, follow_links);
         }
-        vec![]
-    };
-    let is_symlink = if walk_data.follow_links {
-        match fs::symlink_metadata(&dir) {
-            Ok(metadata) => metadata.file_type().is_symlink(),
-            Err(_) => false,
+        if ignore_file(entry, &path, file_type, prefetched.as_ref(), walk_data) {
+            return None;
         }
-    } else {
-        false
-    };
-    build_node(dir, children, is_symlink, false, depth, walk_data)
+    } else if walk_data.ignore_hidden && entry.file_name().to_str().unwrap_or("").starts_with('.') {
+        return None;
+    }
+    let is_symlink = file_type.is_symlink();
+
+    // If the entry is a directory we'll spawn off a new task to walk it.
+    if file_type.is_dir() || (walk_data.follow_links && is_symlink) {
+        // Increment must happen before scope.spawn so a fast child's decrement
+        // can never observe pending = 0 before this walk_dir's finalize_chain
+        // runs. It can be Relaxed ordering because rayon's scope spawn does
+        // its own fencing.
+        pending.pending.fetch_add(1, AtomicOrdering::Relaxed);
+        let child = Arc::new(PendingDir {
+            dir: path,
+            depth: pending.depth + 1,
+            is_symlink,
+            parent: Some(pending.clone()),
+            pending: AtomicUsize::new(1),
+            children: Mutex::new(Vec::new()),
+            cached_metadata: OnceLock::new(),
+        });
+        scope.spawn(move |s| walk_dir(s, child, walk_data));
+        return None;
+    }
+
+    // Under `-f` / `--filecount` without metadata-needing filters,
+    // the file's `MetadataTuple` is mostly thrown away, so we create
+    // a fake `MetadataTuple` to avoid the syscall.
+    //
+    // `node_from_tuple` sets size to 1 unconditionally and the time
+    // fields are only consulted when `-M` / `-A` / `-y` are active.
+    // The only field actually consumed downstream is `inode_device`
+    // for `clean_inodes` dedup, and both halves are available
+    // without a syscall:
+    //
+    //   * `inode` comes from `getdents64`'s `d_ino` (already returned
+    //     from the `read_dir` that gave us this entry; surfaced via
+    //     `DirEntry::ino()` on unix). On Linux's getdents64, d_ino
+    //     matches statx's stx_ino — confirmed by the kernel's filldir
+    //     callback, which copies the inode straight from the dentry.
+    //
+    //   * `dev` is the parent directory's dev. Cross-mount transitions
+    //     can only happen at directory boundaries, and each such
+    //     boundary is a fresh `walk_dir` invocation that re-stats the
+    //     mount point — so within a single dir's child list, every
+    //     non-directory entry shares its dev with the parent.
+    //
+    // Synthesise the tuple instead of statting. Gated three ways:
+    // (a) prefetched is None — no filter wanted metadata. This
+    //     transitively guarantees no time filter (`-M` / `-A` / `-y`)
+    //     is set, because `filter_needs_metadata()` returns true under
+    //     any of those. With no time filter, the synthesised `times=0`
+    //     are inert downstream: `is_filtered_out_due_to_file_time(&None,
+    //     _)` short-circuits to false in `node_from_tuple`.
+    // (b) `-f` is on — otherwise size/times do matter for display.
+    // (c) `-L` is off — under follow_links, a symlink-to-file would
+    //     have been dedup'd against its target via stat-follow; d_ino
+    //     gives the symlink's own inode, a different key. Falling back
+    //     to the stat path under `-L` preserves previous behavior.
+    // Unix-gated: on Windows there's no cheap `d_ino`-equivalent in
+    // `DirEntry`, so the existing stat path keeps running there.
+    #[cfg(target_family = "unix")]
+    if prefetched.is_none() && walk_data.by_filecount && !walk_data.follow_links {
+        use std::os::unix::fs::DirEntryExt;
+        let parent_dev = pending
+            .cached_metadata
+            .get()
+            .copied()
+            .flatten()
+            .and_then(|t| t.1.map(|(_, dev)| dev))
+            .unwrap_or(0);
+        prefetched = Some((0, Some((entry.ino(), parent_dev)), (0, 0, 0)));
+    }
+
+    let node = build_node(
+        path,
+        vec![],
+        prefetched,
+        is_symlink,
+        file_type.is_file(),
+        pending.depth,
+        walk_data,
+    );
+
+    let prog_data = &walk_data.progress_data;
+    prog_data.num_files.fetch_add(1, ORDERING);
+    if let Some(ref n) = node {
+        prog_data.total_file_size.fetch_add(n.size, ORDERING);
+    }
+    node
 }
 
-fn handle_error_and_retry(failed: &Error, dir: &Path, walk_data: &WalkData) -> bool {
+// Iteratively bubbles completions up the parent chain, taking exactly one
+// lock per directory along the way.
+//
+// Each iteration "completes" `pending`. We carry `node_to_push` between
+// iterations: it holds the previous level's built Node so we can push it
+// into `pending.children` in the same critical section as our own
+// decrement. That collapses what would otherwise be three separate locks
+// per directory (push from child, decrement, take children) into one.
+//
+// Termination paths:
+//   1. pending stays > 0 after decrement: not the last completer. Return
+//      with the prior level's Node already pushed into our children.
+//   2. pending hits 0 and parent is None: this is the synthetic outer
+//      created in `walk_it`. Its `children` now holds the finished root
+//      Node; `walk_it` drains it after `rayon::scope` returns.
+fn finalize_chain(mut pending: Arc<PendingDir>, walk_data: &WalkData) {
+    let mut node_to_push: Option<Node> = None;
+    loop {
+        // Single critical section per directory: push the prior level's
+        // Node, decrement (atomically, since `pending` is a separate
+        // primitive from the children Vec), and (if we're the last
+        // completer) take our children so we can build our own Node
+        // outside the lock.
+        //
+        // The fetch_sub runs while holding the children mutex. That's not
+        // required for the atomic itself, but it lets the "am I last?"
+        // check and the subsequent `take` happen back-to-back without
+        // re-locking, and it serializes against any concurrent push from
+        // a sibling task that hasn't yet decremented.
+        let (parent, children) = {
+            let mut children_guard = pending.children.lock().unwrap();
+            if let Some(n) = node_to_push.take() {
+                children_guard.push(n);
+            }
+            // Relaxed: the children mutex carries the happens-before edge
+            // an Acquire fence would otherwise provide.
+            if pending.pending.fetch_sub(1, AtomicOrdering::Relaxed) != 1 {
+                return;
+            }
+            let Some(parent) = pending.parent.clone() else {
+                return;
+            };
+            (parent, std::mem::take(&mut *children_guard))
+        };
+        let cached = pending.cached_metadata.get().copied().flatten();
+        node_to_push = build_node(
+            pending.dir.clone(),
+            children,
+            cached,
+            pending.is_symlink,
+            false,
+            pending.depth,
+            walk_data,
+        );
+        pending = parent;
+    }
+}
+
+fn is_retryable(failed: &Error) -> bool {
+    failed.kind() == std::io::ErrorKind::Interrupted
+}
+
+fn record_error(failed: &Error, dir: &Path, walk_data: &WalkData) {
     let mut editable_error = walk_data.errors.lock().unwrap();
     match failed.kind() {
-        std::io::ErrorKind::PermissionDenied => {
-            editable_error
-                .no_permissions
-                .insert(dir.to_string_lossy().into());
-        }
-        std::io::ErrorKind::InvalidInput => {
+        std::io::ErrorKind::PermissionDenied | std::io::ErrorKind::InvalidInput => {
             editable_error
                 .no_permissions
                 .insert(dir.to_string_lossy().into());
@@ -305,15 +618,12 @@ fn handle_error_and_retry(failed: &Error, dir: &Path, walk_data: &WalkData) -> b
             // However, if there is no limit this results in infinite retrys and dust never finishes
             if editable_error.interrupted_error > 999 {
                 panic!("Multiple Interrupted Errors occurred while scanning filesystem. Aborting");
-            } else {
-                return true;
             }
         }
         _ => {
             editable_error.unknown_error.insert(failed.to_string());
         }
     }
-    false
 }
 
 mod tests {
@@ -351,13 +661,14 @@ mod tests {
             follow_links: false,
             progress_data: indicator.data.clone(),
             errors: Arc::new(Mutex::new(RuntimeErrors::default())),
+            has_any_filter: true,
         }
     }
 
     #[test]
     #[allow(clippy::redundant_clone)]
     fn test_should_ignore_file() {
-        let mut inodes = HashSet::new();
+        let mut inodes = FxHashSet::default();
         let n = create_node();
         let walkdata = create_walker(false);
 
@@ -374,7 +685,7 @@ mod tests {
     #[test]
     #[allow(clippy::redundant_clone)]
     fn test_should_not_ignore_files_if_using_apparent_size() {
-        let mut inodes = HashSet::new();
+        let mut inodes = FxHashSet::default();
         let n = create_node();
         let walkdata = create_walker(true);
 
@@ -425,4 +736,137 @@ mod tests {
         assert_eq!(sort_by_inode(&c, &a), Ordering::Less);
         assert_eq!(sort_by_inode(&b, &c), Ordering::Less);
     }
+
+    #[cfg(test)]
+    fn count_nodes(node: &Node) -> usize {
+        let mut count = 0;
+        let mut stack: Vec<&Node> = vec![node];
+        while let Some(n) = stack.pop() {
+            count += 1;
+            stack.extend(n.children.iter());
+        }
+        count
+    }
+
+    #[cfg(test)]
+    fn max_depth(node: &Node) -> usize {
+        let mut max = node.depth;
+        let mut stack: Vec<&Node> = vec![node];
+        while let Some(n) = stack.pop() {
+            if n.depth > max {
+                max = n.depth;
+            }
+            stack.extend(n.children.iter());
+        }
+        max
+    }
+
+    #[test]
+    fn test_walk_deeply_nested_tree() {
+        // Builds tmp/a/a/.../a (DEPTH levels) and walks it. Catches regressions
+        // back to a recursive walker, which would risk stack overflow on deep
+        // trees (the original motivation for the removed -S flag).
+        const DEPTH: usize = 500;
+        let tmp = tempfile::tempdir().unwrap();
+        let mut path = tmp.path().to_path_buf();
+        for _ in 0..DEPTH {
+            path.push("a");
+            std::fs::create_dir(&path).unwrap();
+        }
+
+        let walkdata = create_walker(true);
+        let mut roots = HashSet::new();
+        roots.insert(tmp.path().to_path_buf());
+
+        let result = walk_it(roots, &walkdata);
+        assert_eq!(result.len(), 1);
+        assert_eq!(max_depth(&result[0]), DEPTH);
+        // Root + DEPTH descendants, each holding exactly one child.
+        assert_eq!(count_nodes(&result[0]), DEPTH + 1);
+    }
+
+    #[test]
+    fn test_walk_wide_directory() {
+        // Many sibling files in one directory exercise the per-directory
+        // parallel iteration: every file pushes into the same parent's
+        // `children` Mutex, and finalize_chain is the sole reader.
+        use std::io::Write;
+        const N: usize = 500;
+        let tmp = tempfile::tempdir().unwrap();
+        for i in 0..N {
+            let mut f = std::fs::File::create(tmp.path().join(format!("f{i}"))).unwrap();
+            writeln!(f, "{i}").unwrap();
+        }
+
+        let walkdata = create_walker(true);
+        let mut roots = HashSet::new();
+        roots.insert(tmp.path().to_path_buf());
+
+        let result = walk_it(roots, &walkdata);
+        assert_eq!(result.len(), 1);
+        assert_eq!(result[0].children.len(), N);
+        assert_eq!(count_nodes(&result[0]), N + 1);
+    }
+
+    #[test]
+    fn test_walk_missing_root_records_file_not_found() {
+        // A root that is neither a dir nor a file hits the `else if
+        // !pending.dir.is_file()` branch in walk_dir and should be recorded
+        // under `file_not_found`.
+        let tmp = tempfile::tempdir().unwrap();
+        let missing = tmp.path().join("does-not-exist");
+
+        let walkdata = create_walker(true);
+        let mut roots = HashSet::new();
+        roots.insert(missing.clone());
+
+        let _ = walk_it(roots, &walkdata);
+        let errors = walkdata.errors.lock().unwrap();
+        assert!(
+            errors
+                .file_not_found
+                .contains(&missing.to_string_lossy().into_owned()),
+            "expected file_not_found to contain {missing:?}, got {:?}",
+            errors.file_not_found
+        );
+    }
+
+    #[cfg(unix)]
+    #[test]
+    fn test_walk_permission_denied_subdir_is_recorded() {
+        // A subdirectory we can't read should land in `no_permissions` via
+        // record_error's PermissionDenied arm. Skipped when running as root,
+        // since chmod 000 doesn't deny root.
+        use std::os::unix::fs::PermissionsExt;
+
+        let tmp = tempfile::tempdir().unwrap();
+        let locked = tmp.path().join("locked");
+        std::fs::create_dir(&locked).unwrap();
+        std::fs::set_permissions(&locked, std::fs::Permissions::from_mode(0o000)).unwrap();
+
+        // Probe: if we can still list it, we're effectively root (or the FS
+        // ignores mode bits) and the test can't observe a PermissionDenied.
+        if std::fs::read_dir(&locked).is_ok() {
+            std::fs::set_permissions(&locked, std::fs::Permissions::from_mode(0o755)).unwrap();
+            return;
+        }
+
+        let walkdata = create_walker(true);
+        let mut roots = HashSet::new();
+        roots.insert(tmp.path().to_path_buf());
+
+        let _ = walk_it(roots, &walkdata);
+
+        // Restore permissions before tempdir's Drop tries to clean up.
+        std::fs::set_permissions(&locked, std::fs::Permissions::from_mode(0o755)).unwrap();
+
+        let errors = walkdata.errors.lock().unwrap();
+        assert!(
+            errors
+                .no_permissions
+                .contains(&locked.to_string_lossy().into_owned()),
+            "expected no_permissions to contain {locked:?}, got {:?}",
+            errors.no_permissions
+        );
+    }
 }
diff --git a/src/main.rs b/src/main.rs
index b9c75937..b2018996 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -29,7 +29,6 @@ use std::panic;
 use std::process;
 use std::sync::Arc;
 use std::sync::Mutex;
-use sysinfo::System;
 use utils::canonicalize_absolute_path;
 
 use self::display::draw_it;
@@ -241,6 +240,16 @@ fn main() {
     let filter_accessed_time = config.get_accessed_time_operator(&options);
     let filter_changed_time = config.get_changed_time_operator(&options);
 
+    // Precompute whether any filter flag is active so the walker's hot path
+    // can skip `ignore_file` entirely on a default walk.
+    let has_any_filter = !ignored_full_path.is_empty()
+        || !allowed_filesystems.is_empty()
+        || filter_modified_time.is_some()
+        || filter_accessed_time.is_some()
+        || filter_changed_time.is_some()
+        || !filter_regexs.is_empty()
+        || !invert_filter_regexs.is_empty();
+
     let walk_data = WalkData {
         ignore_directories: ignored_full_path,
         filter_regex: &filter_regexs,
@@ -256,12 +265,19 @@ fn main() {
         follow_links,
         progress_data: indicator.data.clone(),
         errors: errors_for_rayon,
+        has_any_filter,
     };
 
     let threads_to_use = config.get_threads(&options);
-    let stack_size = config.get_custom_stack_size(&options);
 
-    init_rayon(&stack_size, &threads_to_use).install(|| {
+    if options.stack_size.is_some() {
+        eprintln!(
+            "warning: --stack-size/-S is deprecated and ignored; \
+             the walker no longer recurses, so a custom stack is unnecessary."
+        );
+    }
+
+    init_rayon(&threads_to_use).install(|| {
         let top_level_nodes = walk_it(simplified_dirs, &walk_data);
 
         let tree = match summarize_file_types {
@@ -431,54 +447,13 @@ fn read_paths_from_source(path: &str, null_terminated: bool) -> Vec<String> {
     }
 }
 
-fn init_rayon(stack: &Option<usize>, threads: &Option<usize>) -> rayon::ThreadPool {
-    let stack_size = match stack {
-        Some(s) => Some(*s),
-        None => {
-            // Do not increase the stack size on a 32 bit system, it will fail
-            if cfg!(target_pointer_width = "32") {
-                None
-            } else {
-                let large_stack = usize::pow(1024, 3);
-                let mut sys = System::new_all();
-                sys.refresh_memory();
-                // Larger stack size if possible to handle cases with lots of nested directories
-                let available = sys.available_memory();
-                if available > (large_stack * threads.unwrap_or(1)).try_into().unwrap() {
-                    Some(large_stack)
-                } else {
-                    None
-                }
-            }
-        }
-    };
-
-    match build_thread_pool(stack_size, threads) {
-        Ok(pool) => pool,
-        Err(err) => {
-            eprintln!("Problem initializing rayon, try: export RAYON_NUM_THREADS=1");
-            if stack.is_none() && stack_size.is_some() {
-                // stack parameter was none, try with default stack size
-                if let Ok(pool) = build_thread_pool(None, threads) {
-                    eprintln!("WARNING: not using large stack size, got error: {err}");
-                    return pool;
-                }
-            }
-            panic!("{err}");
-        }
+fn init_rayon(threads: &Option<usize>) -> rayon::ThreadPool {
+    let mut builder = rayon::ThreadPoolBuilder::new();
+    if let Some(t) = threads {
+        builder = builder.num_threads(*t);
     }
-}
-
-fn build_thread_pool(
-    stack_size: Option<usize>,
-    threads: &Option<usize>,
-) -> Result<rayon::ThreadPool, rayon::ThreadPoolBuildError> {
-    let mut pool_builder = rayon::ThreadPoolBuilder::new();
-    if let Some(stack_size_param) = stack_size {
-        pool_builder = pool_builder.stack_size(stack_size_param);
-    }
-    if let Some(thread_count) = threads {
-        pool_builder = pool_builder.num_threads(*thread_count);
-    }
-    pool_builder.build()
+    builder.build().unwrap_or_else(|err| {
+        eprintln!("Problem initializing rayon, try: export RAYON_NUM_THREADS=1");
+        panic!("{err}");
+    })
 }
diff --git a/src/node.rs b/src/node.rs
index 7af8e0a6..d32c8aa5 100644
--- a/src/node.rs
+++ b/src/node.rs
@@ -1,5 +1,5 @@
 use crate::dir_walker::WalkData;
-use crate::platform::get_metadata;
+use crate::platform::{MetadataTuple, get_metadata};
 use crate::utils::is_filtered_out_due_to_file_time;
 use crate::utils::is_filtered_out_due_to_invert_regex;
 use crate::utils::is_filtered_out_due_to_regex;
@@ -33,61 +33,80 @@ impl From<crate::cli::FileTime> for FileTime {
     }
 }
 
+/// Build a Node for a directory or file.
+///
+/// If `cached` is `Some`, reuse the already-extracted metadata tuple to
+/// avoid a stat. `walk_dir` stats each directory once at entry and threads
+/// the result through here via `finalize_chain`. Files pass `None` since
+/// `process_entry` doesn't pre-stat them.
 #[allow(clippy::too_many_arguments)]
 pub fn build_node(
     dir: PathBuf,
     children: Vec<Node>,
+    cached: Option<MetadataTuple>,
     is_symlink: bool,
     is_file: bool,
     depth: usize,
     walk_data: &WalkData,
 ) -> Option<Node> {
-    let use_apparent_size = walk_data.use_apparent_size;
+    let data = match cached {
+        Some(t) => t,
+        None => get_metadata(
+            &dir,
+            walk_data.use_apparent_size,
+            walk_data.follow_links && is_symlink,
+        )?,
+    };
+    Some(node_from_tuple(
+        dir, children, data, is_file, depth, walk_data,
+    ))
+}
+
+fn node_from_tuple(
+    dir: PathBuf,
+    children: Vec<Node>,
+    data: MetadataTuple,
+    is_file: bool,
+    depth: usize,
+    walk_data: &WalkData,
+) -> Node {
     let by_filecount = walk_data.by_filecount;
     let by_filetime = &walk_data.by_filetime;
+    let inode_device = data.1;
 
-    get_metadata(
-        &dir,
-        use_apparent_size,
-        walk_data.follow_links && is_symlink,
-    )
-    .map(|data| {
-        let inode_device = data.1;
-
-        let size = if is_filtered_out_due_to_regex(walk_data.filter_regex, &dir)
-            || is_filtered_out_due_to_invert_regex(walk_data.invert_filter_regex, &dir)
-            || by_filecount && !is_file
-            || [
-                (&walk_data.filter_modified_time, data.2.0),
-                (&walk_data.filter_accessed_time, data.2.1),
-                (&walk_data.filter_changed_time, data.2.2),
-            ]
-            .iter()
-            .any(|(filter_time, actual_time)| {
-                is_filtered_out_due_to_file_time(filter_time, *actual_time)
-            }) {
-            0
-        } else if by_filecount {
-            1
-        } else if by_filetime.is_some() {
-            match by_filetime {
-                Some(FileTime::Modified) => data.2.0.unsigned_abs(),
-                Some(FileTime::Accessed) => data.2.1.unsigned_abs(),
-                Some(FileTime::Changed) => data.2.2.unsigned_abs(),
-                None => unreachable!(),
-            }
-        } else {
-            data.0
-        };
-
-        Node {
-            name: dir,
-            size,
-            children,
-            inode_device,
-            depth,
+    let size = if is_filtered_out_due_to_regex(walk_data.filter_regex, &dir)
+        || is_filtered_out_due_to_invert_regex(walk_data.invert_filter_regex, &dir)
+        || by_filecount && !is_file
+        || [
+            (&walk_data.filter_modified_time, data.2.0),
+            (&walk_data.filter_accessed_time, data.2.1),
+            (&walk_data.filter_changed_time, data.2.2),
+        ]
+        .iter()
+        .any(|(filter_time, actual_time)| {
+            is_filtered_out_due_to_file_time(filter_time, *actual_time)
+        }) {
+        0
+    } else if by_filecount {
+        1
+    } else if by_filetime.is_some() {
+        match by_filetime {
+            Some(FileTime::Modified) => data.2.0.unsigned_abs(),
+            Some(FileTime::Accessed) => data.2.1.unsigned_abs(),
+            Some(FileTime::Changed) => data.2.2.unsigned_abs(),
+            None => unreachable!(),
         }
-    })
+    } else {
+        data.0
+    };
+
+    Node {
+        name: dir,
+        size,
+        children,
+        inode_device,
+        depth,
+    }
 }
 
 impl PartialEq for Node {
diff --git a/src/platform.rs b/src/platform.rs
index 72a3a18a..d5424eb2 100644
--- a/src/platform.rs
+++ b/src/platform.rs
@@ -13,61 +13,71 @@ fn get_block_size() -> u64 {
 type InodeAndDevice = (u64, u64);
 type FileTime = (i64, i64, i64);
 
+/// The parsed stat fields the walker consumes.
+pub type MetadataTuple = (u64, Option<InodeAndDevice>, FileTime);
+
 #[cfg(target_family = "unix")]
 pub fn get_metadata<P: AsRef<Path>>(
     path: P,
     use_apparent_size: bool,
     follow_links: bool,
-) -> Option<(u64, Option<InodeAndDevice>, FileTime)> {
-    use std::os::unix::fs::MetadataExt;
+) -> Option<MetadataTuple> {
     let metadata = if follow_links {
         path.as_ref().metadata()
     } else {
         path.as_ref().symlink_metadata()
     };
     match metadata {
-        Ok(md) => {
-            let file_size = md.len();
-            if use_apparent_size {
-                Some((
-                    file_size,
-                    Some((md.ino(), md.dev())),
-                    (md.mtime(), md.atime(), md.ctime()),
-                ))
-            } else {
-                // On NTFS mounts, the reported block count can be unexpectedly large.
-                // To avoid overestimating disk usage, cap the allocated size to what the
-                // file should occupy based on the file system I/O block size (blksize).
-                // Related: https://github.com/bootandy/dust/issues/295
-                let blksize = md.blksize();
-                let target_size = file_size.div_ceil(blksize) * blksize;
-                let reported_size = md.blocks() * get_block_size();
-
-                // File systems can pre-allocate more space for a file than what would be necessary
-                let pre_allocation_buffer = blksize * 65536;
-                let max_size = target_size + pre_allocation_buffer;
-                let allocated_size = if reported_size > max_size {
-                    target_size
-                } else {
-                    reported_size
-                };
-                Some((
-                    allocated_size,
-                    Some((md.ino(), md.dev())),
-                    (md.mtime(), md.atime(), md.ctime()),
-                ))
-            }
-        }
+        Ok(md) => tuple_from_metadata(&md, use_apparent_size),
         Err(_e) => None,
     }
 }
 
+/// Extract the data tuple from an already-fetched `Metadata`, no syscall.
+#[cfg(target_family = "unix")]
+pub fn tuple_from_metadata(
+    md: &std::fs::Metadata,
+    use_apparent_size: bool,
+) -> Option<MetadataTuple> {
+    use std::os::unix::fs::MetadataExt;
+    let file_size = md.len();
+    if use_apparent_size {
+        Some((
+            file_size,
+            Some((md.ino(), md.dev())),
+            (md.mtime(), md.atime(), md.ctime()),
+        ))
+    } else {
+        // On NTFS mounts, the reported block count can be unexpectedly large.
+        // To avoid overestimating disk usage, cap the allocated size to what the
+        // file should occupy based on the file system I/O block size (blksize).
+        // Related: https://github.com/bootandy/dust/issues/295
+        let blksize = md.blksize();
+        let target_size = file_size.div_ceil(blksize) * blksize;
+        let reported_size = md.blocks() * get_block_size();
+
+        // File systems can pre-allocate more space for a file than what would be necessary
+        let pre_allocation_buffer = blksize * 65536;
+        let max_size = target_size + pre_allocation_buffer;
+        let allocated_size = if reported_size > max_size {
+            target_size
+        } else {
+            reported_size
+        };
+        Some((
+            allocated_size,
+            Some((md.ino(), md.dev())),
+            (md.mtime(), md.atime(), md.ctime()),
+        ))
+    }
+}
+
 #[cfg(target_family = "windows")]
 pub fn get_metadata<P: AsRef<Path>>(
     path: P,
     use_apparent_size: bool,
     follow_links: bool,
-) -> Option<(u64, Option<InodeAndDevice>, FileTime)> {
+) -> Option<MetadataTuple> {
     // On windows opening the file to get size, file ID and volume can be very
     // expensive because 1) it causes a few system calls, and more importantly 2) it can cause
     // windows defender to scan the file.
@@ -131,10 +141,7 @@ pub fn get_metadata<P: AsRef<Path>>(
         Ok(Handle::from_file(file))
     }
 
-    fn get_metadata_expensive(
-        path: &Path,
-        use_apparent_size: bool,
-    ) -> Option<(u64, Option<InodeAndDevice>, FileTime)> {
+    fn get_metadata_expensive(path: &Path, use_apparent_size: bool) -> Option<MetadataTuple> {
         use winapi_util::file::information;
 
         let h = handle_from_path_limited(path).ok()?;
@@ -164,7 +171,6 @@ pub fn get_metadata<P: AsRef<Path>>(
         }
     }
 
-    use std::os::windows::fs::MetadataExt;
     let path = path.as_ref();
     let metadata = if follow_links {
         path.metadata()
@@ -172,46 +178,58 @@ pub fn get_metadata<P: AsRef<Path>>(
         path.symlink_metadata()
     };
     match metadata {
-        Ok(ref md) => {
-            const FILE_ATTRIBUTE_ARCHIVE: u32 = 0x20;
-            const FILE_ATTRIBUTE_READONLY: u32 = 0x01;
-            const FILE_ATTRIBUTE_HIDDEN: u32 = 0x02;
-            const FILE_ATTRIBUTE_SYSTEM: u32 = 0x04;
-            const FILE_ATTRIBUTE_NORMAL: u32 = 0x80;
-            const FILE_ATTRIBUTE_DIRECTORY: u32 = 0x10;
-            const FILE_ATTRIBUTE_SPARSE_FILE: u32 = 0x00000200;
-            const FILE_ATTRIBUTE_PINNED: u32 = 0x00080000;
-            const FILE_ATTRIBUTE_UNPINNED: u32 = 0x00100000;
-            const FILE_ATTRIBUTE_RECALL_ON_OPEN: u32 = 0x00040000;
-            const FILE_ATTRIBUTE_RECALL_ON_DATA_ACCESS: u32 = 0x00400000;
-            const FILE_ATTRIBUTE_OFFLINE: u32 = 0x00001000;
-            // normally FILE_ATTRIBUTE_SPARSE_FILE would be enough, however Windows sometimes likes to mask it out. see: https://stackoverflow.com/q/54560454
-            const IS_PROBABLY_ONEDRIVE: u32 = FILE_ATTRIBUTE_SPARSE_FILE
-                | FILE_ATTRIBUTE_PINNED
-                | FILE_ATTRIBUTE_UNPINNED
-                | FILE_ATTRIBUTE_RECALL_ON_OPEN
-                | FILE_ATTRIBUTE_RECALL_ON_DATA_ACCESS
-                | FILE_ATTRIBUTE_OFFLINE;
-            let attr_filtered = md.file_attributes()
-                & !(FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_READONLY | FILE_ATTRIBUTE_SYSTEM);
-            if ((attr_filtered & FILE_ATTRIBUTE_ARCHIVE) != 0
-                || (attr_filtered & FILE_ATTRIBUTE_DIRECTORY) != 0
-                || md.file_attributes() == FILE_ATTRIBUTE_NORMAL)
-                && !((attr_filtered & IS_PROBABLY_ONEDRIVE != 0) && use_apparent_size)
-            {
-                Some((
-                    md.len(),
-                    None,
-                    (
-                        md.last_write_time() as i64,
-                        md.last_access_time() as i64,
-                        md.creation_time() as i64,
-                    ),
-                ))
-            } else {
-                get_metadata_expensive(path, use_apparent_size)
-            }
-        }
+        Ok(ref md) => tuple_from_metadata(md, use_apparent_size)
+            .or_else(|| get_metadata_expensive(path, use_apparent_size)),
         _ => get_metadata_expensive(path, use_apparent_size),
     }
 }
+
+/// Extract the data tuple from an already-fetched `Metadata` on Windows.
+/// Returns `None` when the file needs the expensive (handle-open) path —
+/// the caller must fall back. Directories and normal files always return
+/// `Some` (that's the cheap branch the current code already takes).
+#[cfg(target_family = "windows")]
+pub fn tuple_from_metadata(
+    md: &std::fs::Metadata,
+    use_apparent_size: bool,
+) -> Option<MetadataTuple> {
+    use std::os::windows::fs::MetadataExt;
+    const FILE_ATTRIBUTE_ARCHIVE: u32 = 0x20;
+    const FILE_ATTRIBUTE_READONLY: u32 = 0x01;
+    const FILE_ATTRIBUTE_HIDDEN: u32 = 0x02;
+    const FILE_ATTRIBUTE_SYSTEM: u32 = 0x04;
+    const FILE_ATTRIBUTE_NORMAL: u32 = 0x80;
+    const FILE_ATTRIBUTE_DIRECTORY: u32 = 0x10;
+    const FILE_ATTRIBUTE_SPARSE_FILE: u32 = 0x00000200;
+    const FILE_ATTRIBUTE_PINNED: u32 = 0x00080000;
+    const FILE_ATTRIBUTE_UNPINNED: u32 = 0x00100000;
+    const FILE_ATTRIBUTE_RECALL_ON_OPEN: u32 = 0x00040000;
+    const FILE_ATTRIBUTE_RECALL_ON_DATA_ACCESS: u32 = 0x00400000;
+    const FILE_ATTRIBUTE_OFFLINE: u32 = 0x00001000;
+    // normally FILE_ATTRIBUTE_SPARSE_FILE would be enough, however Windows sometimes likes to mask it out. see: https://stackoverflow.com/q/54560454
+    const IS_PROBABLY_ONEDRIVE: u32 = FILE_ATTRIBUTE_SPARSE_FILE
+        | FILE_ATTRIBUTE_PINNED
+        | FILE_ATTRIBUTE_UNPINNED
+        | FILE_ATTRIBUTE_RECALL_ON_OPEN
+        | FILE_ATTRIBUTE_RECALL_ON_DATA_ACCESS
+        | FILE_ATTRIBUTE_OFFLINE;
+    let attr_filtered = md.file_attributes()
+        & !(FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_READONLY | FILE_ATTRIBUTE_SYSTEM);
+    if ((attr_filtered & FILE_ATTRIBUTE_ARCHIVE) != 0
+        || (attr_filtered & FILE_ATTRIBUTE_DIRECTORY) != 0
+        || md.file_attributes() == FILE_ATTRIBUTE_NORMAL)
+        && !((attr_filtered & IS_PROBABLY_ONEDRIVE != 0) && use_apparent_size)
+    {
+        Some((
+            md.len(),
+            None,
+            (
+                md.last_write_time() as i64,
+                md.last_access_time() as i64,
+                md.creation_time() as i64,
+            ),
+        ))
+    } else {
+        None
+    }
+}
diff --git a/tests/tests_symlinks.rs b/tests/tests_symlinks.rs
index 2ceb43d6..454944ec 100644
--- a/tests/tests_symlinks.rs
+++ b/tests/tests_symlinks.rs
@@ -111,6 +111,33 @@ pub fn test_hard_sym_link_no_dup_multi_arg() {
     assert!(has_file_only || has_link_only);
 }
 
+// Regression: dust passed a symlink-to-dir as its root path (no `-L`)
+// must walk into the target dir, matching `du`'s `Path::is_dir()`-style
+// behavior.
+#[cfg_attr(target_os = "windows", ignore)]
+#[test]
+pub fn test_root_symlink_to_dir_no_follow() {
+    let dir = Builder::new().tempdir().unwrap();
+    let target = dir.path().join("target");
+    std::fs::create_dir(&target).unwrap();
+    let mut f = File::create(target.join("notes.txt")).unwrap();
+    writeln!(f, "hello").unwrap();
+
+    let link = dir.path().join("link");
+    let link_s = link_it(link.clone(), target.to_str().unwrap(), true);
+
+    let mut cmd = cargo_bin_cmd!("dust");
+    let output = cmd.args(["-p", "-c", "-w", "999", &link_s]).unwrap().stdout;
+    let output = str::from_utf8(&output).unwrap();
+    let notes_line = format!("── {}/notes.txt", link_s);
+    assert!(
+        output.contains(&notes_line),
+        "expected `{}` in output, got:\n{}",
+        notes_line,
+        output
+    );
+}
+
 #[cfg_attr(target_os = "windows", ignore)]
 #[test]
 pub fn test_recursive_sym_link() {