diff options
author | Sven-Hendrik Haase <svenstaro@gmail.com> | 2025-03-07 22:11:01 +0000 |
---|---|---|
committer | Sven-Hendrik Haase <svenstaro@gmail.com> | 2025-03-07 22:11:01 +0000 |
commit | b0ff082db45040386265f1d887d1da71890edff3 (patch) | |
tree | 3850a5973bac047b77f5c09deeb0b46d4ab2c2b0 /src | |
parent | Merge pull request #1482 from svenstaro/add-asynchronous-directory-size-loading (diff) | |
download | miniserve-b0ff082db45040386265f1d887d1da71890edff3.tar.gz miniserve-b0ff082db45040386265f1d887d1da71890edff3.zip |
For directory size counting, ignore inodes we've seen before
The same inode can be encountered more than once when files are hardlinked, on systems that support hardlinks.
Diffstat (limited to '')
-rw-r--r-- | src/file_op.rs | 40 |
1 files changed, 35 insertions, 5 deletions
diff --git a/src/file_op.rs b/src/file_op.rs index 149cd2a..1db4672 100644 --- a/src/file_op.rs +++ b/src/file_op.rs @@ -1,7 +1,13 @@ //! Handlers for file upload and removal +use std::collections::HashSet; use std::io::ErrorKind; + +#[cfg(target_family = "unix")] +use std::os::unix::fs::MetadataExt; + use std::path::{Component, Path, PathBuf}; +use std::sync::Arc; use actix_web::{HttpRequest, HttpResponse, http::header, web}; use async_walkdir::{Filtering, WalkDir}; @@ -12,6 +18,7 @@ use sha2::digest::DynDigest; use sha2::{Digest, Sha256, Sha512}; use tempfile::NamedTempFile; use tokio::io::AsyncWriteExt; +use tokio::sync::RwLock; use crate::{ config::MiniserveConfig, errors::RuntimeError, file_utils::contains_symlink, @@ -41,15 +48,38 @@ impl FileHash { /// Get the recursively calculated dir size for a given dir /// +/// Counts hardlinked files only once if the OS supports hardlinks. +/// /// Expects `dir` to be sanitized. This function doesn't do any sanitization itself. pub async fn recursive_dir_size(dir: &Path) -> Result<u64, RuntimeError> { - let mut entries = WalkDir::new(dir).filter(|entry| async move { - if let Ok(metadata) = entry.metadata().await { - if metadata.is_file() { - return Filtering::Continue; + #[cfg(target_family = "unix")] + let seen_inodes = Arc::new(RwLock::new(HashSet::new())); + let mut entries = WalkDir::new(dir).filter(move |entry| { + { + #[cfg(target_family = "unix")] + let seen = seen_inodes.clone(); + async move { + if let Ok(metadata) = entry.metadata().await { + if metadata.is_file() { + // On Unix, we want to filter inodes that we've already seen so we get a more + // accurate count of real size used on disk. 
+ #[cfg(target_family = "unix")] + { + let (device_id, inode) = (metadata.dev(), metadata.ino()); + + // Check if this file has been seen before based on its device ID and inode number + if seen.read().await.contains(&(device_id, inode)) { + return Filtering::Ignore; + } else { + seen.write().await.insert((device_id, inode)); + } + } + return Filtering::Continue; + } + } + Filtering::Ignore } } - Filtering::Ignore }); let mut total_size = 0; |