app_lib/
audio_scanner.rs

1//! Audio sample file scanner with metadata extraction.
2//!
3//! Discovers audio samples (extensions in [`crate::audio_extensions::AUDIO_EXTENSIONS`]) across
4//! the filesystem. Extracts audio metadata (sample rate, bit depth,
5//! channels, duration) by reading file headers directly. Supports
6//! symlink deduplication and parallel directory traversal via Rayon.
//! Symlinks in directory listings are resolved via `fs::metadata` (which follows links), so
//! links to files and subdirectories are scanned (broken links are skipped).
9
10use crate::audio_extensions::AUDIO_EXTENSIONS;
11use crate::history::AudioSample;
12use crate::scanner_skip_dirs::SCANNER_SKIP_DIRS as SKIP_DIRS;
13use crate::unified_walker::IncrementalDirState;
14
/// Collapses the macOS data-volume firmlink prefix so both spellings of a path compare equal:
/// `/System/Volumes/Data/Users/...` becomes `/Users/...`.
///
/// `/` and `/System/Volumes/Data` are the same volume joined by firmlinks, and
/// `canonicalize()` does not resolve them, so without this step the walker would visit the
/// same directory twice. Paths that do not sit under the prefix are returned untouched, and
/// on every platform other than macOS the function is the identity.
fn normalize_macos_path(p: std::path::PathBuf) -> std::path::PathBuf {
    #[cfg(target_os = "macos")]
    {
        const DATA_VOLUME: &str = "/System/Volumes/Data";
        let lossy = p.to_string_lossy();
        if let Some(tail) = lossy.strip_prefix(DATA_VOLUME) {
            // Require a separator right after the prefix so the bare volume root and
            // siblings such as `/System/Volumes/Database` are left alone.
            if tail.starts_with('/') {
                return std::path::PathBuf::from(tail);
            }
        }
    }
    p
}
28use rayon::prelude::*;
29use dashmap::DashSet;
30use std::collections::HashSet;
31use std::fs;
32use std::io::Read;
33use std::path::{Path, PathBuf};
34use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
35use std::sync::{Arc, Mutex};
36
/// Formats a byte count as a human-readable size string (e.g. `1024` → `"1.0 KB"`).
///
/// Thin re-export: the actual formatting lives in [`crate::format_size`]; this wrapper only
/// makes it reachable through the scanner module.
pub fn format_size(bytes: u64) -> String {
    crate::format_size(bytes)
}
40
41pub fn get_audio_roots() -> Vec<PathBuf> {
42    let home = dirs::home_dir().unwrap_or_default();
43    let mut roots = Vec::new();
44
45    #[cfg(target_os = "macos")]
46    {
47        roots.push(home.clone());
48        roots.push(PathBuf::from("/Library/Audio"));
49        roots.push(PathBuf::from("/Applications"));
50        if let Ok(vols) = fs::read_dir("/Volumes") {
51            for entry in vols.flatten() {
52                let path = entry.path();
53                if path.is_dir() || path.is_symlink() {
54                    roots.push(path);
55                }
56            }
57        }
58    }
59
60    #[cfg(target_os = "windows")]
61    {
62        roots.push(home.clone());
63        roots.push(PathBuf::from(
64            std::env::var("ProgramFiles").unwrap_or_else(|_| "C:\\Program Files".into()),
65        ));
66        roots.push(PathBuf::from(
67            std::env::var("ProgramFiles(x86)").unwrap_or_else(|_| "C:\\Program Files (x86)".into()),
68        ));
69        for c in b'C'..=b'Z' {
70            let drive = format!("{}:\\", c as char);
71            if Path::new(&drive).exists() {
72                roots.push(PathBuf::from(drive));
73            }
74        }
75    }
76
77    #[cfg(target_os = "linux")]
78    {
79        roots.push(home.clone());
80        roots.push(PathBuf::from("/usr/share/sounds"));
81        roots.push(PathBuf::from("/usr/local/share/sounds"));
82    }
83
84    roots.sort();
85    roots.dedup();
86    roots.into_iter().filter(|r| r.exists()).collect()
87}
88
89pub fn walk_for_audio(
90    roots: &[PathBuf],
91    on_batch: &mut dyn FnMut(&[AudioSample], usize),
92    should_stop: &(dyn Fn() -> bool + Sync),
93    exclude: Option<HashSet<String>>,
94    active_dirs: Option<Arc<Mutex<Vec<String>>>>,
95    incremental: Option<Arc<IncrementalDirState>>,
96) {
97    let batch_size = 100;
98    let stop = Arc::new(AtomicBool::new(false));
99    let found = Arc::new(AtomicUsize::new(0));
100    let active = active_dirs.unwrap_or_else(|| Arc::new(Mutex::new(Vec::new())));
101    let (tx, rx) = std::sync::mpsc::sync_channel::<Vec<AudioSample>>(256);
102    let visited = Arc::new(DashSet::new());
103    let exclude = Arc::new(exclude.unwrap_or_default());
104
105    // Dedicated pool — limit threads to avoid FD exhaustion with parallel scans
106    let roots_owned: Vec<PathBuf> = roots.to_vec();
107    let stop2 = stop.clone();
108    let found2 = found.clone();
109    let incremental = incremental.clone();
110    let pool = rayon::ThreadPoolBuilder::new()
111        .num_threads(num_cpus::get().max(4))
112        .build()
113        .unwrap();
114    std::thread::spawn(move || {
115        pool.install(|| {
116            roots_owned.par_iter().for_each(|root| {
117                if stop2.load(Ordering::Relaxed) {
118                    return;
119                }
120                walk_dir_parallel(
121                    root,
122                    0,
123                    &visited,
124                    &tx,
125                    &found2,
126                    batch_size,
127                    &stop2,
128                    &exclude,
129                    &active,
130                    incremental.clone(),
131                );
132            });
133        });
134        drop(pool); // Release thread pool resources immediately
135    });
136
137    // Stream results to callback as they arrive, checking stop frequently
138    let mut total_found = 0usize;
139    loop {
140        if should_stop() {
141            stop.store(true, Ordering::Relaxed);
142            while rx.try_recv().is_ok() {}
143            break;
144        }
145        match rx.recv_timeout(std::time::Duration::from_millis(10)) {
146            Ok(samples) => {
147                total_found += samples.len();
148                on_batch(&samples, total_found);
149            }
150            Err(std::sync::mpsc::RecvTimeoutError::Timeout) => continue,
151            Err(std::sync::mpsc::RecvTimeoutError::Disconnected) => break,
152        }
153    }
154}
155
/// Recursively scans one directory for audio samples, fanning out over subdirectories with
/// Rayon.
///
/// * `dir` / `depth` — directory to scan and its distance from the scan root; recursion
///   stops once `depth` exceeds 30.
/// * `visited` — shared set of directories already entered, used to avoid scanning the same
///   directory twice when it is reachable through several paths (symlinks, firmlinks).
/// * `tx` — channel on which batches of discovered samples are sent; send failures are
///   ignored (the receiver may already be gone after a stop request).
/// * `found` — shared counter, incremented once per sample added to a batch.
/// * `batch_size` — a batch is flushed as soon as it holds this many samples; any remainder
///   is flushed after the directory's files have been processed.
/// * `stop` — cooperative cancellation flag, checked on entry to every directory.
/// * `exclude` — entry names (files or directories) and full file paths to skip.
/// * `active_dirs` — rolling list of the most recently entered directories.
/// * `incremental` — optional incremental-scan state, consulted via `should_skip` before a
///   directory is read and notified via `record_scanned_dir` once its subtree is done.
///
/// Unreadable directories and entries whose type cannot be determined are skipped silently.
// Ten arguments exceed clippy's default limit; they are all shared scan state threaded
// through the recursion.
#[allow(clippy::too_many_arguments)]
fn walk_dir_parallel(
    dir: &Path,
    depth: u32,
    visited: &Arc<DashSet<PathBuf>>,
    tx: &std::sync::mpsc::SyncSender<Vec<AudioSample>>,
    found: &Arc<AtomicUsize>,
    batch_size: usize,
    stop: &Arc<AtomicBool>,
    exclude: &Arc<HashSet<String>>,
    active_dirs: &Arc<Mutex<Vec<String>>>,
    incremental: Option<Arc<IncrementalDirState>>,
) {
    if depth > 30 || stop.load(Ordering::Relaxed) {
        return;
    }

    // Deduplicate on the canonical path (falling back to the path as given when
    // canonicalization fails). The as-given path is recorded as well, so a later visit
    // under either spelling is rejected.
    {
        let orig = normalize_macos_path(dir.to_path_buf());
        let canon = fs::canonicalize(dir).ok().map(normalize_macos_path);
        let key = canon.unwrap_or_else(|| orig.clone());
        if !visited.insert(key) {
            return;
        }
        visited.insert(orig);
    }

    // Let the incremental state veto this directory (and thereby its whole subtree).
    if let Some(ref inc) = incremental {
        if inc.should_skip(dir) {
            return;
        }
    }

    // Track active directory (rolling window of the last 200 entered). A poisoned lock is
    // recovered rather than propagated: the list is informational only.
    let dir_str = dir.to_string_lossy().to_string();
    {
        let mut ad = active_dirs.lock().unwrap_or_else(|e| e.into_inner());
        ad.push(dir_str.clone());
        if ad.len() > 200 {
            let excess = ad.len() - 200;
            ad.drain(..excess);
        }
    }

    // Entries that fail to read are dropped by `flatten()`; an unreadable directory ends
    // the scan of this subtree.
    let entries: Vec<_> = match fs::read_dir(dir) {
        Ok(e) => e.flatten().collect(),
        Err(_e) => {
            return;
        }
    };

    let mut files = Vec::new();
    let mut subdirs = Vec::new();

    // Classify every entry as file or subdirectory. The name-based skip rules apply to
    // files and directories alike.
    for entry in &entries {
        let name = entry.file_name();
        let name_str = name.to_string_lossy();
        // `@` prefix = Synology NAS system dirs (`@eaDir` in every media folder,
        // `@tmp`, `@syno*`, `@appstore`, `@docker`, etc.). `@eaDir` alone can
        // double a scan's file count on Synology shares.
        if name_str.starts_with('.')
            || name_str.starts_with('@')
            || SKIP_DIRS.contains(&name_str.as_ref())
            || exclude.contains(name_str.as_ref())
        {
            continue;
        }
        // Use cached file_type() from readdir's d_type instead of path.is_dir()/
        // is_file() — each of those triggers a stat() syscall (network roundtrip
        // on SMB). file_type() is free on Unix when d_type is populated.
        let ft = match entry.file_type() {
            Ok(f) => f,
            Err(_) => continue,
        };
        let path = entry.path();
        if ft.is_dir() {
            subdirs.push(path);
        } else if ft.is_file() {
            files.push((path, dir.to_path_buf()));
        } else if ft.is_symlink() {
            // `fs::metadata` follows the link: classify by what it points at, and drop
            // links whose target cannot be resolved (broken links).
            match fs::metadata(&path) {
                Ok(m) if m.is_dir() => {
                    subdirs.push(path);
                }
                Ok(m) if m.is_file() => {
                    files.push((path, dir.to_path_buf()));
                }
                _ => {}
            }
        }
    }

    // Process files in this directory
    let mut batch = Vec::new();
    for (path, parent) in files {
        // Extension in the same form as the AUDIO_EXTENSIONS entries: lowercase with a
        // leading dot (empty string when the file has no extension).
        let ext = path
            .extension()
            .map(|e| format!(".{}", e.to_string_lossy().to_lowercase()))
            .unwrap_or_default();

        if AUDIO_EXTENSIONS.contains(&ext.as_str()) {
            // Second exclusion check, this time against the full path of the file.
            let path_str = path.to_string_lossy().to_string();
            if exclude.contains(&path_str) {
                continue;
            }
            if let Ok(meta) = fs::metadata(&path) {
                // Skip empty or unreadable files
                if meta.len() == 0 {
                    continue;
                }
                // Skip files where we can't read timestamps (broken symlinks, unmounted volumes)
                if meta.modified().is_err() && meta.accessed().is_err() {
                    continue;
                }
                let sample_name = path
                    .file_stem()
                    .map(|s| s.to_string_lossy().to_string())
                    .unwrap_or_default();
                // Modification date as `YYYY-MM-DD` in UTC; empty when unavailable.
                let modified = meta
                    .modified()
                    .ok()
                    .map(|t| {
                        let dt: chrono::DateTime<chrono::Utc> = t.into();
                        dt.format("%Y-%m-%d").to_string()
                    })
                    .unwrap_or_default();

                // Read headers for all supported formats (symphonia probe reads
                // codec params without decoding — fast enough for bulk scan).
                // A path that is not valid UTF-8 is passed as "" and therefore yields
                // no audio details.
                let am = get_audio_metadata(path.to_str().unwrap_or(""));
                let (dur, ch, sr, bps) =
                    (am.duration, am.channels, am.sample_rate, am.bits_per_sample);
                batch.push(AudioSample {
                    name: sample_name,
                    path: path.to_string_lossy().to_string(),
                    directory: parent.to_string_lossy().to_string(),
                    format: ext.strip_prefix('.').unwrap_or("").to_uppercase(),
                    size: meta.len(),
                    size_formatted: format_size(meta.len()),
                    modified,
                    duration: dur,
                    channels: ch,
                    sample_rate: sr,
                    bits_per_sample: bps,
                });
                found.fetch_add(1, Ordering::Relaxed);

                // Flush full batches right away so results stream while the scan runs.
                if batch.len() >= batch_size {
                    let _ = tx.send(batch);
                    batch = Vec::new();
                }
            }
        }
    }
    // Flush the final partial batch for this directory.
    if !batch.is_empty() {
        let _ = tx.send(batch);
    }

    // Recurse into subdirectories in parallel
    subdirs.par_iter().for_each(|subdir| {
        walk_dir_parallel(
            subdir,
            depth + 1,
            visited,
            tx,
            found,
            batch_size,
            stop,
            exclude,
            active_dirs,
            incremental.clone(),
        );
    });

    // Only after the whole subtree has been walked is the directory reported as scanned.
    if let Some(ref inc) = incremental {
        inc.record_scanned_dir(dir);
    }

    // Note: the directory is not removed from `active_dirs` here; entries leave that list
    // only through the rolling-window trim performed on entry.
}
336
337// Audio metadata extraction
338#[derive(Debug, Clone, serde::Serialize)]
339pub struct AudioMetadata {
340    #[serde(rename = "fullPath")]
341    pub full_path: String,
342    #[serde(rename = "fileName")]
343    pub file_name: String,
344    pub directory: String,
345    pub format: String,
346    #[serde(rename = "sizeBytes")]
347    pub size_bytes: u64,
348    pub created: String,
349    pub modified: String,
350    pub accessed: String,
351    pub permissions: String,
352    #[serde(skip_serializing_if = "Option::is_none")]
353    pub channels: Option<u16>,
354    #[serde(rename = "sampleRate", skip_serializing_if = "Option::is_none")]
355    pub sample_rate: Option<u32>,
356    #[serde(rename = "bitsPerSample", skip_serializing_if = "Option::is_none")]
357    pub bits_per_sample: Option<u16>,
358    #[serde(skip_serializing_if = "Option::is_none")]
359    pub duration: Option<f64>,
360    #[serde(skip_serializing_if = "Option::is_none")]
361    pub error: Option<String>,
362}
363
364pub fn get_audio_metadata(file_path: &str) -> AudioMetadata {
365    let path = Path::new(file_path);
366    let meta = match fs::metadata(path) {
367        Ok(m) => m,
368        Err(e) => {
369            return AudioMetadata {
370                full_path: file_path.to_string(),
371                file_name: String::new(),
372                directory: String::new(),
373                format: String::new(),
374                size_bytes: 0,
375                created: String::new(),
376                modified: String::new(),
377                accessed: String::new(),
378                permissions: String::new(),
379                channels: None,
380                sample_rate: None,
381                bits_per_sample: None,
382                duration: None,
383                error: Some(e.to_string()),
384            };
385        }
386    };
387
388    let ext = path
389        .extension()
390        .map(|e| format!(".{}", e.to_string_lossy().to_lowercase()))
391        .unwrap_or_default();
392
393    let fmt_time = |t: std::io::Result<std::time::SystemTime>| -> String {
394        t.ok()
395            .map(|t| {
396                let dt: chrono::DateTime<chrono::Utc> = t.into();
397                dt.to_rfc3339()
398            })
399            .unwrap_or_default()
400    };
401
402    #[cfg(unix)]
403    let permissions = {
404        use std::os::unix::fs::PermissionsExt;
405        format!("0{:o}", meta.permissions().mode() & 0o777)
406    };
407    #[cfg(not(unix))]
408    let permissions = String::new();
409
410    let mut result = AudioMetadata {
411        full_path: file_path.to_string(),
412        file_name: path
413            .file_name()
414            .map(|s| s.to_string_lossy().to_string())
415            .unwrap_or_default(),
416        directory: path
417            .parent()
418            .map(|p| p.to_string_lossy().to_string())
419            .unwrap_or_default(),
420        format: ext.strip_prefix('.').unwrap_or("").to_uppercase(),
421        size_bytes: meta.len(),
422        created: fmt_time(meta.created()),
423        modified: fmt_time(meta.modified()),
424        accessed: fmt_time(meta.accessed()),
425        permissions,
426        channels: None,
427        sample_rate: None,
428        bits_per_sample: None,
429        duration: None,
430        error: None,
431    };
432
433    // Parse audio headers
434    match ext.as_str() {
435        ".wav" => parse_wav(path, &mut result),
436        ".aiff" | ".aif" => parse_aiff(path, &mut result),
437        ".flac" => parse_flac(path, &mut result),
438        ".mp3" | ".ogg" | ".m4a" | ".aac" | ".opus" | ".wma" => {
439            probe_with_symphonia(path, &mut result)
440        }
441        _ => {}
442    }
443
444    result
445}
446
447/// Fast metadata probe using symphonia — reads codec params without decoding.
448fn probe_with_symphonia(path: &Path, meta: &mut AudioMetadata) {
449    use symphonia::core::formats::FormatOptions;
450    use symphonia::core::io::MediaSourceStream;
451    use symphonia::core::meta::MetadataOptions;
452    use symphonia::core::probe::Hint;
453
454    let file = match std::fs::File::open(path) {
455        Ok(f) => f,
456        Err(_) => return,
457    };
458    let mss = MediaSourceStream::new(Box::new(file), Default::default());
459
460    let mut hint = Hint::new();
461    if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
462        hint.with_extension(ext);
463    }
464
465    let probed = match symphonia::default::get_probe().format(
466        &hint,
467        mss,
468        &FormatOptions::default(),
469        &MetadataOptions::default(),
470    ) {
471        Ok(p) => p,
472        Err(_) => return,
473    };
474
475    if let Some(track) = probed.format.default_track() {
476        let params = &track.codec_params;
477        if let Some(sr) = params.sample_rate {
478            meta.sample_rate = Some(sr);
479        }
480        if let Some(ch) = params.channels {
481            meta.channels = Some(ch.count() as u16);
482        }
483        if let Some(bps) = params.bits_per_sample {
484            meta.bits_per_sample = Some(bps as u16);
485        }
486        // Duration from time base + n_frames
487        if let (Some(tb), Some(n_frames)) = (params.time_base, params.n_frames) {
488            let time = tb.calc_time(n_frames);
489            meta.duration = Some(time.seconds as f64 + time.frac);
490        }
491    }
492}
493
494fn parse_wav(path: &Path, meta: &mut AudioMetadata) {
495    let mut file = match fs::File::open(path) {
496        Ok(f) => f,
497        Err(_) => return,
498    };
499    let mut header = [0u8; 44];
500    if file.read_exact(&mut header).is_err() {
501        return;
502    }
503
504    if &header[0..4] == b"RIFF" && &header[8..12] == b"WAVE" {
505        meta.channels = Some(u16::from_le_bytes([header[22], header[23]]));
506        meta.sample_rate = Some(u32::from_le_bytes([
507            header[24], header[25], header[26], header[27],
508        ]));
509        let byte_rate = u32::from_le_bytes([header[28], header[29], header[30], header[31]]);
510        meta.bits_per_sample = Some(u16::from_le_bytes([header[34], header[35]]));
511        let data_size = u32::from_le_bytes([header[40], header[41], header[42], header[43]]);
512        if byte_rate > 0 {
513            meta.duration = Some(data_size as f64 / byte_rate as f64);
514        }
515    }
516}
517
518fn parse_aiff(path: &Path, meta: &mut AudioMetadata) {
519    let mut file = match fs::File::open(path) {
520        Ok(f) => f,
521        Err(_) => return,
522    };
523    let mut buf = [0u8; 512];
524    let bytes_read = match file.read(&mut buf) {
525        Ok(n) => n,
526        Err(_) => return,
527    };
528
529    if bytes_read < 12 || &buf[0..4] != b"FORM" || &buf[8..12] != b"AIFF" {
530        return;
531    }
532
533    let mut offset = 12usize;
534    while offset + 8 < bytes_read {
535        let chunk_id = &buf[offset..offset + 4];
536        let chunk_size = u32::from_be_bytes([
537            buf[offset + 4],
538            buf[offset + 5],
539            buf[offset + 6],
540            buf[offset + 7],
541        ]) as usize;
542
543        if chunk_id == b"COMM" && offset + 18 < bytes_read {
544            meta.channels = Some(u16::from_be_bytes([buf[offset + 8], buf[offset + 9]]));
545            let num_frames = u32::from_be_bytes([
546                buf[offset + 10],
547                buf[offset + 11],
548                buf[offset + 12],
549                buf[offset + 13],
550            ]);
551            meta.bits_per_sample = Some(u16::from_be_bytes([buf[offset + 14], buf[offset + 15]]));
552
553            // 80-bit extended float for sample rate
554            let exponent = u16::from_be_bytes([buf[offset + 16], buf[offset + 17]]) as i32;
555            let mantissa = u32::from_be_bytes([
556                buf[offset + 18],
557                buf[offset + 19],
558                buf[offset + 20],
559                buf[offset + 21],
560            ]);
561            let exp = exponent - 16383 - 31;
562            let sample_rate = (mantissa as f64 * 2f64.powi(exp)).round() as u32;
563            meta.sample_rate = Some(sample_rate);
564            if sample_rate > 0 {
565                meta.duration = Some(num_frames as f64 / sample_rate as f64);
566            }
567            break;
568        }
569
570        offset += 8 + chunk_size;
571        if !chunk_size.is_multiple_of(2) {
572            offset += 1;
573        }
574    }
575}
576
577fn parse_flac(path: &Path, meta: &mut AudioMetadata) {
578    let mut file = match fs::File::open(path) {
579        Ok(f) => f,
580        Err(_) => return,
581    };
582    let mut buf = [0u8; 42];
583    if file.read_exact(&mut buf).is_err() {
584        return;
585    }
586
587    if &buf[0..4] != b"fLaC" {
588        return;
589    }
590
591    let sample_rate = ((buf[18] as u32) << 12) | ((buf[19] as u32) << 4) | ((buf[20] as u32) >> 4);
592    let channels = ((buf[20] >> 1) & 0x07) + 1;
593    let bits_per_sample = (((buf[20] & 1) as u16) << 4) | (((buf[21] >> 4) as u16) + 1);
594
595    let total_samples = (((buf[21] & 0x0F) as u64) * (1u64 << 32))
596        | ((buf[22] as u64) << 24)
597        | ((buf[23] as u64) << 16)
598        | ((buf[24] as u64) << 8)
599        | (buf[25] as u64);
600
601    meta.sample_rate = Some(sample_rate);
602    meta.channels = Some(channels as u16);
603    meta.bits_per_sample = Some(bits_per_sample);
604
605    if sample_rate > 0 && total_samples > 0 {
606        meta.duration = Some(total_samples as f64 / sample_rate as f64);
607    }
608}
609
610#[cfg(test)]
611mod tests {
612    use super::*;
613    use std::collections::HashSet;
614    use std::fs;
615    use std::io::Write;
616    use std::slice::from_ref;
617
618    #[test]
619    fn test_format_size() {
620        assert_eq!(format_size(0), "0 B");
621        assert_eq!(format_size(500), "500.0 B");
622        assert_eq!(format_size(1024), "1.0 KB");
623        assert_eq!(format_size(1_048_576), "1.0 MB");
624        assert_eq!(format_size(1_073_741_824), "1.0 GB");
625    }
626
627    #[test]
628    fn test_audio_extensions_complete() {
629        for ext in &[
630            ".wav", ".mp3", ".flac", ".aiff", ".ogg", ".m4a", ".opus", ".aac", ".wma", ".aif",
631            ".rex", ".rx2", ".sf2", ".sfz",
632        ] {
633            assert!(
634                AUDIO_EXTENSIONS.contains(ext),
635                "AUDIO_EXTENSIONS should contain {}",
636                ext
637            );
638        }
639    }
640
641    #[test]
642    fn test_normalize_macos_path_audio_scanner() {
643        let p = PathBuf::from("/System/Volumes/Data/tmp/audio");
644        let n = normalize_macos_path(p);
645        #[cfg(target_os = "macos")]
646        assert_eq!(n, PathBuf::from("/tmp/audio"));
647        #[cfg(not(target_os = "macos"))]
648        assert_eq!(n, PathBuf::from("/System/Volumes/Data/tmp/audio"));
649    }
650
651    #[test]
652    fn test_get_audio_roots_not_empty() {
653        let roots = get_audio_roots();
654        assert!(
655            roots.iter().any(|r| r.exists()),
656            "get_audio_roots should return at least one existing path"
657        );
658    }
659
660    #[test]
661    fn test_walk_for_audio_empty_dir() {
662        let tmp = std::env::temp_dir().join("upum_test_walk_empty");
663        let _ = fs::remove_dir_all(&tmp);
664        fs::create_dir_all(&tmp).unwrap();
665
666        let mut total = 0usize;
667        walk_for_audio(
668            from_ref(&tmp),
669            &mut |_batch, count| {
670                total = count;
671            },
672            &|| false,
673            None,
674            None,
675            None,
676        );
677        assert_eq!(total, 0);
678        let _ = fs::remove_dir_all(&tmp);
679    }
680
681    #[test]
682    fn test_walk_for_audio_finds_files() {
683        let tmp = std::env::temp_dir().join("upum_test_walk_finds");
684        let _ = fs::remove_dir_all(&tmp);
685        fs::create_dir_all(&tmp).unwrap();
686        fs::write(tmp.join("test.wav"), b"fake wav data").unwrap();
687        fs::write(tmp.join("test.txt"), b"not audio").unwrap();
688
689        let mut found = Vec::new();
690        walk_for_audio(
691            from_ref(&tmp),
692            &mut |batch, _count| {
693                found.extend_from_slice(batch);
694            },
695            &|| false,
696            None,
697            None,
698            None,
699        );
700        assert_eq!(found.len(), 1);
701        assert!(found[0].path.contains("test.wav"));
702        let _ = fs::remove_dir_all(&tmp);
703    }
704
705    #[test]
706    fn test_walk_for_audio_exclude_full_path_skips_file() {
707        let tmp = std::env::temp_dir().join("upum_test_walk_exclude_path");
708        let _ = fs::remove_dir_all(&tmp);
709        fs::create_dir_all(&tmp).unwrap();
710        fs::write(tmp.join("keep.wav"), b"fake wav data").unwrap();
711        fs::write(tmp.join("skip.wav"), b"fake wav data").unwrap();
712        let mut ex = HashSet::new();
713        ex.insert(tmp.join("skip.wav").to_string_lossy().into_owned());
714
715        let mut found = Vec::new();
716        walk_for_audio(
717            from_ref(&tmp),
718            &mut |batch, _count| {
719                found.extend_from_slice(batch);
720            },
721            &|| false,
722            Some(ex),
723            None,
724            None,
725        );
726        assert_eq!(found.len(), 1);
727        assert!(found[0].path.contains("keep.wav"));
728        let _ = fs::remove_dir_all(&tmp);
729    }
730
731    #[test]
732    fn test_walk_for_audio_exclude_directory_name_skips_subtree() {
733        let tmp = std::env::temp_dir().join("upum_test_walk_exclude_dirname");
734        let _ = fs::remove_dir_all(&tmp);
735        fs::create_dir_all(tmp.join("skipme")).unwrap();
736        fs::create_dir_all(tmp.join("keep")).unwrap();
737        fs::write(tmp.join("skipme/hidden.wav"), b"fake wav data").unwrap();
738        fs::write(tmp.join("keep/show.wav"), b"fake wav data").unwrap();
739
740        let mut ex = HashSet::new();
741        ex.insert("skipme".into());
742
743        let mut found = Vec::new();
744        walk_for_audio(
745            from_ref(&tmp),
746            &mut |batch, _count| {
747                found.extend_from_slice(batch);
748            },
749            &|| false,
750            Some(ex),
751            None,
752            None,
753        );
754        assert_eq!(found.len(), 1);
755        assert!(found[0].path.contains("show.wav"));
756        let _ = fs::remove_dir_all(&tmp);
757    }
758
759    #[test]
760    fn test_walk_for_audio_stop() {
761        let tmp = std::env::temp_dir().join("upum_test_walk_stop");
762        let _ = fs::remove_dir_all(&tmp);
763        fs::create_dir_all(&tmp).unwrap();
764        fs::write(tmp.join("test.wav"), b"fake wav data").unwrap();
765
766        let mut found = Vec::new();
767        walk_for_audio(
768            from_ref(&tmp),
769            &mut |batch, _count| {
770                found.extend_from_slice(batch);
771            },
772            &|| true,
773            None,
774            None,
775            None,
776        );
777        assert_eq!(found.len(), 0);
778        let _ = fs::remove_dir_all(&tmp);
779    }
780
781    #[test]
782    fn test_walk_for_audio_skips_dotdirs() {
783        let tmp = std::env::temp_dir().join("upum_test_walk_dotdirs");
784        let _ = fs::remove_dir_all(&tmp);
785        fs::create_dir_all(tmp.join(".hidden")).unwrap();
786        fs::create_dir_all(tmp.join("visible")).unwrap();
787        fs::write(tmp.join(".hidden").join("test.wav"), b"hidden").unwrap();
788        fs::write(tmp.join("visible").join("test.wav"), b"visible").unwrap();
789
790        let mut found = Vec::new();
791        walk_for_audio(
792            from_ref(&tmp),
793            &mut |batch, _count| {
794                found.extend_from_slice(batch);
795            },
796            &|| false,
797            None,
798            None,
799            None,
800        );
801        assert_eq!(found.len(), 1);
802        assert!(found[0].path.contains("visible"));
803        let _ = fs::remove_dir_all(&tmp);
804    }
805
806    #[test]
807    fn test_walk_for_audio_skips_node_modules() {
808        let tmp = std::env::temp_dir().join("upum_test_walk_nodemod");
809        let _ = fs::remove_dir_all(&tmp);
810        fs::create_dir_all(tmp.join("node_modules")).unwrap();
811        fs::create_dir_all(tmp.join("music")).unwrap();
812        fs::write(tmp.join("node_modules").join("test.wav"), b"nm").unwrap();
813        fs::write(tmp.join("music").join("test.wav"), b"music").unwrap();
814
815        let mut found = Vec::new();
816        walk_for_audio(
817            from_ref(&tmp),
818            &mut |batch, _count| {
819                found.extend_from_slice(batch);
820            },
821            &|| false,
822            None,
823            None,
824            None,
825        );
826        assert_eq!(found.len(), 1);
827        assert!(found[0].path.contains("music"));
828        let _ = fs::remove_dir_all(&tmp);
829    }
830
831    #[test]
832    fn test_get_audio_metadata_nonexistent() {
833        let path = "/nonexistent/audio_haxor_test_path/no_such_file.wav";
834        let meta = get_audio_metadata(path);
835        assert!(meta.error.is_some(), "missing file should surface io error");
836        assert_eq!(meta.size_bytes, 0);
837        assert_eq!(meta.full_path, path);
838    }
839
840    #[test]
841    fn test_get_audio_metadata_wav() {
842        let tmp = std::env::temp_dir().join("upum_test_meta_wav");
843        let _ = fs::remove_dir_all(&tmp);
844        fs::create_dir_all(&tmp).unwrap();
845        let wav_path = tmp.join("test.wav");
846
847        let mut header = [0u8; 44];
848        // RIFF header
849        header[0..4].copy_from_slice(b"RIFF");
850        let file_size: u32 = 44 - 8 + 1000;
851        header[4..8].copy_from_slice(&file_size.to_le_bytes());
852        header[8..12].copy_from_slice(b"WAVE");
853        // fmt chunk
854        header[12..16].copy_from_slice(b"fmt ");
855        header[16..20].copy_from_slice(&16u32.to_le_bytes());
856        header[20..22].copy_from_slice(&1u16.to_le_bytes()); // PCM
857        header[22..24].copy_from_slice(&2u16.to_le_bytes()); // channels
858        header[24..28].copy_from_slice(&44100u32.to_le_bytes()); // sample rate
859        header[28..32].copy_from_slice(&176400u32.to_le_bytes()); // byte rate
860        header[32..34].copy_from_slice(&4u16.to_le_bytes()); // block align
861        header[34..36].copy_from_slice(&16u16.to_le_bytes()); // bits per sample
862                                                              // data chunk
863        header[36..40].copy_from_slice(b"data");
864        header[40..44].copy_from_slice(&1000u32.to_le_bytes());
865
866        let mut file = fs::File::create(&wav_path).unwrap();
867        file.write_all(&header).unwrap();
868        // Write some data bytes to match the data size
869        file.write_all(&vec![0u8; 1000]).unwrap();
870
871        let meta = get_audio_metadata(wav_path.to_str().unwrap());
872        assert_eq!(meta.format, "WAV");
873        assert_eq!(meta.channels, Some(2));
874        assert_eq!(meta.sample_rate, Some(44100));
875        assert_eq!(meta.bits_per_sample, Some(16));
876        assert!(meta.error.is_none());
877        let _ = fs::remove_dir_all(&tmp);
878    }
879
880    #[test]
881    fn test_get_audio_metadata_flac() {
882        let tmp = std::env::temp_dir().join("upum_test_meta_flac");
883        let _ = fs::remove_dir_all(&tmp);
884        fs::create_dir_all(&tmp).unwrap();
885        let flac_path = tmp.join("test.flac");
886
887        let mut buf = [0u8; 42];
888        // fLaC magic
889        buf[0..4].copy_from_slice(b"fLaC");
890        // Metadata block header: last-block flag (0x80) + type 0 (STREAMINFO)
891        buf[4] = 0x80;
892        // Block size = 34 as 3-byte big-endian
893        buf[5] = 0;
894        buf[6] = 0;
895        buf[7] = 34;
896        // Min/max block size
897        buf[8..10].copy_from_slice(&4096u16.to_be_bytes());
898        buf[10..12].copy_from_slice(&4096u16.to_be_bytes());
899        // Min/max frame size (3 bytes each, zeros)
900        // bytes 12-17 are already 0
901        // Sample rate (20 bits) + channels-1 (3 bits) + bps-1 high bit (1 bit)
902        // 44100 Hz, 2 channels, 16 bits per sample
903        // sample_rate = 44100 = 0xAC44
904        // byte18 = (44100 >> 12) = 0x0A
905        // byte19 = (44100 >> 4) & 0xFF = 0xC4 (44100 = 0xAC44, >> 4 = 0xAC4, & 0xFF = 0xC4)
906        // byte20 = ((44100 & 0x0F) << 4) | ((2-1) << 1) | ((16-1) >> 4)
907        //        = (0x04 << 4) | (1 << 1) | (15 >> 4)
908        //        = 0x40 | 0x02 | 0x00 = 0x42
909        buf[18] = 0x0A;
910        buf[19] = 0xC4;
911        buf[20] = 0x42;
912        // byte21: bps-1 low 4 bits (15 & 0x0F = 0xF) << 4 | total_samples high 4 bits
913        // total_samples = 44100 = 0x0000AC44
914        // high 4 bits of 36-bit total = 0
915        buf[21] = 0xF0;
916        // bytes 22-25: total samples low 32 bits = 44100
917        buf[22] = 0x00;
918        buf[23] = 0x00;
919        buf[24] = 0xAC;
920        buf[25] = 0x44;
921        // bytes 26-41: MD5 (zeros, already set)
922
923        fs::write(&flac_path, buf).unwrap();
924
925        let meta = get_audio_metadata(flac_path.to_str().unwrap());
926        assert_eq!(meta.format, "FLAC");
927        assert_eq!(meta.sample_rate, Some(44100));
928        assert_eq!(meta.channels, Some(2));
929        // bits_per_sample parsing: (((buf[20] & 1) as u16) << 4) | ((buf[21] >> 4) as u16) + 1
930        // = ((0x42 & 1) << 4) | (0xF0 >> 4) + 1 = (0 << 4) | 15 + 1 = 16
931        assert_eq!(meta.bits_per_sample, Some(16));
932        assert!(meta.error.is_none());
933        let _ = fs::remove_dir_all(&tmp);
934    }
935
936    #[test]
937    fn test_walk_for_audio_respects_depth_limit() {
938        let tmp = std::env::temp_dir().join("upum_test_walk_depth");
939        let _ = fs::remove_dir_all(&tmp);
940
941        // Create a dir structure 32 levels deep (exceeds depth > 30 guard)
942        let mut deep = tmp.clone();
943        for i in 0..32 {
944            deep = deep.join(format!("d{}", i));
945        }
946        fs::create_dir_all(&deep).unwrap();
947        fs::write(deep.join("deep.wav"), b"deep wav").unwrap();
948
949        let mut found = Vec::new();
950        walk_for_audio(
951            from_ref(&tmp),
952            &mut |batch, _count| {
953                found.extend_from_slice(batch);
954            },
955            &|| false,
956            None,
957            None,
958            None,
959        );
960        assert!(
961            !found.iter().any(|s| s.name == "deep"),
962            "Should not find audio files deeper than 30 levels"
963        );
964        let _ = fs::remove_dir_all(&tmp);
965    }
966
967    #[test]
968    fn test_walk_for_audio_batching() {
969        let tmp = std::env::temp_dir().join("upum_test_walk_batching");
970        let _ = fs::remove_dir_all(&tmp);
971        fs::create_dir_all(&tmp).unwrap();
972
973        for i in 0..120 {
974            fs::write(tmp.join(format!("sample_{}.wav", i)), b"wav data").unwrap();
975        }
976
977        let mut batch_call_count = 0usize;
978        walk_for_audio(
979            from_ref(&tmp),
980            &mut |_batch, _count| {
981                batch_call_count += 1;
982            },
983            &|| false,
984            None,
985            None,
986            None,
987        );
988        assert!(
989            batch_call_count >= 2,
990            "Expected on_batch called at least twice for 120 files with batch_size=100, got {}",
991            batch_call_count
992        );
993        let _ = fs::remove_dir_all(&tmp);
994    }
995
996    #[test]
997    fn test_walk_for_audio_deduplicates_symlinks() {
998        let tmp = std::env::temp_dir().join("upum_test_walk_symlinks");
999        let _ = fs::remove_dir_all(&tmp);
1000        let subdir = tmp.join("originals");
1001        fs::create_dir_all(&subdir).unwrap();
1002        fs::write(subdir.join("test.wav"), b"wav data").unwrap();
1003
1004        // Create a symlink to subdir
1005        #[cfg(unix)]
1006        {
1007            let link = tmp.join("linked");
1008            std::os::unix::fs::symlink(&subdir, &link).unwrap();
1009
1010            let mut found = Vec::new();
1011            walk_for_audio(
1012                from_ref(&tmp),
1013                &mut |batch, _count| {
1014                    found.extend_from_slice(batch);
1015                },
1016                &|| false,
1017                None,
1018                None,
1019                None,
1020            );
1021            let wav_count = found.iter().filter(|s| s.name == "test").count();
1022            assert_eq!(
1023                wav_count, 1,
1024                "test.wav should be found exactly once despite symlink, found {}",
1025                wav_count
1026            );
1027        }
1028        let _ = fs::remove_dir_all(&tmp);
1029    }
1030
1031    #[test]
1032    fn test_walk_for_audio_deduplicates_overlapping_roots() {
1033        let tmp = std::env::temp_dir().join("upum_test_audio_overlap");
1034        let _ = fs::remove_dir_all(&tmp);
1035        let child = tmp.join("sub");
1036        fs::create_dir_all(&child).unwrap();
1037        fs::write(child.join("overlap.wav"), b"fake wav").unwrap();
1038        fs::write(tmp.join("top.wav"), b"fake wav").unwrap();
1039
1040        let mut found = Vec::new();
1041        walk_for_audio(
1042            &[tmp.clone(), child.clone()],
1043            &mut |batch, _| found.extend_from_slice(batch),
1044            &|| false,
1045            None,
1046            None,
1047            None,
1048        );
1049        let overlap_count = found.iter().filter(|s| s.name == "overlap").count();
1050        assert_eq!(
1051            overlap_count, 1,
1052            "overlap.wav found {} times",
1053            overlap_count
1054        );
1055        assert!(found.iter().any(|s| s.name == "top"));
1056        let _ = fs::remove_dir_all(&tmp);
1057    }
1058
1059    #[test]
1060    fn test_walk_for_audio_consistent_counts() {
1061        let tmp = std::env::temp_dir().join("upum_test_audio_consistent");
1062        let _ = fs::remove_dir_all(&tmp);
1063        for i in 0..5 {
1064            let d = tmp.join(format!("dir{}", i));
1065            fs::create_dir_all(&d).unwrap();
1066            fs::write(d.join(format!("s{}.wav", i)), b"fake wav").unwrap();
1067        }
1068        let mut c1 = 0;
1069        walk_for_audio(
1070            &[tmp.clone()],
1071            &mut |b, _| c1 += b.len(),
1072            &|| false,
1073            None,
1074            None,
1075            None,
1076        );
1077        let mut c2 = 0;
1078        walk_for_audio(
1079            &[tmp.clone()],
1080            &mut |b, _| c2 += b.len(),
1081            &|| false,
1082            None,
1083            None,
1084            None,
1085        );
1086        assert_eq!(c1, c2, "two scans should match: {} vs {}", c1, c2);
1087        assert_eq!(c1, 5);
1088        let _ = fs::remove_dir_all(&tmp);
1089    }
1090
1091    #[test]
1092    fn test_get_audio_metadata_aiff() {
1093        let tmp = std::env::temp_dir().join("upum_test_meta_aiff");
1094        let _ = fs::remove_dir_all(&tmp);
1095        fs::create_dir_all(&tmp).unwrap();
1096        let aiff_path = tmp.join("test.aiff");
1097
1098        // Build a minimal valid AIFF file
1099        let mut data = Vec::new();
1100        // FORM header
1101        data.extend_from_slice(b"FORM");
1102        // file_size - 8 placeholder (will fill after)
1103        let total_size: u32 = 4 + 8 + 18; // "AIFF" + COMM chunk header + COMM data
1104        data.extend_from_slice(&total_size.to_be_bytes());
1105        data.extend_from_slice(b"AIFF");
1106        // COMM chunk
1107        data.extend_from_slice(b"COMM");
1108        data.extend_from_slice(&18u32.to_be_bytes()); // chunk size
1109        data.extend_from_slice(&1u16.to_be_bytes()); // channels = 1
1110        data.extend_from_slice(&48000u32.to_be_bytes()); // num_frames = 48000
1111        data.extend_from_slice(&24u16.to_be_bytes()); // bits_per_sample = 24
1112                                                      // 80-bit extended float for sample rate 48000
1113                                                      // exponent = 16383 + 15 = 16398 = 0x400E
1114                                                      // mantissa = 48000 << 16 = 0xBB80_0000 (top 32 bits), lower 32 bits = 0
1115        data.extend_from_slice(&[0x40, 0x0E, 0xBB, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]);
1116
1117        fs::write(&aiff_path, &data).unwrap();
1118
1119        let meta = get_audio_metadata(aiff_path.to_str().unwrap());
1120        assert_eq!(meta.format, "AIFF");
1121        assert_eq!(meta.channels, Some(1));
1122        assert_eq!(meta.sample_rate, Some(48000));
1123        assert_eq!(meta.bits_per_sample, Some(24));
1124        assert!(meta.error.is_none());
1125        let _ = fs::remove_dir_all(&tmp);
1126    }
1127
1128    #[test]
1129    fn test_format_size_boundary_values() {
1130        assert_eq!(format_size(1023), "1023.0 B");
1131        assert_eq!(format_size(1024), "1.0 KB");
1132        assert_eq!(format_size(1025), "1.0 KB");
1133        assert_eq!(format_size(1024 * 1024 - 1), "1024.0 KB");
1134        assert_eq!(format_size(1024 * 1024), "1.0 MB");
1135    }
1136
1137    #[test]
1138    fn test_format_size_zero() {
1139        assert_eq!(format_size(0), "0 B");
1140    }
1141
1142    #[test]
1143    fn test_audio_extensions_includes_common() {
1144        for ext in &[".wav", ".mp3", ".flac"] {
1145            assert!(
1146                AUDIO_EXTENSIONS.contains(ext),
1147                "AUDIO_EXTENSIONS must include {}",
1148                ext
1149            );
1150        }
1151    }
1152
1153    #[test]
1154    fn test_parse_wav_invalid() {
1155        let tmp = std::env::temp_dir().join("upum_test_parse_wav_invalid");
1156        let _ = fs::remove_dir_all(&tmp);
1157        fs::create_dir_all(&tmp).unwrap();
1158        let path = tmp.join("garbage.wav");
1159        fs::write(
1160            &path,
1161            [
1162                0xDE, 0xAD, 0xBE, 0xEF, 0x00, 0x11, 0x22, 0x33, 0xAA, 0xBB, 0xCC, 0xDD, 0x00, 0x00,
1163                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1164                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1165                0x00, 0x00,
1166            ],
1167        )
1168        .unwrap();
1169
1170        let mut meta = AudioMetadata {
1171            full_path: path.to_string_lossy().to_string(),
1172            file_name: "garbage.wav".to_string(),
1173            directory: tmp.to_string_lossy().to_string(),
1174            format: "WAV".to_string(),
1175            size_bytes: 44,
1176            created: String::new(),
1177            modified: String::new(),
1178            accessed: String::new(),
1179            permissions: String::new(),
1180            channels: None,
1181            sample_rate: None,
1182            bits_per_sample: None,
1183            duration: None,
1184            error: None,
1185        };
1186        parse_wav(&path, &mut meta);
1187        // Should not crash; fields remain None since RIFF/WAVE magic doesn't match
1188        assert!(meta.channels.is_none());
1189        assert!(meta.sample_rate.is_none());
1190        let _ = fs::remove_dir_all(&tmp);
1191    }
1192
1193    #[test]
1194    fn test_get_audio_metadata_wav_zero_byte_rate_skips_duration() {
1195        let tmp = std::env::temp_dir().join("upum_test_meta_wav_zero_br");
1196        let _ = fs::remove_dir_all(&tmp);
1197        fs::create_dir_all(&tmp).unwrap();
1198        let wav_path = tmp.join("zero_br.wav");
1199
1200        let mut header = [0u8; 44];
1201        header[0..4].copy_from_slice(b"RIFF");
1202        let file_size: u32 = 44 - 8 + 1000;
1203        header[4..8].copy_from_slice(&file_size.to_le_bytes());
1204        header[8..12].copy_from_slice(b"WAVE");
1205        header[12..16].copy_from_slice(b"fmt ");
1206        header[16..20].copy_from_slice(&16u32.to_le_bytes());
1207        header[20..22].copy_from_slice(&1u16.to_le_bytes());
1208        header[22..24].copy_from_slice(&2u16.to_le_bytes());
1209        header[24..28].copy_from_slice(&44100u32.to_le_bytes());
1210        header[28..32].copy_from_slice(&0u32.to_le_bytes());
1211        header[32..34].copy_from_slice(&4u16.to_le_bytes());
1212        header[34..36].copy_from_slice(&16u16.to_le_bytes());
1213        header[36..40].copy_from_slice(b"data");
1214        header[40..44].copy_from_slice(&1000u32.to_le_bytes());
1215
1216        let mut file = fs::File::create(&wav_path).unwrap();
1217        file.write_all(&header).unwrap();
1218        file.write_all(&vec![0u8; 1000]).unwrap();
1219
1220        let meta = get_audio_metadata(wav_path.to_str().unwrap());
1221        assert_eq!(meta.format, "WAV");
1222        assert_eq!(meta.channels, Some(2));
1223        assert_eq!(meta.sample_rate, Some(44100));
1224        assert!(
1225            meta.duration.is_none(),
1226            "byte_rate 0 must not produce duration via division"
1227        );
1228        let _ = fs::remove_dir_all(&tmp);
1229    }
1230
1231    #[test]
1232    fn test_get_audio_metadata_no_extension_still_reads_file_times() {
1233        let tmp = std::env::temp_dir().join("upum_test_meta_no_ext");
1234        let _ = fs::remove_dir_all(&tmp);
1235        fs::create_dir_all(&tmp).unwrap();
1236        let path = tmp.join("README"); // no extension
1237        fs::write(&path, b"plain").unwrap();
1238        let meta = get_audio_metadata(path.to_str().unwrap());
1239        assert_eq!(meta.format, "");
1240        assert_eq!(meta.file_name, "README");
1241        assert!(meta.error.is_none());
1242        assert_eq!(meta.size_bytes, 5);
1243        let _ = fs::remove_dir_all(&tmp);
1244    }
1245
1246    #[test]
1247    fn test_get_audio_metadata_rex_skips_native_header_parse() {
1248        let tmp = std::env::temp_dir().join("upum_test_meta_rex_loop.rex");
1249        let _ = fs::remove_file(&tmp);
1250        fs::write(&tmp, b"not a rex header").unwrap();
1251        let meta = get_audio_metadata(tmp.to_str().unwrap());
1252        assert_eq!(meta.format, "REX");
1253        assert!(
1254            meta.duration.is_none() && meta.sample_rate.is_none(),
1255            ".rex is listed as audio but get_audio_metadata has no parser branch for it"
1256        );
1257        assert!(meta.error.is_none());
1258        let _ = fs::remove_file(&tmp);
1259    }
1260}