app_lib/
midi_scanner.rs

1//! MIDI file scanner — dedicated walker independent of the preset scanner.
2//!
//! Discovers `.mid` / `.midi` files under the user home directory (`~`,
//! resolved via [`dirs::home_dir`]) and, on macOS, under
//! `/Library/Audio/Presets`. Traversal is parallel and can be cancelled
//! through a stop callback.
6//! Symlinks are followed so link targets are scanned.
7
8use crate::history::MidiFile;
9use crate::scanner_skip_dirs::SCANNER_SKIP_DIRS as SKIP_DIRS;
10use crate::unified_walker::IncrementalDirState;
11use rayon::prelude::*;
12use dashmap::DashSet;
13use std::collections::HashSet;
14use std::fs;
15use std::path::{Path, PathBuf};
16use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
17use std::sync::{Arc, Mutex};
18
/// Strips the macOS `/System/Volumes/Data` prefix from a path.
///
/// On macOS, a path that starts with `/System/Volumes/Data/` is returned with
/// the `/System/Volumes/Data` part removed; the remainder keeps its leading
/// `/`. Every other path — and every path on other platforms — is returned
/// unchanged.
fn normalize_macos_path(p: PathBuf) -> PathBuf {
    #[cfg(target_os = "macos")]
    {
        const DATA_VOLUME: &str = "/System/Volumes/Data";
        let text = p.to_string_lossy();
        if let Some(rest) = text.strip_prefix(DATA_VOLUME) {
            // Require a separator right after the prefix so that look-alikes
            // such as `/System/Volumes/Database` are left alone.
            if rest.starts_with('/') {
                return PathBuf::from(rest);
            }
        }
    }
    p
}
29
/// File extensions recognised as MIDI, written lowercase with the leading dot
/// so they can be compared directly against the dot-prefixed, lowercased
/// extension string that `walk_dir_parallel` builds for each file.
const MIDI_EXTENSIONS: &[&str] = &[".mid", ".midi"];
31
/// Turns a byte count into display text by delegating to the crate-root
/// `format_size`; the result is what this module stores in
/// `MidiFile::size_formatted`.
fn format_size(bytes: u64) -> String {
    crate::format_size(bytes)
}
35
36pub fn get_midi_roots() -> Vec<PathBuf> {
37    let home = dirs::home_dir().unwrap_or_default();
38    let mut roots = Vec::new();
39
40    if !home.as_os_str().is_empty() {
41        roots.push(home.clone());
42    }
43
44    #[cfg(target_os = "macos")]
45    {
46        roots.push(PathBuf::from("/Library/Audio/Presets"));
47    }
48
49    roots.sort();
50    roots.dedup();
51    roots.into_iter().filter(|r| r.exists()).collect()
52}
53
54pub fn walk_for_midi(
55    roots: &[PathBuf],
56    on_batch: &mut dyn FnMut(&[MidiFile], usize),
57    should_stop: &(dyn Fn() -> bool + Sync),
58    exclude: Option<HashSet<String>>,
59    active_dirs: Option<Arc<Mutex<Vec<String>>>>,
60    incremental: Option<Arc<IncrementalDirState>>,
61) {
62    let batch_size = 100;
63    let stop = Arc::new(AtomicBool::new(false));
64    let found = Arc::new(AtomicUsize::new(0));
65    let active = active_dirs.unwrap_or_else(|| Arc::new(Mutex::new(Vec::new())));
66    let (tx, rx) = std::sync::mpsc::sync_channel::<Vec<MidiFile>>(256);
67    let visited = Arc::new(DashSet::new());
68    let exclude = Arc::new(exclude.unwrap_or_default());
69
70    let roots_owned: Vec<PathBuf> = roots.to_vec();
71    let stop2 = stop.clone();
72    let found2 = found.clone();
73    let incremental = incremental.clone();
74    let pool = rayon::ThreadPoolBuilder::new()
75        .num_threads(num_cpus::get().max(4))
76        .build()
77        .unwrap();
78    std::thread::spawn(move || {
79        pool.install(|| {
80            roots_owned.par_iter().for_each(|root| {
81                if stop2.load(Ordering::Relaxed) {
82                    return;
83                }
84                walk_dir_parallel(
85                    root,
86                    0,
87                    None,
88                    &visited,
89                    &tx,
90                    &found2,
91                    batch_size,
92                    &stop2,
93                    &exclude,
94                    &active,
95                    incremental.clone(),
96                );
97            });
98        });
99        drop(pool);
100    });
101
102    let mut total_found = 0usize;
103    loop {
104        if should_stop() {
105            stop.store(true, Ordering::Relaxed);
106            while rx.try_recv().is_ok() {}
107            break;
108        }
109        match rx.recv_timeout(std::time::Duration::from_millis(10)) {
110            Ok(midi_files) => {
111                total_found += midi_files.len();
112                on_batch(&midi_files, total_found);
113            }
114            Err(std::sync::mpsc::RecvTimeoutError::Timeout) => continue,
115            Err(std::sync::mpsc::RecvTimeoutError::Disconnected) => break,
116        }
117    }
118}
119
120#[allow(clippy::too_many_arguments)]
121fn walk_dir_parallel(
122    dir: &Path,
123    depth: u32,
124    parent_dev: Option<u64>,
125    visited: &Arc<DashSet<PathBuf>>,
126    tx: &std::sync::mpsc::SyncSender<Vec<MidiFile>>,
127    found: &Arc<AtomicUsize>,
128    batch_size: usize,
129    stop: &Arc<AtomicBool>,
130    exclude: &Arc<HashSet<String>>,
131    active_dirs: &Arc<Mutex<Vec<String>>>,
132    incremental: Option<Arc<IncrementalDirState>>,
133) {
134    if depth > 30 || stop.load(Ordering::Relaxed) {
135        return;
136    }
137
138    // Mount-point detection — on Unix, a dir whose st_dev differs from its
139    // parent's sits on a different filesystem (network mount, external drive,
140    // overlayfs, etc.). Log the boundary so the user can see which mounts
141    // the walker actually entered.
142    #[cfg(unix)]
143    let current_dev: Option<u64> = {
144        use std::os::unix::fs::MetadataExt;
145        match fs::metadata(dir) {
146            Ok(m) => {
147                let d = m.dev();
148                if let Some(pd) = parent_dev {
149                    if pd != d {
150                        crate::write_app_log_verbose(format!(
151                            "SCAN MOUNT — midi | {} | parent_dev={} current_dev={}",
152                            dir.display(),
153                            pd,
154                            d
155                        ));
156                    }
157                }
158                Some(d)
159            }
160            Err(_) => None,
161        }
162    };
163    #[cfg(not(unix))]
164    let current_dev: Option<u64> = None;
165    let _ = parent_dev;
166
167    // Canonicalize outside the lock-free set — it's a syscall (network roundtrip on
168    // SMB) and must not block other workers while in flight.
169    {
170        let orig = normalize_macos_path(dir.to_path_buf());
171        let canon = fs::canonicalize(dir).ok().map(normalize_macos_path);
172        let key = canon.clone().unwrap_or_else(|| orig.clone());
173        if !visited.insert(key.clone()) {
174            // Dedup hit — already visited via another path. Log if this is
175            // something the user might care about (network mounts, /mnt).
176            let s = dir.to_string_lossy();
177            if s.contains("/mnt/") || s.ends_with("/mnt") {
178                crate::write_app_log_verbose(format!(
179                    "SCAN DEDUP SKIP — midi | orig={} | canon={} | key={}",
180                    orig.display(),
181                    canon
182                        .as_ref()
183                        .map(|p| p.display().to_string())
184                        .unwrap_or_else(|| "<canonicalize failed>".into()),
185                    key.display(),
186                ));
187            }
188            return;
189        }
190        visited.insert(orig);
191    }
192
193    if let Some(ref inc) = incremental {
194        if inc.should_skip(dir) {
195            return;
196        }
197    }
198
199    let dir_str = dir.to_string_lossy().to_string();
200    {
201        let mut ad = active_dirs.lock().unwrap_or_else(|e| e.into_inner());
202        ad.push(dir_str.clone());
203        if ad.len() > 200 {
204            let excess = ad.len() - 200;
205            ad.drain(..excess);
206        }
207    }
208
209    // Diagnostic: log when we enter /mnt/ paths (SMB mounts typically).
210    let entries: Vec<_> = match fs::read_dir(dir) {
211        Ok(e) => e.flatten().collect(),
212        Err(_e) => {
213            return;
214        }
215    };
216
217    let mut files = Vec::new();
218    let mut subdirs = Vec::new();
219
220    for entry in &entries {
221        let name = entry.file_name();
222        let name_str = name.to_string_lossy();
223        // `@` prefix = Synology NAS system dirs (@eaDir, @tmp, @syno*, etc.).
224        if name_str.starts_with('.')
225            || name_str.starts_with('@')
226            || SKIP_DIRS.contains(&name_str.as_ref())
227            || exclude.contains(name_str.as_ref())
228        {
229            continue;
230        }
231        // Cached d_type from readdir — no extra stat() syscall per entry.
232        let ft = match entry.file_type() {
233            Ok(f) => f,
234            Err(_) => continue,
235        };
236        let path = entry.path();
237        if ft.is_dir() {
238            subdirs.push(path);
239        } else if ft.is_file() {
240            files.push((path, dir.to_path_buf()));
241        } else if ft.is_symlink() {
242            match fs::metadata(&path) {
243                Ok(m) if m.is_dir() => {
244                    subdirs.push(path);
245                }
246                Ok(m) if m.is_file() => {
247                    files.push((path, dir.to_path_buf()));
248                }
249                _ => {}
250            }
251        }
252    }
253
254    let mut batch = Vec::new();
255    for (path, parent) in files {
256        let ext = path
257            .extension()
258            .map(|e| format!(".{}", e.to_string_lossy().to_lowercase()))
259            .unwrap_or_default();
260
261        if MIDI_EXTENSIONS.contains(&ext.as_str()) {
262            let path_str = path.to_string_lossy().to_string();
263            if exclude.contains(&path_str) {
264                continue;
265            }
266            if let Ok(meta) = fs::metadata(&path) {
267                let midi_name = path
268                    .file_stem()
269                    .map(|s| s.to_string_lossy().to_string())
270                    .unwrap_or_default();
271                let modified = meta
272                    .modified()
273                    .ok()
274                    .map(|t| {
275                        let dt: chrono::DateTime<chrono::Utc> = t.into();
276                        dt.format("%Y-%m-%d").to_string()
277                    })
278                    .unwrap_or_default();
279
280                batch.push(MidiFile {
281                    name: midi_name,
282                    path: path_str,
283                    directory: parent.to_string_lossy().to_string(),
284                    format: ext[1..].to_uppercase(),
285                    size: meta.len(),
286                    size_formatted: format_size(meta.len()),
287                    modified,
288                });
289                found.fetch_add(1, Ordering::Relaxed);
290
291                if batch.len() >= batch_size {
292                    let _ = tx.send(batch);
293                    batch = Vec::new();
294                }
295            }
296        }
297    }
298    if !batch.is_empty() {
299        let _ = tx.send(batch);
300    }
301
302    subdirs.par_iter().for_each(|subdir| {
303        walk_dir_parallel(
304            subdir,
305            depth + 1,
306            current_dev,
307            visited,
308            tx,
309            found,
310            batch_size,
311            stop,
312            exclude,
313            active_dirs,
314            incremental.clone(),
315        );
316    });
317
318    if let Some(ref inc) = incremental {
319        inc.record_scanned_dir(dir);
320    }
321}
322
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates an empty scratch directory under the system temp dir whose name
    /// embeds `name` and the current time in nanoseconds.
    fn test_dir(name: &str) -> PathBuf {
        let stamp = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        let scratch = std::env::temp_dir().join(format!("upum_midi_scan_{}_{}", name, stamp));
        let _ = fs::remove_dir_all(&scratch);
        fs::create_dir_all(&scratch).unwrap();
        scratch
    }

    /// Writes `content` to `p`, creating any missing parent directories first.
    fn touch(p: &Path, content: &[u8]) {
        if let Some(parent) = p.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        std::fs::write(p, content).unwrap();
    }

    /// Runs an uninterrupted scan of `root` and returns every reported file in
    /// the order the batches arrived.
    fn scan(root: &Path, exclude: Option<HashSet<String>>) -> Vec<MidiFile> {
        let mut collected: Vec<MidiFile> = Vec::new();
        walk_for_midi(
            &[root.to_path_buf()],
            &mut |batch, _total| collected.extend_from_slice(batch),
            &|| false,
            exclude,
            None,
            None,
        );
        collected
    }

    /// Same as [`scan`], reduced to the reported file names.
    fn scan_names(root: &Path, exclude: Option<HashSet<String>>) -> Vec<String> {
        scan(root, exclude).iter().map(|m| m.name.clone()).collect()
    }

    #[test]
    fn test_midi_extensions_complete() {
        for expected in [".mid", ".midi"].iter() {
            assert!(MIDI_EXTENSIONS.contains(expected));
        }
    }

    #[test]
    fn test_get_midi_roots_returns_existing_paths() {
        for r in get_midi_roots().iter() {
            assert!(r.exists(), "returned root {:?} should exist", r);
        }
    }

    #[test]
    fn test_walk_for_midi_finds_only_midi_files() {
        let root = test_dir("finds_only_midi");
        let fixtures: [(&str, &[u8]); 5] = [
            ("song.mid", b"MThd"),
            ("another.MIDI", b"MThd"),
            ("preset.fxp", b"nope"),
            ("audio.wav", b"RIFF"),
            ("doc.pdf", b"%PDF"),
        ];
        for (file, bytes) in fixtures.iter() {
            touch(&root.join(file), bytes);
        }

        let mut found_names = scan_names(&root, None);
        found_names.sort();
        assert_eq!(found_names, vec!["another".to_string(), "song".to_string()]);
        let _ = fs::remove_dir_all(&root);
    }

    #[test]
    fn test_walk_for_midi_exclude_set() {
        let root = test_dir("exclude_set");
        touch(&root.join("keep.mid"), b"MThd");
        touch(&root.join("drop.mid"), b"MThd");
        let dropped = root.join("drop.mid").to_string_lossy().to_string();
        let excl: HashSet<String> = std::iter::once(dropped).collect();

        let names = scan_names(&root, Some(excl));
        assert_eq!(names, vec!["keep".to_string()]);
        let _ = fs::remove_dir_all(&root);
    }

    #[test]
    fn test_walk_for_midi_skips_hidden_and_node_modules() {
        let root = test_dir("skip_hidden");
        for relative in [
            "visible.mid",
            ".hidden.mid",
            "node_modules/dep.mid",
            "@eaDir/thumb.mid",
        ]
        .iter()
        {
            touch(&root.join(relative), b"MThd");
        }

        let names = scan_names(&root, None);
        assert_eq!(names, vec!["visible".to_string()]);
        let _ = fs::remove_dir_all(&root);
    }

    #[test]
    fn test_walk_for_midi_populates_metadata() {
        let root = test_dir("metadata");
        touch(&root.join("song.mid"), b"MThd\x00\x00\x00\x06");

        let files = scan(&root, None);
        assert_eq!(files.len(), 1);
        let only = &files[0];
        assert_eq!(only.name, "song");
        assert_eq!(only.format, "MID");
        assert_eq!(only.size, 8);
        assert!(!only.modified.is_empty());
        let _ = fs::remove_dir_all(&root);
    }
}