1use rayon::prelude::*;
8
9pub fn extract_page_count(path: &str) -> Option<u32> {
11 let doc = lopdf::Document::load(path).ok()?;
12 Some(doc.get_pages().len() as u32)
13}
14
15pub fn extract_pages_batch(paths: &[String]) -> Vec<(String, u32)> {
18 paths
19 .par_iter()
20 .filter_map(|p| extract_page_count(p).map(|n| (p.clone(), n)))
21 .collect()
22}
23
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::{Path, PathBuf};

    /// Builds a path in the system temp directory named `<label>_<pid>.pdf`.
    ///
    /// The process id keeps concurrently running test processes from
    /// clobbering each other's files; `label` must be unique per test.
    fn temp_pdf_path(label: &str) -> PathBuf {
        std::env::temp_dir().join(format!("{label}_{}.pdf", std::process::id()))
    }

    /// Writes a PDF with `page_count` small, visually empty pages to `path`.
    ///
    /// Panics if the file cannot be written. `std::fs::write` is used so that
    /// write errors surface here instead of being swallowed by a buffered
    /// writer's implicit flush on drop.
    fn write_blank_pdf(path: &Path, title: &str, page_count: usize) {
        use printpdf::{Mm, Op, PdfDocument, PdfPage, PdfSaveOptions};

        let pages: Vec<PdfPage> = (0..page_count)
            .map(|_| {
                PdfPage::new(
                    Mm(40.0),
                    Mm(40.0),
                    vec![Op::SaveGraphicsState, Op::RestoreGraphicsState],
                )
            })
            .collect();

        let mut doc = PdfDocument::new(title);
        doc.with_pages(pages);
        let bytes = doc.save(&PdfSaveOptions::default(), &mut Vec::new());
        std::fs::write(path, &bytes).expect("write temp pdf");
    }

    #[test]
    fn extract_pages_missing_file_returns_none() {
        assert!(extract_page_count("/nonexistent/file.pdf").is_none());
    }

    #[test]
    fn extract_pages_not_a_pdf_returns_none() {
        let tmp = temp_pdf_path("upum_not_a_pdf");
        std::fs::write(&tmp, b"this is not a pdf").unwrap();
        let res = extract_page_count(tmp.to_str().unwrap());
        // Clean up before asserting so a failure does not leave the file behind.
        let _ = std::fs::remove_file(&tmp);
        assert!(res.is_none());
    }

    #[test]
    fn extract_pages_batch_skips_bad_files() {
        let paths = vec![
            "/nonexistent/a.pdf".to_string(),
            "/nonexistent/b.pdf".to_string(),
        ];
        let result = extract_pages_batch(&paths);
        assert!(result.is_empty());
    }

    #[test]
    fn extract_page_count_matches_printpdf_three_pages() {
        let tmp = temp_pdf_path("ah_pdf_meta_three");
        write_blank_pdf(&tmp, "pdf_meta_test", 3);

        let n = extract_page_count(tmp.to_str().unwrap());
        let _ = std::fs::remove_file(&tmp);
        assert_eq!(n, Some(3));
    }

    #[test]
    fn extract_pages_batch_merges_valid_paths() {
        let a = temp_pdf_path("ah_pdf_batch_a");
        let b = temp_pdf_path("ah_pdf_batch_b");
        write_blank_pdf(&a, "a", 1);
        write_blank_pdf(&b, "b", 2);

        let a_str = a.to_string_lossy().into_owned();
        let b_str = b.to_string_lossy().into_owned();
        let paths = vec![
            a_str.clone(),
            b_str.clone(),
            "/totally/missing/xyz.pdf".to_string(),
        ];
        let mut pairs = extract_pages_batch(&paths);
        pairs.sort_by(|x, y| x.0.cmp(&y.0));

        let _ = std::fs::remove_file(&a);
        let _ = std::fs::remove_file(&b);

        // The two names differ only in the `a`/`b` label, so after sorting the
        // one-page file comes first. Checking whole pairs also verifies that
        // each count is attached to the right path and the missing file is gone.
        assert_eq!(pairs, vec![(a_str, 1), (b_str, 2)]);
    }
}