1use std::fs::{File, OpenOptions};
32use std::io::{Read, Write};
33use std::time::{Duration, Instant};
34
35use camino::{Utf8Path, Utf8PathBuf};
36use fs2::FileExt;
37use tracing::warn;
38
39use super::metadata::{DoigetExtension, Metadata};
40use super::{EntryInfo, Store, StoreError};
41use crate::{Safekey, SCHEMA_VERSION};
42
/// Name of the sub-directory, created inside the store root, that holds the
/// per-entry `<safekey>.toml` metadata files and their `.toml.lock` companions.
const METADATA_DIR: &str = ".metadata";

/// Upper bound on how long `acquire_lock` keeps retrying a contended lock
/// before it gives up with `StoreError::LockTimeout`.
const LOCK_TIMEOUT: Duration = Duration::from_secs(5);

/// Pause between two consecutive lock attempts while the lock is contended.
const LOCK_POLL_INTERVAL: Duration = Duration::from_millis(50);
54
/// Filesystem-backed implementation of the `Store` trait.
///
/// PDFs are placed directly under `root` as `<safekey>.pdf`; metadata lives in
/// `metadata_dir` as `<safekey>.toml`, with a `<safekey>.toml.lock` file used
/// for locking.
#[derive(Debug, Clone)]
pub struct FsStore {
    /// Top-level directory of the store.
    root: Utf8PathBuf,
    /// `root` joined with `.metadata`; computed once in `FsStore::new`.
    metadata_dir: Utf8PathBuf,
}
61
62impl FsStore {
63 pub fn new(root: Utf8PathBuf) -> Result<Self, StoreError> {
74 if root.exists() && !root.is_dir() {
77 return Err(StoreError::Io(std::io::Error::new(
78 std::io::ErrorKind::AlreadyExists,
79 format!("store root {} exists but is not a directory", root),
80 )));
81 }
82 let metadata_dir = root.join(METADATA_DIR);
83
84 create_dir_secure(root.as_std_path())?;
85 create_dir_secure(metadata_dir.as_std_path())?;
86
87 Ok(Self { root, metadata_dir })
88 }
89
90 pub fn root(&self) -> &Utf8Path {
92 &self.root
93 }
94
95 fn metadata_path(&self, key: &Safekey) -> Result<Utf8PathBuf, StoreError> {
104 guard_safekey(key.as_str())?;
105 let p = self.metadata_dir.join(format!("{}.toml", key.as_str()));
106 if p.parent() != Some(self.metadata_dir.as_path()) {
110 return Err(StoreError::PathTraversal { path: p });
111 }
112 Ok(p)
113 }
114
115 fn lock_path(&self, key: &Safekey) -> Result<Utf8PathBuf, StoreError> {
116 guard_safekey(key.as_str())?;
117 Ok(self
118 .metadata_dir
119 .join(format!("{}.toml.lock", key.as_str())))
120 }
121
122 fn pdf_path(&self, key: &Safekey) -> Result<Utf8PathBuf, StoreError> {
123 guard_safekey(key.as_str())?;
124 Ok(self.root.join(format!("{}.pdf", key.as_str())))
125 }
126}
127
128impl Store for FsStore {
129 fn read(&self, key: &Safekey) -> Result<Option<Metadata>, StoreError> {
130 let meta_path = self.metadata_path(key)?;
131 if !meta_path.exists() {
132 return Ok(None);
133 }
134
135 let lock_path = self.lock_path(key)?;
140 let lock_file = open_or_create_lock_file(&lock_path)?;
141 acquire_lock(&lock_file, &lock_path, LockMode::Shared)?;
142
143 let raw = std::fs::read_to_string(meta_path.as_std_path())?;
144 let _ = <File as FileExt>::unlock(&lock_file);
149
150 let metadata: Metadata = toml::from_str(&raw)?;
151 check_schema_version(&metadata.schema_version)?;
152 Ok(Some(metadata))
153 }
154
155 fn write(&self, key: &Safekey, m: &Metadata, pdf: Option<&Utf8Path>) -> Result<(), StoreError> {
156 let meta_path = self.metadata_path(key)?;
157 let lock_path = self.lock_path(key)?;
158 let lock_file = open_or_create_lock_file(&lock_path)?;
159 acquire_lock(&lock_file, &lock_path, LockMode::Exclusive)?;
160
161 let merged = if meta_path.exists() {
167 let raw = std::fs::read_to_string(meta_path.as_std_path())?;
168 let existing: Metadata = toml::from_str(&raw)?;
169 check_schema_version_for_write(&existing.schema_version)?;
170 merge_metadata(existing, m.clone())
171 } else {
172 m.clone()
173 };
174
175 let normalized = normalize_toml(&merged)?;
178
179 if let Some(pdf_src) = pdf {
192 let pdf_dst = self.pdf_path(key)?;
193 let mut bytes = Vec::new();
194 File::open(pdf_src.as_std_path())?.read_to_end(&mut bytes)?;
195 atomic_write(&pdf_dst, &bytes)?;
197 }
198
199 atomic_write(&meta_path, normalized.as_bytes())?;
203
204 let _ = <File as FileExt>::unlock(&lock_file);
205 Ok(())
206 }
207
208 fn list_recent(&self, limit: usize) -> Result<Vec<EntryInfo>, StoreError> {
209 let mut entries = read_all_entries(&self.metadata_dir)?;
210 entries.sort_by_key(|e| std::cmp::Reverse(e.fetched_at));
213 entries.truncate(limit);
214 Ok(entries)
215 }
216
217 fn search(&self, query: &str, limit: usize) -> Result<Vec<EntryInfo>, StoreError> {
221 let q = query.to_lowercase();
222 let mut hits = Vec::new();
223 for path in metadata_files(&self.metadata_dir)? {
224 let raw = std::fs::read_to_string(path.as_std_path())?;
225 let Ok(md) = toml::from_str::<Metadata>(&raw) else {
226 continue;
229 };
230 let haystacks = [
231 md.title.to_lowercase(),
232 md.authors.join(" ").to_lowercase(),
233 md.venue.clone().unwrap_or_default().to_lowercase(),
234 md.publisher.clone().unwrap_or_default().to_lowercase(),
235 ];
236 if haystacks.iter().any(|h| h.contains(&q)) {
237 let safekey = safekey_from_metadata_filename(&path);
238 hits.push(EntryInfo {
239 safekey,
240 title: md.title,
241 year: md.year,
242 fetched_at: md.doiget.as_ref().map(|d| d.fetched_at),
243 });
244 if hits.len() >= limit {
245 break;
246 }
247 }
248 }
249 Ok(hits)
250 }
251}
252
253fn guard_safekey(s: &str) -> Result<(), StoreError> {
263 let bad = s.is_empty()
264 || s.contains('/')
265 || s.contains('\\')
266 || s.contains("..")
267 || s.contains('\0')
268 || s.starts_with('.')
269 || !s
270 .chars()
271 .all(|c| c.is_ascii_alphanumeric() || c == '.' || c == '-' || c == '_');
272 if bad {
273 Err(StoreError::PathTraversal {
274 path: Utf8PathBuf::from(s),
275 })
276 } else {
277 Ok(())
278 }
279}
280
281fn safekey_from_metadata_filename(p: &Utf8Path) -> Safekey {
285 Safekey(p.file_stem().unwrap_or("").to_string())
286}
287
/// Kind of lock `acquire_lock` should request on a lock file.
#[derive(Debug, Clone, Copy)]
enum LockMode {
    /// Requested via `try_lock_shared`; used by `FsStore::read`.
    Shared,
    /// Requested via `try_lock_exclusive`; used by `FsStore::write`.
    Exclusive,
}
296
297fn open_or_create_lock_file(path: &Utf8Path) -> Result<File, StoreError> {
300 let f = OpenOptions::new()
301 .create(true)
302 .read(true)
303 .write(true)
304 .truncate(false)
305 .open(path.as_std_path())?;
306 Ok(f)
307}
308
309fn acquire_lock(lock_file: &File, lock_path: &Utf8Path, mode: LockMode) -> Result<(), StoreError> {
312 let deadline = Instant::now() + LOCK_TIMEOUT;
313 loop {
314 let attempt = match mode {
320 LockMode::Shared => <File as FileExt>::try_lock_shared(lock_file),
321 LockMode::Exclusive => <File as FileExt>::try_lock_exclusive(lock_file),
322 };
323 match attempt {
324 Ok(()) => return Ok(()),
325 Err(e) => {
326 let contended = e.raw_os_error() == fs2::lock_contended_error().raw_os_error();
327 if !contended {
328 return Err(StoreError::Io(e));
330 }
331 if Instant::now() >= deadline {
332 return Err(StoreError::LockTimeout {
333 path: lock_path.to_owned(),
334 });
335 }
336 std::thread::sleep(LOCK_POLL_INTERVAL);
337 }
338 }
339 }
340}
341
342fn check_schema_version(theirs: &str) -> Result<(), StoreError> {
347 let (their_major, their_minor) = parse_schema_version(theirs)?;
348 let (our_major, our_minor) = parse_schema_version(SCHEMA_VERSION)?;
349 if their_major > our_major {
350 warn!(
351 theirs = theirs,
352 ours = SCHEMA_VERSION,
353 "store entry uses a future-major schema_version; entering read-only mode \
354 for this entry (docs/STORE.md §3)"
355 );
356 } else if their_major == our_major && their_minor > our_minor {
357 warn!(
358 theirs = theirs,
359 ours = SCHEMA_VERSION,
360 "store entry uses a newer minor schema_version; reading in compatibility mode \
361 (docs/STORE.md §3 future-minor tolerance)"
362 );
363 }
364 Ok(())
365}
366
367fn check_schema_version_for_write(theirs: &str) -> Result<(), StoreError> {
371 let (their_major, their_minor) = parse_schema_version(theirs)?;
372 let (our_major, our_minor) = parse_schema_version(SCHEMA_VERSION)?;
373 if their_major > our_major || (their_major == our_major && their_minor > our_minor) {
374 return Err(StoreError::SchemaTooNew {
375 theirs: theirs.to_string(),
376 ours: SCHEMA_VERSION.to_string(),
377 });
378 }
379 Ok(())
380}
381
382fn parse_schema_version(s: &str) -> Result<(u32, u32), StoreError> {
383 let (maj, min) = s.split_once('.').ok_or(StoreError::MissingField {
384 field: "schema_version",
385 })?;
386 let maj: u32 = maj.parse().map_err(|_| StoreError::MissingField {
387 field: "schema_version",
388 })?;
389 let min: u32 = min.parse().map_err(|_| StoreError::MissingField {
390 field: "schema_version",
391 })?;
392 Ok((maj, min))
393}
394
395fn merge_metadata(existing: Metadata, incoming: Metadata) -> Metadata {
404 let mut out = incoming.clone();
405
406 if let (Ok((em, en)), Ok((im, in_))) = (
409 parse_schema_version(&existing.schema_version),
410 parse_schema_version(&incoming.schema_version),
411 ) {
412 if (em, en) > (im, in_) {
413 out.schema_version = existing.schema_version.clone();
414 }
415 }
416
417 if !existing.title.is_empty() && existing.title != incoming.title {
420 warn!(
421 field = "title",
422 existing = existing.title.as_str(),
423 "preserving reserved field set by another tool (docs/STORE.md §6)"
424 );
425 out.title = existing.title;
426 }
427 if !existing.authors.is_empty() && existing.authors != incoming.authors {
428 warn!(
429 field = "authors",
430 "preserving reserved field set by another tool (docs/STORE.md §6)"
431 );
432 out.authors = existing.authors;
433 }
434
435 macro_rules! merge_opt {
437 ($field:ident) => {
438 if existing.$field.is_some() && existing.$field != incoming.$field {
439 warn!(
440 field = stringify!($field),
441 "preserving reserved field set by another tool (docs/STORE.md §6)"
442 );
443 out.$field = existing.$field;
444 }
445 };
446 }
447 merge_opt!(year);
448 merge_opt!(doi);
449 merge_opt!(arxiv_id);
450 merge_opt!(abstract_);
451 merge_opt!(venue);
452 merge_opt!(publisher);
453 merge_opt!(issn);
454 merge_opt!(isbn);
455 merge_opt!(type_);
456 merge_opt!(url);
457 merge_opt!(pdf_path);
458
459 if !existing.keywords.is_empty() && existing.keywords != incoming.keywords {
461 warn!(
462 field = "keywords",
463 "preserving reserved field set by another tool (docs/STORE.md §6)"
464 );
465 out.keywords = existing.keywords;
466 }
467
468 if out.doiget.is_none() && existing.doiget.is_some() {
472 out.doiget = existing.doiget;
473 }
474
475 let mut merged_other = existing.other;
484 for (k, v) in out.other.iter() {
485 merged_other.entry(k.clone()).or_insert_with(|| v.clone());
486 }
487 out.other = merged_other;
488
489 out
490}
491
492fn normalize_toml(m: &Metadata) -> Result<String, StoreError> {
499 let value = toml::Value::try_from(m)?;
502 let mut out = String::new();
503 write_normalized_toml(&value, &mut out)?;
504 if !out.ends_with('\n') {
505 out.push('\n');
506 }
507 Ok(out)
508}
509
510fn write_normalized_toml(value: &toml::Value, out: &mut String) -> Result<(), StoreError> {
515 let table = match value {
516 toml::Value::Table(t) => t,
517 _ => {
518 return Err(StoreError::Serialize(
519 <toml::ser::Error as serde::ser::Error>::custom(
520 "Metadata did not serialize to a TOML table",
521 ),
522 ));
523 }
524 };
525
526 let mut top_keys: Vec<&String> = Vec::new();
529 let mut sub_table_keys: Vec<&String> = Vec::new();
530 for (k, v) in table.iter() {
531 if matches!(v, toml::Value::Table(_)) {
532 sub_table_keys.push(k);
533 } else {
534 top_keys.push(k);
535 }
536 }
537 top_keys.sort();
538 sub_table_keys.sort();
539
540 if let Some(v) = table.get("schema_version") {
542 write_kv("schema_version", v, out)?;
543 }
544 for k in top_keys {
545 if k == "schema_version" {
546 continue;
547 }
548 if let Some(v) = table.get(k) {
549 write_kv(k, v, out)?;
550 }
551 }
552 for k in sub_table_keys {
553 if let Some(toml::Value::Table(sub)) = table.get(k) {
554 out.push('\n');
555 out.push('[');
556 out.push_str(k);
557 out.push_str("]\n");
558 let sorted: std::collections::BTreeMap<&String, &toml::Value> = sub.iter().collect();
560 for (sk, sv) in sorted {
561 write_kv(sk, sv, out)?;
562 }
563 }
564 }
565 Ok(())
566}
567
568fn write_kv(key: &str, value: &toml::Value, out: &mut String) -> Result<(), StoreError> {
572 out.push_str(key);
573 out.push_str(" = ");
574 let rendered = toml_value_inline(value)?;
575 out.push_str(&rendered);
576 out.push('\n');
577 Ok(())
578}
579
580fn toml_value_inline(value: &toml::Value) -> Result<String, StoreError> {
583 let s = match value {
584 toml::Value::Table(_) => {
585 return Err(StoreError::Serialize(
586 <toml::ser::Error as serde::ser::Error>::custom(
587 "nested tables not supported by inline writer",
588 ),
589 ));
590 }
591 v => {
596 let mut wrapper = toml::map::Map::new();
597 wrapper.insert("__v".to_string(), v.clone());
598 let rendered = toml::to_string(&toml::Value::Table(wrapper))?;
599 let body = rendered
602 .strip_prefix("__v = ")
603 .ok_or_else(|| {
604 StoreError::Serialize(<toml::ser::Error as serde::ser::Error>::custom(
605 "unexpected toml singleton format",
606 ))
607 })?
608 .trim_end_matches('\n')
609 .to_string();
610 body
611 }
612 };
613 Ok(s)
614}
615
616fn atomic_write(dst: &Utf8Path, bytes: &[u8]) -> std::io::Result<()> {
625 let file_name = dst.file_name().ok_or_else(|| {
626 std::io::Error::new(
627 std::io::ErrorKind::InvalidInput,
628 "destination path has no file name",
629 )
630 })?;
631 let mut tmp_path = dst.to_path_buf();
632 tmp_path.set_file_name(format!("{}.tmp", file_name));
633
634 {
635 let mut f = OpenOptions::new()
636 .create(true)
637 .write(true)
638 .truncate(true)
639 .open(tmp_path.as_std_path())?;
640 f.write_all(bytes)?;
641 f.sync_all()?;
642 }
643 std::fs::rename(tmp_path.as_std_path(), dst.as_std_path())?;
644
645 #[cfg(unix)]
649 {
650 if let Some(parent) = dst.parent() {
651 if let Ok(dir) = File::open(parent.as_std_path()) {
652 let _ = dir.sync_all();
653 }
654 }
655 }
656
657 Ok(())
658}
659
/// Creates the directory `path`, including missing ancestors, with owner-only
/// permissions on Unix.
///
/// An already existing `path` is left untouched (its permissions are not
/// changed). On Unix the final directory is created with mode `0o700` from
/// the start, so there is no window in which it exists with wider,
/// umask-derived permissions; the mode is then set explicitly so the result is
/// exactly `0o700` regardless of the umask. Missing ancestors are created with
/// the process' default permissions. On other platforms the directory is
/// simply created.
///
/// # Errors
///
/// Propagates any I/O error from creating the directories or setting the
/// permissions.
fn create_dir_secure(path: &std::path::Path) -> std::io::Result<()> {
    if path.exists() {
        return Ok(());
    }

    #[cfg(unix)]
    {
        use std::os::unix::fs::{DirBuilderExt, PermissionsExt};

        if let Some(parent) = path.parent() {
            if !parent.as_os_str().is_empty() {
                std::fs::create_dir_all(parent)?;
            }
        }

        match std::fs::DirBuilder::new().mode(0o700).create(path) {
            Ok(()) => {}
            // Another process or thread created the directory between the
            // existence check and now; carry on as if we had created it.
            Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists && path.is_dir() => {}
            Err(err) => return Err(err),
        }

        // The mode passed at creation is filtered by the umask; pin the
        // exact bits.
        std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o700))?;
    }

    #[cfg(not(unix))]
    std::fs::create_dir_all(path)?;

    Ok(())
}
675
676fn metadata_files(metadata_dir: &Utf8Path) -> std::io::Result<Vec<Utf8PathBuf>> {
683 let mut out = Vec::new();
684 if !metadata_dir.exists() {
685 return Ok(out);
686 }
687 for entry in std::fs::read_dir(metadata_dir.as_std_path())? {
688 let entry = entry?;
689 if !entry.file_type()?.is_file() {
690 continue;
691 }
692 let path = entry.path();
693 let utf8_path = match Utf8PathBuf::from_path_buf(path) {
694 Ok(p) => p,
695 Err(_) => continue,
696 };
697 let name = match utf8_path.file_name() {
698 Some(n) => n,
699 None => continue,
700 };
701 if name.ends_with(".toml") && !name.ends_with(".tmp") {
702 out.push(utf8_path);
703 }
704 }
705 Ok(out)
706}
707
708fn read_all_entries(metadata_dir: &Utf8Path) -> Result<Vec<EntryInfo>, StoreError> {
709 let mut out = Vec::new();
710 for path in metadata_files(metadata_dir)? {
711 let raw = std::fs::read_to_string(path.as_std_path())?;
712 let Ok(md) = toml::from_str::<Metadata>(&raw) else {
713 continue;
715 };
716 let safekey = safekey_from_metadata_filename(&path);
717 out.push(EntryInfo {
718 safekey,
719 title: md.title,
720 year: md.year,
721 fetched_at: md.doiget.map(|d| d.fetched_at),
722 });
723 }
724 Ok(out)
725}
726
/// Identity function over `DoigetExtension`: returns its argument unchanged.
///
/// NOTE(review): presumably exists only so the `DoigetExtension` import is
/// referenced outside the test module — confirm before removing.
#[allow(dead_code)]
fn _doiget_extension_is_visible(d: DoigetExtension) -> DoigetExtension {
    d
}
734
735#[cfg(test)]
742#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
743mod tests {
744 use super::*;
745 use std::collections::BTreeMap;
746 use std::sync::Arc;
747 use std::thread;
748
749 use chrono::TimeZone;
750 use tempfile::TempDir;
751
752 use crate::{Doi, Safekey, SCHEMA_VERSION};
753
754 fn tmp_dir_utf8(dir: &TempDir) -> Utf8PathBuf {
755 Utf8PathBuf::from_path_buf(dir.path().to_path_buf()).expect("temp dir path must be UTF-8")
756 }
757
758 fn sample_safekey() -> Safekey {
759 Safekey("doi_10.1234_example".to_string())
762 }
763
764 fn sample_metadata() -> Metadata {
765 Metadata {
766 schema_version: SCHEMA_VERSION.to_string(),
767 title: "Sample Paper Title".to_string(),
768 authors: vec!["Alice Researcher".to_string(), "Bob Coauthor".to_string()],
769 year: Some(2026),
770 doi: Some(Doi("10.1234/example".to_string())),
771 arxiv_id: None,
772 abstract_: Some("A short abstract.".to_string()),
773 venue: Some("Phys. Rev. X".to_string()),
774 publisher: Some("American Physical Society".to_string()),
775 issn: Some("2160-3308".to_string()),
776 isbn: None,
777 type_: Some("journal-article".to_string()),
778 keywords: vec!["physics".to_string(), "tdd".to_string()],
779 url: Some("https://example.test/paper".to_string()),
780 pdf_path: Some("doi_10.1234_example.pdf".to_string()),
781 doiget: Some(DoigetExtension {
782 fetched_at: chrono::Utc.with_ymd_and_hms(2026, 5, 6, 12, 0, 0).unwrap(),
783 source: "unpaywall".to_string(),
784 license: "CC-BY-4.0".to_string(),
785 size_bytes: 1234567,
786 mcp_call_id: Some("01JCKZ7Q0000000000000000AB".to_string()),
787 }),
788 other: BTreeMap::new(),
789 }
790 }
791
792 fn fresh_store(dir: &TempDir) -> FsStore {
793 let root = tmp_dir_utf8(dir).join("papers");
794 FsStore::new(root).expect("FsStore::new")
795 }
796
797 #[test]
798 fn roundtrip_reserved_fields() {
799 let dir = TempDir::new().expect("tmp");
800 let store = fresh_store(&dir);
801 let key = sample_safekey();
802 let m = sample_metadata();
803 store.write(&key, &m, None).expect("write");
804
805 let read = store.read(&key).expect("read").expect("Some");
806 assert_eq!(read.schema_version, m.schema_version);
807 assert_eq!(read.title, m.title);
808 assert_eq!(read.authors, m.authors);
809 assert_eq!(read.year, m.year);
810 assert_eq!(
811 read.doi.as_ref().map(|d| d.as_str()),
812 Some("10.1234/example")
813 );
814 assert_eq!(read.abstract_, m.abstract_);
815 assert_eq!(read.venue, m.venue);
816 assert_eq!(read.publisher, m.publisher);
817 assert_eq!(read.issn, m.issn);
818 assert_eq!(read.type_, m.type_);
819 assert_eq!(read.keywords, m.keywords);
820 assert_eq!(read.url, m.url);
821 assert_eq!(read.pdf_path, m.pdf_path);
822 }
823
824 #[test]
825 fn roundtrip_doiget_extension() {
826 let dir = TempDir::new().expect("tmp");
827 let store = fresh_store(&dir);
828 let key = sample_safekey();
829 let m = sample_metadata();
830 store.write(&key, &m, None).expect("write");
831
832 let read = store.read(&key).expect("read").expect("Some");
833 let d = read.doiget.expect("doiget table present");
834 let want = m.doiget.expect("input doiget");
835 assert_eq!(d.fetched_at, want.fetched_at);
836 assert_eq!(d.source, want.source);
837 assert_eq!(d.license, want.license);
838 assert_eq!(d.size_bytes, want.size_bytes);
839 assert_eq!(d.mcp_call_id, want.mcp_call_id);
840 }
841
842 #[test]
843 fn read_returns_none_for_missing_safekey() {
844 let dir = TempDir::new().expect("tmp");
845 let store = fresh_store(&dir);
846 let key = Safekey("nonexistent".to_string());
847 let res = store.read(&key).expect("read ok");
848 assert!(res.is_none(), "expected Ok(None), got {:?}", res);
849 }
850
851 #[test]
852 fn schema_too_new_blocks_writes_but_allows_reads() {
853 let dir = TempDir::new().expect("tmp");
854 let store = fresh_store(&dir);
855 let key = sample_safekey();
856
857 let meta_path = store.metadata_path(&key).expect("path");
859 std::fs::create_dir_all(meta_path.parent().expect("parent").as_std_path()).expect("mkdir");
860 let body = "schema_version = \"2.0\"\ntitle = \"Future\"\nauthors = []\n";
861 std::fs::write(meta_path.as_std_path(), body).expect("write");
862
863 let read = store.read(&key).expect("read ok");
865 assert!(read.is_some(), "future-major file must be readable");
866
867 let m = sample_metadata();
869 let err = store.write(&key, &m, None).expect_err("write must fail");
870 match err {
871 StoreError::SchemaTooNew { theirs, ours } => {
872 assert_eq!(theirs, "2.0");
873 assert_eq!(ours, SCHEMA_VERSION);
874 }
875 other => panic!("expected SchemaTooNew, got {:?}", other),
876 }
877 }
878
879 #[test]
880 fn concurrent_writers_serialize_via_flock() {
881 let dir = TempDir::new().expect("tmp");
887 let store = Arc::new(fresh_store(&dir));
888 let key = sample_safekey();
889
890 store.write(&key, &sample_metadata(), None).expect("seed");
892
893 let mut handles = Vec::new();
894 for source in ["unpaywall", "europepmc"] {
895 let store = Arc::clone(&store);
896 let key = key.clone();
897 handles.push(thread::spawn(move || {
898 let mut m = sample_metadata();
899 if let Some(d) = m.doiget.as_mut() {
900 d.source = source.to_string();
901 }
902 store.write(&key, &m, None).expect("write");
903 }));
904 }
905 for h in handles {
906 h.join().expect("join");
907 }
908
909 let read = store.read(&key).expect("read").expect("Some");
912 let source = read.doiget.expect("doiget").source;
913 assert!(
914 source == "unpaywall" || source == "europepmc",
915 "winning source must be one of the contenders, got {}",
916 source
917 );
918 }
919
920 #[test]
921 fn list_recent_orders_by_fetched_at_desc() {
922 let dir = TempDir::new().expect("tmp");
923 let store = fresh_store(&dir);
924
925 for (idx, year_seed) in [(1, 2024_u32), (2, 2025), (3, 2026)] {
926 let key = Safekey(format!("doi_10.1234_entry{}", idx));
927 let mut m = sample_metadata();
928 m.title = format!("Entry {}", idx);
929 if let Some(d) = m.doiget.as_mut() {
930 d.fetched_at = chrono::Utc
931 .with_ymd_and_hms(year_seed as i32, 5, 6, 12, 0, 0)
932 .unwrap();
933 }
934 store.write(&key, &m, None).expect("write");
935 }
936
937 let recent = store.list_recent(10).expect("list");
938 assert_eq!(recent.len(), 3, "expected 3 entries, got {}", recent.len());
939 assert_eq!(recent[0].title, "Entry 3");
941 assert_eq!(recent[1].title, "Entry 2");
942 assert_eq!(recent[2].title, "Entry 1");
943 for w in recent.windows(2) {
944 assert!(
945 w[0].fetched_at >= w[1].fetched_at,
946 "recent[].fetched_at must be non-increasing"
947 );
948 }
949 }
950
951 #[test]
952 fn search_finds_by_title_substring() {
953 let dir = TempDir::new().expect("tmp");
954 let store = fresh_store(&dir);
955
956 let key = Safekey("doi_10.1234_quantum".to_string());
957 let mut m = sample_metadata();
958 m.title = "Quantum Stuff and Other Topics".to_string();
959 store.write(&key, &m, None).expect("write");
960
961 let hits = store.search("quantum", 10).expect("search");
962 assert_eq!(hits.len(), 1, "expected 1 hit, got {}", hits.len());
963 assert_eq!(hits[0].title, "Quantum Stuff and Other Topics");
964
965 let empty = store.search("relativity", 10).expect("search");
966 assert!(empty.is_empty(), "expected no hits, got {:?}", empty);
967 }
968
969 #[test]
970 fn path_traversal_in_safekey_blocked() {
971 let dir = TempDir::new().expect("tmp");
972 let store = fresh_store(&dir);
973 let bad = Safekey("../etc/passwd".to_string());
974
975 match store.read(&bad) {
976 Err(StoreError::PathTraversal { .. }) => {}
977 other => panic!("expected PathTraversal, got {:?}", other),
978 }
979 let m = sample_metadata();
980 match store.write(&bad, &m, None) {
981 Err(StoreError::PathTraversal { .. }) => {}
982 other => panic!("expected PathTraversal, got {:?}", other),
983 }
984 }
985
986 #[test]
987 fn write_then_read_normalized_toml_alphabetizes_keys() {
988 let dir = TempDir::new().expect("tmp");
992 let store = fresh_store(&dir);
993 let key = sample_safekey();
994 store.write(&key, &sample_metadata(), None).expect("write");
995
996 let path = store.metadata_path(&key).expect("path");
997 let raw = std::fs::read_to_string(path.as_std_path()).expect("read");
998 let first_line = raw.lines().next().expect("at least one line");
1000 assert!(
1001 first_line.starts_with("schema_version = "),
1002 "first line must be schema_version, got: {:?}",
1003 first_line
1004 );
1005 assert!(raw.ends_with('\n'), "file must end with a newline");
1007 assert!(!raw.contains('\r'), "no CR allowed; LF only");
1009 assert!(raw.contains("\n[doiget]\n"), "doiget sub-table missing");
1011 let doiget_idx = raw.find("[doiget]").expect("doiget block");
1013 let after = &raw[doiget_idx..];
1014 let fetched_at_idx = after
1015 .find("fetched_at = ")
1016 .expect("fetched_at key in doiget");
1017 let license_idx = after.find("license = ").expect("license key in doiget");
1018 assert!(
1019 fetched_at_idx < license_idx,
1020 "fetched_at must precede license within [doiget]"
1021 );
1022 }
1023
1024 #[test]
1025 fn write_preserves_unknown_table_from_existing_file() {
1026 let dir = TempDir::new().expect("tmp");
1029 let store = fresh_store(&dir);
1030 let key = sample_safekey();
1031 let meta_path = store.metadata_path(&key).expect("path");
1032
1033 let body = format!(
1034 "schema_version = \"{}\"\ntitle = \"Existing\"\nauthors = [\"Carol\"]\n\n\
1035 [bibliofetch]\nharvest = \"2026-01-01\"\n",
1036 SCHEMA_VERSION
1037 );
1038 std::fs::write(meta_path.as_std_path(), body).expect("write");
1039
1040 let mut m = sample_metadata();
1041 m.title = "Doiget Wins?".to_string(); store.write(&key, &m, None).expect("write");
1043
1044 let read_raw = std::fs::read_to_string(meta_path.as_std_path()).expect("re-read");
1045 assert!(
1046 read_raw.contains("bibliofetch"),
1047 "[bibliofetch] table was dropped: {}",
1048 read_raw
1049 );
1050 assert!(
1051 read_raw.contains("title = \"Existing\""),
1052 "doiget overwrote a reserved field set by another tool: {}",
1053 read_raw
1054 );
1055 }
1056
1057 #[test]
1066 fn bibliofetch_typed_table_and_unknown_scalar_survive_roundtrip() {
1067 let dir = TempDir::new().expect("tmp");
1068 let store = fresh_store(&dir);
1069 let key = sample_safekey();
1070 let meta_path = store.metadata_path(&key).expect("path");
1071
1072 let body = format!(
1075 "schema_version = \"{}\"\n\
1076 title = \"Existing\"\n\
1077 authors = [\"Carol\"]\n\
1078 zotero_key = \"ABC123\"\n\n\
1079 [bibliofetch]\n\
1080 harvest = \"2026-02-03\"\n\
1081 count = 42\n\
1082 tags = [\"x\", \"y\"]\n",
1083 SCHEMA_VERSION
1084 );
1085 std::fs::write(meta_path.as_std_path(), body).expect("seed write");
1086
1087 let m0 = store.read(&key).expect("read ok").expect("entry present");
1090 assert!(
1091 m0.other.contains_key("bibliofetch"),
1092 "[bibliofetch] not captured into `other` on read: {:?}",
1093 m0.other
1094 );
1095 assert_eq!(
1096 m0.other.get("zotero_key").and_then(|v| v.as_str()),
1097 Some("ABC123"),
1098 "unknown top-level scalar not captured: {:?}",
1099 m0.other
1100 );
1101
1102 let mut m_doiget = sample_metadata();
1104 m_doiget.title = "Doiget Would Overwrite".to_string();
1105 store.write(&key, &m_doiget, None).expect("doiget write");
1106
1107 let m1 = store
1110 .read(&key)
1111 .expect("re-read ok")
1112 .expect("entry present");
1113 assert_eq!(
1114 m1.title, "Existing",
1115 "STORE.md §6: doiget overwrote a reserved field"
1116 );
1117 let bf = m1
1118 .other
1119 .get("bibliofetch")
1120 .and_then(|v| v.as_table())
1121 .expect("[bibliofetch] table survived read->write->read");
1122 assert_eq!(
1123 bf.get("harvest").and_then(|v| v.as_str()),
1124 Some("2026-02-03")
1125 );
1126 assert_eq!(bf.get("count").and_then(|v| v.as_integer()), Some(42));
1127 let tags = bf
1128 .get("tags")
1129 .and_then(|v| v.as_array())
1130 .expect("tags array survived");
1131 let tags: Vec<&str> = tags.iter().filter_map(|v| v.as_str()).collect();
1132 assert_eq!(tags, vec!["x", "y"]);
1133 assert_eq!(
1134 m1.other.get("zotero_key").and_then(|v| v.as_str()),
1135 Some("ABC123"),
1136 "unknown top-level scalar lost across the cycle"
1137 );
1138
1139 let raw = std::fs::read_to_string(meta_path.as_std_path()).expect("raw re-read");
1141 assert!(raw.ends_with('\n'), "missing trailing newline: {raw:?}");
1142 }
1143
1144 #[test]
1150 fn other_key_collision_prefers_existing() {
1151 let dir = TempDir::new().expect("tmp");
1152 let store = fresh_store(&dir);
1153 let key = sample_safekey();
1154 let meta_path = store.metadata_path(&key).expect("path");
1155
1156 let body = format!(
1157 "schema_version = \"{}\"\ntitle = \"Existing\"\nauthors = [\"Carol\"]\n\
1158 zotero_key = \"FROM_BIBLIOFETCH\"\n",
1159 SCHEMA_VERSION
1160 );
1161 std::fs::write(meta_path.as_std_path(), body).expect("seed");
1162
1163 let mut m = sample_metadata();
1164 m.other.insert(
1165 "zotero_key".to_string(),
1166 toml::Value::String("FROM_DOIGET".to_string()),
1167 );
1168 store.write(&key, &m, None).expect("write");
1169
1170 let got = store.read(&key).expect("read").expect("present");
1171 assert_eq!(
1172 got.other.get("zotero_key").and_then(|v| v.as_str()),
1173 Some("FROM_BIBLIOFETCH"),
1174 "STORE.md §6: existing `other` value must win on collision"
1175 );
1176 }
1177
1178 #[test]
1179 fn pdf_is_copied_atomically_on_write() {
1180 let dir = TempDir::new().expect("tmp");
1181 let store = fresh_store(&dir);
1182 let key = sample_safekey();
1183
1184 let src_dir = TempDir::new().expect("tmp src");
1186 let src_path = Utf8PathBuf::from_path_buf(src_dir.path().to_path_buf())
1187 .expect("utf8 src dir")
1188 .join("input.pdf");
1189 std::fs::write(src_path.as_std_path(), b"%PDF-1.7 synthetic").expect("write src");
1190
1191 store
1192 .write(&key, &sample_metadata(), Some(&src_path))
1193 .expect("write");
1194
1195 let dst = store.pdf_path(&key).expect("pdf path");
1196 let bytes = std::fs::read(dst.as_std_path()).expect("read dst");
1197 assert_eq!(bytes, b"%PDF-1.7 synthetic");
1198 }
1199}