Skip to main content

doiget_core/store/
fs_store.rs

1//! Filesystem-backed [`Store`] implementation.
2//!
3//! Binding spec: `docs/STORE.md` §§1-7. Re-stated as the implementation
4//! contract:
5//!
6//! - **§1 Layout:** `<root>/<safekey>.pdf` and `<root>/.metadata/<safekey>.toml`,
7//!   `.toml.lock` siblings for advisory locking.
8//! - **§3 Schema version policy:** parse `<MAJOR>.<MINOR>`. Future `MAJOR`
9//!   yields [`StoreError::SchemaTooNew`] on writes, warn-and-tolerate on
10//!   reads. Future `MINOR` (same major) yields a `tracing::warn!`-then-OK on
11//!   reads.
12//! - **§4 Lock protocol:** `flock` (`fs2::FileExt`) on the SEPARATE
13//!   `.toml.lock` file with a 5 s timeout polled via `try_lock_*`.
14//! - **§5 Atomic write:** write `<safekey>.toml.tmp` → `sync_all` → `rename`
15//!   → fsync parent dir (POSIX). On Windows `std::fs::rename` invokes
16//!   `MoveFileEx` with `MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH`,
17//!   so no extra parent-fsync syscall is required. Each file is atomic
18//!   individually; there is no cross-file transaction. The PDF is
19//!   therefore written BEFORE the metadata that references it (issue
20//!   #122), so a crash between the two renames can only leave an orphan
21//!   PDF or the prior consistent entry — never metadata pointing at a
22//!   missing PDF.
23//! - **§6 Coexistence with BiblioFetch.jl:** when re-writing an existing
24//!   entry, reserved top-level fields previously present are NOT overwritten
25//!   if the new value differs. The `[doiget]` table is updated freely; for
26//!   `other`, new keys are added but keys already on disk are preserved.
27//! - **§7 Normalization:** alphabetical key order, `\n` line endings,
28//!   trailing newline. Implemented through `BTreeMap`-backed re-serialization
29//!   of the on-wire `toml::Value`.
30
31use std::fs::{File, OpenOptions};
32use std::io::{Read, Write};
33use std::time::{Duration, Instant};
34
35use camino::{Utf8Path, Utf8PathBuf};
36use fs2::FileExt;
37use tracing::warn;
38
39use super::metadata::{DoigetExtension, Metadata};
40use super::{EntryInfo, Store, StoreError};
41use crate::{Safekey, SCHEMA_VERSION};
42
/// Subdirectory under `<root>` that holds metadata TOML files and their
/// advisory lock siblings, per `docs/STORE.md` §1.
///
/// Joined onto the store root once, in [`FsStore::new`].
const METADATA_DIR: &str = ".metadata";

/// Lock-acquisition timeout per `docs/STORE.md` §4 (5 seconds).
///
/// This is the total budget [`acquire_lock`] spends polling before it gives
/// up with [`StoreError::LockTimeout`].
const LOCK_TIMEOUT: Duration = Duration::from_secs(5);

/// How long to back off between `try_lock_*` polls. Small relative to
/// [`LOCK_TIMEOUT`] so a contended writer in the common case sees the lock
/// released within ~50 ms.
const LOCK_POLL_INTERVAL: Duration = Duration::from_millis(50);
54
/// Filesystem-shaped [`Store`] implementation rooted at `<root>`.
///
/// Holds only paths; every operation opens the files it needs on demand.
#[derive(Debug, Clone)]
pub struct FsStore {
    /// Store root directory, exactly as passed to [`FsStore::new`]. PDFs
    /// live directly under it.
    root: Utf8PathBuf,
    /// `<root>/.metadata`, computed once in [`FsStore::new`]. Holds the
    /// metadata TOML files and their `.toml.lock` siblings.
    metadata_dir: Utf8PathBuf,
}
61
62impl FsStore {
63    /// Open or create a store at `root`.
64    ///
65    /// Creates `<root>/` and `<root>/.metadata/` if missing. On POSIX, both
66    /// directories are created with mode `0700` (owner-only). On Windows,
67    /// directory ACLs are inherited (no-op).
68    ///
69    /// # Errors
70    ///
71    /// Returns [`StoreError::Io`] if `root` exists but is not a directory,
72    /// or if directory creation fails.
73    pub fn new(root: Utf8PathBuf) -> Result<Self, StoreError> {
74        // Reject non-directory existing paths up front; `create_dir_all` on
75        // a regular-file path returns a confusing platform-dependent error.
76        if root.exists() && !root.is_dir() {
77            return Err(StoreError::Io(std::io::Error::new(
78                std::io::ErrorKind::AlreadyExists,
79                format!("store root {} exists but is not a directory", root),
80            )));
81        }
82        let metadata_dir = root.join(METADATA_DIR);
83
84        create_dir_secure(root.as_std_path())?;
85        create_dir_secure(metadata_dir.as_std_path())?;
86
87        Ok(Self { root, metadata_dir })
88    }
89
90    /// Returns the store root.
91    pub fn root(&self) -> &Utf8Path {
92        &self.root
93    }
94
95    /// Resolve the metadata-TOML path for `key`, with a defense-in-depth
96    /// path-traversal check.
97    ///
98    /// `Safekey` construction already restricts the inner string to
99    /// `[A-Za-z0-9._-]` per `docs/SAFEKEY.md`. The check below catches
100    /// hand-crafted `Safekey` values produced by in-crate `pub(crate)`
101    /// shortcuts (e.g. tests) and any future regression in the safekey
102    /// charset.
103    fn metadata_path(&self, key: &Safekey) -> Result<Utf8PathBuf, StoreError> {
104        guard_safekey(key.as_str())?;
105        let p = self.metadata_dir.join(format!("{}.toml", key.as_str()));
106        // Final paranoia: parent must equal `metadata_dir`. After the charset
107        // check above this should always hold; if it ever does not, surface
108        // it as `PathTraversal` rather than panicking.
109        if p.parent() != Some(self.metadata_dir.as_path()) {
110            return Err(StoreError::PathTraversal { path: p });
111        }
112        Ok(p)
113    }
114
115    fn lock_path(&self, key: &Safekey) -> Result<Utf8PathBuf, StoreError> {
116        guard_safekey(key.as_str())?;
117        Ok(self
118            .metadata_dir
119            .join(format!("{}.toml.lock", key.as_str())))
120    }
121
122    fn pdf_path(&self, key: &Safekey) -> Result<Utf8PathBuf, StoreError> {
123        guard_safekey(key.as_str())?;
124        Ok(self.root.join(format!("{}.pdf", key.as_str())))
125    }
126}
127
128impl Store for FsStore {
129    fn read(&self, key: &Safekey) -> Result<Option<Metadata>, StoreError> {
130        let meta_path = self.metadata_path(key)?;
131        if !meta_path.exists() {
132            return Ok(None);
133        }
134
135        // Per `docs/STORE.md` §4 we MAY take a shared lock for reads. Use the
136        // sibling `.lock` file. Lock acquisition errors are surfaced as
137        // LockTimeout (5 s budget); locking is best-effort on platforms that
138        // implement it as a no-op.
139        let lock_path = self.lock_path(key)?;
140        let lock_file = open_or_create_lock_file(&lock_path)?;
141        acquire_lock(&lock_file, &lock_path, LockMode::Shared)?;
142
143        let raw = std::fs::read_to_string(meta_path.as_std_path())?;
144        // Drop the lock by closing the file handle; explicit unlock ensures
145        // determinism on platforms where Drop semantics differ. Disambiguate
146        // from `std::fs::File::unlock` (stabilized in 1.89) to keep MSRV
147        // at 1.86 — the `<File as FileExt>::…` form forces the `fs2` impl.
148        let _ = <File as FileExt>::unlock(&lock_file);
149
150        let metadata: Metadata = toml::from_str(&raw)?;
151        check_schema_version(&metadata.schema_version)?;
152        Ok(Some(metadata))
153    }
154
155    fn write(&self, key: &Safekey, m: &Metadata, pdf: Option<&Utf8Path>) -> Result<(), StoreError> {
156        let meta_path = self.metadata_path(key)?;
157        let lock_path = self.lock_path(key)?;
158        let lock_file = open_or_create_lock_file(&lock_path)?;
159        acquire_lock(&lock_file, &lock_path, LockMode::Exclusive)?;
160
161        // Re-read existing TOML (if any) so we can apply the §6 merge rule:
162        // never overwrite a reserved top-level field previously written by
163        // another tool. We DO let the new value win for the [doiget] table
164        // (doiget owns it per §6) and for `other` (preserve unknown tables
165        // on update; new contents replace prior contents).
166        let merged = if meta_path.exists() {
167            let raw = std::fs::read_to_string(meta_path.as_std_path())?;
168            let existing: Metadata = toml::from_str(&raw)?;
169            check_schema_version_for_write(&existing.schema_version)?;
170            merge_metadata(existing, m.clone())
171        } else {
172            m.clone()
173        };
174
175        // Serialize → normalize per §7. The normalizer enforces alphabetical
176        // key order within tables and a trailing `\n`.
177        let normalized = normalize_toml(&merged)?;
178
179        // Issue #122 — crash-consistent ordering: the PDF is written
180        // BEFORE the metadata that references it. A crash between the
181        // two atomic renames then leaves either the previous
182        // consistent entry or no metadata at all — NEVER metadata
183        // whose `pdf_path` points at a `.pdf` that does not exist
184        // yet. (The reverse order could publish a dangling pointer.)
185        // Worst case under the new order is an orphan `<safekey>.pdf`
186        // with stale/absent metadata, which list/search ignore (they
187        // key off metadata) and a re-fetch overwrites — strictly
188        // safer than a torn pointer. There is still no cross-file
189        // transaction; this ordering is the bounded MVP guarantee
190        // (documented in STORE.md §5).
191        if let Some(pdf_src) = pdf {
192            let pdf_dst = self.pdf_path(key)?;
193            let mut bytes = Vec::new();
194            File::open(pdf_src.as_std_path())?.read_to_end(&mut bytes)?;
195            // Same atomic dance as the metadata, byte-by-byte.
196            atomic_write(&pdf_dst, &bytes)?;
197        }
198
199        // Atomic write per §5: tmp → fsync → rename → fsync parent.
200        // Done LAST so the metadata only becomes visible once its PDF
201        // (if any) is already durably on disk.
202        atomic_write(&meta_path, normalized.as_bytes())?;
203
204        let _ = <File as FileExt>::unlock(&lock_file);
205        Ok(())
206    }
207
208    fn list_recent(&self, limit: usize) -> Result<Vec<EntryInfo>, StoreError> {
209        let mut entries = read_all_entries(&self.metadata_dir)?;
210        // Most-recent first by [doiget].fetched_at; entries with no
211        // `[doiget]` table sort last (None < Some via Reverse).
212        entries.sort_by_key(|e| std::cmp::Reverse(e.fetched_at));
213        entries.truncate(limit);
214        Ok(entries)
215    }
216
217    /// Phase 1 search is a linear scan over all metadata files. Phase 2 will
218    /// add a tantivy / sqlite-fts index when the corpus grows past the point
219    /// where O(N) per query becomes noticeable in CLI latency.
220    fn search(&self, query: &str, limit: usize) -> Result<Vec<EntryInfo>, StoreError> {
221        let q = query.to_lowercase();
222        let mut hits = Vec::new();
223        for path in metadata_files(&self.metadata_dir)? {
224            let raw = std::fs::read_to_string(path.as_std_path())?;
225            let Ok(md) = toml::from_str::<Metadata>(&raw) else {
226                // Malformed entries are skipped rather than failing the
227                // whole query. A future audit task will surface them.
228                continue;
229            };
230            let haystacks = [
231                md.title.to_lowercase(),
232                md.authors.join(" ").to_lowercase(),
233                md.venue.clone().unwrap_or_default().to_lowercase(),
234                md.publisher.clone().unwrap_or_default().to_lowercase(),
235            ];
236            if haystacks.iter().any(|h| h.contains(&q)) {
237                let safekey = safekey_from_metadata_filename(&path);
238                hits.push(EntryInfo {
239                    safekey,
240                    title: md.title,
241                    year: md.year,
242                    fetched_at: md.doiget.as_ref().map(|d| d.fetched_at),
243                });
244                if hits.len() >= limit {
245                    break;
246                }
247            }
248        }
249        Ok(hits)
250    }
251}
252
253// ---------------------------------------------------------------------------
254// Helpers
255// ---------------------------------------------------------------------------
256
257/// Reject any safekey containing path-traversal indicators. `Safekey`
258/// construction already enforces `[A-Za-z0-9._-]`-only chars per
259/// `docs/SAFEKEY.md`; this is defense-in-depth in case a hand-crafted
260/// `Safekey` (e.g. an in-crate `Safekey("...".into())` shortcut) is passed
261/// in.
262fn guard_safekey(s: &str) -> Result<(), StoreError> {
263    let bad = s.is_empty()
264        || s.contains('/')
265        || s.contains('\\')
266        || s.contains("..")
267        || s.contains('\0')
268        || s.starts_with('.')
269        || !s
270            .chars()
271            .all(|c| c.is_ascii_alphanumeric() || c == '.' || c == '-' || c == '_');
272    if bad {
273        Err(StoreError::PathTraversal {
274            path: Utf8PathBuf::from(s),
275        })
276    } else {
277        Ok(())
278    }
279}
280
281/// Recover the safekey from a `<key>.toml` filename. Used only for surfacing
282/// list/search results; the safekey we emit here originated as a stored
283/// safekey, so it has already passed `guard_safekey` at write time.
284fn safekey_from_metadata_filename(p: &Utf8Path) -> Safekey {
285    Safekey(p.file_stem().unwrap_or("").to_string())
286}
287
/// Lock mode for [`acquire_lock`].
///
/// Selects which `fs2` `try_lock_*` call is polled on the lock file.
#[derive(Debug, Clone, Copy)]
enum LockMode {
    /// `flock(LOCK_SH)` — multiple readers OK. Used by `read`.
    Shared,
    /// `flock(LOCK_EX)` — exclusive writer. Used by `write`.
    Exclusive,
}
296
297/// Open (or create) the advisory lock file. Lock files are never deleted
298/// during normal operation per `docs/STORE.md` §4.
299fn open_or_create_lock_file(path: &Utf8Path) -> Result<File, StoreError> {
300    let f = OpenOptions::new()
301        .create(true)
302        .read(true)
303        .write(true)
304        .truncate(false)
305        .open(path.as_std_path())?;
306    Ok(f)
307}
308
309/// Acquire `mode` on `lock_file`, polling `try_lock_*` until success or the
310/// 5 s budget expires per `docs/STORE.md` §4.
311fn acquire_lock(lock_file: &File, lock_path: &Utf8Path, mode: LockMode) -> Result<(), StoreError> {
312    let deadline = Instant::now() + LOCK_TIMEOUT;
313    loop {
314        // Disambiguate from `std::fs::File::try_lock_shared` (stabilized in
315        // 1.89), which is an inherent method on `File` and would otherwise
316        // shadow the trait method. The `<File as FileExt>::…` form forces
317        // the `fs2` impl; we want the cross-platform behavior that returns
318        // `std::io::Error` rather than the std `TryLockError` newtype.
319        let attempt = match mode {
320            LockMode::Shared => <File as FileExt>::try_lock_shared(lock_file),
321            LockMode::Exclusive => <File as FileExt>::try_lock_exclusive(lock_file),
322        };
323        match attempt {
324            Ok(()) => return Ok(()),
325            Err(e) => {
326                let contended = e.raw_os_error() == fs2::lock_contended_error().raw_os_error();
327                if !contended {
328                    // Not a "would-block" error — surface it directly.
329                    return Err(StoreError::Io(e));
330                }
331                if Instant::now() >= deadline {
332                    return Err(StoreError::LockTimeout {
333                        path: lock_path.to_owned(),
334                    });
335                }
336                std::thread::sleep(LOCK_POLL_INTERVAL);
337            }
338        }
339    }
340}
341
342/// Verify `schema_version` is acceptable for a read. Per `docs/STORE.md` §3,
343/// reads succeed with a `tracing::warn!` for ANY future schema_version
344/// (minor or major); the read-only mode is enforced at write time
345/// (see [`check_schema_version_for_write`]).
346fn check_schema_version(theirs: &str) -> Result<(), StoreError> {
347    let (their_major, their_minor) = parse_schema_version(theirs)?;
348    let (our_major, our_minor) = parse_schema_version(SCHEMA_VERSION)?;
349    if their_major > our_major {
350        warn!(
351            theirs = theirs,
352            ours = SCHEMA_VERSION,
353            "store entry uses a future-major schema_version; entering read-only mode \
354             for this entry (docs/STORE.md §3)"
355        );
356    } else if their_major == our_major && their_minor > our_minor {
357        warn!(
358            theirs = theirs,
359            ours = SCHEMA_VERSION,
360            "store entry uses a newer minor schema_version; reading in compatibility mode \
361             (docs/STORE.md §3 future-minor tolerance)"
362        );
363    }
364    Ok(())
365}
366
367/// Same as [`check_schema_version`] but used on the EXISTING file before a
368/// write merge: any `schema_version` strictly greater than ours (major or
369/// minor) refuses the write per `docs/STORE.md` §3 read-only-mode rule.
370fn check_schema_version_for_write(theirs: &str) -> Result<(), StoreError> {
371    let (their_major, their_minor) = parse_schema_version(theirs)?;
372    let (our_major, our_minor) = parse_schema_version(SCHEMA_VERSION)?;
373    if their_major > our_major || (their_major == our_major && their_minor > our_minor) {
374        return Err(StoreError::SchemaTooNew {
375            theirs: theirs.to_string(),
376            ours: SCHEMA_VERSION.to_string(),
377        });
378    }
379    Ok(())
380}
381
382fn parse_schema_version(s: &str) -> Result<(u32, u32), StoreError> {
383    let (maj, min) = s.split_once('.').ok_or(StoreError::MissingField {
384        field: "schema_version",
385    })?;
386    let maj: u32 = maj.parse().map_err(|_| StoreError::MissingField {
387        field: "schema_version",
388    })?;
389    let min: u32 = min.parse().map_err(|_| StoreError::MissingField {
390        field: "schema_version",
391    })?;
392    Ok((maj, min))
393}
394
395/// Apply the `docs/STORE.md` §6 merge rule: doiget MUST NOT modify reserved
396/// top-level fields written by another tool. Concretely: if `existing` has a
397/// reserved field set to a value different from `incoming`, KEEP existing.
398/// `[doiget]` is owned by doiget and is overwritten freely. `other` (unknown
399/// tables / fields like `[bibliofetch]`) is preserved through union: fields
400/// in `existing` not present in `incoming` are kept; otherwise `incoming`
401/// wins (callers usually leave `other` empty on a re-fetch, so existing
402/// fields survive intact).
403fn merge_metadata(existing: Metadata, incoming: Metadata) -> Metadata {
404    let mut out = incoming.clone();
405
406    // schema_version: never downgrade. The §6 exception explicitly allows a
407    // coordinated minor revision bump, so we take the max of the two.
408    if let (Ok((em, en)), Ok((im, in_))) = (
409        parse_schema_version(&existing.schema_version),
410        parse_schema_version(&incoming.schema_version),
411    ) {
412        if (em, en) > (im, in_) {
413            out.schema_version = existing.schema_version.clone();
414        }
415    }
416
417    // Reserved fields with non-Option String types: prefer existing if it
418    // differs from incoming (and is non-empty).
419    if !existing.title.is_empty() && existing.title != incoming.title {
420        warn!(
421            field = "title",
422            existing = existing.title.as_str(),
423            "preserving reserved field set by another tool (docs/STORE.md §6)"
424        );
425        out.title = existing.title;
426    }
427    if !existing.authors.is_empty() && existing.authors != incoming.authors {
428        warn!(
429            field = "authors",
430            "preserving reserved field set by another tool (docs/STORE.md §6)"
431        );
432        out.authors = existing.authors;
433    }
434
435    // Optional reserved fields: prefer existing Some over incoming Some-different.
436    macro_rules! merge_opt {
437        ($field:ident) => {
438            if existing.$field.is_some() && existing.$field != incoming.$field {
439                warn!(
440                    field = stringify!($field),
441                    "preserving reserved field set by another tool (docs/STORE.md §6)"
442                );
443                out.$field = existing.$field;
444            }
445        };
446    }
447    merge_opt!(year);
448    merge_opt!(doi);
449    merge_opt!(arxiv_id);
450    merge_opt!(abstract_);
451    merge_opt!(venue);
452    merge_opt!(publisher);
453    merge_opt!(issn);
454    merge_opt!(isbn);
455    merge_opt!(type_);
456    merge_opt!(url);
457    merge_opt!(pdf_path);
458
459    // keywords (Vec<String>): prefer existing if non-empty and different.
460    if !existing.keywords.is_empty() && existing.keywords != incoming.keywords {
461        warn!(
462            field = "keywords",
463            "preserving reserved field set by another tool (docs/STORE.md §6)"
464        );
465        out.keywords = existing.keywords;
466    }
467
468    // [doiget]: doiget owns this table; incoming wins (already in `out`).
469    // If incoming has no [doiget] but existing did, keep the existing one
470    // so a metadata-only re-write doesn't silently drop a fetch record.
471    if out.doiget.is_none() && existing.doiget.is_some() {
472        out.doiget = existing.doiget;
473    }
474
475    // `other` (unknown tables / fields): union, prefer EXISTING on key
476    // collision (issue #123). STORE.md §6 forbids doiget overwriting a
477    // field/table another tool authored; an unknown key already on disk
478    // (e.g. a `[bibliofetch]` sub-key) must win over whatever doiget
479    // happens to carry in `other`. Doiget normally leaves `other`
480    // empty on a re-fetch, so this only changes behaviour in the
481    // (latent) case where both sides populate the same unknown key —
482    // there, "never overwrite" is the correct §6 resolution.
483    let mut merged_other = existing.other;
484    for (k, v) in out.other.iter() {
485        merged_other.entry(k.clone()).or_insert_with(|| v.clone());
486    }
487    out.other = merged_other;
488
489    out
490}
491
492/// Serialize `m` to TOML and apply `docs/STORE.md` §7 normalization:
493/// alphabetical key order within tables, `\n` line endings, trailing
494/// newline.
495///
496/// Implementation: serialize the [`Metadata`] to `toml::Value`, then walk
497/// the value tree and re-emit through `BTreeMap` for stable key order.
498fn normalize_toml(m: &Metadata) -> Result<String, StoreError> {
499    // Serialize to a Value to escape Rust-struct field order; tables are
500    // re-keyed alphabetically below.
501    let value = toml::Value::try_from(m)?;
502    let mut out = String::new();
503    write_normalized_toml(&value, &mut out)?;
504    if !out.ends_with('\n') {
505        out.push('\n');
506    }
507    Ok(out)
508}
509
510/// Walk the top-level table, emit reserved-vs-table keys in normalized
511/// order: `schema_version` first, then remaining scalar/array keys
512/// alphabetically, then sub-tables alphabetically. Within sub-tables, keys
513/// are alphabetical via `BTreeMap`.
514fn write_normalized_toml(value: &toml::Value, out: &mut String) -> Result<(), StoreError> {
515    let table = match value {
516        toml::Value::Table(t) => t,
517        _ => {
518            return Err(StoreError::Serialize(
519                <toml::ser::Error as serde::ser::Error>::custom(
520                    "Metadata did not serialize to a TOML table",
521                ),
522            ));
523        }
524    };
525
526    // Partition into scalar/array keys (top-level) vs sub-tables (rendered
527    // as `[name]` blocks). `schema_version` is forced first per §7.
528    let mut top_keys: Vec<&String> = Vec::new();
529    let mut sub_table_keys: Vec<&String> = Vec::new();
530    for (k, v) in table.iter() {
531        if matches!(v, toml::Value::Table(_)) {
532            sub_table_keys.push(k);
533        } else {
534            top_keys.push(k);
535        }
536    }
537    top_keys.sort();
538    sub_table_keys.sort();
539
540    // schema_version always first.
541    if let Some(v) = table.get("schema_version") {
542        write_kv("schema_version", v, out)?;
543    }
544    for k in top_keys {
545        if k == "schema_version" {
546            continue;
547        }
548        if let Some(v) = table.get(k) {
549            write_kv(k, v, out)?;
550        }
551    }
552    for k in sub_table_keys {
553        if let Some(toml::Value::Table(sub)) = table.get(k) {
554            out.push('\n');
555            out.push('[');
556            out.push_str(k);
557            out.push_str("]\n");
558            // Within a sub-table, alphabetical order via BTreeMap.
559            let sorted: std::collections::BTreeMap<&String, &toml::Value> = sub.iter().collect();
560            for (sk, sv) in sorted {
561                write_kv(sk, sv, out)?;
562            }
563        }
564    }
565    Ok(())
566}
567
568/// Render a single `key = value` line. Uses `toml::to_string` on the value
569/// half so quoting / escaping matches the spec ("ASCII-safe single-line
570/// strings use `\"...\"`", §7).
571fn write_kv(key: &str, value: &toml::Value, out: &mut String) -> Result<(), StoreError> {
572    out.push_str(key);
573    out.push_str(" = ");
574    let rendered = toml_value_inline(value)?;
575    out.push_str(&rendered);
576    out.push('\n');
577    Ok(())
578}
579
580/// Render a TOML value as a single-line inline expression. Tables are
581/// rejected (the caller emits them as `[name]` blocks instead).
582fn toml_value_inline(value: &toml::Value) -> Result<String, StoreError> {
583    let s = match value {
584        toml::Value::Table(_) => {
585            return Err(StoreError::Serialize(
586                <toml::ser::Error as serde::ser::Error>::custom(
587                    "nested tables not supported by inline writer",
588                ),
589            ));
590        }
591        // Defer to toml's own serializer for a single value via a one-key
592        // shim. `toml::to_string` on a value alone is not supported in
593        // toml 1.x, but wrapping it in a singleton table and slicing off
594        // the key is reliable.
595        v => {
596            let mut wrapper = toml::map::Map::new();
597            wrapper.insert("__v".to_string(), v.clone());
598            let rendered = toml::to_string(&toml::Value::Table(wrapper))?;
599            // Output looks like `__v = <value>\n`. Strip the prefix and
600            // trailing newline.
601            let body = rendered
602                .strip_prefix("__v = ")
603                .ok_or_else(|| {
604                    StoreError::Serialize(<toml::ser::Error as serde::ser::Error>::custom(
605                        "unexpected toml singleton format",
606                    ))
607                })?
608                .trim_end_matches('\n')
609                .to_string();
610            body
611        }
612    };
613    Ok(s)
614}
615
616/// Atomic write per `docs/STORE.md` §5: write `tmp` → `sync_all` → `rename`
617/// → fsync parent (POSIX). On Windows `std::fs::rename` already issues
618/// `MoveFileEx(.., MOVEFILE_REPLACE_EXISTING | MOVEFILE_WRITE_THROUGH)`,
619/// so the parent-fsync step is a no-op.
620///
621/// A crash mid-write leaves either the old file intact (if before the
622/// rename) or the new file fully written (if after). It never leaves a
623/// partially-visible new file.
624fn atomic_write(dst: &Utf8Path, bytes: &[u8]) -> std::io::Result<()> {
625    let file_name = dst.file_name().ok_or_else(|| {
626        std::io::Error::new(
627            std::io::ErrorKind::InvalidInput,
628            "destination path has no file name",
629        )
630    })?;
631    let mut tmp_path = dst.to_path_buf();
632    tmp_path.set_file_name(format!("{}.tmp", file_name));
633
634    {
635        let mut f = OpenOptions::new()
636            .create(true)
637            .write(true)
638            .truncate(true)
639            .open(tmp_path.as_std_path())?;
640        f.write_all(bytes)?;
641        f.sync_all()?;
642    }
643    std::fs::rename(tmp_path.as_std_path(), dst.as_std_path())?;
644
645    // Best-effort parent-dir fsync on POSIX. On Windows opening a directory
646    // for sync is not supported; the rename above already used
647    // MOVEFILE_WRITE_THROUGH semantics.
648    #[cfg(unix)]
649    {
650        if let Some(parent) = dst.parent() {
651            if let Ok(dir) = File::open(parent.as_std_path()) {
652                let _ = dir.sync_all();
653            }
654        }
655    }
656
657    Ok(())
658}
659
/// Ensure `path` exists as a directory, creating it (and any missing
/// parents) when absent. On POSIX a newly created directory is then set to
/// mode `0700`.
///
/// An already-existing directory is left untouched; its permissions are not
/// changed.
///
/// # Errors
///
/// Returns the underlying I/O error if creation or the permission change
/// fails. This includes the case where `path` exists but is not a
/// directory (e.g. a regular file), which would otherwise be mistaken for
/// an initialised directory and only fail later on first use.
fn create_dir_secure(path: &std::path::Path) -> std::io::Result<()> {
    // `is_dir`, not `exists`: only a real directory counts as "already
    // there". For a regular file we fall through and let `create_dir_all`
    // report the conflict.
    if path.is_dir() {
        return Ok(());
    }
    std::fs::create_dir_all(path)?;
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        let mut perms = std::fs::metadata(path)?.permissions();
        perms.set_mode(0o700);
        std::fs::set_permissions(path, perms)?;
    }
    Ok(())
}
675
676/// List metadata-TOML files (skipping `.tmp` artifacts and `.lock` siblings).
677///
678/// Non-UTF-8 entry names are skipped silently. Safekey-derived filenames are
679/// pure ASCII per `docs/SAFEKEY.md`, so this only filters out unrelated
680/// non-UTF-8 garbage that may have been dropped into the store directory by
681/// a third party.
682fn metadata_files(metadata_dir: &Utf8Path) -> std::io::Result<Vec<Utf8PathBuf>> {
683    let mut out = Vec::new();
684    if !metadata_dir.exists() {
685        return Ok(out);
686    }
687    for entry in std::fs::read_dir(metadata_dir.as_std_path())? {
688        let entry = entry?;
689        if !entry.file_type()?.is_file() {
690            continue;
691        }
692        let path = entry.path();
693        let utf8_path = match Utf8PathBuf::from_path_buf(path) {
694            Ok(p) => p,
695            Err(_) => continue,
696        };
697        let name = match utf8_path.file_name() {
698            Some(n) => n,
699            None => continue,
700        };
701        if name.ends_with(".toml") && !name.ends_with(".tmp") {
702            out.push(utf8_path);
703        }
704    }
705    Ok(out)
706}
707
708fn read_all_entries(metadata_dir: &Utf8Path) -> Result<Vec<EntryInfo>, StoreError> {
709    let mut out = Vec::new();
710    for path in metadata_files(metadata_dir)? {
711        let raw = std::fs::read_to_string(path.as_std_path())?;
712        let Ok(md) = toml::from_str::<Metadata>(&raw) else {
713            // Corrupt / future-major entries are skipped from list output.
714            continue;
715        };
716        let safekey = safekey_from_metadata_filename(&path);
717        out.push(EntryInfo {
718            safekey,
719            title: md.title,
720            year: md.year,
721            fetched_at: md.doiget.map(|d| d.fetched_at),
722        });
723    }
724    Ok(out)
725}
726
// `DoigetExtension` is referenced in the test module below; this tiny shim
// keeps the symbol live in non-test builds so rustdoc intra-doc linking
// stays stable.
//
// The function is an identity: it returns its argument unchanged. It is
// never called, hence the `dead_code` allowance.
#[allow(dead_code)]
fn _doiget_extension_is_visible(d: DoigetExtension) -> DoigetExtension {
    d
}
734
735// ---------------------------------------------------------------------------
736// Tests
737// ---------------------------------------------------------------------------
738
739// `expect`/`unwrap` are idiomatic in tests where panics double as assertions.
740// Workspace lints deny them in production code; relax for the test module.
741#[cfg(test)]
742#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
743mod tests {
744    use super::*;
745    use std::collections::BTreeMap;
746    use std::sync::Arc;
747    use std::thread;
748
749    use chrono::TimeZone;
750    use tempfile::TempDir;
751
752    use crate::{Doi, Safekey, SCHEMA_VERSION};
753
754    fn tmp_dir_utf8(dir: &TempDir) -> Utf8PathBuf {
755        Utf8PathBuf::from_path_buf(dir.path().to_path_buf()).expect("temp dir path must be UTF-8")
756    }
757
758    fn sample_safekey() -> Safekey {
759        // In-crate `pub(crate)` shortcut; matches the pattern used in
760        // safekey vector tests in lib.rs.
761        Safekey("doi_10.1234_example".to_string())
762    }
763
764    fn sample_metadata() -> Metadata {
765        Metadata {
766            schema_version: SCHEMA_VERSION.to_string(),
767            title: "Sample Paper Title".to_string(),
768            authors: vec!["Alice Researcher".to_string(), "Bob Coauthor".to_string()],
769            year: Some(2026),
770            doi: Some(Doi("10.1234/example".to_string())),
771            arxiv_id: None,
772            abstract_: Some("A short abstract.".to_string()),
773            venue: Some("Phys. Rev. X".to_string()),
774            publisher: Some("American Physical Society".to_string()),
775            issn: Some("2160-3308".to_string()),
776            isbn: None,
777            type_: Some("journal-article".to_string()),
778            keywords: vec!["physics".to_string(), "tdd".to_string()],
779            url: Some("https://example.test/paper".to_string()),
780            pdf_path: Some("doi_10.1234_example.pdf".to_string()),
781            doiget: Some(DoigetExtension {
782                fetched_at: chrono::Utc.with_ymd_and_hms(2026, 5, 6, 12, 0, 0).unwrap(),
783                source: "unpaywall".to_string(),
784                license: "CC-BY-4.0".to_string(),
785                size_bytes: 1234567,
786                mcp_call_id: Some("01JCKZ7Q0000000000000000AB".to_string()),
787            }),
788            other: BTreeMap::new(),
789        }
790    }
791
792    fn fresh_store(dir: &TempDir) -> FsStore {
793        let root = tmp_dir_utf8(dir).join("papers");
794        FsStore::new(root).expect("FsStore::new")
795    }
796
797    #[test]
798    fn roundtrip_reserved_fields() {
799        let dir = TempDir::new().expect("tmp");
800        let store = fresh_store(&dir);
801        let key = sample_safekey();
802        let m = sample_metadata();
803        store.write(&key, &m, None).expect("write");
804
805        let read = store.read(&key).expect("read").expect("Some");
806        assert_eq!(read.schema_version, m.schema_version);
807        assert_eq!(read.title, m.title);
808        assert_eq!(read.authors, m.authors);
809        assert_eq!(read.year, m.year);
810        assert_eq!(
811            read.doi.as_ref().map(|d| d.as_str()),
812            Some("10.1234/example")
813        );
814        assert_eq!(read.abstract_, m.abstract_);
815        assert_eq!(read.venue, m.venue);
816        assert_eq!(read.publisher, m.publisher);
817        assert_eq!(read.issn, m.issn);
818        assert_eq!(read.type_, m.type_);
819        assert_eq!(read.keywords, m.keywords);
820        assert_eq!(read.url, m.url);
821        assert_eq!(read.pdf_path, m.pdf_path);
822    }
823
824    #[test]
825    fn roundtrip_doiget_extension() {
826        let dir = TempDir::new().expect("tmp");
827        let store = fresh_store(&dir);
828        let key = sample_safekey();
829        let m = sample_metadata();
830        store.write(&key, &m, None).expect("write");
831
832        let read = store.read(&key).expect("read").expect("Some");
833        let d = read.doiget.expect("doiget table present");
834        let want = m.doiget.expect("input doiget");
835        assert_eq!(d.fetched_at, want.fetched_at);
836        assert_eq!(d.source, want.source);
837        assert_eq!(d.license, want.license);
838        assert_eq!(d.size_bytes, want.size_bytes);
839        assert_eq!(d.mcp_call_id, want.mcp_call_id);
840    }
841
842    #[test]
843    fn read_returns_none_for_missing_safekey() {
844        let dir = TempDir::new().expect("tmp");
845        let store = fresh_store(&dir);
846        let key = Safekey("nonexistent".to_string());
847        let res = store.read(&key).expect("read ok");
848        assert!(res.is_none(), "expected Ok(None), got {:?}", res);
849    }
850
851    #[test]
852    fn schema_too_new_blocks_writes_but_allows_reads() {
853        let dir = TempDir::new().expect("tmp");
854        let store = fresh_store(&dir);
855        let key = sample_safekey();
856
857        // Hand-craft a TOML with a future-major schema_version.
858        let meta_path = store.metadata_path(&key).expect("path");
859        std::fs::create_dir_all(meta_path.parent().expect("parent").as_std_path()).expect("mkdir");
860        let body = "schema_version = \"2.0\"\ntitle = \"Future\"\nauthors = []\n";
861        std::fs::write(meta_path.as_std_path(), body).expect("write");
862
863        // Read should succeed (read-only mode per §3, warn is best-effort).
864        let read = store.read(&key).expect("read ok");
865        assert!(read.is_some(), "future-major file must be readable");
866
867        // Write should refuse with SchemaTooNew.
868        let m = sample_metadata();
869        let err = store.write(&key, &m, None).expect_err("write must fail");
870        match err {
871            StoreError::SchemaTooNew { theirs, ours } => {
872                assert_eq!(theirs, "2.0");
873                assert_eq!(ours, SCHEMA_VERSION);
874            }
875            other => panic!("expected SchemaTooNew, got {:?}", other),
876        }
877    }
878
879    #[test]
880    fn concurrent_writers_serialize_via_flock() {
881        // Two threads writing to the same key with different [doiget].source
882        // values. The flock SHOULD make every write atomic from the on-disk
883        // perspective: at no point is the metadata file half-written, and
884        // every parse succeeds. We do not assert WHICH writer wins — only
885        // that the file remains valid TOML throughout.
886        let dir = TempDir::new().expect("tmp");
887        let store = Arc::new(fresh_store(&dir));
888        let key = sample_safekey();
889
890        // Pre-create so both threads exercise the merge path.
891        store.write(&key, &sample_metadata(), None).expect("seed");
892
893        let mut handles = Vec::new();
894        for source in ["unpaywall", "europepmc"] {
895            let store = Arc::clone(&store);
896            let key = key.clone();
897            handles.push(thread::spawn(move || {
898                let mut m = sample_metadata();
899                if let Some(d) = m.doiget.as_mut() {
900                    d.source = source.to_string();
901                }
902                store.write(&key, &m, None).expect("write");
903            }));
904        }
905        for h in handles {
906            h.join().expect("join");
907        }
908
909        // Every read after the two writers complete must succeed and produce
910        // a value whose `[doiget].source` is one of the two contenders.
911        let read = store.read(&key).expect("read").expect("Some");
912        let source = read.doiget.expect("doiget").source;
913        assert!(
914            source == "unpaywall" || source == "europepmc",
915            "winning source must be one of the contenders, got {}",
916            source
917        );
918    }
919
920    #[test]
921    fn list_recent_orders_by_fetched_at_desc() {
922        let dir = TempDir::new().expect("tmp");
923        let store = fresh_store(&dir);
924
925        for (idx, year_seed) in [(1, 2024_u32), (2, 2025), (3, 2026)] {
926            let key = Safekey(format!("doi_10.1234_entry{}", idx));
927            let mut m = sample_metadata();
928            m.title = format!("Entry {}", idx);
929            if let Some(d) = m.doiget.as_mut() {
930                d.fetched_at = chrono::Utc
931                    .with_ymd_and_hms(year_seed as i32, 5, 6, 12, 0, 0)
932                    .unwrap();
933            }
934            store.write(&key, &m, None).expect("write");
935        }
936
937        let recent = store.list_recent(10).expect("list");
938        assert_eq!(recent.len(), 3, "expected 3 entries, got {}", recent.len());
939        // Most-recent first: 2026, 2025, 2024.
940        assert_eq!(recent[0].title, "Entry 3");
941        assert_eq!(recent[1].title, "Entry 2");
942        assert_eq!(recent[2].title, "Entry 1");
943        for w in recent.windows(2) {
944            assert!(
945                w[0].fetched_at >= w[1].fetched_at,
946                "recent[].fetched_at must be non-increasing"
947            );
948        }
949    }
950
951    #[test]
952    fn search_finds_by_title_substring() {
953        let dir = TempDir::new().expect("tmp");
954        let store = fresh_store(&dir);
955
956        let key = Safekey("doi_10.1234_quantum".to_string());
957        let mut m = sample_metadata();
958        m.title = "Quantum Stuff and Other Topics".to_string();
959        store.write(&key, &m, None).expect("write");
960
961        let hits = store.search("quantum", 10).expect("search");
962        assert_eq!(hits.len(), 1, "expected 1 hit, got {}", hits.len());
963        assert_eq!(hits[0].title, "Quantum Stuff and Other Topics");
964
965        let empty = store.search("relativity", 10).expect("search");
966        assert!(empty.is_empty(), "expected no hits, got {:?}", empty);
967    }
968
969    #[test]
970    fn path_traversal_in_safekey_blocked() {
971        let dir = TempDir::new().expect("tmp");
972        let store = fresh_store(&dir);
973        let bad = Safekey("../etc/passwd".to_string());
974
975        match store.read(&bad) {
976            Err(StoreError::PathTraversal { .. }) => {}
977            other => panic!("expected PathTraversal, got {:?}", other),
978        }
979        let m = sample_metadata();
980        match store.write(&bad, &m, None) {
981            Err(StoreError::PathTraversal { .. }) => {}
982            other => panic!("expected PathTraversal, got {:?}", other),
983        }
984    }
985
986    #[test]
987    fn write_then_read_normalized_toml_alphabetizes_keys() {
988        // §7 normalization: schema_version first, then reserved fields
989        // alphabetically, then sub-tables alphabetically with alphabetical
990        // keys inside.
991        let dir = TempDir::new().expect("tmp");
992        let store = fresh_store(&dir);
993        let key = sample_safekey();
994        store.write(&key, &sample_metadata(), None).expect("write");
995
996        let path = store.metadata_path(&key).expect("path");
997        let raw = std::fs::read_to_string(path.as_std_path()).expect("read");
998        // schema_version must be the first line.
999        let first_line = raw.lines().next().expect("at least one line");
1000        assert!(
1001            first_line.starts_with("schema_version = "),
1002            "first line must be schema_version, got: {:?}",
1003            first_line
1004        );
1005        // EOF newline.
1006        assert!(raw.ends_with('\n'), "file must end with a newline");
1007        // No CR characters anywhere.
1008        assert!(!raw.contains('\r'), "no CR allowed; LF only");
1009        // Sub-table appears.
1010        assert!(raw.contains("\n[doiget]\n"), "doiget sub-table missing");
1011        // Within [doiget], `fetched_at` must precede `license` alphabetically.
1012        let doiget_idx = raw.find("[doiget]").expect("doiget block");
1013        let after = &raw[doiget_idx..];
1014        let fetched_at_idx = after
1015            .find("fetched_at = ")
1016            .expect("fetched_at key in doiget");
1017        let license_idx = after.find("license = ").expect("license key in doiget");
1018        assert!(
1019            fetched_at_idx < license_idx,
1020            "fetched_at must precede license within [doiget]"
1021        );
1022    }
1023
1024    #[test]
1025    fn write_preserves_unknown_table_from_existing_file() {
1026        // §6 + §8: if the existing file has a `[bibliofetch]` table, a
1027        // doiget rewrite must not silently drop it.
1028        let dir = TempDir::new().expect("tmp");
1029        let store = fresh_store(&dir);
1030        let key = sample_safekey();
1031        let meta_path = store.metadata_path(&key).expect("path");
1032
1033        let body = format!(
1034            "schema_version = \"{}\"\ntitle = \"Existing\"\nauthors = [\"Carol\"]\n\n\
1035             [bibliofetch]\nharvest = \"2026-01-01\"\n",
1036            SCHEMA_VERSION
1037        );
1038        std::fs::write(meta_path.as_std_path(), body).expect("write");
1039
1040        let mut m = sample_metadata();
1041        m.title = "Doiget Wins?".to_string(); // would normally overwrite, but §6 keeps existing
1042        store.write(&key, &m, None).expect("write");
1043
1044        let read_raw = std::fs::read_to_string(meta_path.as_std_path()).expect("re-read");
1045        assert!(
1046            read_raw.contains("bibliofetch"),
1047            "[bibliofetch] table was dropped: {}",
1048            read_raw
1049        );
1050        assert!(
1051            read_raw.contains("title = \"Existing\""),
1052            "doiget overwrote a reserved field set by another tool: {}",
1053            read_raw
1054        );
1055    }
1056
1057    /// Issue #121: prove the BiblioFetch.jl coexistence contract
1058    /// end-to-end through the actual `read()` / `write()` API with
1059    /// TYPED values — not a raw-text substring check. Seeds a
1060    /// "BiblioFetch-authored" entry with reserved fields, a
1061    /// `[bibliofetch]` table carrying typed sub-keys (string / int /
1062    /// array) AND an unknown top-level scalar, then asserts a
1063    /// doiget read→mutate→write→read cycle preserves all of it and
1064    /// does not clobber the reserved field (STORE.md §6 + §8).
1065    #[test]
1066    fn bibliofetch_typed_table_and_unknown_scalar_survive_roundtrip() {
1067        let dir = TempDir::new().expect("tmp");
1068        let store = fresh_store(&dir);
1069        let key = sample_safekey();
1070        let meta_path = store.metadata_path(&key).expect("path");
1071
1072        // Written "by BiblioFetch.jl": reserved fields + a typed
1073        // [bibliofetch] table + an unknown top-level scalar.
1074        let body = format!(
1075            "schema_version = \"{}\"\n\
1076             title = \"Existing\"\n\
1077             authors = [\"Carol\"]\n\
1078             zotero_key = \"ABC123\"\n\n\
1079             [bibliofetch]\n\
1080             harvest = \"2026-02-03\"\n\
1081             count = 42\n\
1082             tags = [\"x\", \"y\"]\n",
1083            SCHEMA_VERSION
1084        );
1085        std::fs::write(meta_path.as_std_path(), body).expect("seed write");
1086
1087        // First read through the real API must surface the unknowns
1088        // in `other`.
1089        let m0 = store.read(&key).expect("read ok").expect("entry present");
1090        assert!(
1091            m0.other.contains_key("bibliofetch"),
1092            "[bibliofetch] not captured into `other` on read: {:?}",
1093            m0.other
1094        );
1095        assert_eq!(
1096            m0.other.get("zotero_key").and_then(|v| v.as_str()),
1097            Some("ABC123"),
1098            "unknown top-level scalar not captured: {:?}",
1099            m0.other
1100        );
1101
1102        // doiget rewrites (e.g. a re-fetch) with its own metadata.
1103        let mut m_doiget = sample_metadata();
1104        m_doiget.title = "Doiget Would Overwrite".to_string();
1105        store.write(&key, &m_doiget, None).expect("doiget write");
1106
1107        // Read again — everything BiblioFetch authored must still be
1108        // there, byte/value-identical, and the reserved field intact.
1109        let m1 = store
1110            .read(&key)
1111            .expect("re-read ok")
1112            .expect("entry present");
1113        assert_eq!(
1114            m1.title, "Existing",
1115            "STORE.md §6: doiget overwrote a reserved field"
1116        );
1117        let bf = m1
1118            .other
1119            .get("bibliofetch")
1120            .and_then(|v| v.as_table())
1121            .expect("[bibliofetch] table survived read->write->read");
1122        assert_eq!(
1123            bf.get("harvest").and_then(|v| v.as_str()),
1124            Some("2026-02-03")
1125        );
1126        assert_eq!(bf.get("count").and_then(|v| v.as_integer()), Some(42));
1127        let tags = bf
1128            .get("tags")
1129            .and_then(|v| v.as_array())
1130            .expect("tags array survived");
1131        let tags: Vec<&str> = tags.iter().filter_map(|v| v.as_str()).collect();
1132        assert_eq!(tags, vec!["x", "y"]);
1133        assert_eq!(
1134            m1.other.get("zotero_key").and_then(|v| v.as_str()),
1135            Some("ABC123"),
1136            "unknown top-level scalar lost across the cycle"
1137        );
1138
1139        // STORE.md §7 normalization: trailing newline preserved.
1140        let raw = std::fs::read_to_string(meta_path.as_std_path()).expect("raw re-read");
1141        assert!(raw.ends_with('\n'), "missing trailing newline: {raw:?}");
1142    }
1143
1144    /// Issue #123: on an `other`-key collision the EXISTING on-disk
1145    /// value must win (STORE.md §6 "never overwrite"). Seeds a
1146    /// `zotero_key` "by another tool", then has doiget write an entry
1147    /// whose own `other` carries a different `zotero_key`; the disk
1148    /// value must survive.
1149    #[test]
1150    fn other_key_collision_prefers_existing() {
1151        let dir = TempDir::new().expect("tmp");
1152        let store = fresh_store(&dir);
1153        let key = sample_safekey();
1154        let meta_path = store.metadata_path(&key).expect("path");
1155
1156        let body = format!(
1157            "schema_version = \"{}\"\ntitle = \"Existing\"\nauthors = [\"Carol\"]\n\
1158             zotero_key = \"FROM_BIBLIOFETCH\"\n",
1159            SCHEMA_VERSION
1160        );
1161        std::fs::write(meta_path.as_std_path(), body).expect("seed");
1162
1163        let mut m = sample_metadata();
1164        m.other.insert(
1165            "zotero_key".to_string(),
1166            toml::Value::String("FROM_DOIGET".to_string()),
1167        );
1168        store.write(&key, &m, None).expect("write");
1169
1170        let got = store.read(&key).expect("read").expect("present");
1171        assert_eq!(
1172            got.other.get("zotero_key").and_then(|v| v.as_str()),
1173            Some("FROM_BIBLIOFETCH"),
1174            "STORE.md §6: existing `other` value must win on collision"
1175        );
1176    }
1177
1178    #[test]
1179    fn pdf_is_copied_atomically_on_write() {
1180        let dir = TempDir::new().expect("tmp");
1181        let store = fresh_store(&dir);
1182        let key = sample_safekey();
1183
1184        // Write a small synthetic "PDF" file.
1185        let src_dir = TempDir::new().expect("tmp src");
1186        let src_path = Utf8PathBuf::from_path_buf(src_dir.path().to_path_buf())
1187            .expect("utf8 src dir")
1188            .join("input.pdf");
1189        std::fs::write(src_path.as_std_path(), b"%PDF-1.7 synthetic").expect("write src");
1190
1191        store
1192            .write(&key, &sample_metadata(), Some(&src_path))
1193            .expect("write");
1194
1195        let dst = store.pdf_path(&key).expect("pdf path");
1196        let bytes = std::fs::read(dst.as_std_path()).expect("read dst");
1197        assert_eq!(bytes, b"%PDF-1.7 synthetic");
1198    }
1199}