doiget_core/store/mod.rs
1//! Filesystem-backed metadata store.
2//!
3//! Binding spec: [`docs/STORE.md`](../../../../docs/STORE.md) (NORMATIVE shared
4//! spec for layout, schema, lock protocol, atomic write, normalization).
5//! Public API surface: `docs/PUBLIC_API.md` §2 (Store trait), §3 (Metadata).
6//!
7//! ## Entry points
8//!
9//! - [`Store`] — the trait surface implementations expose.
10//! - [`FsStore`] — filesystem-backed implementation rooted at a configurable
11//! directory (default `~/papers/`).
12//! - [`Metadata`] / [`DoigetExtension`] — the on-disk schema, mirrored from
13//! `docs/STORE.md` §2.
14//!
15//! ## Cross-tool coexistence
16//!
17//! `~/papers/` is a shared resource between doiget and BiblioFetch.jl. Both
18//! tools follow the lock protocol in `docs/STORE.md` §4 and the atomic-write
19//! sequence in §5. Per §6, doiget MUST NOT overwrite reserved top-level
20//! fields previously written by another tool — see [`FsStore::write`].
21
22mod fs_store;
23pub mod metadata;
24pub mod render;
25
26pub use fs_store::FsStore;
27pub use metadata::{DoigetExtension, Metadata};
28pub use render::{to_bibtex, to_csl_array};
29
30use camino::Utf8Path;
31use serde::Serialize;
32use thiserror::Error;
33
34use crate::Safekey;
35
36/// Brief summary of a stored entry; returned by
37/// [`Store::list_recent`] / [`Store::search`].
38///
39/// `non_exhaustive` so adding new summary fields (e.g. `doi`, `authors`) in a
40/// later revision is non-breaking. Pattern-match with a wildcard arm.
41///
42/// `Serialize` enables `list-recent --mode json` / `search --mode json`
43/// (#204) — the wire form is the obvious field-name JSON: `{"safekey":
44/// "...", "title": "...", "year": 2024, "fetched_at": "2026-05-20T…Z"}`,
45/// with `null` for absent optionals.
46///
47/// # Wire-format stability (post-#208 self-review §1)
48///
49/// Once a release ships with the \[`Serialize`\] derive, the field
50/// **names** below become part of the public API: a downstream consumer
51/// (CLI agent, MCP tool, BiblioFetch.jl, third-party script) MAY bind
52/// to them. Renaming a field is then a semver minor bump and warrants
53/// a CHANGELOG \[BREAKING\] note. Adding new fields is still safe
54/// (per `#[non_exhaustive]`).
55#[derive(Debug, Clone, Serialize)]
56#[non_exhaustive]
57pub struct EntryInfo {
58 /// The safekey of the entry. See `docs/SAFEKEY.md`.
59 pub safekey: Safekey,
60 /// Title from the entry's reserved `title` field.
61 pub title: String,
62 /// Year, if any, from the entry's reserved `year` field.
63 pub year: Option<i32>,
64 /// `fetched_at` from the `[doiget]` table, if any.
65 pub fetched_at: Option<chrono::DateTime<chrono::Utc>>,
66}
67
68/// Errors emitted by [`Store`] implementations.
69#[derive(Debug, Error)]
70#[non_exhaustive]
71pub enum StoreError {
72 /// Underlying I/O failure.
73 #[error("io error: {0}")]
74 Io(#[from] std::io::Error),
75 /// Malformed TOML or schema mismatch on read.
76 #[error("toml deserialize error: {0}")]
77 Deserialize(#[from] toml::de::Error),
78 /// Failed to serialize a [`Metadata`] to TOML.
79 #[error("toml serialize error: {0}")]
80 Serialize(#[from] toml::ser::Error),
81 /// Could not acquire the advisory `flock` within the 5 s budget named in
82 /// `docs/STORE.md` §4.
83 #[error("flock timeout (5s) on {path}")]
84 LockTimeout {
85 /// The lock-file path that was contended.
86 path: camino::Utf8PathBuf,
87 },
88 /// The on-disk `schema_version` is a future major; per `docs/STORE.md` §3
89 /// the entry is read-only for this build.
90 #[error("schema_version too new: {theirs} > {ours}; entry is read-only")]
91 SchemaTooNew {
92 /// Schema version observed on disk.
93 theirs: String,
94 /// Schema version this build supports.
95 ours: String,
96 },
97 /// A reserved field that the spec marks as required is missing.
98 #[error("required field missing: {field}")]
99 MissingField {
100 /// The name of the missing reserved field.
101 field: &'static str,
102 },
103 /// The supplied [`Safekey`] resolves to a path outside the store root.
104 /// Defense-in-depth check; `Safekey` construction already enforces the
105 /// `[A-Za-z0-9._-]`-only charset per `docs/SAFEKEY.md`.
106 #[error("path is outside the store root: {path}")]
107 PathTraversal {
108 /// The offending resolved path.
109 path: camino::Utf8PathBuf,
110 },
111}
112
113/// Filesystem-shaped metadata store, semver-locked per `docs/PUBLIC_API.md`
114/// §2.
115///
116/// Implementations are responsible for honoring:
117///
118/// - `docs/STORE.md` §4 lock protocol (advisory `flock` on
119/// `<safekey>.toml.lock` with a 5 s timeout).
120/// - `docs/STORE.md` §5 atomic-write sequence (`tmp` → fsync → rename →
121/// fsync parent).
122/// - `docs/STORE.md` §6 doiget write discipline: never overwrite reserved
123/// top-level fields previously written by another tool.
124/// - `docs/STORE.md` §7 TOML normalization (alphabetical key order, `\n`
125/// line endings, trailing newline).
126pub trait Store: Send + Sync {
127 /// Read the entry keyed by `key`.
128 ///
129 /// Returns `Ok(None)` if no entry exists. Returns `Err` on I/O failure,
130 /// malformed TOML, or unrecoverable schema mismatch (e.g. future major).
131 fn read(&self, key: &Safekey) -> Result<Option<Metadata>, StoreError>;
132
133 /// Write or update the entry keyed by `key`.
134 ///
135 /// If `pdf` is `Some`, the file at that path is copied to
136 /// `<root>/<safekey>.pdf` via the same atomic-rename dance as the
137 /// metadata file. The caller is responsible for emitting the
138 /// `event=store_write` provenance row (see `docs/PROVENANCE_LOG.md` §3).
139 fn write(&self, key: &Safekey, m: &Metadata, pdf: Option<&Utf8Path>) -> Result<(), StoreError>;
140
141 /// Return up to `limit` entries, most-recent first by `[doiget].fetched_at`.
142 fn list_recent(&self, limit: usize) -> Result<Vec<EntryInfo>, StoreError>;
143
144 /// Return up to `limit` entries whose title / authors / venue / publisher
145 /// case-insensitively contain `query`.
146 fn search(&self, query: &str, limit: usize) -> Result<Vec<EntryInfo>, StoreError>;
147}