Skip to main content

doiget_core/store/
mod.rs

1//! Filesystem-backed metadata store.
2//!
3//! Binding spec: [`docs/STORE.md`](../../../../docs/STORE.md) (NORMATIVE shared
4//! spec for layout, schema, lock protocol, atomic write, normalization).
5//! Public API surface: `docs/PUBLIC_API.md` §2 (Store trait), §3 (Metadata).
6//!
7//! ## Entry points
8//!
9//! - [`Store`] — the trait surface implementations expose.
10//! - [`FsStore`] — filesystem-backed implementation rooted at a configurable
11//!   directory (default `~/papers/`).
12//! - [`Metadata`] / [`DoigetExtension`] — the on-disk schema, mirrored from
13//!   `docs/STORE.md` §2.
14//!
15//! ## Cross-tool coexistence
16//!
17//! `~/papers/` is a shared resource between doiget and BiblioFetch.jl. Both
18//! tools follow the lock protocol in `docs/STORE.md` §4 and the atomic-write
19//! sequence in §5. Per §6, doiget MUST NOT overwrite reserved top-level
20//! fields previously written by another tool — see [`FsStore::write`].
21
22mod fs_store;
23pub mod metadata;
24pub mod render;
25
26pub use fs_store::FsStore;
27pub use metadata::{DoigetExtension, Metadata};
28pub use render::{to_bibtex, to_csl_array};
29
30use camino::Utf8Path;
31use serde::Serialize;
32use thiserror::Error;
33
34use crate::Safekey;
35
36/// Brief summary of a stored entry; returned by
37/// [`Store::list_recent`] / [`Store::search`].
38///
39/// `non_exhaustive` so adding new summary fields (e.g. `doi`, `authors`) in a
40/// later revision is non-breaking. Because this is a struct (not an enum),
/// code in other crates cannot build it with a struct literal and must
/// destructure it with a trailing `..` rest pattern rather than a wildcard
/// match arm.
41///
42/// `Serialize` enables `list-recent --mode json` / `search --mode json`
43/// (#204) — the wire form is the obvious field-name JSON: `{"safekey":
44/// "...", "title": "...", "year": 2024, "fetched_at": "2026-05-20T…Z"}`,
45/// with `null` for absent optionals.
46///
47/// # Wire-format stability (post-#208 self-review §1)
48///
49/// Once a release ships with the \[`Serialize`\] derive, the field
50/// **names** below become part of the public API: a downstream consumer
51/// (CLI agent, MCP tool, BiblioFetch.jl, third-party script) MAY bind
52/// to them. Renaming a field is then a semver minor bump and warrants
53/// a CHANGELOG \[BREAKING\] note. Adding new fields is still safe
54/// (per `#[non_exhaustive]`).
///
/// NOTE(review): "minor bump" for a breaking rename presumably assumes the
/// crate is still on a 0.x version — confirm; after 1.0 a rename would
/// need a major bump.
55#[derive(Debug, Clone, Serialize)]
56#[non_exhaustive]
57pub struct EntryInfo {
58    /// The safekey of the entry. See `docs/SAFEKEY.md`.
59    pub safekey: Safekey,
60    /// Title from the entry's reserved `title` field.
61    pub title: String,
62    /// Year, if any, from the entry's reserved `year` field.
63    pub year: Option<i32>,
64    /// `fetched_at` from the `[doiget]` table, if any.
65    pub fetched_at: Option<chrono::DateTime<chrono::Utc>>,
66}
67
68/// Errors emitted by [`Store`] implementations.
///
/// `non_exhaustive`: code in other crates that matches on this enum needs a
/// wildcard arm, which lets new variants be added without a breaking change.
/// The `#[from]` fields on the first three variants generate `From` impls,
/// so `?` converts the underlying error types automatically.
69#[derive(Debug, Error)]
70#[non_exhaustive]
71pub enum StoreError {
72    /// Underlying I/O failure.
73    #[error("io error: {0}")]
74    Io(#[from] std::io::Error),
75    /// Malformed TOML or schema mismatch on read.
76    #[error("toml deserialize error: {0}")]
77    Deserialize(#[from] toml::de::Error),
78    /// Failed to serialize a [`Metadata`] to TOML.
79    #[error("toml serialize error: {0}")]
80    Serialize(#[from] toml::ser::Error),
81    /// Could not acquire the advisory `flock` within the 5 s budget named in
82    /// `docs/STORE.md` §4.
    ///
    /// The `5s` in the display message is a string literal, not derived from
    /// a constant; keep it in step with the spec if the budget changes.
83    #[error("flock timeout (5s) on {path}")]
84    LockTimeout {
85        /// The lock-file path that was contended.
86        path: camino::Utf8PathBuf,
87    },
88    /// The on-disk `schema_version` is a future major; per `docs/STORE.md` §3
89    /// the entry is read-only for this build.
90    #[error("schema_version too new: {theirs} > {ours}; entry is read-only")]
91    SchemaTooNew {
92        /// Schema version observed on disk.
93        theirs: String,
94        /// Schema version this build supports.
95        ours: String,
96    },
97    /// A reserved field that the spec marks as required is missing.
98    #[error("required field missing: {field}")]
99    MissingField {
100        /// The name of the missing reserved field.
101        field: &'static str,
102    },
103    /// The supplied [`Safekey`] resolves to a path outside the store root.
104    /// Defense-in-depth check; `Safekey` construction already enforces the
105    /// `[A-Za-z0-9._-]`-only charset per `docs/SAFEKEY.md`.
106    #[error("path is outside the store root: {path}")]
107    PathTraversal {
108        /// The offending resolved path.
109        path: camino::Utf8PathBuf,
110    },
111}
112
113/// Filesystem-shaped metadata store, semver-locked per `docs/PUBLIC_API.md`
114/// §2.
115///
116/// Implementations are responsible for honoring:
117///
118/// - `docs/STORE.md` §4 lock protocol (advisory `flock` on
119///   `<safekey>.toml.lock` with a 5 s timeout).
120/// - `docs/STORE.md` §5 atomic-write sequence (`tmp` → fsync → rename →
121///   fsync parent).
122/// - `docs/STORE.md` §6 doiget write discipline: never overwrite reserved
123///   top-level fields previously written by another tool.
124/// - `docs/STORE.md` §7 TOML normalization (alphabetical key order, `\n`
125///   line endings, trailing newline).
///
/// The `Send + Sync` supertraits require every implementation to be safe to
/// move to, and share between, threads. All methods take `&self` and report
/// failure through [`StoreError`].
126pub trait Store: Send + Sync {
127    /// Read the entry keyed by `key`.
128    ///
129    /// Returns `Ok(None)` if no entry exists. Returns `Err` on I/O failure,
130    /// malformed TOML, or unrecoverable schema mismatch (e.g. future major).
131    fn read(&self, key: &Safekey) -> Result<Option<Metadata>, StoreError>;
132
133    /// Write or update the entry keyed by `key`.
134    ///
135    /// If `pdf` is `Some`, the file at that path is copied to
136    /// `<root>/<safekey>.pdf` via the same atomic-rename dance as the
137    /// metadata file. The caller is responsible for emitting the
138    /// `event=store_write` provenance row (see `docs/PROVENANCE_LOG.md` §3).
    ///
    /// NOTE(review): whether the metadata and PDF writes are atomic as a
    /// pair (or only individually) is not stated here — confirm against
    /// the implementation.
139    fn write(&self, key: &Safekey, m: &Metadata, pdf: Option<&Utf8Path>) -> Result<(), StoreError>;
140
141    /// Return up to `limit` entries, most-recent first by `[doiget].fetched_at`.
    ///
    /// NOTE(review): [`EntryInfo::fetched_at`] is optional; where entries
    /// without a timestamp sort is not specified here — confirm against
    /// the implementation.
142    fn list_recent(&self, limit: usize) -> Result<Vec<EntryInfo>, StoreError>;
143
144    /// Return up to `limit` entries whose title / authors / venue / publisher
145    /// case-insensitively contain `query`.
    ///
    /// NOTE(review): result ordering and the behaviour for an empty `query`
    /// are not specified here — confirm against the implementation.
146    fn search(&self, query: &str, limit: usize) -> Result<Vec<EntryInfo>, StoreError>;
147}