Skip to main content

doiget_core/store/
render.rs

1//! Citation renderers for stored [`Metadata`] — BibTeX and CSL JSON 1.0.
2//!
3//! Phase 2 / Slice 15b. The rendering logic originally lived in the
4//! `doiget-cli` `bib` / `csl` subcommands; it is hoisted here so the
5//! `doiget-mcp` `doiget_bibtex_export` / `doiget_csl_export` tools and
6//! the CLI share a single implementation (`docs/MCP_TOOLS.md` §1 rows
7//! `doiget_bibtex_export` / `doiget_csl_export`).
8//!
9//! Both renderers are pure functions of a [`Metadata`] plus a citation
10//! key (the entry's safekey). No I/O, no network. They emit the Phase 1
11//! binding fields from `docs/STORE.md` §2 (title, authors, year, doi,
12//! venue, publisher, issn); richer entry-type / field mapping is a
13//! Phase 2 follow-up.
14
15use serde::Serialize;
16
17use super::Metadata;
18
19// ---------------------------------------------------------------------------
20// BibTeX
21// ---------------------------------------------------------------------------
22
23/// Render a single BibTeX entry for `m`, keyed by `citation_key`.
24///
25/// `journal-article` → `@article`; everything else → `@misc` (Phase 2
26/// starter — `@inproceedings` / `@book` mapping is a follow-up). Field
27/// order: `title`, `author`, `year`, `doi`, `journal`, `publisher`,
28/// `issn`; any empty / `None` field is omitted. The returned string is a
29/// complete entry terminated by `}\n`.
30///
31/// Literal `{` / `}` in a field value would unbalance the surrounding
32/// braces; they are stripped (with a `tracing::warn!`) rather than
33/// TeX-escaped — real-world Crossref / Unpaywall titles rarely contain
34/// bare braces, so this is safe-by-default for the Phase 2 starter.
35#[must_use]
36pub fn to_bibtex(citation_key: &str, m: &Metadata) -> String {
37    let mut out = String::new();
38    let entry_type = bibtex_entry_type(m.type_.as_deref());
39    out.push_str(&format!("@{entry_type}{{{citation_key},\n"));
40
41    push_field(&mut out, "title", &m.title);
42    if !m.authors.is_empty() {
43        // BibTeX joins multiple authors with the literal token " and ".
44        push_field(&mut out, "author", &m.authors.join(" and "));
45    }
46    if let Some(year) = m.year {
47        push_field(&mut out, "year", &year.to_string());
48    }
49    if let Some(doi) = &m.doi {
50        push_field(&mut out, "doi", doi.as_str());
51    }
52    if let Some(venue) = m.venue.as_deref() {
53        if !venue.is_empty() {
54            push_field(&mut out, "journal", venue);
55        }
56    }
57    if let Some(publisher) = m.publisher.as_deref() {
58        if !publisher.is_empty() {
59            push_field(&mut out, "publisher", publisher);
60        }
61    }
62    if let Some(issn) = m.issn.as_deref() {
63        if !issn.is_empty() {
64            push_field(&mut out, "issn", issn);
65        }
66    }
67
68    out.push_str("}\n");
69    out
70}
71
/// Choose the BibTeX entry type for a Crossref-taxonomy `type` string.
///
/// Only an exact `journal-article` yields `article`; a missing type or
/// any other value (the comparison is case-sensitive) yields `misc`.
fn bibtex_entry_type(type_: Option<&str>) -> &'static str {
    if type_ == Some("journal-article") {
        "article"
    } else {
        "misc"
    }
}
82
83/// Append a single `  <key>      = {<value>},\n` line, padded so the `=`
84/// columns line up across the seven-field Phase 2 surface (width 10 is
85/// wide enough for `publisher`, the longest key).
86fn push_field(out: &mut String, key: &str, value: &str) {
87    let escaped = strip_bibtex_unsafe(key, value);
88    out.push_str(&format!("  {key:<10} = {{{escaped}}},\n"));
89}
90
91/// Strip BibTeX-unsafe `{` / `}` from `value`, warning once per field so
92/// the dropped characters are visible in stderr / structured logs.
93fn strip_bibtex_unsafe(key: &str, value: &str) -> String {
94    if value.contains('{') || value.contains('}') {
95        tracing::warn!(
96            field = key,
97            "stripping literal '{{'/'}}' from BibTeX field value; \
98             a TeX-aware escaper lands in a Phase 2 follow-up"
99        );
100    }
101    value.chars().filter(|c| !matches!(c, '{' | '}')).collect()
102}
103
104// ---------------------------------------------------------------------------
105// CSL JSON 1.0
106// ---------------------------------------------------------------------------
107
108/// Render `m` as a CSL JSON 1.0 **array** (a single-element array, so it
109/// is a drop-in for citeproc-js / pandoc `--csl-json` consumers that
110/// expect a list of items), keyed by `citation_key`.
111///
112/// `journal-article` → CSL `article-journal`; everything else →
113/// `manuscript` (citeproc-js renders that without forcing a container).
114/// Empty optional fields are omitted from the JSON.
115#[must_use]
116pub fn to_csl_array(citation_key: &str, m: &Metadata) -> serde_json::Value {
117    let item = build_csl_item(citation_key, m);
118    // `CslItem` is all-`Serialize` over owned/borrowed primitives, so
119    // `to_value` cannot fail; fall back to an empty array rather than
120    // panicking if a future field breaks that invariant.
121    serde_json::to_value([item]).unwrap_or_else(|_| serde_json::Value::Array(Vec::new()))
122}
123
/// One CSL JSON 1.0 item, scoped to the binding fields the local
/// `Metadata` schema can populate. Field order is the citeproc-js
/// conventional order so a human diffing two outputs sees a stable
/// column layout.
///
/// All string data is borrowed for `'a`; `build_csl_item` fills the
/// struct from a citation key and a `Metadata`. `id`, `type` and `title`
/// are always serialized; each remaining key is skipped when its value
/// is an empty list / `None`.
#[derive(Debug, Serialize)]
struct CslItem<'a> {
    // Citation key, emitted as the item's `id`.
    id: &'a str,
    // Named `type_` because `type` is a Rust keyword; serialized as `type`.
    #[serde(rename = "type")]
    type_: &'static str,
    // Always present in the output, even when the string is empty.
    title: &'a str,
    // One entry per author; the key is dropped when there are none.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    author: Vec<CslName>,
    // Publication date; dropped when the year is unknown.
    #[serde(skip_serializing_if = "Option::is_none")]
    issued: Option<CslIssued>,
    // CSL spells this key in upper case.
    #[serde(rename = "DOI", skip_serializing_if = "Option::is_none")]
    doi: Option<&'a str>,
    // Filled from the metadata's `venue`.
    #[serde(rename = "container-title", skip_serializing_if = "Option::is_none")]
    container_title: Option<&'a str>,
    #[serde(skip_serializing_if = "Option::is_none")]
    publisher: Option<&'a str>,
    // CSL spells this key in upper case.
    #[serde(rename = "ISSN", skip_serializing_if = "Option::is_none")]
    issn: Option<&'a str>,
}
147
/// CSL name-variable shape. Empty halves are omitted so a single-token
/// name lands as `{"family": "Plato"}` rather than with a stray `given`.
///
/// Both halves are owned strings because `parse_author` trims and
/// copies them out of the free-form author string. If both halves are
/// empty the value serializes as an empty object.
#[derive(Debug, Serialize)]
struct CslName {
    // Surname; the key is skipped when the string is empty.
    #[serde(skip_serializing_if = "String::is_empty")]
    family: String,
    // Given name(s); the key is skipped when the string is empty.
    #[serde(skip_serializing_if = "String::is_empty")]
    given: String,
}
157
/// CSL date-variable shape, year-only for Phase 1. `date-parts` is a
/// list-of-lists; we only know the year so the inner list is `[<year>]`.
///
/// Serializes as `{"date-parts": [[<year>]]}`.
#[derive(Debug, Serialize)]
struct CslIssued {
    // Outer list: one entry per date; inner list: the parts of that date.
    // `build_csl_item` only ever stores a single `[year]` entry.
    #[serde(rename = "date-parts")]
    date_parts: Vec<Vec<i32>>,
}
165
166fn build_csl_item<'a>(citation_key: &'a str, m: &'a Metadata) -> CslItem<'a> {
167    CslItem {
168        id: citation_key,
169        type_: match m.type_.as_deref() {
170            Some("journal-article") => "article-journal",
171            _ => "manuscript",
172        },
173        title: &m.title,
174        author: m.authors.iter().map(|s| parse_author(s)).collect(),
175        issued: m.year.map(|y| CslIssued {
176            date_parts: vec![vec![y]],
177        }),
178        doi: m.doi.as_ref().map(|d| d.as_str()),
179        container_title: m.venue.as_deref(),
180        publisher: m.publisher.as_deref(),
181        issn: m.issn.as_deref(),
182    }
183}
184
185/// Split a free-form name string into CSL `family` / `given` halves.
186///
187/// - `Family, Given` (comma present): split on the first comma.
188/// - Otherwise split on the LAST whitespace: left is given, right is
189///   family (`"Alice Researcher"` → family `"Researcher"`, given
190///   `"Alice"`) — the convention citeproc-js uses for string names.
191/// - Single token: whole string is the family, `given` empty.
192fn parse_author(name: &str) -> CslName {
193    let trimmed = name.trim();
194    if let Some((family, given)) = trimmed.split_once(',') {
195        CslName {
196            family: family.trim().to_string(),
197            given: given.trim().to_string(),
198        }
199    } else if let Some(idx) = trimmed.rfind(char::is_whitespace) {
200        let (given, family) = trimmed.split_at(idx);
201        CslName {
202            family: family.trim().to_string(),
203            given: given.trim().to_string(),
204        }
205    } else {
206        CslName {
207            family: trimmed.to_string(),
208            given: String::new(),
209        }
210    }
211}
212
213// ---------------------------------------------------------------------------
214// Tests
215// ---------------------------------------------------------------------------
216
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
    use std::collections::BTreeMap;

    use chrono::TimeZone;

    use super::*;
    use crate::store::{DoigetExtension, Metadata};
    use crate::{Doi, SCHEMA_VERSION};

    /// Crossref type string that selects the article renderings.
    const JOURNAL_ARTICLE: &str = "journal-article";

    /// Fully-populated record; only the Crossref `type` varies per test.
    fn fixture(type_: Option<&str>) -> Metadata {
        let fetched_at = chrono::Utc
            .with_ymd_and_hms(2026, 5, 6, 12, 0, 0)
            .single()
            .expect("valid timestamp");
        let provenance = DoigetExtension {
            fetched_at,
            source: String::from("unpaywall"),
            license: String::from("CC-BY-4.0"),
            size_bytes: 1234,
            mcp_call_id: None,
        };
        let doi = Doi::parse("10.1234/example").expect("valid DOI");

        Metadata {
            schema_version: SCHEMA_VERSION.to_string(),
            title: String::from("Quantum Stuff"),
            authors: vec![
                String::from("Alice Researcher"),
                String::from("Bob Coauthor"),
            ],
            year: Some(2026),
            doi: Some(doi),
            arxiv_id: None,
            abstract_: None,
            venue: Some(String::from("Phys Rev X")),
            publisher: Some(String::from("APS")),
            issn: Some(String::from("2160-3308")),
            isbn: None,
            type_: type_.map(String::from),
            keywords: Vec::new(),
            url: None,
            pdf_path: None,
            doiget: Some(provenance),
            other: BTreeMap::new(),
        }
    }

    /// The fixture typed as a journal article.
    fn article() -> Metadata {
        fixture(Some(JOURNAL_ARTICLE))
    }

    // ---- BibTeX ----

    #[test]
    fn bibtex_journal_article_renders_as_article() {
        let rendered = to_bibtex("doi_10.1234_example", &article());

        assert!(
            rendered.starts_with("@article{doi_10.1234_example,\n"),
            "{rendered}"
        );
        let expected_lines = [
            "title      = {Quantum Stuff},",
            "author     = {Alice Researcher and Bob Coauthor},",
            "year       = {2026},",
            "doi        = {10.1234/example},",
            "journal    = {Phys Rev X},",
            "publisher  = {APS},",
            "issn       = {2160-3308},",
        ];
        for line in expected_lines {
            assert!(rendered.contains(line), "{rendered}");
        }
        assert!(rendered.ends_with("}\n"), "{rendered}");
    }

    #[test]
    fn bibtex_missing_and_unknown_type_render_as_misc() {
        for type_ in [None, Some("posted-content")] {
            let rendered = to_bibtex("k", &fixture(type_));
            assert!(rendered.starts_with("@misc{k,\n"));
        }
    }

    #[test]
    fn bibtex_empty_optionals_omitted() {
        let mut record = article();
        record.venue = None;
        record.publisher = None;
        record.issn = None;

        let rendered = to_bibtex("k", &record);

        for absent in ["journal", "publisher", "issn"] {
            assert!(!rendered.contains(absent), "{rendered}");
        }
        for present in ["title", "author", "year"] {
            assert!(rendered.contains(present));
        }
    }

    #[test]
    fn bibtex_no_authors_omits_author_line() {
        let mut record = article();
        record.authors.clear();

        let rendered = to_bibtex("k", &record);

        assert!(!rendered.contains("author"));
    }

    #[test]
    fn bibtex_braces_stripped() {
        let mut record = article();
        record.title = String::from("A {curly} Title");

        let rendered = to_bibtex("k", &record);

        assert!(rendered.contains("title      = {A curly Title},"));
    }

    // ---- CSL ----

    #[test]
    fn csl_array_shape_and_fields() {
        let rendered = to_csl_array("doi_10.1234_example", &article());

        let items = rendered.as_array().expect("CSL output is an array");
        assert_eq!(items.len(), 1);
        let item = &items[0];

        let expected_strings = [
            ("id", "doi_10.1234_example"),
            ("type", "article-journal"),
            ("title", "Quantum Stuff"),
            ("DOI", "10.1234/example"),
            ("container-title", "Phys Rev X"),
            ("ISSN", "2160-3308"),
        ];
        for (key, expected) in expected_strings {
            assert_eq!(item[key], expected);
        }

        assert_eq!(item["issued"]["date-parts"][0][0], 2026);

        let first_author = &item["author"][0];
        assert_eq!(first_author["family"], "Researcher");
        assert_eq!(first_author["given"], "Alice");
    }

    #[test]
    fn csl_unknown_type_is_manuscript() {
        let rendered = to_csl_array("k", &fixture(None));

        assert_eq!(rendered[0]["type"], "manuscript");
    }

    #[test]
    fn csl_comma_name_split() {
        let mut record = article();
        record.authors = vec![String::from("Curie, Marie"), String::from("Plato")];

        let rendered = to_csl_array("k", &record);
        let authors = rendered[0]["author"].as_array().unwrap();
        let (curie, plato) = (&authors[0], &authors[1]);

        assert_eq!(curie["family"], "Curie");
        assert_eq!(curie["given"], "Marie");
        assert_eq!(plato["family"], "Plato");
        assert!(
            plato.get("given").is_none(),
            "single-token name has no given"
        );
    }
}