//! dtx-parser (nom version) — find files generated by LaTeX .dtx / .ins source trees. //! //! Uses nom 8.x combinators for the structural parsing (brace groups, control //! words, def-bodies) while keeping the macro-expansion logic imperative, since //! TeX macro expansion is inherently stateful and context-sensitive — not a //! good fit for pure parser combinators. use std::collections::HashMap; use std::path::{Path, PathBuf}; use walkdir::WalkDir; use nom::{ IResult, Parser, bytes::complete::{tag, take_till, take_while, take_while1}, character::complete::char, combinator::recognize, sequence::preceded, }; type NomErr<'a> = nom::error::Error<&'a str>; type PR<'a, O> = IResult<&'a str, O, NomErr<'a>>; /// Collect all .dtx / .ins files reachable from `path`. /// If `path` is a file with the right extension, return it directly. /// If `path` is a directory, walk it recursively. /// Anything else (wrong extension, symlink to neither) is silently skipped. /// Read a file as a String. Tries UTF-8 first (zero-copy check); if that /// fails (e.g. ISO-8859 encoded .dtx files) uses `from_utf8_lossy` which /// replaces invalid sequences with U+FFFD. This is SIMD-accelerated and /// vastly faster than a per-byte Latin-1 decode for large files. /// Invalid bytes only appear in comments/author names — never in the macro /// names or filenames we parse — so lossy replacement is safe here. pub fn read_file_contents(path: &Path) -> std::io::Result { let bytes = std::fs::read(path)?; if std::str::from_utf8(&bytes).is_ok() { // SAFETY: we just verified it is valid UTF-8 return Ok(unsafe { String::from_utf8_unchecked(bytes) }); } // Lossy fallback: invalid bytes become U+FFFD, everything else preserved Ok(String::from_utf8_lossy(&bytes).into_owned()) } fn collect_paths(path: &Path) -> Vec { if path.is_file() { let ext = path.extension().and_then(|e| e.to_str()).unwrap_or(""); if ext == "dtx" || ext == "ins" { return vec![path.to_path_buf()]; } else { eprintln!( "Warning: '{}' is not a .dtx or .ins file — skipping.", path.display() ); return Vec::new(); } } if path.is_dir() { return WalkDir::new(path) .follow_links(true) .into_iter() .filter_map(|e| e.ok()) .filter(|e| e.path().is_file()) .filter(|e| { let ext = e.path().extension().and_then(|x| x.to_str()).unwrap_or(""); ext == "dtx" || ext == "ins" }) .map(|e| e.into_path()) .collect(); } eprintln!( "Warning: '{}' is neither a file nor a directory — skipping.", path.display() ); Vec::new() } // ─── Macro table ───────────────────────────────────────────────────────────── #[derive(Clone, Debug)] struct MacroDef { param_count: usize, body: String, may_generate: bool, } type MacroTable = HashMap; // ─── nom primitive parsers ──────────────────────────────────────────────────── /// TeX letter: ASCII alpha or `@`. fn is_tex_letter(c: char) -> bool { c.is_ascii_alphabetic() || c == '@' } /// Parse a TeX control word name (letters + @), returning the name as &str. /// Called after the `\` has already been consumed. fn tex_control_word<'a>(input: &'a str) -> PR<'a, &'a str> { take_while1(is_tex_letter)(input) } /// Parse a TeX comment: `%` up to (but not including) the newline. fn tex_comment(input: &str) -> PR<'_, ()> { let (input, _) = char('%')(input)?; let (input, _) = take_till(|c| c == '\n')(input)?; Ok((input, ())) } /// Skip any mix of ASCII whitespace and `%`-comments. fn ws(mut input: &str) -> PR<'_, ()> { loop { // skip whitespace let (rest, _) = take_while(|c: char| c.is_ascii_whitespace())(input)?; input = rest; // try to consume a comment match tex_comment(input) { Ok((rest, _)) => input = rest, Err(_) => break, } } Ok((input, ())) } /// Parse a brace-group `{…}` and return its inner content as &str. /// Handles nested braces and `%`-comments (so `%}` doesn't close the group). fn brace_group<'a>(input: &'a str) -> PR<'a, &'a str> { // We need to track brace depth manually; nom's `delimited` can't do that. // We use `nom::bytes::complete::take` indirectly via a manual scan. let (input, _) = char::<&str, NomErr>('{')(input)?; let bytes = input.as_bytes(); let mut depth = 1usize; let mut i = 0; while i < bytes.len() { match bytes[i] { b'%' => { i += 1; while i < bytes.len() && bytes[i] != b'\n' { i += 1; } } b'{' => { depth += 1; i += 1; } b'}' => { depth -= 1; if depth == 0 { return Ok((&input[i + 1..], &input[..i])); } i += 1; } _ => i += 1, // Non-ASCII bytes never match {, }, %, \n: safe } } // Unterminated group — return what we have (mirrors hand-rolled behaviour) Ok(("", input)) } /// Parse and discard a single brace-group (fast depth-only, no slice needed). fn skip_brace_group(input: &str) -> PR<'_, ()> { let (input, _) = ws(input)?; let (input, _) = char::<&str, NomErr>('{')(input)?; let bytes = input.as_bytes(); let mut depth = 1usize; let mut i = 0; while i < bytes.len() { match bytes[i] { b'%' => { i += 1; while i < bytes.len() && bytes[i] != b'\n' { i += 1; } } b'{' => { depth += 1; i += 1; } b'}' => { depth -= 1; i += 1; if depth == 0 { return Ok((&input[i..], ())); } } _ => i += 1, } } Ok(("", ())) } /// Parse ws + brace_group, returning inner content. /// Direct implementation avoids the `preceded` combinator allocation overhead. fn ws_brace_group<'a>(input: &'a str) -> PR<'a, &'a str> { let (input, _) = ws(input)?; brace_group(input) } // ─── Control-word matching helper ──────────────────────────────────────────── /// Match `\name` as a complete control word (not a prefix of a longer name). /// Consumes `\name` and returns the rest if the next char is not a TeX letter. fn match_cmd<'a>(name: &'static str) -> impl Fn(&'a str) -> PR<'a, ()> { move |input: &'a str| { let (input, _) = char::<&'a str, NomErr<'a>>('\\')(input)?; let (input, _) = tag(name)(input)?; // Ensure it's a complete word if input.starts_with(is_tex_letter) { return Err(nom::Err::Error(nom::error::Error::new( input, nom::error::ErrorKind::Tag, ))); } Ok((input, ())) } } // ─── Top-level parse ───────────────────────────────────────────────────────── /// Scan `content` for a `\generate`, `\generateFile`, or /// `\begin{filecontents` command that is **not** inside a `%`-comment. /// Used as a fast pre-filter before full parsing. fn has_generate_cmd(content: &str) -> bool { let bytes = content.as_bytes(); let mut i = 0; while i < bytes.len() { match bytes[i] { b'%' => { // Skip to end of line i += 1; while i < bytes.len() && bytes[i] != b'\n' { i += 1; } } b'\\' => { let rest = &bytes[i + 1..]; if rest.starts_with(b"generate") { let after = &rest[b"generate".len()..]; let next = after.first().copied().unwrap_or(0); if !next.is_ascii_alphabetic() && next != b'@' { return true; // bare \generate } if after.starts_with(b"File") { let after_file = &after[b"File".len()..]; let next2 = after_file.first().copied().unwrap_or(0); if !next2.is_ascii_alphabetic() && next2 != b'@' { return true; // \generateFile } } } // \begin{filecontents} or \begin{filecontents*} if rest.starts_with(b"begin") { let after = &rest[b"begin".len()..]; // skip optional whitespace let after = after .iter() .position(|&b| !b.is_ascii_whitespace()) .map(|p| &after[p..]) .unwrap_or(after); if after.starts_with(b"{filecontents") { return true; } } i += 1; } _ => i += 1, } } false } pub fn parse_file(path: &Path, content: &str) -> Vec { // Fast pre-check: scan for \generate / \generateFile outside comments. // A plain contains("\\generate") would fire on comment lines like // "% use \generate to..." — this avoids that false positive. if !has_generate_cmd(content) { return Vec::new(); } let jobname = path .file_stem() .and_then(|s| s.to_str()) .unwrap_or("jobname") .to_owned(); let mut macros = MacroTable::new(); let mut targets: Vec = Vec::with_capacity(16); let mut s = content; loop { // Jump to next backslash — everything else is prose s = advance_to_backslash(s); if s.is_empty() { break; } // Try each command we care about if let Ok((rest, ())) = match_cmd("def")(s) { s = handle_def(rest, &mut macros); } else if let Ok((rest, ())) = match_cmd("generateFile")(s) { s = handle_generate_file(rest, ¯os, &jobname, &mut targets); } else if let Ok((rest, ())) = match_cmd("generate")(s) { s = handle_generate(rest, ¯os, &jobname, &mut targets); } else if let Ok((rest, ())) = match_cmd("begin")(s) { s = handle_filecontents(rest, ¯os, &jobname, &mut targets); } else { // Could be a user macro — try to look it up if let Ok((rest, name)) = preceded(tag("\\"), tex_control_word).parse(s) { if let Some(def) = macros.get(name).cloned() { if def.may_generate { let (args, after) = read_raw_args(rest, def.param_count); s = after; let substituted = substitute_params(&def.body, &args); scan_for_generates(&substituted, ¯os, &jobname, &mut targets, 0); } else { s = skip_n_brace_groups(rest, def.param_count); } continue; } s = rest; } else { // Advance past this `\` s = &s[1..]; } } } targets } /// Advance to the next `\` character, skipping `%`-comments. fn advance_to_backslash(s: &str) -> &str { let bytes = s.as_bytes(); let mut i = 0; while i < bytes.len() { match bytes[i] { b'\\' => return &s[i..], b'%' => { i += 1; while i < bytes.len() && bytes[i] != b'\n' { i += 1; } } _ => i += 1, // Non-ASCII bytes never match \\ or %: safe } } "" } // ─── Scan helper (re-entrant on expanded text) ──────────────────────────────── const MAX_SCAN_DEPTH: usize = 16; fn scan_for_generates( text: &str, macros: &MacroTable, jobname: &str, targets: &mut Vec, depth: usize, ) { if depth > MAX_SCAN_DEPTH { return; } let mut s = text; loop { s = advance_to_backslash(s); if s.is_empty() { break; } if let Ok((rest, ())) = match_cmd("generateFile")(s) { s = handle_generate_file(rest, macros, jobname, targets); } else if let Ok((rest, ())) = match_cmd("generate")(s) { s = handle_generate(rest, macros, jobname, targets); } else if let Ok((rest, ())) = match_cmd("begin")(s) { s = handle_filecontents(rest, macros, jobname, targets); } else if let Ok((rest, name)) = preceded(tag("\\"), tex_control_word).parse(s) { if let Some(def) = macros.get(name).cloned() { if def.may_generate { let (args, after) = read_raw_args(rest, def.param_count); s = after; let substituted = substitute_params(&def.body, &args); scan_for_generates(&substituted, macros, jobname, targets, depth + 1); continue; } else { s = skip_n_brace_groups(rest, def.param_count); } } else { s = rest; } } else { s = &s[1..]; // skip the `\` } } } // ─── \def handler ──────────────────────────────────────────────────────────── fn handle_def<'a>(s: &'a str, macros: &mut MacroTable) -> &'a str { // Parse: ws \ MacroName [#1 #2 … #N] { body } let Ok((s, _)) = ws(s) else { return s }; let Ok((s, _)): PR<_> = char('\\')(s) else { return s; }; let Ok((s, name)) = tex_control_word(s) else { return s; }; let Ok((mut s, _)) = ws(s) else { return s }; // Count parameter specs #1 … #N let mut param_count = 0usize; while let Ok((rest, _)) = recognize(( char::<&str, NomErr<'_>>('#'), nom::character::complete::satisfy(|c| c.is_ascii_digit()), )) .parse(s) { param_count += 1; s = rest; let Ok((rest2, _)) = ws(s) else { break }; s = rest2; } // Body match brace_group(s) { Ok((rest, body)) => { let may_generate = body_may_generate(body, macros); macros.insert( name.to_owned(), MacroDef { param_count, body: body.to_owned(), may_generate, }, ); rest } Err(_) => s, } } // ─── \begin{filecontents} handler ─────────────────────────────────────────── /// Advance past the body of a filecontents environment, stopping after /// `\end{env_name}`. fn skip_filecontents_body<'a>(s: &'a str, env_name: &str) -> &'a str { let end_tag = format!("\\end{{{}}}", env_name); match s.find(end_tag.as_str()) { Some(pos) => &s[pos + end_tag.len()..], None => "", // malformed — no matching \end found } } /// Handle `\begin{filecontents}` and `\begin{filecontents*}`. /// Syntax (LaTeX 2019+ also allows an optional `[options]` before `{filename}`): /// \begin{filecontents}{filename} /// \begin{filecontents*}{filename} /// \begin{filecontents*}[options]{filename} /// `s` is positioned right after the `\begin` token. fn handle_filecontents<'a>( s: &'a str, macros: &MacroTable, jobname: &str, targets: &mut Vec, ) -> &'a str { // Expect {filecontents} or {filecontents*} — nom uses ws_brace_group let Ok((s, env_name)) = ws_brace_group(s) else { return s; }; let env_name = env_name.trim(); if env_name != "filecontents" && env_name != "filecontents*" { return s; } let env_name = env_name.to_owned(); let Ok((s, _)) = ws(s) else { return s }; // Skip optional [options] group (LaTeX 2019+) let s = if s.starts_with('[') { let bytes = s.as_bytes(); let mut i = 1; while i < bytes.len() && bytes[i] != b']' { i += 1; } &s[i + 1..] } else { s }; // Next brace group is the filename let Ok((s, raw)) = ws_brace_group(s) else { return s; }; let target = expand_text(raw, macros, jobname); let target = target.trim().to_owned(); if !target.is_empty() { targets.push(target); } // Skip the environment body up to \end{filecontents} / \end{filecontents*} skip_filecontents_body(s, &env_name) } // ─── \generateFile handler ─────────────────────────────────────────────────── /// `s` is right after `\generateFile`. /// Syntax: `\generateFile{target}{flag}{spec}` fn body_may_generate(body: &str, macros: &MacroTable) -> bool { let bytes = body.as_bytes(); let mut i = 0; while i < bytes.len() { match bytes[i] { b'%' => { i += 1; while i < bytes.len() && bytes[i] != b'\n' { i += 1; } } b'\\' => { i += 1; if i >= bytes.len() { break; } if is_tex_letter(bytes[i] as char) { let start = i; while i < bytes.len() && is_tex_letter(bytes[i] as char) { i += 1; } let name = &body[start..i]; if name == "generate" || name == "generateFile" { return true; } if let Some(def) = macros.get(name) && def.may_generate { return true; } } else { i += 1; } } _ => i += 1, } } false } fn handle_generate_file<'a>( s: &'a str, macros: &MacroTable, jobname: &str, targets: &mut Vec, ) -> &'a str { match ws_brace_group(s) { Ok((rest, raw)) => { let target = expand_text(raw, macros, jobname); let target = target.trim().to_owned(); if !target.is_empty() { targets.push(target); } // Discard flag + spec groups skip_n_brace_groups(rest, 2) } Err(_) => s, } } // ─── \generate handler ─────────────────────────────────────────────────────── fn handle_generate<'a>( s: &'a str, macros: &MacroTable, jobname: &str, targets: &mut Vec, ) -> &'a str { let Ok((s2, _)) = ws(s) else { return s }; // \generate\file{target}{spec} if let Ok((rest, ())) = match_cmd("file")(s2) { return handle_file_group(rest, macros, jobname, targets); } // \generate{…} if let Ok((rest_outer, inner)) = brace_group(s2) { let inner_trimmed = inner.trim_start_matches(|c: char| c.is_ascii_whitespace()); // Strip leading comments to find the first real token let inner_trimmed = skip_ws_str(inner_trimmed); if inner_trimmed.starts_with('\\') { // Body form: scan for \file calls inside let mut inner_s = inner; loop { inner_s = advance_to_backslash(inner_s); if inner_s.is_empty() { break; } if let Ok((after, ())) = match_cmd("file")(inner_s) { inner_s = handle_file_group(after, macros, jobname, targets); } else { // Skip any other command name if let Ok((after, _)) = preceded(tag("\\"), tex_control_word).parse(inner_s) { inner_s = after; } else { inner_s = &inner_s[1..]; } } } return rest_outer; } // Bare \generate{target} let target = expand_text(inner, macros, jobname); let target = target.trim().to_owned(); if !target.is_empty() { targets.push(target); } return skip_n_brace_groups(rest_outer, 1); } s } fn handle_file_group<'a>( s: &'a str, macros: &MacroTable, jobname: &str, targets: &mut Vec, ) -> &'a str { match ws_brace_group(s) { Ok((rest, raw)) => { let target = expand_text(raw, macros, jobname); let target = target.trim().to_owned(); if !target.is_empty() { targets.push(target); } // Read the spec group and scan it for nested \file calls // (some .ins files chain \file inside the spec via trailing %) match ws_brace_group(rest) { Ok((after_spec, spec)) => { let mut inner = spec; loop { inner = advance_to_backslash(inner); if inner.is_empty() { break; } if let Ok((after_file, ())) = match_cmd("file")(inner) { inner = handle_file_group(after_file, macros, jobname, targets); } else { // skip this control word if let Ok((after, _)) = preceded(tag("\\"), tex_control_word).parse(inner) { inner = after; } else { inner = &inner[1..]; } } } after_spec } Err(_) => rest, } } Err(_) => s, } } /// Skip whitespace+comments using our `ws` parser on a plain string. fn skip_ws_str(s: &str) -> &str { ws(s).map(|(rest, _)| rest).unwrap_or(s) } /// Skip `n` brace groups (discard content). fn skip_n_brace_groups(mut s: &str, n: usize) -> &str { for _ in 0..n { if let Ok((rest, ())) = skip_brace_group(s) { s = rest; } else { break; } } s } // ─── Argument reading ───────────────────────────────────────────────────────── fn read_raw_args(s: &str, n: usize) -> (Vec, &str) { let mut args = Vec::with_capacity(n); let mut s = s; for _ in 0..n { match ws_brace_group(s) { Ok((rest, raw)) => { args.push(raw.to_owned()); s = rest; } Err(_) => args.push(String::new()), } } (args, s) } fn substitute_params(text: &str, args: &[String]) -> String { let mut result = String::with_capacity(text.len()); let mut chars = text.char_indices().peekable(); while let Some((i, ch)) = chars.next() { if ch == '#' { let next_byte = text.as_bytes().get(i + 1).copied(); if let Some(b) = next_byte && b.is_ascii_digit() && b != b'0' { let idx = (b - b'0') as usize; if let Some(arg) = args.get(idx - 1) { result.push_str(arg); } chars.next(); // consume the digit continue; } } result.push(ch); } result } // ─── Text expansion ─────────────────────────────────────────────────────────── use std::borrow::Cow; const MAX_EXPAND_DEPTH: usize = 16; fn expand_text<'a>(text: &'a str, macros: &MacroTable, jobname: &str) -> Cow<'a, str> { if !text.as_bytes().contains(&b'\\') { return Cow::Borrowed(text); } Cow::Owned(expand_text_depth(text, macros, jobname, 0)) } fn expand_text_depth(text: &str, macros: &MacroTable, jobname: &str, depth: usize) -> String { if depth > MAX_EXPAND_DEPTH { return text.to_owned(); } let mut result = String::with_capacity(text.len()); let bytes = text.as_bytes(); let mut i = 0; while i < bytes.len() { if bytes[i] == b'%' { while i < bytes.len() && bytes[i] != b'\n' { i += 1; } continue; } if bytes[i] != b'\\' { let ch_str = &text[i..]; if let Some(ch) = ch_str.chars().next() { result.push(ch); i += ch.len_utf8(); } else { i += 1; } continue; } i += 1; // skip backslash if i >= bytes.len() { result.push('\\'); break; } if !is_tex_letter(bytes[i] as char) { result.push('\\'); let ch_str = &text[i..]; if let Some(ch) = ch_str.chars().next() { result.push(ch); i += ch.len_utf8(); } continue; } // Control word let start = i; while i < bytes.len() && is_tex_letter(bytes[i] as char) { i += 1; } let name = &text[start..i]; if name == "jobname" { result.push_str(jobname); } else if let Some(def) = macros.get(name).cloned() { if def.param_count == 0 { let expanded = expand_text_depth(&def.body, macros, jobname, depth + 1); result.push_str(&expanded); } else { let rest_text = &text[i..]; let (args, after_args) = read_args_from_str(rest_text, def.param_count, macros, jobname); let substituted = substitute_params(&def.body, &args); let expanded = expand_text_depth(&substituted, macros, jobname, depth + 1); result.push_str(&expanded); i = text.len() - after_args.len(); } } else { result.push('\\'); result.push_str(name); } } result } fn read_args_from_str<'a>( s: &'a str, n: usize, macros: &MacroTable, jobname: &str, ) -> (Vec, &'a str) { let mut args = Vec::with_capacity(n); let mut s = s; for _ in 0..n { match ws_brace_group(s) { Ok((rest, raw)) => { args.push(expand_text(raw, macros, jobname).into_owned()); s = rest; } Err(_) => args.push(String::new()), } } (args, s) } // ─── Tests ─────────────────────────────────────────────────────────────────── #[cfg(test)] mod tests { use super::*; use std::path::Path; fn parse(filename: &str, src: &str) -> Vec { parse_file(Path::new(filename), src) } #[test] fn test_generate_file_basic() { let src = r#"\generateFile{bondcolor.drv}{t}{\from{bondcolor.dtx}{driver}}"#; assert_eq!(parse("bondcolor.dtx", src), vec!["bondcolor.drv"]); } #[test] fn test_jobname_in_generate_file() { let src = r#"\generateFile{\jobname.sty}{t}{\from{\jobname.dtx}{package}}"#; assert_eq!(parse("mybundle.dtx", src), vec!["mybundle.sty"]); } #[test] fn test_def_zero_arg() { let src = r#" \def\myfile{coolpackage} \generateFile{\myfile.sty}{t}{\from{\myfile.dtx}{package}} "#; assert_eq!(parse("pkg.dtx", src), vec!["coolpackage.sty"]); } #[test] fn test_generate_backslash_file() { let src = r#"\generate\file{output.tex}{\from{source.dtx}{body}}"#; assert_eq!(parse("source.dtx", src), vec!["output.tex"]); } #[test] fn test_generate_body_with_file() { let src = r#"\generate{\file{a.sty}{\from{pkg.dtx}{style}}\file{b.drv}{\from{pkg.dtx}{driver}}}"#; assert_eq!(parse("pkg.dtx", src), vec!["a.sty", "b.drv"]); } #[test] fn test_multiple_generate_file() { let src = r#" \generateFile{a.sty}{t}{\from{pkg.dtx}{style}} \generateFile{b.drv}{t}{\from{pkg.dtx}{driver}} "#; assert_eq!(parse("pkg.dtx", src), vec!["a.sty", "b.drv"]); } #[test] fn test_comment_between_command_and_brace() { let src = "\\generateFile% a comment\n{result.tex}{t}{\\from{x.dtx}{}}"; assert_eq!(parse("x.dtx", src), vec!["result.tex"]); } #[test] fn test_parameterised_macro_extract() { let src = r#" \def\Extract#1#2#3{\generate{\file{#1.#2}{\from{\jobname.dtx}{#3}}}} \Extract{\jobname}{sty}{package} \Extract{README}{md}{readme} \Extract{etc}{uue}{etc} \Extract{make}{sh}{make} "#; let out = parse("mybundle.dtx", src); assert_eq!(out, vec!["mybundle.sty", "README.md", "etc.uue", "make.sh"]); } #[test] fn test_second_def_shadows_first() { let src = r#" \def\Extract#1#2#3{\generate{\file{#1.#2}{\from{\jobname.dtx}{#3}}}} \def\Extract#1#2#3{} \Extract{\jobname}{sty}{package} \Extract{README}{md}{readme} "#; let out = parse("mybundle.dtx", src); assert!(out.is_empty(), "expected empty, got {out:?}"); } #[test] fn test_jobname_in_macro_arg() { let src = r#" \def\MkFile#1#2{\generateFile{#1.#2}{t}{\from{\jobname.dtx}{#2}}} \MkFile{\jobname}{sty} \MkFile{extra}{ins} "#; let out = parse("mypackage.dtx", src); assert_eq!(out, vec!["mypackage.sty", "extra.ins"]); } #[test] fn test_zero_arg_macro_with_generate() { let src = r#" \def\DoGenerate{\generateFile{auto.sty}{t}{\from{auto.dtx}{style}}} \DoGenerate "#; let out = parse("auto.dtx", src); assert_eq!(out, vec!["auto.sty"]); } #[test] fn test_nwejm_pattern() { let src = r#" \def\NWEJM@classname{\jobname} \def\NWEJM@addons{addons} \def\NWEJM@examplestemplate{\jobname-examples-template} % \usedir{tex/latex/\NWEJM@classname} \generate{% \file{\NWEJM@classname.cls}{\from{\jobname.dtx}{class}} \file{\NWEJM@classname art.cls}{\from{\jobname.dtx}{class-article}} \file{\NWEJM@classname.dbx}{\from{\jobname.dtx}{datamodel}} \file{\NWEJM@classname.cbx}{\from{\jobname.dtx}{citestyle}} \file{\NWEJM@classname.bbx}{\from{\jobname.dtx}{bibstyle}} \file{\NWEJM@classname.lbx}{\from{\jobname.dtx}{languagemodel}} \nopreamble\nopostamble \file{\NWEJM@classname.cfg}{\from{\jobname.dtx}{configuration}} \file{\NWEJM@classname-english.trsl}{\from{\jobname.dtx}{english}} \file{\NWEJM@classname-french.trsl}{\from{\jobname.dtx}{french}} \file{\NWEJM@classname-german.trsl}{\from{\jobname.dtx}{german}} \file{\NWEJM@classname-dutch.trsl}{\from{\jobname.dtx}{dutch}} }% "#; let out = parse("NWEJM.dtx", src); assert_eq!( out, vec![ "NWEJM.cls", "NWEJM art.cls", "NWEJM.dbx", "NWEJM.cbx", "NWEJM.bbx", "NWEJM.lbx", "NWEJM.cfg", "NWEJM-english.trsl", "NWEJM-french.trsl", "NWEJM-german.trsl", "NWEJM-dutch.trsl", ] ); } #[test] fn test_file_not_matched_as_prefix_of_longer_name() { let src = r#"\generate{\filecontents{some.tex}{body}\file{real.sty}{\from{x.dtx}{s}}}"#; let out = parse("x.dtx", src); assert_eq!(out, vec!["real.sty"]); } #[test] fn test_unicode_in_comment() { let src = "% Ma\u{00EF}eul Rouquette \u{2014} ma\u{00EF}eul dot net\n\\generateFile{pkg.sty}{t}{\\from{pkg.dtx}{package}}\n"; let out = parse("pkg.dtx", src); assert_eq!(out, vec!["pkg.sty"]); } #[test] fn test_unicode_inside_brace_group() { let src = "\\generateFile{r\u{00E9}sum\u{00E9}.sty}{t}{\\from{r\u{00E9}sum\u{00E9}.dtx}{pkg}}"; let out = parse("r\u{00E9}sum\u{00E9}.dtx", src); assert_eq!(out, vec!["r\u{00E9}sum\u{00E9}.sty"]); } #[test] fn test_filecontents_star() { let src = "\\begin{filecontents*}{\\jobname.bib}\n@article{x}{}\n\\end{filecontents*}"; let out = parse("mypaper.dtx", src); assert_eq!(out, vec!["mypaper.bib"]); } #[test] fn test_filecontents_no_star() { let src = "\\begin{filecontents}{readme.txt}\nSome content\n\\end{filecontents}"; let out = parse("pkg.dtx", src); assert_eq!(out, vec!["readme.txt"]); } #[test] fn test_filecontents_with_options() { // LaTeX 2019+ optional [options] before filename let src = "\\begin{filecontents*}[overwrite]{data.csv}\na,b,c\n\\end{filecontents*}"; let out = parse("pkg.dtx", src); assert_eq!(out, vec!["data.csv"]); } #[test] fn test_filecontents_in_comment_not_matched() { // \begin{filecontents} in a comment must not produce a result let src = "% \\begin{filecontents}{ignored.txt}\n\\generateFile{real.sty}{t}{\\from{x.dtx}{s}}"; let out = parse("x.dtx", src); assert_eq!(out, vec!["real.sty"]); } #[test] fn test_file_chained_in_spec_via_comment() { // Real-world pattern: \file spec is left unclosed by trailing %, // chaining further \file calls inside it. let src = r#"\generate{% \usepreamble\package\file{a.cls}{\from{pkg.dtx}{cls}% \nopreamble\nopostamble\file{build.sh}{\from{pkg.dtx}{build}}% \nopreamble\nopostamble\file{clean.sh}{\from{pkg.dtx}{clean}}% \file{b.sty}{\from{pkg.dtx}{sty}} }"#; let out = parse("pkg.dtx", src); assert_eq!(out, vec!["a.cls", "build.sh", "clean.sh", "b.sty"]); } #[test] fn test_filecontents_body_not_parsed() { // Commands inside the filecontents body must not be treated as // real \generate statements. let src = r#" \begin{filecontents*}{mymacros.sty} \def\Extract#1#2#3{\generate{\file{#1.#2}{\from{\jobname.dtx}{#3}}}} \Extract{fake}{tex}{fake} \end{filecontents*} \generateFile{real.sty}{t}{\from{pkg.dtx}{package}} "#; let out = parse("pkg.dtx", src); assert_eq!(out, vec!["mymacros.sty", "real.sty"]); } #[test] fn test_filecontents_no_star_body_skipped() { let src = r#" \begin{filecontents}{helper.tex} \generateFile{fake.sty}{t}{\from{x.dtx}{s}} \end{filecontents} \generateFile{real.sty}{t}{\from{pkg.dtx}{s}} "#; let out = parse("pkg.dtx", src); assert_eq!(out, vec!["helper.tex", "real.sty"]); } }