From bab45fc1ebfbce2b4993449b09e025bd901de519 Mon Sep 17 00:00:00 2001 From: rm-dr <96270320+rm-dr@users.noreply.github.com> Date: Thu, 19 Sep 2024 07:51:38 -0700 Subject: [PATCH] Added `bundle create` --- Cargo.lock | 137 +++- Cargo.toml | 46 +- .../tectonic/v2cli/commands/bundle/actions.rs | 140 ++++ .../tectonic/v2cli/commands/bundle/create.rs | 118 ++++ .../commands/{bundle.rs => bundle/mod.rs} | 18 +- .../v2cli/commands/bundle/pack/bundlev1.rs | 218 +++++++ .../v2cli/commands/bundle/pack/mod.rs | 1 + .../v2cli/commands/bundle/select/input/dir.rs | 56 ++ .../v2cli/commands/bundle/select/input/mod.rs | 42 ++ .../v2cli/commands/bundle/select/input/tar.rs | 77 +++ .../v2cli/commands/bundle/select/mod.rs | 3 + .../v2cli/commands/bundle/select/picker.rs | 602 ++++++++++++++++++ .../v2cli/commands/bundle/select/spec.rs | 72 +++ src/bin/tectonic/v2cli/mod.rs | 8 + 14 files changed, 1522 insertions(+), 16 deletions(-) create mode 100644 src/bin/tectonic/v2cli/commands/bundle/actions.rs create mode 100644 src/bin/tectonic/v2cli/commands/bundle/create.rs rename src/bin/tectonic/v2cli/commands/{bundle.rs => bundle/mod.rs} (91%) create mode 100644 src/bin/tectonic/v2cli/commands/bundle/pack/bundlev1.rs create mode 100644 src/bin/tectonic/v2cli/commands/bundle/pack/mod.rs create mode 100644 src/bin/tectonic/v2cli/commands/bundle/select/input/dir.rs create mode 100644 src/bin/tectonic/v2cli/commands/bundle/select/input/mod.rs create mode 100644 src/bin/tectonic/v2cli/commands/bundle/select/input/tar.rs create mode 100644 src/bin/tectonic/v2cli/commands/bundle/select/mod.rs create mode 100755 src/bin/tectonic/v2cli/commands/bundle/select/picker.rs create mode 100644 src/bin/tectonic/v2cli/commands/bundle/select/spec.rs diff --git a/Cargo.lock b/Cargo.lock index 621a2df7..348ae750 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -252,7 +252,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" dependencies = [ "memchr", - "regex-automata", + "regex-automata 0.4.6", "serde", ] @@ -1075,8 +1075,8 @@ dependencies = [ "aho-corasick", "bstr", "log", - "regex-automata", - "regex-syntax", + "regex-automata 0.4.6", + "regex-syntax 0.8.2", ] [[package]] @@ -1390,7 +1390,7 @@ dependencies = [ "globset", "log", "memchr", - "regex-automata", + "regex-automata 0.4.6", "same-file", "walkdir", "winapi-util", @@ -1597,6 +1597,15 @@ version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + [[package]] name = "md-5" version = "0.10.6" @@ -1757,6 +1766,16 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + [[package]] name = "num-conv" version = "0.1.0" @@ -1871,6 +1890,12 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + [[package]] name = "parking_lot" version = "0.12.1" @@ -2193,8 +2218,17 @@ checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" dependencies = [ "aho-corasick", "memchr", - "regex-automata", - "regex-syntax", + "regex-automata 0.4.6", + "regex-syntax 0.8.2", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", ] [[package]] @@ -2205,9 +2239,15 @@ checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.8.2", ] +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + [[package]] name = "regex-syntax" version = "0.8.2" @@ -2475,6 +2515,15 @@ dependencies = [ "digest", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "signal-hook-registry" version = "1.4.1" @@ -2604,10 +2653,22 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" +[[package]] +name = "tar" +version = "0.4.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb797dad5fb5b76fcf519e702f4a589483b5ef06567f160c392832c1f5e44909" +dependencies = [ + "filetime", + "libc", + "xattr", +] + [[package]] name = "tectonic" version = "0.0.0-dev.0" dependencies = [ + "anyhow", "byte-unit", "cfg-if", "clap", @@ -2626,8 +2687,10 @@ dependencies = [ "md-5", "open", "quick-xml", + "regex", "serde", "sha2", + "tar", "tectonic_bridge_core", "tectonic_bundles", "tectonic_docmodel", @@ -2646,7 +2709,10 @@ dependencies = [ "time", "tokio", "toml", + "tracing", + "tracing-subscriber", "url", + "walkdir", "watchexec", "watchexec-filterer-globset", "watchexec-signals", @@ -2943,6 +3009,16 @@ dependencies = [ "syn 2.0.52", ] +[[package]] +name = "thread_local" +version = "1.1.8" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +dependencies = [ + "cfg-if", + "once_cell", +] + [[package]] name = "time" version = "0.3.36" @@ -3136,6 +3212,36 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" dependencies = [ "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", ] [[package]] @@ -3268,6 +3374,12 @@ version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + [[package]] name = "vcpkg" version = "0.2.15" @@ -3749,6 +3861,17 @@ dependencies = [ "tap", ] +[[package]] +name = "xattr" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f" +dependencies = [ + "libc", + "linux-raw-sys", + "rustix", +] + [[package]] name = "xdg" version = "2.5.2" diff --git a/Cargo.toml b/Cargo.toml index 33b796f0..c60b4298 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,13 
@@ documentation = "https://docs.rs/tectonic" repository = "https://github.com/tectonic-typesetting/tectonic/" readme = "CARGO_README.md" keywords = ["tex", "latex", "typesetting", "font"] -categories = ["command-line-interface", "parser-implementations", "rendering", "science", "text-processing"] +categories = [ + "command-line-interface", + "parser-implementations", + "rendering", + "science", + "text-processing", +] license = "MIT" edition = "2018" exclude = ["/dist/", "/reference_sources/"] @@ -96,6 +102,12 @@ watchexec-supervisor = "1.0" zip = { version = "^0.6", default-features = false, features = ["deflate"] } time = "0.3.36" clap_complete = "4.5.1" +walkdir = "2" +regex = "1.10.2" +anyhow = "1.0.80" +tar = "0.4.40" +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } [features] default = ["geturl-reqwest", "serialization"] @@ -113,7 +125,10 @@ external-harfbuzz = ["tectonic_engine_xetex/external-harfbuzz"] geturl-curl = ["tectonic_bundles/geturl-curl", "tectonic_geturl/curl"] geturl-reqwest = ["tectonic_bundles/geturl-reqwest", "tectonic_geturl/reqwest"] -native-tls-vendored = ["tectonic_bundles/native-tls-vendored", "tectonic_geturl/native-tls-vendored"] +native-tls-vendored = [ + "tectonic_bundles/native-tls-vendored", + "tectonic_geturl/native-tls-vendored", +] # developer feature to compile with the necessary flags for profiling tectonic. profile = [] @@ -124,7 +139,12 @@ futures = "0.3" headers = "0.4" http-body-util = "0.1.0" hyper = { version = "1.0.0", features = ["server", "http1", "http2"] } -hyper-util = { version = "0.1", features = ["server", "http1", "http2", "tokio"] } +hyper-util = { version = "0.1", features = [ + "server", + "http1", + "http2", + "tokio", +] } tempfile = "^3.1" [package.metadata.vcpkg] @@ -137,9 +157,23 @@ overlay-triplets-path = "dist/vcpkg-triplets" # guidance if they might need to set $VCPKGRS_TRIPLET. 
[package.metadata.vcpkg.target] x86_64-apple-darwin = { install = ["freetype", "harfbuzz[graphite2]", "icu"] } -aarch64-apple-darwin = { triplet = "arm64-osx", install = ["freetype", "harfbuzz[graphite2]", "icu"] } -x86_64-unknown-linux-gnu = { install = ["fontconfig", "freetype", "harfbuzz[graphite2]", "icu"] } -x86_64-pc-windows-msvc = { triplet = "x64-windows-static-release", install = ["fontconfig", "freetype", "harfbuzz[graphite2]", "icu"] } +aarch64-apple-darwin = { triplet = "arm64-osx", install = [ + "freetype", + "harfbuzz[graphite2]", + "icu", +] } +x86_64-unknown-linux-gnu = { install = [ + "fontconfig", + "freetype", + "harfbuzz[graphite2]", + "icu", +] } +x86_64-pc-windows-msvc = { triplet = "x64-windows-static-release", install = [ + "fontconfig", + "freetype", + "harfbuzz[graphite2]", + "icu", +] } [package.metadata.internal_dep_versions] tectonic_bridge_core = "thiscommit:2023-06-11:PvhF7YB" diff --git a/src/bin/tectonic/v2cli/commands/bundle/actions.rs b/src/bin/tectonic/v2cli/commands/bundle/actions.rs new file mode 100644 index 00000000..3b2b7c19 --- /dev/null +++ b/src/bin/tectonic/v2cli/commands/bundle/actions.rs @@ -0,0 +1,140 @@ +use super::{ + create::{BundleCreateCommand, BundleFormat}, + pack::bundlev1::BundleV1, + select::{picker::FilePicker, spec::BundleSpec}, +}; +use anyhow::{Context, Result}; +use std::{ + cmp::Ordering, + fs::{self, File}, + io::Read, + thread, + time::Duration, +}; +use tracing::{error, info, warn}; + +pub(super) fn select(cli: &BundleCreateCommand) -> Result<()> { + let bundle_dir = cli + .bundle_spec + .canonicalize() + .unwrap() + .parent() + .unwrap() + .to_path_buf(); + + let mut file = File::open(&cli.bundle_spec)?; + let mut file_str = String::new(); + file.read_to_string(&mut file_str)?; + let bundle_config: BundleSpec = match toml::from_str(&file_str) { + Ok(x) => x, + Err(e) => { + error!("failed to load bundle specification",); + return Err(e.into()); + } + }; + + if let Err(e) = bundle_config.validate() 
{ + error!("failed to validate bundle specification"); + return Err(e); + }; + + // Remove build dir if it exists + if cli.build_dir.exists() { + warn!( + "build dir {} aleady exists", + cli.build_dir.to_str().unwrap() + ); + + for i in (1..=5).rev() { + warn!( + "recursively removing {} in {i} second{}", + cli.build_dir.to_str().unwrap(), + if i != 1 { "s" } else { "" } + ); + thread::sleep(Duration::from_secs(1)); + } + thread::sleep(Duration::from_secs(2)); + + fs::remove_dir_all(&cli.build_dir)?; + } + fs::create_dir_all(&cli.build_dir).context("while creating build dir")?; + + let mut picker = FilePicker::new( + bundle_config.clone(), + cli.build_dir.clone(), + bundle_dir.clone(), + )?; + + // Run selector + let sources: Vec = picker.iter_sources().map(|x| x.to_string()).collect(); + for source in sources { + picker.add_source(cli, &source)?; + } + picker.finish(true)?; + + // Print statistics + info!("summary is below:\n{}", picker.stats.make_string()); + + match picker.stats.compare_patch_found_applied() { + Ordering::Equal => {} + Ordering::Greater => { + warn!("some patches were not applied"); + } + Ordering::Less => { + warn!("some patches applied multiple times"); + } + } + + // Check output hash + { + let mut file = File::open(cli.build_dir.join("content/SHA256SUM"))?; + let mut hash = String::new(); + file.read_to_string(&mut hash)?; + let hash = hash.trim(); + if hash != bundle_config.bundle.expected_hash { + warn!("final bundle hash doesn't match bundle configuration:"); + warn!("bundle hash is {hash}"); + warn!("config hash is {}", bundle_config.bundle.expected_hash); + } else { + info!("final bundle hash matches configuration"); + info!("hash is {hash}"); + } + } + + Ok(()) +} + +pub(super) fn pack(cli: &BundleCreateCommand) -> Result<()> { + let mut file = File::open(&cli.bundle_spec)?; + let mut file_str = String::new(); + file.read_to_string(&mut file_str)?; + let bundle_config: BundleSpec = toml::from_str(&file_str)?; + + if 
!cli.build_dir.join("content").is_dir() { + error!( + "content directory `{}/content` doesn't exist, can't continue", + cli.build_dir.to_str().unwrap() + ); + return Ok(()); + } + + let target_name = format!("{}.ttb", &bundle_config.bundle.name); + let target = cli.build_dir.join(&target_name); + if target.exists() { + if target.is_file() { + warn!("target bundle `{target_name}` exists, removing"); + fs::remove_file(&target)?; + } else { + error!("target bundle `{target_name}` exists and isn't a file, can't continue"); + return Ok(()); + } + } + + match cli.format { + BundleFormat::BundleV1 => { + BundleV1::make(Box::new(File::create(target)?), cli.build_dir.clone())? + } + } + + Ok(()) +} diff --git a/src/bin/tectonic/v2cli/commands/bundle/create.rs b/src/bin/tectonic/v2cli/commands/bundle/create.rs new file mode 100644 index 00000000..76b1c56c --- /dev/null +++ b/src/bin/tectonic/v2cli/commands/bundle/create.rs @@ -0,0 +1,118 @@ +use clap::{Parser, ValueEnum}; +use std::{fmt::Display, path::PathBuf}; +use tectonic::{config::PersistentConfig, Result}; +use tectonic_status_base::StatusBackend; +use tracing::error; + +use crate::v2cli::{CommandCustomizations, TectonicCommand}; + +// +// MARK: Cli arguments +// + +#[derive(Debug, Copy, Clone, ValueEnum)] +pub enum BundleJob { + /// Run the following jobs in order + #[value(name = "all")] + All, + + /// (Stage 1) Select and patch all files in this bundle + #[value(name = "select")] + Select, + + /// (Stage 2) Pack selected files into a bundle + #[value(name = "pack")] + Pack, +} + +impl Display for BundleJob { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::All => write!(f, "all"), + Self::Select => write!(f, "select"), + Self::Pack => write!(f, "pack"), + } + } +} + +impl BundleJob { + pub fn do_select(&self) -> bool { + matches!(self, Self::All | Self::Select) + } + + pub fn do_pack(&self) -> bool { + matches!(self, Self::All | Self::Pack) + } +} + +#[derive(Parser, 
Debug)] +pub struct BundleCreateCommand { + /// Which job we should run. `all` is default, + /// but single jobs can be run on their own for debugging. + #[arg(long, default_value_t = BundleJob::All)] + pub job: BundleJob, + + /// Bundle specification TOML file. + pub bundle_spec: PathBuf, + + /// Build directory for this bundle. + /// Will be removed. + #[arg(short, long)] + pub build_dir: PathBuf, + + /// What kind of bundle should we produce? + /// This only has an effect when running jobs `all` or `pack` + #[arg(default_value_t = BundleFormat::BundleV1)] + pub format: BundleFormat, + + /// If this flag is set, don't fail when an input's hash doesn't match + /// the hash specified in the bundle's configuration file. + /// This only has an effect when running jobs `all` or `select` + #[arg(long, default_value_t = false)] + pub allow_hash_mismatch: bool, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)] +pub enum BundleFormat { + #[value(name = "v1")] + BundleV1, +} + +impl Display for BundleFormat { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::BundleV1 => write!(f, "v1")?, + } + Ok(()) + } +} + +impl TectonicCommand for BundleCreateCommand { + fn customize(&self, cc: &mut CommandCustomizations) { + cc.always_stderr = true; + } + + fn execute(self, _config: PersistentConfig, _status: &mut dyn StatusBackend) -> Result { + if self.job.do_select() { + match super::actions::select(&self) { + Ok(_) => {} + Err(e) => { + error!("select job failed with error: {e}"); + return Err(e.into()); + } + }; + } + + if self.job.do_pack() { + match super::actions::pack(&self) { + Ok(_) => {} + Err(e) => { + error!("bundle packer failed with error: {e}"); + return Err(e.into()); + } + }; + } + + Ok(0) + } +} diff --git a/src/bin/tectonic/v2cli/commands/bundle.rs b/src/bin/tectonic/v2cli/commands/bundle/mod.rs similarity index 91% rename from src/bin/tectonic/v2cli/commands/bundle.rs rename to 
src/bin/tectonic/v2cli/commands/bundle/mod.rs index f5b1900f..7549af63 100644 --- a/src/bin/tectonic/v2cli/commands/bundle.rs +++ b/src/bin/tectonic/v2cli/commands/bundle/mod.rs @@ -1,4 +1,5 @@ use clap::{Parser, Subcommand}; +use create::BundleCreateCommand; use tectonic::{ config::PersistentConfig, docmodel::{DocumentExt, DocumentSetupOptions}, @@ -11,6 +12,11 @@ use tectonic_status_base::StatusBackend; use crate::v2cli::{CommandCustomizations, TectonicCommand}; +mod actions; +mod create; +mod pack; +mod select; + fn get_a_bundle( _config: PersistentConfig, only_cached: bool, @@ -45,13 +51,13 @@ fn get_a_bundle( } /// `bundle`: Commands relating to Tectonic bundles -#[derive(Debug, Eq, PartialEq, Parser)] +#[derive(Debug, Parser)] pub struct BundleCommand { #[command(subcommand)] command: BundleCommands, } -#[derive(Debug, Eq, PartialEq, Subcommand)] +#[derive(Debug, Subcommand)] enum BundleCommands { #[command(name = "cat")] /// Dump the contents of a file in the bundle @@ -60,6 +66,10 @@ enum BundleCommands { #[command(name = "search")] /// Filter the list of filenames contained in the bundle Search(BundleSearchCommand), + + #[command(name = "create")] + /// Create a new bundle + Create(BundleCreateCommand), } impl TectonicCommand for BundleCommand { @@ -67,6 +77,7 @@ impl TectonicCommand for BundleCommand { match &self.command { BundleCommands::Cat(c) => c.customize(cc), BundleCommands::Search(c) => c.customize(cc), + BundleCommands::Create(c) => c.customize(cc), } } @@ -74,11 +85,12 @@ impl TectonicCommand for BundleCommand { match self.command { BundleCommands::Cat(c) => c.execute(config, status), BundleCommands::Search(c) => c.execute(config, status), + BundleCommands::Create(c) => c.execute(config, status), } } } -#[derive(Debug, Eq, PartialEq, Parser)] +#[derive(Debug, Parser)] struct BundleCatCommand { /// Use only resource files cached locally #[arg(short = 'C', long)] diff --git a/src/bin/tectonic/v2cli/commands/bundle/pack/bundlev1.rs 
b/src/bin/tectonic/v2cli/commands/bundle/pack/bundlev1.rs new file mode 100644 index 00000000..26c4a7f0 --- /dev/null +++ b/src/bin/tectonic/v2cli/commands/bundle/pack/bundlev1.rs @@ -0,0 +1,218 @@ +use anyhow::{bail, Result}; +use flate2::{write::GzEncoder, Compression}; +use std::{ + fmt::Display, + fs::{self, File}, + io::{stdout, BufRead, BufReader, Read, Seek, Write}, + num::ParseIntError, + path::PathBuf, +}; +use tracing::info; + +pub trait WriteSeek: std::io::Write + Seek {} +impl WriteSeek for T {} + +pub fn decode_hex(s: &str) -> Result, ParseIntError> { + (0..s.len()) + .step_by(2) + .map(|i| u8::from_str_radix(&s[i..i + 2], 16)) + .collect() +} + +// Size of ttbv1 header +const HEADER_SIZE: u64 = 66u64; + +#[derive(Debug)] +struct FileListEntry { + path: PathBuf, + hash: String, + start: u64, + + // We need the compressed length to build + // a range request for this bundle. We also + // keep the real length around for performance + // (we'll only need to allocate vectors once) + real_len: u32, + gzip_len: u32, +} + +impl Display for FileListEntry { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + format!( + "{} {} {} {} {}", + self.start, + self.gzip_len, + self.real_len, + self.hash, + self.path.to_str().unwrap() + ) + .fmt(f) + } +} + +pub struct BundleV1 { + filelist: Vec, + target: Box, + content_dir: PathBuf, + + index_start: u64, + index_real_len: u32, + index_gzip_len: u32, +} + +impl BundleV1 { + pub fn make(target: Box, build_dir: PathBuf) -> Result<()> { + let mut bundle = BundleV1::new(target, build_dir)?; + + bundle.add_files()?; + bundle.write_index()?; + bundle.write_header()?; + + Ok(()) + } + + fn new(target: Box, build_dir: PathBuf) -> Result { + Ok(BundleV1 { + filelist: Vec::new(), + target, + content_dir: build_dir.join("content"), + index_start: 0, + index_gzip_len: 0, + index_real_len: 0, + }) + } + + fn add_files(&mut self) -> Result { + let mut byte_count = HEADER_SIZE; // Start after header + let mut 
real_len_sum = 0; // Compute average compression ratio + + self.target.seek(std::io::SeekFrom::Start(byte_count))?; + + let filelist_file = File::open(self.content_dir.join("FILELIST"))?; + let reader = BufReader::new(filelist_file); + + info!("Building ttbv1 bundle..."); + + for line in reader.lines() { + stdout().flush()?; + + let line = line?; + let mut bits = line.split_whitespace(); + + if let Some(hash) = bits.next() { + let path = bits.collect::>().join(" "); + + let mut file = fs::File::open(self.content_dir.join(&path))?; + + // Compress and write bytes + let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); + let real_len = std::io::copy(&mut file, &mut encoder)?; + let gzip_len = self.target.write(&encoder.finish()?)?; + assert!(real_len < u32::MAX as u64); + assert!(gzip_len < u32::MAX as usize); + + // Add to index + self.filelist.push(FileListEntry { + start: byte_count, + gzip_len: gzip_len as u32, + real_len: real_len as u32, + path: PathBuf::from(path), + hash: hash.to_owned(), + }); + byte_count += gzip_len as u64; + real_len_sum += real_len; + } else { + bail!("malformed filelist line"); + } + } + + info!( + "Average compression ratio: {:.2}", + real_len_sum as f64 / byte_count as f64 + ); + + Ok(byte_count) + } + + fn write_index(&mut self) -> Result<()> { + // Generate a ttbv1 index and write it to the bundle. + // + // This index is a replacement for FILELIST and SEARCH, containing everything in those files + // (in addition to some ttbv1-specific information) + // + // The original FILELIST and SEARCH files are still included in the bundle. 
+ + // Get current position + self.index_start = self.target.stream_position()?; + + info!("Writing index"); + + let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); + let mut real_len = 0usize; + + real_len += encoder.write("[DEFAULTSEARCH]\n".as_bytes())?; + real_len += encoder.write("MAIN\n".as_bytes())?; + + real_len += encoder.write("[SEARCH:MAIN]\n".as_bytes())?; + for l in fs::read_to_string(self.content_dir.join("SEARCH"))?.lines() { + real_len += encoder.write(l.as_bytes())?; + real_len += encoder.write(b"\n")?; + } + + real_len += encoder.write("[FILELIST]\n".as_bytes())?; + for i in &self.filelist { + let s = format!("{i}\n"); + real_len += encoder.write(s.as_bytes())?; + } + let gzip_len = self.target.write(&encoder.finish()?)?; + assert!(gzip_len < u32::MAX as usize); + assert!(real_len < u32::MAX as usize); + self.index_gzip_len = gzip_len as u32; + self.index_real_len = real_len as u32; + + info!( + "index is at {} and has length {}", + self.index_start, self.index_gzip_len + ); + + Ok(()) + } + + fn write_header(&mut self) -> Result { + self.target.seek(std::io::SeekFrom::Start(0))?; + + info!("Writing header"); + + // Parse bundle hash + let mut hash_file = File::open(self.content_dir.join("SHA256SUM")).unwrap(); + let mut hash_text = String::new(); + hash_file.read_to_string(&mut hash_text)?; + let digest = decode_hex(hash_text.trim())?; + + let mut byte_count = 0u64; + + // 14 bytes: signature + // Always "tectonicbundle", in any bundle version. + // + // This "magic sequence" lets us more easily distinguish between + // random binary files and proper tectonic bundles. + byte_count += self.target.write(b"tectonicbundle")? as u64; + + // 4 bytes: bundle version + byte_count += self.target.write(&1u32.to_le_bytes())? as u64; + + // 8 + 4 + 4 = 16 bytes: location, gzip length, and real length of index + byte_count += self.target.write(&self.index_start.to_le_bytes())? as u64; + byte_count += self.target.write(&self.index_gzip_len.to_le_bytes())? 
as u64; + byte_count += self.target.write(&self.index_real_len.to_le_bytes())? as u64; + + // 32 bytes: bundle hash + // We include this in the header so we don't need to load the index to get the hash. + byte_count += self.target.write(&digest)? as u64; + + // Make sure we wrote the expected number of bytes + assert!(byte_count == HEADER_SIZE); + + Ok(byte_count) + } +} diff --git a/src/bin/tectonic/v2cli/commands/bundle/pack/mod.rs b/src/bin/tectonic/v2cli/commands/bundle/pack/mod.rs new file mode 100644 index 00000000..fd3fe2ce --- /dev/null +++ b/src/bin/tectonic/v2cli/commands/bundle/pack/mod.rs @@ -0,0 +1 @@ +pub mod bundlev1; diff --git a/src/bin/tectonic/v2cli/commands/bundle/select/input/dir.rs b/src/bin/tectonic/v2cli/commands/bundle/select/input/dir.rs new file mode 100644 index 00000000..a1c7b704 --- /dev/null +++ b/src/bin/tectonic/v2cli/commands/bundle/select/input/dir.rs @@ -0,0 +1,56 @@ +use super::BundleInput; +use anyhow::Result; +use std::{ + fs::{self}, + io::Read, + path::PathBuf, +}; +use walkdir::WalkDir; + +pub struct DirBundleInput { + dir: PathBuf, +} + +impl DirBundleInput { + pub fn new(dir: PathBuf) -> Self { + Self { + dir: dir.canonicalize().unwrap(), + } + } +} + +impl BundleInput for DirBundleInput { + fn iter_files(&mut self) -> impl Iterator)>> { + WalkDir::new(&self.dir) + .into_iter() + .filter_map(|x| match x { + Err(_) => Some(x), + Ok(x) => { + if !x.file_type().is_file() { + None + } else { + Some(Ok(x)) + } + } + }) + .map(move |x| match x { + Ok(x) => { + let path = x + .into_path() + .canonicalize() + .unwrap() + .strip_prefix(&self.dir) + .unwrap() + .to_str() + .unwrap() + .to_string(); + + Ok(( + path.clone(), + Box::new(fs::File::open(self.dir.join(path))?) 
as Box, + )) + } + Err(e) => Err(anyhow::Error::from(e)), + }) + } +} diff --git a/src/bin/tectonic/v2cli/commands/bundle/select/input/mod.rs b/src/bin/tectonic/v2cli/commands/bundle/select/input/mod.rs new file mode 100644 index 00000000..02106df3 --- /dev/null +++ b/src/bin/tectonic/v2cli/commands/bundle/select/input/mod.rs @@ -0,0 +1,42 @@ +mod dir; +mod tar; + +use anyhow::Result; +use std::{io::Read, path::PathBuf}; + +trait BundleInput { + #[allow(clippy::type_complexity)] + fn iter_files(&mut self) -> impl Iterator)>>; +} + +pub enum Input { + Directory(dir::DirBundleInput), + Tarball(tar::TarBundleInput), +} + +impl<'a> Input { + pub fn new_dir(path: PathBuf) -> Self { + Self::Directory(dir::DirBundleInput::new(path)) + } + + pub fn new_tarball(path: PathBuf, root: Option) -> Result { + Ok(Self::Tarball(tar::TarBundleInput::new(path, root)?)) + } + + #[allow(clippy::type_complexity)] + pub fn iter_files( + &'a mut self, + ) -> Box)>> + 'a> { + match self { + Self::Directory(x) => Box::new(x.iter_files()), + Self::Tarball(x) => Box::new(x.iter_files()), + } + } + + pub fn hash(&self) -> Option<&str> { + match self { + Self::Directory(_) => None, + Self::Tarball(x) => Some(x.hash()), + } + } +} diff --git a/src/bin/tectonic/v2cli/commands/bundle/select/input/tar.rs b/src/bin/tectonic/v2cli/commands/bundle/select/input/tar.rs new file mode 100644 index 00000000..38f88922 --- /dev/null +++ b/src/bin/tectonic/v2cli/commands/bundle/select/input/tar.rs @@ -0,0 +1,77 @@ +use anyhow::Result; +use sha2::{Digest, Sha256}; +use std::{ + fs::File, + io::{Read, Seek}, + path::PathBuf, +}; +use tar::Archive; +use tracing::info; + +use super::BundleInput; + +pub struct TarBundleInput { + archive: Archive, + root: PathBuf, + hash: String, +} + +impl TarBundleInput { + pub fn new(path: PathBuf, root: Option) -> Result { + let path = path.canonicalize()?; + let mut file = File::open(&path)?; + + info!("computing hash of {}", path.to_str().unwrap()); + + let hash = { + let mut 
hasher = Sha256::new(); + let _ = std::io::copy(&mut file, &mut hasher)?; + hasher + .finalize() + .iter() + .map(|b| format!("{b:02x}")) + .collect::>() + .concat() + }; + + file.seek(std::io::SeekFrom::Start(0))?; + Ok(Self { + archive: Archive::new(file), + root: root.unwrap_or(PathBuf::from("")), + hash, + }) + } + + pub fn hash(&self) -> &str { + &self.hash + } +} + +impl BundleInput for TarBundleInput { + fn iter_files(&mut self) -> impl Iterator)>> { + let root = self.root.clone(); + self.archive.entries().unwrap().filter_map(move |x| { + // TODO: error handling + let xr = x.as_ref().unwrap(); + + if !xr.header().entry_type().is_file() { + None + } else { + let path = xr.path().unwrap(); + + if !path.starts_with(&root) { + None + } else { + Some(Ok(( + path.strip_prefix(&root) + .unwrap() + .to_str() + .unwrap() + .to_string(), + Box::new(x.unwrap()) as Box, + ))) + } + } + }) + } +} diff --git a/src/bin/tectonic/v2cli/commands/bundle/select/mod.rs b/src/bin/tectonic/v2cli/commands/bundle/select/mod.rs new file mode 100644 index 00000000..e1c088fd --- /dev/null +++ b/src/bin/tectonic/v2cli/commands/bundle/select/mod.rs @@ -0,0 +1,3 @@ +pub mod input; +pub mod picker; +pub mod spec; diff --git a/src/bin/tectonic/v2cli/commands/bundle/select/picker.rs b/src/bin/tectonic/v2cli/commands/bundle/select/picker.rs new file mode 100755 index 00000000..0249cd73 --- /dev/null +++ b/src/bin/tectonic/v2cli/commands/bundle/select/picker.rs @@ -0,0 +1,602 @@ +use anyhow::{bail, Context, Result}; +use regex::Regex; +use sha2::{Digest, Sha256}; +use std::{ + cmp::Ordering, + collections::HashMap, + fmt::Display, + fs::{self, File}, + io::{self, Cursor, Read, Write}, + iter::FromIterator, + path::{Path, PathBuf}, + process::{Command, Stdio}, +}; +use tracing::{debug, error, info, trace, warn}; +use walkdir::WalkDir; + +use crate::v2cli::commands::bundle::create::BundleCreateCommand; + +use super::{ + input::Input, + spec::BundleSearchOrder, + spec::{BundleInputSource, 
BundleSpec}, +}; + +#[derive(Default)] +pub struct PickStatistics { + /// Total number of files added from each source + added: HashMap, + + /// Number of file conflicts + conflicts: usize, + + /// Total number of files ignored + ignored: usize, + + /// Total number of patches applied + patch_applied: usize, + + /// Total number of patches found + patch_found: usize, +} + +impl PickStatistics { + /// Returns a pretty status summary string + pub fn make_string(&self) -> String { + let mut output_string = format!( + concat!( + "=============== Summary ===============\n", + " file conflicts: {}\n", + " files ignored: {}\n", + " diffs applied/found: {}/{}\n", + " =============================\n", + ), + self.conflicts, self.ignored, self.patch_applied, self.patch_found, + ); + + let mut sum = 0; + for (source, count) in &self.added { + let s = format!("{source} files: "); + output_string.push_str(&format!(" {s}{}{count}\n", " ".repeat(22 - s.len()))); + sum += count; + } + output_string.push_str(&format!(" total files: {sum}\n\n")); + + output_string.push_str(&"=".repeat(39).to_string()); + output_string + } + + /// Did we find as many, fewer, or more patches than we applied? + pub fn compare_patch_found_applied(&self) -> Ordering { + self.patch_found.cmp(&self.patch_applied) + } +} + +struct FileListEntry { + /// Path relative to content dir (does not start with a slash) + path: PathBuf, + hash: Option, +} + +impl Display for FileListEntry { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + format!( + "{} {}", + match &self.hash { + Some(s) => &s, + None => "nohash", + }, + self.path.to_str().unwrap(), + ) + .fmt(f) + } +} + +pub struct FilePicker { + /// This bundle specification's root directory. + /// (i.e, where we found bundle.toml) + bundle_dir: PathBuf, + + /// Where to place this bundle's files + build_dir: PathBuf, + + /// This file picker's statistics + pub stats: PickStatistics, + + /// All files we've picked so far. 
+ /// This map's keys are the `path` value of `FileListEntry`. + filelist: HashMap<PathBuf, FileListEntry>, + + bundle_spec: BundleSpec, +} + +impl FilePicker { + /// Expand one search-order line written with bash-like brace shortcuts + /// (e.g. `/a/b/{tex,latex}/c`) into the plain list of lines it stands for. + /// Fails on unmatched, out-of-order, nested or empty (`{}`, `{a,}`) braces. + fn expand_search_line(s: &str) -> Result<Vec<String>> { + if !(s.contains('{') || s.contains('}')) { + return Ok(vec![s.to_owned()]); + } + + let first = match s.find('{') { + Some(x) => x, + None => bail!("Bad search path format"), + }; + + let last = match s.find('}') { + Some(x) if x > first => x, + _ => bail!("Bad search path format"), + }; + + let head = &s[..first]; + let mid = &s[first + 1..last]; + + if mid.contains('{') || mid.contains('}') { + // Mismatched or nested braces + bail!("Bad search path format"); + } + + // We find the first brace, so only tail may have other expansions. + let tail = Self::expand_search_line(&s[last + 1..s.len()])?; + + if mid.is_empty() { + bail!("Bad search path format"); + } + + let mut output: Vec<String> = Vec::new(); + for m in mid.split(',') { + for t in &tail { + if m.is_empty() { + bail!("Bad search path format"); + } + output.push(format!("{}{}{}", head, m, t)); + } + } + + Ok(output) + } + + /// Patch a file in-place. + /// This should be done after calling `add_file`. + fn apply_patch( + &mut self, + path: &Path, + path_in_source: &Path, + diffs: &HashMap<PathBuf, PathBuf>, + ) -> Result<bool> { + // Is this file patched?
+ if !diffs.contains_key(path_in_source) { + return Ok(false); + } + + info!("patching `{}`", path_in_source.display()); + + self.stats.patch_applied += 1; + + // Discard first line of diff (everything after it is fed to `patch`) + let diff_file = fs::read_to_string(&diffs[path_in_source]).context("while reading diff file")?; + let (_, diff) = diff_file.split_once('\n').context("diff file has no first line to discard")?; + + // TODO: don't require `patch` + let mut child = Command::new("patch") + .arg("--quiet") + .arg("--no-backup") + .arg(path) + .stdin(Stdio::piped()) + .spawn() + .context("while spawning `patch`")?; + + let mut stdin = child.stdin.take().unwrap(); + stdin + .write_all(diff.as_bytes()) + .context("while passing diff to `patch`")?; + drop(stdin); + child.wait().context("while waiting for `patch`")?; + + Ok(true) + } + + /// Add a file into the file list. + fn add_to_filelist(&mut self, path: PathBuf, file: Option<&Path>) -> Result<()> { + trace!("adding `{path:?}` to file list"); + + self.filelist.insert( + path.clone(), + FileListEntry { + path: path.clone(), + hash: match file { + None => None, + Some(f) => { + let mut hasher = Sha256::new(); + let _ = std::io::copy( + &mut fs::File::open(f) + .with_context(|| format!("while computing hash of {path:?}"))?, + &mut hasher, + )?; + Some( + hasher + .finalize() + .iter() + .map(|b| format!("{b:02x}")) + .collect::<Vec<_>>() + .concat(), + ) + } + }, + }, + ); + + Ok(()) + } + + /// Add a file to this picker's content directory + fn add_file( + &mut self, + path_in_source: &Path, + source: &str, + file_content: &mut dyn Read, + diffs: &HashMap<PathBuf, PathBuf>, + ) -> Result<()> { + let target_path = self + .build_dir + .join("content") + .join(source) + .join(path_in_source); + + // Path to this file, relative to content dir + let rel = target_path + .strip_prefix(self.build_dir.join("content")) + .unwrap() + .to_path_buf(); + + trace!("adding {path_in_source:?} from source `{source}`"); + + // Skip files that already exist + if self.filelist.contains_key(&rel) { + self.stats.conflicts += 1; + warn!("{path_in_source:?}
from source `{source}` already exists, skipping"); + return Ok(()); + } + + fs::create_dir_all(match target_path.parent() { + Some(x) => x, + None => bail!("couldn't get parent of target"), + }) + .context("failed to create content directory")?; + + // Copy to content dir. + let mut file = fs::File::create(&target_path)?; + io::copy(file_content, &mut file).with_context(|| { + format!("while writing file `{path_in_source:?}` from source `{source}`") + })?; + + // Apply patch if one exists + self.apply_patch(&target_path, path_in_source, diffs) + .with_context(|| { + format!("while patching `{path_in_source:?}` from source `{source}`") + })?; + + self.add_to_filelist(rel, Some(&target_path)) + .with_context(|| { + format!("while adding file `{path_in_source:?}` from source `{source}`") + })?; + + Ok(()) + } +} + +// Public methods +impl FilePicker { + /// Create a new file picker working in build_dir + pub fn new(bundle_spec: BundleSpec, build_dir: PathBuf, bundle_dir: PathBuf) -> Result { + if !build_dir.is_dir() { + bail!("build_dir is not a directory!") + } + + if build_dir.read_dir()?.next().is_some() { + bail!("build_dir is not empty!") + } + + Ok(FilePicker { + bundle_dir, + build_dir, + filelist: HashMap::new(), + bundle_spec, + stats: PickStatistics::default(), + }) + } + + /// Iterate over this bundle's sources + pub fn iter_sources(&self) -> impl Iterator { + self.bundle_spec.inputs.keys() + } + + /// Add a directory of files to this bundle under `source_name`, + /// applying patches and checking for replacements. 
+ pub fn add_source(&mut self, cli: &BundleCreateCommand, source: &str) -> Result<()> { + info!("adding source `{source}`"); + + let input = self.bundle_spec.inputs.get(source).unwrap().clone(); + let mut added = 0usize; + + // Load diff files + let diffs = input + .patch_dir + .as_ref() + .map(|x| -> Result> { + let mut diffs = HashMap::new(); + + for entry in WalkDir::new(self.bundle_dir.join(x)) { + // Only iterate files + let entry = entry?; + if !entry.file_type().is_file() { + continue; + } + let entry = entry.into_path(); + + // Only include files with a `.diff extension` + if entry.extension().map(|x| x != "diff").unwrap_or(true) { + continue; + } + + // Read first line of diff to get target path + let diff_file = fs::read_to_string(&entry).unwrap(); + let (target, _) = diff_file.split_once('\n').unwrap(); + + trace!(tectonic_log_source = "select", "adding diff {entry:?}"); + + for t in Self::expand_search_line(target)? + .into_iter() + .map(PathBuf::from) + { + if diffs.contains_key(&t) { + warn!("the target of diff {entry:?} conflicts with another, ignoring"); + continue; + } + + diffs.insert(t, entry.clone()); + self.stats.patch_found += 1; + } + } + + Ok(diffs) + }) + .unwrap_or(Ok(HashMap::new()))?; + + // Load and compile ignore patterns + let ignore_patterns = { + // Global patterns + let mut ignore = self + .bundle_spec + .bundle + .ignore + .as_ref() + .map(|v| { + v.iter() + .map(|x| Regex::new(&format!("^{x}$"))) + .collect::, regex::Error>>() + }) + .unwrap_or(Ok(Vec::new()))?; + + // Input patterns + ignore.extend( + input + .ignore + .as_ref() + .map(|v| { + v.iter() + .map(|x| Regex::new(&format!("^/{source}/{x}$"))) + .collect::, regex::Error>>() + }) + .unwrap_or(Ok(Vec::new()))?, + ); + + ignore + }; + + let mut source_backend = match &input.source { + BundleInputSource::Directory { path, .. 
} => Input::new_dir(self.bundle_dir.join(path)), + BundleInputSource::Tarball { + path, + root_dir, + hash, + } => { + let x = match Input::new_tarball(self.bundle_dir.join(path), root_dir.clone()) { + Ok(x) => x, + Err(e) => { + error!("could not add source `{source}` from tarball"); + return Err(e); + } + }; + let hash = hash.clone(); + self.add_file( + Path::new("TAR-SHA256SUM"), + source, + &mut Cursor::new(format!("{}\n", x.hash().unwrap())), + &HashMap::new(), + )?; + + if x.hash().unwrap() != hash { + if cli.allow_hash_mismatch { + warn!("hash of tarball for source `{source}` doesn't match expected value"); + warn!("expected: {}", x.hash().unwrap()); + warn!("got: {}", hash); + } else { + error!( + "hash of tarball for source `{source}` doesn't match expected value" + ); + error!("expected: {}", x.hash().unwrap()); + error!("got: {}", hash); + bail!("hash of tarball for source `{source}` doesn't match expected value") + } + } + + info!("OK, tar hash matches bundle config"); + x + } + }; + + for x in source_backend.iter_files() { + let (rel_file_path, mut read) = x?; + + let ignore = { + let f = format!("/{source}/{}", rel_file_path); + let mut ignore = false; + for pattern in &ignore_patterns { + if pattern.is_match(&f) { + ignore = true; + break; + } + } + ignore + }; + + // Skip ignored files + if ignore { + debug!( + "skipping file {rel_file_path:?} from source `{source}` because of ignore patterns" + ); + self.stats.ignored += 1; + continue; + } + + // Debug info + if self.filelist.len() % 1937 == 1936 { + info!("selecting files ({source}, {})", self.filelist.len()); + } + + trace!("adding file {rel_file_path:?} from source `{source}`"); + + self.add_file(Path::new(&rel_file_path), source, &mut read, &diffs) + .with_context(|| format!("while adding file `{rel_file_path:?}`"))?; + added += 1; + } + + self.stats.added.insert(source.to_owned(), added); + + Ok(()) + } + + pub fn finish(&mut self, save_debug_files: bool) -> Result<()> { + info!("writing 
auxillary files"); + + // Save search specification + let search = { + let mut search = Vec::new(); + let path = self.build_dir.join("content/SEARCH"); + + for s in &self.bundle_spec.bundle.search_order { + match s { + BundleSearchOrder::Plain(s) => { + for i in Self::expand_search_line(s)? { + search.push(i); + } + } + BundleSearchOrder::Input { input } => { + let s = &self.bundle_spec.inputs.get(input).unwrap().search_order; + if let Some(s) = s { + for line in s { + for i in Self::expand_search_line(&format!("/{input}/{line}"))? { + search.push(i); + } + } + } else { + for i in Self::expand_search_line(&format!("/{input}//"))? { + search.push(i); + } + } + } + } + } + + let mut file = File::create(&path).context("while writing SEARCH")?; + for s in &search { + writeln!(file, "{s}")?; + } + + self.add_to_filelist(PathBuf::from("SEARCH"), Some(&path))?; + + search + }; + + { + // These aren't hashed, but must be listed anyway. + // The hash is generated from the filelist, so we must add these before hashing. + self.add_to_filelist(PathBuf::from("SHA256SUM"), None)?; + self.add_to_filelist(PathBuf::from("FILELIST"), None)?; + + let mut filelist_vec = Vec::from_iter(self.filelist.values()); + filelist_vec.sort_by(|a, b| a.path.cmp(&b.path)); + + let filelist_path = self.build_dir.join("content/FILELIST"); + + // Save FILELIST. 
+ let mut file = File::create(&filelist_path).context("while writing FILELIST")?; + for entry in filelist_vec { + writeln!(file, "{entry}")?; + } + + // Compute and save hash + let mut file = File::create(self.build_dir.join("content/SHA256SUM")) + .context("while writing SHA256SUM")?; + + let mut hasher = Sha256::new(); + let _ = std::io::copy(&mut fs::File::open(&filelist_path)?, &mut hasher)?; + let hash = hasher + .finalize() + .iter() + .map(|b| format!("{b:02x}")) + .collect::>() + .concat(); + + writeln!(file, "{hash}")?; + } + + if save_debug_files { + // Generate search-report + { + let mut file = File::create(self.build_dir.join("search-report")) + .context("while writing search-report")?; + for entry in WalkDir::new(self.build_dir.join("content")) { + let entry = entry?; + if !entry.file_type().is_dir() { + continue; + } + let entry = entry + .into_path() + .strip_prefix(self.build_dir.join("content")) + .unwrap() + .to_owned(); + let entry = PathBuf::from("/").join(entry); + + // Will this directory be searched? 
+ let mut is_searched = false; + for rule in &search { + if rule.ends_with("//") { + // Match start of parent path + // (cutting off the last slash of the rule) + if entry.starts_with(&rule[0..rule.len() - 1]) { + is_searched = true; + break; + } + } else { + // Match full parent path + if entry.to_str().unwrap() == rule { + is_searched = true; + break; + } + } + } + + if !is_searched { + let s = entry.to_str().unwrap(); + let t = s.matches('/').count(); + writeln!(file, "{}{s}", "\t".repeat(t - 1))?; + } + } + } + } + Ok(()) + } +} diff --git a/src/bin/tectonic/v2cli/commands/bundle/select/spec.rs b/src/bin/tectonic/v2cli/commands/bundle/select/spec.rs new file mode 100644 index 00000000..1b89358e --- /dev/null +++ b/src/bin/tectonic/v2cli/commands/bundle/select/spec.rs @@ -0,0 +1,72 @@ +use anyhow::{bail, Result}; +use serde::Deserialize; +use std::collections::HashMap; +use std::path::PathBuf; + +#[derive(Debug, Deserialize, Clone)] +#[serde(deny_unknown_fields)] +pub struct BundleSpec { + pub bundle: BundleConfig, + pub inputs: HashMap, +} + +impl BundleSpec { + /// Make sure this bundle specification is valid + pub fn validate(&self) -> Result<()> { + for i in &self.bundle.search_order { + match i { + BundleSearchOrder::Input { ref input } => { + if !self.inputs.contains_key(input) { + bail!("root search order contains unknown input `{input}`"); + } + } + BundleSearchOrder::Plain(_) => {} + } + } + + Ok(()) + } +} + +#[derive(Debug, Deserialize, Clone)] +pub struct BundleConfig { + /// The bundle's name + pub name: String, + + /// The hash of the resulting ttbv1 bundle + pub expected_hash: String, + + /// Search paths for this bundle + pub search_order: Vec, + + /// Files to ignore from every input (global ignore patterns) + pub ignore: Option>, +} + +#[derive(Debug, Deserialize, Clone)] +#[serde(untagged)] +pub enum BundleSearchOrder { + Plain(String), + Input { input: String }, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct BundleInput { + pub source: BundleInputSource, + pub
ignore: Option>, + pub patch_dir: Option, + pub search_order: Option>, +} + +#[derive(Debug, Deserialize, Clone)] +pub enum BundleInputSource { + #[serde(rename = "dir")] + Directory { path: PathBuf }, + + #[serde(rename = "tarball")] + Tarball { + hash: String, + path: PathBuf, + root_dir: Option, + }, +} diff --git a/src/bin/tectonic/v2cli/mod.rs b/src/bin/tectonic/v2cli/mod.rs index 6e5e7014..133555e2 100644 --- a/src/bin/tectonic/v2cli/mod.rs +++ b/src/bin/tectonic/v2cli/mod.rs @@ -14,6 +14,7 @@ use tectonic::{ }; use tectonic_errors::prelude::anyhow; use tectonic_status_base::plain::PlainStatusBackend; +use tracing::level_filters::LevelFilter; use self::commands::{ build::BuildCommand, @@ -91,6 +92,13 @@ pub fn v2_main(effective_args: &[OsString]) { let args = V2CliOptions::parse_from(effective_args); + tracing_subscriber::fmt() + .with_max_level(LevelFilter::INFO) + .with_target(false) + .without_time() + .with_ansi(args.cli_color.should_enable()) + .init(); + // Command-specific customizations before we do our centralized setup. // This is a semi-hack so that we can set up certain commands to ensure // that status info is always printed to stderr.