Added `bundle create`

rm-dr 2024-09-19 07:51:38 -07:00
parent ac15f72f49
commit bab45fc1eb
14 changed files with 1522 additions and 16 deletions
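
In short: this adds a `bundle create` subcommand to the V2 interface that builds a Tectonic bundle in two stages. The `select` stage reads a bundle specification (bundle.toml), copies and patches files from each input into `<build-dir>/content/`, and writes FILELIST, SEARCH, and SHA256SUM; the `pack` stage serializes that directory into a ttbv1 `<name>.ttb` bundle. A hedged invocation sketch, assuming Tectonic's usual `-X` route to V2 commands (paths illustrative): `tectonic -X bundle create --build-dir ./build ./bundle.toml`.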

Cargo.lock (generated; 137 lines changed)

@@ -252,7 +252,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706"
dependencies = [
"memchr",
"regex-automata",
"regex-automata 0.4.6",
"serde",
]
@@ -1075,8 +1075,8 @@ dependencies = [
"aho-corasick",
"bstr",
"log",
"regex-automata",
"regex-syntax",
"regex-automata 0.4.6",
"regex-syntax 0.8.2",
]
[[package]]
@@ -1390,7 +1390,7 @@ dependencies = [
"globset",
"log",
"memchr",
"regex-automata",
"regex-automata 0.4.6",
"same-file",
"walkdir",
"winapi-util",
@@ -1597,6 +1597,15 @@ version = "0.4.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
[[package]]
name = "matchers"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558"
dependencies = [
"regex-automata 0.1.10",
]
[[package]]
name = "md-5"
version = "0.10.6"
@@ -1757,6 +1766,16 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "nu-ansi-term"
version = "0.46.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
dependencies = [
"overload",
"winapi",
]
[[package]]
name = "num-conv"
version = "0.1.0"
@@ -1871,6 +1890,12 @@ dependencies = [
"vcpkg",
]
[[package]]
name = "overload"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
name = "parking_lot"
version = "0.12.1"
@@ -2193,8 +2218,17 @@ checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
"regex-automata 0.4.6",
"regex-syntax 0.8.2",
]
[[package]]
name = "regex-automata"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
dependencies = [
"regex-syntax 0.6.29",
]
[[package]]
@@ -2205,9 +2239,15 @@ checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
"regex-syntax 0.8.2",
]
[[package]]
name = "regex-syntax"
version = "0.6.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]]
name = "regex-syntax"
version = "0.8.2"
@@ -2475,6 +2515,15 @@ dependencies = [
"digest",
]
[[package]]
name = "sharded-slab"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6"
dependencies = [
"lazy_static",
]
[[package]]
name = "signal-hook-registry"
version = "1.4.1"
@@ -2604,10 +2653,22 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
[[package]]
name = "tar"
version = "0.4.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb797dad5fb5b76fcf519e702f4a589483b5ef06567f160c392832c1f5e44909"
dependencies = [
"filetime",
"libc",
"xattr",
]
[[package]]
name = "tectonic"
version = "0.0.0-dev.0"
dependencies = [
"anyhow",
"byte-unit",
"cfg-if",
"clap",
@@ -2626,8 +2687,10 @@ dependencies = [
"md-5",
"open",
"quick-xml",
"regex",
"serde",
"sha2",
"tar",
"tectonic_bridge_core",
"tectonic_bundles",
"tectonic_docmodel",
@@ -2646,7 +2709,10 @@ dependencies = [
"time",
"tokio",
"toml",
"tracing",
"tracing-subscriber",
"url",
"walkdir",
"watchexec",
"watchexec-filterer-globset",
"watchexec-signals",
@@ -2943,6 +3009,16 @@ dependencies = [
"syn 2.0.52",
]
[[package]]
name = "thread_local"
version = "1.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c"
dependencies = [
"cfg-if",
"once_cell",
]
[[package]]
name = "time"
version = "0.3.36"
@@ -3136,6 +3212,36 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54"
dependencies = [
"once_cell",
"valuable",
]
[[package]]
name = "tracing-log"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
dependencies = [
"log",
"once_cell",
"tracing-core",
]
[[package]]
name = "tracing-subscriber"
version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b"
dependencies = [
"matchers",
"nu-ansi-term",
"once_cell",
"regex",
"sharded-slab",
"smallvec",
"thread_local",
"tracing",
"tracing-core",
"tracing-log",
]
[[package]]
@@ -3268,6 +3374,12 @@ version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a"
[[package]]
name = "valuable"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
[[package]]
name = "vcpkg"
version = "0.2.15"
@@ -3749,6 +3861,17 @@ dependencies = [
"tap",
]
[[package]]
name = "xattr"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f"
dependencies = [
"libc",
"linux-raw-sys",
"rustix",
]
[[package]]
name = "xdg"
version = "2.5.2"

Cargo.toml

@@ -16,7 +16,13 @@ documentation = "https://docs.rs/tectonic"
repository = "https://github.com/tectonic-typesetting/tectonic/"
readme = "CARGO_README.md"
keywords = ["tex", "latex", "typesetting", "font"]
categories = ["command-line-interface", "parser-implementations", "rendering", "science", "text-processing"]
categories = [
"command-line-interface",
"parser-implementations",
"rendering",
"science",
"text-processing",
]
license = "MIT"
edition = "2018"
exclude = ["/dist/", "/reference_sources/"]
@@ -96,6 +102,12 @@ watchexec-supervisor = "1.0"
zip = { version = "^0.6", default-features = false, features = ["deflate"] }
time = "0.3.36"
clap_complete = "4.5.1"
walkdir = "2"
regex = "1.10.2"
anyhow = "1.0.80"
tar = "0.4.40"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
[features]
default = ["geturl-reqwest", "serialization"]
@@ -113,7 +125,10 @@ external-harfbuzz = ["tectonic_engine_xetex/external-harfbuzz"]
geturl-curl = ["tectonic_bundles/geturl-curl", "tectonic_geturl/curl"]
geturl-reqwest = ["tectonic_bundles/geturl-reqwest", "tectonic_geturl/reqwest"]
native-tls-vendored = ["tectonic_bundles/native-tls-vendored", "tectonic_geturl/native-tls-vendored"]
native-tls-vendored = [
"tectonic_bundles/native-tls-vendored",
"tectonic_geturl/native-tls-vendored",
]
# developer feature to compile with the necessary flags for profiling tectonic.
profile = []
@@ -124,7 +139,12 @@ futures = "0.3"
headers = "0.4"
http-body-util = "0.1.0"
hyper = { version = "1.0.0", features = ["server", "http1", "http2"] }
hyper-util = { version = "0.1", features = ["server", "http1", "http2", "tokio"] }
hyper-util = { version = "0.1", features = [
"server",
"http1",
"http2",
"tokio",
] }
tempfile = "^3.1"
[package.metadata.vcpkg]
@@ -137,9 +157,23 @@ overlay-triplets-path = "dist/vcpkg-triplets"
# guidance if they might need to set $VCPKGRS_TRIPLET.
[package.metadata.vcpkg.target]
x86_64-apple-darwin = { install = ["freetype", "harfbuzz[graphite2]", "icu"] }
aarch64-apple-darwin = { triplet = "arm64-osx", install = ["freetype", "harfbuzz[graphite2]", "icu"] }
x86_64-unknown-linux-gnu = { install = ["fontconfig", "freetype", "harfbuzz[graphite2]", "icu"] }
x86_64-pc-windows-msvc = { triplet = "x64-windows-static-release", install = ["fontconfig", "freetype", "harfbuzz[graphite2]", "icu"] }
aarch64-apple-darwin = { triplet = "arm64-osx", install = [
"freetype",
"harfbuzz[graphite2]",
"icu",
] }
x86_64-unknown-linux-gnu = { install = [
"fontconfig",
"freetype",
"harfbuzz[graphite2]",
"icu",
] }
x86_64-pc-windows-msvc = { triplet = "x64-windows-static-release", install = [
"fontconfig",
"freetype",
"harfbuzz[graphite2]",
"icu",
] }
[package.metadata.internal_dep_versions]
tectonic_bridge_core = "thiscommit:2023-06-11:PvhF7YB"

v2cli/commands/bundle/actions.rs

@@ -0,0 +1,140 @@
use super::{
create::{BundleCreateCommand, BundleFormat},
pack::bundlev1::BundleV1,
select::{picker::FilePicker, spec::BundleSpec},
};
use anyhow::{Context, Result};
use std::{
cmp::Ordering,
fs::{self, File},
io::Read,
thread,
time::Duration,
};
use tracing::{error, info, warn};
pub(super) fn select(cli: &BundleCreateCommand) -> Result<()> {
let bundle_dir = cli
.bundle_spec
.canonicalize()
.unwrap()
.parent()
.unwrap()
.to_path_buf();
let mut file = File::open(&cli.bundle_spec)?;
let mut file_str = String::new();
file.read_to_string(&mut file_str)?;
let bundle_config: BundleSpec = match toml::from_str(&file_str) {
Ok(x) => x,
Err(e) => {
error!("failed to load bundle specification",);
return Err(e.into());
}
};
if let Err(e) = bundle_config.validate() {
error!("failed to validate bundle specification");
return Err(e);
};
// Remove build dir if it exists
if cli.build_dir.exists() {
warn!(
"build dir {} aleady exists",
cli.build_dir.to_str().unwrap()
);
for i in (1..=5).rev() {
warn!(
"recursively removing {} in {i} second{}",
cli.build_dir.to_str().unwrap(),
if i != 1 { "s" } else { "" }
);
thread::sleep(Duration::from_secs(1));
}
thread::sleep(Duration::from_secs(2));
fs::remove_dir_all(&cli.build_dir)?;
}
fs::create_dir_all(&cli.build_dir).context("while creating build dir")?;
let mut picker = FilePicker::new(
bundle_config.clone(),
cli.build_dir.clone(),
bundle_dir.clone(),
)?;
// Run selector
let sources: Vec<String> = picker.iter_sources().map(|x| x.to_string()).collect();
for source in sources {
picker.add_source(cli, &source)?;
}
picker.finish(true)?;
// Print statistics
info!("summary is below:\n{}", picker.stats.make_string());
match picker.stats.compare_patch_found_applied() {
Ordering::Equal => {}
Ordering::Greater => {
warn!("some patches were not applied");
}
Ordering::Less => {
warn!("some patches applied multiple times");
}
}
// Check output hash
{
let mut file = File::open(cli.build_dir.join("content/SHA256SUM"))?;
let mut hash = String::new();
file.read_to_string(&mut hash)?;
let hash = hash.trim();
if hash != bundle_config.bundle.expected_hash {
warn!("final bundle hash doesn't match bundle configuration:");
warn!("bundle hash is {hash}");
warn!("config hash is {}", bundle_config.bundle.expected_hash);
} else {
info!("final bundle hash matches configuration");
info!("hash is {hash}");
}
}
Ok(())
}
pub(super) fn pack(cli: &BundleCreateCommand) -> Result<()> {
let mut file = File::open(&cli.bundle_spec)?;
let mut file_str = String::new();
file.read_to_string(&mut file_str)?;
let bundle_config: BundleSpec = toml::from_str(&file_str)?;
if !cli.build_dir.join("content").is_dir() {
error!(
"content directory `{}/content` doesn't exist, can't continue",
cli.build_dir.to_str().unwrap()
);
return Ok(());
}
let target_name = format!("{}.ttb", &bundle_config.bundle.name);
let target = cli.build_dir.join(&target_name);
if target.exists() {
if target.is_file() {
warn!("target bundle `{target_name}` exists, removing");
fs::remove_file(&target)?;
} else {
error!("target bundle `{target_name}` exists and isn't a file, can't continue");
return Ok(());
}
}
match cli.format {
BundleFormat::BundleV1 => {
BundleV1::make(Box::new(File::create(target)?), cli.build_dir.clone())?
}
}
Ok(())
}

v2cli/commands/bundle/create.rs

@@ -0,0 +1,118 @@
use clap::{Parser, ValueEnum};
use std::{fmt::Display, path::PathBuf};
use tectonic::{config::PersistentConfig, Result};
use tectonic_status_base::StatusBackend;
use tracing::error;
use crate::v2cli::{CommandCustomizations, TectonicCommand};
//
// MARK: Cli arguments
//
#[derive(Debug, Copy, Clone, ValueEnum)]
pub enum BundleJob {
/// Run the following jobs in order
#[value(name = "all")]
All,
/// (Stage 1) Select and patch all files in this bundle
#[value(name = "select")]
Select,
/// (Stage 2) Pack selected files into a bundle
#[value(name = "pack")]
Pack,
}
impl Display for BundleJob {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::All => write!(f, "all"),
Self::Select => write!(f, "select"),
Self::Pack => write!(f, "pack"),
}
}
}
impl BundleJob {
pub fn do_select(&self) -> bool {
matches!(self, Self::All | Self::Select)
}
pub fn do_pack(&self) -> bool {
matches!(self, Self::All | Self::Pack)
}
}
#[derive(Parser, Debug)]
pub struct BundleCreateCommand {
/// Which job we should run. `all` is default,
/// but single jobs can be run on their own for debugging.
#[arg(long, default_value_t = BundleJob::All)]
pub job: BundleJob,
/// Bundle specification TOML file.
pub bundle_spec: PathBuf,
/// Build directory for this bundle.
/// Will be removed.
#[arg(short, long)]
pub build_dir: PathBuf,
/// What kind of bundle should we produce?
/// This only has an effect when running jobs `all` or `pack`
#[arg(default_value_t = BundleFormat::BundleV1)]
pub format: BundleFormat,
/// If this flag is set, don't fail when an input's hash doesn't match
/// the hash specified in the bundle's configuration file.
/// This only has an effect when running jobs `all` or `select`
#[arg(long, default_value_t = false)]
pub allow_hash_mismatch: bool,
}
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
pub enum BundleFormat {
#[value(name = "v1")]
BundleV1,
}
impl Display for BundleFormat {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::BundleV1 => write!(f, "v1")?,
}
Ok(())
}
}
impl TectonicCommand for BundleCreateCommand {
fn customize(&self, cc: &mut CommandCustomizations) {
cc.always_stderr = true;
}
fn execute(self, _config: PersistentConfig, _status: &mut dyn StatusBackend) -> Result<i32> {
if self.job.do_select() {
match super::actions::select(&self) {
Ok(_) => {}
Err(e) => {
error!("select job failed with error: {e}");
return Err(e.into());
}
};
}
if self.job.do_pack() {
match super::actions::pack(&self) {
Ok(_) => {}
Err(e) => {
error!("bundle packer failed with error: {e}");
return Err(e.into());
}
};
}
Ok(0)
}
}

v2cli/commands/bundle/mod.rs

@@ -1,4 +1,5 @@
use clap::{Parser, Subcommand};
use create::BundleCreateCommand;
use tectonic::{
config::PersistentConfig,
docmodel::{DocumentExt, DocumentSetupOptions},
@@ -11,6 +12,11 @@ use tectonic_status_base::StatusBackend;
use crate::v2cli::{CommandCustomizations, TectonicCommand};
mod actions;
mod create;
mod pack;
mod select;
fn get_a_bundle(
_config: PersistentConfig,
only_cached: bool,
@@ -45,13 +51,13 @@
}
/// `bundle`: Commands relating to Tectonic bundles
#[derive(Debug, Eq, PartialEq, Parser)]
#[derive(Debug, Parser)]
pub struct BundleCommand {
#[command(subcommand)]
command: BundleCommands,
}
#[derive(Debug, Eq, PartialEq, Subcommand)]
#[derive(Debug, Subcommand)]
enum BundleCommands {
#[command(name = "cat")]
/// Dump the contents of a file in the bundle
@@ -60,6 +66,10 @@ enum BundleCommands {
#[command(name = "search")]
/// Filter the list of filenames contained in the bundle
Search(BundleSearchCommand),
#[command(name = "create")]
/// Create a new bundle
Create(BundleCreateCommand),
}
impl TectonicCommand for BundleCommand {
@@ -67,6 +77,7 @@ impl TectonicCommand for BundleCommand {
match &self.command {
BundleCommands::Cat(c) => c.customize(cc),
BundleCommands::Search(c) => c.customize(cc),
BundleCommands::Create(c) => c.customize(cc),
}
}
@@ -74,11 +85,12 @@ match self.command {
match self.command {
BundleCommands::Cat(c) => c.execute(config, status),
BundleCommands::Search(c) => c.execute(config, status),
BundleCommands::Create(c) => c.execute(config, status),
}
}
}
#[derive(Debug, Eq, PartialEq, Parser)]
#[derive(Debug, Parser)]
struct BundleCatCommand {
/// Use only resource files cached locally
#[arg(short = 'C', long)]

v2cli/commands/bundle/pack/bundlev1.rs

@@ -0,0 +1,218 @@
use anyhow::{bail, Result};
use flate2::{write::GzEncoder, Compression};
use std::{
fmt::Display,
fs::{self, File},
io::{stdout, BufRead, BufReader, Read, Seek, Write},
num::ParseIntError,
path::PathBuf,
};
use tracing::info;
pub trait WriteSeek: std::io::Write + Seek {}
impl<T: Write + Seek> WriteSeek for T {}
pub fn decode_hex(s: &str) -> Result<Vec<u8>, ParseIntError> {
(0..s.len())
.step_by(2)
.map(|i| u8::from_str_radix(&s[i..i + 2], 16))
.collect()
}
// Size of ttbv1 header
const HEADER_SIZE: u64 = 66u64;
#[derive(Debug)]
struct FileListEntry {
path: PathBuf,
hash: String,
start: u64,
// We need the compressed length to build
// a range request for this bundle. We also
// keep the real length around for performance
// (we'll only need to allocate vectors once)
real_len: u32,
gzip_len: u32,
}
impl Display for FileListEntry {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
format!(
"{} {} {} {} {}",
self.start,
self.gzip_len,
self.real_len,
self.hash,
self.path.to_str().unwrap()
)
.fmt(f)
}
}
pub struct BundleV1 {
filelist: Vec<FileListEntry>,
target: Box<dyn WriteSeek>,
content_dir: PathBuf,
index_start: u64,
index_real_len: u32,
index_gzip_len: u32,
}
impl BundleV1 {
pub fn make(target: Box<dyn WriteSeek>, build_dir: PathBuf) -> Result<()> {
let mut bundle = BundleV1::new(target, build_dir)?;
bundle.add_files()?;
bundle.write_index()?;
bundle.write_header()?;
Ok(())
}
fn new(target: Box<dyn WriteSeek>, build_dir: PathBuf) -> Result<BundleV1> {
Ok(BundleV1 {
filelist: Vec::new(),
target,
content_dir: build_dir.join("content"),
index_start: 0,
index_gzip_len: 0,
index_real_len: 0,
})
}
fn add_files(&mut self) -> Result<u64> {
let mut byte_count = HEADER_SIZE; // Start after header
let mut real_len_sum = 0; // Compute average compression ratio
self.target.seek(std::io::SeekFrom::Start(byte_count))?;
let filelist_file = File::open(self.content_dir.join("FILELIST"))?;
let reader = BufReader::new(filelist_file);
info!("Building ttbv1 bundle...");
for line in reader.lines() {
stdout().flush()?;
let line = line?;
let mut bits = line.split_whitespace();
if let Some(hash) = bits.next() {
let path = bits.collect::<Vec<&str>>().join(" ");
let mut file = fs::File::open(self.content_dir.join(&path))?;
// Compress and write bytes
let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
let real_len = std::io::copy(&mut file, &mut encoder)?;
let gzip_len = self.target.write(&encoder.finish()?)?;
assert!(real_len < u32::MAX as u64);
assert!(gzip_len < u32::MAX as usize);
// Add to index
self.filelist.push(FileListEntry {
start: byte_count,
gzip_len: gzip_len as u32,
real_len: real_len as u32,
path: PathBuf::from(path),
hash: hash.to_owned(),
});
byte_count += gzip_len as u64;
real_len_sum += real_len;
} else {
bail!("malformed filelist line");
}
}
info!(
"Average compression ratio: {:.2}",
real_len_sum as f64 / byte_count as f64
);
Ok(byte_count)
}
fn write_index(&mut self) -> Result<()> {
// Generate a ttbv1 index and write it to the bundle.
//
// This index is a replacement for FILELIST and SEARCH, containing everything in those files
// (in addition to some ttbv1-specific information)
//
// The original FILELIST and SEARCH files are still included in the bundle.
// Get current position
self.index_start = self.target.stream_position()?;
info!("Writing index");
let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
let mut real_len = 0usize;
real_len += encoder.write("[DEFAULTSEARCH]\n".as_bytes())?;
real_len += encoder.write("MAIN\n".as_bytes())?;
real_len += encoder.write("[SEARCH:MAIN]\n".as_bytes())?;
for l in fs::read_to_string(self.content_dir.join("SEARCH"))?.lines() {
real_len += encoder.write(l.as_bytes())?;
real_len += encoder.write(b"\n")?;
}
real_len += encoder.write("[FILELIST]\n".as_bytes())?;
for i in &self.filelist {
let s = format!("{i}\n");
real_len += encoder.write(s.as_bytes())?;
}
let gzip_len = self.target.write(&encoder.finish()?)?;
assert!(gzip_len < u32::MAX as usize);
assert!(real_len < u32::MAX as usize);
self.index_gzip_len = gzip_len as u32;
self.index_real_len = real_len as u32;
info!(
"index is at {} and has length {}",
self.index_start, self.index_gzip_len
);
Ok(())
}
fn write_header(&mut self) -> Result<u64> {
self.target.seek(std::io::SeekFrom::Start(0))?;
info!("Writing header");
// Parse bundle hash
let mut hash_file = File::open(self.content_dir.join("SHA256SUM")).unwrap();
let mut hash_text = String::new();
hash_file.read_to_string(&mut hash_text)?;
let digest = decode_hex(hash_text.trim())?;
let mut byte_count = 0u64;
// 14 bytes: signature
// Always "tectonicbundle", in any bundle version.
//
// This "magic sequence" lets us more easily distinguish between
// random binary files and proper tectonic bundles.
byte_count += self.target.write(b"tectonicbundle")? as u64;
// 4 bytes: bundle version
byte_count += self.target.write(&1u32.to_le_bytes())? as u64;
// 8 + 4 + 4 = 12 bytes: location and real length of index
byte_count += self.target.write(&self.index_start.to_le_bytes())? as u64;
byte_count += self.target.write(&self.index_gzip_len.to_le_bytes())? as u64;
byte_count += self.target.write(&self.index_real_len.to_le_bytes())? as u64;
// 32 bytes: bundle hash
// We include this in the header so we don't need to load the index to get the hash.
byte_count += self.target.write(&digest)? as u64;
// Make sure we wrote the expected number of bytes
assert!(byte_count == HEADER_SIZE);
Ok(byte_count)
}
}
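
For orientation, a minimal sketch of the matching header reader, assuming exactly the layout `write_header` emits above (14-byte signature, u32 version, u64 index start, u32 compressed and real index lengths, 32-byte digest; all integers little-endian, 66 bytes total). The struct and function names here are illustrative, not part of this commit:

use std::io::Read;

struct Ttbv1Header {
    version: u32,
    index_start: u64,
    index_gzip_len: u32,
    index_real_len: u32,
    digest: [u8; 32],
}

fn read_header(reader: &mut impl Read) -> anyhow::Result<Ttbv1Header> {
    // 14 bytes: magic signature, shared by all bundle versions
    let mut signature = [0u8; 14];
    reader.read_exact(&mut signature)?;
    anyhow::ensure!(&signature == b"tectonicbundle", "not a tectonic bundle");

    let mut u32_buf = [0u8; 4];
    let mut u64_buf = [0u8; 8];

    // 4 bytes: bundle version (1 for ttbv1)
    reader.read_exact(&mut u32_buf)?;
    let version = u32::from_le_bytes(u32_buf);

    // 8 + 4 + 4 bytes: index location, compressed length, decompressed length
    reader.read_exact(&mut u64_buf)?;
    let index_start = u64::from_le_bytes(u64_buf);
    reader.read_exact(&mut u32_buf)?;
    let index_gzip_len = u32::from_le_bytes(u32_buf);
    reader.read_exact(&mut u32_buf)?;
    let index_real_len = u32::from_le_bytes(u32_buf);

    // 32 bytes: SHA-256 digest of the bundle
    let mut digest = [0u8; 32];
    reader.read_exact(&mut digest)?;

    Ok(Ttbv1Header { version, index_start, index_gzip_len, index_real_len, digest })
}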

v2cli/commands/bundle/pack/mod.rs

@@ -0,0 +1 @@
pub mod bundlev1;

v2cli/commands/bundle/select/input/dir.rs

@@ -0,0 +1,56 @@
use super::BundleInput;
use anyhow::Result;
use std::{
fs::{self},
io::Read,
path::PathBuf,
};
use walkdir::WalkDir;
pub struct DirBundleInput {
dir: PathBuf,
}
impl DirBundleInput {
pub fn new(dir: PathBuf) -> Self {
Self {
dir: dir.canonicalize().unwrap(),
}
}
}
impl BundleInput for DirBundleInput {
fn iter_files(&mut self) -> impl Iterator<Item = Result<(String, Box<dyn Read + '_>)>> {
WalkDir::new(&self.dir)
.into_iter()
.filter_map(|x| match x {
Err(_) => Some(x),
Ok(x) => {
if !x.file_type().is_file() {
None
} else {
Some(Ok(x))
}
}
})
.map(move |x| match x {
Ok(x) => {
let path = x
.into_path()
.canonicalize()
.unwrap()
.strip_prefix(&self.dir)
.unwrap()
.to_str()
.unwrap()
.to_string();
Ok((
path.clone(),
Box::new(fs::File::open(self.dir.join(path))?) as Box<dyn Read>,
))
}
Err(e) => Err(anyhow::Error::from(e)),
})
}
}

v2cli/commands/bundle/select/input/mod.rs

@@ -0,0 +1,42 @@
mod dir;
mod tar;
use anyhow::Result;
use std::{io::Read, path::PathBuf};
trait BundleInput {
#[allow(clippy::type_complexity)]
fn iter_files(&mut self) -> impl Iterator<Item = Result<(String, Box<dyn Read + '_>)>>;
}
pub enum Input {
Directory(dir::DirBundleInput),
Tarball(tar::TarBundleInput),
}
impl<'a> Input {
pub fn new_dir(path: PathBuf) -> Self {
Self::Directory(dir::DirBundleInput::new(path))
}
pub fn new_tarball(path: PathBuf, root: Option<PathBuf>) -> Result<Self> {
Ok(Self::Tarball(tar::TarBundleInput::new(path, root)?))
}
#[allow(clippy::type_complexity)]
pub fn iter_files(
&'a mut self,
) -> Box<dyn Iterator<Item = Result<(String, Box<dyn Read + 'a>)>> + 'a> {
match self {
Self::Directory(x) => Box::new(x.iter_files()),
Self::Tarball(x) => Box::new(x.iter_files()),
}
}
pub fn hash(&self) -> Option<&str> {
match self {
Self::Directory(_) => None,
Self::Tarball(x) => Some(x.hash()),
}
}
}

v2cli/commands/bundle/select/input/tar.rs

@@ -0,0 +1,77 @@
use anyhow::Result;
use sha2::{Digest, Sha256};
use std::{
fs::File,
io::{Read, Seek},
path::PathBuf,
};
use tar::Archive;
use tracing::info;
use super::BundleInput;
pub struct TarBundleInput {
archive: Archive<File>,
root: PathBuf,
hash: String,
}
impl TarBundleInput {
pub fn new(path: PathBuf, root: Option<PathBuf>) -> Result<Self> {
let path = path.canonicalize()?;
let mut file = File::open(&path)?;
info!("computing hash of {}", path.to_str().unwrap());
let hash = {
let mut hasher = Sha256::new();
let _ = std::io::copy(&mut file, &mut hasher)?;
hasher
.finalize()
.iter()
.map(|b| format!("{b:02x}"))
.collect::<Vec<_>>()
.concat()
};
file.seek(std::io::SeekFrom::Start(0))?;
Ok(Self {
archive: Archive::new(file),
root: root.unwrap_or(PathBuf::from("")),
hash,
})
}
pub fn hash(&self) -> &str {
&self.hash
}
}
impl BundleInput for TarBundleInput {
fn iter_files(&mut self) -> impl Iterator<Item = Result<(String, Box<dyn Read + '_>)>> {
let root = self.root.clone();
self.archive.entries().unwrap().filter_map(move |x| {
// TODO: error handling
let xr = x.as_ref().unwrap();
if !xr.header().entry_type().is_file() {
None
} else {
let path = xr.path().unwrap();
if !path.starts_with(&root) {
None
} else {
Some(Ok((
path.strip_prefix(&root)
.unwrap()
.to_str()
.unwrap()
.to_string(),
Box::new(x.unwrap()) as Box<dyn Read>,
)))
}
}
})
}
}

v2cli/commands/bundle/select/mod.rs

@@ -0,0 +1,3 @@
pub mod input;
pub mod picker;
pub mod spec;

v2cli/commands/bundle/select/picker.rs

@@ -0,0 +1,602 @@
use anyhow::{bail, Context, Result};
use regex::Regex;
use sha2::{Digest, Sha256};
use std::{
cmp::Ordering,
collections::HashMap,
fmt::Display,
fs::{self, File},
io::{self, Cursor, Read, Write},
iter::FromIterator,
path::{Path, PathBuf},
process::{Command, Stdio},
};
use tracing::{debug, error, info, trace, warn};
use walkdir::WalkDir;
use crate::v2cli::commands::bundle::create::BundleCreateCommand;
use super::{
input::Input,
spec::BundleSearchOrder,
spec::{BundleInputSource, BundleSpec},
};
#[derive(Default)]
pub struct PickStatistics {
/// Total number of files added from each source
added: HashMap<String, usize>,
/// Number of file conflicts
conflicts: usize,
/// Total number of files ignored
ignored: usize,
/// Total number of patches applied
patch_applied: usize,
/// Total number of patches found
patch_found: usize,
}
impl PickStatistics {
/// Returns a pretty status summary string
pub fn make_string(&self) -> String {
let mut output_string = format!(
concat!(
"=============== Summary ===============\n",
" file conflicts: {}\n",
" files ignored: {}\n",
" diffs applied/found: {}/{}\n",
" =============================\n",
),
self.conflicts, self.ignored, self.patch_applied, self.patch_found,
);
let mut sum = 0;
for (source, count) in &self.added {
let s = format!("{source} files: ");
output_string.push_str(&format!(" {s}{}{count}\n", " ".repeat(22 - s.len())));
sum += count;
}
output_string.push_str(&format!(" total files: {sum}\n\n"));
output_string.push_str(&"=".repeat(39).to_string());
output_string
}
/// Did we find as many, fewer, or more patches than we applied?
pub fn compare_patch_found_applied(&self) -> Ordering {
self.patch_found.cmp(&self.patch_applied)
}
}
struct FileListEntry {
/// Path relative to content dir (does not start with a slash)
path: PathBuf,
hash: Option<String>,
}
impl Display for FileListEntry {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
format!(
"{} {}",
match &self.hash {
Some(s) => &s,
None => "nohash",
},
self.path.to_str().unwrap(),
)
.fmt(f)
}
}
pub struct FilePicker {
/// This bundle specification's root directory.
/// (i.e., where we found bundle.toml)
bundle_dir: PathBuf,
/// Where to place this bundle's files
build_dir: PathBuf,
/// This file picker's statistics
pub stats: PickStatistics,
/// All files we've picked so far.
/// This map's keys are the `path` value of `FileListEntry`.
filelist: HashMap<PathBuf, FileListEntry>,
bundle_spec: BundleSpec,
}
impl FilePicker {
/// Transform a search order file with shortcuts
/// (bash-like brace expansion, like `/a/b/{tex,latex}/c`)
/// into a plain list of strings.
fn expand_search_line(s: &str) -> Result<Vec<String>> {
if !(s.contains('{') || s.contains('}')) {
return Ok(vec![s.to_owned()]);
}
let first = match s.find('{') {
Some(x) => x,
None => bail!("Bad search path format"),
};
let last = match s.find('}') {
Some(x) => x,
None => bail!("Bad search path format"),
};
let head = &s[..first];
let mid = &s[first + 1..last];
if mid.contains('{') || mid.contains('}') {
// Mismatched or nested braces
bail!("Bad search path format");
}
// We matched the first brace, so only the tail may contain further expansions.
let tail = Self::expand_search_line(&s[last + 1..s.len()])?;
if mid.is_empty() {
bail!("Bad search path format");
}
let mut output: Vec<String> = Vec::new();
for m in mid.split(',') {
for t in &tail {
if m.is_empty() {
bail!("Bad search path format");
}
output.push(format!("{}{}{}", head, m, t));
}
}
Ok(output)
}
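// Example for `expand_search_line` (illustrative, not part of this commit):
//   expand_search_line("/a/b/{tex,latex}/c")
//     == vec!["/a/b/tex/c", "/a/b/latex/c"]
// Nested, unbalanced, or empty braces all bail with "Bad search path format".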
/// Patch a file in-place.
/// This should be done after calling `add_file`.
fn apply_patch(
&mut self,
path: &Path,
path_in_source: &Path,
diffs: &HashMap<PathBuf, PathBuf>,
) -> Result<bool> {
// Is this file patched?
if !diffs.contains_key(path_in_source) {
return Ok(false);
}
info!("patching `{}`", path_in_source.to_str().unwrap());
self.stats.patch_applied += 1;
// Discard first line of diff
let diff_file = fs::read_to_string(&diffs[path_in_source]).unwrap();
let (_, diff) = diff_file.split_once('\n').unwrap();
// TODO: don't require `patch`
let mut child = Command::new("patch")
.arg("--quiet")
.arg("--no-backup")
.arg(path)
.stdin(Stdio::piped())
.spawn()
.context("while spawning `patch`")?;
let mut stdin = child.stdin.take().unwrap();
stdin
.write_all(diff.as_bytes())
.context("while passing diff to `patch`")?;
drop(stdin);
child.wait().context("while waiting for `patch`")?;
Ok(true)
}
/// Add a file into the file list.
fn add_to_filelist(&mut self, path: PathBuf, file: Option<&Path>) -> Result<()> {
trace!("adding `{path:?}` to file list");
self.filelist.insert(
path.clone(),
FileListEntry {
path: path.clone(),
hash: match file {
None => None,
Some(f) => {
let mut hasher = Sha256::new();
let _ = std::io::copy(
&mut fs::File::open(f)
.with_context(|| format!("while computing hash of {path:?}"))?,
&mut hasher,
)?;
Some(
hasher
.finalize()
.iter()
.map(|b| format!("{b:02x}"))
.collect::<Vec<_>>()
.concat(),
)
}
},
},
);
Ok(())
}
/// Add a file to this picker's content directory
fn add_file(
&mut self,
path_in_source: &Path,
source: &str,
file_content: &mut dyn Read,
diffs: &HashMap<PathBuf, PathBuf>,
) -> Result<()> {
let target_path = self
.build_dir
.join("content")
.join(source)
.join(path_in_source);
// Path to this file, relative to content dir
let rel = target_path
.strip_prefix(self.build_dir.join("content"))
.unwrap()
.to_path_buf();
trace!("adding {path_in_source:?} from source `{source}`");
// Skip files that already exist
if self.filelist.contains_key(&rel) {
self.stats.conflicts += 1;
warn!("{path_in_source:?} from source `{source}` already exists, skipping");
return Ok(());
}
fs::create_dir_all(match target_path.parent() {
Some(x) => x,
None => bail!("couldn't get parent of target"),
})
.context("failed to create content directory")?;
// Copy to content dir.
let mut file = fs::File::create(&target_path)?;
io::copy(file_content, &mut file).with_context(|| {
format!("while writing file `{path_in_source:?}` from source `{source}`")
})?;
// Apply patch if one exists
self.apply_patch(&target_path, path_in_source, diffs)
.with_context(|| {
format!("while patching `{path_in_source:?}` from source `{source}`")
})?;
self.add_to_filelist(rel, Some(&target_path))
.with_context(|| {
format!("while adding file `{path_in_source:?}` from source `{source}`")
})?;
Ok(())
}
}
// Public methods
impl FilePicker {
/// Create a new file picker working in build_dir
pub fn new(bundle_spec: BundleSpec, build_dir: PathBuf, bundle_dir: PathBuf) -> Result<Self> {
if !build_dir.is_dir() {
bail!("build_dir is not a directory!")
}
if build_dir.read_dir()?.next().is_some() {
bail!("build_dir is not empty!")
}
Ok(FilePicker {
bundle_dir,
build_dir,
filelist: HashMap::new(),
bundle_spec,
stats: PickStatistics::default(),
})
}
/// Iterate over this bundle's sources
pub fn iter_sources(&self) -> impl Iterator<Item = &String> {
self.bundle_spec.inputs.keys()
}
/// Add a directory of files to this bundle under `source_name`,
/// applying patches and checking for replacements.
pub fn add_source(&mut self, cli: &BundleCreateCommand, source: &str) -> Result<()> {
info!("adding source `{source}`");
let input = self.bundle_spec.inputs.get(source).unwrap().clone();
let mut added = 0usize;
// Load diff files
let diffs = input
.patch_dir
.as_ref()
.map(|x| -> Result<HashMap<PathBuf, PathBuf>> {
let mut diffs = HashMap::new();
for entry in WalkDir::new(self.bundle_dir.join(x)) {
// Only iterate files
let entry = entry?;
if !entry.file_type().is_file() {
continue;
}
let entry = entry.into_path();
// Only include files with a `.diff` extension
if entry.extension().map(|x| x != "diff").unwrap_or(true) {
continue;
}
// Read first line of diff to get target path
let diff_file = fs::read_to_string(&entry).unwrap();
let (target, _) = diff_file.split_once('\n').unwrap();
trace!(tectonic_log_source = "select", "adding diff {entry:?}");
for t in Self::expand_search_line(target)?
.into_iter()
.map(PathBuf::from)
{
if diffs.contains_key(&t) {
warn!("the target of diff {entry:?} conflicts with another, ignoring");
continue;
}
diffs.insert(t, entry.clone());
self.stats.patch_found += 1;
}
}
Ok(diffs)
})
.unwrap_or(Ok(HashMap::new()))?;
// Load and compile ignore patterns
let ignore_patterns = {
// Global patterns
let mut ignore = self
.bundle_spec
.bundle
.ignore
.as_ref()
.map(|v| {
v.iter()
.map(|x| Regex::new(&format!("^{x}$")))
.collect::<Result<Vec<Regex>, regex::Error>>()
})
.unwrap_or(Ok(Vec::new()))?;
// Input patterns
ignore.extend(
input
.ignore
.as_ref()
.map(|v| {
v.iter()
.map(|x| Regex::new(&format!("^/{source}/{x}$")))
.collect::<Result<Vec<Regex>, regex::Error>>()
})
.unwrap_or(Ok(Vec::new()))?,
);
ignore
};
let mut source_backend = match &input.source {
BundleInputSource::Directory { path, .. } => Input::new_dir(self.bundle_dir.join(path)),
BundleInputSource::Tarball {
path,
root_dir,
hash,
} => {
let x = match Input::new_tarball(self.bundle_dir.join(path), root_dir.clone()) {
Ok(x) => x,
Err(e) => {
error!("could not add source `{source}` from tarball");
return Err(e);
}
};
let hash = hash.clone();
self.add_file(
Path::new("TAR-SHA256SUM"),
source,
&mut Cursor::new(format!("{}\n", x.hash().unwrap())),
&HashMap::new(),
)?;
if x.hash().unwrap() != hash {
if cli.allow_hash_mismatch {
warn!("hash of tarball for source `{source}` doesn't match expected value");
warn!("expected: {}", x.hash().unwrap());
warn!("got: {}", hash);
} else {
error!(
"hash of tarball for source `{source}` doesn't match expected value"
);
error!("expected: {}", x.hash().unwrap());
error!("got: {}", hash);
bail!("hash of tarball for source `{source}` doesn't match expected value")
}
}
info!("OK, tar hash matches bundle config");
x
}
};
for x in source_backend.iter_files() {
let (rel_file_path, mut read) = x?;
let ignore = {
let f = format!("/{source}/{}", rel_file_path);
let mut ignore = false;
for pattern in &ignore_patterns {
if pattern.is_match(&f) {
ignore = true;
break;
}
}
ignore
};
// Skip ignored files
if ignore {
debug!(
"skipping file {rel_file_path:?} from source `{source}` because of ignore patterns"
);
self.stats.ignored += 1;
continue;
}
// Debug info
if self.filelist.len() % 1937 == 1936 {
info!("selecting files ({source}, {})", self.filelist.len());
}
trace!("adding file {rel_file_path:?} from source `{source}`");
self.add_file(Path::new(&rel_file_path), source, &mut read, &diffs)
.with_context(|| format!("while adding file `{rel_file_path:?}`"))?;
added += 1;
}
self.stats.added.insert(source.to_owned(), added);
Ok(())
}
pub fn finish(&mut self, save_debug_files: bool) -> Result<()> {
info!("writing auxillary files");
// Save search specification
let search = {
let mut search = Vec::new();
let path = self.build_dir.join("content/SEARCH");
for s in &self.bundle_spec.bundle.search_order {
match s {
BundleSearchOrder::Plain(s) => {
for i in Self::expand_search_line(s)? {
search.push(i);
}
}
BundleSearchOrder::Input { input } => {
let s = &self.bundle_spec.inputs.get(input).unwrap().search_order;
if let Some(s) = s {
for line in s {
for i in Self::expand_search_line(&format!("/{input}/{line}"))? {
search.push(i);
}
}
} else {
for i in Self::expand_search_line(&format!("/{input}//"))? {
search.push(i);
}
}
}
}
}
let mut file = File::create(&path).context("while writing SEARCH")?;
for s in &search {
writeln!(file, "{s}")?;
}
self.add_to_filelist(PathBuf::from("SEARCH"), Some(&path))?;
search
};
{
// These aren't hashed, but must be listed anyway.
// The hash is generated from the filelist, so we must add these before hashing.
self.add_to_filelist(PathBuf::from("SHA256SUM"), None)?;
self.add_to_filelist(PathBuf::from("FILELIST"), None)?;
let mut filelist_vec = Vec::from_iter(self.filelist.values());
filelist_vec.sort_by(|a, b| a.path.cmp(&b.path));
let filelist_path = self.build_dir.join("content/FILELIST");
// Save FILELIST.
let mut file = File::create(&filelist_path).context("while writing FILELIST")?;
for entry in filelist_vec {
writeln!(file, "{entry}")?;
}
// Compute and save hash
let mut file = File::create(self.build_dir.join("content/SHA256SUM"))
.context("while writing SHA256SUM")?;
let mut hasher = Sha256::new();
let _ = std::io::copy(&mut fs::File::open(&filelist_path)?, &mut hasher)?;
let hash = hasher
.finalize()
.iter()
.map(|b| format!("{b:02x}"))
.collect::<Vec<_>>()
.concat();
writeln!(file, "{hash}")?;
}
if save_debug_files {
// Generate search-report
{
let mut file = File::create(self.build_dir.join("search-report"))
.context("while writing search-report")?;
for entry in WalkDir::new(self.build_dir.join("content")) {
let entry = entry?;
if !entry.file_type().is_dir() {
continue;
}
let entry = entry
.into_path()
.strip_prefix(self.build_dir.join("content"))
.unwrap()
.to_owned();
let entry = PathBuf::from("/").join(entry);
// Will this directory be searched?
let mut is_searched = false;
for rule in &search {
if rule.ends_with("//") {
// Match start of parent path
// (cutting off the trailing slash)
if entry.starts_with(&rule[0..rule.len() - 1]) {
is_searched = true;
break;
}
} else {
// Match full parent path
if entry.to_str().unwrap() == rule {
is_searched = true;
break;
}
}
}
if !is_searched {
let s = entry.to_str().unwrap();
let t = s.matches('/').count();
writeln!(file, "{}{s}", "\t".repeat(t - 1))?;
}
}
}
}
Ok(())
}
}

v2cli/commands/bundle/select/spec.rs

@@ -0,0 +1,72 @@
use anyhow::{bail, Result};
use serde::Deserialize;
use std::collections::HashMap;
use std::path::PathBuf;
#[derive(Debug, Deserialize, Clone)]
#[serde(deny_unknown_fields)]
pub struct BundleSpec {
pub bundle: BundleConfig,
pub inputs: HashMap<String, BundleInput>,
}
impl BundleSpec {
/// Make sure this bundle specification is valid
pub fn validate(&self) -> Result<()> {
for i in &self.bundle.search_order {
match i {
BundleSearchOrder::Input { ref input } => {
if !self.inputs.contains_key(input) {
bail!("root search order contains unknown input `{input}`");
}
}
BundleSearchOrder::Plain(_) => {}
}
}
Ok(())
}
}
#[derive(Debug, Deserialize, Clone)]
pub struct BundleConfig {
/// The bundle's name
pub name: String,
/// The hash of the resulting ttbv1 bundle
pub expected_hash: String,
/// Search paths for this bundle
pub search_order: Vec<BundleSearchOrder>,
/// Files to ignore from this input
pub ignore: Option<Vec<String>>,
}
#[derive(Debug, Deserialize, Clone)]
#[serde(untagged)]
pub enum BundleSearchOrder {
Plain(String),
Input { input: String },
}
#[derive(Debug, Deserialize, Clone)]
pub struct BundleInput {
pub source: BundleInputSource,
pub ignore: Option<Vec<String>>,
pub patch_dir: Option<PathBuf>,
pub search_order: Option<Vec<String>>,
}
#[derive(Debug, Deserialize, Clone)]
pub enum BundleInputSource {
#[serde(rename = "dir")]
Directory { path: PathBuf },
#[serde(rename = "tarball")]
Tarball {
hash: String,
path: PathBuf,
root_dir: Option<PathBuf>,
},
}
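
For reference, a hedged sketch of a bundle.toml this schema accepts (every name, path, and hash below is illustrative, not taken from the commit):

[bundle]
name = "mybundle"
expected_hash = "<sha256 written to content/SHA256SUM>"
search_order = [
    "/texlive/tex/{latex,generic}//",
    { input = "extra" },
]

[inputs.texlive]
patch_dir = "patches"
search_order = ["tex//"]

# Tarball hash is verified unless --allow-hash-mismatch is passed.
[inputs.texlive.source.tarball]
path = "texlive.tar"
hash = "<sha256 of texlive.tar>"
root_dir = "texlive-2023"

[inputs.extra]
source = { dir = { path = "extra-files" } }
ignore = ['.*\.log']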

v2cli/mod.rs

@@ -14,6 +14,7 @@ use tectonic::{
};
use tectonic_errors::prelude::anyhow;
use tectonic_status_base::plain::PlainStatusBackend;
use tracing::level_filters::LevelFilter;
use self::commands::{
build::BuildCommand,
@@ -91,6 +92,13 @@ pub fn v2_main(effective_args: &[OsString]) {
let args = V2CliOptions::parse_from(effective_args);
tracing_subscriber::fmt()
.with_max_level(LevelFilter::INFO)
.with_target(false)
.without_time()
.with_ansi(args.cli_color.should_enable())
.init();
// Command-specific customizations before we do our centralized setup.
// This is a semi-hack so that we can set up certain commands to ensure
// that status info is always printed to stderr.