diff --git a/Cargo.lock b/Cargo.lock index 348ae750..6b42fe22 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2782,6 +2782,7 @@ dependencies = [ "tectonic_geturl", "tectonic_io_base", "tectonic_status_base", + "url", "zip", ] diff --git a/crates/bridge_core/src/lib.rs b/crates/bridge_core/src/lib.rs index ee61ec74..0a7405db 100644 --- a/crates/bridge_core/src/lib.rs +++ b/crates/bridge_core/src/lib.rs @@ -106,13 +106,7 @@ pub trait DriverHooks { /// argument specifies the cryptographic digest of the data that were /// written. Note that this function takes ownership of the name and /// digest. - fn event_output_closed( - &mut self, - _name: String, - _digest: DigestData, - _status: &mut dyn StatusBackend, - ) { - } + fn event_output_closed(&mut self, _name: String, _digest: DigestData) {} /// This function is called when an input file is closed. The "digest" /// argument specifies the cryptographic digest of the data that were @@ -560,7 +554,7 @@ impl<'a> CoreBridgeState<'a> { rv = true; } let (name, digest) = oh.into_name_digest(); - self.hooks.event_output_closed(name, digest, self.status); + self.hooks.event_output_closed(name, digest); break; } } diff --git a/crates/bundles/Cargo.toml b/crates/bundles/Cargo.toml index 11eb48e1..5fc0d65d 100644 --- a/crates/bundles/Cargo.toml +++ b/crates/bundles/Cargo.toml @@ -5,7 +5,7 @@ [package] name = "tectonic_bundles" -version = "0.0.0-dev.0" # assigned with cranko (see README) +version = "0.0.0-dev.0" # assigned with cranko (see README) authors = ["Peter Williams "] description = """ Tectonic "bundle" (support file collection) implementations. 
@@ -25,6 +25,7 @@ tectonic_geturl = { path = "../geturl", version = "0.0.0-dev.0", default-feature tectonic_io_base = { path = "../io_base", version = "0.0.0-dev.0" } tectonic_status_base = { path = "../status_base", version = "0.0.0-dev.0" } zip = { version = "^0.6", default-features = false, features = ["deflate"] } +url = "^2.0" [features] default = ["geturl-reqwest"] diff --git a/crates/bundles/src/cache.rs b/crates/bundles/src/cache.rs index 4c8cfa98..c347bce2 100644 --- a/crates/bundles/src/cache.rs +++ b/crates/bundles/src/cache.rs @@ -4,730 +4,24 @@ //! Local caching of bundle data. //! //! This module implements Tectonic’s local filesystem caching mechanism for TeX -//! support files. To enable efficient caching with proper invalidation -//! semantics, the caching layer does *not* merely wrap [`IoProvider`] -//! implementations. Instead, a cacheable bundle must implement the -//! [`CacheBackend`] trait defined in this module. An example of such a bundle -//! is the [`crate::itar::IndexedTarBackend`] for bundles served over HTTP. -//! -//! In order to access a cacheable bundle, you need a handle to a local -//! [`Cache`], probably obtained with [`Cache::get_user_default()`], and a URL, -//! which you’ll pass to [`Cache::open()`]. When using this function, you must -//! explicitly specify the concrete [`CacheBackend`] type that will service -//! backend requests. +//! support files. To make a cachable bundle, wrap any [`CachableBundle`] with a +//! [`BundleCache`]. 
-use fs2::FileExt; +use crate::{Bundle, CachableBundle, FileIndex, FileInfo}; use std::{ - collections::HashMap, - env, fs::{self, File}, - io::{BufRead, BufReader, Error as IoError, ErrorKind as IoErrorKind, Read, Write}, + io::{self, BufReader, Read, Write}, path::{Path, PathBuf}, + process, str::FromStr, }; -use tectonic_errors::prelude::*; +use tectonic_errors::{anyhow::Context, prelude::*}; use tectonic_io_base::{ app_dirs, - digest::{self, Digest, DigestData}, - try_open_file, InputHandle, InputOrigin, IoProvider, OpenResult, + digest::{self, DigestData}, + InputHandle, InputOrigin, IoProvider, OpenResult, }; -use tectonic_status_base::{tt_warning, StatusBackend}; - -use crate::Bundle; - -/// A cache of data from one or more bundles using the local filesystem. -#[derive(Debug)] -pub struct Cache { - root: PathBuf, -} - -impl Cache { - /// Get a handle to a bundle cache, using default per-user settings. - /// - /// The cache location defaults to the `AppDataType::UserCache` - /// provided by `app_dirs2` but can be overwritten using the - /// `TECTONIC_CACHE_DIR` environment variable. - /// - /// This method may perform I/O to create the user cache directory, so it is - /// fallible. (Due to its `app_dirs2` implementation, it would have to be - /// fallible even if it didn't perform I/O.) - pub fn get_user_default() -> Result { - let env_cache_path = env::var_os("TECTONIC_CACHE_DIR"); - - let cache_path = match env_cache_path { - Some(env_cache_path) => { - let env_cache_path = env_cache_path.into(); - fs::create_dir_all(&env_cache_path)?; - env_cache_path - } - None => app_dirs::ensure_user_cache_dir("")?, - }; - - Ok(Cache { root: cache_path }) - } - - /// Get a handle to a bundle cache, using a custom cache directory. - pub fn get_for_custom_directory>(root: P) -> Self { - Cache { root: root.into() } - } - - /// Get the root directory of this cache. - pub fn root(&self) -> &Path { - &self.root - } - - /// Open a bundle through the cache layer. 
- /// - /// The URL specifies where the backend data live; it must be understood by, - /// and contain data appropriate for, the [`CacheBackend`] type associated - /// with the bundle that you’re creating. If *only_cached* is true, this - /// instance will never actually connect to the backend; if any uncached - /// files are requested, they will be represented as "not found". - pub fn open( - &mut self, - url: &str, - only_cached: bool, - status: &mut dyn StatusBackend, - ) -> Result> { - CachingBundle::new(url, only_cached, status, &self.root) - } -} - -/// Information describing a cache backend. -/// -/// This type is returned by a [`CacheBackend`] on a "pull", a first-time -/// connection to the backend. It contains the detailed information that needs -/// to be saved in the cache to provide for efficient operation in subsequent -/// uses. -#[derive(Clone, Debug)] -pub struct BackendPullData { - /// The final, "resolved" URL pointing to the backing content, in the case - /// that the starting URL redirects. - pub resolved_url: String, - - /// The digest of the overall bundle content. - pub digest: DigestData, - - /// The bundle indexing data, allowing efficient retrieval of files from the - /// backend. - /// - /// This is a multi-line string, where each line is an entry for a file. - /// These lines will be parsed by [`CacheBackend::parse_index_line`]. This - /// string will potentially contain several megabytes of data. - pub index: String, -} - -/// A source of files that can supply a cache-based bundle. -/// -/// This trait is combined with [`CachingBundle`] to implement a caching bundle -/// interface. -pub trait CacheBackend: Sized { - /// Information about a file stored in the backend. - /// - /// This information should be serializable to a single line of text. 
It is - /// parsed out of the contents of [`BackendPullData::index`] by - /// [`Self::parse_index_line`], and later passed to [`Self::get_file`] to - /// enable the backend to efficiently retrieve the file in question. For - /// instance, it might contain offset information informing the backend how - /// to efficiently retrieve the file in question. - type FileInfo: Clone; - - /// Connect to the backend and download its key information. - /// - /// This method is used the first time that the cache connects to a backend. - /// The return value includes a package of information ([`BackendPullData`]) - /// that the cache will store to enable efficient operation on subsequent - /// requests. - fn open_with_pull( - start_url: &str, - status: &mut dyn StatusBackend, - ) -> Result<(Self, BackendPullData)>; - - /// Connect to the backend and fetch validation information. - /// - /// This method is used when this backend has already been accessed by the - /// cache during a previous execution. If we need to download more data from - /// the backend, we first need to verify that the cached data still look - /// valid. This method asks the backend to pull its "digest file" (currently - /// named `SHA256SUM`) and return its contents for validate. The method - /// should return `Err` on actual errors, and `Ok(None)` if there are any - /// indications that the cached indexing data should be thrown out and - /// re-fetched. - fn open_with_quick_check( - resolved_url: &str, - digest_file_info: &Self::FileInfo, - status: &mut dyn StatusBackend, - ) -> Result>; - - /// Parse a line of the indexing data. - /// - /// The returned tuple should give the file name and an opaque - /// [`Self::FileInfo`] that may help the backend retrieve the file in the - /// future. The indexing data are originally obtained from - /// [`BackendPullData::index`], but are stored in a file locally. 
This - /// method should return an error if this particular line of index data - /// seems to be malformatted. Such lines will probably just be silently - /// ignored. - fn parse_index_line(line: &str) -> Result<(String, Self::FileInfo)>; - - /// Obtain a file from the backend. - /// - /// Backend-specific retrieval information can be passed in the - /// [`Self::FileInfo`] item, which is constructed from the backend’s index - /// information. The file should be returned as one large byte vector. - fn get_file( - &mut self, - name: &str, - info: &Self::FileInfo, - status: &mut dyn StatusBackend, - ) -> Result>; -} - -/// Information about a cached file. -#[derive(Clone, Copy, Debug)] -struct CachedFileInfo { - /// The length of the file in bytes. - /// - /// This field isn't currently used, but seems handy to keep around. - _length: u64, - - /// The digest of the file contents. - /// - /// This digest is used to locate the cached data on disk. - digest: DigestData, -} - -/// A caching bundle that obtains files from some a backend. -/// -/// This bundle implementation is the key to Tectonic’s ability to download TeX -/// support files on the fly. The cache backend is generally expected to be some -/// kind of network-based resource, and the caching scheme is designed so that a -/// document build can avoid touching the network altogether if no new files -/// need to be downloaded. -#[derive(Debug)] -pub struct CachingBundle { - /// The URL specifying where to start looking for the bundle data. - /// - /// The caching layer maintains two URLs: the "start" URL and the "resolved" - /// URL. The goal here is to be able to store a single URL for fetching - /// data, but maintain the capability to update the bundle data behind that - /// URL. Requests to the start URL may get redirected (one or more times) - /// until eventually we arrive at the "resolved" URL. 
While the redirection - /// of the start URL might change, the contents of a resolved URL should - /// never change once published. - start_url: String, - - /// The "resolved" URL for the backing data. - /// - /// The bundle data located at this URL should never change. - resolved_url: String, - - /// The cached value of the backend’s content digest. - /// - /// This is stored in a file at [`Self::digest_path`]. This value may be - /// inaccurate, if the backing bundle has been updated (or if the cache is - /// corrupt, etc.) and we haven't yet synchronized with the backend and - /// discovered that fact. - cached_digest: DigestData, - - /// Information about all of the files that have been cached locally. - /// - /// This maps filenames to summary information that can then be used to - /// retrieve file data from [`Self::data_base`]. The contents are loaded - /// from the manifest file if the cache is non-empty. - contents: HashMap, - - /// Information about all of the files known to the backend. - /// - /// This maps filenames to [`CacheBackend::FileInfo`] data that can be used - /// to retrieve a file from the backend if needed. - index: HashMap, - - /// If true, only use cached files -- never connect to the backend. - /// - /// This option can be useful if we are operating disconnected from the - /// network (e.g., on an airplane). If you add a new figure to your - /// document, the engine will inquire about several related files that it - /// thinks might exist. Without this option, such an inquiry might require - /// Tectonic to hit the network, when the user knows for sure that the - /// bundle is not going to contain these files. - only_cached: bool, - - /// The connection to the cache backend, maybe. - /// - /// This field will be `None` if there are locally cached data present and - /// there has not yet been a need to connect to the backend. 
If it becomes - /// necessary to "pull" and/or download a new file from the backend, this - /// value will become `Some` — it represents something like an open network - /// connection. - backend: Option, - - /// The path to a file containing a cached copy of the backend's content - /// digest. - /// - /// This file path is based on [`Self::start_url`]. - digest_path: PathBuf, - - /// A directory where we will save [`Self::resolved_url`]. - /// - /// We need to cache `resolved_url` to enable the "quick check" backend - /// reconnection path. The actual cache file path is based on the backend’s - /// content digest. - resolved_base: PathBuf, - - /// A directory where we will save the cache manifest. - /// - /// The manifest file contains information about the files that have - /// actually been fetched from the backend and saved locally. The actual - /// manifest file path is based on the backend’s content digest. - manifest_path: PathBuf, - - /// A directory where we will save cached file data. - /// - /// This directory contains the actual cached file contents, in a directory - /// structured based on the digest of each file’s content. - data_base: PathBuf, -} - -/// A locally-cached analogue of [`BackendPullData`]. -/// -/// This data structure is what we try to recover from the cache to see if we -/// can avoid connecting to the backend. -#[derive(Clone, Debug)] -struct CachedPullData { - /// The saved backend content digest. - pub digest: DigestData, - - /// The saved "resolved URL" for the backend. - pub resolved_url: String, - - /// The saved indexing information for the backend. - pub index: HashMap, -} - -impl CachingBundle { - fn new( - start_url: &str, - only_cached: bool, - status: &mut dyn StatusBackend, - cache_root: &Path, - ) -> Result { - // Set up our paths. 
- let digest_path = - ensure_cache_dir(cache_root, "urls")?.join(app_dirs::app_dirs2::sanitized(start_url)); - let resolved_base = ensure_cache_dir(cache_root, "redirects")?; - let index_base = ensure_cache_dir(cache_root, "indexes")?; - let manifest_base = ensure_cache_dir(cache_root, "manifests")?; - let data_base = ensure_cache_dir(cache_root, "files")?; - - // The whole point of this cache is to avoid connecting to the backend - // if at all possible. So we first see if we have cached the "pull data" - // that describe the overall backend contents. - - let mut backend = None; - - let cached_pull_data = - match load_cached_pull_data::(&digest_path, &resolved_base, &index_base)? { - Some(c) => c, - None => { - // Some portion of the required cached data is missing. We need to - // do a complete pull and then cache the results. - - let (new_backend, pull_data) = CB::open_with_pull(start_url, status)?; - backend = Some(new_backend); - - let digest_text = pull_data.digest.to_string(); - file_create_write(&digest_path, |f| writeln!(f, "{}", &digest_text))?; - file_create_write(make_txt_path(&resolved_base, &digest_text), |f| { - f.write_all(pull_data.resolved_url.as_bytes()) - })?; - file_create_write(make_txt_path(&index_base, &digest_text), |f| { - f.write_all(pull_data.index.as_bytes()) - })?; - - // Now that we've done that, load_cached_pull_data() really ought to succeed ... - atry!( - load_cached_pull_data::(&digest_path, &resolved_base, &index_base)?; - ["cache files missing even after they were created"] - ) - } - }; - - // We call this `cached_digest`, but if `backend` is Some, it is a - // validated, fresh digest. - - let cached_digest = cached_pull_data.digest; - - // Now that we have the backend content digest, we know which manifest - // to use. Read it in, if it exists. 
- - let manifest_path = make_txt_path(&manifest_base, &cached_digest.to_string()); - let mut contents = HashMap::new(); - - match try_open_file(&manifest_path) { - OpenResult::NotAvailable => {} - OpenResult::Err(e) => { - return Err(e); - } - OpenResult::Ok(mfile) => { - // Note that the lock is released when the file is closed, - // which is good since BufReader::new() and BufReader::lines() - // consume their objects. - if let Err(e) = mfile.lock_shared() { - tt_warning!(status, "failed to lock manifest file \"{}\" for reading; this might be fine", - manifest_path.display(); e.into()); - } - - let f = BufReader::new(mfile); - - for res in f.lines() { - let line = res?; - let mut bits = line.rsplitn(3, ' '); - - let (original_name, length, digest) = - match (bits.next(), bits.next(), bits.next(), bits.next()) { - (Some(s), Some(t), Some(r), None) => (r, t, s), - _ => continue, - }; - - let name = original_name.to_owned(); - - let length = match length.parse::() { - Ok(l) => l, - Err(_) => continue, - }; - - let digest = if digest == "-" { - continue; - } else { - match DigestData::from_str(digest) { - Ok(d) => d, - Err(e) => { - tt_warning!(status, "ignoring bad digest data \"{}\" for \"{}\" in \"{}\"", - &digest, original_name, manifest_path.display() ; e); - continue; - } - } - }; - - contents.insert( - name, - CachedFileInfo { - _length: length, - digest, - }, - ); - } - } - } - - // All set. - - Ok(CachingBundle { - start_url: start_url.to_owned(), - resolved_url: cached_pull_data.resolved_url, - digest_path, - cached_digest, - manifest_path, - data_base, - resolved_base, - contents, - only_cached, - backend, - index: cached_pull_data.index, - }) - } - - /// Save data about a file to our local cache manifest. 
- fn save_to_manifest(&mut self, name: &str, length: u64, digest: DigestData) -> Result<()> { - let digest_text = digest.to_string(); - - // Due to a quirk about permissions for file locking on Windows, we - // need to add `.read(true)` to be able to lock a file opened in - // append mode. - let mut man = fs::OpenOptions::new() - .append(true) - .create(true) - .read(true) - .open(&self.manifest_path)?; - - // Lock will be released when file is closed at the end of this function. - atry!( - man.lock_exclusive(); - ["failed to lock manifest file \"{}\" for writing", self.manifest_path.display()] - ); - - // If a filename contains newline characters, it will mess up our - // line-based manifest format. Be paranoid and refuse to record such - // filenames. - if !name.contains(['\n', '\r']) { - writeln!(man, "{name} {length} {digest_text}")?; - } - - self.contents.insert( - name.to_owned(), - CachedFileInfo { - _length: length, - digest, - }, - ); - - Ok(()) - } - - /// Ensure that the backend is connected and valid. - /// - /// Here we do a "quick check" to see if the backend's digest is what we - /// expect. If not, we do a lame thing where we error out but set things up - /// so that things should succeed if the program is re-run. Exactly the lame - /// TeX user experience that I've been trying to avoid! - /// - /// After this function has been called, you can assume that `self.backend` - /// is Some. - fn ensure_backend_validity(&mut self, status: &mut dyn StatusBackend) -> Result<()> { - // If backend is Some, we already have a validated connection to it. - if self.backend.is_some() { - return Ok(()); - } - - // Do the quick check. If anything goes wrong, eat the error and try a - // fresh pull. - if let Some(info) = self.index.get(digest::DIGEST_NAME) { - if let Ok(Some((backend, digest))) = - CB::open_with_quick_check(&self.resolved_url, info, status) - { - if self.cached_digest == digest { - // We managed to pull some data that match the digest. 
We - // can be quite confident that the bundle is what we expect - // it to be. - self.backend = Some(backend); - return Ok(()); - } - } - } - - // The quick check failed. Try to pull all data to make sure that it - // wasn't a network error or that the resolved URL hasn't been updated. - let (new_backend, pull_data) = CB::open_with_pull(&self.start_url, status)?; - - if self.cached_digest != pull_data.digest { - // Crap! The backend isn't what we thought it was. We may have been - // giving incorrect results if we pulled files out of the cache - // before this invocation. Rewrite the digest file so that next time - // we'll start afresh, then bail. - file_create_write(&self.digest_path, |f| writeln!(f, "{}", pull_data.digest))?; - bail!("backend digest changed; rerun tectonic to use updated information"); - } - - if self.resolved_url != pull_data.resolved_url { - // The resolved URL has changed, but the digest is the same. So - // let's just update the URL and keep going. - let resolved_path = make_txt_path(&self.resolved_base, &pull_data.digest.to_string()); - file_create_write(resolved_path, |f| { - f.write_all(pull_data.resolved_url.as_bytes()) - })?; - - self.resolved_url = pull_data.resolved_url; - } - - // OK, it seems that everything is in order. - self.backend = Some(new_backend); - Ok(()) - } - - /// Make sure that a file is available, and return its filesystem path. - /// - /// If the file is already cached, just pull it out. Otherwise, fetch it - /// from the backend. - fn ensure_file_availability( - &mut self, - name: &str, - status: &mut dyn StatusBackend, - ) -> OpenResult { - // Already in the cache? - if let Some(info) = self.contents.get(name) { - return match info.digest.create_two_part_path(&self.data_base) { - Ok(p) => OpenResult::Ok(p), - Err(e) => OpenResult::Err(e), - }; - } - - // No, it's not. Are we in cache-only mode? - if self.only_cached { - return OpenResult::NotAvailable; - } - - // Is the file in the backend at all? 
- let info = match self.index.get(name).cloned() { - Some(info) => info, - None => return OpenResult::NotAvailable, - }; - - // Yes, it is. Time to fetch it! In order to do that, we need to ensure - // that we have a valid backend connection. - if let Err(e) = self.ensure_backend_validity(status) { - return OpenResult::Err(e); - } - - // Cool, we're connected to the backend now. Get the file. Note that we - // don't need to check for updates to the index after the - // ensure-validity, because we require that the contents of the bundle - // are unchanged (as expressed in the content digest): if they did - // change, ensure_backend_validity() would have bailed, because we might - // have returned incorrect data for previous requests that hit the - // cache. - - let content = match self.backend.as_mut().unwrap().get_file(name, &info, status) { - Ok(c) => c, - Err(e) => return OpenResult::Err(e), - }; - - let length = content.len(); - - let mut digest_builder = digest::create(); - digest_builder.update(&content); - let digest = DigestData::from(digest_builder); - - let final_path = match digest.create_two_part_path(&self.data_base) { - Ok(p) => p, - Err(e) => return OpenResult::Err(e), - }; - - // Perform a racy check for the destination existing, because this - // matters on Windows: if the destination is already there, we'll get - // an error because the destination is marked read-only. Assuming - // non-pathological filesystem manipulation, though, we'll only be - // subject to the race once. - - if !final_path.exists() { - if let Err(e) = file_create_write(&final_path, |f| f.write_all(&content)) { - return OpenResult::Err(e); - } - - // Now we can make the file readonly. It would be nice to set the - // permissions using the already-open file handle owned by the - // tempfile, but mkstemp doesn't give us access. 
- let mut perms = match fs::metadata(&final_path) { - Ok(p) => p, - Err(e) => { - return OpenResult::Err(e.into()); - } - } - .permissions(); - perms.set_readonly(true); - - if let Err(e) = fs::set_permissions(&final_path, perms) { - return OpenResult::Err(e.into()); - } - } - - // And finally add a record of this file to our manifest. Note that - // we're opening and closing the manifest every time we cache a new - // file; not so efficient, but whatever. - - if let Err(e) = self.save_to_manifest(name, length as u64, digest) { - return OpenResult::Err(e); - } - - OpenResult::Ok(final_path) - } -} - -impl IoProvider for CachingBundle { - fn input_open_name( - &mut self, - name: &str, - status: &mut dyn StatusBackend, - ) -> OpenResult { - let path = match self.ensure_file_availability(name, status) { - OpenResult::Ok(p) => p, - OpenResult::NotAvailable => return OpenResult::NotAvailable, - OpenResult::Err(e) => return OpenResult::Err(e), - }; - - let f = match File::open(path) { - Ok(f) => f, - Err(e) => return OpenResult::Err(e.into()), - }; - - OpenResult::Ok(InputHandle::new_read_only( - name, - BufReader::new(f), - InputOrigin::Other, - )) - } -} - -impl Bundle for CachingBundle { - fn get_digest(&mut self, _status: &mut dyn StatusBackend) -> Result { - Ok(self.cached_digest) - } - - fn all_files(&mut self, status: &mut dyn StatusBackend) -> Result> { - if !self.only_cached { - self.ensure_backend_validity(status)?; - } - Ok(self.index.keys().cloned().collect()) - } -} - -/// Load the cached "pull" data for a backend. -/// -/// If any of the files are not found or otherwise have issues, return None. -fn load_cached_pull_data( - digest_path: &Path, - resolved_base: &Path, - index_base: &Path, -) -> Result>> { - // Convert file-not-found errors into None. 
- return match inner::(digest_path, resolved_base, index_base) { - Ok(r) => Ok(Some(r)), - Err(e) => { - if let Some(ioe) = e.downcast_ref::() { - if ioe.kind() == IoErrorKind::NotFound { - return Ok(None); - } - } - - Err(e) - } - }; - - fn inner( - digest_path: &Path, - resolved_base: &Path, - index_base: &Path, - ) -> Result> { - let digest_text = { - let f = File::open(digest_path)?; - let mut digest_text = String::with_capacity(digest::DIGEST_LEN); - f.take(digest::DIGEST_LEN as u64) - .read_to_string(&mut digest_text)?; - digest_text - }; - - let resolved_path = make_txt_path(resolved_base, &digest_text); - let resolved_url = fs::read_to_string(resolved_path)?; - - let index_path = make_txt_path(index_base, &digest_text); - let index = { - let f = File::open(index_path)?; - let mut index = HashMap::new(); - for line in BufReader::new(f).lines() { - if let Ok((name, info)) = CB::parse_index_line(&line?) { - index.insert(name, info); - } - } - index - }; - - Ok(CachedPullData { - digest: DigestData::from_str(&digest_text)?, - resolved_url, - index, - }) - } -} +use tectonic_status_base::StatusBackend; /// A convenience method to provide a better error message when writing to a created file. fn file_create_write(path: P, write_fn: F) -> Result<()> @@ -748,17 +42,330 @@ where Ok(()) } -/// Ensure that a directory exists. -fn ensure_cache_dir(root: &Path, path: &str) -> Result { - let full_path = root.join(path); - atry!( - fs::create_dir_all(&full_path); - ["failed to create directory `{}` or one of its parents", full_path.display()] - ); - Ok(full_path) +// Make sure a directory exists. +// "inline" version is for convenience. +macro_rules! 
ensure_dir { + (inline, $path:expr) => { + { + atry!( + fs::create_dir_all(&$path); + ["failed to create directory `{}` or one of its parents", $path.display()] + ); + $path + } + }; + + ($path:expr) => { + atry!( + fs::create_dir_all(&$path); + ["failed to create directory `{}` or one of its parents", $path.display()] + ); + }; } -/// Convenience to generate a text filename -fn make_txt_path(base: &Path, name: &str) -> PathBuf { - base.join(name).with_extension("txt") +/// A cache wrapper for another bundle. +/// +/// This bundle implementation is the key to Tectonic’s ability to download TeX +/// support files on the fly. This is usually used to wrap some kind of network- +/// based bundle, but can be used with any struct that implements [`Bundle`]. +/// +/// The caching scheme here is designed so that a document build may avoid +/// touching the network altogether if no new files need to be downloaded. +pub struct BundleCache<'this, T> { + /// If true, only use cached files -- never connect to the backend. + /// + /// This option can be useful if we are operating disconnected from the + /// network (e.g., on an airplane). If you add a new figure to your + /// document, the engine will inquire about several related files that it + /// thinks might exist. Without this option, such an inquiry might require + /// Tectonic to hit the network, when the user knows for sure that the + /// bundle is not going to contain these files. + only_cached: bool, + + /// The bundle we're wrapping. When files don't exist in the cache, + /// we'll get them from here. + bundle: Box>, + + /// The root directory of this cache. + /// All other paths are subdirectories of this path. + cache_root: PathBuf, + + // The hash of the bundle we're caching. + bundle_hash: DigestData, +} + +impl<'this, T: FileIndex<'this>> BundleCache<'this, T> { + /// Make a new filesystem-backed cache from `bundle`. 
+ /// + /// This method will fail if we can't connect to the bundle AND + /// we don't already have it in our cache. + /// Other than that, this method does not require network access. + pub fn new( + mut bundle: Box>, + only_cached: bool, + cache_root: Option, + ) -> Result { + // If cache_root is none, use default location. + let cache_root = match cache_root { + None => app_dirs::get_user_cache_dir("bundles").context("while making cache root")?, + Some(p) => ensure_dir!(inline, p), + }; + + let hash_dir = ensure_dir!(inline, &cache_root.join("hashes")); + let hash_file = hash_dir.join(app_dirs::app_dirs2::sanitized(&bundle.get_location())); + + let saved_hash = { + if !hash_file.exists() { + None + } else { + match File::open(&hash_file) { + Err(e) => return Err(e.into()), + Ok(f) => { + let mut digest_text = String::with_capacity(digest::DIGEST_LEN); + f.take(digest::DIGEST_LEN as u64) + .read_to_string(&mut digest_text) + .with_context(|| { + format!("while reading hash from {hash_file:?} in cache") + })?; + Some( + DigestData::from_str(&digest_text) + .with_context(|| format!("while parsing hash `{digest_text}`"))?, + ) + } + } + } + }; + + let live_hash = bundle.get_digest(); + + // Check remote bundle digest + let bundle_hash: DigestData = match (saved_hash, live_hash) { + (None, Err(e)) => { + bail!("this bundle isn't cached, and we couldn't get it from the internet. Error: {e}"); + } + (Some(s), Ok(l)) => { + if s != l { + // Silently update hash in cache. + // We don't need to delete anything, since data is indexed by hash. 
+ // TODO: show a warning + file_create_write(&hash_file, |f| writeln!(f, "{}", &l.to_string())) + .with_context(|| { + format!("while updating bundle hash in {hash_file:?} in cache") + })?; + l + } else { + l + } + } + (None, Ok(l)) => { + file_create_write(&hash_file, |f| writeln!(f, "{}", &l.to_string())).with_context( + || format!("while writing bundle hash to {hash_file:?} in cache"), + )?; + l + } + (Some(h), Err(_)) => h, // Bundle is offline, but we're ok. + }; + + let bundle = BundleCache { + only_cached, + bundle, + cache_root, + bundle_hash, + }; + + // Right now, files are stored in + // `/data//. + // This works for now, but may cause issues if we add multiple + // bundle formats with incompatible path schemes. We assume that + // all bundles with the same hash use the same path scheme, + // which is true for network TTB and fs TTB. + // Adding support for multiple formats of a single bundle hash + // shouldn't be too hard, but isn't necessary yet. + ensure_dir!(&bundle + .cache_root + .join(format!("data/{}", bundle.bundle_hash))); + + Ok(bundle) + } + + /// Build a cache path for the given bundle file + fn get_file_path(&self, info: &T::InfoType) -> PathBuf { + let mut out = self.cache_root.clone(); + out.push(format!("data/{}", self.bundle_hash)); + out.push(info.path()); + out + } + + /// Build a temporary path for the given bundle file + /// To ensure safety with multiple instances of tectonic, + /// files are first downloaded to a known-unique location, then renamed. + fn get_file_path_tmp(&self, info: &T::InfoType) -> PathBuf { + let mut out = self.cache_root.clone(); + out.push(format!("data/{}", self.bundle_hash)); + out.push(format!("{}-tmp-pid{}", info.path(), process::id())); + out + } + + fn ensure_index(&mut self) -> Result<()> { + let target = self + .cache_root + .join(format!("data/{}.index", self.bundle_hash)); + + // We check for two things here: + // - that the bundle index is initialized + // - that the bundle index is cached. 
+ // + // It would be nice to assume that the bundle index is never initialized + // before this function is called, but we can't do that. Unlike ttb, + // itar bundles cannot retrieve the bundle hash without loading the index. + if target.exists() { + if self.bundle.index().is_initialized() { + return Ok(()); + } + + // Initialize bundle index using cached file + let mut file = File::open(&target) + .with_context(|| format!("while opening index {target:?} in cache"))?; + self.bundle + .initialize_index(&mut file) + .with_context(|| format!("while inititalizing index using cached {target:?}"))?; + } else { + // Download index + + // We first download to a temporary file, rename to target + // Makes sure that parallel runs of tectonic don't break the index + let tmp_target = self.cache_root.join(format!( + "data/{}.index-tmp-pid{}", + self.bundle_hash, + process::id() + )); + + let mut reader = self + .bundle + .get_index_reader() + .context("while getting index reader")?; + let mut file = File::create(&tmp_target) + .with_context(|| format!("while creating index {tmp_target:?} in cache"))?; + io::copy(&mut reader, &mut file) + .with_context(|| format!("while writing index {tmp_target:?} in cache"))?; + drop(file); + + fs::rename(&tmp_target, &target).with_context(|| { + format!("while renaming index {tmp_target:?} to {target:?} in cache") + })?; + + if self.bundle.index().is_initialized() { + return Ok(()); + } + + let mut file = File::open(&target) + .with_context(|| format!("while opening index from {target:?} in cache"))?; + self.bundle + .initialize_index(&mut file) + .with_context(|| format!("while initializing index {target:?} in cache"))?; + } + + Ok(()) + } + + /// Get a FileInfo from a name. + /// This returns (in_cache, info), where in_cache is true + /// if this file is already in our cache and can be retrieved + /// without touching the backing bundle. 
+ fn get_fileinfo(&mut self, name: &str) -> OpenResult<(bool, T::InfoType)> { + if let Err(e) = self.ensure_index() { + return OpenResult::Err(e); + }; + + let info = match self.bundle.search(name) { + Some(i) => i, + None => return OpenResult::NotAvailable, + }; + + let target = self.get_file_path(&info); + OpenResult::Ok((target.exists(), info)) + } + + /// Fetch a file from the bundle backing this cache. + /// Returns a path to the file that was created. + fn fetch_file( + &mut self, + info: T::InfoType, + status: &mut dyn StatusBackend, + ) -> OpenResult { + let target = self.get_file_path(&info); + match fs::create_dir_all(target.parent().unwrap()) { + Ok(()) => {} + Err(e) => return OpenResult::Err(e.into()), + }; + + // Already in the cache? + if target.exists() { + return OpenResult::Ok(target); + } + + // No, it's not. Are we in cache-only mode? + if self.only_cached { + return OpenResult::NotAvailable; + } + + // Get the file. + let mut handle = match self.bundle.open_fileinfo(&info, status) { + OpenResult::Ok(c) => c, + OpenResult::Err(e) => return OpenResult::Err(e), + OpenResult::NotAvailable => return OpenResult::NotAvailable, + }; + + // Download to a known-unique temporary location, then move. + // This prevents issues when running multiple processes. 
+ let tmp_path = self.get_file_path_tmp(&info); + if let Err(e) = file_create_write(&tmp_path, |f| io::copy(&mut handle, f).map(|_| ())) { + return OpenResult::Err(e); + } + if let Err(e) = fs::rename(&tmp_path, &target) { + return OpenResult::Err(e.into()); + }; + + OpenResult::Ok(target) + } +} + +impl<'this, T: FileIndex<'this>> IoProvider for BundleCache<'this, T> { + fn input_open_name( + &mut self, + name: &str, + status: &mut dyn StatusBackend, + ) -> OpenResult { + let path = match self.get_fileinfo(name) { + OpenResult::NotAvailable => return OpenResult::NotAvailable, + OpenResult::Err(e) => return OpenResult::Err(e), + OpenResult::Ok((true, f)) => self.get_file_path(&f), + OpenResult::Ok((false, f)) => match self.fetch_file(f, status) { + OpenResult::Ok(p) => p, + OpenResult::NotAvailable => return OpenResult::NotAvailable, + OpenResult::Err(e) => return OpenResult::Err(e), + }, + }; + + let f = match File::open(path) { + Ok(f) => f, + Err(e) => return OpenResult::Err(e.into()), + }; + + OpenResult::Ok(InputHandle::new_read_only( + name, + BufReader::new(f), + InputOrigin::Other, + )) + } +} + +impl<'this, T: FileIndex<'this>> Bundle for BundleCache<'this, T> { + fn get_digest(&mut self) -> Result { + Ok(self.bundle_hash) + } + + fn all_files(&self) -> Vec { + self.bundle.all_files() + } } diff --git a/crates/bundles/src/dir.rs b/crates/bundles/src/dir.rs index 1ec980a7..8a406730 100644 --- a/crates/bundles/src/dir.rs +++ b/crates/bundles/src/dir.rs @@ -5,11 +5,13 @@ use std::{ fs, + io::Read, path::{Path, PathBuf}, + str::FromStr, }; use tectonic_errors::prelude::*; -use tectonic_io_base::{filesystem::FilesystemIo, InputHandle, IoProvider, OpenResult}; -use tectonic_status_base::StatusBackend; +use tectonic_io_base::{digest, filesystem::FilesystemIo, InputHandle, IoProvider, OpenResult}; +use tectonic_status_base::{NoopStatusBackend, StatusBackend}; use super::Bundle; @@ -56,21 +58,34 @@ impl IoProvider for DirBundle { } impl Bundle for DirBundle { - fn 
all_files(&mut self, _status: &mut dyn StatusBackend) -> Result> { - let mut files = Vec::new(); + fn all_files(&self) -> Vec { + fs::read_dir(self.0.root()) + .unwrap() + .filter_map(|x| x.ok()) + .filter(|x| !x.file_type().map(|x| x.is_dir()).unwrap_or(false)) + .map(|x| x.file_name().to_str().unwrap_or("").to_owned()) + .filter(|x| !x.is_empty()) + .collect() + } - // We intentionally do not explore the directory recursively. - for entry in fs::read_dir(self.0.root())? { - let entry = entry?; - - // This catches both regular files and symlinks:` - if !entry.file_type()?.is_dir() { - if let Some(s) = entry.file_name().to_str() { - files.push(s.to_owned()); - } + fn get_digest(&mut self) -> Result { + let digest_text = match self.input_open_name(digest::DIGEST_NAME, &mut NoopStatusBackend {}) + { + OpenResult::Ok(h) => { + let mut text = String::new(); + h.take(64).read_to_string(&mut text)?; + text } - } - Ok(files) + OpenResult::NotAvailable => { + bail!("bundle does not provide needed SHA256SUM file"); + } + + OpenResult::Err(e) => { + return Err(e); + } + }; + + Ok(atry!(digest::DigestData::from_str(&digest_text); ["corrupted SHA256 digest data"])) } } diff --git a/crates/bundles/src/itar.rs b/crates/bundles/src/itar.rs index 7b4730e7..eab29d43 100644 --- a/crates/bundles/src/itar.rs +++ b/crates/bundles/src/itar.rs @@ -3,10 +3,11 @@ //! The web-friendly "indexed tar" bundle backend. //! -//! The main type offered by this module is the [`IndexedTarBackend`] struct, -//! which cannot be used directly as a [`tectonic_io_base::IoProvider`] but is -//! the default backend for cached web-based bundle access through the -//! [`crate::cache::CachingBundle`] framework. +//! The main type offered by this module is the [`ItarBundle`] struct, +//! which can (but should not) be used directly as any other bundle. +//! +//! Instead, wrap it in a [`crate::BundleCache`] for filesystem-backed +//! caching. //! //! 
While the on-server file format backing the "indexed tar" backend is indeed //! a standard `tar` file, as far as the client is concerned, this backend is @@ -14,187 +15,273 @@ //! resource, the index file merely contains a byte offset and length that are //! then used to construct an HTTP Range request to obtain the file as needed. +use crate::{Bundle, CachableBundle, FileIndex, FileInfo, NET_RETRY_ATTEMPTS, NET_RETRY_SLEEP_MS}; use flate2::read::GzDecoder; -use std::{convert::TryInto, io::Read, str::FromStr}; +use std::{ + collections::HashMap, + io::{BufRead, BufReader, Cursor, Read}, + str::FromStr, + thread, + time::Duration, +}; use tectonic_errors::prelude::*; use tectonic_geturl::{DefaultBackend, DefaultRangeReader, GetUrlBackend, RangeReader}; -use tectonic_io_base::digest::{self, DigestData}; -use tectonic_status_base::{tt_note, tt_warning, StatusBackend}; +use tectonic_io_base::{digest, InputHandle, InputOrigin, IoProvider, OpenResult}; +use tectonic_status_base::{tt_note, tt_warning, NoopStatusBackend, StatusBackend}; -use crate::cache::{BackendPullData, CacheBackend}; - -const MAX_HTTP_ATTEMPTS: usize = 4; - -/// The internal file-information struct used by the [`IndexedTarBackend`]. -#[derive(Clone, Copy, Debug)] -pub struct FileInfo { +/// The internal file-information struct used by the [`ItarBundle`]. +#[derive(Clone, Debug)] +pub struct ItarFileInfo { + name: String, offset: u64, - length: u64, + length: usize, } -/// A simple web-based file backend based on HTTP Range requests. -/// -/// This type implements the [`CacheBackend`] trait and so can be used for -/// web-based bundle access thorugh the [`crate::cache::CachingBundle`] -/// framework. 
-#[derive(Debug)] -pub struct IndexedTarBackend { - reader: DefaultRangeReader, +impl FileInfo for ItarFileInfo { + fn name(&self) -> &str { + &self.name + } + fn path(&self) -> &str { + &self.name + } } -impl CacheBackend for IndexedTarBackend { - type FileInfo = FileInfo; +/// A simple FileIndex for compatiblity with [`crate::BundleCache`] +#[derive(Default, Debug)] +pub struct ItarFileIndex { + content: HashMap, +} - fn open_with_pull( - start_url: &str, - status: &mut dyn StatusBackend, - ) -> Result<(Self, BackendPullData)> { - // Step 1: resolve URL - let mut geturl_backend = DefaultBackend::default(); - let resolved_url = geturl_backend.resolve_url(start_url, status)?; +impl<'this> FileIndex<'this> for ItarFileIndex { + type InfoType = ItarFileInfo; - // Step 2: fetch index - let index = { - let mut index = String::new(); - let index_url = format!("{}.index.gz", &resolved_url); - tt_note!(status, "downloading index {}", index_url); - GzDecoder::new(geturl_backend.get_url(&index_url, status)?) - .read_to_string(&mut index)?; - index - }; - - // Step 3: get digest, setting up instance as we go - - let mut cache_backend = IndexedTarBackend { - reader: geturl_backend.open_range_reader(&resolved_url), - }; - - let digest_info = { - let mut digest_info = None; - - for line in index.lines() { - if let Ok((name, info)) = Self::parse_index_line(line) { - if name == digest::DIGEST_NAME { - digest_info = Some(info); - break; - } - } - } - - atry!( - digest_info; - ["backend does not provide needed {} file", digest::DIGEST_NAME] - ) - }; - - let digest_text = - String::from_utf8(cache_backend.get_file(digest::DIGEST_NAME, &digest_info, status)?) - .map_err(|e| e.utf8_error())?; - let digest = DigestData::from_str(&digest_text)?; - - // All done. 
- Ok(( - cache_backend, - BackendPullData { - resolved_url, - digest, - index, - }, - )) + fn iter(&'this self) -> Box + 'this> { + Box::new(self.content.values()) } - fn open_with_quick_check( - resolved_url: &str, - digest_file_info: &Self::FileInfo, - status: &mut dyn StatusBackend, - ) -> Result> { - let mut cache_backend = IndexedTarBackend { - reader: DefaultBackend::default().open_range_reader(resolved_url), - }; + fn len(&self) -> usize { + self.content.len() + } - if let Ok(d) = cache_backend.get_file(digest::DIGEST_NAME, digest_file_info, status) { - if let Ok(d) = String::from_utf8(d) { - if let Ok(d) = DigestData::from_str(&d) { - return Ok(Some((cache_backend, d))); - } + fn initialize(&mut self, reader: &mut dyn Read) -> Result<()> { + self.content.clear(); + + for line in BufReader::new(reader).lines() { + let line = line?; + let mut bits = line.split_whitespace(); + + if let (Some(name), Some(offset), Some(length)) = + (bits.next(), bits.next(), bits.next()) + { + self.content.insert( + name.to_owned(), + ItarFileInfo { + name: name.to_owned(), + offset: offset.parse::()?, + length: length.parse::()?, + }, + ); + } else { + // TODO: preserve the warning info or something! + bail!("malformed index line"); } } - - Ok(None) + Ok(()) } - fn parse_index_line(line: &str) -> Result<(String, Self::FileInfo)> { - let mut bits = line.split_whitespace(); + /// Find a file in this index + fn search(&'this mut self, name: &str) -> Option { + self.content.get(name).cloned() + } +} - if let (Some(name), Some(offset), Some(length)) = (bits.next(), bits.next(), bits.next()) { - Ok(( - name.to_owned(), - FileInfo { - offset: offset.parse::()?, - length: length.parse::()?, - }, - )) - } else { - // TODO: preserve the warning info or something! - bail!("malformed index line"); +/// The old-fashoned Tectonic web bundle format. +pub struct ItarBundle { + url: String, + /// Maps all available file names to [`FileInfo`]s. 
+ /// This is empty after we create this bundle, so we don't need network + /// to make an object. It is automatically filled by get_index when we need it. + index: ItarFileIndex, + + /// RangeReader object, responsible for sending queries. + /// Will be None when the object is created, automatically + /// replaced with Some(...) once needed. + reader: Option, +} + +impl ItarBundle { + /// Make a new ItarBundle. + /// This method does not require network access. + /// It will succeed even in we can't connect to the bundle, or if we're given a bad url. + pub fn new(url: String) -> Result { + Ok(ItarBundle { + index: ItarFileIndex::default(), + reader: None, + url, + }) + } + + fn connect_reader(&mut self) { + let geturl_backend = DefaultBackend::default(); + // Connect reader if it is not already connected + if self.reader.is_none() { + self.reader = Some(geturl_backend.open_range_reader(&self.url)); } } - fn get_file( + /// Fill this bundle's index, if it is empty. + fn ensure_index(&mut self) -> Result<()> { + // Fetch index if it is empty + if self.index.is_initialized() { + return Ok(()); + } + self.connect_reader(); + + let mut reader = self.get_index_reader()?; + self.index.initialize(&mut reader)?; + + Ok(()) + } +} + +impl IoProvider for ItarBundle { + fn input_open_name( &mut self, name: &str, - info: &Self::FileInfo, status: &mut dyn StatusBackend, - ) -> Result> { - tt_note!(status, "downloading {}", name); + ) -> OpenResult { + if let Err(e) = self.ensure_index() { + return OpenResult::Err(e); + }; - // Historically, sometimes our web service would drop connections when - // fetching a bunch of resource files (i.e., on the first invocation). - // The error manifested itself in a way that has a not-so-nice user - // experience. Our solution: retry the request a few times in case it - // was a transient problem. 
+ let info = match self.index.search(name) { + Some(a) => a, + None => return OpenResult::NotAvailable, + }; - let n = info.length.try_into().unwrap(); - let mut buf = Vec::with_capacity(n); - let mut overall_failed = true; - let mut any_failed = false; - - // Our HTTP implementation actually has problems with zero-sized ranged - // reads (Azure gives us a 200 response, which we don't properly - // handle), but when the file is 0-sized we're all set anyway! - if n > 0 { - for _ in 0..MAX_HTTP_ATTEMPTS { - let mut stream = match self.reader.read_range(info.offset, n) { - Ok(r) => r, - Err(e) => { - tt_warning!(status, "failure requesting \"{}\" from network", name; e); - any_failed = true; - continue; - } - }; - - if let Err(e) = stream.read_to_end(&mut buf) { - tt_warning!(status, "failure downloading \"{}\" from network", name; e.into()); - any_failed = true; - continue; - } - - overall_failed = false; - break; - } - - if overall_failed { - bail!( - "failed to retrieve \"{}\" from the network; \ - this most probably is not Tectonic's fault \ - -- please check your network connection.", - name - ); - } else if any_failed { - tt_note!(status, "download succeeded after retry"); - } - } - - Ok(buf) + // Retries are handled in open_fileinfo, + // since BundleCache never calls input_open_name. + self.open_fileinfo(&info, status) + } +} + +impl Bundle for ItarBundle { + fn all_files(&self) -> Vec { + self.index.iter().map(|x| x.path().to_owned()).collect() + } + + fn get_digest(&mut self) -> Result { + let digest_text = match self.input_open_name(digest::DIGEST_NAME, &mut NoopStatusBackend {}) + { + OpenResult::Ok(h) => { + let mut text = String::new(); + h.take(64).read_to_string(&mut text)?; + text + } + + OpenResult::NotAvailable => { + // Broken or un-cacheable backend. 
+ bail!("bundle does not provide needed SHA256SUM file"); + } + + OpenResult::Err(e) => { + return Err(e); + } + }; + + Ok(atry!(digest::DigestData::from_str(&digest_text); ["corrupted SHA256 digest data"])) + } +} + +impl<'this> CachableBundle<'this, ItarFileIndex> for ItarBundle { + fn get_location(&mut self) -> String { + self.url.clone() + } + + fn initialize_index(&mut self, source: &mut dyn Read) -> Result<()> { + self.index.initialize(source)?; + Ok(()) + } + + fn index(&mut self) -> &mut ItarFileIndex { + &mut self.index + } + + fn search(&mut self, name: &str) -> Option { + self.index.search(name) + } + + fn get_index_reader(&mut self) -> Result> { + let mut geturl_backend = DefaultBackend::default(); + let index_url = format!("{}.index.gz", &self.url); + let reader = GzDecoder::new(geturl_backend.get_url(&index_url)?); + Ok(Box::new(reader)) + } + + fn open_fileinfo( + &mut self, + info: &ItarFileInfo, + status: &mut dyn StatusBackend, + ) -> OpenResult { + match self.ensure_index() { + Ok(_) => {} + Err(e) => return OpenResult::Err(e), + }; + + let mut v = Vec::with_capacity(info.length); + tt_note!(status, "downloading {}", info.name); + + // Edge case for zero-sized reads + // (these cause errors on some web hosts) + if info.length == 0 { + return OpenResult::Ok(InputHandle::new_read_only( + info.name.to_owned(), + Cursor::new(v), + InputOrigin::Other, + )); + } + + // Get file with retries + for i in 0..NET_RETRY_ATTEMPTS { + let mut stream = match self + .reader + .as_mut() + .unwrap() + .read_range(info.offset, info.length) + { + Ok(r) => r, + Err(e) => { + tt_warning!(status, + "failure fetching \"{}\" from network ({}/{NET_RETRY_ATTEMPTS})", + info.name, i+1; e + ); + thread::sleep(Duration::from_millis(NET_RETRY_SLEEP_MS)); + continue; + } + }; + + match stream.read_to_end(&mut v) { + Ok(_) => {} + Err(e) => { + tt_warning!(status, + "failure downloading \"{}\" from network ({}/{NET_RETRY_ATTEMPTS})", + info.name, i+1; e.into() + ); + 
thread::sleep(Duration::from_millis(NET_RETRY_SLEEP_MS)); + continue; + } + }; + + return OpenResult::Ok(InputHandle::new_read_only( + info.name.to_owned(), + Cursor::new(v), + InputOrigin::Other, + )); + } + + OpenResult::Err(anyhow!( + "failed to download \"{}\"; please check your network connection.", + info.name + )) } } diff --git a/crates/bundles/src/lib.rs b/crates/bundles/src/lib.rs index 503dbcac..fb43d78b 100644 --- a/crates/bundles/src/lib.rs +++ b/crates/bundles/src/lib.rs @@ -11,22 +11,82 @@ //! //! This crate provides the following bundle implementations: //! -//! - [`cache::CachingBundle`] for access to remote bundles with local -//! filesystem caching. +//! - [`cache::BundleCache`] provides filesystem-backed caching for any bundle +//! - [`itar::ItarBundle`] provides filesystem-backed caching for any bundle //! - [`dir::DirBundle`] turns a directory full of files into a bundle; it is //! useful for testing and lightweight usage. //! - [`zip::ZipBundle`] for a ZIP-format bundle. -use std::{io::Read, str::FromStr}; -use tectonic_errors::{anyhow::bail, atry, Result}; -use tectonic_io_base::{digest, digest::DigestData, IoProvider, OpenResult}; +use std::{fmt::Debug, io::Read, path::PathBuf}; +use tectonic_errors::{prelude::bail, Result}; +use tectonic_io_base::{digest::DigestData, InputHandle, IoProvider, OpenResult}; use tectonic_status_base::StatusBackend; pub mod cache; pub mod dir; pub mod itar; +mod ttb; +pub mod ttb_fs; +pub mod ttb_net; pub mod zip; +use cache::BundleCache; +use dir::DirBundle; +use itar::ItarBundle; +use ttb_fs::TTBFsBundle; +use ttb_net::TTBNetBundle; +use zip::ZipBundle; + +// How many times network bundles should retry +// a download, and how long they should wait +// between attempts. +const NET_RETRY_ATTEMPTS: usize = 3; +const NET_RETRY_SLEEP_MS: u64 = 500; + +/// Uniquely identifies a file in a bundle. +pub trait FileInfo: Clone + Debug { + /// Return a path to this file, relative to the bundle. 
+ fn path(&self) -> &str; + + /// Return the name of this file + fn name(&self) -> &str; +} + +/// Keeps track of +pub trait FileIndex<'this> +where + Self: Sized + 'this + Debug, +{ + /// The FileInfo this index handles + type InfoType: FileInfo; + + /// Iterate over all [`FileInfo`]s in this index + fn iter(&'this self) -> Box + 'this>; + + /// Get the number of [`FileInfo`]s in this index + fn len(&self) -> usize; + + /// Returns true if this index is empty + fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Has this index been filled with bundle data? + /// This is always false until we call [`self.initialize()`], + /// and is always true afterwards. + fn is_initialized(&self) -> bool { + !self.is_empty() + } + + /// Fill this index from a file + fn initialize(&mut self, reader: &mut dyn Read) -> Result<()>; + + /// Search for a file in this index, obeying search order. + /// + /// Returns a `Some(FileInfo)` if a file was found, and `None` otherwise. + fn search(&'this mut self, name: &str) -> Option; +} + /// A trait for bundles of Tectonic support files. /// /// A "bundle" is an [`IoProvider`] with a few special properties. Bundles are @@ -39,59 +99,154 @@ pub mod zip; /// of TeX support files, and that you can generate one or more TeX format files /// using only the files contained in a bundle. pub trait Bundle: IoProvider { - /// Get a cryptographic digest summarizing this bundle’s contents. - /// - /// The digest summarizes the exact contents of every file in the bundle. It - /// is computed from the sorted names and SHA256 digests of the component - /// files [as implemented in the TeXLive bundle builder][x]. - /// - /// [x]: https://github.com/tectonic-typesetting/tectonic-texlive-bundles/blob/master/scripts/ttb_utils.py#L321 - /// - /// The default implementation gets the digest from a file named - /// `SHA256SUM`, which is expected to contain the digest in hex-encoded - /// format. 
- fn get_digest(&mut self, status: &mut dyn StatusBackend) -> Result { - let digest_text = match self.input_open_name(digest::DIGEST_NAME, status) { - OpenResult::Ok(h) => { - let mut text = String::new(); - h.take(64).read_to_string(&mut text)?; - text - } + /// Get a cryptographic digest summarizing this bundle’s contents, + /// which summarizes the exact contents of every file in the bundle. + fn get_digest(&mut self) -> Result; - OpenResult::NotAvailable => { - // Broken or un-cacheable backend. - bail!("bundle does not provide needed SHA256SUM file"); - } - - OpenResult::Err(e) => { - return Err(e); - } - }; - - Ok(atry!(DigestData::from_str(&digest_text); ["corrupted SHA256 digest data"])) - } - - /// Enumerate the files in this bundle. - /// - /// This interface is intended to be used for diagnostics, not by anything - /// during actual execution of an engine. This should include meta-files - /// such as the `SHA256SUM` file. The ordering of the returned filenames is - /// unspecified. - /// - /// To ease implementation, the filenames are returned in one big vector of - /// owned strings. For a large bundle, the memory consumed by this operation - /// might be fairly substantial (although we are talking megabytes, not - /// gigabytes). - fn all_files(&mut self, status: &mut dyn StatusBackend) -> Result>; + /// Iterate over all file paths in this bundle. + /// This is used for the `bundle search` command + fn all_files(&self) -> Vec; } impl Bundle for Box { - fn get_digest(&mut self, status: &mut dyn StatusBackend) -> Result { - (**self).get_digest(status) + fn get_digest(&mut self) -> Result { + (**self).get_digest() } - fn all_files(&mut self, status: &mut dyn StatusBackend) -> Result> { - (**self).all_files(status) + fn all_files(&self) -> Vec { + (**self).all_files() + } +} + +/// A bundle that may be cached. +/// +/// These methods do not implement any new features. 
+/// Instead, they give the [`cache::BundleCache`] wrapper +/// more direct access to existing bundle functionality. +pub trait CachableBundle<'this, T> +where + Self: Bundle + 'this, + T: FileIndex<'this>, +{ + /// Initialize this bundle's file index from an external reader + /// This allows us to retrieve the FileIndex from the cache WITHOUT + /// touching the network. + fn initialize_index(&mut self, _source: &mut dyn Read) -> Result<()> { + Ok(()) + } + + /// Get a `Read` instance to this bundle's index, + /// reading directly from the backend. + fn get_index_reader(&mut self) -> Result>; + + /// Return a reference to this bundle's FileIndex. + fn index(&mut self) -> &mut T; + + /// Open the file that `info` points to. + fn open_fileinfo( + &mut self, + info: &T::InfoType, + status: &mut dyn StatusBackend, + ) -> OpenResult; + + /// Search for a file in this bundle. + /// This should foward the call to `self.index` + fn search(&mut self, name: &str) -> Option; + + /// Return a string that corresponds to this bundle's location, probably a URL. + /// We should NOT need to do any network IO to get this value. + fn get_location(&mut self) -> String; +} + +impl<'this, T: FileIndex<'this>, B: CachableBundle<'this, T> + ?Sized> CachableBundle<'this, T> + for Box +{ + fn initialize_index(&mut self, source: &mut dyn Read) -> Result<()> { + (**self).initialize_index(source) + } + + fn get_location(&mut self) -> String { + (**self).get_location() + } + + fn get_index_reader(&mut self) -> Result> { + (**self).get_index_reader() + } + + fn index(&mut self) -> &mut T { + (**self).index() + } + + fn open_fileinfo( + &mut self, + info: &T::InfoType, + status: &mut dyn StatusBackend, + ) -> OpenResult { + (**self).open_fileinfo(info, status) + } + + fn search(&mut self, name: &str) -> Option { + (**self).search(name) + } +} + +/// Try to open a bundle from a string, +/// detecting its type. +/// +/// Returns None if auto-detection fails. 
+pub fn detect_bundle( + source: String, + only_cached: bool, + custom_cache_dir: Option, +) -> Result>> { + use url::Url; + + // Parse URL and detect bundle type + if let Ok(url) = Url::parse(&source) { + if url.scheme() == "https" || url.scheme() == "http" { + if source.ends_with("ttb") { + let bundle = BundleCache::new( + Box::new(TTBNetBundle::new(source)?), + only_cached, + custom_cache_dir, + )?; + return Ok(Some(Box::new(bundle))); + } else { + let bundle = BundleCache::new( + Box::new(ItarBundle::new(source)?), + only_cached, + custom_cache_dir, + )?; + return Ok(Some(Box::new(bundle))); + } + } else if url.scheme() == "file" { + let file_path = url.to_file_path().map_err(|_| { + std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "failed to parse local path", + ) + })?; + return bundle_from_path(file_path); + } else { + return Ok(None); + } + } else { + // If we couldn't parse the URL, this is probably a local path. + return bundle_from_path(PathBuf::from(source)); + } + + fn bundle_from_path(p: PathBuf) -> Result>> { + let ext = p.extension().map_or("", |x| x.to_str().unwrap_or("")); + + if p.is_dir() { + Ok(Some(Box::new(DirBundle::new(p)))) + } else if ext == "zip" { + Ok(Some(Box::new(ZipBundle::open(p)?))) + } else if ext == "ttb" { + Ok(Some(Box::new(TTBFsBundle::open(p)?))) + } else { + Ok(None) + } } } @@ -127,12 +282,11 @@ pub fn get_fallback_bundle_url(format_version: u32) -> String { /// `tectonic` crate provides a configuration mechanism to allow the user to /// override the bundle URL setting, and that should be preferred if you’re in a /// position to use it. 
-pub fn get_fallback_bundle( - format_version: u32, - only_cached: bool, - status: &mut dyn StatusBackend, -) -> Result> { +pub fn get_fallback_bundle(format_version: u32, only_cached: bool) -> Result> { let url = get_fallback_bundle_url(format_version); - let mut cache = cache::Cache::get_user_default()?; - cache.open(&url, only_cached, status) + let bundle = detect_bundle(url, only_cached, None)?; + if bundle.is_none() { + bail!("could not open default bundle") + } + Ok(bundle.unwrap()) } diff --git a/crates/bundles/src/ttb.rs b/crates/bundles/src/ttb.rs new file mode 100644 index 00000000..5c8afd12 --- /dev/null +++ b/crates/bundles/src/ttb.rs @@ -0,0 +1,288 @@ +// Copyright 2023-2024 the Tectonic Project +// Licensed under the MIT License. + +//! Common tools for the ttbv1 format, used in both +//! network and filesystem bundles. + +use crate::{FileIndex, FileInfo}; +use std::{ + collections::HashMap, + convert::{TryFrom, TryInto}, + io::{BufRead, BufReader, Read}, + str::FromStr, +}; +use tectonic_errors::prelude::*; +use tectonic_io_base::digest::{self, DigestData}; + +pub struct TTBv1Header { + pub index_start: u64, + pub index_real_len: u32, + pub index_gzip_len: u32, + pub digest: DigestData, +} + +impl TryFrom<[u8; 70]> for TTBv1Header { + type Error = Error; + + fn try_from(header: [u8; 70]) -> Result { + let signature = &header[0..14]; + let version = u32::from_le_bytes(header[14..18].try_into()?); + let index_start = u64::from_le_bytes(header[18..26].try_into()?); + let index_gzip_len = u32::from_le_bytes(header[26..30].try_into()?); + let index_real_len = u32::from_le_bytes(header[30..34].try_into()?); + let digest: DigestData = DigestData::from_str(&digest::bytes_to_hex(&header[34..66]))?; + + if signature != b"tectonicbundle" { + bail!("this is not a bundle"); + } + + if version != 1 { + bail!("wrong ttb version"); + } + + Ok(TTBv1Header { + digest, + index_start, + index_real_len, + index_gzip_len, + }) + } +} + +/// file info for TTbundle 
+#[derive(Clone, Debug)] +pub struct TTBFileInfo { + pub start: u64, + pub real_len: u32, + pub gzip_len: u32, + pub path: String, + pub name: String, + pub hash: Option, +} + +impl FileInfo for TTBFileInfo { + fn name(&self) -> &str { + &self.name + } + + fn path(&self) -> &str { + &self.path + } +} + +#[derive(Default, Debug)] +pub struct TTBFileIndex { + // Vector of fileinfos. + // This MUST be sorted by path for search() to work properly! + pub content: Vec, + + search_orders: HashMap>, + default_search_order: String, + + // Remember previous searches so we don't have to iterate over content again. + search_cache: HashMap>, +} + +impl TTBFileIndex { + fn read_filelist_line(&mut self, line: String) -> Result<()> { + let mut bits = line.split_whitespace(); + + if let (Some(start), Some(gzip_len), Some(real_len), Some(hash)) = + (bits.next(), bits.next(), bits.next(), bits.next()) + { + let path = bits.collect::>().join(" "); + let (_, name) = path.rsplit_once('/').unwrap_or(("", &path)); + + // Basic path validation. + // TODO: more robust checks + if path.starts_with('/') + || path.contains("./") // Also catches "/../" + || path.contains("//") + { + bail!("bad bundle file path `{path}`"); + } + + self.content.push(TTBFileInfo { + start: start.parse::()?, + gzip_len: gzip_len.parse::()?, + real_len: real_len.parse::()?, + path: path.to_owned(), + name: name.to_owned(), + hash: match hash { + "nohash" => None, + _ => Some(hash.to_owned()), + }, + }); + } else { + // TODO: preserve the warning info or something! 
+ bail!("malformed FILELIST line"); + } + + Ok(()) + } + + fn read_search_line(&mut self, name: String, line: String) -> Result<()> { + let stat = self.search_orders.entry(name).or_default(); + stat.push(line); + Ok(()) + } + + fn read_defaultsearch_line(&mut self, line: String) -> Result<()> { + self.default_search_order = line; + Ok(()) + } +} + +impl<'this> FileIndex<'this> for TTBFileIndex { + type InfoType = TTBFileInfo; + + fn iter(&'this self) -> Box + 'this> { + Box::new(self.content.iter()) + } + + fn len(&self) -> usize { + self.content.len() + } + + fn initialize(&mut self, reader: &mut dyn Read) -> Result<()> { + self.content.clear(); + self.search_orders.clear(); + self.search_cache.clear(); + self.default_search_order.clear(); + + let mut mode: String = String::new(); + for line in BufReader::new(reader).lines() { + let line = line?; + + if line.starts_with('[') { + mode = line[1..line.len() - 1].to_owned(); + continue; + } + + if mode.is_empty() { + continue; + } + + let (cmd, arg) = mode.rsplit_once(':').unwrap_or((&mode[..], "")); + + match cmd { + "DEFAULTSEARCH" => self.read_defaultsearch_line(line)?, + "FILELIST" => self.read_filelist_line(line)?, + "SEARCH" => self.read_search_line(arg.to_owned(), line)?, + _ => continue, + } + } + + Ok(()) + } + + fn search(&'this mut self, name: &str) -> Option { + match self.search_cache.get(name) { + None => {} + Some(r) => return r.clone(), + } + + let search = self.search_orders.get(&self.default_search_order).unwrap(); + + // Edge case: absolute paths + if name.starts_with('/') { + return None; + } + + // Get last element of path, since + // some packages reference a path to a file. + // `fithesis4` is one example. + let relative_parent: bool; + + let n = match name.rsplit_once('/') { + Some(n) => { + relative_parent = true; + n.1 + } + None => { + relative_parent = false; + name + } + }; + + // If we don't have this path in the index, this file doesn't exist. 
+ // Gather a reference to every index entry whose file name is `n`. + let mut infos: Vec<&TTBFileInfo> = Vec::new(); + for i in self.iter() { + if i.name() == n { + infos.push(i); + } else if !infos.is_empty() { + // NOTE(review): assumes same-named entries are adjacent in `content` -- confirm. + break; + } + } + + if relative_parent { + // TODO: REWORK + let mut matching: Option<&TTBFileInfo> = None; + for info in &infos { + if info.path().ends_with(&name) { + match matching { + Some(_) => return None, // TODO: warning. This shouldn't happen. + None => matching = Some(info), + } + } + } + let matching = Some(matching?.clone()); + self.search_cache.insert(name.to_owned(), matching.clone()); + matching + } else { + // Even if infos.len() is 1, we don't return here. + // We need to make sure this file matches a search path: + // if it's in a directory we don't search, we shouldn't find it! + + let mut picked: Vec<&TTBFileInfo> = Vec::new(); + for rule in search { + // Remove leading slash from rule + // (search patterns start with slashes, but paths do not) + let rule = &rule[1..]; + + for info in &infos { + if rule.ends_with("//") { + // Match start of parent path + // (cutting off the last slash) + if info.path().starts_with(&rule[0..rule.len() - 1]) { + picked.push(info); + } + } else { + // Match full parent path + if &info.path()[0..info.path().len() - name.len()] == rule { + picked.push(info); + } + } + } + if !picked.is_empty() { + break; + } + } + + let r = { + if picked.is_empty() { + // No file in our search dirs had this name. + None + } else if picked.len() == 1 { + // We found exactly one file with this name. + // + // `picked` only holds references into the index, + // so hand the caller an owned copy of the + // single match. + Some(picked[0].clone()) + } else { + // We found multiple files with this name, all of which + // have the same priority. Pick alphabetically to emulate + // an "alphabetic DFS" search order.
+ picked.sort_by(|a, b| a.path().cmp(b.path())); + Some(picked[0].clone()) + } + }; + + self.search_cache.insert(name.to_owned(), r.clone()); + r + } + } +} diff --git a/crates/bundles/src/ttb_fs.rs b/crates/bundles/src/ttb_fs.rs new file mode 100644 index 00000000..55fb3ee0 --- /dev/null +++ b/crates/bundles/src/ttb_fs.rs @@ -0,0 +1,131 @@ +// Copyright 2023-2024 the Tectonic Project +// Licensed under the MIT License. + +//! Read ttb v1 bundles on the filesystem. +//! +//! The main type offered by this module is the [`TTBFsBundle`] struct. + +use crate::{ + ttb::{TTBFileIndex, TTBFileInfo, TTBv1Header}, + Bundle, FileIndex, FileInfo, +}; +use flate2::read::GzDecoder; +use std::{ + convert::TryFrom, + fs::File, + io::{Cursor, Read, Seek, SeekFrom}, + path::Path, +}; +use tectonic_errors::prelude::*; +use tectonic_io_base::{digest::DigestData, InputHandle, InputOrigin, IoProvider, OpenResult}; +use tectonic_status_base::StatusBackend; + +/// Read a [`TTBFileInfo`] from this bundle. +/// We assume that `fileinfo` points to a valid file in this bundle. +fn read_fileinfo<'a>(fileinfo: &TTBFileInfo, reader: &'a mut File) -> Result> { + reader.seek(SeekFrom::Start(fileinfo.start))?; + Ok(Box::new(GzDecoder::new( + reader.take(fileinfo.gzip_len as u64), + ))) +} + +/// A bundle backed by a ttb v1 file on the local filesystem. +pub struct TTBFsBundle +where + for<'a> T: FileIndex<'a>, +{ + file: File, + index: T, +} + +/// Constructors and index loading for [`TTBFsBundle`]. + +impl TTBFsBundle { + /// Create a bundle from an already-opened ttb v1 file. The index starts out empty.
+ pub fn new(file: File) -> Result { + Ok(TTBFsBundle { + file, + index: TTBFileIndex::default(), + }) + } + + fn get_header(&mut self) -> Result { + self.file.seek(SeekFrom::Start(0))?; + let mut header: [u8; 70] = [0u8; 70]; + self.file.read_exact(&mut header)?; + self.file.seek(SeekFrom::Start(0))?; + let header = TTBv1Header::try_from(header)?; + Ok(header) + } + + // Read the header, then decompress and parse the index it points to. + fn fill_index(&mut self) -> Result<()> { + let header = self.get_header()?; + let info = TTBFileInfo { + start: header.index_start, + gzip_len: header.index_gzip_len, + real_len: header.index_real_len, + path: "/INDEX".to_owned(), + name: "INDEX".to_owned(), + hash: None, + }; + + let mut reader = read_fileinfo(&info, &mut self.file)?; + self.index.initialize(&mut reader)?; + + Ok(()) + } + + /// Open a ttb v1 file on the filesystem as a bundle. + pub fn open>(path: P) -> Result { + Self::new(File::open(path)?) + } +} + +impl IoProvider for TTBFsBundle { + fn input_open_name( + &mut self, + name: &str, + _status: &mut dyn StatusBackend, + ) -> OpenResult { + // Fetch index if it is empty + if self.index.is_empty() { + if let Err(e) = self.fill_index() { + return OpenResult::Err(e); + } + } + + let info = match self.index.search(name) { + None => return OpenResult::NotAvailable, + Some(s) => s, + }; + + let mut v: Vec = Vec::with_capacity(info.real_len as usize); + + match read_fileinfo(&info, &mut self.file) { + Err(e) => return OpenResult::Err(e), + Ok(mut b) => { + if let Err(e) = b.read_to_end(&mut v) { + return OpenResult::Err(e.into()); + } + } + }; + + OpenResult::Ok(InputHandle::new_read_only( + name, + Cursor::new(v), + InputOrigin::Other, + )) + } +} + +impl Bundle for TTBFsBundle { + fn all_files(&self) -> Vec { + self.index.iter().map(|x| x.path().to_owned()).collect() + } + + fn get_digest(&mut self) -> Result { + let header = self.get_header()?; + Ok(header.digest) + } +} diff --git a/crates/bundles/src/ttb_net.rs
b/crates/bundles/src/ttb_net.rs new file mode 100644 index 00000000..ec2f8642 --- /dev/null +++ b/crates/bundles/src/ttb_net.rs @@ -0,0 +1,222 @@ +// Copyright 2023-2024 the Tectonic Project +// Licensed under the MIT License. + +//! Read ttb v1 bundles on the internet. +//! +//! The main type offered by this module is the [`TTBNetBundle`] struct, +//! which can (but should not) be used directly as a [`tectonic_io_base::IoProvider`]. +//! +//! Instead, wrap it in a [`crate::BundleCache`] for filesystem-backed caching. + +use crate::{ + ttb::{TTBFileIndex, TTBFileInfo, TTBv1Header}, + Bundle, CachableBundle, FileIndex, FileInfo, NET_RETRY_ATTEMPTS, NET_RETRY_SLEEP_MS, +}; +use flate2::read::GzDecoder; +use std::{ + convert::TryFrom, + io::{Cursor, Read}, + thread, + time::Duration, +}; +use tectonic_errors::prelude::*; +use tectonic_geturl::{DefaultBackend, DefaultRangeReader, GetUrlBackend, RangeReader}; +use tectonic_io_base::{InputHandle, InputOrigin, IoProvider, OpenResult}; +use tectonic_status_base::{tt_note, tt_warning, StatusBackend}; + +/// Read a [`TTBFileInfo`] from this bundle. +/// We assume that `fileinfo` points to a valid file in this bundle. +fn read_fileinfo(fileinfo: &TTBFileInfo, reader: &mut DefaultRangeReader) -> Result> { + // fileinfo.gzip_len is a u32, so it must fit inside a usize (assuming 32/64-bit machine). + let stream = reader.read_range(fileinfo.start, fileinfo.gzip_len as usize)?; + Ok(Box::new(GzDecoder::new(stream))) +} + +/// Access a ttb v1 bundle hosted on the internet. +/// This struct provides NO caching. All files +/// are downloaded. +/// +/// As such, this bundle should probably be wrapped in a [`crate::BundleCache`]. +pub struct TTBNetBundle +where + for<'a> T: FileIndex<'a>, +{ + url: String, + index: T, + + // We need the network to create the reader. + // It is None until absolutely necessary. + reader: Option, +} + +/// Construction and network plumbing for [`TTBNetBundle`].
+ +impl TTBNetBundle { + /// Create a new ttb v1 network bundle that will read from `url`. + /// This method does not require network access. + /// It will succeed even if we can't connect to the bundle, or if we're given a bad url. + pub fn new(url: String) -> Result { + Ok(TTBNetBundle { + reader: None, + index: TTBFileIndex::default(), + url, + }) + } + + fn connect_reader(&mut self) -> Result<()> { + if self.reader.is_some() { + return Ok(()); + } + let geturl_backend = DefaultBackend::default(); + self.reader = Some(geturl_backend.open_range_reader(&self.url)); + Ok(()) + } + + fn get_header(&mut self) -> Result { + self.connect_reader()?; + let mut header: [u8; 70] = [0u8; 70]; + self.reader + .as_mut() + .unwrap() + .read_range(0, 70)? + .read_exact(&mut header)?; + let header = TTBv1Header::try_from(header)?; + Ok(header) + } + + // Fetch and parse this bundle's index unless it is already initialized. + fn ensure_index(&mut self) -> Result<()> { + if self.index.is_initialized() { + return Ok(()); + } + + let mut reader = self.get_index_reader()?; + self.index.initialize(&mut reader)?; + Ok(()) + } +} + +impl IoProvider for TTBNetBundle { + fn input_open_name( + &mut self, + name: &str, + status: &mut dyn StatusBackend, + ) -> OpenResult { + if let Err(e) = self.ensure_index() { + return OpenResult::Err(e); + }; + + let info = match self.search(name) { + None => return OpenResult::NotAvailable, + Some(s) => s, + }; + + // Retries are handled in open_fileinfo, + // since BundleCache never calls input_open_name.
+ self.open_fileinfo(&info, status) + } +} + +impl Bundle for TTBNetBundle { + fn all_files(&self) -> Vec { + self.index.iter().map(|x| x.path().to_owned()).collect() + } + + fn get_digest(&mut self) -> Result { + let header = self.get_header()?; + Ok(header.digest) + } +} + +impl<'this> CachableBundle<'this, TTBFileIndex> for TTBNetBundle { + fn get_location(&mut self) -> String { + self.url.clone() + } + + fn initialize_index(&mut self, source: &mut dyn Read) -> Result<()> { + self.index.initialize(source)?; + Ok(()) + } + + fn index(&mut self) -> &mut TTBFileIndex { + &mut self.index + } + + fn search(&mut self, name: &str) -> Option { + self.index.search(name) + } + + fn get_index_reader(&mut self) -> Result> { + self.connect_reader()?; + let header = self.get_header()?; + + read_fileinfo( + &TTBFileInfo { + start: header.index_start, + gzip_len: header.index_gzip_len, + real_len: header.index_real_len, + path: "".to_owned(), + name: "".to_owned(), + hash: None, + }, + self.reader.as_mut().unwrap(), + ) + } + + fn open_fileinfo( + &mut self, + info: &TTBFileInfo, + status: &mut dyn StatusBackend, + ) -> OpenResult { + let mut v: Vec = Vec::with_capacity(info.real_len as usize); + tt_note!(status, "downloading {}", info.name); + + // Edge case for zero-sized reads + // (these cause errors on some web hosts) + if info.gzip_len == 0 { + return OpenResult::Ok(InputHandle::new_read_only( + info.name.to_owned(), + Cursor::new(v), + InputOrigin::Other, + )); + } + + // Get file with retries + for i in 0..NET_RETRY_ATTEMPTS { + let mut reader = match read_fileinfo(info, self.reader.as_mut().unwrap()) { + Ok(r) => r, + Err(e) => { + tt_warning!(status, + "failure fetching \"{}\" from network ({}/{NET_RETRY_ATTEMPTS})", + info.name, i+1; e + ); + thread::sleep(Duration::from_millis(NET_RETRY_SLEEP_MS)); + continue; + } + }; + + match reader.read_to_end(&mut v) { + Ok(_) => {} + Err(e) => { + tt_warning!(status, + "failure downloading \"{}\" from network 
({}/{NET_RETRY_ATTEMPTS})", + info.name, i+1; e.into() + ); + thread::sleep(Duration::from_millis(NET_RETRY_SLEEP_MS)); + continue; + } + }; + + return OpenResult::Ok(InputHandle::new_read_only( + info.name.to_owned(), + Cursor::new(v), + InputOrigin::Other, + )); + } + + OpenResult::Err(anyhow!( + "failed to download \"{}\"; please check your network connection.", + info.name + )) + } +} diff --git a/crates/bundles/src/zip.rs b/crates/bundles/src/zip.rs index c0d2757d..d2665f53 100644 --- a/crates/bundles/src/zip.rs +++ b/crates/bundles/src/zip.rs @@ -3,18 +3,18 @@ //! ZIP files as Tectonic bundles. +use crate::Bundle; use std::{ fs::File, io::{Cursor, Read, Seek}, path::Path, + str::FromStr, }; use tectonic_errors::prelude::*; -use tectonic_io_base::{InputHandle, InputOrigin, IoProvider, OpenResult}; -use tectonic_status_base::StatusBackend; +use tectonic_io_base::{digest, InputHandle, InputOrigin, IoProvider, OpenResult}; +use tectonic_status_base::{NoopStatusBackend, StatusBackend}; use zip::{result::ZipError, ZipArchive}; -use crate::Bundle; - /// A bundle backed by a ZIP file. 
pub struct ZipBundle { zip: ZipArchive, @@ -57,7 +57,11 @@ impl IoProvider for ZipBundle { } }; - let mut buf = Vec::with_capacity(zipitem.size() as usize); + let s = zipitem.size(); + if s >= u32::MAX as u64 { + return OpenResult::Err(anyhow!("Zip item too large.")); + } + let mut buf = Vec::with_capacity(s as usize); if let Err(e) = zipitem.read_to_end(&mut buf) { return OpenResult::Err(e.into()); @@ -72,7 +76,28 @@ impl IoProvider for ZipBundle { } impl Bundle for ZipBundle { - fn all_files(&mut self, _status: &mut dyn StatusBackend) -> Result> { - Ok(self.zip.file_names().map(|s| s.to_owned()).collect()) + fn all_files(&self) -> Vec { + self.zip.file_names().map(|x| x.to_owned()).collect() + } + + fn get_digest(&mut self) -> Result { + let digest_text = match self.input_open_name(digest::DIGEST_NAME, &mut NoopStatusBackend {}) + { + OpenResult::Ok(h) => { + let mut text = String::new(); + h.take(64).read_to_string(&mut text)?; + text + } + + OpenResult::NotAvailable => { + bail!("bundle does not provide needed SHA256SUM file"); + } + + OpenResult::Err(e) => { + return Err(e); + } + }; + + Ok(atry!(digest::DigestData::from_str(&digest_text); ["corrupted SHA256 digest data"])) } } diff --git a/crates/engine_spx2html/src/lib.rs b/crates/engine_spx2html/src/lib.rs index eee3dc91..885d0230 100644 --- a/crates/engine_spx2html/src/lib.rs +++ b/crates/engine_spx2html/src/lib.rs @@ -154,7 +154,7 @@ impl Spx2HtmlEngine { let mut output = hooks.io().output_open_name(asp).must_exist()?; serde_json::to_writer_pretty(&mut output, &ser)?; let (name, digest) = output.into_name_digest(); - hooks.event_output_closed(name, digest, status); + hooks.event_output_closed(name, digest); } else if !self.do_not_emit_assets { assets.emit(fonts, &mut common)?; } diff --git a/crates/geturl/src/curl.rs b/crates/geturl/src/curl.rs index 178bd2e4..5af83211 100644 --- a/crates/geturl/src/curl.rs +++ b/crates/geturl/src/curl.rs @@ -6,7 +6,6 @@ use curl::easy::Easy; use std::io::Cursor; use 
tectonic_errors::{anyhow::bail, Result}; -use tectonic_status_base::StatusBackend; use crate::{GetUrlBackend, RangeReader}; @@ -67,11 +66,11 @@ impl GetUrlBackend for CurlBackend { type Response = Cursor>; type RangeReader = CurlRangeReader; - fn get_url(&mut self, url: &str, _status: &mut dyn StatusBackend) -> Result { + fn get_url(&mut self, url: &str) -> Result { get_url_generic(&mut self.handle, url, None) } - fn resolve_url(&mut self, url: &str, _status: &mut dyn StatusBackend) -> Result { + fn resolve_url(&mut self, url: &str) -> Result { Ok(url.into()) } diff --git a/crates/geturl/src/lib.rs b/crates/geturl/src/lib.rs index 079eb933..554c7d05 100644 --- a/crates/geturl/src/lib.rs +++ b/crates/geturl/src/lib.rs @@ -25,7 +25,6 @@ use cfg_if::cfg_if; use std::io::Read; use tectonic_errors::Result; -use tectonic_status_base::StatusBackend; /// A trait for reading byte ranges from an HTTP resource. pub trait RangeReader { @@ -48,10 +47,10 @@ pub trait GetUrlBackend: Default { /// /// But we attempt to detect redirects into CDNs/S3/etc and *stop* following /// before we get that deep. - fn resolve_url(&mut self, url: &str, status: &mut dyn StatusBackend) -> Result; + fn resolve_url(&mut self, url: &str) -> Result; /// Perform an HTTP GET on a URL, returning a readable result. - fn get_url(&mut self, url: &str, status: &mut dyn StatusBackend) -> Result; + fn get_url(&mut self, url: &str) -> Result; /// Open a range reader that can perform byte-range reads on the specified URL. 
fn open_range_reader(&self, url: &str) -> Self::RangeReader; diff --git a/crates/geturl/src/null.rs b/crates/geturl/src/null.rs index dd97cf20..db0a7839 100644 --- a/crates/geturl/src/null.rs +++ b/crates/geturl/src/null.rs @@ -10,7 +10,6 @@ use std::{ result::Result as StdResult, }; use tectonic_errors::Result; -use tectonic_status_base::StatusBackend; use crate::{GetUrlBackend, RangeReader}; @@ -34,11 +33,11 @@ impl GetUrlBackend for NullBackend { type Response = Empty; type RangeReader = NullRangeReader; - fn get_url(&mut self, _url: &str, _status: &mut dyn StatusBackend) -> Result { + fn get_url(&mut self, _url: &str) -> Result { Err((NoGetUrlBackendError {}).into()) } - fn resolve_url(&mut self, _url: &str, _status: &mut dyn StatusBackend) -> Result { + fn resolve_url(&mut self, _url: &str) -> Result { Err((NoGetUrlBackendError {}).into()) } diff --git a/crates/geturl/src/reqwest.rs b/crates/geturl/src/reqwest.rs index 661df08e..2567d215 100644 --- a/crates/geturl/src/reqwest.rs +++ b/crates/geturl/src/reqwest.rs @@ -10,7 +10,6 @@ use reqwest::{ StatusCode, Url, }; use tectonic_errors::{anyhow::bail, Result}; -use tectonic_status_base::{tt_note, StatusBackend}; use crate::{GetUrlBackend, RangeReader}; @@ -24,7 +23,7 @@ impl GetUrlBackend for ReqwestBackend { type Response = Response; type RangeReader = ReqwestRangeReader; - fn get_url(&mut self, url: &str, _status: &mut dyn StatusBackend) -> Result { + fn get_url(&mut self, url: &str) -> Result { let res = Client::new().get(url).send()?; if !res.status().is_success() { bail!( @@ -36,9 +35,7 @@ impl GetUrlBackend for ReqwestBackend { Ok(res) } - fn resolve_url(&mut self, url: &str, status: &mut dyn StatusBackend) -> Result { - tt_note!(status, "connecting to {}", url); - + fn resolve_url(&mut self, url: &str) -> Result { let parsed = Url::parse(url)?; let original_filename = parsed .path_segments() @@ -96,10 +93,6 @@ impl GetUrlBackend for ReqwestBackend { } let final_url: String = res.url().clone().into(); - 
if final_url != url { - tt_note!(status, "resolved to {}", final_url); - } - Ok(final_url) } diff --git a/crates/io_base/src/app_dirs.rs b/crates/io_base/src/app_dirs.rs index 363985fb..472346b6 100644 --- a/crates/io_base/src/app_dirs.rs +++ b/crates/io_base/src/app_dirs.rs @@ -9,6 +9,7 @@ use app_dirs2::AppDataType; use std::path::PathBuf; +use std::{env, fs}; use tectonic_errors::prelude::*; /// The instance of the `app_dirs2` crate that this crate links to. @@ -61,6 +62,27 @@ pub fn ensure_user_config() -> Result { /// - macOS: `$HOME/Library/Caches/Tectonic` /// - Others: `$XDG_CACHE_HOME/Tectonic` if defined, otherwise /// `$HOME/.cache/Tectonic` -pub fn ensure_user_cache_dir(path: &str) -> Result { - Ok(app_dirs2::app_dir(AppDataType::UserCache, &APP_INFO, path)?) +/// +/// +/// The cache location defaults to the `AppDataType::UserCache` +/// provided by `app_dirs2` but can be overwritten using the +/// `TECTONIC_CACHE_DIR` environment variable. +/// +/// This method may perform I/O to create the user cache directory, so it is +/// fallible. (Due to its `app_dirs2` implementation, it would have to be +/// fallible even if it didn't perform I/O.) 
+pub fn get_user_cache_dir(subdir: &str) -> Result { + let env_cache_path = env::var_os("TECTONIC_CACHE_DIR"); + + let cache_path = match env_cache_path { + Some(env_cache_path) => { + let mut env_cache_path: PathBuf = env_cache_path.into(); + env_cache_path.push(subdir); + fs::create_dir_all(&env_cache_path)?; + env_cache_path + } + None => app_dirs2::app_dir(AppDataType::UserCache, &APP_INFO, subdir)?, + }; + + Ok(cache_path) } diff --git a/src/bin/tectonic/compile.rs b/src/bin/tectonic/compile.rs index 99a55213..9fabb0a7 100644 --- a/src/bin/tectonic/compile.rs +++ b/src/bin/tectonic/compile.rs @@ -10,7 +10,7 @@ use std::path::{Path, PathBuf}; use tectonic_bridge_core::{SecuritySettings, SecurityStance}; use tectonic::{ - config::PersistentConfig, + config::{maybe_return_test_bundle, PersistentConfig}, driver::{OutputFormat, PassSetting, ProcessingSession, ProcessingSessionBuilder}, errmsg, errors::{ErrorKind, Result}, @@ -19,6 +19,8 @@ use tectonic::{ unstable_opts::{UnstableArg, UnstableOptions}, }; +use tectonic_bundles::detect_bundle; + #[derive(Debug, Parser)] pub struct CompileOptions { /// The file to process, or "-" to process the standard input stream @@ -94,8 +96,6 @@ pub struct CompileOptions { //impl TectonicCommand for CompileOptions { impl CompileOptions { - //fn customize(&self, _cc: &mut CommandCustomizations) {} - pub fn execute(self, config: PersistentConfig, status: &mut dyn StatusBackend) -> Result { let unstable = UnstableOptions::from_unstable_args(self.unstable.into_iter()); @@ -185,16 +185,26 @@ impl CompileOptions { } } - let only_cached = self.only_cached; - if only_cached { + if self.only_cached { tt_note!(status, "using only cached resource files"); } - if let Some(path) = self.bundle { - sess_builder.bundle(config.make_local_file_provider(path, status)?); - } else if let Some(u) = self.web_bundle { - sess_builder.bundle(config.make_cached_url_provider(&u, only_cached, None, status)?); + + if let Some(bundle) = self.bundle { + // 
TODO: this is ugly. + // It's probably a good idea to re-design our code so we + // don't need special cases for tests our source. + if let Ok(bundle) = maybe_return_test_bundle(Some(bundle.clone())) { + sess_builder.bundle(bundle); + } else if let Some(bundle) = detect_bundle(bundle.clone(), self.only_cached, None)? { + sess_builder.bundle(bundle); + } else { + return Err(errmsg!("`{bundle}` doesn't specify a valid bundle.")); + } + } else if let Ok(bundle) = maybe_return_test_bundle(None) { + // TODO: this is ugly too. + sess_builder.bundle(bundle); } else { - sess_builder.bundle(config.default_bundle(only_cached, status)?); + sess_builder.bundle(config.default_bundle(self.only_cached)?); } sess_builder.build_date_from_env(deterministic_mode); diff --git a/src/bin/tectonic/v2cli/commands/bundle/mod.rs b/src/bin/tectonic/v2cli/commands/bundle/mod.rs index 7549af63..aaab41b2 100644 --- a/src/bin/tectonic/v2cli/commands/bundle/mod.rs +++ b/src/bin/tectonic/v2cli/commands/bundle/mod.rs @@ -29,7 +29,7 @@ fn get_a_bundle( let doc = ws.first_document(); let mut options: DocumentSetupOptions = Default::default(); options.only_cached(only_cached); - doc.bundle(&options, status) + doc.bundle(&options) } Err(e) => { @@ -43,7 +43,6 @@ fn get_a_bundle( Ok(Box::new(tectonic_bundles::get_fallback_bundle( tectonic_engine_xetex::FORMAT_SERIAL, only_cached, - status, )?)) } } @@ -131,8 +130,8 @@ impl BundleSearchCommand { } fn execute(self, config: PersistentConfig, status: &mut dyn StatusBackend) -> Result { - let mut bundle = get_a_bundle(config, self.only_cached, status)?; - let files = bundle.all_files(status)?; + let bundle = get_a_bundle(config, self.only_cached, status)?; + let files = bundle.all_files(); // Is there a better way to do this? 
let filter: Box bool> = if let Some(t) = self.term { diff --git a/src/bin/tectonic/v2cli/commands/new.rs b/src/bin/tectonic/v2cli/commands/new.rs index 14bf4d77..febf440c 100644 --- a/src/bin/tectonic/v2cli/commands/new.rs +++ b/src/bin/tectonic/v2cli/commands/new.rs @@ -30,7 +30,7 @@ impl TectonicCommand for InitCommand { let wc = WorkspaceCreator::new(path); ctry!( - wc.create_defaulted(config, status, self.web_bundle); + wc.create_defaulted(&config, self.bundle); "failed to create the new Tectonic workspace" ); Ok(0) @@ -61,7 +61,7 @@ impl TectonicCommand for NewCommand { let wc = WorkspaceCreator::new(self.path); ctry!( - wc.create_defaulted(config, status, self.web_bundle); + wc.create_defaulted(&config, self.bundle); "failed to create the new Tectonic workspace" ); Ok(0) diff --git a/src/bin/tectonic/v2cli/commands/show.rs b/src/bin/tectonic/v2cli/commands/show.rs index dc5b4442..d8656ccd 100644 --- a/src/bin/tectonic/v2cli/commands/show.rs +++ b/src/bin/tectonic/v2cli/commands/show.rs @@ -1,5 +1,6 @@ use clap::{CommandFactory, Parser}; use tectonic::{config::PersistentConfig, errors::Result}; +use tectonic_io_base::app_dirs; use tectonic_status_base::StatusBackend; use crate::v2cli::{CommandCustomizations, TectonicCommand, V2CliOptions}; @@ -47,9 +48,7 @@ impl ShowUserCacheDirCommand { } fn execute(self, _config: PersistentConfig, _status: &mut dyn StatusBackend) -> Result { - use tectonic_bundles::cache::Cache; - let cache = Cache::get_user_default()?; - println!("{}", cache.root().display()); + println!("{}", app_dirs::get_user_cache_dir("bundles")?.display()); Ok(0) } } diff --git a/src/config.rs b/src/config.rs index 6ac8aadc..60a648f7 100644 --- a/src/config.rs +++ b/src/config.rs @@ -12,19 +12,13 @@ #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; use std::{ - path::{Path, PathBuf}, + path::PathBuf, sync::atomic::{AtomicBool, Ordering}, }; -use tectonic_bundles::{ - cache::Cache, dir::DirBundle, itar::IndexedTarBackend, zip::ZipBundle, 
Bundle, -}; +use tectonic_bundles::{detect_bundle, Bundle}; use tectonic_io_base::app_dirs; -use url::Url; -use crate::{ - errors::{ErrorKind, Result}, - status::StatusBackend, -}; +use crate::errors::{ErrorKind, Result}; /// Awesome hack time!!! /// @@ -44,19 +38,19 @@ pub fn is_config_test_mode_activated() -> bool { CONFIG_TEST_MODE_ACTIVATED.load(Ordering::SeqCst) } -pub fn is_test_bundle_wanted(web_bundle: Option) -> bool { +pub fn is_test_bundle_wanted(bundle: Option) -> bool { if !is_config_test_mode_activated() { return false; } - match web_bundle { + match bundle { None => true, Some(x) if x.contains("test-bundle://") => true, _ => false, } } -pub fn maybe_return_test_bundle(web_bundle: Option) -> Result> { - if is_test_bundle_wanted(web_bundle) { +pub fn maybe_return_test_bundle(bundle: Option) -> Result> { + if is_test_bundle_wanted(bundle) { Ok(Box::::default()) } else { Err(ErrorKind::Msg("not asking for the default test bundle".to_owned()).into()) @@ -134,53 +128,14 @@ impl PersistentConfig { Ok(PersistentConfig::default()) } - pub fn make_cached_url_provider( - &self, - url: &str, - only_cached: bool, - custom_cache_root: Option<&Path>, - status: &mut dyn StatusBackend, - ) -> Result> { - if let Ok(test_bundle) = maybe_return_test_bundle(Some(url.to_owned())) { - return Ok(test_bundle); - } - - let mut cache = if let Some(root) = custom_cache_root { - Cache::get_for_custom_directory(root) - } else { - Cache::get_user_default()? - }; - - let bundle = cache.open::(url, only_cached, status)?; - Ok(Box::new(bundle) as _) - } - - pub fn make_local_file_provider( - &self, - file_path: PathBuf, - _status: &mut dyn StatusBackend, - ) -> Result> { - let bundle: Box = if file_path.is_dir() { - Box::new(DirBundle::new(file_path)) - } else { - Box::new(ZipBundle::open(file_path)?) 
- }; - Ok(bundle) - } - pub fn default_bundle_loc(&self) -> &str { &self.default_bundles[0].url } - pub fn default_bundle( - &self, - only_cached: bool, - status: &mut dyn StatusBackend, - ) -> Result> { - use std::io; - - if let Ok(test_bundle) = maybe_return_test_bundle(None) { - return Ok(test_bundle); + pub fn default_bundle(&self, only_cached: bool) -> Result> { + if CONFIG_TEST_MODE_ACTIVATED.load(Ordering::SeqCst) { + let bundle = crate::test_util::TestBundle::default(); + return Ok(Box::new(bundle)); } if self.default_bundles.len() != 1 { @@ -190,25 +145,18 @@ impl PersistentConfig { .into()); } - let url = Url::parse(&self.default_bundles[0].url) - .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "failed to parse url"))?; - if url.scheme() == "file" { - // load the local zip file. - let file_path = url.to_file_path().map_err(|_| { - io::Error::new(io::ErrorKind::InvalidInput, "failed to parse local path") - })?; - return self.make_local_file_provider(file_path, status); - } - let bundle = - self.make_cached_url_provider(&self.default_bundles[0].url, only_cached, None, status)?; - Ok(Box::new(bundle) as _) + let bundle = + detect_bundle(self.default_bundles[0].url.to_owned(), only_cached, None)?; + bundle.ok_or_else(|| { + ErrorKind::Msg("default bundle location is not a valid bundle".to_owned()).into() + }) } pub fn format_cache_path(&self) -> Result { if is_config_test_mode_activated() { Ok(crate::test_util::test_path(&[])) } else { - Ok(app_dirs::ensure_user_cache_dir("formats")?) + Ok(app_dirs::get_user_cache_dir("formats")?) } } } diff --git a/src/docmodel.rs b/src/docmodel.rs index 20f12ce7..bbcd6ac1 100644 --- a/src/docmodel.rs +++ b/src/docmodel.rs @@ -7,28 +7,21 @@ //! `tectonic_docmodel` crate with the actual document-processing capabilities //! provided by the processing engines.
-use std::{ - fmt::Write as FmtWrite, - fs, io, - path::{Path, PathBuf}, -}; +use std::{fmt::Write as FmtWrite, fs, io, path::PathBuf}; use tectonic_bridge_core::SecuritySettings; -use tectonic_bundles::{ - cache::Cache, dir::DirBundle, itar::IndexedTarBackend, zip::ZipBundle, Bundle, -}; +use tectonic_bundles::{detect_bundle, Bundle}; use tectonic_docmodel::{ document::{BuildTargetType, Document, InputFile}, workspace::{Workspace, WorkspaceCreator}, }; use tectonic_geturl::{DefaultBackend, GetUrlBackend}; -use url::Url; use crate::{ config, ctry, driver::{OutputFormat, PassSetting, ProcessingSessionBuilder}, errors::{ErrorKind, Result}, status::StatusBackend, - tt_note, + test_util, tt_note, unstable_opts::UnstableOptions, }; @@ -79,11 +72,7 @@ pub trait DocumentExt { /// /// This parses [`Document::bundle_loc`] and turns it into the appropriate /// bundle backend. - fn bundle( - &self, - setup_options: &DocumentSetupOptions, - status: &mut dyn StatusBackend, - ) -> Result>; + fn bundle(&self, setup_options: &DocumentSetupOptions) -> Result>; /// Set up a [`ProcessingSessionBuilder`] for one of the outputs. 
/// @@ -98,38 +87,18 @@ pub trait DocumentExt { } impl DocumentExt for Document { - fn bundle( - &self, - setup_options: &DocumentSetupOptions, - status: &mut dyn StatusBackend, - ) -> Result> { - fn bundle_from_path(p: PathBuf) -> Result> { - if p.is_dir() { - Ok(Box::new(DirBundle::new(p))) - } else { - Ok(Box::new(ZipBundle::open(p)?)) - } + fn bundle(&self, setup_options: &DocumentSetupOptions) -> Result> { + // Load test bundle + if config::is_config_test_mode_activated() { + let bundle = test_util::TestBundle::default(); + return Ok(Box::new(bundle)); } - if let Ok(test_bundle) = config::maybe_return_test_bundle(None) { - Ok(test_bundle) - } else if let Ok(url) = Url::parse(&self.bundle_loc) { - if url.scheme() != "file" { - let mut cache = Cache::get_user_default()?; - let bundle = cache.open::( - &self.bundle_loc, - setup_options.only_cached, - status, - )?; - Ok(Box::new(bundle)) - } else { - let file_path = url.to_file_path().map_err(|_| { - io::Error::new(io::ErrorKind::InvalidInput, "failed to parse local path") - })?; - bundle_from_path(file_path) - } - } else { - bundle_from_path(Path::new(&self.bundle_loc).to_owned()) + let d = detect_bundle(self.bundle_loc.clone(), setup_options.only_cached, None)?; + + match d { + Some(b) => Ok(b), + None => Err(io::Error::new(io::ErrorKind::InvalidInput, "Could not get bundle").into()), } } @@ -198,7 +167,7 @@ impl DocumentExt for Document { if setup_options.only_cached { tt_note!(status, "using only cached resource files"); } - sess_builder.bundle(self.bundle(setup_options, status)?); + sess_builder.bundle(self.bundle(setup_options)?); let mut tex_dir = self.src_dir().to_owned(); tex_dir.push("src"); @@ -225,25 +194,23 @@ pub trait WorkspaceCreatorExt { /// for the main document. 
fn create_defaulted( self, - config: config::PersistentConfig, - status: &mut dyn StatusBackend, - web_bundle: Option, + config: &config::PersistentConfig, + bundle: Option, ) -> Result; } impl WorkspaceCreatorExt for WorkspaceCreator { fn create_defaulted( self, - config: config::PersistentConfig, - status: &mut dyn StatusBackend, - web_bundle: Option, + config: &config::PersistentConfig, + bundle: Option, ) -> Result { - let bundle_loc = if config::is_test_bundle_wanted(web_bundle.clone()) { + let bundle_loc = if config::is_test_bundle_wanted(bundle.clone()) { "test-bundle://".to_owned() } else { - let unresolved_loc = web_bundle.unwrap_or(config.default_bundle_loc().to_owned()); + let loc = bundle.unwrap_or(config.default_bundle_loc().to_owned()); let mut gub = DefaultBackend::default(); - gub.resolve_url(&unresolved_loc, status)? + gub.resolve_url(&loc)? }; Ok(self.create(bundle_loc, Vec::new())?) diff --git a/src/driver.rs b/src/driver.rs index 0242cd1f..f3816f14 100644 --- a/src/driver.rs +++ b/src/driver.rs @@ -641,12 +641,7 @@ impl DriverHooks for BridgeState { self } - fn event_output_closed( - &mut self, - name: String, - digest: DigestData, - _status: &mut dyn StatusBackend, - ) { + fn event_output_closed(&mut self, name: String, digest: DigestData) { let summ = self .events .get_mut(&name) @@ -1166,7 +1161,7 @@ impl ProcessingSessionBuilder { let format_cache_path = self .format_cache_path .unwrap_or_else(|| filesystem_root.clone()); - let format_cache = FormatCache::new(bundle.get_digest(status)?, format_cache_path); + let format_cache = FormatCache::new(bundle.get_digest()?, format_cache_path); let genuine_stdout = if self.print_stdout { Some(GenuineStdoutIo::new()) diff --git a/src/lib.rs b/src/lib.rs index beb4345d..7c9ffe96 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -149,7 +149,7 @@ pub fn latex_to_pdf>(latex: T) -> Result> { "failed to open the default configuration file"); let only_cached = false; - let bundle = 
ctry!(config.default_bundle(only_cached, &mut status); + let bundle = ctry!(config.default_bundle(only_cached); "failed to load the default resource bundle"); let format_cache_path = ctry!(config.format_cache_path(); diff --git a/src/test_util.rs b/src/test_util.rs index c345d772..eaec8183 100644 --- a/src/test_util.rs +++ b/src/test_util.rs @@ -126,11 +126,11 @@ impl IoProvider for TestBundle { } impl Bundle for TestBundle { - fn get_digest(&mut self, _status: &mut dyn StatusBackend) -> Result { + fn get_digest(&mut self) -> Result { Ok(DigestData::zeros()) } - fn all_files(&mut self, status: &mut dyn StatusBackend) -> Result> { - self.0.all_files(status) + fn all_files(&self) -> Vec { + self.0.all_files() } } diff --git a/tests/formats.rs b/tests/formats.rs index a89ee852..f8d82269 100644 --- a/tests/formats.rs +++ b/tests/formats.rs @@ -117,12 +117,7 @@ impl<'a> DriverHooks for FormatTestDriver<'a> { self } - fn event_output_closed( - &mut self, - name: String, - digest: DigestData, - _status: &mut dyn StatusBackend, - ) { + fn event_output_closed(&mut self, name: String, digest: DigestData) { let summ = self .events .get_mut(&name)