Get Wasm modules from Web URLs

Signed-off-by: itowlson <ivan.towlson@fermyon.com>
This commit is contained in:
itowlson 2022-11-09 17:25:57 +13:00
parent 79d1615392
commit bcffffe182
No known key found for this signature in database
GPG Key ID: 35C1D48AFA20CF2E
11 changed files with 304 additions and 8 deletions

View File

@ -137,6 +137,8 @@ pub enum RawModuleSource {
FileReference(PathBuf),
/// Reference to a remote bindle
Bindle(FileComponentBindleSource),
/// Reference to a Wasm file at a URL
Url(FileComponentUrlSource),
}
/// A component source from Bindle.
@ -151,3 +153,13 @@ pub struct FileComponentBindleSource {
/// Parcel to use from the bindle.
pub parcel: String,
}
/// A component source from a URL.
///
/// Serialized and deserialized with serde using the `url` and `digest` keys;
/// any other key is rejected on deserialization (`deny_unknown_fields`).
/// Both fields are kept as raw strings here and are not validated by this type.
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)]
#[serde(deny_unknown_fields, rename_all = "snake_case")]
pub struct FileComponentUrlSource {
/// The URL of the Wasm binary.
pub url: String,
/// The digest of the Wasm binary, used for integrity checking. This must be a
/// SHA256 digest, in the form `sha256:...`
pub digest: String,
}

View File

@ -10,21 +10,28 @@ pub mod config;
#[cfg(test)]
mod tests;
use std::{path::Path, str::FromStr};
use std::{
path::{Path, PathBuf},
str::FromStr,
};
use anyhow::{anyhow, bail, Context, Result};
use futures::future;
use itertools::Itertools;
use outbound_http::allowed_http_hosts::validate_allowed_http_hosts;
use path_absolutize::Absolutize;
use reqwest::Url;
use spin_manifest::{
Application, ApplicationInformation, ApplicationOrigin, CoreComponent, ModuleSource,
SpinVersion, WasmConfig,
};
use tokio::{fs::File, io::AsyncReadExt};
use crate::bindle::BindleConnectionInfo;
use crate::{bindle::BindleConnectionInfo, digest::bytes_sha256_string};
use config::{RawAppInformation, RawAppManifest, RawAppManifestAnyVersion, RawComponentManifest};
use self::config::FileComponentUrlSource;
/// Given the path to a spin.toml manifest file, prepare its assets locally and
/// get a prepared application configuration consumable by a Spin execution context.
/// If a directory is provided, use it as the base directory to expand the assets,
@ -188,6 +195,17 @@ async fn core(
let name = format!("{}@{}", bindle_id, parcel_sha);
ModuleSource::Buffer(bytes, name)
}
config::RawModuleSource::Url(us) => {
let source = UrlSource::new(&us)
.with_context(|| format!("Can't use Web source in component {}", id))?;
let bytes = source
.get()
.await
.with_context(|| format!("Can't use source {} for component {}", us.url, id))?;
ModuleSource::Buffer(bytes, us.url)
}
};
let description = raw.description;
@ -215,6 +233,119 @@ async fn core(
})
}
/// A parsed URL source for a component module.
///
/// Constructed with [`UrlSource::new`], which parses the URL, checks its
/// scheme, and parses the digest string.
#[derive(Debug)]
pub struct UrlSource {
// Parsed location of the Wasm binary.
url: Url,
// Digest the downloaded bytes are checked against.
digest: ComponentDigest,
}
impl UrlSource {
    /// Parses a URL source from a raw component manifest.
    ///
    /// # Errors
    ///
    /// Fails if the URL cannot be parsed, if its scheme is not `https`, or if
    /// the digest string is not accepted by `ComponentDigest`.
    pub fn new(us: &FileComponentUrlSource) -> anyhow::Result<UrlSource> {
        let url = reqwest::Url::parse(&us.url)
            .with_context(|| format!("Invalid source URL {}", us.url))?;
        if url.scheme() != "https" {
            anyhow::bail!("Invalid URL scheme {}: must be HTTPS", url.scheme());
        }
        let digest = ComponentDigest::try_from(&us.digest)?;
        Ok(Self { url, digest })
    }

    /// The URL of the source.
    pub fn url(&self) -> &Url {
        &self.url
    }

    /// A relative path derived from the path component of the URL, with any
    /// leading `/` characters removed.
    pub fn url_relative_path(&self) -> PathBuf {
        PathBuf::from(self.url.path().trim_start_matches('/'))
    }

    /// The digest string (omitting the format).
    pub fn digest_str(&self) -> &str {
        match &self.digest {
            ComponentDigest::Sha256(s) => s,
        }
    }

    /// Gets the data from the source as a byte buffer.
    ///
    /// # Errors
    ///
    /// Fails if the request cannot be sent, if the response status is anything
    /// other than `200 OK`, if the body cannot be read, or if the downloaded
    /// bytes do not match the expected digest.
    pub async fn get(&self) -> anyhow::Result<Vec<u8>> {
        let response = reqwest::get(self.url.clone())
            .await
            .with_context(|| format!("Error fetching source URL {}", self.url))?;

        // TODO: handle redirects
        // NOTE(review): `reqwest::get` goes through reqwest's default client,
        // which (per the reqwest docs) follows redirects, so the HTTPS check in
        // `new` only covers the initial URL. Confirm whether redirect targets
        // should be restricted as well; integrity still rests on the digest.
        let status = response.status();
        if status != reqwest::StatusCode::OK {
            let reason = status.canonical_reason().unwrap_or("(no reason provided)");
            anyhow::bail!(
                "Error fetching source URL {}: {} {}",
                self.url,
                status.as_u16(),
                reason
            );
        }

        let body = response
            .bytes()
            .await
            .with_context(|| format!("Error loading source URL {}", self.url))?;

        // Copy the body as one slice rather than collecting it byte by byte.
        let bytes = body.to_vec();

        // Only hand the bytes back once they are known to match the digest.
        self.digest.verify(&bytes).context("Incorrect digest")?;
        Ok(bytes)
    }
}
/// The expected digest of a component's Wasm binary, tagged by digest format.
#[derive(Debug)]
enum ComponentDigest {
/// A SHA256 digest; holds the digest text without the `sha256:` prefix.
Sha256(String),
}
impl TryFrom<&String> for ComponentDigest {
    type Error = anyhow::Error;

    /// Parses a digest string of the form `sha256:<digest>`.
    ///
    /// The text is split at the first `:`. The part before it must be exactly
    /// `sha256` and the part after it must be non-empty. The digest text is
    /// stored in lower case, because hexadecimal digests are case-insensitive
    /// but verification compares strings exactly.
    ///
    /// NOTE(review): this assumes `bytes_sha256_string` renders lower-case hex,
    /// as the lower-case digests used in the test manifests suggest — confirm.
    ///
    /// # Errors
    ///
    /// Fails if there is no `:` separator, if the format is not `sha256`, or
    /// if the digest part is empty.
    fn try_from(value: &String) -> Result<Self, Self::Error> {
        match value.split_once(':') {
            Some(("sha256", "")) => Err(anyhow!("Invalid digest string '{value}': no digest")),
            Some(("sha256", text)) => Ok(Self::Sha256(text.to_ascii_lowercase())),
            Some(_) => Err(anyhow!(
                "Invalid digest string '{value}': format must be sha256"
            )),
            None => Err(anyhow!(
                "Invalid digest string '{value}': format must be 'sha256:...'"
            )),
        }
    }
}
impl ComponentDigest {
    /// Checks that `bytes` hash to this digest.
    ///
    /// The digest of `bytes` is rendered with `bytes_sha256_string` and
    /// compared to the expected text by exact string equality. On mismatch the
    /// error names both the expected and the actual digest.
    fn verify(&self, bytes: &[u8]) -> anyhow::Result<()> {
        // Single-variant enum: this binding stops compiling if a second digest
        // format is ever added, forcing this method to be revisited.
        let Self::Sha256(expected) = self;
        let actual = &bytes_sha256_string(bytes);
        if expected != actual {
            return Err(anyhow!("Downloaded file does not match specified digest: expected {expected}, actual {actual}"));
        }
        Ok(())
    }
}
/// Converts the raw application information from the spin.toml manifest to the standard configuration.
fn info(raw: RawAppInformation, src: impl AsRef<Path>) -> ApplicationInformation {
ApplicationInformation {

View File

@ -91,11 +91,63 @@ fn test_manifest() -> Result<()> {
let b = match cfg.components[1].source.clone() {
RawModuleSource::Bindle(b) => b,
RawModuleSource::FileReference(_) => panic!("expected bindle source"),
RawModuleSource::Url(_) => panic!("expected bindle source"),
};
assert_eq!(b.reference, "bindle reference".to_string());
assert_eq!(b.parcel, "parcel".to_string());
let u = match cfg.components[2].source.clone() {
RawModuleSource::Url(u) => u,
RawModuleSource::FileReference(_) => panic!("expected URL source"),
RawModuleSource::Bindle(_) => panic!("expected URL source"),
};
assert_eq!(u.url, "https://example.com/wasm.wasm.wasm".to_string());
assert_eq!(u.digest, "sha256:12345".to_string());
Ok(())
}
/// A well-formed HTTPS URL with a `sha256:` digest parses, and the accessors
/// expose the scheme, the URL path, and the derived relative path.
#[tokio::test]
async fn can_parse_url_sources() -> Result<()> {
    let raw_source = FileComponentUrlSource {
        url: String::from("https://example.com/wasm.wasm.wasm"),
        digest: String::from("sha256:12345"),
    };

    let parsed = UrlSource::new(&raw_source)?;
    let parsed_url = parsed.url();

    assert_eq!("https", parsed_url.scheme());
    assert_eq!("/wasm.wasm.wasm", parsed_url.path());
    assert_eq!(PathBuf::from("wasm.wasm.wasm"), parsed.url_relative_path());

    Ok(())
}
/// Each malformed source must be rejected by `UrlSource::new`.
#[tokio::test]
async fn url_sources_are_validated() -> Result<()> {
    const VALID_URL: &str = "https://example.com/wasm.wasm.wasm";
    const VALID_DIGEST: &str = "sha256:12345";

    // (url, digest, message shown if the source is unexpectedly accepted)
    let rejected_cases = [
        (
            "ftp://example.com/wasm.wasm.wasm",
            VALID_DIGEST,
            "fcs1 should fail on scheme",
        ),
        (
            "SNORKBONGLY",
            VALID_DIGEST,
            "fcs2 should fail because not a URL",
        ),
        (VALID_URL, "sha123:12345", "fcs3 should fail on digest fmt"),
        (VALID_URL, "sha256:", "fcs4 should fail on empty digest"),
    ];

    for (url, digest, failure_message) in rejected_cases {
        let raw_source = FileComponentUrlSource {
            url: url.to_owned(),
            digest: digest.to_owned(),
        };
        UrlSource::new(&raw_source).expect_err(failure_message);
    }

    Ok(())
}

View File

@ -23,3 +23,11 @@ parcel = "parcel"
reference = "bindle reference"
[component.trigger]
route = "/test"
[[component]]
id = "web"
[component.source]
url = "https://example.com/wasm.wasm.wasm"
digest = "sha256:12345"
[component.trigger]
route = "/dont/test"

View File

@ -8,7 +8,7 @@ use semver::BuildMetadata;
use spin_loader::{
bindle::config as bindle_schema,
digest::{bytes_sha256_string, file_sha256_string},
local::{config as local_schema, validate_raw_app_manifest},
local::{config as local_schema, validate_raw_app_manifest, UrlSource},
};
use std::path::{Path, PathBuf};
@ -42,7 +42,7 @@ pub async fn expand_manifest(
// - there is a parcel for the spin.toml-a-like and it has the magic media type
// - n parcels for the Wasm modules at their locations
let wasm_parcels = wasm_parcels(&manifest, &app_dir)
let wasm_parcels = wasm_parcels(&manifest, &app_dir, &scratch_dir)
.await
.context("Failed to collect Wasm modules")?;
let wasm_parcels = consolidate_wasm_parcels(wasm_parcels);
@ -113,6 +113,11 @@ fn bindle_component_manifest(
"This version of Spin can't publish components whose sources are already bindles"
)
}
local_schema::RawModuleSource::Url(us) => {
let source = UrlSource::new(us)
.with_context(|| format!("Can't use Web source in component {}", local.id))?;
source.digest_str().to_owned()
}
};
let asset_group = local.wasm.files.as_ref().map(|_| group_name_for(&local.id));
Ok(bindle_schema::RawComponentManifest {
@ -132,8 +137,12 @@ fn bindle_component_manifest(
async fn wasm_parcels(
manifest: &local_schema::RawAppManifest,
base_dir: &Path,
scratch_dir: impl AsRef<Path>,
) -> Result<Vec<SourcedParcel>> {
let parcel_futures = manifest.components.iter().map(|c| wasm_parcel(c, base_dir));
let parcel_futures = manifest
.components
.iter()
.map(|c| wasm_parcel(c, base_dir, scratch_dir.as_ref()));
let parcels = futures::future::join_all(parcel_futures).await;
parcels.into_iter().collect()
}
@ -141,16 +150,45 @@ async fn wasm_parcels(
async fn wasm_parcel(
component: &local_schema::RawComponentManifest,
base_dir: &Path,
scratch_dir: impl AsRef<Path>,
) -> Result<SourcedParcel> {
let wasm_file = match &component.source {
local_schema::RawModuleSource::FileReference(path) => path,
let (wasm_file, absolute_wasm_file) = match &component.source {
local_schema::RawModuleSource::FileReference(path) => {
(path.to_owned(), base_dir.join(path))
}
local_schema::RawModuleSource::Bindle(_) => {
anyhow::bail!(
"This version of Spin can't publish components whose sources are already bindles"
)
}
local_schema::RawModuleSource::Url(us) => {
let id = &component.id;
let source = UrlSource::new(us)
.with_context(|| format!("Can't use Web source in component {}", id))?;
let bytes = source
.get()
.await
.with_context(|| format!("Can't use source {} for component {}", us.url, id))?;
let temp_dir = scratch_dir.as_ref().join("downloads");
let temp_file = temp_dir.join(us.digest.replace(':', "_"));
tokio::fs::create_dir_all(temp_dir)
.await
.context("Failed to save download to temporary file")?;
tokio::fs::write(&temp_file, &bytes)
.await
.context("Failed to save download to temporary file")?;
let absolute_path = dunce::canonicalize(&temp_file)
.context("Failed to acquire full path for app downloaded temporary file")?;
let dest_relative_path = source.url_relative_path();
(dest_relative_path, absolute_path)
}
};
let absolute_wasm_file = base_dir.join(wasm_file);
file_parcel(&absolute_wasm_file, wasm_file, None, "application/wasm").await
}

View File

@ -441,6 +441,7 @@ impl DeployCommand {
copy(&mut r, &mut sha256)?;
}
config::RawModuleSource::Bindle(_b) => {}
config::RawModuleSource::Url(us) => sha256.update(us.digest.as_bytes()),
}
if let Some(files) = &x.wasm.files {
let source_dir = crate::app_dir(&self.app)?;

View File

@ -0,0 +1,9 @@
title = "Test"
base_url = "http://localhost:3000"
about = "This site is generated with Bartholomew, the Spin micro-CMS. And this message is in site.toml."
theme = "fermyon"
index_site_pages = ["main"]
enable_shortcodes = false
[extra]
copyright = "The Site Authors"

View File

@ -0,0 +1,4 @@
title = "Test"
template = "home"
date = "2022-10-15T00:22:56Z"
---

View File

@ -4,6 +4,13 @@ name = "spin-assets-test"
trigger = {type = "http", base = "/"}
version = "1.0.0"
[[component]]
source = { url = "https://github.com/fermyon/bartholomew/releases/download/v0.6.0/bartholomew.wasm", digest = "sha256:b64bc17da4484ff7fee619ba543f077be69b3a1f037506e0eeee1fb020d42786" }
id = "bartholomew"
files = [ "content/**/*" , "templates/*", "config/*"]
[component.trigger]
route = "/..."
[[component]]
id = "fs"
# should we just use git submodules to avoid having binary test files here?

View File

@ -0,0 +1,7 @@
<html>
<body>
<div>
<h1>Hello</h1>
</div>
</body>
</html>

View File

@ -175,6 +175,9 @@ mod integration_tests {
let s = SpinTestController::with_bindle(RUST_HTTP_STATIC_ASSETS_REST_REF, &b.url, &[])
.await?;
assert_status(&s, "/", 200).await?;
assert_response_contains(&s, "/", "<h1>Hello</h1>").await?;
assert_status(&s, "/static/thisshouldbemounted/1", 200).await?;
assert_status(&s, "/static/thisshouldbemounted/2", 200).await?;
assert_status(&s, "/static/thisshouldbemounted/3", 200).await?;
@ -662,6 +665,9 @@ mod integration_tests {
)
.await?;
assert_status(&s, "/", 200).await?;
assert_response_contains(&s, "/", "<h1>Hello</h1>").await?;
assert_status(&s, "/static/thisshouldbemounted/1", 200).await?;
assert_status(&s, "/static/thisshouldbemounted/2", 200).await?;
assert_status(&s, "/static/thisshouldbemounted/3", 200).await?;
@ -806,6 +812,27 @@ mod integration_tests {
Ok(())
}
/// Requests `absolute_uri` from the running test instance and asserts that the
/// response body, decoded as UTF-8, contains `expected`.
///
/// Panics if the body cannot be read, is not valid UTF-8, or does not contain
/// `expected`; a failure of the request itself is returned as an error.
async fn assert_response_contains(
    s: &SpinTestController,
    absolute_uri: &str,
    expected: &str,
) -> Result<()> {
    let response = req(s, absolute_uri).await?;

    let raw_body = hyper::body::to_bytes(response.into_body())
        .await
        .expect("read body");
    let text = String::from_utf8(raw_body.to_vec()).expect("convert body to string");

    assert!(
        text.contains(expected),
        "expected to contain {}, got {}",
        expected,
        text
    );

    Ok(())
}
async fn req(s: &SpinTestController, absolute_uri: &str) -> Result<Response<Body>> {
let c = Client::new();
let url = format!("http://{}{}", s.url, absolute_uri)