diff --git a/tools/sdk-sync/Cargo.lock b/tools/sdk-sync/Cargo.lock index 933bda688..bb387da5b 100644 --- a/tools/sdk-sync/Cargo.lock +++ b/tools/sdk-sync/Cargo.lock @@ -60,12 +60,30 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bytesize" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c58ec36aac5066d5ca17df51b3e70279f5670a72102f5752cb7e7c856adfc70" + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chrono" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" +dependencies = [ + "libc", + "num-integer", + "num-traits", + "winapi", +] + [[package]] name = "clap" version = "3.1.8" @@ -96,12 +114,6 @@ dependencies = [ "syn", ] -[[package]] -name = "core-foundation-sys" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" - [[package]] name = "crossbeam-channel" version = "0.5.4" @@ -314,6 +326,12 @@ dependencies = [ "autocfg", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "mockall" version = "0.11.0" @@ -341,6 +359,16 @@ dependencies = [ "syn", ] +[[package]] +name = "nom" +version = "7.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8903e5a29a317527874d0402f867152a3d21c908bb0b933e416c65e301d4c36" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "normalize-line-endings" version = "0.3.0" @@ -348,12 +376,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" [[package]] -name = "ntapi" -version = "0.3.7" +name = "num-integer" +version = "0.1.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c28774a7fd2fbb4f0babd8237ce554b73af68021b5f695a3cebd6c59bac0980f" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" dependencies = [ - "winapi", + "autocfg", + "num-traits", ] [[package]] @@ -578,7 +607,7 @@ dependencies = [ "regex", "serde", "smithy-rs-tool-common", - "sysinfo", + "systemstat", "tempfile", "toml", "tracing", @@ -650,16 +679,16 @@ dependencies = [ ] [[package]] -name = "sysinfo" -version = "0.23.11" +name = "systemstat" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3bf915673a340ee41f2fc24ad1286c75ea92026f04b65a0d0e5132d80b95fc61" +checksum = "5f5dc96f7634f46ac7e485b8c051f5b89ec8ee5cc023236dd12fe4ae2fb52f80" dependencies = [ - "cfg-if", - "core-foundation-sys", + "bytesize", + "chrono", + "lazy_static", "libc", - "ntapi", - "once_cell", + "nom", "winapi", ] diff --git a/tools/sdk-sync/Cargo.toml b/tools/sdk-sync/Cargo.toml index f803884a1..e9e889e62 100644 --- a/tools/sdk-sync/Cargo.toml +++ b/tools/sdk-sync/Cargo.toml @@ -22,7 +22,7 @@ num_cpus = "1.13.1" rayon = "1.5.2" serde = { version = "1.0.136", features = ["derive"] } smithy-rs-tool-common = { version = "0.1", path = "../smithy-rs-tool-common" } -sysinfo = { version = "0.23.11", default-features = false } +systemstat = "0.1.11" tempfile = "3.3.0" toml = "0.5.9" tracing = "0.1.34" diff --git a/tools/sdk-sync/src/main.rs b/tools/sdk-sync/src/main.rs index 3de674b98..37b673454 100644 --- a/tools/sdk-sync/src/main.rs +++ b/tools/sdk-sync/src/main.rs @@ -6,10 +6,11 @@ use anyhow::{Context, Result}; use clap::Parser; use sdk_sync::init_tracing; +use sdk_sync::sync::gen::CodeGenSettings; use sdk_sync::sync::Sync; use smithy_rs_tool_common::macros::here; use std::path::PathBuf; -use sysinfo::{System, SystemExt}; +use systemstat::{Platform, System}; use tracing::info; const CODEGEN_MIN_RAM_REQUIRED_GB: usize = 2; @@ -36,6 +37,29 @@ struct Args { /// system property) to use for Smithy codegen. Defaults to 1. #[clap(long)] smithy_parallelism: Option, + /// The maximum Java heap space (in megabytes) that the Gradle daemon is allowed to use during code generation. + #[clap(long)] + max_gradle_heap_megabytes: Option, + /// The maximum Java metaspace (in megabytes) that the Gradle daemon is allowed to use during code generation. + #[clap(long)] + max_gradle_metaspace_megabytes: Option, +} + +impl Args { + fn codegen_settings(&self) -> CodeGenSettings { + let defaults = CodeGenSettings::default(); + CodeGenSettings { + smithy_parallelism: self + .smithy_parallelism + .unwrap_or(defaults.smithy_parallelism), + max_gradle_heap_megabytes: self + .max_gradle_heap_megabytes + .unwrap_or(defaults.max_gradle_heap_megabytes), + max_gradle_metaspace_megabytes: self + .max_gradle_metaspace_megabytes + .unwrap_or(defaults.max_gradle_metaspace_megabytes), + } + } } /// This tool syncs codegen changes from smithy-rs, examples changes from aws-doc-sdk-examples, @@ -59,13 +83,11 @@ fn main() -> Result<()> { init_tracing(); let args = Args::parse(); - let sys = System::new_all(); - let available_ram_gb = (sys.available_memory() / 1024 / 1024) as usize; + let available_ram_gb = available_ram_gb(); let num_cpus = num_cpus::get_physical(); info!("Available RAM (GB): {available_ram_gb}"); info!("Num physical CPUs: {num_cpus}"); - let smithy_parallelism = args.smithy_parallelism.unwrap_or(1); let sync_threads = if let Some(sync_threads) = args.sync_threads { sync_threads } else { @@ -84,8 +106,14 @@ fn main() -> Result<()> { &args.aws_doc_sdk_examples.canonicalize().context(here!())?, &args.aws_sdk_rust.canonicalize().context(here!())?, &args.smithy_rs.canonicalize().context(here!())?, - smithy_parallelism, + args.codegen_settings(), )?; sync.sync().map_err(|e| e.context("The sync failed")) } + +fn available_ram_gb() -> usize { + let sys = System::new(); + let memory = sys.memory().expect("determine free memory"); + (memory.free.as_u64() / 1024 / 1024 / 1024) as usize +} diff --git a/tools/sdk-sync/src/sync.rs b/tools/sdk-sync/src/sync.rs index 6558b41dd..b7ea23998 100644 --- a/tools/sdk-sync/src/sync.rs +++ b/tools/sdk-sync/src/sync.rs @@ -3,7 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -use self::gen::{DefaultSdkGenerator, SdkGenerator}; +use self::gen::{CodeGenSettings, DefaultSdkGenerator, SdkGenerator}; use crate::fs::{DefaultFs, Fs}; use crate::git::{Commit, Git, GitCLI}; use crate::versions::{DefaultVersions, Versions, VersionsManifest}; @@ -11,7 +11,12 @@ use anyhow::{bail, Context, Result}; use smithy_rs_tool_common::macros::here; use std::collections::BTreeSet; use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::mpsc::{Sender, TryRecvError}; use std::sync::Arc; +use std::thread; +use std::time::Duration; +use systemstat::{ByteSize, Platform, System}; use tracing::{debug, info, info_span}; use tracing_attributes::instrument; @@ -21,6 +26,73 @@ pub const BOT_NAME: &str = "AWS SDK Rust Bot"; pub const BOT_EMAIL: &str = "aws-sdk-rust-primary@amazon.com"; pub const MODEL_STASH_BRANCH_NAME: &str = "__sdk_sync__models_"; +#[derive(Default)] +struct SyncProgress { + commits_completed: AtomicUsize, + total_commits: AtomicUsize, +} + +struct ProgressThread { + handle: Option>, + tx: Sender, +} + +impl ProgressThread { + pub fn spawn(progress: Arc) -> ProgressThread { + let (tx, rx) = std::sync::mpsc::channel(); + let handle = thread::spawn(move || { + let mut done = false; + let system = System::new(); + while !done { + let cpu = system.cpu_load_aggregate().ok(); + for _ in 0..15 { + thread::sleep(Duration::from_secs(1)); + if !matches!(rx.try_recv(), Err(TryRecvError::Empty)) { + done = true; + break; + } + } + let cpu = if let Some(Ok(cpu)) = cpu.map(|cpu| cpu.done()) { + format!("{:.1}", 100.0 - cpu.idle * 100.0) + } else { + "error".to_string() + }; + let (memory, swap) = system.memory_and_swap().unwrap(); + info!( + "Progress: smithy-rs commit {}/{}, cpu use: {}, memory used: {}, swap used: {}", + progress.commits_completed.load(Ordering::Relaxed), + progress.total_commits.load(Ordering::Relaxed), + cpu, + Self::format_memory(memory.free, memory.total), + Self::format_memory(swap.free, swap.total), + ); + } + }); + ProgressThread { + handle: Some(handle), + tx, + } + } + + fn format_memory(free: ByteSize, total: ByteSize) -> String { + let (free, total) = (free.as_u64(), total.as_u64()); + let format_part = |val: u64| format!("{:.3}GB", val as f64 / 1024.0 / 1024.0 / 1024.0); + format!( + "{}/{}", + format_part(total.saturating_sub(free)), + format_part(total) + ) + } +} + +impl Drop for ProgressThread { + fn drop(&mut self) { + // Attempt to stop the loop in the thread + let _ = self.tx.send(true); + let _ = self.handle.take().map(|handle| handle.join()); + } +} + pub struct Sync { aws_doc_sdk_examples: Arc, aws_sdk_rust: Arc, @@ -28,7 +100,8 @@ pub struct Sync { fs: Arc, versions: Arc, previous_versions_manifest: Arc, - smithy_parallelism: usize, + codegen_settings: CodeGenSettings, + progress: Arc, // Keep a reference to the temp directory so that it doesn't get cleaned up until the sync is complete _temp_dir: Arc, } @@ -38,7 +111,7 @@ impl Sync { aws_doc_sdk_examples_path: &Path, aws_sdk_rust_path: &Path, smithy_rs_path: &Path, - smithy_parallelism: usize, + codegen_settings: CodeGenSettings, ) -> Result { let _temp_dir = Arc::new(tempfile::tempdir().context(here!("create temp dir"))?); let aws_sdk_rust = Arc::new(GitCLI::new(aws_sdk_rust_path)?); @@ -58,7 +131,8 @@ impl Sync { fs, versions: Arc::new(DefaultVersions::new()), previous_versions_manifest, - smithy_parallelism, + codegen_settings, + progress: Default::default(), _temp_dir, }) } @@ -78,13 +152,16 @@ impl Sync { fs: Arc::new(fs), versions: Arc::new(versions), previous_versions_manifest: Arc::new(PathBuf::from("doesnt-matter-for-tests")), - smithy_parallelism: 1, + codegen_settings: Default::default(), + progress: Default::default(), _temp_dir: Arc::new(tempfile::tempdir().unwrap()), } } #[instrument(skip(self))] pub fn sync(&self) -> Result<()> { + let _progress_thread = ProgressThread::spawn(self.progress.clone()); + info!("Loading versions.toml..."); let versions = self .versions @@ -162,7 +239,7 @@ impl Sync { self.fs.clone(), None, self.smithy_rs.path(), - self.smithy_parallelism, + &self.codegen_settings, ) .context(here!())?; let generated_sdk = sdk_gen.generate_sdk().context(here!())?; @@ -203,6 +280,9 @@ impl Sync { } info!("Syncing {} commit(s)...", commits.len()); + self.progress + .total_commits + .store(commits.len(), Ordering::Relaxed); // Generate code in parallel for each individual commit let code_gen_paths = { @@ -211,7 +291,8 @@ impl Sync { let examples_revision = versions.aws_doc_sdk_examples_revision.clone(); let examples_path = self.aws_sdk_rust.path().join("examples"); let fs = self.fs.clone(); - let smithy_parallelism = self.smithy_parallelism; + let codegen_settings = self.codegen_settings.clone(); + let progress = self.progress.clone(); commits .par_iter() @@ -235,10 +316,11 @@ impl Sync { fs.clone(), Some(commit.hash.clone()), smithy_rs.path(), - smithy_parallelism, + &codegen_settings, ) .context(here!())?; let sdk_path = sdk_gen.generate_sdk().context(here!())?; + progress.commits_completed.fetch_add(1, Ordering::Relaxed); Ok((commit, sdk_path)) }) .collect::>>()? @@ -287,7 +369,7 @@ impl Sync { self.fs.clone(), None, self.smithy_rs.path(), - self.smithy_parallelism, + &self.codegen_settings, ) .context(here!())?; let generated_sdk = sdk_gen.generate_sdk().context(here!())?; diff --git a/tools/sdk-sync/src/sync/gen.rs b/tools/sdk-sync/src/sync/gen.rs index bab5bb81a..d7f7b89fe 100644 --- a/tools/sdk-sync/src/sync/gen.rs +++ b/tools/sdk-sync/src/sync/gen.rs @@ -13,6 +13,23 @@ use std::process::Command; use std::sync::Arc; use tracing::{info, instrument}; +#[derive(Clone, Debug)] +pub struct CodeGenSettings { + pub smithy_parallelism: usize, + pub max_gradle_heap_megabytes: usize, + pub max_gradle_metaspace_megabytes: usize, +} + +impl Default for CodeGenSettings { + fn default() -> Self { + Self { + smithy_parallelism: 1, + max_gradle_heap_megabytes: 512, + max_gradle_metaspace_megabytes: 512, + } + } +} + pub struct GeneratedSdk { path: PathBuf, // Keep a reference to the temp directory so that it doesn't get cleaned up @@ -48,7 +65,7 @@ pub struct DefaultSdkGenerator { examples_path: PathBuf, fs: Arc, smithy_rs: Box, - smithy_parallelism: usize, + settings: CodeGenSettings, temp_dir: Arc, } @@ -61,7 +78,7 @@ impl DefaultSdkGenerator { fs: Arc, reset_to_commit: Option, original_smithy_rs_path: &Path, - smithy_parallelism: usize, + settings: &CodeGenSettings, ) -> Result { let temp_dir = tempfile::tempdir().context(here!("create temp dir"))?; GitCLI::new(original_smithy_rs_path) @@ -82,7 +99,7 @@ impl DefaultSdkGenerator { examples_path: examples_path.into(), fs, smithy_rs: Box::new(smithy_rs) as Box, - smithy_parallelism, + settings: settings.clone(), temp_dir: Arc::new(temp_dir), }) } @@ -110,9 +127,7 @@ impl DefaultSdkGenerator { Ok(()) } - /// Runs `aws:sdk:assemble` target with property `aws.fullsdk=true` set - #[instrument(skip(self))] - fn aws_sdk_assemble(&self) -> Result<()> { + fn do_aws_sdk_assemble(&self) -> Result<()> { info!("Generating the SDK..."); let mut command = Command::new("./gradlew"); @@ -126,14 +141,19 @@ impl DefaultSdkGenerator { command.arg(format!( "-Dorg.gradle.jvmargs={}", [ - // Retain default Gradle JVM args - "-Xmx512m", - "-XX:MaxMetaspaceSize=256m", + // Configure Gradle JVM memory settings + format!("-Xmx{}m", self.settings.max_gradle_heap_megabytes), + format!( + "-XX:MaxMetaspaceSize={}m", + self.settings.max_gradle_metaspace_megabytes + ), + "-XX:+UseSerialGC".to_string(), + "-verbose:gc".to_string(), // Disable incremental compilation and caching since we're compiling exactly once per commit - "-Dkotlin.incremental=false", - "-Dkotlin.caching.enabled=false", + "-Dkotlin.incremental=false".to_string(), + "-Dkotlin.caching.enabled=false".to_string(), // Run the compiler in the gradle daemon process to avoid more forking thrash - "-Dkotlin.compiler.execution.strategy=in-process" + "-Dkotlin.compiler.execution.strategy=in-process".to_string() ] .join(" ") )); @@ -141,7 +161,7 @@ impl DefaultSdkGenerator { // Disable Smithy's codegen parallelism in favor of sdk-sync parallelism command.arg(format!( "-Djava.util.concurrent.ForkJoinPool.common.parallelism={}", - self.smithy_parallelism + self.settings.smithy_parallelism )); command.arg("-Paws.fullsdk=true"); @@ -162,6 +182,26 @@ impl DefaultSdkGenerator { handle_failure("aws_sdk_assemble", &output)?; Ok(()) } + + /// Runs `aws:sdk:assemble` target with property `aws.fullsdk=true` set + #[instrument(skip(self))] + fn aws_sdk_assemble(&self) -> Result<()> { + let result = self.do_aws_sdk_assemble(); + if result.is_err() { + // On failure, do a dump of running processes to give more insight into if there is a process leak going on + match Command::new("ps").arg("-ef").output() { + Ok(output) => info!( + "Running processes shortly after failure:\n---\n{}---\n", + String::from_utf8_lossy(&output.stdout) + ), + Err(err) => info!( + "Failed to get running processes shortly after failure: {}", + err + ), + } + } + result + } } impl SdkGenerator for DefaultSdkGenerator { diff --git a/tools/sdk-sync/tests/e2e_test.rs b/tools/sdk-sync/tests/e2e_test.rs index 28e9d8699..016add58a 100644 --- a/tools/sdk-sync/tests/e2e_test.rs +++ b/tools/sdk-sync/tests/e2e_test.rs @@ -92,7 +92,7 @@ fn test_without_model_changes() { &tmp_dir.as_ref().join("aws-doc-sdk-examples"), &tmp_dir.as_ref().join("aws-sdk-rust"), &tmp_dir.as_ref().join("smithy-rs"), - 1, + Default::default(), ) .expect("create sync success"); sync.sync().expect("sync success"); @@ -211,7 +211,7 @@ fn test_with_model_changes() { &tmp_dir.as_ref().join("aws-doc-sdk-examples"), &tmp_dir.as_ref().join("aws-sdk-rust"), &tmp_dir.as_ref().join("smithy-rs"), - 1, + Default::default(), ) .expect("create sync success"); sync.sync().expect("sync success");