Add observability and more configuration to `sdk-sync` (#1452)

* Make `sdk-sync` Gradle heap/metaspace constraints configurable
* Dump process info on `sdk-sync` codegen failure
* Periodically log progress information in `sdk-sync`
* Enable verbose GC for codegen and use serial GC
This commit is contained in:
John DiSanti 2022-06-13 15:01:28 -07:00 committed by GitHub
parent ea2ae7bfc6
commit 9156aca9fd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 227 additions and 48 deletions

View File

@ -60,12 +60,30 @@ version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bytesize"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c58ec36aac5066d5ca17df51b3e70279f5670a72102f5752cb7e7c856adfc70"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
dependencies = [
"libc",
"num-integer",
"num-traits",
"winapi",
]
[[package]]
name = "clap"
version = "3.1.8"
@ -96,12 +114,6 @@ dependencies = [
"syn",
]
[[package]]
name = "core-foundation-sys"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc"
[[package]]
name = "crossbeam-channel"
version = "0.5.4"
@ -314,6 +326,12 @@ dependencies = [
"autocfg",
]
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "mockall"
version = "0.11.0"
@ -341,6 +359,16 @@ dependencies = [
"syn",
]
[[package]]
name = "nom"
version = "7.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8903e5a29a317527874d0402f867152a3d21c908bb0b933e416c65e301d4c36"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
name = "normalize-line-endings"
version = "0.3.0"
@ -348,12 +376,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be"
[[package]]
name = "ntapi"
version = "0.3.7"
name = "num-integer"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c28774a7fd2fbb4f0babd8237ce554b73af68021b5f695a3cebd6c59bac0980f"
checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9"
dependencies = [
"winapi",
"autocfg",
"num-traits",
]
[[package]]
@ -578,7 +607,7 @@ dependencies = [
"regex",
"serde",
"smithy-rs-tool-common",
"sysinfo",
"systemstat",
"tempfile",
"toml",
"tracing",
@ -650,16 +679,16 @@ dependencies = [
]
[[package]]
name = "sysinfo"
version = "0.23.11"
name = "systemstat"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3bf915673a340ee41f2fc24ad1286c75ea92026f04b65a0d0e5132d80b95fc61"
checksum = "5f5dc96f7634f46ac7e485b8c051f5b89ec8ee5cc023236dd12fe4ae2fb52f80"
dependencies = [
"cfg-if",
"core-foundation-sys",
"bytesize",
"chrono",
"lazy_static",
"libc",
"ntapi",
"once_cell",
"nom",
"winapi",
]

View File

@ -22,7 +22,7 @@ num_cpus = "1.13.1"
rayon = "1.5.2"
serde = { version = "1.0.136", features = ["derive"] }
smithy-rs-tool-common = { version = "0.1", path = "../smithy-rs-tool-common" }
sysinfo = { version = "0.23.11", default-features = false }
systemstat = "0.1.11"
tempfile = "3.3.0"
toml = "0.5.9"
tracing = "0.1.34"

View File

@ -6,10 +6,11 @@
use anyhow::{Context, Result};
use clap::Parser;
use sdk_sync::init_tracing;
use sdk_sync::sync::gen::CodeGenSettings;
use sdk_sync::sync::Sync;
use smithy_rs_tool_common::macros::here;
use std::path::PathBuf;
use sysinfo::{System, SystemExt};
use systemstat::{Platform, System};
use tracing::info;
const CODEGEN_MIN_RAM_REQUIRED_GB: usize = 2;
@ -36,6 +37,29 @@ struct Args {
/// system property) to use for Smithy codegen. Defaults to 1.
#[clap(long)]
smithy_parallelism: Option<usize>,
/// The maximum Java heap space (in megabytes) that the Gradle daemon is allowed to use during code generation.
#[clap(long)]
max_gradle_heap_megabytes: Option<usize>,
/// The maximum Java metaspace (in megabytes) that the Gradle daemon is allowed to use during code generation.
#[clap(long)]
max_gradle_metaspace_megabytes: Option<usize>,
}
impl Args {
    /// Builds the [`CodeGenSettings`] for this invocation.
    ///
    /// Every option that was given on the command line is used as-is; every
    /// option that was omitted takes the corresponding value from
    /// `CodeGenSettings::default()`.
    fn codegen_settings(&self) -> CodeGenSettings {
        // Pull the fallback values apart once so each option can be resolved independently
        let CodeGenSettings {
            smithy_parallelism: fallback_parallelism,
            max_gradle_heap_megabytes: fallback_heap,
            max_gradle_metaspace_megabytes: fallback_metaspace,
        } = CodeGenSettings::default();

        let smithy_parallelism = self.smithy_parallelism.unwrap_or(fallback_parallelism);
        let max_gradle_heap_megabytes = self.max_gradle_heap_megabytes.unwrap_or(fallback_heap);
        let max_gradle_metaspace_megabytes = self
            .max_gradle_metaspace_megabytes
            .unwrap_or(fallback_metaspace);

        CodeGenSettings {
            smithy_parallelism,
            max_gradle_heap_megabytes,
            max_gradle_metaspace_megabytes,
        }
    }
}
/// This tool syncs codegen changes from smithy-rs, examples changes from aws-doc-sdk-examples,
@ -59,13 +83,11 @@ fn main() -> Result<()> {
init_tracing();
let args = Args::parse();
let sys = System::new_all();
let available_ram_gb = (sys.available_memory() / 1024 / 1024) as usize;
let available_ram_gb = available_ram_gb();
let num_cpus = num_cpus::get_physical();
info!("Available RAM (GB): {available_ram_gb}");
info!("Num physical CPUs: {num_cpus}");
let smithy_parallelism = args.smithy_parallelism.unwrap_or(1);
let sync_threads = if let Some(sync_threads) = args.sync_threads {
sync_threads
} else {
@ -84,8 +106,14 @@ fn main() -> Result<()> {
&args.aws_doc_sdk_examples.canonicalize().context(here!())?,
&args.aws_sdk_rust.canonicalize().context(here!())?,
&args.smithy_rs.canonicalize().context(here!())?,
smithy_parallelism,
args.codegen_settings(),
)?;
sync.sync().map_err(|e| e.context("The sync failed"))
}
/// Returns the amount of free system memory, in whole gigabytes (rounded down).
///
/// # Panics
///
/// Panics if the platform's memory statistics cannot be read.
fn available_ram_gb() -> usize {
    const BYTES_PER_GB: u64 = 1024 * 1024 * 1024;

    let free_bytes = System::new()
        .memory()
        .expect("determine free memory")
        .free
        .as_u64();
    (free_bytes / BYTES_PER_GB) as usize
}

View File

@ -3,7 +3,7 @@
* SPDX-License-Identifier: Apache-2.0
*/
use self::gen::{DefaultSdkGenerator, SdkGenerator};
use self::gen::{CodeGenSettings, DefaultSdkGenerator, SdkGenerator};
use crate::fs::{DefaultFs, Fs};
use crate::git::{Commit, Git, GitCLI};
use crate::versions::{DefaultVersions, Versions, VersionsManifest};
@ -11,7 +11,12 @@ use anyhow::{bail, Context, Result};
use smithy_rs_tool_common::macros::here;
use std::collections::BTreeSet;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::mpsc::{Sender, TryRecvError};
use std::sync::Arc;
use std::thread;
use std::time::Duration;
use systemstat::{ByteSize, Platform, System};
use tracing::{debug, info, info_span};
use tracing_attributes::instrument;
@ -21,6 +26,73 @@ pub const BOT_NAME: &str = "AWS SDK Rust Bot";
pub const BOT_EMAIL: &str = "aws-sdk-rust-primary@amazon.com";
pub const MODEL_STASH_BRANCH_NAME: &str = "__sdk_sync__models_";
/// Progress counters for a sync, reported in the periodic "Progress: smithy-rs commit x/y" log line.
///
/// Both counters start at zero (via `Default`).
#[derive(Default)]
struct SyncProgress {
    /// Number of smithy-rs commits whose SDK generation has finished so far.
    commits_completed: AtomicUsize,
    /// Total number of smithy-rs commits being synced.
    total_commits: AtomicUsize,
}
/// Handle to the background thread that periodically logs sync progress.
///
/// Dropping this value sends a stop message over `tx` and joins the thread.
struct ProgressThread {
    /// Join handle of the logging thread; wrapped in `Option` so it can be taken out in `drop`.
    handle: Option<thread::JoinHandle<()>>,
    /// Channel used to tell the logging thread to stop.
    tx: Sender<bool>,
}
impl ProgressThread {
    /// Spawns a background thread that logs sync progress and system load.
    ///
    /// Roughly every 15 seconds the thread logs the completed/total commit
    /// counters from `progress`, the CPU use measured across that interval, and
    /// the memory and swap currently in use. The thread stops shortly after a
    /// message is sent on the returned `ProgressThread`'s channel, or after the
    /// sending side is dropped; it logs one final progress line before exiting.
    ///
    /// Failing to read CPU, memory or swap statistics never terminates the
    /// thread: the affected values are logged as `error` instead.
    pub fn spawn(progress: Arc<SyncProgress>) -> ProgressThread {
        let (tx, rx) = std::sync::mpsc::channel();
        let handle = thread::spawn(move || {
            let mut done = false;
            let system = System::new();
            while !done {
                // Start a CPU load measurement now; it is completed after the wait below
                let cpu = system.cpu_load_aggregate().ok();
                // Wait in one second steps so that a stop request is noticed quickly
                for _ in 0..15 {
                    thread::sleep(Duration::from_secs(1));
                    // Anything other than an empty channel (a message, or a disconnected
                    // sender) means the thread should shut down
                    if !matches!(rx.try_recv(), Err(TryRecvError::Empty)) {
                        done = true;
                        break;
                    }
                }
                let cpu = if let Some(Ok(cpu)) = cpu.map(|cpu| cpu.done()) {
                    // `idle` is a fraction; report the non-idle share as a percentage
                    format!("{:.1}", 100.0 - cpu.idle * 100.0)
                } else {
                    "error".to_string()
                };
                // Reading memory statistics is I/O and can fail; degrade to "error" in the
                // log line rather than panicking and silently killing the progress thread
                let (memory, swap) = match system.memory_and_swap() {
                    Ok((memory, swap)) => (
                        Self::format_memory(memory.free, memory.total),
                        Self::format_memory(swap.free, swap.total),
                    ),
                    Err(_) => ("error".to_string(), "error".to_string()),
                };
                info!(
                    "Progress: smithy-rs commit {}/{}, cpu use: {}, memory used: {}, swap used: {}",
                    progress.commits_completed.load(Ordering::Relaxed),
                    progress.total_commits.load(Ordering::Relaxed),
                    cpu,
                    memory,
                    swap,
                );
            }
        });
        ProgressThread {
            handle: Some(handle),
            tx,
        }
    }

    /// Formats memory usage as `<used>GB/<total>GB` with three decimal places,
    /// where used is `total - free` (clamped at zero).
    fn format_memory(free: ByteSize, total: ByteSize) -> String {
        let (free, total) = (free.as_u64(), total.as_u64());
        let format_part = |val: u64| format!("{:.3}GB", val as f64 / 1024.0 / 1024.0 / 1024.0);
        format!(
            "{}/{}",
            format_part(total.saturating_sub(free)),
            format_part(total)
        )
    }
}
impl Drop for ProgressThread {
    /// Asks the progress thread to stop and waits for it to exit.
    fn drop(&mut self) {
        // Best-effort stop request; the result of the send is deliberately ignored
        let _ = self.tx.send(true);
        // Wait for the thread to finish, ignoring the outcome of the join
        if let Some(worker) = self.handle.take() {
            let _ = worker.join();
        }
    }
}
pub struct Sync {
aws_doc_sdk_examples: Arc<dyn Git>,
aws_sdk_rust: Arc<dyn Git>,
@ -28,7 +100,8 @@ pub struct Sync {
fs: Arc<dyn Fs>,
versions: Arc<dyn Versions>,
previous_versions_manifest: Arc<PathBuf>,
smithy_parallelism: usize,
codegen_settings: CodeGenSettings,
progress: Arc<SyncProgress>,
// Keep a reference to the temp directory so that it doesn't get cleaned up until the sync is complete
_temp_dir: Arc<tempfile::TempDir>,
}
@ -38,7 +111,7 @@ impl Sync {
aws_doc_sdk_examples_path: &Path,
aws_sdk_rust_path: &Path,
smithy_rs_path: &Path,
smithy_parallelism: usize,
codegen_settings: CodeGenSettings,
) -> Result<Self> {
let _temp_dir = Arc::new(tempfile::tempdir().context(here!("create temp dir"))?);
let aws_sdk_rust = Arc::new(GitCLI::new(aws_sdk_rust_path)?);
@ -58,7 +131,8 @@ impl Sync {
fs,
versions: Arc::new(DefaultVersions::new()),
previous_versions_manifest,
smithy_parallelism,
codegen_settings,
progress: Default::default(),
_temp_dir,
})
}
@ -78,13 +152,16 @@ impl Sync {
fs: Arc::new(fs),
versions: Arc::new(versions),
previous_versions_manifest: Arc::new(PathBuf::from("doesnt-matter-for-tests")),
smithy_parallelism: 1,
codegen_settings: Default::default(),
progress: Default::default(),
_temp_dir: Arc::new(tempfile::tempdir().unwrap()),
}
}
#[instrument(skip(self))]
pub fn sync(&self) -> Result<()> {
let _progress_thread = ProgressThread::spawn(self.progress.clone());
info!("Loading versions.toml...");
let versions = self
.versions
@ -162,7 +239,7 @@ impl Sync {
self.fs.clone(),
None,
self.smithy_rs.path(),
self.smithy_parallelism,
&self.codegen_settings,
)
.context(here!())?;
let generated_sdk = sdk_gen.generate_sdk().context(here!())?;
@ -203,6 +280,9 @@ impl Sync {
}
info!("Syncing {} commit(s)...", commits.len());
self.progress
.total_commits
.store(commits.len(), Ordering::Relaxed);
// Generate code in parallel for each individual commit
let code_gen_paths = {
@ -211,7 +291,8 @@ impl Sync {
let examples_revision = versions.aws_doc_sdk_examples_revision.clone();
let examples_path = self.aws_sdk_rust.path().join("examples");
let fs = self.fs.clone();
let smithy_parallelism = self.smithy_parallelism;
let codegen_settings = self.codegen_settings.clone();
let progress = self.progress.clone();
commits
.par_iter()
@ -235,10 +316,11 @@ impl Sync {
fs.clone(),
Some(commit.hash.clone()),
smithy_rs.path(),
smithy_parallelism,
&codegen_settings,
)
.context(here!())?;
let sdk_path = sdk_gen.generate_sdk().context(here!())?;
progress.commits_completed.fetch_add(1, Ordering::Relaxed);
Ok((commit, sdk_path))
})
.collect::<Result<Vec<_>>>()?
@ -287,7 +369,7 @@ impl Sync {
self.fs.clone(),
None,
self.smithy_rs.path(),
self.smithy_parallelism,
&self.codegen_settings,
)
.context(here!())?;
let generated_sdk = sdk_gen.generate_sdk().context(here!())?;

View File

@ -13,6 +13,23 @@ use std::process::Command;
use std::sync::Arc;
use tracing::{info, instrument};
/// Tunable settings for running SDK code generation through Gradle.
#[derive(Clone, Debug)]
pub struct CodeGenSettings {
    /// Value passed as `java.util.concurrent.ForkJoinPool.common.parallelism` to the codegen build.
    pub smithy_parallelism: usize,
    /// Maximum Java heap size, in megabytes, passed to the Gradle JVM as `-Xmx`.
    pub max_gradle_heap_megabytes: usize,
    /// Maximum Java metaspace size, in megabytes, passed to the Gradle JVM as `-XX:MaxMetaspaceSize`.
    pub max_gradle_metaspace_megabytes: usize,
}

impl Default for CodeGenSettings {
    /// Returns the settings used when nothing is overridden: a parallelism of 1
    /// and 512 megabytes each for heap and metaspace.
    fn default() -> Self {
        const DEFAULT_SMITHY_PARALLELISM: usize = 1;
        const DEFAULT_MAX_HEAP_MEGABYTES: usize = 512;
        const DEFAULT_MAX_METASPACE_MEGABYTES: usize = 512;

        CodeGenSettings {
            smithy_parallelism: DEFAULT_SMITHY_PARALLELISM,
            max_gradle_heap_megabytes: DEFAULT_MAX_HEAP_MEGABYTES,
            max_gradle_metaspace_megabytes: DEFAULT_MAX_METASPACE_MEGABYTES,
        }
    }
}
pub struct GeneratedSdk {
path: PathBuf,
// Keep a reference to the temp directory so that it doesn't get cleaned up
@ -48,7 +65,7 @@ pub struct DefaultSdkGenerator {
examples_path: PathBuf,
fs: Arc<dyn Fs>,
smithy_rs: Box<dyn Git>,
smithy_parallelism: usize,
settings: CodeGenSettings,
temp_dir: Arc<tempfile::TempDir>,
}
@ -61,7 +78,7 @@ impl DefaultSdkGenerator {
fs: Arc<dyn Fs>,
reset_to_commit: Option<CommitHash>,
original_smithy_rs_path: &Path,
smithy_parallelism: usize,
settings: &CodeGenSettings,
) -> Result<Self> {
let temp_dir = tempfile::tempdir().context(here!("create temp dir"))?;
GitCLI::new(original_smithy_rs_path)
@ -82,7 +99,7 @@ impl DefaultSdkGenerator {
examples_path: examples_path.into(),
fs,
smithy_rs: Box::new(smithy_rs) as Box<dyn Git>,
smithy_parallelism,
settings: settings.clone(),
temp_dir: Arc::new(temp_dir),
})
}
@ -110,9 +127,7 @@ impl DefaultSdkGenerator {
Ok(())
}
/// Runs `aws:sdk:assemble` target with property `aws.fullsdk=true` set
#[instrument(skip(self))]
fn aws_sdk_assemble(&self) -> Result<()> {
fn do_aws_sdk_assemble(&self) -> Result<()> {
info!("Generating the SDK...");
let mut command = Command::new("./gradlew");
@ -126,14 +141,19 @@ impl DefaultSdkGenerator {
command.arg(format!(
"-Dorg.gradle.jvmargs={}",
[
// Retain default Gradle JVM args
"-Xmx512m",
"-XX:MaxMetaspaceSize=256m",
// Configure Gradle JVM memory settings
format!("-Xmx{}m", self.settings.max_gradle_heap_megabytes),
format!(
"-XX:MaxMetaspaceSize={}m",
self.settings.max_gradle_metaspace_megabytes
),
"-XX:+UseSerialGC".to_string(),
"-verbose:gc".to_string(),
// Disable incremental compilation and caching since we're compiling exactly once per commit
"-Dkotlin.incremental=false",
"-Dkotlin.caching.enabled=false",
"-Dkotlin.incremental=false".to_string(),
"-Dkotlin.caching.enabled=false".to_string(),
// Run the compiler in the gradle daemon process to avoid more forking thrash
"-Dkotlin.compiler.execution.strategy=in-process"
"-Dkotlin.compiler.execution.strategy=in-process".to_string()
]
.join(" ")
));
@ -141,7 +161,7 @@ impl DefaultSdkGenerator {
// Disable Smithy's codegen parallelism in favor of sdk-sync parallelism
command.arg(format!(
"-Djava.util.concurrent.ForkJoinPool.common.parallelism={}",
self.smithy_parallelism
self.settings.smithy_parallelism
));
command.arg("-Paws.fullsdk=true");
@ -162,6 +182,26 @@ impl DefaultSdkGenerator {
handle_failure("aws_sdk_assemble", &output)?;
Ok(())
}
/// Runs the `aws:sdk:assemble` target with property `aws.fullsdk=true` set.
///
/// If the build fails, the output of `ps -ef` is logged before the original
/// error is returned, so that the set of processes alive around the time of
/// the failure is available for diagnosing a possible process leak.
#[instrument(skip(self))]
fn aws_sdk_assemble(&self) -> Result<()> {
    let outcome = self.do_aws_sdk_assemble();
    if outcome.is_ok() {
        return outcome;
    }

    // The build failed: capture a snapshot of the running processes for the logs.
    // A failure to run `ps` is only logged and never replaces the build error.
    let process_dump = Command::new("ps").arg("-ef").output();
    match process_dump {
        Err(err) => info!(
            "Failed to get running processes shortly after failure: {}",
            err
        ),
        Ok(output) => info!(
            "Running processes shortly after failure:\n---\n{}---\n",
            String::from_utf8_lossy(&output.stdout)
        ),
    }
    outcome
}
}
impl SdkGenerator for DefaultSdkGenerator {

View File

@ -92,7 +92,7 @@ fn test_without_model_changes() {
&tmp_dir.as_ref().join("aws-doc-sdk-examples"),
&tmp_dir.as_ref().join("aws-sdk-rust"),
&tmp_dir.as_ref().join("smithy-rs"),
1,
Default::default(),
)
.expect("create sync success");
sync.sync().expect("sync success");
@ -211,7 +211,7 @@ fn test_with_model_changes() {
&tmp_dir.as_ref().join("aws-doc-sdk-examples"),
&tmp_dir.as_ref().join("aws-sdk-rust"),
&tmp_dir.as_ref().join("smithy-rs"),
1,
Default::default(),
)
.expect("create sync success");
sync.sync().expect("sync success");