diff --git a/cargo-auditable/src/auditable_from_metadata.rs b/cargo-auditable/src/auditable_from_metadata.rs index 0353d69..cccd115 100644 --- a/cargo-auditable/src/auditable_from_metadata.rs +++ b/cargo-auditable/src/auditable_from_metadata.rs @@ -1,10 +1,17 @@ //! Converts from `cargo_metadata` crate structs to `auditable-serde` structs, //! which map to our own serialialized representation. -use std::{cmp::min, cmp::Ordering::*, collections::HashMap, error::Error, fmt::Display}; +use std::{ + cmp::{min, Ordering::*}, + collections::{HashMap, HashSet}, + error::Error, + fmt::Display, +}; use auditable_serde::{DependencyKind, Package, Source, VersionInfo}; +use crate::collect_audit_data::CargoTreePkg; + fn source_from_meta(meta_source: &cargo_metadata::Source) -> Source { match meta_source.repr.as_str() { "registry+https://github.com/rust-lang/crates.io-index" => Source::CratesIo, @@ -73,6 +80,7 @@ impl Error for InsufficientMetadata {} pub fn encode_audit_data( metadata: &cargo_metadata::Metadata, + tree_pkgs: &HashSet, ) -> Result { let toplevel_crate_id = metadata .resolve @@ -136,6 +144,19 @@ pub fn encode_audit_data( // In this case they will not be in the map at all. We skip them, along with dev-dependencies. dep_kind.is_some() && dep_kind.unwrap() != &PrivateDepKind::Development }) + .filter(|p| { + // Due to `cargo metadata` doing feature unification across all dep kinds + // it will over-report dependencies if a dev-dependency enables more features + // on a normal or build-dependency, which causes them to pull in more packages. + // The only stable API that Cargo currently exposes that does NOT do this + // is `cargo tree --edges=normal,build` so we cross-reference the package list against that. + // This is less robust than I'd like, but it's the least bad option I have. + // See the documentation on `parse_cargo_tree_output()` for more details. + tree_pkgs.contains(&CargoTreePkg { + name: p.name.clone(), + version: p.version.clone(), + }) + }) .collect(); // This function is the simplest place to introduce sorting, since @@ -197,7 +218,9 @@ pub fn encode_audit_data( // and dev-dependencies are allowed to have cycles, // so we may end up encoding cyclic graph if we don't handle that. let dep_id = dep.pkg.repr.as_str(); - if strongest_dep_kind(&dep.dep_kinds) != PrivateDepKind::Development { + if strongest_dep_kind(&dep.dep_kinds) != PrivateDepKind::Development + && id_to_index.contains_key(dep_id) + { package.dependencies.push(id_to_index[dep_id]); } } @@ -214,31 +237,3 @@ fn strongest_dep_kind(deps: &[cargo_metadata::DepKindInfo]) -> PrivateDepKind { .max() .unwrap_or(PrivateDepKind::Runtime) // for compatibility with Rust earlier than 1.41 } - -#[cfg(test)] -mod tests { - #![allow(unused_imports)] // otherwise conditional compilation emits warnings - use super::*; - use std::fs; - use std::{ - convert::TryInto, - path::{Path, PathBuf}, - str::FromStr, - }; - - fn load_metadata(cargo_toml_path: &Path) -> cargo_metadata::Metadata { - let mut cmd = cargo_metadata::MetadataCommand::new(); - cmd.manifest_path(cargo_toml_path); - cmd.exec().unwrap() - } - - #[test] - fn dependency_cycle() { - let cargo_toml_path = PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").unwrap()) - .join("tests/fixtures/cargo-audit-dep-cycle/Cargo.toml"); - let metadata = load_metadata(&cargo_toml_path); - let version_info_struct: VersionInfo = encode_audit_data(&metadata).unwrap(); - let json = serde_json::to_string(&version_info_struct).unwrap(); - VersionInfo::from_str(&json).unwrap(); // <- the part we care about succeeding - } -} diff --git a/cargo-auditable/src/collect_audit_data.rs b/cargo-auditable/src/collect_audit_data.rs index 5f20046..053c1a4 100644 --- a/cargo-auditable/src/collect_audit_data.rs +++ b/cargo-auditable/src/collect_audit_data.rs @@ -1,6 +1,6 @@ use cargo_metadata::{Metadata, MetadataCommand}; use miniz_oxide::deflate::compress_to_vec_zlib; -use std::str::from_utf8; +use std::{collections::HashSet, str::from_utf8}; use crate::{ auditable_from_metadata::encode_audit_data, cargo_arguments::CargoArgs, @@ -9,82 +9,173 @@ use crate::{ /// Calls `cargo metadata` to obtain the dependency tree, serializes it to JSON and compresses it pub fn compressed_dependency_list(rustc_args: &RustcArgs, target_triple: &str) -> Vec { - let metadata = get_metadata(rustc_args, target_triple); - let version_info = encode_audit_data(&metadata).unwrap(); + let (metadata, tree) = get_metadata(rustc_args, target_triple); + let version_info = encode_audit_data(&metadata, &tree).unwrap(); let json = serde_json::to_string(&version_info).unwrap(); // compression level 7 makes this complete in a few milliseconds, so no need to drop to a lower level in debug mode let compressed_json = compress_to_vec_zlib(json.as_bytes(), 7); compressed_json } -fn get_metadata(args: &RustcArgs, target_triple: &str) -> Metadata { - let mut metadata_command = MetadataCommand::new(); - +fn get_metadata(args: &RustcArgs, target_triple: &str) -> (Metadata, HashSet) { // Cargo sets the path to itself in the `CARGO` environment variable: // https://doc.rust-lang.org/cargo/reference/environment-variables.html#environment-variables-cargo-sets-for-3rd-party-subcommands // This is also useful for using `cargo auditable` as a drop-in replacement for Cargo. - if let Some(path) = std::env::var_os("CARGO") { - metadata_command.cargo_path(path); - } + let cargo_path = std::env::var_os("CARGO").unwrap_or("cargo".into()); // Point cargo-metadata to the correct Cargo.toml in a workspace. // CARGO_MANIFEST_DIR env var will be set by Cargo when it calls our rustc wrapper let manifest_dir = std::env::var_os("CARGO_MANIFEST_DIR").unwrap(); - metadata_command.current_dir(manifest_dir); - // Pass the features that are actually enabled for this crate to cargo-metadata + // Argument shared between `cargo metadata` and `cargo tree` + let mut shared_args: Vec = Vec::new(); + + // Convert the features that are actually enabled for the crate into Cargo argument format let mut features = args.enabled_features(); if let Some(index) = features.iter().position(|x| x == &"default") { features.remove(index); } else { - metadata_command.features(cargo_metadata::CargoOpt::NoDefaultFeatures); + shared_args.push("--no-default-features".into()); + } + // no need for special handling of --all-features, features are resolved into an explicit list when passed to rustc + if !features.is_empty() { + shared_args.push("--features".into()); + shared_args.push(features.join(",")); } - let owned_features: Vec = features.iter().map(|s| s.to_string()).collect(); - metadata_command.features(cargo_metadata::CargoOpt::SomeFeatures(owned_features)); - - // Restrict the dependency resolution to just the platform the binary is being compiled for. - // By default `cargo metadata` resolves the dependency tree for all platforms. - let mut other_args = vec!["--filter-platform".to_owned(), target_triple.to_owned()]; // Pass arguments such as `--config`, `--offline` and `--locked` // from the original CLI invocation of `cargo auditable` let orig_args = CargoArgs::from_env() .expect("Env var 'CARGO_AUDITABLE_ORIG_ARGS' set by 'cargo-auditable' is unset!"); if orig_args.offline { - other_args.push("--offline".to_owned()); + shared_args.push("--offline".to_owned()); } if orig_args.frozen { - other_args.push("--frozen".to_owned()); + shared_args.push("--frozen".to_owned()); } if orig_args.locked { - other_args.push("--locked".to_owned()); + shared_args.push("--locked".to_owned()); } for arg in orig_args.config { - other_args.push("--config".to_owned()); - other_args.push(arg); + shared_args.push("--config".to_owned()); + shared_args.push(arg); } - // This can only be done once, multiple calls will replace previously set options. - metadata_command.other_options(other_args); - - // Get the underlying std::process::Command and re-implement MetadataCommand::exec, - // to clear RUSTC_WORKSPACE_WRAPPER in the child process to avoid recursion. - // The alternative would be modifying the environment of our own process, - // which is sketchy and discouraged on POSIX because it's not thread-safe: - // https://doc.rust-lang.org/stable/std/env/fn.remove_var.html - let mut metadata_command = metadata_command.cargo_command(); - metadata_command.env_remove("RUSTC_WORKSPACE_WRAPPER"); - let output = metadata_command.output().unwrap(); - if !output.status.success() { + let mut metadata_args = vec!["metadata".to_string(), "--format-version=1".to_string()]; + let tree_args = [ + "tree", + "--edges=normal,build", + "--prefix=none", + "--format={p}", + ]; + let mut tree_args: Vec = tree_args.iter().map(|s| s.to_string()).collect(); + + // Restrict the dependency resolution to just the platform the binary is being compiled for. + // By default `cargo metadata` resolves the dependency tree for all platforms, so it has to be passed explicitly. + metadata_args.extend_from_slice(&["--filter-platform".to_owned(), target_triple.to_owned()]); + tree_args.extend_from_slice(&["--target".to_owned(), target_triple.to_owned()]); + + // Now that we've resolved the args, start assembling commands + let mut metadata_command = std::process::Command::new(&cargo_path); + let mut tree_command = std::process::Command::new(&cargo_path); + + metadata_command.args(metadata_args); + tree_command.args(tree_args); + + for cmd in [&mut metadata_command, &mut tree_command] { + // Clear RUSTC_WORKSPACE_WRAPPER in the child process to avoid recursion. + // The alternative would be modifying the environment of our own process, + // which is sketchy and discouraged on POSIX because it's not thread-safe: + // https://doc.rust-lang.org/stable/std/env/fn.remove_var.html + cmd.env_remove("RUSTC_WORKSPACE_WRAPPER"); + + cmd.current_dir(&manifest_dir); + cmd.args(&shared_args); + } + + // guard against `cargo tree` getting localizations in the future, just in case + tree_command.env("LC_ALL", "C"); + + let meta_out = metadata_command.output().unwrap(); + if !meta_out.status.success() { panic!( "cargo metadata failure: {}", - String::from_utf8_lossy(&output.stderr) + String::from_utf8_lossy(&meta_out.stderr) ); } - let stdout = from_utf8(&output.stdout) + let stdout = from_utf8(&meta_out.stdout) .expect("cargo metadata output not utf8") .lines() .find(|line| line.starts_with('{')) .expect("cargo metadata output not json"); - MetadataCommand::parse(stdout).expect("failed to parse cargo metadata output") + let metadata = MetadataCommand::parse(stdout).expect("failed to parse cargo metadata output"); + + let tree_out = tree_command.output().unwrap(); + if !tree_out.status.success() { + panic!( + "cargo tree failure: {}", + String::from_utf8_lossy(&tree_out.stderr) + ); + } + let tree_stdout: Vec<&str> = from_utf8(&tree_out.stdout) + .expect("cargo tree output not utf8") + .lines() + .collect(); + + let cargo_tree_pkgs = parse_cargo_tree_output(&tree_stdout); + + (metadata, cargo_tree_pkgs) +} + +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +pub struct CargoTreePkg { + pub name: String, + pub version: cargo_metadata::semver::Version, +} + +/// Sadly `cargo metadata` does not expose enough information to accurately reconstruct +/// the dependency tree. +/// [Features are always resolved at workspace level, not per-crate.](https://github.com/rust-lang/cargo/issues/7754) +/// Features only enabled by dev-dependencies are also getting enabled in `cargo metadata` output, +/// but not in the real build. +/// +/// The issue is discussed in more detail here: +/// +/// We have three ways to approach this: +/// +/// The [Cargo native SBOM RFC](https://github.com/rust-lang/rfcs/pull/3553) would solve it, +/// but it will be a long time (potentially years) until it's stabilized. +/// +/// We could use [an independent reimplementation of the Cargo dependency resolution](https://docs.rs/guppy/) +/// to fill in the gaps in `cargo metadata`, but that involves a lot of complexity, +/// and risks getting out of sync with the actual Cargo algorithms, resulting in subtly incorrect SBOMs. +/// This will be especially bad in LTS Linux distributions where `cargo auditable` can be years out of date. +/// +/// The third option is to parse `cargo tree` output, which isn't meant to be machine-readable. +/// The advantages of this is that if something goes wrong, at least it should be very noticeable, +/// and we don't take on a great deal of complexity or risk going out of sync with complex Cargo algorithms. +/// +/// This implements the third option - it seems to be the least bad one available. +fn parse_cargo_tree_output(lines: &[&str]) -> HashSet { + let mut result: HashSet = HashSet::new(); + for line in lines { + if !line.is_empty() { + // cargo tree output can contain empty lines + result.insert(parse_cargo_tree_line(line)); + } + } + result +} + +fn parse_cargo_tree_line(line: &str) -> CargoTreePkg { + let parts: Vec<&str> = line.split_ascii_whitespace().collect(); + let name = parts[0].to_owned(); + let version_str = parts[1]; + let version_str = version_str + .strip_prefix("v") + .expect("Failed to parse version string \"{version_str}\": does not start with 'v'"); + // Technically this can fail because crates.io didn't always enforce semver validity, and crates with invalid semver exist. + // But since we're relying on the cargo_metadata crate that also parses semver, in those cases we're screwed regardless. + let version = cargo_metadata::semver::Version::parse(version_str).unwrap(); + CargoTreePkg { name, version } } diff --git a/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/Cargo.toml b/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/Cargo.toml new file mode 100644 index 0000000..4675d67 --- /dev/null +++ b/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/Cargo.toml @@ -0,0 +1,9 @@ +[workspace] + +resolver = "2" +members = [ + "top_level_crate", + "dev_dep", + "runtime_dep", + "optional_transitive_dep", +] diff --git a/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/dev_dep/Cargo.toml b/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/dev_dep/Cargo.toml new file mode 100644 index 0000000..d7c8c91 --- /dev/null +++ b/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/dev_dep/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "dev_dep" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +runtime_dep = {path = "../runtime_dep", features = ["optional_dep"]} diff --git a/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/dev_dep/src/lib.rs b/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/dev_dep/src/lib.rs new file mode 100644 index 0000000..1b4a90c --- /dev/null +++ b/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/dev_dep/src/lib.rs @@ -0,0 +1,8 @@ +#[cfg(test)] +mod tests { + #[test] + fn it_works() { + let result = 2 + 2; + assert_eq!(result, 4); + } +} diff --git a/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/optional_transitive_dep/Cargo.toml b/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/optional_transitive_dep/Cargo.toml new file mode 100644 index 0000000..f5a2fef --- /dev/null +++ b/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/optional_transitive_dep/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "optional_transitive_dep" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/optional_transitive_dep/src/lib.rs b/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/optional_transitive_dep/src/lib.rs new file mode 100644 index 0000000..7d12d9a --- /dev/null +++ b/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/optional_transitive_dep/src/lib.rs @@ -0,0 +1,14 @@ +pub fn add(left: usize, right: usize) -> usize { + left + right +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn it_works() { + let result = add(2, 2); + assert_eq!(result, 4); + } +} diff --git a/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/runtime_dep/Cargo.toml b/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/runtime_dep/Cargo.toml new file mode 100644 index 0000000..2b8c002 --- /dev/null +++ b/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/runtime_dep/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "runtime_dep" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[features] +optional_dep = ["optional_transitive_dep"] + +[dependencies] +optional_transitive_dep = { path = "../optional_transitive_dep", optional = true } diff --git a/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/runtime_dep/src/lib.rs b/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/runtime_dep/src/lib.rs new file mode 100644 index 0000000..7d12d9a --- /dev/null +++ b/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/runtime_dep/src/lib.rs @@ -0,0 +1,14 @@ +pub fn add(left: usize, right: usize) -> usize { + left + right +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn it_works() { + let result = add(2, 2); + assert_eq!(result, 4); + } +} diff --git a/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/top_level_crate/Cargo.toml b/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/top_level_crate/Cargo.toml new file mode 100644 index 0000000..6e44c96 --- /dev/null +++ b/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/top_level_crate/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "top_level_crate" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +runtime_dep = {path = "../runtime_dep"} + +[dev-dependencies] +dev_dep = {path = "../dev_dep"} diff --git a/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/top_level_crate/src/main.rs b/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/top_level_crate/src/main.rs new file mode 100644 index 0000000..e7a11a9 --- /dev/null +++ b/cargo-auditable/tests/fixtures/runtime_and_dev_dep_with_different_features/top_level_crate/src/main.rs @@ -0,0 +1,3 @@ +fn main() { + println!("Hello, world!"); +} diff --git a/cargo-auditable/tests/it.rs b/cargo-auditable/tests/it.rs index f589895..cbad794 100644 --- a/cargo-auditable/tests/it.rs +++ b/cargo-auditable/tests/it.rs @@ -464,3 +464,38 @@ fn test_wasm() { eprintln!("wasm_crate.wasm dependency info: {dep_info:?}"); assert_eq!(dep_info.packages.len(), 19); } + +#[test] +fn test_dependency_unification() { + // Path to workspace fixture Cargo.toml. See that file for overview of workspace members and their dependencies. + let workspace_cargo_toml = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests/fixtures/runtime_and_dev_dep_with_different_features/Cargo.toml"); + // Run in workspace root with default features + let bins = run_cargo_auditable(workspace_cargo_toml, &[], &[]); + eprintln!("Test fixture binary map: {bins:?}"); + + let toplevel_crate_bin = &bins.get("top_level_crate").unwrap()[0]; + let dep_info = get_dependency_info(toplevel_crate_bin); + eprintln!("{toplevel_crate_bin} dependency info: {dep_info:?}"); + assert!(dep_info.packages.len() == 2); + // optional_transitive_dep should not be present, it's only reachable through dev-dependencies + assert!(!dep_info + .packages + .iter() + .any(|p| p.name == "optional_transitive_dep")); +} + +#[test] +fn test_dep_cycle() { + // Path to workspace fixture Cargo.toml. See that file for overview of workspace members and their dependencies. + let workspace_cargo_toml = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests/fixtures/cargo-audit-dep-cycle/Cargo.toml"); + // Run in workspace root with default features + let bins = run_cargo_auditable(workspace_cargo_toml, &[], &[]); + eprintln!("Test fixture binary map: {bins:?}"); + + let toplevel_crate_bin = &bins.get("cargo-audit-dep-cycle").unwrap()[0]; + // Deserialization checks for cycles, so we only need to verify that it succeeds + let dep_info = get_dependency_info(toplevel_crate_bin); + eprintln!("{toplevel_crate_bin} dependency info: {dep_info:?}"); +}