Skip to content

More precise dependency list #210

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 16 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 25 additions & 30 deletions cargo-auditable/src/auditable_from_metadata.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
//! Converts from `cargo_metadata` crate structs to `auditable-serde` structs,
//! which map to our own serialialized representation.

use std::{cmp::min, cmp::Ordering::*, collections::HashMap, error::Error, fmt::Display};
use std::{
cmp::{min, Ordering::*},
collections::{HashMap, HashSet},
error::Error,
fmt::Display,
};

use auditable_serde::{DependencyKind, Package, Source, VersionInfo};

use crate::collect_audit_data::CargoTreePkg;

fn source_from_meta(meta_source: &cargo_metadata::Source) -> Source {
match meta_source.repr.as_str() {
"registry+https://github.com/rust-lang/crates.io-index" => Source::CratesIo,
Expand Down Expand Up @@ -73,6 +80,7 @@ impl Error for InsufficientMetadata {}

pub fn encode_audit_data(
metadata: &cargo_metadata::Metadata,
tree_pkgs: &HashSet<CargoTreePkg>,
) -> Result<VersionInfo, InsufficientMetadata> {
let toplevel_crate_id = metadata
.resolve
Expand Down Expand Up @@ -136,6 +144,19 @@ pub fn encode_audit_data(
// In this case they will not be in the map at all. We skip them, along with dev-dependencies.
dep_kind.is_some() && dep_kind.unwrap() != &PrivateDepKind::Development
})
.filter(|p| {
// Due to `cargo metadata` doing feature unification across all dep kinds
// it will over-report dependencies if a dev-dependency enables more features
// on a normal or build-dependency, which causes them to pull in more packages.
// The only stable API that Cargo currently exposes that does NOT do this
// is `cargo tree --edges=normal,build` so we cross-reference the package list against that.
// This is less robust than I'd like, but it's the least bad option I have.
// See the documentation on `parse_cargo_tree_output()` for more details.
tree_pkgs.contains(&CargoTreePkg {
name: p.name.clone(),
version: p.version.clone(),
})
})
.collect();

// This function is the simplest place to introduce sorting, since
Expand Down Expand Up @@ -197,7 +218,9 @@ pub fn encode_audit_data(
// and dev-dependencies are allowed to have cycles,
// so we may end up encoding cyclic graph if we don't handle that.
let dep_id = dep.pkg.repr.as_str();
if strongest_dep_kind(&dep.dep_kinds) != PrivateDepKind::Development {
if strongest_dep_kind(&dep.dep_kinds) != PrivateDepKind::Development
&& id_to_index.contains_key(dep_id)
{
package.dependencies.push(id_to_index[dep_id]);
}
}
Expand All @@ -214,31 +237,3 @@ fn strongest_dep_kind(deps: &[cargo_metadata::DepKindInfo]) -> PrivateDepKind {
.max()
.unwrap_or(PrivateDepKind::Runtime) // for compatibility with Rust earlier than 1.41
}

#[cfg(test)]
mod tests {
#![allow(unused_imports)] // otherwise conditional compilation emits warnings
use super::*;
use std::fs;
use std::{
convert::TryInto,
path::{Path, PathBuf},
str::FromStr,
};

fn load_metadata(cargo_toml_path: &Path) -> cargo_metadata::Metadata {
let mut cmd = cargo_metadata::MetadataCommand::new();
cmd.manifest_path(cargo_toml_path);
cmd.exec().unwrap()
}

#[test]
fn dependency_cycle() {
let cargo_toml_path = PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").unwrap())
.join("tests/fixtures/cargo-audit-dep-cycle/Cargo.toml");
let metadata = load_metadata(&cargo_toml_path);
let version_info_struct: VersionInfo = encode_audit_data(&metadata).unwrap();
let json = serde_json::to_string(&version_info_struct).unwrap();
VersionInfo::from_str(&json).unwrap(); // <- the part we care about succeeding
}
}
167 changes: 129 additions & 38 deletions cargo-auditable/src/collect_audit_data.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use cargo_metadata::{Metadata, MetadataCommand};
use miniz_oxide::deflate::compress_to_vec_zlib;
use std::str::from_utf8;
use std::{collections::HashSet, str::from_utf8};

use crate::{
auditable_from_metadata::encode_audit_data, cargo_arguments::CargoArgs,
Expand All @@ -9,82 +9,173 @@ use crate::{

/// Calls `cargo metadata` to obtain the dependency tree, serializes it to JSON and compresses it
pub fn compressed_dependency_list(rustc_args: &RustcArgs, target_triple: &str) -> Vec<u8> {
let metadata = get_metadata(rustc_args, target_triple);
let version_info = encode_audit_data(&metadata).unwrap();
let (metadata, tree) = get_metadata(rustc_args, target_triple);
let version_info = encode_audit_data(&metadata, &tree).unwrap();
let json = serde_json::to_string(&version_info).unwrap();
// compression level 7 makes this complete in a few milliseconds, so no need to drop to a lower level in debug mode
let compressed_json = compress_to_vec_zlib(json.as_bytes(), 7);
compressed_json
}

fn get_metadata(args: &RustcArgs, target_triple: &str) -> Metadata {
let mut metadata_command = MetadataCommand::new();

fn get_metadata(args: &RustcArgs, target_triple: &str) -> (Metadata, HashSet<CargoTreePkg>) {
// Cargo sets the path to itself in the `CARGO` environment variable:
// https://doc.rust-lang.org/cargo/reference/environment-variables.html#environment-variables-cargo-sets-for-3rd-party-subcommands
// This is also useful for using `cargo auditable` as a drop-in replacement for Cargo.
if let Some(path) = std::env::var_os("CARGO") {
metadata_command.cargo_path(path);
}
let cargo_path = std::env::var_os("CARGO").unwrap_or("cargo".into());

// Point cargo-metadata to the correct Cargo.toml in a workspace.
// CARGO_MANIFEST_DIR env var will be set by Cargo when it calls our rustc wrapper
let manifest_dir = std::env::var_os("CARGO_MANIFEST_DIR").unwrap();
metadata_command.current_dir(manifest_dir);

// Pass the features that are actually enabled for this crate to cargo-metadata
// Argument shared between `cargo metadata` and `cargo tree`
let mut shared_args: Vec<String> = Vec::new();

// Convert the features that are actually enabled for the crate into Cargo argument format
let mut features = args.enabled_features();
if let Some(index) = features.iter().position(|x| x == &"default") {
features.remove(index);
} else {
metadata_command.features(cargo_metadata::CargoOpt::NoDefaultFeatures);
shared_args.push("--no-default-features".into());
}
// no need for special handling of --all-features, features are resolved into an explicit list when passed to rustc
if !features.is_empty() {
shared_args.push("--features".into());
shared_args.push(features.join(","));
}
let owned_features: Vec<String> = features.iter().map(|s| s.to_string()).collect();
metadata_command.features(cargo_metadata::CargoOpt::SomeFeatures(owned_features));

// Restrict the dependency resolution to just the platform the binary is being compiled for.
// By default `cargo metadata` resolves the dependency tree for all platforms.
let mut other_args = vec!["--filter-platform".to_owned(), target_triple.to_owned()];

// Pass arguments such as `--config`, `--offline` and `--locked`
// from the original CLI invocation of `cargo auditable`
let orig_args = CargoArgs::from_env()
.expect("Env var 'CARGO_AUDITABLE_ORIG_ARGS' set by 'cargo-auditable' is unset!");
if orig_args.offline {
other_args.push("--offline".to_owned());
shared_args.push("--offline".to_owned());
}
if orig_args.frozen {
other_args.push("--frozen".to_owned());
shared_args.push("--frozen".to_owned());
}
if orig_args.locked {
other_args.push("--locked".to_owned());
shared_args.push("--locked".to_owned());
}
for arg in orig_args.config {
other_args.push("--config".to_owned());
other_args.push(arg);
shared_args.push("--config".to_owned());
shared_args.push(arg);
}

// This can only be done once, multiple calls will replace previously set options.
metadata_command.other_options(other_args);

// Get the underlying std::process::Command and re-implement MetadataCommand::exec,
// to clear RUSTC_WORKSPACE_WRAPPER in the child process to avoid recursion.
// The alternative would be modifying the environment of our own process,
// which is sketchy and discouraged on POSIX because it's not thread-safe:
// https://doc.rust-lang.org/stable/std/env/fn.remove_var.html
let mut metadata_command = metadata_command.cargo_command();
metadata_command.env_remove("RUSTC_WORKSPACE_WRAPPER");
let output = metadata_command.output().unwrap();
if !output.status.success() {
let mut metadata_args = vec!["metadata".to_string(), "--format-version=1".to_string()];
let tree_args = [
"tree",
"--edges=normal,build",
"--prefix=none",
"--format={p}",
];
let mut tree_args: Vec<String> = tree_args.iter().map(|s| s.to_string()).collect();

// Restrict the dependency resolution to just the platform the binary is being compiled for.
// By default `cargo metadata` resolves the dependency tree for all platforms, so it has to be passed explicitly.
metadata_args.extend_from_slice(&["--filter-platform".to_owned(), target_triple.to_owned()]);
tree_args.extend_from_slice(&["--target".to_owned(), target_triple.to_owned()]);

// Now that we've resolved the args, start assembling commands
let mut metadata_command = std::process::Command::new(&cargo_path);
let mut tree_command = std::process::Command::new(&cargo_path);

metadata_command.args(metadata_args);
tree_command.args(tree_args);

for cmd in [&mut metadata_command, &mut tree_command] {
// Clear RUSTC_WORKSPACE_WRAPPER in the child process to avoid recursion.
// The alternative would be modifying the environment of our own process,
// which is sketchy and discouraged on POSIX because it's not thread-safe:
// https://doc.rust-lang.org/stable/std/env/fn.remove_var.html
cmd.env_remove("RUSTC_WORKSPACE_WRAPPER");

cmd.current_dir(&manifest_dir);
cmd.args(&shared_args);
}

// guard against `cargo tree` getting localizations in the future, just in case
tree_command.env("LC_ALL", "C");

let meta_out = metadata_command.output().unwrap();
if !meta_out.status.success() {
panic!(
"cargo metadata failure: {}",
String::from_utf8_lossy(&output.stderr)
String::from_utf8_lossy(&meta_out.stderr)
);
}
let stdout = from_utf8(&output.stdout)
let stdout = from_utf8(&meta_out.stdout)
.expect("cargo metadata output not utf8")
.lines()
.find(|line| line.starts_with('{'))
.expect("cargo metadata output not json");
MetadataCommand::parse(stdout).expect("failed to parse cargo metadata output")
let metadata = MetadataCommand::parse(stdout).expect("failed to parse cargo metadata output");

let tree_out = tree_command.output().unwrap();
if !tree_out.status.success() {
panic!(
"cargo tree failure: {}",
String::from_utf8_lossy(&tree_out.stderr)
);
}
let tree_stdout: Vec<&str> = from_utf8(&tree_out.stdout)
.expect("cargo tree output not utf8")
.lines()
.collect();

let cargo_tree_pkgs = parse_cargo_tree_output(&tree_stdout);

(metadata, cargo_tree_pkgs)
}

#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct CargoTreePkg {
pub name: String,
pub version: cargo_metadata::semver::Version,
}

/// Sadly `cargo metadata` does not expose enough information to accurately reconstruct
/// the dependency tree.
/// [Features are always resolved at workspace level, not per-crate.](https://github.com/rust-lang/cargo/issues/7754)
/// Features only enabled by dev-dependencies are also getting enabled in `cargo metadata` output,
/// but not in the real build.
///
/// The issue is discussed in more detail here: <https://github.com/rust-secure-code/cargo-auditable/issues/66>
///
/// We have three ways to approach this:
///
/// The [Cargo native SBOM RFC](https://github.com/rust-lang/rfcs/pull/3553) would solve it,
/// but it will be a long time (potentially years) until it's stabilized.
///
/// We could use [an independent reimplementation of the Cargo dependency resolution](https://docs.rs/guppy/)
/// to fill in the gaps in `cargo metadata`, but that involves a lot of complexity,
/// and risks getting out of sync with the actual Cargo algorithms, resulting in subtly incorrect SBOMs.
/// This will be especially bad in LTS Linux distributions where `cargo auditable` can be years out of date.
///
/// The third option is to parse `cargo tree` output, which isn't meant to be machine-readable.
/// The advantages of this is that if something goes wrong, at least it should be very noticeable,
/// and we don't take on a great deal of complexity or risk going out of sync with complex Cargo algorithms.
///
/// This implements the third option - it seems to be the least bad one available.
fn parse_cargo_tree_output(lines: &[&str]) -> HashSet<CargoTreePkg> {
let mut result: HashSet<CargoTreePkg> = HashSet::new();
for line in lines {
if !line.is_empty() {
// cargo tree output can contain empty lines
result.insert(parse_cargo_tree_line(line));
}
}
result
}

fn parse_cargo_tree_line(line: &str) -> CargoTreePkg {
let parts: Vec<&str> = line.split_ascii_whitespace().collect();
let name = parts[0].to_owned();
let version_str = parts[1];
let version_str = version_str
.strip_prefix("v")
.expect("Failed to parse version string \"{version_str}\": does not start with 'v'");
// Technically this can fail because crates.io didn't always enforce semver validity, and crates with invalid semver exist.
// But since we're relying on the cargo_metadata crate that also parses semver, in those cases we're screwed regardless.
let version = cargo_metadata::semver::Version::parse(version_str).unwrap();
CargoTreePkg { name, version }
Comment on lines +170 to +180
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To better prepare for a LTS Linux distribution having an old cargo-auditable (which is mentioned in another fn docstring in this PR), it might be worthwhile adding a few ways to avoid problems if cargo tree .. output changes in a way that could break cargo-auditable .

  1. Don't use cargo tree for binaries that are not in a workspace, as I assume that is the only time when cargo tree is beneficial.
  2. Have a command line flag or envvar flag to turn off using cargo tree.
  3. This function and its callers ripple up any parsing errors (including parts[1]) in a Result, and have a command line flag or envvar flag to allow the user to choose whether to fail or emit warnings when this parser fails.

2 & 3 could be the same config item, likeCARGO_AUDITABLE_USE_CARGO_TREE=off,on,warn

I personally dont mind if these protections are not done. Just ideas to help get this feature across the line.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It actually helps with more than just workspaces. cargo metadata also does somewhat incorrect platform resolution, e.g. with rustix depending on errno on Linux according to cargo metadata but not cargo tree unless you use cargo tree --platforms=all, so it needs to be enabled unconditionally.

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[workspace]

resolver = "2"
members = [
"top_level_crate",
"dev_dep",
"runtime_dep",
"optional_transitive_dep",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[package]
name = "dev_dep"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
runtime_dep = {path = "../runtime_dep", features = ["optional_dep"]}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#[cfg(test)]
mod tests {
#[test]
fn it_works() {
let result = 2 + 2;
assert_eq!(result, 4);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[package]
name = "optional_transitive_dep"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
pub fn add(left: usize, right: usize) -> usize {
left + right
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn it_works() {
let result = add(2, 2);
assert_eq!(result, 4);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[package]
name = "runtime_dep"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[features]
optional_dep = ["optional_transitive_dep"]

[dependencies]
optional_transitive_dep = { path = "../optional_transitive_dep", optional = true }
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
pub fn add(left: usize, right: usize) -> usize {
left + right
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn it_works() {
let result = add(2, 2);
assert_eq!(result, 4);
}
}
Loading
Loading