From 568d07448a187d4a118ed66ca9cac0720ca0b291 Mon Sep 17 00:00:00 2001 From: Evan Almloff Date: Mon, 14 Apr 2025 14:53:48 -0500 Subject: [PATCH 01/11] Fix deduping assets --- Cargo.lock | 9 +- packages/cli-opt/src/lib.rs | 64 ++++++- packages/cli/Cargo.toml | 28 ++- packages/cli/src/build/bundle.rs | 84 ++++----- packages/cli/src/build/request.rs | 6 - packages/cli/src/build/web.rs | 20 +- packages/cli/src/cli/link.rs | 169 +++++++++++++++-- packages/cli/src/serve/handle.rs | 26 +-- packages/const-serialize/src/lib.rs | 2 +- packages/manganis/manganis-core/src/asset.rs | 1 + packages/manganis/manganis-core/src/css.rs | 1 + packages/manganis/manganis-core/src/folder.rs | 1 + packages/manganis/manganis-core/src/images.rs | 3 + packages/manganis/manganis-core/src/js.rs | 1 + .../manganis/manganis-core/src/options.rs | 1 + packages/manganis/manganis-macro/src/asset.rs | 16 +- packages/manganis/manganis/src/hash.rs | 39 ---- packages/manganis/manganis/src/lib.rs | 1 - .../manganis/manganis/src/macro_helpers.rs | 171 +----------------- 19 files changed, 312 insertions(+), 331 deletions(-) delete mode 100644 packages/manganis/manganis/src/hash.rs diff --git a/Cargo.lock b/Cargo.lock index 2ba68fa6e9..ce6a7095d3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2159,7 +2159,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d067ad48b8650848b989a59a86c6c36a995d02d2bf778d45c3c5d57bc2718f02" dependencies = [ "smallvec", - "target-lexicon", + "target-lexicon 0.12.16", ] [[package]] @@ -3600,6 +3600,7 @@ dependencies = [ "strum 0.26.3", "syn 2.0.100", "tar", + "target-lexicon 0.13.2", "tauri-bundler", "tauri-utils", "tempfile", @@ -13762,6 +13763,12 @@ version = "0.12.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" +[[package]] +name = "target-lexicon" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a" + [[package]] name = "target-triple" version = "0.1.4" diff --git a/packages/cli-opt/src/lib.rs b/packages/cli-opt/src/lib.rs index 97c934a747..d3d36bcc8a 100644 --- a/packages/cli-opt/src/lib.rs +++ b/packages/cli-opt/src/lib.rs @@ -1,10 +1,13 @@ use anyhow::Context; +use manganis::AssetOptions; use manganis_core::linker::LinkSection; use manganis_core::BundledAsset; use object::{read::archive::ArchiveFile, File as ObjectFile, Object, ObjectSection}; +use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use serde::{Deserialize, Serialize}; -use std::path::Path; -use std::{collections::HashMap, path::PathBuf}; +use std::collections::{HashMap, HashSet}; +use std::path::{Path, PathBuf}; +use std::sync::{Arc, RwLock}; mod css; mod file; @@ -21,7 +24,7 @@ pub use file::process_file_to; #[derive(Debug, PartialEq, Default, Clone, Serialize, Deserialize)] pub struct AssetManifest { /// Map of bundled asset name to the asset itself - pub assets: HashMap<PathBuf, BundledAsset>, + assets: HashMap<PathBuf, HashSet<BundledAsset>>, } impl AssetManifest { @@ -31,21 +34,30 @@ impl AssetManifest { asset_path: &Path, options: manganis::AssetOptions, ) -> anyhow::Result<BundledAsset> { - let hash = manganis_core::hash::AssetHash::hash_file_contents(asset_path) - .context("Failed to hash file")?; - let output_path_str = asset_path.to_str().ok_or(anyhow::anyhow!( "Failed to convert wasm bindgen output path to string" ))?; - let bundled_asset = - manganis::macro_helpers::create_bundled_asset(output_path_str, hash.bytes(), options); + let bundled_asset = manganis::macro_helpers::create_bundled_asset(output_path_str, options); - self.assets.insert(asset_path.into(), bundled_asset); + self.assets + .entry(asset_path.to_path_buf()) + .or_default() + .insert(bundled_asset); Ok(bundled_asset) } + /// Get any assets that are tied to a specific source file + pub fn get_assets_for_source(&self, path: &Path) -> Option<&HashSet<BundledAsset>> { + self.assets.get(path) + } + + /// Iterate over all the 
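assets in the manifest + pub fn assets(&self) -> impl Iterator<Item = &BundledAsset> { + self.assets.values().flat_map(|assets| assets.iter()) + } + #[allow(dead_code)] pub fn load_from_file(path: &Path) -> anyhow::Result { let src = std::fs::read_to_string(path)?; @@ -120,7 +132,9 @@ impl AssetManifest { const_serialize::deserialize_const!(BundledAsset, buffer) { self.assets - .insert(asset.absolute_source_path().into(), asset); + .entry(PathBuf::from(asset.absolute_source_path())) + .or_default() + .insert(asset); buffer = remaining_buffer; } } @@ -128,3 +142,33 @@ impl AssetManifest { Ok(()) } } + +/// Optimize a list of assets in parallel +pub fn optimize_all_assets( + assets_to_transfer: Vec<(PathBuf, PathBuf, AssetOptions)>, + on_optimization_start: impl FnMut(&Path, &Path, &AssetOptions) + Sync + Send, + on_optimization_end: impl FnMut(&Path, &Path, &AssetOptions) + Sync + Send, +) -> anyhow::Result<()> { + let on_optimization_start = Arc::new(RwLock::new(on_optimization_start)); + let on_optimization_end = Arc::new(RwLock::new(on_optimization_end)); + assets_to_transfer + .par_iter() + .try_for_each(|(from, to, options)| { + { + let mut on_optimization_start = on_optimization_start.write().unwrap(); + on_optimization_start(from, to, options); + } + + let res = process_file_to(options, from, to); + if let Err(err) = res.as_ref() { + tracing::error!("Failed to copy asset {from:?}: {err}"); + } + + { + let mut on_optimization_end = on_optimization_end.write().unwrap(); + on_optimization_end(from, to, options); + } + + res.map(|_| ()) + }) +} diff --git a/packages/cli/Cargo.toml b/packages/cli/Cargo.toml index 958bd9603b..a007c9ea1b 100644 --- a/packages/cli/Cargo.toml +++ b/packages/cli/Cargo.toml @@ -58,7 +58,12 @@ axum-server = { workspace = true, features = ["tls-rustls"] } axum-extra = { workspace = true, features = ["typed-header"] } tower-http = { workspace = true, features = ["full"] } proc-macro2 = { workspace = true, features = ["span-locations"] } -syn = { workspace = true, features = ["full", "extra-traits", "visit", "visit-mut"] } +syn = { workspace = true, features = [ + "full", + "extra-traits", + "visit", + "visit-mut", +] } headers = "0.4.0" walkdir = "2" @@ -66,11 +71,7 @@ dunce = { workspace = true } # tools download dirs = { workspace = true } -reqwest = { workspace = true, features = [ - "rustls-tls", - "trust-dns", - "json" -] } +reqwest = { workspace = true, features = ["rustls-tls", "trust-dns", "json"] } tower = { workspace = true } once_cell = "1.19.0" @@ -92,7 +93,13 @@ ignore = "0.4.22" env_logger = { workspace = true } const-serialize = { workspace = true, features = ["serde"] } -tracing-subscriber = { version = "0.3.18", features = ["std", "env-filter", "json", "registry", "fmt"] } +tracing-subscriber = { version = "0.3.18", features = [ + "std", + "env-filter", + "json", + "registry", + "fmt", +] } console-subscriber = { version = "0.3.0", optional = true } tracing = { workspace = true } wasm-opt = { version = "0.116.1", optional = true } @@ -109,9 +116,10 @@ log = { version = "0.4", features = ["max_level_off", "release_max_level_off"] } tempfile = "3.3" manganis = { workspace = true } manganis-core = { workspace = true } +target-lexicon = "0.13.2" # Extracting data from an executable -object = {version="0.36.0", features=["wasm"]} +object = { version = "0.36.0", features = ["wasm"] } tokio-util = { version = "0.7.11", features = ["full"] } itertools = "0.13.0" throbber-widgets-tui = "=0.7.0" @@ -162,5 +170,5 @@ pkg-fmt = "zip" [package.metadata.docs.rs] all-features = false -rustc-args = [ "--cfg", "docsrs" ] -rustdoc-args = [ "--cfg", "docsrs" ] +rustc-args = ["--cfg", "docsrs"] +rustdoc-args = ["--cfg", "docsrs"] diff --git a/packages/cli/src/build/bundle.rs b/packages/cli/src/build/bundle.rs index e2b769f9d2..7753a4efaa 100644 --- a/packages/cli/src/build/bundle.rs +++ b/packages/cli/src/build/bundle.rs @@ -3,9 +3,8 @@ use super::templates::InfoPlistData; use crate::{BuildRequest, Platform, 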
true, features = ["full", "extra-traits", "visit", "visit-mut"] } +syn = { workspace = true, features = [ + "full", + "extra-traits", + "visit", + "visit-mut", +] } headers = "0.4.0" walkdir = "2" @@ -66,11 +71,7 @@ dunce = { workspace = true } # tools download dirs = { workspace = true } -reqwest = { workspace = true, features = [ - "rustls-tls", - "trust-dns", - "json" -] } +reqwest = { workspace = true, features = ["rustls-tls", "trust-dns", "json"] } tower = { workspace = true } once_cell = "1.19.0" @@ -92,7 +93,13 @@ ignore = "0.4.22" env_logger = { workspace = true } const-serialize = { workspace = true, features = ["serde"] } -tracing-subscriber = { version = "0.3.18", features = ["std", "env-filter", "json", "registry", "fmt"] } +tracing-subscriber = { version = "0.3.18", features = [ + "std", + "env-filter", + "json", + "registry", + "fmt", +] } console-subscriber = { version = "0.3.0", optional = true } tracing = { workspace = true } wasm-opt = { version = "0.116.1", optional = true } @@ -109,9 +116,10 @@ log = { version = "0.4", features = ["max_level_off", "release_max_level_off"] } tempfile = "3.3" manganis = { workspace = true } manganis-core = { workspace = true } +target-lexicon = "0.13.2" # Extracting data from an executable -object = {version="0.36.0", features=["wasm"]} +object = { version = "0.36.0", features = ["wasm"] } tokio-util = { version = "0.7.11", features = ["full"] } itertools = "0.13.0" throbber-widgets-tui = "=0.7.0" @@ -162,5 +170,5 @@ pkg-fmt = "zip" [package.metadata.docs.rs] all-features = false -rustc-args = [ "--cfg", "docsrs" ] -rustdoc-args = [ "--cfg", "docsrs" ] +rustc-args = ["--cfg", "docsrs"] +rustdoc-args = ["--cfg", "docsrs"] diff --git a/packages/cli/src/build/bundle.rs b/packages/cli/src/build/bundle.rs index e2b769f9d2..7753a4efaa 100644 --- a/packages/cli/src/build/bundle.rs +++ b/packages/cli/src/build/bundle.rs @@ -3,9 +3,8 @@ use super::templates::InfoPlistData; use crate::{BuildRequest, Platform, 
WasmOptConfig}; use crate::{Result, TraceSrc}; use anyhow::Context; -use dioxus_cli_opt::{process_file_to, AssetManifest}; +use dioxus_cli_opt::{optimize_all_assets, AssetManifest}; use manganis::{AssetOptions, JsAssetOptions}; -use rayon::prelude::{IntoParallelRefIterator, ParallelIterator}; use std::future::Future; use std::path::{Path, PathBuf}; use std::pin::Pin; @@ -354,38 +353,19 @@ impl AppBundle { Ok(()) } - /// Copy the assets out of the manifest and into the target location - /// - /// Should be the same on all platforms - just copy over the assets from the manifest into the output directory - async fn write_assets(&self) -> Result<()> { - // Server doesn't need assets - web will provide them - if self.build.build.platform() == Platform::Server { - return Ok(()); - } - + /// Clean up any assets that no longer exist in the manifest + async fn clear_old_assets(&self) -> Result<()> { let asset_dir = self.build.asset_dir(); // First, clear the asset dir of any files that don't exist in the new manifest _ = tokio::fs::create_dir_all(&asset_dir).await; // Create a set of all the paths that new files will be bundled to - let mut keep_bundled_output_paths: HashSet<_> = self + let keep_bundled_output_paths: HashSet<_> = self .app .assets - .assets - .values() + .assets() .map(|a| asset_dir.join(a.bundled_path())) .collect(); - // The CLI creates a .version file in the asset dir to keep track of what version of the optimizer - // the asset was processed. If that version doesn't match the CLI version, we need to re-optimize - // all assets. 
- let version_file = self.build.asset_optimizer_version_file(); - let clear_cache = std::fs::read_to_string(&version_file) - .ok() - .filter(|s| s == crate::VERSION.as_str()) - .is_none(); - if clear_cache { - keep_bundled_output_paths.clear(); - } // one possible implementation of walking a directory only visiting files fn remove_old_assets<'a>( @@ -425,24 +405,40 @@ impl AppBundle { ); remove_old_assets(&asset_dir, &keep_bundled_output_paths).await?; + Ok(()) + } + + /// Copy the assets out of the manifest and into the target location + /// + /// Should be the same on all platforms - just copy over the assets from the manifest into the output directory + async fn write_assets(&self) -> Result<()> { + // Server doesn't need assets - web will provide them + if self.build.build.platform() == Platform::Server { + return Ok(()); + } + + self.clear_old_assets().await?; + + let asset_dir = self.build.asset_dir(); + // todo(jon): we also want to eventually include options for each asset's optimization and compression, which we currently aren't let mut assets_to_transfer = vec![]; // Queue the bundled assets - for (asset, bundled) in &self.app.assets.assets { - let from = asset.clone(); - let to = asset_dir.join(bundled.bundled_path()); + for asset in self.app.assets.assets() { + let from = PathBuf::from(asset.absolute_source_path()); + let to = asset_dir.join(asset.bundled_path()); // prefer to log using a shorter path relative to the workspace dir by trimming the workspace dir let from_ = from .strip_prefix(self.build.krate.workspace_dir()) .unwrap_or(from.as_path()); - let to_ = from + let to_ = to .strip_prefix(self.build.krate.workspace_dir()) .unwrap_or(to.as_path()); tracing::debug!("Copying asset {from_:?} to {to_:?}"); - assets_to_transfer.push((from, to, *bundled.options())); + assets_to_transfer.push((from, to, *asset.options())); } // And then queue the legacy assets @@ -462,20 +458,16 @@ impl AppBundle { let ws_dir = self.build.krate.workspace_dir(); // Optimizing 
assets is expensive and blocking, so we do it in a tokio spawn blocking task tokio::task::spawn_blocking(move || { - assets_to_transfer - .par_iter() - .try_for_each(|(from, to, options)| { + optimize_all_assets( + assets_to_transfer, + |from, _, _| { let processing = started_processing.fetch_add(1, Ordering::SeqCst); let from_ = from.strip_prefix(&ws_dir).unwrap_or(from); tracing::trace!( "Starting asset copy {processing}/{asset_count} from {from_:?}" ); - - let res = process_file_to(options, from, to); - if let Err(err) = res.as_ref() { - tracing::error!("Failed to copy asset {from:?}: {err}"); - } - + }, + |from, _, _| { let finished = copied.fetch_add(1, Ordering::SeqCst); BuildRequest::status_copied_asset( &progress, @@ -483,22 +475,12 @@ impl AppBundle { asset_count, from.to_path_buf(), ); - - res.map(|_| ()) - }) + }, + ) }) .await .map_err(|e| anyhow::anyhow!("A task failed while trying to copy assets: {e}"))??; - // // Remove the wasm bindgen output directory if it exists - // _ = std::fs::remove_dir_all(self.build.wasm_bindgen_out_dir()); - - // Write the version file so we know what version of the optimizer we used - std::fs::write( - self.build.asset_optimizer_version_file(), - crate::VERSION.as_str(), - )?; - Ok(()) } diff --git a/packages/cli/src/build/request.rs b/packages/cli/src/build/request.rs index 77d9464dbf..fadb551d36 100644 --- a/packages/cli/src/build/request.rs +++ b/packages/cli/src/build/request.rs @@ -463,7 +463,6 @@ impl BuildRequest { /// /// There's a chance that's not actually true, so this function is kept around in case we do /// need to revert to "deep extraction". 
- #[allow(unused)] async fn deep_linker_asset_extract(&self) -> Result { // Create a temp file to put the output of the args // We need to do this since rustc won't actually print the link args to stdout, so we need to @@ -751,11 +750,6 @@ impl BuildRequest { } } - /// Get the path to the asset optimizer version file - pub fn asset_optimizer_version_file(&self) -> PathBuf { - self.platform_dir().join(".cli-version") - } - pub fn platform_exe_name(&self) -> String { match self.build.platform() { Platform::MacOS => self.krate.executable_name().to_string(), diff --git a/packages/cli/src/build/web.rs b/packages/cli/src/build/web.rs index 52b2e8e914..b040d43676 100644 --- a/packages/cli/src/build/web.rs +++ b/packages/cli/src/build/web.rs @@ -85,7 +85,7 @@ impl AppBundle { } // Inject any resources from manganis into the head - for asset in self.app.assets.assets.values() { + for asset in self.app.assets.assets() { let asset_path = asset.bundled_path(); match asset.options() { AssetOptions::Css(css_options) => { @@ -117,9 +117,11 @@ impl AppBundle { let wasm_path = self .app .assets - .assets - .get(&wasm_source_path) + .get_assets_for_source(&wasm_source_path) .expect("WASM asset should exist in web bundles") + .iter() + .next() + .expect("Should only be one wasm asset") .bundled_path(); head_resources.push_str(&format!( "" @@ -177,18 +179,22 @@ r#"