Skip to content

detect network file system #237

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 11 additions & 12 deletions data/src/data_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,25 +25,24 @@ utils::configurable_constants! {
ref DEFAULT_CAS_ENDPOINT: String = "http://localhost:8080".to_string();
}

pub fn default_config(
endpoint: String,
xorb_compression: Option<CompressionScheme>,
token_info: Option<(String, u64)>,
token_refresher: Option<Arc<dyn TokenRefresher>>,
) -> errors::Result<Arc<TranslatorConfig>> {
// if HF_HOME is set use that instead of ~/.cache/huggingface
// if HF_XET_CACHE is set use that instead of ~/.cache/huggingface/xet
// HF_XET_CACHE takes precedence over HF_HOME
let cache_root_path = if env::var("HF_XET_CACHE").is_ok() {
lazy_static! {
pub static ref XET_CACHE_PATH: PathBuf = if env::var("HF_XET_CACHE").is_ok() {
PathBuf::from(env::var("HF_XET_CACHE").unwrap())
} else if env::var("HF_HOME").is_ok() {
let home = env::var("HF_HOME").unwrap();
PathBuf::from(home).join("xet")
} else {
let home = home_dir().unwrap_or(current_dir()?);
let home = home_dir().unwrap_or(current_dir().unwrap_or_default());
home.join(".cache").join("huggingface").join("xet")
};
}

pub fn default_config(
endpoint: String,
xorb_compression: Option<CompressionScheme>,
token_info: Option<(String, u64)>,
token_refresher: Option<Arc<dyn TokenRefresher>>,
) -> errors::Result<Arc<TranslatorConfig>> {
let (token, token_expiration) = token_info.unzip();
let auth_cfg = AuthConfig::maybe_new(token, token_expiration, token_refresher);

Expand All @@ -61,7 +60,7 @@ pub fn default_config(
format!("{endpoint_prefix}-{}", &endpoint_hash[..16])
};

let cache_path = cache_root_path.join(endpoint_tag);
let cache_path = XET_CACHE_PATH.join(endpoint_tag);
std::fs::create_dir_all(&cache_path)?;

let staging_root = cache_path.join("staging");
Expand Down
6 changes: 4 additions & 2 deletions hf_xet/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions hf_xet/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use std::fmt::Debug;
use std::iter::IntoIterator;
use std::sync::Arc;

use data::data_client::XET_CACHE_PATH;
use data::errors::DataProcessingError;
use data::{data_client, PointerFile};
use pyo3::exceptions::PyRuntimeError;
Expand Down Expand Up @@ -156,8 +157,15 @@ pub fn hf_xet(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<PyPointerFile>()?;

// Init the threadpool
// this also initializes the tracing subscriber enabling logs.
// logs before init_threadpool will not be visible
runtime::init_threadpool(py)?;

if utils::is_network_fs(&XET_CACHE_PATH).unwrap_or_default() {
// TODO: check high perf mode/cache status and change to a warn log if not enabled
tracing::debug!("hf-xet detected your cache and session path is on a network file system, this can cause performance issues, consider setting HF_XET_HIGH_PERFORMANCE=1 or disabling the cache");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Provide the flag to disable the cache instead. It seems odd that "if network file system" -> "set HF_XET_HIGH_PERFORMANCE".

}

#[cfg(feature = "profiling")]
{
profiling::start_profiler();
Expand Down
4 changes: 4 additions & 0 deletions utils/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ pin-project = "1.0.12"
tracing = "0.1.31"
bytes = "1.8.0"
async-trait = "0.1.87"
anyhow = "1.0.97"

[target.'cfg(unix)'.dependencies]
nix = { version = "0.29.0", features = ["fs"] }


[dev-dependencies]
Expand Down
38 changes: 38 additions & 0 deletions utils/src/fs_utils.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#[cfg(target_os = "linux")]
pub use linux::*;

#[cfg(target_os = "linux")]
mod linux {
    use std::path::Path;

    use nix::libc;
    use nix::sys::statfs;

    /// Filesystem magic value for Lustre; `libc` does not export this one,
    /// so it is declared here.
    const LUSTRE_SUPER_MAGIC: libc::__fsword_t = 0x0BD00BD0;

    /// Reports whether the filesystem backing `path` is one of the types this
    /// module treats as a network file system.
    ///
    /// The filesystem type is obtained via `statfs` and compared against the
    /// NFS, SMB, FUSE, AFS, Coda and Lustre magic values.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by `statfs` when the call fails for
    /// `path`.
    pub fn is_network_fs(path: impl AsRef<Path>) -> anyhow::Result<bool> {
        let target = path.as_ref();
        let stats = statfs::statfs(target)?;
        // `.0` is the raw magic number wrapped by the filesystem type.
        let magic = stats.filesystem_type().0;

        Ok(matches!(
            magic,
            LUSTRE_SUPER_MAGIC
                | libc::NFS_SUPER_MAGIC
                | libc::SMB_SUPER_MAGIC
                | libc::FUSE_SUPER_MAGIC
                | libc::AFS_SUPER_MAGIC
                | libc::CODA_SUPER_MAGIC
        ))
    }
}

#[cfg(not(target_os = "linux"))]
pub use not_linux::*;

#[cfg(not(target_os = "linux"))]
mod not_linux {
    use std::path::Path;

    /// Fallback used on every target other than Linux.
    ///
    /// No detection is performed here: the path is ignored and the function
    /// always returns `Ok(false)`.
    pub fn is_network_fs(_path: impl AsRef<Path>) -> anyhow::Result<bool> {
        let detected = false;
        Ok(detected)
    }
}
2 changes: 2 additions & 0 deletions utils/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,5 @@ pub use async_read::CopyReader;
pub use output_bytes::output_bytes;

pub mod constant_declarations;
mod fs_utils;
pub use fs_utils::*;
Loading