Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
225 changes: 219 additions & 6 deletions iris-mpc-cpu/benches/dot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ use iris_mpc_cpu::protocol::{
ops::galois_ring_pairwise_distance, shared_iris::GaloisRingSharedIris,
};
use itertools::Itertools;
use rand::{thread_rng, Rng};
use rand::seq::index::sample;
use rand::{rngs::StdRng, thread_rng, Rng, SeedableRng};
use rayon::{prelude::*, ThreadPoolBuilder};

pub fn bench_galois_ring_pairwise_distance(c: &mut Criterion) {
Expand Down Expand Up @@ -42,7 +43,7 @@ pub fn bench_galois_ring_pairwise_distance(c: &mut Criterion) {
.map(|_| Some((shares[0].clone(), shares[1].clone())))
.collect_vec()
},
|pairs| galois_ring_pairwise_distance(black_box(pairs)),
|pairs| black_box(galois_ring_pairwise_distance(black_box(pairs))),
BatchSize::SmallInput,
)
});
Expand All @@ -59,7 +60,7 @@ pub fn bench_galois_ring_pairwise_distance(c: &mut Criterion) {
})
.collect_vec()
},
|pairs| galois_ring_pairwise_distance(black_box(pairs)),
|pairs| black_box(galois_ring_pairwise_distance(black_box(pairs))),
BatchSize::SmallInput,
)
});
Expand Down Expand Up @@ -95,7 +96,7 @@ pub fn bench_galois_ring_pairwise_distance(c: &mut Criterion) {
|batches| {
batches
.into_par_iter()
.map(|pairs| galois_ring_pairwise_distance(black_box(pairs)))
.map(|pairs| black_box(galois_ring_pairwise_distance(black_box(pairs))))
.collect::<Vec<_>>()
},
BatchSize::SmallInput,
Expand Down Expand Up @@ -124,7 +125,36 @@ pub fn bench_galois_ring_pairwise_distance(c: &mut Criterion) {
|batches| {
batches
.into_par_iter()
.map(|pairs| galois_ring_pairwise_distance(black_box(pairs)))
.map(|pairs| black_box(galois_ring_pairwise_distance(black_box(pairs))))
.collect::<Vec<_>>()
},
BatchSize::SmallInput,
)
})
});

g.bench_function("RAM-bound with identical workloads per thread", |b| {
pool.install(|| {
b.iter_batched(
|| {
// Generate *multiple* batches of *non-cacheable* iris pairs.
(0..num_threads)
.map(|_| {
let mut rng = StdRng::seed_from_u64(42);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The intent would be clearer generating once and cloning num_threads times.

(0..batch_size)
.map(|_| {
let a = rng.gen::<usize>() % shares.len();
let b = rng.gen::<usize>() % shares.len();
Some((shares[a].clone(), shares[b].clone()))
})
.collect_vec()
})
.collect_vec()
},
|batches| {
batches
.into_par_iter()
.map(|pairs| black_box(galois_ring_pairwise_distance(black_box(pairs))))
.collect::<Vec<_>>()
},
BatchSize::SmallInput,
Expand All @@ -135,5 +165,188 @@ pub fn bench_galois_ring_pairwise_distance(c: &mut Criterion) {
g.finish();
}

criterion_group!(benches, bench_galois_ring_pairwise_distance);
fn generate_random_shared_irises(count: usize) -> Vec<Arc<GaloisRingSharedIris>> {
let mut rng = thread_rng();
(0..count)
.map(|_| {
let iris = IrisCode::random_rng(&mut rng);
// We only need one of the three shares for the benchmark structure.
Arc::new(GaloisRingSharedIris::generate_shares_locally(&mut rng, iris)[0].clone())
})
.collect()
}

// Simulate dot-products as they occur in search_layer:
// DEPTH times we open a new candidate node, each contributing NEW_NODES_PER_LEVEL new nodes
// which are batched to compute distance to the source
// Constants are chosen according to staging runs as of late August 2025.

pub fn search_layer_like_calls(c: &mut Criterion) {
let mut g = c.benchmark_group("search_layer_like_calls");
g.sample_size(10usize);

const INITIAL_RHS_COUNT: usize = 512;
const DEPTH: usize = 356;
const NEW_NODES_PER_LEVEL: usize = 65;
let total_elements = INITIAL_RHS_COUNT + (DEPTH * NEW_NODES_PER_LEVEL);
g.throughput(Throughput::Elements(total_elements as u64));

g.bench_function("single-threaded", |b| {
let source_iris = generate_random_shared_irises(1).pop().unwrap();

let initial_rhs_irises = generate_random_shared_irises(INITIAL_RHS_COUNT);
let all_irises = generate_random_shared_irises(total_elements);

b.iter_batched(
|| {
let mut rng = StdRng::seed_from_u64(42);
let num_indices = DEPTH * NEW_NODES_PER_LEVEL;
sample(&mut rng, all_irises.len(), num_indices).into_vec()
},
|mut indices| {
let initial_pairs: Vec<_> = initial_rhs_irises
.iter()
.map(|rhs| Some((source_iris.clone(), rhs.clone())))
.collect();
black_box(galois_ring_pairwise_distance(black_box(initial_pairs)));

for _ in 0..DEPTH {
let curr_indices = indices.drain(0..NEW_NODES_PER_LEVEL).collect_vec();
let pairs_on_level: Vec<_> = (0..NEW_NODES_PER_LEVEL)
.map(|i| Some((source_iris.clone(), all_irises[curr_indices[i]].clone())))
.collect();

black_box(galois_ring_pairwise_distance(black_box(pairs_on_level)));
}
},
BatchSize::SmallInput,
)
});
g.finish();

let num_threads = num_cpus::get_physical();
let pool = ThreadPoolBuilder::new()
.num_threads(num_threads)
.build()
.unwrap();

let mut g_parallel = c.benchmark_group(format!(
"parallel search_layer-like calls -- num_threads={}",
num_threads
));
g_parallel.sample_size(10);
g_parallel.throughput(Throughput::Elements((total_elements * num_threads) as u64));

g_parallel.bench_function("identical order per thread", |b| {
pool.install(|| {
let thread_setups: Vec<_> = (0..num_threads)
.map(|_| {
let source_iris = generate_random_shared_irises(1).pop().unwrap();
let initial_rhs_irises = generate_random_shared_irises(INITIAL_RHS_COUNT);
(source_iris, initial_rhs_irises)
})
.collect();
let all_irises = generate_random_shared_irises(DEPTH * NEW_NODES_PER_LEVEL);

b.iter_batched(
|| {
(0..num_threads)
.map(|_| {
let mut rng = StdRng::seed_from_u64(42);
let num_indices = DEPTH * NEW_NODES_PER_LEVEL;
sample(&mut rng, all_irises.len(), num_indices).into_vec()
})
.collect::<Vec<_>>()
},
|all_threads_indices| {
all_threads_indices
.par_iter()
.zip(thread_setups.par_iter())
.for_each(|(indices_for_thread, (source_iris, initial_rhs_irises))| {
let mut indices = indices_for_thread.clone();

let initial_pairs: Vec<_> = initial_rhs_irises
.iter()
.map(|rhs| Some((source_iris.clone(), rhs.clone())))
.collect();
black_box(galois_ring_pairwise_distance(black_box(initial_pairs)));

for _ in 0..DEPTH {
let curr_indices =
indices.drain(0..NEW_NODES_PER_LEVEL).collect_vec();
let pairs_on_level: Vec<_> = curr_indices
.iter()
.map(|&index| {
Some((source_iris.clone(), all_irises[index].clone()))
})
.collect();
black_box(galois_ring_pairwise_distance(black_box(pairs_on_level)));
}
});
},
BatchSize::SmallInput,
)
});
});

g_parallel.bench_function("different order per thread", |b| {
pool.install(|| {
let thread_setups: Vec<_> = (0..num_threads)
.map(|_| {
let source_iris = generate_random_shared_irises(1).pop().unwrap();
let initial_rhs_irises = generate_random_shared_irises(INITIAL_RHS_COUNT);
(source_iris, initial_rhs_irises)
})
.collect();
let all_irises = generate_random_shared_irises(DEPTH * NEW_NODES_PER_LEVEL);

b.iter_batched(
|| {
// Setup closure: Generate the random indices for EACH thread.
(0..num_threads)
.map(|i| {
let mut rng = StdRng::seed_from_u64(i as u64);
let num_indices = DEPTH * NEW_NODES_PER_LEVEL;
sample(&mut rng, all_irises.len(), num_indices).into_vec()
})
.collect::<Vec<_>>()
},
|all_threads_indices| {
all_threads_indices
.par_iter()
.zip(thread_setups.par_iter())
.for_each(|(indices_for_thread, (source_iris, initial_rhs_irises))| {
let mut indices = indices_for_thread.clone();

let initial_pairs: Vec<_> = initial_rhs_irises
.iter()
.map(|rhs| Some((source_iris.clone(), rhs.clone())))
.collect();
black_box(galois_ring_pairwise_distance(black_box(initial_pairs)));

for _ in 0..DEPTH {
let curr_indices =
indices.drain(0..NEW_NODES_PER_LEVEL).collect_vec();
let pairs_on_level: Vec<_> = curr_indices
.iter()
.map(|&index| {
Some((source_iris.clone(), all_irises[index].clone()))
})
.collect();
black_box(galois_ring_pairwise_distance(black_box(pairs_on_level)));
}
});
},
BatchSize::SmallInput,
)
});
});
g_parallel.finish();
}

criterion_group!(
benches,
bench_galois_ring_pairwise_distance,
search_layer_like_calls
);
criterion_main!(benches);
Loading