From 5ee2237bc9d964bab25bf888fbdf9d40fea9c015 Mon Sep 17 00:00:00 2001 From: quake Date: Fri, 7 Mar 2025 09:55:17 +0900 Subject: [PATCH] perf: optimize instruction cache indexing to reduce local hotspots --- src/decoder.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/decoder.rs b/src/decoder.rs index 63bdf454..b021d964 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -10,7 +10,7 @@ use crate::memory::Memory; use crate::{Error, ISA_A, ISA_B, ISA_MOP, RISCV_PAGESIZE}; const RISCV_PAGESIZE_MASK: u64 = RISCV_PAGESIZE as u64 - 1; -const INSTRUCTION_CACHE_SIZE: usize = 4096; +const INSTRUCTION_CACHE_SIZE: usize = 2048; pub struct Decoder { factories: Vec, @@ -99,14 +99,14 @@ impl Decoder { let instruction_cache_key = { // according to RISC-V instruction encoding, the lowest bit in PC will always be zero let pc = pc >> 1; - // Here we try to balance between local code and remote code. At times, - // we can find the code jumping to a remote function(e.g., memcpy or - // alloc), then resumes execution at a local location. Previous cache - // key only optimizes for local operations, while this new cache key - // balances the code between a 8192-byte local region, and certain remote - // code region. Notice the value 12 and 8 here are chosen by empirical - // evidence. - ((pc & 0xFF) | (pc >> 12 << 8)) as usize % INSTRUCTION_CACHE_SIZE + // This indexing strategy optimizes instruction cache utilization by improving the distribution of addresses. + // - `pc >> 5`: Incorporates higher bits to ensure a more even spread across cache indices. + // - `pc << 1`: Spreads lower-bit information into higher positions, enhancing variability. + // - `^` (XOR): Further randomizes index distribution, reducing cache conflicts and improving hit rates. + // + // This approach helps balance cache efficiency between local execution and remote function calls, + // reducing hotspots and improving overall performance. + ((pc >> 5) ^ (pc << 1)) as usize % INSTRUCTION_CACHE_SIZE }; let cached_instruction = self.instructions_cache[instruction_cache_key]; if cached_instruction.0 == pc {