[perf] sysbus: Improve add_cycles()
Fulfill a long-standing TODO. Profiling with perf record (--call-graph dwarf) showed that add_cycles() was hot enough to be worth optimizing, so I added 2 optimizations: - Removed bounds checks from the array accesses - Increased the LUT size to include dummy entries for open-bus, which eliminates the if check. Benchmark result: run_60_frames time: [183.65 ms 183.69 ms 183.73 ms] change: [-9.4414% -9.2849% -9.1315%] (p = 0.00 < 0.05) Performance has improved. Former-commit-id: 1cbb596b856e604ad6c48eb0d47771e7cee44d1e Former-commit-id: 9f15e35237f343d0c816fd9d51d81081736d9e17
This commit is contained in:
parent
cb2b97e0c7
commit
21708a3d58
|
@ -46,23 +46,24 @@ pub mod consts {
|
||||||
|
|
||||||
use consts::*;
|
use consts::*;
|
||||||
|
|
||||||
const CYCLE_LUT_SIZE: usize = 0x10;
|
// Only the first 15 entries are actually used, the rest are dummy entries for open-bus
|
||||||
|
const CYCLE_LUT_SIZE: usize = 0x100;
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, Clone)]
|
#[derive(Serialize, Deserialize, Clone)]
|
||||||
struct CycleLookupTables {
|
struct CycleLookupTables {
|
||||||
n_cycles32: [usize; CYCLE_LUT_SIZE],
|
n_cycles32: Box<[usize]>,
|
||||||
s_cycles32: [usize; CYCLE_LUT_SIZE],
|
s_cycles32: Box<[usize]>,
|
||||||
n_cycles16: [usize; CYCLE_LUT_SIZE],
|
n_cycles16: Box<[usize]>,
|
||||||
s_cycles16: [usize; CYCLE_LUT_SIZE],
|
s_cycles16: Box<[usize]>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for CycleLookupTables {
|
impl Default for CycleLookupTables {
|
||||||
fn default() -> CycleLookupTables {
|
fn default() -> CycleLookupTables {
|
||||||
CycleLookupTables {
|
CycleLookupTables {
|
||||||
n_cycles32: [1; CYCLE_LUT_SIZE],
|
n_cycles32: vec![1; CYCLE_LUT_SIZE].into_boxed_slice(),
|
||||||
s_cycles32: [1; CYCLE_LUT_SIZE],
|
s_cycles32: vec![1; CYCLE_LUT_SIZE].into_boxed_slice(),
|
||||||
n_cycles16: [1; CYCLE_LUT_SIZE],
|
n_cycles16: vec![1; CYCLE_LUT_SIZE].into_boxed_slice(),
|
||||||
s_cycles16: [1; CYCLE_LUT_SIZE],
|
s_cycles16: vec![1; CYCLE_LUT_SIZE].into_boxed_slice(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -242,26 +243,22 @@ impl SysBus {
|
||||||
pub fn add_cycles(&mut self, addr: Addr, access: MemoryAccess, width: MemoryAccessWidth) {
|
pub fn add_cycles(&mut self, addr: Addr, access: MemoryAccess, width: MemoryAccessWidth) {
|
||||||
use MemoryAccess::*;
|
use MemoryAccess::*;
|
||||||
use MemoryAccessWidth::*;
|
use MemoryAccessWidth::*;
|
||||||
let page = (addr >> 24) as usize;
|
let page = ((addr >> 24) & 0xF) as usize;
|
||||||
|
|
||||||
// TODO optimize out by making the LUTs have 0x100 entries for each possible page ?
|
let cycles = unsafe {
|
||||||
let cycles = if page > 0xF {
|
|
||||||
// open bus
|
|
||||||
1
|
|
||||||
} else {
|
|
||||||
match width {
|
match width {
|
||||||
MemoryAccess8 | MemoryAccess16 => match access {
|
MemoryAccess8 | MemoryAccess16 => match access {
|
||||||
NonSeq => self.cycle_luts.n_cycles16[page],
|
NonSeq => self.cycle_luts.n_cycles16.get_unchecked(page),
|
||||||
Seq => self.cycle_luts.s_cycles16[page],
|
Seq => self.cycle_luts.s_cycles16.get_unchecked(page),
|
||||||
},
|
},
|
||||||
MemoryAccess32 => match access {
|
MemoryAccess32 => match access {
|
||||||
NonSeq => self.cycle_luts.n_cycles32[page],
|
NonSeq => self.cycle_luts.n_cycles32.get_unchecked(page),
|
||||||
Seq => self.cycle_luts.s_cycles32[page],
|
Seq => self.cycle_luts.s_cycles32.get_unchecked(page),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
self.scheduler.update(cycles);
|
self.scheduler.update(*cycles);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Helper for "open-bus" accesses
|
/// Helper for "open-bus" accesses
|
||||||
|
|
Reference in a new issue