[perf] sysbus: Improve add_cycles()

Fulfill a long-standing TODO: I profiled with perf record (--call-graph dwarf) and found that add_cycles() was hot enough to be worth optimizing.
I added 2 optimizations:
- Removed bounds checks from array accesses
- Increased the LUT size to include dummy entries for open-bus, eliminating the if check

run_60_frames           time:   [183.65 ms 183.69 ms 183.73 ms]
                        change: [-9.4414% -9.2849% -9.1315%] (p = 0.00 < 0.05)
                        Performance has improved.


Former-commit-id: 1cbb596b856e604ad6c48eb0d47771e7cee44d1e
Former-commit-id: 9f15e35237f343d0c816fd9d51d81081736d9e17
This commit is contained in:
Michel Heily 2021-06-05 17:25:01 +03:00
parent cb2b97e0c7
commit 21708a3d58

View file

@ -46,23 +46,24 @@ pub mod consts {
use consts::*; use consts::*;
// Number of entries in each cycle lookup table.
// Side-by-side diff rows: the old definition (0x10) comes first, then the new comment and the
// new definition (0x100).
const CYCLE_LUT_SIZE: usize = 0x10; // Only the first 15 entries are actually used, the rest are dummy entries for open-bus
const CYCLE_LUT_SIZE: usize = 0x100;
/// Per-page cycle counts consulted by `add_cycles`.
///
/// There is one table per combination of access kind and width: `n_` tables are read for
/// `NonSeq` accesses and `s_` tables for `Seq` accesses; the `16` tables serve
/// `MemoryAccess8`/`MemoryAccess16` and the `32` tables serve `MemoryAccess32`.
///
/// Each row below shows the old text followed by the new text: the fields change from
/// fixed-size arrays of `CYCLE_LUT_SIZE` entries to boxed slices.
///
/// NOTE(review): with `Box<[usize]>` the table length is no longer part of the type, so a
/// deserialized value presumably may carry slices of any length — confirm before relying on
/// a minimum length in unchecked accesses.
#[derive(Serialize, Deserialize, Clone)] #[derive(Serialize, Deserialize, Clone)]
struct CycleLookupTables { struct CycleLookupTables {
n_cycles32: [usize; CYCLE_LUT_SIZE], n_cycles32: Box<[usize]>,
s_cycles32: [usize; CYCLE_LUT_SIZE], s_cycles32: Box<[usize]>,
n_cycles16: [usize; CYCLE_LUT_SIZE], n_cycles16: Box<[usize]>,
s_cycles16: [usize; CYCLE_LUT_SIZE], s_cycles16: Box<[usize]>,
} }
// Default tables: every one of the `CYCLE_LUT_SIZE` entries in all four tables starts at 1 cycle.
// Each row shows the old text followed by the new text: the old side builds fixed-size arrays,
// the new side builds a `Vec` of the same length and converts it into a boxed slice.
impl Default for CycleLookupTables { impl Default for CycleLookupTables {
fn default() -> CycleLookupTables { fn default() -> CycleLookupTables {
CycleLookupTables { CycleLookupTables {
n_cycles32: [1; CYCLE_LUT_SIZE], n_cycles32: vec![1; CYCLE_LUT_SIZE].into_boxed_slice(),
s_cycles32: [1; CYCLE_LUT_SIZE], s_cycles32: vec![1; CYCLE_LUT_SIZE].into_boxed_slice(),
n_cycles16: [1; CYCLE_LUT_SIZE], n_cycles16: vec![1; CYCLE_LUT_SIZE].into_boxed_slice(),
s_cycles16: [1; CYCLE_LUT_SIZE], s_cycles16: vec![1; CYCLE_LUT_SIZE].into_boxed_slice(),
} }
} }
} }
@ -242,26 +243,22 @@ impl SysBus {
/// Looks up the cycle cost of one memory access and passes it to `self.scheduler.update`.
///
/// The page index is derived from `addr >> 24`. `width` selects the 16-bit tables
/// (`MemoryAccess8`/`MemoryAccess16`) or the 32-bit tables (`MemoryAccess32`), and `access`
/// selects the `n_` (`NonSeq`) or `s_` (`Seq`) table.
///
/// Each row below is a side-by-side diff row: the old text first, then the new text.
/// - Old: pages above 0xF are treated as open bus and cost 1 cycle; other pages use checked
///   indexing.
/// - New: the page is masked with 0xF and the tables are read with `get_unchecked`.
pub fn add_cycles(&mut self, addr: Addr, access: MemoryAccess, width: MemoryAccessWidth) { pub fn add_cycles(&mut self, addr: Addr, access: MemoryAccess, width: MemoryAccessWidth) {
use MemoryAccess::*; use MemoryAccess::*;
use MemoryAccessWidth::*; use MemoryAccessWidth::*;
// NOTE(review): on the new side the `& 0xF` mask keeps `page` in 0x0..=0xF, so entries
// 0x10..0xFF of the enlarged tables are never read by this function, and an address whose
// page is above 0xF now reads the entry for `page & 0xF` instead of costing the fixed 1 cycle
// the old side used. If the dummy open-bus entries were meant to be indexed, the mask would
// presumably need to be 0xFF (or dropped if `Addr` is 32 bits wide) — confirm the intent.
let page = (addr >> 24) as usize; let page = ((addr >> 24) & 0xF) as usize;
// SAFETY (needs confirmation): the `get_unchecked` calls on the new side are in bounds only
// if every table holds at least 0x10 entries. The default tables do (`CYCLE_LUT_SIZE`
// entries each), but the fields are `Box<[usize]>` on a type deriving `Deserialize`, so
// verify that a loaded state cannot supply shorter tables.
// TODO optimize out by making the LUTs have 0x100 entries for each possible page ? let cycles = unsafe {
let cycles = if page > 0xF {
// open bus
1
} else {
match width { match width {
MemoryAccess8 | MemoryAccess16 => match access { MemoryAccess8 | MemoryAccess16 => match access {
NonSeq => self.cycle_luts.n_cycles16[page], NonSeq => self.cycle_luts.n_cycles16.get_unchecked(page),
Seq => self.cycle_luts.s_cycles16[page], Seq => self.cycle_luts.s_cycles16.get_unchecked(page),
}, },
MemoryAccess32 => match access { MemoryAccess32 => match access {
NonSeq => self.cycle_luts.n_cycles32[page], NonSeq => self.cycle_luts.n_cycles32.get_unchecked(page),
Seq => self.cycle_luts.s_cycles32[page], Seq => self.cycle_luts.s_cycles32.get_unchecked(page),
}, },
} }
}; };
// The new side dereferences because `get_unchecked` yields a reference to the entry.
self.scheduler.update(cycles); self.scheduler.update(*cycles);
} }
/// Helper for "open-bus" accesses /// Helper for "open-bus" accesses