[perf] arm: Improve performance with barrel shifter.

Instead of using a struct member to hold the last barrel shifter carry output, which is expansive as it needs to be saved/loaded from memory, I now pass the carry around as an INOUT parameter.
Using perf anotate really shows high perctile of samples being spent on reading/writing `self.bs_carry_out`

Since this is a rather "surgical" changeset, I have made sure to run it against eggvance test suite, mGBA test suite and some games as well.

I actually saw better improvements than what the benchmark measured, but 7% is decent enough :)

```
run_60_frames           time:   [180.18 ms 180.45 ms 180.77 ms]
                        change: [-7.2464% -6.9081% -6.6324%] (p = 0.00 < 0.05)
                        Performance has improved.
```


Former-commit-id: 7cd7105a07aa0b78cab9dc8bbae3682b02b7ab7c
Former-commit-id: c68514beb3fa6c34f5f65544acbead21e527dbb0
This commit is contained in:
Michel Heily 2021-06-05 18:54:46 +03:00
parent caf89d8532
commit 20506091cc
8 changed files with 78 additions and 87 deletions

View file

@ -111,66 +111,59 @@ impl BarrelShifterValue {
} }
impl<I: MemoryInterface> Core<I> { impl<I: MemoryInterface> Core<I> {
pub fn lsl(&mut self, val: u32, amount: u32, carry_in: bool) -> u32 { pub fn lsl(&mut self, val: u32, amount: u32, carry: &mut bool) -> u32 {
match amount { match amount {
0 => { 0 => val,
self.bs_carry_out = carry_in;
val
}
x if x < 32 => { x if x < 32 => {
self.bs_carry_out = val.wrapping_shr(32 - x) & 1 == 1; *carry = val.wrapping_shr(32 - x) & 1 == 1;
val << x val << x
} }
32 => { 32 => {
self.bs_carry_out = val & 1 == 1; *carry = val & 1 == 1;
0 0
} }
_ => { _ => {
self.bs_carry_out = false; *carry = false;
0 0
} }
} }
} }
pub fn lsr(&mut self, val: u32, amount: u32, carry_in: bool, immediate: bool) -> u32 { pub fn lsr(&mut self, val: u32, amount: u32, carry: &mut bool, immediate: bool) -> u32 {
if amount != 0 { if amount != 0 {
match amount { match amount {
x if x < 32 => { x if x < 32 => {
self.bs_carry_out = (val >> (amount - 1) & 1) == 1; *carry = (val >> (amount - 1) & 1) == 1;
val >> amount val >> amount
} }
32 => { 32 => {
self.bs_carry_out = val.bit(31); *carry = val.bit(31);
0 0
} }
_ => { _ => {
self.bs_carry_out = false; *carry = false;
0 0
} }
} }
} else if immediate { } else if immediate {
self.bs_carry_out = val.bit(31); *carry = val.bit(31);
0 0
} else { } else {
self.bs_carry_out = carry_in;
val val
} }
} }
pub fn asr(&mut self, val: u32, amount: u32, carry_in: bool, immediate: bool) -> u32 { pub fn asr(&mut self, val: u32, amount: u32, carry: &mut bool, immediate: bool) -> u32 {
let amount = if immediate && amount == 0 { 32 } else { amount }; let amount = if immediate && amount == 0 { 32 } else { amount };
match amount { match amount {
0 => { 0 => val,
self.bs_carry_out = carry_in;
val
}
x if x < 32 => { x if x < 32 => {
self.bs_carry_out = val.wrapping_shr(amount - 1) & 1 == 1; *carry = val.wrapping_shr(amount - 1) & 1 == 1;
(val as i32).wrapping_shr(amount) as u32 (val as i32).wrapping_shr(amount) as u32
} }
_ => { _ => {
let bit31 = val.bit(31); let bit31 = val.bit(31);
self.bs_carry_out = bit31; *carry = bit31;
if bit31 { if bit31 {
0xffffffff 0xffffffff
} else { } else {
@ -180,9 +173,9 @@ impl<I: MemoryInterface> Core<I> {
} }
} }
pub fn rrx(&mut self, val: u32, carry_in: bool) -> u32 { pub fn rrx(&mut self, val: u32, carry: &mut bool) -> u32 {
let old_c = carry_in as i32; let old_c = *carry as i32;
self.bs_carry_out = val & 0b1 != 0; *carry = val & 0b1 != 0;
(((val as u32) >> 1) as i32 | (old_c << 31)) as u32 (((val as u32) >> 1) as i32 | (old_c << 31)) as u32
} }
@ -190,14 +183,14 @@ impl<I: MemoryInterface> Core<I> {
&mut self, &mut self,
val: u32, val: u32,
amount: u32, amount: u32,
carry_in: bool, carry: &mut bool,
immediate: bool, immediate: bool,
rrx: bool, rrx: bool,
) -> u32 { ) -> u32 {
match amount { match amount {
0 => { 0 => {
if immediate & rrx { if immediate & rrx {
self.rrx(val, carry_in) self.rrx(val, carry)
} else { } else {
val val
} }
@ -209,7 +202,7 @@ impl<I: MemoryInterface> Core<I> {
} else { } else {
val val
}; };
self.bs_carry_out = (val as u32).bit(31); *carry = (val as u32).bit(31);
val val
} }
} }
@ -222,12 +215,9 @@ impl<I: MemoryInterface> Core<I> {
shift: BarrelShiftOpCode, shift: BarrelShiftOpCode,
val: u32, val: u32,
amount: u32, amount: u32,
carry_in: bool, carry: &mut bool,
immediate: bool, immediate: bool,
) -> u32 { ) -> u32 {
// TODO get rid of Core::bs_carry_out field in favour sending the carry as a &mut reference,
// Forcing calling functions to do something with the carry :
self.bs_carry_out = carry_in;
// //
// From GBATEK: // From GBATEK:
// Zero Shift Amount (Shift Register by Immediate, with Immediate=0) // Zero Shift Amount (Shift Register by Immediate, with Immediate=0)
@ -248,10 +238,10 @@ impl<I: MemoryInterface> Core<I> {
// in the range 1 to 32 and see above. // in the range 1 to 32 and see above.
// //
match shift { match shift {
BarrelShiftOpCode::LSL => self.lsl(val, amount, carry_in), BarrelShiftOpCode::LSL => self.lsl(val, amount, carry),
BarrelShiftOpCode::LSR => self.lsr(val, amount, carry_in, immediate), BarrelShiftOpCode::LSR => self.lsr(val, amount, carry, immediate),
BarrelShiftOpCode::ASR => self.asr(val, amount, carry_in, immediate), BarrelShiftOpCode::ASR => self.asr(val, amount, carry, immediate),
BarrelShiftOpCode::ROR => self.ror(val, amount, carry_in, immediate, true), BarrelShiftOpCode::ROR => self.ror(val, amount, carry, immediate, true),
} }
} }
@ -261,7 +251,7 @@ impl<I: MemoryInterface> Core<I> {
bs_op: BarrelShiftOpCode, bs_op: BarrelShiftOpCode,
reg: usize, reg: usize,
rs: usize, rs: usize,
carry: bool, carry: &mut bool,
) -> u32 { ) -> u32 {
let mut val = self.get_reg(reg); let mut val = self.get_reg(reg);
if reg == REG_PC { if reg == REG_PC {
@ -271,8 +261,7 @@ impl<I: MemoryInterface> Core<I> {
self.barrel_shift_op(bs_op, val, amount, carry, false) self.barrel_shift_op(bs_op, val, amount, carry, false)
} }
pub fn register_shift(&mut self, shift: &ShiftedRegister) -> u32 { pub fn register_shift(&mut self, shift: &ShiftedRegister, carry: &mut bool) -> u32 {
let carry = self.cpsr.C();
match shift.shift_by { match shift.shift_by {
ShiftRegisterBy::ByAmount(amount) => { ShiftRegisterBy::ByAmount(amount) => {
let result = let result =
@ -285,13 +274,13 @@ impl<I: MemoryInterface> Core<I> {
} }
} }
pub fn get_barrel_shifted_value(&mut self, sval: &BarrelShifterValue) -> u32 { pub fn get_barrel_shifted_value(&mut self, sval: &BarrelShifterValue, carry: &mut bool) -> u32 {
// TODO decide if error handling or panic here // TODO decide if error handling or panic here
match sval { match sval {
BarrelShifterValue::ImmediateValue(offset) => *offset as u32, BarrelShifterValue::ImmediateValue(offset) => *offset as u32,
BarrelShifterValue::ShiftedRegister(shifted_reg) => { BarrelShifterValue::ShiftedRegister(shifted_reg) => {
let added = (*shifted_reg).added.unwrap_or(true); let added = (*shifted_reg).added.unwrap_or(true);
let abs = self.register_shift(shifted_reg) as u32; let abs = self.register_shift(shifted_reg, carry) as u32;
if added { if added {
abs as u32 abs as u32
} else { } else {

View file

@ -93,7 +93,10 @@ impl<I: MemoryInterface> Core<I> {
if insn.bit(25) { if insn.bit(25) {
let immediate = insn & 0xff; let immediate = insn & 0xff;
let rotate = 2 * insn.bit_range(8..12); let rotate = 2 * insn.bit_range(8..12);
self.ror(immediate, rotate, self.cpsr.C(), false, true) let mut carry = self.cpsr.C();
let v = self.ror(immediate, rotate, &mut carry, false, true);
self.cpsr.set_C(carry);
v
} else { } else {
self.get_reg((insn & 0b1111) as usize) self.get_reg((insn & 0b1111) as usize)
} }
@ -175,13 +178,12 @@ impl<I: MemoryInterface> Core<I> {
let mut s_flag = insn.set_cond_flag(); let mut s_flag = insn.set_cond_flag();
let opcode = insn.opcode(); let opcode = insn.opcode();
let mut carry = self.cpsr.C();
let op2 = if insn.bit(25) { let op2 = if insn.bit(25) {
let immediate = insn & 0xff; let immediate = insn & 0xff;
let rotate = 2 * insn.bit_range(8..12); let rotate = 2 * insn.bit_range(8..12);
// TODO refactor out // TODO refactor out
let bs_carry_in = self.cpsr.C(); self.ror(immediate, rotate, &mut carry, false, true)
self.bs_carry_out = bs_carry_in;
self.ror(immediate, rotate, self.cpsr.C(), false, true)
} else { } else {
let reg = insn & 0xf; let reg = insn & 0xf;
@ -203,7 +205,7 @@ impl<I: MemoryInterface> Core<I> {
shift_by: shift_by, shift_by: shift_by,
added: None, added: None,
}; };
self.register_shift(&shifted_reg) self.register_shift(&shifted_reg, &mut carry)
}; };
if rd == REG_PC && s_flag { if rd == REG_PC && s_flag {
@ -211,9 +213,7 @@ impl<I: MemoryInterface> Core<I> {
s_flag = false; s_flag = false;
} }
let carry = self.cpsr.C() as u32;
let alu_res = if s_flag { let alu_res = if s_flag {
let mut carry = self.bs_carry_out;
let mut overflow = self.cpsr.V(); let mut overflow = self.cpsr.V();
let result = match opcode { let result = match opcode {
AND | TST => op1 & op2, AND | TST => op1 & op2,
@ -238,15 +238,16 @@ impl<I: MemoryInterface> Core<I> {
Some(result) Some(result)
} }
} else { } else {
let c = carry as u32;
Some(match opcode { Some(match opcode {
AND => op1 & op2, AND => op1 & op2,
EOR => op1 ^ op2, EOR => op1 ^ op2,
SUB => op1.wrapping_sub(op2), SUB => op1.wrapping_sub(op2),
RSB => op2.wrapping_sub(op1), RSB => op2.wrapping_sub(op1),
ADD => op1.wrapping_add(op2), ADD => op1.wrapping_add(op2),
ADC => op1.wrapping_add(op2).wrapping_add(carry), ADC => op1.wrapping_add(op2).wrapping_add(c),
SBC => op1.wrapping_sub(op2.wrapping_add(1 - carry)), SBC => op1.wrapping_sub(op2.wrapping_add(1 - c)),
RSC => op2.wrapping_sub(op1.wrapping_add(1 - carry)), RSC => op2.wrapping_sub(op1.wrapping_add(1 - c)),
ORR => op1 | op2, ORR => op1 | op2,
MOV => op2, MOV => op2,
BIC => op1 & (!op2), BIC => op1 & (!op2),
@ -290,7 +291,9 @@ impl<I: MemoryInterface> Core<I> {
if base_reg == REG_PC { if base_reg == REG_PC {
addr = self.pc_arm() + 8; // prefetching addr = self.pc_arm() + 8; // prefetching
} }
let offset = self.get_barrel_shifted_value(&insn.ldr_str_offset()); // TODO: wrong to use in here let mut carry = self.cpsr.C();
let offset = self.get_barrel_shifted_value(&insn.ldr_str_offset(), &mut carry); // TODO: wrong to use in here
self.cpsr.set_C(carry);
let effective_addr = (addr as i32).wrapping_add(offset as i32) as Addr; let effective_addr = (addr as i32).wrapping_add(offset as i32) as Addr;
// TODO - confirm this // TODO - confirm this
@ -350,15 +353,16 @@ impl<I: MemoryInterface> Core<I> {
} }
pub fn exec_arm_ldr_str_hs_reg(&mut self, insn: u32) -> CpuAction { pub fn exec_arm_ldr_str_hs_reg(&mut self, insn: u32) -> CpuAction {
self.ldr_str_hs( let offset = {
insn, let added = insn.add_offset_flag();
BarrelShifterValue::ShiftedRegister(ShiftedRegister { let abs = self.get_reg((insn & 0xf) as usize);
reg: (insn & 0xf) as usize, if added {
shift_by: ShiftRegisterBy::ByAmount(0), abs as u32
bs_op: BarrelShiftOpCode::LSL, } else {
added: Some(insn.add_offset_flag()), (-(abs as i32)) as u32
}), }
) };
self.ldr_str_hs_common(insn, offset)
} }
pub fn exec_arm_ldr_str_hs_imm(&mut self, insn: u32) -> CpuAction { pub fn exec_arm_ldr_str_hs_imm(&mut self, insn: u32) -> CpuAction {
@ -368,11 +372,11 @@ impl<I: MemoryInterface> Core<I> {
} else { } else {
(-(offset8 as i32)) as u32 (-(offset8 as i32)) as u32
}; };
self.ldr_str_hs(insn, BarrelShifterValue::ImmediateValue(offset8)) self.ldr_str_hs_common(insn, offset8)
} }
#[inline(always)] #[inline(always)]
pub fn ldr_str_hs(&mut self, insn: u32, offset: BarrelShifterValue) -> CpuAction { pub fn ldr_str_hs_common(&mut self, insn: u32, offset: u32) -> CpuAction {
let mut result = CpuAction::AdvancePC(NonSeq); let mut result = CpuAction::AdvancePC(NonSeq);
let load = insn.load_flag(); let load = insn.load_flag();
@ -385,8 +389,6 @@ impl<I: MemoryInterface> Core<I> {
addr = self.pc_arm() + 8; // prefetching addr = self.pc_arm() + 8; // prefetching
} }
let offset = self.get_barrel_shifted_value(&offset);
// TODO - confirm this // TODO - confirm this
let old_mode = self.cpsr.mode(); let old_mode = self.cpsr.mode();
if !pre_index && writeback { if !pre_index && writeback {

View file

@ -83,8 +83,6 @@ pub struct SavedCpuState {
pub(super) spsr: RegPSR, pub(super) spsr: RegPSR,
pub(super) banks: BankedRegisters, pub(super) banks: BankedRegisters,
pub(super) bs_carry_out: bool,
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
@ -125,9 +123,6 @@ pub struct Core<I: MemoryInterface> {
pub cpsr: RegPSR, pub cpsr: RegPSR,
pub(super) spsr: RegPSR, pub(super) spsr: RegPSR,
// Todo - do I still need this?
pub(super) bs_carry_out: bool,
pub(super) banks: BankedRegisters, pub(super) banks: BankedRegisters,
#[cfg(feature = "debugger")] #[cfg(feature = "debugger")]
@ -146,7 +141,6 @@ impl<I: MemoryInterface> Core<I> {
cpsr, cpsr,
spsr: Default::default(), spsr: Default::default(),
banks: BankedRegisters::default(), banks: BankedRegisters::default(),
bs_carry_out: false,
#[cfg(feature = "debugger")] #[cfg(feature = "debugger")]
dbg_info: DebuggerInfo::default(), dbg_info: DebuggerInfo::default(),
@ -167,7 +161,6 @@ impl<I: MemoryInterface> Core<I> {
banks: state.banks, banks: state.banks,
spsr: state.spsr, spsr: state.spsr,
bs_carry_out: state.bs_carry_out,
pipeline: state.pipeline, pipeline: state.pipeline,
next_fetch_access: state.next_fetch_access, next_fetch_access: state.next_fetch_access,
@ -184,7 +177,6 @@ impl<I: MemoryInterface> Core<I> {
gpr: self.gpr.clone(), gpr: self.gpr.clone(),
spsr: self.spsr, spsr: self.spsr,
banks: self.banks.clone(), banks: self.banks.clone(),
bs_carry_out: self.bs_carry_out,
pipeline: self.pipeline.clone(), pipeline: self.pipeline.clone(),
next_fetch_access: self.next_fetch_access, next_fetch_access: self.next_fetch_access,
} }
@ -196,7 +188,6 @@ impl<I: MemoryInterface> Core<I> {
self.gpr = state.gpr; self.gpr = state.gpr;
self.spsr = state.spsr; self.spsr = state.spsr;
self.banks = state.banks; self.banks = state.banks;
self.bs_carry_out = state.bs_carry_out;
self.pipeline = state.pipeline; self.pipeline = state.pipeline;
self.next_fetch_access = state.next_fetch_access; self.next_fetch_access = state.next_fetch_access;
} }

View file

@ -1,8 +1,8 @@
use std::fmt; use std::fmt;
use std::marker::PhantomData; use std::marker::PhantomData;
use super::InstructionDecoder;
use super::Addr; use super::Addr;
use super::InstructionDecoder;
pub struct Disassembler<'a, D> pub struct Disassembler<'a, D>
where where

View file

@ -134,7 +134,10 @@ impl<I: MemoryInterface> Core<I> {
if addr & 0x3 != 0 { if addr & 0x3 != 0 {
let rotation = (addr & 0x3) << 3; let rotation = (addr & 0x3) << 3;
let value = self.load_32(addr & !0x3, access); let value = self.load_32(addr & !0x3, access);
self.ror(value, rotation, self.cpsr.C(), false, false) let mut carry = self.cpsr.C();
let v = self.ror(value, rotation, &mut carry, false, false);
self.cpsr.set_C(carry);
v
} else { } else {
self.load_32(addr, access) self.load_32(addr, access)
} }
@ -146,7 +149,10 @@ impl<I: MemoryInterface> Core<I> {
if addr & 0x1 != 0 { if addr & 0x1 != 0 {
let rotation = (addr & 0x1) << 3; let rotation = (addr & 0x1) << 3;
let value = self.load_16(addr & !0x1, access); let value = self.load_16(addr & !0x1, access);
self.ror(value as u32, rotation, self.cpsr.C(), false, false) let mut carry = self.cpsr.C();
let v = self.ror(value as u32, rotation, &mut carry, false, false);
self.cpsr.set_C(carry);
v
} else { } else {
self.load_16(addr, access) as u32 self.load_16(addr, access) as u32
} }

View file

@ -15,15 +15,16 @@ impl<I: MemoryInterface> Core<I> {
let rs = insn.bit_range(3..6) as usize; let rs = insn.bit_range(3..6) as usize;
let shift_amount = insn.offset5() as u8 as u32; let shift_amount = insn.offset5() as u8 as u32;
let mut carry = self.cpsr.C();
let op2 = self.barrel_shift_op( let op2 = self.barrel_shift_op(
insn.format1_op(), insn.format1_op(),
self.gpr[rs], self.gpr[rs],
shift_amount, shift_amount,
self.cpsr.C(), &mut carry,
true, true,
); );
self.gpr[rd] = op2; self.gpr[rd] = op2;
self.alu_update_flags(op2, false, self.bs_carry_out, self.cpsr.V()); self.alu_update_flags(op2, false, carry, self.cpsr.V());
CpuAction::AdvancePC(Seq) CpuAction::AdvancePC(Seq)
} }
@ -95,9 +96,8 @@ impl<I: MemoryInterface> Core<I> {
macro_rules! shifter_op { macro_rules! shifter_op {
($bs_op:expr) => {{ ($bs_op:expr) => {{
let result = self.shift_by_register($bs_op, rd, rs, carry); let result = self.shift_by_register($bs_op, rd, rs, &mut carry);
self.idle_cycle(); self.idle_cycle();
carry = self.bs_carry_out;
result result
}}; }};
} }

View file

@ -336,7 +336,10 @@ impl EepromController {
match (src, dst) { match (src, dst) {
// DMA to EEPROM // DMA to EEPROM
(_, 0x0d000000..=0x0dffffff) => { (_, 0x0d000000..=0x0dffffff) => {
debug!("caught eeprom dma transfer src={:#x} dst={:#x} count={}", src, dst, count); debug!(
"caught eeprom dma transfer src={:#x} dst={:#x} count={}",
src, dst, count
);
let eeprom_type = match count { let eeprom_type = match count {
// Read(11) + 6bit address + stop bit // Read(11) + 6bit address + stop bit
9 => Eeprom512, 9 => Eeprom512,
@ -346,7 +349,10 @@ impl EepromController {
73 => Eeprom512, 73 => Eeprom512,
// Write(11) + 14bit address + 64bit value + stop bit // Write(11) + 14bit address + 64bit value + stop bit
81 => Eeprom8k, 81 => Eeprom8k,
_ => panic!("unexpected bit count ({}) when detecting eeprom size", count) _ => panic!(
"unexpected bit count ({}) when detecting eeprom size",
count
),
}; };
info!("detected eeprom type: {:?}", eeprom_type); info!("detected eeprom type: {:?}", eeprom_type);
self.chip.borrow_mut().set_type(eeprom_type); self.chip.borrow_mut().set_type(eeprom_type);
@ -356,7 +362,7 @@ impl EepromController {
(0x0d000000..=0x0dffffff, _) => { (0x0d000000..=0x0dffffff, _) => {
panic!("reading from eeprom when real size is not detected yet is not supported by this emulator") panic!("reading from eeprom when real size is not detected yet is not supported by this emulator")
} }
_ => {/* Not a eeprom dma, doing nothing */} _ => { /* Not a eeprom dma, doing nothing */ }
} }
} else { } else {
// this might be a eeprom request, so we need to reset the eeprom state machine if its dirty (due to bad behaving games, or tests roms) // this might be a eeprom request, so we need to reset the eeprom state machine if its dirty (due to bad behaving games, or tests roms)

View file

@ -291,10 +291,7 @@ impl SoundController {
self.resampler.in_freq = self.sample_rate; self.resampler.in_freq = self.sample_rate;
} }
self.cycles_per_sample = 512 >> resolution; self.cycles_per_sample = 512 >> resolution;
info!( info!("bias - setting sample frequency to {}hz", self.sample_rate);
"bias - setting sample frequency to {}hz",
self.sample_rate
);
// TODO this will not affect the currently scheduled sample event // TODO this will not affect the currently scheduled sample event
} }