[perf] arm: Improve performance with barrel shifter.

Instead of using a struct member to hold the last barrel shifter carry output, which is expansive as it needs to be saved/loaded from memory, I now pass the carry around as an INOUT parameter.
Using perf anotate really shows high perctile of samples being spent on reading/writing `self.bs_carry_out`

Since this is a rather "surgical" changeset, I have made sure to run it against eggvance test suite, mGBA test suite and some games as well.

I actually saw better improvements than what the benchmark measured, but 7% is decent enough :)

```
run_60_frames           time:   [180.18 ms 180.45 ms 180.77 ms]
                        change: [-7.2464% -6.9081% -6.6324%] (p = 0.00 < 0.05)
                        Performance has improved.
```


Former-commit-id: 7cd7105a07aa0b78cab9dc8bbae3682b02b7ab7c
Former-commit-id: c68514beb3fa6c34f5f65544acbead21e527dbb0
This commit is contained in:
Michel Heily 2021-06-05 18:54:46 +03:00
parent caf89d8532
commit 20506091cc
8 changed files with 78 additions and 87 deletions

View file

@ -111,66 +111,59 @@ impl BarrelShifterValue {
} }
impl<I: MemoryInterface> Core<I> { impl<I: MemoryInterface> Core<I> {
pub fn lsl(&mut self, val: u32, amount: u32, carry_in: bool) -> u32 { pub fn lsl(&mut self, val: u32, amount: u32, carry: &mut bool) -> u32 {
match amount { match amount {
0 => { 0 => val,
self.bs_carry_out = carry_in;
val
}
x if x < 32 => { x if x < 32 => {
self.bs_carry_out = val.wrapping_shr(32 - x) & 1 == 1; *carry = val.wrapping_shr(32 - x) & 1 == 1;
val << x val << x
} }
32 => { 32 => {
self.bs_carry_out = val & 1 == 1; *carry = val & 1 == 1;
0 0
} }
_ => { _ => {
self.bs_carry_out = false; *carry = false;
0 0
} }
} }
} }
pub fn lsr(&mut self, val: u32, amount: u32, carry_in: bool, immediate: bool) -> u32 { pub fn lsr(&mut self, val: u32, amount: u32, carry: &mut bool, immediate: bool) -> u32 {
if amount != 0 { if amount != 0 {
match amount { match amount {
x if x < 32 => { x if x < 32 => {
self.bs_carry_out = (val >> (amount - 1) & 1) == 1; *carry = (val >> (amount - 1) & 1) == 1;
val >> amount val >> amount
} }
32 => { 32 => {
self.bs_carry_out = val.bit(31); *carry = val.bit(31);
0 0
} }
_ => { _ => {
self.bs_carry_out = false; *carry = false;
0 0
} }
} }
} else if immediate { } else if immediate {
self.bs_carry_out = val.bit(31); *carry = val.bit(31);
0 0
} else { } else {
self.bs_carry_out = carry_in;
val val
} }
} }
pub fn asr(&mut self, val: u32, amount: u32, carry_in: bool, immediate: bool) -> u32 { pub fn asr(&mut self, val: u32, amount: u32, carry: &mut bool, immediate: bool) -> u32 {
let amount = if immediate && amount == 0 { 32 } else { amount }; let amount = if immediate && amount == 0 { 32 } else { amount };
match amount { match amount {
0 => { 0 => val,
self.bs_carry_out = carry_in;
val
}
x if x < 32 => { x if x < 32 => {
self.bs_carry_out = val.wrapping_shr(amount - 1) & 1 == 1; *carry = val.wrapping_shr(amount - 1) & 1 == 1;
(val as i32).wrapping_shr(amount) as u32 (val as i32).wrapping_shr(amount) as u32
} }
_ => { _ => {
let bit31 = val.bit(31); let bit31 = val.bit(31);
self.bs_carry_out = bit31; *carry = bit31;
if bit31 { if bit31 {
0xffffffff 0xffffffff
} else { } else {
@ -180,9 +173,9 @@ impl<I: MemoryInterface> Core<I> {
} }
} }
pub fn rrx(&mut self, val: u32, carry_in: bool) -> u32 { pub fn rrx(&mut self, val: u32, carry: &mut bool) -> u32 {
let old_c = carry_in as i32; let old_c = *carry as i32;
self.bs_carry_out = val & 0b1 != 0; *carry = val & 0b1 != 0;
(((val as u32) >> 1) as i32 | (old_c << 31)) as u32 (((val as u32) >> 1) as i32 | (old_c << 31)) as u32
} }
@ -190,14 +183,14 @@ impl<I: MemoryInterface> Core<I> {
&mut self, &mut self,
val: u32, val: u32,
amount: u32, amount: u32,
carry_in: bool, carry: &mut bool,
immediate: bool, immediate: bool,
rrx: bool, rrx: bool,
) -> u32 { ) -> u32 {
match amount { match amount {
0 => { 0 => {
if immediate & rrx { if immediate & rrx {
self.rrx(val, carry_in) self.rrx(val, carry)
} else { } else {
val val
} }
@ -209,7 +202,7 @@ impl<I: MemoryInterface> Core<I> {
} else { } else {
val val
}; };
self.bs_carry_out = (val as u32).bit(31); *carry = (val as u32).bit(31);
val val
} }
} }
@ -222,12 +215,9 @@ impl<I: MemoryInterface> Core<I> {
shift: BarrelShiftOpCode, shift: BarrelShiftOpCode,
val: u32, val: u32,
amount: u32, amount: u32,
carry_in: bool, carry: &mut bool,
immediate: bool, immediate: bool,
) -> u32 { ) -> u32 {
// TODO get rid of Core::bs_carry_out field in favour sending the carry as a &mut reference,
// Forcing calling functions to do something with the carry :
self.bs_carry_out = carry_in;
// //
// From GBATEK: // From GBATEK:
// Zero Shift Amount (Shift Register by Immediate, with Immediate=0) // Zero Shift Amount (Shift Register by Immediate, with Immediate=0)
@ -248,10 +238,10 @@ impl<I: MemoryInterface> Core<I> {
// in the range 1 to 32 and see above. // in the range 1 to 32 and see above.
// //
match shift { match shift {
BarrelShiftOpCode::LSL => self.lsl(val, amount, carry_in), BarrelShiftOpCode::LSL => self.lsl(val, amount, carry),
BarrelShiftOpCode::LSR => self.lsr(val, amount, carry_in, immediate), BarrelShiftOpCode::LSR => self.lsr(val, amount, carry, immediate),
BarrelShiftOpCode::ASR => self.asr(val, amount, carry_in, immediate), BarrelShiftOpCode::ASR => self.asr(val, amount, carry, immediate),
BarrelShiftOpCode::ROR => self.ror(val, amount, carry_in, immediate, true), BarrelShiftOpCode::ROR => self.ror(val, amount, carry, immediate, true),
} }
} }
@ -261,7 +251,7 @@ impl<I: MemoryInterface> Core<I> {
bs_op: BarrelShiftOpCode, bs_op: BarrelShiftOpCode,
reg: usize, reg: usize,
rs: usize, rs: usize,
carry: bool, carry: &mut bool,
) -> u32 { ) -> u32 {
let mut val = self.get_reg(reg); let mut val = self.get_reg(reg);
if reg == REG_PC { if reg == REG_PC {
@ -271,8 +261,7 @@ impl<I: MemoryInterface> Core<I> {
self.barrel_shift_op(bs_op, val, amount, carry, false) self.barrel_shift_op(bs_op, val, amount, carry, false)
} }
pub fn register_shift(&mut self, shift: &ShiftedRegister) -> u32 { pub fn register_shift(&mut self, shift: &ShiftedRegister, carry: &mut bool) -> u32 {
let carry = self.cpsr.C();
match shift.shift_by { match shift.shift_by {
ShiftRegisterBy::ByAmount(amount) => { ShiftRegisterBy::ByAmount(amount) => {
let result = let result =
@ -285,13 +274,13 @@ impl<I: MemoryInterface> Core<I> {
} }
} }
pub fn get_barrel_shifted_value(&mut self, sval: &BarrelShifterValue) -> u32 { pub fn get_barrel_shifted_value(&mut self, sval: &BarrelShifterValue, carry: &mut bool) -> u32 {
// TODO decide if error handling or panic here // TODO decide if error handling or panic here
match sval { match sval {
BarrelShifterValue::ImmediateValue(offset) => *offset as u32, BarrelShifterValue::ImmediateValue(offset) => *offset as u32,
BarrelShifterValue::ShiftedRegister(shifted_reg) => { BarrelShifterValue::ShiftedRegister(shifted_reg) => {
let added = (*shifted_reg).added.unwrap_or(true); let added = (*shifted_reg).added.unwrap_or(true);
let abs = self.register_shift(shifted_reg) as u32; let abs = self.register_shift(shifted_reg, carry) as u32;
if added { if added {
abs as u32 abs as u32
} else { } else {

View file

@ -93,7 +93,10 @@ impl<I: MemoryInterface> Core<I> {
if insn.bit(25) { if insn.bit(25) {
let immediate = insn & 0xff; let immediate = insn & 0xff;
let rotate = 2 * insn.bit_range(8..12); let rotate = 2 * insn.bit_range(8..12);
self.ror(immediate, rotate, self.cpsr.C(), false, true) let mut carry = self.cpsr.C();
let v = self.ror(immediate, rotate, &mut carry, false, true);
self.cpsr.set_C(carry);
v
} else { } else {
self.get_reg((insn & 0b1111) as usize) self.get_reg((insn & 0b1111) as usize)
} }
@ -175,13 +178,12 @@ impl<I: MemoryInterface> Core<I> {
let mut s_flag = insn.set_cond_flag(); let mut s_flag = insn.set_cond_flag();
let opcode = insn.opcode(); let opcode = insn.opcode();
let mut carry = self.cpsr.C();
let op2 = if insn.bit(25) { let op2 = if insn.bit(25) {
let immediate = insn & 0xff; let immediate = insn & 0xff;
let rotate = 2 * insn.bit_range(8..12); let rotate = 2 * insn.bit_range(8..12);
// TODO refactor out // TODO refactor out
let bs_carry_in = self.cpsr.C(); self.ror(immediate, rotate, &mut carry, false, true)
self.bs_carry_out = bs_carry_in;
self.ror(immediate, rotate, self.cpsr.C(), false, true)
} else { } else {
let reg = insn & 0xf; let reg = insn & 0xf;
@ -203,7 +205,7 @@ impl<I: MemoryInterface> Core<I> {
shift_by: shift_by, shift_by: shift_by,
added: None, added: None,
}; };
self.register_shift(&shifted_reg) self.register_shift(&shifted_reg, &mut carry)
}; };
if rd == REG_PC && s_flag { if rd == REG_PC && s_flag {
@ -211,9 +213,7 @@ impl<I: MemoryInterface> Core<I> {
s_flag = false; s_flag = false;
} }
let carry = self.cpsr.C() as u32;
let alu_res = if s_flag { let alu_res = if s_flag {
let mut carry = self.bs_carry_out;
let mut overflow = self.cpsr.V(); let mut overflow = self.cpsr.V();
let result = match opcode { let result = match opcode {
AND | TST => op1 & op2, AND | TST => op1 & op2,
@ -238,15 +238,16 @@ impl<I: MemoryInterface> Core<I> {
Some(result) Some(result)
} }
} else { } else {
let c = carry as u32;
Some(match opcode { Some(match opcode {
AND => op1 & op2, AND => op1 & op2,
EOR => op1 ^ op2, EOR => op1 ^ op2,
SUB => op1.wrapping_sub(op2), SUB => op1.wrapping_sub(op2),
RSB => op2.wrapping_sub(op1), RSB => op2.wrapping_sub(op1),
ADD => op1.wrapping_add(op2), ADD => op1.wrapping_add(op2),
ADC => op1.wrapping_add(op2).wrapping_add(carry), ADC => op1.wrapping_add(op2).wrapping_add(c),
SBC => op1.wrapping_sub(op2.wrapping_add(1 - carry)), SBC => op1.wrapping_sub(op2.wrapping_add(1 - c)),
RSC => op2.wrapping_sub(op1.wrapping_add(1 - carry)), RSC => op2.wrapping_sub(op1.wrapping_add(1 - c)),
ORR => op1 | op2, ORR => op1 | op2,
MOV => op2, MOV => op2,
BIC => op1 & (!op2), BIC => op1 & (!op2),
@ -290,7 +291,9 @@ impl<I: MemoryInterface> Core<I> {
if base_reg == REG_PC { if base_reg == REG_PC {
addr = self.pc_arm() + 8; // prefetching addr = self.pc_arm() + 8; // prefetching
} }
let offset = self.get_barrel_shifted_value(&insn.ldr_str_offset()); // TODO: wrong to use in here let mut carry = self.cpsr.C();
let offset = self.get_barrel_shifted_value(&insn.ldr_str_offset(), &mut carry); // TODO: wrong to use in here
self.cpsr.set_C(carry);
let effective_addr = (addr as i32).wrapping_add(offset as i32) as Addr; let effective_addr = (addr as i32).wrapping_add(offset as i32) as Addr;
// TODO - confirm this // TODO - confirm this
@ -350,15 +353,16 @@ impl<I: MemoryInterface> Core<I> {
} }
pub fn exec_arm_ldr_str_hs_reg(&mut self, insn: u32) -> CpuAction { pub fn exec_arm_ldr_str_hs_reg(&mut self, insn: u32) -> CpuAction {
self.ldr_str_hs( let offset = {
insn, let added = insn.add_offset_flag();
BarrelShifterValue::ShiftedRegister(ShiftedRegister { let abs = self.get_reg((insn & 0xf) as usize);
reg: (insn & 0xf) as usize, if added {
shift_by: ShiftRegisterBy::ByAmount(0), abs as u32
bs_op: BarrelShiftOpCode::LSL, } else {
added: Some(insn.add_offset_flag()), (-(abs as i32)) as u32
}), }
) };
self.ldr_str_hs_common(insn, offset)
} }
pub fn exec_arm_ldr_str_hs_imm(&mut self, insn: u32) -> CpuAction { pub fn exec_arm_ldr_str_hs_imm(&mut self, insn: u32) -> CpuAction {
@ -368,11 +372,11 @@ impl<I: MemoryInterface> Core<I> {
} else { } else {
(-(offset8 as i32)) as u32 (-(offset8 as i32)) as u32
}; };
self.ldr_str_hs(insn, BarrelShifterValue::ImmediateValue(offset8)) self.ldr_str_hs_common(insn, offset8)
} }
#[inline(always)] #[inline(always)]
pub fn ldr_str_hs(&mut self, insn: u32, offset: BarrelShifterValue) -> CpuAction { pub fn ldr_str_hs_common(&mut self, insn: u32, offset: u32) -> CpuAction {
let mut result = CpuAction::AdvancePC(NonSeq); let mut result = CpuAction::AdvancePC(NonSeq);
let load = insn.load_flag(); let load = insn.load_flag();
@ -385,8 +389,6 @@ impl<I: MemoryInterface> Core<I> {
addr = self.pc_arm() + 8; // prefetching addr = self.pc_arm() + 8; // prefetching
} }
let offset = self.get_barrel_shifted_value(&offset);
// TODO - confirm this // TODO - confirm this
let old_mode = self.cpsr.mode(); let old_mode = self.cpsr.mode();
if !pre_index && writeback { if !pre_index && writeback {

View file

@ -83,8 +83,6 @@ pub struct SavedCpuState {
pub(super) spsr: RegPSR, pub(super) spsr: RegPSR,
pub(super) banks: BankedRegisters, pub(super) banks: BankedRegisters,
pub(super) bs_carry_out: bool,
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
@ -125,9 +123,6 @@ pub struct Core<I: MemoryInterface> {
pub cpsr: RegPSR, pub cpsr: RegPSR,
pub(super) spsr: RegPSR, pub(super) spsr: RegPSR,
// Todo - do I still need this?
pub(super) bs_carry_out: bool,
pub(super) banks: BankedRegisters, pub(super) banks: BankedRegisters,
#[cfg(feature = "debugger")] #[cfg(feature = "debugger")]
@ -146,7 +141,6 @@ impl<I: MemoryInterface> Core<I> {
cpsr, cpsr,
spsr: Default::default(), spsr: Default::default(),
banks: BankedRegisters::default(), banks: BankedRegisters::default(),
bs_carry_out: false,
#[cfg(feature = "debugger")] #[cfg(feature = "debugger")]
dbg_info: DebuggerInfo::default(), dbg_info: DebuggerInfo::default(),
@ -167,7 +161,6 @@ impl<I: MemoryInterface> Core<I> {
banks: state.banks, banks: state.banks,
spsr: state.spsr, spsr: state.spsr,
bs_carry_out: state.bs_carry_out,
pipeline: state.pipeline, pipeline: state.pipeline,
next_fetch_access: state.next_fetch_access, next_fetch_access: state.next_fetch_access,
@ -184,7 +177,6 @@ impl<I: MemoryInterface> Core<I> {
gpr: self.gpr.clone(), gpr: self.gpr.clone(),
spsr: self.spsr, spsr: self.spsr,
banks: self.banks.clone(), banks: self.banks.clone(),
bs_carry_out: self.bs_carry_out,
pipeline: self.pipeline.clone(), pipeline: self.pipeline.clone(),
next_fetch_access: self.next_fetch_access, next_fetch_access: self.next_fetch_access,
} }
@ -196,7 +188,6 @@ impl<I: MemoryInterface> Core<I> {
self.gpr = state.gpr; self.gpr = state.gpr;
self.spsr = state.spsr; self.spsr = state.spsr;
self.banks = state.banks; self.banks = state.banks;
self.bs_carry_out = state.bs_carry_out;
self.pipeline = state.pipeline; self.pipeline = state.pipeline;
self.next_fetch_access = state.next_fetch_access; self.next_fetch_access = state.next_fetch_access;
} }

View file

@ -1,8 +1,8 @@
use std::fmt; use std::fmt;
use std::marker::PhantomData; use std::marker::PhantomData;
use super::InstructionDecoder;
use super::Addr; use super::Addr;
use super::InstructionDecoder;
pub struct Disassembler<'a, D> pub struct Disassembler<'a, D>
where where

View file

@ -134,7 +134,10 @@ impl<I: MemoryInterface> Core<I> {
if addr & 0x3 != 0 { if addr & 0x3 != 0 {
let rotation = (addr & 0x3) << 3; let rotation = (addr & 0x3) << 3;
let value = self.load_32(addr & !0x3, access); let value = self.load_32(addr & !0x3, access);
self.ror(value, rotation, self.cpsr.C(), false, false) let mut carry = self.cpsr.C();
let v = self.ror(value, rotation, &mut carry, false, false);
self.cpsr.set_C(carry);
v
} else { } else {
self.load_32(addr, access) self.load_32(addr, access)
} }
@ -146,7 +149,10 @@ impl<I: MemoryInterface> Core<I> {
if addr & 0x1 != 0 { if addr & 0x1 != 0 {
let rotation = (addr & 0x1) << 3; let rotation = (addr & 0x1) << 3;
let value = self.load_16(addr & !0x1, access); let value = self.load_16(addr & !0x1, access);
self.ror(value as u32, rotation, self.cpsr.C(), false, false) let mut carry = self.cpsr.C();
let v = self.ror(value as u32, rotation, &mut carry, false, false);
self.cpsr.set_C(carry);
v
} else { } else {
self.load_16(addr, access) as u32 self.load_16(addr, access) as u32
} }

View file

@ -15,15 +15,16 @@ impl<I: MemoryInterface> Core<I> {
let rs = insn.bit_range(3..6) as usize; let rs = insn.bit_range(3..6) as usize;
let shift_amount = insn.offset5() as u8 as u32; let shift_amount = insn.offset5() as u8 as u32;
let mut carry = self.cpsr.C();
let op2 = self.barrel_shift_op( let op2 = self.barrel_shift_op(
insn.format1_op(), insn.format1_op(),
self.gpr[rs], self.gpr[rs],
shift_amount, shift_amount,
self.cpsr.C(), &mut carry,
true, true,
); );
self.gpr[rd] = op2; self.gpr[rd] = op2;
self.alu_update_flags(op2, false, self.bs_carry_out, self.cpsr.V()); self.alu_update_flags(op2, false, carry, self.cpsr.V());
CpuAction::AdvancePC(Seq) CpuAction::AdvancePC(Seq)
} }
@ -95,9 +96,8 @@ impl<I: MemoryInterface> Core<I> {
macro_rules! shifter_op { macro_rules! shifter_op {
($bs_op:expr) => {{ ($bs_op:expr) => {{
let result = self.shift_by_register($bs_op, rd, rs, carry); let result = self.shift_by_register($bs_op, rd, rs, &mut carry);
self.idle_cycle(); self.idle_cycle();
carry = self.bs_carry_out;
result result
}}; }};
} }

View file

@ -336,7 +336,10 @@ impl EepromController {
match (src, dst) { match (src, dst) {
// DMA to EEPROM // DMA to EEPROM
(_, 0x0d000000..=0x0dffffff) => { (_, 0x0d000000..=0x0dffffff) => {
debug!("caught eeprom dma transfer src={:#x} dst={:#x} count={}", src, dst, count); debug!(
"caught eeprom dma transfer src={:#x} dst={:#x} count={}",
src, dst, count
);
let eeprom_type = match count { let eeprom_type = match count {
// Read(11) + 6bit address + stop bit // Read(11) + 6bit address + stop bit
9 => Eeprom512, 9 => Eeprom512,
@ -346,7 +349,10 @@ impl EepromController {
73 => Eeprom512, 73 => Eeprom512,
// Write(11) + 14bit address + 64bit value + stop bit // Write(11) + 14bit address + 64bit value + stop bit
81 => Eeprom8k, 81 => Eeprom8k,
_ => panic!("unexpected bit count ({}) when detecting eeprom size", count) _ => panic!(
"unexpected bit count ({}) when detecting eeprom size",
count
),
}; };
info!("detected eeprom type: {:?}", eeprom_type); info!("detected eeprom type: {:?}", eeprom_type);
self.chip.borrow_mut().set_type(eeprom_type); self.chip.borrow_mut().set_type(eeprom_type);

View file

@ -291,10 +291,7 @@ impl SoundController {
self.resampler.in_freq = self.sample_rate; self.resampler.in_freq = self.sample_rate;
} }
self.cycles_per_sample = 512 >> resolution; self.cycles_per_sample = 512 >> resolution;
info!( info!("bias - setting sample frequency to {}hz", self.sample_rate);
"bias - setting sample frequency to {}hz",
self.sample_rate
);
// TODO this will not affect the currently scheduled sample event // TODO this will not affect the currently scheduled sample event
} }