optimize/cpu: Pass Arm/Thumb Instruction and other large structs as references.

Also, get rid of unnecessary derive Copy.

Since ArmInstruction (and ThumbInstruction) derived Copy, the compiler
always copied them onto the stack when they were passed by value to other
functions, causing unwanted performance overhead.

I removed the derive Copy and now pass them by reference instead. This
project has a lot of "derive Copy" everywhere, and I should check whether
this is happening elsewhere as well.


Former-commit-id: 2f94c6050fa26c5b777244bd26706d4e6e2f0dc9
This commit is contained in:
Michel Heily 2020-03-28 15:47:10 +03:00 committed by MishMish
parent 2af9249a6c
commit f0aa671674
7 changed files with 55 additions and 51 deletions

View file

@ -262,7 +262,7 @@ impl Core {
self.barrel_shift_op(bs_op, val, amount, carry, false)
}
pub fn register_shift(&mut self, shift: ShiftedRegister) -> u32 {
pub fn register_shift(&mut self, shift: &ShiftedRegister) -> u32 {
let carry = self.cpsr.C();
match shift.shift_by {
ShiftRegisterBy::ByAmount(amount) => {
@ -276,12 +276,12 @@ impl Core {
}
}
pub fn get_barrel_shifted_value(&mut self, sval: BarrelShifterValue) -> u32 {
pub fn get_barrel_shifted_value(&mut self, sval: &BarrelShifterValue) -> u32 {
// TODO decide if error handling or panic here
match sval {
BarrelShifterValue::ImmediateValue(offset) => offset as u32,
BarrelShifterValue::ImmediateValue(offset) => *offset as u32,
BarrelShifterValue::ShiftedRegister(shifted_reg) => {
let added = shifted_reg.added.unwrap_or(true);
let added = (*shifted_reg).added.unwrap_or(true);
let abs = self.register_shift(shifted_reg) as u32;
if added {
abs as u32

View file

@ -11,7 +11,7 @@ use crate::core::Bus;
use super::*;
impl Core {
pub fn exec_arm(&mut self, bus: &mut SysBus, insn: ArmInstruction) -> CpuAction {
pub fn exec_arm(&mut self, bus: &mut SysBus, insn: &ArmInstruction) -> CpuAction {
if insn.cond != ArmCond::AL {
if !self.check_arm_cond(insn.cond) {
self.S_cycle32(bus, self.pc);
@ -40,7 +40,7 @@ impl Core {
}
/// Cycles 2S+1N
fn exec_b_bl(&mut self, sb: &mut SysBus, insn: ArmInstruction) -> CpuAction {
fn exec_b_bl(&mut self, sb: &mut SysBus, insn: &ArmInstruction) -> CpuAction {
self.S_cycle32(sb, self.pc);
if insn.link_flag() {
self.set_reg(REG_LR, (insn.pc + (self.word_size() as u32)) & !0b1);
@ -73,7 +73,7 @@ impl Core {
}
/// Cycles 2S+1N
fn exec_bx(&mut self, sb: &mut SysBus, insn: ArmInstruction) -> CpuAction {
fn exec_bx(&mut self, sb: &mut SysBus, insn: &ArmInstruction) -> CpuAction {
self.branch_exchange(sb, self.get_reg(insn.rn()))
}
@ -94,7 +94,7 @@ impl Core {
CpuAction::AdvancePC
}
fn exec_msr_reg(&mut self, sb: &mut SysBus, insn: ArmInstruction) -> CpuAction {
fn exec_msr_reg(&mut self, sb: &mut SysBus, insn: &ArmInstruction) -> CpuAction {
self.write_status_register(sb, insn.spsr_flag(), self.get_reg(insn.rm()))
}
@ -129,10 +129,10 @@ impl Core {
CpuAction::AdvancePC
}
fn exec_msr_flags(&mut self, sb: &mut SysBus, insn: ArmInstruction) -> CpuAction {
fn exec_msr_flags(&mut self, sb: &mut SysBus, insn: &ArmInstruction) -> CpuAction {
self.S_cycle32(sb, self.pc);
let op = insn.operand2();
let op = self.decode_operand2(op);
let op = self.decode_operand2(&op);
if insn.spsr_flag() {
self.spsr.set_flag_bits(op);
@ -142,12 +142,12 @@ impl Core {
CpuAction::AdvancePC
}
fn decode_operand2(&mut self, op2: BarrelShifterValue) -> u32 {
fn decode_operand2(&mut self, op2: &BarrelShifterValue) -> u32 {
match op2 {
BarrelShifterValue::RotatedImmediate(val, amount) => {
self.ror(val, amount, self.cpsr.C(), false, true)
self.ror(*val, *amount, self.cpsr.C(), false, true)
}
BarrelShifterValue::ShiftedRegister(x) => self.register_shift(x),
BarrelShifterValue::ShiftedRegister(x) => self.register_shift(&x),
_ => unreachable!(),
}
}
@ -164,7 +164,7 @@ impl Core {
///
/// Cycles: 1S+x+y (from GBATEK)
/// Add x=1I cycles if Op2 shifted-by-register. Add y=1S+1N cycles if Rd=R15.
fn exec_data_processing(&mut self, sb: &mut SysBus, insn: ArmInstruction) -> CpuAction {
fn exec_data_processing(&mut self, sb: &mut SysBus, insn: &ArmInstruction) -> CpuAction {
use AluOpCode::*;
self.S_cycle32(sb, self.pc);
@ -186,7 +186,7 @@ impl Core {
}
_ => {}
}
let op2 = self.decode_operand2(op2);
let op2 = self.decode_operand2(&op2);
let reg_rd = insn.rd();
if !s_flag {
@ -275,7 +275,7 @@ impl Core {
/// STR{cond}{B}{T} Rd,<Address> | 2N | ---- | [Rn+/-<offset>]=Rd
/// ------------------------------------------------------------------------------
/// For LDR, add y=1S+1N if Rd=R15.
fn exec_ldr_str(&mut self, sb: &mut SysBus, insn: ArmInstruction) -> CpuAction {
fn exec_ldr_str(&mut self, sb: &mut SysBus, insn: &ArmInstruction) -> CpuAction {
let mut result = CpuAction::AdvancePC;
let load = insn.load_flag();
@ -287,7 +287,7 @@ impl Core {
if base_reg == REG_PC {
addr = insn.pc + 8; // prefetching
}
let offset = self.get_barrel_shifted_value(insn.ldr_str_offset());
let offset = self.get_barrel_shifted_value(&insn.ldr_str_offset());
let effective_addr = (addr as i32).wrapping_add(offset as i32) as Addr;
// TODO - confirm this
@ -352,7 +352,7 @@ impl Core {
result
}
fn exec_ldr_str_hs(&mut self, sb: &mut SysBus, insn: ArmInstruction) -> CpuAction {
fn exec_ldr_str_hs(&mut self, sb: &mut SysBus, insn: &ArmInstruction) -> CpuAction {
let mut result = CpuAction::AdvancePC;
let load = insn.load_flag();
@ -365,7 +365,7 @@ impl Core {
addr = insn.pc + 8; // prefetching
}
let offset = self.get_barrel_shifted_value(insn.ldr_str_hs_offset().unwrap());
let offset = self.get_barrel_shifted_value(&insn.ldr_str_hs_offset().unwrap());
// TODO - confirm this
let old_mode = self.cpsr.mode();
@ -434,7 +434,7 @@ impl Core {
result
}
fn exec_ldm_stm(&mut self, sb: &mut SysBus, insn: ArmInstruction) -> CpuAction {
fn exec_ldm_stm(&mut self, sb: &mut SysBus, insn: &ArmInstruction) -> CpuAction {
let mut result = CpuAction::AdvancePC;
let mut full = insn.pre_index_flag();
@ -584,7 +584,7 @@ impl Core {
result
}
fn exec_mul_mla(&mut self, sb: &mut SysBus, insn: ArmInstruction) -> CpuAction {
fn exec_mul_mla(&mut self, sb: &mut SysBus, insn: &ArmInstruction) -> CpuAction {
let (rd, rn, rs, rm) = (insn.rd(), insn.rn(), insn.rs(), insn.rm());
// check validity
@ -619,7 +619,7 @@ impl Core {
CpuAction::AdvancePC
}
fn exec_mull_mlal(&mut self, sb: &mut SysBus, insn: ArmInstruction) -> CpuAction {
fn exec_mull_mlal(&mut self, sb: &mut SysBus, insn: &ArmInstruction) -> CpuAction {
let (rd_hi, rd_lo, rn, rs, rm) =
(insn.rd_hi(), insn.rd_lo(), insn.rn(), insn.rs(), insn.rm());
@ -666,7 +666,7 @@ impl Core {
CpuAction::AdvancePC
}
fn exec_arm_swp(&mut self, sb: &mut SysBus, insn: ArmInstruction) -> CpuAction {
fn exec_arm_swp(&mut self, sb: &mut SysBus, insn: &ArmInstruction) -> CpuAction {
let base_addr = self.get_reg(insn.rn());
if insn.transfer_size() == 1 {
let t = sb.read_8(base_addr);

View file

@ -99,7 +99,7 @@ pub enum ArmHalfwordTransferType {
SignedHalfwords = 0b11,
}
#[derive(Serialize, Deserialize, Debug, Copy, Clone, PartialEq)]
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct ArmInstruction {
pub cond: ArmCond,
pub fmt: ArmFormat,

View file

@ -306,9 +306,9 @@ impl Core {
#[cfg(feature = "debugger")]
{
self.gpr_previous = self.get_registers();
self.last_executed = Some(DecodedInstruction::Arm(decoded_arm));
self.last_executed = Some(DecodedInstruction::Arm(decoded_arm.clone()));
}
let result = self.exec_arm(sb, decoded_arm);
let result = self.exec_arm(sb, &decoded_arm);
match result {
CpuAction::AdvancePC => self.advance_arm(),
CpuAction::FlushPipeline => {}
@ -320,9 +320,9 @@ impl Core {
#[cfg(feature = "debugger")]
{
self.gpr_previous = self.get_registers();
self.last_executed = Some(DecodedInstruction::Thumb(decoded_thumb));
self.last_executed = Some(DecodedInstruction::Thumb(decoded_thumb.clone()));
}
let result = self.exec_thumb(sb, decoded_thumb);
let result = self.exec_thumb(sb, &decoded_thumb);
match result {
CpuAction::AdvancePC => self.advance_thumb(),
CpuAction::FlushPipeline => {}

View file

@ -28,7 +28,7 @@ pub enum CpuAction {
FlushPipeline,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Copy, Clone)]
#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)]
pub enum DecodedInstruction {
Arm(ArmInstruction),
Thumb(ThumbInstruction),

View file

@ -21,7 +21,7 @@ impl Core {
fn exec_thumb_move_shifted_reg(
&mut self,
sb: &mut SysBus,
insn: ThumbInstruction,
insn: &ThumbInstruction,
) -> CpuAction {
let rd = (insn.raw & 0b111) as usize;
let rs = insn.raw.bit_range(3..6) as usize;
@ -43,7 +43,7 @@ impl Core {
}
/// Format 2
fn exec_thumb_add_sub(&mut self, sb: &mut SysBus, insn: ThumbInstruction) -> CpuAction {
fn exec_thumb_add_sub(&mut self, sb: &mut SysBus, insn: &ThumbInstruction) -> CpuAction {
let rd = (insn.raw & 0b111) as usize;
let op1 = self.get_reg(insn.rs());
let op2 = if insn.is_immediate_operand() {
@ -71,7 +71,7 @@ impl Core {
fn exec_thumb_data_process_imm(
&mut self,
sb: &mut SysBus,
insn: ThumbInstruction,
insn: &ThumbInstruction,
) -> CpuAction {
use OpFormat3::*;
let op = insn.format3_op();
@ -96,7 +96,7 @@ impl Core {
}
/// Format 4
fn exec_thumb_alu_ops(&mut self, sb: &mut SysBus, insn: ThumbInstruction) -> CpuAction {
fn exec_thumb_alu_ops(&mut self, sb: &mut SysBus, insn: &ThumbInstruction) -> CpuAction {
let rd = (insn.raw & 0b111) as usize;
let rs = insn.rs();
let dst = self.get_reg(rd);
@ -153,7 +153,11 @@ impl Core {
}
/// Format 5
fn exec_thumb_hi_reg_op_or_bx(&mut self, sb: &mut SysBus, insn: ThumbInstruction) -> CpuAction {
fn exec_thumb_hi_reg_op_or_bx(
&mut self,
sb: &mut SysBus,
insn: &ThumbInstruction,
) -> CpuAction {
let op = insn.format5_op();
let rd = (insn.raw & 0b111) as usize;
let dst_reg = if insn.flag(ThumbInstruction::FLAG_H1) {
@ -201,7 +205,7 @@ impl Core {
}
/// Format 6
fn exec_thumb_ldr_pc(&mut self, sb: &mut SysBus, insn: ThumbInstruction) -> CpuAction {
fn exec_thumb_ldr_pc(&mut self, sb: &mut SysBus, insn: &ThumbInstruction) -> CpuAction {
let rd = insn.raw.bit_range(8..11) as usize;
let ofs = insn.word8() as Addr;
@ -222,7 +226,7 @@ impl Core {
fn do_exec_thumb_ldr_str(
&mut self,
sb: &mut SysBus,
insn: ThumbInstruction,
insn: &ThumbInstruction,
addr: Addr,
is_transferring_bytes: bool,
) -> CpuAction {
@ -260,7 +264,7 @@ impl Core {
fn exec_thumb_ldr_str_reg_offset(
&mut self,
bus: &mut SysBus,
insn: ThumbInstruction,
insn: &ThumbInstruction,
) -> CpuAction {
let rb = insn.raw.bit_range(3..6) as usize;
let addr = self.gpr[rb].wrapping_add(self.gpr[insn.ro()]);
@ -268,7 +272,7 @@ impl Core {
}
/// Format 8
fn exec_thumb_ldr_str_shb(&mut self, sb: &mut SysBus, insn: ThumbInstruction) -> CpuAction {
fn exec_thumb_ldr_str_shb(&mut self, sb: &mut SysBus, insn: &ThumbInstruction) -> CpuAction {
let rb = insn.raw.bit_range(3..6) as usize;
let rd = (insn.raw & 0b111) as usize;
@ -317,7 +321,7 @@ impl Core {
fn exec_thumb_ldr_str_imm_offset(
&mut self,
sb: &mut SysBus,
insn: ThumbInstruction,
insn: &ThumbInstruction,
) -> CpuAction {
let rb = insn.raw.bit_range(3..6) as usize;
@ -334,7 +338,7 @@ impl Core {
fn exec_thumb_ldr_str_halfword(
&mut self,
sb: &mut SysBus,
insn: ThumbInstruction,
insn: &ThumbInstruction,
) -> CpuAction {
let rb = insn.raw.bit_range(3..6) as usize;
let rd = (insn.raw & 0b111) as usize;
@ -355,7 +359,7 @@ impl Core {
}
/// Format 11
fn exec_thumb_ldr_str_sp(&mut self, sb: &mut SysBus, insn: ThumbInstruction) -> CpuAction {
fn exec_thumb_ldr_str_sp(&mut self, sb: &mut SysBus, insn: &ThumbInstruction) -> CpuAction {
let addr = self.gpr[REG_SP] + (insn.word8() as Addr);
let rd = insn.raw.bit_range(8..11) as usize;
if insn.is_load() {
@ -373,7 +377,7 @@ impl Core {
}
/// Format 12
fn exec_thumb_load_address(&mut self, sb: &mut SysBus, insn: ThumbInstruction) -> CpuAction {
fn exec_thumb_load_address(&mut self, sb: &mut SysBus, insn: &ThumbInstruction) -> CpuAction {
let rd = insn.raw.bit_range(8..11) as usize;
let result = if insn.flag(ThumbInstruction::FLAG_SP) {
self.gpr[REG_SP] + (insn.word8() as Addr)
@ -387,7 +391,7 @@ impl Core {
}
/// Format 13
fn exec_thumb_add_sp(&mut self, sb: &mut SysBus, insn: ThumbInstruction) -> CpuAction {
fn exec_thumb_add_sp(&mut self, sb: &mut SysBus, insn: &ThumbInstruction) -> CpuAction {
let op1 = self.gpr[REG_SP] as i32;
let op2 = insn.sword7();
@ -398,7 +402,7 @@ impl Core {
}
/// Format 14
fn exec_thumb_push_pop(&mut self, sb: &mut SysBus, insn: ThumbInstruction) -> CpuAction {
fn exec_thumb_push_pop(&mut self, sb: &mut SysBus, insn: &ThumbInstruction) -> CpuAction {
let mut result = CpuAction::AdvancePC;
// (From GBATEK) Execution Time: nS+1N+1I (POP), (n+1)S+2N+1I (POP PC), or (n-1)S+2N (PUSH).
@ -446,7 +450,7 @@ impl Core {
}
/// Format 15
fn exec_thumb_ldm_stm(&mut self, sb: &mut SysBus, insn: ThumbInstruction) -> CpuAction {
fn exec_thumb_ldm_stm(&mut self, sb: &mut SysBus, insn: &ThumbInstruction) -> CpuAction {
let mut result = CpuAction::AdvancePC;
// (From GBATEK) Execution Time: nS+1N+1I (POP), (n+1)S+2N+1I (POP PC), or (n-1)S+2N (PUSH).
@ -526,7 +530,7 @@ impl Core {
fn exec_thumb_branch_with_cond(
&mut self,
sb: &mut SysBus,
insn: ThumbInstruction,
insn: &ThumbInstruction,
) -> CpuAction {
if !self.check_arm_cond(insn.cond()) {
self.S_cycle16(sb, self.pc + 2);
@ -541,14 +545,14 @@ impl Core {
}
/// Format 17
fn exec_thumb_swi(&mut self, sb: &mut SysBus, _insn: ThumbInstruction) -> CpuAction {
fn exec_thumb_swi(&mut self, sb: &mut SysBus, _insn: &ThumbInstruction) -> CpuAction {
self.N_cycle16(sb, self.pc);
self.exception(sb, Exception::SoftwareInterrupt, self.pc - 2);
CpuAction::FlushPipeline
}
/// Format 18
fn exec_thumb_branch(&mut self, sb: &mut SysBus, insn: ThumbInstruction) -> CpuAction {
fn exec_thumb_branch(&mut self, sb: &mut SysBus, insn: &ThumbInstruction) -> CpuAction {
let offset = ((insn.offset11() << 21) >> 20) as i32;
self.pc = (self.pc as i32).wrapping_add(offset) as u32;
self.S_cycle16(sb, self.pc);
@ -560,7 +564,7 @@ impl Core {
fn exec_thumb_branch_long_with_link(
&mut self,
sb: &mut SysBus,
insn: ThumbInstruction,
insn: &ThumbInstruction,
) -> CpuAction {
let mut off = insn.offset11();
if insn.flag(ThumbInstruction::FLAG_LOW_OFFSET) {
@ -580,7 +584,7 @@ impl Core {
}
}
pub fn exec_thumb(&mut self, bus: &mut SysBus, insn: ThumbInstruction) -> CpuAction {
pub fn exec_thumb(&mut self, bus: &mut SysBus, insn: &ThumbInstruction) -> CpuAction {
match insn.fmt {
ThumbFormat::MoveShiftedReg => self.exec_thumb_move_shifted_reg(bus, insn),
ThumbFormat::AddSub => self.exec_thumb_add_sub(bus, insn),

View file

@ -51,7 +51,7 @@ pub enum ThumbFormat {
BranchLongWithLink,
}
#[derive(Serialize, Deserialize, Debug, Copy, Clone, PartialEq)]
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct ThumbInstruction {
pub fmt: ThumbFormat,
pub raw: u16,