From f0aa6716744a0f6ac92632140df086512e92ed89 Mon Sep 17 00:00:00 2001 From: Michel Heily Date: Sat, 28 Mar 2020 15:47:10 +0300 Subject: [PATCH] optimize/cpu: Pass Arm/Thumb Instruction and other large structs as references. Also, get rid of unnecessary derive Copy. Since the ArmInstruction (and Thumb) derived from Copy, the compiler always saves them to the stack when they are passed by value to other functions, thus adding some unwanted performance overhead. I removed the derive Copy, and also pass them as references. This project has a lot of "derive Copy" everywhere, and I should take note if this is happening elsewhere as well. Former-commit-id: 2f94c6050fa26c5b777244bd26706d4e6e2f0dc9 --- src/core/arm7tdmi/alu.rs | 8 +++--- src/core/arm7tdmi/arm/exec.rs | 38 +++++++++++++-------------- src/core/arm7tdmi/arm/mod.rs | 2 +- src/core/arm7tdmi/cpu.rs | 8 +++--- src/core/arm7tdmi/mod.rs | 2 +- src/core/arm7tdmi/thumb/exec.rs | 46 ++++++++++++++++++--------------- src/core/arm7tdmi/thumb/mod.rs | 2 +- 7 files changed, 55 insertions(+), 51 deletions(-) diff --git a/src/core/arm7tdmi/alu.rs b/src/core/arm7tdmi/alu.rs index 7e0b8c2..def9cc9 100644 --- a/src/core/arm7tdmi/alu.rs +++ b/src/core/arm7tdmi/alu.rs @@ -262,7 +262,7 @@ impl Core { self.barrel_shift_op(bs_op, val, amount, carry, false) } - pub fn register_shift(&mut self, shift: ShiftedRegister) -> u32 { + pub fn register_shift(&mut self, shift: &ShiftedRegister) -> u32 { let carry = self.cpsr.C(); match shift.shift_by { ShiftRegisterBy::ByAmount(amount) => { @@ -276,12 +276,12 @@ impl Core { } } - pub fn get_barrel_shifted_value(&mut self, sval: BarrelShifterValue) -> u32 { + pub fn get_barrel_shifted_value(&mut self, sval: &BarrelShifterValue) -> u32 { // TODO decide if error handling or panic here match sval { - BarrelShifterValue::ImmediateValue(offset) => offset as u32, + BarrelShifterValue::ImmediateValue(offset) => *offset as u32, BarrelShifterValue::ShiftedRegister(shifted_reg) => { - let added = 
shifted_reg.added.unwrap_or(true); + let added = (*shifted_reg).added.unwrap_or(true); let abs = self.register_shift(shifted_reg) as u32; if added { abs as u32 diff --git a/src/core/arm7tdmi/arm/exec.rs b/src/core/arm7tdmi/arm/exec.rs index 8da0674..b76c09c 100644 --- a/src/core/arm7tdmi/arm/exec.rs +++ b/src/core/arm7tdmi/arm/exec.rs @@ -11,7 +11,7 @@ use crate::core::Bus; use super::*; impl Core { - pub fn exec_arm(&mut self, bus: &mut SysBus, insn: ArmInstruction) -> CpuAction { + pub fn exec_arm(&mut self, bus: &mut SysBus, insn: &ArmInstruction) -> CpuAction { if insn.cond != ArmCond::AL { if !self.check_arm_cond(insn.cond) { self.S_cycle32(bus, self.pc); @@ -40,7 +40,7 @@ impl Core { } /// Cycles 2S+1N - fn exec_b_bl(&mut self, sb: &mut SysBus, insn: ArmInstruction) -> CpuAction { + fn exec_b_bl(&mut self, sb: &mut SysBus, insn: &ArmInstruction) -> CpuAction { self.S_cycle32(sb, self.pc); if insn.link_flag() { self.set_reg(REG_LR, (insn.pc + (self.word_size() as u32)) & !0b1); @@ -73,7 +73,7 @@ impl Core { } /// Cycles 2S+1N - fn exec_bx(&mut self, sb: &mut SysBus, insn: ArmInstruction) -> CpuAction { + fn exec_bx(&mut self, sb: &mut SysBus, insn: &ArmInstruction) -> CpuAction { self.branch_exchange(sb, self.get_reg(insn.rn())) } @@ -94,7 +94,7 @@ impl Core { CpuAction::AdvancePC } - fn exec_msr_reg(&mut self, sb: &mut SysBus, insn: ArmInstruction) -> CpuAction { + fn exec_msr_reg(&mut self, sb: &mut SysBus, insn: &ArmInstruction) -> CpuAction { self.write_status_register(sb, insn.spsr_flag(), self.get_reg(insn.rm())) } @@ -129,10 +129,10 @@ impl Core { CpuAction::AdvancePC } - fn exec_msr_flags(&mut self, sb: &mut SysBus, insn: ArmInstruction) -> CpuAction { + fn exec_msr_flags(&mut self, sb: &mut SysBus, insn: &ArmInstruction) -> CpuAction { self.S_cycle32(sb, self.pc); let op = insn.operand2(); - let op = self.decode_operand2(op); + let op = self.decode_operand2(&op); if insn.spsr_flag() { self.spsr.set_flag_bits(op); @@ -142,12 +142,12 @@ impl Core { 
CpuAction::AdvancePC } - fn decode_operand2(&mut self, op2: BarrelShifterValue) -> u32 { + fn decode_operand2(&mut self, op2: &BarrelShifterValue) -> u32 { match op2 { BarrelShifterValue::RotatedImmediate(val, amount) => { - self.ror(val, amount, self.cpsr.C(), false, true) + self.ror(*val, *amount, self.cpsr.C(), false, true) } - BarrelShifterValue::ShiftedRegister(x) => self.register_shift(x), + BarrelShifterValue::ShiftedRegister(x) => self.register_shift(&x), _ => unreachable!(), } } @@ -164,7 +164,7 @@ impl Core { /// /// Cycles: 1S+x+y (from GBATEK) /// Add x=1I cycles if Op2 shifted-by-register. Add y=1S+1N cycles if Rd=R15. - fn exec_data_processing(&mut self, sb: &mut SysBus, insn: ArmInstruction) -> CpuAction { + fn exec_data_processing(&mut self, sb: &mut SysBus, insn: &ArmInstruction) -> CpuAction { use AluOpCode::*; self.S_cycle32(sb, self.pc); @@ -186,7 +186,7 @@ impl Core { } _ => {} } - let op2 = self.decode_operand2(op2); + let op2 = self.decode_operand2(&op2); let reg_rd = insn.rd(); if !s_flag { @@ -275,7 +275,7 @@ impl Core { /// STR{cond}{B}{T} Rd,
| 2N | ---- | [Rn+/-]=Rd /// ------------------------------------------------------------------------------ /// For LDR, add y=1S+1N if Rd=R15. - fn exec_ldr_str(&mut self, sb: &mut SysBus, insn: ArmInstruction) -> CpuAction { + fn exec_ldr_str(&mut self, sb: &mut SysBus, insn: &ArmInstruction) -> CpuAction { let mut result = CpuAction::AdvancePC; let load = insn.load_flag(); @@ -287,7 +287,7 @@ impl Core { if base_reg == REG_PC { addr = insn.pc + 8; // prefetching } - let offset = self.get_barrel_shifted_value(insn.ldr_str_offset()); + let offset = self.get_barrel_shifted_value(&insn.ldr_str_offset()); let effective_addr = (addr as i32).wrapping_add(offset as i32) as Addr; // TODO - confirm this @@ -352,7 +352,7 @@ impl Core { result } - fn exec_ldr_str_hs(&mut self, sb: &mut SysBus, insn: ArmInstruction) -> CpuAction { + fn exec_ldr_str_hs(&mut self, sb: &mut SysBus, insn: &ArmInstruction) -> CpuAction { let mut result = CpuAction::AdvancePC; let load = insn.load_flag(); @@ -365,7 +365,7 @@ impl Core { addr = insn.pc + 8; // prefetching } - let offset = self.get_barrel_shifted_value(insn.ldr_str_hs_offset().unwrap()); + let offset = self.get_barrel_shifted_value(&insn.ldr_str_hs_offset().unwrap()); // TODO - confirm this let old_mode = self.cpsr.mode(); @@ -434,7 +434,7 @@ impl Core { result } - fn exec_ldm_stm(&mut self, sb: &mut SysBus, insn: ArmInstruction) -> CpuAction { + fn exec_ldm_stm(&mut self, sb: &mut SysBus, insn: &ArmInstruction) -> CpuAction { let mut result = CpuAction::AdvancePC; let mut full = insn.pre_index_flag(); @@ -584,7 +584,7 @@ impl Core { result } - fn exec_mul_mla(&mut self, sb: &mut SysBus, insn: ArmInstruction) -> CpuAction { + fn exec_mul_mla(&mut self, sb: &mut SysBus, insn: &ArmInstruction) -> CpuAction { let (rd, rn, rs, rm) = (insn.rd(), insn.rn(), insn.rs(), insn.rm()); // check validity @@ -619,7 +619,7 @@ impl Core { CpuAction::AdvancePC } - fn exec_mull_mlal(&mut self, sb: &mut SysBus, insn: ArmInstruction) -> CpuAction { + 
fn exec_mull_mlal(&mut self, sb: &mut SysBus, insn: &ArmInstruction) -> CpuAction { let (rd_hi, rd_lo, rn, rs, rm) = (insn.rd_hi(), insn.rd_lo(), insn.rn(), insn.rs(), insn.rm()); @@ -666,7 +666,7 @@ impl Core { CpuAction::AdvancePC } - fn exec_arm_swp(&mut self, sb: &mut SysBus, insn: ArmInstruction) -> CpuAction { + fn exec_arm_swp(&mut self, sb: &mut SysBus, insn: &ArmInstruction) -> CpuAction { let base_addr = self.get_reg(insn.rn()); if insn.transfer_size() == 1 { let t = sb.read_8(base_addr); diff --git a/src/core/arm7tdmi/arm/mod.rs b/src/core/arm7tdmi/arm/mod.rs index 665f819..0d2e0cc 100644 --- a/src/core/arm7tdmi/arm/mod.rs +++ b/src/core/arm7tdmi/arm/mod.rs @@ -99,7 +99,7 @@ pub enum ArmHalfwordTransferType { SignedHalfwords = 0b11, } -#[derive(Serialize, Deserialize, Debug, Copy, Clone, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] pub struct ArmInstruction { pub cond: ArmCond, pub fmt: ArmFormat, diff --git a/src/core/arm7tdmi/cpu.rs b/src/core/arm7tdmi/cpu.rs index bee4ce5..7466dd9 100644 --- a/src/core/arm7tdmi/cpu.rs +++ b/src/core/arm7tdmi/cpu.rs @@ -306,9 +306,9 @@ impl Core { #[cfg(feature = "debugger")] { self.gpr_previous = self.get_registers(); - self.last_executed = Some(DecodedInstruction::Arm(decoded_arm)); + self.last_executed = Some(DecodedInstruction::Arm(decoded_arm.clone())); } - let result = self.exec_arm(sb, decoded_arm); + let result = self.exec_arm(sb, &decoded_arm); match result { CpuAction::AdvancePC => self.advance_arm(), CpuAction::FlushPipeline => {} @@ -320,9 +320,9 @@ impl Core { #[cfg(feature = "debugger")] { self.gpr_previous = self.get_registers(); - self.last_executed = Some(DecodedInstruction::Thumb(decoded_thumb)); + self.last_executed = Some(DecodedInstruction::Thumb(decoded_thumb.clone())); } - let result = self.exec_thumb(sb, decoded_thumb); + let result = self.exec_thumb(sb, &decoded_thumb); match result { CpuAction::AdvancePC => self.advance_thumb(), CpuAction::FlushPipeline => {} diff 
--git a/src/core/arm7tdmi/mod.rs b/src/core/arm7tdmi/mod.rs index 46109f8..076b8e3 100644 --- a/src/core/arm7tdmi/mod.rs +++ b/src/core/arm7tdmi/mod.rs @@ -28,7 +28,7 @@ pub enum CpuAction { FlushPipeline, } -#[derive(Serialize, Deserialize, Debug, PartialEq, Copy, Clone)] +#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)] pub enum DecodedInstruction { Arm(ArmInstruction), Thumb(ThumbInstruction), diff --git a/src/core/arm7tdmi/thumb/exec.rs b/src/core/arm7tdmi/thumb/exec.rs index 4c29371..8c5929e 100644 --- a/src/core/arm7tdmi/thumb/exec.rs +++ b/src/core/arm7tdmi/thumb/exec.rs @@ -21,7 +21,7 @@ impl Core { fn exec_thumb_move_shifted_reg( &mut self, sb: &mut SysBus, - insn: ThumbInstruction, + insn: &ThumbInstruction, ) -> CpuAction { let rd = (insn.raw & 0b111) as usize; let rs = insn.raw.bit_range(3..6) as usize; @@ -43,7 +43,7 @@ impl Core { } /// Format 2 - fn exec_thumb_add_sub(&mut self, sb: &mut SysBus, insn: ThumbInstruction) -> CpuAction { + fn exec_thumb_add_sub(&mut self, sb: &mut SysBus, insn: &ThumbInstruction) -> CpuAction { let rd = (insn.raw & 0b111) as usize; let op1 = self.get_reg(insn.rs()); let op2 = if insn.is_immediate_operand() { @@ -71,7 +71,7 @@ impl Core { fn exec_thumb_data_process_imm( &mut self, sb: &mut SysBus, - insn: ThumbInstruction, + insn: &ThumbInstruction, ) -> CpuAction { use OpFormat3::*; let op = insn.format3_op(); @@ -96,7 +96,7 @@ impl Core { } /// Format 4 - fn exec_thumb_alu_ops(&mut self, sb: &mut SysBus, insn: ThumbInstruction) -> CpuAction { + fn exec_thumb_alu_ops(&mut self, sb: &mut SysBus, insn: &ThumbInstruction) -> CpuAction { let rd = (insn.raw & 0b111) as usize; let rs = insn.rs(); let dst = self.get_reg(rd); @@ -153,7 +153,11 @@ impl Core { } /// Format 5 - fn exec_thumb_hi_reg_op_or_bx(&mut self, sb: &mut SysBus, insn: ThumbInstruction) -> CpuAction { + fn exec_thumb_hi_reg_op_or_bx( + &mut self, + sb: &mut SysBus, + insn: &ThumbInstruction, + ) -> CpuAction { let op = insn.format5_op(); let rd = 
(insn.raw & 0b111) as usize; let dst_reg = if insn.flag(ThumbInstruction::FLAG_H1) { @@ -201,7 +205,7 @@ impl Core { } /// Format 6 - fn exec_thumb_ldr_pc(&mut self, sb: &mut SysBus, insn: ThumbInstruction) -> CpuAction { + fn exec_thumb_ldr_pc(&mut self, sb: &mut SysBus, insn: &ThumbInstruction) -> CpuAction { let rd = insn.raw.bit_range(8..11) as usize; let ofs = insn.word8() as Addr; @@ -222,7 +226,7 @@ impl Core { fn do_exec_thumb_ldr_str( &mut self, sb: &mut SysBus, - insn: ThumbInstruction, + insn: &ThumbInstruction, addr: Addr, is_transferring_bytes: bool, ) -> CpuAction { @@ -260,7 +264,7 @@ impl Core { fn exec_thumb_ldr_str_reg_offset( &mut self, bus: &mut SysBus, - insn: ThumbInstruction, + insn: &ThumbInstruction, ) -> CpuAction { let rb = insn.raw.bit_range(3..6) as usize; let addr = self.gpr[rb].wrapping_add(self.gpr[insn.ro()]); @@ -268,7 +272,7 @@ impl Core { } /// Format 8 - fn exec_thumb_ldr_str_shb(&mut self, sb: &mut SysBus, insn: ThumbInstruction) -> CpuAction { + fn exec_thumb_ldr_str_shb(&mut self, sb: &mut SysBus, insn: &ThumbInstruction) -> CpuAction { let rb = insn.raw.bit_range(3..6) as usize; let rd = (insn.raw & 0b111) as usize; @@ -317,7 +321,7 @@ impl Core { fn exec_thumb_ldr_str_imm_offset( &mut self, sb: &mut SysBus, - insn: ThumbInstruction, + insn: &ThumbInstruction, ) -> CpuAction { let rb = insn.raw.bit_range(3..6) as usize; @@ -334,7 +338,7 @@ impl Core { fn exec_thumb_ldr_str_halfword( &mut self, sb: &mut SysBus, - insn: ThumbInstruction, + insn: &ThumbInstruction, ) -> CpuAction { let rb = insn.raw.bit_range(3..6) as usize; let rd = (insn.raw & 0b111) as usize; @@ -355,7 +359,7 @@ impl Core { } /// Format 11 - fn exec_thumb_ldr_str_sp(&mut self, sb: &mut SysBus, insn: ThumbInstruction) -> CpuAction { + fn exec_thumb_ldr_str_sp(&mut self, sb: &mut SysBus, insn: &ThumbInstruction) -> CpuAction { let addr = self.gpr[REG_SP] + (insn.word8() as Addr); let rd = insn.raw.bit_range(8..11) as usize; if insn.is_load() { @@ -373,7 +377,7 
@@ impl Core { } /// Format 12 - fn exec_thumb_load_address(&mut self, sb: &mut SysBus, insn: ThumbInstruction) -> CpuAction { + fn exec_thumb_load_address(&mut self, sb: &mut SysBus, insn: &ThumbInstruction) -> CpuAction { let rd = insn.raw.bit_range(8..11) as usize; let result = if insn.flag(ThumbInstruction::FLAG_SP) { self.gpr[REG_SP] + (insn.word8() as Addr) @@ -387,7 +391,7 @@ impl Core { } /// Format 13 - fn exec_thumb_add_sp(&mut self, sb: &mut SysBus, insn: ThumbInstruction) -> CpuAction { + fn exec_thumb_add_sp(&mut self, sb: &mut SysBus, insn: &ThumbInstruction) -> CpuAction { let op1 = self.gpr[REG_SP] as i32; let op2 = insn.sword7(); @@ -398,7 +402,7 @@ impl Core { } /// Format 14 - fn exec_thumb_push_pop(&mut self, sb: &mut SysBus, insn: ThumbInstruction) -> CpuAction { + fn exec_thumb_push_pop(&mut self, sb: &mut SysBus, insn: &ThumbInstruction) -> CpuAction { let mut result = CpuAction::AdvancePC; // (From GBATEK) Execution Time: nS+1N+1I (POP), (n+1)S+2N+1I (POP PC), or (n-1)S+2N (PUSH). @@ -446,7 +450,7 @@ impl Core { } /// Format 15 - fn exec_thumb_ldm_stm(&mut self, sb: &mut SysBus, insn: ThumbInstruction) -> CpuAction { + fn exec_thumb_ldm_stm(&mut self, sb: &mut SysBus, insn: &ThumbInstruction) -> CpuAction { let mut result = CpuAction::AdvancePC; // (From GBATEK) Execution Time: nS+1N+1I (POP), (n+1)S+2N+1I (POP PC), or (n-1)S+2N (PUSH). 
@@ -526,7 +530,7 @@ impl Core { fn exec_thumb_branch_with_cond( &mut self, sb: &mut SysBus, - insn: ThumbInstruction, + insn: &ThumbInstruction, ) -> CpuAction { if !self.check_arm_cond(insn.cond()) { self.S_cycle16(sb, self.pc + 2); @@ -541,14 +545,14 @@ impl Core { } /// Format 17 - fn exec_thumb_swi(&mut self, sb: &mut SysBus, _insn: ThumbInstruction) -> CpuAction { + fn exec_thumb_swi(&mut self, sb: &mut SysBus, _insn: &ThumbInstruction) -> CpuAction { self.N_cycle16(sb, self.pc); self.exception(sb, Exception::SoftwareInterrupt, self.pc - 2); CpuAction::FlushPipeline } /// Format 18 - fn exec_thumb_branch(&mut self, sb: &mut SysBus, insn: ThumbInstruction) -> CpuAction { + fn exec_thumb_branch(&mut self, sb: &mut SysBus, insn: &ThumbInstruction) -> CpuAction { let offset = ((insn.offset11() << 21) >> 20) as i32; self.pc = (self.pc as i32).wrapping_add(offset) as u32; self.S_cycle16(sb, self.pc); @@ -560,7 +564,7 @@ impl Core { fn exec_thumb_branch_long_with_link( &mut self, sb: &mut SysBus, - insn: ThumbInstruction, + insn: &ThumbInstruction, ) -> CpuAction { let mut off = insn.offset11(); if insn.flag(ThumbInstruction::FLAG_LOW_OFFSET) { @@ -580,7 +584,7 @@ impl Core { } } - pub fn exec_thumb(&mut self, bus: &mut SysBus, insn: ThumbInstruction) -> CpuAction { + pub fn exec_thumb(&mut self, bus: &mut SysBus, insn: &ThumbInstruction) -> CpuAction { match insn.fmt { ThumbFormat::MoveShiftedReg => self.exec_thumb_move_shifted_reg(bus, insn), ThumbFormat::AddSub => self.exec_thumb_add_sub(bus, insn), diff --git a/src/core/arm7tdmi/thumb/mod.rs b/src/core/arm7tdmi/thumb/mod.rs index 8d509f4..0298ec1 100644 --- a/src/core/arm7tdmi/thumb/mod.rs +++ b/src/core/arm7tdmi/thumb/mod.rs @@ -51,7 +51,7 @@ pub enum ThumbFormat { BranchLongWithLink, } -#[derive(Serialize, Deserialize, Debug, Copy, Clone, PartialEq)] +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] pub struct ThumbInstruction { pub fmt: ThumbFormat, pub raw: u16,