From 32a20d2cbb6dcf86b2388f5f48ce1f958d381d07 Mon Sep 17 00:00:00 2001 From: Michel Heily Date: Tue, 11 Feb 2020 02:24:24 +0200 Subject: [PATCH] optimize: CPU Pipeline optimization part 3 Move reload_pipeline calls inside instructions. This commit yields yet another 5% performance improvement. The next step is to move `advance_pc` into the instructions themselves and save the `match result` per executed instruction Former-commit-id: 42193ffc48fda9943665e6a74e873186627a0b4a --- src/core/arm7tdmi/arm/exec.rs | 16 ++++++++++++++-- src/core/arm7tdmi/cpu.rs | 4 ++-- src/core/arm7tdmi/exception.rs | 2 +- src/core/arm7tdmi/thumb/exec.rs | 12 ++++++++---- 4 files changed, 25 insertions(+), 9 deletions(-) diff --git a/src/core/arm7tdmi/arm/exec.rs b/src/core/arm7tdmi/arm/exec.rs index f1afac6..22cb802 100644 --- a/src/core/arm7tdmi/arm/exec.rs +++ b/src/core/arm7tdmi/arm/exec.rs @@ -45,6 +45,7 @@ impl Core { self.pc = (self.pc as i32).wrapping_add(insn.branch_offset()) as u32 & !1; + self.reload_pipeline32(sb); CpuAction::FlushPipeline } @@ -56,13 +57,15 @@ impl Core { if addr.bit(0) { addr = addr & !0x1; self.cpsr.set_state(CpuState::THUMB); + self.pc = addr; + self.reload_pipeline16(sb); } else { addr = addr & !0x3; self.cpsr.set_state(CpuState::ARM); + self.pc = addr; + self.reload_pipeline32(sb); } - self.pc = addr; - CpuAction::FlushPipeline } @@ -250,6 +253,11 @@ impl Core { if let Some(alu_res) = alu_res { self.set_reg(reg_rd, alu_res as u32); if reg_rd == REG_PC { + // T bit might have changed + match self.cpsr.state() { + CpuState::ARM => self.reload_pipeline32(sb), + CpuState::THUMB => self.reload_pipeline16(sb), + }; result = CpuAction::FlushPipeline; } } @@ -307,6 +315,7 @@ impl Core { self.add_cycle(); if dest_reg == REG_PC { + self.reload_pipeline32(sb); result = CpuAction::FlushPipeline; } } else { @@ -391,6 +400,7 @@ impl Core { self.add_cycle(); if dest_reg == REG_PC { + self.reload_pipeline32(sb); result = CpuAction::FlushPipeline; } } else { @@ -497,6 
+507,7 @@ impl Core { if psr_transfer { self.transfer_spsr_mode(); } + self.reload_pipeline32(sb); result = CpuAction::FlushPipeline; } @@ -551,6 +562,7 @@ impl Core { if is_load { let val = self.ldr_word(addr, sb); self.set_reg(REG_PC, val & !3); + self.reload_pipeline32(sb); result = CpuAction::FlushPipeline; } else { self.write_32(addr, self.pc + 4, sb); diff --git a/src/core/arm7tdmi/cpu.rs b/src/core/arm7tdmi/cpu.rs index 5a54e0e..8f8447b 100644 --- a/src/core/arm7tdmi/cpu.rs +++ b/src/core/arm7tdmi/cpu.rs @@ -320,7 +320,7 @@ impl Core { let result = self.exec_arm(sb, decoded_arm); match result { CpuAction::AdvancePC => self.advance_arm(), - CpuAction::FlushPipeline => self.reload_pipeline(sb), + CpuAction::FlushPipeline => {}, } } @@ -334,7 +334,7 @@ impl Core { let result = self.exec_thumb(sb, decoded_thumb); match result { CpuAction::AdvancePC => self.advance_thumb(), - CpuAction::FlushPipeline => self.reload_pipeline(sb), + CpuAction::FlushPipeline => {}, } } diff --git a/src/core/arm7tdmi/exception.rs b/src/core/arm7tdmi/exception.rs index 9f75b08..b527a9c 100644 --- a/src/core/arm7tdmi/exception.rs +++ b/src/core/arm7tdmi/exception.rs @@ -56,13 +56,13 @@ impl Core { // Set PC to vector address self.pc = e as u32; + self.reload_pipeline32(sb); } pub fn irq(&mut self, sb: &mut SysBus) { if !self.cpsr.irq_disabled() { let lr = self.get_next_pc() + 4; self.exception(sb, Exception::Irq, lr); - self.reload_pipeline32(sb); } } diff --git a/src/core/arm7tdmi/thumb/exec.rs b/src/core/arm7tdmi/thumb/exec.rs index b20bfe1..4c29371 100644 --- a/src/core/arm7tdmi/thumb/exec.rs +++ b/src/core/arm7tdmi/thumb/exec.rs @@ -177,7 +177,8 @@ impl Core { OpFormat5::ADD => { self.set_reg(dst_reg, op1.wrapping_add(op2)); if dst_reg == REG_PC { - result = CpuAction::FlushPipeline + result = CpuAction::FlushPipeline; + self.reload_pipeline16(sb); } } OpFormat5::CMP => { @@ -190,6 +191,7 @@ impl Core { self.set_reg(dst_reg, op2 as u32); if dst_reg == REG_PC { result = 
CpuAction::FlushPipeline; + self.reload_pipeline16(sb); } } } @@ -421,6 +423,7 @@ impl Core { pop(self, sb, REG_PC); self.pc = self.pc & !1; result = CpuAction::FlushPipeline; + self.reload_pipeline16(sb); } self.S_cycle16(sb, self.pc + 2); } else { @@ -508,6 +511,7 @@ impl Core { let val = sb.read_32(addr); self.set_reg(REG_PC, val & !1); result = CpuAction::FlushPipeline; + self.reload_pipeline16(sb); } else { sb.write_32(addr, self.pc + 2); } @@ -531,6 +535,7 @@ impl Core { let offset = insn.bcond_offset(); self.S_cycle16(sb, self.pc); self.pc = (self.pc as i32).wrapping_add(offset) as u32; + self.reload_pipeline16(sb); CpuAction::FlushPipeline } } @@ -539,7 +544,6 @@ impl Core { fn exec_thumb_swi(&mut self, sb: &mut SysBus, _insn: ThumbInstruction) -> CpuAction { self.N_cycle16(sb, self.pc); self.exception(sb, Exception::SoftwareInterrupt, self.pc - 2); - CpuAction::FlushPipeline } @@ -548,7 +552,7 @@ impl Core { let offset = ((insn.offset11() << 21) >> 20) as i32; self.pc = (self.pc as i32).wrapping_add(offset) as u32; self.S_cycle16(sb, self.pc); - + self.reload_pipeline16(sb); CpuAction::FlushPipeline } @@ -565,7 +569,7 @@ impl Core { let next_pc = (self.pc - 2) | 1; self.pc = ((self.gpr[REG_LR] & !1) as i32).wrapping_add(off) as u32; self.gpr[REG_LR] = next_pc; - + self.reload_pipeline16(sb); CpuAction::FlushPipeline } else { off = (off << 21) >> 9;