ptimize: CPU Pipeline optimization part 3

Move reload_pipeline calls inside instructions.
This commit yields yet another 5% performance improvement.

The next step is to move `advance_pc` into the instructions themselves
and thereby avoid the `match result` for every executed instruction.


Former-commit-id: 42193ffc48fda9943665e6a74e873186627a0b4a
This commit is contained in:
Michel Heily 2020-02-11 02:24:24 +02:00
parent 6beec306c2
commit 32a20d2cbb
4 changed files with 25 additions and 9 deletions

View file

@ -45,6 +45,7 @@ impl Core {
self.pc = (self.pc as i32).wrapping_add(insn.branch_offset()) as u32 & !1; self.pc = (self.pc as i32).wrapping_add(insn.branch_offset()) as u32 & !1;
self.reload_pipeline32(sb);
CpuAction::FlushPipeline CpuAction::FlushPipeline
} }
@ -56,13 +57,15 @@ impl Core {
if addr.bit(0) { if addr.bit(0) {
addr = addr & !0x1; addr = addr & !0x1;
self.cpsr.set_state(CpuState::THUMB); self.cpsr.set_state(CpuState::THUMB);
self.pc = addr;
self.reload_pipeline16(sb);
} else { } else {
addr = addr & !0x3; addr = addr & !0x3;
self.cpsr.set_state(CpuState::ARM); self.cpsr.set_state(CpuState::ARM);
self.pc = addr;
self.reload_pipeline32(sb);
} }
self.pc = addr;
CpuAction::FlushPipeline CpuAction::FlushPipeline
} }
@ -250,6 +253,11 @@ impl Core {
if let Some(alu_res) = alu_res { if let Some(alu_res) = alu_res {
self.set_reg(reg_rd, alu_res as u32); self.set_reg(reg_rd, alu_res as u32);
if reg_rd == REG_PC { if reg_rd == REG_PC {
// T bit might have changed
match self.cpsr.state() {
CpuState::ARM => self.reload_pipeline32(sb),
CpuState::THUMB => self.reload_pipeline16(sb),
};
result = CpuAction::FlushPipeline; result = CpuAction::FlushPipeline;
} }
} }
@ -307,6 +315,7 @@ impl Core {
self.add_cycle(); self.add_cycle();
if dest_reg == REG_PC { if dest_reg == REG_PC {
self.reload_pipeline32(sb);
result = CpuAction::FlushPipeline; result = CpuAction::FlushPipeline;
} }
} else { } else {
@ -391,6 +400,7 @@ impl Core {
self.add_cycle(); self.add_cycle();
if dest_reg == REG_PC { if dest_reg == REG_PC {
self.reload_pipeline32(sb);
result = CpuAction::FlushPipeline; result = CpuAction::FlushPipeline;
} }
} else { } else {
@ -497,6 +507,7 @@ impl Core {
if psr_transfer { if psr_transfer {
self.transfer_spsr_mode(); self.transfer_spsr_mode();
} }
self.reload_pipeline32(sb);
result = CpuAction::FlushPipeline; result = CpuAction::FlushPipeline;
} }
@ -551,6 +562,7 @@ impl Core {
if is_load { if is_load {
let val = self.ldr_word(addr, sb); let val = self.ldr_word(addr, sb);
self.set_reg(REG_PC, val & !3); self.set_reg(REG_PC, val & !3);
self.reload_pipeline32(sb);
result = CpuAction::FlushPipeline; result = CpuAction::FlushPipeline;
} else { } else {
self.write_32(addr, self.pc + 4, sb); self.write_32(addr, self.pc + 4, sb);

View file

@ -320,7 +320,7 @@ impl Core {
let result = self.exec_arm(sb, decoded_arm); let result = self.exec_arm(sb, decoded_arm);
match result { match result {
CpuAction::AdvancePC => self.advance_arm(), CpuAction::AdvancePC => self.advance_arm(),
CpuAction::FlushPipeline => self.reload_pipeline(sb), CpuAction::FlushPipeline => {},
} }
} }
@ -334,7 +334,7 @@ impl Core {
let result = self.exec_thumb(sb, decoded_thumb); let result = self.exec_thumb(sb, decoded_thumb);
match result { match result {
CpuAction::AdvancePC => self.advance_thumb(), CpuAction::AdvancePC => self.advance_thumb(),
CpuAction::FlushPipeline => self.reload_pipeline(sb), CpuAction::FlushPipeline => {},
} }
} }

View file

@ -56,13 +56,13 @@ impl Core {
// Set PC to vector address // Set PC to vector address
self.pc = e as u32; self.pc = e as u32;
self.reload_pipeline32(sb);
} }
pub fn irq(&mut self, sb: &mut SysBus) { pub fn irq(&mut self, sb: &mut SysBus) {
if !self.cpsr.irq_disabled() { if !self.cpsr.irq_disabled() {
let lr = self.get_next_pc() + 4; let lr = self.get_next_pc() + 4;
self.exception(sb, Exception::Irq, lr); self.exception(sb, Exception::Irq, lr);
self.reload_pipeline32(sb);
} }
} }

View file

@ -177,7 +177,8 @@ impl Core {
OpFormat5::ADD => { OpFormat5::ADD => {
self.set_reg(dst_reg, op1.wrapping_add(op2)); self.set_reg(dst_reg, op1.wrapping_add(op2));
if dst_reg == REG_PC { if dst_reg == REG_PC {
result = CpuAction::FlushPipeline result = CpuAction::FlushPipeline;
self.reload_pipeline16(sb);
} }
} }
OpFormat5::CMP => { OpFormat5::CMP => {
@ -190,6 +191,7 @@ impl Core {
self.set_reg(dst_reg, op2 as u32); self.set_reg(dst_reg, op2 as u32);
if dst_reg == REG_PC { if dst_reg == REG_PC {
result = CpuAction::FlushPipeline; result = CpuAction::FlushPipeline;
self.reload_pipeline16(sb);
} }
} }
} }
@ -421,6 +423,7 @@ impl Core {
pop(self, sb, REG_PC); pop(self, sb, REG_PC);
self.pc = self.pc & !1; self.pc = self.pc & !1;
result = CpuAction::FlushPipeline; result = CpuAction::FlushPipeline;
self.reload_pipeline16(sb);
} }
self.S_cycle16(sb, self.pc + 2); self.S_cycle16(sb, self.pc + 2);
} else { } else {
@ -508,6 +511,7 @@ impl Core {
let val = sb.read_32(addr); let val = sb.read_32(addr);
self.set_reg(REG_PC, val & !1); self.set_reg(REG_PC, val & !1);
result = CpuAction::FlushPipeline; result = CpuAction::FlushPipeline;
self.reload_pipeline16(sb);
} else { } else {
sb.write_32(addr, self.pc + 2); sb.write_32(addr, self.pc + 2);
} }
@ -531,6 +535,7 @@ impl Core {
let offset = insn.bcond_offset(); let offset = insn.bcond_offset();
self.S_cycle16(sb, self.pc); self.S_cycle16(sb, self.pc);
self.pc = (self.pc as i32).wrapping_add(offset) as u32; self.pc = (self.pc as i32).wrapping_add(offset) as u32;
self.reload_pipeline16(sb);
CpuAction::FlushPipeline CpuAction::FlushPipeline
} }
} }
@ -539,7 +544,6 @@ impl Core {
fn exec_thumb_swi(&mut self, sb: &mut SysBus, _insn: ThumbInstruction) -> CpuAction { fn exec_thumb_swi(&mut self, sb: &mut SysBus, _insn: ThumbInstruction) -> CpuAction {
self.N_cycle16(sb, self.pc); self.N_cycle16(sb, self.pc);
self.exception(sb, Exception::SoftwareInterrupt, self.pc - 2); self.exception(sb, Exception::SoftwareInterrupt, self.pc - 2);
CpuAction::FlushPipeline CpuAction::FlushPipeline
} }
@ -548,7 +552,7 @@ impl Core {
let offset = ((insn.offset11() << 21) >> 20) as i32; let offset = ((insn.offset11() << 21) >> 20) as i32;
self.pc = (self.pc as i32).wrapping_add(offset) as u32; self.pc = (self.pc as i32).wrapping_add(offset) as u32;
self.S_cycle16(sb, self.pc); self.S_cycle16(sb, self.pc);
self.reload_pipeline16(sb);
CpuAction::FlushPipeline CpuAction::FlushPipeline
} }
@ -565,7 +569,7 @@ impl Core {
let next_pc = (self.pc - 2) | 1; let next_pc = (self.pc - 2) | 1;
self.pc = ((self.gpr[REG_LR] & !1) as i32).wrapping_add(off) as u32; self.pc = ((self.gpr[REG_LR] & !1) as i32).wrapping_add(off) as u32;
self.gpr[REG_LR] = next_pc; self.gpr[REG_LR] = next_pc;
self.reload_pipeline16(sb);
CpuAction::FlushPipeline CpuAction::FlushPipeline
} else { } else {
off = (off << 21) >> 9; off = (off << 21) >> 9;