Optimize: CPU Pipeline optimization part 3
Move reload_pipeline calls inside instructions. This commit yields yet another 5% performance improvement. The next step is to move `advance_pc` into the instructions themselves and save the `match result` per executed instruction. Former-commit-id: 42193ffc48fda9943665e6a74e873186627a0b4a
This commit is contained in:
parent
6beec306c2
commit
32a20d2cbb
|
@ -45,6 +45,7 @@ impl Core {
|
||||||
|
|
||||||
self.pc = (self.pc as i32).wrapping_add(insn.branch_offset()) as u32 & !1;
|
self.pc = (self.pc as i32).wrapping_add(insn.branch_offset()) as u32 & !1;
|
||||||
|
|
||||||
|
self.reload_pipeline32(sb);
|
||||||
CpuAction::FlushPipeline
|
CpuAction::FlushPipeline
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -56,12 +57,14 @@ impl Core {
|
||||||
if addr.bit(0) {
|
if addr.bit(0) {
|
||||||
addr = addr & !0x1;
|
addr = addr & !0x1;
|
||||||
self.cpsr.set_state(CpuState::THUMB);
|
self.cpsr.set_state(CpuState::THUMB);
|
||||||
|
self.pc = addr;
|
||||||
|
self.reload_pipeline16(sb);
|
||||||
} else {
|
} else {
|
||||||
addr = addr & !0x3;
|
addr = addr & !0x3;
|
||||||
self.cpsr.set_state(CpuState::ARM);
|
self.cpsr.set_state(CpuState::ARM);
|
||||||
}
|
|
||||||
|
|
||||||
self.pc = addr;
|
self.pc = addr;
|
||||||
|
self.reload_pipeline32(sb);
|
||||||
|
}
|
||||||
|
|
||||||
CpuAction::FlushPipeline
|
CpuAction::FlushPipeline
|
||||||
}
|
}
|
||||||
|
@ -250,6 +253,11 @@ impl Core {
|
||||||
if let Some(alu_res) = alu_res {
|
if let Some(alu_res) = alu_res {
|
||||||
self.set_reg(reg_rd, alu_res as u32);
|
self.set_reg(reg_rd, alu_res as u32);
|
||||||
if reg_rd == REG_PC {
|
if reg_rd == REG_PC {
|
||||||
|
// T bit might have changed
|
||||||
|
match self.cpsr.state() {
|
||||||
|
CpuState::ARM => self.reload_pipeline32(sb),
|
||||||
|
CpuState::THUMB => self.reload_pipeline16(sb),
|
||||||
|
};
|
||||||
result = CpuAction::FlushPipeline;
|
result = CpuAction::FlushPipeline;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -307,6 +315,7 @@ impl Core {
|
||||||
self.add_cycle();
|
self.add_cycle();
|
||||||
|
|
||||||
if dest_reg == REG_PC {
|
if dest_reg == REG_PC {
|
||||||
|
self.reload_pipeline32(sb);
|
||||||
result = CpuAction::FlushPipeline;
|
result = CpuAction::FlushPipeline;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -391,6 +400,7 @@ impl Core {
|
||||||
self.add_cycle();
|
self.add_cycle();
|
||||||
|
|
||||||
if dest_reg == REG_PC {
|
if dest_reg == REG_PC {
|
||||||
|
self.reload_pipeline32(sb);
|
||||||
result = CpuAction::FlushPipeline;
|
result = CpuAction::FlushPipeline;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -497,6 +507,7 @@ impl Core {
|
||||||
if psr_transfer {
|
if psr_transfer {
|
||||||
self.transfer_spsr_mode();
|
self.transfer_spsr_mode();
|
||||||
}
|
}
|
||||||
|
self.reload_pipeline32(sb);
|
||||||
result = CpuAction::FlushPipeline;
|
result = CpuAction::FlushPipeline;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -551,6 +562,7 @@ impl Core {
|
||||||
if is_load {
|
if is_load {
|
||||||
let val = self.ldr_word(addr, sb);
|
let val = self.ldr_word(addr, sb);
|
||||||
self.set_reg(REG_PC, val & !3);
|
self.set_reg(REG_PC, val & !3);
|
||||||
|
self.reload_pipeline32(sb);
|
||||||
result = CpuAction::FlushPipeline;
|
result = CpuAction::FlushPipeline;
|
||||||
} else {
|
} else {
|
||||||
self.write_32(addr, self.pc + 4, sb);
|
self.write_32(addr, self.pc + 4, sb);
|
||||||
|
|
|
@ -320,7 +320,7 @@ impl Core {
|
||||||
let result = self.exec_arm(sb, decoded_arm);
|
let result = self.exec_arm(sb, decoded_arm);
|
||||||
match result {
|
match result {
|
||||||
CpuAction::AdvancePC => self.advance_arm(),
|
CpuAction::AdvancePC => self.advance_arm(),
|
||||||
CpuAction::FlushPipeline => self.reload_pipeline(sb),
|
CpuAction::FlushPipeline => {},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -334,7 +334,7 @@ impl Core {
|
||||||
let result = self.exec_thumb(sb, decoded_thumb);
|
let result = self.exec_thumb(sb, decoded_thumb);
|
||||||
match result {
|
match result {
|
||||||
CpuAction::AdvancePC => self.advance_thumb(),
|
CpuAction::AdvancePC => self.advance_thumb(),
|
||||||
CpuAction::FlushPipeline => self.reload_pipeline(sb),
|
CpuAction::FlushPipeline => {},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -56,13 +56,13 @@ impl Core {
|
||||||
|
|
||||||
// Set PC to vector address
|
// Set PC to vector address
|
||||||
self.pc = e as u32;
|
self.pc = e as u32;
|
||||||
|
self.reload_pipeline32(sb);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn irq(&mut self, sb: &mut SysBus) {
|
pub fn irq(&mut self, sb: &mut SysBus) {
|
||||||
if !self.cpsr.irq_disabled() {
|
if !self.cpsr.irq_disabled() {
|
||||||
let lr = self.get_next_pc() + 4;
|
let lr = self.get_next_pc() + 4;
|
||||||
self.exception(sb, Exception::Irq, lr);
|
self.exception(sb, Exception::Irq, lr);
|
||||||
self.reload_pipeline32(sb);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -177,7 +177,8 @@ impl Core {
|
||||||
OpFormat5::ADD => {
|
OpFormat5::ADD => {
|
||||||
self.set_reg(dst_reg, op1.wrapping_add(op2));
|
self.set_reg(dst_reg, op1.wrapping_add(op2));
|
||||||
if dst_reg == REG_PC {
|
if dst_reg == REG_PC {
|
||||||
result = CpuAction::FlushPipeline
|
result = CpuAction::FlushPipeline;
|
||||||
|
self.reload_pipeline16(sb);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
OpFormat5::CMP => {
|
OpFormat5::CMP => {
|
||||||
|
@ -190,6 +191,7 @@ impl Core {
|
||||||
self.set_reg(dst_reg, op2 as u32);
|
self.set_reg(dst_reg, op2 as u32);
|
||||||
if dst_reg == REG_PC {
|
if dst_reg == REG_PC {
|
||||||
result = CpuAction::FlushPipeline;
|
result = CpuAction::FlushPipeline;
|
||||||
|
self.reload_pipeline16(sb);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -421,6 +423,7 @@ impl Core {
|
||||||
pop(self, sb, REG_PC);
|
pop(self, sb, REG_PC);
|
||||||
self.pc = self.pc & !1;
|
self.pc = self.pc & !1;
|
||||||
result = CpuAction::FlushPipeline;
|
result = CpuAction::FlushPipeline;
|
||||||
|
self.reload_pipeline16(sb);
|
||||||
}
|
}
|
||||||
self.S_cycle16(sb, self.pc + 2);
|
self.S_cycle16(sb, self.pc + 2);
|
||||||
} else {
|
} else {
|
||||||
|
@ -508,6 +511,7 @@ impl Core {
|
||||||
let val = sb.read_32(addr);
|
let val = sb.read_32(addr);
|
||||||
self.set_reg(REG_PC, val & !1);
|
self.set_reg(REG_PC, val & !1);
|
||||||
result = CpuAction::FlushPipeline;
|
result = CpuAction::FlushPipeline;
|
||||||
|
self.reload_pipeline16(sb);
|
||||||
} else {
|
} else {
|
||||||
sb.write_32(addr, self.pc + 2);
|
sb.write_32(addr, self.pc + 2);
|
||||||
}
|
}
|
||||||
|
@ -531,6 +535,7 @@ impl Core {
|
||||||
let offset = insn.bcond_offset();
|
let offset = insn.bcond_offset();
|
||||||
self.S_cycle16(sb, self.pc);
|
self.S_cycle16(sb, self.pc);
|
||||||
self.pc = (self.pc as i32).wrapping_add(offset) as u32;
|
self.pc = (self.pc as i32).wrapping_add(offset) as u32;
|
||||||
|
self.reload_pipeline16(sb);
|
||||||
CpuAction::FlushPipeline
|
CpuAction::FlushPipeline
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -539,7 +544,6 @@ impl Core {
|
||||||
fn exec_thumb_swi(&mut self, sb: &mut SysBus, _insn: ThumbInstruction) -> CpuAction {
|
fn exec_thumb_swi(&mut self, sb: &mut SysBus, _insn: ThumbInstruction) -> CpuAction {
|
||||||
self.N_cycle16(sb, self.pc);
|
self.N_cycle16(sb, self.pc);
|
||||||
self.exception(sb, Exception::SoftwareInterrupt, self.pc - 2);
|
self.exception(sb, Exception::SoftwareInterrupt, self.pc - 2);
|
||||||
|
|
||||||
CpuAction::FlushPipeline
|
CpuAction::FlushPipeline
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -548,7 +552,7 @@ impl Core {
|
||||||
let offset = ((insn.offset11() << 21) >> 20) as i32;
|
let offset = ((insn.offset11() << 21) >> 20) as i32;
|
||||||
self.pc = (self.pc as i32).wrapping_add(offset) as u32;
|
self.pc = (self.pc as i32).wrapping_add(offset) as u32;
|
||||||
self.S_cycle16(sb, self.pc);
|
self.S_cycle16(sb, self.pc);
|
||||||
|
self.reload_pipeline16(sb);
|
||||||
CpuAction::FlushPipeline
|
CpuAction::FlushPipeline
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -565,7 +569,7 @@ impl Core {
|
||||||
let next_pc = (self.pc - 2) | 1;
|
let next_pc = (self.pc - 2) | 1;
|
||||||
self.pc = ((self.gpr[REG_LR] & !1) as i32).wrapping_add(off) as u32;
|
self.pc = ((self.gpr[REG_LR] & !1) as i32).wrapping_add(off) as u32;
|
||||||
self.gpr[REG_LR] = next_pc;
|
self.gpr[REG_LR] = next_pc;
|
||||||
|
self.reload_pipeline16(sb);
|
||||||
CpuAction::FlushPipeline
|
CpuAction::FlushPipeline
|
||||||
} else {
|
} else {
|
||||||
off = (off << 21) >> 9;
|
off = (off << 21) >> 9;
|
||||||
|
|
Reference in a new issue