Optimize: CPU Pipeline optimization part 3

Move reload_pipeline calls inside instructions. This commit yields yet another 5% performance improvement. The next step is to move `advance_pc` into the instructions themselves and save the `match result` per executed instruction. Former-commit-id: 42193ffc48fda9943665e6a74e873186627a0b4a
2020-02-11 02:24:24 +02:00 · 2020-02-11 02:24:24 +02:00 · 32a20d2cbb
parent 6beec306c2
commit 32a20d2cbb
4 changed files with 25 additions and 9 deletions
--- a/src/core/arm7tdmi/arm/exec.rs
+++ b/src/core/arm7tdmi/arm/exec.rs
@ -45,6 +45,7 @@ impl Core {

        self.pc = (self.pc as i32).wrapping_add(insn.branch_offset()) as u32 & !1;

+        self.reload_pipeline32(sb);
        CpuAction::FlushPipeline
    }

@ -56,12 +57,14 @@ impl Core {
        if addr.bit(0) {
            addr = addr & !0x1;
            self.cpsr.set_state(CpuState::THUMB);
+            self.pc = addr;
+            self.reload_pipeline16(sb);
        } else {
            addr = addr & !0x3;
            self.cpsr.set_state(CpuState::ARM);
-        }
-
            self.pc = addr;
+            self.reload_pipeline32(sb);
+        }

        CpuAction::FlushPipeline
    }
@ -250,6 +253,11 @@ impl Core {
        if let Some(alu_res) = alu_res {
            self.set_reg(reg_rd, alu_res as u32);
            if reg_rd == REG_PC {
+                // T bit might have changed
+                match self.cpsr.state() {
+                    CpuState::ARM => self.reload_pipeline32(sb),
+                    CpuState::THUMB => self.reload_pipeline16(sb),
+                };
                result = CpuAction::FlushPipeline;
            }
        }
@ -307,6 +315,7 @@ impl Core {
            self.add_cycle();

            if dest_reg == REG_PC {
+                self.reload_pipeline32(sb);
                result = CpuAction::FlushPipeline;
            }
        } else {
@ -391,6 +400,7 @@ impl Core {
            self.add_cycle();

            if dest_reg == REG_PC {
+                self.reload_pipeline32(sb);
                result = CpuAction::FlushPipeline;
            }
        } else {
@ -497,6 +507,7 @@ impl Core {
                            if psr_transfer {
                                self.transfer_spsr_mode();
                            }
+                            self.reload_pipeline32(sb);
                            result = CpuAction::FlushPipeline;
                        }

@ -551,6 +562,7 @@ impl Core {
            if is_load {
                let val = self.ldr_word(addr, sb);
                self.set_reg(REG_PC, val & !3);
+                self.reload_pipeline32(sb);
                result = CpuAction::FlushPipeline;
            } else {
                self.write_32(addr, self.pc + 4, sb);
--- a/src/core/arm7tdmi/cpu.rs
+++ b/src/core/arm7tdmi/cpu.rs
@ -320,7 +320,7 @@ impl Core {
        let result = self.exec_arm(sb, decoded_arm);
        match result {
            CpuAction::AdvancePC => self.advance_arm(),
-            CpuAction::FlushPipeline => self.reload_pipeline(sb),
+            CpuAction::FlushPipeline => {},
        }
    }

@ -334,7 +334,7 @@ impl Core {
        let result = self.exec_thumb(sb, decoded_thumb);
        match result {
            CpuAction::AdvancePC => self.advance_thumb(),
-            CpuAction::FlushPipeline => self.reload_pipeline(sb),
+            CpuAction::FlushPipeline => {},
        }
    }

--- a/src/core/arm7tdmi/exception.rs
+++ b/src/core/arm7tdmi/exception.rs
@ -56,13 +56,13 @@ impl Core {

        // Set PC to vector address
        self.pc = e as u32;
+        self.reload_pipeline32(sb);
    }

    pub fn irq(&mut self, sb: &mut SysBus) {
        if !self.cpsr.irq_disabled() {
            let lr = self.get_next_pc() + 4;
            self.exception(sb, Exception::Irq, lr);
-            self.reload_pipeline32(sb);
        }
    }

--- a/src/core/arm7tdmi/thumb/exec.rs
+++ b/src/core/arm7tdmi/thumb/exec.rs
@ -177,7 +177,8 @@ impl Core {
            OpFormat5::ADD => {
                self.set_reg(dst_reg, op1.wrapping_add(op2));
                if dst_reg == REG_PC {
-                    result = CpuAction::FlushPipeline
+                    result = CpuAction::FlushPipeline;
+                    self.reload_pipeline16(sb);
                }
            }
            OpFormat5::CMP => {
@ -190,6 +191,7 @@ impl Core {
                self.set_reg(dst_reg, op2 as u32);
                if dst_reg == REG_PC {
                    result = CpuAction::FlushPipeline;
+                    self.reload_pipeline16(sb);
                }
            }
        }
@ -421,6 +423,7 @@ impl Core {
                pop(self, sb, REG_PC);
                self.pc = self.pc & !1;
                result = CpuAction::FlushPipeline;
+                self.reload_pipeline16(sb);
            }
            self.S_cycle16(sb, self.pc + 2);
        } else {
@ -508,6 +511,7 @@ impl Core {
                let val = sb.read_32(addr);
                self.set_reg(REG_PC, val & !1);
                result = CpuAction::FlushPipeline;
+                self.reload_pipeline16(sb);
            } else {
                sb.write_32(addr, self.pc + 2);
            }
@ -531,6 +535,7 @@ impl Core {
            let offset = insn.bcond_offset();
            self.S_cycle16(sb, self.pc);
            self.pc = (self.pc as i32).wrapping_add(offset) as u32;
+            self.reload_pipeline16(sb);
            CpuAction::FlushPipeline
        }
    }
@ -539,7 +544,6 @@ impl Core {
    fn exec_thumb_swi(&mut self, sb: &mut SysBus, _insn: ThumbInstruction) -> CpuAction {
        self.N_cycle16(sb, self.pc);
        self.exception(sb, Exception::SoftwareInterrupt, self.pc - 2);
-
        CpuAction::FlushPipeline
    }

@ -548,7 +552,7 @@ impl Core {
        let offset = ((insn.offset11() << 21) >> 20) as i32;
        self.pc = (self.pc as i32).wrapping_add(offset) as u32;
        self.S_cycle16(sb, self.pc);
-
+        self.reload_pipeline16(sb);
        CpuAction::FlushPipeline
    }

@ -565,7 +569,7 @@ impl Core {
            let next_pc = (self.pc - 2) | 1;
            self.pc = ((self.gpr[REG_LR] & !1) as i32).wrapping_add(off) as u32;
            self.gpr[REG_LR] = next_pc;
-
+            self.reload_pipeline16(sb);
            CpuAction::FlushPipeline
        } else {
            off = (off << 21) >> 9;