optimize: CPU Pipeline optimization part 2

Optimize redundant pipeline stages
About 5% performance gain.

Also ran rustfmt.


Former-commit-id: 2f5fc95798e97eb963fea976866bbeaf637084b0
This commit is contained in:
Michel Heily 2020-02-11 01:52:18 +02:00
parent 1f79205f51
commit 6beec306c2
7 changed files with 98 additions and 164 deletions

View file

@ -246,7 +246,13 @@ impl Core {
} }
} }
pub fn shift_by_register(&mut self, bs_op: BarrelShiftOpCode, reg: usize, rs: usize, carry: bool) -> u32 { pub fn shift_by_register(
&mut self,
bs_op: BarrelShiftOpCode,
reg: usize,
rs: usize,
carry: bool,
) -> u32 {
let mut val = self.get_reg(reg); let mut val = self.get_reg(reg);
self.add_cycle(); // +1I self.add_cycle(); // +1I
if reg == REG_PC { if reg == REG_PC {
@ -260,7 +266,8 @@ impl Core {
let carry = self.cpsr.C(); let carry = self.cpsr.C();
match shift.shift_by { match shift.shift_by {
ShiftRegisterBy::ByAmount(amount) => { ShiftRegisterBy::ByAmount(amount) => {
let result = self.barrel_shift_op(shift.bs_op, self.get_reg(shift.reg), amount, carry, true); let result =
self.barrel_shift_op(shift.bs_op, self.get_reg(shift.reg), amount, carry, true);
result result
} }
ShiftRegisterBy::ByRegister(rs) => { ShiftRegisterBy::ByRegister(rs) => {

View file

@ -3,7 +3,7 @@ use crate::bit::BitIndex;
use super::super::alu::*; use super::super::alu::*;
use crate::core::arm7tdmi::psr::RegPSR; use crate::core::arm7tdmi::psr::RegPSR;
use crate::core::arm7tdmi::CpuAction; use crate::core::arm7tdmi::CpuAction;
use crate::core::arm7tdmi::{Core, Addr, CpuMode, CpuState, REG_LR, REG_PC}; use crate::core::arm7tdmi::{Addr, Core, CpuMode, CpuState, REG_LR, REG_PC};
use crate::core::sysbus::SysBus; use crate::core::sysbus::SysBus;
use crate::core::Bus; use crate::core::Bus;
@ -92,12 +92,7 @@ impl Core {
self.write_status_register(sb, insn.spsr_flag(), self.get_reg(insn.rm())) self.write_status_register(sb, insn.spsr_flag(), self.get_reg(insn.rm()))
} }
fn write_status_register( fn write_status_register(&mut self, sb: &mut SysBus, is_spsr: bool, value: u32) -> CpuAction {
&mut self,
sb: &mut SysBus,
is_spsr: bool,
value: u32,
) -> CpuAction {
let new_status_reg = RegPSR::new(value); let new_status_reg = RegPSR::new(value);
match self.cpsr.mode() { match self.cpsr.mode() {
CpuMode::User => { CpuMode::User => {
@ -146,9 +141,7 @@ impl Core {
BarrelShifterValue::RotatedImmediate(val, amount) => { BarrelShifterValue::RotatedImmediate(val, amount) => {
self.ror(val, amount, self.cpsr.C(), false, true) self.ror(val, amount, self.cpsr.C(), false, true)
} }
BarrelShifterValue::ShiftedRegister(x) => { BarrelShifterValue::ShiftedRegister(x) => self.register_shift(x),
self.register_shift(x)
}
_ => unreachable!(), _ => unreachable!(),
} }
} }
@ -616,7 +609,9 @@ impl Core {
(insn.rd_hi(), insn.rd_lo(), insn.rn(), insn.rs(), insn.rm()); (insn.rd_hi(), insn.rd_lo(), insn.rn(), insn.rs(), insn.rm());
// check validity // check validity
assert!(!(REG_PC == rd_hi || REG_PC == rd_lo || REG_PC == rn || REG_PC == rs || REG_PC == rm)); assert!(
!(REG_PC == rd_hi || REG_PC == rd_lo || REG_PC == rn || REG_PC == rs || REG_PC == rm)
);
assert!(!(rd_hi != rd_hi && rd_hi != rm && rd_lo != rm)); assert!(!(rd_hi != rd_hi && rd_hi != rm && rd_lo != rm));
let op1 = self.get_reg(rm); let op1 = self.get_reg(rm);

View file

@ -1,35 +1,20 @@
#[cfg(feature = "debugger")] #[cfg(feature = "debugger")]
use std::fmt;
#[cfg(feature = "debugger")]
use ansi_term::{Colour, Style};
#[cfg(feature = "debugger")]
use super::reg_string; use super::reg_string;
#[cfg(feature = "debugger")]
use ansi_term::{Colour, Style};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
#[cfg(feature = "debugger")]
use std::fmt;
use super::CpuAction;
pub use super::exception::Exception; pub use super::exception::Exception;
use super::CpuAction;
use super::{ use super::{
arm::*, psr::RegPSR, thumb::ThumbInstruction, Addr, CpuMode, CpuResult, CpuState, arm::*, psr::RegPSR, thumb::ThumbInstruction, Addr, CpuMode, CpuResult, CpuState,
DecodedInstruction, InstructionDecoder, DecodedInstruction, InstructionDecoder,
}; };
use crate::core::bus::Bus; use crate::core::bus::Bus;
use crate::core::sysbus::{ use crate::core::sysbus::{MemoryAccessType::*, MemoryAccessWidth::*, SysBus};
MemoryAccessType::*, MemoryAccessWidth::*, SysBus,
};
#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq)]
pub enum PipelineState {
Refill1,
Refill2,
Execute,
}
impl Default for PipelineState {
fn default() -> PipelineState {
PipelineState::Refill1
}
}
#[derive(Serialize, Deserialize, Clone, Debug, Default)] #[derive(Serialize, Deserialize, Clone, Debug, Default)]
pub struct Core { pub struct Core {
@ -48,7 +33,6 @@ pub struct Core {
pub(super) bs_carry_out: bool, pub(super) bs_carry_out: bool,
pub pipeline_state: PipelineState,
pipeline: [u32; 2], pipeline: [u32; 2],
pub last_executed: Option<DecodedInstruction>, pub last_executed: Option<DecodedInstruction>,
@ -309,91 +293,77 @@ impl Core {
} }
} }
pub fn did_pipeline_flush(&self) -> bool { // fn handle_exec_result(&mut self, sb: &mut SysBus, exec_result: CpuAction) {
self.pipeline_state != PipelineState::Execute // match self.cpsr.state() {
} // CpuState::ARM => {
// match exec_result {
fn handle_exec_result(&mut self, sb: &mut SysBus, exec_result: CpuAction) { // CpuAction::AdvancePC => self.advance_arm(),
match self.cpsr.state() { // CpuAction::FlushPipeline => self.reload_pipeline32(sb),
CpuState::ARM => { // }
match exec_result { // }
CpuAction::AdvancePC => self.advance_arm(), // CpuState::THUMB => {
CpuAction::FlushPipeline => self.flush_pipeline32(sb), // match exec_result {
} // CpuAction::AdvancePC => self.advance_thumb(),
} // CpuAction::FlushPipeline => self.reload_pipeline16(sb),
CpuState::THUMB => { // }
match exec_result { // }
CpuAction::AdvancePC => self.advance_thumb(), // }
CpuAction::FlushPipeline => self.flush_pipeline16(sb), // }
}
}
}
}
fn step_arm_exec(&mut self, insn: u32, sb: &mut SysBus) { fn step_arm_exec(&mut self, insn: u32, sb: &mut SysBus) {
let pc = self.pc; let decoded_arm = ArmInstruction::decode(insn, self.pc.wrapping_sub(8)).unwrap();
match self.pipeline_state { #[cfg(feature = "debugger")]
PipelineState::Refill1 => { {
self.pc = pc.wrapping_add(4); self.gpr_previous = self.get_registers();
self.pipeline_state = PipelineState::Refill2; }
self.last_executed = None; self.last_executed = Some(DecodedInstruction::Arm(decoded_arm));
} let result = self.exec_arm(sb, decoded_arm);
PipelineState::Refill2 => { match result {
self.pc = pc.wrapping_add(4); CpuAction::AdvancePC => self.advance_arm(),
self.pipeline_state = PipelineState::Execute; CpuAction::FlushPipeline => self.reload_pipeline(sb),
self.last_executed = None;
}
PipelineState::Execute => {
let decoded_arm = ArmInstruction::decode(insn, self.pc.wrapping_sub(8)).unwrap();
#[cfg(feature = "debugger")]
{
self.gpr_previous = self.get_registers();
}
self.last_executed = Some(DecodedInstruction::Arm(decoded_arm));
let result = self.exec_arm(sb, decoded_arm);
self.handle_exec_result(sb, result);
}
} }
} }
fn step_thumb_exec(&mut self, insn: u16, sb: &mut SysBus) { fn step_thumb_exec(&mut self, insn: u16, sb: &mut SysBus) {
let pc = self.pc; let decoded_thumb = ThumbInstruction::decode(insn, self.pc.wrapping_sub(4)).unwrap();
match self.pipeline_state { #[cfg(feature = "debugger")]
PipelineState::Refill1 => { {
self.pc = pc.wrapping_add(2); self.gpr_previous = self.get_registers();
self.pipeline_state = PipelineState::Refill2; }
self.last_executed = None; self.last_executed = Some(DecodedInstruction::Thumb(decoded_thumb));
} let result = self.exec_thumb(sb, decoded_thumb);
PipelineState::Refill2 => { match result {
self.pc = pc.wrapping_add(2); CpuAction::AdvancePC => self.advance_thumb(),
self.pipeline_state = PipelineState::Execute; CpuAction::FlushPipeline => self.reload_pipeline(sb),
self.last_executed = None;
}
PipelineState::Execute => {
let decoded_thumb = ThumbInstruction::decode(insn, self.pc.wrapping_sub(4)).unwrap();
#[cfg(feature = "debugger")]
{
self.gpr_previous = self.get_registers();
}
self.last_executed = Some(DecodedInstruction::Thumb(decoded_thumb));
let result = self.exec_thumb(sb, decoded_thumb);
self.handle_exec_result(sb, result);
}
} }
} }
#[inline] #[inline]
pub(super) fn flush_pipeline16(&mut self, sb: &mut SysBus) { pub(super) fn reload_pipeline16(&mut self, sb: &mut SysBus) {
self.pipeline_state = PipelineState::Refill1; self.pipeline[0] = sb.read_16(self.pc) as u32;
self.N_cycle16(sb, self.pc); self.N_cycle16(sb, self.pc);
self.S_cycle16(sb, self.pc + 2); self.advance_thumb();
self.pipeline[1] = sb.read_16(self.pc) as u32;
self.S_cycle16(sb, self.pc);
self.advance_thumb();
} }
#[inline] #[inline]
pub(super) fn flush_pipeline32(&mut self, sb: &mut SysBus) { pub(super) fn reload_pipeline32(&mut self, sb: &mut SysBus) {
self.pipeline_state = PipelineState::Refill1; self.pipeline[0] = sb.read_32(self.pc);
self.N_cycle32(sb, self.pc); self.N_cycle16(sb, self.pc);
self.S_cycle32(sb, self.pc + 4); self.advance_arm();
self.pipeline[1] = sb.read_32(self.pc);
self.S_cycle16(sb, self.pc);
self.advance_arm();
}
#[inline]
pub(super) fn reload_pipeline(&mut self, sb: &mut SysBus) {
match self.cpsr.state() {
CpuState::THUMB => self.reload_pipeline16(sb),
CpuState::ARM => self.reload_pipeline32(sb),
}
} }
#[inline] #[inline]
@ -406,23 +376,6 @@ impl Core {
self.pc = self.pc.wrapping_add(4) self.pc = self.pc.wrapping_add(4)
} }
// fn trace_opcode(&self, insn: u32) {
// if self.trace_opcodes && self.pipeline_state == PipelineState::Execute {
// println!("[{:08X}] PC=0x{:08x} | ", insn, self.pc);
// for r in 0..15 {
// println!("R{}=0x{:08x} ", r, self.gpr[r]);
// }
// println!(
// " N={} Z={} C={} V={} T={}\n",
// self.cpsr.N() as u8,
// self.cpsr.Z() as u8,
// self.cpsr.C() as u8,
// self.cpsr.V() as u8,
// self.cpsr.state() as u8,
// );
// }
// }
/// Perform a pipeline step /// Perform a pipeline step
/// If an instruction was executed in this step, return it. /// If an instruction was executed in this step, return it.
pub fn step(&mut self, bus: &mut SysBus) { pub fn step(&mut self, bus: &mut SysBus) {
@ -449,11 +402,7 @@ impl Core {
/// Get's the address of the next instruction that is going to be executed /// Get's the address of the next instruction that is going to be executed
pub fn get_next_pc(&self) -> Addr { pub fn get_next_pc(&self) -> Addr {
let insn_size = self.word_size() as u32; let insn_size = self.word_size() as u32;
match self.pipeline_state { self.pc - 2 * insn_size
PipelineState::Refill1 => self.pc,
PipelineState::Refill2 => self.pc - insn_size,
PipelineState::Execute => self.pc - 2 * insn_size,
}
} }
pub fn get_cpu_state(&self) -> CpuState { pub fn get_cpu_state(&self) -> CpuState {

View file

@ -1,5 +1,5 @@
use super::super::sysbus::SysBus; use super::super::sysbus::SysBus;
use super::cpu::{Core, PipelineState}; use super::cpu::Core;
use super::{CpuMode, CpuState}; use super::{CpuMode, CpuState};
use colored::*; use colored::*;
@ -59,13 +59,10 @@ impl Core {
} }
pub fn irq(&mut self, sb: &mut SysBus) { pub fn irq(&mut self, sb: &mut SysBus) {
if self.pipeline_state != PipelineState::Execute {
panic!("IRQ when pipeline refilling! {:?}", self.pipeline_state);
}
if !self.cpsr.irq_disabled() { if !self.cpsr.irq_disabled() {
let lr = self.get_next_pc() + 4; let lr = self.get_next_pc() + 4;
self.exception(sb, Exception::Irq, lr); self.exception(sb, Exception::Irq, lr);
self.flush_pipeline32(sb); self.reload_pipeline32(sb);
} }
} }

View file

@ -25,7 +25,7 @@ pub(self) use crate::core::Addr;
pub enum CpuAction { pub enum CpuAction {
AdvancePC, AdvancePC,
FlushPipeline FlushPipeline,
} }
#[derive(Serialize, Deserialize, Debug, PartialEq, Copy, Clone)] #[derive(Serialize, Deserialize, Debug, PartialEq, Copy, Clone)]

View file

@ -22,13 +22,18 @@ impl Core {
&mut self, &mut self,
sb: &mut SysBus, sb: &mut SysBus,
insn: ThumbInstruction, insn: ThumbInstruction,
) -> CpuAction { ) -> CpuAction {
let rd = (insn.raw & 0b111) as usize; let rd = (insn.raw & 0b111) as usize;
let rs = insn.raw.bit_range(3..6) as usize; let rs = insn.raw.bit_range(3..6) as usize;
let shift_amount = insn.offset5() as u8 as u32; let shift_amount = insn.offset5() as u8 as u32;
let op2 = self.barrel_shift_op(insn.format1_op(), self.gpr[rs], shift_amount, self.cpsr.C(), true); let op2 = self.barrel_shift_op(
insn.format1_op(),
self.gpr[rs],
shift_amount,
self.cpsr.C(),
true,
);
self.gpr[rd] = op2; self.gpr[rd] = op2;
self.alu_update_flags(op2, false, self.bs_carry_out, self.cpsr.V()); self.alu_update_flags(op2, false, self.bs_carry_out, self.cpsr.V());
@ -148,11 +153,7 @@ impl Core {
} }
/// Format 5 /// Format 5
fn exec_thumb_hi_reg_op_or_bx( fn exec_thumb_hi_reg_op_or_bx(&mut self, sb: &mut SysBus, insn: ThumbInstruction) -> CpuAction {
&mut self,
sb: &mut SysBus,
insn: ThumbInstruction,
) -> CpuAction {
let op = insn.format5_op(); let op = insn.format5_op();
let rd = (insn.raw & 0b111) as usize; let rd = (insn.raw & 0b111) as usize;
let dst_reg = if insn.flag(ThumbInstruction::FLAG_H1) { let dst_reg = if insn.flag(ThumbInstruction::FLAG_H1) {
@ -172,7 +173,7 @@ impl Core {
match op { match op {
OpFormat5::BX => { OpFormat5::BX => {
return self.branch_exchange(sb, self.get_reg(src_reg)); return self.branch_exchange(sb, self.get_reg(src_reg));
}, }
OpFormat5::ADD => { OpFormat5::ADD => {
self.set_reg(dst_reg, op1.wrapping_add(op2)); self.set_reg(dst_reg, op1.wrapping_add(op2));
if dst_reg == REG_PC { if dst_reg == REG_PC {
@ -370,11 +371,7 @@ impl Core {
} }
/// Format 12 /// Format 12
fn exec_thumb_load_address( fn exec_thumb_load_address(&mut self, sb: &mut SysBus, insn: ThumbInstruction) -> CpuAction {
&mut self,
sb: &mut SysBus,
insn: ThumbInstruction,
) -> CpuAction {
let rd = insn.raw.bit_range(8..11) as usize; let rd = insn.raw.bit_range(8..11) as usize;
let result = if insn.flag(ThumbInstruction::FLAG_SP) { let result = if insn.flag(ThumbInstruction::FLAG_SP) {
self.gpr[REG_SP] + (insn.word8() as Addr) self.gpr[REG_SP] + (insn.word8() as Addr)
@ -539,11 +536,7 @@ impl Core {
} }
/// Format 17 /// Format 17
fn exec_thumb_swi( fn exec_thumb_swi(&mut self, sb: &mut SysBus, _insn: ThumbInstruction) -> CpuAction {
&mut self,
sb: &mut SysBus,
_insn: ThumbInstruction,
) -> CpuAction {
self.N_cycle16(sb, self.pc); self.N_cycle16(sb, self.pc);
self.exception(sb, Exception::SoftwareInterrupt, self.pc - 2); self.exception(sb, Exception::SoftwareInterrupt, self.pc - 2);

View file

@ -1,3 +1,4 @@
/// Struct containing everything /// Struct containing everything
use std::cell::RefCell; use std::cell::RefCell;
use std::rc::Rc; use std::rc::Rc;
@ -121,10 +122,7 @@ impl GameBoyAdvance {
} }
fn step_cpu(&mut self, io: &mut IoDevices) -> usize { fn step_cpu(&mut self, io: &mut IoDevices) -> usize {
if io.intc.irq_pending() if io.intc.irq_pending() && self.cpu.last_executed.is_some() {
&& self.cpu.last_executed.is_some()
&& !self.cpu.did_pipeline_flush()
{
self.cpu.irq(&mut self.sysbus); self.cpu.irq(&mut self.sysbus);
io.haltcnt = HaltState::Running; io.haltcnt = HaltState::Running;
} }
@ -215,13 +213,8 @@ mod tests {
.build() .build()
.unwrap(); .unwrap();
let dummy = Rc::new(RefCell::new(DummyInterface::new())); let dummy = Rc::new(RefCell::new(DummyInterface::new()));
let mut gba = GameBoyAdvance::new( let mut gba =
bios, GameBoyAdvance::new(bios, cartridge, dummy.clone(), dummy.clone(), dummy.clone());
cartridge,
dummy.clone(),
dummy.clone(),
dummy.clone(),
);
gba.skip_bios(); gba.skip_bios();
gba gba