optimize: CPU Pipeline optimization part 2

Optimize redundant pipeline stages
About 5% performance gain.

Also ran rustfmt.


Former-commit-id: 2f5fc95798e97eb963fea976866bbeaf637084b0
This commit is contained in:
Michel Heily 2020-02-11 01:52:18 +02:00
parent 1f79205f51
commit 6beec306c2
7 changed files with 98 additions and 164 deletions

View file

@ -246,7 +246,13 @@ impl Core {
}
}
pub fn shift_by_register(&mut self, bs_op: BarrelShiftOpCode, reg: usize, rs: usize, carry: bool) -> u32 {
pub fn shift_by_register(
&mut self,
bs_op: BarrelShiftOpCode,
reg: usize,
rs: usize,
carry: bool,
) -> u32 {
let mut val = self.get_reg(reg);
self.add_cycle(); // +1I
if reg == REG_PC {
@ -260,7 +266,8 @@ impl Core {
let carry = self.cpsr.C();
match shift.shift_by {
ShiftRegisterBy::ByAmount(amount) => {
let result = self.barrel_shift_op(shift.bs_op, self.get_reg(shift.reg), amount, carry, true);
let result =
self.barrel_shift_op(shift.bs_op, self.get_reg(shift.reg), amount, carry, true);
result
}
ShiftRegisterBy::ByRegister(rs) => {

View file

@ -3,7 +3,7 @@ use crate::bit::BitIndex;
use super::super::alu::*;
use crate::core::arm7tdmi::psr::RegPSR;
use crate::core::arm7tdmi::CpuAction;
use crate::core::arm7tdmi::{Core, Addr, CpuMode, CpuState, REG_LR, REG_PC};
use crate::core::arm7tdmi::{Addr, Core, CpuMode, CpuState, REG_LR, REG_PC};
use crate::core::sysbus::SysBus;
use crate::core::Bus;
@ -92,12 +92,7 @@ impl Core {
self.write_status_register(sb, insn.spsr_flag(), self.get_reg(insn.rm()))
}
fn write_status_register(
&mut self,
sb: &mut SysBus,
is_spsr: bool,
value: u32,
) -> CpuAction {
fn write_status_register(&mut self, sb: &mut SysBus, is_spsr: bool, value: u32) -> CpuAction {
let new_status_reg = RegPSR::new(value);
match self.cpsr.mode() {
CpuMode::User => {
@ -146,9 +141,7 @@ impl Core {
BarrelShifterValue::RotatedImmediate(val, amount) => {
self.ror(val, amount, self.cpsr.C(), false, true)
}
BarrelShifterValue::ShiftedRegister(x) => {
self.register_shift(x)
}
BarrelShifterValue::ShiftedRegister(x) => self.register_shift(x),
_ => unreachable!(),
}
}
@ -616,7 +609,9 @@ impl Core {
(insn.rd_hi(), insn.rd_lo(), insn.rn(), insn.rs(), insn.rm());
// check validity
assert!(!(REG_PC == rd_hi || REG_PC == rd_lo || REG_PC == rn || REG_PC == rs || REG_PC == rm));
assert!(
!(REG_PC == rd_hi || REG_PC == rd_lo || REG_PC == rn || REG_PC == rs || REG_PC == rm)
);
assert!(!(rd_hi != rd_hi && rd_hi != rm && rd_lo != rm));
let op1 = self.get_reg(rm);

View file

@ -1,35 +1,20 @@
#[cfg(feature = "debugger")]
use std::fmt;
#[cfg(feature = "debugger")]
use ansi_term::{Colour, Style};
#[cfg(feature = "debugger")]
use super::reg_string;
#[cfg(feature = "debugger")]
use ansi_term::{Colour, Style};
use serde::{Deserialize, Serialize};
#[cfg(feature = "debugger")]
use std::fmt;
use super::CpuAction;
pub use super::exception::Exception;
use super::CpuAction;
use super::{
arm::*, psr::RegPSR, thumb::ThumbInstruction, Addr, CpuMode, CpuResult, CpuState,
DecodedInstruction, InstructionDecoder,
};
use crate::core::bus::Bus;
use crate::core::sysbus::{
MemoryAccessType::*, MemoryAccessWidth::*, SysBus,
};
/// Stage of the instruction pipeline that the CPU core is currently in.
///
/// The step functions in this file walk the stages in order:
/// `Refill1` -> `Refill2` -> `Execute`. Flushing the pipeline sets the stage
/// back to `Refill1`.
#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq)]
pub enum PipelineState {
/// First refill step: the step only advances `pc` by one instruction and
/// moves on to `Refill2`; nothing is executed.
Refill1,
/// Second refill step: the step only advances `pc` by one instruction and
/// moves on to `Execute`; nothing is executed.
Refill2,
/// The pipeline is full: the step decodes and executes an instruction.
Execute,
}
impl Default for PipelineState {
fn default() -> PipelineState {
PipelineState::Refill1
}
}
use crate::core::sysbus::{MemoryAccessType::*, MemoryAccessWidth::*, SysBus};
#[derive(Serialize, Deserialize, Clone, Debug, Default)]
pub struct Core {
@ -48,7 +33,6 @@ pub struct Core {
pub(super) bs_carry_out: bool,
pub pipeline_state: PipelineState,
pipeline: [u32; 2],
pub last_executed: Option<DecodedInstruction>,
@ -309,91 +293,77 @@ impl Core {
}
}
/// Returns `true` while the pipeline is in either refill stage, i.e. whenever
/// it is not in `PipelineState::Execute`.
pub fn did_pipeline_flush(&self) -> bool {
    match self.pipeline_state {
        PipelineState::Refill1 | PipelineState::Refill2 => true,
        PipelineState::Execute => false,
    }
}
/// Applies the outcome of an executed instruction to the pipeline.
///
/// `CpuAction::AdvancePC` moves `pc` forward by one instruction of the current
/// instruction set; `CpuAction::FlushPipeline` flushes the pipeline using the
/// variant that matches the current CPU state (32-bit for ARM, 16-bit for
/// THUMB).
fn handle_exec_result(&mut self, sb: &mut SysBus, exec_result: CpuAction) {
    // The CPU state is read once, before any arm runs, exactly as in the
    // nested form this replaces.
    match (self.cpsr.state(), exec_result) {
        (CpuState::ARM, CpuAction::AdvancePC) => self.advance_arm(),
        (CpuState::ARM, CpuAction::FlushPipeline) => self.flush_pipeline32(sb),
        (CpuState::THUMB, CpuAction::AdvancePC) => self.advance_thumb(),
        (CpuState::THUMB, CpuAction::FlushPipeline) => self.flush_pipeline16(sb),
    }
}
// fn handle_exec_result(&mut self, sb: &mut SysBus, exec_result: CpuAction) {
// match self.cpsr.state() {
// CpuState::ARM => {
// match exec_result {
// CpuAction::AdvancePC => self.advance_arm(),
// CpuAction::FlushPipeline => self.reload_pipeline32(sb),
// }
// }
// CpuState::THUMB => {
// match exec_result {
// CpuAction::AdvancePC => self.advance_thumb(),
// CpuAction::FlushPipeline => self.reload_pipeline16(sb),
// }
// }
// }
// }
fn step_arm_exec(&mut self, insn: u32, sb: &mut SysBus) {
let pc = self.pc;
match self.pipeline_state {
PipelineState::Refill1 => {
self.pc = pc.wrapping_add(4);
self.pipeline_state = PipelineState::Refill2;
self.last_executed = None;
}
PipelineState::Refill2 => {
self.pc = pc.wrapping_add(4);
self.pipeline_state = PipelineState::Execute;
self.last_executed = None;
}
PipelineState::Execute => {
let decoded_arm = ArmInstruction::decode(insn, self.pc.wrapping_sub(8)).unwrap();
#[cfg(feature = "debugger")]
{
self.gpr_previous = self.get_registers();
}
self.last_executed = Some(DecodedInstruction::Arm(decoded_arm));
let result = self.exec_arm(sb, decoded_arm);
self.handle_exec_result(sb, result);
}
let decoded_arm = ArmInstruction::decode(insn, self.pc.wrapping_sub(8)).unwrap();
#[cfg(feature = "debugger")]
{
self.gpr_previous = self.get_registers();
}
self.last_executed = Some(DecodedInstruction::Arm(decoded_arm));
let result = self.exec_arm(sb, decoded_arm);
match result {
CpuAction::AdvancePC => self.advance_arm(),
CpuAction::FlushPipeline => self.reload_pipeline(sb),
}
}
fn step_thumb_exec(&mut self, insn: u16, sb: &mut SysBus) {
let pc = self.pc;
match self.pipeline_state {
PipelineState::Refill1 => {
self.pc = pc.wrapping_add(2);
self.pipeline_state = PipelineState::Refill2;
self.last_executed = None;
}
PipelineState::Refill2 => {
self.pc = pc.wrapping_add(2);
self.pipeline_state = PipelineState::Execute;
self.last_executed = None;
}
PipelineState::Execute => {
let decoded_thumb = ThumbInstruction::decode(insn, self.pc.wrapping_sub(4)).unwrap();
#[cfg(feature = "debugger")]
{
self.gpr_previous = self.get_registers();
}
self.last_executed = Some(DecodedInstruction::Thumb(decoded_thumb));
let result = self.exec_thumb(sb, decoded_thumb);
self.handle_exec_result(sb, result);
}
let decoded_thumb = ThumbInstruction::decode(insn, self.pc.wrapping_sub(4)).unwrap();
#[cfg(feature = "debugger")]
{
self.gpr_previous = self.get_registers();
}
self.last_executed = Some(DecodedInstruction::Thumb(decoded_thumb));
let result = self.exec_thumb(sb, decoded_thumb);
match result {
CpuAction::AdvancePC => self.advance_thumb(),
CpuAction::FlushPipeline => self.reload_pipeline(sb),
}
}
#[inline]
pub(super) fn flush_pipeline16(&mut self, sb: &mut SysBus) {
self.pipeline_state = PipelineState::Refill1;
pub(super) fn reload_pipeline16(&mut self, sb: &mut SysBus) {
self.pipeline[0] = sb.read_16(self.pc) as u32;
self.N_cycle16(sb, self.pc);
self.S_cycle16(sb, self.pc + 2);
self.advance_thumb();
self.pipeline[1] = sb.read_16(self.pc) as u32;
self.S_cycle16(sb, self.pc);
self.advance_thumb();
}
#[inline]
pub(super) fn flush_pipeline32(&mut self, sb: &mut SysBus) {
self.pipeline_state = PipelineState::Refill1;
self.N_cycle32(sb, self.pc);
self.S_cycle32(sb, self.pc + 4);
/// Refills both pipeline slots with 32-bit (ARM) instructions starting at the
/// current `pc`.
///
/// For each of the two slots, the word at `pc` is read into the slot, the
/// fetch is charged to the cycle counter, and `pc` is advanced by one ARM
/// instruction. The first fetch is charged via `N_cycle32` and the second via
/// `S_cycle32` (presumably non-sequential followed by sequential access).
///
/// The fetches are 32 bits wide, so the 32-bit cycle helpers are used here;
/// the 16-bit helpers belong to `reload_pipeline16`, which reads halfwords.
pub(super) fn reload_pipeline32(&mut self, sb: &mut SysBus) {
    // Slot 0: first fetch after the flush.
    self.pipeline[0] = sb.read_32(self.pc);
    self.N_cycle32(sb, self.pc);
    self.advance_arm();
    // Slot 1: fetch of the following word.
    self.pipeline[1] = sb.read_32(self.pc);
    self.S_cycle32(sb, self.pc);
    self.advance_arm();
}
#[inline]
/// Refills the pipeline using the fetch width of the current CPU state:
/// 32-bit fetches in ARM state, 16-bit fetches in THUMB state.
pub(super) fn reload_pipeline(&mut self, sb: &mut SysBus) {
    let state = self.cpsr.state();
    match state {
        CpuState::ARM => {
            self.reload_pipeline32(sb);
        }
        CpuState::THUMB => {
            self.reload_pipeline16(sb);
        }
    }
}
#[inline]
@ -406,23 +376,6 @@ impl Core {
self.pc = self.pc.wrapping_add(4)
}
// fn trace_opcode(&self, insn: u32) {
// if self.trace_opcodes && self.pipeline_state == PipelineState::Execute {
// println!("[{:08X}] PC=0x{:08x} | ", insn, self.pc);
// for r in 0..15 {
// println!("R{}=0x{:08x} ", r, self.gpr[r]);
// }
// println!(
// " N={} Z={} C={} V={} T={}\n",
// self.cpsr.N() as u8,
// self.cpsr.Z() as u8,
// self.cpsr.C() as u8,
// self.cpsr.V() as u8,
// self.cpsr.state() as u8,
// );
// }
// }
/// Perform a pipeline step
/// If an instruction was executed in this step, return it.
pub fn step(&mut self, bus: &mut SysBus) {
@ -449,11 +402,7 @@ impl Core {
/// Gets the address of the next instruction that is going to be executed
pub fn get_next_pc(&self) -> Addr {
let insn_size = self.word_size() as u32;
match self.pipeline_state {
PipelineState::Refill1 => self.pc,
PipelineState::Refill2 => self.pc - insn_size,
PipelineState::Execute => self.pc - 2 * insn_size,
}
self.pc - 2 * insn_size
}
pub fn get_cpu_state(&self) -> CpuState {

View file

@ -1,5 +1,5 @@
use super::super::sysbus::SysBus;
use super::cpu::{Core, PipelineState};
use super::cpu::Core;
use super::{CpuMode, CpuState};
use colored::*;
@ -59,13 +59,10 @@ impl Core {
}
pub fn irq(&mut self, sb: &mut SysBus) {
if self.pipeline_state != PipelineState::Execute {
panic!("IRQ when pipeline refilling! {:?}", self.pipeline_state);
}
if !self.cpsr.irq_disabled() {
let lr = self.get_next_pc() + 4;
self.exception(sb, Exception::Irq, lr);
self.flush_pipeline32(sb);
self.reload_pipeline32(sb);
}
}

View file

@ -25,7 +25,7 @@ pub(self) use crate::core::Addr;
pub enum CpuAction {
AdvancePC,
FlushPipeline
FlushPipeline,
}
#[derive(Serialize, Deserialize, Debug, PartialEq, Copy, Clone)]

View file

@ -22,13 +22,18 @@ impl Core {
&mut self,
sb: &mut SysBus,
insn: ThumbInstruction,
) -> CpuAction {
) -> CpuAction {
let rd = (insn.raw & 0b111) as usize;
let rs = insn.raw.bit_range(3..6) as usize;
let shift_amount = insn.offset5() as u8 as u32;
let op2 = self.barrel_shift_op(insn.format1_op(), self.gpr[rs], shift_amount, self.cpsr.C(), true);
let op2 = self.barrel_shift_op(
insn.format1_op(),
self.gpr[rs],
shift_amount,
self.cpsr.C(),
true,
);
self.gpr[rd] = op2;
self.alu_update_flags(op2, false, self.bs_carry_out, self.cpsr.V());
@ -148,11 +153,7 @@ impl Core {
}
/// Format 5
fn exec_thumb_hi_reg_op_or_bx(
&mut self,
sb: &mut SysBus,
insn: ThumbInstruction,
) -> CpuAction {
fn exec_thumb_hi_reg_op_or_bx(&mut self, sb: &mut SysBus, insn: ThumbInstruction) -> CpuAction {
let op = insn.format5_op();
let rd = (insn.raw & 0b111) as usize;
let dst_reg = if insn.flag(ThumbInstruction::FLAG_H1) {
@ -172,7 +173,7 @@ impl Core {
match op {
OpFormat5::BX => {
return self.branch_exchange(sb, self.get_reg(src_reg));
},
}
OpFormat5::ADD => {
self.set_reg(dst_reg, op1.wrapping_add(op2));
if dst_reg == REG_PC {
@ -370,11 +371,7 @@ impl Core {
}
/// Format 12
fn exec_thumb_load_address(
&mut self,
sb: &mut SysBus,
insn: ThumbInstruction,
) -> CpuAction {
fn exec_thumb_load_address(&mut self, sb: &mut SysBus, insn: ThumbInstruction) -> CpuAction {
let rd = insn.raw.bit_range(8..11) as usize;
let result = if insn.flag(ThumbInstruction::FLAG_SP) {
self.gpr[REG_SP] + (insn.word8() as Addr)
@ -539,11 +536,7 @@ impl Core {
}
/// Format 17
fn exec_thumb_swi(
&mut self,
sb: &mut SysBus,
_insn: ThumbInstruction,
) -> CpuAction {
fn exec_thumb_swi(&mut self, sb: &mut SysBus, _insn: ThumbInstruction) -> CpuAction {
self.N_cycle16(sb, self.pc);
self.exception(sb, Exception::SoftwareInterrupt, self.pc - 2);

View file

@ -1,3 +1,4 @@
/// Struct containing everything
use std::cell::RefCell;
use std::rc::Rc;
@ -121,10 +122,7 @@ impl GameBoyAdvance {
}
fn step_cpu(&mut self, io: &mut IoDevices) -> usize {
if io.intc.irq_pending()
&& self.cpu.last_executed.is_some()
&& !self.cpu.did_pipeline_flush()
{
if io.intc.irq_pending() && self.cpu.last_executed.is_some() {
self.cpu.irq(&mut self.sysbus);
io.haltcnt = HaltState::Running;
}
@ -215,13 +213,8 @@ mod tests {
.build()
.unwrap();
let dummy = Rc::new(RefCell::new(DummyInterface::new()));
let mut gba = GameBoyAdvance::new(
bios,
cartridge,
dummy.clone(),
dummy.clone(),
dummy.clone(),
);
let mut gba =
GameBoyAdvance::new(bios, cartridge, dummy.clone(), dummy.clone(), dummy.clone());
gba.skip_bios();
gba