core: Optimizing GameBoyAdvance::frame
Profiling GameBoyAdvance::frame shows that it spends way too much time idling in SoundController::update and Gpu::step, waiting for the cycle count to reach the next event and consuming CPU time that could otherwise go to the Arm7tdmi core. This commit changes the implementation of the main loop functions so that the CPU runs as many cycles as possible and the peripherals are only updated when needed. This is a performance improvement of roughly 50% in FPS in some games! Former-commit-id: 937e097f958423934c70b7face6b6b02926b7a51
This commit is contained in:
parent
36cf4e62ce
commit
c6feb5d500
|
@ -1,11 +1,8 @@
|
|||
extern crate bit_set;
|
||||
|
||||
use bit_set::BitSet;
|
||||
|
||||
use super::iodev::consts::{REG_FIFO_A, REG_FIFO_B};
|
||||
use super::sysbus::SysBus;
|
||||
use super::{Addr, Bus, Interrupt, IrqBitmask};
|
||||
|
||||
use bit_set::BitSet;
|
||||
use num::FromPrimitive;
|
||||
|
||||
#[derive(Debug)]
|
||||
|
@ -178,7 +175,7 @@ impl DmaChannel {
|
|||
#[derive(Debug)]
|
||||
pub struct DmaController {
|
||||
pub channels: [DmaChannel; 4],
|
||||
pending_bittset: BitSet,
|
||||
pending_set: BitSet,
|
||||
cycles: usize,
|
||||
}
|
||||
|
||||
|
@ -191,20 +188,20 @@ impl DmaController {
|
|||
DmaChannel::new(2),
|
||||
DmaChannel::new(3),
|
||||
],
|
||||
pending_bittset: BitSet::with_capacity(4),
|
||||
pending_set: BitSet::with_capacity(4),
|
||||
cycles: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn has_work(&self) -> bool {
|
||||
!self.pending_bittset.is_empty()
|
||||
pub fn is_active(&self) -> bool {
|
||||
!self.pending_set.is_empty()
|
||||
}
|
||||
|
||||
pub fn perform_work(&mut self, sb: &mut SysBus, irqs: &mut IrqBitmask) {
|
||||
for id in self.pending_bittset.iter() {
|
||||
for id in self.pending_set.iter() {
|
||||
self.channels[id].xfer(sb, irqs);
|
||||
}
|
||||
self.pending_bittset.clear();
|
||||
self.pending_set.clear();
|
||||
}
|
||||
|
||||
pub fn write_16(&mut self, channel_id: usize, ofs: u32, value: u16) {
|
||||
|
@ -216,9 +213,9 @@ impl DmaController {
|
|||
8 => self.channels[channel_id].write_word_count(value),
|
||||
10 => {
|
||||
if self.channels[channel_id].write_dma_ctrl(value) {
|
||||
self.pending_bittset.insert(channel_id);
|
||||
self.pending_set.insert(channel_id);
|
||||
} else {
|
||||
self.pending_bittset.remove(channel_id);
|
||||
self.pending_set.remove(channel_id);
|
||||
}
|
||||
}
|
||||
_ => panic!("Invalid dma offset {:x}", ofs),
|
||||
|
@ -228,7 +225,7 @@ impl DmaController {
|
|||
pub fn notify_vblank(&mut self) {
|
||||
for i in 0..4 {
|
||||
if self.channels[i].ctrl.is_enabled() && self.channels[i].ctrl.timing() == 1 {
|
||||
self.pending_bittset.insert(i);
|
||||
self.pending_set.insert(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -236,7 +233,7 @@ impl DmaController {
|
|||
pub fn notify_hblank(&mut self) {
|
||||
for i in 0..4 {
|
||||
if self.channels[i].ctrl.is_enabled() && self.channels[i].ctrl.timing() == 2 {
|
||||
self.pending_bittset.insert(i);
|
||||
self.pending_set.insert(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -248,7 +245,7 @@ impl DmaController {
|
|||
&& self.channels[i].ctrl.timing() == 3
|
||||
&& self.channels[i].dst == fifo_addr
|
||||
{
|
||||
self.pending_bittset.insert(i);
|
||||
self.pending_set.insert(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,6 +16,8 @@ pub struct GameBoyAdvance {
|
|||
pub sysbus: Box<SysBus>,
|
||||
pub cpu: Core,
|
||||
input_device: Rc<RefCell<dyn InputInterface>>,
|
||||
|
||||
cycles_to_next_event: usize,
|
||||
}
|
||||
|
||||
impl GameBoyAdvance {
|
||||
|
@ -34,6 +36,8 @@ impl GameBoyAdvance {
|
|||
cpu: cpu,
|
||||
sysbus: Box::new(SysBus::new(io, bios_rom, gamepak)),
|
||||
input_device: input_device,
|
||||
|
||||
cycles_to_next_event: 1,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -74,41 +78,64 @@ impl GameBoyAdvance {
|
|||
None
|
||||
}
|
||||
|
||||
pub fn step(&mut self) {
|
||||
let mut irqs = IrqBitmask(0);
|
||||
fn step_cpu(&mut self, io: &mut IoDevices) -> usize {
|
||||
if io.intc.irq_pending()
|
||||
&& self.cpu.last_executed.is_some()
|
||||
&& !self.cpu.did_pipeline_flush()
|
||||
{
|
||||
self.cpu.irq(&mut self.sysbus);
|
||||
io.haltcnt = HaltState::Running;
|
||||
}
|
||||
let previous_cycles = self.cpu.cycles;
|
||||
self.cpu.step(&mut self.sysbus);
|
||||
self.cpu.cycles - previous_cycles
|
||||
}
|
||||
|
||||
pub fn step(&mut self) {
|
||||
// // I hate myself for doing this, but rust left me no choice.
|
||||
let io = unsafe {
|
||||
let ptr = &mut *self.sysbus as *mut SysBus;
|
||||
&mut (*ptr).io as &mut IoDevices
|
||||
};
|
||||
|
||||
let cycles = if !io.dmac.has_work() {
|
||||
if io.intc.irq_pending()
|
||||
&& self.cpu.last_executed.is_some()
|
||||
&& !self.cpu.did_pipeline_flush()
|
||||
{
|
||||
self.cpu.irq(&mut self.sysbus);
|
||||
io.haltcnt = HaltState::Running;
|
||||
}
|
||||
let mut irqs = IrqBitmask(0);
|
||||
|
||||
if HaltState::Running == io.haltcnt {
|
||||
self.cpu.step(&mut self.sysbus).unwrap();
|
||||
self.cpu.cycles - previous_cycles
|
||||
let mut cycles_left = self.cycles_to_next_event;
|
||||
let mut cycles_to_next_event = std::usize::MAX;
|
||||
let mut cycles = 0;
|
||||
|
||||
while cycles_left > 0 {
|
||||
let mut irqs = IrqBitmask(0);
|
||||
let _cycles = if !io.dmac.is_active() {
|
||||
if HaltState::Running == io.haltcnt {
|
||||
self.step_cpu(io)
|
||||
} else {
|
||||
cycles = cycles_left;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
1
|
||||
io.dmac.perform_work(&mut self.sysbus, &mut irqs);
|
||||
io.intc.request_irqs(irqs);
|
||||
return;
|
||||
};
|
||||
|
||||
cycles += _cycles;
|
||||
if cycles_left < _cycles {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
io.dmac.perform_work(&mut self.sysbus, &mut irqs);
|
||||
0
|
||||
};
|
||||
|
||||
io.timers.step(cycles, &mut self.sysbus, &mut irqs);
|
||||
|
||||
io.gpu.step(cycles, &mut self.sysbus, &mut irqs);
|
||||
cycles_left -= _cycles;
|
||||
}
|
||||
|
||||
// update gpu & sound
|
||||
io.timers.update(cycles, &mut self.sysbus, &mut irqs);
|
||||
io.gpu.step(
|
||||
cycles,
|
||||
&mut self.sysbus,
|
||||
&mut irqs,
|
||||
&mut cycles_to_next_event,
|
||||
);
|
||||
io.sound.update(cycles, &mut cycles_to_next_event);
|
||||
self.cycles_to_next_event = cycles_to_next_event;
|
||||
io.intc.request_irqs(irqs);
|
||||
io.sound.update(self.cpu.cycles);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -180,7 +180,9 @@ pub struct Gpu {
|
|||
#[debug_stub = "video handle"]
|
||||
video_device: VideoDeviceRcRefCell,
|
||||
pub state: GpuState,
|
||||
cycles: usize,
|
||||
|
||||
/// how many cycles left until next gpu state ?
|
||||
cycles_left_for_current_state: usize,
|
||||
|
||||
// registers
|
||||
pub vcount: usize, // VCOUNT
|
||||
|
@ -231,7 +233,7 @@ impl Gpu {
|
|||
|
||||
state: HDraw,
|
||||
vcount: 0,
|
||||
cycles: 0,
|
||||
cycles_left_for_current_state: CYCLES_HDRAW,
|
||||
|
||||
palette_ram: BoxedMemory::new(vec![0; PALETTE_RAM_SIZE].into_boxed_slice()),
|
||||
vram: BoxedMemory::new(vec![0; VIDEO_RAM_SIZE].into_boxed_slice()),
|
||||
|
@ -342,34 +344,39 @@ impl Gpu {
|
|||
}
|
||||
|
||||
// Returns the new gpu state
|
||||
pub fn step(&mut self, cycles: usize, sb: &mut SysBus, irqs: &mut IrqBitmask) {
|
||||
self.cycles += cycles;
|
||||
pub fn step(
|
||||
&mut self,
|
||||
cycles: usize,
|
||||
sb: &mut SysBus,
|
||||
irqs: &mut IrqBitmask,
|
||||
cycles_to_next_event: &mut usize,
|
||||
) {
|
||||
if self.cycles_left_for_current_state <= cycles {
|
||||
let overshoot = cycles - self.cycles_left_for_current_state;
|
||||
|
||||
match self.state {
|
||||
HDraw => {
|
||||
if self.cycles > CYCLES_HDRAW {
|
||||
self.cycles -= CYCLES_HDRAW;
|
||||
// HBlank
|
||||
// handle the state change
|
||||
match self.state {
|
||||
HDraw => {
|
||||
// Transition to HBlank
|
||||
self.state = HBlank;
|
||||
self.cycles_left_for_current_state = CYCLES_HBLANK;
|
||||
self.dispstat.set_hblank_flag(true);
|
||||
if self.dispstat.hblank_irq_enable() {
|
||||
irqs.set_LCD_HBlank(true);
|
||||
};
|
||||
self.state = HBlank;
|
||||
sb.io.dmac.notify_hblank();
|
||||
}
|
||||
}
|
||||
HBlank => {
|
||||
if self.cycles > CYCLES_HBLANK {
|
||||
self.cycles -= CYCLES_HBLANK;
|
||||
|
||||
HBlank => {
|
||||
self.dispstat.set_hblank_flag(false);
|
||||
self.update_vcount(self.vcount + 1, irqs);
|
||||
|
||||
if self.vcount < DISPLAY_HEIGHT {
|
||||
self.render_scanline();
|
||||
self.state = HDraw;
|
||||
self.cycles_left_for_current_state = CYCLES_HDRAW;
|
||||
} else {
|
||||
self.state = VBlank;
|
||||
self.cycles_left_for_current_state = CYCLES_SCANLINE;
|
||||
self.dispstat.set_vblank_flag(true);
|
||||
if self.dispstat.vblank_irq_enable() {
|
||||
irqs.set_LCD_VBlank(true);
|
||||
|
@ -378,21 +385,33 @@ impl Gpu {
|
|||
self.video_device.borrow_mut().render(&self.frame_buffer);
|
||||
}
|
||||
}
|
||||
}
|
||||
VBlank => {
|
||||
if self.cycles > CYCLES_SCANLINE {
|
||||
self.cycles -= CYCLES_SCANLINE;
|
||||
|
||||
VBlank => {
|
||||
if self.vcount < DISPLAY_HEIGHT + VBLANK_LINES - 1 {
|
||||
self.update_vcount(self.vcount + 1, irqs);
|
||||
self.cycles_left_for_current_state = CYCLES_SCANLINE;
|
||||
} else {
|
||||
self.update_vcount(0, irqs);
|
||||
self.dispstat.set_vblank_flag(false);
|
||||
self.render_scanline();
|
||||
self.state = HDraw;
|
||||
|
||||
self.cycles_left_for_current_state = CYCLES_HDRAW;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// handle the overshoot
|
||||
if overshoot < self.cycles_left_for_current_state {
|
||||
self.cycles_left_for_current_state -= overshoot;
|
||||
} else {
|
||||
panic!("OH SHIT");
|
||||
}
|
||||
} else {
|
||||
self.cycles_left_for_current_state -= cycles;
|
||||
}
|
||||
|
||||
if self.cycles_left_for_current_state < *cycles_to_next_event {
|
||||
*cycles_to_next_event = self.cycles_left_for_current_state;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -68,7 +68,7 @@ pub struct SoundController {
|
|||
audio_device: AudioDeviceRcRefCell,
|
||||
|
||||
sample_rate_to_cpu_freq: usize, // how many "cycles" are a sample?
|
||||
last_sample_cycles: usize, // cycles count when we last provided a new sample.
|
||||
cycles: usize, // cycles count when we last provided a new sample.
|
||||
|
||||
mse: bool,
|
||||
|
||||
|
@ -114,7 +114,7 @@ impl SoundController {
|
|||
audio_device: audio_device,
|
||||
|
||||
sample_rate_to_cpu_freq: 12345,
|
||||
last_sample_cycles: 0,
|
||||
cycles: 0,
|
||||
mse: false,
|
||||
left_volume: 0,
|
||||
left_sqr1: false,
|
||||
|
@ -286,7 +286,7 @@ impl SoundController {
|
|||
self.resampler.in_freq = self.sample_rate;
|
||||
}
|
||||
self.cycles_per_sample = 512 >> resolution;
|
||||
},
|
||||
}
|
||||
|
||||
_ => {
|
||||
// println!(
|
||||
|
@ -321,9 +321,10 @@ impl SoundController {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn update(&mut self, cycles: usize) {
|
||||
while cycles - self.last_sample_cycles >= self.cycles_per_sample {
|
||||
self.last_sample_cycles += self.cycles_per_sample;
|
||||
pub fn update(&mut self, cycles: usize, cycles_to_next_event: &mut usize) {
|
||||
self.cycles += cycles;
|
||||
while self.cycles >= self.cycles_per_sample {
|
||||
self.cycles -= self.cycles_per_sample;
|
||||
|
||||
// time to push a new sample!
|
||||
|
||||
|
@ -341,6 +342,9 @@ impl SoundController {
|
|||
self.resampler
|
||||
.push_sample((sample[0], sample[1]), &mut *audio);
|
||||
}
|
||||
if self.cycles_per_sample < *cycles_to_next_event {
|
||||
*cycles_to_next_event = self.cycles_per_sample;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -2,6 +2,8 @@ use super::interrupt::{Interrupt, IrqBitmask};
|
|||
use super::iodev::consts::*;
|
||||
use super::sysbus::SysBus;
|
||||
|
||||
use bit_set::BitSet;
|
||||
|
||||
use num::FromPrimitive;
|
||||
|
||||
#[derive(Debug)]
|
||||
|
@ -42,11 +44,32 @@ impl Timer {
|
|||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// updates the timer with 'cycles' amount of cycles, returns the number of times it overflowed
|
||||
fn update(&mut self, cycles: usize, irqs: &mut IrqBitmask) -> usize {
|
||||
self.cycles += cycles;
|
||||
let mut num_overflows = 0;
|
||||
let freq = self.frequency();
|
||||
while self.cycles >= freq {
|
||||
self.cycles -= freq;
|
||||
self.data = self.data.wrapping_add(1);
|
||||
if self.data == 0 {
|
||||
if self.ctl.irq_enabled() {
|
||||
irqs.add_irq(self.irq);
|
||||
}
|
||||
self.data = self.initial_data;
|
||||
num_overflows += 1;
|
||||
}
|
||||
}
|
||||
|
||||
num_overflows
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Timers {
|
||||
timers: [Timer; 4],
|
||||
running_timers: BitSet,
|
||||
pub trace: bool,
|
||||
}
|
||||
|
||||
|
@ -67,6 +90,7 @@ impl Timers {
|
|||
pub fn new() -> Timers {
|
||||
Timers {
|
||||
timers: [Timer::new(0), Timer::new(1), Timer::new(2), Timer::new(3)],
|
||||
running_timers: BitSet::with_capacity(4),
|
||||
trace: false,
|
||||
}
|
||||
}
|
||||
|
@ -75,6 +99,12 @@ impl Timers {
|
|||
let old_enabled = self[id].ctl.enabled();
|
||||
self[id].ctl.0 = value;
|
||||
let new_enabled = self[id].ctl.enabled();
|
||||
let cascade = self.timers[id].ctl.cascade();
|
||||
if new_enabled && !cascade {
|
||||
self.running_timers.insert(id);
|
||||
} else {
|
||||
self.running_timers.remove(id);
|
||||
}
|
||||
if self.trace && old_enabled != new_enabled {
|
||||
println!(
|
||||
"TMR{} {}",
|
||||
|
@ -127,45 +157,24 @@ impl Timers {
|
|||
}
|
||||
}
|
||||
|
||||
fn update_timer(&mut self, id: usize, cycles: usize, sb: &mut SysBus, irqs: &mut IrqBitmask) {
|
||||
let timer = &mut self.timers[id];
|
||||
timer.cycles += cycles;
|
||||
let mut num_overflows = 0;
|
||||
let freq = timer.frequency();
|
||||
while timer.cycles >= freq {
|
||||
timer.cycles -= freq;
|
||||
timer.data = timer.data.wrapping_add(1);
|
||||
if timer.data == 0 {
|
||||
if self.trace {
|
||||
println!("TMR{} overflown!", id);
|
||||
pub fn update(&mut self, cycles: usize, sb: &mut SysBus, irqs: &mut IrqBitmask) {
|
||||
for id in self.running_timers.iter() {
|
||||
if !self.timers[id].ctl.cascade() {
|
||||
let timer = &mut self.timers[id];
|
||||
let num_overflows = timer.update(cycles, irqs);
|
||||
if num_overflows > 0 {
|
||||
if id != 3 {
|
||||
let next_timer = &mut self.timers[id + 1];
|
||||
if next_timer.ctl.cascade() {
|
||||
next_timer.update(num_overflows, irqs);
|
||||
}
|
||||
}
|
||||
if id == 0 || id == 1 {
|
||||
sb.io
|
||||
.sound
|
||||
.handle_timer_overflow(&mut sb.io.dmac, id, num_overflows);
|
||||
}
|
||||
}
|
||||
if timer.ctl.irq_enabled() {
|
||||
irqs.add_irq(timer.irq);
|
||||
}
|
||||
timer.data = timer.initial_data;
|
||||
num_overflows += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if num_overflows > 0 {
|
||||
if id != 3 {
|
||||
let next_timer = &mut self.timers[id + 1];
|
||||
if next_timer.ctl.cascade() {
|
||||
self.update_timer(id + 1, num_overflows, sb, irqs);
|
||||
}
|
||||
}
|
||||
if id == 0 || id == 1 {
|
||||
sb.io
|
||||
.sound
|
||||
.handle_timer_overflow(&mut sb.io.dmac, id, num_overflows);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn step(&mut self, cycles: usize, sb: &mut SysBus, irqs: &mut IrqBitmask) {
|
||||
for i in 0..4 {
|
||||
if self.timers[i].ctl.enabled() && !self.timers[i].ctl.cascade() {
|
||||
self.update_timer(i, cycles, sb, irqs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15,6 +15,7 @@ extern crate bit;
|
|||
extern crate bitfield;
|
||||
#[macro_use]
|
||||
extern crate bitflags;
|
||||
extern crate bit_set;
|
||||
|
||||
extern crate byteorder;
|
||||
|
||||
|
|
Reference in a new issue