optimization: Optimize VRAM accesses from within the gpu.

GPU code no longer goes through the system bus memory mapping; it reads the VRAM and palette RAM buffers directly.

Also, this commit wraps the GPU pixeldata in a struct (`FrameBuffer`).


Former-commit-id: d7b706d9de61bac7909e10d6bbe092968e6acc9f
This commit is contained in:
Michel Heily 2019-08-02 17:58:56 +03:00
parent eb2a1a02fe
commit 9f0df9af06
2 changed files with 60 additions and 29 deletions

View file

@ -1,3 +1,5 @@
use std::fmt;
use super::arm7tdmi::{Addr, Bus}; use super::arm7tdmi::{Addr, Bus};
use super::ioregs::consts::*; use super::ioregs::consts::*;
use super::palette::{Palette, PixelFormat, Rgb15}; use super::palette::{Palette, PixelFormat, Rgb15};
@ -146,9 +148,37 @@ impl Default for GpuState {
} }
use GpuState::*; use GpuState::*;
/// Newtype wrapping the `512 * 512`-element array of `Rgb15` pixels that the
/// GPU renders into (stored in `Gpu::pixeldata`).
pub struct FrameBuffer([Rgb15; 512 * 512]);
impl fmt::Debug for FrameBuffer {
    /// Writes a short preview of the buffer instead of dumping every pixel:
    /// the label `FrameBuffer: `, the first six pixels as `#rrggbb` hex
    /// colors (each followed by `, `), and a trailing `...`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "FrameBuffer: ")?;
        for pixel in self.0.iter().take(6) {
            let (r, g, b) = pixel.get_rgb24();
            // Zero-pad every channel to two hex digits; without the padding
            // a color such as (0x01, 0x23, 0x04) would print as the
            // ambiguous "#1234".
            write!(f, "#{:02x}{:02x}{:02x}, ", r, g, b)?;
        }
        write!(f, "...")
    }
}
impl std::ops::Index<usize> for FrameBuffer {
type Output = Rgb15;
fn index(&self, index: usize) -> &Self::Output {
&self.0[index]
}
}
impl std::ops::IndexMut<usize> for FrameBuffer {
fn index_mut(&mut self, index: usize) -> &mut Self::Output {
&mut self.0[index]
}
}
#[derive(Debug)]
pub struct Gpu { pub struct Gpu {
cycles: usize, cycles: usize,
pub pixeldata: [Rgb15; 512 * 512],
pub pixeldata: FrameBuffer,
pub state: GpuState, pub state: GpuState,
pub current_scanline: usize, // VCOUNT pub current_scanline: usize, // VCOUNT
} }
@ -171,7 +201,7 @@ impl Gpu {
state: HDraw, state: HDraw,
current_scanline: 0, current_scanline: 0,
cycles: 0, cycles: 0,
pixeldata: [Rgb15::from(0); 512 * 512], pixeldata: FrameBuffer([Rgb15::from(0); 512 * 512]),
} }
} }
@ -233,36 +263,37 @@ impl Gpu {
/// helper method that reads the palette index from a base address and x + y /// helper method that reads the palette index from a base address and x + y
pub fn read_pixel_index( pub fn read_pixel_index(
&self, &self,
sysbus: &SysBus, sb: &SysBus,
addr: Addr, addr: Addr,
x: u32, x: u32,
y: u32, y: u32,
format: PixelFormat, format: PixelFormat,
) -> usize { ) -> usize {
let ofs = addr - VRAM_ADDR;
match format { match format {
PixelFormat::BPP4 => { PixelFormat::BPP4 => {
let byte = sysbus.read_8(addr + index2d!(x / 2, y, 4)); let byte = sb.vram.read_8(ofs + index2d!(x / 2, y, 4));
if x & 1 != 0 { if x & 1 != 0 {
(byte >> 4) as usize (byte >> 4) as usize
} else { } else {
(byte & 0xf) as usize (byte & 0xf) as usize
} }
} }
PixelFormat::BPP8 => sysbus.read_8(addr + index2d!(x, y, 8)) as usize, PixelFormat::BPP8 => sb.vram.read_8(ofs + index2d!(x, y, 8)) as usize,
} }
} }
pub fn get_palette_color(&self, sysbus: &SysBus, index: u32, palette_index: u32) -> Rgb15 { pub fn get_palette_color(&self, sb: &SysBus, index: u32, palette_index: u32) -> Rgb15 {
sysbus sb.palette_ram
.read_16(0x0500_0000 + 2 * index + 0x20 * palette_index) .read_16(2 * index + 0x20 * palette_index)
.into() .into()
} }
fn scanline_mode0(&mut self, bg: u32, sysbus: &mut SysBus) { fn scanline_mode0(&mut self, bg: u32, sb: &mut SysBus) {
let bgcnt = self.bgcnt(bg, sysbus); let bgcnt = self.bgcnt(bg, sb);
let (h_ofs, v_ofs) = self.bgofs(bg, sysbus); let (h_ofs, v_ofs) = self.bgofs(bg, sb);
let tileset_base = bgcnt.char_block(); let tileset_base = bgcnt.char_block() - VRAM_ADDR;
let tilemap_base = bgcnt.screen_block(); let tilemap_base = bgcnt.screen_block() - VRAM_ADDR;
let (tile_size, pixel_format) = bgcnt.tile_format(); let (tile_size, pixel_format) = bgcnt.tile_format();
let (bg_width, bg_height) = bgcnt.size_regular(); let (bg_width, bg_height) = bgcnt.size_regular();
@ -299,13 +330,13 @@ impl Gpu {
let map_addr = tilemap_base let map_addr = tilemap_base
+ SCREEN_BLOCK_SIZE * screen_block + SCREEN_BLOCK_SIZE * screen_block
+ 2 * (index2d!((se_row + t) % 32, se_column, 32) as u32); + 2 * (index2d!((se_row + t) % 32, se_column, 32) as u32);
let entry = TileMapEntry::from(sysbus.read_16(map_addr)); let entry = TileMapEntry::from(sb.vram.read_16(map_addr - VRAM_ADDR));
let tile_addr = tileset_base + entry.tile_index * tile_size; let tile_addr = tileset_base + entry.tile_index * tile_size;
for tile_px in start_tile_x..=7 { for tile_px in start_tile_x..=7 {
let tile_py = (bg_y % 8) as u32; let tile_py = (bg_y % 8) as u32;
let index = self.read_pixel_index( let index = self.read_pixel_index(
sysbus, sb,
tile_addr, tile_addr,
if entry.x_flip { 7 - tile_px } else { tile_px }, if entry.x_flip { 7 - tile_px } else { tile_px },
if entry.y_flip { 7 - tile_py } else { tile_py }, if entry.y_flip { 7 - tile_py } else { tile_py },
@ -315,7 +346,7 @@ impl Gpu {
PixelFormat::BPP4 => entry.palette_bank as u32, PixelFormat::BPP4 => entry.palette_bank as u32,
PixelFormat::BPP8 => 0u32, PixelFormat::BPP8 => 0u32,
}; };
let color = self.get_palette_color(sysbus, index as u32, palette_bank); let color = self.get_palette_color(sb, index as u32, palette_bank);
if color.get_rgb24() != (0, 0, 0) { if color.get_rgb24() != (0, 0, 0) {
self.pixeldata[index2d!(screen_x as usize, screen_y as usize, 512)] = color; self.pixeldata[index2d!(screen_x as usize, screen_y as usize, 512)] = color;
} }
@ -338,15 +369,15 @@ impl Gpu {
for x in 0..Self::DISPLAY_WIDTH { for x in 0..Self::DISPLAY_WIDTH {
let pixel_index = index2d!(x, y, Self::DISPLAY_WIDTH); let pixel_index = index2d!(x, y, Self::DISPLAY_WIDTH);
let pixel_addr = 0x0600_0000 + 2 * (pixel_index as u32); let pixel_ofs = 2 * (pixel_index as u32);
self.pixeldata[index2d!(x, y, 512)] = sb.read_16(pixel_addr).into(); self.pixeldata[index2d!(x, y, 512)] = sb.vram.read_16(pixel_ofs).into();
} }
} }
fn scanline_mode4(&mut self, bg: u32, dispcnt: &DisplayControl, sysbus: &mut SysBus) { fn scanline_mode4(&mut self, bg: u32, dispcnt: &DisplayControl, sb: &mut SysBus) {
let page: u32 = match dispcnt.display_frame { let page_ofs: u32 = match dispcnt.display_frame {
0 => 0x0600_0000, 0 => 0x0600_0000 - VRAM_ADDR,
1 => 0x0600_a000, 1 => 0x0600_a000 - VRAM_ADDR,
_ => unreachable!(), _ => unreachable!(),
}; };
@ -354,9 +385,9 @@ impl Gpu {
for x in 0..Self::DISPLAY_WIDTH { for x in 0..Self::DISPLAY_WIDTH {
let bitmap_index = index2d!(x, y, Self::DISPLAY_WIDTH); let bitmap_index = index2d!(x, y, Self::DISPLAY_WIDTH);
let bitmap_addr = page + (bitmap_index as u32); let bitmap_ofs = page_ofs + (bitmap_index as u32);
let index = sysbus.read_8(bitmap_addr as Addr) as u32; let index = sb.vram.read_8(bitmap_ofs as Addr) as u32;
self.pixeldata[index2d!(x, y, 512)] = self.get_palette_color(sysbus, index, 0); self.pixeldata[index2d!(x, y, 512)] = self.get_palette_color(sb, index, 0);
} }
} }

View file

@ -15,7 +15,7 @@ const OAM_SIZE: usize = 1 * 1024;
#[derive(Debug)] #[derive(Debug)]
pub struct BoxedMemory { pub struct BoxedMemory {
mem: Box<[u8]>, pub mem: Box<[u8]>,
ws: WaitState, ws: WaitState,
mask: u32, mask: u32,
} }
@ -155,9 +155,9 @@ pub struct SysBus {
internal_work_ram: BoxedMemory, internal_work_ram: BoxedMemory,
/// Currently model the IOMem as regular buffer, later make it into something more sophisticated. /// Currently model the IOMem as regular buffer, later make it into something more sophisticated.
pub ioregs: IoRegs, pub ioregs: IoRegs,
palette_ram: BoxedMemory, pub palette_ram: BoxedMemory,
vram: BoxedMemory, pub vram: BoxedMemory,
oam: BoxedMemory, pub oam: BoxedMemory,
gamepak: Cartridge, gamepak: Cartridge,
dummy: DummyBus, dummy: DummyBus,
} }