optimization: Optimize VRAM accesses from within the gpu.
The GPU no longer goes through the memory mapping to read VRAM. Also, this commit wraps the GPU pixeldata in a struct.
Former-commit-id: d7b706d9de61bac7909e10d6bbe092968e6acc9f
This commit is contained in:
parent
eb2a1a02fe
commit
9f0df9af06
|
@ -1,3 +1,5 @@
|
||||||
|
use std::fmt;
|
||||||
|
|
||||||
use super::arm7tdmi::{Addr, Bus};
|
use super::arm7tdmi::{Addr, Bus};
|
||||||
use super::ioregs::consts::*;
|
use super::ioregs::consts::*;
|
||||||
use super::palette::{Palette, PixelFormat, Rgb15};
|
use super::palette::{Palette, PixelFormat, Rgb15};
|
||||||
|
@ -146,9 +148,37 @@ impl Default for GpuState {
|
||||||
}
|
}
|
||||||
use GpuState::*;
|
use GpuState::*;
|
||||||
|
|
||||||
|
/// Newtype around a fixed-size array of `512 * 512` [`Rgb15`] pixels.
///
/// The wrapped array is private to this module; callers elsewhere reach
/// individual pixels through the `Index`/`IndexMut` impls using a flat
/// `usize` position.
// NOTE(review): presumably introduced so that `Gpu` can `#[derive(Debug)]`
// despite holding such a large array — confirm against the commit intent.
pub struct FrameBuffer([Rgb15; 512 * 512]);
|
||||||
|
|
||||||
|
impl fmt::Debug for FrameBuffer {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
write!(f, "FrameBuffer: ")?;
|
||||||
|
for i in 0..6 {
|
||||||
|
let (r, g, b) = self.0[i].get_rgb24();
|
||||||
|
write!(f, "#{:x}{:x}{:x}, ", r, g, b)?;
|
||||||
|
}
|
||||||
|
write!(f, "...")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::ops::Index<usize> for FrameBuffer {
|
||||||
|
type Output = Rgb15;
|
||||||
|
fn index(&self, index: usize) -> &Self::Output {
|
||||||
|
&self.0[index]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::ops::IndexMut<usize> for FrameBuffer {
|
||||||
|
fn index_mut(&mut self, index: usize) -> &mut Self::Output {
|
||||||
|
&mut self.0[index]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
pub struct Gpu {
|
pub struct Gpu {
|
||||||
cycles: usize,
|
cycles: usize,
|
||||||
pub pixeldata: [Rgb15; 512 * 512],
|
|
||||||
|
pub pixeldata: FrameBuffer,
|
||||||
pub state: GpuState,
|
pub state: GpuState,
|
||||||
pub current_scanline: usize, // VCOUNT
|
pub current_scanline: usize, // VCOUNT
|
||||||
}
|
}
|
||||||
|
@ -171,7 +201,7 @@ impl Gpu {
|
||||||
state: HDraw,
|
state: HDraw,
|
||||||
current_scanline: 0,
|
current_scanline: 0,
|
||||||
cycles: 0,
|
cycles: 0,
|
||||||
pixeldata: [Rgb15::from(0); 512 * 512],
|
pixeldata: FrameBuffer([Rgb15::from(0); 512 * 512]),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -233,36 +263,37 @@ impl Gpu {
|
||||||
/// helper method that reads the palette index from a base address and x + y
|
/// helper method that reads the palette index from a base address and x + y
|
||||||
pub fn read_pixel_index(
|
pub fn read_pixel_index(
|
||||||
&self,
|
&self,
|
||||||
sysbus: &SysBus,
|
sb: &SysBus,
|
||||||
addr: Addr,
|
addr: Addr,
|
||||||
x: u32,
|
x: u32,
|
||||||
y: u32,
|
y: u32,
|
||||||
format: PixelFormat,
|
format: PixelFormat,
|
||||||
) -> usize {
|
) -> usize {
|
||||||
|
let ofs = addr - VRAM_ADDR;
|
||||||
match format {
|
match format {
|
||||||
PixelFormat::BPP4 => {
|
PixelFormat::BPP4 => {
|
||||||
let byte = sysbus.read_8(addr + index2d!(x / 2, y, 4));
|
let byte = sb.vram.read_8(ofs + index2d!(x / 2, y, 4));
|
||||||
if x & 1 != 0 {
|
if x & 1 != 0 {
|
||||||
(byte >> 4) as usize
|
(byte >> 4) as usize
|
||||||
} else {
|
} else {
|
||||||
(byte & 0xf) as usize
|
(byte & 0xf) as usize
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
PixelFormat::BPP8 => sysbus.read_8(addr + index2d!(x, y, 8)) as usize,
|
PixelFormat::BPP8 => sb.vram.read_8(ofs + index2d!(x, y, 8)) as usize,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_palette_color(&self, sysbus: &SysBus, index: u32, palette_index: u32) -> Rgb15 {
|
pub fn get_palette_color(&self, sb: &SysBus, index: u32, palette_index: u32) -> Rgb15 {
|
||||||
sysbus
|
sb.palette_ram
|
||||||
.read_16(0x0500_0000 + 2 * index + 0x20 * palette_index)
|
.read_16(2 * index + 0x20 * palette_index)
|
||||||
.into()
|
.into()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn scanline_mode0(&mut self, bg: u32, sysbus: &mut SysBus) {
|
fn scanline_mode0(&mut self, bg: u32, sb: &mut SysBus) {
|
||||||
let bgcnt = self.bgcnt(bg, sysbus);
|
let bgcnt = self.bgcnt(bg, sb);
|
||||||
let (h_ofs, v_ofs) = self.bgofs(bg, sysbus);
|
let (h_ofs, v_ofs) = self.bgofs(bg, sb);
|
||||||
let tileset_base = bgcnt.char_block();
|
let tileset_base = bgcnt.char_block() - VRAM_ADDR;
|
||||||
let tilemap_base = bgcnt.screen_block();
|
let tilemap_base = bgcnt.screen_block() - VRAM_ADDR;
|
||||||
let (tile_size, pixel_format) = bgcnt.tile_format();
|
let (tile_size, pixel_format) = bgcnt.tile_format();
|
||||||
|
|
||||||
let (bg_width, bg_height) = bgcnt.size_regular();
|
let (bg_width, bg_height) = bgcnt.size_regular();
|
||||||
|
@ -299,13 +330,13 @@ impl Gpu {
|
||||||
let map_addr = tilemap_base
|
let map_addr = tilemap_base
|
||||||
+ SCREEN_BLOCK_SIZE * screen_block
|
+ SCREEN_BLOCK_SIZE * screen_block
|
||||||
+ 2 * (index2d!((se_row + t) % 32, se_column, 32) as u32);
|
+ 2 * (index2d!((se_row + t) % 32, se_column, 32) as u32);
|
||||||
let entry = TileMapEntry::from(sysbus.read_16(map_addr));
|
let entry = TileMapEntry::from(sb.vram.read_16(map_addr - VRAM_ADDR));
|
||||||
let tile_addr = tileset_base + entry.tile_index * tile_size;
|
let tile_addr = tileset_base + entry.tile_index * tile_size;
|
||||||
|
|
||||||
for tile_px in start_tile_x..=7 {
|
for tile_px in start_tile_x..=7 {
|
||||||
let tile_py = (bg_y % 8) as u32;
|
let tile_py = (bg_y % 8) as u32;
|
||||||
let index = self.read_pixel_index(
|
let index = self.read_pixel_index(
|
||||||
sysbus,
|
sb,
|
||||||
tile_addr,
|
tile_addr,
|
||||||
if entry.x_flip { 7 - tile_px } else { tile_px },
|
if entry.x_flip { 7 - tile_px } else { tile_px },
|
||||||
if entry.y_flip { 7 - tile_py } else { tile_py },
|
if entry.y_flip { 7 - tile_py } else { tile_py },
|
||||||
|
@ -315,7 +346,7 @@ impl Gpu {
|
||||||
PixelFormat::BPP4 => entry.palette_bank as u32,
|
PixelFormat::BPP4 => entry.palette_bank as u32,
|
||||||
PixelFormat::BPP8 => 0u32,
|
PixelFormat::BPP8 => 0u32,
|
||||||
};
|
};
|
||||||
let color = self.get_palette_color(sysbus, index as u32, palette_bank);
|
let color = self.get_palette_color(sb, index as u32, palette_bank);
|
||||||
if color.get_rgb24() != (0, 0, 0) {
|
if color.get_rgb24() != (0, 0, 0) {
|
||||||
self.pixeldata[index2d!(screen_x as usize, screen_y as usize, 512)] = color;
|
self.pixeldata[index2d!(screen_x as usize, screen_y as usize, 512)] = color;
|
||||||
}
|
}
|
||||||
|
@ -338,15 +369,15 @@ impl Gpu {
|
||||||
|
|
||||||
for x in 0..Self::DISPLAY_WIDTH {
|
for x in 0..Self::DISPLAY_WIDTH {
|
||||||
let pixel_index = index2d!(x, y, Self::DISPLAY_WIDTH);
|
let pixel_index = index2d!(x, y, Self::DISPLAY_WIDTH);
|
||||||
let pixel_addr = 0x0600_0000 + 2 * (pixel_index as u32);
|
let pixel_ofs = 2 * (pixel_index as u32);
|
||||||
self.pixeldata[index2d!(x, y, 512)] = sb.read_16(pixel_addr).into();
|
self.pixeldata[index2d!(x, y, 512)] = sb.vram.read_16(pixel_ofs).into();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn scanline_mode4(&mut self, bg: u32, dispcnt: &DisplayControl, sysbus: &mut SysBus) {
|
fn scanline_mode4(&mut self, bg: u32, dispcnt: &DisplayControl, sb: &mut SysBus) {
|
||||||
let page: u32 = match dispcnt.display_frame {
|
let page_ofs: u32 = match dispcnt.display_frame {
|
||||||
0 => 0x0600_0000,
|
0 => 0x0600_0000 - VRAM_ADDR,
|
||||||
1 => 0x0600_a000,
|
1 => 0x0600_a000 - VRAM_ADDR,
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -354,9 +385,9 @@ impl Gpu {
|
||||||
|
|
||||||
for x in 0..Self::DISPLAY_WIDTH {
|
for x in 0..Self::DISPLAY_WIDTH {
|
||||||
let bitmap_index = index2d!(x, y, Self::DISPLAY_WIDTH);
|
let bitmap_index = index2d!(x, y, Self::DISPLAY_WIDTH);
|
||||||
let bitmap_addr = page + (bitmap_index as u32);
|
let bitmap_ofs = page_ofs + (bitmap_index as u32);
|
||||||
let index = sysbus.read_8(bitmap_addr as Addr) as u32;
|
let index = sb.vram.read_8(bitmap_ofs as Addr) as u32;
|
||||||
self.pixeldata[index2d!(x, y, 512)] = self.get_palette_color(sysbus, index, 0);
|
self.pixeldata[index2d!(x, y, 512)] = self.get_palette_color(sb, index, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -15,7 +15,7 @@ const OAM_SIZE: usize = 1 * 1024;
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct BoxedMemory {
|
pub struct BoxedMemory {
|
||||||
mem: Box<[u8]>,
|
pub mem: Box<[u8]>,
|
||||||
ws: WaitState,
|
ws: WaitState,
|
||||||
mask: u32,
|
mask: u32,
|
||||||
}
|
}
|
||||||
|
@ -155,9 +155,9 @@ pub struct SysBus {
|
||||||
internal_work_ram: BoxedMemory,
|
internal_work_ram: BoxedMemory,
|
||||||
/// Currently model the IOMem as regular buffer, later make it into something more sophisticated.
|
/// Currently model the IOMem as regular buffer, later make it into something more sophisticated.
|
||||||
pub ioregs: IoRegs,
|
pub ioregs: IoRegs,
|
||||||
palette_ram: BoxedMemory,
|
pub palette_ram: BoxedMemory,
|
||||||
vram: BoxedMemory,
|
pub vram: BoxedMemory,
|
||||||
oam: BoxedMemory,
|
pub oam: BoxedMemory,
|
||||||
gamepak: Cartridge,
|
gamepak: Cartridge,
|
||||||
dummy: DummyBus,
|
dummy: DummyBus,
|
||||||
}
|
}
|
||||||
|
|
Reference in a new issue