optimize/gpu: Optimize read_pixel_index calls inside render loops.

Former-commit-id: 763b24a151ddd3dcb69afacabcb214b26ad03052
This commit is contained in:
Michel Heily 2020-03-28 15:39:57 +03:00 committed by MishMish
parent 699b2decd2
commit 414c1b0557
3 changed files with 161 additions and 109 deletions

View file

@ -249,20 +249,31 @@ impl Gpu {
/// helper method that reads the palette index from a base address and x + y /// helper method that reads the palette index from a base address and x + y
pub fn read_pixel_index(&self, addr: u32, x: u32, y: u32, format: PixelFormat) -> usize { pub fn read_pixel_index(&self, addr: u32, x: u32, y: u32, format: PixelFormat) -> usize {
let ofs = addr - VRAM_ADDR;
match format { match format {
PixelFormat::BPP4 => { PixelFormat::BPP4 => self.read_pixel_index_bpp4(addr, x, y),
let byte = self.vram.read_8(ofs + index2d!(u32, x / 2, y, 4)); PixelFormat::BPP8 => self.read_pixel_index_bpp8(addr, x, y),
}
}
#[inline]
pub fn read_pixel_index_bpp4(&self, addr: u32, x: u32, y: u32) -> usize {
let ofs = addr - VRAM_ADDR + index2d!(u32, x / 2, y, 4);
let ofs = ofs as usize;
let byte = self.vram.read_8(ofs as u32);
if x & 1 != 0 { if x & 1 != 0 {
(byte >> 4) as usize (byte >> 4) as usize
} else { } else {
(byte & 0xf) as usize (byte & 0xf) as usize
} }
} }
PixelFormat::BPP8 => self.vram.read_8(ofs + index2d!(u32, x, y, 8)) as usize,
} #[inline]
pub fn read_pixel_index_bpp8(&self, addr: u32, x: u32, y: u32) -> usize {
let ofs = addr - VRAM_ADDR;
self.vram.read_8(ofs + index2d!(u32, x, y, 8)) as usize
} }
#[inline(always)]
pub fn get_palette_color(&self, index: u32, palette_index: u32, offset: u32) -> Rgb15 { pub fn get_palette_color(&self, index: u32, palette_index: u32, offset: u32) -> Rgb15 {
if index == 0 || (palette_index != 0 && index % 16 == 0) { if index == 0 || (palette_index != 0 && index % 16 == 0) {
return Rgb15::TRANSPARENT; return Rgb15::TRANSPARENT;

View file

@ -118,6 +118,9 @@ impl Gpu {
let half_height = bbox_h / 2; let half_height = bbox_h / 2;
let screen_width = DISPLAY_WIDTH as i32; let screen_width = DISPLAY_WIDTH as i32;
let iy = screen_y - (ref_y + half_height); let iy = screen_y - (ref_y + half_height);
macro_rules! render_loop {
($read_pixel_index_fn:ident) => {
for ix in (-half_width)..(half_width) { for ix in (-half_width)..(half_width) {
let screen_x = ref_x + half_width + ix; let screen_x = ref_x + half_width + ix;
if screen_x < 0 { if screen_x < 0 {
@ -146,14 +149,26 @@ impl Gpu {
+ index2d!(u32, texture_x / 8, texture_y / 8, tile_array_width) + index2d!(u32, texture_x / 8, texture_y / 8, tile_array_width)
* (tile_size as u32); * (tile_size as u32);
let pixel_index = let pixel_index =
self.read_pixel_index(tile_addr, tile_x as u32, tile_y as u32, pixel_format); self.$read_pixel_index_fn(tile_addr, tile_x as u32, tile_y as u32);
let pixel_color = let pixel_color =
self.get_palette_color(pixel_index as u32, palette_bank, PALRAM_OFS_FG); self.get_palette_color(pixel_index as u32, palette_bank, PALRAM_OFS_FG);
if pixel_color != Rgb15::TRANSPARENT { if pixel_color != Rgb15::TRANSPARENT {
self.write_obj_pixel(screen_x as usize, screen_y as usize, pixel_color, &attrs); self.write_obj_pixel(
screen_x as usize,
screen_y as usize,
pixel_color,
&attrs,
);
} }
} }
} }
};
}
match pixel_format {
PixelFormat::BPP4 => render_loop!(read_pixel_index_bpp4),
PixelFormat::BPP8 => render_loop!(read_pixel_index_bpp8),
}
} }
fn render_normal_obj(&mut self, attrs: ObjAttrs, _obj_num: usize) { fn render_normal_obj(&mut self, attrs: ObjAttrs, _obj_num: usize) {
@ -189,6 +204,9 @@ impl Gpu {
// render the pixels // render the pixels
let screen_width = DISPLAY_WIDTH as i32; let screen_width = DISPLAY_WIDTH as i32;
let end_x = ref_x + obj_w; let end_x = ref_x + obj_w;
macro_rules! render_loop {
($read_pixel_index_fn:ident) => {
for screen_x in ref_x..end_x { for screen_x in ref_x..end_x {
if screen_x < 0 { if screen_x < 0 {
continue; continue;
@ -219,15 +237,28 @@ impl Gpu {
let tile_x = sprite_x % 8; let tile_x = sprite_x % 8;
let tile_y = sprite_y % 8; let tile_y = sprite_y % 8;
let tile_addr = tile_base let tile_addr = tile_base
+ index2d!(u32, sprite_x / 8, sprite_y / 8, tile_array_width) * (tile_size as u32); + index2d!(u32, sprite_x / 8, sprite_y / 8, tile_array_width)
* (tile_size as u32);
let pixel_index = let pixel_index =
self.read_pixel_index(tile_addr, tile_x as u32, tile_y as u32, pixel_format); self.$read_pixel_index_fn(tile_addr, tile_x as u32, tile_y as u32);
let pixel_color = let pixel_color =
self.get_palette_color(pixel_index as u32, palette_bank, PALRAM_OFS_FG); self.get_palette_color(pixel_index as u32, palette_bank, PALRAM_OFS_FG);
if pixel_color != Rgb15::TRANSPARENT { if pixel_color != Rgb15::TRANSPARENT {
self.write_obj_pixel(screen_x as usize, screen_y as usize, pixel_color, &attrs); self.write_obj_pixel(
screen_x as usize,
screen_y as usize,
pixel_color,
&attrs,
);
} }
} }
};
}
match pixel_format {
PixelFormat::BPP4 => render_loop!(read_pixel_index_bpp4),
PixelFormat::BPP8 => render_loop!(read_pixel_index_bpp8),
}
} }
fn write_obj_pixel(&mut self, x: usize, y: usize, pixel_color: Rgb15, attrs: &ObjAttrs) { fn write_obj_pixel(&mut self, x: usize, y: usize, pixel_color: Rgb15, attrs: &ObjAttrs) {

View file

@ -48,19 +48,22 @@ impl Gpu {
let mut start_tile_x = bg_x % 8; let mut start_tile_x = bg_x % 8;
let tile_py = (bg_y % 8) as u32; let tile_py = (bg_y % 8) as u32;
#[allow(unused)]
macro_rules! render_loop {
($read_pixel_index:ident) => {
loop { loop {
let mut map_addr = let mut map_addr = tilemap_base
tilemap_base + SCREEN_BLOCK_SIZE * sbb + 2 * index2d!(u32, se_row, se_column, 32); + SCREEN_BLOCK_SIZE * sbb
+ 2 * index2d!(u32, se_row, se_column, 32);
for _ in se_row..32 { for _ in se_row..32 {
let entry = TileMapEntry(self.vram.read_16(map_addr - VRAM_ADDR)); let entry = TileMapEntry(self.vram.read_16(map_addr - VRAM_ADDR));
let tile_addr = tileset_base + entry.tile_index() * tile_size; let tile_addr = tileset_base + entry.tile_index() * tile_size;
for tile_px in start_tile_x..8 { for tile_px in start_tile_x..8 {
let index = self.read_pixel_index( let index = self.$read_pixel_index(
tile_addr, tile_addr,
if entry.x_flip() { 7 - tile_px } else { tile_px }, if entry.x_flip() { 7 - tile_px } else { tile_px },
if entry.y_flip() { 7 - tile_py } else { tile_py }, if entry.y_flip() { 7 - tile_py } else { tile_py },
pixel_format,
); );
let palette_bank = match pixel_format { let palette_bank = match pixel_format {
PixelFormat::BPP4 => entry.palette_bank() as u32, PixelFormat::BPP4 => entry.palette_bank() as u32,
@ -81,6 +84,13 @@ impl Gpu {
sbb = sbb ^ 1; sbb = sbb ^ 1;
} }
} }
};
}
match pixel_format {
PixelFormat::BPP4 => render_loop!(read_pixel_index_bpp4),
PixelFormat::BPP8 => render_loop!(read_pixel_index_bpp8),
}
} }
pub(in super::super) fn render_aff_bg(&mut self, bg: usize) { pub(in super::super) fn render_aff_bg(&mut self, bg: usize) {