optimize/gpu: Optimize read_pixel_index calls inside render loops.
Former-commit-id: 763b24a151ddd3dcb69afacabcb214b26ad03052
This commit is contained in:
parent
699b2decd2
commit
414c1b0557
|
@ -249,20 +249,31 @@ impl Gpu {
|
||||||
|
|
||||||
/// helper method that reads the palette index from a base address and x + y
|
/// helper method that reads the palette index from a base address and x + y
|
||||||
pub fn read_pixel_index(&self, addr: u32, x: u32, y: u32, format: PixelFormat) -> usize {
|
pub fn read_pixel_index(&self, addr: u32, x: u32, y: u32, format: PixelFormat) -> usize {
|
||||||
let ofs = addr - VRAM_ADDR;
|
|
||||||
match format {
|
match format {
|
||||||
PixelFormat::BPP4 => {
|
PixelFormat::BPP4 => self.read_pixel_index_bpp4(addr, x, y),
|
||||||
let byte = self.vram.read_8(ofs + index2d!(u32, x / 2, y, 4));
|
PixelFormat::BPP8 => self.read_pixel_index_bpp8(addr, x, y),
|
||||||
if x & 1 != 0 {
|
|
||||||
(byte >> 4) as usize
|
|
||||||
} else {
|
|
||||||
(byte & 0xf) as usize
|
|
||||||
}
|
|
||||||
}
|
|
||||||
PixelFormat::BPP8 => self.vram.read_8(ofs + index2d!(u32, x, y, 8)) as usize,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn read_pixel_index_bpp4(&self, addr: u32, x: u32, y: u32) -> usize {
|
||||||
|
let ofs = addr - VRAM_ADDR + index2d!(u32, x / 2, y, 4);
|
||||||
|
let ofs = ofs as usize;
|
||||||
|
let byte = self.vram.read_8(ofs as u32);
|
||||||
|
if x & 1 != 0 {
|
||||||
|
(byte >> 4) as usize
|
||||||
|
} else {
|
||||||
|
(byte & 0xf) as usize
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn read_pixel_index_bpp8(&self, addr: u32, x: u32, y: u32) -> usize {
|
||||||
|
let ofs = addr - VRAM_ADDR;
|
||||||
|
self.vram.read_8(ofs + index2d!(u32, x, y, 8)) as usize
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
pub fn get_palette_color(&self, index: u32, palette_index: u32, offset: u32) -> Rgb15 {
|
pub fn get_palette_color(&self, index: u32, palette_index: u32, offset: u32) -> Rgb15 {
|
||||||
if index == 0 || (palette_index != 0 && index % 16 == 0) {
|
if index == 0 || (palette_index != 0 && index % 16 == 0) {
|
||||||
return Rgb15::TRANSPARENT;
|
return Rgb15::TRANSPARENT;
|
||||||
|
|
|
@ -118,41 +118,56 @@ impl Gpu {
|
||||||
let half_height = bbox_h / 2;
|
let half_height = bbox_h / 2;
|
||||||
let screen_width = DISPLAY_WIDTH as i32;
|
let screen_width = DISPLAY_WIDTH as i32;
|
||||||
let iy = screen_y - (ref_y + half_height);
|
let iy = screen_y - (ref_y + half_height);
|
||||||
for ix in (-half_width)..(half_width) {
|
|
||||||
let screen_x = ref_x + half_width + ix;
|
|
||||||
if screen_x < 0 {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if screen_x >= screen_width {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if self
|
|
||||||
.obj_buffer_get(screen_x as usize, screen_y as usize)
|
|
||||||
.priority
|
|
||||||
<= attrs.2.priority()
|
|
||||||
&& !attrs.is_obj_window()
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
let transformed_x = (affine_matrix.pa * ix + affine_matrix.pb * iy) >> 8;
|
macro_rules! render_loop {
|
||||||
let transformed_y = (affine_matrix.pc * ix + affine_matrix.pd * iy) >> 8;
|
($read_pixel_index_fn:ident) => {
|
||||||
let texture_x = transformed_x + obj_w / 2;
|
for ix in (-half_width)..(half_width) {
|
||||||
let texture_y = transformed_y + obj_h / 2;
|
let screen_x = ref_x + half_width + ix;
|
||||||
if texture_x >= 0 && texture_x < obj_w && texture_y >= 0 && texture_y < obj_h {
|
if screen_x < 0 {
|
||||||
let tile_x = texture_x % 8;
|
continue;
|
||||||
let tile_y = texture_y % 8;
|
}
|
||||||
let tile_addr = tile_base
|
if screen_x >= screen_width {
|
||||||
+ index2d!(u32, texture_x / 8, texture_y / 8, tile_array_width)
|
break;
|
||||||
* (tile_size as u32);
|
}
|
||||||
let pixel_index =
|
if self
|
||||||
self.read_pixel_index(tile_addr, tile_x as u32, tile_y as u32, pixel_format);
|
.obj_buffer_get(screen_x as usize, screen_y as usize)
|
||||||
let pixel_color =
|
.priority
|
||||||
self.get_palette_color(pixel_index as u32, palette_bank, PALRAM_OFS_FG);
|
<= attrs.2.priority()
|
||||||
if pixel_color != Rgb15::TRANSPARENT {
|
&& !attrs.is_obj_window()
|
||||||
self.write_obj_pixel(screen_x as usize, screen_y as usize, pixel_color, &attrs);
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let transformed_x = (affine_matrix.pa * ix + affine_matrix.pb * iy) >> 8;
|
||||||
|
let transformed_y = (affine_matrix.pc * ix + affine_matrix.pd * iy) >> 8;
|
||||||
|
let texture_x = transformed_x + obj_w / 2;
|
||||||
|
let texture_y = transformed_y + obj_h / 2;
|
||||||
|
if texture_x >= 0 && texture_x < obj_w && texture_y >= 0 && texture_y < obj_h {
|
||||||
|
let tile_x = texture_x % 8;
|
||||||
|
let tile_y = texture_y % 8;
|
||||||
|
let tile_addr = tile_base
|
||||||
|
+ index2d!(u32, texture_x / 8, texture_y / 8, tile_array_width)
|
||||||
|
* (tile_size as u32);
|
||||||
|
let pixel_index =
|
||||||
|
self.$read_pixel_index_fn(tile_addr, tile_x as u32, tile_y as u32);
|
||||||
|
let pixel_color =
|
||||||
|
self.get_palette_color(pixel_index as u32, palette_bank, PALRAM_OFS_FG);
|
||||||
|
if pixel_color != Rgb15::TRANSPARENT {
|
||||||
|
self.write_obj_pixel(
|
||||||
|
screen_x as usize,
|
||||||
|
screen_y as usize,
|
||||||
|
pixel_color,
|
||||||
|
&attrs,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
match pixel_format {
|
||||||
|
PixelFormat::BPP4 => render_loop!(read_pixel_index_bpp4),
|
||||||
|
PixelFormat::BPP8 => render_loop!(read_pixel_index_bpp8),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -189,44 +204,60 @@ impl Gpu {
|
||||||
// render the pixels
|
// render the pixels
|
||||||
let screen_width = DISPLAY_WIDTH as i32;
|
let screen_width = DISPLAY_WIDTH as i32;
|
||||||
let end_x = ref_x + obj_w;
|
let end_x = ref_x + obj_w;
|
||||||
for screen_x in ref_x..end_x {
|
|
||||||
if screen_x < 0 {
|
macro_rules! render_loop {
|
||||||
continue;
|
($read_pixel_index_fn:ident) => {
|
||||||
}
|
for screen_x in ref_x..end_x {
|
||||||
if screen_x >= screen_width {
|
if screen_x < 0 {
|
||||||
break;
|
continue;
|
||||||
}
|
}
|
||||||
if self
|
if screen_x >= screen_width {
|
||||||
.obj_buffer_get(screen_x as usize, screen_y as usize)
|
break;
|
||||||
.priority
|
}
|
||||||
<= attrs.2.priority()
|
if self
|
||||||
&& !attrs.is_obj_window()
|
.obj_buffer_get(screen_x as usize, screen_y as usize)
|
||||||
{
|
.priority
|
||||||
continue;
|
<= attrs.2.priority()
|
||||||
}
|
&& !attrs.is_obj_window()
|
||||||
let mut sprite_y = screen_y - ref_y;
|
{
|
||||||
let mut sprite_x = screen_x - ref_x;
|
continue;
|
||||||
sprite_y = if attrs.1.v_flip() {
|
}
|
||||||
obj_h - sprite_y - 1
|
let mut sprite_y = screen_y - ref_y;
|
||||||
} else {
|
let mut sprite_x = screen_x - ref_x;
|
||||||
sprite_y
|
sprite_y = if attrs.1.v_flip() {
|
||||||
|
obj_h - sprite_y - 1
|
||||||
|
} else {
|
||||||
|
sprite_y
|
||||||
|
};
|
||||||
|
sprite_x = if attrs.1.h_flip() {
|
||||||
|
obj_w - sprite_x - 1
|
||||||
|
} else {
|
||||||
|
sprite_x
|
||||||
|
};
|
||||||
|
let tile_x = sprite_x % 8;
|
||||||
|
let tile_y = sprite_y % 8;
|
||||||
|
let tile_addr = tile_base
|
||||||
|
+ index2d!(u32, sprite_x / 8, sprite_y / 8, tile_array_width)
|
||||||
|
* (tile_size as u32);
|
||||||
|
let pixel_index =
|
||||||
|
self.$read_pixel_index_fn(tile_addr, tile_x as u32, tile_y as u32);
|
||||||
|
let pixel_color =
|
||||||
|
self.get_palette_color(pixel_index as u32, palette_bank, PALRAM_OFS_FG);
|
||||||
|
if pixel_color != Rgb15::TRANSPARENT {
|
||||||
|
self.write_obj_pixel(
|
||||||
|
screen_x as usize,
|
||||||
|
screen_y as usize,
|
||||||
|
pixel_color,
|
||||||
|
&attrs,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
sprite_x = if attrs.1.h_flip() {
|
}
|
||||||
obj_w - sprite_x - 1
|
|
||||||
} else {
|
match pixel_format {
|
||||||
sprite_x
|
PixelFormat::BPP4 => render_loop!(read_pixel_index_bpp4),
|
||||||
};
|
PixelFormat::BPP8 => render_loop!(read_pixel_index_bpp8),
|
||||||
let tile_x = sprite_x % 8;
|
|
||||||
let tile_y = sprite_y % 8;
|
|
||||||
let tile_addr = tile_base
|
|
||||||
+ index2d!(u32, sprite_x / 8, sprite_y / 8, tile_array_width) * (tile_size as u32);
|
|
||||||
let pixel_index =
|
|
||||||
self.read_pixel_index(tile_addr, tile_x as u32, tile_y as u32, pixel_format);
|
|
||||||
let pixel_color =
|
|
||||||
self.get_palette_color(pixel_index as u32, palette_bank, PALRAM_OFS_FG);
|
|
||||||
if pixel_color != Rgb15::TRANSPARENT {
|
|
||||||
self.write_obj_pixel(screen_x as usize, screen_y as usize, pixel_color, &attrs);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -48,38 +48,48 @@ impl Gpu {
|
||||||
let mut start_tile_x = bg_x % 8;
|
let mut start_tile_x = bg_x % 8;
|
||||||
let tile_py = (bg_y % 8) as u32;
|
let tile_py = (bg_y % 8) as u32;
|
||||||
|
|
||||||
loop {
|
#[allow(unused)]
|
||||||
let mut map_addr =
|
macro_rules! render_loop {
|
||||||
tilemap_base + SCREEN_BLOCK_SIZE * sbb + 2 * index2d!(u32, se_row, se_column, 32);
|
($read_pixel_index:ident) => {
|
||||||
for _ in se_row..32 {
|
loop {
|
||||||
let entry = TileMapEntry(self.vram.read_16(map_addr - VRAM_ADDR));
|
let mut map_addr = tilemap_base
|
||||||
let tile_addr = tileset_base + entry.tile_index() * tile_size;
|
+ SCREEN_BLOCK_SIZE * sbb
|
||||||
|
+ 2 * index2d!(u32, se_row, se_column, 32);
|
||||||
|
for _ in se_row..32 {
|
||||||
|
let entry = TileMapEntry(self.vram.read_16(map_addr - VRAM_ADDR));
|
||||||
|
let tile_addr = tileset_base + entry.tile_index() * tile_size;
|
||||||
|
|
||||||
for tile_px in start_tile_x..8 {
|
for tile_px in start_tile_x..8 {
|
||||||
let index = self.read_pixel_index(
|
let index = self.$read_pixel_index(
|
||||||
tile_addr,
|
tile_addr,
|
||||||
if entry.x_flip() { 7 - tile_px } else { tile_px },
|
if entry.x_flip() { 7 - tile_px } else { tile_px },
|
||||||
if entry.y_flip() { 7 - tile_py } else { tile_py },
|
if entry.y_flip() { 7 - tile_py } else { tile_py },
|
||||||
pixel_format,
|
);
|
||||||
);
|
let palette_bank = match pixel_format {
|
||||||
let palette_bank = match pixel_format {
|
PixelFormat::BPP4 => entry.palette_bank() as u32,
|
||||||
PixelFormat::BPP4 => entry.palette_bank() as u32,
|
PixelFormat::BPP8 => 0u32,
|
||||||
PixelFormat::BPP8 => 0u32,
|
};
|
||||||
};
|
let color = self.get_palette_color(index as u32, palette_bank, 0);
|
||||||
let color = self.get_palette_color(index as u32, palette_bank, 0);
|
self.backgrounds[bg].line[screen_x as usize] = color;
|
||||||
self.backgrounds[bg].line[screen_x as usize] = color;
|
screen_x += 1;
|
||||||
screen_x += 1;
|
if (DISPLAY_WIDTH as u32) == screen_x {
|
||||||
if (DISPLAY_WIDTH as u32) == screen_x {
|
return;
|
||||||
return;
|
}
|
||||||
|
}
|
||||||
|
start_tile_x = 0;
|
||||||
|
map_addr += 2;
|
||||||
|
}
|
||||||
|
se_row = 0;
|
||||||
|
if bg_width == 512 {
|
||||||
|
sbb = sbb ^ 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
start_tile_x = 0;
|
};
|
||||||
map_addr += 2;
|
}
|
||||||
}
|
|
||||||
se_row = 0;
|
match pixel_format {
|
||||||
if bg_width == 512 {
|
PixelFormat::BPP4 => render_loop!(read_pixel_index_bpp4),
|
||||||
sbb = sbb ^ 1;
|
PixelFormat::BPP8 => render_loop!(read_pixel_index_bpp8),
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Reference in a new issue