[perf] core: gpu: optimize & clean finalize_scanline

Benchmark report showing the performance improvement:
run_60_frames           time:   [176.85 ms 184.70 ms 191.47 ms]
                        change: [-11.727% -7.6991% -4.2923%] (p = 0.00 < 0.05)
                        Performance has improved.


Former-commit-id: ec91f286433c6798a848fa9727a12da38e62fc61
Former-commit-id: 8d0feea1e0d6b8230c71872bb4458aeec2f7d0e7
This commit is contained in:
Michel Heily 2020-11-04 15:17:21 -08:00 committed by MishMish
parent 22f544718a
commit 05b1ff10e3
5 changed files with 140 additions and 132 deletions

View file

@ -25,13 +25,11 @@ impl RenderLayerKind {
} }
} }
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq, Clone, Copy)]
pub struct RenderLayer { pub struct RenderLayer {
pub kind: RenderLayerKind, pub kind: RenderLayerKind,
pub priority: u16, pub priority: u16,
pub pixel: Rgb15, pub pixel: Rgb15,
/// priority used to distinguish between sprites, backgrounds and backdrop
pub priority_by_type: u8,
} }
impl RenderLayer { impl RenderLayer {
@ -40,7 +38,6 @@ impl RenderLayer {
kind: RenderLayerKind::from_usize(1 << bg).unwrap(), kind: RenderLayerKind::from_usize(1 << bg).unwrap(),
pixel: pixel, pixel: pixel,
priority: priority, priority: priority,
priority_by_type: 1,
} }
} }
@ -49,7 +46,6 @@ impl RenderLayer {
kind: RenderLayerKind::Objects, kind: RenderLayerKind::Objects,
pixel: pixel, pixel: pixel,
priority: priority, priority: priority,
priority_by_type: 0,
} }
} }
@ -58,7 +54,6 @@ impl RenderLayer {
kind: RenderLayerKind::Backdrop, kind: RenderLayerKind::Backdrop,
pixel: pixel, pixel: pixel,
priority: 4, priority: 4,
priority_by_type: 2,
} }
} }
@ -85,7 +80,7 @@ mod tests {
layers.push(RenderLayer::background(2, pixel, 2)); layers.push(RenderLayer::background(2, pixel, 2));
layers.push(RenderLayer::backdrop(backdrop)); layers.push(RenderLayer::backdrop(backdrop));
layers.push(RenderLayer::objects(pixel, 1)); layers.push(RenderLayer::objects(pixel, 1));
layers.sort_by_key(|k| (k.priority, k.priority_by_type)); layers.sort_by_key(|k| (k.priority, k.kind));
assert_eq!(RenderLayer::background(3, pixel, 0), layers[0]); assert_eq!(RenderLayer::background(3, pixel, 0), layers[0]);
} }
} }

View file

@ -92,6 +92,7 @@ impl GpuState {
use GpuState::*; use GpuState::*;
#[repr(transparent)]
#[derive(Serialize, Deserialize, Clone)] #[derive(Serialize, Deserialize, Clone)]
pub struct Scanline { pub struct Scanline {
inner: Vec<Rgb15>, inner: Vec<Rgb15>,
@ -136,6 +137,17 @@ pub struct Background {
mosaic_first_row: Scanline, mosaic_first_row: Scanline,
} }
impl Background {
#[inline]
pub fn get_priority(&self) -> u16 {
self.bgcnt.priority()
}
#[inline]
pub fn pixel_at(&self, x: usize) -> Rgb15 {
self.line[x]
}
}
#[derive(Debug, Default, Copy, Clone)] #[derive(Debug, Default, Copy, Clone)]
pub struct AffineMatrix { pub struct AffineMatrix {
pub pa: i32, pub pa: i32,

View file

@ -148,10 +148,12 @@ impl BlendFlags {
BlendFlags::BG3, BlendFlags::BG3,
]; ];
#[inline]
pub fn from_bg(bg: usize) -> BlendFlags { pub fn from_bg(bg: usize) -> BlendFlags {
Self::BG_LAYER_FLAG[bg] Self::BG_LAYER_FLAG[bg]
} }
#[inline]
pub fn obj_enabled(&self) -> bool { pub fn obj_enabled(&self) -> bool {
self.contains(BlendFlags::OBJ) self.contains(BlendFlags::OBJ)
} }

View file

@ -38,15 +38,8 @@ impl From<WindowFlags> for BlendFlags {
} }
impl Gpu { impl Gpu {
/// Returns background indexes in render order. Filters range by bg_start..=bg_end.
fn sorted_backgrounds(&self, bg_start: usize, bg_end: usize) -> ArrayVec<[usize; 4]> {
let mut backgrounds: ArrayVec<[usize; 4]> = (bg_start..=bg_end).collect();
backgrounds.sort_by_key(|bg| (self.backgrounds[*bg].bgcnt.priority(), *bg));
backgrounds
}
/// Filters a background indexes array by whether they're active /// Filters a background indexes array by whether they're active
fn active_backgrounds( fn active_backgrounds_for_window(
&self, &self,
backgrounds: &[usize], backgrounds: &[usize],
window_flags: WindowFlags, window_flags: WindowFlags,
@ -54,7 +47,7 @@ impl Gpu {
backgrounds backgrounds
.iter() .iter()
.copied() .copied()
.filter(|bg| self.dispcnt.enable_bg(*bg) && window_flags.bg_enabled(*bg)) .filter(|bg| window_flags.bg_enabled(*bg))
.collect() .collect()
} }
@ -73,29 +66,30 @@ impl Gpu {
/// Composes the render layers into a final scanline while applying needed special effects, and renders it to the frame buffer /// Composes the render layers into a final scanline while applying needed special effects, and renders it to the frame buffer
pub fn finalize_scanline(&mut self, bg_start: usize, bg_end: usize) { pub fn finalize_scanline(&mut self, bg_start: usize, bg_end: usize) {
let backdrop_color = Rgb15(self.palette_ram.read_16(0)); let backdrop_color = Rgb15(self.palette_ram.read_16(0));
let sorted_backgrounds = self.sorted_backgrounds(bg_start, bg_end);
// filter out disabled backgrounds and sort by priority
// the backgrounds are sorted once for the entire scanline
let mut sorted_backgrounds: ArrayVec<[usize; 4]> = (bg_start..=bg_end)
.filter(|bg| self.dispcnt.enable_bg(*bg))
.collect();
sorted_backgrounds.sort_by_key(|bg| (self.backgrounds[*bg].bgcnt.priority(), *bg));
let y = self.vcount; let y = self.vcount;
let output = unsafe {
let ptr = self.frame_buffer[y * DISPLAY_WIDTH..].as_mut_ptr();
std::slice::from_raw_parts_mut(ptr, DISPLAY_WIDTH)
};
if !self.dispcnt.is_using_windows() { if !self.dispcnt.is_using_windows() {
let win = WindowInfo::new(WindowType::WinNone, WindowFlags::all());
let backgrounds = self.active_backgrounds(&sorted_backgrounds, win.flags);
for x in 0..DISPLAY_WIDTH { for x in 0..DISPLAY_WIDTH {
let pixel = self.compose_pixel(x, y, &win, &backgrounds, backdrop_color); let win = WindowInfo::new(WindowType::WinNone, WindowFlags::all());
output[x] = pixel.to_rgb24(); self.finalize_pixel(x, y, &win, &sorted_backgrounds, backdrop_color);
} }
} else { } else {
let mut occupied = [false; DISPLAY_WIDTH]; let mut occupied = [false; DISPLAY_WIDTH];
let mut occupied_count = 0; let mut occupied_count = 0;
if self.dispcnt.enable_window0() && self.win0.contains_y(y) { if self.dispcnt.enable_window0() && self.win0.contains_y(y) {
let win = WindowInfo::new(WindowType::Win0, self.win0.flags); let win = WindowInfo::new(WindowType::Win0, self.win0.flags);
let backgrounds = self.active_backgrounds(&sorted_backgrounds, win.flags); let backgrounds =
self.active_backgrounds_for_window(&sorted_backgrounds, win.flags);
for x in self.win0.left()..self.win0.right() { for x in self.win0.left()..self.win0.right() {
let pixel = self.compose_pixel(x, y, &win, &backgrounds, backdrop_color); self.finalize_pixel(x, y, &win, &backgrounds, backdrop_color);
output[x] = pixel.to_rgb24();
occupied[x] = true; occupied[x] = true;
occupied_count += 1; occupied_count += 1;
} }
@ -105,25 +99,27 @@ impl Gpu {
} }
if self.dispcnt.enable_window1() && self.win1.contains_y(y) { if self.dispcnt.enable_window1() && self.win1.contains_y(y) {
let win = WindowInfo::new(WindowType::Win1, self.win1.flags); let win = WindowInfo::new(WindowType::Win1, self.win1.flags);
let backgrounds = self.active_backgrounds(&sorted_backgrounds, win.flags); let backgrounds =
self.active_backgrounds_for_window(&sorted_backgrounds, win.flags);
for x in self.win1.left()..self.win1.right() { for x in self.win1.left()..self.win1.right() {
if !occupied[x] { if occupied[x] {
let pixel = self.compose_pixel(x, y, &win, &backgrounds, backdrop_color); continue;
output[x] = pixel.to_rgb24(); }
self.finalize_pixel(x, y, &win, &backgrounds, backdrop_color);
occupied[x] = true; occupied[x] = true;
occupied_count += 1; occupied_count += 1;
} }
} }
}
if occupied_count == DISPLAY_WIDTH { if occupied_count == DISPLAY_WIDTH {
return; return;
} }
let win_out = WindowInfo::new(WindowType::WinOut, self.winout_flags); let win_out = WindowInfo::new(WindowType::WinOut, self.winout_flags);
let win_out_backgrounds = self.active_backgrounds(&sorted_backgrounds, win_out.flags); let win_out_backgrounds =
self.active_backgrounds_for_window(&sorted_backgrounds, win_out.flags);
if self.dispcnt.enable_obj_window() { if self.dispcnt.enable_obj_window() {
let win_obj = WindowInfo::new(WindowType::WinObj, self.winobj_flags); let win_obj = WindowInfo::new(WindowType::WinObj, self.winobj_flags);
let win_obj_backgrounds = let win_obj_backgrounds =
self.active_backgrounds(&sorted_backgrounds, win_obj.flags); self.active_backgrounds_for_window(&sorted_backgrounds, win_obj.flags);
for x in 0..DISPLAY_WIDTH { for x in 0..DISPLAY_WIDTH {
if occupied[x] { if occupied[x] {
continue; continue;
@ -131,28 +127,14 @@ impl Gpu {
let obj_entry = self.obj_buffer_get(x, y); let obj_entry = self.obj_buffer_get(x, y);
if obj_entry.window { if obj_entry.window {
// WinObj // WinObj
let pixel = self.compose_pixel( self.finalize_pixel(x, y, &win_obj, &win_obj_backgrounds, backdrop_color);
x, // occupied[x] = true;
y, // occupied_count += 1;
&win_obj,
&win_obj_backgrounds,
backdrop_color,
);
output[x] = pixel.to_rgb24();
occupied[x] = true;
occupied_count += 1;
} else { } else {
// WinOut // WinOut
let pixel = self.compose_pixel( self.finalize_pixel(x, y, &win_out, &win_out_backgrounds, backdrop_color);
x, // occupied[x] = true;
y, // occupied_count += 1;
&win_out,
&win_out_backgrounds,
backdrop_color,
);
output[x] = pixel.to_rgb24();
occupied[x] = true;
occupied_count += 1;
} }
} }
} else { } else {
@ -160,106 +142,118 @@ impl Gpu {
if occupied[x] { if occupied[x] {
continue; continue;
} }
let pixel = self.finalize_pixel(x, y, &win_out, &win_out_backgrounds, backdrop_color);
self.compose_pixel(x, y, &win_out, &win_out_backgrounds, backdrop_color); // occupied[x] = true;
output[x] = pixel.to_rgb24(); // occupied_count += 1;
occupied[x] = true;
occupied_count += 1;
} }
} }
} }
} }
fn compose_pixel( fn finalize_pixel(
&self, &mut self,
x: usize, x: usize,
y: usize, y: usize,
win: &WindowInfo, win: &WindowInfo,
backgrounds: &[usize], backgrounds: &[usize],
backdrop_color: Rgb15, backdrop_color: Rgb15,
) -> Rgb15 { ) {
let mut layers = ArrayVec::<[_; 7]>::new(); let output = unsafe {
unsafe { let ptr = self.frame_buffer[y * DISPLAY_WIDTH..].as_mut_ptr();
layers.push_unchecked(RenderLayer::backdrop(backdrop_color)); std::slice::from_raw_parts_mut(ptr, DISPLAY_WIDTH)
} };
for bg in backgrounds.iter() { // The backdrop layer is the default
let bg_pixel = self.backgrounds[*bg].line[x]; let backdrop_layer = RenderLayer::backdrop(backdrop_color);
if !bg_pixel.is_transparent() {
unsafe { // Backgrounds are already sorted
layers.push_unchecked(RenderLayer::background( // lets start by taking the first 2 backgrounds that have an opaque pixel at x
*bg, let mut it = backgrounds
bg_pixel, .iter()
self.backgrounds[*bg].bgcnt.priority(), .filter(|i| !self.backgrounds[**i].line[x].is_transparent())
)); .take(2);
}
let mut top_layer = it.next().map_or(backdrop_layer, |bg| {
let background = &self.backgrounds[*bg];
RenderLayer::background(*bg, background.pixel_at(x), background.get_priority())
});
let mut bot_layer = it.next().map_or(backdrop_layer, |bg| {
let background = &self.backgrounds[*bg];
RenderLayer::background(*bg, background.pixel_at(x), background.get_priority())
});
drop(it);
// Now that the backgrounds are taken care of, we need to check whether there is an object pixel that takes priority over one of the layers
let obj_entry = self.obj_buffer_get(x, y);
if win.flags.obj_enabled() && self.dispcnt.enable_obj() && !obj_entry.color.is_transparent()
{
let obj_layer = RenderLayer::objects(obj_entry.color, obj_entry.priority);
if obj_layer.priority <= top_layer.priority {
bot_layer = top_layer;
top_layer = obj_layer;
} else if obj_layer.priority <= bot_layer.priority {
bot_layer = obj_layer;
} }
} }
let obj_entry = self.obj_buffer_get(x, y); let obj_entry = self.obj_buffer_get(x, y);
if self.dispcnt.enable_obj() && win.flags.obj_enabled() && !obj_entry.color.is_transparent() let obj_alpha_blend = top_layer.is_object() && obj_entry.alpha;
let top_flags = self.bldcnt.top();
let bot_flags = self.bldcnt.bottom();
let sfx_enabled = (self.bldcnt.mode() != BldMode::BldNone || obj_alpha_blend)
&& top_flags.contains_render_layer(&top_layer); // sfx must at least have a first target configured
if win.flags.sfx_enabled() && sfx_enabled {
if top_layer.is_object()
&& obj_alpha_blend
&& bot_flags.contains_render_layer(&bot_layer)
{ {
unsafe { output[x] = self.do_alpha(top_layer.pixel, bot_layer.pixel).to_rgb24();
layers.push_unchecked(RenderLayer::objects(obj_entry.color, obj_entry.priority))
}
}
// now, sort the layers
layers.sort_by_key(|k| (k.priority, k.priority_by_type));
let top_pixel = layers[0].pixel; // self.layer_to_pixel(x, y, &layers[0]);
let mut result = top_pixel;
'blend: loop {
/* loop hack so we can leave this block early */
let obj_sfx = obj_entry.alpha && layers[0].is_object();
if win.flags.sfx_enabled() || obj_sfx {
let top_layer_flags = self.bldcnt.top();
let bot_layer_flags = self.bldcnt.bottom();
if !(top_layer_flags.contains_render_layer(&layers[0]) || obj_sfx) {
break 'blend;
}
// if this is object alpha blending, ensure that the bottom layer contains a color to blend with
let blend_mode = if obj_sfx
&& layers.len() > 1
&& bot_layer_flags.contains_render_layer(&layers[1])
{
BldMode::BldAlpha
} else { } else {
self.bldcnt.mode() let (top_layer, bot_layer) = (top_layer, bot_layer);
};
match blend_mode { match self.bldcnt.mode() {
BldMode::BldAlpha => { BldMode::BldAlpha => {
let bot_pixel = if layers.len() > 1 { output[x] = if bot_flags.contains_render_layer(&bot_layer) {
if !(bot_layer_flags.contains_render_layer(&layers[1])) { self.do_alpha(top_layer.pixel, bot_layer.pixel).to_rgb24()
break 'blend;
}
layers[1].pixel //self.layer_to_pixel(x, y, &layers[1])
} else { } else {
backdrop_color // alpha blending must have a 2nd target
}; top_layer.pixel.to_rgb24()
}
}
BldMode::BldWhite => output[x] = self.do_brighten(top_layer.pixel).to_rgb24(),
BldMode::BldBlack => output[x] = self.do_darken(top_layer.pixel).to_rgb24(),
BldMode::BldNone => output[x] = top_layer.pixel.to_rgb24(),
}
}
} else {
// no blending, just use the top pixel
output[x] = top_layer.pixel.to_rgb24();
}
}
#[inline]
fn do_alpha(&self, upper: Rgb15, lower: Rgb15) -> Rgb15 {
let eva = self.bldalpha.eva(); let eva = self.bldalpha.eva();
let evb = self.bldalpha.evb(); let evb = self.bldalpha.evb();
result = top_pixel.blend_with(bot_pixel, eva, evb); upper.blend_with(lower, eva, evb)
} }
BldMode::BldWhite => {
#[inline]
fn do_brighten(&self, c: Rgb15) -> Rgb15 {
let evy = self.bldy; let evy = self.bldy;
result = top_pixel.blend_with(Rgb15::WHITE, 16 - evy, evy); c.blend_with(Rgb15::WHITE, 16 - evy, evy)
} }
BldMode::BldBlack => {
#[inline]
fn do_darken(&self, c: Rgb15) -> Rgb15 {
let evy = self.bldy; let evy = self.bldy;
result = top_pixel.blend_with(Rgb15::BLACK, 16 - evy, evy); c.blend_with(Rgb15::BLACK, 16 - evy, evy)
}
BldMode::BldNone => {
result = top_pixel;
}
}
}
break 'blend;
}
result
} }
} }

View file

@ -76,4 +76,9 @@ impl WindowInfo {
pub fn new(typ: WindowType, flags: WindowFlags) -> WindowInfo { pub fn new(typ: WindowType, flags: WindowFlags) -> WindowInfo {
WindowInfo { typ, flags } WindowInfo { typ, flags }
} }
#[inline]
pub fn is_none(&self) -> bool {
self.typ == WindowType::WinNone
}
} }