direct3d: hoist flow types out, provide cell type, don't block GPU

Changes d3d11 to accept an array of cells to render that have already been
rendered to a texture and converted to texture coordinates.  This minimizes
the time we have to map the shader cell buffer which blocks the GPU from
using it.
This commit is contained in:
Jonathan Marler 2025-01-15 14:12:42 -07:00 committed by CJ van den Berg
parent 96c56c7124
commit ce068ee0dc
3 changed files with 173 additions and 183 deletions

View file

@ -73,6 +73,10 @@ float4 PixelMain(float4 sv_pos : SV_POSITION) : SV_TARGET {
uint2 texture_coord = glyph_cell_pos * cell_size + cell_pixel;
float4 glyph_texel = glyph_texture.Load(int3(texture_coord, 0));
float2 pos = sv_pos.xy / (cell_size * float2(col_count, row_count));
return float4(Pixel(pos, bg, fg, glyph_texel.a), 1.0);
float2 pos = (sv_pos.xy - 0.5) / (float2(cell_size) * float2(col_count, row_count));
float4 p = float4(Pixel(pos, bg, fg, glyph_texel.a), 1.0);
// return red/green for out-of-bound pixels for now
if (pos.x > 1) return float4(1,0,0,1);
if (pos.y > 1) return float4(0,1,0,1);
return p;
}

View file

@ -2,14 +2,10 @@ const builtin = @import("builtin");
const std = @import("std");
const win32 = @import("win32").everything;
const win32ext = @import("win32ext.zig");
const vaxis = @import("vaxis");
const dwrite = @import("dwrite.zig");
const GlyphIndexCache = @import("GlyphIndexCache.zig");
const TextRenderer = @import("DwriteRenderer.zig");
const RGB = @import("color").RGB;
const xterm = @import("xterm.zig");
const XY = @import("xy.zig").XY;
pub const Font = dwrite.Font;
@ -40,16 +36,21 @@ const Rgba8 = packed struct(u32) {
pub fn initRgb(r: u8, g: u8, b: u8) Color {
return .{ .r = r, .g = g, .b = b, .a = 255 };
}
pub fn initRgba(r: u8, g: u8, b: u8, a: u8) Color {
return .{ .r = r, .g = g, .b = b, .a = a };
}
};
pub const Cell = shader.Cell;
// types shared with the shader
const shader = struct {
pub const shader = struct {
const GridConfig = extern struct {
cell_size: [2]u32,
col_count: u32,
row_count: u32,
};
const Cell = extern struct {
pub const Cell = extern struct {
glyph_index: u32,
background: Rgba8,
foreground: Rgba8,
@ -58,7 +59,9 @@ const shader = struct {
const swap_chain_flags: u32 = @intFromEnum(win32.DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT);
pub fn init() void {
pub fn init(opt: struct {
shader: ?[:0]const u8 = null,
}) void {
std.debug.assert(!global.init_called);
global.init_called = true;
dwrite.init();
@ -68,7 +71,7 @@ pub fn init() void {
else => false,
};
global.d3d, const debug = D3d.init(.{ .debug = try_debug });
global.d3d, const debug = D3d.init(.{ .try_debug = try_debug });
if (debug) {
const info = win32ext.queryInterface(global.d3d.device, win32.ID3D11InfoQueue);
@ -87,7 +90,7 @@ pub fn init() void {
}
}
global.shaders = Shaders.init();
global.shaders = Shaders.init(opt.shader);
{
const desc: win32.D3D11_BUFFER_DESC = .{
@ -114,12 +117,12 @@ pub fn init() void {
}
}
pub fn setBackground(state: *const WindowState, rgb: RGB) void {
global.background = .{ .r = rgb.r, .g = rgb.b, .b = rgb.b, .a = 255 };
pub fn setBackground(state: *const WindowState, c: Color) void {
global.background = c;
const color: win32.DXGI_RGBA = .{
.r = @as(f32, @floatFromInt(rgb.r)) / 255,
.g = @as(f32, @floatFromInt(rgb.g)) / 255,
.b = @as(f32, @floatFromInt(rgb.b)) / 255,
.r = @as(f32, @floatFromInt(c.r)) / 255,
.g = @as(f32, @floatFromInt(c.g)) / 255,
.b = @as(f32, @floatFromInt(c.b)) / 255,
.a = 1.0,
};
const hr = state.swap_chain.IDXGISwapChain1.SetBackgroundColor(&color);
@ -140,49 +143,9 @@ pub const WindowState = struct {
const swap_chain = initSwapChain(global.d3d.device, hwnd);
return .{ .swap_chain = swap_chain };
}
};
pub fn paint(
hwnd: win32.HWND,
state: *WindowState,
font: Font,
screen: *const vaxis.Screen,
) void {
var ps: win32.PAINTSTRUCT = undefined;
_ = win32.BeginPaint(hwnd, &ps) orelse fatalWin32("BeginPaint", win32.GetLastError());
defer if (0 == win32.EndPaint(hwnd, &ps)) fatalWin32("EndPaint", win32.GetLastError());
const client_size = getClientSize(u32, hwnd);
{
const swap_chain_size = getSwapChainSize(state.swap_chain);
if (swap_chain_size.x != client_size.x or swap_chain_size.y != client_size.y) {
log.debug(
"SwapChain Buffer Resize from {}x{} to {}x{}",
.{ swap_chain_size.x, swap_chain_size.y, client_size.x, client_size.y },
);
global.d3d.context.ClearState();
if (state.maybe_target_view) |target_view| {
_ = target_view.IUnknown.Release();
state.maybe_target_view = null;
}
global.d3d.context.Flush();
if (swap_chain_size.x == 0) @panic("possible? no need to resize?");
if (swap_chain_size.y == 0) @panic("possible? no need to resize?");
{
const hr = state.swap_chain.IDXGISwapChain.ResizeBuffers(
0,
@intCast(client_size.x),
@intCast(client_size.y),
.UNKNOWN,
swap_chain_flags,
);
if (hr < 0) fatalHr("ResizeBuffers", hr);
}
}
}
// TODO: this should take a utf8 graphme instead
pub fn generateGlyph(state: *WindowState, font: Font, codepoint: u21) u32 {
// for now we'll just use 1 texture and leverage the entire thing
const texture_cell_count: XY(u16) = getD3d11TextureMaxCellCount(font.cell_size);
const texture_cell_count_total: u32 =
@ -217,6 +180,69 @@ pub fn paint(
break :blk &(state.glyph_index_cache.?);
};
switch (glyph_index_cache.reserve(
global.glyph_cache_arena.allocator(),
codepoint,
) catch |e| oom(e)) {
.newly_reserved => |reserved| {
// var render_success = false;
// defer if (!render_success) state.glyph_index_cache.remove(reserved.index);
const pos: XY(u16) = cellPosFromIndex(reserved.index, texture_cell_count.x);
const coord = coordFromCellPos(font.cell_size, pos);
global.text_renderer.render(
global.d3d.device,
global.d3d.context,
global.d2d_factory,
font,
state.glyph_texture.obj,
codepoint,
coord,
);
return reserved.index;
},
.already_reserved => |index| return index,
}
}
};
pub fn paint(
state: *WindowState,
client_size: XY(u32),
font: Font,
row_count: u16,
col_count: u16,
top: u16,
cells: []const Cell,
) void {
{
const swap_chain_size = getSwapChainSize(state.swap_chain);
if (swap_chain_size.x != client_size.x or swap_chain_size.y != client_size.y) {
log.debug(
"SwapChain Buffer Resize from {}x{} to {}x{}",
.{ swap_chain_size.x, swap_chain_size.y, client_size.x, client_size.y },
);
global.d3d.context.ClearState();
if (state.maybe_target_view) |target_view| {
_ = target_view.IUnknown.Release();
state.maybe_target_view = null;
}
global.d3d.context.Flush();
if (swap_chain_size.x == 0) @panic("possible? no need to resize?");
if (swap_chain_size.y == 0) @panic("possible? no need to resize?");
{
const hr = state.swap_chain.IDXGISwapChain.ResizeBuffers(
0,
client_size.x,
client_size.y,
.UNKNOWN,
swap_chain_flags,
);
if (hr < 0) fatalHr("ResizeBuffers", hr);
}
}
}
const shader_col_count: u16 = @intCast(@divTrunc(client_size.x + font.cell_size.x - 1, font.cell_size.x));
const shader_row_count: u16 = @intCast(@divTrunc(client_size.y + font.cell_size.y - 1, font.cell_size.y));
@ -238,30 +264,8 @@ pub fn paint(
config.row_count = shader_row_count;
}
const space_glyph = generateGlyph(
font,
glyph_index_cache,
texture_cell_count.x,
" ",
state.glyph_texture.obj,
);
const populate_col_count: u16 = @min(screen.width, shader_col_count);
const populate_row_count: u16 = @min(screen.height, shader_row_count);
// we loop through and cache all the glyphs before mapping the cell buffer and potentially
// blocking the gpu while we're doing expensive text rendering
for (0..populate_row_count) |row| {
const row_offset = row * screen.width;
for (0..populate_col_count) |col| {
const screen_cell = &screen.buf[row_offset + col];
_ = generateGlyph(
font,
glyph_index_cache,
texture_cell_count.x,
screen_cell.char.grapheme,
state.glyph_texture.obj,
);
}
}
const copy_col_count: u16 = @min(col_count, shader_col_count);
const blank_space_glyph_index = state.generateGlyph(font, ' ');
const cell_count: u32 = @as(u32, shader_col_count) * @as(u32, shader_row_count);
state.shader_cells.updateCount(cell_count);
@ -279,47 +283,22 @@ pub fn paint(
const cells_shader: [*]shader.Cell = @ptrCast(@alignCast(mapped.pData));
for (0..shader_row_count) |row| {
const src_row_offset = row * screen.width;
const dst_row_offset = row * @as(usize, shader_col_count);
const src_col_count = if (row < screen.height) populate_col_count else 0;
for (0..src_col_count) |col| {
const screen_cell = &screen.buf[src_row_offset + col];
const codepoint = std.unicode.wtf8Decode(screen_cell.char.grapheme) catch std.unicode.replacement_character;
const glyph_index = blk: {
switch (glyph_index_cache.reserve(global.glyph_cache_arena.allocator(), codepoint) catch |e| oom(e)) {
.newly_reserved => |reserved| {
// should never happen unless there' more characters than the cache can hold
// var render_success = false;
// defer if (!render_success) state.glyph_index_cache.remove(reserved.index);
const pos: XY(u16) = cellPosFromIndex(reserved.index, texture_cell_count.x);
const coord = coordFromCellPos(font.cell_size, pos);
global.text_renderer.render(
global.d3d.device,
global.d3d.context,
global.d2d_factory,
font,
state.glyph_texture.obj,
codepoint,
coord,
const src_row = blk: {
const r = top + row;
break :blk r - if (r >= row_count) row_count else 0;
};
const src_row_offset = src_row * col_count;
const dst_row_offset = row * shader_col_count;
const copy_len = if (row < row_count) copy_col_count else 0;
@memcpy(
cells_shader[dst_row_offset..][0..copy_len],
cells[src_row_offset..][0..copy_len],
);
break :blk reserved.index;
},
.already_reserved => |i| break :blk i,
}
};
cells_shader[dst_row_offset + col] = .{
.glyph_index = glyph_index,
.background = shaderColorFromVaxis(screen_cell.style.bg),
.foreground = shaderColorFromVaxis(screen_cell.style.fg),
};
}
for (src_col_count..shader_col_count) |col| {
cells_shader[dst_row_offset + col] = .{
.glyph_index = space_glyph,
@memset(cells_shader[dst_row_offset..][copy_len..shader_col_count], .{
.glyph_index = blank_space_glyph_index,
.background = global.background,
.foreground = global.background,
};
}
});
}
}
@ -353,55 +332,12 @@ pub fn paint(
}
}
fn generateGlyph(
font: Font,
glyph_index_cache: *GlyphIndexCache,
texture_column_count: u16,
grapheme_utf8: []const u8,
texture: *win32.ID3D11Texture2D,
) u32 {
const codepoint = if (std.unicode.utf8ValidateSlice(grapheme_utf8))
std.unicode.wtf8Decode(grapheme_utf8) catch std.unicode.replacement_character
else
std.unicode.replacement_character;
switch (glyph_index_cache.reserve(
global.glyph_cache_arena.allocator(),
codepoint,
) catch |e| oom(e)) {
.newly_reserved => |reserved| {
// var render_success = false;
// defer if (!render_success) state.glyph_index_cache.remove(reserved.index);
const pos: XY(u16) = cellPosFromIndex(reserved.index, texture_column_count);
const coord = coordFromCellPos(font.cell_size, pos);
global.text_renderer.render(
global.d3d.device,
global.d3d.context,
global.d2d_factory,
font,
texture,
codepoint,
coord,
);
return reserved.index;
},
.already_reserved => |index| return index,
}
}
fn shaderColorFromVaxis(color: vaxis.Color) Rgba8 {
return switch (color) {
.default => .{ .r = 0, .g = 0, .b = 0, .a = 255 },
.index => |idx| return @bitCast(@as(u32, xterm.colors[idx]) << 8 | 0xff),
.rgb => |rgb| .{ .r = rgb[0], .g = rgb[1], .b = rgb[2], .a = 255 },
};
}
const D3d = struct {
device: *win32.ID3D11Device,
context: *win32.ID3D11DeviceContext,
context1: *win32.ID3D11DeviceContext1,
pub fn init(opt: struct { debug: bool }) struct { D3d, bool } {
pub fn init(opt: struct { try_debug: bool }) struct { D3d, bool } {
const levels = [_]win32.D3D_FEATURE_LEVEL{
.@"11_0",
};
@ -419,7 +355,7 @@ const D3d = struct {
};
for (configs) |config| {
const skip_config = config.debug and !opt.debug;
const skip_config = config.debug and !opt.try_debug;
if (skip_config) continue;
var device: *win32.ID3D11Device = undefined;
@ -549,8 +485,24 @@ fn initSwapChain(
const Shaders = struct {
vertex: *win32.ID3D11VertexShader,
pixel: *win32.ID3D11PixelShader,
pub fn init() Shaders {
const shader_source = @embedFile("terminal.hlsl");
pub fn init(maybe_file_path: ?[:0]const u8) Shaders {
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
defer arena.deinit();
const shader_source: []const u8 = blk: {
if (maybe_file_path) |file_path| {
var file = std.fs.cwd().openFileZ(file_path, .{}) catch |e| std.debug.panic(
"failed to open --shader '{s}' with {s}",
.{ file_path, @errorName(e) },
);
defer file.close();
break :blk file.readToEndAlloc(arena.allocator(), std.math.maxInt(usize)) catch |e| std.debug.panic(
"read --shader '{s}' failed with {s}",
.{ file_path, @errorName(e) },
);
}
break :blk @embedFile("builtin.hlsl");
};
const file = maybe_file_path orelse "builtin.hlsl";
var vs_blob: *win32.ID3DBlob = undefined;
var error_blob: ?*win32.ID3DBlob = null;
@ -558,7 +510,7 @@ const Shaders = struct {
const hr = win32.D3DCompile(
shader_source.ptr,
shader_source.len,
null,
file,
null,
null,
"VertexMain",
@ -580,7 +532,7 @@ const Shaders = struct {
const hr = win32.D3DCompile(
shader_source.ptr,
shader_source.len,
null,
file,
null,
null,
"PixelMain",
@ -783,9 +735,6 @@ fn createCellBuffer(device: *win32.ID3D11Device, count: u32) *win32.ID3D11Buffer
}
fn getD3d11TextureMaxCellCount(cell_size: XY(u16)) XY(u16) {
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// small size so we can just render the whole texture for development
//if (true) return .{ .x = 80, .y = 500 };
comptime std.debug.assert(win32.D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION == 16384);
return .{
.x = @intCast(@divTrunc(win32.D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION, cell_size.x)),

View file

@ -19,6 +19,7 @@ const input = @import("input");
const windowmsg = @import("windowmsg.zig");
const render = @import("d3d11.zig");
const xterm = @import("xterm.zig");
const FontFace = @import("FontFace.zig");
const XY = @import("xy.zig").XY;
@ -77,6 +78,9 @@ const global = struct {
var screen_arena: std.heap.ArenaAllocator = std.heap.ArenaAllocator.init(std.heap.page_allocator);
var screen: vaxis.Screen = .{};
var render_cells_arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
var render_cells: std.ArrayListUnmanaged(render.Cell) = .{};
};
const window_style_ex = win32.WINDOW_EX_STYLE{
.APPWINDOW = 1,
@ -88,7 +92,7 @@ const window_style = win32.WS_OVERLAPPEDWINDOW;
pub fn init() void {
std.debug.assert(!global.init_called);
global.init_called = true;
render.init();
render.init(.{});
}
const Icons = struct {
@ -993,7 +997,35 @@ fn WndProc(
const state = stateFromHwnd(hwnd);
const dpi = win32.dpiFromHwnd(hwnd);
const font = getFont(dpi, getFontSize(), getFontFace());
render.paint(hwnd, &state.render_state, font, &global.screen);
const client_size = getClientSize(u32, hwnd);
global.render_cells.resize(
global.render_cells_arena.allocator(),
global.screen.buf.len,
) catch |e| oom(e);
for (global.screen.buf, global.render_cells.items) |*screen_cell, *render_cell| {
const codepoint = if (std.unicode.utf8ValidateSlice(screen_cell.char.grapheme))
std.unicode.wtf8Decode(screen_cell.char.grapheme) catch std.unicode.replacement_character
else
std.unicode.replacement_character;
render_cell.* = .{
.glyph_index = state.render_state.generateGlyph(
font,
codepoint,
),
.background = renderColorFromVaxis(screen_cell.style.bg),
.foreground = renderColorFromVaxis(screen_cell.style.fg),
};
}
render.paint(
&state.render_state,
client_size,
font,
global.screen.height,
global.screen.width,
0,
global.render_cells.items,
);
return 0;
},
win32.WM_GETDPISCALEDSIZE => {
@ -1073,9 +1105,10 @@ fn WndProc(
return WM_APP_EXIT_RESULT;
},
WM_APP_SET_BACKGROUND => {
const rgb = RGB.from_u24(@intCast(0xffffff & wparam));
render.setBackground(
&stateFromHwnd(hwnd).render_state,
RGB.from_u24(@intCast(0xffffff & wparam)),
render.Color.initRgb(rgb.r, rgb.g, rgb.b),
);
win32.invalidateHwnd(hwnd);
return WM_APP_SET_BACKGROUND_RESULT;
@ -1195,10 +1228,14 @@ fn sendResize(
}) catch @panic("pid send failed");
}
pub const Rgb8 = struct { r: u8, g: u8, b: u8 };
fn toColorRef(rgb: Rgb8) u32 {
return (@as(u32, rgb.r) << 0) | (@as(u32, rgb.g) << 8) | (@as(u32, rgb.b) << 16);
fn renderColorFromVaxis(color: vaxis.Color) render.Color {
return switch (color) {
.default => render.Color.initRgb(0, 0, 0),
.index => |idx| return @bitCast(@as(u32, xterm.colors[idx]) << 8 | 0xff),
.rgb => |rgb| render.Color.initRgb(rgb[0], rgb[1], rgb[2]),
};
}
fn fatalWin32(what: []const u8, err: win32.WIN32_ERROR) noreturn {
std.debug.panic("{s} failed with {}", .{ what, err.fmt() });
}