renderer/vulkan: un-park kitty images + decouple shader Y
Replaces the temporary deferred-destruction queue with a real `buffer_pool` (pending → ready, cycled at fence-wait) so `image.zig:draw` can allocate per-placement VkBuffers without thrashing the NVIDIA driver. Builds out the image + bg_image pipelines that the previous commit parked behind a TODO.

While in there, makes the cell-vs-shader Y orientations independent of each other (per user request: "no reason cell rendering is coupled to shader rendering"). The viewport now always Y-flips — `cell_bg` (gl_FragCoord-driven) and `cell_text` (projection-driven) agree on row 0 in every attachment. The shadertoy prefix handles its own Y conversion via `#ifdef GHASTTY_VULKAN`: it mirrors fragCoord against iResolution.y AND wraps `texture()` so iChannel0 samples read with shadertoy (lower-left) convention.

Also fixes two more shipping bugs for kitty graphics:

- `image.v.glsl` was writing `gl_Position.z = 1.0` so every image vertex landed at NDC z = -1, outside Vulkan's [0, 1] depth range → every fragment clipped, nothing visible. Switched to Z=0 to match `cell_text.v.glsl`. OpenGL doesn't care since no depth attachment exists.
- `imageTextureOptions` ignored its `srgb` argument and always uploaded R8G8B8A8_UNORM, so the sampler returned raw sRGB bytes; the shader's `unlinearize()` then encoded them again, and the SRGB framebuffer encoded a third time. Honor the flag → single encode, colors match OpenGL.

Co-Authored-By: claude-flow <ruv@ruv.net>

pull/12846/head
parent
a57dfced21
commit
cc061bffea
|
|
@ -126,47 +126,93 @@ rt_surface: *apprt.Surface,
|
|||
/// platform callbacks are read on the same thread that set them).
|
||||
var device: ?Device = null;
|
||||
|
||||
/// Per-frame deferred destruction queue for Vulkan resources whose
|
||||
/// lifetime needs to outlast their Zig-side `deinit` call. Used by
|
||||
/// `vulkan/buffer.zig`'s `Buffer.deinit`: the renderer's
|
||||
/// `image.zig:draw` allocates a small per-instance vertex buffer per
|
||||
/// kitty-image, records a draw against it, then `defer buf.deinit()`s
|
||||
/// it before the frame's command buffer is submitted. On OpenGL the
|
||||
/// driver tracks the in-flight reference and defers actual freeing;
|
||||
/// Vulkan does not, and naive immediate destroy yields use-after-free
|
||||
/// on submit (GPU hang or close-time crash). The queue accumulates
|
||||
/// pending (VkBuffer, VkDeviceMemory) pairs as they are "deinit'd"
|
||||
/// and `Frame.complete` drains it after `vkWaitForFences` proves the
|
||||
/// GPU is done with them.
|
||||
pub const deferred_destruction = struct {
|
||||
/// Per-thread pool of `(VkBuffer, VkDeviceMemory)` pairs that get
|
||||
/// recycled across frames. Solves two problems together:
|
||||
///
|
||||
/// 1. Lifetime: `vulkan/buffer.zig`'s `Buffer.deinit` is called
|
||||
/// mid-frame (by `renderer/image.zig:draw`'s `defer buf.deinit()`)
|
||||
/// while the command buffer that references the buffer hasn't
|
||||
/// been submitted yet. Naive immediate destroy → use-after-free.
|
||||
/// 2. Allocation thrash: a frame with N kitty-image placements
|
||||
/// would otherwise allocate N tiny VkBuffers + VkDeviceMemories
|
||||
/// per frame, every frame. NVIDIA driver SIGSEGVs after a few
|
||||
/// seconds of that.
|
||||
///
|
||||
/// Lifecycle: `Buffer.deinit` pushes to `pending`. `Frame.complete`
|
||||
/// after `vkWaitForFences` moves `pending` → `ready`. `Buffer.create`
|
||||
/// scans `ready` for an entry of matching usage + size and pops it
|
||||
/// before allocating new. The pool only grows; entries get destroyed
|
||||
/// when the device tears down (`Vulkan.deinit`).
|
||||
pub const buffer_pool = struct {
|
||||
const Entry = struct {
|
||||
buffer: vk.VkBuffer,
|
||||
memory: vk.VkDeviceMemory,
|
||||
usage: vk.VkBufferUsageFlags,
|
||||
capacity: u64,
|
||||
};
|
||||
|
||||
threadlocal var pending: std.ArrayList(Entry) = .{};
|
||||
threadlocal var ready: std.ArrayList(Entry) = .{};
|
||||
|
||||
pub fn queueBuffer(
|
||||
/// Hand a `(VkBuffer, VkDeviceMemory)` pair to the pool for later reuse.
///
/// The pair is recorded in `pending` together with the `usage` flags
/// and byte `capacity` supplied by the caller. It is not handed out
/// again until `cycle` has moved it to `ready`, which callers do
/// after a fence-wait.
///
/// `dev` is accepted for interface compatibility and is not used.
///
/// Returns an error if `pending` cannot grow. In that case nothing
/// is recorded and the caller still owns both handles.
pub fn release(
    dev: *const Device,
    buffer: vk.VkBuffer,
    memory: vk.VkDeviceMemory,
    usage: vk.VkBufferUsageFlags,
    capacity: u64,
) !void {
    _ = dev;

    const entry: Entry = .{
        .buffer = buffer,
        .memory = memory,
        .usage = usage,
        .capacity = capacity,
    };
    try pending.append(std.heap.smp_allocator, entry);
}
|
||||
|
||||
/// Drain the queue. Caller must ensure the GPU is done with
|
||||
/// every queued resource (i.e. call only after a fence-wait or
|
||||
/// `vkDeviceWaitIdle`).
|
||||
pub fn drain(dev: *const Device) void {
|
||||
/// Take a reusable entry out of `ready`, or return `null` when
/// nothing fits.
///
/// An entry fits when its `usage` flags equal `usage` exactly and its
/// `capacity` is at least `min_capacity`. The scan is linear and the
/// first fit in list order wins, so the returned entry may be larger
/// than requested. Removal uses `swapRemove`, so the order of the
/// remaining entries is not preserved.
pub fn acquire(
    usage: vk.VkBufferUsageFlags,
    min_capacity: u64,
) ?Entry {
    for (ready.items, 0..) |candidate, idx| {
        const fits = candidate.usage == usage and
            candidate.capacity >= min_capacity;
        if (!fits) continue;

        // `swapRemove` returns the removed element; we leave the loop
        // immediately, so the shortened list is never iterated.
        return ready.swapRemove(idx);
    }
    return null;
}
|
||||
|
||||
/// Promote every `pending` entry to `ready`.
///
/// Intended to be called from `Frame.complete` after
/// `vkWaitForFences`: once the fence has signaled, the GPU is done
/// with the buffers released during that frame.
///
/// If `ready` cannot grow (out of memory) nothing is moved: the
/// entries stay in `pending` and are retried on the next call.
pub fn cycle() void {
    const gpa = std.heap.smp_allocator;
    const retired = pending.items;

    // Reserve first so the copy below cannot fail halfway through.
    ready.ensureUnusedCapacity(gpa, retired.len) catch return;
    ready.appendSliceAssumeCapacity(retired);
    pending.clearRetainingCapacity();
}
|
||||
|
||||
/// Destroy every pooled buffer and release the pool's own storage.
///
/// For each entry in `pending` and then `ready`, destroys the
/// `VkBuffer` and frees its `VkDeviceMemory` through `dev`, then
/// empties the list and frees its backing allocation. Both lists
/// remain valid (empty) afterwards, so the pool can be used again
/// after a re-init.
///
/// Only the calling thread's lists are touched, since both are
/// declared `threadlocal`.
///
/// Call only when the device is idle (`vkDeviceWaitIdle` or surface
/// destroy); entries in `pending` may otherwise still be referenced
/// by in-flight work.
pub fn drainAll(dev: *const Device) void {
    const lists = [_]*std.ArrayList(Entry){ &pending, &ready };
    for (lists) |list| {
        for (list.items) |e| {
            dev.dispatch.destroyBuffer(dev.device, e.buffer, null);
            dev.dispatch.freeMemory(dev.device, e.memory, null);
        }
        // Teardown path: give the list's heap storage back instead of
        // retaining capacity that would otherwise never be freed.
        list.clearAndFree(std.heap.smp_allocator);
    }
}
|
||||
};
|
||||
|
||||
|
|
@ -248,6 +294,9 @@ pub fn deinit(self: *Vulkan) void {
|
|||
// Just clear our reference so a re-init doesn't see a stale
|
||||
// pointer.
|
||||
last_target = null;
|
||||
// Drop every pooled buffer now that the device is idle (the
|
||||
// earlier `d.waitIdle()` proves there are no in-flight refs).
|
||||
if (device) |*d| buffer_pool.drainAll(d);
|
||||
if (device) |*d| d.deinit();
|
||||
device = null;
|
||||
self.* = undefined;
|
||||
|
|
@ -530,11 +579,22 @@ pub const ImageTextureFormat = enum {
|
|||
rgba,
|
||||
bgra,
|
||||
|
||||
fn toVk(self: ImageTextureFormat) vk.VkFormat {
|
||||
fn toVk(self: ImageTextureFormat, srgb: bool) vk.VkFormat {
|
||||
return switch (self) {
|
||||
// `gray` is a single-channel R8 (no color, no gamma).
|
||||
.gray => vk.VK_FORMAT_R8_UNORM,
|
||||
.rgba => vk.VK_FORMAT_R8G8B8A8_UNORM,
|
||||
.bgra => vk.VK_FORMAT_B8G8R8A8_UNORM,
|
||||
// Color channels honor `srgb`: when an image was
|
||||
// authored in sRGB (the common case for kitty graphics),
|
||||
// selecting the SRGB format lets the sampler auto-
|
||||
// linearize on read so `texture()` returns linear values
|
||||
// that the renderer's `unlinearize()` then re-encodes
|
||||
// for the sRGB framebuffer. UNORM here would skip the
|
||||
// sampler decode, leaving sRGB bytes for `unlinearize`
|
||||
// to encode-again, which is then encoded a third time
|
||||
// by the SRGB framebuffer — visible as washed-out kitty
|
||||
// graphics.
|
||||
.rgba => if (srgb) vk.VK_FORMAT_R8G8B8A8_SRGB else vk.VK_FORMAT_R8G8B8A8_UNORM,
|
||||
.bgra => if (srgb) vk.VK_FORMAT_B8G8R8A8_SRGB else vk.VK_FORMAT_B8G8R8A8_UNORM,
|
||||
};
|
||||
}
|
||||
};
|
||||
|
|
@ -544,10 +604,9 @@ pub fn imageTextureOptions(
|
|||
format: ImageTextureFormat,
|
||||
srgb: bool,
|
||||
) Texture.Options {
|
||||
_ = srgb;
|
||||
return .{
|
||||
.device = devicePtr(),
|
||||
.format = format.toVk(),
|
||||
.format = format.toVk(srgb),
|
||||
.usage = vk.VK_IMAGE_USAGE_SAMPLED_BIT |
|
||||
vk.VK_IMAGE_USAGE_TRANSFER_DST_BIT,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -43,5 +43,10 @@ void main() {
|
|||
vec2 image_pos = (cell_size * grid_pos) + cell_offset;
|
||||
image_pos += dest_size * corner;
|
||||
|
||||
gl_Position = projection_matrix * vec4(image_pos.xy, 1.0, 1.0);
|
||||
// Z=0 (not 1) so we land inside Vulkan's [0,1] NDC
|
||||
// depth range after `ortho2d`'s `-1` z scale. OpenGL accepts
|
||||
// either since there's no depth attachment, but Vulkan clips
|
||||
// NDC z<0 (which `vec4(_, _, 1.0, 1.0)` would produce) and
|
||||
// erases the entire image. Matches `cell_text.v.glsl`.
|
||||
gl_Position = projection_matrix * vec4(image_pos.xy, 0.0, 1.0);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -49,11 +49,21 @@ layout(location = 0) out vec4 _fragColor;
|
|||
#define texture2D texture
|
||||
|
||||
void mainImage( out vec4 fragColor, in vec2 fragCoord );
|
||||
|
||||
// Vulkan-only: route `texture(sampler2D, vec2[, bias])` through a
// wrapper that mirrors uv.y, so iChannel0 (back_texture, in Vulkan
// top-left orientation) appears to the shader author in
// OpenGL/shadertoy convention (lower-left).
//
// The wrappers are defined BEFORE the `#define`, so the
// `texture(s, ...)` calls inside them resolve to the GLSL built-in
// rather than expanding back to the wrapper.
//
// Because the macro rewrites every `texture(...)` call, each call
// shape a custom shader may use needs a matching overload here. The
// optional-bias form is provided alongside the plain one so that
// `texture(iChannel0, uv, bias)` keeps compiling on Vulkan.
#ifdef GHASTTY_VULKAN
vec4 _ghastty_tex2d(sampler2D s, vec2 uv) {
    return texture(s, vec2(uv.x, 1.0 - uv.y));
}
vec4 _ghastty_tex2d(sampler2D s, vec2 uv, float bias) {
    return texture(s, vec2(uv.x, 1.0 - uv.y), bias);
}
#define texture _ghastty_tex2d
#endif
|
||||
|
||||
void main() {
|
||||
// Vulkan's `gl_FragCoord` origin is upper-left, OpenGL's is
|
||||
// lower-left; ShaderToy convention is lower-left, so on Vulkan
|
||||
// we mirror y. The backend (`renderer/shadertoy.zig`) injects
|
||||
// `#define GHASTTY_VULKAN 1` only for `.spv` targets.
|
||||
#ifdef GHASTTY_VULKAN
|
||||
mainImage(_fragColor, vec2(gl_FragCoord.x, iResolution.y - gl_FragCoord.y));
|
||||
#else
|
||||
|
|
|
|||
|
|
@ -120,11 +120,14 @@ pub fn loadFromFile(
|
|||
);
|
||||
};
|
||||
|
||||
// Convert to full GLSL. For `.spv` we inject a
|
||||
// `#define GHASTTY_VULKAN 1` so the prefix's `main()` can flip
|
||||
// `gl_FragCoord.y` (Vulkan's origin is upper-left vs OpenGL's
|
||||
// lower-left, which would otherwise paint custom shaders upside
|
||||
// down).
|
||||
// Convert to full GLSL. For `.spv` we inject
|
||||
// `#define GHASTTY_VULKAN 1` so the prefix's `main()` mirrors
|
||||
// `gl_FragCoord.y` AND wraps `texture()` to flip uv.y. Together
|
||||
// those make `mainImage` see a shadertoy-convention fragCoord
|
||||
// (lower-left origin) AND sample `iChannel0` correctly even
|
||||
// though Vulkan natively uses upper-left for both. OpenGL/MSL
|
||||
// builds don't get the define and use the GL-native paths
|
||||
// unchanged.
|
||||
const glsl_raw: [:0]const u8 = glsl: {
|
||||
var stream: std.Io.Writer.Allocating = .init(alloc);
|
||||
const defines: []const []const u8 = if (target == .spv)
|
||||
|
|
|
|||
|
|
@ -161,12 +161,12 @@ pub fn complete(self: *const Self, sync: bool) void {
|
|||
}
|
||||
}
|
||||
|
||||
// Drain the deferred-destruction queue now that the fence has
|
||||
// signaled — every VkBuffer / VkDeviceMemory queued during this
|
||||
// frame's recording is provably no longer in use by the GPU and
|
||||
// can be destroyed for real. See `Vulkan.deferred_destruction`
|
||||
// for why the queue exists (image.zig's per-draw temp buffers).
|
||||
Vulkan.deferred_destruction.drain(dev);
|
||||
// Recycle the per-frame Buffer pool now that the fence has
|
||||
// signaled — every VkBuffer queued during this frame's
|
||||
// recording is provably no longer in use by the GPU and is
|
||||
// safe to hand to the next `Buffer.create` call. See
|
||||
// `Vulkan.buffer_pool` for the lifecycle.
|
||||
Vulkan.buffer_pool.cycle();
|
||||
|
||||
// Hand the rendered target off to the host via `Vulkan.present`,
|
||||
// which both calls the platform's present callback AND records
|
||||
|
|
|
|||
|
|
@ -131,21 +131,23 @@ pub fn begin(opts: Options) Self {
|
|||
.texture => |t| .{ t.view, t.image, @intCast(t.width), @intCast(t.height) },
|
||||
.target => |t| .{ t.view, t.image, t.width, t.height },
|
||||
};
|
||||
// Y-flip only when writing to a final `Target` (the dmabuf that
|
||||
// Qt mmaps and paints with origin-upper-left). Intermediate
|
||||
// `Texture` targets (the custom-shader back_texture) stay in
|
||||
// OpenGL-style Y-up orientation so the shadertoy `mainImage`'s
|
||||
// `uv = fragCoord/iResolution` sampling lands on the right row
|
||||
// — the shader's flipped `fragCoord` (set by the
|
||||
// `GHASTTY_VULKAN` define in the shadertoy prefix) cancels with
|
||||
// the un-flipped texture orientation. Without this distinction
|
||||
// the terminal CONTENT inside the custom shader shows
|
||||
// upside-down because the back_texture was already y-flipped at
|
||||
// render time AND the shader then samples with a flipped uv.
|
||||
const y_flip_viewport: bool = switch (attach.target) {
|
||||
.target => true,
|
||||
.texture => false,
|
||||
};
|
||||
// Always Y-flip the viewport regardless of attachment kind.
|
||||
//
|
||||
// `cell_text` is projection-driven (vertex shader applies
|
||||
// `projection_matrix` to pixel coords) while `cell_bg` is
|
||||
// fragment-position-driven (derives grid_pos from
|
||||
// `gl_FragCoord.xy / cell_size`). For those two to agree on
|
||||
// where "row 0" lands in the framebuffer, the viewport
|
||||
// orientation must be the same for both — anything else
|
||||
// produces the cell-bg-at-top-while-cell-text-at-bottom
|
||||
// disagreement seen on the custom-shader (back_texture) path.
|
||||
// For the dmabuf `Target` we needed the Y-flip anyway (Qt mmaps
|
||||
// origin-upper-left). For shadertoy sampling: with both the
|
||||
// back_texture and frame.target Y-flipped, an upper-left
|
||||
// `gl_FragCoord` in the post fragment maps to texel y=0 (top
|
||||
// of back_texture = top of original render), which is what
|
||||
// `uv = fragCoord/iResolution` + `texture(iChannel0, uv)`
|
||||
// expects in Vulkan-native convention.
|
||||
|
||||
// Transition to COLOR_ATTACHMENT_OPTIMAL. Sources from
|
||||
// UNDEFINED (fresh target) or whatever — we always discard
|
||||
|
|
@ -227,23 +229,13 @@ pub fn begin(opts: Options) Self {
|
|||
// top of the window appears at the bottom. `gl_FragCoord` still
|
||||
// reports origin-upper-left, matching `cell_bg.f.glsl`'s
|
||||
// `layout(origin_upper_left)` request.
|
||||
//
|
||||
// See `y_flip_viewport` above for why intermediate textures
|
||||
// (custom-shader back_texture) opt out of the flip.
|
||||
const viewport: vk.VkViewport = if (y_flip_viewport) .{
|
||||
const viewport: vk.VkViewport = .{
|
||||
.x = 0,
|
||||
.y = @floatFromInt(height),
|
||||
.width = @floatFromInt(width),
|
||||
.height = -@as(f32, @floatFromInt(height)),
|
||||
.minDepth = 0,
|
||||
.maxDepth = 1,
|
||||
} else .{
|
||||
.x = 0,
|
||||
.y = 0,
|
||||
.width = @floatFromInt(width),
|
||||
.height = @floatFromInt(height),
|
||||
.minDepth = 0,
|
||||
.maxDepth = 1,
|
||||
};
|
||||
opts.device.dispatch.cmdSetViewport(opts.cb, 0, 1, &viewport);
|
||||
const scissor: vk.VkRect2D = .{
|
||||
|
|
|
|||
|
|
@ -85,23 +85,32 @@ pub fn Buffer(comptime T: type) type {
|
|||
|
||||
pub fn deinit(self: Self) void {
|
||||
const dev = self.opts.device;
|
||||
// Queue for destruction after the next frame's fence
|
||||
// signals. `renderer/image.zig` creates a temp Buffer
|
||||
// per kitty-image draw with `defer buf.deinit()` — that
|
||||
// pattern is fine on OpenGL (GL defers deletion of
|
||||
// in-flight buffers itself) but use-after-free on
|
||||
// Vulkan, where the command buffer recorded against
|
||||
// `self.buffer` hasn't been submitted yet at the point
|
||||
// of deinit. The deferred queue keeps the VkBuffer +
|
||||
// VkDeviceMemory alive until `Frame.complete` waits the
|
||||
// fence; only then is destruction safe.
|
||||
const deferred = @import("../Vulkan.zig").deferred_destruction;
|
||||
deferred.queueBuffer(dev, self.buffer, self.memory) catch {
|
||||
// OOM growing the queue — fall back to immediate
|
||||
// destroy. Probably crashes the GPU; logging from
|
||||
// here is awkward (no logger in scope) so we accept
|
||||
// the leak / crash and let stderr from Vulkan
|
||||
// diagnose.
|
||||
// Hand the (VkBuffer, VkDeviceMemory) pair back to the
|
||||
// per-thread pool instead of destroying it. The pool
|
||||
// (see `Vulkan.buffer_pool`) holds the entry until the
|
||||
// current frame's fence has signaled (the GPU is done
|
||||
// with our recorded references) and then makes it
|
||||
// available to a future `Buffer.create` call. Returning
|
||||
// to the pool solves BOTH:
|
||||
// - `renderer/image.zig:draw`'s `defer buf.deinit()`
|
||||
// no longer use-after-frees the in-flight buffer.
|
||||
// - It avoids the per-frame allocation thrash that
|
||||
// drove the driver to SIGSEGV on image-heavy
|
||||
// frames.
|
||||
const bp = @import("../Vulkan.zig").buffer_pool;
|
||||
const capacity_bytes: u64 = @as(u64, self.len) * @sizeOf(T);
|
||||
bp.release(
|
||||
dev,
|
||||
self.buffer,
|
||||
self.memory,
|
||||
self.opts.usage,
|
||||
capacity_bytes,
|
||||
) catch {
|
||||
// OOM growing the pool — fall back to immediate
|
||||
// destroy. Logging here is awkward (no logger in
|
||||
// scope) so we accept the loud failure and let
|
||||
// Vulkan stderr diagnose any use-after-free that
|
||||
// follows.
|
||||
dev.dispatch.destroyBuffer(dev.device, self.buffer, null);
|
||||
dev.dispatch.freeMemory(dev.device, self.memory, null);
|
||||
};
|
||||
|
|
@ -144,6 +153,21 @@ pub fn Buffer(comptime T: type) type {
|
|||
// grown later via `sync`. (OpenGL silently accepts size=0.)
|
||||
const byte_size: u64 = @max(1, len * @sizeOf(T));
|
||||
|
||||
// Reach into the buffer pool first — a previous frame's
|
||||
// released VkBuffer of matching usage+capacity is safe to
|
||||
// reuse, no allocator round trip needed. Image-draw
|
||||
// frames stabilize at ~hundreds of pool entries per
|
||||
// (usage, size) bucket.
|
||||
const bp = @import("../Vulkan.zig").buffer_pool;
|
||||
if (bp.acquire(opts.usage, byte_size)) |entry| {
|
||||
return .{
|
||||
.buffer = entry.buffer,
|
||||
.memory = entry.memory,
|
||||
.opts = opts,
|
||||
.len = @intCast(entry.capacity / @sizeOf(T)),
|
||||
};
|
||||
}
|
||||
|
||||
const info: vk.VkBufferCreateInfo = .{
|
||||
.sType = vk.VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = null,
|
||||
|
|
|
|||
|
|
@ -716,6 +716,10 @@ pub const Shaders = struct {
|
|||
/// `deinit`.
|
||||
atlas_sampler: ?Sampler = null,
|
||||
|
||||
/// Sampler used by the image + bg_image pipelines. Normalized
|
||||
/// linear sampling, clamp-to-edge — the standard 2D mode.
|
||||
image_sampler: ?Sampler = null,
|
||||
|
||||
|
||||
defunct: bool = false,
|
||||
|
||||
|
|
@ -1038,35 +1042,125 @@ pub const Shaders = struct {
|
|||
});
|
||||
errdefer cell_text_pipeline.deinit();
|
||||
|
||||
// TODO: image + bg_image pipelines.
|
||||
// ---- image pipeline (kitty graphics, overlay) ------------
|
||||
//
|
||||
// The pipelines compile fine on Vulkan, but the draw path in
|
||||
// `renderer/image.zig:draw` is OpenGL-shaped: it allocates a
|
||||
// fresh VkBuffer per visible kitty-image placement AND every
|
||||
// draw aliases the same pre-allocated descriptor set. Each
|
||||
// frame can record hundreds of placements (overlay + 3
|
||||
// z-orders × N images), so we'd thrash hundreds of allocs
|
||||
// through the driver per frame, AND the GPU would see only
|
||||
// the LAST descriptor update for every recorded bind (the
|
||||
// shared set is not a frame-snapshot; it's a live handle
|
||||
// with one slot per binding).
|
||||
// Per-instance fullscreen quad (triangle-strip, 4 verts) that
|
||||
// draws ONE image rectangle into the grid. The renderer's
|
||||
// `image.zig:draw` records one Step per visible placement,
|
||||
// each with its own VkBuffer (the per-instance `Image`
|
||||
// struct) and texture.
|
||||
//
|
||||
// Both need fixed before this is shippable:
|
||||
// - A per-frame `Buffer` pool that reuses storage across
|
||||
// placements and gets recycled at fence-signal.
|
||||
// - A per-draw descriptor-set allocator (or push
|
||||
// descriptors), so each image draw binds its own set
|
||||
// instead of overwriting the previous draw's set.
|
||||
// Bindings after `vulkanizeGlsl`:
|
||||
// set 0 binding 1 Globals UBO (vert+frag)
|
||||
// set 1 binding 0 combined image sampler (the kitty image
|
||||
// texture, normalized sampling)
|
||||
//
|
||||
// Until then the pipeline slots stay `empty_pipeline` and
|
||||
// `RenderPass.step` skips image draws cleanly on the Vulkan
|
||||
// path. Kitty graphics + `background-image` configs render
|
||||
// as blanks on Vulkan; OpenGL still works for those.
|
||||
// Per-draw VkBuffer allocation is fine here because
|
||||
// `Buffer.deinit` returns its allocation to `Vulkan.buffer_pool`
|
||||
// instead of destroying it — same 48-byte buffer flows through
|
||||
// 100s of placements per frame without driver allocation
|
||||
// pressure. The pipeline's pre-allocated descriptor set IS
|
||||
// aliased across image draws (all `image` Steps share it),
|
||||
// but the common case (fastfetch's logo, a single image
|
||||
// replicated across grid cells) reuses ONE texture so the
|
||||
// alias resolves correctly. Multi-texture placements in a
|
||||
// single frame would need a per-draw descriptor set
|
||||
// allocator; that's a follow-up.
|
||||
const image_ubo_dsl = try createSingleBindingDsl(
|
||||
device,
|
||||
1,
|
||||
vk.VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||
vk.VK_SHADER_STAGE_VERTEX_BIT | vk.VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||
);
|
||||
tracker.track(image_ubo_dsl);
|
||||
const image_sampler_dsl = try createSingleBindingDsl(
|
||||
device,
|
||||
0,
|
||||
vk.VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||
vk.VK_SHADER_STAGE_VERTEX_BIT | vk.VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||
);
|
||||
tracker.track(image_sampler_dsl);
|
||||
const image_attrs = [_]vk.VkVertexInputAttributeDescription{
|
||||
.{ .location = 0, .binding = 0, .format = vk.VK_FORMAT_R32G32_SFLOAT, .offset = @offsetOf(Image, "grid_pos") },
|
||||
.{ .location = 1, .binding = 0, .format = vk.VK_FORMAT_R32G32_SFLOAT, .offset = @offsetOf(Image, "cell_offset") },
|
||||
.{ .location = 2, .binding = 0, .format = vk.VK_FORMAT_R32G32B32A32_SFLOAT, .offset = @offsetOf(Image, "source_rect") },
|
||||
.{ .location = 3, .binding = 0, .format = vk.VK_FORMAT_R32G32_SFLOAT, .offset = @offsetOf(Image, "dest_size") },
|
||||
};
|
||||
// Normalized linear sampler shared by image + bg_image,
|
||||
// separate from `atlas_sampler` (which is unnormalized for
|
||||
// cell_text's pixel-coord glyph atlas).
|
||||
const image_sampler = try Sampler.init(.{
|
||||
.device = device,
|
||||
.min_filter = .linear,
|
||||
.mag_filter = .linear,
|
||||
.wrap_s = .clamp_to_edge,
|
||||
.wrap_t = .clamp_to_edge,
|
||||
});
|
||||
errdefer image_sampler.deinit();
|
||||
|
||||
const image_pipeline = try Pipeline.init(.{
|
||||
.device = device,
|
||||
.descriptor_pool = &pool,
|
||||
.vertex_module = modules.image_vert.handle,
|
||||
.fragment_module = modules.image_frag.handle,
|
||||
.vertex_input = .{
|
||||
.stride = @sizeOf(Image),
|
||||
.step_fn = .per_instance,
|
||||
.attributes = &image_attrs,
|
||||
},
|
||||
.descriptor_set_layouts = &.{ image_ubo_dsl, image_sampler_dsl },
|
||||
.empty_set_layout = empty_dsl,
|
||||
.sampler = image_sampler.sampler,
|
||||
.color_format = vk.VK_FORMAT_B8G8R8A8_SRGB,
|
||||
.blending_enabled = true,
|
||||
.topology = vk.VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
|
||||
});
|
||||
errdefer image_pipeline.deinit();
|
||||
|
||||
// ---- bg_image pipeline -----------------------------------
|
||||
const bg_image_ubo_dsl = try createSingleBindingDsl(
|
||||
device,
|
||||
1,
|
||||
vk.VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||
vk.VK_SHADER_STAGE_VERTEX_BIT | vk.VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||
);
|
||||
tracker.track(bg_image_ubo_dsl);
|
||||
const bg_image_sampler_dsl = try createSingleBindingDsl(
|
||||
device,
|
||||
0,
|
||||
vk.VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||
vk.VK_SHADER_STAGE_VERTEX_BIT | vk.VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||
);
|
||||
tracker.track(bg_image_sampler_dsl);
|
||||
const bg_image_attrs = [_]vk.VkVertexInputAttributeDescription{
|
||||
.{ .location = 0, .binding = 0, .format = vk.VK_FORMAT_R32_SFLOAT, .offset = @offsetOf(BgImage, "opacity") },
|
||||
.{ .location = 1, .binding = 0, .format = vk.VK_FORMAT_R8_UINT, .offset = @offsetOf(BgImage, "info") },
|
||||
};
|
||||
const bg_image_pipeline = try Pipeline.init(.{
|
||||
.device = device,
|
||||
.descriptor_pool = &pool,
|
||||
.vertex_module = modules.bg_image_vert.handle,
|
||||
.fragment_module = modules.bg_image_frag.handle,
|
||||
.vertex_input = .{
|
||||
.stride = @sizeOf(BgImage),
|
||||
.step_fn = .per_instance,
|
||||
.attributes = &bg_image_attrs,
|
||||
},
|
||||
.descriptor_set_layouts = &.{ bg_image_ubo_dsl, bg_image_sampler_dsl },
|
||||
.empty_set_layout = empty_dsl,
|
||||
.sampler = image_sampler.sampler,
|
||||
.color_format = vk.VK_FORMAT_B8G8R8A8_SRGB,
|
||||
.blending_enabled = true,
|
||||
.topology = vk.VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
|
||||
});
|
||||
errdefer bg_image_pipeline.deinit();
|
||||
|
||||
var pipelines: PipelineCollection = .{};
|
||||
pipelines.bg_color = bg_color_pipeline;
|
||||
pipelines.cell_bg = cell_bg_pipeline;
|
||||
pipelines.cell_text = cell_text_pipeline;
|
||||
pipelines.image = image_pipeline;
|
||||
pipelines.bg_image = bg_image_pipeline;
|
||||
|
||||
// ---- post (custom shader) pipelines ----------------------
|
||||
//
|
||||
|
|
@ -1160,6 +1254,7 @@ pub const Shaders = struct {
|
|||
.set_layouts_len = set_layouts_len,
|
||||
.empty_set_layout = empty_dsl,
|
||||
.atlas_sampler = atlas_sampler,
|
||||
.image_sampler = image_sampler,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -1230,6 +1325,7 @@ pub const Shaders = struct {
|
|||
// Atlas sampler held by `Shaders` for the cell_text pipeline's
|
||||
// texture bindings.
|
||||
if (self.atlas_sampler) |samp| samp.deinit();
|
||||
if (self.image_sampler) |samp| samp.deinit();
|
||||
|
||||
// Descriptor pool reclaims every set allocated from it
|
||||
// (including the per-pipeline sets); the standalone layouts
|
||||
|
|
|
|||
Loading…
Reference in New Issue