renderer/vulkan: per-draw Frame context with fence-paced submit
Adds `vulkan/Frame.zig` — the per-draw recording lifecycle the
renderer drives once per visible frame. Counterpart to
`opengl/Frame.zig`, but with explicit GPU sync that the GL path
got for free from the driver.
What lands:
- `begin(opts, device, target)` — begins recording into the
caller-provided command buffer.
- `complete(sync)` — ends recording, submits to the queue with
the caller-provided fence, and waits on the fence before
returning (always — see below for why `sync` is currently
informational).
Sync model: **fence-only, wait-on-complete**. We don't use
semaphores because the host owns presentation — we hand it a
dmabuf fd at `present` time, and the host's compositor handles
display sync. What libghostty needs to guarantee is "the GPU is
done writing to this dmabuf before the host imports it", which is
exactly what `vkWaitForFences` after submit accomplishes.
`sync == false` is accepted by the interface for parity with
`opengl/Frame.zig` but currently still waits — handing a dmabuf
to the host without the wait would race the GPU. The argument is
the extension point if/when we add multi-frame pipelining; today
swap_chain_count is 1 and every frame is sequential.
Ownership: the command buffer and fence are caller-owned (the
top-level `Vulkan.zig` will hold them as per-surface state) and
passed into `begin` via `Options`. Frame borrows them per draw.
Caller is responsible for `vkResetFences` / fresh CB state
between `complete` and the next `begin`.
The `renderPass()` method is intentionally absent — landing it
requires `vulkan/RenderPass.zig`, which wraps
`vkCmdBeginRendering` / `vkCmdEndRendering` (Vulkan 1.3 dynamic
rendering, no `VkRenderPass` object) and the actual command-
recording layer. Follow-up commit. Callers trying to record into
a Frame today fail to compile, which is intentional — the
recording path isn't ready.
Dispatch additions: 5 new entries — `vkCreateFence`,
`vkDestroyFence`, `vkWaitForFences`, `vkResetFences`,
`vkResetCommandBuffer` (the last lets the renderer reuse one CB
across frames instead of alloc/free per frame).
Verification: compile-checked via a temporary switch; the only
error was the expected downstream `DerivedConfig` error from the
stub substitution. The switch has since been reverted. The OpenGL
build is still silent and clean.
Co-Authored-By: claude-flow <ruv@ruv.net>
pull/12846/head
parent
ebe48bd4cd
commit
e936f6d2d4
|
|
@ -69,6 +69,7 @@ pub const Texture = @import("vulkan/Texture.zig");
|
|||
pub const Target = @import("vulkan/Target.zig");
|
||||
pub const CommandPool = @import("vulkan/CommandPool.zig");
|
||||
pub const Pipeline = @import("vulkan/Pipeline.zig");
|
||||
pub const Frame = @import("vulkan/Frame.zig");
|
||||
pub const shaders = @import("vulkan/shaders.zig");
|
||||
|
||||
const bufferpkg = @import("vulkan/buffer.zig");
|
||||
|
|
|
|||
|
|
@ -147,6 +147,14 @@ pub const Dispatch = struct {
|
|||
// device-level resolution like any other device function.
|
||||
getMemoryFdKHR: std.meta.Child(vk.PFN_vkGetMemoryFdKHR),
|
||||
getImageSubresourceLayout: std.meta.Child(vk.PFN_vkGetImageSubresourceLayout),
|
||||
|
||||
// Per-frame sync (fence + command-buffer reset) — used by
|
||||
// `vulkan/Frame.zig`.
|
||||
createFence: std.meta.Child(vk.PFN_vkCreateFence),
|
||||
destroyFence: std.meta.Child(vk.PFN_vkDestroyFence),
|
||||
waitForFences: std.meta.Child(vk.PFN_vkWaitForFences),
|
||||
resetFences: std.meta.Child(vk.PFN_vkResetFences),
|
||||
resetCommandBuffer: std.meta.Child(vk.PFN_vkResetCommandBuffer),
|
||||
};
|
||||
|
||||
// ---- fields ---------------------------------------------------------
|
||||
|
|
@ -379,6 +387,16 @@ pub fn init(
|
|||
try dl.load(vk.PFN_vkGetMemoryFdKHR, "vkGetMemoryFdKHR");
|
||||
const get_image_subresource_layout =
|
||||
try dl.load(vk.PFN_vkGetImageSubresourceLayout, "vkGetImageSubresourceLayout");
|
||||
const create_fence =
|
||||
try dl.load(vk.PFN_vkCreateFence, "vkCreateFence");
|
||||
const destroy_fence =
|
||||
try dl.load(vk.PFN_vkDestroyFence, "vkDestroyFence");
|
||||
const wait_for_fences =
|
||||
try dl.load(vk.PFN_vkWaitForFences, "vkWaitForFences");
|
||||
const reset_fences =
|
||||
try dl.load(vk.PFN_vkResetFences, "vkResetFences");
|
||||
const reset_command_buffer =
|
||||
try dl.load(vk.PFN_vkResetCommandBuffer, "vkResetCommandBuffer");
|
||||
|
||||
return .{
|
||||
.platform = platform,
|
||||
|
|
@ -431,6 +449,11 @@ pub fn init(
|
|||
.destroyPipeline = destroy_pipeline,
|
||||
.getMemoryFdKHR = get_memory_fd_khr,
|
||||
.getImageSubresourceLayout = get_image_subresource_layout,
|
||||
.createFence = create_fence,
|
||||
.destroyFence = destroy_fence,
|
||||
.waitForFences = wait_for_fences,
|
||||
.resetFences = reset_fences,
|
||||
.resetCommandBuffer = reset_command_buffer,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,152 @@
|
|||
//! Per-draw recording context. Lifecycle: `begin` → caller records
|
||||
//! commands (via the eventual `renderPass()` accessor) → `complete`.
|
||||
//!
|
||||
//! Unlike `opengl/Frame.zig` (which is a zero-state wrapper around
|
||||
//! the implicit GL context), Vulkan's Frame drives the explicit
|
||||
//! sync model: a fence is signaled when the GPU finishes the
|
||||
//! frame's submit, and `complete` waits on it before handing the
|
||||
//! dmabuf fd to the host. That's required for correctness — the
|
||||
//! host shouldn't sample memory the GPU is still writing — and
|
||||
//! acceptable for perf because terminal frames cap at ~60Hz.
|
||||
//!
|
||||
//! Ownership: the command buffer and fence are owned by the
|
||||
//! top-level renderer (`Vulkan.zig`, not yet wired) and passed into
|
||||
//! `begin` via `Options`. Frame just borrows them. The top-level
|
||||
//! is responsible for creating/destroying them and for resetting
|
||||
//! the fence to unsignaled state before `begin` (resetting it in
|
||||
//! this layer would blur who owns it).
|
||||
//!
|
||||
//! Why not semaphores? With dmabuf export to the host (rather than
|
||||
//! a `VkSwapchain` we own), we have no acquire/present semaphore
|
||||
//! pair to sync against. Fence-only is the right model when
|
||||
//! libghostty hands the host a "GPU is done writing to this fd"
|
||||
//! guarantee at present time. The host's own compositor handles
|
||||
//! display sync from there.
|
||||
//!
|
||||
//! `renderPass()` will land alongside `vulkan/RenderPass.zig` in a
|
||||
//! follow-up commit. For now it's not declared — calling code that
|
||||
//! tries to record into a frame will fail to compile, which is
|
||||
//! intentional: the recording path isn't ready.
|
||||
//!
|
||||
//! Counterpart: `src/renderer/opengl/Frame.zig`.
|
||||
|
||||
const Self = @This();
|
||||
|
||||
const std = @import("std");
|
||||
const vk = @import("vulkan").c;
|
||||
|
||||
const Device = @import("Device.zig");
|
||||
const Target = @import("Target.zig");
|
||||
|
||||
const log = std.log.scoped(.vulkan);
|
||||
|
||||
/// Caller-owned Vulkan handles that a frame borrows for a single draw.
pub const Options = struct {
    /// Command buffer the frame records into. `begin` starts recording
    /// on it as-is, so the caller must hand it over already reset.
    cb: vk.VkCommandBuffer,

    /// Fence handed to the queue submit in `complete` and then waited
    /// on. The caller must reset it to unsignaled before `begin`.
    fence: vk.VkFence,
};
|
||||
|
||||
/// Errors a frame can return.
pub const Error = error{
    /// A Vulkan call returned a status other than `VK_SUCCESS`.
    ///
    /// Only `begin` (`vkBeginCommandBuffer`) returns this. `complete`
    /// returns `void`: failures of `vkEndCommandBuffer`,
    /// `vkQueueSubmit` and `vkWaitForFences` are logged there instead
    /// of being propagated.
    VulkanFailed,
};
|
||||
|
||||
/// Device whose dispatch table, queue and `VkDevice` handle the frame
/// uses. Borrowed; not owned by the frame.
device: *const Device,

/// Render target passed to `begin`. Stored on the frame; neither
/// `begin` nor `complete` reads it.
target: *Target,

/// Command buffer borrowed from `Options.cb`.
cb: vk.VkCommandBuffer,

/// Fence borrowed from `Options.fence`.
fence: vk.VkFence,
|
||||
|
||||
/// Start recording a frame into the borrowed command buffer.
///
/// Recording is started with `ONE_TIME_SUBMIT`, because the recording
/// is submitted once by `complete` before the next `begin`. This
/// function does not reset the command buffer or the fence; the
/// caller does both beforehand (see `Options`).
///
/// Returns `error.VulkanFailed`, after logging the result code, when
/// `vkBeginCommandBuffer` does not return `VK_SUCCESS`.
pub fn begin(
    opts: Options,
    device: *const Device,
    target: *Target,
) Error!Self {
    const info: vk.VkCommandBufferBeginInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .pNext = null,
        .flags = vk.VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
        .pInheritanceInfo = null,
    };

    const result = device.dispatch.beginCommandBuffer(opts.cb, &info);
    if (result == vk.VK_SUCCESS) {
        // Recording is live; the frame only borrows the handles.
        return .{
            .device = device,
            .target = target,
            .cb = opts.cb,
            .fence = opts.fence,
        };
    }

    log.err("vkBeginCommandBuffer (frame) failed: result={}", .{result});
    return error.VulkanFailed;
}
|
||||
|
||||
/// Finish recording, submit the command buffer, and block until the
/// GPU has finished executing it.
///
/// Runs, in order: `vkEndCommandBuffer`, `vkQueueSubmit` (signalling
/// `self.fence`, no semaphores), then `vkWaitForFences` with no
/// timeout. The wait is what guarantees the GPU is done writing the
/// target before the host imports its dmabuf.
///
/// `sync` is ignored: the wait happens whether it is true or false.
/// The parameter exists for parity with `opengl/Frame.zig`; skipping
/// the wait would let the host race the GPU. It may later drive
/// multi-frame pipelining.
///
/// Failures are logged, not returned. If ending the command buffer or
/// submitting fails, the function returns early and does not wait.
pub fn complete(self: *const Self, sync: bool) void {
    _ = sync;
    const device = self.device;

    const end_result = device.dispatch.endCommandBuffer(self.cb);
    if (end_result != vk.VK_SUCCESS) {
        log.err("vkEndCommandBuffer (frame) failed: result={}", .{end_result});
        return;
    }

    // Single command buffer, no wait or signal semaphores: the fence
    // is the only synchronization primitive used for the frame.
    const submit: vk.VkSubmitInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .pNext = null,
        .waitSemaphoreCount = 0,
        .pWaitSemaphores = null,
        .pWaitDstStageMask = null,
        .commandBufferCount = 1,
        .pCommandBuffers = &self.cb,
        .signalSemaphoreCount = 0,
        .pSignalSemaphores = null,
    };
    const submit_result = device.dispatch.queueSubmit(device.queue, 1, &submit, self.fence);
    if (submit_result != vk.VK_SUCCESS) {
        log.err("vkQueueSubmit (frame) failed: result={}", .{submit_result});
        return;
    }

    // UINT64_MAX as the timeout means "wait indefinitely".
    const no_timeout: u64 = std.math.maxInt(u64);
    const wait_result = device.dispatch.waitForFences(
        device.device,
        1,
        &self.fence,
        vk.VK_TRUE,
        no_timeout,
    );
    if (wait_result != vk.VK_SUCCESS) {
        log.err("vkWaitForFences (frame) failed: result={}", .{wait_result});
    }
}
|
||||
|
||||
test {
    // Reference every declaration in this file so each one is
    // analyzed when the test build runs.
    const testing = std.testing;
    testing.refAllDecls(@This());
}
|
||||
Loading…
Reference in New Issue