fix(audit): pass 2 — regression cleanup + remaining HIGH/MEDIUM
Pass 2 of /audit-code on PR #16. Both variants build clean. Critical (Pass 1 regressions): - buffer.zig:grow released the old VkBuffer BEFORE allocating the new one; if create() then failed the caller's deinit re-released the freed handles via the pool, crashing the driver. Now creates first and routes the old buffer through release after success. - Vulkan.zig:beginFrame partial failure (resetDescriptorPool fails after resetFences succeeds) left frame_fence unsignaled with no pending submit; the next Vulkan.deinit's waitForFences hung indefinitely. Fence reset is now LAST so an earlier failure leaves it signaled, plus an errdefer empty-submit re-signals it if Frame.begin itself fails post-reset. High: - RenderPass.step on pipeline reuse used to fall back to the pipeline's static descriptor set when step_pool.allocate failed — re-introducing the exact corruption the step_pool was added to prevent. Now drops the offending draw with a loud log instead. Same for missing step_pool. Medium: - vulkanizeGlsl `texture()` rewrite walks paren depth by raw bytes; added copySkippable inside the args so comments/strings containing `(` or `)` don't desync the depth tracker. - buffer_pool.cycle's OOM fallback held the pool mutex across vkDeviceWaitIdle, blocking every other renderer thread's release/acquire. Pending list is now moved into a local OUTSIDE the lock before the device wait. - SubsurfacePresenter binds zwp_linux_dmabuf_v1 at hardcoded v3; now skips the bind on a v1/v2 compositor (and clamps via std::min) to avoid a fatal protocol error that would tear down the wl_display. - presentVulkanDmabuf had no upper bound on `stride`; pathological stride near UINT32_MAX × height=65536 reached mmap with a ~280 TB request. Added stride <= MAX_DIM*16 cap. - drainVulkan early-return on m_hidden left m_pendingDmabuf.fd>=0; next post-Show present spuriously bumped m_droppedFrames every Hide/Show cycle. Now clears the slot under the lock. - shadertoy prefix-injection no-newline fallback used to write `#define` BEFORE `#version`, producing GLSL that violates the "#version must be first" rule. Now scans past the `#version` directive (number + optional profile) and synthesizes a newline before injecting defines. - shadertoy SPV target now validates non-empty + SPIR-V magic word (0x07230203) before alignedAlloc/memcpy; a zero-length file used to silently survive and crash vkCreateShaderModule. - Module.initFromSpirv mirrors the same checks for any future caller that bypasses glslang/shadertoy. Low: - Frame.complete's three-branch buffer_pool.cycle invocation collapsed to two branches (with/without deviceWaitIdle). - Buffer.deinit OOM fallback now logs via `log.warn` (the comment claiming "no logger in scope" was wrong; `log` IS imported). - GhosttySurface.h comment block referencing the removed 2 ms safety-net poll updated to point at m_droppedFrames instead. - m_droppedFrames now also counts legacy QImage path overwrites, matching the documented scope of the counter. - vulkanizeGlsl unnormalized_sampler_names list documented as a reserved-name constraint for user shaders (atlas_grayscale / atlas_color must not be used as user sampler names). - isHorizSpace + processIncludes whitespace check now accept \v and \f (legal GLSL preprocessor whitespace). - DescriptorPool: added error.InvalidPoolConfig for caller-side argument errors, distinct from driver-side error.VulkanFailed. - SubsurfacePresenter discoverGlobals now checks wl_display_roundtrip_queue's return value and logs disconnect- during-startup failures. Nit: - First-dmabuf log uses `unsigned long long` + `%llx` for drm_modifier (was `unsigned long` + `%lx` which truncates upper 32 bits on ILP32). Co-Authored-By: claude-flow <ruv@ruv.net>pull/12846/head
parent
0f80583825
commit
7b13d526cb
|
|
@ -1638,9 +1638,9 @@ void GhosttySurface::presentVulkanDmabuf(
|
|||
expected, true, std::memory_order_relaxed)) {
|
||||
std::fprintf(stderr,
|
||||
"[ghastty] first dmabuf for surface=%p: fd=%d %ux%u "
|
||||
"stride=%u fourcc=0x%08x mod=0x%lx image_backed=%d path=%s\n",
|
||||
"stride=%u fourcc=0x%08x mod=0x%llx image_backed=%d path=%s\n",
|
||||
static_cast<void *>(this), dmabuf_fd, width, height, stride,
|
||||
drm_format, static_cast<unsigned long>(drm_modifier),
|
||||
drm_format, static_cast<unsigned long long>(drm_modifier),
|
||||
image_backed ? 1 : 0, useSubsurface ? "subsurface" : "qimage");
|
||||
}
|
||||
|
||||
|
|
@ -1657,9 +1657,17 @@ void GhosttySurface::presentVulkanDmabuf(
|
|||
// terminal — and check that stride*height doesn't exceed
|
||||
// SIZE_MAX before promoting.
|
||||
constexpr quint32 MAX_DIM = 65536;
|
||||
// Cap stride at MAX_DIM × 4 (BGRA8) × a small slack factor for
|
||||
// tiled formats: ~4× the width-derived minimum is enough for any
|
||||
// legitimate vendor tiling, and it keeps `stride * height`
|
||||
// below ~64 GiB even at MAX_DIM. The previous lower-only bound
|
||||
// let a pathological renderer with stride near UINT32_MAX and
|
||||
// height=MAX_DIM reach mmap with a ~280 TB request.
|
||||
constexpr quint32 MAX_STRIDE = MAX_DIM * 16;
|
||||
if (dmabuf_fd < 0 || width == 0 || height == 0) return;
|
||||
if (width > MAX_DIM || height > MAX_DIM) return;
|
||||
if (stride < static_cast<quint64>(width) * 4) return;
|
||||
if (stride > MAX_STRIDE) return;
|
||||
// stride*height as 64-bit and check the size_t fit explicitly.
|
||||
const quint64 bytes64 = static_cast<quint64>(stride) * height;
|
||||
if (bytes64 > std::numeric_limits<std::size_t>::max()) return;
|
||||
|
|
@ -1737,10 +1745,23 @@ void GhosttySurface::presentVulkanDmabuf(
|
|||
|
||||
const double dpr_now = m_fbDpr.load(std::memory_order_acquire);
|
||||
if (dpr_now > 0) owned.setDevicePixelRatio(dpr_now);
|
||||
bool overwrote_legacy = false;
|
||||
{
|
||||
QMutexLocker lock(&m_pendingMutex);
|
||||
overwrote_legacy = !m_pending.isNull();
|
||||
m_pending = std::move(owned);
|
||||
}
|
||||
if (overwrote_legacy) {
|
||||
const auto count = m_droppedFrames.fetch_add(
|
||||
1, std::memory_order_relaxed) + 1;
|
||||
if (count <= 3 || count % 60 == 0) {
|
||||
std::fprintf(stderr,
|
||||
"[ghastty] surface=%p dropped frame "
|
||||
"(legacy QImage path, total=%llu)\n",
|
||||
static_cast<void *>(this),
|
||||
static_cast<unsigned long long>(count));
|
||||
}
|
||||
}
|
||||
QMetaObject::invokeMethod(this, "drainVulkan", Qt::QueuedConnection);
|
||||
}
|
||||
|
||||
|
|
@ -1749,7 +1770,17 @@ void GhosttySurface::drainVulkan() {
|
|||
// under the mutex, then dispatch it to the presenter outside the
|
||||
// lock so a renderer-thread `presentVulkanDmabuf` parking the
|
||||
// next frame doesn't block on wl_display_flush.
|
||||
if (m_hidden.load(std::memory_order_acquire)) return;
|
||||
if (m_hidden.load(std::memory_order_acquire)) {
|
||||
// Clear the parked descriptor on hide so the next post-Show
|
||||
// present doesn't see a "stale frame still pending" state and
|
||||
// spuriously bump m_droppedFrames every Hide/Show cycle. The
|
||||
// fd itself is libghostty-owned (per ABI it's only valid for
|
||||
// the duration of the original presentVulkanDmabuf call), so
|
||||
// there's nothing to release here beyond marking the slot empty.
|
||||
QMutexLocker lock(&m_pendingMutex);
|
||||
m_pendingDmabuf.fd = -1;
|
||||
return;
|
||||
}
|
||||
if (m_useSubsurface.load(std::memory_order_acquire) &&
|
||||
m_subsurfacePresenter) {
|
||||
PendingDmabuf frame;
|
||||
|
|
|
|||
|
|
@ -170,8 +170,9 @@ public:
|
|||
// mmap+memcpy'd QImage) and wakes the GUI thread via
|
||||
// `QMetaObject::invokeMethod(this, drainVulkan, Qt::QueuedConnection)`.
|
||||
// The GUI thread either commits the dmabuf to the wl_subsurface
|
||||
// (zero-copy) or paints the QImage (fallback). A 2 ms safety-net
|
||||
// poll catches anything `invokeMethod` ever fails to deliver.
|
||||
// (zero-copy) or paints the QImage (fallback). The dropped-frame
|
||||
// counter `m_droppedFrames` makes any genuine queue-loss visible
|
||||
// (zero in the steady state).
|
||||
Q_INVOKABLE void presentVulkanDmabuf(
|
||||
int dmabuf_fd,
|
||||
quint32 drm_format,
|
||||
|
|
|
|||
|
|
@ -86,16 +86,29 @@ void registryGlobal(void *data, wl_registry *registry, uint32_t name,
|
|||
g->subcompositor = static_cast<wl_subcompositor *>(
|
||||
wl_registry_bind(registry, name, &wl_subcompositor_interface, 1));
|
||||
} else if (std::strcmp(interface, zwp_linux_dmabuf_v1_interface.name) == 0) {
|
||||
// v3 has `create_immed`, which we want (synchronous wl_buffer
|
||||
// creation — the v2 async `create` + `created`/`failed` event
|
||||
// dance would add a layer of callback machinery for no real win
|
||||
// in our renderer's strict-fd-validity scenario). v4 adds the
|
||||
// dynamic format/modifier feedback dance; we don't need it yet.
|
||||
g->dmabuf = static_cast<zwp_linux_dmabuf_v1 *>(wl_registry_bind(
|
||||
registry, name, &zwp_linux_dmabuf_v1_interface, 3));
|
||||
// Add the listener immediately so the modifier events queued by
|
||||
// the bind get delivered when the dispatch loop continues.
|
||||
zwp_linux_dmabuf_v1_add_listener(g->dmabuf, &kDmabufListener, g);
|
||||
// We want at least v3 for `create_immed` (synchronous wl_buffer
|
||||
// creation — v1/v2 have only the async `create` + `created`/
|
||||
// `failed` dance). A compositor that only advertises v1/v2
|
||||
// can't satisfy our protocol assumptions; binding at v3 against
|
||||
// such a compositor would protocol-error and tear down the
|
||||
// entire wl_display. Skip the bind in that case so the
|
||||
// legacy QImage fallback engages cleanly.
|
||||
if (version < 3) {
|
||||
std::fprintf(stderr,
|
||||
"[ghastty] wayland: linux-dmabuf-v1 advertised at "
|
||||
"version %u; need >= 3 for create_immed, falling back "
|
||||
"to QImage path\n",
|
||||
version);
|
||||
} else {
|
||||
// Cap at v3 — v4 adds the dynamic format/modifier feedback
|
||||
// dance which we don't consume.
|
||||
const uint32_t v = std::min<uint32_t>(version, 3u);
|
||||
g->dmabuf = static_cast<zwp_linux_dmabuf_v1 *>(wl_registry_bind(
|
||||
registry, name, &zwp_linux_dmabuf_v1_interface, v));
|
||||
// Add the listener immediately so the modifier events queued
|
||||
// by the bind get delivered when the dispatch loop continues.
|
||||
zwp_linux_dmabuf_v1_add_listener(g->dmabuf, &kDmabufListener, g);
|
||||
}
|
||||
} else if (std::strcmp(interface, wp_viewporter_interface.name) == 0) {
|
||||
g->viewporter = static_cast<wp_viewporter *>(
|
||||
wl_registry_bind(registry, name, &wp_viewporter_interface, 1));
|
||||
|
|
@ -125,14 +138,24 @@ PresenterGlobals *discoverGlobals(wl_display *display) {
|
|||
// Roundtrip 1: bind compositor/subcompositor/dmabuf. Inside the
|
||||
// registry callback we attach the dmabuf listener immediately, so
|
||||
// any format/modifier events that arrive in the same dispatch
|
||||
// pass fire on it.
|
||||
wl_display_roundtrip_queue(display, queue);
|
||||
// pass fire on it. A negative return means the wl_display
|
||||
// disconnected mid-startup; subsequent tryCreate calls fall
|
||||
// through to the QImage path (g->compositor etc. stay null).
|
||||
if (wl_display_roundtrip_queue(display, queue) < 0) {
|
||||
std::fprintf(stderr,
|
||||
"[ghastty] wayland: discoverGlobals roundtrip 1 failed; "
|
||||
"subsurface present path disabled\n");
|
||||
}
|
||||
wl_registry_destroy(registry);
|
||||
// Roundtrip 2: belt-and-suspenders for any compositor that defers
|
||||
// the modifier events past the bind reply (most don't, but some
|
||||
// batch them). After this returns the modifier table is fully
|
||||
// populated and frozen for the process lifetime.
|
||||
if (globals.dmabuf) wl_display_roundtrip_queue(display, queue);
|
||||
if (globals.dmabuf && wl_display_roundtrip_queue(display, queue) < 0) {
|
||||
std::fprintf(stderr,
|
||||
"[ghastty] wayland: discoverGlobals roundtrip 2 failed; "
|
||||
"modifier table may be incomplete\n");
|
||||
}
|
||||
|
||||
std::size_t total_mods = 0;
|
||||
for (const auto &kv : globals.modifiers) total_mods += kv.second.size();
|
||||
|
|
|
|||
|
|
@ -206,25 +206,39 @@ pub const buffer_pool = struct {
|
|||
/// `Frame.complete` after `vkWaitForFences`.
|
||||
///
|
||||
/// `dev` is needed only on the OOM fallback path: if `ready`
|
||||
/// can't grow to absorb `pending`, we wait the device idle and
|
||||
/// then destroy the pending entries directly so the next frame
|
||||
/// doesn't double up on a pending list that can never drain.
|
||||
/// can't grow to absorb `pending`, we wait the device idle
|
||||
/// (OUTSIDE the mutex — see below) and then destroy the pending
|
||||
/// entries directly so the next frame doesn't double up on a
|
||||
/// pending list that can never drain.
|
||||
pub fn cycle(dev: *const Device) void {
|
||||
mutex.lock();
|
||||
defer mutex.unlock();
|
||||
ready.appendSlice(std.heap.smp_allocator, pending.items) catch {
|
||||
// Couldn't grow `ready` — destroy the pending GPU
|
||||
// resources directly. Other renderer threads may still
|
||||
// be submitting against the shared queue, so wait the
|
||||
// device idle to make sure no command buffer in flight
|
||||
// anywhere references these handles before we destroy.
|
||||
_ = dev.dispatch.deviceWaitIdle(dev.device);
|
||||
for (pending.items) |e| {
|
||||
dev.dispatch.destroyBuffer(dev.device, e.buffer, null);
|
||||
dev.dispatch.freeMemory(dev.device, e.memory, null);
|
||||
// Try the fast path first — append `pending` to `ready`
|
||||
// under the lock, then return. On OOM we have to destroy
|
||||
// the pending entries, but `vkDeviceWaitIdle` is slow and
|
||||
// holding the pool mutex across it would block every other
|
||||
// renderer thread's release/acquire/cycle. Move the
|
||||
// pending list into a local outside the lock, then drain.
|
||||
var oom_pending: std.ArrayList(Entry) = .{};
|
||||
defer oom_pending.deinit(std.heap.smp_allocator);
|
||||
{
|
||||
mutex.lock();
|
||||
defer mutex.unlock();
|
||||
if (ready.appendSlice(std.heap.smp_allocator, pending.items)) {
|
||||
pending.clearRetainingCapacity();
|
||||
return;
|
||||
} else |_| {
|
||||
// OOM. Move `pending` into our local so we can
|
||||
// drain it without holding the mutex.
|
||||
oom_pending = pending;
|
||||
pending = .{};
|
||||
}
|
||||
};
|
||||
pending.clearRetainingCapacity();
|
||||
}
|
||||
// Mutex released. Other threads can release/acquire/cycle
|
||||
// while we wait the device idle and destroy our slice.
|
||||
_ = dev.dispatch.deviceWaitIdle(dev.device);
|
||||
for (oom_pending.items) |e| {
|
||||
dev.dispatch.destroyBuffer(dev.device, e.buffer, null);
|
||||
dev.dispatch.freeMemory(dev.device, e.memory, null);
|
||||
}
|
||||
}
|
||||
|
||||
/// Tear down both lists. Call only when the device is idle
|
||||
|
|
@ -583,19 +597,48 @@ pub fn beginFrame(
|
|||
}
|
||||
|
||||
_ = self;
|
||||
// Reset the command buffer + fence + step descriptor pool so
|
||||
// this frame starts clean. `vkResetDescriptorPool` returns every
|
||||
// set the previous frame allocated to the pool — much cheaper
|
||||
// than freeing them individually, and removes any chance of
|
||||
// last-frame's set being bound by accident.
|
||||
// Reset this frame's per-frame state. ORDER MATTERS: the fence
|
||||
// reset goes LAST. If an earlier reset fails and we return an
|
||||
// error, `Frame.begin` never runs, no submit ever happens, and
|
||||
// the fence stays in whatever state it was in (initially
|
||||
// signaled, or signaled by the previous frame's submit). Any
|
||||
// subsequent `Vulkan.deinit` then waits on a SIGNALED fence
|
||||
// and returns immediately — pre-fix, an unsignaled fence with
|
||||
// no pending submit hung the deinit forever on
|
||||
// waitForFences(UINT64_MAX).
|
||||
if (dev.dispatch.resetCommandBuffer(frame_cb, 0) != vk.VK_SUCCESS)
|
||||
return error.VulkanFailed;
|
||||
if (dev.dispatch.resetFences(dev.device, 1, &frame_fence) != vk.VK_SUCCESS)
|
||||
return error.VulkanFailed;
|
||||
if (step_pool) |*p| {
|
||||
if (dev.dispatch.resetDescriptorPool(dev.device, p.pool, 0) != vk.VK_SUCCESS)
|
||||
return error.VulkanFailed;
|
||||
}
|
||||
// `vkResetDescriptorPool` returns every set the previous frame
|
||||
// allocated to the pool — much cheaper than freeing them
|
||||
// individually, and removes any chance of last-frame's set
|
||||
// being bound by accident.
|
||||
if (dev.dispatch.resetFences(dev.device, 1, &frame_fence) != vk.VK_SUCCESS)
|
||||
return error.VulkanFailed;
|
||||
// From here on the fence is UNSIGNALED. If `Frame.begin`
|
||||
// (vkBeginCommandBuffer) fails before any submit, we have to
|
||||
// re-signal the fence ourselves — otherwise the next
|
||||
// `Vulkan.deinit`'s waitForFences hangs indefinitely.
|
||||
errdefer {
|
||||
// Empty submit with this fence as the signal target is the
|
||||
// simplest portable way to push it back to signaled
|
||||
// without recording any commands.
|
||||
const empty: vk.VkSubmitInfo = .{
|
||||
.sType = vk.VK_STRUCTURE_TYPE_SUBMIT_INFO,
|
||||
.pNext = null,
|
||||
.waitSemaphoreCount = 0,
|
||||
.pWaitSemaphores = null,
|
||||
.pWaitDstStageMask = null,
|
||||
.commandBufferCount = 0,
|
||||
.pCommandBuffers = null,
|
||||
.signalSemaphoreCount = 0,
|
||||
.pSignalSemaphores = null,
|
||||
};
|
||||
_ = dev.queueSubmit(1, &empty, frame_fence);
|
||||
}
|
||||
|
||||
return try Frame.begin(
|
||||
.{
|
||||
|
|
|
|||
|
|
@ -184,6 +184,30 @@ pub fn loadFromFile(
|
|||
.glsl => try glslFromSpv(alloc_gpa, spirv),
|
||||
.msl => try mslFromSpv(alloc_gpa, spirv),
|
||||
.spv => spv: {
|
||||
// Validate before handing back: glslang has succeeded at
|
||||
// this point but a zero-length SPIR-V output would
|
||||
// crash `vkCreateShaderModule` (codeSize == 0). The
|
||||
// SPIR-V magic word check is defensive against future
|
||||
// backends that bypass glslang.
|
||||
if (spirv.len < 4) {
|
||||
std.log.warn(
|
||||
"shadertoy: empty SPIR-V output (size={})",
|
||||
.{spirv.len},
|
||||
);
|
||||
return error.InvalidShader;
|
||||
}
|
||||
// First 4 bytes are the SPIR-V magic word 0x07230203
|
||||
// (little-endian). Reject anything else loudly instead
|
||||
// of letting the driver crash.
|
||||
const magic = std.mem.readInt(u32, spirv[0..4], .little);
|
||||
if (magic != 0x07230203) {
|
||||
std.log.warn(
|
||||
"shadertoy: SPIR-V output missing magic word " ++
|
||||
"(got 0x{x:0>8}, expected 0x07230203)",
|
||||
.{magic},
|
||||
);
|
||||
return error.InvalidShader;
|
||||
}
|
||||
// Copy the SPIR-V binary out of the arena into a
|
||||
// 4-byte-aligned allocation under `alloc_gpa`. Vulkan
|
||||
// expects `pCode: []const u32`, so over-aligning is safe;
|
||||
|
|
@ -219,10 +243,12 @@ pub fn glslFromShader(
|
|||
// Find the first newline after `#version ...` and inject the
|
||||
// defines on the following line. The prefix is expected to
|
||||
// start with `#version` followed by a newline; if a future
|
||||
// edit ever drops that newline (e.g. a single-line prefix)
|
||||
// we inject the defines BEFORE the prefix so glslang sees
|
||||
// the directives on their own lines and reports a clear
|
||||
// error instead of us crashing on a `null.?` unwrap.
|
||||
// edit ever drops that newline (e.g. a single-line prefix
|
||||
// entirely on one line), we synthesize one between
|
||||
// `#version` and the rest, then inject the defines after.
|
||||
// GLSL requires `#version` to be the first non-blank line,
|
||||
// so injecting BEFORE it would silently produce invalid
|
||||
// GLSL.
|
||||
if (std.mem.indexOfScalar(u8, prefix, '\n')) |first_nl| {
|
||||
try writer.writeAll(prefix[0 .. first_nl + 1]);
|
||||
for (defines) |def| {
|
||||
|
|
@ -231,12 +257,31 @@ pub fn glslFromShader(
|
|||
try writer.writeAll("\n");
|
||||
}
|
||||
try writer.writeAll(prefix[first_nl + 1 ..]);
|
||||
} else {
|
||||
} else if (std.mem.startsWith(u8, prefix, "#version")) {
|
||||
// No newline anywhere, but it does start with `#version`.
|
||||
// Find the end of the version directive: scan past the
|
||||
// version number to the first non-version-token char,
|
||||
// synthesize a newline there, then write defines and
|
||||
// the rest of the prefix.
|
||||
var p: usize = "#version".len;
|
||||
while (p < prefix.len and (prefix[p] == ' ' or prefix[p] == '\t')) p += 1;
|
||||
while (p < prefix.len and prefix[p] >= '0' and prefix[p] <= '9') p += 1;
|
||||
// Optional profile (`core` / `compatibility` / `es`).
|
||||
while (p < prefix.len and (prefix[p] == ' ' or prefix[p] == '\t')) p += 1;
|
||||
while (p < prefix.len and ((prefix[p] >= 'a' and prefix[p] <= 'z') or
|
||||
(prefix[p] >= 'A' and prefix[p] <= 'Z'))) p += 1;
|
||||
try writer.writeAll(prefix[0..p]);
|
||||
try writer.writeByte('\n');
|
||||
for (defines) |def| {
|
||||
try writer.writeAll("#define ");
|
||||
try writer.writeAll(def);
|
||||
try writer.writeAll("\n");
|
||||
}
|
||||
try writer.writeAll(prefix[p..]);
|
||||
} else {
|
||||
// Prefix doesn't start with `#version` either — the
|
||||
// shader is malformed. Pass it through as-is so glslang
|
||||
// reports a clear parse error.
|
||||
try writer.writeAll(prefix);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,6 +30,11 @@ pub const Error = error{
|
|||
/// `vkCreateDescriptorPool` / `vkAllocateDescriptorSets` returned
|
||||
/// a non-success status.
|
||||
VulkanFailed,
|
||||
/// Caller passed an invalid pool configuration (e.g. `max_sets ==
|
||||
/// 0`, or every per-type cap is zero). Distinct from
|
||||
/// `VulkanFailed` so callers can tell driver-side errors from
|
||||
/// caller-side ones.
|
||||
InvalidPoolConfig,
|
||||
};
|
||||
|
||||
/// Construction caps. `max_sets` is the total number of descriptor
|
||||
|
|
@ -55,7 +60,7 @@ pub fn init(opts: Options) Error!Self {
|
|||
// a downstream allocation failure.
|
||||
if (opts.max_sets == 0) {
|
||||
log.err("DescriptorPool.init: max_sets must be > 0", .{});
|
||||
return error.VulkanFailed;
|
||||
return error.InvalidPoolConfig;
|
||||
}
|
||||
if (opts.uniform_buffers == 0 and
|
||||
opts.combined_image_samplers == 0 and
|
||||
|
|
@ -66,7 +71,7 @@ pub fn init(opts: Options) Error!Self {
|
|||
"(uniform_buffers, combined_image_samplers, storage_buffers)",
|
||||
.{},
|
||||
);
|
||||
return error.VulkanFailed;
|
||||
return error.InvalidPoolConfig;
|
||||
}
|
||||
|
||||
// Build a small VkDescriptorPoolSize array from whichever caps
|
||||
|
|
|
|||
|
|
@ -186,22 +186,14 @@ pub fn complete(self: *const Self, sync: bool) void {
|
|||
// Recycle the per-frame Buffer pool. Even on the error path we
|
||||
// still want to cycle: buffers that the failed submit referenced
|
||||
// are now stuck (we can't prove the GPU is done with them), so
|
||||
// we conservatively wait the device idle on the unhealthy path
|
||||
// before draining. Without this, every failed submit leaks
|
||||
// every buffer the renderer queued for that frame.
|
||||
if (health == .unhealthy and !submitted) {
|
||||
// Submit never happened — nothing in flight references
|
||||
// recorded buffers, safe to cycle directly.
|
||||
Vulkan.buffer_pool.cycle(dev);
|
||||
} else if (health == .unhealthy) {
|
||||
// Submit happened but fence wait failed (DEVICE_LOST etc.).
|
||||
// Drain the device before recycling to avoid use-after-free
|
||||
// on whatever queue is still ticking.
|
||||
// we conservatively wait the device idle when submit DID happen
|
||||
// but the fence wait failed (DEVICE_LOST etc.) before draining.
|
||||
// Without that wait, every failed submit could leak the buffers
|
||||
// the renderer queued for the frame.
|
||||
if (health == .unhealthy and submitted) {
|
||||
_ = dev.dispatch.deviceWaitIdle(dev.device);
|
||||
Vulkan.buffer_pool.cycle(dev);
|
||||
} else {
|
||||
Vulkan.buffer_pool.cycle(dev);
|
||||
}
|
||||
Vulkan.buffer_pool.cycle(dev);
|
||||
|
||||
// Hand the rendered target off to the host. On the unhealthy
|
||||
// path we skip present — the dmabuf may be partially written
|
||||
|
|
|
|||
|
|
@ -396,23 +396,42 @@ pub fn step(self: *Self, s: Step) void {
|
|||
var effective_sets: [Pipeline.MAX_DESCRIPTOR_SETS]vk.VkDescriptorSet =
|
||||
s.pipeline.descriptor_sets;
|
||||
const reused = self.markPipelineUsed(s.pipeline.pipeline);
|
||||
if (reused) if (self.step_pool) |pool| {
|
||||
if (reused) {
|
||||
// No step_pool means the renderer thread has no per-frame
|
||||
// descriptor pool wired up (test harness, smoke test). We
|
||||
// can't safely re-use this pipeline — updating the static
|
||||
// set in place would corrupt the prior draw's bindings.
|
||||
// Drop the draw rather than corrupt the frame.
|
||||
const pool = self.step_pool orelse {
|
||||
log.err(
|
||||
"RenderPass.step: pipeline re-used but no step_pool " ++
|
||||
"available; dropping draw to avoid corrupting prior draws",
|
||||
.{},
|
||||
);
|
||||
return;
|
||||
};
|
||||
for (s.pipeline.descriptor_set_layouts, 0..) |maybe_dsl, i| {
|
||||
if (i >= s.pipeline.set_count) break;
|
||||
const dsl = maybe_dsl orelse continue;
|
||||
if (pool.allocate(dsl)) |fresh| {
|
||||
effective_sets[i] = fresh;
|
||||
} else |err| {
|
||||
// Pool exhausted. The previous behavior was to
|
||||
// fall back to the pipeline's static set, but that
|
||||
// re-introduces the exact corruption the step_pool
|
||||
// mechanism exists to prevent. Drop the draw; the
|
||||
// user sees one missed image rather than every
|
||||
// image rendered with the last image's bindings.
|
||||
log.err(
|
||||
"RenderPass.step: per-call descriptor set " ++
|
||||
"allocation for set {} failed ({}); falling " ++
|
||||
"back to the pipeline's static set, which " ++
|
||||
"may corrupt prior draws on this pipeline",
|
||||
"allocation for set {} failed ({}); dropping draw " ++
|
||||
"(step_pool exhausted — increase STEP_POOL_MAX_SETS)",
|
||||
.{ i, err },
|
||||
);
|
||||
return;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// ---- update descriptor sets ---------------------------------
|
||||
//
|
||||
|
|
|
|||
|
|
@ -111,13 +111,16 @@ pub fn Buffer(comptime T: type) type {
|
|||
self.memory,
|
||||
self.opts.usage,
|
||||
capacity_bytes,
|
||||
) catch {
|
||||
) catch |err| {
|
||||
// OOM growing the pool. The buffer may still be
|
||||
// referenced by an in-flight command buffer, so we
|
||||
// wait the entire device idle before destroying —
|
||||
// expensive but correct. Logging here is awkward (no
|
||||
// logger in scope) so we accept the loud failure and
|
||||
// let Vulkan stderr diagnose anything that follows.
|
||||
// expensive but correct.
|
||||
log.warn(
|
||||
"Buffer.deinit: pool release failed ({}); falling " ++
|
||||
"back to vkDeviceWaitIdle + destroy",
|
||||
.{err},
|
||||
);
|
||||
_ = dev.dispatch.deviceWaitIdle(dev.device);
|
||||
dev.dispatch.destroyBuffer(dev.device, self.buffer, null);
|
||||
dev.dispatch.freeMemory(dev.device, self.memory, null);
|
||||
|
|
@ -261,14 +264,26 @@ pub fn Buffer(comptime T: type) type {
|
|||
}
|
||||
|
||||
/// Grow the buffer to hold at least `new_len` Ts. Vulkan
|
||||
/// buffers are immutable in size, so we route the old
|
||||
/// buffer through the recycle pool (it may still be
|
||||
/// referenced by the in-flight command buffer — destroying
|
||||
/// it directly would race the GPU same as `deinit` would)
|
||||
/// and create a fresh one. Contents are discarded; callers
|
||||
/// buffers are immutable in size, so we allocate a fresh
|
||||
/// one and then route the old one through the recycle pool
|
||||
/// (it may still be referenced by the in-flight command
|
||||
/// buffer — destroying it directly would race the GPU same
|
||||
/// as `deinit` would). Contents are discarded; callers
|
||||
/// always `sync` immediately after `grow` returns.
|
||||
///
|
||||
/// Order is critical: `create` first, `release` second.
|
||||
/// If we released the old buffer first and `create`
|
||||
/// failed, `self.{buffer,memory}` would be left dangling
|
||||
/// at freed handles, and the caller's eventual
|
||||
/// `self.deinit()` would double-destroy via the pool.
|
||||
fn grow(self: *Self, new_len: usize) Error!void {
|
||||
const dev = self.opts.device;
|
||||
const replacement = try create(self.opts, new_len);
|
||||
// From here on `self.{buffer,memory}` are the OLD pair;
|
||||
// release them. If `release` itself OOMs, we have to
|
||||
// destroy directly (same fallback as `deinit`), but the
|
||||
// new pair is already constructed and `self.* =
|
||||
// replacement` will reach a healthy state regardless.
|
||||
const bp = @import("../Vulkan.zig").buffer_pool;
|
||||
const capacity_bytes: u64 = @as(u64, self.len) * @sizeOf(T);
|
||||
bp.release(
|
||||
|
|
@ -278,13 +293,10 @@ pub fn Buffer(comptime T: type) type {
|
|||
self.opts.usage,
|
||||
capacity_bytes,
|
||||
) catch {
|
||||
// OOM appending to the pool — wait the device idle
|
||||
// and destroy directly. Same fallback as `deinit`.
|
||||
_ = dev.dispatch.deviceWaitIdle(dev.device);
|
||||
dev.dispatch.destroyBuffer(dev.device, self.buffer, null);
|
||||
dev.dispatch.freeMemory(dev.device, self.memory, null);
|
||||
};
|
||||
const replacement = try create(self.opts, new_len);
|
||||
self.* = replacement;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -74,7 +74,9 @@ fn processIncludes(comptime contents: [:0]const u8) [:0]const u8 {
|
|||
// any horizontal whitespace and require exactly one
|
||||
// double-quoted path.
|
||||
var p = i + "#include".len;
|
||||
while (p < contents.len and (contents[p] == ' ' or contents[p] == '\t')) : (p += 1) {}
|
||||
while (p < contents.len and (contents[p] == ' ' or contents[p] == '\t' or
|
||||
contents[p] == 0x0B or contents[p] == 0x0C)) : (p += 1)
|
||||
{}
|
||||
if (p >= contents.len or contents[p] != '"') {
|
||||
@compileError("processIncludes: malformed #include directive in shader");
|
||||
}
|
||||
|
|
@ -231,6 +233,10 @@ pub fn vulkanizeGlsl(
|
|||
i += 1; // consume the '('
|
||||
var depth: i32 = 1;
|
||||
while (i < src.len and depth > 0) {
|
||||
// Skip comments and string literals verbatim
|
||||
// — a `(` or `)` inside `/* */` or `"..."`
|
||||
// shouldn't move our paren depth tracker.
|
||||
if (try copySkippable(alloc, &out, src, &i)) continue;
|
||||
const cc = src[i];
|
||||
if (cc == '(') depth += 1;
|
||||
if (cc == ')') {
|
||||
|
|
@ -390,6 +396,13 @@ fn isIdentChar(c: u8) bool {
|
|||
/// tiny — broader matching would force `textureLod` on the custom
|
||||
/// shader's `iChannel0`, which is normalized, and bypassing the
|
||||
/// implicit-LOD opcode path makes the driver work harder per call.
|
||||
///
|
||||
/// User-supplied custom shaders MUST NOT name a sampler `atlas_grayscale`
|
||||
/// or `atlas_color` — doing so will trigger this rewrite and replace
|
||||
/// `texture()` calls on that sampler with `textureLod(..., 0.0)`,
|
||||
/// which is incorrect for any normalized sampler. The shadertoy
|
||||
/// shader convention uses `iChannel0..3`, so the conflict is unlikely
|
||||
/// in practice.
|
||||
const unnormalized_sampler_names = [_][]const u8{
|
||||
"atlas_grayscale",
|
||||
"atlas_color",
|
||||
|
|
@ -416,7 +429,10 @@ fn nextSamplerIsUnnormalized(src: []const u8, i: usize) bool {
|
|||
}
|
||||
|
||||
fn isHorizSpace(c: u8) bool {
|
||||
return c == ' ' or c == '\t';
|
||||
// GLSL preprocessor whitespace per the spec: space, tab,
|
||||
// vertical tab, form feed. Newline + carriage return are
|
||||
// separately handled as line terminators.
|
||||
return c == ' ' or c == '\t' or c == 0x0B or c == 0x0C;
|
||||
}
|
||||
|
||||
fn isAnySpace(c: u8) bool {
|
||||
|
|
@ -563,6 +579,24 @@ pub const Module = struct {
|
|||
spirv: []const u32,
|
||||
stage: Stage,
|
||||
) Error!Module {
|
||||
// Sanity-check the SPIR-V before handing it to the driver.
|
||||
// The glslang and shadertoy paths both validate already, but
|
||||
// a future caller wiring up the build-time-blob path could
|
||||
// accidentally pass an empty or non-SPIR-V buffer; failing
|
||||
// here with a clear error beats a `vkCreateShaderModule`
|
||||
// segfault inside the loader.
|
||||
if (spirv.len == 0) {
|
||||
log.err("Module.initFromSpirv: zero-length SPIR-V buffer", .{});
|
||||
return error.VulkanFailed;
|
||||
}
|
||||
if (spirv[0] != 0x07230203) {
|
||||
log.err(
|
||||
"Module.initFromSpirv: missing magic word " ++
|
||||
"(got 0x{x:0>8}, expected 0x07230203)",
|
||||
.{spirv[0]},
|
||||
);
|
||||
return error.VulkanFailed;
|
||||
}
|
||||
const info: vk.VkShaderModuleCreateInfo = .{
|
||||
.sType = vk.VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
|
||||
.pNext = null,
|
||||
|
|
|
|||
Loading…
Reference in New Issue