diff --git a/qt/src/GhosttySurface.cpp b/qt/src/GhosttySurface.cpp index 2296fc42b..7a35457c1 100644 --- a/qt/src/GhosttySurface.cpp +++ b/qt/src/GhosttySurface.cpp @@ -1638,9 +1638,9 @@ void GhosttySurface::presentVulkanDmabuf( expected, true, std::memory_order_relaxed)) { std::fprintf(stderr, "[ghastty] first dmabuf for surface=%p: fd=%d %ux%u " - "stride=%u fourcc=0x%08x mod=0x%lx image_backed=%d path=%s\n", + "stride=%u fourcc=0x%08x mod=0x%llx image_backed=%d path=%s\n", static_cast(this), dmabuf_fd, width, height, stride, - drm_format, static_cast(drm_modifier), + drm_format, static_cast(drm_modifier), image_backed ? 1 : 0, useSubsurface ? "subsurface" : "qimage"); } @@ -1657,9 +1657,17 @@ void GhosttySurface::presentVulkanDmabuf( // terminal — and check that stride*height doesn't exceed // SIZE_MAX before promoting. constexpr quint32 MAX_DIM = 65536; + // Cap stride at MAX_DIM × 4 (BGRA8) × a small slack factor for + // tiled formats: ~4× the width-derived minimum is enough for any + // legitimate vendor tiling, and it keeps `stride * height` + // below ~64 GiB even at MAX_DIM. The previous lower-only bound + // let a pathological renderer with stride near UINT32_MAX and + // height=MAX_DIM reach mmap with a ~280 TB request. + constexpr quint32 MAX_STRIDE = MAX_DIM * 16; if (dmabuf_fd < 0 || width == 0 || height == 0) return; if (width > MAX_DIM || height > MAX_DIM) return; if (stride < static_cast(width) * 4) return; + if (stride > MAX_STRIDE) return; // stride*height as 64-bit and check the size_t fit explicitly. const quint64 bytes64 = static_cast(stride) * height; if (bytes64 > std::numeric_limits::max()) return; @@ -1737,10 +1745,23 @@ void GhosttySurface::presentVulkanDmabuf( const double dpr_now = m_fbDpr.load(std::memory_order_acquire); if (dpr_now > 0) owned.setDevicePixelRatio(dpr_now); + bool overwrote_legacy = false; { QMutexLocker lock(&m_pendingMutex); + overwrote_legacy = !m_pending.isNull(); m_pending = std::move(owned); } + if (overwrote_legacy) { + const auto count = m_droppedFrames.fetch_add( + 1, std::memory_order_relaxed) + 1; + if (count <= 3 || count % 60 == 0) { + std::fprintf(stderr, + "[ghastty] surface=%p dropped frame " + "(legacy QImage path, total=%llu)\n", + static_cast(this), + static_cast(count)); + } + } QMetaObject::invokeMethod(this, "drainVulkan", Qt::QueuedConnection); } @@ -1749,7 +1770,17 @@ void GhosttySurface::drainVulkan() { // under the mutex, then dispatch it to the presenter outside the // lock so a renderer-thread `presentVulkanDmabuf` parking the // next frame doesn't block on wl_display_flush. - if (m_hidden.load(std::memory_order_acquire)) return; + if (m_hidden.load(std::memory_order_acquire)) { + // Clear the parked descriptor on hide so the next post-Show + // present doesn't see a "stale frame still pending" state and + // spuriously bump m_droppedFrames every Hide/Show cycle. The + // fd itself is libghostty-owned (per ABI it's only valid for + // the duration of the original presentVulkanDmabuf call), so + // there's nothing to release here beyond marking the slot empty. + QMutexLocker lock(&m_pendingMutex); + m_pendingDmabuf.fd = -1; + return; + } if (m_useSubsurface.load(std::memory_order_acquire) && m_subsurfacePresenter) { PendingDmabuf frame; diff --git a/qt/src/GhosttySurface.h b/qt/src/GhosttySurface.h index 41738d328..22cd82eb5 100644 --- a/qt/src/GhosttySurface.h +++ b/qt/src/GhosttySurface.h @@ -170,8 +170,9 @@ public: // mmap+memcpy'd QImage) and wakes the GUI thread via // `QMetaObject::invokeMethod(this, drainVulkan, Qt::QueuedConnection)`. // The GUI thread either commits the dmabuf to the wl_subsurface - // (zero-copy) or paints the QImage (fallback). A 2 ms safety-net - // poll catches anything `invokeMethod` ever fails to deliver. + // (zero-copy) or paints the QImage (fallback). The dropped-frame + // counter `m_droppedFrames` makes any genuine queue-loss visible + // (zero in the steady state). Q_INVOKABLE void presentVulkanDmabuf( int dmabuf_fd, quint32 drm_format, diff --git a/qt/src/wayland/SubsurfacePresenter.cpp b/qt/src/wayland/SubsurfacePresenter.cpp index f0b42655a..681b2f2f7 100644 --- a/qt/src/wayland/SubsurfacePresenter.cpp +++ b/qt/src/wayland/SubsurfacePresenter.cpp @@ -86,16 +86,29 @@ void registryGlobal(void *data, wl_registry *registry, uint32_t name, g->subcompositor = static_cast( wl_registry_bind(registry, name, &wl_subcompositor_interface, 1)); } else if (std::strcmp(interface, zwp_linux_dmabuf_v1_interface.name) == 0) { - // v3 has `create_immed`, which we want (synchronous wl_buffer - // creation — the v2 async `create` + `created`/`failed` event - // dance would add a layer of callback machinery for no real win - // in our renderer's strict-fd-validity scenario). v4 adds the - // dynamic format/modifier feedback dance; we don't need it yet. - g->dmabuf = static_cast(wl_registry_bind( - registry, name, &zwp_linux_dmabuf_v1_interface, 3)); - // Add the listener immediately so the modifier events queued by - // the bind get delivered when the dispatch loop continues. - zwp_linux_dmabuf_v1_add_listener(g->dmabuf, &kDmabufListener, g); + // We want at least v3 for `create_immed` (synchronous wl_buffer + // creation — v1/v2 have only the async `create` + `created`/ + // `failed` dance). A compositor that only advertises v1/v2 + // can't satisfy our protocol assumptions; binding at v3 against + // such a compositor would protocol-error and tear down the + // entire wl_display. Skip the bind in that case so the + // legacy QImage fallback engages cleanly. + if (version < 3) { + std::fprintf(stderr, + "[ghastty] wayland: linux-dmabuf-v1 advertised at " + "version %u; need >= 3 for create_immed, falling back " + "to QImage path\n", + version); + } else { + // Cap at v3 — v4 adds the dynamic format/modifier feedback + // dance which we don't consume. + const uint32_t v = std::min(version, 3u); + g->dmabuf = static_cast(wl_registry_bind( + registry, name, &zwp_linux_dmabuf_v1_interface, v)); + // Add the listener immediately so the modifier events queued + // by the bind get delivered when the dispatch loop continues. + zwp_linux_dmabuf_v1_add_listener(g->dmabuf, &kDmabufListener, g); + } } else if (std::strcmp(interface, wp_viewporter_interface.name) == 0) { g->viewporter = static_cast( wl_registry_bind(registry, name, &wp_viewporter_interface, 1)); @@ -125,14 +138,24 @@ PresenterGlobals *discoverGlobals(wl_display *display) { // Roundtrip 1: bind compositor/subcompositor/dmabuf. Inside the // registry callback we attach the dmabuf listener immediately, so // any format/modifier events that arrive in the same dispatch - // pass fire on it. - wl_display_roundtrip_queue(display, queue); + // pass fire on it. A negative return means the wl_display + // disconnected mid-startup; subsequent tryCreate calls fall + // through to the QImage path (g->compositor etc. stay null). + if (wl_display_roundtrip_queue(display, queue) < 0) { + std::fprintf(stderr, + "[ghastty] wayland: discoverGlobals roundtrip 1 failed; " + "subsurface present path disabled\n"); + } wl_registry_destroy(registry); // Roundtrip 2: belt-and-suspenders for any compositor that defers // the modifier events past the bind reply (most don't, but some // batch them). After this returns the modifier table is fully // populated and frozen for the process lifetime. - if (globals.dmabuf) wl_display_roundtrip_queue(display, queue); + if (globals.dmabuf && wl_display_roundtrip_queue(display, queue) < 0) { + std::fprintf(stderr, + "[ghastty] wayland: discoverGlobals roundtrip 2 failed; " + "modifier table may be incomplete\n"); + } std::size_t total_mods = 0; for (const auto &kv : globals.modifiers) total_mods += kv.second.size(); diff --git a/src/renderer/Vulkan.zig b/src/renderer/Vulkan.zig index 563dbbcc7..983822b0f 100644 --- a/src/renderer/Vulkan.zig +++ b/src/renderer/Vulkan.zig @@ -206,25 +206,39 @@ pub const buffer_pool = struct { /// `Frame.complete` after `vkWaitForFences`. /// /// `dev` is needed only on the OOM fallback path: if `ready` - /// can't grow to absorb `pending`, we wait the device idle and - /// then destroy the pending entries directly so the next frame - /// doesn't double up on a pending list that can never drain. + /// can't grow to absorb `pending`, we wait the device idle + /// (OUTSIDE the mutex — see below) and then destroy the pending + /// entries directly so the next frame doesn't double up on a + /// pending list that can never drain. pub fn cycle(dev: *const Device) void { - mutex.lock(); - defer mutex.unlock(); - ready.appendSlice(std.heap.smp_allocator, pending.items) catch { - // Couldn't grow `ready` — destroy the pending GPU - // resources directly. Other renderer threads may still - // be submitting against the shared queue, so wait the - // device idle to make sure no command buffer in flight - // anywhere references these handles before we destroy. - _ = dev.dispatch.deviceWaitIdle(dev.device); - for (pending.items) |e| { - dev.dispatch.destroyBuffer(dev.device, e.buffer, null); - dev.dispatch.freeMemory(dev.device, e.memory, null); + // Try the fast path first — append `pending` to `ready` + // under the lock, then return. On OOM we have to destroy + // the pending entries, but `vkDeviceWaitIdle` is slow and + // holding the pool mutex across it would block every other + // renderer thread's release/acquire/cycle. Move the + // pending list into a local outside the lock, then drain. + var oom_pending: std.ArrayList(Entry) = .{}; + defer oom_pending.deinit(std.heap.smp_allocator); + { + mutex.lock(); + defer mutex.unlock(); + if (ready.appendSlice(std.heap.smp_allocator, pending.items)) { + pending.clearRetainingCapacity(); + return; + } else |_| { + // OOM. Move `pending` into our local so we can + // drain it without holding the mutex. + oom_pending = pending; + pending = .{}; } - }; - pending.clearRetainingCapacity(); + } + // Mutex released. Other threads can release/acquire/cycle + // while we wait the device idle and destroy our slice. + _ = dev.dispatch.deviceWaitIdle(dev.device); + for (oom_pending.items) |e| { + dev.dispatch.destroyBuffer(dev.device, e.buffer, null); + dev.dispatch.freeMemory(dev.device, e.memory, null); + } } /// Tear down both lists. Call only when the device is idle @@ -583,19 +597,48 @@ pub fn beginFrame( } _ = self; - // Reset the command buffer + fence + step descriptor pool so - // this frame starts clean. `vkResetDescriptorPool` returns every - // set the previous frame allocated to the pool — much cheaper - // than freeing them individually, and removes any chance of - // last-frame's set being bound by accident. + // Reset this frame's per-frame state. ORDER MATTERS: the fence + // reset goes LAST. If an earlier reset fails and we return an + // error, `Frame.begin` never runs, no submit ever happens, and + // the fence stays in whatever state it was in (initially + // signaled, or signaled by the previous frame's submit). Any + // subsequent `Vulkan.deinit` then waits on a SIGNALED fence + // and returns immediately — pre-fix, an unsignaled fence with + // no pending submit hung the deinit forever on + // waitForFences(UINT64_MAX). if (dev.dispatch.resetCommandBuffer(frame_cb, 0) != vk.VK_SUCCESS) return error.VulkanFailed; - if (dev.dispatch.resetFences(dev.device, 1, &frame_fence) != vk.VK_SUCCESS) - return error.VulkanFailed; if (step_pool) |*p| { if (dev.dispatch.resetDescriptorPool(dev.device, p.pool, 0) != vk.VK_SUCCESS) return error.VulkanFailed; } + // `vkResetDescriptorPool` returns every set the previous frame + // allocated to the pool — much cheaper than freeing them + // individually, and removes any chance of last-frame's set + // being bound by accident. + if (dev.dispatch.resetFences(dev.device, 1, &frame_fence) != vk.VK_SUCCESS) + return error.VulkanFailed; + // From here on the fence is UNSIGNALED. If `Frame.begin` + // (vkBeginCommandBuffer) fails before any submit, we have to + // re-signal the fence ourselves — otherwise the next + // `Vulkan.deinit`'s waitForFences hangs indefinitely. + errdefer { + // Empty submit with this fence as the signal target is the + // simplest portable way to push it back to signaled + // without recording any commands. + const empty: vk.VkSubmitInfo = .{ + .sType = vk.VK_STRUCTURE_TYPE_SUBMIT_INFO, + .pNext = null, + .waitSemaphoreCount = 0, + .pWaitSemaphores = null, + .pWaitDstStageMask = null, + .commandBufferCount = 0, + .pCommandBuffers = null, + .signalSemaphoreCount = 0, + .pSignalSemaphores = null, + }; + _ = dev.queueSubmit(1, &empty, frame_fence); + } return try Frame.begin( .{ diff --git a/src/renderer/shadertoy.zig b/src/renderer/shadertoy.zig index f85b98271..24db7e592 100644 --- a/src/renderer/shadertoy.zig +++ b/src/renderer/shadertoy.zig @@ -184,6 +184,30 @@ pub fn loadFromFile( .glsl => try glslFromSpv(alloc_gpa, spirv), .msl => try mslFromSpv(alloc_gpa, spirv), .spv => spv: { + // Validate before handing back: glslang has succeeded at + // this point but a zero-length SPIR-V output would + // crash `vkCreateShaderModule` (codeSize == 0). The + // SPIR-V magic word check is defensive against future + // backends that bypass glslang. + if (spirv.len < 4) { + std.log.warn( + "shadertoy: empty SPIR-V output (size={})", + .{spirv.len}, + ); + return error.InvalidShader; + } + // First 4 bytes are the SPIR-V magic word 0x07230203 + // (little-endian). Reject anything else loudly instead + // of letting the driver crash. + const magic = std.mem.readInt(u32, spirv[0..4], .little); + if (magic != 0x07230203) { + std.log.warn( + "shadertoy: SPIR-V output missing magic word " ++ + "(got 0x{x:0>8}, expected 0x07230203)", + .{magic}, + ); + return error.InvalidShader; + } // Copy the SPIR-V binary out of the arena into a // 4-byte-aligned allocation under `alloc_gpa`. Vulkan // expects `pCode: []const u32`, so over-aligning is safe; @@ -219,10 +243,12 @@ pub fn glslFromShader( // Find the first newline after `#version ...` and inject the // defines on the following line. The prefix is expected to // start with `#version` followed by a newline; if a future - // edit ever drops that newline (e.g. a single-line prefix) - // we inject the defines BEFORE the prefix so glslang sees - // the directives on their own lines and reports a clear - // error instead of us crashing on a `null.?` unwrap. + // edit ever drops that newline (e.g. a single-line prefix + // entirely on one line), we synthesize one between + // `#version` and the rest, then inject the defines after. + // GLSL requires `#version` to be the first non-blank line, + // so injecting BEFORE it would silently produce invalid + // GLSL. if (std.mem.indexOfScalar(u8, prefix, '\n')) |first_nl| { try writer.writeAll(prefix[0 .. first_nl + 1]); for (defines) |def| { @@ -231,12 +257,31 @@ pub fn glslFromShader( try writer.writeAll("\n"); } try writer.writeAll(prefix[first_nl + 1 ..]); - } else { + } else if (std.mem.startsWith(u8, prefix, "#version")) { + // No newline anywhere, but it does start with `#version`. + // Find the end of the version directive: scan past the + // version number to the first non-version-token char, + // synthesize a newline there, then write defines and + // the rest of the prefix. + var p: usize = "#version".len; + while (p < prefix.len and (prefix[p] == ' ' or prefix[p] == '\t')) p += 1; + while (p < prefix.len and prefix[p] >= '0' and prefix[p] <= '9') p += 1; + // Optional profile (`core` / `compatibility` / `es`). + while (p < prefix.len and (prefix[p] == ' ' or prefix[p] == '\t')) p += 1; + while (p < prefix.len and ((prefix[p] >= 'a' and prefix[p] <= 'z') or + (prefix[p] >= 'A' and prefix[p] <= 'Z'))) p += 1; + try writer.writeAll(prefix[0..p]); + try writer.writeByte('\n'); for (defines) |def| { try writer.writeAll("#define "); try writer.writeAll(def); try writer.writeAll("\n"); } + try writer.writeAll(prefix[p..]); + } else { + // Prefix doesn't start with `#version` either — the + // shader is malformed. Pass it through as-is so glslang + // reports a clear parse error. try writer.writeAll(prefix); } } diff --git a/src/renderer/vulkan/DescriptorPool.zig b/src/renderer/vulkan/DescriptorPool.zig index 373074ae6..3fb8510a1 100644 --- a/src/renderer/vulkan/DescriptorPool.zig +++ b/src/renderer/vulkan/DescriptorPool.zig @@ -30,6 +30,11 @@ pub const Error = error{ /// `vkCreateDescriptorPool` / `vkAllocateDescriptorSets` returned /// a non-success status. VulkanFailed, + /// Caller passed an invalid pool configuration (e.g. `max_sets == + /// 0`, or every per-type cap is zero). Distinct from + /// `VulkanFailed` so callers can tell driver-side errors from + /// caller-side ones. + InvalidPoolConfig, }; /// Construction caps. `max_sets` is the total number of descriptor @@ -55,7 +60,7 @@ pub fn init(opts: Options) Error!Self { // a downstream allocation failure. if (opts.max_sets == 0) { log.err("DescriptorPool.init: max_sets must be > 0", .{}); - return error.VulkanFailed; + return error.InvalidPoolConfig; } if (opts.uniform_buffers == 0 and opts.combined_image_samplers == 0 and @@ -66,7 +71,7 @@ pub fn init(opts: Options) Error!Self { "(uniform_buffers, combined_image_samplers, storage_buffers)", .{}, ); - return error.VulkanFailed; + return error.InvalidPoolConfig; } // Build a small VkDescriptorPoolSize array from whichever caps diff --git a/src/renderer/vulkan/Frame.zig b/src/renderer/vulkan/Frame.zig index 0b9d6faa2..d63dabd6c 100644 --- a/src/renderer/vulkan/Frame.zig +++ b/src/renderer/vulkan/Frame.zig @@ -186,22 +186,14 @@ pub fn complete(self: *const Self, sync: bool) void { // Recycle the per-frame Buffer pool. Even on the error path we // still want to cycle: buffers that the failed submit referenced // are now stuck (we can't prove the GPU is done with them), so - // we conservatively wait the device idle on the unhealthy path - // before draining. Without this, every failed submit leaks - // every buffer the renderer queued for that frame. - if (health == .unhealthy and !submitted) { - // Submit never happened — nothing in flight references - // recorded buffers, safe to cycle directly. - Vulkan.buffer_pool.cycle(dev); - } else if (health == .unhealthy) { - // Submit happened but fence wait failed (DEVICE_LOST etc.). - // Drain the device before recycling to avoid use-after-free - // on whatever queue is still ticking. + // we conservatively wait the device idle when submit DID happen + // but the fence wait failed (DEVICE_LOST etc.) before draining. + // Without that wait, every failed submit could leak the buffers + // the renderer queued for the frame. + if (health == .unhealthy and submitted) { _ = dev.dispatch.deviceWaitIdle(dev.device); - Vulkan.buffer_pool.cycle(dev); - } else { - Vulkan.buffer_pool.cycle(dev); } + Vulkan.buffer_pool.cycle(dev); // Hand the rendered target off to the host. On the unhealthy // path we skip present — the dmabuf may be partially written diff --git a/src/renderer/vulkan/RenderPass.zig b/src/renderer/vulkan/RenderPass.zig index b3db3ce7e..5c68b1600 100644 --- a/src/renderer/vulkan/RenderPass.zig +++ b/src/renderer/vulkan/RenderPass.zig @@ -396,23 +396,42 @@ pub fn step(self: *Self, s: Step) void { var effective_sets: [Pipeline.MAX_DESCRIPTOR_SETS]vk.VkDescriptorSet = s.pipeline.descriptor_sets; const reused = self.markPipelineUsed(s.pipeline.pipeline); - if (reused) if (self.step_pool) |pool| { + if (reused) { + // No step_pool means the renderer thread has no per-frame + // descriptor pool wired up (test harness, smoke test). We + // can't safely re-use this pipeline — updating the static + // set in place would corrupt the prior draw's bindings. + // Drop the draw rather than corrupt the frame. + const pool = self.step_pool orelse { + log.err( + "RenderPass.step: pipeline re-used but no step_pool " ++ + "available; dropping draw to avoid corrupting prior draws", + .{}, + ); + return; + }; for (s.pipeline.descriptor_set_layouts, 0..) |maybe_dsl, i| { if (i >= s.pipeline.set_count) break; const dsl = maybe_dsl orelse continue; if (pool.allocate(dsl)) |fresh| { effective_sets[i] = fresh; } else |err| { + // Pool exhausted. The previous behavior was to + // fall back to the pipeline's static set, but that + // re-introduces the exact corruption the step_pool + // mechanism exists to prevent. Drop the draw; the + // user sees one missed image rather than every + // image rendered with the last image's bindings. log.err( "RenderPass.step: per-call descriptor set " ++ - "allocation for set {} failed ({}); falling " ++ - "back to the pipeline's static set, which " ++ - "may corrupt prior draws on this pipeline", + "allocation for set {} failed ({}); dropping draw " ++ + "(step_pool exhausted — increase STEP_POOL_MAX_SETS)", .{ i, err }, ); + return; } } - }; + } // ---- update descriptor sets --------------------------------- // diff --git a/src/renderer/vulkan/buffer.zig b/src/renderer/vulkan/buffer.zig index cd73eccce..0e29da584 100644 --- a/src/renderer/vulkan/buffer.zig +++ b/src/renderer/vulkan/buffer.zig @@ -111,13 +111,16 @@ pub fn Buffer(comptime T: type) type { self.memory, self.opts.usage, capacity_bytes, - ) catch { + ) catch |err| { // OOM growing the pool. The buffer may still be // referenced by an in-flight command buffer, so we // wait the entire device idle before destroying — - // expensive but correct. Logging here is awkward (no - // logger in scope) so we accept the loud failure and - // let Vulkan stderr diagnose anything that follows. + // expensive but correct. + log.warn( + "Buffer.deinit: pool release failed ({}); falling " ++ + "back to vkDeviceWaitIdle + destroy", + .{err}, + ); _ = dev.dispatch.deviceWaitIdle(dev.device); dev.dispatch.destroyBuffer(dev.device, self.buffer, null); dev.dispatch.freeMemory(dev.device, self.memory, null); @@ -261,14 +264,26 @@ pub fn Buffer(comptime T: type) type { } /// Grow the buffer to hold at least `new_len` Ts. Vulkan - /// buffers are immutable in size, so we route the old - /// buffer through the recycle pool (it may still be - /// referenced by the in-flight command buffer — destroying - /// it directly would race the GPU same as `deinit` would) - /// and create a fresh one. Contents are discarded; callers + /// buffers are immutable in size, so we allocate a fresh + /// one and then route the old one through the recycle pool + /// (it may still be referenced by the in-flight command + /// buffer — destroying it directly would race the GPU same + /// as `deinit` would). Contents are discarded; callers /// always `sync` immediately after `grow` returns. + /// + /// Order is critical: `create` first, `release` second. + /// If we released the old buffer first and `create` + /// failed, `self.{buffer,memory}` would be left dangling + /// at freed handles, and the caller's eventual + /// `self.deinit()` would double-destroy via the pool. fn grow(self: *Self, new_len: usize) Error!void { const dev = self.opts.device; + const replacement = try create(self.opts, new_len); + // From here on `self.{buffer,memory}` are the OLD pair; + // release them. If `release` itself OOMs, we have to + // destroy directly (same fallback as `deinit`), but the + // new pair is already constructed and `self.* = + // replacement` will reach a healthy state regardless. const bp = @import("../Vulkan.zig").buffer_pool; const capacity_bytes: u64 = @as(u64, self.len) * @sizeOf(T); bp.release( @@ -278,13 +293,10 @@ pub fn Buffer(comptime T: type) type { self.opts.usage, capacity_bytes, ) catch { - // OOM appending to the pool — wait the device idle - // and destroy directly. Same fallback as `deinit`. _ = dev.dispatch.deviceWaitIdle(dev.device); dev.dispatch.destroyBuffer(dev.device, self.buffer, null); dev.dispatch.freeMemory(dev.device, self.memory, null); }; - const replacement = try create(self.opts, new_len); self.* = replacement; } diff --git a/src/renderer/vulkan/shaders.zig b/src/renderer/vulkan/shaders.zig index 767e11d5d..bab1b9cce 100644 --- a/src/renderer/vulkan/shaders.zig +++ b/src/renderer/vulkan/shaders.zig @@ -74,7 +74,9 @@ fn processIncludes(comptime contents: [:0]const u8) [:0]const u8 { // any horizontal whitespace and require exactly one // double-quoted path. var p = i + "#include".len; - while (p < contents.len and (contents[p] == ' ' or contents[p] == '\t')) : (p += 1) {} + while (p < contents.len and (contents[p] == ' ' or contents[p] == '\t' or + contents[p] == 0x0B or contents[p] == 0x0C)) : (p += 1) + {} if (p >= contents.len or contents[p] != '"') { @compileError("processIncludes: malformed #include directive in shader"); } @@ -231,6 +233,10 @@ pub fn vulkanizeGlsl( i += 1; // consume the '(' var depth: i32 = 1; while (i < src.len and depth > 0) { + // Skip comments and string literals verbatim + // — a `(` or `)` inside `/* */` or `"..."` + // shouldn't move our paren depth tracker. + if (try copySkippable(alloc, &out, src, &i)) continue; const cc = src[i]; if (cc == '(') depth += 1; if (cc == ')') { @@ -390,6 +396,13 @@ fn isIdentChar(c: u8) bool { /// tiny — broader matching would force `textureLod` on the custom /// shader's `iChannel0`, which is normalized, and bypassing the /// implicit-LOD opcode path makes the driver work harder per call. +/// +/// User-supplied custom shaders MUST NOT name a sampler `atlas_grayscale` +/// or `atlas_color` — doing so will trigger this rewrite and replace +/// `texture()` calls on that sampler with `textureLod(..., 0.0)`, +/// which is incorrect for any normalized sampler. The shadertoy +/// shader convention uses `iChannel0..3`, so the conflict is unlikely +/// in practice. const unnormalized_sampler_names = [_][]const u8{ "atlas_grayscale", "atlas_color", @@ -416,7 +429,10 @@ fn nextSamplerIsUnnormalized(src: []const u8, i: usize) bool { } fn isHorizSpace(c: u8) bool { - return c == ' ' or c == '\t'; + // GLSL preprocessor whitespace per the spec: space, tab, + // vertical tab, form feed. Newline + carriage return are + // separately handled as line terminators. + return c == ' ' or c == '\t' or c == 0x0B or c == 0x0C; } fn isAnySpace(c: u8) bool { @@ -563,6 +579,24 @@ pub const Module = struct { spirv: []const u32, stage: Stage, ) Error!Module { + // Sanity-check the SPIR-V before handing it to the driver. + // The glslang and shadertoy paths both validate already, but + // a future caller wiring up the build-time-blob path could + // accidentally pass an empty or non-SPIR-V buffer; failing + // here with a clear error beats a `vkCreateShaderModule` + // segfault inside the loader. + if (spirv.len == 0) { + log.err("Module.initFromSpirv: zero-length SPIR-V buffer", .{}); + return error.VulkanFailed; + } + if (spirv[0] != 0x07230203) { + log.err( + "Module.initFromSpirv: missing magic word " ++ + "(got 0x{x:0>8}, expected 0x07230203)", + .{spirv[0]}, + ); + return error.VulkanFailed; + } const info: vk.VkShaderModuleCreateInfo = .{ .sType = vk.VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, .pNext = null,