diff --git a/src/renderer/generic.zig b/src/renderer/generic.zig index 09963be07..cc0f3b303 100644 --- a/src/renderer/generic.zig +++ b/src/renderer/generic.zig @@ -1455,15 +1455,6 @@ pub fn Renderer(comptime GraphicsAPI: type) type { self: *Self, sync: bool, ) !void { - // const start = std.time.Instant.now() catch unreachable; - // const start_micro = std.time.microTimestamp(); - // defer { - // const end = std.time.Instant.now() catch unreachable; - // log.warn( - // "[drawFrame time] start_micro={} duration={}ns", - // .{ start_micro, end.since(start) / std.time.ns_per_us }, - // ); - // } // We hold a the draw mutex to prevent changes to any // data we access while we're in the middle of drawing. diff --git a/src/renderer/vulkan/Frame.zig b/src/renderer/vulkan/Frame.zig index 87a92ab6d..a3a32ec44 100644 --- a/src/renderer/vulkan/Frame.zig +++ b/src/renderer/vulkan/Frame.zig @@ -113,11 +113,7 @@ pub fn complete(self: *const Self, sync: bool) void { const dev = self.device; // Copy the just-rendered OPTIMAL-tiled image into the - // dmabuf-exported LINEAR pixel buffer. NVIDIA (and most - // discrete GPUs) refuse `FORMAT_FEATURE_COLOR_ATTACHMENT_BIT` - // on linear-tiled images, so the renderer draws into an - // OPTIMAL image and a transfer copy bridges to the dmabuf - // consumer. See `Target.zig` for the full rationale. + // dmabuf-exported LINEAR pixel buffer. See `Target.zig` for why. self.target.recordCopyToDmabuf(self.cb); { @@ -165,13 +161,9 @@ pub fn complete(self: *const Self, sync: bool) void { } } - // Hand the rendered target off to the host. This mirrors what - // `opengl/Frame.zig`'s `complete` does at the same point: it - // calls `self.renderer.api.present(self.target.*)`. Our analog - // is `Target.present()`, which routes through the platform's - // `present` callback (the apprt-side dmabuf consumer). Also - // stash on the renderer's `last_target` for `presentLastTarget` - // re-presents on no-op frames. + // Hand the rendered target off to the host via `Vulkan.present`, + // which both calls the platform's present callback AND records + // the target pointer for `presentLastTarget` no-op republishes. self.renderer.api.present(self.target) catch |err| { log.err("present failed: {}", .{err}); }; diff --git a/src/renderer/vulkan/Target.zig b/src/renderer/vulkan/Target.zig index beb8d9c88..a1417b117 100644 --- a/src/renderer/vulkan/Target.zig +++ b/src/renderer/vulkan/Target.zig @@ -213,20 +213,31 @@ pub fn init(opts: Options) Error!Self { var buf_reqs: vk.VkMemoryRequirements = undefined; dev.dispatch.getBufferMemoryRequirements(dev.device, dmabuf_buffer, &buf_reqs); - // Must be HOST_VISIBLE | HOST_COHERENT so the dmabuf fd is - // mmap-able from userspace. NVIDIA's dmabuf-exportable memory - // includes a host-visible type alongside the device-local ones; - // we explicitly request both flags so we don't accidentally pick - // a VRAM-only type whose mmap returns garbage. - const host_flags = @as(vk.VkMemoryPropertyFlags, vk.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) | + // Prefer HOST_CACHED so reads from the mmap'd dmabuf are fast. + // Without it (HOST_VISIBLE | HOST_COHERENT only), NVIDIA gives + // back write-combining memory: GPU writes are fast but HOST reads + // crawl (~10 MB/s) because the mapping is uncached. The Qt + // `presentVulkanDmabuf` `QImage::copy()` reads every pixel, so a + // small ~3 MB frame took ~260 ms there. HOST_COHERENT is still + // requested so we don't need explicit flushes between GPU writes + // and host reads; HOST_CACHED on top makes the host reads + // cacheable. + const host_flags_cached = + @as(vk.VkMemoryPropertyFlags, vk.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) | + vk.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + vk.VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + const host_flags_uncached = + @as(vk.VkMemoryPropertyFlags, vk.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) | vk.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - const dmabuf_mem_idx = dev.findMemoryType(buf_reqs.memoryTypeBits, host_flags) orelse { - log.err( - "no HOST_VISIBLE | HOST_COHERENT memory type for dmabuf (typeBits=0x{x})", - .{buf_reqs.memoryTypeBits}, - ); - return error.NoSuitableMemoryType; - }; + const dmabuf_mem_idx = dev.findMemoryType(buf_reqs.memoryTypeBits, host_flags_cached) orelse + dev.findMemoryType(buf_reqs.memoryTypeBits, host_flags_uncached) orelse + { + log.err( + "no HOST_VISIBLE memory type for dmabuf (typeBits=0x{x})", + .{buf_reqs.memoryTypeBits}, + ); + return error.NoSuitableMemoryType; + }; const export_info: vk.VkExportMemoryAllocateInfo = .{ .sType = vk.VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO, .pNext = null,