From 07b27921d4b411d146fc3c5b794d03aa9f7ee6bf Mon Sep 17 00:00:00 2001 From: Nathan Date: Sun, 24 May 2026 21:50:23 -0500 Subject: [PATCH] renderer/vulkan: probe LINEAR modifier, skip copy where supported MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Target probes VK_EXT_image_drm_format_modifier for DRM_FORMAT_MOD_LINEAR with COLOR_ATTACHMENT|TRANSFER_SRC|SAMPLED support. When the driver advertises it (AMD/Intel typically), the render image is allocated directly via VkImageDrmFormatModifierExplicitCreateInfoEXT and its own memory is exported as the dmabuf — no second VkBuffer, no end-of-frame vkCmdCopyImageToBuffer. When not (NVIDIA RTX 2080 / Vulkan 1.4.329 tested), falls back to the existing OPTIMAL+LINEAR-buffer copy path with a one-line warn. Logged the chosen mode at info on each Target init. Renamed Target.recordCopyToDmabuf → recordPresentBarrier; it dispatches on Target.tiling. Direct mode emits just a COLOR_ATTACHMENT_WRITE → HOST_READ memory barrier (image stays GENERAL — same memory backs both render target and host-mapped surface). Legacy mode is unchanged. Adds VK_EXT_image_drm_format_modifier to REQUIRED_DEVICE_EXTENSIONS and vkGetPhysicalDeviceFormatProperties2 to the dispatch table. C ABI is unchanged — ghostty_platform_vulkan_s.present already passes drm_modifier+stride; we just start populating with real probed values. Co-Authored-By: claude-flow --- src/renderer/vulkan/Device.zig | 25 +- src/renderer/vulkan/Frame.zig | 7 +- src/renderer/vulkan/RenderPass.zig | 5 +- src/renderer/vulkan/Target.zig | 509 ++++++++++++++++++++++++----- 4 files changed, 452 insertions(+), 94 deletions(-) diff --git a/src/renderer/vulkan/Device.zig b/src/renderer/vulkan/Device.zig index 20c6289dd..c857d0761 100644 --- a/src/renderer/vulkan/Device.zig +++ b/src/renderer/vulkan/Device.zig @@ -48,14 +48,19 @@ pub const MIN_API_VERSION = vk.VK_API_VERSION_1_3; /// VkDevice setup. 
The host must have created its VkDevice with /// these enabled; we only verify availability here. /// -/// Note: `VK_EXT_image_drm_format_modifier` is intentionally NOT -/// required yet — `vulkan/Target.zig` currently uses -/// `VK_IMAGE_TILING_LINEAR` for dmabuf export, which only needs the -/// two extensions below. When the driver-chosen modifier path lands, -/// add the modifier extension back here. +/// `VK_EXT_image_drm_format_modifier` is what lets +/// `vulkan/Target.zig` probe the per-modifier feature set (in +/// particular: does `DRM_FORMAT_MOD_LINEAR` advertise +/// `COLOR_ATTACHMENT_BIT`?) and, when supported, allocate the render +/// image with `VkImageDrmFormatModifierExplicitCreateInfoEXT` so its +/// memory can be exported as a dmabuf directly — no separate LINEAR +/// `VkBuffer` and no end-of-frame `vkCmdCopyImageToBuffer`. Drivers +/// where the modifier path can't satisfy the requested features fall +/// back to the legacy OPTIMAL-plus-copy path inside `Target`. pub const REQUIRED_DEVICE_EXTENSIONS = [_][:0]const u8{ "VK_KHR_external_memory_fd", "VK_EXT_external_memory_dma_buf", + "VK_EXT_image_drm_format_modifier", }; /// Errors that can come out of `init`. @@ -84,6 +89,13 @@ pub const Dispatch = struct { getPhysicalDeviceProperties: std.meta.Child(vk.PFN_vkGetPhysicalDeviceProperties), getPhysicalDeviceMemoryProperties: std.meta.Child(vk.PFN_vkGetPhysicalDeviceMemoryProperties), getPhysicalDeviceFormatProperties: std.meta.Child(vk.PFN_vkGetPhysicalDeviceFormatProperties), + /// Used by `Target` to chain `VkDrmFormatModifierPropertiesListEXT` + /// and enumerate which DRM modifiers the device exposes for a + /// given format. Vulkan 1.1 promoted `vkGetPhysicalDeviceFormatProperties2` + /// from `VK_KHR_get_physical_device_properties2` into core, so we + /// resolve it under the non-suffixed name — `MIN_API_VERSION` is + /// 1.3 (declared above), well past the promotion. 
+ getPhysicalDeviceFormatProperties2: std.meta.Child(vk.PFN_vkGetPhysicalDeviceFormatProperties2), enumerateDeviceExtensionProperties: std.meta.Child(vk.PFN_vkEnumerateDeviceExtensionProperties), getDeviceProcAddr: std.meta.Child(vk.PFN_vkGetDeviceProcAddr), @@ -307,6 +319,8 @@ pub fn init( try il.load(vk.PFN_vkGetPhysicalDeviceMemoryProperties, "vkGetPhysicalDeviceMemoryProperties"); const get_physical_device_format_properties = try il.load(vk.PFN_vkGetPhysicalDeviceFormatProperties, "vkGetPhysicalDeviceFormatProperties"); + const get_physical_device_format_properties_2 = + try il.load(vk.PFN_vkGetPhysicalDeviceFormatProperties2, "vkGetPhysicalDeviceFormatProperties2"); const enumerate_device_extension_properties = try il.load(vk.PFN_vkEnumerateDeviceExtensionProperties, "vkEnumerateDeviceExtensionProperties"); const get_device_proc_addr = @@ -499,6 +513,7 @@ pub fn init( .getPhysicalDeviceProperties = get_physical_device_properties, .getPhysicalDeviceMemoryProperties = get_physical_device_memory_properties, .getPhysicalDeviceFormatProperties = get_physical_device_format_properties, + .getPhysicalDeviceFormatProperties2 = get_physical_device_format_properties_2, .enumerateDeviceExtensionProperties = enumerate_device_extension_properties, .getDeviceProcAddr = get_device_proc_addr, .getDeviceQueue = get_device_queue, diff --git a/src/renderer/vulkan/Frame.zig b/src/renderer/vulkan/Frame.zig index e24e77068..b0a758a22 100644 --- a/src/renderer/vulkan/Frame.zig +++ b/src/renderer/vulkan/Frame.zig @@ -112,9 +112,10 @@ pub fn complete(self: *const Self, sync: bool) void { _ = sync; const dev = self.device; - // Copy the just-rendered OPTIMAL-tiled image into the - // dmabuf-exported LINEAR pixel buffer. See `Target.zig` for why. - self.target.recordCopyToDmabuf(self.cb); + // Make the rendered pixels visible to the host's mmap read. In + // `.direct` mode this is just a memory barrier; in `.legacy_copy` + // mode it also runs `vkCmdCopyImageToBuffer`. See `Target.zig`. 
+ self.target.recordPresentBarrier(self.cb); { const r = dev.dispatch.endCommandBuffer(self.cb); diff --git a/src/renderer/vulkan/RenderPass.zig b/src/renderer/vulkan/RenderPass.zig index 73b79b81f..117cdda1a 100644 --- a/src/renderer/vulkan/RenderPass.zig +++ b/src/renderer/vulkan/RenderPass.zig @@ -448,8 +448,9 @@ pub fn complete(self: *const Self) void { // and some drivers can mishandle sampling from an out-of-spec // layout). A `.target` attachment is the dmabuf-backed // `frame.target`; the next op is - // `Target.recordCopyToDmabuf` which transitions from GENERAL - // anyway, so leave it in GENERAL here. + // `Target.recordPresentBarrier` which expects GENERAL on entry + // (it either stays in GENERAL in `.direct` mode or transitions to + // TRANSFER_SRC_OPTIMAL in `.legacy_copy`), so leave it in GENERAL here. const image: vk.VkImage, const new_layout: vk.VkImageLayout, const dst_stage: vk.VkPipelineStageFlags, const dst_access: vk.VkAccessFlags = switch (self.attachments[0].target) { .texture => |t| .{ diff --git a/src/renderer/vulkan/Target.zig b/src/renderer/vulkan/Target.zig index a1417b117..19df63eb4 100644 --- a/src/renderer/vulkan/Target.zig +++ b/src/renderer/vulkan/Target.zig @@ -1,30 +1,43 @@ -//! Render target: an OPTIMAL-tiled `VkImage` (the actual color -//! attachment) plus a dmabuf-exported `VkBuffer` containing the -//! rendered bytes in linear BGRA layout. The buffer's fd is the -//! payload of `ghostty_platform_vulkan_s.present`. +//! Render target: a `VkImage` whose memory is exported as a dmabuf +//! fd so the host (Qt) can present it via +//! `ghostty_platform_vulkan_s.present` without a CPU readback round +//! trip through libghostty. //! -//! Why both an image AND a buffer? +//! Two construction modes, picked at `init` time after probing +//! `VK_EXT_image_drm_format_modifier`: //! -//! NVIDIA (and probably others) do NOT expose -//! `FORMAT_FEATURE_COLOR_ATTACHMENT_BIT` for `linearTilingFeatures`. -//! 
That means a LINEAR-tiled `VkImage` cannot be used as a color -//! attachment — the driver accepts the image creation and the draw -//! recording, but actually rasterizes nothing. We confirmed this by -//! probing `vkGetPhysicalDeviceFormatProperties` for -//! `VK_FORMAT_B8G8R8A8_UNORM` (linearTilingFeatures=0x1dc03 without -//! the COLOR_ATTACHMENT bit). +//! - `.direct` — the render image itself is allocated with +//! `VkImageDrmFormatModifierExplicitCreateInfoEXT` +//! (`DRM_FORMAT_MOD_LINEAR`, single plane). Its `VkDeviceMemory` +//! is what we `vkGetMemoryFdKHR` and hand to the host. No second +//! allocation, no end-of-frame copy. Used when the driver +//! advertises `COLOR_ATTACHMENT_BIT | TRANSFER_SRC_BIT | +//! SAMPLED_BIT` for the LINEAR modifier in +//! `VkDrmFormatModifierPropertiesEXT.drmFormatModifierTilingFeatures`. //! -//! So the renderer draws into an OPTIMAL-tiled image (the format the -//! GPU is happy to rasterize into), then copies the result into a -//! LINEAR-laid-out exportable `VkBuffer` via `vkCmdCopyImageToBuffer`. -//! The Qt host mmaps the buffer's dmabuf fd and reads BGRA bytes with -//! the stride we report. +//! - `.legacy_copy` — fallback for drivers (notably NVIDIA at time +//! of writing) that don't expose `COLOR_ATTACHMENT_BIT` for +//! LINEAR via either the legacy `vkGetPhysicalDeviceFormatProperties` +//! query or the modifier-extension query. Allocates an OPTIMAL- +//! tiled render image plus a separate dmabuf-exported LINEAR +//! `VkBuffer`, and inserts a `vkCmdCopyImageToBuffer` at the end +//! of each frame. Behavior identical to the pre-modifier-path +//! code. //! -//! Ownership: libghostty owns the image, buffer, all memory, and the -//! dmabuf fd for the lifetime of the `Target`. The fd is passed to -//! the host via `present` as a borrow; the host must `dup()` if it -//! needs to hold it past the call. `deinit` closes the fd and frees -//! all the memory. +//! Why two modes? 
NVIDIA's `linearTilingFeatures` for BGRA8 doesn't +//! include `COLOR_ATTACHMENT_BIT`, so a LINEAR `VkImage` silently +//! rasterizes nothing (confirmed via +//! `vkGetPhysicalDeviceFormatProperties`: linearTilingFeatures=0x1dc03 +//! for `B8G8R8A8_UNORM`). The modifier-extension query is a separate +//! channel and *may* expose different feature bits per modifier — so +//! we always probe. Where the probe says yes, we drop the redundant +//! buffer + copy; where it says no, we keep working. +//! +//! Ownership: libghostty owns the image, any buffer, all memory, and +//! the dmabuf fd for the lifetime of the `Target`. The fd is passed +//! to the host via `present` as a borrow; the host must `dup()` if +//! it needs to hold it past the call. `deinit` closes the fd and +//! frees all the memory. //! //! Counterpart: `src/renderer/opengl/Target.zig`. @@ -43,6 +56,27 @@ const log = std.log.scoped(.vulkan); /// don't pull in libdrm headers just for a single constant. pub const DRM_FORMAT_MOD_LINEAR: u64 = 0; +/// Upper bound for the number of DRM format modifiers we ever expect +/// a driver to expose for a single format. Real-world drivers expose +/// well under 20 (mostly LINEAR + a handful of vendor tiled variants); +/// 64 gives us comfortable headroom with a ~1 KiB stack buffer and +/// avoids allocator threading through the per-surface init path. +const MAX_MODIFIERS: usize = 64; + +/// Which dmabuf-export strategy this `Target` settled on. See the +/// module-level doc comment for the full rationale. +pub const Tiling = enum { + /// Render image's own memory is exported as the dmabuf. Single + /// plane, `DRM_FORMAT_MOD_LINEAR`. No separate buffer, no copy. + direct, + + /// OPTIMAL render image + separate LINEAR `VkBuffer` dmabuf + /// target. End-of-frame `vkCmdCopyImageToBuffer`. Used when + /// neither tiling channel exposes `COLOR_ATTACHMENT_BIT` for + /// LINEAR. 
+ legacy_copy, +}; + pub const Options = struct { device: *const Device, format: vk.VkFormat, @@ -75,14 +109,21 @@ device: *const Device, /// `device.platform`" (the singleton's copy from the first surface). platform: ?apprt.embedded.Platform.Vulkan = null, -// ---- render image (OPTIMAL, internal) ------------------------------- +/// Which present strategy this target uses. Decides whether +/// `recordPresentBarrier` emits a copy. +tiling: Tiling, + +// ---- render image --------------------------------------------------- +// In `.direct` mode this image's memory is the dmabuf; in +// `.legacy_copy` mode it's internal OPTIMAL memory we copy out of. image: vk.VkImage, image_memory: vk.VkDeviceMemory, view: vk.VkImageView, -// ---- dmabuf buffer (LINEAR pixel bytes, exported) ------------------- -dmabuf_buffer: vk.VkBuffer, -dmabuf_memory: vk.VkDeviceMemory, +// ---- dmabuf buffer (legacy mode only) ------------------------------- +// `null` in `.direct` mode — the image's memory is the dmabuf. +dmabuf_buffer: ?vk.VkBuffer, +dmabuf_memory: ?vk.VkDeviceMemory, format: vk.VkFormat, width: u32, @@ -93,15 +134,248 @@ drm_format: u32, drm_modifier: u64, stride: u32, -/// Current layout of the render image. Tracked so `recordCopyToDmabuf` -/// knows what oldLayout to use in its `COLOR_ATTACHMENT → TRANSFER_SRC` -/// barrier. The renderer transitions it elsewhere too (RenderPass). +/// Current layout of the render image. Tracked so +/// `recordPresentBarrier` knows what oldLayout to use in its barrier. +/// The renderer transitions it elsewhere too (RenderPass). 
layout: vk.VkImageLayout = vk.VK_IMAGE_LAYOUT_UNDEFINED, pub fn init(opts: Options) Error!Self { const dev = opts.device; const drm_format = try vkFormatToDrmFourcc(opts.format); + const required_features: vk.VkFormatFeatureFlags = + @as(vk.VkFormatFeatureFlags, vk.VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) | + vk.VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | + vk.VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; + + if (try probeLinearModifierSupported(dev, opts.format, required_features)) { + log.info( + "Target: direct dmabuf export (LINEAR modifier) {}x{}", + .{ opts.width, opts.height }, + ); + return try initDirect(opts, drm_format); + } else { + log.warn( + "Target: LINEAR modifier lacks COLOR_ATTACHMENT support; " ++ + "falling back to OPTIMAL render + LINEAR-buffer copy", + .{}, + ); + return try initLegacyCopy(opts, drm_format); + } +} + +/// Ask the driver, via `VK_EXT_image_drm_format_modifier`'s +/// per-modifier feature list, whether `DRM_FORMAT_MOD_LINEAR` +/// supports the format-feature flags we need to use the image as a +/// color attachment + transfer source + sampled. +fn probeLinearModifierSupported( + dev: *const Device, + format: vk.VkFormat, + required_features: vk.VkFormatFeatureFlags, +) Error!bool { + var mods: [MAX_MODIFIERS]vk.VkDrmFormatModifierPropertiesEXT = undefined; + + // First pass: get count. 
+ var mod_list: vk.VkDrmFormatModifierPropertiesListEXT = .{ + .sType = vk.VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT, + .pNext = null, + .drmFormatModifierCount = 0, + .pDrmFormatModifierProperties = null, + }; + var props2: vk.VkFormatProperties2 = .{ + .sType = vk.VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, + .pNext = &mod_list, + .formatProperties = std.mem.zeroes(vk.VkFormatProperties), + }; + dev.dispatch.getPhysicalDeviceFormatProperties2( + dev.physical_device, + format, + &props2, + ); + + if (mod_list.drmFormatModifierCount == 0) return false; + if (mod_list.drmFormatModifierCount > MAX_MODIFIERS) { + // Cap to our stack buffer; we only look for LINEAR (which + // tends to be first or close to it), so a truncation here is + // very unlikely to hide it. Log if we ever hit this. + log.warn( + "modifier list truncated: driver reports {}, MAX_MODIFIERS={}", + .{ mod_list.drmFormatModifierCount, MAX_MODIFIERS }, + ); + mod_list.drmFormatModifierCount = MAX_MODIFIERS; + } + + // Second pass: fill list. + mod_list.pDrmFormatModifierProperties = &mods[0]; + dev.dispatch.getPhysicalDeviceFormatProperties2( + dev.physical_device, + format, + &props2, + ); + + for (mods[0..mod_list.drmFormatModifierCount]) |m| { + if (m.drmFormatModifier != DRM_FORMAT_MOD_LINEAR) continue; + // Single-plane only — multi-plane modifiers need a wider + // present-callback ABI (one fd/offset/stride per plane). + if (m.drmFormatModifierPlaneCount != 1) continue; + if ((m.drmFormatModifierTilingFeatures & required_features) == required_features) { + return true; + } + } + return false; +} + +/// `.direct` mode: allocate the render image with +/// `VkImageDrmFormatModifierExplicitCreateInfoEXT` and export its own +/// memory as the dmabuf. 
+fn initDirect(opts: Options, drm_format: u32) Error!Self { + const dev = opts.device; + + const image_usage = @as(vk.VkImageUsageFlags, vk.VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) | + vk.VK_IMAGE_USAGE_SAMPLED_BIT | + vk.VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + opts.extra_usage; + + // BGRA8, single-plane LINEAR — rowPitch is just width * bpp. + const bytes_per_pixel: u32 = 4; + const row_pitch: vk.VkDeviceSize = @as(vk.VkDeviceSize, opts.width) * bytes_per_pixel; + + // ---- 1. Image: LINEAR-modifier, externally-shareable ----------- + const plane_layout: vk.VkSubresourceLayout = .{ + .offset = 0, + .size = 0, // ignored for EXPLICIT create-info + .rowPitch = row_pitch, + .arrayPitch = 0, + .depthPitch = 0, + }; + const mod_create: vk.VkImageDrmFormatModifierExplicitCreateInfoEXT = .{ + .sType = vk.VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT, + .pNext = null, + .drmFormatModifier = DRM_FORMAT_MOD_LINEAR, + .drmFormatModifierPlaneCount = 1, + .pPlaneLayouts = &plane_layout, + }; + const ext_image_info: vk.VkExternalMemoryImageCreateInfo = .{ + .sType = vk.VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, + .pNext = &mod_create, + .handleTypes = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, + }; + const image_info: vk.VkImageCreateInfo = .{ + .sType = vk.VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = &ext_image_info, + .flags = 0, + .imageType = vk.VK_IMAGE_TYPE_2D, + .format = opts.format, + .extent = .{ .width = opts.width, .height = opts.height, .depth = 1 }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = vk.VK_SAMPLE_COUNT_1_BIT, + .tiling = vk.VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT, + .usage = image_usage, + .sharingMode = vk.VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = null, + .initialLayout = vk.VK_IMAGE_LAYOUT_UNDEFINED, + }; + var image: vk.VkImage = undefined; + if (dev.dispatch.createImage(dev.device, &image_info, null, &image) != vk.VK_SUCCESS) { + log.err("vkCreateImage (Target direct) 
failed", .{}); + return error.VulkanFailed; + } + errdefer dev.dispatch.destroyImage(dev.device, image, null); + + // ---- 2. Image memory: exportable, host-cacheable for Qt mmap --- + var image_reqs: vk.VkMemoryRequirements = undefined; + dev.dispatch.getImageMemoryRequirements(dev.device, image, &image_reqs); + + // HOST_CACHED matters: Qt's `presentVulkanDmabuf` mmaps and reads + // every pixel into a QImage. Without HOST_CACHED, NVIDIA hands + // back write-combining memory and that read crawls (see legacy + // path note for the ~260 ms regression we hit). HOST_COHERENT + // avoids explicit flushes. Fall back to uncached if cached isn't + // available for the memory type bits the image requires. + const host_flags_cached = + @as(vk.VkMemoryPropertyFlags, vk.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) | + vk.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + vk.VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + const host_flags_uncached = + @as(vk.VkMemoryPropertyFlags, vk.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) | + vk.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + const image_mem_idx = dev.findMemoryType(image_reqs.memoryTypeBits, host_flags_cached) orelse + dev.findMemoryType(image_reqs.memoryTypeBits, host_flags_uncached) orelse + { + log.err( + "no HOST_VISIBLE memory type for direct dmabuf image (typeBits=0x{x})", + .{image_reqs.memoryTypeBits}, + ); + return error.NoSuitableMemoryType; + }; + const export_info: vk.VkExportMemoryAllocateInfo = .{ + .sType = vk.VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO, + .pNext = null, + .handleTypes = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, + }; + const image_alloc: vk.VkMemoryAllocateInfo = .{ + .sType = vk.VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = &export_info, + .allocationSize = image_reqs.size, + .memoryTypeIndex = image_mem_idx, + }; + var image_memory: vk.VkDeviceMemory = undefined; + if (dev.dispatch.allocateMemory(dev.device, &image_alloc, null, &image_memory) != vk.VK_SUCCESS) { + log.err("vkAllocateMemory (Target direct image) 
failed", .{}); + return error.VulkanFailed; + } + errdefer dev.dispatch.freeMemory(dev.device, image_memory, null); + if (dev.dispatch.bindImageMemory(dev.device, image, image_memory, 0) != vk.VK_SUCCESS) { + log.err("vkBindImageMemory (Target direct image) failed", .{}); + return error.VulkanFailed; + } + + // ---- 3. View --------------------------------------------------- + const view = try createView(dev, image, opts.format); + errdefer dev.dispatch.destroyImageView(dev.device, view, null); + + // ---- 4. Export memory as dmabuf fd ----------------------------- + const fd = try exportDmabufFd(dev, image_memory); + errdefer std.posix.close(fd); + + // ---- 5. Query the actual plane stride -------------------------- + // We requested rowPitch = width * 4 via EXPLICIT create-info, but + // the driver can technically round up; ask for what we actually got. + var subres: vk.VkImageSubresource = .{ + .aspectMask = vk.VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT, + .mipLevel = 0, + .arrayLayer = 0, + }; + var layout: vk.VkSubresourceLayout = undefined; + dev.dispatch.getImageSubresourceLayout(dev.device, image, &subres, &layout); + + return .{ + .device = dev, + .platform = opts.platform, + .tiling = .direct, + .image = image, + .image_memory = image_memory, + .view = view, + .dmabuf_buffer = null, + .dmabuf_memory = null, + .format = opts.format, + .width = opts.width, + .height = opts.height, + .fd = fd, + .drm_format = drm_format, + .drm_modifier = DRM_FORMAT_MOD_LINEAR, + .stride = @intCast(layout.rowPitch), + }; +} + +/// `.legacy_copy` mode: OPTIMAL render image + separate LINEAR +/// dmabuf-exported `VkBuffer`. Behavior identical to the +/// pre-modifier-path code. +fn initLegacyCopy(opts: Options, drm_format: u32) Error!Self { + const dev = opts.device; + // BGRA8 — 4 bytes/pixel, packed (no per-row padding). 
const bytes_per_pixel: u32 = 4; const stride: u32 = opts.width * bytes_per_pixel; @@ -131,7 +405,7 @@ pub fn init(opts: Options) Error!Self { }; var image: vk.VkImage = undefined; if (dev.dispatch.createImage(dev.device, &image_info, null, &image) != vk.VK_SUCCESS) { - log.err("vkCreateImage (Target render) failed", .{}); + log.err("vkCreateImage (Target legacy render) failed", .{}); return error.VulkanFailed; } errdefer dev.dispatch.destroyImage(dev.device, image, null); @@ -150,42 +424,17 @@ pub fn init(opts: Options) Error!Self { }; var image_memory: vk.VkDeviceMemory = undefined; if (dev.dispatch.allocateMemory(dev.device, &image_alloc, null, &image_memory) != vk.VK_SUCCESS) { - log.err("vkAllocateMemory (Target render image) failed", .{}); + log.err("vkAllocateMemory (Target legacy render image) failed", .{}); return error.VulkanFailed; } errdefer dev.dispatch.freeMemory(dev.device, image_memory, null); if (dev.dispatch.bindImageMemory(dev.device, image, image_memory, 0) != vk.VK_SUCCESS) { - log.err("vkBindImageMemory (Target render image) failed", .{}); + log.err("vkBindImageMemory (Target legacy render image) failed", .{}); return error.VulkanFailed; } - // ---- 2. 
ImageView on the render image ------------------------- - const view_info: vk.VkImageViewCreateInfo = .{ - .sType = vk.VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .pNext = null, - .flags = 0, - .image = image, - .viewType = vk.VK_IMAGE_VIEW_TYPE_2D, - .format = opts.format, - .components = .{ - .r = vk.VK_COMPONENT_SWIZZLE_IDENTITY, - .g = vk.VK_COMPONENT_SWIZZLE_IDENTITY, - .b = vk.VK_COMPONENT_SWIZZLE_IDENTITY, - .a = vk.VK_COMPONENT_SWIZZLE_IDENTITY, - }, - .subresourceRange = .{ - .aspectMask = vk.VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }; - var view: vk.VkImageView = undefined; - if (dev.dispatch.createImageView(dev.device, &view_info, null, &view) != vk.VK_SUCCESS) { - log.err("vkCreateImageView (Target) failed", .{}); - return error.VulkanFailed; - } + // ---- 2. View --------------------------------------------------- + const view = try createView(dev, image, opts.format); errdefer dev.dispatch.destroyImageView(dev.device, view, null); // ---- 3. 
Dmabuf buffer: LINEAR pixel data, external memory ----- @@ -260,22 +509,13 @@ pub fn init(opts: Options) Error!Self { return error.VulkanFailed; } - const fd_info: vk.VkMemoryGetFdInfoKHR = .{ - .sType = vk.VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR, - .pNext = null, - .memory = dmabuf_memory, - .handleType = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, - }; - var fd: c_int = -1; - if (dev.dispatch.getMemoryFdKHR(dev.device, &fd_info, &fd) != vk.VK_SUCCESS or fd < 0) { - log.err("vkGetMemoryFdKHR (Target dmabuf) failed: fd={}", .{fd}); - return error.VulkanFailed; - } + const fd = try exportDmabufFd(dev, dmabuf_memory); errdefer std.posix.close(fd); return .{ .device = dev, .platform = opts.platform, + .tiling = .legacy_copy, .image = image, .image_memory = image_memory, .view = view, @@ -291,27 +531,128 @@ pub fn init(opts: Options) Error!Self { }; } +fn createView( + dev: *const Device, + image: vk.VkImage, + format: vk.VkFormat, +) Error!vk.VkImageView { + const view_info: vk.VkImageViewCreateInfo = .{ + .sType = vk.VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = null, + .flags = 0, + .image = image, + .viewType = vk.VK_IMAGE_VIEW_TYPE_2D, + .format = format, + .components = .{ + .r = vk.VK_COMPONENT_SWIZZLE_IDENTITY, + .g = vk.VK_COMPONENT_SWIZZLE_IDENTITY, + .b = vk.VK_COMPONENT_SWIZZLE_IDENTITY, + .a = vk.VK_COMPONENT_SWIZZLE_IDENTITY, + }, + .subresourceRange = .{ + .aspectMask = vk.VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + var view: vk.VkImageView = undefined; + if (dev.dispatch.createImageView(dev.device, &view_info, null, &view) != vk.VK_SUCCESS) { + log.err("vkCreateImageView (Target) failed", .{}); + return error.VulkanFailed; + } + return view; +} + +fn exportDmabufFd(dev: *const Device, memory: vk.VkDeviceMemory) Error!i32 { + const fd_info: vk.VkMemoryGetFdInfoKHR = .{ + .sType = vk.VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR, + .pNext = null, + .memory = memory, + 
.handleType = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT, + }; + var fd: c_int = -1; + if (dev.dispatch.getMemoryFdKHR(dev.device, &fd_info, &fd) != vk.VK_SUCCESS or fd < 0) { + log.err("vkGetMemoryFdKHR (Target) failed: fd={}", .{fd}); + return error.VulkanFailed; + } + return fd; +} + pub fn deinit(self: *Self) void { const dev = self.device; if (self.fd >= 0) std.posix.close(self.fd); - dev.dispatch.destroyBuffer(dev.device, self.dmabuf_buffer, null); - dev.dispatch.freeMemory(dev.device, self.dmabuf_memory, null); + if (self.dmabuf_buffer) |b| dev.dispatch.destroyBuffer(dev.device, b, null); + if (self.dmabuf_memory) |m| dev.dispatch.freeMemory(dev.device, m, null); dev.dispatch.destroyImageView(dev.device, self.view, null); dev.dispatch.destroyImage(dev.device, self.image, null); dev.dispatch.freeMemory(dev.device, self.image_memory, null); self.* = undefined; } -/// Record the GPU commands that copy the render image into the -/// dmabuf-exported buffer. Call this AFTER all RenderPass work has -/// been recorded but BEFORE `vkEndCommandBuffer`. +/// Record the end-of-frame barrier(s) that make the rendered pixels +/// visible to the host's later mmap read. Dispatches on `self.tiling`: /// -/// Barriers: render image must transition from whatever the -/// RenderPass left it in (`GENERAL` after `RenderPass.complete`) to -/// `TRANSFER_SRC_OPTIMAL`. The dmabuf buffer doesn't have layouts; -/// we just add a memory barrier so the host's later read sees the -/// transferred bytes. -pub fn recordCopyToDmabuf(self: *Self, cb: vk.VkCommandBuffer) void { +/// - `.direct`: just an image layout/memory barrier — the render +/// image's own memory is the dmabuf, so we transition +/// `GENERAL → GENERAL` with `COLOR_ATTACHMENT_WRITE → HOST_READ` +/// visibility (`COLOR_ATTACHMENT_OUTPUT → HOST` stages). The +/// LINEAR-modifier image stays in GENERAL throughout — it's both +/// the render target and the host-mapped surface. 
+/// +/// - `.legacy_copy`: the original behavior — transition the +/// render image to `TRANSFER_SRC_OPTIMAL`, `vkCmdCopyImageToBuffer` +/// into the dmabuf buffer, buffer-memory barrier for HOST_READ +/// visibility. +/// +/// Call this AFTER all RenderPass work has been recorded but BEFORE +/// `vkEndCommandBuffer`. +pub fn recordPresentBarrier(self: *Self, cb: vk.VkCommandBuffer) void { + switch (self.tiling) { + .direct => self.recordDirectBarrier(cb), + .legacy_copy => self.recordCopyToDmabuf(cb), + } +} + +fn recordDirectBarrier(self: *Self, cb: vk.VkCommandBuffer) void { + const dev = self.device; + + // Image stays in GENERAL — it's the render target AND the + // host-mapped surface. We only need a memory barrier so the host's + // mmap read sees the writes from the COLOR_ATTACHMENT_OUTPUT stage. + const img_barrier: vk.VkImageMemoryBarrier = .{ + .sType = vk.VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = null, + .srcAccessMask = vk.VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + .dstAccessMask = vk.VK_ACCESS_HOST_READ_BIT, + .oldLayout = vk.VK_IMAGE_LAYOUT_GENERAL, + .newLayout = vk.VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED, + .image = self.image, + .subresourceRange = .{ + .aspectMask = vk.VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + dev.dispatch.cmdPipelineBarrier( + cb, + vk.VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + vk.VK_PIPELINE_STAGE_HOST_BIT, + 0, + 0, null, + 0, null, + 1, &img_barrier, + ); + + self.layout = vk.VK_IMAGE_LAYOUT_GENERAL; +} + +fn recordCopyToDmabuf(self: *Self, cb: vk.VkCommandBuffer) void { const dev = self.device; // Image: GENERAL → TRANSFER_SRC_OPTIMAL (the RenderPass leaves us @@ -363,7 +704,7 @@ pub fn recordCopyToDmabuf(self: *Self, cb: vk.VkCommandBuffer) void { cb, self.image, vk.VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - self.dmabuf_buffer, + 
self.dmabuf_buffer.?, 1, &region, ); @@ -380,7 +721,7 @@ pub fn recordCopyToDmabuf(self: *Self, cb: vk.VkCommandBuffer) void { .dstAccessMask = vk.VK_ACCESS_HOST_READ_BIT, .srcQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED, - .buffer = self.dmabuf_buffer, + .buffer = self.dmabuf_buffer.?, .offset = 0, .size = vk.VK_WHOLE_SIZE, };