renderer/vulkan: probe LINEAR modifier, skip copy where supported

Target probes VK_EXT_image_drm_format_modifier for DRM_FORMAT_MOD_LINEAR
with COLOR_ATTACHMENT|TRANSFER_SRC|SAMPLED support. When the driver
advertises it (AMD/Intel typically), the render image is allocated
directly via VkImageDrmFormatModifierExplicitCreateInfoEXT and its own
memory is exported as the dmabuf — no second VkBuffer, no end-of-frame
vkCmdCopyImageToBuffer. When not (NVIDIA RTX 2080 / Vulkan 1.4.329
tested), falls back to the existing OPTIMAL+LINEAR-buffer copy path
with a one-line warn.

Logged the chosen mode at info on each Target init.

Renamed Target.recordCopyToDmabuf → recordPresentBarrier; it dispatches
on Target.tiling. Direct mode emits just a COLOR_ATTACHMENT_WRITE →
HOST_READ memory barrier (image stays GENERAL — same memory backs both
render target and host-mapped surface). Legacy mode is unchanged.

Adds VK_EXT_image_drm_format_modifier to REQUIRED_DEVICE_EXTENSIONS and
vkGetPhysicalDeviceFormatProperties2 to the dispatch table.

C ABI is unchanged — ghostty_platform_vulkan_s.present already passes
drm_modifier+stride; we just start populating with real probed values.

Co-Authored-By: claude-flow <ruv@ruv.net>
pull/12846/head
Nathan 2026-05-24 21:50:23 -05:00
parent 6ba3d06b92
commit 07b27921d4
4 changed files with 452 additions and 94 deletions

View File

@ -48,14 +48,19 @@ pub const MIN_API_VERSION = vk.VK_API_VERSION_1_3;
/// VkDevice setup. The host must have created its VkDevice with
/// these enabled; we only verify availability here.
///
/// Note: `VK_EXT_image_drm_format_modifier` is intentionally NOT
/// required yet `vulkan/Target.zig` currently uses
/// `VK_IMAGE_TILING_LINEAR` for dmabuf export, which only needs the
/// two extensions below. When the driver-chosen modifier path lands,
/// add the modifier extension back here.
/// `VK_EXT_image_drm_format_modifier` is what lets
/// `vulkan/Target.zig` probe the per-modifier feature set (in
/// particular: does `DRM_FORMAT_MOD_LINEAR` advertise
/// `COLOR_ATTACHMENT_BIT`?) and, when supported, allocate the render
/// image with `VkImageDrmFormatModifierExplicitCreateInfoEXT` so its
/// memory can be exported as a dmabuf directly no separate LINEAR
/// `VkBuffer` and no end-of-frame `vkCmdCopyImageToBuffer`. Drivers
/// where the modifier path can't satisfy the requested features fall
/// back to the legacy OPTIMAL-plus-copy path inside `Target`.
pub const REQUIRED_DEVICE_EXTENSIONS = [_][:0]const u8{
"VK_KHR_external_memory_fd",
"VK_EXT_external_memory_dma_buf",
"VK_EXT_image_drm_format_modifier",
};
/// Errors that can come out of `init`.
@ -84,6 +89,13 @@ pub const Dispatch = struct {
getPhysicalDeviceProperties: std.meta.Child(vk.PFN_vkGetPhysicalDeviceProperties),
getPhysicalDeviceMemoryProperties: std.meta.Child(vk.PFN_vkGetPhysicalDeviceMemoryProperties),
getPhysicalDeviceFormatProperties: std.meta.Child(vk.PFN_vkGetPhysicalDeviceFormatProperties),
/// Used by `Target` to chain `VkDrmFormatModifierPropertiesListEXT`
/// and enumerate which DRM modifiers the device exposes for a
/// given format. Vulkan 1.1 promoted `vkGetPhysicalDeviceFormatProperties2`
/// from `VK_KHR_get_physical_device_properties2` into core, so we
/// resolve it under the non-suffixed name `MIN_API_VERSION` is
/// 1.3 (see line 45), well past the promotion.
getPhysicalDeviceFormatProperties2: std.meta.Child(vk.PFN_vkGetPhysicalDeviceFormatProperties2),
enumerateDeviceExtensionProperties: std.meta.Child(vk.PFN_vkEnumerateDeviceExtensionProperties),
getDeviceProcAddr: std.meta.Child(vk.PFN_vkGetDeviceProcAddr),
@ -307,6 +319,8 @@ pub fn init(
try il.load(vk.PFN_vkGetPhysicalDeviceMemoryProperties, "vkGetPhysicalDeviceMemoryProperties");
const get_physical_device_format_properties =
try il.load(vk.PFN_vkGetPhysicalDeviceFormatProperties, "vkGetPhysicalDeviceFormatProperties");
const get_physical_device_format_properties_2 =
try il.load(vk.PFN_vkGetPhysicalDeviceFormatProperties2, "vkGetPhysicalDeviceFormatProperties2");
const enumerate_device_extension_properties =
try il.load(vk.PFN_vkEnumerateDeviceExtensionProperties, "vkEnumerateDeviceExtensionProperties");
const get_device_proc_addr =
@ -499,6 +513,7 @@ pub fn init(
.getPhysicalDeviceProperties = get_physical_device_properties,
.getPhysicalDeviceMemoryProperties = get_physical_device_memory_properties,
.getPhysicalDeviceFormatProperties = get_physical_device_format_properties,
.getPhysicalDeviceFormatProperties2 = get_physical_device_format_properties_2,
.enumerateDeviceExtensionProperties = enumerate_device_extension_properties,
.getDeviceProcAddr = get_device_proc_addr,
.getDeviceQueue = get_device_queue,

View File

@ -112,9 +112,10 @@ pub fn complete(self: *const Self, sync: bool) void {
_ = sync;
const dev = self.device;
// Copy the just-rendered OPTIMAL-tiled image into the
// dmabuf-exported LINEAR pixel buffer. See `Target.zig` for why.
self.target.recordCopyToDmabuf(self.cb);
// Make the rendered pixels visible to the host's mmap read. In
// `.direct` mode this is just a memory barrier; in `.legacy_copy`
// mode it also runs `vkCmdCopyImageToBuffer`. See `Target.zig`.
self.target.recordPresentBarrier(self.cb);
{
const r = dev.dispatch.endCommandBuffer(self.cb);

View File

@ -448,8 +448,9 @@ pub fn complete(self: *const Self) void {
// and some drivers can mishandle sampling from an out-of-spec
// layout). A `.target` attachment is the dmabuf-backed
// `frame.target`; the next op is
// `Target.recordCopyToDmabuf` which transitions from GENERAL
// anyway, so leave it in GENERAL here.
// `Target.recordPresentBarrier` which expects GENERAL on entry
// (it either stays in GENERAL in `.direct` mode or transitions to
// TRANSFER_SRC_OPTIMAL in `.legacy_copy`), so leave it in GENERAL here.
const image: vk.VkImage, const new_layout: vk.VkImageLayout, const dst_stage: vk.VkPipelineStageFlags, const dst_access: vk.VkAccessFlags =
switch (self.attachments[0].target) {
.texture => |t| .{

View File

@ -1,30 +1,43 @@
//! Render target: an OPTIMAL-tiled `VkImage` (the actual color
//! attachment) plus a dmabuf-exported `VkBuffer` containing the
//! rendered bytes in linear BGRA layout. The buffer's fd is the
//! payload of `ghostty_platform_vulkan_s.present`.
//! Render target: a `VkImage` whose memory is exported as a dmabuf
//! fd so the host (Qt) can present it via
//! `ghostty_platform_vulkan_s.present` without a CPU readback round
//! trip through libghostty.
//!
//! Why both an image AND a buffer?
//! Two construction modes, picked at `init` time after probing
//! `VK_EXT_image_drm_format_modifier`:
//!
//! NVIDIA (and probably others) do NOT expose
//! `FORMAT_FEATURE_COLOR_ATTACHMENT_BIT` for `linearTilingFeatures`.
//! That means a LINEAR-tiled `VkImage` cannot be used as a color
//! attachment the driver accepts the image creation and the draw
//! recording, but actually rasterizes nothing. We confirmed this by
//! probing `vkGetPhysicalDeviceFormatProperties` for
//! `VK_FORMAT_B8G8R8A8_UNORM` (linearTilingFeatures=0x1dc03 without
//! the COLOR_ATTACHMENT bit).
//! - `.direct` the render image itself is allocated with
//! `VkImageDrmFormatModifierExplicitCreateInfoEXT`
//! (`DRM_FORMAT_MOD_LINEAR`, single plane). Its `VkDeviceMemory`
//! is what we `vkGetMemoryFdKHR` and hand to the host. No second
//! allocation, no end-of-frame copy. Used when the driver
//! advertises `COLOR_ATTACHMENT_BIT | TRANSFER_SRC_BIT |
//! SAMPLED_BIT` for the LINEAR modifier in
//! `VkDrmFormatModifierPropertiesEXT.drmFormatModifierTilingFeatures`.
//!
//! So the renderer draws into an OPTIMAL-tiled image (the format the
//! GPU is happy to rasterize into), then copies the result into a
//! LINEAR-laid-out exportable `VkBuffer` via `vkCmdCopyImageToBuffer`.
//! The Qt host mmaps the buffer's dmabuf fd and reads BGRA bytes with
//! the stride we report.
//! - `.legacy_copy` fallback for drivers (notably NVIDIA at time
//! of writing) that don't expose `COLOR_ATTACHMENT_BIT` for
//! LINEAR via either the legacy `vkGetPhysicalDeviceFormatProperties`
//! query or the modifier-extension query. Allocates an OPTIMAL-
//! tiled render image plus a separate dmabuf-exported LINEAR
//! `VkBuffer`, and inserts a `vkCmdCopyImageToBuffer` at the end
//! of each frame. Behavior identical to the pre-modifier-path
//! code.
//!
//! Ownership: libghostty owns the image, buffer, all memory, and the
//! dmabuf fd for the lifetime of the `Target`. The fd is passed to
//! the host via `present` as a borrow; the host must `dup()` if it
//! needs to hold it past the call. `deinit` closes the fd and frees
//! all the memory.
//! Why two modes? NVIDIA's `linearTilingFeatures` for BGRA8 doesn't
//! include `COLOR_ATTACHMENT_BIT`, so a LINEAR `VkImage` silently
//! rasterizes nothing (confirmed via
//! `vkGetPhysicalDeviceFormatProperties`: linearTilingFeatures=0x1dc03
//! for `B8G8R8A8_UNORM`). The modifier-extension query is a separate
//! channel and *may* expose different feature bits per modifier so
//! we always probe. Where the probe says yes, we drop the redundant
//! buffer + copy; where it says no, we keep working.
//!
//! Ownership: libghostty owns the image, any buffer, all memory, and
//! the dmabuf fd for the lifetime of the `Target`. The fd is passed
//! to the host via `present` as a borrow; the host must `dup()` if
//! it needs to hold it past the call. `deinit` closes the fd and
//! frees all the memory.
//!
//! Counterpart: `src/renderer/opengl/Target.zig`.
@ -43,6 +56,27 @@ const log = std.log.scoped(.vulkan);
/// don't pull in libdrm headers just for a single constant.
pub const DRM_FORMAT_MOD_LINEAR: u64 = 0;
/// Upper bound for the number of DRM format modifiers we ever expect
/// a driver to expose for a single format. Real-world drivers expose
/// well under 20 (mostly LINEAR + a handful of vendor tiled variants);
/// 64 gives us comfortable headroom with a ~1.5 KiB stack buffer and
/// avoids allocator threading through the per-surface init path.
const MAX_MODIFIERS: usize = 64;
/// Which dmabuf-export strategy this `Target` settled on. See the
/// module-level doc comment for the full rationale.
pub const Tiling = enum {
/// Render image's own memory is exported as the dmabuf. Single
/// plane, `DRM_FORMAT_MOD_LINEAR`. No separate buffer, no copy.
direct,
/// OPTIMAL render image + separate LINEAR `VkBuffer` dmabuf
/// target. End-of-frame `vkCmdCopyImageToBuffer`. Used when
/// neither tiling channel exposes `COLOR_ATTACHMENT_BIT` for
/// LINEAR.
legacy_copy,
};
pub const Options = struct {
device: *const Device,
format: vk.VkFormat,
@ -75,14 +109,21 @@ device: *const Device,
/// `device.platform`" (the singleton's copy from the first surface).
platform: ?apprt.embedded.Platform.Vulkan = null,
// ---- render image (OPTIMAL, internal) -------------------------------
/// Which present strategy this target uses. Decides whether
/// `recordPresentBarrier` emits a copy.
tiling: Tiling,
// ---- render image ---------------------------------------------------
// In `.direct` mode this image's memory is the dmabuf; in
// `.legacy_copy` mode it's internal OPTIMAL memory we copy out of.
image: vk.VkImage,
image_memory: vk.VkDeviceMemory,
view: vk.VkImageView,
// ---- dmabuf buffer (LINEAR pixel bytes, exported) -------------------
dmabuf_buffer: vk.VkBuffer,
dmabuf_memory: vk.VkDeviceMemory,
// ---- dmabuf buffer (legacy mode only) -------------------------------
// `null` in `.direct` mode the image's memory is the dmabuf.
dmabuf_buffer: ?vk.VkBuffer,
dmabuf_memory: ?vk.VkDeviceMemory,
format: vk.VkFormat,
width: u32,
@ -93,15 +134,248 @@ drm_format: u32,
drm_modifier: u64,
stride: u32,
/// Current layout of the render image. Tracked so `recordCopyToDmabuf`
/// knows what oldLayout to use in its `COLOR_ATTACHMENT TRANSFER_SRC`
/// barrier. The renderer transitions it elsewhere too (RenderPass).
/// Current layout of the render image. Tracked so
/// `recordPresentBarrier` knows what oldLayout to use in its barrier.
/// The renderer transitions it elsewhere too (RenderPass).
layout: vk.VkImageLayout = vk.VK_IMAGE_LAYOUT_UNDEFINED,
pub fn init(opts: Options) Error!Self {
const dev = opts.device;
const drm_format = try vkFormatToDrmFourcc(opts.format);
const required_features: vk.VkFormatFeatureFlags =
@as(vk.VkFormatFeatureFlags, vk.VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) |
vk.VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
vk.VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
if (try probeLinearModifierSupported(dev, opts.format, required_features)) {
log.info(
"Target: direct dmabuf export (LINEAR modifier) {}x{}",
.{ opts.width, opts.height },
);
return try initDirect(opts, drm_format);
} else {
log.warn(
"Target: LINEAR modifier lacks COLOR_ATTACHMENT support; " ++
"falling back to OPTIMAL render + LINEAR-buffer copy",
.{},
);
return try initLegacyCopy(opts, drm_format);
}
}
/// Ask the driver, via `VK_EXT_image_drm_format_modifier`'s
/// per-modifier feature list, whether `DRM_FORMAT_MOD_LINEAR`
/// supports the format-feature flags we need to use the image as a
/// color attachment + transfer source + sampled.
fn probeLinearModifierSupported(
dev: *const Device,
format: vk.VkFormat,
required_features: vk.VkFormatFeatureFlags,
) Error!bool {
var mods: [MAX_MODIFIERS]vk.VkDrmFormatModifierPropertiesEXT = undefined;
// First pass: get count.
var mod_list: vk.VkDrmFormatModifierPropertiesListEXT = .{
.sType = vk.VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT,
.pNext = null,
.drmFormatModifierCount = 0,
.pDrmFormatModifierProperties = null,
};
var props2: vk.VkFormatProperties2 = .{
.sType = vk.VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
.pNext = &mod_list,
.formatProperties = std.mem.zeroes(vk.VkFormatProperties),
};
dev.dispatch.getPhysicalDeviceFormatProperties2(
dev.physical_device,
format,
&props2,
);
if (mod_list.drmFormatModifierCount == 0) return false;
if (mod_list.drmFormatModifierCount > MAX_MODIFIERS) {
// Cap to our stack buffer; we only look for LINEAR (which
// tends to be first or close to it), so a truncation here is
// very unlikely to hide it. Log if we ever hit this.
log.warn(
"modifier list truncated: driver reports {}, MAX_MODIFIERS={}",
.{ mod_list.drmFormatModifierCount, MAX_MODIFIERS },
);
mod_list.drmFormatModifierCount = MAX_MODIFIERS;
}
// Second pass: fill list.
mod_list.pDrmFormatModifierProperties = &mods[0];
dev.dispatch.getPhysicalDeviceFormatProperties2(
dev.physical_device,
format,
&props2,
);
for (mods[0..mod_list.drmFormatModifierCount]) |m| {
if (m.drmFormatModifier != DRM_FORMAT_MOD_LINEAR) continue;
// Single-plane only multi-plane modifiers need a wider
// present-callback ABI (one fd/offset/stride per plane).
if (m.drmFormatModifierPlaneCount != 1) continue;
if ((m.drmFormatModifierTilingFeatures & required_features) == required_features) {
return true;
}
}
return false;
}
/// `.direct` mode: allocate the render image with
/// `VkImageDrmFormatModifierExplicitCreateInfoEXT` and export its own
/// memory as the dmabuf.
fn initDirect(opts: Options, drm_format: u32) Error!Self {
const dev = opts.device;
const image_usage = @as(vk.VkImageUsageFlags, vk.VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) |
vk.VK_IMAGE_USAGE_SAMPLED_BIT |
vk.VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
opts.extra_usage;
// BGRA8, single-plane LINEAR rowPitch is just width * bpp.
const bytes_per_pixel: u32 = 4;
const row_pitch: vk.VkDeviceSize = @as(vk.VkDeviceSize, opts.width) * bytes_per_pixel;
// ---- 1. Image: LINEAR-modifier, externally-shareable -----------
const plane_layout: vk.VkSubresourceLayout = .{
.offset = 0,
.size = 0, // ignored for EXPLICIT create-info
.rowPitch = row_pitch,
.arrayPitch = 0,
.depthPitch = 0,
};
const mod_create: vk.VkImageDrmFormatModifierExplicitCreateInfoEXT = .{
.sType = vk.VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
.pNext = null,
.drmFormatModifier = DRM_FORMAT_MOD_LINEAR,
.drmFormatModifierPlaneCount = 1,
.pPlaneLayouts = &plane_layout,
};
const ext_image_info: vk.VkExternalMemoryImageCreateInfo = .{
.sType = vk.VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
.pNext = &mod_create,
.handleTypes = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
};
const image_info: vk.VkImageCreateInfo = .{
.sType = vk.VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = &ext_image_info,
.flags = 0,
.imageType = vk.VK_IMAGE_TYPE_2D,
.format = opts.format,
.extent = .{ .width = opts.width, .height = opts.height, .depth = 1 },
.mipLevels = 1,
.arrayLayers = 1,
.samples = vk.VK_SAMPLE_COUNT_1_BIT,
.tiling = vk.VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT,
.usage = image_usage,
.sharingMode = vk.VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = null,
.initialLayout = vk.VK_IMAGE_LAYOUT_UNDEFINED,
};
var image: vk.VkImage = undefined;
if (dev.dispatch.createImage(dev.device, &image_info, null, &image) != vk.VK_SUCCESS) {
log.err("vkCreateImage (Target direct) failed", .{});
return error.VulkanFailed;
}
errdefer dev.dispatch.destroyImage(dev.device, image, null);
// ---- 2. Image memory: exportable, host-cacheable for Qt mmap ---
var image_reqs: vk.VkMemoryRequirements = undefined;
dev.dispatch.getImageMemoryRequirements(dev.device, image, &image_reqs);
// HOST_CACHED matters: Qt's `presentVulkanDmabuf` mmaps and reads
// every pixel into a QImage. Without HOST_CACHED, NVIDIA hands
// back write-combining memory and that read crawls (see legacy
// path note for the ~260 ms regression we hit). HOST_COHERENT
// avoids explicit flushes. Fall back to uncached if cached isn't
// available for the memory type bits the image requires.
const host_flags_cached =
@as(vk.VkMemoryPropertyFlags, vk.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) |
vk.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
vk.VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
const host_flags_uncached =
@as(vk.VkMemoryPropertyFlags, vk.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) |
vk.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
const image_mem_idx = dev.findMemoryType(image_reqs.memoryTypeBits, host_flags_cached) orelse
dev.findMemoryType(image_reqs.memoryTypeBits, host_flags_uncached) orelse
{
log.err(
"no HOST_VISIBLE memory type for direct dmabuf image (typeBits=0x{x})",
.{image_reqs.memoryTypeBits},
);
return error.NoSuitableMemoryType;
};
const export_info: vk.VkExportMemoryAllocateInfo = .{
.sType = vk.VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
.pNext = null,
.handleTypes = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
};
const image_alloc: vk.VkMemoryAllocateInfo = .{
.sType = vk.VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = &export_info,
.allocationSize = image_reqs.size,
.memoryTypeIndex = image_mem_idx,
};
var image_memory: vk.VkDeviceMemory = undefined;
if (dev.dispatch.allocateMemory(dev.device, &image_alloc, null, &image_memory) != vk.VK_SUCCESS) {
log.err("vkAllocateMemory (Target direct image) failed", .{});
return error.VulkanFailed;
}
errdefer dev.dispatch.freeMemory(dev.device, image_memory, null);
if (dev.dispatch.bindImageMemory(dev.device, image, image_memory, 0) != vk.VK_SUCCESS) {
log.err("vkBindImageMemory (Target direct image) failed", .{});
return error.VulkanFailed;
}
// ---- 3. View ---------------------------------------------------
const view = try createView(dev, image, opts.format);
errdefer dev.dispatch.destroyImageView(dev.device, view, null);
// ---- 4. Export memory as dmabuf fd -----------------------------
const fd = try exportDmabufFd(dev, image_memory);
errdefer std.posix.close(fd);
// ---- 5. Query the actual plane stride --------------------------
// We requested rowPitch = width * 4 via EXPLICIT create-info, but
// the driver can technically round up; ask for what we actually got.
var subres: vk.VkImageSubresource = .{
.aspectMask = vk.VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT,
.mipLevel = 0,
.arrayLayer = 0,
};
var layout: vk.VkSubresourceLayout = undefined;
dev.dispatch.getImageSubresourceLayout(dev.device, image, &subres, &layout);
return .{
.device = dev,
.platform = opts.platform,
.tiling = .direct,
.image = image,
.image_memory = image_memory,
.view = view,
.dmabuf_buffer = null,
.dmabuf_memory = null,
.format = opts.format,
.width = opts.width,
.height = opts.height,
.fd = fd,
.drm_format = drm_format,
.drm_modifier = DRM_FORMAT_MOD_LINEAR,
.stride = @intCast(layout.rowPitch),
};
}
/// `.legacy_copy` mode: OPTIMAL render image + separate LINEAR
/// dmabuf-exported `VkBuffer`. Behavior identical to the
/// pre-modifier-path code.
fn initLegacyCopy(opts: Options, drm_format: u32) Error!Self {
const dev = opts.device;
// BGRA8 4 bytes/pixel, packed (no per-row padding).
const bytes_per_pixel: u32 = 4;
const stride: u32 = opts.width * bytes_per_pixel;
@ -131,7 +405,7 @@ pub fn init(opts: Options) Error!Self {
};
var image: vk.VkImage = undefined;
if (dev.dispatch.createImage(dev.device, &image_info, null, &image) != vk.VK_SUCCESS) {
log.err("vkCreateImage (Target render) failed", .{});
log.err("vkCreateImage (Target legacy render) failed", .{});
return error.VulkanFailed;
}
errdefer dev.dispatch.destroyImage(dev.device, image, null);
@ -150,42 +424,17 @@ pub fn init(opts: Options) Error!Self {
};
var image_memory: vk.VkDeviceMemory = undefined;
if (dev.dispatch.allocateMemory(dev.device, &image_alloc, null, &image_memory) != vk.VK_SUCCESS) {
log.err("vkAllocateMemory (Target render image) failed", .{});
log.err("vkAllocateMemory (Target legacy render image) failed", .{});
return error.VulkanFailed;
}
errdefer dev.dispatch.freeMemory(dev.device, image_memory, null);
if (dev.dispatch.bindImageMemory(dev.device, image, image_memory, 0) != vk.VK_SUCCESS) {
log.err("vkBindImageMemory (Target render image) failed", .{});
log.err("vkBindImageMemory (Target legacy render image) failed", .{});
return error.VulkanFailed;
}
// ---- 2. ImageView on the render image -------------------------
const view_info: vk.VkImageViewCreateInfo = .{
.sType = vk.VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = null,
.flags = 0,
.image = image,
.viewType = vk.VK_IMAGE_VIEW_TYPE_2D,
.format = opts.format,
.components = .{
.r = vk.VK_COMPONENT_SWIZZLE_IDENTITY,
.g = vk.VK_COMPONENT_SWIZZLE_IDENTITY,
.b = vk.VK_COMPONENT_SWIZZLE_IDENTITY,
.a = vk.VK_COMPONENT_SWIZZLE_IDENTITY,
},
.subresourceRange = .{
.aspectMask = vk.VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
};
var view: vk.VkImageView = undefined;
if (dev.dispatch.createImageView(dev.device, &view_info, null, &view) != vk.VK_SUCCESS) {
log.err("vkCreateImageView (Target) failed", .{});
return error.VulkanFailed;
}
// ---- 2. View ---------------------------------------------------
const view = try createView(dev, image, opts.format);
errdefer dev.dispatch.destroyImageView(dev.device, view, null);
// ---- 3. Dmabuf buffer: LINEAR pixel data, external memory -----
@ -260,22 +509,13 @@ pub fn init(opts: Options) Error!Self {
return error.VulkanFailed;
}
const fd_info: vk.VkMemoryGetFdInfoKHR = .{
.sType = vk.VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
.pNext = null,
.memory = dmabuf_memory,
.handleType = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
};
var fd: c_int = -1;
if (dev.dispatch.getMemoryFdKHR(dev.device, &fd_info, &fd) != vk.VK_SUCCESS or fd < 0) {
log.err("vkGetMemoryFdKHR (Target dmabuf) failed: fd={}", .{fd});
return error.VulkanFailed;
}
const fd = try exportDmabufFd(dev, dmabuf_memory);
errdefer std.posix.close(fd);
return .{
.device = dev,
.platform = opts.platform,
.tiling = .legacy_copy,
.image = image,
.image_memory = image_memory,
.view = view,
@ -291,27 +531,128 @@ pub fn init(opts: Options) Error!Self {
};
}
fn createView(
dev: *const Device,
image: vk.VkImage,
format: vk.VkFormat,
) Error!vk.VkImageView {
const view_info: vk.VkImageViewCreateInfo = .{
.sType = vk.VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = null,
.flags = 0,
.image = image,
.viewType = vk.VK_IMAGE_VIEW_TYPE_2D,
.format = format,
.components = .{
.r = vk.VK_COMPONENT_SWIZZLE_IDENTITY,
.g = vk.VK_COMPONENT_SWIZZLE_IDENTITY,
.b = vk.VK_COMPONENT_SWIZZLE_IDENTITY,
.a = vk.VK_COMPONENT_SWIZZLE_IDENTITY,
},
.subresourceRange = .{
.aspectMask = vk.VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
};
var view: vk.VkImageView = undefined;
if (dev.dispatch.createImageView(dev.device, &view_info, null, &view) != vk.VK_SUCCESS) {
log.err("vkCreateImageView (Target) failed", .{});
return error.VulkanFailed;
}
return view;
}
fn exportDmabufFd(dev: *const Device, memory: vk.VkDeviceMemory) Error!i32 {
const fd_info: vk.VkMemoryGetFdInfoKHR = .{
.sType = vk.VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
.pNext = null,
.memory = memory,
.handleType = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
};
var fd: c_int = -1;
if (dev.dispatch.getMemoryFdKHR(dev.device, &fd_info, &fd) != vk.VK_SUCCESS or fd < 0) {
log.err("vkGetMemoryFdKHR (Target) failed: fd={}", .{fd});
return error.VulkanFailed;
}
return fd;
}
pub fn deinit(self: *Self) void {
const dev = self.device;
if (self.fd >= 0) std.posix.close(self.fd);
dev.dispatch.destroyBuffer(dev.device, self.dmabuf_buffer, null);
dev.dispatch.freeMemory(dev.device, self.dmabuf_memory, null);
if (self.dmabuf_buffer) |b| dev.dispatch.destroyBuffer(dev.device, b, null);
if (self.dmabuf_memory) |m| dev.dispatch.freeMemory(dev.device, m, null);
dev.dispatch.destroyImageView(dev.device, self.view, null);
dev.dispatch.destroyImage(dev.device, self.image, null);
dev.dispatch.freeMemory(dev.device, self.image_memory, null);
self.* = undefined;
}
/// Record the GPU commands that copy the render image into the
/// dmabuf-exported buffer. Call this AFTER all RenderPass work has
/// been recorded but BEFORE `vkEndCommandBuffer`.
/// Record the end-of-frame barrier(s) that make the rendered pixels
/// visible to the host's later mmap read. Dispatches on `self.tiling`:
///
/// Barriers: render image must transition from whatever the
/// RenderPass left it in (`GENERAL` after `RenderPass.complete`) to
/// `TRANSFER_SRC_OPTIMAL`. The dmabuf buffer doesn't have layouts;
/// we just add a memory barrier so the host's later read sees the
/// transferred bytes.
pub fn recordCopyToDmabuf(self: *Self, cb: vk.VkCommandBuffer) void {
/// - `.direct`: just an image layout/memory barrier the render
/// image's own memory is the dmabuf, so we transition
/// `GENERAL GENERAL` with `COLOR_ATTACHMENT_WRITE HOST_READ`
/// visibility (`COLOR_ATTACHMENT_OUTPUT HOST` stages). The
/// LINEAR-modifier image stays in GENERAL throughout it's both
/// the render target and the host-mapped surface.
///
/// - `.legacy_copy`: the original behavior transition the
/// render image to `TRANSFER_SRC_OPTIMAL`, `vkCmdCopyImageToBuffer`
/// into the dmabuf buffer, buffer-memory barrier for HOST_READ
/// visibility.
///
/// Call this AFTER all RenderPass work has been recorded but BEFORE
/// `vkEndCommandBuffer`.
pub fn recordPresentBarrier(self: *Self, cb: vk.VkCommandBuffer) void {
switch (self.tiling) {
.direct => self.recordDirectBarrier(cb),
.legacy_copy => self.recordCopyToDmabuf(cb),
}
}
fn recordDirectBarrier(self: *Self, cb: vk.VkCommandBuffer) void {
const dev = self.device;
// Image stays in GENERAL it's the render target AND the
// host-mapped surface. We only need a memory barrier so the host's
// mmap read sees the writes from the COLOR_ATTACHMENT_OUTPUT stage.
const img_barrier: vk.VkImageMemoryBarrier = .{
.sType = vk.VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = null,
.srcAccessMask = vk.VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
.dstAccessMask = vk.VK_ACCESS_HOST_READ_BIT,
.oldLayout = vk.VK_IMAGE_LAYOUT_GENERAL,
.newLayout = vk.VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
.image = self.image,
.subresourceRange = .{
.aspectMask = vk.VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
},
};
dev.dispatch.cmdPipelineBarrier(
cb,
vk.VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
vk.VK_PIPELINE_STAGE_HOST_BIT,
0,
0, null,
0, null,
1, &img_barrier,
);
self.layout = vk.VK_IMAGE_LAYOUT_GENERAL;
}
fn recordCopyToDmabuf(self: *Self, cb: vk.VkCommandBuffer) void {
const dev = self.device;
// Image: GENERAL TRANSFER_SRC_OPTIMAL (the RenderPass leaves us
@ -363,7 +704,7 @@ pub fn recordCopyToDmabuf(self: *Self, cb: vk.VkCommandBuffer) void {
cb,
self.image,
vk.VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
self.dmabuf_buffer,
self.dmabuf_buffer.?,
1,
&region,
);
@ -380,7 +721,7 @@ pub fn recordCopyToDmabuf(self: *Self, cb: vk.VkCommandBuffer) void {
.dstAccessMask = vk.VK_ACCESS_HOST_READ_BIT,
.srcQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
.buffer = self.dmabuf_buffer,
.buffer = self.dmabuf_buffer.?,
.offset = 0,
.size = vk.VK_WHOLE_SIZE,
};