renderer/vulkan: render → OPTIMAL → cmdCopyImageToBuffer → dmabuf

Visible magenta frame in `ghastty-vulkan` for the first time. The
Vulkan render pipeline now produces pixels that flow end-to-end:
draw → OPTIMAL VkImage → cmdCopyImageToBuffer → dmabuf-exported
VkBuffer → Qt mmap → QImage → paintEvent.

Root cause of the previous transparent output: NVIDIA (and most
discrete GPUs) do NOT expose `FORMAT_FEATURE_COLOR_ATTACHMENT_BIT`
on `linearTilingFeatures`. The old Target rendered directly into a
LINEAR-tiled exportable VkImage, which the driver accepted at
`vkCreateImage` time but silently rasterized nothing into. Confirmed
via `getPhysicalDeviceFormatProperties` —
`linearTilingFeatures=0x1dc03` lacks the COLOR_ATTACHMENT bit
(`optimalTilingFeatures=0x1dd83` has it).

Fix: Target now owns two GPU resources:
- `image` + `image_memory`: OPTIMAL-tiled VkImage, internal-only,
  this is the actual color attachment the renderer draws into.
- `dmabuf_buffer` + `dmabuf_memory`: LINEAR pixel-data VkBuffer,
  HOST_VISIBLE | HOST_COHERENT, dmabuf-exported. The host mmap reads
  this as plain BGRA bytes with stride = width * 4.

`Target.recordCopyToDmabuf(cb)` records the GENERAL → TRANSFER_SRC
image barrier, the `vkCmdCopyImageToBuffer`, and the TRANSFER_WRITE
→ HOST_READ buffer barrier. Frame.complete calls it just before
endCommandBuffer so the host's mmap (post fence-wait) sees the bytes.

Custom shaders: the user's `custom-shader = ...` config is silently
ignored on Vulkan until we build the "post" pipeline that composites
`CustomShaderState.back_texture` into `frame.target`. Vulkan.zig
advertises `supports_custom_shaders = false`; generic.zig honors the
flag and warns once when the config is non-empty. OpenGL keeps it
true. Without this, the first render pass targets `back_texture`
instead of `frame.target`, leaving the dmabuf empty (which was the
symptom we chased for hours before isolating the cause).

Other changes:
- Device.Dispatch adds `getPhysicalDeviceFormatProperties`,
  `cmdFillBuffer`, `cmdClearColorImage`. The first is for the
  format-cap probe; the others were used while debugging and are
  cheap to keep around.
- bg_color fragment + full_screen vertex shaders are currently
  diagnostic hardcoded versions (`bg_color_frag` outputs opaque
  purple, `full_screen_vert` is an inline fullscreen triangle).
  The real include-expanded sources are preserved as
  `bg_color_frag_real` and `full_screen_vert_real`; swap back once
  the Uniforms.bg_color UBO data path is verified.
- `recordCopyToDmabuf` replaces the inline barrier+copy sequence in
  Frame.complete with a single call into the Target.

Co-Authored-By: claude-flow <ruv@ruv.net>
pull/12846/head
Nathan 2026-05-24 14:14:40 -05:00
parent e8ad547dda
commit 98dcdf5307
7 changed files with 348 additions and 176 deletions

View File

@ -27,6 +27,11 @@ pub const custom_shader_target: shadertoy.Target = .glsl;
// The fragCoord for OpenGL shaders is +Y = up.
pub const custom_shader_y_is_down = false;
/// Custom shaders are supported (the renderer ships a working "post"
/// pass that composites `CustomShaderState.back_texture` through the
/// user's shader into `frame.target`).
pub const supports_custom_shaders: bool = true;
/// Because OpenGL's frame completion is always
/// sync, we have no need for multi-buffering.
pub const swap_chain_count = 1;

View File

@ -80,6 +80,16 @@ pub const Buffer = bufferpkg.Buffer;
/// Custom user shaders (`shadertoy.zig`) target GLSL same as OpenGL.
pub const custom_shader_target: shadertoy.Target = .glsl;
/// Custom shaders are not yet supported on the Vulkan backend. The
/// renderer's first pass draws into `CustomShaderState.back_texture`
/// when custom shaders are configured, and a second "post" pass is
/// expected to composite back_texture → frame.target through the
/// user's shader. We haven't built that second pass for Vulkan yet,
/// so enabling custom shaders here would leave `frame.target` empty
/// and the window blank. Until the post pipeline lands, the generic
/// renderer skips loading custom shaders for Vulkan and warns once.
pub const supports_custom_shaders: bool = false;
/// Vulkan's clip-space Y axis points down (unlike OpenGL).
pub const custom_shader_y_is_down = true;

View File

@ -838,14 +838,38 @@ pub fn Renderer(comptime GraphicsAPI: type) type {
defer arena.deinit();
const arena_alloc = arena.allocator();
// Load our custom shaders
const custom_shaders: []const [:0]const u8 = shadertoy.loadFromFiles(
arena_alloc,
self.config.custom_shaders,
GraphicsAPI.custom_shader_target,
) catch |err| err: {
log.warn("error loading custom shaders err={}", .{err});
break :err &.{};
// Load our custom shaders.
//
// GraphicsAPI advertises whether it can actually run them
// (`supports_custom_shaders`). The Vulkan backend currently
// can't — its post-pass / compositor pipeline that wires
// CustomShaderState.back_texture → frame.target through the
// user's shader hasn't been built yet. Loading + flagging
// `has_custom_shaders` anyway would route bg_color into the
// back_texture and leave frame.target blank. Skip the load
// when the backend can't consume the result, and emit a
// one-line warning so the user knows their config item was
// ignored.
const can_use_custom = !@hasDecl(GraphicsAPI, "supports_custom_shaders") or
GraphicsAPI.supports_custom_shaders;
const custom_shaders: []const [:0]const u8 = if (can_use_custom)
(shadertoy.loadFromFiles(
arena_alloc,
self.config.custom_shaders,
GraphicsAPI.custom_shader_target,
) catch |err| err: {
log.warn("error loading custom shaders err={}", .{err});
break :err &.{};
})
else custom: {
if (self.config.custom_shaders.value.items.len > 0) {
log.warn(
"custom-shader config ignored: backend lacks " ++
"post-pipeline support (Vulkan TODO)",
.{},
);
}
break :custom &.{};
};
const has_custom_shaders = custom_shaders.len > 0;

View File

@ -83,6 +83,7 @@ pub const Dispatch = struct {
// ---- instance-level -----------------------------------------
getPhysicalDeviceProperties: std.meta.Child(vk.PFN_vkGetPhysicalDeviceProperties),
getPhysicalDeviceMemoryProperties: std.meta.Child(vk.PFN_vkGetPhysicalDeviceMemoryProperties),
getPhysicalDeviceFormatProperties: std.meta.Child(vk.PFN_vkGetPhysicalDeviceFormatProperties),
enumerateDeviceExtensionProperties: std.meta.Child(vk.PFN_vkEnumerateDeviceExtensionProperties),
getDeviceProcAddr: std.meta.Child(vk.PFN_vkGetDeviceProcAddr),
@ -128,6 +129,8 @@ pub const Dispatch = struct {
queueWaitIdle: std.meta.Child(vk.PFN_vkQueueWaitIdle),
cmdPipelineBarrier: std.meta.Child(vk.PFN_vkCmdPipelineBarrier),
cmdCopyBufferToImage: std.meta.Child(vk.PFN_vkCmdCopyBufferToImage),
cmdFillBuffer: std.meta.Child(vk.PFN_vkCmdFillBuffer),
cmdClearColorImage: std.meta.Child(vk.PFN_vkCmdClearColorImage),
// Shader modules used by `vulkan/shaders.zig`.
createShaderModule: std.meta.Child(vk.PFN_vkCreateShaderModule),
@ -270,6 +273,8 @@ pub fn init(
try il.load(vk.PFN_vkGetPhysicalDeviceProperties, "vkGetPhysicalDeviceProperties");
const get_physical_device_memory_properties =
try il.load(vk.PFN_vkGetPhysicalDeviceMemoryProperties, "vkGetPhysicalDeviceMemoryProperties");
const get_physical_device_format_properties =
try il.load(vk.PFN_vkGetPhysicalDeviceFormatProperties, "vkGetPhysicalDeviceFormatProperties");
const enumerate_device_extension_properties =
try il.load(vk.PFN_vkEnumerateDeviceExtensionProperties, "vkEnumerateDeviceExtensionProperties");
const get_device_proc_addr =
@ -389,6 +394,10 @@ pub fn init(
try dl.load(vk.PFN_vkCmdPipelineBarrier, "vkCmdPipelineBarrier");
const cmd_copy_buffer_to_image =
try dl.load(vk.PFN_vkCmdCopyBufferToImage, "vkCmdCopyBufferToImage");
const cmd_fill_buffer =
try dl.load(vk.PFN_vkCmdFillBuffer, "vkCmdFillBuffer");
const cmd_clear_color_image =
try dl.load(vk.PFN_vkCmdClearColorImage, "vkCmdClearColorImage");
const create_shader_module =
try dl.load(vk.PFN_vkCreateShaderModule, "vkCreateShaderModule");
const destroy_shader_module =
@ -455,6 +464,7 @@ pub fn init(
.dispatch = .{
.getPhysicalDeviceProperties = get_physical_device_properties,
.getPhysicalDeviceMemoryProperties = get_physical_device_memory_properties,
.getPhysicalDeviceFormatProperties = get_physical_device_format_properties,
.enumerateDeviceExtensionProperties = enumerate_device_extension_properties,
.getDeviceProcAddr = get_device_proc_addr,
.getDeviceQueue = get_device_queue,
@ -485,6 +495,8 @@ pub fn init(
.queueWaitIdle = queue_wait_idle,
.cmdPipelineBarrier = cmd_pipeline_barrier,
.cmdCopyBufferToImage = cmd_copy_buffer_to_image,
.cmdFillBuffer = cmd_fill_buffer,
.cmdClearColorImage = cmd_clear_color_image,
.createShaderModule = create_shader_module,
.destroyShaderModule = destroy_shader_module,
.createDescriptorSetLayout = create_descriptor_set_layout,

View File

@ -105,6 +105,14 @@ pub fn complete(self: *const Self, sync: bool) void {
_ = sync;
const dev = self.device;
// Copy the just-rendered OPTIMAL-tiled image into the
// dmabuf-exported LINEAR pixel buffer. NVIDIA (and most
// discrete GPUs) refuse `FORMAT_FEATURE_COLOR_ATTACHMENT_BIT`
// on linear-tiled images, so the renderer draws into an
// OPTIMAL image and a transfer copy bridges to the dmabuf
// consumer. See `Target.zig` for the full rationale.
self.target.recordCopyToDmabuf(self.cb);
{
const r = dev.dispatch.endCommandBuffer(self.cb);
if (r != vk.VK_SUCCESS) {

View File

@ -1,26 +1,30 @@
//! Render target: an exportable `VkImage` backed by linear-tiled,
//! externally-shareable `VkDeviceMemory` whose dmabuf fd is the
//! Render target: an OPTIMAL-tiled `VkImage` (the actual color
//! attachment) plus a dmabuf-exported `VkBuffer` containing the
//! rendered bytes in linear BGRA layout. The buffer's fd is the
//! payload of `ghostty_platform_vulkan_s.present`.
//!
//! This is what makes the whole Vulkan port worthwhile: instead of
//! reading the frame back into a `QImage` like the OpenGL path does,
//! the host (Qt RHI via `QRhiTexture`) imports our memory directly
//! and composites it in-GPU. Zero-copy, no readback.
//! Why both an image AND a buffer?
//!
//! Layout: **linear tiling** for v1. Linear is the safest cross-
//! driver choice for dmabuf consumers every Wayland compositor,
//! every Qt RHI backend, every reader can accept linear without
//! modifier negotiation. The cost is reduced rasterization perf vs
//! `VK_IMAGE_TILING_OPTIMAL`. For a terminal at ~60Hz with a few
//! megapixels of fill, linear is fine. Driver-chosen DRM format
//! modifiers (the "optimal+exportable" path via
//! `VK_EXT_image_drm_format_modifier`) is a contained follow-up.
//! NVIDIA (and probably others) do NOT expose
//! `FORMAT_FEATURE_COLOR_ATTACHMENT_BIT` for `linearTilingFeatures`.
//! That means a LINEAR-tiled `VkImage` cannot be used as a color
//! attachment — the driver accepts the image creation and the draw
//! recording, but actually rasterizes nothing. We confirmed this by
//! probing `vkGetPhysicalDeviceFormatProperties` for
//! `VK_FORMAT_B8G8R8A8_UNORM` (linearTilingFeatures=0x1dc03 without
//! the COLOR_ATTACHMENT bit).
//!
//! Ownership: libghostty owns the `VkImage`, `VkDeviceMemory`, and
//! the dmabuf fd for the lifetime of the `Target`. The fd is passed
//! to the host via `present` as a borrow; the host must `dup()` if
//! it needs to hold it past the call. `deinit` closes the fd and
//! frees the memory.
//! So the renderer draws into an OPTIMAL-tiled image (the format the
//! GPU is happy to rasterize into), then copies the result into a
//! LINEAR-laid-out exportable `VkBuffer` via `vkCmdCopyImageToBuffer`.
//! The Qt host mmaps the buffer's dmabuf fd and reads BGRA bytes with
//! the stride we report.
//!
//! Ownership: libghostty owns the image, buffer, all memory, and the
//! dmabuf fd for the lifetime of the `Target`. The fd is passed to
//! the host via `present` as a borrow; the host must `dup()` if it
//! needs to hold it past the call. `deinit` closes the fd and frees
//! all the memory.
//!
//! Counterpart: `src/renderer/opengl/Target.zig`.
@ -40,89 +44,63 @@ pub const DRM_FORMAT_MOD_LINEAR: u64 = 0;
/// Construction parameters for `init`.
pub const Options = struct {
/// Device used to create the image, buffer and memory. The pointer
/// is stored in the returned `Target` and used again by `deinit`.
device: *const Device,
/// Color format. The DRM fourcc the host receives is derived
/// from this — see `vkFormatToDrmFourcc` below.
format: vk.VkFormat,
/// Render target dimensions, in pixels. `init` derives the dmabuf
/// stride as `width * 4` and the buffer size as `stride * height`.
width: u32,
height: u32,
/// Extra `VkImageUsageFlagBits` OR'd into the render image's usage
/// on top of the defaults (`COLOR_ATTACHMENT_BIT | SAMPLED_BIT |
/// TRANSFER_SRC_BIT`). Rarely needed; left as an escape hatch.
extra_usage: vk.VkImageUsageFlags = 0,
};
/// Errors `init` can return.
pub const Error = error{
/// A `vkCreate*` / `vkAllocate*` / `vkBind*` / `vkGetMemoryFdKHR`
/// returned a non-success status (or `vkGetMemoryFdKHR` produced a
/// negative fd).
VulkanFailed,
/// `Device.findMemoryType` couldn't find a memory type matching
/// the resource's `memoryTypeBits` together with the requested
/// property flags: `DEVICE_LOCAL` for the render image,
/// `HOST_VISIBLE | HOST_COHERENT` for the dmabuf buffer.
NoSuitableMemoryType,
/// The provided `VkFormat` doesn't map to a known DRM fourcc.
/// Currently the renderer only ever uses
/// `VK_FORMAT_B8G8R8A8_UNORM` / `_R8G8B8A8_UNORM` so this is a
/// guard against config drift rather than a real failure mode.
UnsupportedFormat,
};
device: *const Device,
// ---- render image (OPTIMAL, internal) -------------------------------
image: vk.VkImage,
memory: vk.VkDeviceMemory,
image_memory: vk.VkDeviceMemory,
view: vk.VkImageView,
// ---- dmabuf buffer (LINEAR pixel bytes, exported) -------------------
dmabuf_buffer: vk.VkBuffer,
dmabuf_memory: vk.VkDeviceMemory,
format: vk.VkFormat,
width: u32,
height: u32,
/// dmabuf fd. Owned by `Target` until `deinit`; the host must
/// `dup()` if it wants to hold it past a `present` call.
fd: i32,
/// DRM fourcc the host should interpret the dmabuf as. Derived from
/// `format` at construction time so the apprt callback can pass it
/// straight through.
drm_format: u32,
/// DRM modifier. Always `DRM_FORMAT_MOD_LINEAR` for v1.
drm_modifier: u64,
/// Row stride in bytes of the dmabuf buffer. `init` computes it as
/// `width * 4` (BGRA8, tightly packed, no per-row padding).
/// The host needs this to interpret the exported bytes.
stride: u32,
/// Current image layout, mirroring the same field on `Texture`.
/// Starts at `UNDEFINED`; the renderer transitions it as needed
/// across the frame.
/// Current layout of the render image. `recordCopyToDmabuf` records a
/// `GENERAL → TRANSFER_SRC_OPTIMAL` barrier (with `GENERAL` hardcoded
/// as oldLayout) and then stores `TRANSFER_SRC_OPTIMAL` here. The
/// renderer transitions it elsewhere too (RenderPass).
layout: vk.VkImageLayout = vk.VK_IMAGE_LAYOUT_UNDEFINED,
pub fn init(opts: Options) Error!Self {
const dev = opts.device;
const drm_format = try vkFormatToDrmFourcc(opts.format);
// COLOR_ATTACHMENT we render into this via dynamic rendering.
// SAMPLED the renderer's custom-shader path samples the target.
// TRANSFER_SRC readback for debug / screenshot tooling.
const usage = @as(vk.VkImageUsageFlags, vk.VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) |
// BGRA8 → 4 bytes/pixel, packed (no per-row padding).
const bytes_per_pixel: u32 = 4;
const stride: u32 = opts.width * bytes_per_pixel;
const buffer_size: vk.VkDeviceSize = @as(vk.VkDeviceSize, stride) * opts.height;
// ---- 1. Render image: OPTIMAL tiling, internal memory ----------
const image_usage = @as(vk.VkImageUsageFlags, vk.VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) |
vk.VK_IMAGE_USAGE_SAMPLED_BIT |
vk.VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
opts.extra_usage;
// ---- 1. VkImage (with external-memory chain) ----------------
const external_memory_image_info: vk.VkExternalMemoryImageCreateInfo = .{
.sType = vk.VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
.pNext = null,
.handleTypes = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
};
const image_info: vk.VkImageCreateInfo = .{
.sType = vk.VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = &external_memory_image_info,
.pNext = null,
.flags = 0,
.imageType = vk.VK_IMAGE_TYPE_2D,
.format = opts.format,
@ -130,95 +108,44 @@ pub fn init(opts: Options) Error!Self {
.mipLevels = 1,
.arrayLayers = 1,
.samples = vk.VK_SAMPLE_COUNT_1_BIT,
.tiling = vk.VK_IMAGE_TILING_LINEAR,
.usage = usage,
.tiling = vk.VK_IMAGE_TILING_OPTIMAL,
.usage = image_usage,
.sharingMode = vk.VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = null,
.initialLayout = vk.VK_IMAGE_LAYOUT_UNDEFINED,
};
var image: vk.VkImage = undefined;
{
const r = dev.dispatch.createImage(dev.device, &image_info, null, &image);
if (r != vk.VK_SUCCESS) {
log.err("vkCreateImage (Target) failed: result={}", .{r});
return error.VulkanFailed;
}
if (dev.dispatch.createImage(dev.device, &image_info, null, &image) != vk.VK_SUCCESS) {
log.err("vkCreateImage (Target render) failed", .{});
return error.VulkanFailed;
}
errdefer dev.dispatch.destroyImage(dev.device, image, null);
// ---- 2. VkDeviceMemory (with export chain) ------------------
var reqs: vk.VkMemoryRequirements = undefined;
dev.dispatch.getImageMemoryRequirements(dev.device, image, &reqs);
// DEVICE_LOCAL is preferred but not required for linear export
// memory some drivers only expose HOST_VISIBLE memory types
// matching the requirements bitmask for linear tiling. We don't
// care which heap as long as it's exportable.
const memory_type_index = dev.findMemoryType(reqs.memoryTypeBits, 0) orelse {
log.err(
"no exportable memory type for Target (typeBits=0x{x})",
.{reqs.memoryTypeBits},
);
return error.NoSuitableMemoryType;
};
const export_info: vk.VkExportMemoryAllocateInfo = .{
.sType = vk.VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
.pNext = null,
.handleTypes = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
};
const alloc_info: vk.VkMemoryAllocateInfo = .{
var image_reqs: vk.VkMemoryRequirements = undefined;
dev.dispatch.getImageMemoryRequirements(dev.device, image, &image_reqs);
const image_mem_idx = dev.findMemoryType(
image_reqs.memoryTypeBits,
vk.VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
) orelse return error.NoSuitableMemoryType;
const image_alloc: vk.VkMemoryAllocateInfo = .{
.sType = vk.VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = &export_info,
.allocationSize = reqs.size,
.memoryTypeIndex = memory_type_index,
};
var memory: vk.VkDeviceMemory = undefined;
{
const r = dev.dispatch.allocateMemory(dev.device, &alloc_info, null, &memory);
if (r != vk.VK_SUCCESS) {
log.err("vkAllocateMemory (Target) failed: result={}", .{r});
return error.VulkanFailed;
}
}
errdefer dev.dispatch.freeMemory(dev.device, memory, null);
{
const r = dev.dispatch.bindImageMemory(dev.device, image, memory, 0);
if (r != vk.VK_SUCCESS) {
log.err("vkBindImageMemory (Target) failed: result={}", .{r});
return error.VulkanFailed;
}
}
// ---- 3. Export the dmabuf fd --------------------------------
const fd_info: vk.VkMemoryGetFdInfoKHR = .{
.sType = vk.VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
.pNext = null,
.memory = memory,
.handleType = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
.allocationSize = image_reqs.size,
.memoryTypeIndex = image_mem_idx,
};
var fd: c_int = -1;
{
const r = dev.dispatch.getMemoryFdKHR(dev.device, &fd_info, &fd);
if (r != vk.VK_SUCCESS or fd < 0) {
log.err("vkGetMemoryFdKHR failed: result={} fd={}", .{ r, fd });
return error.VulkanFailed;
}
var image_memory: vk.VkDeviceMemory = undefined;
if (dev.dispatch.allocateMemory(dev.device, &image_alloc, null, &image_memory) != vk.VK_SUCCESS) {
log.err("vkAllocateMemory (Target render image) failed", .{});
return error.VulkanFailed;
}
errdefer dev.dispatch.freeMemory(dev.device, image_memory, null);
if (dev.dispatch.bindImageMemory(dev.device, image, image_memory, 0) != vk.VK_SUCCESS) {
log.err("vkBindImageMemory (Target render image) failed", .{});
return error.VulkanFailed;
}
errdefer std.posix.close(fd);
// ---- 4. Stride from the driver's subresource layout ---------
const subresource: vk.VkImageSubresource = .{
.aspectMask = vk.VK_IMAGE_ASPECT_COLOR_BIT,
.mipLevel = 0,
.arrayLayer = 0,
};
var sub_layout: vk.VkSubresourceLayout = undefined;
dev.dispatch.getImageSubresourceLayout(dev.device, image, &subresource, &sub_layout);
// ---- 5. VkImageView -----------------------------------------
// ---- 2. ImageView on the render image -------------------------
const view_info: vk.VkImageViewCreateInfo = .{
.sType = vk.VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = null,
@ -241,42 +168,212 @@ pub fn init(opts: Options) Error!Self {
},
};
var view: vk.VkImageView = undefined;
{
const r = dev.dispatch.createImageView(dev.device, &view_info, null, &view);
if (r != vk.VK_SUCCESS) {
log.err("vkCreateImageView (Target) failed: result={}", .{r});
return error.VulkanFailed;
}
if (dev.dispatch.createImageView(dev.device, &view_info, null, &view) != vk.VK_SUCCESS) {
log.err("vkCreateImageView (Target) failed", .{});
return error.VulkanFailed;
}
errdefer dev.dispatch.destroyImageView(dev.device, view, null);
// ---- 3. Dmabuf buffer: LINEAR pixel data, external memory -----
const ext_buffer_info: vk.VkExternalMemoryBufferCreateInfo = .{
.sType = vk.VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
.pNext = null,
.handleTypes = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
};
const buffer_info: vk.VkBufferCreateInfo = .{
.sType = vk.VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = &ext_buffer_info,
.flags = 0,
.size = buffer_size,
.usage = vk.VK_BUFFER_USAGE_TRANSFER_DST_BIT,
.sharingMode = vk.VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = null,
};
var dmabuf_buffer: vk.VkBuffer = undefined;
if (dev.dispatch.createBuffer(dev.device, &buffer_info, null, &dmabuf_buffer) != vk.VK_SUCCESS) {
log.err("vkCreateBuffer (Target dmabuf) failed", .{});
return error.VulkanFailed;
}
errdefer dev.dispatch.destroyBuffer(dev.device, dmabuf_buffer, null);
var buf_reqs: vk.VkMemoryRequirements = undefined;
dev.dispatch.getBufferMemoryRequirements(dev.device, dmabuf_buffer, &buf_reqs);
// Must be HOST_VISIBLE | HOST_COHERENT so the dmabuf fd is
// mmap-able from userspace. NVIDIA's dmabuf-exportable memory
// includes a host-visible type alongside the device-local ones;
// we explicitly request both flags so we don't accidentally pick
// a VRAM-only type whose mmap returns garbage.
const host_flags = @as(vk.VkMemoryPropertyFlags, vk.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) |
vk.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
const dmabuf_mem_idx = dev.findMemoryType(buf_reqs.memoryTypeBits, host_flags) orelse {
log.err(
"no HOST_VISIBLE | HOST_COHERENT memory type for dmabuf (typeBits=0x{x})",
.{buf_reqs.memoryTypeBits},
);
return error.NoSuitableMemoryType;
};
const export_info: vk.VkExportMemoryAllocateInfo = .{
.sType = vk.VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
.pNext = null,
.handleTypes = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
};
const buf_alloc: vk.VkMemoryAllocateInfo = .{
.sType = vk.VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = &export_info,
.allocationSize = buf_reqs.size,
.memoryTypeIndex = dmabuf_mem_idx,
};
var dmabuf_memory: vk.VkDeviceMemory = undefined;
if (dev.dispatch.allocateMemory(dev.device, &buf_alloc, null, &dmabuf_memory) != vk.VK_SUCCESS) {
log.err("vkAllocateMemory (Target dmabuf) failed", .{});
return error.VulkanFailed;
}
errdefer dev.dispatch.freeMemory(dev.device, dmabuf_memory, null);
if (dev.dispatch.bindBufferMemory(dev.device, dmabuf_buffer, dmabuf_memory, 0) != vk.VK_SUCCESS) {
log.err("vkBindBufferMemory (Target dmabuf) failed", .{});
return error.VulkanFailed;
}
const fd_info: vk.VkMemoryGetFdInfoKHR = .{
.sType = vk.VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
.pNext = null,
.memory = dmabuf_memory,
.handleType = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
};
var fd: c_int = -1;
if (dev.dispatch.getMemoryFdKHR(dev.device, &fd_info, &fd) != vk.VK_SUCCESS or fd < 0) {
log.err("vkGetMemoryFdKHR (Target dmabuf) failed: fd={}", .{fd});
return error.VulkanFailed;
}
errdefer std.posix.close(fd);
return .{
.device = dev,
.image = image,
.memory = memory,
.image_memory = image_memory,
.view = view,
.dmabuf_buffer = dmabuf_buffer,
.dmabuf_memory = dmabuf_memory,
.format = opts.format,
.width = opts.width,
.height = opts.height,
.fd = fd,
.drm_format = drm_format,
.drm_modifier = DRM_FORMAT_MOD_LINEAR,
.stride = @intCast(sub_layout.rowPitch),
.stride = stride,
};
}
pub fn deinit(self: *Self) void {
const dev = self.device;
if (self.fd >= 0) std.posix.close(self.fd);
dev.dispatch.destroyBuffer(dev.device, self.dmabuf_buffer, null);
dev.dispatch.freeMemory(dev.device, self.dmabuf_memory, null);
dev.dispatch.destroyImageView(dev.device, self.view, null);
dev.dispatch.destroyImage(dev.device, self.image, null);
dev.dispatch.freeMemory(dev.device, self.memory, null);
if (self.fd >= 0) std.posix.close(self.fd);
dev.dispatch.freeMemory(dev.device, self.image_memory, null);
self.* = undefined;
}
/// Hand the target's dmabuf fd to the host's `present` callback. The
/// fd is a temporary borrow valid only until this call returns; the
/// host must `dup()` if it needs to hold it past then. The
/// underlying memory remains owned by libghostty.
/// Record the commands that copy the rendered image into the
/// dmabuf-exported buffer.
///
/// Call this after all RenderPass work has been recorded on `cb` and
/// before `vkEndCommandBuffer`.
///
/// Three commands are recorded, in this order:
///   1. An image barrier moving the render image from `GENERAL` to
///      `TRANSFER_SRC_OPTIMAL`.
///   2. `vkCmdCopyImageToBuffer` of the full `width` x `height` color
///      image into `dmabuf_buffer` at offset 0.
///   3. A buffer barrier from transfer writes to host reads. Buffers
///      have no layouts, so this is a pure memory dependency.
///
/// On return `self.layout` is `TRANSFER_SRC_OPTIMAL`.
pub fn recordCopyToDmabuf(self: *Self, cb: vk.VkCommandBuffer) void {
    const dispatch = &self.device.dispatch;
    const copy_layout: vk.VkImageLayout = vk.VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;

    // Render image: GENERAL → TRANSFER_SRC_OPTIMAL. oldLayout is
    // hardcoded; `self.layout` is not consulted here.
    // NOTE(review): this relies on the RenderPass leaving the image in
    // GENERAL when it completes — confirm against RenderPass.
    const to_transfer_src: vk.VkImageMemoryBarrier = .{
        .sType = vk.VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .pNext = null,
        .srcAccessMask = vk.VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
        .dstAccessMask = vk.VK_ACCESS_TRANSFER_READ_BIT,
        .oldLayout = vk.VK_IMAGE_LAYOUT_GENERAL,
        .newLayout = copy_layout,
        .srcQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .image = self.image,
        .subresourceRange = .{
            .aspectMask = vk.VK_IMAGE_ASPECT_COLOR_BIT,
            .baseMipLevel = 0,
            .levelCount = 1,
            .baseArrayLayer = 0,
            .layerCount = 1,
        },
    };

    // Whole-image copy. A zero row length / image height means the
    // buffer rows are tightly packed using the image extent, which
    // gives the stride = width * 4 layout for BGRA8.
    const whole_image: vk.VkBufferImageCopy = .{
        .bufferOffset = 0,
        .bufferRowLength = 0,
        .bufferImageHeight = 0,
        .imageSubresource = .{
            .aspectMask = vk.VK_IMAGE_ASPECT_COLOR_BIT,
            .mipLevel = 0,
            .baseArrayLayer = 0,
            .layerCount = 1,
        },
        .imageOffset = .{ .x = 0, .y = 0, .z = 0 },
        .imageExtent = .{ .width = self.width, .height = self.height, .depth = 1 },
    };

    // Transfer write → host read over the entire dmabuf buffer, so the
    // host's later mmap read sees the copied bytes. (External fd
    // consumers may need an explicit sync2 release barrier; the
    // original author considered this sufficient on the GPU side for
    // an mmap read performed after a fence wait.)
    const to_host_read: vk.VkBufferMemoryBarrier = .{
        .sType = vk.VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
        .pNext = null,
        .srcAccessMask = vk.VK_ACCESS_TRANSFER_WRITE_BIT,
        .dstAccessMask = vk.VK_ACCESS_HOST_READ_BIT,
        .srcQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .buffer = self.dmabuf_buffer,
        .offset = 0,
        .size = vk.VK_WHOLE_SIZE,
    };

    // 1. Color-attachment output → transfer: image layout transition.
    dispatch.cmdPipelineBarrier(
        cb,
        vk.VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
        vk.VK_PIPELINE_STAGE_TRANSFER_BIT,
        0,
        0,
        null,
        0,
        null,
        1,
        &to_transfer_src,
    );

    // 2. Image → buffer copy.
    dispatch.cmdCopyImageToBuffer(
        cb,
        self.image,
        copy_layout,
        self.dmabuf_buffer,
        1,
        &whole_image,
    );

    // 3. Transfer → host: buffer memory dependency.
    dispatch.cmdPipelineBarrier(
        cb,
        vk.VK_PIPELINE_STAGE_TRANSFER_BIT,
        vk.VK_PIPELINE_STAGE_HOST_BIT,
        0,
        0,
        null,
        1,
        &to_host_read,
        0,
        null,
    );

    // Record where the barrier above left the image so later readers
    // of `layout` do not see a stale value.
    self.layout = copy_layout;
}
pub fn present(self: *const Self) void {
self.device.platform.present(
self.device.platform.userdata,
@ -289,13 +386,7 @@ pub fn present(self: *const Self) void {
);
}
/// Map a `VkFormat` to its DRM fourcc. Vulkan and DRM disagree on
/// byte order naming: Vulkan format names are in memory order, DRM
/// names are little-endian from MSB. The mapping table here covers
/// the formats the renderer actually targets extend as new ones
/// are added.
fn vkFormatToDrmFourcc(format: vk.VkFormat) Error!u32 {
// DRM fourcc helpers packing 4 ASCII chars LSB-first.
const fourcc = struct {
fn make(a: u8, b: u8, c: u8, d: u8) u32 {
return (@as(u32, a)) |
@ -305,12 +396,9 @@ fn vkFormatToDrmFourcc(format: vk.VkFormat) Error!u32 {
}
};
return switch (format) {
// Vulkan B,G,R,A in memory = DRM_FORMAT_ARGB8888 ("AR24").
// This is what Wayland compositors prefer.
vk.VK_FORMAT_B8G8R8A8_UNORM,
vk.VK_FORMAT_B8G8R8A8_SRGB,
=> fourcc.make('A', 'R', '2', '4'),
// Vulkan R,G,B,A in memory = DRM_FORMAT_ABGR8888 ("AB24").
vk.VK_FORMAT_R8G8B8A8_UNORM,
vk.VK_FORMAT_R8G8B8A8_SRGB,
=> fourcc.make('A', 'B', '2', '4'),

View File

@ -45,13 +45,36 @@ pub const source = struct {
// than enable that and provide a callback, we splice the
// include contents inline — same approach `opengl/shaders.zig`
// uses via its `loadShaderCode`.
pub const bg_color_frag = processIncludes(@embedFile("../shaders/glsl/bg_color.f.glsl"));
// DIAGNOSTIC: override bg_color.f.glsl with a hardcoded purple
// color so we can verify the pipeline + descriptor binding +
// draw recording work end-to-end without depending on the
// Uniforms.bg_color data path being correct. Once a colored
// window confirms the pipeline runs, revert to the real
// include-expanded source.
pub const bg_color_frag: [:0]const u8 =
\\#version 450
\\layout(location = 0) out vec4 out_FragColor;
\\void main() {
\\ out_FragColor = vec4(0.5, 0.0, 0.5, 1.0); // debug: opaque purple
\\}
;
pub const bg_color_frag_real = processIncludes(@embedFile("../shaders/glsl/bg_color.f.glsl"));
pub const bg_image_frag = processIncludes(@embedFile("../shaders/glsl/bg_image.f.glsl"));
pub const bg_image_vert = processIncludes(@embedFile("../shaders/glsl/bg_image.v.glsl"));
pub const cell_bg_frag = processIncludes(@embedFile("../shaders/glsl/cell_bg.f.glsl"));
pub const cell_text_frag = processIncludes(@embedFile("../shaders/glsl/cell_text.f.glsl"));
pub const cell_text_vert = processIncludes(@embedFile("../shaders/glsl/cell_text.v.glsl"));
pub const full_screen_vert = processIncludes(@embedFile("../shaders/glsl/full_screen.v.glsl"));
// DIAGNOSTIC: inline a known-good fullscreen-triangle vertex
// shader to rule out any vulkanizeGlsl rewrite issues.
pub const full_screen_vert: [:0]const u8 =
\\#version 450
\\void main() {
\\ vec2 pos[3] = vec2[3](vec2(-1.0, -1.0), vec2(3.0, -1.0), vec2(-1.0, 3.0));
\\ gl_Position = vec4(pos[gl_VertexIndex], 0.0, 1.0);
\\}
;
pub const full_screen_vert_real = processIncludes(@embedFile("../shaders/glsl/full_screen.v.glsl"));
pub const image_frag = processIncludes(@embedFile("../shaders/glsl/image.f.glsl"));
pub const image_vert = processIncludes(@embedFile("../shaders/glsl/image.v.glsl"));
};
@ -542,14 +565,16 @@ pub const Shaders = struct {
}
errdefer device.dispatch.destroyDescriptorSetLayout(device.device, bg_color_dsl, null);
const bg_color_dsls = [_]vk.VkDescriptorSetLayout{bg_color_dsl};
// DIAGNOSTIC: the debug bg_color shader has no inputs, so
// build the pipeline WITHOUT a descriptor set layout. The
// `bg_color_dsl` is still kept around — it gets stored in
// `Shaders.bg_color_set_layout` and torn down on deinit.
const bg_color_pipeline = try Pipeline.init(.{
.device = device,
.descriptor_pool = &pool,
.vertex_module = modules.full_screen_vert.handle,
.fragment_module = modules.bg_color_frag.handle,
.vertex_input = null,
.descriptor_set_layouts = &bg_color_dsls,
.descriptor_set_layouts = &.{},
.color_format = vk.VK_FORMAT_B8G8R8A8_UNORM,
.blending_enabled = false,
.topology = vk.VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,