renderer/vulkan: render → OPTIMAL → cmdCopyImageToBuffer → dmabuf
Visible magenta frame in `ghastty-vulkan` for the first time. The Vulkan render pipeline now produces pixels that flow end-to-end: draw → OPTIMAL VkImage → cmdCopyImageToBuffer → dmabuf-exported VkBuffer → Qt mmap → QImage → paintEvent.

Root cause of the previous transparent output: NVIDIA (and most discrete GPUs) do NOT expose `FORMAT_FEATURE_COLOR_ATTACHMENT_BIT` on `linearTilingFeatures`. The old Target rendered directly into a LINEAR-tiled exportable VkImage, which the driver accepted at `vkCreateImage` time but silently rasterized nothing into. Confirmed via `getPhysicalDeviceFormatProperties` — `linearTilingFeatures=0x1dc03` lacks the COLOR_ATTACHMENT bit (`optimalTilingFeatures=0x1dd83` has it).

Fix: Target now owns two GPU resources:
- `image` + `image_memory`: OPTIMAL-tiled VkImage, internal-only, this is the actual color attachment the renderer draws into.
- `dmabuf_buffer` + `dmabuf_memory`: LINEAR pixel-data VkBuffer, HOST_VISIBLE | HOST_COHERENT, dmabuf-exported. The host mmap reads this as plain BGRA bytes with stride = width * 4.

`Target.recordCopyToDmabuf(cb)` records the GENERAL → TRANSFER_SRC image barrier, the `vkCmdCopyImageToBuffer`, and the TRANSFER_WRITE → HOST_READ buffer barrier. Frame.complete calls it just before endCommandBuffer so the host's mmap (post fence-wait) sees the bytes.

Custom shaders: the user's `custom-shader = ...` config is ignored on Vulkan until we build the "post" pipeline that composites `CustomShaderState.back_texture` into `frame.target`. Vulkan.zig advertises `supports_custom_shaders = false`; generic.zig honors the flag and warns once when the config is non-empty. OpenGL keeps it true. Without this, the first render pass targets `back_texture` instead of `frame.target`, leaving the dmabuf empty (which was the symptom we chased for hours before isolating the cause).

Other changes:
- Device.Dispatch adds `getPhysicalDeviceFormatProperties`, `cmdFillBuffer`, `cmdClearColorImage`. The first is for the format-cap probe; the others were used while debugging and are cheap to keep around.
- bg_color fragment + full_screen vertex shaders are currently diagnostic hardcoded versions (`bg_color_frag` outputs opaque purple, `full_screen_vert` is an inline fullscreen triangle). The real include-expanded sources are preserved as `bg_color_frag_real` and `full_screen_vert_real`; swap back once the Uniforms.bg_color UBO data path is verified.
- `recordCopyToDmabuf` replaces the inline barrier+copy sequence in Frame.complete with a single call into the Target.

Co-Authored-By: claude-flow <ruv@ruv.net>
pull/12846/head
parent
e8ad547dda
commit
98dcdf5307
|
|
@ -27,6 +27,11 @@ pub const custom_shader_target: shadertoy.Target = .glsl;
|
|||
// The fragCoord for OpenGL shaders is +Y = up.
|
||||
pub const custom_shader_y_is_down = false;
|
||||
|
||||
/// Custom shaders are supported (the renderer ships a working "post"
|
||||
/// pass that composites `CustomShaderState.back_texture` through the
|
||||
/// user's shader into `frame.target`).
|
||||
pub const supports_custom_shaders: bool = true;
|
||||
|
||||
/// Because OpenGL's frame completion is always
|
||||
/// sync, we have no need for multi-buffering.
|
||||
pub const swap_chain_count = 1;
|
||||
|
|
|
|||
|
|
@ -80,6 +80,16 @@ pub const Buffer = bufferpkg.Buffer;
|
|||
/// Custom user shaders (`shadertoy.zig`) target GLSL — same as OpenGL.
|
||||
pub const custom_shader_target: shadertoy.Target = .glsl;
|
||||
|
||||
/// Custom shaders are not yet supported on the Vulkan backend. The
|
||||
/// renderer's first pass draws into `CustomShaderState.back_texture`
|
||||
/// when custom shaders are configured, and a second "post" pass is
|
||||
/// expected to composite back_texture → frame.target through the
|
||||
/// user's shader. We haven't built that second pass for Vulkan yet,
|
||||
/// so enabling custom shaders here would leave `frame.target` empty
|
||||
/// and the window blank. Until the post pipeline lands, the generic
|
||||
/// renderer skips loading custom shaders for Vulkan and warns once.
|
||||
pub const supports_custom_shaders: bool = false;
|
||||
|
||||
/// Vulkan's clip-space Y axis points down (unlike OpenGL).
|
||||
pub const custom_shader_y_is_down = true;
|
||||
|
||||
|
|
|
|||
|
|
@ -838,14 +838,38 @@ pub fn Renderer(comptime GraphicsAPI: type) type {
|
|||
defer arena.deinit();
|
||||
const arena_alloc = arena.allocator();
|
||||
|
||||
// Load our custom shaders
|
||||
const custom_shaders: []const [:0]const u8 = shadertoy.loadFromFiles(
|
||||
arena_alloc,
|
||||
self.config.custom_shaders,
|
||||
GraphicsAPI.custom_shader_target,
|
||||
) catch |err| err: {
|
||||
log.warn("error loading custom shaders err={}", .{err});
|
||||
break :err &.{};
|
||||
// Load our custom shaders.
|
||||
//
|
||||
// GraphicsAPI advertises whether it can actually run them
|
||||
// (`supports_custom_shaders`). The Vulkan backend currently
|
||||
// can't — its post-pass / compositor pipeline that wires
|
||||
// CustomShaderState.back_texture → frame.target through the
|
||||
// user's shader hasn't been built yet. Loading + flagging
|
||||
// `has_custom_shaders` anyway would route bg_color into the
|
||||
// back_texture and leave frame.target blank. Skip the load
|
||||
// when the backend can't consume the result, and emit a
|
||||
// one-line warning so the user knows their config item was
|
||||
// ignored.
|
||||
const can_use_custom = !@hasDecl(GraphicsAPI, "supports_custom_shaders") or
|
||||
GraphicsAPI.supports_custom_shaders;
|
||||
const custom_shaders: []const [:0]const u8 = if (can_use_custom)
|
||||
(shadertoy.loadFromFiles(
|
||||
arena_alloc,
|
||||
self.config.custom_shaders,
|
||||
GraphicsAPI.custom_shader_target,
|
||||
) catch |err| err: {
|
||||
log.warn("error loading custom shaders err={}", .{err});
|
||||
break :err &.{};
|
||||
})
|
||||
else custom: {
|
||||
if (self.config.custom_shaders.value.items.len > 0) {
|
||||
log.warn(
|
||||
"custom-shader config ignored: backend lacks " ++
|
||||
"post-pipeline support (Vulkan TODO)",
|
||||
.{},
|
||||
);
|
||||
}
|
||||
break :custom &.{};
|
||||
};
|
||||
|
||||
const has_custom_shaders = custom_shaders.len > 0;
|
||||
|
|
|
|||
|
|
@ -83,6 +83,7 @@ pub const Dispatch = struct {
|
|||
// ---- instance-level -----------------------------------------
|
||||
getPhysicalDeviceProperties: std.meta.Child(vk.PFN_vkGetPhysicalDeviceProperties),
|
||||
getPhysicalDeviceMemoryProperties: std.meta.Child(vk.PFN_vkGetPhysicalDeviceMemoryProperties),
|
||||
getPhysicalDeviceFormatProperties: std.meta.Child(vk.PFN_vkGetPhysicalDeviceFormatProperties),
|
||||
enumerateDeviceExtensionProperties: std.meta.Child(vk.PFN_vkEnumerateDeviceExtensionProperties),
|
||||
getDeviceProcAddr: std.meta.Child(vk.PFN_vkGetDeviceProcAddr),
|
||||
|
||||
|
|
@ -128,6 +129,8 @@ pub const Dispatch = struct {
|
|||
queueWaitIdle: std.meta.Child(vk.PFN_vkQueueWaitIdle),
|
||||
cmdPipelineBarrier: std.meta.Child(vk.PFN_vkCmdPipelineBarrier),
|
||||
cmdCopyBufferToImage: std.meta.Child(vk.PFN_vkCmdCopyBufferToImage),
|
||||
cmdFillBuffer: std.meta.Child(vk.PFN_vkCmdFillBuffer),
|
||||
cmdClearColorImage: std.meta.Child(vk.PFN_vkCmdClearColorImage),
|
||||
|
||||
// Shader modules — used by `vulkan/shaders.zig`.
|
||||
createShaderModule: std.meta.Child(vk.PFN_vkCreateShaderModule),
|
||||
|
|
@ -270,6 +273,8 @@ pub fn init(
|
|||
try il.load(vk.PFN_vkGetPhysicalDeviceProperties, "vkGetPhysicalDeviceProperties");
|
||||
const get_physical_device_memory_properties =
|
||||
try il.load(vk.PFN_vkGetPhysicalDeviceMemoryProperties, "vkGetPhysicalDeviceMemoryProperties");
|
||||
const get_physical_device_format_properties =
|
||||
try il.load(vk.PFN_vkGetPhysicalDeviceFormatProperties, "vkGetPhysicalDeviceFormatProperties");
|
||||
const enumerate_device_extension_properties =
|
||||
try il.load(vk.PFN_vkEnumerateDeviceExtensionProperties, "vkEnumerateDeviceExtensionProperties");
|
||||
const get_device_proc_addr =
|
||||
|
|
@ -389,6 +394,10 @@ pub fn init(
|
|||
try dl.load(vk.PFN_vkCmdPipelineBarrier, "vkCmdPipelineBarrier");
|
||||
const cmd_copy_buffer_to_image =
|
||||
try dl.load(vk.PFN_vkCmdCopyBufferToImage, "vkCmdCopyBufferToImage");
|
||||
const cmd_fill_buffer =
|
||||
try dl.load(vk.PFN_vkCmdFillBuffer, "vkCmdFillBuffer");
|
||||
const cmd_clear_color_image =
|
||||
try dl.load(vk.PFN_vkCmdClearColorImage, "vkCmdClearColorImage");
|
||||
const create_shader_module =
|
||||
try dl.load(vk.PFN_vkCreateShaderModule, "vkCreateShaderModule");
|
||||
const destroy_shader_module =
|
||||
|
|
@ -455,6 +464,7 @@ pub fn init(
|
|||
.dispatch = .{
|
||||
.getPhysicalDeviceProperties = get_physical_device_properties,
|
||||
.getPhysicalDeviceMemoryProperties = get_physical_device_memory_properties,
|
||||
.getPhysicalDeviceFormatProperties = get_physical_device_format_properties,
|
||||
.enumerateDeviceExtensionProperties = enumerate_device_extension_properties,
|
||||
.getDeviceProcAddr = get_device_proc_addr,
|
||||
.getDeviceQueue = get_device_queue,
|
||||
|
|
@ -485,6 +495,8 @@ pub fn init(
|
|||
.queueWaitIdle = queue_wait_idle,
|
||||
.cmdPipelineBarrier = cmd_pipeline_barrier,
|
||||
.cmdCopyBufferToImage = cmd_copy_buffer_to_image,
|
||||
.cmdFillBuffer = cmd_fill_buffer,
|
||||
.cmdClearColorImage = cmd_clear_color_image,
|
||||
.createShaderModule = create_shader_module,
|
||||
.destroyShaderModule = destroy_shader_module,
|
||||
.createDescriptorSetLayout = create_descriptor_set_layout,
|
||||
|
|
|
|||
|
|
@ -105,6 +105,14 @@ pub fn complete(self: *const Self, sync: bool) void {
|
|||
_ = sync;
|
||||
const dev = self.device;
|
||||
|
||||
// Copy the just-rendered OPTIMAL-tiled image into the
|
||||
// dmabuf-exported LINEAR pixel buffer. NVIDIA (and most
|
||||
// discrete GPUs) refuse `FORMAT_FEATURE_COLOR_ATTACHMENT_BIT`
|
||||
// on linear-tiled images, so the renderer draws into an
|
||||
// OPTIMAL image and a transfer copy bridges to the dmabuf
|
||||
// consumer. See `Target.zig` for the full rationale.
|
||||
self.target.recordCopyToDmabuf(self.cb);
|
||||
|
||||
{
|
||||
const r = dev.dispatch.endCommandBuffer(self.cb);
|
||||
if (r != vk.VK_SUCCESS) {
|
||||
|
|
|
|||
|
|
@ -1,26 +1,30 @@
|
|||
//! Render target: an exportable `VkImage` backed by linear-tiled,
|
||||
//! externally-shareable `VkDeviceMemory` whose dmabuf fd is the
|
||||
//! Render target: an OPTIMAL-tiled `VkImage` (the actual color
|
||||
//! attachment) plus a dmabuf-exported `VkBuffer` containing the
|
||||
//! rendered bytes in linear BGRA layout. The buffer's fd is the
|
||||
//! payload of `ghostty_platform_vulkan_s.present`.
|
||||
//!
|
||||
//! This is what makes the whole Vulkan port worthwhile: instead of
|
||||
//! reading the frame back into a `QImage` like the OpenGL path does,
|
||||
//! the host (Qt RHI via `QRhiTexture`) imports our memory directly
|
||||
//! and composites it in-GPU. Zero-copy, no readback.
|
||||
//! Why both an image AND a buffer?
|
||||
//!
|
||||
//! Layout: **linear tiling** for v1. Linear is the safest cross-
|
||||
//! driver choice for dmabuf consumers — every Wayland compositor,
|
||||
//! every Qt RHI backend, every reader can accept linear without
|
||||
//! modifier negotiation. The cost is reduced rasterization perf vs
|
||||
//! `VK_IMAGE_TILING_OPTIMAL`. For a terminal at ~60Hz with a few
|
||||
//! megapixels of fill, linear is fine. Driver-chosen DRM format
|
||||
//! modifiers (the "optimal+exportable" path via
|
||||
//! `VK_EXT_image_drm_format_modifier`) is a contained follow-up.
|
||||
//! NVIDIA (and probably others) do NOT expose
|
||||
//! `FORMAT_FEATURE_COLOR_ATTACHMENT_BIT` for `linearTilingFeatures`.
|
||||
//! That means a LINEAR-tiled `VkImage` cannot be used as a color
|
||||
//! attachment — the driver accepts the image creation and the draw
|
||||
//! recording, but actually rasterizes nothing. We confirmed this by
|
||||
//! probing `vkGetPhysicalDeviceFormatProperties` for
|
||||
//! `VK_FORMAT_B8G8R8A8_UNORM` (linearTilingFeatures=0x1dc03 without
|
||||
//! the COLOR_ATTACHMENT bit).
|
||||
//!
|
||||
//! Ownership: libghostty owns the `VkImage`, `VkDeviceMemory`, and
|
||||
//! the dmabuf fd for the lifetime of the `Target`. The fd is passed
|
||||
//! to the host via `present` as a borrow; the host must `dup()` if
|
||||
//! it needs to hold it past the call. `deinit` closes the fd and
|
||||
//! frees the memory.
|
||||
//! So the renderer draws into an OPTIMAL-tiled image (the format the
|
||||
//! GPU is happy to rasterize into), then copies the result into a
|
||||
//! LINEAR-laid-out exportable `VkBuffer` via `vkCmdCopyImageToBuffer`.
|
||||
//! The Qt host mmaps the buffer's dmabuf fd and reads BGRA bytes with
|
||||
//! the stride we report.
|
||||
//!
|
||||
//! Ownership: libghostty owns the image, buffer, all memory, and the
|
||||
//! dmabuf fd for the lifetime of the `Target`. The fd is passed to
|
||||
//! the host via `present` as a borrow; the host must `dup()` if it
|
||||
//! needs to hold it past the call. `deinit` closes the fd and frees
|
||||
//! all the memory.
|
||||
//!
|
||||
//! Counterpart: `src/renderer/opengl/Target.zig`.
|
||||
|
||||
|
|
@ -40,89 +44,63 @@ pub const DRM_FORMAT_MOD_LINEAR: u64 = 0;
|
|||
|
||||
pub const Options = struct {
|
||||
device: *const Device,
|
||||
|
||||
/// Color format. The DRM fourcc the host receives is derived
|
||||
/// from this — see `vkFormatToDrmFourcc` below.
|
||||
format: vk.VkFormat,
|
||||
|
||||
/// Render target dimensions, in pixels.
|
||||
width: u32,
|
||||
height: u32,
|
||||
|
||||
/// Extra `VkImageUsageFlagBits` beyond the defaults
|
||||
/// (`COLOR_ATTACHMENT_BIT | SAMPLED_BIT`). Rarely needed; left
|
||||
/// as an escape hatch for things like a transfer source for
|
||||
/// debug captures.
|
||||
/// Extra `VkImageUsageFlagBits` for the render image, beyond the
|
||||
/// defaults (`COLOR_ATTACHMENT_BIT | SAMPLED_BIT |
|
||||
/// TRANSFER_SRC_BIT`). Rarely needed.
|
||||
extra_usage: vk.VkImageUsageFlags = 0,
|
||||
};
|
||||
|
||||
pub const Error = error{
|
||||
/// A `vkCreate*` / `vkAllocate*` / `vkBind*` / `vkGetMemoryFdKHR`
|
||||
/// returned a non-success status.
|
||||
VulkanFailed,
|
||||
/// `Device.findMemoryType` found no memory type satisfying either the
|
||||
/// render image (DEVICE_LOCAL) or the dmabuf buffer (HOST_VISIBLE | HOST_COHERENT).
|
||||
NoSuitableMemoryType,
|
||||
/// The provided `VkFormat` doesn't map to a known DRM fourcc.
|
||||
/// Currently the renderer only ever uses
|
||||
/// `VK_FORMAT_B8G8R8A8_UNORM` / `_R8G8B8A8_UNORM` so this is a
|
||||
/// guard against config drift rather than a real failure mode.
|
||||
UnsupportedFormat,
|
||||
};
|
||||
|
||||
device: *const Device,
|
||||
|
||||
// ---- render image (OPTIMAL, internal) -------------------------------
|
||||
image: vk.VkImage,
|
||||
memory: vk.VkDeviceMemory,
|
||||
image_memory: vk.VkDeviceMemory,
|
||||
view: vk.VkImageView,
|
||||
|
||||
// ---- dmabuf buffer (LINEAR pixel bytes, exported) -------------------
|
||||
dmabuf_buffer: vk.VkBuffer,
|
||||
dmabuf_memory: vk.VkDeviceMemory,
|
||||
|
||||
format: vk.VkFormat,
|
||||
width: u32,
|
||||
height: u32,
|
||||
|
||||
/// dmabuf fd. Owned by `Target` until `deinit`; the host must
|
||||
/// `dup()` if it wants to hold it past a `present` call.
|
||||
fd: i32,
|
||||
|
||||
/// DRM fourcc the host should interpret the dmabuf as. Derived from
|
||||
/// `format` at construction time so the apprt callback can pass it
|
||||
/// straight through.
|
||||
drm_format: u32,
|
||||
|
||||
/// DRM modifier. Always `DRM_FORMAT_MOD_LINEAR` for v1.
|
||||
drm_modifier: u64,
|
||||
|
||||
/// Row stride in bytes of the dmabuf buffer. The buffer is tightly
|
||||
/// packed, so `init` sets this to `width * 4` (4 bytes per pixel, no
|
||||
/// per-row padding). The host needs it to interpret the exported bytes.
|
||||
stride: u32,
|
||||
|
||||
/// Current image layout, mirroring the same field on `Texture`.
|
||||
/// Starts at `UNDEFINED`; the renderer transitions it as needed
|
||||
/// across the frame.
|
||||
/// Current layout of the render image. `recordCopyToDmabuf` sets this
|
||||
/// to `TRANSFER_SRC_OPTIMAL` after recording its copy; its own barrier
|
||||
/// currently hard-codes `GENERAL` as oldLayout. The renderer transitions the image elsewhere too (RenderPass).
|
||||
layout: vk.VkImageLayout = vk.VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
|
||||
pub fn init(opts: Options) Error!Self {
|
||||
const dev = opts.device;
|
||||
const drm_format = try vkFormatToDrmFourcc(opts.format);
|
||||
|
||||
// COLOR_ATTACHMENT — we render into this via dynamic rendering.
|
||||
// SAMPLED — the renderer's custom-shader path samples the target.
|
||||
// TRANSFER_SRC — readback for debug / screenshot tooling.
|
||||
const usage = @as(vk.VkImageUsageFlags, vk.VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) |
|
||||
// BGRA8 — 4 bytes/pixel, packed (no per-row padding).
|
||||
const bytes_per_pixel: u32 = 4;
|
||||
const stride: u32 = opts.width * bytes_per_pixel;
|
||||
const buffer_size: vk.VkDeviceSize = @as(vk.VkDeviceSize, stride) * opts.height;
|
||||
|
||||
// ---- 1. Render image: OPTIMAL tiling, internal memory ----------
|
||||
const image_usage = @as(vk.VkImageUsageFlags, vk.VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) |
|
||||
vk.VK_IMAGE_USAGE_SAMPLED_BIT |
|
||||
vk.VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
|
||||
opts.extra_usage;
|
||||
|
||||
// ---- 1. VkImage (with external-memory chain) ----------------
|
||||
const external_memory_image_info: vk.VkExternalMemoryImageCreateInfo = .{
|
||||
.sType = vk.VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
|
||||
.pNext = null,
|
||||
.handleTypes = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
|
||||
};
|
||||
const image_info: vk.VkImageCreateInfo = .{
|
||||
.sType = vk.VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
|
||||
.pNext = &external_memory_image_info,
|
||||
.pNext = null,
|
||||
.flags = 0,
|
||||
.imageType = vk.VK_IMAGE_TYPE_2D,
|
||||
.format = opts.format,
|
||||
|
|
@ -130,95 +108,44 @@ pub fn init(opts: Options) Error!Self {
|
|||
.mipLevels = 1,
|
||||
.arrayLayers = 1,
|
||||
.samples = vk.VK_SAMPLE_COUNT_1_BIT,
|
||||
.tiling = vk.VK_IMAGE_TILING_LINEAR,
|
||||
.usage = usage,
|
||||
.tiling = vk.VK_IMAGE_TILING_OPTIMAL,
|
||||
.usage = image_usage,
|
||||
.sharingMode = vk.VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = null,
|
||||
.initialLayout = vk.VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
};
|
||||
var image: vk.VkImage = undefined;
|
||||
{
|
||||
const r = dev.dispatch.createImage(dev.device, &image_info, null, &image);
|
||||
if (r != vk.VK_SUCCESS) {
|
||||
log.err("vkCreateImage (Target) failed: result={}", .{r});
|
||||
return error.VulkanFailed;
|
||||
}
|
||||
if (dev.dispatch.createImage(dev.device, &image_info, null, &image) != vk.VK_SUCCESS) {
|
||||
log.err("vkCreateImage (Target render) failed", .{});
|
||||
return error.VulkanFailed;
|
||||
}
|
||||
errdefer dev.dispatch.destroyImage(dev.device, image, null);
|
||||
|
||||
// ---- 2. VkDeviceMemory (with export chain) ------------------
|
||||
var reqs: vk.VkMemoryRequirements = undefined;
|
||||
dev.dispatch.getImageMemoryRequirements(dev.device, image, &reqs);
|
||||
|
||||
// DEVICE_LOCAL is preferred but not required for linear export
|
||||
// memory — some drivers only expose HOST_VISIBLE memory types
|
||||
// matching the requirements bitmask for linear tiling. We don't
|
||||
// care which heap as long as it's exportable.
|
||||
const memory_type_index = dev.findMemoryType(reqs.memoryTypeBits, 0) orelse {
|
||||
log.err(
|
||||
"no exportable memory type for Target (typeBits=0x{x})",
|
||||
.{reqs.memoryTypeBits},
|
||||
);
|
||||
return error.NoSuitableMemoryType;
|
||||
};
|
||||
|
||||
const export_info: vk.VkExportMemoryAllocateInfo = .{
|
||||
.sType = vk.VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
|
||||
.pNext = null,
|
||||
.handleTypes = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
|
||||
};
|
||||
const alloc_info: vk.VkMemoryAllocateInfo = .{
|
||||
var image_reqs: vk.VkMemoryRequirements = undefined;
|
||||
dev.dispatch.getImageMemoryRequirements(dev.device, image, &image_reqs);
|
||||
const image_mem_idx = dev.findMemoryType(
|
||||
image_reqs.memoryTypeBits,
|
||||
vk.VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
||||
) orelse return error.NoSuitableMemoryType;
|
||||
const image_alloc: vk.VkMemoryAllocateInfo = .{
|
||||
.sType = vk.VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
|
||||
.pNext = &export_info,
|
||||
.allocationSize = reqs.size,
|
||||
.memoryTypeIndex = memory_type_index,
|
||||
};
|
||||
var memory: vk.VkDeviceMemory = undefined;
|
||||
{
|
||||
const r = dev.dispatch.allocateMemory(dev.device, &alloc_info, null, &memory);
|
||||
if (r != vk.VK_SUCCESS) {
|
||||
log.err("vkAllocateMemory (Target) failed: result={}", .{r});
|
||||
return error.VulkanFailed;
|
||||
}
|
||||
}
|
||||
errdefer dev.dispatch.freeMemory(dev.device, memory, null);
|
||||
|
||||
{
|
||||
const r = dev.dispatch.bindImageMemory(dev.device, image, memory, 0);
|
||||
if (r != vk.VK_SUCCESS) {
|
||||
log.err("vkBindImageMemory (Target) failed: result={}", .{r});
|
||||
return error.VulkanFailed;
|
||||
}
|
||||
}
|
||||
|
||||
// ---- 3. Export the dmabuf fd --------------------------------
|
||||
const fd_info: vk.VkMemoryGetFdInfoKHR = .{
|
||||
.sType = vk.VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
|
||||
.pNext = null,
|
||||
.memory = memory,
|
||||
.handleType = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
|
||||
.allocationSize = image_reqs.size,
|
||||
.memoryTypeIndex = image_mem_idx,
|
||||
};
|
||||
var fd: c_int = -1;
|
||||
{
|
||||
const r = dev.dispatch.getMemoryFdKHR(dev.device, &fd_info, &fd);
|
||||
if (r != vk.VK_SUCCESS or fd < 0) {
|
||||
log.err("vkGetMemoryFdKHR failed: result={} fd={}", .{ r, fd });
|
||||
return error.VulkanFailed;
|
||||
}
|
||||
var image_memory: vk.VkDeviceMemory = undefined;
|
||||
if (dev.dispatch.allocateMemory(dev.device, &image_alloc, null, &image_memory) != vk.VK_SUCCESS) {
|
||||
log.err("vkAllocateMemory (Target render image) failed", .{});
|
||||
return error.VulkanFailed;
|
||||
}
|
||||
errdefer dev.dispatch.freeMemory(dev.device, image_memory, null);
|
||||
if (dev.dispatch.bindImageMemory(dev.device, image, image_memory, 0) != vk.VK_SUCCESS) {
|
||||
log.err("vkBindImageMemory (Target render image) failed", .{});
|
||||
return error.VulkanFailed;
|
||||
}
|
||||
errdefer std.posix.close(fd);
|
||||
|
||||
// ---- 4. Stride from the driver's subresource layout ---------
|
||||
const subresource: vk.VkImageSubresource = .{
|
||||
.aspectMask = vk.VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.mipLevel = 0,
|
||||
.arrayLayer = 0,
|
||||
};
|
||||
var sub_layout: vk.VkSubresourceLayout = undefined;
|
||||
dev.dispatch.getImageSubresourceLayout(dev.device, image, &subresource, &sub_layout);
|
||||
|
||||
// ---- 5. VkImageView -----------------------------------------
|
||||
// ---- 2. ImageView on the render image -------------------------
|
||||
const view_info: vk.VkImageViewCreateInfo = .{
|
||||
.sType = vk.VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
||||
.pNext = null,
|
||||
|
|
@ -241,42 +168,212 @@ pub fn init(opts: Options) Error!Self {
|
|||
},
|
||||
};
|
||||
var view: vk.VkImageView = undefined;
|
||||
{
|
||||
const r = dev.dispatch.createImageView(dev.device, &view_info, null, &view);
|
||||
if (r != vk.VK_SUCCESS) {
|
||||
log.err("vkCreateImageView (Target) failed: result={}", .{r});
|
||||
return error.VulkanFailed;
|
||||
}
|
||||
if (dev.dispatch.createImageView(dev.device, &view_info, null, &view) != vk.VK_SUCCESS) {
|
||||
log.err("vkCreateImageView (Target) failed", .{});
|
||||
return error.VulkanFailed;
|
||||
}
|
||||
errdefer dev.dispatch.destroyImageView(dev.device, view, null);
|
||||
|
||||
// ---- 3. Dmabuf buffer: LINEAR pixel data, external memory -----
|
||||
const ext_buffer_info: vk.VkExternalMemoryBufferCreateInfo = .{
|
||||
.sType = vk.VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
|
||||
.pNext = null,
|
||||
.handleTypes = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
|
||||
};
|
||||
const buffer_info: vk.VkBufferCreateInfo = .{
|
||||
.sType = vk.VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = &ext_buffer_info,
|
||||
.flags = 0,
|
||||
.size = buffer_size,
|
||||
.usage = vk.VK_BUFFER_USAGE_TRANSFER_DST_BIT,
|
||||
.sharingMode = vk.VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = null,
|
||||
};
|
||||
var dmabuf_buffer: vk.VkBuffer = undefined;
|
||||
if (dev.dispatch.createBuffer(dev.device, &buffer_info, null, &dmabuf_buffer) != vk.VK_SUCCESS) {
|
||||
log.err("vkCreateBuffer (Target dmabuf) failed", .{});
|
||||
return error.VulkanFailed;
|
||||
}
|
||||
errdefer dev.dispatch.destroyBuffer(dev.device, dmabuf_buffer, null);
|
||||
|
||||
var buf_reqs: vk.VkMemoryRequirements = undefined;
|
||||
dev.dispatch.getBufferMemoryRequirements(dev.device, dmabuf_buffer, &buf_reqs);
|
||||
// Must be HOST_VISIBLE | HOST_COHERENT so the dmabuf fd is
|
||||
// mmap-able from userspace. NVIDIA's dmabuf-exportable memory
|
||||
// includes a host-visible type alongside the device-local ones;
|
||||
// we explicitly request both flags so we don't accidentally pick
|
||||
// a VRAM-only type whose mmap returns garbage.
|
||||
const host_flags = @as(vk.VkMemoryPropertyFlags, vk.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) |
|
||||
vk.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
||||
const dmabuf_mem_idx = dev.findMemoryType(buf_reqs.memoryTypeBits, host_flags) orelse {
|
||||
log.err(
|
||||
"no HOST_VISIBLE | HOST_COHERENT memory type for dmabuf (typeBits=0x{x})",
|
||||
.{buf_reqs.memoryTypeBits},
|
||||
);
|
||||
return error.NoSuitableMemoryType;
|
||||
};
|
||||
const export_info: vk.VkExportMemoryAllocateInfo = .{
|
||||
.sType = vk.VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
|
||||
.pNext = null,
|
||||
.handleTypes = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
|
||||
};
|
||||
const buf_alloc: vk.VkMemoryAllocateInfo = .{
|
||||
.sType = vk.VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
|
||||
.pNext = &export_info,
|
||||
.allocationSize = buf_reqs.size,
|
||||
.memoryTypeIndex = dmabuf_mem_idx,
|
||||
};
|
||||
var dmabuf_memory: vk.VkDeviceMemory = undefined;
|
||||
if (dev.dispatch.allocateMemory(dev.device, &buf_alloc, null, &dmabuf_memory) != vk.VK_SUCCESS) {
|
||||
log.err("vkAllocateMemory (Target dmabuf) failed", .{});
|
||||
return error.VulkanFailed;
|
||||
}
|
||||
errdefer dev.dispatch.freeMemory(dev.device, dmabuf_memory, null);
|
||||
if (dev.dispatch.bindBufferMemory(dev.device, dmabuf_buffer, dmabuf_memory, 0) != vk.VK_SUCCESS) {
|
||||
log.err("vkBindBufferMemory (Target dmabuf) failed", .{});
|
||||
return error.VulkanFailed;
|
||||
}
|
||||
|
||||
const fd_info: vk.VkMemoryGetFdInfoKHR = .{
|
||||
.sType = vk.VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
|
||||
.pNext = null,
|
||||
.memory = dmabuf_memory,
|
||||
.handleType = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
|
||||
};
|
||||
var fd: c_int = -1;
|
||||
if (dev.dispatch.getMemoryFdKHR(dev.device, &fd_info, &fd) != vk.VK_SUCCESS or fd < 0) {
|
||||
log.err("vkGetMemoryFdKHR (Target dmabuf) failed: fd={}", .{fd});
|
||||
return error.VulkanFailed;
|
||||
}
|
||||
errdefer std.posix.close(fd);
|
||||
|
||||
return .{
|
||||
.device = dev,
|
||||
.image = image,
|
||||
.memory = memory,
|
||||
.image_memory = image_memory,
|
||||
.view = view,
|
||||
.dmabuf_buffer = dmabuf_buffer,
|
||||
.dmabuf_memory = dmabuf_memory,
|
||||
.format = opts.format,
|
||||
.width = opts.width,
|
||||
.height = opts.height,
|
||||
.fd = fd,
|
||||
.drm_format = drm_format,
|
||||
.drm_modifier = DRM_FORMAT_MOD_LINEAR,
|
||||
.stride = @intCast(sub_layout.rowPitch),
|
||||
.stride = stride,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn deinit(self: *Self) void {
|
||||
const dev = self.device;
|
||||
if (self.fd >= 0) std.posix.close(self.fd);
|
||||
dev.dispatch.destroyBuffer(dev.device, self.dmabuf_buffer, null);
|
||||
dev.dispatch.freeMemory(dev.device, self.dmabuf_memory, null);
|
||||
dev.dispatch.destroyImageView(dev.device, self.view, null);
|
||||
dev.dispatch.destroyImage(dev.device, self.image, null);
|
||||
dev.dispatch.freeMemory(dev.device, self.memory, null);
|
||||
if (self.fd >= 0) std.posix.close(self.fd);
|
||||
dev.dispatch.freeMemory(dev.device, self.image_memory, null);
|
||||
self.* = undefined;
|
||||
}
|
||||
|
||||
/// Hand the target's dmabuf fd to the host's `present` callback. The
|
||||
/// fd is a temporary borrow valid only until this call returns; the
|
||||
/// host must `dup()` if it needs to hold it past then. The
|
||||
/// underlying memory remains owned by libghostty.
|
||||
/// Record the GPU commands that copy the render image into the
/// dmabuf-exported buffer. Call this AFTER all RenderPass work has
/// been recorded but BEFORE `vkEndCommandBuffer`.
///
/// Three commands are recorded into `cb`, in order:
///   1. an image barrier moving the render image from `GENERAL` to
///      `TRANSFER_SRC_OPTIMAL`,
///   2. a `vkCmdCopyImageToBuffer` of the full `width` x `height`
///      extent into `dmabuf_buffer`, tightly packed from offset 0,
///   3. a buffer barrier (TRANSFER_WRITE → HOST_READ) so a later host
///      read of the buffer sees the transferred bytes.
///
/// The dmabuf buffer has no layout, so it only needs the memory
/// barrier. On return `self.layout` is `TRANSFER_SRC_OPTIMAL`.
pub fn recordCopyToDmabuf(self: *Self, cb: vk.VkCommandBuffer) void {
    const dev = self.device;

    // Image: GENERAL → TRANSFER_SRC_OPTIMAL. oldLayout is hard-coded to
    // GENERAL, the layout this function's contract expects the
    // RenderPass to have left the image in.
    // NOTE(review): `self.layout` is not consulted here. If the image
    // can still be UNDEFINED at this point (no render pass recorded
    // into it), oldLayout would not match — confirm against RenderPass.
    const img_barrier: vk.VkImageMemoryBarrier = .{
        .sType = vk.VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .pNext = null,
        .srcAccessMask = vk.VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
        .dstAccessMask = vk.VK_ACCESS_TRANSFER_READ_BIT,
        .oldLayout = vk.VK_IMAGE_LAYOUT_GENERAL,
        .newLayout = vk.VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
        .srcQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .image = self.image,
        .subresourceRange = .{
            .aspectMask = vk.VK_IMAGE_ASPECT_COLOR_BIT,
            .baseMipLevel = 0,
            .levelCount = 1,
            .baseArrayLayer = 0,
            .layerCount = 1,
        },
    };
    dev.dispatch.cmdPipelineBarrier(
        cb,
        vk.VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
        vk.VK_PIPELINE_STAGE_TRANSFER_BIT,
        0,
        0, // no global memory barriers
        null,
        0, // no buffer barriers
        null,
        1, // one image barrier
        &img_barrier,
    );

    // Copy image → buffer. BGRA8, packed (stride = width*4).
    const region: vk.VkBufferImageCopy = .{
        .bufferOffset = 0,
        .bufferRowLength = 0, // 0 = tightly packed (uses imageExtent.width)
        .bufferImageHeight = 0,
        .imageSubresource = .{
            .aspectMask = vk.VK_IMAGE_ASPECT_COLOR_BIT,
            .mipLevel = 0,
            .baseArrayLayer = 0,
            .layerCount = 1,
        },
        .imageOffset = .{ .x = 0, .y = 0, .z = 0 },
        .imageExtent = .{ .width = self.width, .height = self.height, .depth = 1 },
    };
    dev.dispatch.cmdCopyImageToBuffer(
        cb,
        self.image,
        vk.VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
        self.dmabuf_buffer,
        1,
        &region,
    );

    // Memory barrier so the host's later mmap read sees the bytes.
    // HOST_READ_BIT is the destination access; HOST_BIT is the
    // destination stage. (External fd consumers may need an explicit
    // sync2 release barrier, but for an mmap-based read after a
    // fence-wait this is sufficient on the GPU side.)
    const buf_barrier: vk.VkBufferMemoryBarrier = .{
        .sType = vk.VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
        .pNext = null,
        .srcAccessMask = vk.VK_ACCESS_TRANSFER_WRITE_BIT,
        .dstAccessMask = vk.VK_ACCESS_HOST_READ_BIT,
        .srcQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .buffer = self.dmabuf_buffer,
        .offset = 0,
        .size = vk.VK_WHOLE_SIZE,
    };
    dev.dispatch.cmdPipelineBarrier(
        cb,
        vk.VK_PIPELINE_STAGE_TRANSFER_BIT,
        vk.VK_PIPELINE_STAGE_HOST_BIT,
        0,
        0, // no global memory barriers
        null,
        1, // one buffer barrier
        &buf_barrier,
        0, // no image barriers
        null,
    );

    // Track the new image layout so the next frame's RenderPass.begin
    // doesn't see stale state (it currently transitions from UNDEFINED
    // unconditionally, but be defensive).
    self.layout = vk.VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
}
|
||||
|
||||
pub fn present(self: *const Self) void {
|
||||
self.device.platform.present(
|
||||
self.device.platform.userdata,
|
||||
|
|
@ -289,13 +386,7 @@ pub fn present(self: *const Self) void {
|
|||
);
|
||||
}
|
||||
|
||||
/// Map a `VkFormat` to its DRM fourcc. Vulkan and DRM disagree on
|
||||
/// byte order naming: Vulkan format names are in memory order, DRM
|
||||
/// names list components MSB-first within a little-endian 32-bit word. The mapping table here covers
|
||||
/// the formats the renderer actually targets — extend as new ones
|
||||
/// are added.
|
||||
fn vkFormatToDrmFourcc(format: vk.VkFormat) Error!u32 {
|
||||
// DRM fourcc helpers — packing 4 ASCII chars LSB-first.
|
||||
const fourcc = struct {
|
||||
fn make(a: u8, b: u8, c: u8, d: u8) u32 {
|
||||
return (@as(u32, a)) |
|
||||
|
|
@ -305,12 +396,9 @@ fn vkFormatToDrmFourcc(format: vk.VkFormat) Error!u32 {
|
|||
}
|
||||
};
|
||||
return switch (format) {
|
||||
// Vulkan B,G,R,A in memory = DRM_FORMAT_ARGB8888 ("AR24").
|
||||
// This is what Wayland compositors prefer.
|
||||
vk.VK_FORMAT_B8G8R8A8_UNORM,
|
||||
vk.VK_FORMAT_B8G8R8A8_SRGB,
|
||||
=> fourcc.make('A', 'R', '2', '4'),
|
||||
// Vulkan R,G,B,A in memory = DRM_FORMAT_ABGR8888 ("AB24").
|
||||
vk.VK_FORMAT_R8G8B8A8_UNORM,
|
||||
vk.VK_FORMAT_R8G8B8A8_SRGB,
|
||||
=> fourcc.make('A', 'B', '2', '4'),
|
||||
|
|
|
|||
|
|
@ -45,13 +45,36 @@ pub const source = struct {
|
|||
// than enable that and provide a callback, we splice the
|
||||
// include contents inline — same approach `opengl/shaders.zig`
|
||||
// uses via its `loadShaderCode`.
|
||||
pub const bg_color_frag = processIncludes(@embedFile("../shaders/glsl/bg_color.f.glsl"));
|
||||
|
||||
// DIAGNOSTIC: override bg_color.f.glsl with a hardcoded purple
|
||||
// color so we can verify the pipeline + descriptor binding +
|
||||
// draw recording work end-to-end without depending on the
|
||||
// Uniforms.bg_color data path being correct. Once a colored
|
||||
// window confirms the pipeline runs, revert to the real
|
||||
// include-expanded source.
|
||||
pub const bg_color_frag: [:0]const u8 =
|
||||
\\#version 450
|
||||
\\layout(location = 0) out vec4 out_FragColor;
|
||||
\\void main() {
|
||||
\\ out_FragColor = vec4(0.5, 0.0, 0.5, 1.0); // debug: opaque purple
|
||||
\\}
|
||||
;
|
||||
pub const bg_color_frag_real = processIncludes(@embedFile("../shaders/glsl/bg_color.f.glsl"));
|
||||
pub const bg_image_frag = processIncludes(@embedFile("../shaders/glsl/bg_image.f.glsl"));
|
||||
pub const bg_image_vert = processIncludes(@embedFile("../shaders/glsl/bg_image.v.glsl"));
|
||||
pub const cell_bg_frag = processIncludes(@embedFile("../shaders/glsl/cell_bg.f.glsl"));
|
||||
pub const cell_text_frag = processIncludes(@embedFile("../shaders/glsl/cell_text.f.glsl"));
|
||||
pub const cell_text_vert = processIncludes(@embedFile("../shaders/glsl/cell_text.v.glsl"));
|
||||
pub const full_screen_vert = processIncludes(@embedFile("../shaders/glsl/full_screen.v.glsl"));
|
||||
// DIAGNOSTIC: inline a known-good fullscreen-triangle vertex
|
||||
// shader to rule out any vulkanizeGlsl rewrite issues.
|
||||
pub const full_screen_vert: [:0]const u8 =
|
||||
\\#version 450
|
||||
\\void main() {
|
||||
\\ vec2 pos[3] = vec2[3](vec2(-1.0, -1.0), vec2(3.0, -1.0), vec2(-1.0, 3.0));
|
||||
\\ gl_Position = vec4(pos[gl_VertexIndex], 0.0, 1.0);
|
||||
\\}
|
||||
;
|
||||
pub const full_screen_vert_real = processIncludes(@embedFile("../shaders/glsl/full_screen.v.glsl"));
|
||||
pub const image_frag = processIncludes(@embedFile("../shaders/glsl/image.f.glsl"));
|
||||
pub const image_vert = processIncludes(@embedFile("../shaders/glsl/image.v.glsl"));
|
||||
};
|
||||
|
|
@ -542,14 +565,16 @@ pub const Shaders = struct {
|
|||
}
|
||||
errdefer device.dispatch.destroyDescriptorSetLayout(device.device, bg_color_dsl, null);
|
||||
|
||||
const bg_color_dsls = [_]vk.VkDescriptorSetLayout{bg_color_dsl};
|
||||
// DIAGNOSTIC: the debug bg_color shader has no inputs, so
|
||||
// build the pipeline WITHOUT a descriptor set layout. The
|
||||
// `bg_color_dsl` is still kept around — it gets stored in
|
||||
// `Shaders.bg_color_set_layout` and torn down on deinit.
|
||||
const bg_color_pipeline = try Pipeline.init(.{
|
||||
.device = device,
|
||||
.descriptor_pool = &pool,
|
||||
.vertex_module = modules.full_screen_vert.handle,
|
||||
.fragment_module = modules.bg_color_frag.handle,
|
||||
.vertex_input = null,
|
||||
.descriptor_set_layouts = &bg_color_dsls,
|
||||
.descriptor_set_layouts = &.{},
|
||||
.color_format = vk.VK_FORMAT_B8G8R8A8_UNORM,
|
||||
.blending_enabled = false,
|
||||
.topology = vk.VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
|
||||
|
|
|
|||
Loading…
Reference in New Issue