Merge pull request #17 from fuddlesworth/qt-vulkan-renderer

Vulkan renderer + Qt apprt with Wayland subsurface dmabuf presenter
pull/12846/head
Nathan 2026-05-26 18:26:04 -05:00 committed by GitHub
commit 4d01762849
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
64 changed files with 13378 additions and 145 deletions

View File

@ -74,6 +74,7 @@
.macos = .{ .path = "./pkg/macos", .lazy = true },
.oniguruma = .{ .path = "./pkg/oniguruma", .lazy = true },
.opengl = .{ .path = "./pkg/opengl", .lazy = true },
.vulkan = .{ .path = "./pkg/vulkan", .lazy = true },
.sentry = .{ .path = "./pkg/sentry", .lazy = true },
.simdutf = .{ .path = "./pkg/simdutf", .lazy = true },
.wuffs = .{ .path = "./pkg/wuffs", .lazy = true },

View File

@ -67,6 +67,11 @@ typedef enum {
GHOSTTY_PLATFORM_MACOS,
GHOSTTY_PLATFORM_IOS,
GHOSTTY_PLATFORM_OPENGL,
// Vulkan: fork-only platform tag. The host owns the
// VkInstance/Device/Queue and hands them to libghostty via
// `ghostty_platform_vulkan_s`. Frames come back to the host as
// dmabuf fds for zero-copy compositing.
GHOSTTY_PLATFORM_VULKAN,
} ghostty_platform_e;
typedef enum {
@ -481,10 +486,87 @@ typedef struct {
void (*present)(void* userdata);
} ghostty_platform_opengl_s;
// Vulkan host integration (fork-only). The host owns the
// VkInstance / VkPhysicalDevice / VkDevice / VkQueue (same ownership
// model as the OpenGL host); libghostty creates pipelines, command
// pools, and images against that device. Frames are handed back to the
// host as dmabuf file descriptors so a compositor-side toolkit (e.g.
// Qt RHI via QRhiTexture) can sample them without a CPU readback.
//
// Handles are typed as void* here so consumers don't need the Vulkan
// headers to compile the public C API; callers should treat them as
// VkInstance, VkPhysicalDevice, VkDevice, VkQueue respectively.
typedef struct {
// Userdata passed as the first argument to every callback below.
void* userdata;
// Return the address of vkGetInstanceProcAddr (as void*). libghostty
// uses this as the loader entry point for every other Vulkan
// function it needs.
// NOTE(review): the callback also receives a `name` argument that the
// description above does not mention — confirm whether the host is
// expected to resolve `name` itself or to return the loader entry
// point regardless of `name`.
void* (*get_instance_proc_addr)(void* userdata, const char* name);
// Host-owned Vulkan handles. libghostty does not destroy these; they
// remain owned by the host for the surface's lifetime.
void* (*instance)(void* userdata); // VkInstance
void* (*physical_device)(void* userdata); // VkPhysicalDevice
void* (*device)(void* userdata); // VkDevice
void* (*queue)(void* userdata); // VkQueue
// Index of the queue family that `queue` belongs to.
uint32_t (*queue_family_index)(void* userdata);
// Compositor-supported DRM modifiers for a given DRM_FORMAT_*
// fourcc, as advertised by linux-dmabuf-v1's `modifier` events.
// libghostty intersects this with what its physical device
// supports for COLOR_ATTACHMENT to pick a tiling that the
// compositor will actually accept on attach. Without this
// intersection, drivers that don't expose COLOR_ATTACHMENT for
// the LINEAR modifier (NVIDIA) can't use the direct-export path
// and fall back to a CPU-readback path.
//
// Two-pass usage: call with `out=NULL, capacity=0` to query the
// total count; allocate; call again to fill. On the fill pass the
// return value is the number of modifiers actually written (capped
// at `capacity`). May return 0 if the format isn't
// compositor-supported or the host doesn't speak linux-dmabuf-v1.
// NOTE(review): for the query pass to work, the host must return
// the total count even though nothing is written — confirm that
// host implementations follow this rather than returning 0.
size_t (*get_supported_modifiers)(
void* userdata,
uint32_t drm_format,
uint64_t* out,
size_t capacity);
// Hand off a rendered frame to the host as a dmabuf fd. The host
// imports it (e.g. into Qt's RHI as a QRhiTexture, or attaches to
// a wl_subsurface via linux-dmabuf-v1) and composites.
//
// `image_backed` is true when the dmabuf was exported from a
// VkImage allocated with VK_EXT_image_drm_format_modifier — i.e.
// it's directly importable as a 2D image by the compositor or any
// GPU-side consumer. false when it was exported from a VkBuffer
// (the legacy NVIDIA fallback path where the driver doesn't
// advertise COLOR_ATTACHMENT for the LINEAR modifier on
// exportable images, so libghostty renders into an OPTIMAL image
// and copies the bytes into a linear VkBuffer for export). In the
// !image_backed case the fd is only usable via mmap + CPU
// readback — attempting a linux-dmabuf-v1 import will trigger an
// `invalid_wl_buffer` protocol error.
//
// libghostty retains ownership of the underlying VkDeviceMemory;
// the host must dup() the fd if it needs to hold it past the call.
void (*present)(
void* userdata,
int dmabuf_fd,
uint32_t drm_format,
uint64_t drm_modifier,
uint32_t width,
uint32_t height,
uint32_t stride,
bool image_backed);
} ghostty_platform_vulkan_s;
// Per-platform host integration payload; the members share storage, so
// only one is meaningful at a time.
// NOTE(review): presumably the active member is selected by a
// `ghostty_platform_e` tag stored next to this union — confirm at the
// use site.
typedef union {
ghostty_platform_macos_s macos;
ghostty_platform_ios_s ios;
ghostty_platform_opengl_s opengl;
ghostty_platform_vulkan_s vulkan;
} ghostty_platform_u;
typedef enum {

View File

@ -165,5 +165,20 @@ fn buildGlslang(
);
}
// Ghastty Vulkan-friendly compile shim. Wraps glslang's C++ API
// to expose features (auto-map bindings/locations, source/target
// environment translation) that the upstream C API doesn't, so
// the renderer can compile OpenGL-flavored GLSL including
// user-supplied custom shaders to Vulkan-targeted SPIR-V.
lib.addCSourceFiles(.{
.root = b.path("override"),
.flags = flags.items,
.files = &.{"ghastty_vk_shim.cpp"},
});
lib.installHeader(
b.path("override/ghastty_vk_shim.h"),
"ghastty_vk_shim.h",
);
return lib;
}

View File

@ -1,4 +1,10 @@
pub const c = @cImport({
@cInclude("glslang/Include/glslang_c_interface.h");
@cInclude("glslang/Public/resource_limits_c.h");
// Ghastty-specific extension to glslang's C ABI: a Vulkan-
// friendly compile entry point that wraps the C++ TShader API
// (setAutoMapBindings / setAutoMapLocations / setEnvInput) the
// upstream C interface doesn't expose. See
// `pkg/glslang/override/ghastty_vk_shim.h`.
@cInclude("ghastty_vk_shim.h");
});

View File

@ -4,6 +4,7 @@ const shader = @import("shader.zig");
pub const c = @import("c.zig").c;
pub const testing = @import("test.zig");
pub const vk = @import("vk.zig");
pub const init = initpkg.init;
pub const finalize = initpkg.finalize;

View File

@ -0,0 +1,282 @@
// See `ghastty_vk_shim.h` for the contract.
#include "ghastty_vk_shim.h"
#include <cstdlib>
#include <cstring>
#include <mutex>
#include <string>
#include <unordered_map>
#include <vector>
#include <glslang/Include/PoolAlloc.h>
#include <glslang/Public/ShaderLang.h>
#include <glslang/Public/ResourceLimits.h>
#include <SPIRV/GlslangToSpv.h>
// glslang's `InitializeProcess` / `FinalizeProcess` must bracket
// any use of `glslang::TShader` / `glslang::TProgram`. The existing
// C-API path in `pkg/glslang/init.zig` calls `glslang_initialize_process`
// at startup, and per the glslang headers the C and C++ inits share
// state, so we don't initialize again here — calling `InitializeProcess`
// twice without a matching `FinalizeProcess` leaks reference counts.
namespace {
// Gather the info and debug logs of `shader` and `program` into a single
// string. Either pointer may be null, in which case that object is
// skipped. Null or empty logs contribute nothing; every log that is
// present is followed by a newline. Shader logs come before program logs.
std::string drain_logs(glslang::TShader* shader, glslang::TProgram* program) {
    std::string collected;

    // Append one log plus a trailing newline, ignoring null/empty text.
    const auto append_line = [&collected](const char* text) {
        if (text == nullptr || text[0] == '\0') {
            return;
        }
        collected += text;
        collected += "\n";
    };

    if (shader != nullptr) {
        append_line(shader->getInfoLog());
        append_line(shader->getInfoDebugLog());
    }
    if (program != nullptr) {
        append_line(program->getInfoLog());
        append_line(program->getInfoDebugLog());
    }
    return collected;
}
// Duplicate `s` into a std::malloc'd, NUL-terminated C string that the
// caller releases with std::free. Returns nullptr if the allocation
// fails.
char* dup_to_c(const std::string& s) {
    const std::size_t length = s.size();
    void* raw = std::malloc(length + 1);
    if (raw == nullptr) {
        return nullptr;
    }
    char* copy = static_cast<char*>(raw);
    // c_str() always carries the trailing '\0', so a single copy of
    // length + 1 bytes moves the text and its terminator together.
    std::memcpy(copy, s.c_str(), length + 1);
    return copy;
}
// Process-wide SPIR-V cache keyed by (source, stage). The renderer
// builds one Vulkan.Shaders per surface (per tab/split), which calls
// `Module.init` → `compileToSpv` for all 9 built-in shaders + every
// user custom shader. Each compile pulls memory from glslang's
// thread-local TPoolAllocator, which is a raw pointer in glslang's
// TLS that is NEVER released when a renderer thread exits (Zig
// pthread spawn doesn't run C++ thread_local destructors and there
// is no FinalizeThread hook). With N tabs, the leaked pool pages
// add up to tens of MB — observed via heaptrack as the dominant
// leak source (~17 MB across 15k+ allocations from
// glslang::TPoolAllocator::allocate).
//
// Cache the resulting SPIR-V instead. The built-in shaders produce
// byte-identical SPV regardless of which surface compiles them; the
// custom shaders only change when the user edits their config. So
// after the first surface, every other surface's compile is a
// cache hit with zero glslang work and zero new pool pages.
//
// Key format: source bytes followed by a single byte stage tag
// (0=vertex, 1=fragment). Disambiguates the rare case where two
// stages share identical source text.
// Accessor for the mutex that guards the SPIR-V cache. The mutex is a
// function-local static, so it is constructed on first use and every
// call returns the same object.
std::mutex& spv_cache_mutex() {
    static std::mutex cache_lock;
    return cache_lock;
}
// Accessor for the process-wide SPIR-V cache (cache key -> SPIR-V
// words). The map is a function-local static, so it is constructed on
// first use and every call returns the same object. Callers are
// expected to hold `spv_cache_mutex()` while touching it.
std::unordered_map<std::string, std::vector<uint32_t>>& spv_cache() {
    using SpvCache = std::unordered_map<std::string, std::vector<uint32_t>>;
    static SpvCache entries;
    return entries;
}
// Build the cache key for a (source, stage) pair: the bytes of the
// NUL-terminated `source` followed by one byte holding the stage value.
// `source` must be non-null.
std::string make_cache_key(const char* source, ghastty_glslang_stage_t stage) {
    const char stage_tag = static_cast<char>(stage);
    std::string key;
    key.reserve(std::strlen(source) + 1);
    key.append(source);
    key += stage_tag;
    return key;
}
} // namespace
extern "C" int ghastty_glslang_compile_vulkan(
const char* source,
ghastty_glslang_stage_t stage,
uint32_t** spv_out,
size_t* spv_len_out,
char** err_out) {
// Reject any null out-pointer up-front. The previous code
// dereferenced all three unconditionally on line 1 of the
// function body — the in-tree Zig caller (`pkg/glslang/vk.zig`)
// always passes valid pointers, but this is a C ABI export and
// a future consumer that omits any out-arg would crash here
// before any error message could be reported. Returning early
// surfaces the precondition cleanly.
if (spv_out == nullptr || spv_len_out == nullptr || err_out == nullptr) {
return 1;
}
*spv_out = nullptr;
*spv_len_out = 0;
*err_out = nullptr;
if (source == nullptr) {
*err_out = dup_to_c("source pointer is null");
return 1;
}
// Cache hit: copy SPV from the cache and return without ever
// touching glslang. See the cache rationale comment above the
// map for why this is critical for the multi-tab leak.
const std::string key = make_cache_key(source, stage);
{
std::lock_guard<std::mutex> lg(spv_cache_mutex());
auto it = spv_cache().find(key);
if (it != spv_cache().end()) {
const std::vector<uint32_t>& cached = it->second;
const size_t bytes = cached.size() * sizeof(uint32_t);
uint32_t* out = static_cast<uint32_t*>(std::malloc(bytes));
if (out == nullptr) {
*err_out = dup_to_c(
"malloc failed for cached SPIR-V copy");
return 1;
}
std::memcpy(out, cached.data(), bytes);
*spv_out = out;
*spv_len_out = cached.size();
return 0;
}
}
EShLanguage lang;
switch (stage) {
case GHASTTY_GLSLANG_STAGE_VERTEX: lang = EShLangVertex; break;
case GHASTTY_GLSLANG_STAGE_FRAGMENT: lang = EShLangFragment; break;
default:
*err_out = dup_to_c("unknown stage");
return 1;
}
glslang::TShader shader(lang);
const char* sources[1] = { source };
shader.setStrings(sources, 1);
// Source environment is OpenGL GLSL, target environment is Vulkan.
// The cross-environment setup is what lets glslang translate
// OpenGL-only builtins (`gl_VertexID`, `gl_InstanceID`, etc.) to
// their Vulkan equivalents (`gl_VertexIndex`, `gl_InstanceIndex`)
// during SPIR-V generation. Matches `glslangValidator -V` and
// Qt's `QShaderBaker`.
shader.setEnvInput(
glslang::EShSourceGlsl,
lang,
glslang::EShClientVulkan,
/*version*/ 100);
shader.setEnvClient(
glslang::EShClientVulkan,
glslang::EShTargetVulkan_1_3);
shader.setEnvTarget(
glslang::EShTargetSpv,
glslang::EShTargetSpv_1_6);
// Auto-map: assign descriptor bindings and shader I/O locations
// for any `layout`-less declarations. Required for OpenGL GLSL
// that doesn't bother with explicit locations (which Vulkan SPIR-V
// requires).
shader.setAutoMapBindings(true);
shader.setAutoMapLocations(true);
const TBuiltInResource* resources = GetDefaultResources();
const EShMessages messages = static_cast<EShMessages>(
EShMsgDefault | EShMsgSpvRules | EShMsgVulkanRules);
if (!shader.parse(resources, /*default_version*/ 450,
ECoreProfile, /*force_default*/ false,
/*forward_compatible*/ true, messages)) {
*err_out = dup_to_c(drain_logs(&shader, nullptr));
return 1;
}
glslang::TProgram program;
program.addShader(&shader);
if (!program.link(messages)) {
*err_out = dup_to_c(drain_logs(&shader, &program));
return 1;
}
// mapIO() is what actually applies the auto-bind / auto-map
// resolution to the SPIR-V output. Without it the
// `setAutoMap*(true)` calls above are no-ops.
if (!program.mapIO()) {
std::string s = "glslang TProgram::mapIO() failed:\n";
s += drain_logs(&shader, &program);
*err_out = dup_to_c(s);
return 1;
}
std::vector<unsigned int> spv;
glslang::GlslangToSpv(*program.getIntermediate(lang), spv);
if (spv.empty()) {
*err_out = dup_to_c(
"GlslangToSpv produced no SPIR-V output");
return 1;
}
const size_t bytes = spv.size() * sizeof(uint32_t);
uint32_t* out = static_cast<uint32_t*>(std::malloc(bytes));
if (out == nullptr) {
*err_out = dup_to_c("malloc failed for SPIR-V output buffer");
return 1;
}
std::memcpy(out, spv.data(), bytes);
*spv_out = out;
*spv_len_out = spv.size();
// Populate the cache with the freshly-compiled SPV. Stored by
// value (std::move into the map); the SPV vector is the same
// data we just memcpy'd to `out` so the caller's malloc'd copy
// and the cache entry are independent. Future calls with this
// (source, stage) skip glslang entirely.
{
std::lock_guard<std::mutex> lg(spv_cache_mutex());
spv_cache().emplace(key, std::move(spv));
}
return 0;
}
// Release a SPIR-V buffer handed out through `spv_out` by
// `ghastty_glslang_compile_vulkan`. Those buffers are allocated with
// std::malloc, so std::free is the matching deallocator; passing NULL
// is a no-op.
extern "C" void ghastty_glslang_free_spirv(uint32_t* spv) {
std::free(spv);
}
// Release an error string handed out through `err_out` by
// `ghastty_glslang_compile_vulkan`. Those strings are allocated with
// std::malloc, so std::free is the matching deallocator; passing NULL
// is a no-op.
extern "C" void ghastty_glslang_free_error(char* err) {
std::free(err);
}
// Tear down the shim's process-wide state in three ordered steps:
// (1) drop the SPIR-V cache, (2) run glslang::FinalizeProcess(),
// (3) delete the calling thread's TPoolAllocator.
//
// NOTE(review): the header documents this function as idempotent, but
// nothing below guards against a second call — FinalizeProcess() and
// the pool delete run unconditionally. Confirm a repeat call is safe
// with the bundled glslang before relying on that.
extern "C" void ghastty_glslang_finalize_process(void) {
// Step 1: drop the cached SPV blobs. The map owns the std::vector
// storage it holds; clearing returns it to the heap. Doing this
// before FinalizeProcess means a compile attempted after finalize
// cannot be served a stale cache hit — it has to re-enter glslang,
// which then applies its own checks.
{
std::lock_guard<std::mutex> lg(spv_cache_mutex());
spv_cache().clear();
}
// Step 2: release glslang's process-wide shared state BEFORE the
// pool is destroyed in step 3. Per the original author's note this
// deletes SharedSymbolTables[v][s][p][src][stage] entries that hold
// pointers INTO the thread pool, so their destructors must run
// while the pool memory is still live.
glslang::FinalizeProcess();
// Step 3: destroy this thread's TPoolAllocator entirely. popAll()
// alone is insufficient — it returns pages to glslang's
// internal free list but never gives them back to the system
// allocator (verified empirically: heaptrack total leaked
// unchanged after popAll). The pool is `new`-allocated in
// glslang::InitializeThreadPoolAllocator, so `delete` calls
// ~TPoolAllocator which `free()`s every page.
// NOTE(review): the `delete` is only valid if the bundled glslang
// really heap-allocates the thread pool as described — confirm
// when updating glslang.
//
// heaptrack pointed the ~12 MB glslang leak at
// TPoolAllocator::allocate calls rooted in
// shadertoy.spirvFromGlsl on the GUI thread (since
// ghostty_surface_new runs glslang synchronously from
// MainWindow::newTab) — that pool's pages persist until the
// GUI thread exits, but a Qt app's GUI thread only exits at
// process termination, after atexit. Manual delete here gives
// the pages back before exit.
//
// Safe at atexit because every renderer thread has joined
// via Vulkan.threadExit (their pools are independent
// threadlocals already cleaned up), the SPV cache was just
// cleared, and FinalizeProcess just ran.
glslang::TPoolAllocator* pool = &glslang::GetThreadPoolAllocator();
// Detach the pool from the thread before deleting it so glslang is
// not left holding a dangling pointer.
glslang::SetThreadPoolAllocator(nullptr);
delete pool;
}

View File

@ -0,0 +1,81 @@
// Vulkan-targeted GLSL compilation that exposes glslang's
// C++-only features (auto-map bindings/locations, source/target
// environment translation for `gl_VertexID` → `gl_VertexIndex`)
// through a C-compatible entry point.
//
// glslang's public C API (`glslang_c_interface.h`) doesn't expose
// `setAutoMapBindings` / `setAutoMapLocations` / `setEnvInput` —
// they only live on the C++ `glslang::TShader` class. The CLI
// (`glslangValidator -V --auto-map-locations --auto-map-bindings`)
// and Qt's `QShaderBaker` both call them internally; this shim is
// the equivalent for libghostty.
//
// Used by `src/renderer/vulkan/shaders.zig` for both the renderer's
// built-in shaders and user-supplied custom shaders. The same
// function covers both because user-shader compilation happens at
// runtime against `libghostty.so`, not as a build step.
#ifndef GHASTTY_VK_SHIM_H
#define GHASTTY_VK_SHIM_H
#include <stddef.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// Shader stages the shim can compile. The values are fixed explicitly;
// the implementation also appends the value as a one-byte tag to its
// SPIR-V cache key, so stages must keep distinct values that fit in a
// byte.
typedef enum {
GHASTTY_GLSLANG_STAGE_VERTEX = 0,
GHASTTY_GLSLANG_STAGE_FRAGMENT = 1,
} ghastty_glslang_stage_t;
// Compile a null-terminated GLSL source to Vulkan-flavored SPIR-V.
//
// Preconditions: `spv_out`, `spv_len_out`, and `err_out` MUST all be
// non-null. The function rejects any null out-pointer with rc=1
// and no error string (since `err_out` is itself part of the
// contract). `source` may be null; that produces a normal failure
// with `*err_out` set.
//
// On success: returns 0. `*spv_out` points to a freshly allocated
// array of `*spv_len_out` 32-bit SPIR-V words. Caller frees it
// with `ghastty_glslang_free_spirv`. `*err_out` is NULL.
//
// On failure: returns non-zero. `*err_out` points to a freshly
// allocated null-terminated error message (or NULL on out-arg
// precondition violation OR on internal OOM). Caller frees it
// with `ghastty_glslang_free_error`. `*spv_out` is NULL,
// `*spv_len_out` is 0.
int ghastty_glslang_compile_vulkan(
const char* source,
ghastty_glslang_stage_t stage,
uint32_t** spv_out,
size_t* spv_len_out,
char** err_out);
void ghastty_glslang_free_spirv(uint32_t* spv);
void ghastty_glslang_free_error(char* err);
// Release the process-wide glslang state: the per-thread
// TPoolAllocator pages (the high-water-mark pool memory that
// otherwise leaks for the process lifetime because Zig pthreads
// don't run C++ thread_local destructors) AND the shim's
// SPV cache.
//
// Call exactly ONCE from the host's shutdown path AFTER all
// renderer threads have joined — calling it while a renderer
// thread might still touch glslang::TShader / TProgram is
// undefined behavior per glslang's contract. The implementation
// has no guard against repeated calls, so do not rely on a second
// call being a harmless no-op.
//
// libghostty's own renderer-thread teardown (Vulkan.threadExit)
// is what serializes this safely: by the time the host's main()
// returns from QApplication::exec(), every renderer thread has
// already run threadExit and is joined.
void ghastty_glslang_finalize_process(void);
#ifdef __cplusplus
}
#endif
#endif /* GHASTTY_VK_SHIM_H */

90
pkg/glslang/vk.zig Normal file
View File

@ -0,0 +1,90 @@
//! Typed Zig wrapper around the Ghastty Vulkan-friendly glslang
//! compile shim (`pkg/glslang/override/ghastty_vk_shim.h`). The shim
//! itself is a small C entry point that wraps glslang's C++-only
//! `setAutoMapBindings` / `setAutoMapLocations` / `setEnvInput` knobs
//! the upstream C ABI doesn't expose.
//!
//! Callers use this instead of poking `glslang.c.ghastty_*` directly:
//! the malloc/free dance for the shim's out-pointers is finicky
//! (separate free entry points for SPIR-V and error strings, both
//! optional, both have to be dropped on the right path) and was
//! previously open-coded across two near-identical 25-line blocks
//! in `src/renderer/vulkan/shaders.zig`. This module is the binding
//! layer; the renderer just calls `compileToSpv` and gets a Zig
//! `[]const u32` slice.
const std = @import("std");
const Allocator = std.mem.Allocator;
const c = @import("c.zig").c;
const log = std.log.scoped(.glslang);
/// Shader stage accepted by `compileToSpv`.
pub const Stage = enum {
    vertex,
    fragment,

    /// Map this stage to the matching constant of the shim's C enum.
    /// The switch is exhaustive, so adding a stage here without a C
    /// counterpart is a compile error.
    fn cValue(self: Stage) c.ghastty_glslang_stage_t {
        const tag: c.ghastty_glslang_stage_t = switch (self) {
            .fragment => c.GHASTTY_GLSLANG_STAGE_FRAGMENT,
            .vertex => c.GHASTTY_GLSLANG_STAGE_VERTEX,
        };
        return tag;
    }
};
/// Errors returned by `compileToSpv`: a shim failure, or any
/// `Allocator.Error` raised while copying the result.
pub const Error = error{
/// The compile-shim's underlying glslang C++ pipeline (TShader
/// preprocess / parse + TProgram link + GlslangToSpv) failed.
/// The shim's error message is logged via the scoped `log.err`
/// before this error is returned; no allocation is propagated to
/// the caller.
GlslangFailed,
} || Allocator.Error;
/// Compile a null-terminated GLSL source string to a Vulkan-flavored
/// SPIR-V binary.
///
/// On success, returns a slice owned by `alloc`; the caller frees it
/// with `alloc.free(spv)`. The shim's own malloc'd buffer is copied into
/// `alloc` and released here, so callers never need a separate
/// `ghastty_glslang_free_spirv` call.
///
/// On failure, the shim's error string (if any) is logged with
/// `log.err` and freed, and `error.GlslangFailed` is returned.
/// `error.OutOfMemory` is returned when `alloc` cannot hold the copy.
pub fn compileToSpv(
    alloc: Allocator,
    source: [:0]const u8,
    stage: Stage,
) Error![]const u32 {
    // Start the out-params at null / 0 instead of `undefined`: they are
    // inspected after the call, and this way a shim that returns
    // without writing them can never make us read uninitialized memory.
    var spv_ptr: [*c]u32 = null;
    var spv_len: usize = 0;
    var err_ptr: [*c]u8 = null;

    const rc = c.ghastty_glslang_compile_vulkan(
        source.ptr,
        stage.cValue(),
        &spv_ptr,
        &spv_len,
        &err_ptr,
    );
    if (rc != 0) {
        if (err_ptr != null) {
            log.err("ghastty_glslang_compile_vulkan: rc={} {s}", .{
                rc,
                std.mem.span(@as([*:0]const u8, @ptrCast(err_ptr))),
            });
            c.ghastty_glslang_free_error(err_ptr);
        } else {
            log.err("ghastty_glslang_compile_vulkan: rc={} (no error string)", .{rc});
        }
        return error.GlslangFailed;
    }

    // The shim's buffer is only needed until the copy below is made.
    // Freeing a null pointer is a no-op, so this is safe on every path.
    defer c.ghastty_glslang_free_spirv(spv_ptr);

    // A success code without a buffer violates the shim's contract;
    // report it rather than slicing a null pointer.
    if (spv_ptr == null) {
        log.err("ghastty_glslang_compile_vulkan: rc=0 but no SPIR-V was returned", .{});
        return error.GlslangFailed;
    }

    // Copy out of the shim's malloc into `alloc` so the caller's free
    // path is symmetric with every other allocator-owned slice.
    const owned = try alloc.dupe(u32, spv_ptr[0..spv_len]);
    return owned;
}

195
pkg/vulkan/CommandPool.zig Normal file
View File

@ -0,0 +1,195 @@
//! Wrapper for `VkCommandPool` with a one-shot command-buffer helper.
//!
//! Initially used by `vulkan/Texture.zig` for staging-buffer uploads:
//! allocate a transient command buffer, record an upload + layout
//! barriers, submit, wait for completion, free.
//!
//! Eventually the renderer will grow a separate per-frame command
//! pool for the main draw stream; this pool stays around for
//! infrequent operations like atlas uploads where blocking the
//! caller is fine. The choice keeps the API small and avoids the
//! complication of multi-frame fence tracking for resources that
//! will outlive the upload.
const Self = @This();
const std = @import("std");
const vk = @import("c.zig").c;
const Device = @import("Device.zig");
const log = std.log.scoped(.vulkan);
/// Errors returned by the command-pool helpers in this file.
pub const Error = error{
/// A `vkCreateCommandPool` / `vkAllocateCommandBuffers` /
/// `vkBeginCommandBuffer` / `vkEndCommandBuffer` / `vkQueueSubmit` /
/// `vkQueueWaitIdle` call returned a non-success status. Logged
/// with the raw `VkResult`.
VulkanFailed,
};
device: *const Device,
pool: vk.VkCommandPool,
/// Create a command pool on the queue family recorded in `device`.
///
/// The pool is created with `TRANSIENT_BIT | RESET_COMMAND_BUFFER_BIT`
/// because every command buffer allocated from it is short-lived and is
/// freed (or reset) right after submission.
///
/// Returns `error.VulkanFailed` (after logging the raw result) when
/// `vkCreateCommandPool` does not return `VK_SUCCESS`.
pub fn init(device: *const Device) Error!Self {
    const create_info: vk.VkCommandPoolCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .pNext = null,
        .flags = vk.VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
            vk.VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex = device.queue_family_index,
    };

    var handle: vk.VkCommandPool = undefined;
    const result = device.dispatch.createCommandPool(
        device.device,
        &create_info,
        null,
        &handle,
    );
    if (result == vk.VK_SUCCESS) {
        return .{ .device = device, .pool = handle };
    }

    log.err("vkCreateCommandPool failed: result={}", .{result});
    return error.VulkanFailed;
}
/// Destroy the underlying `VkCommandPool` and poison `self` so any
/// later use is caught in safe builds.
pub fn deinit(self: *Self) void {
    const dev = self.device;
    dev.dispatch.destroyCommandPool(dev.device, self.pool, null);
    self.* = undefined;
}
/// A one-shot recording session. Returned by `beginOneShot`; the caller
/// records into `cb` and then finishes with `endAndSubmit`.
pub const OneShot = struct {
    pool: *Self,
    cb: vk.VkCommandBuffer,

    /// End recording, submit the command buffer, block until the queue
    /// is idle, and free the command buffer.
    ///
    /// Record commands beforehand directly via `cb` and the device
    /// dispatch table (e.g. `pool.device.dispatch.cmdPipelineBarrier(cb, ...)`).
    /// The command buffer has been freed by the time this returns,
    /// whether it returns normally or with `error.VulkanFailed`.
    pub fn endAndSubmit(self: OneShot) Error!void {
        const dev = self.pool.device;

        // The command buffer is released on EVERY exit from this
        // function. Leaking it on the error paths would let a caller
        // that retries after `error.VulkanFailed` eventually exhaust
        // the pool.
        //
        // A buffer that has been submitted but not yet waited on is in
        // the PENDING state and must not be freed. `in_flight` is true
        // exactly while that may be the case; the cleanup then drains
        // the whole device first. That extra stall only ever happens on
        // an error path, where blocking once is preferable to leaking
        // the buffer or freeing it while pending.
        var cb_local = self.cb;
        var in_flight = false;
        defer {
            if (in_flight) {
                _ = dev.dispatch.deviceWaitIdle(dev.device);
            }
            dev.dispatch.freeCommandBuffers(dev.device, self.pool.pool, 1, &cb_local);
        }

        const end_result = dev.dispatch.endCommandBuffer(self.cb);
        if (end_result != vk.VK_SUCCESS) {
            log.err("vkEndCommandBuffer failed: result={}", .{end_result});
            return error.VulkanFailed;
        }

        // No semaphores: completion is observed by the blocking wait
        // below.
        const submit_info: vk.VkSubmitInfo = .{
            .sType = vk.VK_STRUCTURE_TYPE_SUBMIT_INFO,
            .pNext = null,
            .waitSemaphoreCount = 0,
            .pWaitSemaphores = null,
            .pWaitDstStageMask = null,
            .commandBufferCount = 1,
            .pCommandBuffers = &self.cb,
            .signalSemaphoreCount = 0,
            .pSignalSemaphores = null,
        };

        // Submission goes through `Device.queueSubmit` rather than the
        // raw dispatch entry because the queue is shared: splits/tabs
        // submit atlas uploads here and the per-frame path uses the
        // same queue.
        const submit_result = dev.queueSubmit(1, &submit_info, null);
        if (submit_result != vk.VK_SUCCESS) {
            log.err("vkQueueSubmit failed: result={}", .{submit_result});
            return error.VulkanFailed;
        }
        in_flight = true;

        // Block until the submission completes. Acceptable for one-shot
        // uploads, which are rare and whose callers are willing to
        // stall.
        const wait_result = dev.queueWaitIdle();
        if (wait_result != vk.VK_SUCCESS) {
            log.err("vkQueueWaitIdle failed: result={}", .{wait_result});
            return error.VulkanFailed;
        }
        in_flight = false;
    }
};
/// Allocate a primary command buffer from this pool and put it into the
/// recording state with `ONE_TIME_SUBMIT`. Pair with
/// `OneShot.endAndSubmit`.
///
/// Returns `error.VulkanFailed` (after logging the raw result) if either
/// the allocation or the begin call fails; in the latter case the
/// freshly allocated buffer is freed again before returning.
pub fn beginOneShot(self: *Self) Error!OneShot {
    const dev = self.device;

    const allocate_info: vk.VkCommandBufferAllocateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .pNext = null,
        .commandPool = self.pool,
        .level = vk.VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = 1,
    };
    var cb: vk.VkCommandBuffer = undefined;
    const allocate_result = dev.dispatch.allocateCommandBuffers(
        dev.device,
        &allocate_info,
        &cb,
    );
    if (allocate_result != vk.VK_SUCCESS) {
        log.err("vkAllocateCommandBuffers failed: result={}", .{allocate_result});
        return error.VulkanFailed;
    }
    // From here on the buffer exists, so give it back if we bail out.
    errdefer dev.dispatch.freeCommandBuffers(dev.device, self.pool, 1, &cb);

    const begin_info: vk.VkCommandBufferBeginInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .pNext = null,
        .flags = vk.VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
        .pInheritanceInfo = null,
    };
    const begin_result = dev.dispatch.beginCommandBuffer(cb, &begin_info);
    if (begin_result != vk.VK_SUCCESS) {
        log.err("vkBeginCommandBuffer failed: result={}", .{begin_result});
        return error.VulkanFailed;
    }

    return .{ .pool = self, .cb = cb };
}
test {
std.testing.refAllDecls(@This());
}

View File

@ -0,0 +1,168 @@
//! Wrapper for `VkDescriptorPool` with allocation + per-set helpers.
//!
//! Vulkan descriptor sets are the per-pipeline resource-binding
//! handles: a descriptor set holds references to uniform buffers,
//! sampled images, samplers, etc., that a particular shader stage
//! draws from. They're allocated from a pool, populated via
//! `vkUpdateDescriptorSets`, and bound at draw time with
//! `vkCmdBindDescriptorSets`.
//!
//! Lifetime model: this wrapper assumes the pool outlives all sets
//! allocated from it (caller arranges teardown order). Sets aren't
//! individually freed; destroying the pool reclaims everything.
//! That matches the per-frame pool pattern the renderer will use
//! (reset the pool at frame start; reallocate the sets for that
//! frame).
//!
//! Caps are caller-provided. Pass realistic numbers: over-pooling
//! is fine; under-pooling fails at allocation time.
const Self = @This();
const std = @import("std");
const vk = @import("c.zig").c;
const Device = @import("Device.zig");
const log = std.log.scoped(.vulkan);
/// Errors returned by `init` and `allocate`.
pub const Error = error{
/// `vkCreateDescriptorPool` / `vkAllocateDescriptorSets` returned
/// a non-success status. The raw result is logged before this is
/// returned.
VulkanFailed,
/// Caller passed an invalid pool configuration (e.g. `max_sets ==
/// 0`, or every per-type cap is zero). Distinct from
/// `VulkanFailed` so callers can tell driver-side errors from
/// caller-side ones.
InvalidPoolConfig,
};
/// Construction caps. `max_sets` is the total number of descriptor
/// sets the pool can ever vend; the per-type counts are individual
/// resource counts pooled across all those sets.
pub const Options = struct {
/// Device the pool is created on; also stored in the resulting pool.
device: *const Device,
/// Passed to Vulkan as `maxSets`. Must be > 0.
max_sets: u32,
/// Descriptor count for `VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER`; 0 omits
/// the type from the pool.
uniform_buffers: u32 = 0,
/// Descriptor count for `VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER`;
/// 0 omits the type from the pool.
combined_image_samplers: u32 = 0,
/// Descriptor count for `VK_DESCRIPTOR_TYPE_STORAGE_BUFFER`; 0 omits
/// the type from the pool.
storage_buffers: u32 = 0,
};
device: *const Device,
pool: vk.VkDescriptorPool,
/// Create a descriptor pool sized according to `opts`.
///
/// Returns `error.InvalidPoolConfig` when `opts.max_sets` is zero or
/// when every per-type cap is zero, and `error.VulkanFailed` (after
/// logging the raw result) when `vkCreateDescriptorPool` fails.
pub fn init(opts: Options) Error!Self {
    // Vulkan requires `maxSets > 0` and `poolSizeCount > 0`. Some
    // drivers reject a violating pool outright while looser ones accept
    // it and only fail at allocation time, so both shapes are caught
    // here to give the caller a clear, early error.
    if (opts.max_sets == 0) {
        log.err("DescriptorPool.init: max_sets must be > 0", .{});
        return error.InvalidPoolConfig;
    }

    // One candidate entry per descriptor type `Options` exposes, in the
    // order they are handed to Vulkan.
    const requested = [_]vk.VkDescriptorPoolSize{
        .{
            .type = vk.VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            .descriptorCount = opts.uniform_buffers,
        },
        .{
            .type = vk.VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
            .descriptorCount = opts.combined_image_samplers,
        },
        .{
            .type = vk.VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .descriptorCount = opts.storage_buffers,
        },
    };

    // Keep only the entries with a non-zero cap, packed at the front.
    var sizes: [requested.len]vk.VkDescriptorPoolSize = undefined;
    var used: u32 = 0;
    for (requested) |entry| {
        if (entry.descriptorCount == 0) continue;
        sizes[used] = entry;
        used += 1;
    }
    if (used == 0) {
        log.err(
            "DescriptorPool.init: at least one per-type cap must be > 0 " ++
                "(uniform_buffers, combined_image_samplers, storage_buffers)",
            .{},
        );
        return error.InvalidPoolConfig;
    }

    const create_info: vk.VkDescriptorPoolCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
        .pNext = null,
        // No FREE_DESCRIPTOR_SET_BIT: sets are never freed one by one;
        // they go away when the pool is destroyed (or reset).
        .flags = 0,
        .maxSets = opts.max_sets,
        .poolSizeCount = used,
        .pPoolSizes = &sizes,
    };

    var handle: vk.VkDescriptorPool = undefined;
    const result = opts.device.dispatch.createDescriptorPool(
        opts.device.device,
        &create_info,
        null,
        &handle,
    );
    if (result == vk.VK_SUCCESS) {
        return .{ .device = opts.device, .pool = handle };
    }

    log.err("vkCreateDescriptorPool failed: result={}", .{result});
    return error.VulkanFailed;
}
/// Destroy the descriptor pool and invalidate this wrapper.
///
/// The borrowed `device` is left untouched. After this call `self` is
/// `undefined` and must not be used again.
pub fn deinit(self: *Self) void {
    const dev = self.device;
    dev.dispatch.destroyDescriptorPool(dev.device, self.pool, null);
    self.* = undefined;
}
/// Allocate one descriptor set from this pool using `layout`.
///
/// The returned set is uninitialized: populate it with
/// `vkUpdateDescriptorSets` before binding it.
///
/// Returns `error.VulkanFailed` (after logging the raw `VkResult`)
/// when `vkAllocateDescriptorSets` does not return `VK_SUCCESS`.
pub fn allocate(
    self: *Self,
    layout: vk.VkDescriptorSetLayout,
) Error!vk.VkDescriptorSet {
    const dev = self.device;

    // Vulkan takes an array of layouts; we always request exactly one.
    const set_layouts = [_]vk.VkDescriptorSetLayout{layout};
    const alloc_info: vk.VkDescriptorSetAllocateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
        .pNext = null,
        .descriptorPool = self.pool,
        .descriptorSetCount = 1,
        .pSetLayouts = &set_layouts,
    };

    var descriptor_set: vk.VkDescriptorSet = undefined;
    const result = dev.dispatch.allocateDescriptorSets(
        dev.device,
        &alloc_info,
        &descriptor_set,
    );
    if (result == vk.VK_SUCCESS) return descriptor_set;

    log.err("vkAllocateDescriptorSets failed: result={}", .{result});
    return error.VulkanFailed;
}
test {
    // Reference every declaration in this file so each one is
    // semantically analyzed when the tests are built.
    const This = @This();
    std.testing.refAllDecls(This);
}

679
pkg/vulkan/Device.zig Normal file
View File

@ -0,0 +1,679 @@
//! Host-provided Vulkan device wrapper.
//!
//! libghostty does NOT call `vkCreateInstance` / `vkCreateDevice` for
//! the Vulkan renderer: per `ghostty_platform_vulkan_s` in
//! `include/ghostty.h`, the host (the apprt embedding libghostty,
//! e.g. the Qt frontend) owns the entire Vulkan setup. We consume
//! its handles via the platform callbacks, validate the version /
//! extensions we need, and build a function-pointer dispatch table
//! the rest of the renderer can use.
//!
//! Why host-owned? The host already has a Vulkan instance/device for
//! its own compositing (Qt's RHI). Asking the host to share its
//! device means rendered frames can be handed back as raw `VkImage`
//! handles or dmabuf fds without a CPU readback or a second Vulkan
//! instance fighting for the same GPU resources.
//!
//! Vulkan version: 1.3 (Jan 2022). Promotes dynamic rendering,
//! sync2, and extended dynamic state, all of which simplify a
//! dirty-rect-style terminal renderer. Driver coverage is fine on
//! every distro currently in support.
//!
//! Required device extensions (must be enabled on the host's
//! VkDevice; we verify each on init):
//! - VK_KHR_external_memory_fd
//! - VK_EXT_external_memory_dma_buf
//! - VK_EXT_image_drm_format_modifier
//!
//! These are what let libghostty export the rendered VkImage memory
//! as a dmabuf fd so the host can import it for zero-copy
//! presentation (path 3 in the qt-vulkan-renderer scoping log:
//! preserves Qt's QWidget composition model AND avoids the CPU
//! readback the OpenGL path currently does).
const std = @import("std");
const Allocator = std.mem.Allocator;
const vk = @import("c.zig").c;
const log = std.log.scoped(.vulkan);
const Device = @This();
/// Minimum Vulkan API version the renderer requires. `init` compares
/// the physical device's reported `apiVersion` against this value and
/// returns `error.UnsupportedVulkanVersion` when it is lower.
pub const MIN_API_VERSION = vk.VK_API_VERSION_1_3;
/// Device extensions libghostty relies on. libghostty does not
/// enable them itself: the host must have created its VkDevice with
/// these enabled. `init` only verifies that each one is listed by
/// `vkEnumerateDeviceExtensionProperties` for the physical device and
/// returns `error.MissingRequiredExtension` otherwise.
///
/// `VK_EXT_image_drm_format_modifier` is what lets
/// `vulkan/Target.zig` probe the per-modifier feature set (in
/// particular: does `DRM_FORMAT_MOD_LINEAR` advertise
/// `COLOR_ATTACHMENT_BIT`?) and, when supported, allocate the render
/// image with `VkImageDrmFormatModifierExplicitCreateInfoEXT` so its
/// memory can be exported as a dmabuf directly, with no separate
/// LINEAR `VkBuffer` and no end-of-frame `vkCmdCopyImageToBuffer`.
/// Drivers where the modifier path can't satisfy the requested
/// features fall back to the legacy OPTIMAL-plus-copy path inside
/// `Target`.
pub const REQUIRED_DEVICE_EXTENSIONS = [_][:0]const u8{
    "VK_KHR_external_memory_fd",
    "VK_EXT_external_memory_dma_buf",
    "VK_EXT_image_drm_format_modifier",
};
/// Errors that can come out of `init` (in addition to allocator
/// errors from the temporary extension-list buffer).
pub const Error = error{
    /// A host-provided handle or entry point was unusable: a null
    /// `instance` / `device` / `queue` / `physical_device`, or
    /// `vkGetInstanceProcAddr` / `vkGetDeviceProcAddr` failing to
    /// resolve a Vulkan function we need. `init` also returns this
    /// when `vkEnumerateDeviceExtensionProperties` fails outright or
    /// reports `VK_INCOMPLETE` on the fill call.
    HostHandleMissing,
    /// The host's VkPhysicalDevice doesn't report a Vulkan API version
    /// >= MIN_API_VERSION. Detected via `vkGetPhysicalDeviceProperties`.
    UnsupportedVulkanVersion,
    /// At least one entry in `REQUIRED_DEVICE_EXTENSIONS` was not
    /// listed in `vkEnumerateDeviceExtensionProperties` for the
    /// host's VkPhysicalDevice.
    MissingRequiredExtension,
};
/// The function-pointer dispatch table libghostty resolves against the
/// host's instance / device. We only enumerate the entry points the
/// renderer actually uses; extending the table is the supported way
/// for follow-up renderer code to call additional Vulkan functions.
///
/// Every field is the non-optional child type of the corresponding
/// `PFN_vk*` pointer, so a populated table never contains a null
/// entry: `init` fails instead of storing one.
pub const Dispatch = struct {
    // ---- instance-level (resolved via vkGetInstanceProcAddr) ----
    getPhysicalDeviceProperties: std.meta.Child(vk.PFN_vkGetPhysicalDeviceProperties),
    getPhysicalDeviceMemoryProperties: std.meta.Child(vk.PFN_vkGetPhysicalDeviceMemoryProperties),
    getPhysicalDeviceFormatProperties: std.meta.Child(vk.PFN_vkGetPhysicalDeviceFormatProperties),
    /// Used by `Target` to chain `VkDrmFormatModifierPropertiesListEXT`
    /// and enumerate which DRM modifiers the device exposes for a
    /// given format. Vulkan 1.1 promoted `vkGetPhysicalDeviceFormatProperties2`
    /// from `VK_KHR_get_physical_device_properties2` into core, so we
    /// resolve it under the non-suffixed name; `MIN_API_VERSION` is
    /// 1.3, well past the promotion.
    getPhysicalDeviceFormatProperties2: std.meta.Child(vk.PFN_vkGetPhysicalDeviceFormatProperties2),
    enumerateDeviceExtensionProperties: std.meta.Child(vk.PFN_vkEnumerateDeviceExtensionProperties),
    getDeviceProcAddr: std.meta.Child(vk.PFN_vkGetDeviceProcAddr),
    // ---- device-level (resolved via getDeviceProcAddr) ----------
    // Intentionally narrow for now: every additional renderer-side
    // call adds a field here and a `DeviceLoader.load` lookup in
    // `init`.
    getDeviceQueue: std.meta.Child(vk.PFN_vkGetDeviceQueue),
    deviceWaitIdle: std.meta.Child(vk.PFN_vkDeviceWaitIdle),
    // Sampler: used by `vulkan/Sampler.zig`.
    createSampler: std.meta.Child(vk.PFN_vkCreateSampler),
    destroySampler: std.meta.Child(vk.PFN_vkDestroySampler),
    // Texture (image + memory + view): used by `vulkan/Texture.zig`.
    createImage: std.meta.Child(vk.PFN_vkCreateImage),
    destroyImage: std.meta.Child(vk.PFN_vkDestroyImage),
    getImageMemoryRequirements: std.meta.Child(vk.PFN_vkGetImageMemoryRequirements),
    allocateMemory: std.meta.Child(vk.PFN_vkAllocateMemory),
    freeMemory: std.meta.Child(vk.PFN_vkFreeMemory),
    bindImageMemory: std.meta.Child(vk.PFN_vkBindImageMemory),
    createImageView: std.meta.Child(vk.PFN_vkCreateImageView),
    destroyImageView: std.meta.Child(vk.PFN_vkDestroyImageView),
    // Buffer (host-visible vertex / uniform / cell-data storage):
    // used by `vulkan/buffer.zig`.
    createBuffer: std.meta.Child(vk.PFN_vkCreateBuffer),
    destroyBuffer: std.meta.Child(vk.PFN_vkDestroyBuffer),
    getBufferMemoryRequirements: std.meta.Child(vk.PFN_vkGetBufferMemoryRequirements),
    bindBufferMemory: std.meta.Child(vk.PFN_vkBindBufferMemory),
    mapMemory: std.meta.Child(vk.PFN_vkMapMemory),
    unmapMemory: std.meta.Child(vk.PFN_vkUnmapMemory),
    // Command pool / buffer + queue submit + recording:
    // used by `vulkan/CommandPool.zig` and (later) per-frame command
    // recording in `vulkan/Frame.zig`.
    createCommandPool: std.meta.Child(vk.PFN_vkCreateCommandPool),
    destroyCommandPool: std.meta.Child(vk.PFN_vkDestroyCommandPool),
    allocateCommandBuffers: std.meta.Child(vk.PFN_vkAllocateCommandBuffers),
    freeCommandBuffers: std.meta.Child(vk.PFN_vkFreeCommandBuffers),
    beginCommandBuffer: std.meta.Child(vk.PFN_vkBeginCommandBuffer),
    endCommandBuffer: std.meta.Child(vk.PFN_vkEndCommandBuffer),
    // NOTE: `queueSubmit` / `queueWaitIdle` are raw entry points; the
    // `Device.queueSubmit` / `Device.queueWaitIdle` wrappers in this
    // file call them while holding the queue mutex.
    queueSubmit: std.meta.Child(vk.PFN_vkQueueSubmit),
    queueWaitIdle: std.meta.Child(vk.PFN_vkQueueWaitIdle),
    cmdPipelineBarrier: std.meta.Child(vk.PFN_vkCmdPipelineBarrier),
    cmdCopyBufferToImage: std.meta.Child(vk.PFN_vkCmdCopyBufferToImage),
    cmdFillBuffer: std.meta.Child(vk.PFN_vkCmdFillBuffer),
    cmdClearColorImage: std.meta.Child(vk.PFN_vkCmdClearColorImage),
    cmdBindVertexBuffers: std.meta.Child(vk.PFN_vkCmdBindVertexBuffers),
    // Shader modules: used by `vulkan/shaders.zig`.
    createShaderModule: std.meta.Child(vk.PFN_vkCreateShaderModule),
    destroyShaderModule: std.meta.Child(vk.PFN_vkDestroyShaderModule),
    // Graphics pipeline + descriptor set layout:
    // used by `vulkan/Pipeline.zig`.
    createDescriptorSetLayout: std.meta.Child(vk.PFN_vkCreateDescriptorSetLayout),
    destroyDescriptorSetLayout: std.meta.Child(vk.PFN_vkDestroyDescriptorSetLayout),
    createPipelineLayout: std.meta.Child(vk.PFN_vkCreatePipelineLayout),
    destroyPipelineLayout: std.meta.Child(vk.PFN_vkDestroyPipelineLayout),
    createGraphicsPipelines: std.meta.Child(vk.PFN_vkCreateGraphicsPipelines),
    destroyPipeline: std.meta.Child(vk.PFN_vkDestroyPipeline),
    // External memory fd export: used by `vulkan/Target.zig`.
    // `vkGetMemoryFdKHR` is from `VK_KHR_external_memory_fd`; needs
    // device-level resolution like any other device function.
    getMemoryFdKHR: std.meta.Child(vk.PFN_vkGetMemoryFdKHR),
    getImageSubresourceLayout: std.meta.Child(vk.PFN_vkGetImageSubresourceLayout),
    /// From `VK_EXT_image_drm_format_modifier`. Used by
    /// `vulkan/Target.zig` after creating an image with the LIST
    /// variant of the modifier create-info to discover which
    /// modifier the driver actually chose.
    getImageDrmFormatModifierPropertiesEXT: std.meta.Child(vk.PFN_vkGetImageDrmFormatModifierPropertiesEXT),
    // Per-frame sync (fence + command-buffer reset): used by
    // `vulkan/Frame.zig`.
    createFence: std.meta.Child(vk.PFN_vkCreateFence),
    destroyFence: std.meta.Child(vk.PFN_vkDestroyFence),
    waitForFences: std.meta.Child(vk.PFN_vkWaitForFences),
    resetFences: std.meta.Child(vk.PFN_vkResetFences),
    resetCommandBuffer: std.meta.Child(vk.PFN_vkResetCommandBuffer),
    // Drawing: used by `vulkan/RenderPass.zig` (and the smoke
    // test's renderTriangle helper). Vulkan 1.3 promoted
    // `vkCmdBeginRendering` / `vkCmdEndRendering` from the
    // `VK_KHR_dynamic_rendering` extension into core, so they're
    // available without an extension opt-in.
    cmdBeginRendering: std.meta.Child(vk.PFN_vkCmdBeginRendering),
    cmdEndRendering: std.meta.Child(vk.PFN_vkCmdEndRendering),
    cmdBindPipeline: std.meta.Child(vk.PFN_vkCmdBindPipeline),
    cmdSetViewport: std.meta.Child(vk.PFN_vkCmdSetViewport),
    cmdSetScissor: std.meta.Child(vk.PFN_vkCmdSetScissor),
    cmdDraw: std.meta.Child(vk.PFN_vkCmdDraw),
    cmdCopyImageToBuffer: std.meta.Child(vk.PFN_vkCmdCopyImageToBuffer),
    // Descriptor sets: used by `vulkan/DescriptorPool.zig`. Per-
    // surface lifetime today; per-frame pooling will follow when
    // the actual renderer integration lands.
    createDescriptorPool: std.meta.Child(vk.PFN_vkCreateDescriptorPool),
    destroyDescriptorPool: std.meta.Child(vk.PFN_vkDestroyDescriptorPool),
    resetDescriptorPool: std.meta.Child(vk.PFN_vkResetDescriptorPool),
    allocateDescriptorSets: std.meta.Child(vk.PFN_vkAllocateDescriptorSets),
    updateDescriptorSets: std.meta.Child(vk.PFN_vkUpdateDescriptorSets),
    cmdBindDescriptorSets: std.meta.Child(vk.PFN_vkCmdBindDescriptorSets),
};
// ---- fields ---------------------------------------------------------
// The five handle fields below are copied verbatim from the
// `HostBootstrap` passed to `init`; the host keeps ownership of them.
/// Host-owned `VkInstance`.
instance: vk.VkInstance,
/// Host-owned `VkPhysicalDevice`; queried in `init` for the API
/// version, extension list, and memory properties.
physical_device: vk.VkPhysicalDevice,
/// Host-owned `VkDevice`; the device-level dispatch entries are
/// resolved against it.
device: vk.VkDevice,
/// Host-owned `VkQueue`. Submit / wait through `queueSubmit` /
/// `queueWaitIdle` rather than calling the dispatch entries directly.
queue: vk.VkQueue,
/// Queue family index supplied by the host alongside `queue`.
queue_family_index: u32,
/// The Vulkan API version the host's physical device reports. Always
/// >= `MIN_API_VERSION` (if it were lower, `init` returns
/// `error.UnsupportedVulkanVersion`).
api_version: u32,
/// Cached `VkPhysicalDeviceMemoryProperties`. The properties are
/// immutable for the physical device's lifetime, so we query once
/// at `init` time instead of on every `findMemoryType` call (which
/// happens for every Buffer/Texture/Target allocation).
memory_properties: vk.VkPhysicalDeviceMemoryProperties,
/// Function pointers resolved in `init`; see `Dispatch`.
dispatch: Dispatch,
/// Process-wide mutex protecting access to `queue`. Vulkan requires
/// external synchronization of `VkQueue`: `vkQueueSubmit` and
/// `vkQueueWaitIdle` from multiple threads must not overlap. Splits
/// and tabs share the host's single queue (one VkQueue per process),
/// so the mutex serializes submissions across all renderer threads.
/// Use via `Device.queueSubmit` / `Device.queueWaitIdle`.
///
/// This is a file-level variable, not a `Device` field, so every
/// `Device` value in the process shares the same lock.
var queue_mutex: std.Thread.Mutex = .{};
/// `vkQueueSubmit` on the host queue, serialized by `queue_mutex`.
///
/// ALL submissions to the host queue (Frame, atlas upload, etc.) MUST
/// go through this wrapper so concurrent renderer threads from
/// splits/tabs never overlap on the queue and race the driver into a
/// hang.
///
/// `submit_count`, `submits`, and `fence` are forwarded unchanged; the
/// raw `VkResult` is returned for the caller to interpret.
pub fn queueSubmit(
    self: *const Device,
    submit_count: u32,
    submits: [*c]const vk.VkSubmitInfo,
    fence: vk.VkFence,
) vk.VkResult {
    const submit_fn = self.dispatch.queueSubmit;
    const host_queue = self.queue;

    queue_mutex.lock();
    defer queue_mutex.unlock();
    return submit_fn(host_queue, submit_count, submits, fence);
}
/// `vkQueueWaitIdle` on the host queue, serialized by `queue_mutex`
/// for the same reason as `queueSubmit`. The lock is held for the
/// whole wait. Returns the raw `VkResult`.
pub fn queueWaitIdle(self: *const Device) vk.VkResult {
    const wait_fn = self.dispatch.queueWaitIdle;
    const host_queue = self.queue;

    queue_mutex.lock();
    defer queue_mutex.unlock();
    return wait_fn(host_queue);
}
// ---- API ------------------------------------------------------------
/// Pre-resolved host-Vulkan handles passed into `Device.init`. Keeps
/// `pkg/vulkan` independent of any apprt type: callers (e.g.
/// libghostty's `src/renderer/Vulkan.zig`) translate their own
/// platform-callback struct into this neutral shape.
pub const HostBootstrap = struct {
    // The handles below are stored as-is on the resulting `Device`;
    // the host keeps ownership of all of them.
    instance: vk.VkInstance,
    physical_device: vk.VkPhysicalDevice,
    device: vk.VkDevice,
    queue: vk.VkQueue,
    queue_family_index: u32,
    /// Root proc-addr resolver. `Device.init` casts this to
    /// `vkGetInstanceProcAddr` and uses it to pull every
    /// instance-level function it needs to bootstrap the dispatch
    /// table (including `vkGetDeviceProcAddr`).
    get_instance_proc_addr_raw: *const anyopaque,
};
/// Build a `Device` from pre-resolved host handles. Performs:
///   1. Reject null `instance` / `physical_device` / `device` / `queue`.
///   2. Load the instance-level dispatch via `vkGetInstanceProcAddr`.
///   3. Verify `physicalDeviceProperties.apiVersion >= MIN_API_VERSION`.
///   4. Verify every entry in `REQUIRED_DEVICE_EXTENSIONS` is present
///      on the physical device.
///   5. Load the device-level dispatch via `vkGetDeviceProcAddr`.
///   6. Snapshot the physical device's memory properties.
///
/// `alloc` is used only for a temporary extension-list buffer that is
/// freed before this function returns.
///
/// Errors:
///   - `error.HostHandleMissing`: a host handle is null, a required
///     entry point could not be resolved, or
///     `vkEnumerateDeviceExtensionProperties` failed / was INCOMPLETE.
///   - `error.UnsupportedVulkanVersion`: device API version too low.
///   - `error.MissingRequiredExtension`: a required extension is absent.
///   - `error.OutOfMemory`: the temporary buffer could not be allocated.
///
/// On success the returned `Device` is ready for the renderer to
/// build pipelines / images / command buffers against. The host
/// retains ownership of `instance` / `device` / `queue`; `deinit`
/// destroys nothing and only invalidates the wrapper.
pub fn init(
    alloc: Allocator,
    boot: HostBootstrap,
) (Error || Allocator.Error)!Device {
    const instance = boot.instance;
    const physical_device = boot.physical_device;
    const device = boot.device;
    const queue = boot.queue;
    const queue_family_index = boot.queue_family_index;

    // ---- handle validation ---------------------------------------
    // Every handle is dereferenced by the driver below (or later by
    // the renderer). Fail with the documented error instead of
    // handing a null handle to Vulkan.
    if (instance == null or physical_device == null or
        device == null or queue == null)
    {
        log.err(
            "host provided a null Vulkan handle " ++
                "(instance_ok={} physical_device_ok={} device_ok={} queue_ok={})",
            .{
                instance != null,
                physical_device != null,
                device != null,
                queue != null,
            },
        );
        return error.HostHandleMissing;
    }

    // ---- instance-level dispatch ---------------------------------
    // The caller-provided get_instance_proc_addr is our root entry
    // point. We resolve other functions via vkGetInstanceProcAddr
    // (instance, name); per the Vulkan spec, passing a non-null
    // instance is valid for any function that takes an instance,
    // physical device, device, or child object of any of these, i.e.
    // everything we care about.
    const get_instance_proc_addr: std.meta.Child(vk.PFN_vkGetInstanceProcAddr) =
        @ptrCast(@alignCast(boot.get_instance_proc_addr_raw));

    const InstanceLoader = struct {
        instance: vk.VkInstance,
        get_instance_proc_addr: std.meta.Child(vk.PFN_vkGetInstanceProcAddr),

        /// Resolve `name` against the instance and cast it to the
        /// non-optional function-pointer type behind `T`.
        fn load(self: @This(), comptime T: type, name: [*:0]const u8) Error!std.meta.Child(T) {
            const fp = self.get_instance_proc_addr(self.instance, name) orelse {
                log.err("vkGetInstanceProcAddr returned null for {s}", .{name});
                return error.HostHandleMissing;
            };
            return @ptrCast(fp);
        }
    };
    const il: InstanceLoader = .{
        .instance = instance,
        .get_instance_proc_addr = get_instance_proc_addr,
    };

    const get_physical_device_properties =
        try il.load(vk.PFN_vkGetPhysicalDeviceProperties, "vkGetPhysicalDeviceProperties");
    const get_physical_device_memory_properties =
        try il.load(vk.PFN_vkGetPhysicalDeviceMemoryProperties, "vkGetPhysicalDeviceMemoryProperties");
    const get_physical_device_format_properties =
        try il.load(vk.PFN_vkGetPhysicalDeviceFormatProperties, "vkGetPhysicalDeviceFormatProperties");
    const get_physical_device_format_properties_2 =
        try il.load(vk.PFN_vkGetPhysicalDeviceFormatProperties2, "vkGetPhysicalDeviceFormatProperties2");
    const enumerate_device_extension_properties =
        try il.load(vk.PFN_vkEnumerateDeviceExtensionProperties, "vkEnumerateDeviceExtensionProperties");
    const get_device_proc_addr =
        try il.load(vk.PFN_vkGetDeviceProcAddr, "vkGetDeviceProcAddr");

    // ---- version check ------------------------------------------
    var props: vk.VkPhysicalDeviceProperties = std.mem.zeroes(vk.VkPhysicalDeviceProperties);
    get_physical_device_properties(physical_device, &props);
    if (props.apiVersion < MIN_API_VERSION) {
        log.err(
            "host VkPhysicalDevice reports Vulkan {}.{}.{}, need >= {}.{}.{}",
            .{
                vk.VK_API_VERSION_MAJOR(props.apiVersion),
                vk.VK_API_VERSION_MINOR(props.apiVersion),
                vk.VK_API_VERSION_PATCH(props.apiVersion),
                vk.VK_API_VERSION_MAJOR(MIN_API_VERSION),
                vk.VK_API_VERSION_MINOR(MIN_API_VERSION),
                vk.VK_API_VERSION_PATCH(MIN_API_VERSION),
            },
        );
        return error.UnsupportedVulkanVersion;
    }

    // ---- extension check ----------------------------------------
    var ext_count: u32 = 0;
    {
        const r = enumerate_device_extension_properties(physical_device, null, &ext_count, null);
        // SUCCESS or INCOMPLETE both populate `ext_count`. INCOMPLETE
        // shouldn't happen on the count-only call (no buffer to
        // truncate) but we accept it defensively.
        if (r != vk.VK_SUCCESS and r != vk.VK_INCOMPLETE) {
            log.err("vkEnumerateDeviceExtensionProperties (count) failed: result={}", .{r});
            return error.HostHandleMissing;
        }
    }
    const exts = try alloc.alloc(vk.VkExtensionProperties, ext_count);
    defer alloc.free(exts);
    {
        const r = enumerate_device_extension_properties(physical_device, null, &ext_count, exts.ptr);
        if (r != vk.VK_SUCCESS and r != vk.VK_INCOMPLETE) {
            log.err("vkEnumerateDeviceExtensionProperties (fill) failed: result={}", .{r});
            return error.HostHandleMissing;
        }
        // VK_INCOMPLETE here means the extension list grew between
        // the count and fill calls (race with a driver hot-reload;
        // very unlikely in practice but spec-permitted). The
        // partially-filled buffer is still authoritative for the
        // entries it does contain, but a required extension not yet
        // populated would be missed. Treat it as a hard failure
        // rather than risk reporting a present extension as missing
        // because the list was truncated.
        if (r == vk.VK_INCOMPLETE) {
            log.err(
                "vkEnumerateDeviceExtensionProperties returned INCOMPLETE; " ++
                    "device extension list changed between count and fill",
                .{},
            );
            return error.HostHandleMissing;
        }
    }

    // The fill call rewrites `ext_count` with the number of entries
    // it actually wrote, which may be smaller than the allocation if
    // the list shrank between the two calls. Only scan the written
    // prefix so we never read uninitialized entries.
    const written = exts[0..@min(exts.len, ext_count)];
    inline for (REQUIRED_DEVICE_EXTENSIONS) |required| {
        var found = false;
        for (written) |ext| {
            // `extensionName` is a fixed-size NUL-terminated array;
            // `sliceTo` bounds the read to the array even if the
            // terminator is missing.
            const name = std.mem.sliceTo(&ext.extensionName, 0);
            if (std.mem.eql(u8, name, required)) {
                found = true;
                break;
            }
        }
        if (!found) {
            log.err("required Vulkan device extension missing: {s}", .{required});
            return error.MissingRequiredExtension;
        }
    }

    // ---- device-level dispatch ----------------------------------
    const DeviceLoader = struct {
        device: vk.VkDevice,
        get_device_proc_addr: std.meta.Child(vk.PFN_vkGetDeviceProcAddr),

        /// Resolve `name` against the device and cast it to the
        /// non-optional function-pointer type behind `T`.
        fn load(self: @This(), comptime T: type, name: [*:0]const u8) Error!std.meta.Child(T) {
            const fp = self.get_device_proc_addr(self.device, name) orelse {
                log.err("vkGetDeviceProcAddr returned null for {s}", .{name});
                return error.HostHandleMissing;
            };
            return @ptrCast(fp);
        }
    };
    const dl: DeviceLoader = .{
        .device = device,
        .get_device_proc_addr = get_device_proc_addr,
    };

    const get_device_queue =
        try dl.load(vk.PFN_vkGetDeviceQueue, "vkGetDeviceQueue");
    const device_wait_idle =
        try dl.load(vk.PFN_vkDeviceWaitIdle, "vkDeviceWaitIdle");
    const create_sampler =
        try dl.load(vk.PFN_vkCreateSampler, "vkCreateSampler");
    const destroy_sampler =
        try dl.load(vk.PFN_vkDestroySampler, "vkDestroySampler");
    const create_image =
        try dl.load(vk.PFN_vkCreateImage, "vkCreateImage");
    const destroy_image =
        try dl.load(vk.PFN_vkDestroyImage, "vkDestroyImage");
    const get_image_memory_requirements =
        try dl.load(vk.PFN_vkGetImageMemoryRequirements, "vkGetImageMemoryRequirements");
    const allocate_memory =
        try dl.load(vk.PFN_vkAllocateMemory, "vkAllocateMemory");
    const free_memory =
        try dl.load(vk.PFN_vkFreeMemory, "vkFreeMemory");
    const bind_image_memory =
        try dl.load(vk.PFN_vkBindImageMemory, "vkBindImageMemory");
    const create_image_view =
        try dl.load(vk.PFN_vkCreateImageView, "vkCreateImageView");
    const destroy_image_view =
        try dl.load(vk.PFN_vkDestroyImageView, "vkDestroyImageView");
    const create_buffer =
        try dl.load(vk.PFN_vkCreateBuffer, "vkCreateBuffer");
    const destroy_buffer =
        try dl.load(vk.PFN_vkDestroyBuffer, "vkDestroyBuffer");
    const get_buffer_memory_requirements =
        try dl.load(vk.PFN_vkGetBufferMemoryRequirements, "vkGetBufferMemoryRequirements");
    const bind_buffer_memory =
        try dl.load(vk.PFN_vkBindBufferMemory, "vkBindBufferMemory");
    const map_memory =
        try dl.load(vk.PFN_vkMapMemory, "vkMapMemory");
    const unmap_memory =
        try dl.load(vk.PFN_vkUnmapMemory, "vkUnmapMemory");
    const create_command_pool =
        try dl.load(vk.PFN_vkCreateCommandPool, "vkCreateCommandPool");
    const destroy_command_pool =
        try dl.load(vk.PFN_vkDestroyCommandPool, "vkDestroyCommandPool");
    const allocate_command_buffers =
        try dl.load(vk.PFN_vkAllocateCommandBuffers, "vkAllocateCommandBuffers");
    const free_command_buffers =
        try dl.load(vk.PFN_vkFreeCommandBuffers, "vkFreeCommandBuffers");
    const begin_command_buffer =
        try dl.load(vk.PFN_vkBeginCommandBuffer, "vkBeginCommandBuffer");
    const end_command_buffer =
        try dl.load(vk.PFN_vkEndCommandBuffer, "vkEndCommandBuffer");
    const queue_submit =
        try dl.load(vk.PFN_vkQueueSubmit, "vkQueueSubmit");
    const queue_wait_idle =
        try dl.load(vk.PFN_vkQueueWaitIdle, "vkQueueWaitIdle");
    const cmd_pipeline_barrier =
        try dl.load(vk.PFN_vkCmdPipelineBarrier, "vkCmdPipelineBarrier");
    const cmd_copy_buffer_to_image =
        try dl.load(vk.PFN_vkCmdCopyBufferToImage, "vkCmdCopyBufferToImage");
    const cmd_fill_buffer =
        try dl.load(vk.PFN_vkCmdFillBuffer, "vkCmdFillBuffer");
    const cmd_clear_color_image =
        try dl.load(vk.PFN_vkCmdClearColorImage, "vkCmdClearColorImage");
    const cmd_bind_vertex_buffers =
        try dl.load(vk.PFN_vkCmdBindVertexBuffers, "vkCmdBindVertexBuffers");
    const create_shader_module =
        try dl.load(vk.PFN_vkCreateShaderModule, "vkCreateShaderModule");
    const destroy_shader_module =
        try dl.load(vk.PFN_vkDestroyShaderModule, "vkDestroyShaderModule");
    const create_descriptor_set_layout =
        try dl.load(vk.PFN_vkCreateDescriptorSetLayout, "vkCreateDescriptorSetLayout");
    const destroy_descriptor_set_layout =
        try dl.load(vk.PFN_vkDestroyDescriptorSetLayout, "vkDestroyDescriptorSetLayout");
    const create_pipeline_layout =
        try dl.load(vk.PFN_vkCreatePipelineLayout, "vkCreatePipelineLayout");
    const destroy_pipeline_layout =
        try dl.load(vk.PFN_vkDestroyPipelineLayout, "vkDestroyPipelineLayout");
    const create_graphics_pipelines =
        try dl.load(vk.PFN_vkCreateGraphicsPipelines, "vkCreateGraphicsPipelines");
    const destroy_pipeline =
        try dl.load(vk.PFN_vkDestroyPipeline, "vkDestroyPipeline");
    const get_memory_fd_khr =
        try dl.load(vk.PFN_vkGetMemoryFdKHR, "vkGetMemoryFdKHR");
    const get_image_subresource_layout =
        try dl.load(vk.PFN_vkGetImageSubresourceLayout, "vkGetImageSubresourceLayout");
    const get_image_drm_format_modifier_properties_ext =
        try dl.load(vk.PFN_vkGetImageDrmFormatModifierPropertiesEXT, "vkGetImageDrmFormatModifierPropertiesEXT");
    const create_fence =
        try dl.load(vk.PFN_vkCreateFence, "vkCreateFence");
    const destroy_fence =
        try dl.load(vk.PFN_vkDestroyFence, "vkDestroyFence");
    const wait_for_fences =
        try dl.load(vk.PFN_vkWaitForFences, "vkWaitForFences");
    const reset_fences =
        try dl.load(vk.PFN_vkResetFences, "vkResetFences");
    const reset_command_buffer =
        try dl.load(vk.PFN_vkResetCommandBuffer, "vkResetCommandBuffer");
    const cmd_begin_rendering =
        try dl.load(vk.PFN_vkCmdBeginRendering, "vkCmdBeginRendering");
    const cmd_end_rendering =
        try dl.load(vk.PFN_vkCmdEndRendering, "vkCmdEndRendering");
    const cmd_bind_pipeline =
        try dl.load(vk.PFN_vkCmdBindPipeline, "vkCmdBindPipeline");
    const cmd_set_viewport =
        try dl.load(vk.PFN_vkCmdSetViewport, "vkCmdSetViewport");
    const cmd_set_scissor =
        try dl.load(vk.PFN_vkCmdSetScissor, "vkCmdSetScissor");
    const cmd_draw =
        try dl.load(vk.PFN_vkCmdDraw, "vkCmdDraw");
    const cmd_copy_image_to_buffer =
        try dl.load(vk.PFN_vkCmdCopyImageToBuffer, "vkCmdCopyImageToBuffer");
    const create_descriptor_pool =
        try dl.load(vk.PFN_vkCreateDescriptorPool, "vkCreateDescriptorPool");
    const destroy_descriptor_pool =
        try dl.load(vk.PFN_vkDestroyDescriptorPool, "vkDestroyDescriptorPool");
    const reset_descriptor_pool =
        try dl.load(vk.PFN_vkResetDescriptorPool, "vkResetDescriptorPool");
    const allocate_descriptor_sets =
        try dl.load(vk.PFN_vkAllocateDescriptorSets, "vkAllocateDescriptorSets");
    const update_descriptor_sets =
        try dl.load(vk.PFN_vkUpdateDescriptorSets, "vkUpdateDescriptorSets");
    const cmd_bind_descriptor_sets =
        try dl.load(vk.PFN_vkCmdBindDescriptorSets, "vkCmdBindDescriptorSets");

    // Snapshot the memory properties once. They never change for
    // the device's lifetime, so `findMemoryType` reads this cached
    // copy instead of re-querying on every allocation.
    var memory_properties: vk.VkPhysicalDeviceMemoryProperties = undefined;
    get_physical_device_memory_properties(physical_device, &memory_properties);

    return .{
        .instance = instance,
        .physical_device = physical_device,
        .device = device,
        .queue = queue,
        .queue_family_index = queue_family_index,
        .api_version = props.apiVersion,
        .memory_properties = memory_properties,
        .dispatch = .{
            .getPhysicalDeviceProperties = get_physical_device_properties,
            .getPhysicalDeviceMemoryProperties = get_physical_device_memory_properties,
            .getPhysicalDeviceFormatProperties = get_physical_device_format_properties,
            .getPhysicalDeviceFormatProperties2 = get_physical_device_format_properties_2,
            .enumerateDeviceExtensionProperties = enumerate_device_extension_properties,
            .getDeviceProcAddr = get_device_proc_addr,
            .getDeviceQueue = get_device_queue,
            .deviceWaitIdle = device_wait_idle,
            .createSampler = create_sampler,
            .destroySampler = destroy_sampler,
            .createImage = create_image,
            .destroyImage = destroy_image,
            .getImageMemoryRequirements = get_image_memory_requirements,
            .allocateMemory = allocate_memory,
            .freeMemory = free_memory,
            .bindImageMemory = bind_image_memory,
            .createImageView = create_image_view,
            .destroyImageView = destroy_image_view,
            .createBuffer = create_buffer,
            .destroyBuffer = destroy_buffer,
            .getBufferMemoryRequirements = get_buffer_memory_requirements,
            .bindBufferMemory = bind_buffer_memory,
            .mapMemory = map_memory,
            .unmapMemory = unmap_memory,
            .createCommandPool = create_command_pool,
            .destroyCommandPool = destroy_command_pool,
            .allocateCommandBuffers = allocate_command_buffers,
            .freeCommandBuffers = free_command_buffers,
            .beginCommandBuffer = begin_command_buffer,
            .endCommandBuffer = end_command_buffer,
            .queueSubmit = queue_submit,
            .queueWaitIdle = queue_wait_idle,
            .cmdPipelineBarrier = cmd_pipeline_barrier,
            .cmdCopyBufferToImage = cmd_copy_buffer_to_image,
            .cmdFillBuffer = cmd_fill_buffer,
            .cmdClearColorImage = cmd_clear_color_image,
            .cmdBindVertexBuffers = cmd_bind_vertex_buffers,
            .createShaderModule = create_shader_module,
            .destroyShaderModule = destroy_shader_module,
            .createDescriptorSetLayout = create_descriptor_set_layout,
            .destroyDescriptorSetLayout = destroy_descriptor_set_layout,
            .createPipelineLayout = create_pipeline_layout,
            .destroyPipelineLayout = destroy_pipeline_layout,
            .createGraphicsPipelines = create_graphics_pipelines,
            .destroyPipeline = destroy_pipeline,
            .getMemoryFdKHR = get_memory_fd_khr,
            .getImageSubresourceLayout = get_image_subresource_layout,
            .getImageDrmFormatModifierPropertiesEXT = get_image_drm_format_modifier_properties_ext,
            .createFence = create_fence,
            .destroyFence = destroy_fence,
            .waitForFences = wait_for_fences,
            .resetFences = reset_fences,
            .resetCommandBuffer = reset_command_buffer,
            .cmdBeginRendering = cmd_begin_rendering,
            .cmdEndRendering = cmd_end_rendering,
            .cmdBindPipeline = cmd_bind_pipeline,
            .cmdSetViewport = cmd_set_viewport,
            .cmdSetScissor = cmd_set_scissor,
            .cmdDraw = cmd_draw,
            .cmdCopyImageToBuffer = cmd_copy_image_to_buffer,
            .createDescriptorPool = create_descriptor_pool,
            .destroyDescriptorPool = destroy_descriptor_pool,
            .resetDescriptorPool = reset_descriptor_pool,
            .allocateDescriptorSets = allocate_descriptor_sets,
            .updateDescriptorSets = update_descriptor_sets,
            .cmdBindDescriptorSets = cmd_bind_descriptor_sets,
        },
    };
}
/// Symmetry-only: every handle is host-owned, so nothing is
/// destroyed here. Provided so callers can `defer device.deinit()`
/// without special-casing. The wrapper itself is set to `undefined`
/// and must not be used afterwards.
pub fn deinit(self: *Device) void {
    self.* = undefined;
}
/// Block until the device is idle. Useful before tearing down
/// renderer resources to make sure no command buffers are in
/// flight. On `VK_ERROR_DEVICE_LOST` (or any other failure) we
/// log the result so callers proceeding to destroy resources on
/// a dead device leave a diagnostic crumb instead of silently
/// crashing on the subsequent vkDestroy*.
///
/// Holds `queue_mutex` for the duration of the wait:
/// `vkDeviceWaitIdle` requires host access to every queue of the
/// device to be externally synchronized, so it must not overlap a
/// `queueSubmit` / `queueWaitIdle` running on another renderer
/// thread.
pub fn waitIdle(self: *const Device) void {
    const r = blk: {
        queue_mutex.lock();
        defer queue_mutex.unlock();
        break :blk self.dispatch.deviceWaitIdle(self.device);
    };
    if (r != vk.VK_SUCCESS) {
        log.warn("vkDeviceWaitIdle returned {}; teardown proceeding anyway", .{r});
    }
}
/// Return the index of the first memory type that is both allowed by
/// `type_bits` (a `VkMemoryRequirements.memoryTypeBits` mask) and has
/// every flag in `required_props` set, or null when none qualifies.
///
/// Reads the memory properties cached on the `Device`; no Vulkan call
/// is made.
///
/// Used by `vulkan/Texture.zig` (and later `vulkan/Buffer.zig`) to
/// pick an appropriate heap for a freshly created image/buffer.
pub fn findMemoryType(
    self: *const Device,
    type_bits: u32,
    required_props: vk.VkMemoryPropertyFlags,
) ?u32 {
    const mem = &self.memory_properties;
    for (0..mem.memoryTypeCount) |slot| {
        const index: u32 = @intCast(slot);

        // Bit `index` of the mask says whether this type is permitted.
        const mask: u32 = @as(u32, 1) << @intCast(index);
        const permitted = (type_bits & mask) != 0;
        if (!permitted) continue;

        const flags = mem.memoryTypes[index].propertyFlags;
        if ((flags & required_props) == required_props) return index;
    }
    return null;
}
test {
    // Reference every decl in this file so signature mismatches
    // against the Vulkan binding surface during renderer bring-up,
    // before the apprt-side wiring lands. The real `init` path needs
    // a host-provided Vulkan device and is only exercised end-to-end
    // once the Qt frontend wires up `ghostty_platform_vulkan_s`.
    std.testing.refAllDecls(Device);
}

163
pkg/vulkan/Sampler.zig Normal file
View File

@ -0,0 +1,163 @@
//! Wrapper for `VkSampler`: the immutable filter / wrap configuration
//! the GPU uses when sampling a texture.
//!
//! libghostty doesn't share samplers across textures (the OpenGL
//! backend already creates one per texture-shaped need); we keep the
//! same per-callsite ownership model so the renderer interface
//! matches.
//!
//! Counterpart: `src/renderer/opengl/Sampler.zig`.
const Self = @This();
const std = @import("std");
const vk = @import("c.zig").c;
const Device = @import("Device.zig");
const log = std.log.scoped(.vulkan);
/// Texel filter mode. Maps 1:1 to `VkFilter` (which is a `c_uint`).
pub const Filter = enum(c_uint) {
    // Tag values are the raw `VK_FILTER_*` constants, so `init` can
    // pass `@intFromEnum(...)` straight into `VkSamplerCreateInfo`.
    nearest = vk.VK_FILTER_NEAREST,
    linear = vk.VK_FILTER_LINEAR,
};
/// Texture coordinate wrap mode. Maps 1:1 to `VkSamplerAddressMode`
/// (a `c_uint`).
pub const AddressMode = enum(c_uint) {
    // Tag values are the raw `VK_SAMPLER_ADDRESS_MODE_*` constants.
    repeat = vk.VK_SAMPLER_ADDRESS_MODE_REPEAT,
    mirrored_repeat = vk.VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT,
    clamp_to_edge = vk.VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
    clamp_to_border = vk.VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
};
/// Sampler construction parameters. The same shape as the OpenGL
/// backend's `Sampler.Options` (so generic.zig can call
/// `Sampler.init(api.samplerOptions())` against either backend), with
/// a `device` reference so we can call `vkCreateSampler` against the
/// host's VkDevice without threading a global through.
pub const Options = struct {
    /// Device wrapper the sampler is created against.
    device: *const Device,
    /// Minification filter. Replaced by `mag_filter` when
    /// `unnormalized_coordinates` is set.
    min_filter: Filter,
    /// Magnification filter.
    mag_filter: Filter,
    /// Wrap mode for the S coordinate. Documented below as ignored
    /// when `unnormalized_coordinates` is set.
    wrap_s: AddressMode,
    /// Wrap mode for the T coordinate. Documented below as ignored
    /// when `unnormalized_coordinates` is set.
    wrap_t: AddressMode,
    /// Vulkan-only: enable sampling with non-normalized texel
    /// coordinates (so `texture(s, p)` reads texel `p` directly
    /// rather than mapping `[0,1] x [0,1]` over the image).
    ///
    /// This is what makes `sampler2D` behave like the OpenGL
    /// `sampler2DRect` the renderer's text shaders were originally
    /// authored against (after `vulkanizeGlsl` rewrites the type
    /// name). Vulkan imposes a long list of co-requirements when
    /// this is enabled; `init` forces them rather than rejecting
    /// inputs that violate them:
    ///
    ///   - `magFilter == minFilter` (we use `mag_filter`)
    ///   - `mipmapMode = NEAREST`
    ///   - `addressModeU/V` must be CLAMP_TO_EDGE / CLAMP_TO_BORDER
    ///     (we force CLAMP_TO_EDGE, ignoring `wrap_s/wrap_t`)
    ///   - `anisotropyEnable = FALSE`
    ///   - `compareEnable = FALSE`
    ///   - `minLod == maxLod == 0`
    ///
    /// The bound image view must also be 1D or 2D with one mip
    /// level and one array layer, which is true for the glyph atlas.
    unnormalized_coordinates: bool = false,
};
/// Errors returned by `init`.
pub const Error = error{
/// `vkCreateSampler` returned a non-success status. Logged with
/// the raw `VkResult` value.
VulkanFailed,
};
/// Raw sampler handle written by `createSampler` in `init` and
/// released by `deinit`.
sampler: vk.VkSampler,
/// Device the sampler was created on (copied from `Options.device`).
/// `deinit` uses its dispatch table and `VkDevice` handle to destroy
/// the sampler, so it must remain valid until then.
device: *const Device,
/// Build a `VkSampler` on the host's VkDevice from `opts`.
///
/// libghostty owns the returned handle's lifetime; release it with
/// `deinit`. Returns `error.VulkanFailed` (after logging the raw
/// `VkResult`) when `vkCreateSampler` does not report `VK_SUCCESS`.
pub fn init(opts: Options) Error!Self {
    const unnormalized = opts.unnormalized_coordinates;

    // Unnormalized sampling requires magFilter == minFilter (the
    // sampling stage doesn't get to pick between them). Override the
    // min filter with `mag_filter` instead of rejecting the caller.
    const effective_min = if (unnormalized) opts.mag_filter else opts.min_filter;

    // Unnormalized sampling only permits CLAMP_TO_EDGE or
    // CLAMP_TO_BORDER; we have no use for the latter, so the requested
    // wrap modes are replaced with CLAMP_TO_EDGE.
    const wrap_u: AddressMode = if (unnormalized) .clamp_to_edge else opts.wrap_s;
    const wrap_v: AddressMode = if (unnormalized) .clamp_to_edge else opts.wrap_t;

    const create_info: vk.VkSamplerCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .magFilter = @intFromEnum(opts.mag_filter),
        .minFilter = @intFromEnum(effective_min),
        // The glyph atlases are 2D textures without mips, and with
        // `minLod == maxLod == 0` this value is ignored. NEAREST is
        // mandatory for unnormalized sampling; LINEAR is used
        // otherwise for forward-compatibility should atlas mips ever
        // be generated.
        .mipmapMode = if (unnormalized)
            vk.VK_SAMPLER_MIPMAP_MODE_NEAREST
        else
            vk.VK_SAMPLER_MIPMAP_MODE_LINEAR,
        .addressModeU = @intFromEnum(wrap_u),
        .addressModeV = @intFromEnum(wrap_v),
        // 2D textures never sample in W and the renderer ignores it,
        // but the field still has to hold a valid value.
        .addressModeW = vk.VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
        .mipLodBias = 0,
        // Anisotropy is a per-physical-device feature toggle; the
        // terminal grid doesn't benefit from it and gating on the
        // feature flag would add host coordination noise. Disabling it
        // is also a hard requirement for unnormalized sampling.
        .anisotropyEnable = vk.VK_FALSE,
        .maxAnisotropy = 1,
        .compareEnable = vk.VK_FALSE,
        .compareOp = vk.VK_COMPARE_OP_ALWAYS,
        .minLod = 0,
        .maxLod = 0,
        .borderColor = vk.VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
        .unnormalizedCoordinates = if (unnormalized) vk.VK_TRUE else vk.VK_FALSE,
    };

    const dev = opts.device;
    var handle: vk.VkSampler = undefined;
    const status = dev.dispatch.createSampler(dev.device, &create_info, null, &handle);
    if (status == vk.VK_SUCCESS) {
        return .{ .sampler = handle, .device = dev };
    }

    log.err("vkCreateSampler failed: result={}", .{status});
    return error.VulkanFailed;
}
/// Destroy the underlying `VkSampler` on the device it was created on.
///
/// Takes `Self` rather than `*Self` because `Sampler` is held by value
/// at every call site (`const samp = try Sampler.init(...)`), and a
/// `const`-bound value can't have its address taken for a `*Self`
/// receiver. CommandPool/DescriptorPool take `*Self` because they're
/// held in mutable slots; the asymmetry follows container ownership,
/// not a stylistic choice.
pub fn deinit(self: Self) void {
    const dev = self.device;
    dev.dispatch.destroySampler(dev.device, self.sampler, null);
}
// Compile-only smoke test: references every declaration in this file
// so signature mismatches surface at test-build time. It makes no
// runtime assertions and does not create a sampler.
test {
std.testing.refAllDecls(@This());
}

14
pkg/vulkan/build.zig Normal file
View File

@ -0,0 +1,14 @@
const std = @import("std");
/// Register the "vulkan" module, rooted at `main.zig`, on the build
/// graph. The module handle returned by `addModule` is discarded.
pub fn build(b: *std.Build) !void {
// `addModule` registers "vulkan" on `b`'s module table; consumers
// (`src/build/SharedDeps.zig`) reach it via
// `b.lazyDependency("vulkan", ...).module("vulkan")`. No return
// value or further wiring is needed here: the Vulkan headers
// (`vulkan-headers` package) sit on the default system include
// path and libvulkan is link-system'd by the top-level build.
// Same pattern as `pkg/opengl/build.zig`.
_ = b.addModule("vulkan", .{
.root_source_file = b.path("main.zig"),
});
}

16
pkg/vulkan/c.zig Normal file
View File

@ -0,0 +1,16 @@
// Vulkan core API + the dmabuf-related extensions the renderer relies
// on for zero-copy presentation:
//
// - VK_KHR_external_memory / VK_KHR_external_memory_fd
// - VK_EXT_external_memory_dma_buf
// - VK_EXT_image_drm_format_modifier
//
// VK_USE_PLATFORM_* macros are intentionally NOT set here: the
// renderer talks to its host purely via dmabuf fds (handed back to
// the apprt's `ghostty_platform_vulkan_s.present` callback), so
// libghostty never sees a wl_display or xcb_connection. That keeps
// the binding portable and lets the host (Qt RHI) do all the
// platform-specific compositing.

/// Raw Vulkan C API, translated from `vulkan/vulkan.h` by `@cImport`.
pub const c = @cImport({
@cInclude("vulkan/vulkan.h");
});

30
pkg/vulkan/main.zig Normal file
View File

@ -0,0 +1,30 @@
//! Vulkan bindings.
//!
//! Shaped after `pkg/opengl/`: `c` is the raw C API (a thin `@cImport`
//! wrapper around the system Vulkan headers); the per-resource files
//! alongside provide opinionated typed wrappers the renderer
//! consumes as primitives.
//!
//! The Vulkan renderer in `src/renderer/vulkan/` builds renderer
//! policy on top of these (Pipeline / RenderPass / Frame / Target
//! etc.); anything that's pure Vulkan-API plumbing belongs here.
//!
//! Vulkan core API + the dmabuf-related extensions the renderer relies
//! on for zero-copy presentation:
//!
//! - VK_KHR_external_memory / VK_KHR_external_memory_fd
//! - VK_EXT_external_memory_dma_buf
//! - VK_EXT_image_drm_format_modifier
//!
//! VK_USE_PLATFORM_* macros are intentionally NOT set in `c.zig`:
//! libghostty talks to its host purely via dmabuf fds (handed back to
//! the apprt's `ghostty_platform_vulkan_s.present` callback), so it
//! never sees a `wl_display` or `xcb_connection`. That keeps the
//! binding portable and lets the host (Qt RHI) do all the
//! platform-specific compositing.
pub const c = @import("c.zig").c;
pub const Device = @import("Device.zig");
pub const Sampler = @import("Sampler.zig");
pub const CommandPool = @import("CommandPool.zig");
pub const DescriptorPool = @import("DescriptorPool.zig");

View File

@ -30,7 +30,7 @@ set(CMAKE_AUTOMOC ON)
include(GNUInstallDirs)
find_package(Qt6 REQUIRED COMPONENTS Gui Widgets OpenGL DBus
Multimedia Svg)
Multimedia Svg WaylandClient)
# WindowBlur + XkbTracker use qpa/qplatformnativeinterface.h to reach
# the wl_display / wl_surface / wl_seat for native compositor calls
# (blur, layer-shell screen pinning, raw wl_keyboard listeners). The
@ -43,7 +43,7 @@ find_package(Qt6 REQUIRED COMPONENTS Gui Widgets OpenGL DBus
# CMake config (older Debian) and we fall back to hand-wiring the
# include dir below.
set(QT_NO_PRIVATE_MODULE_WARNING ON)
find_package(Qt6 QUIET OPTIONAL_COMPONENTS GuiPrivate)
find_package(Qt6 QUIET OPTIONAL_COMPONENTS GuiPrivate WaylandClientPrivate)
# LayerShellQt: the quick terminal is a wlr-layer-shell dropdown window.
find_package(LayerShellQt REQUIRED)
@ -53,6 +53,11 @@ find_package(LayerShellQt REQUIRED)
# QPA native-handle accessors.
find_package(PkgConfig REQUIRED)
pkg_check_modules(WAYLAND_CLIENT REQUIRED IMPORTED_TARGET wayland-client)
# libEGL is only needed by the OpenGL variant: `EglDmabufTarget`
# uses EGL_MESA_image_dma_buf_export to export an FBO-backed
# texture as a dmabuf. The Vulkan variant gets dmabufs straight
# from `VK_KHR_external_memory_fd` and never calls into EGL, so
# the EGL pkg-config + IMPORTED_TARGET is gated below.
# libxkbcommon: derive the unshifted Unicode codepoint for a key event
# from its XKB keycode, so libghostty's kitty encoder finds an entry for
# punctuation keys (Qt's ev->key() reports the SHIFTED symbol, e.g.
@ -60,22 +65,105 @@ pkg_check_modules(WAYLAND_CLIENT REQUIRED IMPORTED_TARGET wayland-client)
pkg_check_modules(XKBCOMMON REQUIRED IMPORTED_TARGET xkbcommon)
find_program(WAYLAND_SCANNER wayland-scanner REQUIRED)
# Generate client glue for the org_kde_kwin_blur protocol.
set(BLUR_XML "${CMAKE_CURRENT_SOURCE_DIR}/protocols/blur.xml")
set(BLUR_HEADER "${CMAKE_CURRENT_BINARY_DIR}/blur-client-protocol.h")
set(BLUR_CODE "${CMAKE_CURRENT_BINARY_DIR}/blur-protocol.c")
add_custom_command(OUTPUT "${BLUR_HEADER}"
COMMAND "${WAYLAND_SCANNER}" client-header "${BLUR_XML}" "${BLUR_HEADER}"
DEPENDS "${BLUR_XML}" VERBATIM)
add_custom_command(OUTPUT "${BLUR_CODE}"
COMMAND "${WAYLAND_SCANNER}" private-code "${BLUR_XML}" "${BLUR_CODE}"
DEPENDS "${BLUR_XML}" VERBATIM)
# `ghastty_wayland_protocol(<basename> <header_var> <code_var>)`
#
# Runs `wayland-scanner` over `protocols/<basename>.xml` and emits two
# files into `CMAKE_CURRENT_BINARY_DIR`:
#   - `<basename>-client-protocol.h`  (scanner mode `client-header`)
#   - `<basename>-protocol.c`         (scanner mode `private-code`)
# The generated paths are written to `<header_var>` and `<code_var>`
# in the caller's scope so they can be listed directly in
# `add_executable`'s sources.
#
# The two `add_custom_command` rules are independent of each other.
# The `private-code` output `#include`s the `client-header` output,
# but CMake creates that dependency at target-source-list time, when
# both files appear in `add_executable`. This mirrors the earlier
# pattern of two hand-written custom commands per protocol; only the
# boilerplate is gone.
function(ghastty_wayland_protocol basename header_var code_var)
    set(protocol_xml "${CMAKE_CURRENT_SOURCE_DIR}/protocols/${basename}.xml")
    set(generated_header "${CMAKE_CURRENT_BINARY_DIR}/${basename}-client-protocol.h")
    set(generated_code "${CMAKE_CURRENT_BINARY_DIR}/${basename}-protocol.c")

    add_custom_command(
        OUTPUT "${generated_header}"
        COMMAND "${WAYLAND_SCANNER}" client-header "${protocol_xml}" "${generated_header}"
        DEPENDS "${protocol_xml}"
        VERBATIM)

    add_custom_command(
        OUTPUT "${generated_code}"
        COMMAND "${WAYLAND_SCANNER}" private-code "${protocol_xml}" "${generated_code}"
        DEPENDS "${protocol_xml}"
        VERBATIM)

    set("${header_var}" "${generated_header}" PARENT_SCOPE)
    set("${code_var}" "${generated_code}" PARENT_SCOPE)
endfunction()
# Per-protocol notes:
# - `blur` (`org_kde_kwin_blur`): KWin background-blur.
# - `linux-dmabuf-v1`: Vulkan present path;
#   wrap libghostty's dmabuf fd in a `wl_buffer` for the
#   wayland::SubsurfacePresenter's `wl_surface`.
# - `viewporter` (`wp_viewporter`): destination size in
#   surface-local coords; decouples the buffer's pixel dimensions
#   from how big the subsurface appears on screen (fractional
#   scaling).
# - `fractional-scale-v1` (`wp_fractional_scale_v1`):
#   compositor reports per-surface fractional scale (120ths).
#   Used as the authoritative scale for buffer sizing, avoiding
#   any sync lag with Qt's `devicePixelRatioF()` cache.
ghastty_wayland_protocol(blur BLUR_HEADER BLUR_CODE)
ghastty_wayland_protocol(linux-dmabuf-v1 DMABUF_HEADER DMABUF_CODE)
ghastty_wayland_protocol(viewporter VIEWPORTER_HEADER VIEWPORTER_CODE)
ghastty_wayland_protocol(fractional-scale-v1 FRACSCALE_HEADER FRACSCALE_CODE)
# - `alpha-modifier-v1` (`wp_alpha_modifier_v1`):
#   compositor-side per-surface alpha multiplier. QtWayland has no
# built-in setWindowOpacity equivalent (the QPA plugin warns
# "This plugin does not support setting window opacity" on every
# call), so QuickTerminal's fade-in/out drives this protocol
# directly. Supported on KWin, wlroots 0.17, Hyprland; NOT yet
# on mutter/GNOME.
ghastty_wayland_protocol(alpha-modifier-v1 ALPHAMOD_HEADER ALPHAMOD_CODE)
# libghostty is built out-of-tree by Zig.
get_filename_component(GHOSTTY_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/.." ABSOLUTE)
set(GHOSTTY_LIB_DIR "${GHOSTTY_ROOT}/zig-out/lib")
set(GHOSTTY_SO "${GHOSTTY_LIB_DIR}/ghostty-internal.so")
# Variant: which libghostty.so this build links against. The
# rendering backend is baked into libghostty (Zig builds with
# `-Drenderer=opengl` vs `-Drenderer=vulkan` produce ABI-compatible
# but functionally distinct .so's), so the variant here is purely a
# *compile-time selector*. The binary name and install layout do
# NOT change: `${CMAKE_INSTALL_BINDIR}/ghastty` and
# `${CMAKE_INSTALL_LIBDIR}/libghostty.so` for both. Developers who
# want both flavors installed at once should use distinct prefixes
# (`cmake --install --prefix /tmp/ghastty-vulkan`).
#
# Set via `cmake -DGHASTTY_VARIANT=vulkan -S qt -B qt/build-vulkan`.
set(GHASTTY_VARIANT "opengl" CACHE STRING
"Renderer variant: opengl (default) or vulkan")
set_property(CACHE GHASTTY_VARIANT PROPERTY STRINGS opengl vulkan)
# Validate the cache value: STRINGS only constrains the cmake-gui
# dropdown, not the command-line. `-DGHASTTY_VARIANT=foo` would
# otherwise silently fall into the OpenGL branch below.
if(NOT GHASTTY_VARIANT STREQUAL "opengl" AND
NOT GHASTTY_VARIANT STREQUAL "vulkan")
message(FATAL_ERROR
"GHASTTY_VARIANT='${GHASTTY_VARIANT}' is invalid; "
"must be 'opengl' or 'vulkan'.")
endif()
message(STATUS "Building variant=${GHASTTY_VARIANT}")
# Compile-time renderer pick. Each binary is linked against exactly
# one libghostty.so variant (opengl or vulkan), so the renderer
# choice is inherent to the binary; no need for a runtime env var.
# GhosttySurface.cpp branches on GHASTTY_USE_VULKAN to spin up the
# Vulkan host vs the OpenGL context.
if(GHASTTY_VARIANT STREQUAL "vulkan")
add_compile_definitions(GHASTTY_USE_VULKAN)
endif()
# libEGL: needed by `opengl/EglDmabufTarget.cpp` for the OpenGL
# variant's zero-copy present path. Vulkan-variant binaries never
# pull in this source file (gated below) so the loader doesn't have
# to be installed for Vulkan-only systems.
if(GHASTTY_VARIANT STREQUAL "opengl")
pkg_check_modules(EGL REQUIRED IMPORTED_TARGET egl)
endif()
if(NOT EXISTS "${GHOSTTY_SO}")
message(FATAL_ERROR
"libghostty not found at ${GHOSTTY_SO}\n"
@ -119,12 +207,40 @@ add_executable(ghastty
src/TabWidget.cpp
src/undo/UndoStack.cpp
src/Util.cpp
src/wayland/AlphaModifier.cpp
src/wayland/SubsurfacePresenter.cpp
src/WindowBlur.cpp
src/XkbTracker.cpp
"${BLUR_CODE}"
"${BLUR_HEADER}"
"${DMABUF_CODE}"
"${DMABUF_HEADER}"
"${VIEWPORTER_CODE}"
"${VIEWPORTER_HEADER}"
"${FRACSCALE_CODE}"
"${FRACSCALE_HEADER}"
"${ALPHAMOD_CODE}"
"${ALPHAMOD_HEADER}"
)
# Vulkan host glue is variant-only. Adding it to the OpenGL build
# would force an unconditional libvulkan link on a binary that
# never calls into Vulkan, contradicting the side-by-side
# `~/.local/lib/libghostty.so` story that the variant block above
# documents.
if(GHASTTY_VARIANT STREQUAL "vulkan")
target_sources(ghastty PRIVATE src/vulkan/Host.cpp)
endif()
# `opengl/EglDmabufTarget.cpp` is OpenGL-variant only. The Vulkan
# variant exports dmabufs straight from VkDeviceMemory via
# VK_KHR_external_memory_fd and never calls into EGL, so excluding
# this source file from the Vulkan binary lets it stay free of
# libEGL too.
if(GHASTTY_VARIANT STREQUAL "opengl")
target_sources(ghastty PRIVATE src/opengl/EglDmabufTarget.cpp)
endif()
# Embed the app icon so it is available even running from the build tree.
qt_add_resources(ghastty "appicon"
PREFIX "/"
@ -151,7 +267,32 @@ target_link_libraries(ghastty PRIVATE
"${GHOSTTY_LINK_SO}"
)
# libEGL is OpenGL-variant only; it is gated alongside the source file
# in the variant block above. Vulkan-variant binaries don't pull
# in libEGL at all.
if(GHASTTY_VARIANT STREQUAL "opengl")
target_link_libraries(ghastty PRIVATE PkgConfig::EGL)
endif()
# libvulkan is Vulkan-variant only. The OpenGL variant compiles
# nothing that references Vulkan symbols (vulkan/Host.cpp is gated
# above), so not linking libvulkan keeps OpenGL-only systems from
# needing the loader installed at runtime, matching the
# documented side-by-side variant story above.
if(GHASTTY_VARIANT STREQUAL "vulkan")
target_link_libraries(ghastty PRIVATE vulkan)
endif()
# Hook up the private QPA headers (see find_package above).
#
# Qt6::WaylandClientPrivate gives us QtWaylandClient::QWaylandWindow,
# which we cast the QPA platform window to in GhosttySurface to call
# `commit()` directly; that forces a parent wl_surface commit at the
# moment our subsurface state is ready, instead of waiting on Qt's
# backing-store flush which never fires for our translucent widget.
if(TARGET Qt6::WaylandClientPrivate)
target_link_libraries(ghastty PRIVATE Qt6::WaylandClientPrivate)
endif()
if(TARGET Qt6::GuiPrivate)
target_link_libraries(ghastty PRIVATE Qt6::GuiPrivate)
else()
@ -187,6 +328,9 @@ endif()
# actual zig-out artifact), and the .so's NEEDED entries also point
# into zig-out/lib for transitive deps.
# - Installed: libghostty.so lives next to the binary ($ORIGIN/../lib).
# Same layout regardless of variant: the binary name doesn't change,
# the .so path doesn't change. Side-by-side installs of two variants
# need separate `--prefix`es.
set_target_properties(ghastty PROPERTIES
BUILD_RPATH "${GHOSTTY_LINK_DIR};${GHOSTTY_LIB_DIR}"
INSTALL_RPATH "$ORIGIN/../${CMAKE_INSTALL_LIBDIR}"

View File

@ -0,0 +1,118 @@
<?xml version="1.0" encoding="UTF-8"?>
<protocol name="alpha_modifier_v1">
<copyright>
Copyright 2023 Xaver Hugl
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
</copyright>
<description summary="surface alpha modifier">
This interface allows a client to set a factor for the alpha values on a
surface, which can be used to offload such operations to the compositor,
which can in turn for example offload them to KMS.
Warning! The protocol described in this file is currently in the testing
phase. Backward compatible changes may be added together with the
corresponding interface version bump. Backward incompatible changes can
only be done by creating a new major version of the extension.
</description>
<interface name="wp_alpha_modifier_v1" version="1">
<description summary="surface alpha modifier manager">
This interface allows a client to set a factor for the alpha values on
a surface, which can be used to offload such operations to the
compositor. The default factor is UINT32_MAX.
This interface can be used to set an arbitrary alpha value for the
surface, allowing it to be made fully transparent by setting the factor
to 0, fully opaque by setting it to UINT32_MAX, or any value in
between.
Warning! The protocol described in this file is currently in the
testing phase. Backward compatible changes may be added together with
the corresponding interface version bump. Backward incompatible changes
can only be done by creating a new major version of the extension.
</description>
<request name="destroy" type="destructor">
<description summary="destroy the alpha modifier manager object">
Destroy the alpha modifier manager. This doesn't destroy objects
created with the manager.
</description>
</request>
<request name="get_surface">
<description summary="create a new alpha modifier surface object">
Create a new alpha modifier surface object associated with the given
wl_surface. If there is already such an object associated with the
wl_surface, the already_constructed error will be raised.
</description>
<arg name="id" type="new_id" interface="wp_alpha_modifier_surface_v1"/>
<arg name="surface" type="object" interface="wl_surface"/>
</request>
<enum name="error">
<entry name="already_constructed" value="0"
summary="wl_surface already has a alpha modifier object associated"/>
</enum>
</interface>
<interface name="wp_alpha_modifier_surface_v1" version="1">
<description summary="modifier object for a surface">
This interface allows the client to set a factor for the alpha values on
a surface, which can be used to offload such operations to the
compositor. Multiple alpha modifiers can be attached to the same
surface, in which case the resulting alpha will be the product of all
the multiplicative factors.
The default factor is UINT32_MAX.
</description>
<request name="destroy" type="destructor">
<description summary="remove the alpha modifier from the surface">
This destroys the object, and is equivalent to set_multiplier with
a value of UINT32_MAX, with the same double-buffered semantics as
set_multiplier.
</description>
</request>
<request name="set_multiplier">
<description summary="set the alpha multiplier">
Sets the alpha multiplier for the surface. The alpha multiplier is
double-buffered state, see wl_surface.commit for details.
The default factor is UINT32_MAX.
This factor is applied in the compositor's blending space, as an
additional step after the processing of per-pixel alpha values for
the surface. It allows to set an arbitrary alpha value for the
surface, including making the surface partially transparent even when
all the pixels are fully opaque, or fully transparent even when the
pixels are not.
</description>
<arg name="factor" type="uint" summary="the new alpha multiplier for the surface"/>
</request>
<enum name="error">
<entry name="no_surface" value="0"
summary="wl_surface was destroyed"/>
</enum>
</interface>
</protocol>

View File

@ -0,0 +1,102 @@
<?xml version="1.0" encoding="UTF-8"?>
<protocol name="fractional_scale_v1">
<copyright>
Copyright © 2022 Kenny Levinsen
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
</copyright>
<description summary="Protocol for requesting fractional surface scales">
This protocol allows a compositor to suggest for surfaces to render at
fractional scales.
A client can submit scaled content by utilizing wp_viewport. This is done by
creating a wp_viewport object for the surface and setting the destination
rectangle to the surface size before the scale factor is applied.
The buffer size is calculated by multiplying the surface size by the
intended scale.
The wl_surface buffer scale should remain set to 1.
If a surface has a surface-local size of 100 px by 50 px and wishes to
submit buffers with a scale of 1.5, then a buffer of 150px by 75 px should
be used and the wp_viewport destination rectangle should be 100 px by 50 px.
For toplevel surfaces, the size is rounded halfway away from zero. The
rounding algorithm for subsurface position and size is not defined.
</description>
<interface name="wp_fractional_scale_manager_v1" version="1">
<description summary="fractional surface scale information">
A global interface for requesting surfaces to use fractional scales.
</description>
<request name="destroy" type="destructor">
<description summary="unbind the fractional surface scale interface">
Informs the server that the client will not be using this protocol
object anymore. This does not affect any other objects,
wp_fractional_scale_v1 objects included.
</description>
</request>
<enum name="error">
<entry name="fractional_scale_exists" value="0"
summary="the surface already has a fractional_scale object associated"/>
</enum>
<request name="get_fractional_scale">
<description summary="extend surface interface for scale information">
Create an add-on object for the wl_surface to let the compositor
request fractional scales. If the given wl_surface already has a
wp_fractional_scale_v1 object associated, the fractional_scale_exists
protocol error is raised.
</description>
<arg name="id" type="new_id" interface="wp_fractional_scale_v1"
summary="the new surface scale info interface id"/>
<arg name="surface" type="object" interface="wl_surface"
summary="the surface"/>
</request>
</interface>
<interface name="wp_fractional_scale_v1" version="1">
<description summary="fractional scale interface to a wl_surface">
An additional interface to a wl_surface object which allows the compositor
to inform the client of the preferred scale.
</description>
<request name="destroy" type="destructor">
<description summary="remove surface scale information for surface">
Destroy the fractional scale object. When this object is destroyed,
preferred_scale events will no longer be sent.
</description>
</request>
<event name="preferred_scale">
<description summary="notify of new preferred scale">
Notification of a new preferred scale for this surface that the
compositor suggests that the client should use.
The sent scale is the numerator of a fraction with a denominator of 120.
</description>
<arg name="scale" type="uint" summary="the new preferred scale"/>
</event>
</interface>
</protocol>

View File

@ -0,0 +1,585 @@
<?xml version="1.0" encoding="UTF-8"?>
<protocol name="linux_dmabuf_v1">
<copyright>
Copyright © 2014, 2015 Collabora, Ltd.
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
</copyright>
<interface name="zwp_linux_dmabuf_v1" version="5">
<description summary="factory for creating dmabuf-based wl_buffers">
This interface offers ways to create generic dmabuf-based wl_buffers.
For more information about dmabuf, see:
https://www.kernel.org/doc/html/next/userspace-api/dma-buf-alloc-exchange.html
Clients can use the get_surface_feedback request to get dmabuf feedback
for a particular surface. If the client wants to retrieve feedback not
tied to a surface, they can use the get_default_feedback request.
The following are required from clients:
- Clients must ensure that either all data in the dma-buf is
coherent for all subsequent read access or that coherency is
correctly handled by the underlying kernel-side dma-buf
implementation.
- Don't make any more attachments after sending the buffer to the
compositor. Making more attachments later increases the risk of
the compositor not being able to use (re-import) an existing
dmabuf-based wl_buffer.
The underlying graphics stack must ensure the following:
- The dmabuf file descriptors relayed to the server will stay valid
for the whole lifetime of the wl_buffer. This means the server may
at any time use those fds to import the dmabuf into any kernel
sub-system that might accept it.
However, when the underlying graphics stack fails to deliver the
promise, because of e.g. a device hot-unplug which raises internal
errors, after the wl_buffer has been successfully created the
compositor must not raise protocol errors to the client when dmabuf
import later fails.
To create a wl_buffer from one or more dmabufs, a client creates a
zwp_linux_dmabuf_params_v1 object with a zwp_linux_dmabuf_v1.create_params
request. All planes required by the intended format are added with
the 'add' request. Finally, a 'create' or 'create_immed' request is
issued, which has the following outcome depending on the import success.
The 'create' request,
- on success, triggers a 'created' event which provides the final
wl_buffer to the client.
- on failure, triggers a 'failed' event to convey that the server
cannot use the dmabufs received from the client.
For the 'create_immed' request,
- on success, the server immediately imports the added dmabufs to
create a wl_buffer. No event is sent from the server in this case.
- on failure, the server can choose to either:
- terminate the client by raising a fatal error.
- mark the wl_buffer as failed, and send a 'failed' event to the
client. If the client uses a failed wl_buffer as an argument to any
request, the behaviour is compositor implementation-defined.
For all DRM formats and unless specified in another protocol extension,
pre-multiplied alpha is used for pixel values.
Unless specified otherwise in another protocol extension, implicit
synchronization is used. In other words, compositors and clients must
wait and signal fences implicitly passed via the DMA-BUF's reservation
mechanism.
</description>
<request name="destroy" type="destructor">
<description summary="unbind the factory">
Objects created through this interface, especially wl_buffers, will
remain valid.
</description>
</request>
<request name="create_params">
<description summary="create a temporary object for buffer parameters">
This temporary object is used to collect multiple dmabuf handles into
a single batch to create a wl_buffer. It can only be used once and
should be destroyed after a 'created' or 'failed' event has been
received.
</description>
<arg name="params_id" type="new_id" interface="zwp_linux_buffer_params_v1"
summary="the new temporary"/>
</request>
<event name="format" deprecated-since="4">
<description summary="supported buffer format">
This event advertises one buffer format that the server supports.
All the supported formats are advertised once when the client
binds to this interface. A roundtrip after binding guarantees
that the client has received all supported formats.
For the definition of the format codes, see the
zwp_linux_buffer_params_v1::create request.
Starting version 4, the format event is deprecated and must not be
sent by compositors. Instead, use get_default_feedback or
get_surface_feedback.
</description>
<arg name="format" type="uint" summary="DRM_FORMAT code"/>
</event>
<event name="modifier" since="3" deprecated-since="4">
<description summary="supported buffer format modifier">
This event advertises the formats that the server supports, along with
the modifiers supported for each format. All the supported modifiers
for all the supported formats are advertised once when the client
binds to this interface. A roundtrip after binding guarantees that
the client has received all supported format-modifier pairs.
For legacy support, DRM_FORMAT_MOD_INVALID (that is, modifier_hi ==
0x00ffffff and modifier_lo == 0xffffffff) is allowed in this event.
It indicates that the server can support the format with an implicit
modifier. When a plane has DRM_FORMAT_MOD_INVALID as its modifier, it
is as if no explicit modifier is specified. The effective modifier
will be derived from the dmabuf.
A compositor that sends valid modifiers and DRM_FORMAT_MOD_INVALID for
a given format supports both explicit modifiers and implicit modifiers.
For the definition of the format and modifier codes, see the
zwp_linux_buffer_params_v1::create and zwp_linux_buffer_params_v1::add
requests.
Starting version 4, the modifier event is deprecated and must not be
sent by compositors. Instead, use get_default_feedback or
get_surface_feedback.
</description>
<arg name="format" type="uint" summary="DRM_FORMAT code"/>
<arg name="modifier_hi" type="uint"
summary="high 32 bits of layout modifier"/>
<arg name="modifier_lo" type="uint"
summary="low 32 bits of layout modifier"/>
</event>
<!-- Version 4 additions -->
<request name="get_default_feedback" since="4">
<description summary="get default feedback">
This request creates a new wp_linux_dmabuf_feedback object not bound
to a particular surface. This object will deliver feedback about dmabuf
parameters to use if the client doesn't support per-surface feedback
(see get_surface_feedback).
</description>
<arg name="id" type="new_id" interface="zwp_linux_dmabuf_feedback_v1"/>
</request>
<request name="get_surface_feedback" since="4">
<description summary="get feedback for a surface">
This request creates a new wp_linux_dmabuf_feedback object for the
specified wl_surface. This object will deliver feedback about dmabuf
parameters to use for buffers attached to this surface.
If the surface is destroyed before the wp_linux_dmabuf_feedback object,
the feedback object becomes inert.
</description>
<arg name="id" type="new_id" interface="zwp_linux_dmabuf_feedback_v1"/>
<arg name="surface" type="object" interface="wl_surface"/>
</request>
</interface>
<interface name="zwp_linux_buffer_params_v1" version="5">
<description summary="parameters for creating a dmabuf-based wl_buffer">
This temporary object is a collection of dmabufs and other
parameters that together form a single logical buffer. The temporary
object may eventually create one wl_buffer unless cancelled by
destroying it before requesting 'create'.
Single-planar formats only require one dmabuf, however
multi-planar formats may require more than one dmabuf. For all
formats, an 'add' request must be called once per plane (even if the
underlying dmabuf fd is identical).
You must use consecutive plane indices ('plane_idx' argument for 'add')
from zero to the number of planes used by the drm_fourcc format code.
All planes required by the format must be given exactly once, but can
be given in any order. Each plane index can only be set once; subsequent
calls with a plane index which has already been set will result in a
plane_set error being generated.
</description>
<enum name="error">
<entry name="already_used" value="0"
summary="the dmabuf_batch object has already been used to create a wl_buffer"/>
<entry name="plane_idx" value="1"
summary="plane index out of bounds"/>
<entry name="plane_set" value="2"
summary="the plane index was already set"/>
<entry name="incomplete" value="3"
summary="missing or too many planes to create a buffer"/>
<entry name="invalid_format" value="4"
summary="format not supported"/>
<entry name="invalid_dimensions" value="5"
summary="invalid width or height"/>
<entry name="out_of_bounds" value="6"
summary="offset + stride * height goes out of dmabuf bounds"/>
<entry name="invalid_wl_buffer" value="7"
summary="invalid wl_buffer resulted from importing dmabufs via
the create_immed request on given buffer_params"/>
</enum>
<request name="destroy" type="destructor">
<description summary="delete this object, used or not">
Cleans up the temporary data sent to the server for dmabuf-based
wl_buffer creation.
</description>
</request>
<request name="add">
<description summary="add a dmabuf to the temporary set">
This request adds one dmabuf to the set in this
zwp_linux_buffer_params_v1.
The 64-bit unsigned value combined from modifier_hi and modifier_lo
is the dmabuf layout modifier. DRM AddFB2 ioctl calls this the
fb modifier, which is defined in drm_mode.h of Linux UAPI.
This is an opaque token. Drivers use this token to express tiling,
compression, etc. driver-specific modifications to the base format
defined by the DRM fourcc code.
Starting from version 4, the invalid_format protocol error is sent if
the format + modifier pair was not advertised as supported.
Starting from version 5, the invalid_format protocol error is sent if
all planes don't use the same modifier.
This request raises the PLANE_IDX error if plane_idx is too large.
The error PLANE_SET is raised if attempting to set a plane that
was already set.
</description>
<arg name="fd" type="fd" summary="dmabuf fd"/>
<arg name="plane_idx" type="uint" summary="plane index"/>
<arg name="offset" type="uint" summary="offset in bytes"/>
<arg name="stride" type="uint" summary="stride in bytes"/>
<arg name="modifier_hi" type="uint"
summary="high 32 bits of layout modifier"/>
<arg name="modifier_lo" type="uint"
summary="low 32 bits of layout modifier"/>
</request>
<enum name="flags" bitfield="true">
<entry name="y_invert" value="1" summary="contents are y-inverted"/>
<entry name="interlaced" value="2" summary="content is interlaced"/>
<entry name="bottom_first" value="4" summary="bottom field first"/>
</enum>
<request name="create">
<description summary="create a wl_buffer from the given dmabufs">
This asks for creation of a wl_buffer from the added dmabuf
buffers. The wl_buffer is not created immediately but returned via
the 'created' event if the dmabuf sharing succeeds. The sharing
may fail at runtime for reasons a client cannot predict, in
which case the 'failed' event is triggered.
The 'format' argument is a DRM_FORMAT code, as defined by the
libdrm's drm_fourcc.h. The Linux kernel's DRM sub-system is the
authoritative source on how the format codes should work.
The 'flags' is a bitfield of the flags defined in enum "flags".
'y_invert' means that the image needs to be y-flipped.
Flag 'interlaced' means that the frame in the buffer is not
progressive as usual, but interlaced. An interlaced buffer as
supported here must always contain both top and bottom fields.
The top field always begins on the first pixel row. The temporal
ordering between the two fields is top field first, unless
'bottom_first' is specified. It is undefined whether 'bottom_first'
is ignored if 'interlaced' is not set.
This protocol does not convey any information about field rate,
duration, or timing, other than the relative ordering between the
two fields in one buffer. A compositor may have to estimate the
intended field rate from the incoming buffer rate. It is undefined
whether the time of receiving wl_surface.commit with a new buffer
attached, applying the wl_surface state, wl_surface.frame callback
trigger, presentation, or any other point in the compositor cycle
is used to measure the frame or field times. There is no support
for detecting missed or late frames/fields/buffers either, and
there is no support whatsoever for cooperating with interlaced
compositor output.
The composited image quality resulting from the use of interlaced
buffers is explicitly undefined. A compositor may use elaborate
hardware features or software to deinterlace and create progressive
output frames from a sequence of interlaced input buffers, or it
may produce substandard image quality. However, compositors that
cannot guarantee reasonable image quality in all cases are recommended
to just reject all interlaced buffers.
Any argument errors, including non-positive width or height,
mismatch between the number of planes and the format, bad
format, bad offset or stride, may be indicated by fatal protocol
errors: INCOMPLETE, INVALID_FORMAT, INVALID_DIMENSIONS,
OUT_OF_BOUNDS.
Dmabuf import errors in the server that are not obvious client
bugs are returned via the 'failed' event as non-fatal. This
allows attempting dmabuf sharing and falling back in the client
if it fails.
This request can be sent only once in the object's lifetime, after
which the only legal request is destroy. This object should be
destroyed after issuing a 'create' request. Attempting to use this
object after issuing 'create' raises ALREADY_USED protocol error.
It is not mandatory to issue 'create'. If a client wants to
cancel the buffer creation, it can just destroy this object.
</description>
<arg name="width" type="int" summary="base plane width in pixels"/>
<arg name="height" type="int" summary="base plane height in pixels"/>
<arg name="format" type="uint" summary="DRM_FORMAT code"/>
<arg name="flags" type="uint" enum="flags" summary="see enum flags"/>
</request>
<event name="created">
<description summary="buffer creation succeeded">
This event indicates that the attempted buffer creation was
successful. It provides the new wl_buffer referencing the dmabuf(s).
Upon receiving this event, the client should destroy the
zwp_linux_buffer_params_v1 object.
</description>
<arg name="buffer" type="new_id" interface="wl_buffer"
summary="the newly created wl_buffer"/>
</event>
<event name="failed">
<description summary="buffer creation failed">
This event indicates that the attempted buffer creation has
failed. It usually means that one of the dmabuf constraints
has not been fulfilled.
Upon receiving this event, the client should destroy the
zwp_linux_buffer_params_v1 object.
</description>
</event>
<request name="create_immed" since="2">
<description summary="immediately create a wl_buffer from the given
dmabufs">
This asks for immediate creation of a wl_buffer by importing the
added dmabufs.
In case of import success, no event is sent from the server, and the
wl_buffer is ready to be used by the client.
Upon import failure, either of the following may happen, as seen fit
by the implementation:
- the client is terminated with one of the following fatal protocol
errors:
- INCOMPLETE, INVALID_FORMAT, INVALID_DIMENSIONS, OUT_OF_BOUNDS,
in case of argument errors such as mismatch between the number
of planes and the format, bad format, non-positive width or
height, or bad offset or stride.
- INVALID_WL_BUFFER, in case the cause for failure is unknown or
platform specific.
- the server creates an invalid wl_buffer, marks it as failed and
sends a 'failed' event to the client. The result of using this
invalid wl_buffer as an argument in any request by the client is
defined by the compositor implementation.
This takes the same arguments as a 'create' request, and obeys the
same restrictions.
</description>
<arg name="buffer_id" type="new_id" interface="wl_buffer"
summary="id for the newly created wl_buffer"/>
<arg name="width" type="int" summary="base plane width in pixels"/>
<arg name="height" type="int" summary="base plane height in pixels"/>
<arg name="format" type="uint" summary="DRM_FORMAT code"/>
<arg name="flags" type="uint" enum="flags" summary="see enum flags"/>
</request>
</interface>
<interface name="zwp_linux_dmabuf_feedback_v1" version="5">
<description summary="dmabuf feedback">
This object advertises dmabuf parameters feedback. This includes the
preferred devices and the supported formats/modifiers.
The parameters are sent once when this object is created and whenever they
change. The done event is always sent once after all parameters have been
sent. When a single parameter changes, all parameters are re-sent by the
compositor.
Compositors can re-send the parameters when the current client buffer
allocations are sub-optimal. Compositors should not re-send the
parameters if re-allocating the buffers would not result in a more optimal
configuration. In particular, compositors should avoid sending the exact
same parameters multiple times in a row.
The tranche_target_device and tranche_formats events are grouped by
tranches of preference. For each tranche, a tranche_target_device, one
tranche_flags and one or more tranche_formats events are sent, followed
by a tranche_done event finishing the list. The tranches are sent in
descending order of preference. All formats and modifiers in the same
tranche have the same preference.
To send parameters, the compositor sends one main_device event, tranches
(each consisting of one tranche_target_device event, one tranche_flags
event, tranche_formats events and then a tranche_done event), then one
done event.
</description>
<request name="destroy" type="destructor">
<description summary="destroy the feedback object">
Using this request a client can tell the server that it is not going to
use the wp_linux_dmabuf_feedback object anymore.
</description>
</request>
<event name="done">
<description summary="all feedback has been sent">
This event is sent after all parameters of a wp_linux_dmabuf_feedback
object have been sent.
This allows changes to the wp_linux_dmabuf_feedback parameters to be
seen as atomic, even if they happen via multiple events.
</description>
</event>
<event name="format_table">
<description summary="format and modifier table">
This event provides a file descriptor which can be memory-mapped to
access the format and modifier table.
The table contains a tightly packed array of consecutive format +
modifier pairs. Each pair is 16 bytes wide. It contains a format as a
32-bit unsigned integer, followed by 4 bytes of unused padding, and a
modifier as a 64-bit unsigned integer. The native endianness is used.
The client must map the file descriptor in read-only private mode.
Compositors are not allowed to mutate the table file contents once this
event has been sent. Instead, compositors must create a new, separate
table file and re-send feedback parameters. Compositors are allowed to
store duplicate format + modifier pairs in the table.
</description>
<arg name="fd" type="fd" summary="table file descriptor"/>
<arg name="size" type="uint" summary="table size, in bytes"/>
</event>
<event name="main_device">
<description summary="preferred main device">
This event advertises the main device that the server prefers to use
when direct scan-out to the target device isn't possible. The
advertised main device may be different for each
wp_linux_dmabuf_feedback object, and may change over time.
There is exactly one main device. The compositor must send at least
one preference tranche with tranche_target_device equal to main_device.
Clients need to create buffers that the main device can import and
read from, otherwise creating the dmabuf wl_buffer will fail (see the
wp_linux_buffer_params.create and create_immed requests for details).
The main device will also likely be kept active by the compositor,
so clients can use it instead of waking up another device for power
savings.
In general the device is a DRM node. The DRM node type (primary vs.
render) is unspecified. Clients must not rely on the compositor sending
a particular node type. Clients cannot check two devices for equality
by comparing the dev_t value.
If explicit modifiers are not supported and the client performs buffer
allocations on a different device than the main device, then the client
must force the buffer to have a linear layout.
</description>
<arg name="device" type="array" summary="device dev_t value"/>
</event>
<event name="tranche_done">
<description summary="a preference tranche has been sent">
This event splits tranche_target_device and tranche_formats events in
preference tranches. It is sent after a set of tranche_target_device
and tranche_formats events; it represents the end of a tranche. The
next tranche will have a lower preference.
</description>
</event>
<event name="tranche_target_device">
<description summary="target device">
This event advertises the target device that the server prefers to use
for a buffer created given this tranche. The advertised target device
may be different for each preference tranche, and may change over time.
There is exactly one target device per tranche.
The target device may be a scan-out device, for example if the
compositor prefers to directly scan-out a buffer created given this
tranche. The target device may be a rendering device, for example if
the compositor prefers to texture from said buffer.
The client can use this hint to allocate the buffer in a way that makes
it accessible from the target device, ideally directly. The buffer must
still be accessible from the main device, either through direct import
or through a potentially more expensive fallback path. If the buffer
can't be directly imported from the main device then clients must be
prepared for the compositor changing the tranche priority or making
wl_buffer creation fail (see the wp_linux_buffer_params.create and
create_immed requests for details).
If the device is a DRM node, the DRM node type (primary vs. render) is
unspecified. Clients must not rely on the compositor sending a
particular node type. Clients cannot check two devices for equality by
comparing the dev_t value.
This event is tied to a preference tranche, see the tranche_done event.
</description>
<arg name="device" type="array" summary="device dev_t value"/>
</event>
<event name="tranche_formats">
<description summary="supported buffer format modifier">
This event advertises the format + modifier combinations that the
compositor supports.
It carries an array of indices, each referring to a format + modifier
pair in the last received format table (see the format_table event).
Each index is a 16-bit unsigned integer in native endianness.
For legacy support, DRM_FORMAT_MOD_INVALID is an allowed modifier.
It indicates that the server can support the format with an implicit
modifier. When a buffer has DRM_FORMAT_MOD_INVALID as its modifier, it
is as if no explicit modifier is specified. The effective modifier
will be derived from the dmabuf.
A compositor that sends valid modifiers and DRM_FORMAT_MOD_INVALID for
a given format supports both explicit modifiers and implicit modifiers.
Compositors must not send duplicate format + modifier pairs within the
same tranche or across two different tranches with the same target
device and flags.
This event is tied to a preference tranche, see the tranche_done event.
For the definition of the format and modifier codes, see the
wp_linux_buffer_params.create request.
</description>
<arg name="indices" type="array" summary="array of 16-bit indexes"/>
</event>
<enum name="tranche_flags" bitfield="true">
<entry name="scanout" value="1" summary="direct scan-out tranche"/>
</enum>
<event name="tranche_flags">
<description summary="tranche flags">
This event sets tranche-specific flags.
The scanout flag is a hint that direct scan-out may be attempted by the
compositor on the target device if the client appropriately allocates a
buffer. How to allocate a buffer that can be scanned out on the target
device is implementation-defined.
This event is tied to a preference tranche, see the tranche_done event.
</description>
<arg name="flags" type="uint" enum="tranche_flags" summary="tranche flags"/>
</event>
</interface>
</protocol>

177
qt/protocols/viewporter.xml Normal file
View File

@ -0,0 +1,177 @@
<?xml version="1.0" encoding="UTF-8"?>
<protocol name="viewporter">
<copyright>
Copyright © 2013-2016 Collabora, Ltd.
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
</copyright>
<interface name="wp_viewporter" version="1">
<description summary="surface cropping and scaling">
The global interface exposing surface cropping and scaling
capabilities is used to instantiate an interface extension for a
wl_surface object. This extended interface will then allow
cropping and scaling the surface contents, effectively
disconnecting the direct relationship between the buffer and the
surface size.
</description>
<request name="destroy" type="destructor">
<description summary="unbind from the cropping and scaling interface">
Informs the server that the client will not be using this
protocol object anymore. This does not affect any other objects,
wp_viewport objects included.
</description>
</request>
<enum name="error">
<entry name="viewport_exists" value="0"
summary="the surface already has a viewport object associated"/>
</enum>
<request name="get_viewport">
<description summary="extend surface interface for crop and scale">
Instantiate an interface extension for the given wl_surface to
crop and scale its content. If the given wl_surface already has
a wp_viewport object associated, the viewport_exists
protocol error is raised.
</description>
<arg name="id" type="new_id" interface="wp_viewport"
summary="the new viewport interface id"/>
<arg name="surface" type="object" interface="wl_surface"
summary="the surface"/>
</request>
</interface>
<interface name="wp_viewport" version="1">
<description summary="crop and scale interface to a wl_surface">
An additional interface to a wl_surface object, which allows the
client to specify the cropping and scaling of the surface
contents.
This interface works with two concepts: the source rectangle (src_x,
src_y, src_width, src_height), and the destination size (dst_width,
dst_height). The contents of the source rectangle are scaled to the
destination size, and content outside the source rectangle is ignored.
This state is double-buffered, see wl_surface.commit.
The two parts of crop and scale state are independent: the source
rectangle, and the destination size. Initially both are unset, that
is, no scaling is applied. The whole of the current wl_buffer is
used as the source, and the surface size is as defined in
wl_surface.attach.
If the destination size is set, it causes the surface size to become
dst_width, dst_height. The source (rectangle) is scaled to exactly
this size. This overrides whatever the attached wl_buffer size is,
unless the wl_buffer is NULL. If the wl_buffer is NULL, the surface
has no content and therefore no size. Otherwise, the size is always
at least 1x1 in surface local coordinates.
If the source rectangle is set, it defines what area of the wl_buffer is
taken as the source. If the source rectangle is set and the destination
size is not set, then src_width and src_height must be integers, and the
surface size becomes the source rectangle size. This results in cropping
without scaling. If src_width or src_height are not integers and
destination size is not set, the bad_size protocol error is raised when
the surface state is applied.
The coordinate transformations from buffer pixel coordinates up to
the surface-local coordinates happen in the following order:
1. buffer_transform (wl_surface.set_buffer_transform)
2. buffer_scale (wl_surface.set_buffer_scale)
3. crop and scale (wp_viewport.set*)
This means, that the source rectangle coordinates of crop and scale
are given in the coordinates after the buffer transform and scale,
i.e. in the coordinates that would be the surface-local coordinates
if the crop and scale was not applied.
If src_x or src_y are negative, the bad_value protocol error is raised.
Otherwise, if the source rectangle is partially or completely outside of
the non-NULL wl_buffer, then the out_of_buffer protocol error is raised
when the surface state is applied. A NULL wl_buffer does not raise the
out_of_buffer error.
If the wl_surface associated with the wp_viewport is destroyed,
all wp_viewport requests except 'destroy' raise the protocol error
no_surface.
If the wp_viewport object is destroyed, the crop and scale
state is removed from the wl_surface. The change will be applied
on the next wl_surface.commit.
</description>
<request name="destroy" type="destructor">
<description summary="remove scaling and cropping from the surface">
The associated wl_surface's crop and scale state is removed.
The change is applied on the next wl_surface.commit.
</description>
</request>
<enum name="error">
<entry name="bad_value" value="0"
summary="negative or zero values in width or height"/>
<entry name="bad_size" value="1"
summary="destination size is not integer"/>
<entry name="out_of_buffer" value="2"
summary="source rectangle extends outside of the content area"/>
<entry name="no_surface" value="3"
summary="the wl_surface was destroyed"/>
</enum>
<request name="set_source">
<description summary="set the source rectangle for cropping">
Set the source rectangle of the associated wl_surface. See
wp_viewport for the description, and relation to the wl_buffer
size.
If all of x, y, width and height are -1.0, the source rectangle is
unset instead. Any other set of values where width or height are zero
or negative, or x or y are negative, raise the bad_value protocol
error.
The crop and scale state is double-buffered, see wl_surface.commit.
</description>
<arg name="x" type="fixed" summary="source rectangle x"/>
<arg name="y" type="fixed" summary="source rectangle y"/>
<arg name="width" type="fixed" summary="source rectangle width"/>
<arg name="height" type="fixed" summary="source rectangle height"/>
</request>
<request name="set_destination">
<description summary="set the surface size for scaling">
Set the destination size of the associated wl_surface. See
wp_viewport for the description, and relation to the wl_buffer
size.
If width is -1 and height is -1, the destination size is unset
instead. Any other pair of values for width and height that
contains zero or negative values raises the bad_value protocol
error.
The crop and scale state is double-buffered, see wl_surface.commit.
</description>
<arg name="width" type="int" summary="surface width"/>
<arg name="height" type="int" summary="surface height"/>
</request>
</interface>
</protocol>

File diff suppressed because it is too large Load Diff

View File

@ -1,14 +1,30 @@
#pragma once
#include <atomic>
#include <condition_variable>
#include <cstdint>
#include <memory>
#include <mutex>
#include <QImage>
#include <QMutex>
#include <QPointer>
#include <QString>
#include <QStringList>
#include <QTimer>
#include <QWidget>
#include "ghostty.h"
#include "vulkan/Host.h"
namespace wayland {
class SubsurfacePresenter;
}
#ifndef GHASTTY_USE_VULKAN
namespace opengl {
class EglDmabufTarget;
}
#endif
class MainWindow;
class QContextMenuEvent;
@ -31,15 +47,27 @@ class OverlayScrollbar;
// One Ghostty terminal pane.
//
// libghostty's OpenGL renderer draws the terminal into an offscreen
// framebuffer owned by a private QOpenGLContext (there is no on-screen
// GL surface). Each frame is read back into a QImage and painted with
// QPainter. That keeps this an ordinary translucent QWidget, so it
// embeds in the QTabWidget / QSplitter tree and its transparent
// background composites to the desktop exactly like the rest of the
// widget chrome — avoiding QOpenGLWidget (composites opaque on Wayland)
// and an embedded QOpenGLWindow (does not present when embedded).
class GhosttySurface : public QWidget {
// Terminal pixels reach the screen via a wl_subsurface attached to
// the top-level QWindow's wl_surface (see wayland::SubsurfacePresenter).
// libghostty's renderer (Vulkan or OpenGL, picked at compile time
// via GHASTTY_USE_VULKAN) hands us a dmabuf fd per frame; we wrap
// it in a wl_buffer via zwp_linux_dmabuf_v1 and the compositor
// scans it out directly — no readback, no QPainter blit for the
// terminal area. Each pane in a split is a sibling subsurface
// under the same top-level wl_surface, positioned at its offset
// within the top-level via setPosition.
//
// This QWidget itself keeps WA_TranslucentBackground so the
// terminal area of the parent surface is transparent (the
// subsurface below shows through) and chrome (SearchBar,
// overlays, scrollbar) painted in paintEvent stays visible on top.
//
// Legacy fallback: if the compositor lacks the required Wayland
// globals (linux-dmabuf-v1, viewporter, subcompositor) or the
// renderer reports image_backed=false (NVIDIA Vulkan's
// legacy_copy path on this branch), the frame goes through a
// mmap+memcpy+QImage+QPainter::drawImage path instead.
class GhosttySurface : public QWidget, public vulkan::PresentSink {
Q_OBJECT
public:
@ -143,10 +171,67 @@ public:
void setPwd(const QString &pwd);
const QString &pwd() const { return m_pwd; }
// Apprt-side entry point for the Vulkan `present` callback. Fires
// on the renderer thread. Parks the dmabuf descriptor under
// `m_pendingMutex` (plus, for the legacy fallback path, an
// mmap+memcpy'd QImage) and wakes the GUI thread via
// `QMetaObject::invokeMethod(this, drainVulkan, Qt::QueuedConnection)`.
// The GUI thread either commits the dmabuf to the wl_subsurface
// (zero-copy) or paints the QImage (fallback). The dropped-frame
// counter `m_droppedFrames` makes any genuine queue-loss visible
// (zero in the steady state).
void presentVulkanDmabuf(
int dmabuf_fd,
quint32 drm_format,
quint64 drm_modifier,
quint32 width,
quint32 height,
quint32 stride,
bool image_backed);
// Implements `vulkan::PresentSink::presentDmabuf`. Invoked by
// `vulkan::Host`'s present-callback trampoline on the libghostty
// renderer thread. It relays every argument, unchanged and in the
// same order, to `presentVulkanDmabuf`, which holds the actual
// hand-off logic and its documentation.
void presentDmabuf(
    int dmabuf_fd,
    std::uint32_t drm_format,
    std::uint64_t drm_modifier,
    std::uint32_t width,
    std::uint32_t height,
    std::uint32_t stride,
    bool image_backed) override
{
    presentVulkanDmabuf(
        dmabuf_fd, drm_format, drm_modifier,
        width, height, stride, image_backed);
}
// GUI-thread drain step: hands the most recent pending frame
// either to the SubsurfacePresenter (zero-copy path) or the
// QImage paint pipeline (fallback). Idempotent: returns
// immediately if nothing's pending. Invoked via the queued
// invocations posted by the renderer thread and re-pumped from
// onWaylandFrameReady (the earlier polling safety-net timer has
// been removed).
Q_INVOKABLE void drainVulkan();
// Compositor frame-callback handler. Fires (on the GUI thread,
// via Wayland event-queue dispatch) when the compositor signals
// it's ready to display our next commit. Clears the in-flight
// flag and re-pumps drainVulkan to consume any frame the renderer
// parked while we were waiting. Q_INVOKABLE so it can also be
// posted via QMetaObject::invokeMethod from a queued context.
Q_INVOKABLE void onWaylandFrameReady();
// Force a wl_surface.commit on our parent native window via the
// QtWaylandClient::QWaylandWindow private API. The wl_subsurface
// is in sync mode, so child state changes only apply when the
// parent commits — but Qt's backing-store flush doesn't fire for
// a translucent QWidget with no paint damage. Calling this after
// every child commit ensures the cached child state actually
// reaches the compositor. Returns false on non-Wayland QPA or if
// the cast fails (no Qt private headers available).
bool forceParentCommit();
protected:
bool event(QEvent *) override;
void paintEvent(QPaintEvent *) override;
void resizeEvent(QResizeEvent *) override;
void moveEvent(QMoveEvent *) override;
// Disable Qt's Tab/Backtab focus traversal so those keys reach
// keyPressEvent and can be forwarded to the terminal.
@ -207,19 +292,125 @@ private:
ghostty_surface_t m_parentSurface; // inherited-config source; may be null
ghostty_surface_t m_surface = nullptr;
// Private offscreen GL context libghostty renders into.
// Private offscreen GL context libghostty renders into. Null for
// the Vulkan-backed renderer (libghostty hands frames back via a
// dmabuf fd to the apprt's `present` callback — no GL involved).
QOpenGLContext *m_context = nullptr;
QOffscreenSurface *m_offscreen = nullptr;
QOpenGLFramebufferObject *m_fbo = nullptr;
#ifndef GHASTTY_USE_VULKAN
// Dmabuf-exporting GL target (zero-copy path). Set when the EGL
// display advertises EGL_MESA_image_dma_buf_export and the
// wl_subsurface presenter is up; the renderer draws into this
// texture-backed framebuffer and we attach its fd straight to the
// subsurface — no glReadPixels, no QImage, no QPainter blit.
// Stays null when EGL support is missing or the subsurface failed
// to bring up, and the legacy m_fbo path runs as fallback.
//
// Vulkan-variant builds export dmabufs directly from
// VkDeviceMemory via VK_KHR_external_memory_fd and never touch
// EGL, so the field (and the entire EglDmabufTarget translation
// unit) is excluded from those binaries — matching the libEGL
// gating in qt/CMakeLists.txt.
std::unique_ptr<opengl::EglDmabufTarget> m_eglTarget;
#endif
QImage m_image; // last frame, read back from m_fbo
// True when this surface is using the Vulkan platform. The
// paintEvent uses this to draw a visible placeholder when no
// dmabuf has been imported yet; once
// `presentVulkanDmabuf` has filled `m_image` the placeholder
// gives way to the actual rendered content.
bool m_useVulkan = false;
// Cross-thread frame handoff for the Vulkan path. The renderer
// thread calls `presentVulkanDmabuf` with a borrowed dmabuf fd
// and posts a queued `drainVulkan` invocation; the GUI thread
// runs `drainVulkan` and routes the parked descriptor through
// either the wl_subsurface presenter (zero-copy) or the
// mmap+memcpy+QImage fallback. The dropped-frame counter
// (`m_droppedFrames`) surfaces any queue-loss that ever happens
// in practice — the earlier safety-net polling timer was
// removed once delivery was shown to be reliable.
//
// `m_useSubsurface` is set once on the GUI thread when the
// presenter comes up; the renderer thread reads it acquire-style
// to decide which path to populate per frame.
std::atomic<bool> m_useSubsurface{false};
// Subsurface (zero-copy) path: renderer thread parks the
// borrowed-fd descriptor here; the GUI thread's drainVulkan hands
// it to the presenter.
struct PendingDmabuf {
    // Borrowed dmabuf file descriptor for the parked frame.
    // NOTE(review): -1 presumably means "nothing parked" — confirm
    // against drainVulkan.
    int fd{-1};
    // Format / modifier pair; field names mirror the drm_format and
    // drm_modifier arguments of presentVulkanDmabuf.
    quint32 drm_format{0};
    quint64 drm_modifier{0};
    // Frame geometry; field names mirror the width, height and stride
    // arguments of presentVulkanDmabuf (units are not restated here).
    quint32 width{0};
    quint32 height{0};
    quint32 stride{0};
};
PendingDmabuf m_pendingDmabuf;
// Compositor-paced present gate. Now BACKPRESSURES THE RENDERER
// THREAD: presentVulkanDmabuf blocks (with a 100 ms safety
// timeout) until the compositor signals ready, so the renderer
// produces frames at the compositor's refresh rate instead of
// its own 125 FPS draw timer. Saves the GPU work + renderer-
// thread CPU that the prior GUI-side-drop model was paying for
// every wasted frame.
//
// State machine:
// - Initial: ready=true (first present goes through).
// - Renderer present: wait_for(ready || hidden); claim
// ready=false; park dmabuf; post drain.
// - GUI drain: consume + commit + register wl_surface.frame.
// - Compositor frame_done → onWaylandFrameReady: ready=true,
// notify CV. Renderer's next present unblocks immediately.
// - Hide / PlatformSurface destroy: ready=true, notify_all to
// unblock any in-flight renderer wait (predicate also checks
// m_hidden so the renderer bails without parking).
std::mutex m_compositorMutex;
std::condition_variable m_compositorCv;
bool m_compositorReady = true;
// True once drainVulkan has successfully attached a dmabuf
// whose dimensions match the widget's current device-pixel
// size. paintEvent reads this to decide whether to fill the
// terminal area with the configured background color (hides
// the otherwise-transparent flash on new-tab open) or with
// Qt::transparent (lets the subsurface buffer show through).
// Reset to false on Hide and on PlatformSurface destroy so
// the next Show re-paints the placeholder until a real frame
// is attached.
std::atomic<bool> m_subsurfaceHasFrame{false};
// Dedupes queued drainVulkan invocations posted from the renderer
// thread. Each renderer-thread `presentVulkanDmabuf` used to post
// a QueuedConnection invokeMethod unconditionally — at 125 FPS
// that's 125 Qt-event-queue allocations + dispatches per second,
// most of which no-op now that the compositor gate may not yet
// be ready. CAS to true to claim the slot; drainVulkan resets to
// false before consuming so a follow-up renderer frame can
// schedule its own drain. The pending-dmabuf "latest wins"
// semantic guarantees the renderer's newest frame is what
// drainVulkan sees regardless of how many parks happened between.
std::atomic<bool> m_drainScheduled{false};
// Legacy (mmap+memcpy) path: kept as a fallback when the
// presenter isn't available (e.g. compositor missing
// linux-dmabuf-v1). When the subsurface path is active this stays
// null and paintEvent skips its blit.
QImage m_pending;
QMutex m_pendingMutex;
// GL objects for the alpha-premultiply pass.
QOpenGLShaderProgram *m_premultProg = nullptr;
QOpenGLVertexArrayObject *m_premultVao = nullptr;
int m_fbw = 0; // framebuffer size, device pixels
int m_fbh = 0;
double m_fbDpr = 1.0; // DPR the framebuffer was sized at
// DPR the framebuffer was sized at. Atomic because the renderer
// thread reads it from `presentVulkanDmabuf` to tag the legacy
// QImage path while the GUI thread writes it from
// `syncSurfaceSize`. `double` writes aren't guaranteed atomic
// across threads on every architecture; std::atomic<double> uses
// CAS-loop fallbacks where needed.
std::atomic<double> m_fbDpr{1.0}; // DPR the framebuffer was sized at
QLabel *m_exitOverlay = nullptr; // "process exited" banner; lazily made
QLabel *m_keySeqOverlay = nullptr; // pending keybind chord; lazily made
@ -268,4 +459,46 @@ private:
// first PWD notification (libghostty fires one at spawn from the
// inherited config, then on every cwd change).
QString m_pwd;
// Wayland subsurface for the GPU-direct present path. Lazily
// created on first `QEvent::Show` once the top-level QWindow
// exists; null if the compositor lacks the required globals
// (linux-dmabuf-v1, viewporter, subcompositor), in which case
// the legacy mmap+memcpy+QImage+QPainter path renders pixels.
std::unique_ptr<wayland::SubsurfacePresenter> m_subsurfacePresenter;
// Per-surface latch for the first-dmabuf log breadcrumb so each
// pane / split prints its own line on first frame. Atomic because
// the renderer thread is what hits `presentVulkanDmabuf` and the
// first-frame check would otherwise race a sibling renderer
// thread on the same widget — relaxed CAS means at most one log
// line per surface, even under concurrent first frames.
std::atomic<bool> m_loggedFirstFrame{false};
// Count of frames overwritten in `m_pendingDmabuf` before the GUI
// thread drained them. Each overwrite is a missed compositor
// present — fd lifetime is unaffected (libghostty owns the
// dmabuf), but a sustained nonzero rate means the GUI thread is
// falling behind the renderer. Logged sparsely from
// `presentVulkanDmabuf`.
std::atomic<std::uint64_t> m_droppedFrames{0};
// Set true on QEvent::Hide, false on QEvent::Show. Guards the
// present path against a race where libghostty's renderer thread
// fires one more frame after we've detached the subsurface
// buffer on Hide — without this gate, that stray frame re-
// attaches a buffer and the now-inactive tab ghosts on top of
// whatever tab the user just switched to. `std::atomic` because
// the renderer thread reads it in `presentVulkanDmabuf` while the
// GUI thread writes it from event() (and re-checks it in
// `drainVulkan`, which runs on the GUI thread).
std::atomic<bool> m_hidden{false};
// Cache of the result of `dynamic_cast<QtWaylandClient::QWaylandWindow*>`
// for the top-level QWindow's QPA handle, used by
// `forceParentCommit`. The cast is non-trivial and the function
// is on the present hot path (called per Vulkan frame, per GL
// frame, per moveEvent, on Hide, etc.). Resolved on first
// successful call; invalidated whenever the platform-surface
// QWindow handle is recreated (PlatformSurfaceAboutToBeDestroyed
// event). Stored as void* so the header doesn't have to include
// any Qt private QPA headers; the .cpp casts back at use sites.
void *m_cachedWaylandWindow = nullptr;
};

View File

@ -65,9 +65,17 @@ XkbTracker::XkbTracker() {
if (m_keyboard == nullptr)
wl_display_roundtrip_queue(display, queue);
// The keyboard proxy is hot — move it onto the default queue so
// Qt's event loop dispatches our listeners alongside Qt's own
// input events.
// The keyboard + seat proxies are long-lived — move them onto the
// default queue so Qt's event loop dispatches our listeners
// alongside Qt's own input events, AND so they don't dangle on
// the about-to-be-destroyed private queue. Failing to migrate the
// seat caused a SIGSEGV at process exit: libwayland warned
// ("queue X destroyed while proxies still attached: wl_seat#NN")
// and then later seat events / display teardown dereferenced the
// dead queue.
if (m_seat) {
wl_proxy_set_queue(reinterpret_cast<wl_proxy *>(m_seat), nullptr);
}
if (m_keyboard) {
wl_proxy_set_queue(reinterpret_cast<wl_proxy *>(m_keyboard), nullptr);
}
@ -78,6 +86,7 @@ XkbTracker::~XkbTracker() {
// Process-wide singleton; OS reclaims at exit. Explicit teardown
// keeps leak checkers quiet and documents ownership.
if (m_keyboard) wl_keyboard_destroy(m_keyboard);
if (m_seat) wl_seat_destroy(m_seat);
if (m_state) xkb_state_unref(m_state);
if (m_keymap) xkb_keymap_unref(m_keymap);
if (m_ctx) xkb_context_unref(m_ctx);
@ -108,6 +117,12 @@ void XkbTracker::onRegistryGlobal(void *data, wl_registry *registry,
auto *seat = static_cast<wl_seat *>(
wl_registry_bind(registry, name, &wl_seat_interface, 5));
if (!seat) return;
// Stash the seat on the tracker so it outlives this callback and
// its private-queue registry. wl_seat is a long-lived proxy: we
// keep the listener alive for the full process lifetime so future
// capability changes (keyboard hot-plug, layout change) flow into
// onSeatCapabilities and we can re-bind the wl_keyboard.
self->m_seat = seat;
// Subscribe to capability changes; we'll grab the keyboard from
// the capability callback once the seat tells us it has one.
wl_seat_add_listener(seat, &kSeatListener, self);

View File

@ -94,6 +94,12 @@ class XkbTracker {
// a keymap is loaded.
uint32_t m_idxCapsLock = ~0u;
uint32_t m_idxNumLock = ~0u;
// wl_seat handle, owned by us via wl_registry_bind. Kept alive for
// the singleton's lifetime so capability changes (keyboard
// hot-plug, layout switch) keep flowing to onSeatCapabilities, and
// so the proxy isn't dangling on the private registry queue we
// destroy at the end of the ctor.
struct wl_seat *m_seat = nullptr;
// wl_keyboard handle, owned by us via wl_seat_get_keyboard.
struct wl_keyboard *m_keyboard = nullptr;
};

View File

@ -251,7 +251,7 @@ bool handleSystem(const Context &ctx, const ghostty_action_s &action) {
// abnormal threshold (default 250ms). Banner = "the process
// died unexpectedly," not "the process exited."
uint32_t threshold = 250;
config::get(&threshold, "abnormal-command-exit-runtime");
(void)config::get(&threshold, "abnormal-command-exit-runtime");
if (ce.runtime_ms < threshold) return true;
const int code = static_cast<int>(ce.exit_code);
post(src, [srcp, code]() {

View File

@ -1,4 +1,14 @@
#include <cstdio>
#include <cstdlib>
#include <cstring>
// (The atexit hook to ghastty_glslang_finalize_process that used
// to live here was removed: now that build-time SPV precompile
// is in place, the runtime libghostty no longer calls the glslang
// shim at all for built-ins, so the shim's symbols get DCE'd out
// of libghostty.so. The cosmetic FinalizeProcess+popAll cleanup
// also didn't reduce heaptrack's reported leak in practice, so
// the call wasn't pulling its weight anyway.)
#include <QApplication>
#include <QCoreApplication>
@ -22,7 +32,51 @@ static bool isCliActionInvocation(int argc, char **argv) {
return false;
}
// Default-disable MangoHud for this process. The Vulkan implicit
// layer hooks every vkQueueSubmit / vkAcquireNextImage / etc. to
// render its own overlay, which on this branch's animated-shader
// + multi-pane workload added ~25% extra main-thread CPU at idle
// (measured against a baseline of ~10% for the Wayland-buffer
// cache path). For a terminal, that's a steep tax on a feature
// users typically associate with games. A system-wide MANGOHUD=1
// (common in `~/.profile` for users who want the HUD on games) is
// explicitly OVERRIDDEN here — the user is invoking ghastty, not
// a game, and we don't want them to silently pay 25% extra CPU.
//
// Two layers of MangoHud's loading model:
//   - VK_LOADER_LAYERS_DISABLE: Vulkan loader skips the layer
//     entirely (no interception overhead).
//   - DISABLE_MANGOHUD: belt-and-suspenders if the loader didn't
//     honor the env var (older loaders) or another runtime force-
//     loaded the layer through a different path.
//
// Escape hatch: GHASTTY_ALLOW_OVERLAY=1 skips the guard entirely
// so a user who genuinely wants MangoHud on the terminal (e.g.
// debugging the renderer with the HUD's frame-time graph) can
// opt back in without removing the layer JSON system-wide.
//
// MANGOHUD and DISABLE_MANGOHUD are overwritten unconditionally:
// the whole point is to override a pre-existing MANGOHUD=1 /
// DISABLE_MANGOHUD=0. VK_LOADER_LAYERS_DISABLE is different: it is
// a comma-delimited list of globs that may already name OTHER
// layers the user disabled on purpose, so the MangoHud glob is
// appended to an existing value instead of replacing it (replacing
// would silently re-enable those layers for this process).
static void defaultDisableMangoHud() {
    if (const char *opt = ::getenv("GHASTTY_ALLOW_OVERLAY");
        opt && opt[0] == '1') return;

    ::setenv("MANGOHUD", "0", 1);
    ::setenv("DISABLE_MANGOHUD", "1", 1);

    static constexpr char kLayerGlob[] = "*MANGOHUD*";
    const char *prev = ::getenv("VK_LOADER_LAYERS_DISABLE");
    if (prev == nullptr || prev[0] == '\0') {
        ::setenv("VK_LOADER_LAYERS_DISABLE", kLayerGlob, 1);
        return;
    }
    // Already present (e.g. inherited from a parent ghastty process):
    // nothing to add, and appending again would grow the list on
    // every nested launch.
    if (std::strstr(prev, kLayerGlob) != nullptr) return;

    // strlen(prev) + ',' + glob including its NUL terminator.
    const std::size_t len = std::strlen(prev) + 1 + sizeof(kLayerGlob);
    char *merged = static_cast<char *>(std::malloc(len));
    if (merged == nullptr) {
        // Out of memory: fall back to the plain overwrite so MangoHud
        // is still disabled.
        ::setenv("VK_LOADER_LAYERS_DISABLE", kLayerGlob, 1);
        return;
    }
    std::snprintf(merged, len, "%s,%s", prev, kLayerGlob);
    // setenv copies the value, so the scratch buffer can be released.
    ::setenv("VK_LOADER_LAYERS_DISABLE", merged, 1);
    std::free(merged);
}
int main(int argc, char **argv) {
// Set the env BEFORE Qt's QApplication ctor (which can probe
// GL/Vulkan via QPA) and before the CLI action path (since
// libghostty action handlers may also touch the renderer).
defaultDisableMangoHud();
// (Build-time SPV precompile means the runtime libghostty no
// longer invokes glslang for built-in shaders, so the per-
// thread TPoolAllocator pages we used to leak from first-
// surface init don't exist on the Vulkan variant anymore. No
// atexit cleanup needed.)
// CLI action fast path: skip Qt entirely. ghostty_init parses argv
// for the `+action`; ghostty_cli_try_action runs it and exits the
// process. If something fails (unknown action, multiple actions),
@ -104,6 +158,15 @@ int main(int argc, char **argv) {
return 1;
}
// The Vulkan host is intentionally NOT bootstrapped here: doing it
// before any window is mapped on Wayland can interact badly with
// Qt's Wayland integration (the VkInstance starts grabbing display
// resources before Qt has finished its own connection setup, and
// on some compositor + driver combos the result is a process that
// runs but never actually displays a window). It's brought up
// lazily on the first surface that needs it — see
// `GhosttySurface.cpp`.
// initial-window: when false, start headless (no window mapped at
// launch). Combined with quit-after-last-window-closed=false this
// is how a user runs ghastty as a daemon for the global quick-

View File

@ -0,0 +1,275 @@
#include "EglDmabufTarget.h"
#include <cstdio>
#include <cstring>
#include <unistd.h>
#include <QOpenGLContext>
#include <QOpenGLFunctions>
#include <EGL/egl.h>
#include <EGL/eglext.h>
namespace opengl {
namespace {
// Function-pointer shapes for the two EGL_MESA_image_dma_buf_export
// entry points. They are declared locally and resolved at runtime via
// `eglGetProcAddress` (see ensureEglFns), which yields null when the
// symbol is unavailable.
using PFNeglExportDMABUFImageQueryMESA = EGLBoolean (*)(
    EGLDisplay dpy, EGLImageKHR image, int *fourcc, int *num_planes,
    EGLuint64KHR *modifiers);
using PFNeglExportDMABUFImageMESA = EGLBoolean (*)(
    EGLDisplay dpy, EGLImageKHR image, int *fds, EGLint *strides,
    EGLint *offsets);

// Process-wide table of the EGL entry points this file needs.
//   resolved  - a resolution attempt has been made (success or not).
//   available - every pointer below is non-null and usable.
struct EglFns {
    PFNEGLCREATEIMAGEKHRPROC createImage{nullptr};
    PFNEGLDESTROYIMAGEKHRPROC destroyImage{nullptr};
    PFNeglExportDMABUFImageQueryMESA queryExport{nullptr};
    PFNeglExportDMABUFImageMESA exportImage{nullptr};
    bool resolved{false};
    bool available{false};
};
// Accessor for the single function-local static EglFns table.
EglFns &eglFns() {
    static EglFns table;
    return table;
}
// Resolves the EGL entry points once per process and reports whether
// they are all usable. The first call does the work against `display`;
// every later call returns the cached verdict, whatever display is
// passed. A failed attempt is cached too (`resolved` is set before any
// check), so the diagnostics below print at most once.
bool ensureEglFns(EGLDisplay display) {
    EglFns &table = eglFns();
    if (table.resolved) return table.available;
    table.resolved = true;

    const char *extList = eglQueryString(display, EGL_EXTENSIONS);
    if (extList == nullptr) return false;

    // Whole-token lookup in the space-separated extension string, so a
    // name that is merely a prefix of a longer extension never matches.
    const auto advertises = [extList](const char *wanted) {
        const std::size_t wantedLen = std::strlen(wanted);
        const char *tok = extList;
        for (;;) {
            const char *sep = std::strchr(tok, ' ');
            const std::size_t tokLen =
                sep != nullptr ? static_cast<std::size_t>(sep - tok)
                               : std::strlen(tok);
            if (tokLen == wantedLen &&
                std::strncmp(tok, wanted, wantedLen) == 0) {
                return true;
            }
            if (sep == nullptr) return false;
            tok = sep + 1;
        }
    };

    const bool haveExtensions = advertises("EGL_KHR_image_base") &&
                                advertises("EGL_MESA_image_dma_buf_export");
    if (!haveExtensions) {
        std::fprintf(stderr,
                     "[ghastty] EglDmabufTarget: EGL display lacks "
                     "EGL_KHR_image_base or EGL_MESA_image_dma_buf_export\n");
        return false;
    }

    table.createImage = reinterpret_cast<PFNEGLCREATEIMAGEKHRPROC>(
        eglGetProcAddress("eglCreateImageKHR"));
    table.destroyImage = reinterpret_cast<PFNEGLDESTROYIMAGEKHRPROC>(
        eglGetProcAddress("eglDestroyImageKHR"));
    table.queryExport = reinterpret_cast<PFNeglExportDMABUFImageQueryMESA>(
        eglGetProcAddress("eglExportDMABUFImageQueryMESA"));
    table.exportImage = reinterpret_cast<PFNeglExportDMABUFImageMESA>(
        eglGetProcAddress("eglExportDMABUFImageMESA"));

    const bool allResolved = table.createImage != nullptr &&
                             table.destroyImage != nullptr &&
                             table.queryExport != nullptr &&
                             table.exportImage != nullptr;
    if (!allResolved) {
        std::fprintf(stderr,
                     "[ghastty] EglDmabufTarget: eglGetProcAddress returned "
                     "null for required entry points\n");
        return false;
    }

    table.available = true;
    return true;
}
// Thin wrapper over eglGetCurrentDisplay(): the EGL display bound to
// the calling thread, or EGL_NO_DISPLAY when none is current.
EGLDisplay currentEglDisplay() {
    const EGLDisplay display = eglGetCurrentDisplay();
    return display;
}
// GL constants come from <QOpenGLFunctions> indirectly via the Qt
// GL headers — GL_TEXTURE_2D / GL_RGBA8 / GL_FRAMEBUFFER etc. are
// in scope without further includes.
} // namespace
// Reports whether the dmabuf export path can be used. Requires a
// non-null, valid `ctx` and an EGL display that is current on the
// calling thread; the extension / entry-point verdict itself comes
// from ensureEglFns() and is cached per process.
bool EglDmabufTarget::available(QOpenGLContext *ctx) {
    const bool usableContext = ctx != nullptr && ctx->isValid();
    if (!usableContext) return false;

    const EGLDisplay display = currentEglDisplay();
    if (display != EGL_NO_DISPLAY) return ensureEglFns(display);

    std::fprintf(
        stderr,
        "[ghastty] EglDmabufTarget: no current EGL display (call after "
        "QOpenGLContext::makeCurrent on a Wayland QPA)\n");
    return false;
}
// Builds a dmabuf-backed render target of `width_px` x `height_px`.
//
// Sequence: GL texture -> EGLImage wrapping it -> export query
// (fourcc / plane count / modifier) -> dmabuf fd export -> framebuffer
// with the texture as colour attachment 0. Returns nullptr on any
// failure, after logging the reason to stderr where one is known.
//
// Ownership: each handle is stored on the result object as soon as it
// is acquired, so an early `return nullptr` lets the unique_ptr's
// destructor release whatever exists so far. The one exception is the
// non-zero-offset rejection, which closes the fd itself and resets
// `m_fd` so the destructor does not close it a second time.
//
// Side effects on GL state: GL_TEXTURE_2D and GL_FRAMEBUFFER are left
// bound to 0 on the current context.
std::unique_ptr<EglDmabufTarget> EglDmabufTarget::create(QOpenGLContext *ctx,
                                                         int width_px,
                                                         int height_px) {
    // Preconditions, checked in a fixed order so the one-time
    // diagnostics from ensureEglFns() appear at the same point.
    if (ctx == nullptr || !ctx->isValid()) return nullptr;
    if (width_px <= 0 || height_px <= 0) return nullptr;
    const EGLDisplay display = currentEglDisplay();
    if (display == EGL_NO_DISPLAY) return nullptr;
    if (!ensureEglFns(display)) return nullptr;
    const EglFns &egl = eglFns();
    QOpenGLFunctions *glFns = ctx->functions();
    if (glFns == nullptr) return nullptr;

    std::unique_ptr<EglDmabufTarget> result(new EglDmabufTarget());
    result->m_eglDisplay = display;
    result->m_width = width_px;
    result->m_height = height_px;

    // Step 1: RGBA8 texture with nearest filtering, storage allocated
    // but not initialised.
    unsigned int texId = 0;
    glFns->glGenTextures(1, &texId);
    if (texId == 0) return nullptr;
    result->m_texture = texId;
    glFns->glBindTexture(GL_TEXTURE_2D, texId);
    glFns->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    glFns->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    glFns->glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width_px, height_px, 0,
                        GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
    glFns->glBindTexture(GL_TEXTURE_2D, 0);

    // Step 2: wrap the texture in an EGLImage. Prefer the EGLContext
    // Qt exposes for `ctx`; fall back to whatever context is current.
    auto *nativeCtx = ctx->nativeInterface<QNativeInterface::QEGLContext>();
    const EGLContext eglContext =
        nativeCtx != nullptr
            ? reinterpret_cast<EGLContext>(nativeCtx->nativeContext())
            : eglGetCurrentContext();
    const EGLImageKHR image = egl.createImage(
        display, eglContext, EGL_GL_TEXTURE_2D_KHR,
        reinterpret_cast<EGLClientBuffer>(static_cast<uintptr_t>(texId)),
        nullptr);
    if (image == EGL_NO_IMAGE_KHR) {
        std::fprintf(stderr,
                     "[ghastty] EglDmabufTarget: eglCreateImageKHR failed (0x%x)\n",
                     eglGetError());
        return nullptr;
    }
    result->m_eglImage = image;

    // Step 3: ask what the export will look like.
    int fourcc = 0;
    int planeCount = 0;
    EGLuint64KHR modifier = 0;
    if (!egl.queryExport(display, image, &fourcc, &planeCount, &modifier)) {
        std::fprintf(stderr,
                     "[ghastty] EglDmabufTarget: eglExportDMABUFImageQueryMESA "
                     "failed (0x%x)\n",
                     eglGetError());
        return nullptr;
    }
    if (planeCount != 1) {
        // Only one fd / stride / offset triple is carried downstream,
        // so anything but a single-plane layout is refused and the
        // caller falls back to its non-dmabuf path.
        std::fprintf(stderr,
                     "[ghastty] EglDmabufTarget: refusing multi-plane export "
                     "(num_planes=%d fourcc=0x%x mod=0x%llx)\n",
                     planeCount, fourcc,
                     static_cast<unsigned long long>(modifier));
        return nullptr;
    }
    result->m_drmFormat = static_cast<std::uint32_t>(fourcc);
    result->m_drmModifier = static_cast<std::uint64_t>(modifier);

    // Step 4: export the single plane as a dmabuf fd.
    int dmabufFd = -1;
    EGLint planeStride = 0;
    EGLint planeOffset = 0;
    const bool exported =
        egl.exportImage(display, image, &dmabufFd, &planeStride, &planeOffset);
    if (!exported || dmabufFd < 0) {
        std::fprintf(stderr,
                     "[ghastty] EglDmabufTarget: eglExportDMABUFImageMESA failed "
                     "(0x%x fd=%d)\n",
                     eglGetError(), dmabufFd);
        return nullptr;
    }
    result->m_fd = dmabufFd;
    result->m_stride = static_cast<std::uint32_t>(planeStride);

    // The plane offset is not stored or forwarded, so a non-zero value
    // cannot be honoured; reject it loudly instead of presenting from
    // the wrong location. NOTE(review): the log text says the
    // SubsurfacePresenter hardcodes offset 0 — confirm in
    // SubsurfacePresenter.cpp.
    if (planeOffset != 0) {
        std::fprintf(stderr,
                     "[ghastty] EglDmabufTarget: unexpected non-zero offset=%d "
                     "from eglExportDMABUFImageMESA; SubsurfacePresenter assumes "
                     "offset=0 for single-plane LINEAR exports\n",
                     planeOffset);
        ::close(dmabufFd);
        result->m_fd = -1;
        return nullptr;
    }

    // Step 5: framebuffer with the texture as its colour attachment.
    unsigned int fboId = 0;
    glFns->glGenFramebuffers(1, &fboId);
    if (fboId == 0) return nullptr;
    result->m_framebuffer = fboId;
    glFns->glBindFramebuffer(GL_FRAMEBUFFER, fboId);
    glFns->glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
                                  GL_TEXTURE_2D, texId, 0);
    const unsigned int fbStatus = glFns->glCheckFramebufferStatus(GL_FRAMEBUFFER);
    glFns->glBindFramebuffer(GL_FRAMEBUFFER, 0);
    if (fbStatus != GL_FRAMEBUFFER_COMPLETE) {
        std::fprintf(stderr,
                     "[ghastty] EglDmabufTarget: framebuffer incomplete (0x%x)\n",
                     fbStatus);
        return nullptr;
    }

    std::fprintf(stderr,
                 "[ghastty] EglDmabufTarget: %dx%d fd=%d fourcc=0x%x mod=0x%llx "
                 "stride=%u\n",
                 width_px, height_px, dmabufFd, result->m_drmFormat,
                 static_cast<unsigned long long>(result->m_drmModifier),
                 result->m_stride);
    return result;
}
// Construction only happens through create(); the members' in-class
// defaults describe an empty target.
EglDmabufTarget::EglDmabufTarget() = default;

// Releases, in order: the GL framebuffer and texture (only when some
// QOpenGLContext is current on this thread — otherwise they are left
// alone), the EGLImage, and finally the dmabuf fd. The caller is
// expected to have made the owning context current beforehand.
EglDmabufTarget::~EglDmabufTarget() {
    if (QOpenGLContext *current = QOpenGLContext::currentContext()) {
        QOpenGLFunctions *glFns = current->functions();
        if (m_framebuffer != 0) glFns->glDeleteFramebuffers(1, &m_framebuffer);
        if (m_texture != 0) glFns->glDeleteTextures(1, &m_texture);
    }

    const bool haveImage = m_eglImage != nullptr && m_eglDisplay != nullptr;
    if (haveImage) {
        eglFns().destroyImage(m_eglDisplay, m_eglImage);
    }

    if (m_fd >= 0) {
        ::close(m_fd);
    }
}
void EglDmabufTarget::bind() const {
auto ctx = QOpenGLContext::currentContext();
if (!ctx || !m_framebuffer) return;
ctx->functions()->glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffer);
}
void EglDmabufTarget::release() const {
auto ctx = QOpenGLContext::currentContext();
if (!ctx) return;
ctx->functions()->glBindFramebuffer(GL_FRAMEBUFFER, 0);
}
} // namespace opengl

View File

@ -0,0 +1,87 @@
// Dmabuf-exporting GL render target for the OpenGL present path.
//
// libghostty's GL renderer draws into a host-owned framebuffer (see
// GhosttySurface's `m_fbo`). Today that framebuffer's pixels get
// pulled back through `glReadPixels` (via `QOpenGLFramebufferObject::toImage`)
// into a QImage, then re-uploaded to the QWidget backing store by
// QPainter. After this class is wired in, the host instead allocates
// a GL texture, wraps it as an `EGLImage` via `eglCreateImageKHR`,
// exports its memory as a dmabuf via `eglExportDMABUFImageMESA`,
// and attaches that texture to a GL framebuffer for libghostty to
// draw into. The cached dmabuf fd / fourcc / modifier / stride are
// then handed straight to the `wayland::SubsurfacePresenter` — same
// zero-copy path the Vulkan renderer's Target uses, just sourced
// from EGL instead of Vulkan.
//
// Requires `EGL_MESA_image_dma_buf_export` (checked by the static
// `available()` predicate). Wayland-only by project decision.
#pragma once
#include <cstdint>
#include <memory>
class QOpenGLContext;
namespace opengl {
// A GL framebuffer whose colour attachment is a texture exported as a
// single-plane dmabuf. Instances are created only through create();
// the object owns the texture, framebuffer, EGLImage and fd and
// releases them in its destructor. Non-copyable.
class EglDmabufTarget {
public:
    // True when the dmabuf export path is usable. `ctx` must be
    // non-null and valid, and an EGL display must be current on the
    // calling thread (the check queries the current display, so call
    // it after makeCurrent). The extension lookup is done once per
    // process and cached.
    static bool available(QOpenGLContext *ctx);

    // Builds a target of the given device-pixel size, or returns
    // nullptr on invalid arguments or any EGL / GL failure so the
    // caller can fall back to its readback path. `ctx` must be current
    // on the calling thread.
    static std::unique_ptr<EglDmabufTarget> create(QOpenGLContext *ctx,
                                                   int width_px,
                                                   int height_px);

    // The owning QOpenGLContext should be current when this runs;
    // without a current context the GL objects are not deleted.
    ~EglDmabufTarget();

    EglDmabufTarget(const EglDmabufTarget &) = delete;
    EglDmabufTarget &operator=(const EglDmabufTarget &) = delete;

    // bind() makes this target's framebuffer the GL_FRAMEBUFFER
    // binding; release() rebinds framebuffer 0. Viewport and clear
    // state are the caller's business.
    void bind() const;
    void release() const;

    // Size and dmabuf metadata, fixed at creation (a resize means a
    // new target). `stride` is the plane-0 value reported by
    // `eglExportDMABUFImageMESA`; `fd` stays owned by this object.
    int width() const { return m_width; }
    int height() const { return m_height; }
    int fd() const { return m_fd; }
    std::uint32_t drmFormat() const { return m_drmFormat; }
    std::uint64_t drmModifier() const { return m_drmModifier; }
    std::uint32_t stride() const { return m_stride; }

    // Raw GL framebuffer id, for callers that blit into this target
    // from another framebuffer (e.g. a Y-flipping glBlitFramebuffer).
    unsigned int framebuffer() const { return m_framebuffer; }

private:
    EglDmabufTarget();

    // EGL handles are kept as void* so this header needs no EGL / GL
    // includes; the .cpp passes them back to EGL.
    void *m_eglDisplay = nullptr;
    void *m_eglImage = nullptr;
    unsigned int m_texture = 0;
    unsigned int m_framebuffer = 0;
    int m_width = 0;
    int m_height = 0;
    int m_fd = -1;
    std::uint32_t m_drmFormat = 0;
    std::uint64_t m_drmModifier = 0;
    std::uint32_t m_stride = 0;
};
} // namespace opengl

View File

@ -6,17 +6,18 @@
#include <QCursor>
#include <QEasingCurve>
#include <QGuiApplication>
#include <QPropertyAnimation>
#include <QScreen>
#include <QSize>
#include <QString>
#include <QStringLiteral>
#include <QVariantAnimation>
#include <QWidget>
#include <QWindow>
#include <LayerShellQt/window.h>
#include "../config/Config.h"
#include "../wayland/AlphaModifier.h"
#include "ghostty.h"
namespace quickterm {
@ -43,14 +44,36 @@ int animationMs() {
return std::clamp(static_cast<int>(secs * 1000.0), 1, 1000);
}
// Pushes `opacity` to the widget's native window through
// wayland::AlphaModifier::setOpacity. Does nothing while the widget
// has no QWindow handle yet (i.e. before it has been shown).
//
// This deliberately avoids QWindow::setOpacity: per the original
// note, QtWayland's QPA plugin does not implement it and logs
// "This plugin does not support setting window opacity" on every
// animation tick. NOTE(review): AlphaModifier is expected to use
// wp_alpha_modifier_v1 when the compositor offers it and to be a
// no-op otherwise — confirm in wayland/AlphaModifier.
void applyOpacity(QWidget *window, double opacity) {
    if (QWindow *native = window->windowHandle(); native != nullptr) {
        wayland::AlphaModifier::setOpacity(native, opacity);
    }
}
// Lazily fetch (or build) the per-window opacity animation, parented
// to `window` so its lifetime tracks the widget's.
QPropertyAnimation *animFor(QWidget *window) {
auto *existing = window->property(kAnimProperty).value<QPropertyAnimation *>();
// to `window` so its lifetime tracks the widget's. We use
// QVariantAnimation (not QPropertyAnimation on windowOpacity) so
// the per-tick value is delivered to our applyOpacity handler
// instead of QWindow::setOpacity (which QtWayland's QPA plugin
// doesn't implement — see applyOpacity comment).
QVariantAnimation *animFor(QWidget *window) {
auto *existing = window->property(kAnimProperty).value<QVariantAnimation *>();
if (existing) return existing;
auto *anim = new QPropertyAnimation(window, "windowOpacity", window);
auto *anim = new QVariantAnimation(window);
QObject::connect(anim, &QVariantAnimation::valueChanged, window,
[window](const QVariant &v) {
applyOpacity(window, v.toDouble());
});
window->setProperty(kAnimProperty,
QVariant::fromValue<QPropertyAnimation *>(anim));
QVariant::fromValue<QVariantAnimation *>(anim));
return anim;
}
@ -167,25 +190,33 @@ void setupLayerShell(QWidget *window) {
}
void animateIn(QWidget *window) {
window->setWindowOpacity(0.0);
// Show with opacity 0 first so the compositor never paints a
// fully-opaque frame before the animation kicks in. The
// QVariantAnimation valueChanged → applyOpacity path needs the
// wl_surface to exist, which means after show(). We call
// applyOpacity twice on either side of show() — once at 0.0 as
// a best-effort pre-show (no-op if wl_surface isn't up yet),
// once at 0.0 immediately after to lock in the start state.
applyOpacity(window, 0.0);
window->show();
window->raise();
window->activateWindow();
applyOpacity(window, 0.0);
const int ms = animationMs();
if (ms <= 0) {
window->setWindowOpacity(1.0);
applyOpacity(window, 1.0);
return;
}
// Stop any running fade so toggling rapidly doesn't stack
// animations.
QPropertyAnimation *anim = animFor(window);
QVariantAnimation *anim = animFor(window);
anim->stop();
// animateOut leaves a `finished -> hide()` handler attached to the
// shared animation object. If a fade-out was interrupted by this
// fade-in (rapid out/in cycle), the leftover handler would fire at
// the end of the in-fade and silently hide the just-revealed
// window — clear it before starting.
QObject::disconnect(anim, &QPropertyAnimation::finished, window, nullptr);
QObject::disconnect(anim, &QVariantAnimation::finished, window, nullptr);
anim->setDuration(ms);
anim->setStartValue(0.0);
anim->setEndValue(1.0);
@ -199,17 +230,21 @@ void animateOut(QWidget *window) {
window->hide();
return;
}
QPropertyAnimation *anim = animFor(window);
QVariantAnimation *anim = animFor(window);
anim->stop();
anim->setDuration(ms);
anim->setStartValue(window->windowOpacity());
// Start from the animation's last delivered value if we have one
// (a rapid in-then-out cycle interrupts at some intermediate
// alpha); otherwise assume the window was fully visible.
const QVariant cur = anim->currentValue();
anim->setStartValue(cur.isValid() ? cur.toDouble() : 1.0);
anim->setEndValue(0.0);
anim->setEasingCurve(QEasingCurve::InCubic);
// Disconnect any previous handler before reconnecting; otherwise a
// toggle-out-then-in cycle accumulates handlers that all fire on
// the next out.
QObject::disconnect(anim, &QPropertyAnimation::finished, window, nullptr);
QObject::connect(anim, &QPropertyAnimation::finished, window,
QObject::disconnect(anim, &QVariantAnimation::finished, window, nullptr);
QObject::connect(anim, &QVariantAnimation::finished, window,
[window]() { window->hide(); });
anim->start();
}

267
qt/src/vulkan/Host.cpp Normal file
View File

@ -0,0 +1,267 @@
// See `Host.h` for the contract.
#include "Host.h"
#include <array>
#include <cstdio>
#include <cstring>
#include <mutex>
#include <optional>
#include <vector>
#include "../wayland/DmabufRegistry.h"
namespace vulkan {
namespace {
// Device extensions a physical device must advertise for
// hasRequiredExtensions() to accept it. The first two name the
// external-memory-fd / dma-buf extensions; the third is explained
// inline.
constexpr const char *kRequiredDeviceExtensions[] = {
"VK_KHR_external_memory_fd",
"VK_EXT_external_memory_dma_buf",
// Needed so libghostty can allocate render images with a chosen
// DRM modifier (vendor-tiled where supported) and query the
// driver-chosen layout back via
// `vkGetImageDrmFormatModifierPropertiesEXT`. Without it on the
// host's VkDevice, the device-level proc-addr lookup for that
// function returns null and Target.init fails.
"VK_EXT_image_drm_format_modifier",
};
// Returns true when `pd` advertises every extension listed in
// kRequiredDeviceExtensions.
//
// Both enumeration calls can fail, and the count may change between
// them, so their VkResult is checked instead of ignored:
//   - any error on the counting call, or a zero count -> false;
//   - VK_INCOMPLETE on the fill call still yields a valid (truncated)
//     list, which is searched as-is: a required extension missing
//     from it makes the device look unsuitable, which is the safe
//     direction;
//   - any other failure on the fill call -> false.
// The vector is trimmed to the number of entries actually written so
// the search never looks at value-initialised padding.
bool hasRequiredExtensions(VkPhysicalDevice pd) {
    uint32_t count = 0;
    if (vkEnumerateDeviceExtensionProperties(pd, nullptr, &count, nullptr) !=
            VK_SUCCESS ||
        count == 0) {
        return false;
    }

    std::vector<VkExtensionProperties> exts(count);
    const VkResult res =
        vkEnumerateDeviceExtensionProperties(pd, nullptr, &count, exts.data());
    if (res != VK_SUCCESS && res != VK_INCOMPLETE) return false;
    exts.resize(count);

    for (const char *req : kRequiredDeviceExtensions) {
        bool found = false;
        for (const auto &e : exts) {
            if (std::strcmp(e.extensionName, req) == 0) {
                found = true;
                break;
            }
        }
        if (!found) return false;
    }
    return true;
}
// Returns the index of the first queue family on `pd` whose flags
// include VK_QUEUE_GRAPHICS_BIT, or std::nullopt when the device
// reports no families or none of them supports graphics.
std::optional<uint32_t> findGraphicsQueueFamily(VkPhysicalDevice pd) {
    uint32_t familyCount = 0;
    vkGetPhysicalDeviceQueueFamilyProperties(pd, &familyCount, nullptr);
    if (familyCount == 0) return std::nullopt;

    std::vector<VkQueueFamilyProperties> families(familyCount);
    vkGetPhysicalDeviceQueueFamilyProperties(pd, &familyCount,
                                             families.data());

    uint32_t index = 0;
    while (index < familyCount) {
        const bool graphics =
            (families[index].queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0;
        if (graphics) return index;
        ++index;
    }
    return std::nullopt;
}
// ---- Platform callback trampolines ----------------------------------
//
// `ghostty_platform_vulkan_s` is a plain C ABI; the callback signatures
// take a `void *userdata` that libghostty hands back to each callback.
// The handle-lookup callbacks (instance / physical_device / device /
// queue / queue_family_index / get_instance_proc_addr) ignore the
// userdata and resolve through the process singleton — there's only
// one Vulkan setup per process. The `present` callback DOES use the
// userdata: it is cast to `PresentSink *` (presumably implemented by
// the `GhosttySurface` that owns the rendered target), so we can
// hand the dmabuf back to the right widget.
void *cbGetInstanceProcAddr(void *ud, const char *name) {
(void)ud;
auto *host = Host::instance();
if (host == nullptr) return nullptr;
auto fp = vkGetInstanceProcAddr(host->vkInstance(), name);
return reinterpret_cast<void *>(fp);
}
void *cbInstance(void *ud) {
(void)ud;
auto *host = Host::instance();
return host != nullptr ? host->vkInstance() : nullptr;
}
void *cbPhysicalDevice(void *ud) {
(void)ud;
auto *host = Host::instance();
return host != nullptr ? host->vkPhysicalDevice() : nullptr;
}
void *cbDevice(void *ud) {
(void)ud;
auto *host = Host::instance();
return host != nullptr ? host->vkDevice() : nullptr;
}
void *cbQueue(void *ud) {
(void)ud;
auto *host = Host::instance();
return host != nullptr ? host->vkQueue() : nullptr;
}
uint32_t cbQueueFamilyIndex(void *ud) {
(void)ud;
auto *host = Host::instance();
return host != nullptr ? host->vkQueueFamilyIndex() : 0;
}
// `get_supported_modifiers` trampoline: forwards to the process-wide
// Wayland dmabuf modifier registry. The userdata is unused.
//
// No lock is taken here. The registry is expected to be primed on the
// GUI thread (`wayland::primeDmabufModifierRegistry`, called from
// `GhosttySurface`'s ctor on the Vulkan branch) BEFORE the libghostty
// renderer thread for that surface exists, and to be immutable
// afterwards. If priming has not happened, the registry reports 0
// modifiers and the renderer falls back to legacy_copy mode.
size_t cbGetSupportedModifiers(void * /*ud*/, uint32_t drm_format,
                               uint64_t *out, size_t capacity) {
  const size_t count =
      ::wayland::supportedDmabufModifiers(drm_format, out, capacity);
  return count;
}
// `present` trampoline: the only callback that uses its userdata. `ud`
// is the PresentSink registered through Host::asPlatform; the frame
// description is forwarded to it unchanged. A null userdata drops the
// frame silently.
void cbPresent(void *ud, int dmabuf_fd, uint32_t drm_format,
               uint64_t drm_modifier, uint32_t width, uint32_t height,
               uint32_t stride, bool image_backed) {
  auto *const sink = static_cast<PresentSink *>(ud);
  if (sink != nullptr) {
    sink->presentDmabuf(dmabuf_fd, drm_format, drm_modifier, width, height,
                        stride, image_backed);
  }
}
} // namespace
bool Host::init() {
// ---- instance ---------------------------------------------------
VkApplicationInfo appInfo{};
appInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
appInfo.pApplicationName = "ghastty";
appInfo.applicationVersion = 1;
appInfo.pEngineName = "ghastty";
appInfo.engineVersion = 1;
appInfo.apiVersion = VK_API_VERSION_1_3;
VkInstanceCreateInfo instInfo{};
instInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
instInfo.pApplicationInfo = &appInfo;
if (vkCreateInstance(&instInfo, nullptr, &m_instance) != VK_SUCCESS) {
std::fprintf(stderr, "[vulkan] vkCreateInstance failed\n");
return false;
}
// ---- physical device -------------------------------------------
uint32_t pdCount = 0;
vkEnumeratePhysicalDevices(m_instance, &pdCount, nullptr);
if (pdCount == 0) {
std::fprintf(stderr, "[vulkan] no physical devices\n");
return false;
}
std::vector<VkPhysicalDevice> pds(pdCount);
vkEnumeratePhysicalDevices(m_instance, &pdCount, pds.data());
for (auto pd : pds) {
VkPhysicalDeviceProperties props;
vkGetPhysicalDeviceProperties(pd, &props);
if (props.apiVersion < VK_API_VERSION_1_3) continue;
if (!hasRequiredExtensions(pd)) continue;
auto qfi = findGraphicsQueueFamily(pd);
if (!qfi) continue;
m_physicalDevice = pd;
m_queueFamilyIndex = *qfi;
break;
}
if (m_physicalDevice == VK_NULL_HANDLE) {
std::fprintf(stderr,
"[vulkan] no suitable physical device "
"(need Vulkan 1.3 + external_memory_fd + dma_buf)\n");
return false;
}
// ---- logical device + queue ------------------------------------
float queuePriority = 1.0f;
VkDeviceQueueCreateInfo qci{};
qci.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
qci.queueFamilyIndex = m_queueFamilyIndex;
qci.queueCount = 1;
qci.pQueuePriorities = &queuePriority;
// libghostty's Vulkan renderer uses Vulkan 1.3 dynamic rendering
// (vkCmdBeginRendering / vkCmdEndRendering, no VkRenderPass).
// That feature has to be explicitly enabled at device creation
// time via VkPhysicalDeviceVulkan13Features.
VkPhysicalDeviceVulkan13Features vk13features{};
vk13features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES;
vk13features.dynamicRendering = VK_TRUE;
vk13features.synchronization2 = VK_TRUE;
VkDeviceCreateInfo dci{};
dci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
dci.pNext = &vk13features;
dci.queueCreateInfoCount = 1;
dci.pQueueCreateInfos = &qci;
dci.enabledExtensionCount =
static_cast<uint32_t>(std::size(kRequiredDeviceExtensions));
dci.ppEnabledExtensionNames = kRequiredDeviceExtensions;
if (vkCreateDevice(m_physicalDevice, &dci, nullptr, &m_device) != VK_SUCCESS) {
std::fprintf(stderr, "[vulkan] vkCreateDevice failed\n");
return false;
}
vkGetDeviceQueue(m_device, m_queueFamilyIndex, 0, &m_queue);
VkPhysicalDeviceProperties props;
vkGetPhysicalDeviceProperties(m_physicalDevice, &props);
std::fprintf(stderr,
"[vulkan] device ready: %s (Vulkan %u.%u.%u, qfi=%u)\n",
props.deviceName,
VK_API_VERSION_MAJOR(props.apiVersion),
VK_API_VERSION_MINOR(props.apiVersion),
VK_API_VERSION_PATCH(props.apiVersion),
m_queueFamilyIndex);
return true;
}
// Releases whatever init() managed to create, in reverse creation
// order: the logical device first, then the instance. Either handle may
// still be VK_NULL_HANDLE if init() returned early.
Host::~Host() {
  if (m_device != VK_NULL_HANDLE) {
    vkDestroyDevice(m_device, nullptr);
  }
  if (m_instance != VK_NULL_HANDLE) {
    vkDestroyInstance(m_instance, nullptr);
  }
}
// Builds the C callback table handed to libghostty. `sink` becomes the
// table's userdata and is only consumed by the `present` callback; the
// remaining callbacks ignore it and resolve through Host::instance().
ghostty_platform_vulkan_s Host::asPlatform(PresentSink *sink) const {
  ghostty_platform_vulkan_s platform{};

  // Per-surface part: where rendered frames are delivered.
  platform.userdata = sink;
  platform.present = cbPresent;

  // Process-wide part: handle lookups and the modifier registry.
  platform.get_instance_proc_addr = cbGetInstanceProcAddr;
  platform.instance = cbInstance;
  platform.physical_device = cbPhysicalDevice;
  platform.device = cbDevice;
  platform.queue = cbQueue;
  platform.queue_family_index = cbQueueFamilyIndex;
  platform.get_supported_modifiers = cbGetSupportedModifiers;

  return platform;
}
// Returns the process-wide Host, creating it on first use. The result
// is nullptr when init() failed; that outcome is latched, so later
// calls do not retry.
//
// Only Vulkan setup happens here. Priming the Wayland dmabuf modifier
// registry used to live inside this call_once as well; it moved to
// `GhosttySurface`'s ctor because it is a Wayland-protocol concern that
// `Host` does not otherwise touch.
Host *Host::instance() {
  static std::unique_ptr<Host> singleton;
  static std::once_flag initialized;
  std::call_once(initialized, [] {
    std::unique_ptr<Host> fresh(new Host());
    if (!fresh->init()) {
      // `fresh` goes out of scope here; ~Host releases any handles
      // init() created before failing.
      return;
    }
    singleton = std::move(fresh);
  });
  return singleton.get();
}
} // namespace vulkan

97
qt/src/vulkan/Host.h Normal file
View File

@ -0,0 +1,97 @@
// Vulkan host setup for the Ghastty Qt frontend.
//
// libghostty (when built with `-Drenderer=vulkan`) doesn't create
// its own VkInstance / VkDevice — the host does, then hands the
// handles down via the `ghostty_platform_vulkan_s` callback struct
// declared in `include/ghostty.h`. This class is the Qt-side owner
// of those handles.
//
// The host is process-singleton (one Vulkan instance + device shared
// across every `GhosttySurface`), constructed lazily on first use
// via `instance()`. Requires a physical device that supports
// VK_KHR_external_memory_fd, VK_EXT_external_memory_dma_buf, and
// VK_EXT_image_drm_format_modifier — all three are needed for the
// dmabuf-as-importable-image export path libghostty's Vulkan
// renderer uses to hand frames back to the host.
//
// The compositor dmabuf modifier registry that this host's
// `get_supported_modifiers` callback reads is primed elsewhere
// (in `GhosttySurface`'s ctor on the GUI thread, via
// `wayland::primeDmabufModifierRegistry` from
// `qt/src/wayland/DmabufRegistry.h`). That priming is a Wayland
// concern and used to leak into `Host::instance`'s `call_once` —
// which made `Host` (a Vulkan object) responsible for a
// Wayland-protocol concern it doesn't otherwise touch.
#pragma once
#include <cstdint>
#include <memory>
#include <vulkan/vulkan.h>
#include "ghostty.h"
namespace vulkan {
/// Receiver for a presented dmabuf-backed frame. Implemented by
/// `GhosttySurface`; abstract so `vulkan::Host` doesn't need to
/// know about the widget type. Replaces an earlier cross-TU
/// forward declaration of a free function `presentToGhosttySurface`
/// that coupled `Host.cpp` directly to `GhosttySurface.cpp`.
///
/// A pointer to the sink travels through libghostty as the platform
/// struct's `userdata` and comes back in the `present` callback.
class PresentSink {
public:
virtual ~PresentSink() = default;
/// Hand off a rendered frame. Called on the libghostty renderer
/// thread; the implementation is responsible for marshalling to
/// whatever thread it composites on. The fd is borrowed for the
/// duration of the call — implementations that need to retain
/// it must `dup()`.
///
/// `drm_format` / `drm_modifier` describe the buffer layout and
/// `width` / `height` / `stride` its geometry; all values are passed
/// through unchanged from libghostty's `present` callback.
/// NOTE(review): the exact meaning of `image_backed` is defined by
/// libghostty and is not visible from this header — confirm before
/// relying on it.
virtual void presentDmabuf(int dmabuf_fd, std::uint32_t drm_format,
std::uint64_t drm_modifier,
std::uint32_t width, std::uint32_t height,
std::uint32_t stride, bool image_backed) = 0;
};
/// Process-wide Vulkan setup. One per Ghastty process; threadsafe
/// to call `instance()` from anywhere (constructs once via
/// std::call_once on first access).
///
/// Owns the VkInstance and VkDevice and destroys them in its
/// destructor; the physical device and queue handles are not destroyed
/// separately.
class Host {
public:
/// Return the process-wide host, or nullptr if Vulkan can't be
/// brought up on this system. Cached after the first call so
/// repeated lookups are cheap. A failed bring-up is cached too:
/// later calls keep returning nullptr without retrying.
static Host *instance();
/// Build a `ghostty_platform_vulkan_s` callback struct whose
/// `present` callback delivers frames to `sink`. `sink` must
/// outlive the lifetime of any libghostty surface that was
/// configured with the returned platform struct. Other callbacks
/// (handle lookups, modifier registry) ignore `sink` and route
/// through the process singleton.
ghostty_platform_vulkan_s asPlatform(PresentSink *sink) const;
/// Raw handle accessors. The handles stay owned by this Host.
VkInstance vkInstance() const { return m_instance; }
VkPhysicalDevice vkPhysicalDevice() const { return m_physicalDevice; }
VkDevice vkDevice() const { return m_device; }
VkQueue vkQueue() const { return m_queue; }
/// Family index the queue returned by `vkQueue()` was taken from.
uint32_t vkQueueFamilyIndex() const { return m_queueFamilyIndex; }
/// Destroys the device, then the instance, if they were created.
~Host();
// No copy/move — singleton.
Host(const Host &) = delete;
Host &operator=(const Host &) = delete;
private:
// Construction and initialisation are reserved for `instance()`.
Host() = default;
// Creates the instance, picks a physical device and creates the
// logical device + queue. Returns false on failure.
bool init();
// All handles start null so the destructor can tell what was created.
VkInstance m_instance = VK_NULL_HANDLE;
VkPhysicalDevice m_physicalDevice = VK_NULL_HANDLE;
VkDevice m_device = VK_NULL_HANDLE;
VkQueue m_queue = VK_NULL_HANDLE;
uint32_t m_queueFamilyIndex = 0;
};
} // namespace vulkan

View File

@ -0,0 +1,193 @@
#include "AlphaModifier.h"
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <mutex>
#include <unordered_map>
#include <QGuiApplication>
#include <QWindow>
#include <qpa/qplatformnativeinterface.h>
#include <wayland-client.h>
#include "alpha-modifier-v1-client-protocol.h"
namespace wayland {
namespace {
// Process-wide binding. Lazily initialised on the first supported() /
// setOpacity() call inside a `std::call_once`, and read without further
// locking afterwards (call_once orders the initialising call before
// every later caller's return). Once bound it lives for the process
// lifetime — there's no clean teardown path on Wayland global teardown
// that would matter for a manager-style global.
struct GlobalState {
// Qt's wl_display; stays null when the native interface lookup fails.
wl_display *display = nullptr;
wp_alpha_modifier_v1 *manager = nullptr; // null if compositor lacks it
bool ready = false; // call_once fired (success or failure)
};
// Accessor for the single GlobalState instance (function-local static,
// constructed on first call).
GlobalState &globalState() {
  static GlobalState state;
  return state;
}
// Listener: discover wp_alpha_modifier_v1 in the registry. The
// scoped wl_event_queue we use here is destroyed before the
// listener data goes out of scope, so the registry's child
// proxies (none survive past this binding pass) are safe.
void onRegistryGlobal(void *data, wl_registry *registry, uint32_t name,
const char *interface, uint32_t /*version*/) {
auto *g = static_cast<GlobalState *>(data);
if (std::strcmp(interface, wp_alpha_modifier_v1_interface.name) != 0)
return;
// Version 1 is the only version of this staging protocol so far.
g->manager = static_cast<wp_alpha_modifier_v1 *>(
wl_registry_bind(registry, name, &wp_alpha_modifier_v1_interface, 1));
}
// wl_registry `global_remove` handler: intentionally does nothing.
void onRegistryGlobalRemove(void * /*data*/, wl_registry * /*registry*/,
                            uint32_t /*name*/) {
}
// Registry listener table used by initOnce(): `global` announcements go
// to onRegistryGlobal, `global_remove` is ignored.
const wl_registry_listener kRegistryListener = {
&onRegistryGlobal,
&onRegistryGlobalRemove,
};
// Bind the manager global lazily on first use. Idempotent under
// std::call_once. Mirrors the private-queue pattern in
// XkbTracker — and like that, we migrate the bound proxy onto
// the default queue before destroying the private queue, so
// future calls (set_multiplier, get_surface) dispatch on Qt's
// event loop instead of a dangling queue.
//
// Every exit path sets `ready`; `manager` is non-null only when the
// compositor announced wp_alpha_modifier_v1 during the roundtrip.
void initOnce() {
static std::once_flag once;
std::call_once(once, []() {
auto &g = globalState();
QPlatformNativeInterface *native =
QGuiApplication::platformNativeInterface();
if (!native) {
g.ready = true;
return;
}
g.display = static_cast<wl_display *>(
native->nativeResourceForIntegration("wl_display"));
if (!g.display) {
g.ready = true;
return;
}
// Private queue: only the registry listener below is dispatched by
// the roundtrip, not Qt's own pending events.
// NOTE(review): the results of wl_display_create_queue,
// wl_display_get_registry and the roundtrip are not checked.
wl_event_queue *queue = wl_display_create_queue(g.display);
wl_registry *registry = wl_display_get_registry(g.display);
wl_proxy_set_queue(reinterpret_cast<wl_proxy *>(registry), queue);
wl_registry_add_listener(registry, &kRegistryListener, &g);
wl_display_roundtrip_queue(g.display, queue);
wl_registry_destroy(registry);
// Migrate the manager onto the default queue BEFORE destroying
// the private one — otherwise compositor-side messages for the
// manager (none expected for this protocol, but cleanliness
// matters and Qt's event queue is the dispatch target we want
// anyway) would target a destroyed queue, the same footgun that
// produced the exit-time SIGSEGV in XkbTracker.
if (g.manager) {
wl_proxy_set_queue(reinterpret_cast<wl_proxy *>(g.manager), nullptr);
}
wl_event_queue_destroy(queue);
g.ready = true;
});
}
// Per-wl_surface alpha modifier object cache. Cached so animation
// ticks don't issue a fresh get_surface request every frame.
//
// Keyed by wl_surface* — that's stable for the wl_surface's
// lifetime, and we explicitly drop on detach(). If a QWindow is
// destroyed without detach() being called the wl_surface gets
// destroyed by Qt; the cached wp_alpha_modifier_surface_v1 would
// then be stale (and could be returned for a new wl_surface that
// happens to reuse the address), so callers MUST detach() from the
// QWindow's destruction path. Map access is from the GUI thread only.
struct Cache {
// wl_surface -> its alpha modifier object; entries are added by
// getOrCreate() and removed by AlphaModifier::detach().
std::unordered_map<wl_surface *, wp_alpha_modifier_surface_v1 *> entries;
};
// Accessor for the single per-surface cache (function-local static,
// constructed on first call).
Cache &cache() {
  static Cache instance;
  return instance;
}
// Native wl_surface backing `window`, looked up through Qt's platform
// native interface. Returns nullptr for a null window, when Qt has no
// native interface, or when Qt reports no "surface" resource for it.
wl_surface *surfaceFor(QWindow *window) {
  if (window == nullptr) {
    return nullptr;
  }
  QPlatformNativeInterface *const native =
      QGuiApplication::platformNativeInterface();
  if (native == nullptr) {
    return nullptr;
  }
  void *const raw = native->nativeResourceForWindow("surface", window);
  return static_cast<wl_surface *>(raw);
}
// Returns the alpha modifier object for `surface`, creating and caching
// it on first use. Returns nullptr when the manager global is not bound
// or the get_surface request yields no object.
wp_alpha_modifier_surface_v1 *getOrCreate(wl_surface *surface) {
  auto &entries = cache().entries;
  if (const auto hit = entries.find(surface); hit != entries.end()) {
    return hit->second;
  }

  wp_alpha_modifier_v1 *const manager = globalState().manager;
  if (manager == nullptr) {
    return nullptr;
  }

  wp_alpha_modifier_surface_v1 *const created =
      wp_alpha_modifier_v1_get_surface(manager, surface);
  if (created != nullptr) {
    entries.emplace(surface, created);
  }
  return created;
}
} // namespace
// True when the compositor announced wp_alpha_modifier_v1 and it was
// bound. Triggers the one-time binding pass on first call.
bool AlphaModifier::supported() {
  initOnce();
  const bool bound = (globalState().manager != nullptr);
  return bound;
}
// Applies an alpha multiplier to `window`'s wl_surface.
//
// `opacity` is clamped to [0.0, 1.0]; NaN is rejected. Returns false
// when opacity is NaN, the protocol is unavailable, the window has no
// native wl_surface, or the per-surface modifier object could not be
// obtained. On success the multiplier is set, the surface committed and
// the display flushed.
bool AlphaModifier::setOpacity(QWindow *window, double opacity) {
  // std::clamp passes NaN through unchanged, and rounding NaN to an
  // integer has no defined result — refuse it up front.
  if (std::isnan(opacity)) return false;

  initOnce();
  auto &g = globalState();
  if (!g.manager) return false;
  wl_surface *surface = surfaceFor(window);
  if (!surface) return false;
  auto *mod = getOrCreate(surface);
  if (!mod) return false;

  // Convert [0.0, 1.0] → [0, UINT32_MAX]. Clamp first; rounding to the
  // nearest integer matches what users expect at the endpoints
  // (1.0 → fully opaque, 0.0 → fully transparent) without off-by-one
  // drift at intermediate values. llround (long long) is used rather
  // than lround because UINT32_MAX does not fit in a 32-bit long.
  const double clamped = std::clamp(opacity, 0.0, 1.0);
  const uint32_t factor = static_cast<uint32_t>(
      std::llround(clamped * static_cast<double>(UINT32_MAX)));
  wp_alpha_modifier_surface_v1_set_multiplier(mod, factor);

  // Alpha multiplier is double-buffered on the wl_surface; the
  // change applies on the next wl_surface.commit. Commit here so
  // the caller doesn't need to know about Wayland's double-buffer
  // semantics. For Qt-managed top-level windows we don't have a
  // clean Qt API to force a parent commit, so we wl_surface.commit
  // the surface directly — same trick used elsewhere in this code
  // for subsurface state changes.
  wl_surface_commit(surface);

  // And flush so the commit reaches the compositor immediately
  // rather than sitting in libwayland-client's send buffer until
  // Qt's next event-loop iteration. Otherwise rapid animation
  // ticks would coalesce into one frame at the end of the tick
  // cycle, defeating the smooth fade.
  wl_display_flush(g.display);
  return true;
}
// Destroys and forgets the cached alpha modifier object for `window`'s
// wl_surface. Does nothing when the window has no native surface or no
// object was ever cached for it.
void AlphaModifier::detach(QWindow *window) {
  wl_surface *const surface = surfaceFor(window);
  if (surface == nullptr) {
    return;
  }

  auto &entries = cache().entries;
  const auto found = entries.find(surface);
  if (found != entries.end()) {
    wp_alpha_modifier_surface_v1_destroy(found->second);
    entries.erase(found);
  }
}
} // namespace wayland

View File

@ -0,0 +1,51 @@
// Per-window alpha multiplier via wp_alpha_modifier_v1.
//
// QtWayland's QPA plugin doesn't implement QWindow::setOpacity (it
// logs "This plugin does not support setting window opacity" on
// every call). For the QuickTerminal fade-in/out we need real
// per-surface alpha, so we drive the wp_alpha_modifier_v1 staging
// Wayland protocol ourselves.
//
// Compositor support (as of 2026-05): KWin (KDE 6+), wlroots
// (≥0.17), Hyprland — yes. mutter/GNOME — no. If the protocol
// isn't advertised, `setOpacity` returns false and the caller can
// either skip the animation or fall back to instant show/hide.
//
// Wayland-only by project decision (see feedback-qt-no-x11 memory).
#pragma once
struct wp_alpha_modifier_v1;
struct wp_alpha_modifier_surface_v1;
class QWindow;
namespace wayland {
// Static-only facade over the process-wide wp_alpha_modifier_v1
// binding; there is no per-instance state.
class AlphaModifier {
public:
// Returns true if the compositor advertises wp_alpha_modifier_v1
// and we've successfully bound it. Cheap after the first call
// (the binding is cached process-wide). Use this to decide
// whether to drive an opacity animation or fall through to
// instant show/hide.
static bool supported();
// Set the window's alpha multiplier in [0.0, 1.0]. Must be
// called on the GUI thread (the thread that owns wl_display
// dispatch). Returns false if `window`'s native wl_surface
// isn't available yet (e.g. before first show), or if the
// compositor doesn't support the protocol.
//
// The wp_alpha_modifier_surface_v1 object is created lazily per
// wl_surface and cached for the surface's lifetime — repeated
// calls during an animation just emit set_multiplier + commit.
static bool setOpacity(QWindow *window, double opacity);
// Release the per-surface alpha modifier object for this window.
// Call when the window is being destroyed (or before re-creating
// its native surface). Equivalent to set_multiplier(UINT32_MAX)
// followed by destroy on the surface object.
// NOTE(review): the implementation only destroys the object and does
// not commit the surface — confirm the multiplier reset is left to
// the protocol's destroy semantics and a later commit.
static void detach(QWindow *window);
};
} // namespace wayland

View File

@ -0,0 +1,55 @@
// Compositor dmabuf modifier registry.
//
// Process-wide read-only table of `(drm_format, [modifier])` pairs the
// compositor advertises via `zwp_linux_dmabuf_v1`. libghostty's Vulkan
// renderer queries this through the
// `ghostty_platform_vulkan_s.get_supported_modifiers` callback when
// picking a modifier the compositor will accept on attach — without
// that intersection, drivers that don't expose `COLOR_ATTACHMENT_BIT`
// for `LINEAR` (NVIDIA) can't get into Target's direct-export mode at
// all and have to fall back to the legacy CPU-readback path.
//
// Why a header of its own instead of living on
// `wayland::SubsurfacePresenter`? The presenter is per-widget; the
// registry is process-wide and read-only after a one-shot prime. They
// share `globalState()` machinery internally
// (`SubsurfacePresenter.cpp`) but their public surfaces are unrelated
// concerns.
//
// Wayland-only by project decision (the Qt frontend is Wayland-only;
// see `feedback-qt-no-x11` memory). On non-Wayland QPA both functions
// are no-ops — `primeDmabufModifierRegistry` returns immediately and
// `supportedDmabufModifiers` returns 0 — so callers can stay
// runtime-agnostic.
#pragma once
#include <cstddef>
#include <cstdint>
namespace wayland {
// Eagerly discover the compositor's dmabuf modifier list on the
// CALLING THREAD. MUST be called from the GUI thread before any
// `supportedDmabufModifiers` reader runs (typically the libghostty
// renderer thread). Safe to call multiple times — discovery happens
// exactly once via the underlying `globalState`'s latched `searched`
// flag.
//
// Idempotent no-op if the QPA isn't Wayland or the
// QPlatformNativeInterface lookup fails.
//
// Implemented in `SubsurfacePresenter.cpp`. Discovery is synchronous:
// it performs Wayland roundtrips before returning.
void primeDmabufModifierRegistry();
// Read the cached compositor-supported DRM modifiers for the given
// DRM_FORMAT_* fourcc. Returns the number of modifiers actually
// written to `out` (capped at `capacity`). Pass `out=nullptr,
// capacity=0` to query the total count; the total is also returned,
// with nothing written, whenever `out` is null or `capacity` is 0.
//
// Thread-safe for readers once `primeDmabufModifierRegistry` has
// returned. Returns 0 if the registry hasn't been primed yet or the
// format isn't advertised.
std::size_t supportedDmabufModifiers(std::uint32_t drm_format,
std::uint64_t *out,
std::size_t capacity);
} // namespace wayland

View File

@ -0,0 +1,785 @@
#include "SubsurfacePresenter.h"
#include "DmabufRegistry.h"
#include <algorithm>
#include <climits>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <sys/stat.h> // ::fstat — wl_buffer cache identity via st_ino
#include <unordered_map>
#include <vector>
#include <QGuiApplication>
#include <QLatin1String>
#include <QWindow>
#include <qpa/qplatformnativeinterface.h>
#include <wayland-client.h>
#include "fractional-scale-v1-client-protocol.h"
#include "linux-dmabuf-v1-client-protocol.h"
#include "viewporter-client-protocol.h"
namespace wayland {
namespace {
// Process-wide bindings for the Wayland globals the presenter needs,
// plus the (format → modifiers) table the compositor advertises via
// zwp_linux_dmabuf_v1's format/modifier events. Populated once by
// `discoverGlobals` on the GUI thread; subsequent reads from the
// renderer thread are safe because the table is never mutated after
// the initial discovery completes.
struct PresenterGlobals {
// Bound global proxies; each stays null when the compositor does not
// announce the interface (or, for dmabuf, announces it below v3).
wl_compositor *compositor = nullptr;
wl_subcompositor *subcompositor = nullptr;
zwp_linux_dmabuf_v1 *dmabuf = nullptr;
wp_viewporter *viewporter = nullptr;
wp_fractional_scale_manager_v1 *fractionalScale = nullptr;
// DRM fourcc -> every modifier announced for it, in arrival order.
std::unordered_map<uint32_t, std::vector<uint64_t>> modifiers;
// Latched at the START of discoverGlobals, i.e. before the table is
// filled; it guards against repeating discovery, it does not by
// itself signal that discovery has finished.
bool searched = false;
};
// Accessor for the single PresenterGlobals instance (function-local
// static, constructed on first call).
PresenterGlobals &globalState() {
  static PresenterGlobals instance;
  return instance;
}
// Pre-v4 dmabuf format event. We ignore it: v3 also fires `modifier`
// events for every (format, modifier) tuple including LINEAR — the
// `format` event is legacy from v1/v2 when modifiers didn't exist.
// zwp_linux_dmabuf_v1 `format` handler: intentionally empty, the
// `modifier` handler collects everything we use.
void dmabufFormat(void * /*data*/, zwp_linux_dmabuf_v1 * /*dmabuf*/,
                  uint32_t /*format*/) {
}
// `modifier` event: compositor advertises one (format, modifier) it
// can scan out. Fires once per pair during the bind roundtrip; we
// stash them all in the per-format vector. Only fires from inside
// `discoverGlobals` because we keep the dmabuf proxy on a private
// queue that's never dispatched after discovery — see the queue-
// retention comment in `discoverGlobals`. That guarantee is what
// lets the renderer thread read `globals.modifiers` without a
// lock, and is also why we don't bother deduping (one bind round
// only fires each pair once).
void dmabufModifier(void *data, zwp_linux_dmabuf_v1 *, uint32_t format,
uint32_t modifier_hi, uint32_t modifier_lo) {
auto *g = static_cast<PresenterGlobals *>(data);
const uint64_t modifier =
(static_cast<uint64_t>(modifier_hi) << 32) | modifier_lo;
g->modifiers[format].push_back(modifier);
}
// Listener table for the dmabuf global, attached in registryGlobal():
// `format` events are ignored, `modifier` events fill the table.
const zwp_linux_dmabuf_v1_listener kDmabufListener = {
dmabufFormat,
dmabufModifier,
};
void registryGlobal(void *data, wl_registry *registry, uint32_t name,
const char *interface, uint32_t version) {
auto *g = static_cast<PresenterGlobals *>(data);
if (std::strcmp(interface, wl_compositor_interface.name) == 0) {
// Bind wl_compositor at version 3+ so child wl_surfaces we
// create support `set_buffer_scale` (added in v3, used by the
// presenter on HiDPI displays). Cap at v6 (the highest we've
// tested against); if the compositor advertises less, take
// what we get and `presentDmabuf` will skip the buffer_scale
// call on those compositors.
const uint32_t v = std::min<uint32_t>(version, 6u);
g->compositor = static_cast<wl_compositor *>(
wl_registry_bind(registry, name, &wl_compositor_interface, v));
} else if (std::strcmp(interface, wl_subcompositor_interface.name) == 0) {
g->subcompositor = static_cast<wl_subcompositor *>(
wl_registry_bind(registry, name, &wl_subcompositor_interface, 1));
} else if (std::strcmp(interface, zwp_linux_dmabuf_v1_interface.name) == 0) {
// We want at least v3 for `create_immed` (synchronous wl_buffer
// creation — v1/v2 have only the async `create` + `created`/
// `failed` dance). A compositor that only advertises v1/v2
// can't satisfy our protocol assumptions; binding at v3 against
// such a compositor would protocol-error and tear down the
// entire wl_display. Skip the bind in that case so the
// legacy QImage fallback engages cleanly.
if (version < 3) {
std::fprintf(stderr,
"[ghastty] wayland: linux-dmabuf-v1 advertised at "
"version %u; need >= 3 for create_immed, falling back "
"to QImage path\n",
version);
} else {
// Cap at v3 — v4 adds the dynamic format/modifier feedback
// dance which we don't consume.
const uint32_t v = std::min<uint32_t>(version, 3u);
g->dmabuf = static_cast<zwp_linux_dmabuf_v1 *>(wl_registry_bind(
registry, name, &zwp_linux_dmabuf_v1_interface, v));
// Add the listener immediately so the modifier events queued
// by the bind get delivered when the dispatch loop continues.
zwp_linux_dmabuf_v1_add_listener(g->dmabuf, &kDmabufListener, g);
}
} else if (std::strcmp(interface, wp_viewporter_interface.name) == 0) {
g->viewporter = static_cast<wp_viewporter *>(
wl_registry_bind(registry, name, &wp_viewporter_interface, 1));
} else if (std::strcmp(
interface, wp_fractional_scale_manager_v1_interface.name) == 0) {
g->fractionalScale = static_cast<wp_fractional_scale_manager_v1 *>(
wl_registry_bind(registry, name,
&wp_fractional_scale_manager_v1_interface, 1));
}
}
// wl_registry `global_remove` handler: intentionally does nothing.
void registryGlobalRemove(void * /*data*/, wl_registry * /*registry*/,
                          uint32_t /*name*/) {
}
// Registry listener table used by discoverGlobals(): `global`
// announcements go to registryGlobal, `global_remove` is ignored.
const wl_registry_listener kRegistryListener = {
registryGlobal,
registryGlobalRemove,
};
// One-shot discovery of the Wayland globals and the dmabuf modifier
// table. The first call does the work on a private event queue; every
// later call returns the same PresenterGlobals immediately. Always
// returns a non-null pointer — callers check the individual proxy
// fields to see what was found.
PresenterGlobals *discoverGlobals(wl_display *display) {
PresenterGlobals &globals = globalState();
if (globals.searched) return &globals;
// Latched before any work is done, so a failed discovery is never
// retried.
globals.searched = true;
wl_event_queue *queue = wl_display_create_queue(display);
wl_registry *registry = wl_display_get_registry(display);
wl_proxy_set_queue(reinterpret_cast<wl_proxy *>(registry), queue);
wl_registry_add_listener(registry, &kRegistryListener, &globals);
// Roundtrip 1: bind compositor/subcompositor/dmabuf. Inside the
// registry callback we attach the dmabuf listener immediately, so
// any format/modifier events that arrive in the same dispatch
// pass fire on it. A negative return means the wl_display
// disconnected mid-startup; subsequent tryCreate calls fall
// through to the QImage path (g->compositor etc. stay null).
// NOTE(review): globals bound before the failure are not reset here,
// so "stay null" only holds if the failure precedes every bind.
if (wl_display_roundtrip_queue(display, queue) < 0) {
std::fprintf(stderr,
"[ghastty] wayland: discoverGlobals roundtrip 1 failed; "
"subsurface present path disabled\n");
}
wl_registry_destroy(registry);
// Roundtrip 2: belt-and-suspenders for any compositor that defers
// the modifier events past the bind reply (most don't, but some
// batch them). After this returns the modifier table is fully
// populated and frozen for the process lifetime.
if (globals.dmabuf && wl_display_roundtrip_queue(display, queue) < 0) {
std::fprintf(stderr,
"[ghastty] wayland: discoverGlobals roundtrip 2 failed; "
"modifier table is incomplete — disabling dmabuf path\n");
// Drop whatever modifier entries we did get. A partially-
// populated table is dangerous: presentDmabuf would treat it
// as authoritative, hand a "supported" modifier to the
// compositor that the compositor may actually not accept, and
// the resulting `invalid_format` is a FATAL protocol error
// that kills the entire wl_display. Falling back to QImage
// path (modifiers map empty → tryCreate's checks fail / the
// Vulkan renderer drops to legacy_copy mode) is much safer.
globals.modifiers.clear();
// The proxy itself is not destroyed, only forgotten.
globals.dmabuf = nullptr;
}
// Diagnostic summary of what was collected.
std::size_t total_mods = 0;
for (const auto &kv : globals.modifiers) total_mods += kv.second.size();
std::fprintf(stderr,
"[ghastty] wayland: discovered %zu dmabuf (format,modifier) "
"pairs across %zu formats\n",
total_mods, globals.modifiers.size());
// Move the bound proxies back to the default queue so Qt's main
// dispatch drives subsequent events on them, then drop the private
// queue. (Same lifecycle dance as `blurManager`.)
//
// EXCEPT the dmabuf proxy: its listener mutates `globals.modifiers`
// on every `modifier` event, and the renderer thread reads that
// map from `supportedDmabufModifiers` without locking. If we
// moved the proxy back to the default queue, a compositor
// restart / hot-plug fires more `modifier` events that would
// race the reader. Keep the proxy on `queue` and intentionally
// never dispatch that queue again — the events queue up
// harmlessly and are reaped at proxy destruction. The map is
// genuinely frozen post-discovery now.
if (globals.compositor)
wl_proxy_set_queue(reinterpret_cast<wl_proxy *>(globals.compositor),
nullptr);
if (globals.subcompositor)
wl_proxy_set_queue(reinterpret_cast<wl_proxy *>(globals.subcompositor),
nullptr);
if (globals.viewporter)
wl_proxy_set_queue(reinterpret_cast<wl_proxy *>(globals.viewporter),
nullptr);
if (globals.fractionalScale)
wl_proxy_set_queue(reinterpret_cast<wl_proxy *>(globals.fractionalScale),
nullptr);
// We deliberately leak `queue` (and leave globals.dmabuf attached
// to it) for the process lifetime — destroying it would put dmabuf
// events back on the default queue. NOTE(review): the queue is a
// client-side libwayland object, presumably small; the original
// "kernel-side buffer" wording looked inaccurate — confirm.
return &globals;
}
// Qt's wl_display, or nullptr when the platform plugin's name does not
// start with "wayland" or Qt exposes no native interface.
wl_display *acquireWaylandDisplay() {
  const bool onWayland =
      QGuiApplication::platformName().startsWith(QLatin1String("wayland"));
  if (!onWayland) {
    return nullptr;
  }

  if (QPlatformNativeInterface *native =
          QGuiApplication::platformNativeInterface()) {
    void *const raw = native->nativeResourceForIntegration("wl_display");
    return static_cast<wl_display *>(raw);
  }
  return nullptr;
}
// wl_buffer::release listener: the compositor is done sampling the
// buffer for any committed surface state. We KEEP the wl_buffer
// alive across releases — libghostty re-uses the same dmabuf fd
// across frames until resize, so we re-attach the cached wl_buffer
// on every present (see `m_cachedBuffer` in the header). The buffer
// is destroyed only when (a) the dmabuf shape changes (next
// `presentDmabuf` invalidates the cache) or (b) the presenter is
// destroyed.
//
// The underlying dmabuf memory is owned by libghostty; we never
// close that fd here (the SCM_RIGHTS transfer in
// zwp_linux_buffer_params.add gave the compositor its own
// reference, which lives independently of our wl_buffer).
// wl_buffer `release` handler: deliberately empty. The wl_buffer is
// kept cached for re-attachment rather than destroyed on release.
void bufferRelease(void * /*data*/, wl_buffer * /*buffer*/) {
}
// wl_buffer listener table: `release` is routed to the no-op
// bufferRelease.
const wl_buffer_listener kBufferListener = {
bufferRelease,
};
// wl_callback::done listener for compositor-paced presents. Single-
// shot per callback — the proxy is destroyed here and the
// presenter's m_frameCallback field is cleared so the next present
// knows to register a fresh one. After cleanup, invoke the
// presenter's onFrameReady hook (set by GhosttySurface to pump the
// next pending frame).
void frameCallbackDone(void *data, wl_callback *cb, uint32_t /*time*/) {
auto *p = static_cast<wayland::SubsurfacePresenter *>(data);
// Defensive: if the listener fires after the proxy was destroyed
// by ~SubsurfacePresenter (Wayland guarantees no events on a
// destroyed proxy, so this shouldn't happen, but if a future
// refactor destroys the presenter before flushing the queue we'd
// rather no-op than UAF).
if (!p) {
wl_callback_destroy(cb);
return;
}
p->onFrameCallbackDone(cb);
}
// wl_callback listener table for frame callbacks: `done` is routed to
// frameCallbackDone.
const wl_callback_listener kFrameCallbackListener = {
frameCallbackDone,
};
} // namespace
void primeDmabufModifierRegistry() {
if (wl_display *display = acquireWaylandDisplay()) {
(void)discoverGlobals(display);
}
}
// Report the DRM modifiers recorded for `drm_format` in the shared
// presenter state.
//
// Two-pass usage:
//   * `out == nullptr` or `capacity == 0`: nothing is written and the
//     total number of recorded modifiers is returned, so the caller can
//     size a buffer.
//   * otherwise: up to `capacity` modifiers are copied into `out` and the
//     number actually written is returned.
//
// Returns 0 when discovery has not run yet (`searched` is false) or the
// format has no entry.
std::size_t supportedDmabufModifiers(std::uint32_t drm_format,
                                     std::uint64_t *out,
                                     std::size_t capacity) {
    const PresenterGlobals &g = globalState();
    if (!g.searched) return 0;
    const auto it = g.modifiers.find(drm_format);
    if (it == g.modifiers.end()) return 0;
    const std::size_t available = it->second.size();
    if (out == nullptr || capacity == 0) return available;
    const std::size_t copied = std::min(available, capacity);
    // An entry with an empty modifier list may expose a null data()
    // pointer, and memcpy with a null source is undefined behaviour even
    // for a zero-byte copy — so skip the call entirely in that case.
    if (copied != 0) {
        std::memcpy(out, it->second.data(), copied * sizeof(std::uint64_t));
    }
    return copied;
}
// Create a presenter whose child wl_surface is a subsurface of
// `topLevel`'s native wl_surface.
//
// Returns nullptr when `topLevel` is null, the QPA platform is not
// Wayland, Qt cannot hand out the wl_display / parent wl_surface, a
// required global (wl_compositor, wl_subcompositor, zwp_linux_dmabuf_v1,
// wp_viewporter) is missing, any Wayland object fails to be created, or
// the display reports an error after setup. Every object created up to
// the point of failure is destroyed again before returning.
std::unique_ptr<SubsurfacePresenter>
SubsurfacePresenter::tryCreate(QWindow *topLevel) {
    if (topLevel == nullptr) return nullptr;

    const bool onWayland =
        QGuiApplication::platformName().startsWith(QLatin1String("wayland"));
    if (!onWayland) {
        std::fprintf(stderr,
                     "[ghastty] SubsurfacePresenter: not on Wayland QPA\n");
        return nullptr;
    }

    QPlatformNativeInterface *nativeIface =
        QGuiApplication::platformNativeInterface();
    if (nativeIface == nullptr) return nullptr;

    auto *display = static_cast<wl_display *>(
        nativeIface->nativeResourceForIntegration("wl_display"));
    auto *parentSurface = static_cast<wl_surface *>(
        nativeIface->nativeResourceForWindow("surface", topLevel));
    if (display == nullptr || parentSurface == nullptr) {
        std::fprintf(stderr,
                     "[ghastty] SubsurfacePresenter: missing wl_display or "
                     "parent wl_surface (display=%p surface=%p)\n",
                     static_cast<void *>(display),
                     static_cast<void *>(parentSurface));
        return nullptr;
    }

    // wp_fractional_scale_manager_v1 is NOT in the required set below: it
    // is optional. Without it we assume scale 1.0 and rely on
    // wp_viewport.set_destination alone.
    PresenterGlobals *globals = discoverGlobals(display);
    const bool haveRequiredGlobals = globals->compositor &&
                                     globals->subcompositor &&
                                     globals->dmabuf && globals->viewporter;
    if (!haveRequiredGlobals) {
        std::fprintf(
            stderr,
            "[ghastty] SubsurfacePresenter: compositor missing required globals "
            "(compositor=%p subcompositor=%p dmabuf=%p viewporter=%p)\n",
            static_cast<void *>(globals->compositor),
            static_cast<void *>(globals->subcompositor),
            static_cast<void *>(globals->dmabuf),
            static_cast<void *>(globals->viewporter));
        return nullptr;
    }

    wl_surface *childSurface =
        wl_compositor_create_surface(globals->compositor);
    if (childSurface == nullptr) return nullptr;

    wl_subsurface *subsurface = nullptr;
    wp_viewport *viewport = nullptr;
    wp_fractional_scale_v1 *fractionalScale = nullptr;

    // Shared failure path: tear down whatever exists so far, newest
    // object first, and yield the nullptr result.
    const auto abandon = [&]() -> std::unique_ptr<SubsurfacePresenter> {
        if (fractionalScale) wp_fractional_scale_v1_destroy(fractionalScale);
        if (viewport) wp_viewport_destroy(viewport);
        if (subsurface) wl_subsurface_destroy(subsurface);
        wl_surface_destroy(childSurface);
        return nullptr;
    };

    subsurface = wl_subcompositor_get_subsurface(globals->subcompositor,
                                                 childSurface, parentSurface);
    if (subsurface == nullptr) return abandon();

    // The subsurface is left in sync mode (the wl_subsurface default):
    // wl_surface.commit on the child is cached until the parent commits,
    // and then both apply atomically. That is what gives lockstep
    // resize — the parent growing and our matching new-size buffer land
    // in the same compositor frame, with no gap.
    //
    // The price is that nothing of ours applies until the parent
    // commits. Qt's backing-store flush doesn't fire for our translucent
    // QWidget (paintEvent produces no damage), so GhosttySurface forces
    // the parent commit explicitly via
    // QtWaylandClient::QWaylandWindow::commit() (Qt6::WaylandClient-
    // Private) after every child commit + viewport update; see
    // `forceParentCommit` in GhosttySurface.cpp. An earlier desync-mode
    // attempt avoided the Qt-private dependency but couldn't deliver
    // lockstep resize, because the two surfaces commit independently in
    // that mode.

    // Start at (0,0) in parent-surface coordinates. GhosttySurface calls
    // setPosition right after tryCreate returns with the pane's real
    // offset inside the top-level, and again on every moveEvent /
    // resizeEvent.
    wl_subsurface_set_position(subsurface, 0, 0);

    // Stack BELOW the parent. Qt's child widgets (SearchBar, overlays,
    // scrollbar, exit/health/link/resize hints) are painted into the
    // parent's backing store, and Wayland's default stacking puts a
    // subsurface *above* its parent, which would hide all of them. With
    // place_below the parent QWidget renders on top;
    // WA_TranslucentBackground makes the terminal area of the parent
    // transparent so the subsurface shows through, while the chrome
    // painted by paintEvent stays visible.
    wl_subsurface_place_below(subsurface, parentSurface);

    // Give the child an empty input region so pointer/touch events fall
    // through to the parent surface (Qt's QWindow). The default input
    // region is the whole attached buffer, which would swallow every
    // click in the terminal area — Qt's QWidget would never see
    // contextMenuEvent (right-click menu), mouse press/release, or any
    // other pointer event there. A wl_region with no add_rectangle calls
    // is empty, i.e. "no input". It can be destroyed right after
    // set_input_region because the compositor copies its state into the
    // surface's pending state.
    if (wl_region *noInput = wl_compositor_create_region(globals->compositor)) {
        wl_surface_set_input_region(childSurface, noInput);
        wl_region_destroy(noInput);
    }

    // wp_viewport lets us state the destination size in surface-local
    // coordinates independently of the buffer's pixel dimensions. With
    // fractional scaling we render at, say, 960x720 device pixels into
    // an 800x600 surface area and the viewport does the mapping.
    viewport = wp_viewporter_get_viewport(globals->viewporter, childSurface);
    if (viewport == nullptr) return abandon();

    // wp_fractional_scale_v1 subscribes to the compositor's per-surface
    // preferred scale. Optional: if the manager global is absent the
    // presenter keeps its default of 120 (= 1.0×).
    if (globals->fractionalScale) {
        fractionalScale = wp_fractional_scale_manager_v1_get_fractional_scale(
            globals->fractionalScale, childSurface);
    }

    wl_display_flush(display);
    const int displayError = wl_display_get_error(display);
    if (displayError != 0) {
        std::fprintf(stderr,
                     "[ghastty] SubsurfacePresenter: wl_display error %d after "
                     "subsurface creation\n",
                     displayError);
        return abandon();
    }

    std::fprintf(stderr,
                 "[ghastty] SubsurfacePresenter: ready (parent=%p child=%p "
                 "sub=%p dmabuf=%p viewport=%p frac_scale=%p)\n",
                 static_cast<void *>(parentSurface),
                 static_cast<void *>(childSurface),
                 static_cast<void *>(subsurface),
                 static_cast<void *>(globals->dmabuf),
                 static_cast<void *>(viewport),
                 static_cast<void *>(fractionalScale));

    // The constructor is private, so std::make_unique can't be used here.
    return std::unique_ptr<SubsurfacePresenter>(
        new SubsurfacePresenter(display, childSurface, subsurface,
                                globals->dmabuf, viewport, fractionalScale));
}
// Listener table for wp_fractional_scale_v1; registered by the
// constructor when the optional fractional-scale object exists.
const wp_fractional_scale_v1_listener kFractionalScaleListener{
    SubsurfacePresenter::onPreferredScale,
};

// wp_fractional_scale_v1.preferred_scale handler. `data` is the owning
// presenter and `scale` is expressed in 1/120ths (120 = 1.0×). A changed
// value is logged to stderr and stored in m_preferredScale120; an
// unchanged value is ignored.
void SubsurfacePresenter::onPreferredScale(void *data,
                                           wp_fractional_scale_v1 *,
                                           uint32_t scale) {
    // A zero scale is meaningless; treat it as a compositor bug and
    // keep the previous value.
    if (scale == 0) return;

    auto *presenter = static_cast<SubsurfacePresenter *>(data);
    if (presenter->m_preferredScale120 == scale) return;

    const double asFactor = static_cast<double>(scale) / 120.0;
    std::fprintf(stderr,
                 "[ghastty] SubsurfacePresenter: preferred scale %u/120 = "
                 "%.3f\n",
                 scale, asFactor);
    presenter->m_preferredScale120 = scale;
}
SubsurfacePresenter::SubsurfacePresenter(wl_display *display, wl_surface *child,
wl_subsurface *sub,
zwp_linux_dmabuf_v1 *dmabuf,
wp_viewport *viewport,
wp_fractional_scale_v1 *frac_scale)
: m_display(display),
m_childSurface(child),
m_subsurface(sub),
m_dmabuf(dmabuf),
m_viewport(viewport),
m_fractionalScale(frac_scale) {
if (m_fractionalScale) {
wp_fractional_scale_v1_add_listener(m_fractionalScale,
&kFractionalScaleListener, this);
}
}
// Release every Wayland object this presenter owns, then flush so the
// destroy requests reach the compositor.
SubsurfacePresenter::~SubsurfacePresenter() {
    // The outstanding frame callback goes first. Its listener data is
    // `this`; once the proxy is destroyed Wayland guarantees no further
    // events for it, so later dispatches of the event queue can't call
    // back into a dead presenter.
    if (m_frameCallback != nullptr) {
        wl_callback_destroy(m_frameCallback);
        m_frameCallback = nullptr;
    }

    // Next the cached wl_buffer, BEFORE the child surface it may still
    // be attached to. Destroying it is safe whether or not the
    // compositor has released it (again: no events on a destroyed
    // proxy).
    if (m_cachedBuffer != nullptr) {
        wl_buffer_destroy(m_cachedBuffer);
        m_cachedBuffer = nullptr;
    }

    // Per-surface extension objects, then the subsurface role, then the
    // surface itself.
    if (m_fractionalScale != nullptr) {
        wp_fractional_scale_v1_destroy(m_fractionalScale);
    }
    if (m_viewport != nullptr) {
        wp_viewport_destroy(m_viewport);
    }
    if (m_subsurface != nullptr) {
        wl_subsurface_destroy(m_subsurface);
    }
    if (m_childSurface != nullptr) {
        wl_surface_destroy(m_childSurface);
    }

    if (m_display != nullptr) {
        wl_display_flush(m_display);
    }
}
// Finish a fired wl_surface.frame callback: release the spent proxy,
// free the m_frameCallback slot, and tell the consumer the compositor is
// ready for another frame.
void SubsurfacePresenter::onFrameCallbackDone(wl_callback *cb) {
    // Only clear the slot when `cb` is the callback we are tracking. A
    // mismatch shouldn't happen — the listener data routes to exactly
    // this presenter and at most one callback is outstanding — but the
    // check keeps a future refactor from clobbering a newer callback.
    const bool isTrackedCallback = (cb == m_frameCallback);
    if (isTrackedCallback) {
        m_frameCallback = nullptr;
    }

    // The callback is single-shot, so the proxy is destroyed either way.
    wl_callback_destroy(cb);

    // Hand control to the consumer (e.g. GhosttySurface). This runs on
    // the thread that pumps Wayland events (the Qt GUI thread), so the
    // hook may touch GUI-thread state directly.
    if (!m_onFrameReady) return;
    m_onFrameReady();
}
void SubsurfacePresenter::presentDmabuf(int fd, uint32_t drm_format,
uint64_t drm_modifier, uint32_t width,
uint32_t height, uint32_t stride,
int dest_width, int dest_height,
bool y_invert) {
if (fd < 0 || !m_dmabuf || !m_childSurface || !m_viewport) return;
if (dest_width <= 0) dest_width = 1;
if (dest_height <= 0) dest_height = 1;
// System-boundary input validation. width/height/stride flow in
// from libghostty's renderer thread and are about to be passed
// verbatim to the compositor. linux-dmabuf-v1 protocol errors
// (`invalid_dimensions`, `invalid_format`, etc.) are FATAL — they
// tear down the entire wl_display, killing every window in the
// process. We MUST reject malformed inputs locally rather than
// letting the compositor do it.
//
// Specifically reject: zero dimensions or stride, or any value
// that would silently flip negative when cast to int32_t at the
// create_immed call below (the wayland C API takes signed ints
// for dimensions; uint32_t >= 2^31 wraps to negative).
constexpr uint32_t kMaxDim = static_cast<uint32_t>(INT32_MAX);
if (width == 0 || height == 0 || stride == 0 ||
width > kMaxDim || height > kMaxDim || stride > kMaxDim) {
std::fprintf(stderr,
"[ghastty] SubsurfacePresenter: rejecting dmabuf with "
"out-of-range dimensions (w=%u h=%u stride=%u)\n",
width, height, stride);
return;
}
// Stride sanity: must be at least 4 bytes per pixel for
// 32-bit ARGB/XRGB/etc. — the only formats this presenter
// currently advertises support for. Tighter than the protocol's
// minimum but matches what the compositor will accept on attach.
if (stride < static_cast<uint64_t>(width) * 4) {
std::fprintf(stderr,
"[ghastty] SubsurfacePresenter: rejecting dmabuf with "
"stride=%u too small for width=%u (need >= %llu)\n",
stride, width,
static_cast<unsigned long long>(static_cast<uint64_t>(width) * 4));
return;
}
// Validate the (format, modifier) pair against the compositor's
// advertised list before handing it to `create_immed`. If the
// pair isn't on the list, the compositor will reject the
// subsequent `create_immed` with `invalid_format` — a FATAL
// protocol error that kills the entire wl_display, taking down
// every window in the process. Better to drop this single frame
// than to take down the app.
{
const PresenterGlobals &g = globalState();
const auto it = g.modifiers.find(drm_format);
bool ok = false;
if (it != g.modifiers.end()) {
for (const uint64_t m : it->second) {
if (m == drm_modifier) { ok = true; break; }
}
}
if (!ok) {
std::fprintf(stderr,
"[ghastty] SubsurfacePresenter: refusing dmabuf "
"(fourcc=0x%08x mod=0x%llx) — compositor doesn't "
"advertise this (format, modifier) pair\n",
drm_format,
static_cast<unsigned long long>(drm_modifier));
return;
}
}
// Wrap libghostty's borrowed fd in a wl_buffer. Cached across
// frames by (kernel inode, shape) — see m_cachedInode in the
// header for the full rationale. fstat the dmabuf fd to get the
// anon_inode that uniquely identifies the dma-buf object; it's
// stable across the dup that GhosttySurface did before parking,
// and changes only when libghostty allocates a new Target.
// fstat failure (rare; would indicate a closed fd, which we
// already check above via `fd < 0`) falls through to cache miss
// → create_immed will likely fail too, but the error path there
// already logs cleanly.
struct stat st;
unsigned long inode = 0;
if (::fstat(fd, &st) == 0) inode = static_cast<unsigned long>(st.st_ino);
const bool cache_hit = m_cachedBuffer != nullptr &&
inode != 0 &&
m_cachedInode == inode &&
m_cachedWidth == width &&
m_cachedHeight == height &&
m_cachedStride == stride &&
m_cachedFormat == drm_format &&
m_cachedModifier == drm_modifier &&
m_cachedYInvert == y_invert;
wl_buffer *buffer = nullptr;
if (cache_hit) {
buffer = m_cachedBuffer;
} else {
// Cache miss — destroy any stale buffer first so a failed
// create_immed below leaves the cache empty (rather than half-
// populated with the previous buffer that no longer matches the
// new inputs).
if (m_cachedBuffer) {
wl_buffer_destroy(m_cachedBuffer);
m_cachedBuffer = nullptr;
m_cachedInode = 0;
}
zwp_linux_buffer_params_v1 *params =
zwp_linux_dmabuf_v1_create_params(m_dmabuf);
if (!params) return;
zwp_linux_buffer_params_v1_add(params, fd, /*plane_idx*/ 0,
/*offset*/ 0, stride,
static_cast<uint32_t>(drm_modifier >> 32),
static_cast<uint32_t>(drm_modifier & 0xFFFFFFFFu));
const uint32_t buffer_flags =
y_invert ? ZWP_LINUX_BUFFER_PARAMS_V1_FLAGS_Y_INVERT : 0;
buffer = zwp_linux_buffer_params_v1_create_immed(
params, static_cast<int32_t>(width), static_cast<int32_t>(height),
drm_format, buffer_flags);
zwp_linux_buffer_params_v1_destroy(params);
if (!buffer) {
// Surface the wl_display error code if the failure was a
// protocol-fatal error (compositor rejected the buffer with
// `invalid_format` / `invalid_dimensions` / etc., which kills
// the wl_display). Without this, every subsequent presentDmabuf
// call silently no-ops on the dead display and the cause stays
// hidden until something else logs the disconnection.
const int wl_err = wl_display_get_error(m_display);
std::fprintf(stderr,
"[ghastty] SubsurfacePresenter: create_immed returned null "
"(fd=%d %ux%u fmt=0x%x mod=0x%llx wl_display_error=%d)\n",
fd, width, height, drm_format,
static_cast<unsigned long long>(drm_modifier), wl_err);
return;
}
// Listener data is unused — see `bufferRelease` for why this is
// nullptr (and the no-op release semantics that make the cache
// safe).
wl_buffer_add_listener(buffer, &kBufferListener, nullptr);
m_cachedBuffer = buffer;
m_cachedInode = inode;
m_cachedWidth = width;
m_cachedHeight = height;
m_cachedStride = stride;
m_cachedFormat = drm_format;
m_cachedModifier = drm_modifier;
m_cachedYInvert = y_invert;
}
// Tell the compositor the destination size in surface-local
// coordinates. With fractional scaling this is the logical pixel
// size (e.g. 800x600) while the buffer is at device pixels (e.g.
// 960x720 for 1.2× DPR). wp_viewport handles the mapping;
// wl_surface.set_buffer_scale is intentionally NOT used here
// because (a) it only supports integer scales, and (b) when
// wp_fractional_scale_v1 is active the protocol forbids using
// set_buffer_scale to anything other than 1.
if (dest_width != m_lastDestWidth || dest_height != m_lastDestHeight) {
wp_viewport_set_destination(m_viewport, dest_width, dest_height);
m_lastDestWidth = dest_width;
m_lastDestHeight = dest_height;
}
wl_surface_attach(m_childSurface, buffer, 0, 0);
// Damage the full buffer extent — terminals tend to update large
// dirty rects anyway (cursor blink, scroll, repaint) so a precise
// damage region wouldn't save much, and `damage_buffer` (vs
// `damage`) uses buffer coordinates so it's resolution-correct.
wl_surface_damage_buffer(m_childSurface, 0, 0, static_cast<int32_t>(width),
static_cast<int32_t>(height));
// Register a wl_surface.frame callback BEFORE the commit so the
// compositor knows we want to be paced. Only request a new one if
// none is outstanding — re-requesting before the prior fires would
// leak callbacks. The done handler clears m_frameCallback, so the
// next call here will register fresh.
if (!m_frameCallback) {
m_frameCallback = wl_surface_frame(m_childSurface);
if (m_frameCallback) {
wl_callback_add_listener(m_frameCallback, &kFrameCallbackListener,
this);
}
}
wl_surface_commit(m_childSurface);
wl_display_flush(m_display);
if (int err = wl_display_get_error(m_display); err != 0) {
std::fprintf(
stderr,
"[ghastty] SubsurfacePresenter: wl_display error %d after present\n",
err);
}
}
// Change the viewport destination size and commit the child surface
// WITHOUT attaching a new buffer, so the currently attached buffer gets
// stretched to the new extent. The next presentDmabuf replaces it with a
// properly sized buffer; until then the subsurface covers the new area
// instead of leaving a transparent gap during the parent's resize.
//
// Does nothing when the viewport or child surface is missing, when
// either dimension is non-positive, or when the size is unchanged.
//
// NOTE(review): an earlier version of this comment said the commit
// "applies immediately in desync mode". tryCreate documents that the
// subsurface stays in the default sync mode, in which a child commit is
// cached until the parent surface commits — so this presumably only
// lands on the next parent commit; confirm against the caller.
void SubsurfacePresenter::resizeDestination(int dest_width, int dest_height) {
    const bool haveObjects = m_viewport != nullptr && m_childSurface != nullptr;
    if (!haveObjects) return;

    const bool positiveSize = dest_width > 0 && dest_height > 0;
    if (!positiveSize) return;

    const bool unchanged =
        m_lastDestWidth == dest_width && m_lastDestHeight == dest_height;
    if (unchanged) return;

    wp_viewport_set_destination(m_viewport, dest_width, dest_height);
    m_lastDestWidth = dest_width;
    m_lastDestHeight = dest_height;

    wl_surface_commit(m_childSurface);
    wl_display_flush(m_display);
}
// Move the subsurface to (x, y) in parent-surface coordinates. No-op
// when there is no subsurface or the position matches the last one sent.
void SubsurfacePresenter::setPosition(int x, int y) {
    if (m_subsurface == nullptr) return;

    const bool samePlace = (m_lastX == x) && (m_lastY == y);
    if (samePlace) return;

    wl_subsurface_set_position(m_subsurface, x, y);
    m_lastX = x;
    m_lastY = y;

    // The position is double-buffered on the PARENT surface, so it only
    // takes effect once the caller triggers a parent commit
    // (forceParentCommit on the GhosttySurface side). Flushing here
    // makes sure the request is already on the wire when that happens.
    wl_display_flush(m_display);
}
void SubsurfacePresenter::hide() {
if (!m_childSurface) return;
// Attach NULL = no buffer. After commit + parent commit, the
// subsurface contributes nothing to the compositor's frame.
// Caller is responsible for forceParentCommit on its side.
wl_surface_attach(m_childSurface, nullptr, 0, 0);
wl_surface_commit(m_childSurface);
wl_display_flush(m_display);
}
// Push any queued requests on the wl_display out to the compositor.
// Does nothing if the presenter has no display.
void SubsurfacePresenter::flushDisplay() {
    if (m_display == nullptr) return;
    wl_display_flush(m_display);
}
bool SubsurfacePresenter::reattachCached() {
if (!m_childSurface || !m_cachedBuffer) return false;
// Re-show whatever we had attached before `hide()`. The cached
// wl_buffer survives across hide/show because the release
// listener no-ops (see `bufferRelease`). The dmabuf backing the
// buffer is still alive — libghostty owns the underlying
// VkDeviceMemory until the next Target.deinit (resize), and
// dma-buf kernel ref-counting keeps the pages pinned regardless
// of our client-side state.
//
// The content may be one frame stale (whatever was rendered just
// before Hide), but that's better than a transparent gap while
// the renderer thread spins up its first new frame after Show —
// the parent surface has WA_TranslucentBackground, so without a
// re-attach the user sees through to whatever is behind the
// window. The renderer's next frame overwrites this within
// DRAW_INTERVAL.
wl_surface_attach(m_childSurface, m_cachedBuffer, 0, 0);
wl_surface_damage_buffer(m_childSurface, 0, 0,
static_cast<int32_t>(m_cachedWidth),
static_cast<int32_t>(m_cachedHeight));
// Register a frame callback so the consumer's pacing state machine
// gets a "compositor is ready" event after this re-attach too —
// otherwise a tab switch could leave m_compositorReady stuck false
// (a stale frame callback from the pre-Hide commit may have been
// discarded by the compositor on the NULL attach).
if (!m_frameCallback) {
m_frameCallback = wl_surface_frame(m_childSurface);
if (m_frameCallback) {
wl_callback_add_listener(m_frameCallback, &kFrameCallbackListener,
this);
}
}
wl_surface_commit(m_childSurface);
wl_display_flush(m_display);
return true;
}
} // namespace wayland

View File

@ -0,0 +1,240 @@
// Wayland subsurface presenter for `GhosttySurface`.
//
// Owns one `wl_subsurface` parented to the `GhosttySurface`'s native
// `wl_surface`, plus the `zwp_linux_dmabuf_v1` machinery for wrapping
// libghostty's dmabuf fds in `wl_buffer`s and attaching them to that
// subsurface. The compositor scans the buffers out directly — no
// mmap, no memcpy, no QImage, no QPainter blit on the present path.
//
// The process-wide compositor modifier registry that used to share
// this header now lives in `DmabufRegistry.h`. The implementations
// share `globalState()` machinery in `SubsurfacePresenter.cpp` but
// the API surfaces are disjoint: presenter is per-widget, registry
// is process-wide and read-only.
//
// Wayland-only by project decision (the Qt frontend is Wayland-only;
// see `feedback-qt-no-x11` memory). If the host isn't on a Wayland
// QPA platform or the compositor lacks the required globals,
// `tryCreate` returns nullptr — the caller decides whether that's a
// fatal error.
#pragma once
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
struct wl_buffer;
struct wl_callback;
struct wl_display;
struct wl_subsurface;
struct wl_surface;
struct zwp_linux_dmabuf_v1;
struct wp_viewport;
struct wp_fractional_scale_v1;
class QWindow;
namespace wayland {
// Per-widget presenter that shows libghostty's dmabuf frames on a
// Wayland subsurface. Create with `tryCreate`; the instance owns the
// child wl_surface, its wl_subsurface role, the wp_viewport, the
// optional wp_fractional_scale_v1 object, the pending frame callback
// and the cached wl_buffer, and destroys them in its destructor.
// Non-copyable.
class SubsurfacePresenter {
public:
    // Build a subsurface parented to `topLevel`'s native `wl_surface`,
    // and bind the linux-dmabuf-v1 global on the same display. Pass
    // the TOP-LEVEL QWindow (e.g. `widget->window()->windowHandle()`)
    // — NOT a per-widget native QWindow. We attach all panes/splits
    // as siblings under the top-level surface and position each with
    // `setPosition`, instead of giving each pane its own QWindow
    // (which Qt's QSplitter-embedded child widgets don't handle
    // cleanly: "QWidgetWindow must be a top level window" warning,
    // and the result renders black).
    //
    // Returns nullptr if any prerequisite is missing (non-Wayland QPA,
    // null `wl_display`, `wl_subcompositor` unbindable,
    // `zwp_linux_dmabuf_v1` unbindable, etc.).
    static std::unique_ptr<SubsurfacePresenter> tryCreate(QWindow *topLevel);
    ~SubsurfacePresenter();
    // Hand a dmabuf-backed frame to the compositor: wrap the fd in a
    // `wl_buffer` via `zwp_linux_buffer_params_v1.create_immed`, attach
    // to the subsurface, damage, commit. MUST be called on the Qt GUI
    // thread (the thread that owns the wl_display dispatch); the
    // renderer thread should marshal frames through a Qt-side queue.
    //
    // libghostty owns the fd; this method does not close it. The
    // wayland client library duplicates the fd kernel-side via
    // SCM_RIGHTS, so the compositor's reference survives even after
    // libghostty reuses or closes its handle.
    //
    // `width` / `height` are the buffer size in pixels and `stride` is
    // the row pitch in bytes. The frame is dropped (with a stderr log)
    // if any of them is zero or exceeds INT32_MAX, if `stride` is less
    // than `width * 4`, or if the compositor did not advertise the
    // (`drm_format`, `drm_modifier`) pair.
    //
    // `dest_width` / `dest_height` are the size of the subsurface in
    // PARENT surface-local coordinates (i.e. logical pixels). For
    // integer scales they match the buffer dimensions divided by the
    // scale; for fractional scales they're independent (set via
    // wp_viewport.set_destination, which decouples buffer dimensions
    // from surface area). Non-positive values are clamped to 1.
    //
    // `y_invert` requests the compositor flip the buffer vertically
    // when sampling. The OpenGL renderer's coordinate convention is
    // bottom-left origin (Y up), but Wayland/DRM samples top-down —
    // without the flag, GL frames render upside-down. Vulkan
    // rasterizes Y-down by default and passes false.
    void presentDmabuf(int fd, uint32_t drm_format, uint64_t drm_modifier,
                       uint32_t width, uint32_t height, uint32_t stride,
                       int dest_width, int dest_height,
                       bool y_invert = false);
    // Compositor-preferred fractional scale for this surface, in
    // units of 1/120 (e.g. 144 = 1.2, 180 = 1.5, 240 = 2.0). Returns
    // 120 (= 1.0) until the compositor sends its first
    // wp_fractional_scale_v1.preferred_scale event for our surface.
    //
    // Currently INFORMATIONAL only: GhosttySurface uses Qt's
    // devicePixelRatioF() for buffer sizing (which Qt derives from
    // the same protocol on Wayland), so the two values agree at
    // steady state. Exposed for diagnostics + a future direct-
    // protocol path that bypasses Qt's DPR cache lag during a
    // screen-change race.
    uint32_t preferredScale120() const { return m_preferredScale120; }
    // Stretch the existing subsurface buffer to a new destination
    // size WITHOUT attaching a new buffer. Used at the *start* of a
    // resize, before the renderer has produced a new-size frame, so
    // the parent surface can grow to its new size with the subsurface
    // already covering the new area (briefly stretched), instead of
    // leaving a one-frame transparent gap where the translucent
    // parent shows through.
    //
    // wp_viewport.set_destination is double-buffered on the child
    // surface, and this call commits the child.
    // NOTE(review): this comment used to say the commit applies
    // immediately "in desync mode", but the creation path in
    // SubsurfacePresenter.cpp documents that the subsurface is kept
    // in the default sync mode, where a child commit is cached until
    // the parent surface commits. Presumably the caller therefore has
    // to force a parent commit for the new destination to land —
    // confirm against GhosttySurface.
    //
    // No-op if either dimension is non-positive or the size is
    // unchanged.
    //
    // The next presentDmabuf call (with the real new-size buffer)
    // replaces the stretched content, ending the brief blur.
    //
    // Same pattern mpv's vo_dmabuf_wayland uses for its video
    // subsurface during resize.
    void resizeDestination(int dest_width, int dest_height);
    // Update the subsurface position in parent-surface-local coords.
    // For panes inside splits / tabs, position is the GhosttySurface
    // widget's offset within the top-level (`mapTo(window(),
    // QPoint(0,0))`). wl_subsurface.set_position is double-buffered
    // on the *parent* surface — caller must trigger a parent commit
    // (Qt's QtWaylandClient::QWaylandWindow::commit()) for the new
    // position to apply. No-op if the position hasn't changed.
    void setPosition(int x, int y);
    // Detach the currently-attached buffer so the subsurface becomes
    // invisible. Called when the owning GhosttySurface hides (tab
    // switch) so the inactive pane's pixels don't ghost on top of
    // whatever the active tab is showing in the same on-screen
    // region. The next presentDmabuf call re-attaches a buffer and
    // the subsurface becomes visible again.
    void hide();
    // Register a callback fired (on the GUI thread, via Wayland event
    // queue dispatch) when the compositor signals it's ready for the
    // next frame on this subsurface. Lets the caller pace presents at
    // the compositor's refresh rate instead of unconditionally
    // committing every renderer frame.
    //
    // The callback fires AT MOST ONCE per `presentDmabuf` /
    // `reattachCached` call — the underlying `wl_surface.frame`
    // request is single-shot per commit. After the callback fires,
    // the next present's commit will register a new frame_callback.
    using OnFrameReady = std::function<void()>;
    void setOnFrameReady(OnFrameReady cb) { m_onFrameReady = std::move(cb); }
    // Flush the underlying wl_display to push any queued requests
    // to the compositor. Useful after a forceParentCommit on the
    // Qt side (which queues a parent wl_surface.commit but doesn't
    // wl_display_flush), so the combined "child commit + parent
    // commit" reach the compositor in one shot rather than racing
    // Qt's next event-loop flush.
    void flushDisplay();
    // Re-attach + commit the most recently cached wl_buffer, if any.
    // Called from `QEvent::Show` so a tab-switch / re-show sees the
    // last frame immediately rather than a transparent area while
    // the renderer thread spins up its first new frame. Without this,
    // the parent surface paints through (WA_TranslucentBackground)
    // and the user sees a flash of whatever is behind the window.
    // Returns true if a cached buffer was actually re-attached;
    // false if the cache was empty (first show — caller is
    // responsible for the new-tab flash mitigation if needed).
    bool reattachCached();
    // Called from the wp_fractional_scale_v1.preferred_scale event.
    // Public so the C-style listener struct at file scope in the .cpp
    // can name it; not part of the API for other call sites. `data`
    // must be the owning SubsurfacePresenter; a `scale` of 0 is
    // ignored.
    static void onPreferredScale(void *data, wp_fractional_scale_v1 *,
                                 uint32_t scale);
    // wl_callback::done dispatch from the file-scope listener. Public
    // for the same reason as onPreferredScale: C-style Wayland
    // listeners need a static-callable entry point and we route the
    // result back into the owning presenter via the listener's `data`
    // pointer. Destroys the callback proxy, clears m_frameCallback,
    // and invokes m_onFrameReady if set. Not part of the API for
    // other call sites.
    void onFrameCallbackDone(wl_callback *cb);
    SubsurfacePresenter(const SubsurfacePresenter &) = delete;
    SubsurfacePresenter &operator=(const SubsurfacePresenter &) = delete;
private:
    // Private: instances are only built by `tryCreate`, which passes in
    // fully created Wayland objects.
    SubsurfacePresenter(wl_display *display, wl_surface *child,
                        wl_subsurface *sub, zwp_linux_dmabuf_v1 *dmabuf,
                        wp_viewport *viewport,
                        wp_fractional_scale_v1 *frac_scale);
    // Wayland objects. The display and dmabuf global are stored but
    // not destroyed by this class; the other four are destroyed in
    // the destructor. m_fractionalScale may be null (optional
    // protocol).
    wl_display *m_display;
    wl_surface *m_childSurface;
    wl_subsurface *m_subsurface;
    zwp_linux_dmabuf_v1 *m_dmabuf;
    wp_viewport *m_viewport;
    wp_fractional_scale_v1 *m_fractionalScale;
    uint32_t m_preferredScale120 = 120; // default: 1.0×
    // Last viewport destination and subsurface position sent to the
    // compositor; used to skip redundant requests.
    int m_lastDestWidth = 0;
    int m_lastDestHeight = 0;
    int m_lastX = 0;
    int m_lastY = 0;
    // Pending wl_surface.frame callback for compositor-paced presents.
    // Null between frame_done and the next commit that requests one
    // (presentDmabuf or reattachCached); non-null from that commit
    // until frame_done. Single-shot — the done handler destroys it
    // and clears the field, then invokes `m_onFrameReady` if set.
    wl_callback *m_frameCallback = nullptr;
    OnFrameReady m_onFrameReady;
    // wl_buffer cache keyed by dma-buf identity (kernel inode of the
    // anon_inode backing the dma-buf, which is unique per Target
    // regardless of fd-number reuse) plus the layout-relevant shape.
    // libghostty re-uses the same dmabuf across frames until the
    // next Target.deinit (resize); cache hits skip the create_immed
    // round-trip + compositor-side dmabuf import that dominated
    // GUI-thread CPU at 125 FPS.
    //
    // We can't key on the caller's fd value because GhosttySurface
    // now dups the fd on the renderer thread (to outlive libghostty's
    // close — see 22713b0d3) so the value is fresh per frame. Inode
    // identity is stable across our dup AND across libghostty's
    // close → reopen cycles, so cache invalidation matches Target
    // identity exactly: same Target → same inode → cache hit; new
    // Target → new inode → cache miss → recreate.
    //
    // Cache only stores the wl_buffer; the compositor SCM_RIGHTS-
    // dup'd the fd into its own address space at create_immed time,
    // so the cached wl_buffer doesn't need our fd to outlive the
    // call. The caller owns + closes its own dup.
    wl_buffer *m_cachedBuffer = nullptr;
    unsigned long m_cachedInode = 0; // 0 = empty cache (anon_inode ino > 0)
    uint32_t m_cachedWidth = 0;
    uint32_t m_cachedHeight = 0;
    uint32_t m_cachedStride = 0;
    uint32_t m_cachedFormat = 0;
    uint64_t m_cachedModifier = 0;
    bool m_cachedYInvert = false;
};
} // namespace wayland

View File

@ -353,6 +353,7 @@ pub const Platform = union(PlatformTag) {
macos: MacOS,
ios: IOS,
opengl: OpenGL,
vulkan: Vulkan,
// If our build target for libghostty is not darwin then we do
// not include macos support at all.
@ -395,6 +396,70 @@ pub const Platform = union(PlatformTag) {
present: *const fn (?*anyopaque) callconv(.c) void,
};
/// Configuration for a host that owns a Vulkan device libghostty
/// should render against (fork-only). The host owns the
/// VkInstance / VkPhysicalDevice / VkDevice / VkQueue — the same
/// ownership model as `OpenGL` above. Frames are handed back to
/// the host as dmabuf file descriptors so the host can sample
/// them without a CPU readback.
///
/// Handles are `?*anyopaque` here so callers don't need Vulkan
/// headers to compile against the C API; treat them as VkInstance,
/// VkPhysicalDevice, VkDevice, VkQueue respectively.
///
/// Every callback receives `userdata` as its first argument.
pub const Vulkan = struct {
    /// Opaque host pointer passed back as the first argument of
    /// every callback below.
    userdata: ?*anyopaque,
    /// Resolve `vkGetInstanceProcAddr` (returned as `?*anyopaque`).
    /// libghostty bootstraps the rest of the Vulkan loader from it.
    get_instance_proc_addr: *const fn (
        ?*anyopaque,
        [*:0]const u8,
    ) callconv(.c) ?*anyopaque,
    /// Host-owned Vulkan handles. libghostty does not destroy
    /// these.
    instance: *const fn (?*anyopaque) callconv(.c) ?*anyopaque,
    physical_device: *const fn (?*anyopaque) callconv(.c) ?*anyopaque,
    device: *const fn (?*anyopaque) callconv(.c) ?*anyopaque,
    queue: *const fn (?*anyopaque) callconv(.c) ?*anyopaque,
    queue_family_index: *const fn (?*anyopaque) callconv(.c) u32,
    /// Query the compositor-supported DRM modifiers for a given
    /// DRM_FORMAT_* fourcc. Two-pass usage: call with
    /// `out=null, capacity=0` for the count, then again with a
    /// buffer of that size. Returns the number of modifiers
    /// actually written. The renderer intersects this with the
    /// GPU's per-modifier feature set to pick a tiling the
    /// compositor will accept on attach.
    get_supported_modifiers: *const fn (
        ?*anyopaque,
        u32, // DRM_FORMAT_*
        ?[*]u64, // out
        usize, // capacity
    ) callconv(.c) usize,
    /// Hand off a rendered frame to the host as a dmabuf fd. The
    /// host imports it for composition; libghostty retains
    /// ownership of the underlying VkDeviceMemory and the fd is
    /// valid only for the duration of the call (host must `dup()`
    /// if it needs to hold the fd longer). `image_backed` tells
    /// the host whether the fd was exported from a VkImage
    /// (directly importable as a 2D image via linux-dmabuf-v1)
    /// or from a VkBuffer (only usable via mmap + CPU readback);
    /// see `vulkan/Target.zig` and `include/ghostty.h` for the
    /// full rationale.
    present: *const fn (
        ?*anyopaque,
        i32, // dmabuf fd
        u32, // DRM_FORMAT_*
        u64, // DRM modifier
        u32, // width (pixels)
        u32, // height (pixels)
        u32, // stride (bytes)
        bool, // image_backed
    ) callconv(.c) void,
};
// The C ABI compatible version of this union. The tag is expected
// to be stored elsewhere.
pub const C = extern union {
@ -416,6 +481,35 @@ pub const Platform = union(PlatformTag) {
release_current: ?*const fn (?*anyopaque) callconv(.c) void,
present: ?*const fn (?*anyopaque) callconv(.c) void,
},
// C ABI mirror of `Vulkan`: same fields in the same order, but
// every callback is an optional pointer so that `init` can detect
// a field the host left null and reject the configuration instead
// of calling through it.
vulkan: extern struct {
userdata: ?*anyopaque,
get_instance_proc_addr: ?*const fn (
?*anyopaque,
[*:0]const u8,
) callconv(.c) ?*anyopaque,
instance: ?*const fn (?*anyopaque) callconv(.c) ?*anyopaque,
physical_device: ?*const fn (?*anyopaque) callconv(.c) ?*anyopaque,
device: ?*const fn (?*anyopaque) callconv(.c) ?*anyopaque,
queue: ?*const fn (?*anyopaque) callconv(.c) ?*anyopaque,
queue_family_index: ?*const fn (?*anyopaque) callconv(.c) u32,
get_supported_modifiers: ?*const fn (
?*anyopaque,
u32, // DRM_FORMAT_*
?[*]u64, // out
usize, // capacity
) callconv(.c) usize,
present: ?*const fn (
?*anyopaque,
i32, // dmabuf fd
u32, // DRM_FORMAT_*
u64, // DRM modifier
u32, // width (pixels)
u32, // height (pixels)
u32, // stride (bytes)
bool, // image_backed
) callconv(.c) void,
},
};
/// Initialize a Platform a tag and configuration from the C ABI.
@ -450,6 +544,47 @@ pub const Platform = union(PlatformTag) {
break :opengl error.PresentMustBeSet,
} };
},
.vulkan => vulkan: {
    const host = c_platform.vulkan;

    // All eight callbacks are mandatory. Instead of one error tag
    // per field (which every caller of `Platform.init` would have
    // to handle or swallow), report a single
    // `error.MissingVulkanCallback` and log the name of the first
    // null field, probing in declaration order, so the host can
    // tell which one it forgot to fill out.
    const missing: ?[]const u8 =
        if (host.get_instance_proc_addr == null)
            "get_instance_proc_addr"
        else if (host.instance == null)
            "instance"
        else if (host.physical_device == null)
            "physical_device"
        else if (host.device == null)
            "device"
        else if (host.queue == null)
            "queue"
        else if (host.queue_family_index == null)
            "queue_family_index"
        else if (host.get_supported_modifiers == null)
            "get_supported_modifiers"
        else if (host.present == null)
            "present"
        else
            null;

    if (missing) |field_name| {
        std.log.scoped(.embedded).err(
            "ghostty_platform_vulkan_s.{s} is null",
            .{field_name},
        );
        break :vulkan error.MissingVulkanCallback;
    }

    // Every optional was verified non-null above, so the unwraps
    // below cannot fail.
    break :vulkan .{ .vulkan = .{
        .userdata = host.userdata,
        .get_instance_proc_addr = host.get_instance_proc_addr.?,
        .instance = host.instance.?,
        .physical_device = host.physical_device.?,
        .device = host.device.?,
        .queue = host.queue.?,
        .queue_family_index = host.queue_family_index.?,
        .get_supported_modifiers = host.get_supported_modifiers.?,
        .present = host.present.?,
    } };
},
};
}
};
@ -461,6 +596,8 @@ pub const PlatformTag = enum(c_int) {
macos = 1,
ios = 2,
opengl = 3,
// Fork-only platform tag for hosts that drive `src/renderer/Vulkan.zig`.
vulkan = 4,
};
pub const EnvVar = extern struct {
@ -538,6 +675,25 @@ pub const Surface = struct {
.x = @floatCast(opts.scale_factor),
.y = @floatCast(opts.scale_factor),
},
// Initial surface size. Must be large enough for the
// terminal to have at least a few cols/rows by default,
// because the shell process is forked as part of
// Surface.init and the PTY's winsize is whatever this
// size translates to. Tools like fastfetch query winsize
// (TIOCGWINSZ) on startup and lay out their kitty-image
// escape codes based on what they see; if winsize reports
// 0 cols × 0 rows, fastfetch sends the image with c=0
// r=0, and `Placement.pixelSize` (graphics_storage.zig)
// returns the image's NATIVE pixel dimensions — visible
// to the user as a giant Kusanagi (or whatever logo)
// filling the whole pane. 800×600 was the historic
// default; restoring it. The race in which a wrong-size
// first frame coincides with the widget's device-pixel
// size at a fractional DPR is handled separately: the
// host apprt sends its real size as early as possible
// (Qt: immediate ghostty_surface_set_size right after
// ghostty_surface_new, inheriting the parent surface's
// size for new tabs).
.size = .{ .width = 800, .height = 600 },
.cursor_pos = .{ .x = -1, .y = -1 },
};

View File

@ -688,6 +688,14 @@ pub const ExeEntrypoint = enum {
webgen_config,
webgen_actions,
webgen_commands,
/// Build-time tool: compiles one of the renderer's built-in
/// GLSL shaders to SPIR-V and writes the bytes to stdout.
/// Invoked by `src/build/VulkanSpv.zig` once per (shader, stage)
/// pair so libghostty can `@embedFile` the resulting .spv
/// instead of running glslang at runtime. This eliminates the
/// per-process TPoolAllocator high-water-mark leak (~10 MB)
/// that the Vulkan path otherwise pays on first surface init.
vulkan_spvgen,
};
/// The release channel for the build.

View File

@ -8,6 +8,7 @@ const HelpStrings = @import("HelpStrings.zig");
const MetallibStep = @import("MetallibStep.zig");
const UnicodeTables = @import("UnicodeTables.zig");
const GhosttyFrameData = @import("GhosttyFrameData.zig");
const VulkanSpv = @import("VulkanSpv.zig");
const DistResource = @import("GhosttyDist.zig").Resource;
config: *const Config,
@ -18,6 +19,9 @@ metallib: ?*MetallibStep,
unicode_tables: UnicodeTables,
framedata: GhosttyFrameData,
uucode_tables: std.Build.LazyPath,
/// Vulkan-only: build-time SPIR-V blobs for the renderer's
/// built-in shaders. Null on non-Vulkan builds.
vulkan_spv: ?VulkanSpv,
/// Used to keep track of a list of file sources.
pub const LazyPathList = std.ArrayList(std.Build.LazyPath);
@ -37,6 +41,15 @@ pub fn init(b: *std.Build, cfg: *const Config) !SharedDeps {
.unicode_tables = try .init(b, uucode_tables),
.framedata = try .init(b),
.uucode_tables = uucode_tables,
// Vulkan-only build artifact: precompiled SPV blobs for
// the renderer's built-in shaders. Skipping the build
// step entirely on non-Vulkan builds avoids paying for
// a host-target glslang link the OpenGL/Metal renderers
// would never use.
.vulkan_spv = if (cfg.renderer == .vulkan)
try VulkanSpv.init(b, cfg)
else
null,
// Setup by retarget
.options = undefined,
@ -452,6 +465,14 @@ pub fn add(
if (b.lazyDependency("opengl", .{})) |dep| {
step.root_module.addImport("opengl", dep.module("opengl"));
}
// The Vulkan binding is only loaded when the renderer is .vulkan
// (still in development — see `src/renderer/Vulkan.zig`). Linking
// libvulkan happens further down in `linkSystemDeps`.
if (self.config.renderer == .vulkan) {
if (b.lazyDependency("vulkan", .{})) |dep| {
step.root_module.addImport("vulkan", dep.module("vulkan"));
}
}
if (b.lazyDependency("vaxis", .{})) |dep| {
step.root_module.addImport("vaxis", dep.module("vaxis"));
}
@ -600,6 +621,15 @@ pub fn add(
});
}
// Link the system Vulkan loader for the Vulkan renderer. The
// bindings themselves are in `pkg/vulkan` (added above as a Zig
// module). On Linux this resolves to libvulkan.so via the standard
// dynamic linker; Vulkan headers (`vulkan/vulkan.h`) come from the
// standard system include path (`vulkan-headers` package).
if (self.config.renderer == .vulkan) {
step.linkSystemLibrary2("vulkan", dynamic_link_opts);
}
// If we're building an exe then we have additional dependencies.
if (step.kind != .lib) {
// When we're targeting flatpak we ALWAYS link GTK so we
@ -615,6 +645,7 @@ pub fn add(
self.help_strings.addImport(step);
self.unicode_tables.addImport(step);
self.framedata.addImport(step);
if (self.vulkan_spv) |*v| v.addImport(step);
return static_libs;
}

167
src/build/VulkanSpv.zig Normal file
View File

@ -0,0 +1,167 @@
//! Build-time SPV precompile for the renderer's 9 built-in
//! shaders. Builds a host-target executable from
//! `src/vulkan_spvgen.zig` that takes (shader_name, stage) on
//! argv and emits SPIR-V bytes on stdout, then runs it 9 times
//! at build time and generates a `vulkan_spv.zig` module that
//! exposes each blob as a `pub const X: []const u32` decl backed
//! by a 4-byte-aligned copy of `@embedFile("X.spv")`.
//!
//! Why: see `src/vulkan_spvgen.zig` for the leak/perf rationale.
//! Pre-compiling built-ins at build time lets the runtime call
//! `Module.initFromSpirv` instead of `Module.init`, skipping
//! glslang entirely on the per-process first-surface init that
//! otherwise hits glslang's TLS TPoolAllocator and leaves
//! ~10 MB of un-releasable pool pages.
//!
//! Mirrors `HelpStrings.zig`'s structure. Conditional: only
//! constructed when the build is targeting the Vulkan renderer
//! (caller gates this).
const VulkanSpv = @This();
const std = @import("std");
const Config = @import("Config.zig");
/// The (name, stage) tuples of the renderer's 9 built-in shaders.
/// Keep in sync with the decls of `renderer.vulkan.shaders.source`
/// and the corresponding `Module.init` call sites in
/// `renderer/vulkan/shaders.zig::Shaders.init`.
///
/// `name` doubles as the `.spv` file stem and as the identifier of
/// the generated decl; `stage` is forwarded verbatim as the second
/// argv entry of the generator executable.
const Shader = struct { name: []const u8, stage: []const u8 };
// One entry per generator run; `init` iterates this table in order.
const shaders = [_]Shader{
.{ .name = "bg_color_frag", .stage = "fragment" },
.{ .name = "bg_image_frag", .stage = "fragment" },
.{ .name = "bg_image_vert", .stage = "vertex" },
.{ .name = "cell_bg_frag", .stage = "fragment" },
.{ .name = "cell_text_frag", .stage = "fragment" },
.{ .name = "cell_text_vert", .stage = "vertex" },
.{ .name = "full_screen_vert", .stage = "vertex" },
.{ .name = "image_frag", .stage = "fragment" },
.{ .name = "image_vert", .stage = "vertex" },
};
/// Host-target executable; built once, run 9 times.
exe: *std.Build.Step.Compile,
/// LazyPath to the generated `vulkan_spv.zig` module.
output: std.Build.LazyPath,
/// Set up the build-time SPIR-V generation pipeline.
///
/// Creates the host-target `vulkan_spvgen` executable, schedules one
/// run of it per entry in `shaders` (capturing stdout as
/// `<name>.spv`), and generates a `vulkan_spv.zig` module next to
/// those blobs that exposes each one as a `[]const u32` decl.
///
/// Returns an error if allocating the generated module source or
/// adding the build options fails.
pub fn init(b: *std.Build, cfg: *const Config) !VulkanSpv {
    const exe = b.addExecutable(.{
        .name = "vulkan_spvgen",
        .root_module = b.createModule(.{
            // Through main.zig so the exe_entrypoint switch
            // resolves to vulkan_spvgen.zig. Matches the helpgen
            // pattern (also root_source_file=main.zig + the
            // entrypoint enum picks the actual main).
            .root_source_file = b.path("src/main.zig"),
            .target = b.graph.host,
            // ReleaseFast is required: Debug mode produces
            // R_X86_64_PC64 relocations when linking glslang's
            // large static library that Zig's bundled linker
            // can't handle. Release mode uses the small code
            // model + system linker.
            .optimize = .ReleaseFast,
            .strip = false,
            .omit_frame_pointer = false,
            .unwind_tables = .sync,
        }),
    });

    // Pin the entrypoint via build_options.
    const spv_config = config: {
        var copy = cfg.*;
        copy.exe_entrypoint = .vulkan_spvgen;
        break :config copy;
    };
    const options = b.addOptions();
    try spv_config.addOptions(options);
    exe.root_module.addOptions("build_options", options);

    // Transitive imports the gen tool needs (mirrors what
    // SharedDeps adds for the renderer build, but pinned to
    // b.graph.host since this exe runs on the build machine).
    if (b.lazyDependency("glslang", .{
        .target = b.graph.host,
        .optimize = .ReleaseFast,
    })) |glslang_dep| {
        exe.root_module.addImport("glslang", glslang_dep.module("glslang"));
        exe.linkLibrary(glslang_dep.artifact("glslang"));
    }

    // `vulkan` is a header-only Zig module — its build.zig only
    // calls `b.addModule(...)`, so it doesn't accept target /
    // optimize args.
    if (b.lazyDependency("vulkan", .{})) |vulkan_dep| {
        exe.root_module.addImport("vulkan", vulkan_dep.module("vulkan"));
    }

    // Run the exe once per shader, capture stdout, drop the
    // resulting bytes into a single WriteFiles directory under
    // distinct .spv filenames. Also generate a .zig stub that
    // @embedFile()s each blob and exposes it as a typed
    // `[]const u32` decl matching the shader name — that's what
    // the renderer imports as "vulkan_spv".
    //
    // `wf` is a pointer to the step and is never reassigned, so it
    // is declared `const`.
    const wf = b.addWriteFiles();
    var module_src: std.ArrayList(u8) = .empty;
    defer module_src.deinit(b.allocator);
    try module_src.appendSlice(b.allocator,
        \\// AUTO-GENERATED by src/build/VulkanSpv.zig — do not edit.
        \\// Re-run `zig build -Drenderer=vulkan` after editing any
        \\// of the renderer's built-in GLSL shaders.
        \\//
        \\// Each shader is exposed as `[]const u32` directly. The
        \\// underlying storage is a comptime-aligned u8 array
        \\// (`align(@alignOf(u32))`) so the bytesAsSlice cast is
        \\// safe — the previous `@alignCast` of an unaligned
        \\// @embedFile slice was UB and caused subtle SPIR-V
        \\// misinterpretation (images rendered at wrong size on
        \\// NVIDIA, which accepted the misaligned data and treated
        \\// it as a slightly different program). Module.initFromSpirv
        \\// takes []const u32 directly so callers can use these
        \\// decls without further casts.
        \\
        \\const std = @import("std");
        \\
        \\
    );

    for (shaders) |s| {
        const run = b.addRunArtifact(exe);
        run.addArgs(&.{ s.name, s.stage });
        const captured = run.captureStdOut();
        const file_name = b.fmt("{s}.spv", .{s.name});
        _ = wf.addCopyFile(captured, file_name);

        // Two declarations per shader:
        // - `<name>_raw` is the storage: a const array of u8
        //   aligned to @alignOf(u32) (forces .rodata layout to
        //   start on a 4-byte boundary, dereferences the
        //   @embedFile pointer to put bytes inline).
        // - `<name>` is the public []const u32 view via
        //   bytesAsSlice (which asserts the runtime pointer's
        //   alignment matches the type's required alignment;
        //   guaranteed by the align() on _raw).
        try module_src.writer(b.allocator).print(
            \\const {0s}_raw align(@alignOf(u32)) = @embedFile("{1s}").*;
            \\pub const {0s}: []const u32 = std.mem.bytesAsSlice(u32, {0s}_raw[0..]);
            \\
        ,
            .{ s.name, file_name },
        );
    }

    // The stub lives in the same WriteFiles directory as the blobs
    // so its relative `@embedFile` paths resolve.
    const output = wf.add(
        "vulkan_spv.zig",
        try module_src.toOwnedSlice(b.allocator),
    );

    return .{
        .exe = exe,
        .output = output,
    };
}
/// Expose the generated SPIR-V module to `step` under the import
/// name `vulkan_spv`, and make `step` depend on the steps that
/// produce the generated file so it exists before `step` compiles.
pub fn addImport(self: *const VulkanSpv, step: *std.Build.Step.Compile) void {
    const generated = self.output;
    generated.addStepDependencies(&step.step);
    step.root_module.addAnonymousImport(
        "vulkan_spv",
        .{ .root_source_file = generated },
    );
}

View File

@ -10,6 +10,7 @@ const entrypoint = switch (build_config.exe_entrypoint) {
.webgen_config => @import("build/webgen/main_config.zig"),
.webgen_actions => @import("build/webgen/main_actions.zig"),
.webgen_commands => @import("build/webgen/main_commands.zig"),
.vulkan_spvgen => @import("vulkan_spvgen.zig"),
};
/// The main entrypoint for the program.

View File

@ -17,6 +17,7 @@ pub const Backend = @import("renderer/backend.zig").Backend;
pub const GenericRenderer = @import("renderer/generic.zig").Renderer;
pub const Metal = @import("renderer/Metal.zig");
pub const OpenGL = @import("renderer/OpenGL.zig");
pub const Vulkan = @import("renderer/Vulkan.zig");
pub const WebGL = @import("renderer/WebGL.zig");
pub const Options = @import("renderer/Options.zig");
pub const Overlay = @import("renderer/Overlay.zig");
@ -39,6 +40,7 @@ pub const Renderer = switch (build_config.renderer) {
.metal => GenericRenderer(Metal),
.opengl => GenericRenderer(OpenGL),
.webgl => WebGL,
.vulkan => GenericRenderer(Vulkan),
};
/// The health status of a renderer. These must be shared across all

View File

@ -100,9 +100,10 @@ pub fn init(alloc: Allocator, opts: rendererpkg.Options) !Metal {
.macos => |v| v.nsview,
.ios => |v| v.uiview,
// The OpenGL platform is only valid with the OpenGL
// renderer; it cannot provide a view for Metal.
.opengl => return error.UnsupportedPlatform,
// The OpenGL / Vulkan platforms are only valid with
// their respective renderers; neither provides a view
// for Metal.
.opengl, .vulkan => return error.UnsupportedPlatform,
},
},
@ -199,12 +200,29 @@ pub fn drawFrameEnd(self: *Metal) void {
pub fn initShaders(
self: *const Metal,
alloc: Allocator,
custom_shaders: []const [:0]const u8,
custom_shaders: []const []const u8,
) !shaders.Shaders {
// `shadertoy.loadFromFiles` returns `[]const []const u8` (a unified
// type so the SPV-target Vulkan path can share the loader); for
// `.msl` the underlying allocation IS null-terminated
// (`shadertoy.mslFromSpv` returns `[:0]const u8` and writes a
// sentinel one past `.len`). Reattach the sentinel for our
// downstream `Shaders.init` which expects `[:0]const u8`.
// Same pattern as `OpenGL.initShaders`.
const z_shaders = try alloc.alloc([:0]const u8, custom_shaders.len);
defer alloc.free(z_shaders);
for (custom_shaders, z_shaders) |bytes, *out| {
// Sentinel guard: `@ptrCast` does NOT verify the sentinel,
// so without this assert a future `loadFromFiles` change
// that forgets the trailing null would surface as an
// OOB read inside the Metal library compile.
std.debug.assert(bytes.len == 0 or bytes.ptr[bytes.len] == 0);
out.* = @ptrCast(bytes);
}
return try shaders.Shaders.init(
alloc,
self.device,
custom_shaders,
z_shaders,
// Using an `*_srgb` pixel format makes Metal gamma encode
// the pixels written to it *after* blending, which means
// we get linear alpha blending rather than gamma-incorrect

View File

@ -27,6 +27,11 @@ pub const custom_shader_target: shadertoy.Target = .glsl;
// The fragCoord for OpenGL shaders is +Y = up.
pub const custom_shader_y_is_down = false;
/// Custom shaders are supported (the renderer ships a working "post"
/// pass that composites `CustomShaderState.back_texture` through the
/// user's shader into `frame.target`).
pub const supports_custom_shaders: bool = true;
/// Because OpenGL's frame completion is always
/// sync, we have no need for multi-buffering.
pub const swap_chain_count = 1;
@ -211,8 +216,9 @@ pub fn surfaceInit(surface: *apprt.Surface) !void {
try prepareContext(&gladHostLoader);
},
// macOS and iOS use the Metal renderer.
.macos, .ios => return error.UnsupportedPlatform,
// macOS and iOS use the Metal renderer; the Vulkan platform
// is only valid with the Vulkan renderer (currently a stub).
.macos, .ios, .vulkan => return error.UnsupportedPlatform,
},
}
@ -295,12 +301,33 @@ pub fn drawFrameEnd(self: *OpenGL) void {
pub fn initShaders(
self: *const OpenGL,
alloc: Allocator,
custom_shaders: []const [:0]const u8,
custom_shaders: []const []const u8,
) !shaders.Shaders {
_ = alloc;
_ = self;
// `shadertoy.loadFromFiles` returns `[]const []const u8` so the
// SPV-target Vulkan path can share the loader, but for `.glsl`
// the underlying allocation IS null-terminated
// (`shadertoy.glslFromSpv` returns `[:0]const u8` and writes a
// sentinel one past `.len`). Reattach the sentinel for our
// downstream `Pipeline.init` calls that expect `[:0]const u8`.
//
// Use the caller-provided `alloc` (matches `Metal.initShaders`):
// this is a transient scratch slice torn down at function
// exit.
const z_shaders = try alloc.alloc([:0]const u8, custom_shaders.len);
defer alloc.free(z_shaders);
for (custom_shaders, z_shaders) |bytes, *out| {
// Defense against a future `loadFromFiles` change that
// forgets to null-terminate: assert the sentinel before we
// pretend the slice is `[:0]const u8`. `@ptrCast` does NOT
// verify the sentinel; without this assert, a missing
// terminator surfaces as a downstream OOB read.
std.debug.assert(bytes.len == 0 or bytes.ptr[bytes.len] == 0);
out.* = @ptrCast(bytes);
}
return try shaders.Shaders.init(
self.alloc,
custom_shaders,
alloc,
z_shaders,
);
}

View File

@ -293,6 +293,18 @@ fn setQosClass(self: *const Thread) void {
}
fn syncDrawTimer(self: *Thread) void {
// Hidden surfaces have no business running the animation
// draw timer: `drawFrame` would just early-return on the
// `!flags.visible` check and we'd burn 125 wakeups/sec on
// a no-op. With N background tabs each holding an animation
// timer, this dominated CPU on multi-tab sessions. The
// `.visible` mailbox handler re-runs `syncDrawTimer`
// to re-arm when the tab becomes visible again.
if (!self.flags.visible) {
self.draw_active = false;
return;
}
skip: {
// If our renderer supports animations and has them, then we
// can apply draw timer based on custom shader animation configuration.
@ -360,6 +372,12 @@ fn drainMailbox(self: *Thread) !void {
// Visibility affects our QoS class
self.setQosClass();
// Visibility also gates the animation draw timer
// (see syncDrawTimer): hidden surfaces don't arm
// the 125 FPS timer, visible ones do. Re-run on
// every transition.
self.syncDrawTimer();
// If we became visible then we immediately rebuild cells
// (renderCallback skips updateFrame while invisible) and draw.
if (v) {
@ -623,8 +641,15 @@ fn renderCallback(
) catch |err|
log.warn("error rendering err={}", .{err});
// Draw
t.drawFrame(false);
// Draw. When the animation draw timer is already running
// (custom-shader-animation engaged), it will pick up the
// newly-updated cells at its next DRAW_INTERVAL tick; drawing
// here too would double-up frames during animated-shader periods
// and burn host-thread CPU (per-frame Wayland buffer attach +
// commit on the Qt apprt) for no visible benefit. Without the
// timer, wakeup-driven draws are the only way frames reach the
// host, so we always draw in that case.
if (!t.draw_active) t.drawFrame(false);
return .disarm;
}

661
src/renderer/Vulkan.zig Normal file
View File

@ -0,0 +1,661 @@
//! Vulkan graphics API for libghostty's `GenericRenderer`. Active
//! on `-Drenderer=vulkan` builds; the host (e.g. the Qt frontend)
//! supplies a VkInstance / VkDevice / VkQueue via the
//! `ghostty_platform_vulkan_s` C ABI, libghostty drives all
//! pipeline / image / command-buffer work against those handles,
//! and rendered frames go back to the host as dmabuf fds for
//! zero-copy compositing.
//!
//! Per-frame model: fence-paced submit-then-wait (one frame in
//! flight), `Target` is the dmabuf-exportable render image,
//! `Frame.complete` waits on the fence before handing the fd to
//! the platform `present` callback.
//!
//! Submodules: pure Vulkan-API wrappers live in `pkg/vulkan/`
//! (mirror of `pkg/opengl/`); renderer-policy modules live alongside
//! this file under `vulkan/`.
//!
//! In `pkg/vulkan/` (re-exported from this file as
//! `Vulkan.{Device,Sampler,CommandPool,DescriptorPool}`):
//! - `Device.zig` — host-handle wrapper + dispatch table.
//! - `Sampler.zig` — VkSampler.
//! - `CommandPool.zig` — VkCommandPool + one-shot helper.
//! - `DescriptorPool.zig` — per-frame descriptor pool.
//!
//! In `src/renderer/vulkan/`:
//! - `Texture.zig` — VkImage + memory + view + staging upload.
//! - `Target.zig` — dmabuf-exportable render target
//! (direct or legacy_copy mode).
//! - `buffer.zig` — Buffer(T), host-coherent.
//! - `buffer_pool.zig` — cross-frame VkBuffer recycle pool
//! (per-thread pending, shared ready).
//! - `ThreadState.zig` — per-renderer-thread frame fence /
//! command buffer / step pool / last-target.
//! - `Pipeline.zig` — VkPipeline + layout (dynamic rendering).
//! - `RenderPass.zig` — dynamic-rendering pass + step recorder.
//! - `Frame.zig` — per-draw context (fence-paced).
//! - `shaders.zig` — GLSL → SPIR-V → VkShaderModule + the
//! OpenGL-GLSL → Vulkan-GLSL rewriter.
pub const Vulkan = @This();
const std = @import("std");
const builtin = @import("builtin");
const Allocator = std.mem.Allocator;
const vulkan = @import("vulkan");
const vk = vulkan.c;
const apprt = @import("../apprt.zig");
const configpkg = @import("../config.zig");
const font = @import("../font/main.zig");
const rendererpkg = @import("../renderer.zig");
const shadertoy = @import("shadertoy.zig");
pub const GraphicsAPI = Vulkan;
// Device-dispatch primitives live in `pkg/vulkan/` so they can be
// reused by anything that needs a typed Vulkan binding (mirrors how
// `pkg/opengl/` houses Buffer/Program/Texture/etc.). The renderer
// re-exports them from this top-level so call sites continue to write
// `Vulkan.Device`, `Vulkan.Sampler`, etc.
pub const Device = vulkan.Device;
pub const Sampler = vulkan.Sampler;
pub const CommandPool = vulkan.CommandPool;
pub const DescriptorPool = vulkan.DescriptorPool;
// Renderer-policy primitives stay in `src/renderer/vulkan/` (dmabuf
// export, our pipeline + render-pass wiring, frame fence pacing, the
// GLSL → SPIR-V loader).
pub const Texture = @import("vulkan/Texture.zig");
pub const Target = @import("vulkan/Target.zig");
pub const Pipeline = @import("vulkan/Pipeline.zig");
pub const RenderPass = @import("vulkan/RenderPass.zig");
pub const Frame = @import("vulkan/Frame.zig");
pub const shaders = @import("vulkan/shaders.zig");
const bufferpkg = @import("vulkan/buffer.zig");
pub const Buffer = bufferpkg.Buffer;
// ---- comptime contract --------------------------------------------------
/// Custom user shaders compile to SPIR-V directly, skipping the
/// GLSL → SPIR-V → GLSL roundtrip that `.glsl` would do. The
/// roundtrip exists for backends that consume GLSL (OpenGL, Metal
/// via MSL), but Vulkan ingests SPIR-V natively and we already have
/// a glslang shim for the renderer's built-in shaders. Bypassing
/// the roundtrip halves the per-shader compile cost AND avoids the
/// spirv-cross-emitted main() losing the upstream `gl_FragCoord.xy`
/// pattern we hook for the Y-flip fix.
pub const custom_shader_target: shadertoy.Target = .spv;
/// Custom shaders ARE now supported on the Vulkan backend.
/// `shaders.Shaders.init` builds one post pipeline per user shader
/// (UBO at set 0 binding 1, iChannel0 sampler at set 1 binding 0,
/// matching `shadertoy_prefix.glsl` after `vulkanizeGlsl` rewrites
/// the layouts). The renderer's post pass at the end of `drawFrame`
/// chains them: the first pipeline samples `back_texture` and writes
/// `front_texture`, swap, repeat; the last one writes
/// `frame.target` instead.
pub const supports_custom_shaders: bool = true;
/// Vulkan's clip-space Y axis points down (unlike OpenGL).
pub const custom_shader_y_is_down = true;
/// Extra `#define` lines `shadertoy.loadFromFile` injects into the
/// prefix between `#version` and the rest. `GHASTTY_VULKAN`
/// activates the Vulkan-side `gl_FragCoord` flip + `texture()`
/// upper-left wrap so `mainImage` sees shadertoy-convention coords
/// even though Vulkan rasterizes Y-down. OpenGL/MSL backends omit
/// this decl entirely and pass `&.{}` from `generic.zig`.
pub const custom_shader_extra_defines: []const []const u8 = &.{"GHASTTY_VULKAN 1"};
/// GLSL → GLSL rewriter that `shadertoy.loadFromFile` runs after the
/// prefix splice and before the SPIR-V compile. Plugs in the
/// `vulkanizeGlsl` pass that rewrites `layout(binding = N)` into
/// `layout(set = S, binding = N)` so the resulting SPIR-V matches
/// the renderer's multi-set descriptor layout. Without this, the
/// shader's `iChannel0` lands at set 0 binding 0 while the post
/// pipeline binds it at set 1 binding 0, and the sampler returns garbage.
pub const rewriteCustomShaderSource = shaders.vulkanizeGlsl;
/// Single-buffered for v1; fence-paced submit-then-wait means there's
/// only ever one frame in flight.
pub const swap_chain_count = 1;
const log = std.log.scoped(.vulkan);
// ---- per-surface state --------------------------------------------------
alloc: Allocator,
blending: configpkg.Config.AlphaBlending,
rt_surface: *apprt.Surface,
/// Process-wide Vulkan device. The host owns one VkDevice shared
/// across every surface, so we mirror that as a single global slot
/// (not threadlocal: the renderer thread is distinct from the main
/// thread that constructs the surface, and threadlocal doesn't
/// survive that boundary).
///
/// Initialized in `Vulkan.init` on the surface-construction thread;
/// read by every other thread via `devicePtr` after that. The renderer
/// holds `*const Vulkan` from `generic.zig` so we can't mutate fields
/// on the value — same reason OpenGL uses a `threadlocal var gl_host`
/// (though OpenGL gets away with threadlocal because the OpenGL
/// platform callbacks are read on the same thread that set them).
var device: ?Device = null;
/// Refcount of live `Vulkan` renderer instances that share `device`.
/// Each `init` increments; each `deinit` decrements. The device is
/// only torn down when the count returns to 0, so closing one tab
/// (or one split) doesn't yank the VkDevice out from under the
/// surfaces still running in other tabs. Process-wide (matches
/// `device`'s scope). Mutated under `device_mutex` because
/// surfaces' renderer threads run independently and may init/deinit
/// concurrently.
var device_refcount: usize = 0;
var device_mutex: std.Thread.Mutex = .{};
/// Cross-frame buffer recycle pool. See `vulkan/buffer_pool.zig`
/// for the full lifecycle / multi-thread contract. Re-exported so
/// existing callers (`Vulkan.buffer_pool.cycle` etc.) keep working
/// unchanged.
pub const buffer_pool = @import("vulkan/buffer_pool.zig");
/// Per-renderer-thread state (frame command buffer, fence, descriptor
/// pool, last-target pointer). See `vulkan/ThreadState.zig` for the
/// lifecycle.
const ThreadState = @import("vulkan/ThreadState.zig");
// ---- lifecycle ----------------------------------------------------------
/// Create the per-surface Vulkan renderer state, bringing up the
/// shared process-wide `device` on first use and taking one
/// reference on it.
///
/// Returns `error.UnsupportedPlatform` when the host surface was
/// configured with a non-Vulkan platform, or any error from
/// bootstrapping / initializing the device.
pub fn init(alloc: Allocator, opts: rendererpkg.Options) !Vulkan {
    // The device has to exist before the renderer's
    // `FrameState.init` starts querying buffer/texture options, and
    // it is process-wide rather than threadlocal because the
    // renderer thread differs from the thread constructing the
    // surface. The mutex covers both the lazy bring-up and the
    // refcount bump below.
    device_mutex.lock();
    defer device_mutex.unlock();

    if (device == null) switch (apprt.runtime) {
        apprt.embedded => switch (opts.rt_surface.platform) {
            // Which platform the host plugged in is only known at
            // call time (the C ABI lets the host choose), so a
            // non-Vulkan surface is a runtime error, not a compile
            // error.
            .opengl, .macos, .ios => return error.UnsupportedPlatform,

            .vulkan => |host| {
                const bootstrap = try bootstrapFromPlatform(host);
                device = try Device.init(alloc, bootstrap);
                log.info(
                    "Vulkan device ready (api=0x{x})",
                    .{device.?.api_version},
                );
            },
        },

        // Embedded-only by design: the host owns the
        // VkInstance/Device/Queue and passes them in through
        // `ghostty_platform_vulkan_s`. Any other app runtime (for
        // example `-Drenderer=vulkan -Dapp-runtime=gtk`) is rejected
        // at compile time instead of crashing at first surface
        // init, mirroring the equivalent `@compileError` in
        // OpenGL.zig.
        else => @compileError("unsupported app runtime for Vulkan (embedded-only)"),
    };

    device_refcount += 1;

    return .{
        .alloc = alloc,
        .blending = opts.config.blending,
        .rt_surface = opts.rt_surface,
    };
}
/// Drop this surface's reference on the shared `device`; the last
/// reference to go tears the device down. `self` is poisoned on
/// every exit path.
///
/// `ThreadState.cleanup` is deliberately NOT called here: the
/// `threadlocal` state was populated on the renderer thread, so it
/// has to be cleaned up there (in `threadExit`). Doing it from this
/// function would read the calling thread's empty TLS and silently
/// leak everything.
pub fn deinit(self: *Vulkan) void {
    // Registered first so it runs last, after the mutex is released.
    defer self.* = undefined;

    device_mutex.lock();
    defer device_mutex.unlock();

    // Underflow guard. This is a logged check rather than an assert
    // because asserts compile out in ReleaseFast / ReleaseSmall,
    // where a double-deinit would wrap the unsigned counter to a
    // huge value and the tear-down branch below would never run
    // again. A stale deinit is a contract violation worth surfacing.
    if (device_refcount == 0) {
        log.err("Vulkan.deinit: refcount underflow — double-deinit?", .{});
        return;
    }

    device_refcount -= 1;

    // Other surfaces (tabs / splits) still use the device: leave it
    // alone so their renderer threads keep working.
    if (device_refcount != 0) return;

    // Last surface out. Non-final deinits skip the wait, so idle
    // the device here before draining the buffer pool's shared
    // `ready` list and destroying the device.
    // NOTE(review): the per-thread `pending` lists are presumably
    // drained elsewhere (the earlier note referred to
    // `buffer_pool.drainSelf`, which is not called in this
    // function) — confirm.
    if (device) |*d| {
        d.waitIdle();
        buffer_pool.drainShared(d);
        d.deinit();
    }
    device = null;
}
/// Early per-surface setup hook; intentionally a no-op for Vulkan.
/// All device wiring is deferred to `Vulkan.init`, which receives the
/// platform through `opts`.
pub fn surfaceInit(_: *apprt.Surface) !void {}
/// Setup hook invoked just before the renderer thread starts;
/// intentionally a no-op. The shared device is constructed in
/// `Vulkan.init` instead, because the option getters in this file
/// need it and (per the original note) are called before
/// `threadEnter`.
pub fn finalizeSurfaceInit(_: *const Vulkan, _: *apprt.Surface) !void {}
/// Renderer-thread entry hook; intentionally a no-op because the
/// device is brought up in `init`. The declaration is kept so that
/// `@hasDecl(GraphicsAPI, "threadEnter")` still resolves true in
/// `generic.zig`.
pub fn threadEnter(_: *const Vulkan, _: *apprt.Surface) !void {}
/// Renderer-thread exit hook: release this thread's Vulkan state and
/// wait for the shared device to go idle. Does nothing when the shared
/// device is not set.
pub fn threadExit(_: *const Vulkan) void {
    const d = if (device) |*shared| shared else return;

    // ThreadState.cleanup MUST run here, on the renderer thread, and
    // not in `Vulkan.deinit` (which runs on the GUI thread after the
    // renderer thread has joined — see Surface.deinit). The per-thread
    // Vulkan state lives in `threadlocal var` slots populated on this
    // thread; cleaning up from the GUI thread would read that thread's
    // empty TLS, every destroy would no-op, and the per-tab
    // DescriptorPool / VkCommandBuffer / VkFence plus the buffer_pool
    // pending list would leak forever. (heaptrack on a 20-tab
    // open+close session attributed ~6 MB / 42 calls of NVIDIA
    // driver-internal state to exactly this: DescriptorPool.init →
    // ThreadState.ensureInit pages that nothing ever released.)
    //
    // Cleanup needs the device alive: the refcount stays > 0 until
    // `Vulkan.deinit` decrements it on the GUI thread, so the shared
    // VkDevice is still valid here.
    ThreadState.cleanup(d);

    // Belt-and-suspenders: also wait out any non-ThreadState in-flight
    // work this thread may have submitted via the shared queue.
    d.waitIdle();
}
/// Display-realized hook; no-op for the Vulkan backend.
pub fn displayRealized(_: *Vulkan) void {}
/// Display-unrealized hook; no-op for the Vulkan backend.
pub fn displayUnrealized(_: *Vulkan) void {}
/// Frame-start hook; no-op for the Vulkan backend.
pub fn drawFrameStart(_: *Vulkan) void {}
/// Frame-end hook; no-op for the Vulkan backend.
pub fn drawFrameEnd(_: *Vulkan) void {}
/// Build the renderer's shader set against the shared device.
/// Panics (via `devicePtr`) if the shared device is not initialized.
pub fn initShaders(
    _: *const Vulkan,
    alloc: Allocator,
    /// For Vulkan these are SPIR-V binaries (loaded with
    /// `shadertoy.Target = .spv`), not GLSL strings — see
    /// `custom_shader_target`.
    custom_shaders: []const []const u8,
) !shaders.Shaders {
    const dev = devicePtr();
    return shaders.Shaders.init(alloc, dev, custom_shaders);
}
/// Create a render target of `width` x `height` for this surface.
///
/// Returns `error.UnsupportedPlatform` when the surface was not tagged
/// with the Vulkan platform. The dimensions are narrowed with
/// `@intCast` to the width/height types `Target.init` expects.
pub fn initTarget(self: *const Vulkan, width: usize, height: usize) !Target {
    // Per-surface platform: taken from `rt_surface` so the `present`
    // callback's `userdata` points at THIS surface's window. Splits and
    // tabs share the process-wide Device but each owns its own platform
    // copy — without per-surface routing here, all dmabuf frames would
    // funnel through whichever surface initialized the device first.
    const host_platform = surfacePlatform(self.rt_surface) orelse {
        return error.UnsupportedPlatform;
    };

    // SRGB format so the hardware gamma-encodes the linear
    // premultiplied shader output at framebuffer-write time. Without an
    // sRGB format the bytes in memory would be linear, and Qt (which
    // expects sRGB premultiplied) would treat them as already
    // gamma-encoded — colors would look far too dark. The DRM fourcc
    // the host sees is still ARGB8888; the sRGB encoding is a
    // Vulkan-side concern only.
    return Target.init(.{
        .device = devicePtr(),
        .format = vk.VK_FORMAT_B8G8R8A8_SRGB,
        .width = @intCast(width),
        .height = @intCast(height),
        .platform = host_platform,
    });
}
/// Convert the apprt's `Platform.Vulkan` callback struct into the
/// apprt-agnostic `Device.HostBootstrap`.
///
/// Each host callback is invoked once with `platform.userdata`. The
/// instance, physical device, device, queue and the
/// `vkGetInstanceProcAddr` resolver are all required: a null from any
/// of them yields `error.HostHandleMissing`.
fn bootstrapFromPlatform(
    platform: apprt.embedded.Platform.Vulkan,
) Device.Error!Device.HostBootstrap {
    const ud = platform.userdata;

    const vk_instance = platform.instance(ud) orelse
        return error.HostHandleMissing;
    const vk_physical_device = platform.physical_device(ud) orelse
        return error.HostHandleMissing;
    const vk_device = platform.device(ud) orelse
        return error.HostHandleMissing;
    const vk_queue = platform.queue(ud) orelse
        return error.HostHandleMissing;

    // Root proc-addr resolver, looked up by name through the host.
    const resolver = platform.get_instance_proc_addr(
        ud,
        "vkGetInstanceProcAddr",
    ) orelse return error.HostHandleMissing;

    return .{
        .instance = @ptrCast(vk_instance),
        .physical_device = @ptrCast(vk_physical_device),
        .device = @ptrCast(vk_device),
        .queue = @ptrCast(vk_queue),
        .queue_family_index = platform.queue_family_index(ud),
        .get_instance_proc_addr_raw = resolver,
    };
}
/// Return the surface's Vulkan platform callbacks, or null when the
/// surface carries any other platform tag. Callers are expected to
/// turn a null into `error.UnsupportedPlatform`.
///
/// Only compiles for the embedded runtime; any other runtime is
/// rejected with a compile error.
fn surfacePlatform(rt_surface: *apprt.Surface) ?apprt.embedded.Platform.Vulkan {
    if (apprt.runtime != apprt.embedded)
        @compileError("unsupported app runtime for Vulkan (embedded-only)");

    switch (rt_surface.platform) {
        .vulkan => |vulkan_platform| return vulkan_platform,
        else => return null,
    }
}
/// Report the current surface size, read from `rt_surface.size`.
pub fn surfaceSize(self: *const Vulkan) !struct { width: u32, height: u32 } {
    const dims = self.rt_surface.size;
    return .{ .width = dims.width, .height = dims.height };
}
/// Present `target` and remember it as this thread's most recently
/// presented target.
///
/// Relies on the frame having been fully submitted and waited on
/// before this is called. The original note attributes that to
/// `Frame.complete`, which is not visible from this function.
pub fn present(_: *Vulkan, target: *Target) !void {
    target.present();

    // Keep the target's ADDRESS (not a value copy) so that
    // `presentLastTarget` can re-present it on no-op frames. A later
    // `frame.resize` destroys the old Target and overwrites the
    // FrameState slot with a new one; following the pointer picks that
    // up transparently, whereas a value copy would leave us holding a
    // closed fd and freed VkImage handles.
    ThreadState.last_target = target;
}
/// Re-present the target most recently recorded by `present` on this
/// thread. Does nothing if no target has been recorded yet.
pub fn presentLastTarget(self: *Vulkan) !void {
    const last = ThreadState.last_target orelse return;
    try self.present(last);
}
/// Start recording a frame against `target` and return the `Frame`
/// recording context.
///
/// Steps, in order: ensure this thread's `ThreadState` is initialized,
/// reset the per-frame state via `ThreadState.beginFrameReset`, then
/// hand the thread's command buffer, fence and optional step pool to
/// `Frame.begin`. If either the reset or `Frame.begin` fails, the
/// `errdefer` below tries to put the frame fence back into the
/// signaled state before the error propagates.
pub fn beginFrame(
self: *const Vulkan,
renderer: *rendererpkg.Renderer,
target: *Target,
) !Frame {
_ = self;
const dev = devicePtr();
// Lazy per-thread resource init (no-op after the first frame on
// this thread). Sets up the command pool + buffer + fence +
// descriptor pool that get reused for every subsequent frame.
try ThreadState.ensureInit(dev);
// Reset this frame's per-frame state. The fence is the
// load-bearing piece for tear-down correctness: any error path that
// could leave the fence in an UNSIGNALED-with-no-pending-submit
// state will hang the next `Vulkan.deinit` on
// `waitForFences(UINT64_MAX)`.
//
// Defense: register the re-signal `errdefer` BEFORE the
// `beginFrameReset` call (which is the one that calls
// `vkResetFences`). If any reset fails, the errdefer fires
// an empty submit with this fence as the signal target,
// restoring the signaled state. Because it is registered for the
// rest of the function, it also fires when `Frame.begin` fails.
errdefer {
// Empty submit with this fence as the signal target is the
// simplest portable way to push it back to signaled without
// recording any commands. The fence in this errdefer can
// be in any of three states:
// 1. Reset by `beginFrameReset` (the failing path). The
// empty submit signals it cleanly.
// 2. Still in its prior-frame state (the resetFences call
// failed — spec says the fence is in an undefined
// state). The empty submit re-signals once any prior
// pending submit on the queue retires; queueSubmit
// spec semantics guarantee the fence is signaled
// after all earlier submits complete.
// 3. Driver-lost on DEVICE_LOST. queueSubmit returns
// DEVICE_LOST too; we fall back to deviceWaitIdle.
// The fallback `vkDeviceWaitIdle` is the actual safety net —
// without one of those signaling paths succeeding, the
// next `Vulkan.deinit` hangs on `waitForFences(UINT64_MAX)`.
const empty: vk.VkSubmitInfo = .{
.sType = vk.VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = null,
.waitSemaphoreCount = 0,
.pWaitSemaphores = null,
.pWaitDstStageMask = null,
.commandBufferCount = 0,
.pCommandBuffers = null,
.signalSemaphoreCount = 0,
.pSignalSemaphores = null,
};
const sr = dev.queueSubmit(1, &empty, ThreadState.frame_fence);
if (sr != vk.VK_SUCCESS) {
log.warn(
"beginFrame errdefer: empty queueSubmit failed " ++
"(result={}); waiting device idle to ensure the fence " ++
"doesn't hang the next deinit",
.{sr},
);
// Result intentionally discarded: this is already the
// last-resort path and there is nothing further to try.
_ = dev.dispatch.deviceWaitIdle(dev.device);
}
}
try ThreadState.beginFrameReset(dev);
return try Frame.begin(
.{
.cb = ThreadState.frame_cb,
.fence = ThreadState.frame_fence,
// Pass a pointer to the step pool when one exists.
.step_pool = if (ThreadState.step_pool) |*p| p else null,
},
dev,
renderer,
target,
);
}
// ---- buffer / texture / sampler option getters --------------------------
//
// `GenericRenderer` calls these without knowing or caring about Vulkan
// specifics; the returned `Options` structs are what each backend's
// resource wrapper expects to be passed back to its `init`. The
// Vulkan-flavored ones embed a `*const Device` reference plus
// backend-specific usage flags.
/// Pointer to the process-wide device. Panics if the device has not
/// been initialized.
inline fn devicePtr() *const Device {
    // Routed through a getter so that future refactors (e.g. moving
    // `Device` to the heap) stay local. Today the device is a
    // process-wide `?Device` populated in `Vulkan.init` BEFORE the
    // renderer's `FrameState.init` calls any of the option getters, so
    // a null here means device construction failed AND an option getter
    // was called anyway — a programming error, not a recoverable
    // runtime condition.
    return &(device orelse
        @panic("Vulkan.devicePtr: device not initialized — option getter called before Vulkan.init succeeded"));
}
/// Default buffer options: a vertex-buffer usage mask. Vulkan needs an
/// explicit usage bitmask, so callers wanting another kind use the
/// per-kind getters below. The receiver is unused; the device comes
/// from the process-wide `devicePtr()`.
pub fn bufferOptions(_: *const Vulkan) bufferpkg.Options {
    const opts: bufferpkg.Options = .{
        .device = devicePtr(),
        .usage = vk.VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
    };
    return opts;
}
/// Options for per-instance data buffers: vertex-buffer usage on the
/// shared device.
pub fn instanceBufferOptions(_: *const Vulkan) bufferpkg.Options {
    const opts: bufferpkg.Options = .{
        .device = devicePtr(),
        .usage = vk.VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
    };
    return opts;
}
/// Options for uniform buffers: uniform-buffer usage on the shared
/// device.
pub fn uniformBufferOptions(_: *const Vulkan) bufferpkg.Options {
    const opts: bufferpkg.Options = .{
        .device = devicePtr(),
        .usage = vk.VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
    };
    return opts;
}
/// Options for the foreground cell buffer; identical to
/// `instanceBufferOptions`.
pub fn fgBufferOptions(self: *const Vulkan) bufferpkg.Options {
    return instanceBufferOptions(self);
}
/// Options for the background cell buffer: storage-buffer usage on the
/// shared device.
pub fn bgBufferOptions(_: *const Vulkan) bufferpkg.Options {
    // The bg cells buffer is consumed as a STORAGE BUFFER by the
    // cell_bg fragment shader (binding `bg_cells`) and by the cell_text
    // vertex shader (same binding). The OpenGL backend doesn't
    // distinguish — every buffer is reusable across roles — but Vulkan
    // validates usage flags at descriptor-write time.
    const opts: bufferpkg.Options = .{
        .device = devicePtr(),
        .usage = vk.VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
    };
    return opts;
}
/// Options for image placement buffers; identical to
/// `instanceBufferOptions`.
pub fn imageBufferOptions(self: *const Vulkan) bufferpkg.Options {
    return instanceBufferOptions(self);
}
/// Options for the background-image buffer; identical to
/// `instanceBufferOptions`.
pub fn bgImageBufferOptions(self: *const Vulkan) bufferpkg.Options {
    return instanceBufferOptions(self);
}
/// General-purpose texture options: `B8G8R8A8_SRGB`, usable as a
/// sampled image, a transfer destination and a color attachment.
pub fn textureOptions(_: *const Vulkan) Texture.Options {
    // `textureOptions()`-shaped textures are used both for sampled-only
    // textures AND for the custom-shader back_texture, which is BOTH
    // sampled AND a render target, so the wider usage set is returned
    // to cover both. The format matches the `initTarget` choice
    // (`B8G8R8A8_SRGB`) so a render → sample → render chain through the
    // custom-shader pass keeps one color format throughout.
    const opts: Texture.Options = .{
        .device = devicePtr(),
        .format = vk.VK_FORMAT_B8G8R8A8_SRGB,
        .usage = vk.VK_IMAGE_USAGE_SAMPLED_BIT |
            vk.VK_IMAGE_USAGE_TRANSFER_DST_BIT |
            vk.VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
    };
    return opts;
}
/// Sampler options: linear min/mag filtering with clamp-to-edge
/// wrapping on both axes.
pub fn samplerOptions(_: *const Vulkan) Sampler.Options {
    const opts: Sampler.Options = .{
        .device = devicePtr(),
        .min_filter = .linear,
        .mag_filter = .linear,
        .wrap_s = .clamp_to_edge,
        .wrap_t = .clamp_to_edge,
    };
    return opts;
}
/// Re-export of `Texture.ImageTextureFormat` so callers can write
/// `Vulkan.ImageTextureFormat` — per the original note this matches
/// the `OpenGL.ImageTextureFormat` shape on the OpenGL side. The
/// definition itself lives next to `Texture`.
pub const ImageTextureFormat = Texture.ImageTextureFormat;
/// Options for an image texture: the Vulkan format comes from
/// `format.toVk(srgb)`, and the image is usable as a sampled image and
/// as a transfer destination.
pub fn imageTextureOptions(
    _: *const Vulkan,
    format: ImageTextureFormat,
    srgb: bool,
) Texture.Options {
    const opts: Texture.Options = .{
        .device = devicePtr(),
        .format = format.toVk(srgb),
        .usage = vk.VK_IMAGE_USAGE_SAMPLED_BIT |
            vk.VK_IMAGE_USAGE_TRANSFER_DST_BIT,
    };
    return opts;
}
/// Create a square texture sized `atlas.size` x `atlas.size` for a
/// font atlas. No initial data is passed (the last argument to
/// `Texture.init` is null).
///
/// Grayscale atlases map to `R8_UNORM` and BGRA atlases to
/// `B8G8R8A8_UNORM`; any other atlas format returns
/// `error.UnsupportedAtlasFormat`.
pub fn initAtlasTexture(
    _: *const Vulkan,
    atlas: *const font.Atlas,
) !Texture {
    const vk_format: vk.VkFormat = switch (atlas.format) {
        .grayscale => vk.VK_FORMAT_R8_UNORM,
        .bgra => vk.VK_FORMAT_B8G8R8A8_UNORM,
        else => return error.UnsupportedAtlasFormat,
    };
    const side = atlas.size;

    return Texture.init(
        .{
            .device = devicePtr(),
            .format = vk_format,
            .usage = vk.VK_IMAGE_USAGE_SAMPLED_BIT |
                vk.VK_IMAGE_USAGE_TRANSFER_DST_BIT,
        },
        side,
        side,
        null,
    );
}

View File

@ -6,6 +6,11 @@ pub const Backend = enum {
opengl,
metal,
webgl,
/// Vulkan is on this fork only. Embedded-only — the host owns
/// the VkInstance/Device/Queue and hands them in via
/// `ghostty_platform_vulkan_s`; libghostty renders against
/// those handles and exports the result as a dmabuf fd.
vulkan,
pub fn default(
target: std.Target,

View File

@ -838,14 +838,52 @@ pub fn Renderer(comptime GraphicsAPI: type) type {
defer arena.deinit();
const arena_alloc = arena.allocator();
// Load our custom shaders
const custom_shaders: []const [:0]const u8 = shadertoy.loadFromFiles(
arena_alloc,
self.config.custom_shaders,
GraphicsAPI.custom_shader_target,
) catch |err| err: {
log.warn("error loading custom shaders err={}", .{err});
break :err &.{};
// Load our custom shaders.
//
// GraphicsAPI advertises whether it can actually run them
// (`supports_custom_shaders`). The Vulkan backend currently
// can't — its post-pass / compositor pipeline that wires
// CustomShaderState.back_texture → frame.target through the
// user's shader hasn't been built yet. Loading + flagging
// `has_custom_shaders` anyway would route bg_color into the
// back_texture and leave frame.target blank. Skip the load
// when the backend can't consume the result, and emit a
// one-line warning so the user knows their config item was
// ignored.
const can_use_custom = !@hasDecl(GraphicsAPI, "supports_custom_shaders") or
GraphicsAPI.supports_custom_shaders;
const custom_shaders: []const []const u8 = if (can_use_custom)
(shadertoy.loadFromFiles(
arena_alloc,
self.config.custom_shaders,
.{
.target = GraphicsAPI.custom_shader_target,
// Optional per-backend hooks. Resolved at
// comptime via `@hasDecl`, so backends that
// don't need them stay free of extra-define /
// GLSL-rewrite logic.
.extra_defines = if (@hasDecl(GraphicsAPI, "custom_shader_extra_defines"))
GraphicsAPI.custom_shader_extra_defines
else
&.{},
.rewrite = if (@hasDecl(GraphicsAPI, "rewriteCustomShaderSource"))
GraphicsAPI.rewriteCustomShaderSource
else
null,
},
) catch |err| err: {
log.warn("error loading custom shaders err={}", .{err});
break :err &.{};
})
else custom: {
if (self.config.custom_shaders.value.items.len > 0) {
log.warn(
"custom-shader config ignored: backend lacks " ++
"post-pipeline support (Vulkan TODO)",
.{},
);
}
break :custom &.{};
};
const has_custom_shaders = custom_shaders.len > 0;
@ -1431,15 +1469,6 @@ pub fn Renderer(comptime GraphicsAPI: type) type {
self: *Self,
sync: bool,
) !void {
// const start = std.time.Instant.now() catch unreachable;
// const start_micro = std.time.microTimestamp();
// defer {
// const end = std.time.Instant.now() catch unreachable;
// log.warn(
// "[drawFrame time] start_micro={} duration={}ns",
// .{ start_micro, end.since(start) / std.time.ns_per_us },
// );
// }
// We hold a the draw mutex to prevent changes to any
// data we access while we're in the middle of drawing.
@ -1632,6 +1661,7 @@ pub fn Renderer(comptime GraphicsAPI: type) type {
self.images.draw(
&self.api,
self.shaders.pipelines.image,
frame.uniforms,
&pass,
.kitty_below_bg,
);
@ -1648,6 +1678,7 @@ pub fn Renderer(comptime GraphicsAPI: type) type {
self.images.draw(
&self.api,
self.shaders.pipelines.image,
frame.uniforms,
&pass,
.kitty_below_text,
);
@ -1675,6 +1706,7 @@ pub fn Renderer(comptime GraphicsAPI: type) type {
self.images.draw(
&self.api,
self.shaders.pipelines.image,
frame.uniforms,
&pass,
.kitty_above_text,
);
@ -1684,6 +1716,7 @@ pub fn Renderer(comptime GraphicsAPI: type) type {
if (self.overlay != null) self.images.draw(
&self.api,
self.shaders.pipelines.image,
frame.uniforms,
&pass,
.overlay,
);

View File

@ -105,6 +105,7 @@ pub const State = struct {
self: *State,
api: *GraphicsAPI,
pipeline: GraphicsAPI.Pipeline,
uniforms: GraphicsAPI.Buffer(GraphicsAPI.shaders.Uniforms),
pass: *GraphicsAPI.RenderPass,
placement_type: DrawPlacements,
) void {
@ -168,6 +169,21 @@ pub const State = struct {
pass.step(.{
.pipeline = pipeline,
// Bind uniforms explicitly per image step. Without
// this, the image pipeline relied on whatever
// uniforms a previous (cell_bg / cell_text) step
// happened to bind in the same render pass — which works
// if the renderer always draws cells before images,
// but a race on first-frame init (the precompiled-SPV
// path returned from Shaders.init fast enough that
// image.draw could fire before the cell steps had
// populated the descriptor set) showed the image
// shader reading a garbage cell_size from a stale
// UBO binding, producing image quads that covered
// the entire viewport. A defensive explicit bind
// makes the image pipeline's UBO source independent
// of prior-step ordering.
.uniforms = uniforms.buffer,
.buffers = &.{buf.buffer},
.textures = &.{texture},
.draw = .{

View File

@ -43,5 +43,10 @@ void main() {
vec2 image_pos = (cell_size * grid_pos) + cell_offset;
image_pos += dest_size * corner;
gl_Position = projection_matrix * vec4(image_pos.xy, 1.0, 1.0);
// Z=0 (not 1) so we land in the middle of Vulkan's [0,1] NDC
// depth range after `ortho2d`'s `-1` z scale. OpenGL accepts
// either since there's no depth attachment, but Vulkan clips
// NDC z<0 (which `vec4(_, _, 1.0, 1.0)` would produce) and
// erases the entire image. Matches `cell_text.v.glsl`.
gl_Position = projection_matrix * vec4(image_pos.xy, 0.0, 1.0);
}

View File

@ -49,4 +49,24 @@ layout(location = 0) out vec4 _fragColor;
#define texture2D texture
void mainImage( out vec4 fragColor, in vec2 fragCoord );
void main() { mainImage (_fragColor, gl_FragCoord.xy); }
// Vulkan-only: wrap `texture(sampler2D, vec2)` so iChannel0
// (back_texture, in Vulkan top-left orientation) appears to
// the author in OpenGL/shadertoy convention (lower-left).
// Defined BEFORE the `#define`, so the inner `texture(s, ...)`
// call here resolves to the GLSL built-in, not back to ourselves
// (no preprocessor recursion).
#ifdef GHASTTY_VULKAN
vec4 _ghastty_tex2d(sampler2D s, vec2 uv) {
return texture(s, vec2(uv.x, 1.0 - uv.y));
}
#define texture _ghastty_tex2d
#endif
void main() {
#ifdef GHASTTY_VULKAN
mainImage(_fragColor, vec2(gl_FragCoord.x, iResolution.y - gl_FragCoord.y));
#else
mainImage(_fragColor, gl_FragCoord.xy);
#endif
}

View File

@ -40,16 +40,69 @@ pub const Uniforms = extern struct {
};
/// The target to load shaders for.
pub const Target = enum { glsl, msl };
///
/// - `.glsl`: roundtripped through SPIR-V back to GLSL via
/// spirv-cross. Normalizes/validates the source. The OpenGL
/// backend consumes this.
/// - `.msl`: spirv-cross translation to Metal Shading Language.
/// - `.spv`: raw SPIR-V binary (no spirv-cross roundtrip). The
/// Vulkan backend consumes this — Vulkan compiles GLSL → SPIR-V
/// itself via glslang for its built-in shaders, and feeding
/// the user shader through GLSL → SPIR-V → GLSL → SPIR-V again costs
/// 2× the compile work AND loses the original source structure
/// (which broke our `gl_FragCoord` Y-flip rewrite when the
/// spirv-cross-emitted main() didn't match the upstream prefix).
pub const Target = enum { glsl, msl, spv };
/// Optional GLSL → GLSL rewriter applied between the prefix splice
/// and the SPIR-V compile. Vulkan plugs in `vulkanizeGlsl` here so
/// SPIR-V output uses the renderer's multi-set descriptor layout;
/// other backends pass `null`. Owns its allocation under the
/// caller's allocator (`shadertoy.loadFromFile` runs it inside an
/// arena that's torn down at function exit, so the rewriter's
/// returned slice may be arena-owned).
pub const Rewriter = *const fn (
alloc: Allocator,
src: []const u8,
) Allocator.Error![:0]const u8;
/// What `loadFromFile`/`loadFromFiles` need beyond the path itself.
/// Keeps the function decoupled from any specific backend — every
/// backend-flavored knob becomes an explicit field, and `shadertoy`
/// itself reaches into no other backend's submodules.
pub const LoadOptions = struct {
/// Output language / format. See `Target` for the per-variant
/// rationale.
target: Target,
/// `#define <body>` lines injected after the prefix's
/// `#version` directive. Vulkan passes
/// `&.{"GHASTTY_VULKAN 1"}` so the prefix's `main()` flips
/// `gl_FragCoord.y` and wraps `texture()` for upper-left
/// sampling; OpenGL/MSL pass `&.{}`.
extra_defines: []const []const u8 = &.{},
/// Optional second-pass GLSL transform run between the prefix
/// splice and the SPIR-V compile. Vulkan installs
/// `vulkan/shaders.zig:vulkanizeGlsl` here for the multi-set
/// descriptor layout rewrite; other backends leave it null.
rewrite: ?Rewriter = null,
};
/// Load a set of shaders from files and convert them to the target
/// format. The shader order is preserved.
///
/// Result element type depends on `opts.target`: `.glsl`/`.msl`
/// produce null-terminated UTF-8 source strings; `.spv` produces
/// SPIR-V binary bytes (4-byte-aligned, no trailing null). We unify
/// the return type as `[]const []const u8` and have the caller cast/
/// reinterpret as needed.
pub fn loadFromFiles(
alloc_gpa: Allocator,
paths: configpkg.RepeatablePath,
target: Target,
) ![]const [:0]const u8 {
var list: std.ArrayList([:0]const u8) = .empty;
opts: LoadOptions,
) ![]const []const u8 {
var list: std.ArrayList([]const u8) = .empty;
defer list.deinit(alloc_gpa);
errdefer for (list.items) |shader| alloc_gpa.free(shader);
@ -59,13 +112,19 @@ pub fn loadFromFiles(
.required => |path| .{ path, false },
};
const shader = loadFromFile(alloc_gpa, path, target) catch |err| {
const shader = loadFromFile(alloc_gpa, path, opts) catch |err| {
if (err == error.FileNotFound and optional) {
continue;
}
return err;
};
// Take ownership of `shader` immediately. If the subsequent
// `list.append` itself OOMs, the freshly-loaded slice would
// leak — the `errdefer` at the function level only iterates
// `list.items`, and `shader` isn't in `list` yet. Free it
// explicitly on the error path before propagating.
errdefer alloc_gpa.free(shader);
log.info("loaded custom shader path={s}", .{path});
try list.append(alloc_gpa, shader);
}
@ -75,11 +134,16 @@ pub fn loadFromFiles(
/// Load a single shader from a file and convert it to the target language
/// ready to be used with renderers.
///
/// For `.glsl` / `.msl` the returned slice is a null-terminated UTF-8
/// source string; the underlying allocation is `[:0]const u8` and
/// callers that need the sentinel may safely cast. For `.spv` the
/// returned slice is raw SPIR-V bytes — no terminator, 4-byte aligned.
pub fn loadFromFile(
alloc_gpa: Allocator,
path: []const u8,
target: Target,
) ![:0]const u8 {
opts: LoadOptions,
) ![]const u8 {
var arena = ArenaAllocator.init(alloc_gpa);
defer arena.deinit();
const alloc = arena.allocator();
@ -97,14 +161,33 @@ pub fn loadFromFile(
);
};
// Convert to full GLSL
const glsl: [:0]const u8 = glsl: {
// Convert to full GLSL. `opts.extra_defines` lets a backend
// inject `#define <body>` lines after the prefix's `#version`
// directive — Vulkan uses this to flip `gl_FragCoord.y` and
// wrap `texture()` for upper-left sampling so `mainImage` sees
// shadertoy-convention coords; OpenGL/MSL pass `&.{}` and use
// the GL-native paths unchanged.
const glsl_raw: [:0]const u8 = glsl: {
var stream: std.Io.Writer.Allocating = .init(alloc);
try glslFromShader(&stream.writer, src);
try glslFromShader(&stream.writer, src, opts.extra_defines);
try stream.writer.writeByte(0);
break :glsl stream.written()[0 .. stream.written().len - 1 :0];
};
// Optional second-pass GLSL transform. Vulkan installs
// `vulkanizeGlsl` here so the resulting SPIR-V uses the
// renderer's multi-set descriptor layout (UBO=set 0,
// samplers=set 1, storage=set 2). Without that rewrite,
// glslang assigns everything to `set 0` and the post pipeline's
// descriptor set layout points at the wrong slots — the
// shader's `iChannel0` ends up at set 0 binding 0 while the
// pipeline binds it at set 1 binding 0, so sampling returns
// garbage / zero and the output is transparent.
const glsl: [:0]const u8 = if (opts.rewrite) |f|
try f(alloc, glsl_raw)
else
glsl_raw;
// Convert to SPIR-V
const spirv: []const u8 = spirv: {
var stream: std.Io.Writer.Allocating = .init(alloc);
@ -129,12 +212,47 @@ pub fn loadFromFile(
break :spirv list.items;
};
// Convert to MSL
return switch (target) {
// Important: using the alloc_gpa here on purpose because this
// is the final result that will be returned to the caller.
// Validate the SPIR-V regardless of target. glslang has succeeded
// at this point but a zero-length output would crash
// `vkCreateShaderModule` on the Vulkan path AND would make
// `glslFromSpv` / `mslFromSpv` produce empty/garbage GLSL/MSL
// with poor diagnostics. Hoist the checks above the switch so
// every backend gets the same defensive validation.
if (spirv.len < 4) {
std.log.warn(
"shadertoy: empty SPIR-V output (size={})",
.{spirv.len},
);
return error.InvalidShader;
}
// First 4 bytes are the SPIR-V magic word 0x07230203
// (little-endian). Reject anything else loudly.
const magic = std.mem.readInt(u32, spirv[0..4], .little);
if (magic != 0x07230203) {
std.log.warn(
"shadertoy: SPIR-V output missing magic word " ++
"(got 0x{x:0>8}, expected 0x07230203)",
.{magic},
);
return error.InvalidShader;
}
// Important: using the alloc_gpa here on purpose because this is
// the final result that will be returned to the caller (the arena
// gets torn down on function exit).
return switch (opts.target) {
.glsl => try glslFromSpv(alloc_gpa, spirv),
.msl => try mslFromSpv(alloc_gpa, spirv),
.spv => spv: {
// Copy the SPIR-V binary out of the arena into a
// 4-byte-aligned allocation under `alloc_gpa`. Vulkan
// expects `pCode: []const u32`, so over-aligning is safe;
// we return as `[]const u8` to share the unified return
// type with the GLSL/MSL paths.
const dst = try alloc_gpa.alignedAlloc(u8, .of(u32), spirv.len);
@memcpy(dst, spirv);
break :spv dst;
},
};
}
@ -144,19 +262,97 @@ pub fn loadFromFile(
/// mainImage function and don't define any of the uniforms. This function
/// will convert the ShaderToy shader into a valid GLSL shader that can be
/// compiled and linked.
pub fn glslFromShader(writer: *std.Io.Writer, src: []const u8) !void {
pub fn glslFromShader(
writer: *std.Io.Writer,
src: []const u8,
/// Macros to inject as `#define <body>` lines after the prefix's
/// `#version` directive (GLSL requires `#version` first, so we
/// can't simply prepend). Empty for the default OpenGL/MSL paths;
/// the Vulkan SPV path uses this to flag the prefix's `main()`
/// to Y-flip `gl_FragCoord`.
defines: []const []const u8,
) !void {
const prefix = @embedFile("shaders/shadertoy_prefix.glsl");
try writer.writeAll(prefix);
if (defines.len == 0) {
try writer.writeAll(prefix);
} else {
// GLSL requires `#version` to be the first non-blank line,
// so we can't simply prepend defines. Find the first
// newline after `#version ` and inject defines on the
// following line.
//
// The prefix is `@embedFile`'d at comptime, so its bytes
// are known to the compiler — assert once here that it
// contains a newline rather than threading branchy fallback
// paths through the runtime. A future prefix edit that removes
// every newline will fail at comptime, not silently at
// runtime.
comptime {
if (std.mem.indexOfScalar(u8, prefix, '\n') == null) {
@compileError(
"shadertoy_prefix.glsl must contain at least one newline " ++
"for `#define` injection — see glslFromShader",
);
}
if (!std.mem.startsWith(u8, prefix, "#version")) {
@compileError(
"shadertoy_prefix.glsl must start with `#version` " ++
"(GLSL spec requirement) — see glslFromShader",
);
}
}
const first_nl = comptime std.mem.indexOfScalar(u8, prefix, '\n').?;
try writer.writeAll(prefix[0 .. first_nl + 1]);
for (defines) |def| {
try writer.writeAll("#define ");
try writer.writeAll(def);
try writer.writeAll("\n");
}
try writer.writeAll(prefix[first_nl + 1 ..]);
}
try writer.writeAll("\n\n");
try writer.writeAll(src);
}
/// Process-wide cache of compiled SPIR-V keyed by GLSL source bytes.
/// The C-API glslang path (`Shader.create` / `program.spirvGenerate`)
/// used below pulls allocations from glslang's thread-local
/// TPoolAllocator on every call — pages that are never released
/// because Zig pthreads don't run C++ thread_local destructors. With
/// N tabs each calling `loadFromFiles` → `loadFromFile` →
/// `spirvFromGlsl` for the same custom shader file, that's N
/// renderer threads each leaking a per-thread pool. Caching the SPV
/// bytes lets every call after the first short-circuit without
/// touching glslang.
///
/// Same problem and same fix as the C++ shim's spv_cache in
/// pkg/glslang/override/ghastty_vk_shim.cpp; this one covers the
/// C-API path that the shim doesn't see.
var spv_cache_mutex: std.Thread.Mutex = .{};
var spv_cache: std.StringHashMapUnmanaged([]const u8) = .empty;
/// Convert a GLSL shader into SPIR-V assembly.
pub fn spirvFromGlsl(
writer: *std.Io.Writer,
errlog: ?*SpirvLog,
src: [:0]const u8,
) !void {
// Cache check. On hit, write the cached SPV to the writer and
// return without entering glslang. Strict-equality keying on
// the source bytes (the slice `src[0..src.len]`) — the input is
// deterministically generated upstream from a stable shader
// file + a small set of `#define` lines, so identical sources
// produce identical SPV.
{
spv_cache_mutex.lock();
defer spv_cache_mutex.unlock();
const key: []const u8 = src[0..src.len];
if (spv_cache.get(key)) |cached| {
try writer.writeAll(cached);
return;
}
}
// So we can run unit tests without fear.
if (builtin.is_test) try glslang.testing.ensureInit();
@ -205,6 +401,26 @@ pub fn spirvFromGlsl(
const ptr_u8: [*]u8 = @ptrCast(ptr);
const slice_u8: []u8 = ptr_u8[0 .. size * 4];
try writer.writeAll(slice_u8);
// Populate the cache so the next surface's compile of the same
// source short-circuits. Allocations are process-lifetime
// (smp_allocator, never freed once inserted) — the keys + values
// are bounded by the number of distinct shaders the user has
// configured, which is small (typically 1-3); even at 100 KB per
// shader the total cache cost is negligible against the per-tab
// pool pages we'd otherwise leak.
//
// Caching is best-effort: the SPIR-V has already been written to
// `writer`, so an allocation failure here only skips the insert.
// Each failure path frees what it allocated explicitly. `errdefer`
// cannot do that job here: `catch return` in a `!void` function is
// a *successful* return, so an `errdefer` would never run and the
// key copy would leak.
spv_cache_mutex.lock();
defer spv_cache_mutex.unlock();
const key: []const u8 = src[0..src.len];
if (!spv_cache.contains(key)) {
    const cache_alloc = std.heap.smp_allocator;
    const key_copy = cache_alloc.dupe(u8, key) catch return;
    const spv_copy = cache_alloc.dupe(u8, slice_u8) catch {
        cache_alloc.free(key_copy);
        return;
    };
    spv_cache.put(cache_alloc, key_copy, spv_copy) catch {
        cache_alloc.free(spv_copy);
        cache_alloc.free(key_copy);
        return;
    };
}
}
/// Retrieve errors from spirv compilation.
@ -348,7 +564,7 @@ fn spvCross(
/// Test helper: runs `src` through `glslFromShader` into an allocating
/// writer and returns the output as a NUL-terminated slice allocated
/// from `alloc`. The caller owns the returned slice.
fn testGlslZ(alloc: Allocator, src: []const u8) ![:0]const u8 {
var buf: std.Io.Writer.Allocating = .init(alloc);
defer buf.deinit();
// NOTE(review): two `glslFromShader` calls with different arity
// appear here; this looks like the removed/added pair of a diff
// hunk. Confirm that only the three-argument call is intended.
try glslFromShader(&buf.writer, src);
try glslFromShader(&buf.writer, src, &.{});
return try buf.toOwnedSliceSentinel(0);
}
@ -424,4 +640,3 @@ test "shadertoy to glsl" {
const test_crt = @embedFile("shaders/test_shadertoy_crt.glsl");
const test_invalid = @embedFile("shaders/test_shadertoy_invalid.glsl");
const test_focus = @embedFile("shaders/test_shadertoy_focus.glsl");

View File

@ -0,0 +1,242 @@
//! Per-draw recording context. Lifecycle: `begin` → caller records
//! commands (via the `renderPass()` accessor) → `complete`.
//!
//! Unlike `opengl/Frame.zig` (which is a zero-state wrapper around
//! the implicit GL context), Vulkan's Frame drives the explicit
//! sync model: a fence is signaled when the GPU finishes the
//! frame's submit, and `complete` waits on it before handing the
//! dmabuf fd to the host. That's required for correctness — the
//! host shouldn't sample memory the GPU is still writing — and
//! acceptable for perf because terminal frames cap at ~60Hz.
//!
//! Ownership: the command buffer and fence are owned by the
//! top-level renderer (`Vulkan.zig`, not yet wired) and passed into
//! `begin` via `Options`. Frame just borrows them. The top-level
//! is responsible for creating/destroying them and for resetting
//! the fence to unsignaled state before `begin` (this layer would
//! conflate ownership otherwise).
//!
//! Why not semaphores? With dmabuf export to the host (rather than
//! a `VkSwapchain` we own), we have no acquire/present semaphore
//! pair to sync against. Fence-only is the right model when
//! libghostty hands the host a "GPU is done writing to this fd"
//! guarantee at present time. The host's own compositor handles
//! display sync from there.
//!
//! `renderPass()` opens a `vulkan/RenderPass.zig` pass that records
//! into this frame's command buffer, forwarding the frame's device,
//! command buffer and per-frame descriptor pool.
//!
//! Counterpart: `src/renderer/opengl/Frame.zig`.
const Self = @This();
const std = @import("std");
const vulkan = @import("vulkan");
const vk = vulkan.c;
const Device = vulkan.Device;
const DescriptorPool = vulkan.DescriptorPool;
const Target = @import("Target.zig");
const RenderPass = @import("RenderPass.zig");
const Vulkan = @import("../Vulkan.zig");
const Renderer = @import("../generic.zig").Renderer(Vulkan);
const Health = @import("../../renderer.zig").Health;
const log = std.log.scoped(.vulkan);
/// Caller-owned resources that a `Frame` borrows for one draw. `begin`
/// copies these handles into the `Frame` it returns.
pub const Options = struct {
/// Command buffer this frame's commands record into. Caller
/// resets it to a fresh state before `begin` is called.
cb: vk.VkCommandBuffer,
/// Fence that gets signaled when the submit completes. Caller
/// resets it to unsignaled before `begin` is called.
fence: vk.VkFence,
/// Per-frame descriptor pool. `RenderPass.step` borrows it for
/// the per-call descriptor sets it allocates whenever a
/// pipeline is re-used within a single pass. The pool is
/// caller-owned (top-level `Vulkan.zig` keeps it threadlocal)
/// and must be reset (`vkResetDescriptorPool`) by the caller
/// before each Frame.begin so this frame's allocations don't
/// pile on the previous frame's.
step_pool: ?*DescriptorPool = null,
};
pub const Error = error{
/// Returned by `begin` when `vkBeginCommandBuffer` reports a
/// non-success status. `complete` returns `void`: failures of
/// `vkEndCommandBuffer` / `vkQueueSubmit` / `vkWaitForFences`
/// there are logged and reported to the renderer as `.unhealthy`
/// via `frameCompleted` rather than returned.
VulkanFailed,
};
/// Device whose dispatch table and queue this frame records and
/// submits with.
device: *const Device,
/// Generic renderer; `complete` calls its `api.present` and
/// `frameCompleted`.
renderer: *Renderer,
/// Render target; `complete` records its present barrier and hands
/// it to `present`.
target: *Target,
/// Borrowed command buffer (see `Options.cb`).
cb: vk.VkCommandBuffer,
/// Borrowed fence passed to the queue submit and waited on in
/// `complete` (see `Options.fence`).
fence: vk.VkFence,
/// Borrowed per-frame descriptor pool forwarded to `renderPass`
/// (see `Options.step_pool`).
step_pool: ?*DescriptorPool = null,
/// Start recording a frame on `opts.cb`.
///
/// The command buffer is begun with `ONE_TIME_SUBMIT`, since it is
/// always submitted before the next `begin` re-records it. This
/// function does not reset anything itself: per `Options`, the caller
/// resets the command buffer and the fence before calling.
///
/// Returns a `Frame` that borrows `device`, `renderer`, `target` and
/// the handles in `opts`, or `error.VulkanFailed` if
/// `vkBeginCommandBuffer` reports a non-success status.
pub fn begin(
    opts: Options,
    device: *const Device,
    renderer: *Renderer,
    target: *Target,
) Error!Self {
    const info: vk.VkCommandBufferBeginInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .pNext = null,
        .flags = vk.VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
        .pInheritanceInfo = null,
    };

    const status = device.dispatch.beginCommandBuffer(opts.cb, &info);
    if (status == vk.VK_SUCCESS) {
        return .{
            .device = device,
            .renderer = renderer,
            .target = target,
            .cb = opts.cb,
            .fence = opts.fence,
            .step_pool = opts.step_pool,
        };
    }

    log.err("vkBeginCommandBuffer (frame) failed: result={}", .{status});
    return error.VulkanFailed;
}
/// End recording, submit to the queue with `self.fence`, and (if
/// `sync` is true, which it always is for our dmabuf-export model)
/// wait on the fence so the GPU is guaranteed to be done before
/// the host imports the target's dmabuf.
///
/// `sync == false` is accepted by the interface for parity with
/// `opengl/Frame.zig`, but currently still does the wait — without
/// it, handing the dmabuf fd to the host would race the GPU. The
/// argument may eventually drive multi-frame pipelining once a
/// proper queue of frames is in flight.
///
/// Never returns an error: failures are logged and reported to the
/// renderer as `.unhealthy` through `frameCompleted`, which is called
/// on every path. `present` is only attempted when everything before
/// it succeeded.
pub fn complete(self: *const Self, sync: bool) void {
// `sync` is part of the cross-backend `Frame.complete` interface
// (OpenGL / Metal / Vulkan all share it). The Vulkan path is
// always synchronous today: we waitForFences before handing the
// dmabuf fd to the host, and the host cannot sample a buffer
// mid-GPU-write. So `sync=false` is silently treated as
// `sync=true`. If multi-frame pipelining ever lands, this is
// where the param would gate the wait.
_ = sync;
const dev = self.device;
// `health` becomes `.unhealthy` on any GPU-side error below. We
// ALWAYS run `buffer_pool.cycle` and `frameCompleted` on the
// way out — skipping them on error left every retired buffer
// stuck in `pending` (unbounded growth) and held the renderer's
// swap-chain semaphore forever, so the NEXT `drawFrame` would
// hang with no diagnostic.
var health: Health = .healthy;
// Set once `queueSubmit` has succeeded; gates the device-idle
// wait on the error path below.
var submitted = false;
// Make the rendered pixels visible to the host's mmap read. In
// `.direct` mode this is just a memory barrier; in `.legacy_copy`
// mode it also runs `vkCmdCopyImageToBuffer`. See `Target.zig`.
self.target.recordPresentBarrier(self.cb);
// Labeled block: the first failing step marks the frame unhealthy
// and breaks out, skipping the remaining GPU steps but not the
// cleanup that follows the block.
end_cb: {
const r = dev.dispatch.endCommandBuffer(self.cb);
if (r != vk.VK_SUCCESS) {
log.err("vkEndCommandBuffer (frame) failed: result={}", .{r});
health = .unhealthy;
break :end_cb;
}
const submit_info: vk.VkSubmitInfo = .{
.sType = vk.VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = null,
.waitSemaphoreCount = 0,
.pWaitSemaphores = null,
.pWaitDstStageMask = null,
.commandBufferCount = 1,
.pCommandBuffers = &self.cb,
.signalSemaphoreCount = 0,
.pSignalSemaphores = null,
};
// Externally-synchronized via `Device.queueSubmit` — splits
// and tabs share the host's VkQueue and Vulkan rejects
// concurrent unsynchronized access.
const sr = dev.queueSubmit(1, &submit_info, self.fence);
if (sr != vk.VK_SUCCESS) {
log.err("vkQueueSubmit (frame) failed: result={}", .{sr});
health = .unhealthy;
break :end_cb;
}
submitted = true;
// Wait for the GPU to finish writing the target before letting
// the host import the dmabuf. UINT64_MAX = "wait indefinitely".
const wr = dev.dispatch.waitForFences(
dev.device,
1,
&self.fence,
vk.VK_TRUE,
std.math.maxInt(u64),
);
if (wr != vk.VK_SUCCESS) {
log.err("vkWaitForFences (frame) failed: result={}", .{wr});
health = .unhealthy;
}
}
// Recycle the per-frame Buffer pool. Even on the error path we
// still want to cycle: buffers that the failed submit referenced
// are now stuck (we can't prove the GPU is done with them), so
// we conservatively wait the device idle when submit DID happen
// but the fence wait failed (DEVICE_LOST etc.) before draining.
// Without that wait, every failed submit could leak the buffers
// the renderer queued for the frame.
if (health == .unhealthy and submitted) {
// Result deliberately discarded: this is already the error path.
_ = dev.dispatch.deviceWaitIdle(dev.device);
}
Vulkan.buffer_pool.cycle(dev);
// Hand the rendered target off to the host. On the unhealthy
// path we skip present — the dmabuf may be partially written
// and the host should see the previous frame instead (the
// generic renderer's no-op-frame logic re-presents
// `last_target`).
if (health == .healthy) {
self.renderer.api.present(self.target) catch |err| {
log.err("present failed: {}", .{err});
health = .unhealthy;
};
}
// Tell the generic renderer the frame is done so it releases the
// swap-chain semaphore. Without this, `SwapChain.nextFrame()`
// blocks the second call to `drawFrame` forever (one buffer in
// the chain, never freed). MUST run regardless of `health`.
self.renderer.frameCompleted(health);
}
/// Open a render pass that records into this frame's command buffer.
///
/// The frame's device, command buffer and per-frame descriptor pool
/// are forwarded to `RenderPass.begin` together with `attachments`.
/// Record draw work on the returned `RenderPass` with `step()` and
/// finish it with `complete()`.
pub inline fn renderPass(
    self: *const Self,
    attachments: []const RenderPass.Options.Attachment,
) RenderPass {
    return RenderPass.begin(.{
        .attachments = attachments,
        .step_pool = self.step_pool,
        .cb = self.cb,
        .device = self.device,
    });
}
// Reference every declaration in this file so nested tests are
// compiled and run.
test {
std.testing.refAllDecls(@This());
}

View File

@ -0,0 +1,466 @@
//! `VkPipeline` (graphics) + the `VkPipelineLayout` that backs it.
//!
//! Vulkan 1.3 with **dynamic rendering**: we use
//! `VkPipelineRenderingCreateInfo` (chained into the pipeline create
//! info via `pNext`) instead of constructing a `VkRenderPass` + a
//! framebuffer per target. This removes the entire RenderPass /
//! Framebuffer object lifecycle — which the OpenGL backend never had
//! to think about — and saves significant boilerplate.
//!
//! Wrapper scope: the renderer-level "what shaders + what attachment
//! format" lives in `vulkan/shaders.zig`'s eventual `Shaders` struct
//! (mirroring `opengl/shaders.zig`). This file is the bare
//! `VkPipeline` wrapper that takes everything explicitly:
//! pre-compiled shader modules, descriptor set layouts, push
//! constant ranges, vertex input description, color attachment
//! format. The renderer's pipeline-collection assembly layer is
//! responsible for plumbing those together — Pipeline.zig has no
//! per-shader knowledge.
//!
//! Counterpart: `src/renderer/opengl/Pipeline.zig`.
const Self = @This();
const std = @import("std");
const vulkan = @import("vulkan");
const vk = vulkan.c;
const Device = vulkan.Device;
const DescriptorPool = vulkan.DescriptorPool;
const log = std.log.scoped(.vulkan);
/// How a vertex buffer binding advances. `init` translates this into
/// a `VkVertexInputRate` for binding 0.
pub const StepFunction = enum {
/// Constant value across all vertices (no vertex input). `init`
/// maps this to `VK_VERTEX_INPUT_RATE_VERTEX`, the same as
/// `per_vertex`.
constant,
/// One per vertex (`VK_VERTEX_INPUT_RATE_VERTEX`).
per_vertex,
/// One per instance (`VK_VERTEX_INPUT_RATE_INSTANCE`).
per_instance,
};
/// Vertex input description. Pass `null` for shaders that don't read
/// vertex attributes (e.g. screen-quad shaders that derive position
/// from `gl_VertexIndex`).
pub const VertexInput = struct {
/// Byte stride of the vertex buffer.
stride: u32,
/// Whether the buffer is stepped per-vertex or per-instance.
step_fn: StepFunction = .per_vertex,
/// `binding = 0` attribute descriptions describing each field of
/// the vertex struct. The caller is responsible for building
/// these (offsets, formats) — Pipeline doesn't introspect. The
/// slice is passed to Vulkan as-is and only needs to outlive the
/// `init` call.
attributes: []const vk.VkVertexInputAttributeDescription,
};
/// Maximum descriptor sets a single pipeline can address. The
/// preprocessor in `shaders.zig` bins resources into 3 sets (UBO=0,
/// sampler=1, storage=2), so 3 is sufficient. Bump if/when a fourth
/// resource class is introduced. `init` returns `error.VulkanFailed`
/// when `Options.descriptor_set_layouts` is longer than this, and the
/// fixed-size arrays in this file are initialized with exactly this
/// many entries.
pub const MAX_DESCRIPTOR_SETS: usize = 3;
/// Everything `init` needs to build a pipeline. All handles are
/// supplied by the caller; `Pipeline` creates only the pipeline
/// layout and the pipeline itself.
pub const Options = struct {
/// Device the pipeline layout and pipeline are created on.
device: *const Device,
/// Optional descriptor pool. If provided, `Pipeline.init`
/// allocates one descriptor set per non-null entry in
/// `descriptor_set_layouts` and stores them on
/// `Pipeline.descriptor_sets[i]`, indexed by set number.
/// `RenderPass.step` updates + binds them per frame.
descriptor_pool: ?*DescriptorPool = null,
/// Shader modules. The caller owns these — Pipeline does not
/// destroy them on deinit (they're typically reused across
/// multiple pipelines and outlive any one of them).
vertex_module: vk.VkShaderModule,
fragment_module: vk.VkShaderModule,
/// Optional vertex input. `null` → no vertex bindings.
vertex_input: ?VertexInput = null,
/// Per-set descriptor layouts. Element i corresponds to `set = i`
/// in the shader. `null` slots are placeholders for sets the
/// pipeline doesn't actually use — Vulkan requires the pipeline
/// layout's `pSetLayouts` to be contiguous up to the max used
/// set number, so we substitute `empty_set_layout` for nulls.
/// At most `MAX_DESCRIPTOR_SETS` entries.
descriptor_set_layouts: []const ?vk.VkDescriptorSetLayout = &.{},
/// 0-binding placeholder layout used to fill `null` entries in
/// `descriptor_set_layouts`. Required when any entry is null;
/// can stay null when every entry is non-null. Owned by the
/// caller (`Shaders.init` caches one and reuses it).
empty_set_layout: vk.VkDescriptorSetLayout = null,
/// Push constant ranges referenced by the shaders.
push_constant_ranges: []const vk.VkPushConstantRange = &.{},
/// Default sampler the pipeline uses for every
/// combined-image-sampler binding the caller doesn't supply a
/// sampler for. Lets the renderer pass plain `textures` (parallel
/// to OpenGL's per-texture `glBindTextureUnit` model) without
/// having to also track per-binding samplers; the pipeline knows
/// the right sampler for its own atlases (e.g. cell_text uses
/// unnormalized coords for `sampler2D` standing in for the old
/// `sampler2DRect`). The handle is borrowed, not owned by
/// `Pipeline` — `Shaders.init` owns the lifetime.
sampler: vk.VkSampler = null,
/// Color attachment format. With dynamic rendering this must
/// match the format of the image the renderer eventually targets
/// in `vkCmdBeginRendering`.
color_format: vk.VkFormat,
/// Pre-multiplied-alpha source-over blending. Disable for
/// the bg_color pass (full opaque background).
blending_enabled: bool = true,
/// Primitive topology. The renderer's shaders use TRIANGLE_STRIP
/// for the full-screen quad and TRIANGLE_LIST for instanced cells.
topology: vk.VkPrimitiveTopology = vk.VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
};
pub const Error = error{
/// `vkCreatePipelineLayout` or `vkCreateGraphicsPipelines`
/// returned a non-success status. `init` also returns this when
/// more than `MAX_DESCRIPTOR_SETS` layouts are passed, when a
/// layout slot is null without an `empty_set_layout`, or when a
/// descriptor set allocation from the pool fails.
VulkanFailed,
};
/// Device the pipeline was created on; used again by `deinit`.
device: *const Device,
/// The graphics pipeline. Destroyed by `deinit`.
pipeline: vk.VkPipeline,
/// The pipeline layout backing `pipeline`. Destroyed by `deinit`.
layout: vk.VkPipelineLayout,
/// Descriptor sets allocated from `opts.descriptor_pool`, indexed by
/// set number. `descriptor_sets[i]` is the set bound at `set = i` in
/// the shader; `null` means the pipeline doesn't use that set (so
/// `RenderPass.step` skips updating/binding it), or that no pool was
/// supplied to `init`. `set_count` is the length of
/// `Options.descriptor_set_layouts` as passed to `init`, with null
/// placeholder slots included.
/// NOTE(review): presumably this is the set count `RenderPass.step`
/// uses when binding — confirm there.
///
/// HOT-PATH NOTE: these sets are SHARED across all `step()` calls
/// that bind this pipeline within a single command buffer, but
/// `vkCmdDraw` reads descriptors at submit time, so re-using the
/// same pipeline twice with different per-call resources would
/// cause both draws to see the LAST update's bindings.
/// `RenderPass.step` defends against this by allocating a fresh
/// per-call set from the pass's `step_pool` whenever the per-step
/// resources differ; these `descriptor_sets[i]` slots act as
/// pre-warmed defaults (used only when the call site is
/// single-step-per-pipeline like bg_color / cell_bg).
descriptor_sets: [MAX_DESCRIPTOR_SETS]vk.VkDescriptorSet = .{ null, null, null },
set_count: u32 = 0,
/// Descriptor set layouts associated with this pipeline, indexed by
/// set number. `null` matches a `null` slot in `descriptor_sets`.
/// Stored so `RenderPass.step` can allocate per-call sets from the
/// pass's per-frame descriptor pool without round-tripping through
/// the original `Shaders.init` layout-creation code path. The
/// layouts are borrowed; `deinit` does not destroy them.
descriptor_set_layouts: [MAX_DESCRIPTOR_SETS]vk.VkDescriptorSetLayout = .{ null, null, null },
/// Binding number that `Step.uniforms` writes to within set 0.
/// Defaults to 1 to match `common.glsl`'s
/// `layout(binding = 1, std140) uniform Globals`. Override per
/// pipeline if a different shader uses a different slot.
uniforms_binding: u32 = 1,
/// Fallback sampler copied from `Options.sampler`. The handle is
/// borrowed: `deinit` does not destroy it.
sampler: vk.VkSampler = null,
/// Vertex buffer stride (bytes). Needed so `RenderPass.step` can
/// bind a vertex buffer with the right per-instance/per-vertex
/// stride. Defaults to 0 (no vertex buffer); set automatically when
/// `Options.vertex_input` is non-null.
vertex_stride: u32 = 0,
/// Build the `VkPipelineLayout` and the graphics `VkPipeline`
/// described by `opts`, using dynamic rendering (no `VkRenderPass`).
///
/// Fixed state: fill polygons, no culling, single-sample, no
/// depth/stencil, one color attachment of `opts.color_format`.
/// Viewport and scissor are dynamic and must be set while recording.
///
/// When `opts.descriptor_pool` is supplied, one descriptor set is
/// allocated per non-null entry of `opts.descriptor_set_layouts`.
///
/// Returns `error.VulkanFailed` when:
///   - more than `MAX_DESCRIPTOR_SETS` layouts are supplied,
///   - a layout slot is null and no `empty_set_layout` was given,
///   - `vkCreatePipelineLayout` / `vkCreateGraphicsPipelines` fails,
///   - a descriptor set cannot be allocated from the pool.
/// A layout or pipeline created before a later failure is destroyed
/// before returning; descriptor sets already taken from the pool are
/// not handed back.
pub fn init(opts: Options) Error!Self {
    const dev = opts.device;
    const requested_layouts = opts.descriptor_set_layouts;
    const requested_count = requested_layouts.len;

    if (requested_count > MAX_DESCRIPTOR_SETS) {
        log.err(
            "Pipeline.init: {} descriptor sets exceeds MAX_DESCRIPTOR_SETS={}",
            .{ requested_count, MAX_DESCRIPTOR_SETS },
        );
        return error.VulkanFailed;
    }

    // ---- pipeline layout ---------------------------------------
    //
    // `pSetLayouts[i]` is the layout for `set = i`. Vulkan rejects
    // VK_NULL_HANDLE there, so null placeholder slots are filled
    // with the caller's empty (0-binding) layout.
    var resolved_layouts: [MAX_DESCRIPTOR_SETS]vk.VkDescriptorSetLayout = .{ null, null, null };
    for (requested_layouts, 0..) |entry, set_index| {
        if (entry) |dsl| {
            resolved_layouts[set_index] = dsl;
            continue;
        }
        if (opts.empty_set_layout == null) {
            log.err(
                "Pipeline.init: set {} is null but no empty_set_layout was provided",
                .{set_index},
            );
            return error.VulkanFailed;
        }
        resolved_layouts[set_index] = opts.empty_set_layout;
    }

    const push_ranges = opts.push_constant_ranges;
    const layout_ci: vk.VkPipelineLayoutCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .setLayoutCount = @intCast(requested_count),
        .pSetLayouts = if (requested_count > 0) &resolved_layouts else null,
        .pushConstantRangeCount = @intCast(push_ranges.len),
        .pPushConstantRanges = if (push_ranges.len > 0) push_ranges.ptr else null,
    };

    var layout: vk.VkPipelineLayout = undefined;
    const layout_result = dev.dispatch.createPipelineLayout(dev.device, &layout_ci, null, &layout);
    if (layout_result != vk.VK_SUCCESS) {
        log.err("vkCreatePipelineLayout failed: result={}", .{layout_result});
        return error.VulkanFailed;
    }
    errdefer dev.dispatch.destroyPipelineLayout(dev.device, layout, null);

    // ---- shader stages -----------------------------------------
    const stages: [2]vk.VkPipelineShaderStageCreateInfo = .{
        .{
            .sType = vk.VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
            .pNext = null,
            .flags = 0,
            .stage = vk.VK_SHADER_STAGE_VERTEX_BIT,
            .module = opts.vertex_module,
            .pName = "main",
            .pSpecializationInfo = null,
        },
        .{
            .sType = vk.VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
            .pNext = null,
            .flags = 0,
            .stage = vk.VK_SHADER_STAGE_FRAGMENT_BIT,
            .module = opts.fragment_module,
            .pName = "main",
            .pSpecializationInfo = null,
        },
    };

    // ---- vertex input -------------------------------------------
    //
    // Start from "no bindings" and fill in binding 0 only when the
    // caller described one. `binding_desc` lives in this scope so
    // the pointer stored below stays valid until pipeline creation.
    var binding_desc: vk.VkVertexInputBindingDescription = undefined;
    var vertex_input_ci: vk.VkPipelineVertexInputStateCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .vertexBindingDescriptionCount = 0,
        .pVertexBindingDescriptions = null,
        .vertexAttributeDescriptionCount = 0,
        .pVertexAttributeDescriptions = null,
    };
    if (opts.vertex_input) |vi| {
        binding_desc = .{
            .binding = 0,
            .stride = vi.stride,
            .inputRate = switch (vi.step_fn) {
                .per_instance => vk.VK_VERTEX_INPUT_RATE_INSTANCE,
                .constant, .per_vertex => vk.VK_VERTEX_INPUT_RATE_VERTEX,
            },
        };
        vertex_input_ci.vertexBindingDescriptionCount = 1;
        vertex_input_ci.pVertexBindingDescriptions = &binding_desc;
        vertex_input_ci.vertexAttributeDescriptionCount = @intCast(vi.attributes.len);
        vertex_input_ci.pVertexAttributeDescriptions = vi.attributes.ptr;
    }

    // ---- input assembly + viewport (dynamic) + raster + ms ------
    const input_assembly_ci: vk.VkPipelineInputAssemblyStateCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .topology = opts.topology,
        .primitiveRestartEnable = vk.VK_FALSE,
    };

    // Counts only: the actual viewport/scissor are dynamic state.
    const viewport_ci: vk.VkPipelineViewportStateCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .viewportCount = 1,
        .pViewports = null,
        .scissorCount = 1,
        .pScissors = null,
    };

    const raster_ci: vk.VkPipelineRasterizationStateCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .depthClampEnable = vk.VK_FALSE,
        .rasterizerDiscardEnable = vk.VK_FALSE,
        .polygonMode = vk.VK_POLYGON_MODE_FILL,
        .cullMode = vk.VK_CULL_MODE_NONE,
        .frontFace = vk.VK_FRONT_FACE_COUNTER_CLOCKWISE,
        .depthBiasEnable = vk.VK_FALSE,
        .depthBiasConstantFactor = 0,
        .depthBiasClamp = 0,
        .depthBiasSlopeFactor = 0,
        .lineWidth = 1.0,
    };

    const multisample_ci: vk.VkPipelineMultisampleStateCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .rasterizationSamples = vk.VK_SAMPLE_COUNT_1_BIT,
        .sampleShadingEnable = vk.VK_FALSE,
        .minSampleShading = 0,
        .pSampleMask = null,
        .alphaToCoverageEnable = vk.VK_FALSE,
        .alphaToOneEnable = vk.VK_FALSE,
    };

    // ---- color blend --------------------------------------------
    // Pre-multiplied alpha source-over: out = src + dst*(1-src.a).
    // The factors are always filled in; `blendEnable` alone decides
    // whether they take effect.
    const blend_enable = if (opts.blending_enabled) vk.VK_TRUE else vk.VK_FALSE;
    const color_blend_attachment: vk.VkPipelineColorBlendAttachmentState = .{
        .blendEnable = blend_enable,
        .srcColorBlendFactor = vk.VK_BLEND_FACTOR_ONE,
        .dstColorBlendFactor = vk.VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
        .colorBlendOp = vk.VK_BLEND_OP_ADD,
        .srcAlphaBlendFactor = vk.VK_BLEND_FACTOR_ONE,
        .dstAlphaBlendFactor = vk.VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
        .alphaBlendOp = vk.VK_BLEND_OP_ADD,
        .colorWriteMask = vk.VK_COLOR_COMPONENT_R_BIT |
            vk.VK_COLOR_COMPONENT_G_BIT |
            vk.VK_COLOR_COMPONENT_B_BIT |
            vk.VK_COLOR_COMPONENT_A_BIT,
    };
    const color_blend_ci: vk.VkPipelineColorBlendStateCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .logicOpEnable = vk.VK_FALSE,
        .logicOp = vk.VK_LOGIC_OP_COPY,
        .attachmentCount = 1,
        .pAttachments = &color_blend_attachment,
        .blendConstants = .{ 0, 0, 0, 0 },
    };

    // ---- dynamic state -----------------------------------------
    const dynamic_list = [_]vk.VkDynamicState{
        vk.VK_DYNAMIC_STATE_VIEWPORT,
        vk.VK_DYNAMIC_STATE_SCISSOR,
    };
    const dynamic_ci: vk.VkPipelineDynamicStateCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .dynamicStateCount = @intCast(dynamic_list.len),
        .pDynamicStates = &dynamic_list,
    };

    // ---- dynamic rendering info (chained via pNext) ------------
    const color_formats = [_]vk.VkFormat{opts.color_format};
    const rendering_ci: vk.VkPipelineRenderingCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
        .pNext = null,
        .viewMask = 0,
        .colorAttachmentCount = 1,
        .pColorAttachmentFormats = &color_formats,
        .depthAttachmentFormat = vk.VK_FORMAT_UNDEFINED,
        .stencilAttachmentFormat = vk.VK_FORMAT_UNDEFINED,
    };

    // ---- assemble + create -------------------------------------
    const pipeline_ci: vk.VkGraphicsPipelineCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
        .pNext = &rendering_ci,
        .flags = 0,
        .stageCount = stages.len,
        .pStages = &stages,
        .pVertexInputState = &vertex_input_ci,
        .pInputAssemblyState = &input_assembly_ci,
        .pTessellationState = null,
        .pViewportState = &viewport_ci,
        .pRasterizationState = &raster_ci,
        .pMultisampleState = &multisample_ci,
        .pDepthStencilState = null,
        .pColorBlendState = &color_blend_ci,
        .pDynamicState = &dynamic_ci,
        .layout = layout,
        // renderPass / subpass intentionally null — dynamic rendering.
        .renderPass = null,
        .subpass = 0,
        .basePipelineHandle = null,
        .basePipelineIndex = -1,
    };

    var pipeline: vk.VkPipeline = undefined;
    const pipeline_result = dev.dispatch.createGraphicsPipelines(
        dev.device,
        null, // pipeline cache
        1,
        &pipeline_ci,
        null,
        &pipeline,
    );
    if (pipeline_result != vk.VK_SUCCESS) {
        log.err("vkCreateGraphicsPipelines failed: result={}", .{pipeline_result});
        return error.VulkanFailed;
    }
    errdefer dev.dispatch.destroyPipeline(dev.device, pipeline, null);

    // ---- descriptor sets ---------------------------------------
    //
    // Remember every non-null layout on `Self` (so `RenderPass.step`
    // can allocate per-call sets later) and, when a pool was given,
    // allocate the pipeline's default set for that slot. Null slots
    // are placeholders for unused sets — nothing to record.
    var sets: [MAX_DESCRIPTOR_SETS]vk.VkDescriptorSet = .{ null, null, null };
    var set_layouts: [MAX_DESCRIPTOR_SETS]vk.VkDescriptorSetLayout = .{ null, null, null };
    for (requested_layouts, 0..) |entry, set_index| {
        const dsl = entry orelse continue;
        set_layouts[set_index] = dsl;

        const pool = opts.descriptor_pool orelse continue;
        sets[set_index] = pool.allocate(dsl) catch |err| {
            log.err(
                "Pipeline.init: descriptor set {} allocation failed: {}",
                .{ set_index, err },
            );
            return error.VulkanFailed;
        };
    }

    return .{
        .device = dev,
        .pipeline = pipeline,
        .layout = layout,
        .descriptor_sets = sets,
        .descriptor_set_layouts = set_layouts,
        .set_count = @intCast(requested_count),
        .sampler = opts.sampler,
        .vertex_stride = if (opts.vertex_input) |vi| vi.stride else 0,
    };
}
/// Destroy the pipeline, then the pipeline layout it was built with.
///
/// Nothing else is released: shader modules, descriptor set layouts
/// and the sampler are borrowed from the caller, and descriptor sets
/// remain with the pool they were allocated from.
pub fn deinit(self: *const Self) void {
    self.device.dispatch.destroyPipeline(self.device.device, self.pipeline, null);
    self.device.dispatch.destroyPipelineLayout(self.device.device, self.layout, null);
}
// Reference every declaration in this file so nested tests are
// compiled and run.
test {
std.testing.refAllDecls(@This());
}

View File

@ -0,0 +1,49 @@
# Vulkan renderer backend
This directory holds the **renderer-policy** Vulkan files for libghostty.
Pure Vulkan-API wrappers (Device dispatch table, Sampler, CommandPool,
DescriptorPool) live in `pkg/vulkan/`, mirroring how `pkg/opengl/`
relates to `src/renderer/opengl/`.
## File layout
Renderer policy (this directory):
| File | OpenGL counterpart | Notes |
| ------------------- | ------------------------- | ------------------------------------------------------------------ |
| `Target.zig` | `opengl/Target.zig` | Render image + dmabuf export (direct or legacy_copy mode). |
| `Texture.zig` | `opengl/Texture.zig` | `VkImage` + `VkImageView` + upload helpers for the glyph atlas. |
| `buffer.zig` | `opengl/buffer.zig` | `Buffer(T)` host-coherent. |
| `buffer_pool.zig` | (none — GL implicit) | Cross-frame `VkBuffer` recycle pool, per-thread pending list. |
| `ThreadState.zig` | (none — GL implicit) | Per-renderer-thread frame fence / CB / descriptor pool / last-tgt. |
| `Pipeline.zig` | `opengl/Pipeline.zig` | Graphics pipeline + pipeline layout creation; descriptor set layouts are supplied by the caller. |
| `RenderPass.zig` | `opengl/RenderPass.zig` | Dynamic-rendering pass + step recorder. |
| `Frame.zig` | `opengl/Frame.zig` | Per-draw command buffer + fence-paced submit-then-wait. |
| `shaders.zig` | `opengl/shaders.zig` | GLSL → SPIR-V via glslang + the OpenGL-GLSL → Vulkan-GLSL rewrite. |
Pure Vulkan-API wrappers (in `pkg/vulkan/`):
| File | OpenGL counterpart | Notes |
| --------------------- | ------------------------ | ------------------------------------------------------------------ |
| `Device.zig` | (no analogue — GL ctx) | Host-provided VkInstance/Device/Queue + function dispatch table. |
| `Sampler.zig` | `pkg/opengl/Sampler.zig` | `VkSampler` (linear for atlases, nearest for cells). |
| `CommandPool.zig` | (none) | `VkCommandPool` + one-shot record/submit helper. |
| `DescriptorPool.zig` | (none) | Per-frame `VkDescriptorPool`. |
The renderer's top-level lives one directory up at `../Vulkan.zig`
and is the single module imported by `src/renderer.zig` when
`build_config.renderer == .vulkan`. It re-exports the `pkg/vulkan/`
types as `Vulkan.Device`, `Vulkan.Sampler`, etc., so call sites use a
single `Vulkan.*` namespace regardless of where each type physically
lives.
## Why dmabuf, not Vulkan swapchains?
The Qt frontend wants to keep `GhosttySurface` as a `QWidget` so that
splits (`QSplitter`), tabs (`QTabWidget`), and translucent composition
keep working. That rules out `QVulkanWindow`. Instead libghostty
exports the rendered `VkImage` memory as a dmabuf fd
(`VK_KHR_external_memory_fd` + `VK_EXT_image_drm_format_modifier`); the
Qt side imports it via `zwp_linux_dmabuf_v1` and attaches it to a
`wl_subsurface` parented to the top-level `wl_surface`. The compositor
scans the buffer out directly — no readback, no QImage round trip.

View File

@ -0,0 +1,673 @@
//! Per-pass recording helper for `vkCmdBeginRendering` /
//! `vkCmdEndRendering` (Vulkan 1.3 dynamic rendering — no
//! `VkRenderPass` object needed) plus the per-`step` resource
//! binding + draw-call emission.
//!
//! `begin` transitions the attachment from its current layout to
//! `COLOR_ATTACHMENT_OPTIMAL` and opens a rendering scope with the
//! caller's clear color. `step` updates the pipeline's descriptor
//! sets from the Step's resources and records a draw call;
//! `complete` closes the rendering scope and transitions the
//! attachment to its consumer-facing layout (SHADER_READ_ONLY for
//! intermediate textures, GENERAL for the dmabuf-backed target).
//!
//! Counterpart: `src/renderer/opengl/RenderPass.zig`.
const Self = @This();
const std = @import("std");
const vulkan = @import("vulkan");
const vk = vulkan.c;
const Device = vulkan.Device;
const DescriptorPool = vulkan.DescriptorPool;
const Sampler = vulkan.Sampler;
const Pipeline = @import("Pipeline.zig");
const Target = @import("Target.zig");
const Texture = @import("Texture.zig");
const bufferpkg = @import("buffer.zig");
const log = std.log.scoped(.vulkan);
/// Backend-neutral primitive topology.
///
/// The variant names mirror `gl.Primitive` from
/// `pkg/opengl/primitives.zig`, so call sites in `generic.zig` such as
/// `.draw = .{ .type = .triangle, ... }` compile unchanged against
/// either backend. `toVk` converts to the matching
/// `VkPrimitiveTopology` when commands are recorded.
pub const Primitive = enum {
    point,
    line,
    line_strip,
    triangle,
    triangle_strip,

    /// Return the `VkPrimitiveTopology` for this primitive. The
    /// `*_LIST` topologies back the non-strip variants.
    pub fn toVk(self: Primitive) vk.VkPrimitiveTopology {
        const topology = switch (self) {
            .triangle => vk.VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
            .triangle_strip => vk.VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
            .line => vk.VK_PRIMITIVE_TOPOLOGY_LINE_LIST,
            .line_strip => vk.VK_PRIMITIVE_TOPOLOGY_LINE_STRIP,
            .point => vk.VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
        };
        return topology;
    }
};
/// Inputs for opening a render pass; built by `Frame.renderPass`.
pub const Options = struct {
/// Device + dispatch table for recording commands.
device: *const Device,
/// Caller-recorded command buffer to emit commands into. Provided
/// by the enclosing `Frame`.
cb: vk.VkCommandBuffer,
/// Per-frame descriptor pool. Used by `step` to allocate fresh
/// descriptor sets on the SECOND and later step() calls that
/// bind the same pipeline within this pass — without it,
/// mutating the pipeline's static `descriptor_sets[i]` for the
/// second call would overwrite the first call's bindings before
/// the GPU has read them (vkCmdDraw reads at submit time).
/// Optional: passes that never re-use a pipeline (bg_color,
/// cell_bg, cell_text) work without it.
step_pool: ?*DescriptorPool = null,
/// Color attachments for the pass. With dynamic rendering each
/// attachment is a render target + optional clear color.
attachments: []const Attachment,
/// One color attachment: the image to render into plus an optional
/// clear color.
pub const Attachment = struct {
// Held by value to match the OpenGL backend's Attachment
// shape (so `generic.zig`'s call sites remain identical).
// Vulkan's `Texture` and `Target` carry a `layout` field
// that mutates across passes — `RenderPass.begin` reads it
// to emit the right source-layout barrier, and
// `RenderPass.complete` updates the value-copy here. Because
// the value is a copy, that update doesn't propagate back
// to the caller; the call sites in `generic.zig` are
// intentionally fine with that — they always pass the
// CURRENT `frame.target` / `state.{front,back}_texture`
// (whose `layout` was last updated by the previous pass's
// `recordPresentBarrier` / pipeline-end barrier in
// `Target.recordPresentBarrier` / `Texture.replaceRegion`)
// when constructing a new pass.
target: union(enum) {
texture: Texture,
target: Target,
},
/// Clear color for the attachment; `null` by default.
/// NOTE(review): presumably `null` means "load existing
/// contents instead of clearing" — confirm in `begin`.
clear_color: ?[4]f32 = null,
};
};
/// Describes one rendering step within the pass: which pipeline to
/// bind, which resources (uniforms / vertex buffers / textures /
/// samplers) to bind, and the draw call to issue.
pub const Step = struct {
/// Pipeline to bind. `step` logs a warning and skips the draw when
/// its `pipeline` handle is null.
pipeline: Pipeline,
/// Uniform buffer written to set 0. When null, `step` re-uses the
/// last non-null `uniforms` seen earlier in the same pass.
uniforms: ?vk.VkBuffer = null,
/// `buffers[0]` is bound as the vertex buffer (binding 0) when the
/// pipeline has a non-zero `vertex_stride`. `buffers[i]` for
/// i >= 1 are written as storage buffers at set 2, binding `i`.
/// Null entries are skipped.
buffers: []const ?vk.VkBuffer = &.{},
/// `textures[i]` is written as a combined image sampler at set 1,
/// binding `i`. Null entries are skipped.
textures: []const ?Texture = &.{},
/// Optional per-slot sampler overrides for `textures`. When slot
/// `i` has no entry here, the pipeline's own `sampler` is used; if
/// that is null too, the slot is not written.
samplers: []const ?Sampler = &.{},
/// Parameters of the draw call.
draw: Draw,
pub const Draw = struct {
/// Primitive topology requested by the caller. `step` in this
/// file does not read it when recording the draw.
type: Primitive,
/// Number of vertices; `step` records nothing when this is 0.
vertex_count: usize,
/// Number of instances passed to the draw call.
instance_count: usize = 1,
};
};
pub const Error = error{
/// Reserved for command-recording failures. Currently unused:
/// the recorder relies on Vulkan's silent-failure model
/// (record bad input -> validation flags it / next submit
/// returns DEVICE_LOST), but the slot stays open in case a
/// future step wants to fail-fast at record time.
VulkanFailed,
};
/// Attachments handed to `begin`. `complete` reads `attachments[0]`
/// to pick the final layout; an empty slice makes it a no-op.
attachments: []const Options.Attachment,
/// Command buffer every command of this pass is recorded into.
cb: vk.VkCommandBuffer,
/// Device + dispatch table used for recording.
device: *const Device,
/// Optional per-frame descriptor pool used by `step` when a pipeline
/// is re-used within the pass.
step_pool: ?*DescriptorPool = null,
/// Number of draws recorded so far. Incremented only at the end of a
/// `step` call that actually emitted its draw.
step_number: usize = 0,
/// VkPipeline handles already used by an earlier `step` in this
/// pass. On second-and-later use of the same pipeline we allocate
/// a fresh per-call descriptor set from `step_pool` instead of
/// mutating `pipeline.descriptor_sets[i]` (vkCmdDraw reads at
/// submit time, so re-updating the same set in place would
/// overwrite the prior call's bindings before the GPU has read
/// them). Only DISTINCT pipelines take a slot; repeated draws with
/// an already-recorded pipeline (e.g. many image draws) take none.
/// `markPipelineUsed` fills this in whether or not a `step_pool`
/// was provided.
seen_pipelines: [MAX_SEEN_PIPELINES]vk.VkPipeline = .{null} ** MAX_SEEN_PIPELINES,
/// Number of valid entries at the front of `seen_pipelines`.
seen_pipelines_len: usize = 0,
/// Last `Step.uniforms` value seen in this pass. The OpenGL backend
/// keeps the bound UBO across draw calls implicitly (GL state
/// persists), and the renderer's image/overlay draw calls in
/// `image.zig` don't pass `uniforms` at all; they expect the
/// previously-bound UBO to still be live. Vulkan needs explicit
/// descriptor-set updates per pipeline, so we cache the last UBO
/// buffer here and reuse it when a step doesn't supply one. Starts
/// out null for every pass because `begin` builds a fresh `Self`.
last_uniforms: ?vk.VkBuffer = null,
/// Cap on the number of distinct pipelines we'll track per pass
/// for "first-use vs re-use" detection. The renderer's pass shape
/// is: bg_color (1), cell_bg (1), cell_text (1), bg_image (1),
/// image (varies). 8 is generous; past this cap `markPipelineUsed`
/// reports every pipeline as re-used, so `step` always allocates
/// fresh sets (or drops the draw when there is no `step_pool`).
const MAX_SEEN_PIPELINES: usize = 8;
/// Open a render pass on `opts.attachments[0]`.
///
/// Records, in order: a layout transition of the attachment to
/// `COLOR_ATTACHMENT_OPTIMAL`, a `vkCmdBeginRendering` scope that
/// clears to the attachment's `clear_color` (opaque black when null),
/// and the dynamic viewport + scissor state covering the whole
/// attachment.
///
/// Only the first attachment is acted on: the renderer's calls always
/// pass exactly one attachment per pass, matching the OpenGL backend's
/// use of `RenderPass.Options.attachments`. With an empty attachment
/// list nothing is recorded at all.
pub fn begin(opts: Options) Self {
    const pass: Self = .{
        .attachments = opts.attachments,
        .cb = opts.cb,
        .device = opts.device,
        .step_pool = opts.step_pool,
    };
    if (opts.attachments.len == 0) return pass;

    const dev = opts.device;
    const attach = opts.attachments[0];

    // Flatten the two attachment kinds into the handful of values the
    // rest of this function needs.
    const Surface = struct {
        view: vk.VkImageView,
        image: vk.VkImage,
        width: u32,
        height: u32,
        layout: vk.VkImageLayout,
    };
    const surface: Surface = switch (attach.target) {
        .texture => |tex| .{
            .view = tex.view,
            .image = tex.image,
            .width = @intCast(tex.width),
            .height = @intCast(tex.height),
            .layout = tex.layout,
        },
        .target => |tgt| .{
            .view = tgt.view,
            .image = tgt.image,
            .width = tgt.width,
            .height = tgt.height,
            .layout = tgt.layout,
        },
    };

    // ---- transition to COLOR_ATTACHMENT_OPTIMAL ------------------
    //
    // The attachment's current layout drives the source side of the
    // barrier, so a re-used target is transitioned correctly: a
    // `Target` in `.direct` mode left in GENERAL by the previous
    // frame's `recordDirectBarrier`, one in `.legacy_copy` mode left
    // in TRANSFER_SRC_OPTIMAL by `recordCopyToDmabuf`, or a `Texture`
    // left in SHADER_READ_ONLY_OPTIMAL by the previous pass's
    // `complete`. UNDEFINED is the implicit-discard layout of a fresh
    // image; it would also be acceptable for contents we don't care
    // about, but `loadOp = CLEAR` covers that case explicitly, so
    // validation always sees a truthful old layout.
    //
    // Source access / stage depend on what the previous owner of the
    // layout could have left in flight: the color write for
    // COLOR_ATTACHMENT_OPTIMAL; the (already retired, conservatively
    // named) transfer read for TRANSFER_SRC_OPTIMAL; the fragment
    // read for SHADER_READ_ONLY_OPTIMAL. UNDEFINED and GENERAL get a
    // no-op source mask (GENERAL was last written by the present
    // barrier, and `recordDirectBarrier` has already chained that
    // visibility into HOST, so the next frame need not re-flush it).
    const Source = struct {
        access: vk.VkAccessFlags,
        stage: vk.VkPipelineStageFlags,
    };
    const source: Source = switch (surface.layout) {
        vk.VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL => .{
            .access = vk.VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
            .stage = vk.VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
        },
        vk.VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL => .{
            .access = vk.VK_ACCESS_TRANSFER_READ_BIT,
            .stage = vk.VK_PIPELINE_STAGE_TRANSFER_BIT,
        },
        vk.VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL => .{
            .access = vk.VK_ACCESS_SHADER_READ_BIT,
            .stage = vk.VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
        },
        else => .{
            .access = 0,
            .stage = vk.VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
        },
    };
    const to_color_attachment: vk.VkImageMemoryBarrier = .{
        .sType = vk.VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .pNext = null,
        .srcAccessMask = source.access,
        .dstAccessMask = vk.VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
        .oldLayout = surface.layout,
        .newLayout = vk.VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
        .srcQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .image = surface.image,
        .subresourceRange = .{
            .aspectMask = vk.VK_IMAGE_ASPECT_COLOR_BIT,
            .baseMipLevel = 0,
            .levelCount = 1,
            .baseArrayLayer = 0,
            .layerCount = 1,
        },
    };
    dev.dispatch.cmdPipelineBarrier(
        opts.cb,
        source.stage,
        vk.VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
        0,
        0,
        null,
        0,
        null,
        1,
        &to_color_attachment,
    );

    // ---- open the dynamic-rendering scope ------------------------
    const full_area: vk.VkRect2D = .{
        .offset = .{ .x = 0, .y = 0 },
        .extent = .{ .width = surface.width, .height = surface.height },
    };
    const clear_rgba: [4]f32 = attach.clear_color orelse .{ 0, 0, 0, 1 };
    const color_attachment: vk.VkRenderingAttachmentInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
        .pNext = null,
        .imageView = surface.view,
        .imageLayout = vk.VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
        .resolveMode = vk.VK_RESOLVE_MODE_NONE,
        .resolveImageView = null,
        .resolveImageLayout = vk.VK_IMAGE_LAYOUT_UNDEFINED,
        // Always clear: the renderer redraws every cell each frame,
        // so prior contents are never useful. CLEAR is also free on
        // tiled GPUs (avoids a full attachment load).
        .loadOp = vk.VK_ATTACHMENT_LOAD_OP_CLEAR,
        .storeOp = vk.VK_ATTACHMENT_STORE_OP_STORE,
        .clearValue = .{ .color = .{ .float32 = clear_rgba } },
    };
    const rendering_info: vk.VkRenderingInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_RENDERING_INFO,
        .pNext = null,
        .flags = 0,
        .renderArea = full_area,
        .layerCount = 1,
        .viewMask = 0,
        .colorAttachmentCount = 1,
        .pColorAttachments = &color_attachment,
        .pDepthAttachment = null,
        .pStencilAttachment = null,
    };
    dev.dispatch.cmdBeginRendering(opts.cb, &rendering_info);

    // ---- dynamic state: viewport + scissor -----------------------
    //
    // Both follow the attachment size. The viewport is always
    // Y-flipped, regardless of attachment kind, via a negative
    // `height` (Vulkan 1.1 maintenance1 / core). That keeps the
    // renderer's OpenGL-style projection matrices (Y-up clip space,
    // `ortho2d` with bottom > top) producing pixels where expected;
    // without it everything renders upside-down. `gl_FragCoord` still
    // reports origin-upper-left, matching `cell_bg.f.glsl`'s
    // `layout(origin_upper_left)` request.
    //
    // The flip must be the same for every attachment because
    // `cell_text` is projection-driven (its vertex shader applies
    // `projection_matrix` to pixel coords) while `cell_bg` is
    // fragment-position-driven (it derives grid_pos from
    // `gl_FragCoord.xy / cell_size`). If the orientation differed the
    // two would disagree about where "row 0" lands: the
    // cell-bg-at-top / cell-text-at-bottom mismatch seen on the
    // custom-shader (back_texture) path. The dmabuf `Target` needed
    // the flip anyway (Qt mmaps origin-upper-left). For shadertoy
    // sampling, with both back_texture and frame.target flipped, an
    // upper-left `gl_FragCoord` in the post fragment maps to texel
    // y=0 (top of back_texture = top of the original render), which
    // is what `uv = fragCoord/iResolution` + `texture(iChannel0, uv)`
    // expects in Vulkan-native convention.
    const height_f: f32 = @floatFromInt(surface.height);
    const viewport: vk.VkViewport = .{
        .x = 0,
        .y = height_f,
        .width = @floatFromInt(surface.width),
        .height = -height_f,
        .minDepth = 0,
        .maxDepth = 1,
    };
    dev.dispatch.cmdSetViewport(opts.cb, 0, 1, &viewport);
    dev.dispatch.cmdSetScissor(opts.cb, 0, 1, &full_area);

    return pass;
}
/// Record one step of the pass.
///
/// Updates the pipeline's descriptor sets from the Step's resources
/// and emits the draw call. Resource conventions match the OpenGL
/// backend (so `generic.zig` call sites work unchanged):
///
///   - `uniforms`          -> set 0, binding `pipeline.uniforms_binding`
///                            (UBO; the Globals block from `common.glsl`).
///                            Falls back to the last `uniforms` seen in
///                            this pass when the step supplies none.
///   - `buffers[0]`        -> vertex buffer at binding 0 (when the
///                            pipeline has a non-zero `vertex_stride`;
///                            ignored otherwise). Matches OpenGL's
///                            "0th buffer is the VBO" convention.
///   - `buffers[i]`, i >= 1 -> set 2, binding `i` (storage buffer)
///   - `textures[i]`       -> set 1, binding `i` (combined image
///                            sampler). The sampler is `samplers[i]` if
///                            provided, otherwise the pipeline's owned
///                            fallback `pipeline.sampler` (so the
///                            renderer can pass plain textures and let
///                            the pipeline pick the sampler config).
///
/// The draw is skipped (nothing is recorded) when:
///   - the pass has no attachments: `begin` opened no rendering scope,
///     and a draw outside of one is invalid;
///   - the pipeline hasn't been constructed (`VkPipeline == null`).
///     Pipelines for shaders we haven't wired up are default-null and
///     are filtered out instead of crashing on a null handle. A null
///     pipeline reaching here once shader bring-up has completed
///     indicates a config / build issue (e.g. a custom-shader compile
///     failure that left the post pipeline half-init), so it is logged
///     to keep the missing draw visible;
///   - `draw.vertex_count` is 0;
///   - the pipeline was already used in this pass and no fresh
///     descriptor sets can be obtained (no `step_pool`, or the pool is
///     exhausted). This case is logged as an error.
pub fn step(self: *Self, s: Step) void {
    // `begin` records no `vkCmdBeginRendering` for an empty
    // attachment list and `complete` records no `vkCmdEndRendering`,
    // so a draw here would land outside any rendering scope.
    if (self.attachments.len == 0) {
        log.warn("RenderPass.step: skipping draw — pass has no attachments", .{});
        return;
    }
    if (s.pipeline.pipeline == null) {
        log.warn("RenderPass.step: skipping draw — pipeline not constructed", .{});
        return;
    }
    if (s.draw.vertex_count == 0) return;

    const dev = self.device;

    // ---- vertex buffer (buffers[0]) ----------------------------
    if (s.pipeline.vertex_stride > 0 and s.buffers.len > 0) {
        if (s.buffers[0]) |vbo| {
            const offsets = [_]vk.VkDeviceSize{0};
            const bufs = [_]vk.VkBuffer{vbo};
            dev.dispatch.cmdBindVertexBuffers(
                self.cb,
                0, // first binding
                1, // binding count
                &bufs,
                &offsets,
            );
        }
    }

    // ---- pick effective descriptor sets ------------------------
    //
    // First time we see a given pipeline within this pass, we use
    // its pre-allocated `descriptor_sets[]` slots and update them
    // in place: cheap, and it avoids a per-pass-pool allocation in
    // the common single-step case (bg_color/cell_bg/cell_text).
    //
    // SECOND-and-later use of the same pipeline within the same
    // pass requires fresh sets: vkCmdDraw reads the descriptor
    // contents at SUBMIT time, so re-updating the static sets in
    // place would silently make every prior draw bound to this
    // pipeline read the LAST update's UBO/sampler/storage. The
    // image / kitty path issues N draws on the same `image`
    // pipeline with per-call vertex buffers and textures; without
    // this, every kitty image rendered with the FINAL image's
    // texture and the final draw's vertex buffer.
    //
    // The fresh sets come from `step_pool`, owned by the enclosing
    // Frame and reset at frame start.
    var effective_sets: [Pipeline.MAX_DESCRIPTOR_SETS]vk.VkDescriptorSet =
        s.pipeline.descriptor_sets;
    const reused = self.markPipelineUsed(s.pipeline.pipeline);
    if (reused) {
        // No step_pool means the renderer thread has no per-frame
        // descriptor pool wired up (test harness, smoke test). We
        // can't safely re-use this pipeline: updating the static
        // set in place would corrupt the prior draw's bindings.
        // Drop the draw rather than corrupt the frame.
        const pool = self.step_pool orelse {
            log.err(
                "RenderPass.step: pipeline re-used but no step_pool " ++
                    "available; dropping draw to avoid corrupting prior draws",
                .{},
            );
            return;
        };
        for (s.pipeline.descriptor_set_layouts, 0..) |maybe_dsl, i| {
            if (i >= s.pipeline.set_count) break;
            const dsl = maybe_dsl orelse continue;
            if (pool.allocate(dsl)) |fresh| {
                effective_sets[i] = fresh;
            } else |err| {
                // Pool exhausted. Falling back to the pipeline's
                // static set would re-introduce the exact
                // corruption the step_pool mechanism exists to
                // prevent. Drop the draw; the user sees one missed
                // image rather than every image rendered with the
                // last image's bindings.
                log.err(
                    "RenderPass.step: per-call descriptor set " ++
                        "allocation for set {} failed ({}); dropping draw " ++
                        "(step_pool exhausted — increase STEP_POOL_MAX_SETS)",
                    .{ i, err },
                );
                return;
            }
        }
    }

    // ---- update descriptor sets ---------------------------------
    //
    // We do one vkUpdateDescriptorSets call per descriptor write to
    // keep the code straightforward; the total writes per frame are
    // tiny (1 UBO + a handful of storage buffers + a handful of
    // samplers) so batching wouldn't move the needle.

    // UBO (set 0). The OpenGL backend's image/overlay draws don't
    // pass `uniforms`; they expect the previously-bound UBO to
    // persist. Fall back to `last_uniforms` when the Step doesn't
    // supply one, and remember a supplied one for later steps.
    const ubo: ?vk.VkBuffer = s.uniforms orelse self.last_uniforms;
    if (s.uniforms) |b| self.last_uniforms = b;
    if (effective_sets[0] != null) if (ubo) |ubo_buffer| {
        const buffer_info: vk.VkDescriptorBufferInfo = .{
            .buffer = ubo_buffer,
            .offset = 0,
            .range = vk.VK_WHOLE_SIZE,
        };
        const write: vk.VkWriteDescriptorSet = .{
            .sType = vk.VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .pNext = null,
            .dstSet = effective_sets[0],
            .dstBinding = s.pipeline.uniforms_binding,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = vk.VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            .pImageInfo = null,
            .pBufferInfo = &buffer_info,
            .pTexelBufferView = null,
        };
        dev.dispatch.updateDescriptorSets(dev.device, 1, &write, 0, null);
    };

    // Samplers (set 1). Only slots that carry a texture are written;
    // a sampler entry without a matching texture has nothing to bind.
    if (effective_sets[1] != null) {
        for (s.textures, 0..) |maybe_tex, slot| {
            const tex = maybe_tex orelse continue;
            const samp_opt: ?Sampler = if (slot < s.samplers.len) s.samplers[slot] else null;
            const sampler_handle: vk.VkSampler = if (samp_opt) |samp|
                samp.sampler
            else if (s.pipeline.sampler != null)
                s.pipeline.sampler
            else
                continue;
            const image_info: vk.VkDescriptorImageInfo = .{
                .sampler = sampler_handle,
                .imageView = tex.view,
                .imageLayout = vk.VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
            };
            const write: vk.VkWriteDescriptorSet = .{
                .sType = vk.VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                .pNext = null,
                .dstSet = effective_sets[1],
                .dstBinding = @intCast(slot),
                .dstArrayElement = 0,
                .descriptorCount = 1,
                .descriptorType = vk.VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
                .pImageInfo = &image_info,
                .pBufferInfo = null,
                .pTexelBufferView = null,
            };
            dev.dispatch.updateDescriptorSets(dev.device, 1, &write, 0, null);
        }
    }

    // Storage buffers (set 2). `buffers[0]` is reserved for the
    // vertex buffer (handled above), so storage starts at slot 1.
    if (effective_sets[2] != null and s.buffers.len > 1) {
        for (s.buffers[1..], 1..) |maybe_buf, slot| {
            const buf = maybe_buf orelse continue;
            const buffer_info: vk.VkDescriptorBufferInfo = .{
                .buffer = buf,
                .offset = 0,
                .range = vk.VK_WHOLE_SIZE,
            };
            const write: vk.VkWriteDescriptorSet = .{
                .sType = vk.VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                .pNext = null,
                .dstSet = effective_sets[2],
                .dstBinding = @intCast(slot),
                .dstArrayElement = 0,
                .descriptorCount = 1,
                .descriptorType = vk.VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                .pImageInfo = null,
                .pBufferInfo = &buffer_info,
                .pTexelBufferView = null,
            };
            dev.dispatch.updateDescriptorSets(dev.device, 1, &write, 0, null);
        }
    }

    // ---- bind descriptor sets -----------------------------------
    //
    // `cmdBindDescriptorSets` only accepts contiguous, non-null
    // handles starting at `firstSet`. To handle the cell_bg case
    // (sets 0 and 2, no set 1), we make one call per maximal
    // contiguous run of non-null sets.
    var start: usize = 0;
    while (start < s.pipeline.set_count) {
        if (effective_sets[start] == null) {
            start += 1;
            continue;
        }
        var end = start + 1;
        while (end < s.pipeline.set_count and effective_sets[end] != null) : (end += 1) {}
        dev.dispatch.cmdBindDescriptorSets(
            self.cb,
            vk.VK_PIPELINE_BIND_POINT_GRAPHICS,
            s.pipeline.layout,
            @intCast(start),
            @intCast(end - start),
            &effective_sets[start],
            0,
            null,
        );
        start = end;
    }

    // ---- bind pipeline + draw -----------------------------------
    dev.dispatch.cmdBindPipeline(
        self.cb,
        vk.VK_PIPELINE_BIND_POINT_GRAPHICS,
        s.pipeline.pipeline,
    );
    dev.dispatch.cmdDraw(
        self.cb,
        @intCast(s.draw.vertex_count),
        @intCast(s.draw.instance_count),
        0,
        0,
    );
    self.step_number += 1;
}
/// Record that `pipeline` is being used in this pass and tell the
/// caller whether it had been used before.
///
/// Returns `false` the first time a pipeline is seen, which lets
/// `step` update that pipeline's static descriptor sets in place.
/// Returns `true` on every later call for the same pipeline, which
/// makes `step` allocate fresh sets from `step_pool` so the earlier
/// draw's bindings are not clobbered.
///
/// Once `MAX_SEEN_PIPELINES` distinct pipelines are tracked, any
/// further unseen pipeline is conservatively reported as `true` too:
/// silently reverting to in-place updates is the very bug this
/// mechanism exists to prevent.
fn markPipelineUsed(self: *Self, pipeline: vk.VkPipeline) bool {
    const tracked = self.seen_pipelines_len;

    var idx: usize = 0;
    while (idx < tracked) : (idx += 1) {
        if (self.seen_pipelines[idx] == pipeline) return true;
    }

    // Table full: cannot remember this one, so treat it as re-used.
    if (tracked >= MAX_SEEN_PIPELINES) return true;

    self.seen_pipelines[tracked] = pipeline;
    self.seen_pipelines_len = tracked + 1;
    return false;
}
/// Close the rendering scope and move `attachments[0]` out of
/// `COLOR_ATTACHMENT_OPTIMAL` into the layout its next consumer
/// expects: `SHADER_READ_ONLY_OPTIMAL` for a `.texture` attachment,
/// `GENERAL` for a `.target` attachment. Records nothing when the
/// pass has no attachments.
pub fn complete(self: *const Self) void {
    if (self.attachments.len == 0) return;

    const dev = self.device;
    dev.dispatch.cmdEndRendering(self.cb);

    // What the barrier hands the image over to depends on who
    // consumes the attachment next.
    //
    // `.texture` is the custom-shader back_texture, read by the post
    // pass's sampler. It goes to SHADER_READ_ONLY_OPTIMAL so the
    // layout declared in the descriptor write matches reality
    // (otherwise validation flags VUID-vkCmdDraw-imageLayout-00344
    // and some drivers can mishandle sampling from an out-of-spec
    // layout).
    //
    // `.target` is the dmabuf-backed `frame.target`. The next op is
    // `Target.recordPresentBarrier`, which expects GENERAL on entry
    // (it either stays in GENERAL in `.direct` mode or transitions to
    // TRANSFER_SRC_OPTIMAL in `.legacy_copy`), so it is left in
    // GENERAL here.
    const Handoff = struct {
        image: vk.VkImage,
        layout: vk.VkImageLayout,
        stage: vk.VkPipelineStageFlags,
        access: vk.VkAccessFlags,
    };
    const handoff: Handoff = switch (self.attachments[0].target) {
        .texture => |tex| .{
            .image = tex.image,
            .layout = vk.VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
            .stage = vk.VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
            .access = vk.VK_ACCESS_SHADER_READ_BIT,
        },
        .target => |tgt| .{
            .image = tgt.image,
            .layout = vk.VK_IMAGE_LAYOUT_GENERAL,
            .stage = vk.VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
            .access = 0,
        },
    };

    const from_color_attachment: vk.VkImageMemoryBarrier = .{
        .sType = vk.VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .pNext = null,
        .srcAccessMask = vk.VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
        .dstAccessMask = handoff.access,
        .oldLayout = vk.VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
        .newLayout = handoff.layout,
        .srcQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .image = handoff.image,
        .subresourceRange = .{
            .aspectMask = vk.VK_IMAGE_ASPECT_COLOR_BIT,
            .baseMipLevel = 0,
            .levelCount = 1,
            .baseArrayLayer = 0,
            .layerCount = 1,
        },
    };
    dev.dispatch.cmdPipelineBarrier(
        self.cb,
        vk.VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
        handoff.stage,
        0,
        0,
        null,
        0,
        null,
        1,
        &from_color_attachment,
    );
}
// Reference every declaration of this file so that they are
// semantically analyzed (and any nested tests picked up) when the
// test build runs.
test {
std.testing.refAllDecls(@This());
}

View File

@ -0,0 +1,914 @@
//! Render target: a `VkImage` whose memory is exported as a dmabuf
//! fd so the host (Qt) can present it via
//! `ghostty_platform_vulkan_s.present` without a CPU readback round
//! trip through libghostty.
//!
//! Two construction modes, picked at `init` time after probing
//! `VK_EXT_image_drm_format_modifier`:
//!
//!   - `.direct`: the render image itself is allocated through
//!     `VK_EXT_image_drm_format_modifier` (for `DRM_FORMAT_MOD_LINEAR`
//!     via `VkImageDrmFormatModifierExplicitCreateInfoEXT`), single
//!     plane. Its `VkDeviceMemory` is what we `vkGetMemoryFdKHR` and
//!     hand to the host. No second allocation, no end-of-frame copy.
//!     Used when `pickModifier` finds a single-plane modifier that the
//!     host accepts and for which the driver advertises
//!     `COLOR_ATTACHMENT_BIT | TRANSFER_SRC_BIT | SAMPLED_BIT` in
//!     `VkDrmFormatModifierPropertiesEXT.drmFormatModifierTilingFeatures`
//!     (LINEAR, or a vendor-tiled modifier when one qualifies).
//!
//!   - `.legacy_copy`: fallback for drivers (notably NVIDIA at time
//!     of writing) that don't expose `COLOR_ATTACHMENT_BIT` for
//!     LINEAR via either the legacy `vkGetPhysicalDeviceFormatProperties`
//!     query or the modifier-extension query. Allocates an OPTIMAL-
//!     tiled render image plus a separate dmabuf-exported LINEAR
//!     `VkBuffer`, and inserts a `vkCmdCopyImageToBuffer` at the end
//!     of each frame. Behavior is identical to the pre-modifier-path
//!     code.
//!
//! Why two modes? NVIDIA's `linearTilingFeatures` for BGRA8 doesn't
//! include `COLOR_ATTACHMENT_BIT`, so a LINEAR `VkImage` silently
//! rasterizes nothing (confirmed via
//! `vkGetPhysicalDeviceFormatProperties`: linearTilingFeatures=0x1dc03
//! for `B8G8R8A8_UNORM`). The modifier-extension query is a separate
//! channel and *may* expose different feature bits per modifier, so
//! we always probe. Where the probe says yes, we drop the redundant
//! buffer + copy; where it says no, we keep working.
//!
//! Ownership: libghostty owns the image, any buffer, all memory, and
//! the dmabuf fd for the lifetime of the `Target`. The fd is passed
//! to the host via `present` as a borrow; the host must `dup()` if
//! it needs to hold it past the call. `deinit` closes the fd and
//! frees all the memory.
//!
//! Counterpart: `src/renderer/opengl/Target.zig`.
const Self = @This();
const std = @import("std");
const vk = @import("vulkan").c;
const apprt = @import("../../apprt.zig");
const Device = @import("vulkan").Device;
const log = std.log.scoped(.vulkan);
/// DRM modifier sentinel for "linear, no tiling". Matches
/// `DRM_FORMAT_MOD_LINEAR` from `<drm/drm_fourcc.h>`. Hardcoded so we
/// don't pull in libdrm headers just for a single constant.
pub const DRM_FORMAT_MOD_LINEAR: u64 = 0;
/// Upper bound for the number of DRM format modifiers we ever expect
/// a driver to expose for a single format. Real-world drivers expose
/// well under 20 (mostly LINEAR + a handful of vendor tiled variants);
/// 64 gives us comfortable headroom with a ~1.5 KiB stack buffer and
/// avoids allocator threading through the per-surface init path.
/// `pickModifier` uses it as the capacity of both its host-side and
/// GPU-side modifier arrays; longer lists are cut off at this many
/// entries.
const MAX_MODIFIERS: usize = 64;
/// Which dmabuf-export strategy this `Target` settled on. See the
/// module-level doc comment for the full rationale.
pub const Tiling = enum {
/// Render image's own memory is exported as the dmabuf. Single
/// plane, using whichever modifier `pickModifier` selected (`init`
/// logs it as either LINEAR or vendor-tiled). No separate buffer,
/// no copy.
direct,
/// OPTIMAL render image + separate LINEAR `VkBuffer` dmabuf
/// target. End-of-frame `vkCmdCopyImageToBuffer`. Used when
/// `pickModifier` finds no qualifying modifier (`init` then falls
/// back to `initLegacyCopy`).
legacy_copy,
};
pub const Options = struct {
/// Device + dispatch table the target's Vulkan objects are created
/// against.
device: *const Device,
/// Format of the render image. `init` maps it to a DRM fourcc with
/// `vkFormatToDrmFourcc` and propagates that call's error.
format: vk.VkFormat,
/// Width of the render image in pixels.
width: u32,
/// Height of the render image in pixels.
height: u32,
/// Extra `VkImageUsageFlagBits` for the render image, beyond the
/// defaults (`COLOR_ATTACHMENT_BIT | SAMPLED_BIT |
/// TRANSFER_SRC_BIT`). Rarely needed.
extra_usage: vk.VkImageUsageFlags = 0,
/// Per-surface platform callbacks. The host's process-wide
/// VkDevice is shared across splits/tabs, but each surface gets
/// its own platform copy with the right `userdata`, so
/// `present()` reaches the right window and `pickModifier`
/// asks the right host (compositor and host can in principle
/// differ across surfaces, e.g. mixed-DPI multi-screen).
platform: apprt.embedded.Platform.Vulkan,
};
/// Errors `init` can return.
pub const Error = error{
/// Presumably returned when a Vulkan call made while building the
/// target fails. NOTE(review): confirm against the return sites.
VulkanFailed,
/// Presumably returned when no device memory type satisfies an
/// allocation's requirements. NOTE(review): confirm against the
/// return sites.
NoSuitableMemoryType,
/// Presumably returned by `vkFormatToDrmFourcc` for a `VkFormat`
/// without a DRM fourcc mapping. NOTE(review): confirm.
UnsupportedFormat,
};
/// Device + dispatch table this target was created against.
device: *const Device,
/// Per-surface platform callbacks; see `Options.platform`.
platform: apprt.embedded.Platform.Vulkan,
/// Which present strategy this target uses. Decides whether
/// `recordPresentBarrier` emits a copy.
tiling: Tiling,
// ---- render image ---------------------------------------------------
// In `.direct` mode this image's memory is the dmabuf; in
// `.legacy_copy` mode it's internal OPTIMAL memory we copy out of.
image: vk.VkImage,
image_memory: vk.VkDeviceMemory,
view: vk.VkImageView,
// ---- dmabuf buffer (legacy mode only) -------------------------------
// `null` in `.direct` mode, where the image's memory is the dmabuf.
dmabuf_buffer: ?vk.VkBuffer,
dmabuf_memory: ?vk.VkDeviceMemory,
/// Vulkan format of the render image.
format: vk.VkFormat,
/// Render image width in pixels.
width: u32,
/// Render image height in pixels.
height: u32,
/// dmabuf file descriptor exported for this target. Ownership rules
/// are described in the module-level doc comment.
fd: i32,
/// DRM fourcc corresponding to `format`.
drm_format: u32,
/// DRM format modifier describing the exported memory's layout.
drm_modifier: u64,
/// Row pitch of the exported memory. Presumably in bytes.
/// NOTE(review): confirm the unit against the host `present` ABI.
stride: u32,
/// Current layout of the render image. Tracked so
/// `recordPresentBarrier` knows what oldLayout to use in its barrier.
/// The renderer transitions it elsewhere too (RenderPass).
layout: vk.VkImageLayout = vk.VK_IMAGE_LAYOUT_UNDEFINED,
/// Create a render target for `opts`, choosing the dmabuf-export
/// strategy on the way.
///
/// Maps `opts.format` to its DRM fourcc, then asks `pickModifier` for
/// a single-plane modifier usable as color attachment, transfer
/// source and sampled image. If one is found the target is built with
/// `initDirect` using that modifier; otherwise a warning is logged and
/// the target is built with `initLegacyCopy`.
///
/// Errors from `vkFormatToDrmFourcc`, `pickModifier` and the chosen
/// initializer are propagated unchanged.
pub fn init(opts: Options) Error!Self {
    const fourcc = try vkFormatToDrmFourcc(opts.format);

    const wanted_features: vk.VkFormatFeatureFlags =
        @as(vk.VkFormatFeatureFlags, vk.VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) |
        vk.VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
        vk.VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;

    const maybe_modifier = try pickModifier(
        opts.device,
        opts.platform,
        opts.format,
        fourcc,
        wanted_features,
    );

    // No modifier both sides agree on: take the copy-based fallback.
    const modifier = maybe_modifier orelse {
        log.warn(
            "Target: no usable single-plane modifier with COLOR_ATTACHMENT " ++
                "in compositor ∩ GPU intersection; falling back to " ++
                "OPTIMAL render + LINEAR-buffer copy",
            .{},
        );
        return try initLegacyCopy(opts, fourcc);
    };

    const kind: []const u8 = if (modifier == DRM_FORMAT_MOD_LINEAR) "LINEAR" else "vendor-tiled";
    log.info(
        "Target: direct dmabuf export ({s} modifier 0x{x}) {}x{}",
        .{ kind, modifier, opts.width, opts.height },
    );
    return try initDirect(opts, fourcc, modifier);
}
/// Intersect the compositor's accepted modifier list (from the host
/// callback) with the GPU's supported modifiers for `format` (queried
/// via `VK_EXT_image_drm_format_modifier`), filtered by single-plane
/// + the required format-feature flags. Prefer the first non-LINEAR
/// hit (vendor-tiled: NVIDIA block-linear, AMD DCC variants, Intel
/// Y-tiled; these are where the perf win lives on most hardware).
/// Fall back to LINEAR if it's in the intersection. Return null when
/// no modifier qualifies, in which case the caller drops to
/// `.legacy_copy`.
///
/// Parameters:
///   - `dev`: device whose physical device is queried.
///   - `platform`: host callbacks; `get_supported_modifiers` supplies
///     the compositor-side list for `drm_format`.
///   - `format` / `drm_format`: the same pixel format as a `VkFormat`
///     and as a DRM fourcc.
///   - `required_features`: format-feature bits every candidate must
///     advertise in `drmFormatModifierTilingFeatures`.
///
/// Both lists are held in fixed `MAX_MODIFIERS`-entry stack arrays;
/// a longer list on either side is truncated with a warning.
///
/// Why both intersections matter:
///   - GPU-only: passes on AMD/Intel for LINEAR but NVIDIA never
///     exposes COLOR_ATTACHMENT for LINEAR, so direct mode would
///     create the image OK but rasterize nothing.
///   - Compositor-only: GPU may not be able to render into the
///     compositor's preferred tilings (drivers don't always expose
///     COLOR_ATTACHMENT for every modifier).
fn pickModifier(
    dev: *const Device,
    platform: apprt.embedded.Platform.Vulkan,
    format: vk.VkFormat,
    drm_format: u32,
    required_features: vk.VkFormatFeatureFlags,
) Error!?u64 {
    // ---- compositor side ----------------------------------------
    //
    // Ask the host what it will accept on attach, in a single call
    // with a fixed-capacity buffer. An empty result means the
    // compositor doesn't speak linux-dmabuf-v1 or doesn't advertise
    // this format. Direct mode would still likely work for AMD/Intel
    // LINEAR, but the compositor attach would fail, so treat it as
    // "no intersection."
    var host_mods: [MAX_MODIFIERS]u64 = undefined;
    const host_returned = platform.get_supported_modifiers(
        platform.userdata,
        drm_format,
        &host_mods,
        MAX_MODIFIERS,
    );
    // The C ABI contract is "host returns <= capacity". A larger
    // value means either the host has more modifiers than we have
    // room for or it is misbehaving; say so instead of clamping
    // silently (the GPU side below reports truncation the same way).
    if (host_returned > MAX_MODIFIERS) {
        log.warn(
            "host modifier list truncated: host reports {}, MAX_MODIFIERS={}",
            .{ host_returned, MAX_MODIFIERS },
        );
    }
    // Clamp defensively: we don't get to assume the host's
    // implementation is correct, and in safe builds an OOB slice of
    // `host_mods` panics, hiding the real diagnostic.
    const host_count: usize = @min(host_returned, MAX_MODIFIERS);
    if (host_count == 0) {
        log.warn(
            "host advertises no dmabuf modifiers for format 0x{x}; " ++
                "cannot use direct mode",
            .{drm_format},
        );
        return null;
    }
    const host_list: []const u64 = host_mods[0..host_count];

    // ---- GPU side -----------------------------------------------
    //
    // Standard Vulkan two-call enumeration: first call with a null
    // array to get the count, second call to fill the array.
    var gpu_mods: [MAX_MODIFIERS]vk.VkDrmFormatModifierPropertiesEXT = undefined;
    var mod_list: vk.VkDrmFormatModifierPropertiesListEXT = .{
        .sType = vk.VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT,
        .pNext = null,
        .drmFormatModifierCount = 0,
        .pDrmFormatModifierProperties = null,
    };
    var props2: vk.VkFormatProperties2 = .{
        .sType = vk.VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
        .pNext = &mod_list,
        .formatProperties = std.mem.zeroes(vk.VkFormatProperties),
    };
    dev.dispatch.getPhysicalDeviceFormatProperties2(
        dev.physical_device,
        format,
        &props2,
    );
    if (mod_list.drmFormatModifierCount == 0) return null;
    if (mod_list.drmFormatModifierCount > MAX_MODIFIERS) {
        log.warn(
            "GPU modifier list truncated: driver reports {}, MAX_MODIFIERS={}",
            .{ mod_list.drmFormatModifierCount, MAX_MODIFIERS },
        );
        mod_list.drmFormatModifierCount = MAX_MODIFIERS;
    }
    mod_list.pDrmFormatModifierProperties = &gpu_mods[0];
    dev.dispatch.getPhysicalDeviceFormatProperties2(
        dev.physical_device,
        format,
        &props2,
    );

    // ---- intersect + choose -------------------------------------
    var has_linear: bool = false;
    var best_tiled: ?u64 = null;
    for (gpu_mods[0..mod_list.drmFormatModifierCount]) |gm| {
        // Single-plane only: present callback ABI passes one fd /
        // offset / stride. Multi-plane (AMD AFBC, some video
        // formats) needs a wider ABI.
        if (gm.drmFormatModifierPlaneCount != 1) continue;
        if ((gm.drmFormatModifierTilingFeatures & required_features) != required_features) continue;
        // Intersect with what the compositor accepts.
        if (std.mem.indexOfScalar(u64, host_list, gm.drmFormatModifier) == null) continue;

        if (gm.drmFormatModifier == DRM_FORMAT_MOD_LINEAR) {
            has_linear = true;
        } else if (best_tiled == null) {
            // Keep the FIRST qualifying tiled modifier in driver order.
            best_tiled = gm.drmFormatModifier;
        }
    }
    if (best_tiled) |m| return m;
    if (has_linear) return DRM_FORMAT_MOD_LINEAR;
    return null;
}
/// `.direct` mode: allocate the render image with
/// `VK_EXT_image_drm_format_modifier` so that the image's own memory
/// can be exported as the dmabuf.
///
/// One of two create-info variants is chained, depending on
/// `chosen_mod`:
///   - LINEAR: EXPLICIT layout with `rowPitch = width * 4`.
///   - non-LINEAR (vendor-tiled): LIST with a single-entry modifier
///     list. The driver computes its own internal layout; the
///     modifier is read back with
///     `vkGetImageDrmFormatModifierPropertiesEXT` as a sanity check
///     (it should equal `chosen_mod`).
///
/// In both cases the `stride` stored on the result is plane 0's
/// `rowPitch` as reported by `vkGetImageSubresourceLayout`.
///
/// Errors:
///   - `error.VulkanFailed`: a Vulkan create/allocate/bind/export
///     call failed (logged).
///   - `error.NoSuitableMemoryType`: no DEVICE_LOCAL memory type
///     matches the image's requirements.
///   - `error.UnsupportedFormat`: plane 0 has a nonzero offset, or its
///     row pitch does not fit in `u32`.
/// On any error, every resource created so far is released by the
/// `errdefer`s below.
fn initDirect(opts: Options, drm_format: u32, chosen_mod: u64) Error!Self {
    const dev = opts.device;
    const image_usage = @as(vk.VkImageUsageFlags, vk.VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) |
        vk.VK_IMAGE_USAGE_SAMPLED_BIT |
        vk.VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
        opts.extra_usage;
    const bytes_per_pixel: u32 = 4;
    const row_pitch: vk.VkDeviceSize = @as(vk.VkDeviceSize, opts.width) * bytes_per_pixel;

    // ---- 1. Image: modifier-aware, externally-shareable -----------
    const plane_layout: vk.VkSubresourceLayout = .{
        .offset = 0,
        .size = 0, // ignored for EXPLICIT create-info
        .rowPitch = row_pitch,
        .arrayPitch = 0,
        .depthPitch = 0,
    };
    const explicit_create: vk.VkImageDrmFormatModifierExplicitCreateInfoEXT = .{
        .sType = vk.VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
        .pNext = null,
        .drmFormatModifier = DRM_FORMAT_MOD_LINEAR,
        .drmFormatModifierPlaneCount = 1,
        .pPlaneLayouts = &plane_layout,
    };
    // Single-modifier list: the driver "picks" the only option, but
    // crucially computes its own opaque internal layout for the
    // tiling, which we don't have to know.
    const list_mod = chosen_mod;
    const list_create: vk.VkImageDrmFormatModifierListCreateInfoEXT = .{
        .sType = vk.VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT,
        .pNext = null,
        .drmFormatModifierCount = 1,
        .pDrmFormatModifiers = &list_mod,
    };
    const mod_pnext: ?*const anyopaque = if (chosen_mod == DRM_FORMAT_MOD_LINEAR)
        @ptrCast(&explicit_create)
    else
        @ptrCast(&list_create);
    const ext_image_info: vk.VkExternalMemoryImageCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
        .pNext = mod_pnext,
        .handleTypes = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
    };
    const image_info: vk.VkImageCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
        .pNext = &ext_image_info,
        .flags = 0,
        .imageType = vk.VK_IMAGE_TYPE_2D,
        .format = opts.format,
        .extent = .{ .width = opts.width, .height = opts.height, .depth = 1 },
        .mipLevels = 1,
        .arrayLayers = 1,
        .samples = vk.VK_SAMPLE_COUNT_1_BIT,
        .tiling = vk.VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT,
        .usage = image_usage,
        .sharingMode = vk.VK_SHARING_MODE_EXCLUSIVE,
        .queueFamilyIndexCount = 0,
        .pQueueFamilyIndices = null,
        .initialLayout = vk.VK_IMAGE_LAYOUT_UNDEFINED,
    };
    var image: vk.VkImage = undefined;
    if (dev.dispatch.createImage(dev.device, &image_info, null, &image) != vk.VK_SUCCESS) {
        log.err("vkCreateImage (Target direct, mod=0x{x}) failed", .{chosen_mod});
        return error.VulkanFailed;
    }
    errdefer dev.dispatch.destroyImage(dev.device, image, null);

    // ---- 2. Image memory: exportable ---------------------------------
    var image_reqs: vk.VkMemoryRequirements = undefined;
    dev.dispatch.getImageMemoryRequirements(dev.device, image, &image_reqs);
    // In direct mode the host doesn't mmap the dmabuf; it imports it
    // as a 2D image into the compositor (`image_backed=true` per
    // `Target.present`). So DEVICE_LOCAL is the right choice: GPU-
    // local memory is faster for the COLOR_ATTACHMENT_OUTPUT writes,
    // and vendor-tiled modifiers often require it on drivers like
    // NVIDIA (which won't expose HOST_VISIBLE memory types for the
    // bits a tiled exportable image requires anyway).
    const image_mem_idx = dev.findMemoryType(
        image_reqs.memoryTypeBits,
        vk.VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
    ) orelse {
        log.err(
            "no DEVICE_LOCAL memory type for direct dmabuf image " ++
                "(mod=0x{x} typeBits=0x{x})",
            .{ chosen_mod, image_reqs.memoryTypeBits },
        );
        return error.NoSuitableMemoryType;
    };
    const export_info: vk.VkExportMemoryAllocateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
        .pNext = null,
        .handleTypes = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
    };
    const image_alloc: vk.VkMemoryAllocateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = &export_info,
        .allocationSize = image_reqs.size,
        .memoryTypeIndex = image_mem_idx,
    };
    var image_memory: vk.VkDeviceMemory = undefined;
    if (dev.dispatch.allocateMemory(dev.device, &image_alloc, null, &image_memory) != vk.VK_SUCCESS) {
        log.err("vkAllocateMemory (Target direct image) failed", .{});
        return error.VulkanFailed;
    }
    errdefer dev.dispatch.freeMemory(dev.device, image_memory, null);
    if (dev.dispatch.bindImageMemory(dev.device, image, image_memory, 0) != vk.VK_SUCCESS) {
        log.err("vkBindImageMemory (Target direct image) failed", .{});
        return error.VulkanFailed;
    }

    // ---- 3. View ---------------------------------------------------
    const view = try createView(dev, image, opts.format);
    errdefer dev.dispatch.destroyImageView(dev.device, view, null);

    // ---- 4. Export memory as dmabuf fd -----------------------------
    const fd = try exportDmabufFd(dev, image_memory);
    errdefer std.posix.close(fd);

    // ---- 5. Confirm the actual modifier + plane layout -------------
    // For non-LINEAR we used LIST create-info (one entry), so the
    // driver "picked" the only option. Query it back as a sanity
    // check and warn if the driver reports a different modifier;
    // that would indicate a driver bug or our list being ignored.
    var actual_mod = chosen_mod;
    if (chosen_mod != DRM_FORMAT_MOD_LINEAR) {
        var mod_props: vk.VkImageDrmFormatModifierPropertiesEXT = .{
            .sType = vk.VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
            .pNext = null,
            .drmFormatModifier = 0,
        };
        const query_result = dev.dispatch.getImageDrmFormatModifierPropertiesEXT(
            dev.device,
            image,
            &mod_props,
        );
        if (query_result == vk.VK_SUCCESS) {
            actual_mod = mod_props.drmFormatModifier;
            if (actual_mod != chosen_mod) {
                log.warn(
                    "driver chose modifier 0x{x}, we asked for 0x{x}",
                    .{ actual_mod, chosen_mod },
                );
            }
        } else {
            // Best-effort check: keep going with the modifier we asked
            // for, but don't hide the fact that it is unverified.
            log.warn(
                "vkGetImageDrmFormatModifierPropertiesEXT failed (result={}); " ++
                    "assuming modifier 0x{x}",
                .{ query_result, chosen_mod },
            );
        }
    }

    // Plane 0 layout: rowPitch is what we report as `stride` to the
    // compositor. For LINEAR this is width*bpp (possibly padded).
    // For vendor-tiled formats the value is implementation-specific;
    // the compositor's GPU knows how to interpret it given the
    // modifier we report alongside.
    var subres: vk.VkImageSubresource = .{
        .aspectMask = vk.VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT,
        .mipLevel = 0,
        .arrayLayer = 0,
    };
    var layout: vk.VkSubresourceLayout = undefined;
    dev.dispatch.getImageSubresourceLayout(dev.device, image, &subres, &layout);

    // `present` hands the host only fd / modifier / size / stride;
    // there is no way to communicate a plane offset. A nonzero offset
    // would therefore be imported at the wrong address, so refuse
    // direct mode rather than present a corrupted image.
    if (layout.offset != 0) {
        log.err(
            "Target.initDirect: plane 0 offset {} != 0; refusing direct mode",
            .{layout.offset},
        );
        return error.UnsupportedFormat;
    }

    // VkSubresourceLayout.rowPitch is u64 but the platform present
    // callback accepts a u32 stride. Fail the init explicitly instead
    // of letting a narrowing cast panic in safe builds.
    const stride = std.math.cast(u32, layout.rowPitch) orelse {
        log.err(
            "Target.initDirect: rowPitch {} > u32 max; refusing direct mode",
            .{layout.rowPitch},
        );
        return error.UnsupportedFormat;
    };

    return .{
        .device = dev,
        .platform = opts.platform,
        .tiling = .direct,
        .image = image,
        .image_memory = image_memory,
        .view = view,
        .dmabuf_buffer = null,
        .dmabuf_memory = null,
        .format = opts.format,
        .width = opts.width,
        .height = opts.height,
        .fd = fd,
        .drm_format = drm_format,
        .drm_modifier = actual_mod,
        .stride = stride,
    };
}
/// `.legacy_copy` mode: OPTIMAL render image + separate LINEAR
/// dmabuf-exported `VkBuffer`. Behavior identical to the
/// pre-modifier-path code.
///
/// The buffer is sized for tightly packed 4-byte pixels
/// (`stride = width * 4`), and the exported fd refers to the buffer's
/// memory, not the image's.
///
/// Errors:
///   - `error.UnsupportedFormat`: `width * 4` does not fit in the
///     `u32` stride.
///   - `error.NoSuitableMemoryType`: no matching memory type for the
///     image (DEVICE_LOCAL) or the buffer (HOST_VISIBLE | HOST_COHERENT).
///   - `error.VulkanFailed`: a Vulkan create/allocate/bind/export
///     call failed (logged).
/// On any error, every resource created so far is released by the
/// `errdefer`s below.
fn initLegacyCopy(opts: Options, drm_format: u32) Error!Self {
    const dev = opts.device;
    // 4 bytes/pixel, packed (no per-row padding). The multiply is
    // checked so that an oversized width becomes an error instead of
    // an overflow panic (safe builds) or undefined behavior
    // (ReleaseFast), matching the explicit stride guard in
    // `initDirect`.
    const bytes_per_pixel: u32 = 4;
    const stride: u32 = std.math.mul(u32, opts.width, bytes_per_pixel) catch {
        log.err(
            "Target.initLegacyCopy: width {} * {} bytes/pixel overflows u32 stride",
            .{ opts.width, bytes_per_pixel },
        );
        return error.UnsupportedFormat;
    };
    const buffer_size: vk.VkDeviceSize = @as(vk.VkDeviceSize, stride) * opts.height;

    // ---- 1. Render image: OPTIMAL tiling, internal memory ----------
    const image_usage = @as(vk.VkImageUsageFlags, vk.VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) |
        vk.VK_IMAGE_USAGE_SAMPLED_BIT |
        vk.VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
        opts.extra_usage;
    const image_info: vk.VkImageCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .imageType = vk.VK_IMAGE_TYPE_2D,
        .format = opts.format,
        .extent = .{ .width = opts.width, .height = opts.height, .depth = 1 },
        .mipLevels = 1,
        .arrayLayers = 1,
        .samples = vk.VK_SAMPLE_COUNT_1_BIT,
        .tiling = vk.VK_IMAGE_TILING_OPTIMAL,
        .usage = image_usage,
        .sharingMode = vk.VK_SHARING_MODE_EXCLUSIVE,
        .queueFamilyIndexCount = 0,
        .pQueueFamilyIndices = null,
        .initialLayout = vk.VK_IMAGE_LAYOUT_UNDEFINED,
    };
    var image: vk.VkImage = undefined;
    if (dev.dispatch.createImage(dev.device, &image_info, null, &image) != vk.VK_SUCCESS) {
        log.err("vkCreateImage (Target legacy render) failed", .{});
        return error.VulkanFailed;
    }
    errdefer dev.dispatch.destroyImage(dev.device, image, null);
    var image_reqs: vk.VkMemoryRequirements = undefined;
    dev.dispatch.getImageMemoryRequirements(dev.device, image, &image_reqs);
    const image_mem_idx = dev.findMemoryType(
        image_reqs.memoryTypeBits,
        vk.VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
    ) orelse return error.NoSuitableMemoryType;
    const image_alloc: vk.VkMemoryAllocateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = null,
        .allocationSize = image_reqs.size,
        .memoryTypeIndex = image_mem_idx,
    };
    var image_memory: vk.VkDeviceMemory = undefined;
    if (dev.dispatch.allocateMemory(dev.device, &image_alloc, null, &image_memory) != vk.VK_SUCCESS) {
        log.err("vkAllocateMemory (Target legacy render image) failed", .{});
        return error.VulkanFailed;
    }
    errdefer dev.dispatch.freeMemory(dev.device, image_memory, null);
    if (dev.dispatch.bindImageMemory(dev.device, image, image_memory, 0) != vk.VK_SUCCESS) {
        log.err("vkBindImageMemory (Target legacy render image) failed", .{});
        return error.VulkanFailed;
    }

    // ---- 2. View ---------------------------------------------------
    const view = try createView(dev, image, opts.format);
    errdefer dev.dispatch.destroyImageView(dev.device, view, null);

    // ---- 3. Dmabuf buffer: LINEAR pixel data, external memory -----
    const ext_buffer_info: vk.VkExternalMemoryBufferCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
        .pNext = null,
        .handleTypes = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
    };
    const buffer_info: vk.VkBufferCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext = &ext_buffer_info,
        .flags = 0,
        .size = buffer_size,
        .usage = vk.VK_BUFFER_USAGE_TRANSFER_DST_BIT,
        .sharingMode = vk.VK_SHARING_MODE_EXCLUSIVE,
        .queueFamilyIndexCount = 0,
        .pQueueFamilyIndices = null,
    };
    var dmabuf_buffer: vk.VkBuffer = undefined;
    if (dev.dispatch.createBuffer(dev.device, &buffer_info, null, &dmabuf_buffer) != vk.VK_SUCCESS) {
        log.err("vkCreateBuffer (Target dmabuf) failed", .{});
        return error.VulkanFailed;
    }
    errdefer dev.dispatch.destroyBuffer(dev.device, dmabuf_buffer, null);
    var buf_reqs: vk.VkMemoryRequirements = undefined;
    dev.dispatch.getBufferMemoryRequirements(dev.device, dmabuf_buffer, &buf_reqs);
    // Prefer HOST_CACHED so reads from the mmap'd dmabuf are fast.
    // Without it (HOST_VISIBLE | HOST_COHERENT only), NVIDIA gives
    // back write-combining memory: GPU writes are fast but HOST reads
    // crawl (~10 MB/s) because the mapping is uncached. The Qt
    // `presentVulkanDmabuf` `QImage::copy()` reads every pixel, so a
    // small ~3 MB frame took ~260 ms there. HOST_COHERENT is still
    // requested so we don't need explicit flushes between GPU writes
    // and host reads; HOST_CACHED on top makes the host reads
    // cacheable.
    const host_flags_cached =
        @as(vk.VkMemoryPropertyFlags, vk.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) |
        vk.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
        vk.VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
    const host_flags_uncached =
        @as(vk.VkMemoryPropertyFlags, vk.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) |
        vk.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
    // Try the cached variant first, then fall back to plain
    // HOST_VISIBLE | HOST_COHERENT.
    const dmabuf_mem_idx = dev.findMemoryType(buf_reqs.memoryTypeBits, host_flags_cached) orelse
        dev.findMemoryType(buf_reqs.memoryTypeBits, host_flags_uncached) orelse
        {
            log.err(
                "no HOST_VISIBLE memory type for dmabuf (typeBits=0x{x})",
                .{buf_reqs.memoryTypeBits},
            );
            return error.NoSuitableMemoryType;
        };
    const export_info: vk.VkExportMemoryAllocateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
        .pNext = null,
        .handleTypes = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
    };
    const buf_alloc: vk.VkMemoryAllocateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = &export_info,
        .allocationSize = buf_reqs.size,
        .memoryTypeIndex = dmabuf_mem_idx,
    };
    var dmabuf_memory: vk.VkDeviceMemory = undefined;
    if (dev.dispatch.allocateMemory(dev.device, &buf_alloc, null, &dmabuf_memory) != vk.VK_SUCCESS) {
        log.err("vkAllocateMemory (Target dmabuf) failed", .{});
        return error.VulkanFailed;
    }
    errdefer dev.dispatch.freeMemory(dev.device, dmabuf_memory, null);
    if (dev.dispatch.bindBufferMemory(dev.device, dmabuf_buffer, dmabuf_memory, 0) != vk.VK_SUCCESS) {
        log.err("vkBindBufferMemory (Target dmabuf) failed", .{});
        return error.VulkanFailed;
    }

    // ---- 4. Export the buffer's memory as the dmabuf fd -------------
    const fd = try exportDmabufFd(dev, dmabuf_memory);
    errdefer std.posix.close(fd);

    return .{
        .device = dev,
        .platform = opts.platform,
        .tiling = .legacy_copy,
        .image = image,
        .image_memory = image_memory,
        .view = view,
        .dmabuf_buffer = dmabuf_buffer,
        .dmabuf_memory = dmabuf_memory,
        .format = opts.format,
        .width = opts.width,
        .height = opts.height,
        .fd = fd,
        .drm_format = drm_format,
        .drm_modifier = DRM_FORMAT_MOD_LINEAR,
        .stride = stride,
    };
}
/// Create a 2D image view over `image` with the given `format`,
/// identity swizzles, the color aspect, and a single mip level /
/// array layer. Logs and returns `error.VulkanFailed` when
/// `vkCreateImageView` does not succeed. The caller owns the view.
fn createView(
    dev: *const Device,
    image: vk.VkImage,
    format: vk.VkFormat,
) Error!vk.VkImageView {
    const identity_swizzle: vk.VkComponentMapping = .{
        .r = vk.VK_COMPONENT_SWIZZLE_IDENTITY,
        .g = vk.VK_COMPONENT_SWIZZLE_IDENTITY,
        .b = vk.VK_COMPONENT_SWIZZLE_IDENTITY,
        .a = vk.VK_COMPONENT_SWIZZLE_IDENTITY,
    };
    const color_range: vk.VkImageSubresourceRange = .{
        .aspectMask = vk.VK_IMAGE_ASPECT_COLOR_BIT,
        .baseMipLevel = 0,
        .levelCount = 1,
        .baseArrayLayer = 0,
        .layerCount = 1,
    };
    const create_info: vk.VkImageViewCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .image = image,
        .viewType = vk.VK_IMAGE_VIEW_TYPE_2D,
        .format = format,
        .components = identity_swizzle,
        .subresourceRange = color_range,
    };

    var handle: vk.VkImageView = undefined;
    const result = dev.dispatch.createImageView(dev.device, &create_info, null, &handle);
    if (result == vk.VK_SUCCESS) return handle;

    log.err("vkCreateImageView (Target) failed", .{});
    return error.VulkanFailed;
}
/// Export `memory` as a dmabuf file descriptor via `vkGetMemoryFdKHR`.
/// Returns `error.VulkanFailed` (after logging the fd value) when the
/// call does not succeed or yields a negative descriptor. The caller
/// receives the returned fd and is responsible for it from then on.
fn exportDmabufFd(dev: *const Device, memory: vk.VkDeviceMemory) Error!i32 {
    const request: vk.VkMemoryGetFdInfoKHR = .{
        .sType = vk.VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
        .pNext = null,
        .memory = memory,
        .handleType = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
    };

    // Start from an invalid descriptor so a call that "succeeds"
    // without writing the output is still caught below.
    var fd: c_int = -1;
    const result = dev.dispatch.getMemoryFdKHR(dev.device, &request, &fd);

    const exported = result == vk.VK_SUCCESS and fd >= 0;
    if (!exported) {
        log.err("vkGetMemoryFdKHR (Target) failed: fd={}", .{fd});
        return error.VulkanFailed;
    }
    return fd;
}
/// Release everything this target holds: close the dmabuf fd (when it
/// is non-negative), destroy the optional dmabuf buffer and free its
/// memory, then destroy the view and image and free the image memory.
/// `self` is left `undefined` afterwards and must not be used again.
pub fn deinit(self: *Self) void {
    const device = self.device;

    const has_fd = self.fd >= 0;
    if (has_fd) std.posix.close(self.fd);

    // Only set in `.legacy_copy` mode; `.direct` stores null for both.
    if (self.dmabuf_buffer) |buffer| {
        device.dispatch.destroyBuffer(device.device, buffer, null);
    }
    if (self.dmabuf_memory) |memory| {
        device.dispatch.freeMemory(device.device, memory, null);
    }

    device.dispatch.destroyImageView(device.device, self.view, null);
    device.dispatch.destroyImage(device.device, self.image, null);
    device.dispatch.freeMemory(device.device, self.image_memory, null);

    self.* = undefined;
}
/// Record the end-of-frame synchronization for this target into `cb`.
/// What gets recorded depends on `self.tiling`:
///
///   - `.direct`: a single image memory barrier. The render image's
///     own memory is the dmabuf, so the image goes
///     `GENERAL -> GENERAL` with
///     `COLOR_ATTACHMENT_WRITE -> HOST_READ` access
///     (`COLOR_ATTACHMENT_OUTPUT -> HOST` stages).
///
///   - `.legacy_copy`: the original behavior. Transition the render
///     image to `TRANSFER_SRC_OPTIMAL`, `vkCmdCopyImageToBuffer` into
///     the dmabuf buffer, then a buffer memory barrier for HOST_READ
///     visibility.
///
/// Call this AFTER all RenderPass work has been recorded but BEFORE
/// `vkEndCommandBuffer`.
pub fn recordPresentBarrier(self: *Self, cb: vk.VkCommandBuffer) void {
    return switch (self.tiling) {
        .legacy_copy => self.recordCopyToDmabuf(cb),
        .direct => self.recordDirectBarrier(cb),
    };
}
/// `.direct` end-of-frame barrier: record one image memory barrier on
/// the render image (`GENERAL -> GENERAL`, color-attachment writes
/// made visible to HOST-stage reads) and note the layout on `self`.
///
/// NOTE(review): `initDirect` allocates this image from DEVICE_LOCAL
/// memory and its comments say the host imports the dmabuf rather
/// than mmapping it. A HOST-stage / HOST_READ destination may
/// therefore not be the visibility operation an external importer
/// needs -- confirm against the Vulkan external-memory rules.
fn recordDirectBarrier(self: *Self, cb: vk.VkCommandBuffer) void {
    const dev = self.device;

    const whole_color_image: vk.VkImageSubresourceRange = .{
        .aspectMask = vk.VK_IMAGE_ASPECT_COLOR_BIT,
        .baseMipLevel = 0,
        .levelCount = 1,
        .baseArrayLayer = 0,
        .layerCount = 1,
    };

    // No layout change: the image stays in GENERAL, so this is purely
    // a memory dependency on the color-attachment writes.
    const visibility_barrier: vk.VkImageMemoryBarrier = .{
        .sType = vk.VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .pNext = null,
        .srcAccessMask = vk.VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
        .dstAccessMask = vk.VK_ACCESS_HOST_READ_BIT,
        .oldLayout = vk.VK_IMAGE_LAYOUT_GENERAL,
        .newLayout = vk.VK_IMAGE_LAYOUT_GENERAL,
        .srcQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .image = self.image,
        .subresourceRange = whole_color_image,
    };

    dev.dispatch.cmdPipelineBarrier(
        cb,
        vk.VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, // src stage
        vk.VK_PIPELINE_STAGE_HOST_BIT, // dst stage
        0, // dependencyFlags
        0,
        null, // memory barriers
        0,
        null, // buffer memory barriers
        1,
        &visibility_barrier,
    );

    self.layout = vk.VK_IMAGE_LAYOUT_GENERAL;
}
/// `.legacy_copy` end-of-frame sequence, recorded into `cb`:
///   1. image barrier `GENERAL -> TRANSFER_SRC_OPTIMAL`,
///   2. `vkCmdCopyImageToBuffer` of the full image into the dmabuf
///      buffer (tightly packed rows),
///   3. buffer barrier making the transfer write visible to HOST reads.
/// Afterwards `self.layout` is `TRANSFER_SRC_OPTIMAL`.
///
/// Requires `self.dmabuf_buffer` to be non-null (it is unwrapped
/// with `.?`).
fn recordCopyToDmabuf(self: *Self, cb: vk.VkCommandBuffer) void {
    const dev = self.device;

    // ---- 1. Render image -> TRANSFER_SRC_OPTIMAL --------------------
    // The barrier is recorded with oldLayout = GENERAL unconditionally.
    const to_transfer_src: vk.VkImageMemoryBarrier = .{
        .sType = vk.VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .pNext = null,
        .srcAccessMask = vk.VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
        .dstAccessMask = vk.VK_ACCESS_TRANSFER_READ_BIT,
        .oldLayout = vk.VK_IMAGE_LAYOUT_GENERAL,
        .newLayout = vk.VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
        .srcQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .image = self.image,
        .subresourceRange = .{
            .aspectMask = vk.VK_IMAGE_ASPECT_COLOR_BIT,
            .baseMipLevel = 0,
            .levelCount = 1,
            .baseArrayLayer = 0,
            .layerCount = 1,
        },
    };
    dev.dispatch.cmdPipelineBarrier(
        cb,
        vk.VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, // src stage
        vk.VK_PIPELINE_STAGE_TRANSFER_BIT, // dst stage
        0, // dependencyFlags
        0,
        null, // memory barriers
        0,
        null, // buffer memory barriers
        1,
        &to_transfer_src,
    );

    // ---- 2. Copy image -> buffer (packed, stride = width*4) ---------
    const full_image: vk.VkBufferImageCopy = .{
        .bufferOffset = 0,
        .bufferRowLength = 0, // 0 = tightly packed (uses imageExtent.width)
        .bufferImageHeight = 0,
        .imageSubresource = .{
            .aspectMask = vk.VK_IMAGE_ASPECT_COLOR_BIT,
            .mipLevel = 0,
            .baseArrayLayer = 0,
            .layerCount = 1,
        },
        .imageOffset = .{ .x = 0, .y = 0, .z = 0 },
        .imageExtent = .{ .width = self.width, .height = self.height, .depth = 1 },
    };
    const dmabuf = self.dmabuf_buffer.?;
    dev.dispatch.cmdCopyImageToBuffer(
        cb,
        self.image,
        vk.VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
        dmabuf,
        1,
        &full_image,
    );

    // ---- 3. Make the copied bytes visible to host reads -------------
    // HOST_READ_BIT is the destination access; HOST_BIT is the
    // destination stage. (External fd consumers may need an explicit
    // sync2 release barrier, but for an mmap-based read after a
    // fence-wait this is sufficient on the GPU side.)
    const host_visible: vk.VkBufferMemoryBarrier = .{
        .sType = vk.VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
        .pNext = null,
        .srcAccessMask = vk.VK_ACCESS_TRANSFER_WRITE_BIT,
        .dstAccessMask = vk.VK_ACCESS_HOST_READ_BIT,
        .srcQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .buffer = dmabuf,
        .offset = 0,
        .size = vk.VK_WHOLE_SIZE,
    };
    dev.dispatch.cmdPipelineBarrier(
        cb,
        vk.VK_PIPELINE_STAGE_TRANSFER_BIT, // src stage
        vk.VK_PIPELINE_STAGE_HOST_BIT, // dst stage
        0, // dependencyFlags
        0,
        null, // memory barriers
        1,
        &host_visible,
        0,
        null, // image memory barriers
    );

    // Record the layout the image was left in so later code reading
    // `self.layout` does not see stale state.
    self.layout = vk.VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
}
/// Hand the current frame to the host by invoking the platform's
/// `present` callback with this target's fd, DRM format / modifier,
/// dimensions, and stride.
pub fn present(self: *const Self) void {
    // `image_backed` is the host's signal that this fd is importable
    // by a 2D-image consumer (Wayland linux-dmabuf-v1, Vulkan
    // external image, etc.). True in `.direct` mode where the fd was
    // exported from a VkImage; false in `.legacy_copy` where it was
    // exported from a VkBuffer and can only be read via mmap.
    const image_backed = self.tiling == .direct;

    // The platform is per-surface: its `userdata` is passed back as
    // the callback's first argument so the host can route the frame.
    const host = self.platform;
    host.present(
        host.userdata,
        self.fd,
        self.drm_format,
        self.drm_modifier,
        self.width,
        self.height,
        self.stride,
        image_backed,
    );
}
/// Translate a `VkFormat` into the matching DRM fourcc code.
///
/// The four ASCII characters are packed little-endian: the first
/// character ends up in the lowest byte. Only the 8-bit BGRA and RGBA
/// formats (UNORM and SRGB) are mapped; anything else yields
/// `error.UnsupportedFormat`.
fn vkFormatToDrmFourcc(format: vk.VkFormat) Error!u32 {
    const code: [4]u8 = switch (format) {
        vk.VK_FORMAT_B8G8R8A8_UNORM,
        vk.VK_FORMAT_B8G8R8A8_SRGB,
        => .{ 'A', 'R', '2', '4' },
        vk.VK_FORMAT_R8G8B8A8_UNORM,
        vk.VK_FORMAT_R8G8B8A8_SRGB,
        => .{ 'A', 'B', '2', '4' },
        else => return error.UnsupportedFormat,
    };

    const byte0: u32 = code[0];
    const byte1: u32 = code[1];
    const byte2: u32 = code[2];
    const byte3: u32 = code[3];
    return byte0 | (byte1 << 8) | (byte2 << 16) | (byte3 << 24);
}
// Reference every declaration in this file so that any `test` blocks
// nested inside them are picked up when this file is tested.
test {
std.testing.refAllDecls(@This());
}

View File

@ -0,0 +1,430 @@
//! Wrapper for `VkImage` + `VkDeviceMemory` + `VkImageView` with a
//! staging-buffer upload path.
//!
//! Holds a 2D image, the backing device-local memory, and a view
//! configured for color sampling. All three handles are libghostty-
//! owned and destroyed in `deinit`.
//!
//! Uploads go through a temporary `Buffer(u8)` staging buffer
//! (`HOST_VISIBLE | HOST_COHERENT | TRANSFER_SRC`) and a per-call
//! `CommandPool` that drives the layout-transition ->
//! `vkCmdCopyBufferToImage` -> layout-transition sequence. Both
//! resources are destroyed by the time `replaceRegion` returns; the
//! upload is synchronous from the caller's perspective. That's the
//! right tradeoff for atlas resizes (rare; the renderer can afford
//! the stall) but won't fit the eventual per-frame upload path,
//! which will reuse a long-lived `CommandPool` and fence-paced
//! submission.
//!
//! Layout tracking: a single `layout: VkImageLayout` field records
//! whether the image currently sits in `UNDEFINED` (fresh) or
//! `SHADER_READ_ONLY_OPTIMAL` (after at least one upload). The
//! barrier sequence in `replaceRegion` reads this field to pick the
//! right `srcAccessMask` / `srcStageMask`.
//!
//! Counterpart: `src/renderer/opengl/Texture.zig`.
const Self = @This();
const std = @import("std");
const vulkan = @import("vulkan");
const vk = vulkan.c;
const Device = vulkan.Device;
const CommandPool = vulkan.CommandPool;
const bufferpkg = @import("buffer.zig");
const log = std.log.scoped(.vulkan);
/// Pixel format hint matching `opengl/OpenGL.zig`'s `ImageTextureFormat`.
/// `Vulkan.imageTextureOptions` uses it to choose a `VkFormat` for
/// kitty graphics / background-image uploads. It is declared next to
/// `Texture` rather than at the renderer top level so that the policy
/// it encodes (SRGB vs. UNORM for color channels) can be inspected in
/// one place.
pub const ImageTextureFormat = enum {
    gray,
    rgba,
    bgra,

    /// Map this hint to a concrete `VkFormat`.
    ///
    /// `srgb` only affects the color variants. When an image was
    /// authored in sRGB (the common case for kitty graphics), the SRGB
    /// format lets the sampler linearize on read, so `texture()`
    /// returns linear values that the renderer's `unlinearize()` then
    /// re-encodes for the sRGB framebuffer. Choosing UNORM instead
    /// would skip the sampler decode and leave sRGB bytes to be
    /// encoded again by `unlinearize` and once more by the SRGB
    /// framebuffer, which shows up as washed-out kitty graphics.
    pub fn toVk(self: ImageTextureFormat, srgb: bool) vk.VkFormat {
        switch (self) {
            // Single-channel R8: no color, no gamma.
            .gray => return vk.VK_FORMAT_R8_UNORM,
            .rgba => return switch (srgb) {
                true => vk.VK_FORMAT_R8G8B8A8_SRGB,
                false => vk.VK_FORMAT_R8G8B8A8_UNORM,
            },
            .bgra => return switch (srgb) {
                true => vk.VK_FORMAT_B8G8R8A8_SRGB,
                false => vk.VK_FORMAT_B8G8R8A8_UNORM,
            },
        }
    }
};
/// Texture construction parameters. Vulkan-native rather than mirroring
/// the OpenGL backend's separate `format` / `internal_format`: Vulkan
/// encodes both into one `VkFormat`.
pub const Options = struct {
/// Device the image, memory, and view are created on; also used
/// later for destruction and uploads.
device: *const Device,
/// Pixel format. Common choices:
/// - `VK_FORMAT_R8G8B8A8_UNORM`: color atlases, render target.
/// - `VK_FORMAT_R8G8B8A8_SRGB`: sRGB color atlases.
/// - `VK_FORMAT_R8_UNORM`: grayscale glyph atlas.
format: vk.VkFormat,
/// `VkImageUsageFlagBits` for the image. Typical:
/// - Atlas: `SAMPLED | TRANSFER_DST`
/// - Render target: `COLOR_ATTACHMENT | SAMPLED` (+ external
/// memory flags wired in by the export path)
/// `TRANSFER_DST_BIT` is forced on at create time so the upload
/// path always works; callers don't have to remember.
usage: vk.VkImageUsageFlags,
/// Aspect mask for the image view. Defaults to color; depth images
/// would override.
aspect: vk.VkImageAspectFlags = vk.VK_IMAGE_ASPECT_COLOR_BIT,
};
/// Errors returned by `init` and `replaceRegion`.
pub const Error = error{
/// A `vkCreate*` or `vkAllocate*` returned a non-success status.
/// Logged with the raw `VkResult`.
VulkanFailed,
/// `findMemoryType` couldn't find a `DEVICE_LOCAL` memory type
/// matching the image's requirements. Effectively unrecoverable:
/// typical Vulkan devices always expose at least one.
NoSuitableMemoryType,
};
/// Image handle created in `init`; destroyed in `deinit`.
image: vk.VkImage,
/// Device memory bound to `image`; freed in `deinit`.
memory: vk.VkDeviceMemory,
/// View over `image`; destroyed in `deinit`.
view: vk.VkImageView,
/// Format the image and its view were created with.
format: vk.VkFormat,
/// Aspect mask the image was created with (e.g. COLOR_BIT for
/// renderable textures, DEPTH_BIT for depth attachments). Stored
/// so per-frame `replaceRegion` barrier/copy use the same aspect
/// the image view was made with; hardcoding COLOR_BIT here was a
/// silent validation error for any non-color caller.
aspect: vk.VkImageAspectFlags,
/// Image width as passed to `init`.
width: usize,
/// Image height as passed to `init`.
height: usize,
/// Device the handles above were created on.
device: *const Device,
/// Current image layout. Starts at `UNDEFINED`; `replaceRegion`
/// drives it to `SHADER_READ_ONLY_OPTIMAL` on the first call and
/// keeps it there afterwards. Read by the barrier sequence in
/// `replaceRegion` to pick the right transition source.
layout: vk.VkImageLayout = vk.VK_IMAGE_LAYOUT_UNDEFINED,
/// Create a 2D texture of `width` x `height` on `opts.device`.
///
/// The image is created with OPTIMAL tiling, one mip level and one
/// array layer, backed by DEVICE_LOCAL memory, and gets a 2D view
/// using `opts.aspect`. `TRANSFER_DST_BIT` is always added to
/// `opts.usage`.
///
/// When `data` is non-null it is passed to `replaceRegion` for the
/// full extent. When `data` is null the image is left in `UNDEFINED`
/// and the caller transitions it later (typically via `replaceRegion`
/// or by using it as a render target).
///
/// On error, everything created up to that point is destroyed.
pub fn init(
    opts: Options,
    width: usize,
    height: usize,
    data: ?[]const u8,
) Error!Self {
    const dev = opts.device;

    // ---- 1. VkImage ---------------------------------------------
    const create_info: vk.VkImageCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .imageType = vk.VK_IMAGE_TYPE_2D,
        .format = opts.format,
        .extent = .{
            .width = @intCast(width),
            .height = @intCast(height),
            .depth = 1,
        },
        .mipLevels = 1,
        .arrayLayers = 1,
        .samples = vk.VK_SAMPLE_COUNT_1_BIT,
        .tiling = vk.VK_IMAGE_TILING_OPTIMAL,
        // TRANSFER_DST is OR'd in so `replaceRegion` works regardless
        // of what the caller asked for.
        .usage = opts.usage | @as(vk.VkImageUsageFlags, vk.VK_IMAGE_USAGE_TRANSFER_DST_BIT),
        .sharingMode = vk.VK_SHARING_MODE_EXCLUSIVE,
        .queueFamilyIndexCount = 0,
        .pQueueFamilyIndices = null,
        .initialLayout = vk.VK_IMAGE_LAYOUT_UNDEFINED,
    };
    var image: vk.VkImage = undefined;
    const image_result = dev.dispatch.createImage(dev.device, &create_info, null, &image);
    if (image_result != vk.VK_SUCCESS) {
        log.err("vkCreateImage failed: result={}", .{image_result});
        return error.VulkanFailed;
    }
    errdefer dev.dispatch.destroyImage(dev.device, image, null);

    // ---- 2. VkDeviceMemory --------------------------------------
    var requirements: vk.VkMemoryRequirements = undefined;
    dev.dispatch.getImageMemoryRequirements(dev.device, image, &requirements);
    const type_index = dev.findMemoryType(
        requirements.memoryTypeBits,
        vk.VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
    ) orelse {
        log.err(
            "no DEVICE_LOCAL memory type found for image (typeBits=0x{x})",
            .{requirements.memoryTypeBits},
        );
        return error.NoSuitableMemoryType;
    };
    const allocation: vk.VkMemoryAllocateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = null,
        .allocationSize = requirements.size,
        .memoryTypeIndex = type_index,
    };
    var memory: vk.VkDeviceMemory = undefined;
    const alloc_result = dev.dispatch.allocateMemory(dev.device, &allocation, null, &memory);
    if (alloc_result != vk.VK_SUCCESS) {
        log.err("vkAllocateMemory failed: result={}", .{alloc_result});
        return error.VulkanFailed;
    }
    errdefer dev.dispatch.freeMemory(dev.device, memory, null);

    const bind_result = dev.dispatch.bindImageMemory(dev.device, image, memory, 0);
    if (bind_result != vk.VK_SUCCESS) {
        log.err("vkBindImageMemory failed: result={}", .{bind_result});
        return error.VulkanFailed;
    }

    // ---- 3. VkImageView -----------------------------------------
    const view_create_info: vk.VkImageViewCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .image = image,
        .viewType = vk.VK_IMAGE_VIEW_TYPE_2D,
        .format = opts.format,
        .components = .{
            .r = vk.VK_COMPONENT_SWIZZLE_IDENTITY,
            .g = vk.VK_COMPONENT_SWIZZLE_IDENTITY,
            .b = vk.VK_COMPONENT_SWIZZLE_IDENTITY,
            .a = vk.VK_COMPONENT_SWIZZLE_IDENTITY,
        },
        .subresourceRange = .{
            .aspectMask = opts.aspect,
            .baseMipLevel = 0,
            .levelCount = 1,
            .baseArrayLayer = 0,
            .layerCount = 1,
        },
    };
    var view: vk.VkImageView = undefined;
    const view_result = dev.dispatch.createImageView(dev.device, &view_create_info, null, &view);
    if (view_result != vk.VK_SUCCESS) {
        log.err("vkCreateImageView failed: result={}", .{view_result});
        return error.VulkanFailed;
    }
    errdefer dev.dispatch.destroyImageView(dev.device, view, null);

    // ---- 4. Assemble + optional initial upload ------------------
    // `layout` keeps its field default (UNDEFINED).
    var texture: Self = .{
        .image = image,
        .memory = memory,
        .view = view,
        .format = opts.format,
        .aspect = opts.aspect,
        .width = width,
        .height = height,
        .device = dev,
    };
    if (data) |pixels| {
        try texture.replaceRegion(0, 0, width, height, pixels);
    }
    return texture;
}
/// Destroy the view and the image, then free the backing memory.
/// The texture must not be used afterwards.
pub fn deinit(self: Self) void {
    const owner = self.device;
    const handle = owner.device;

    owner.dispatch.destroyImageView(handle, self.view, null);
    owner.dispatch.destroyImage(handle, self.image, null);
    owner.dispatch.freeMemory(handle, self.memory, null);
}
/// Upload `data` into the `width` x `height` rectangle of the texture
/// whose top-left corner is (`x`, `y`). The sequence is:
///   1. Fill a staging buffer (`TRANSFER_SRC` usage) with `data`.
///   2. Record a one-shot command buffer:
///        a. Barrier: current layout -> TRANSFER_DST_OPTIMAL.
///        b. `vkCmdCopyBufferToImage` (rows tightly packed).
///        c. Barrier: TRANSFER_DST_OPTIMAL -> SHADER_READ_ONLY_OPTIMAL.
///   3. `endAndSubmit` the one-shot session.
///   4. Destroy the staging buffer and the command pool.
///
/// An empty `data` slice or a zero-sized region is a complete no-op:
/// nothing is recorded and `self.layout` is left as it was. In every
/// other case, on success `self.layout` is `SHADER_READ_ONLY_OPTIMAL`.
pub fn replaceRegion(
    self: *Self,
    x: usize,
    y: usize,
    width: usize,
    height: usize,
    data: []const u8,
) Error!void {
    // Nothing to upload means nothing to do at all -- in particular
    // the image layout is NOT transitioned. Transitioning anyway
    // would cost a one-shot command buffer plus a queue wait for no
    // reason, and would surprise a caller relying on the current
    // layout being preserved. A layout-only transition, if ever
    // needed, belongs in a separate `transitionToShaderRead` API
    // rather than in this function's empty-data path.
    const nothing_to_upload = data.len == 0 or width == 0 or height == 0;
    if (nothing_to_upload) return;

    const dev = self.device;

    // ---- staging buffer -----------------------------------------
    // `destroyImmediate` rather than `deinit`: this function runs
    // synchronously on the calling thread (typically the main /
    // app-init thread, NOT the renderer thread) and
    // `OneShot.endAndSubmit` below calls `vkQueueWaitIdle`, so the
    // GPU is done with the staging buffer before the defer fires.
    // Routing it into `Vulkan.buffer_pool` from a non-renderer
    // thread would leak it forever -- the pool's `cycle()` runs only
    // on the renderer thread.
    var staging = try bufferpkg.Buffer(u8).initFill(.{
        .device = dev,
        .usage = vk.VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
    }, data);
    defer staging.destroyImmediate();

    // ---- command pool (one-shot) --------------------------------
    var pool = try CommandPool.init(dev);
    defer pool.deinit();
    const session = try pool.beginOneShot();

    // Source synchronization scope for the first barrier depends on
    // the layout the image is currently in.
    const Scope = struct {
        access: vk.VkAccessFlags,
        stage: vk.VkPipelineStageFlags,
    };
    const previous_layout = self.layout;
    const previous: Scope = switch (previous_layout) {
        vk.VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL => .{
            .access = vk.VK_ACCESS_SHADER_READ_BIT,
            .stage = vk.VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
        },
        else => .{
            .access = 0,
            .stage = vk.VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
        },
    };

    // Both barriers cover the same single mip level / array layer.
    const whole_image: vk.VkImageSubresourceRange = .{
        .aspectMask = self.aspect,
        .baseMipLevel = 0,
        .levelCount = 1,
        .baseArrayLayer = 0,
        .layerCount = 1,
    };

    // ---- barrier: current -> TRANSFER_DST_OPTIMAL ----------------
    const to_transfer_dst: vk.VkImageMemoryBarrier = .{
        .sType = vk.VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .pNext = null,
        .srcAccessMask = previous.access,
        .dstAccessMask = vk.VK_ACCESS_TRANSFER_WRITE_BIT,
        .oldLayout = previous_layout,
        .newLayout = vk.VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
        .srcQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .image = self.image,
        .subresourceRange = whole_image,
    };
    dev.dispatch.cmdPipelineBarrier(
        session.cb,
        previous.stage,
        vk.VK_PIPELINE_STAGE_TRANSFER_BIT,
        0, // dependencyFlags
        0,
        null, // memory barriers
        0,
        null, // buffer memory barriers
        1,
        &to_transfer_dst,
    );

    // ---- vkCmdCopyBufferToImage ---------------------------------
    const copy: vk.VkBufferImageCopy = .{
        .bufferOffset = 0,
        .bufferRowLength = 0, // tightly packed
        .bufferImageHeight = 0,
        .imageSubresource = .{
            .aspectMask = self.aspect,
            .mipLevel = 0,
            .baseArrayLayer = 0,
            .layerCount = 1,
        },
        .imageOffset = .{
            .x = @intCast(x),
            .y = @intCast(y),
            .z = 0,
        },
        .imageExtent = .{
            .width = @intCast(width),
            .height = @intCast(height),
            .depth = 1,
        },
    };
    dev.dispatch.cmdCopyBufferToImage(
        session.cb,
        staging.buffer,
        self.image,
        vk.VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
        1,
        &copy,
    );

    // ---- barrier: TRANSFER_DST -> SHADER_READ_ONLY ---------------
    const to_shader_read: vk.VkImageMemoryBarrier = .{
        .sType = vk.VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .pNext = null,
        .srcAccessMask = vk.VK_ACCESS_TRANSFER_WRITE_BIT,
        .dstAccessMask = vk.VK_ACCESS_SHADER_READ_BIT,
        .oldLayout = vk.VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
        .newLayout = vk.VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
        .srcQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .image = self.image,
        .subresourceRange = whole_image,
    };
    dev.dispatch.cmdPipelineBarrier(
        session.cb,
        vk.VK_PIPELINE_STAGE_TRANSFER_BIT,
        vk.VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
        0,
        0,
        null,
        0,
        null,
        1,
        &to_shader_read,
    );

    // Only record the new layout once the submit has succeeded.
    try session.endAndSubmit();
    self.layout = vk.VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
}
// Reference this file's declarations so they are semantically
// analyzed (and any nested tests picked up) when tests are built.
test {
    std.testing.refAllDecls(@This());
}

View File

@ -0,0 +1,232 @@
//! Per-renderer-thread Vulkan state. Lifecycle:
//!
//!   - First `Vulkan.beginFrame` on a thread -> `ensureInit(dev)`
//!     lazily creates a `CommandPool`, a single command buffer
//!     allocated from it, a fence (created signaled), and a
//!     `DescriptorPool` sized for one frame's worst-case usage.
//!     All four are reused across frames; only the descriptor
//!     pool is reset every frame.
//!   - `Vulkan.deinit` on a surface (one per renderer thread) ->
//!     `cleanup(dev)` waits the per-thread fence, frees CB,
//!     destroys pool + fence, drops the cached `last_target`
//!     pointer, and drains the per-thread `buffer_pool` pending
//!     list (which is bounded by the same fence we just waited).
//!
//! Why threadlocal? Splits/tabs share the host's process-wide
//! `VkDevice`, but each renderer thread submits independently and
//! its fence-paced single-frame-in-flight model needs its own
//! fence + command buffer to avoid stomping the previous frame's
//! still-in-flight work. Threadlocal also matches the lifetime of
//! the buffer-pool's per-thread `pending` list (both are bounded
//! by the same `Frame.complete` fence wait).
//!
//! `last_target` lives here too because it's logically per-thread:
//! `presentLastTarget` re-presents whatever the renderer thread
//! handed to `present` last, and pointing at another thread's
//! target would route a different surface's frames to this
//! thread's window.
const std = @import("std");
const vulkan = @import("vulkan");
const vk = vulkan.c;
const Device = vulkan.Device;
const CommandPool = vulkan.CommandPool;
const DescriptorPool = vulkan.DescriptorPool;
const Target = @import("Target.zig");
const buffer_pool = @import("buffer_pool.zig");
const log = std.log.scoped(.vulkan);
/// Caps for the per-frame `step_pool`. Sized for the worst pass
/// shape (kitty image with N placements + the post pipelines): one
/// set per (image_step × MAX_DESCRIPTOR_SETS) plus a handful of
/// the renderer's other pipelines stepped once each. 256 is generous
/// -- actual frames stabilize well under that. If a frame ever
/// exhausts the pool, `RenderPass.step` falls back to the pipeline's
/// static set with a warning logged.
pub const STEP_POOL_MAX_SETS: u32 = 256;
/// Uniform-buffer descriptor cap passed to `DescriptorPool.init`.
pub const STEP_POOL_UNIFORM_BUFFERS: u32 = 256;
/// Combined-image-sampler descriptor cap passed to `DescriptorPool.init`.
pub const STEP_POOL_COMBINED_IMAGE_SAMPLERS: u32 = 256;
/// Storage-buffer descriptor cap passed to `DescriptorPool.init`.
pub const STEP_POOL_STORAGE_BUFFERS: u32 = 256;

/// Errors surfaced by this module's lazy-init path.
pub const Error = error{
    /// `vkAllocateCommandBuffers` / `vkCreateFence` returned a
    /// non-success status. Wrapped here so the lazy-init path in
    /// `ensureInit` can surface a single error type to callers.
    VulkanFailed,
    /// `DescriptorPool.init` rejected the caps we passed it (e.g.
    /// max_sets == 0). Surfaces here so callers' error set matches.
    InvalidPoolConfig,
} || std.mem.Allocator.Error;
/// Most recently presented target, used by `presentLastTarget` when
/// the renderer decides nothing new needs drawing. Stored as a
/// POINTER (not a value copy) into the FrameState's `target` slot
/// so it follows the target through a resize: `frame.resize` calls
/// `target.deinit()` on the old Target and overwrites the slot with
/// a new one -- a value copy would now reference a closed fd and
/// freed VkImage/VkBuffer/VkDeviceMemory handles, and Qt's mmap on
/// the closed fd could read whatever a later open() recycled the fd
/// for. Following the pointer instead always re-presents the
/// currently-live target.
pub threadlocal var last_target: ?*Target = null;

/// Per-surface (per-thread) command pool used for the frame's
/// command buffer. Lazily created in `ensureInit` on the first call;
/// destroyed in `cleanup`.
pub threadlocal var frame_pool: ?CommandPool = null;

/// The single command buffer allocated from `frame_pool` and reused
/// across frames. `vkResetCommandBuffer` is called at the start of
/// each `beginFrameReset` to clear prior recording.
pub threadlocal var frame_cb: vk.VkCommandBuffer = null;

/// Fence signaled when each frame's submit completes. Caller waits
/// on it in `Frame.complete` before handing the target dmabuf to
/// the host.
pub threadlocal var frame_fence: vk.VkFence = null;

/// Per-thread descriptor pool used by `RenderPass.step` to allocate
/// fresh descriptor sets when the same pipeline is bound more than
/// once in a single pass (vkCmdDraw reads descriptors at submit
/// time, so re-using the pipeline's static set would silently
/// corrupt prior draws). Reset at the start of every
/// `beginFrameReset` so this frame's allocations don't pile on the
/// previous frame's; the per-pass usage is bounded by a small
/// constant -- see the `STEP_POOL_*` caps above.
pub threadlocal var step_pool: ?DescriptorPool = null;
/// Lazily create this thread's frame resources: a command pool, one
/// primary command buffer allocated from it, a fence created in the
/// signaled state, and the per-frame descriptor pool (`step_pool`).
/// Once they exist, further calls do nothing.
///
/// Failure contract: the pool / command buffer / fence triple is
/// built in locals and only published to the threadlocals after all
/// three succeed, so an error leaves `frame_pool`, `frame_cb` and
/// `frame_fence` at their pre-call values and the next call retries
/// from scratch. (Publishing piecemeal could leave
/// `frame_pool != null and frame_cb == null`; the `frame_pool == null`
/// guard would then skip re-init and lock the thread out of the
/// renderer permanently.) `step_pool` is created independently: if
/// only it fails, the triple stays committed and the next call
/// retries just the descriptor pool.
pub fn ensureInit(dev: *const Device) Error!void {
    if (frame_pool == null) {
        // Each errdefer below undoes one successful step if a later
        // step in this block fails.
        var new_pool = try CommandPool.init(dev);
        errdefer new_pool.deinit();

        var new_cb: vk.VkCommandBuffer = null;
        {
            const cb_info: vk.VkCommandBufferAllocateInfo = .{
                .sType = vk.VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
                .pNext = null,
                .commandPool = new_pool.pool,
                .level = vk.VK_COMMAND_BUFFER_LEVEL_PRIMARY,
                .commandBufferCount = 1,
            };
            const r = dev.dispatch.allocateCommandBuffers(dev.device, &cb_info, &new_cb);
            if (r != vk.VK_SUCCESS) return error.VulkanFailed;
        }
        errdefer dev.dispatch.freeCommandBuffers(dev.device, new_pool.pool, 1, &new_cb);

        var new_fence: vk.VkFence = null;
        {
            const fence_info: vk.VkFenceCreateInfo = .{
                .sType = vk.VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
                .pNext = null,
                // Start signaled so the very first `Frame.complete`
                // doesn't try to reset an unsignaled fence.
                .flags = vk.VK_FENCE_CREATE_SIGNALED_BIT,
            };
            const r = dev.dispatch.createFence(dev.device, &fence_info, null, &new_fence);
            if (r != vk.VK_SUCCESS) return error.VulkanFailed;
        }

        // Nothing in this block can fail after the fence exists, so
        // it needs no errdefer: publish all three together.
        frame_pool = new_pool;
        frame_cb = new_cb;
        frame_fence = new_fence;
    }

    // The descriptor pool is independent of the triple above; a
    // failure here leaves only `step_pool` null for the next call.
    if (step_pool != null) return;
    step_pool = try DescriptorPool.init(.{
        .device = dev,
        .max_sets = STEP_POOL_MAX_SETS,
        .uniform_buffers = STEP_POOL_UNIFORM_BUFFERS,
        .combined_image_samplers = STEP_POOL_COMBINED_IMAGE_SAMPLERS,
        .storage_buffers = STEP_POOL_STORAGE_BUFFERS,
    });
}
/// Reset this thread's per-frame objects at the start of
/// `beginFrame`, in order: the frame command buffer, the descriptor
/// pool (if one exists), then the frame fence. Returns
/// `error.VulkanFailed` as soon as any reset reports a non-success
/// result; later resets are then not attempted.
///
/// The caller is responsible for installing an `errdefer` that
/// re-signals the fence, so a failure here doesn't hang the next
/// `Vulkan.deinit` on `waitForFences(UINT64_MAX)` -- see the comment
/// in `Vulkan.beginFrame` for the full rationale.
pub fn beginFrameReset(dev: *const Device) error{VulkanFailed}!void {
    const cb_result = dev.dispatch.resetCommandBuffer(frame_cb, 0);
    if (cb_result != vk.VK_SUCCESS) return error.VulkanFailed;

    if (step_pool) |*descriptors| {
        const pool_result = dev.dispatch.resetDescriptorPool(dev.device, descriptors.pool, 0);
        if (pool_result != vk.VK_SUCCESS) return error.VulkanFailed;
    }

    const fence_result = dev.dispatch.resetFences(dev.device, 1, &frame_fence);
    if (fence_result != vk.VK_SUCCESS) return error.VulkanFailed;
}
/// Tear down THIS thread's state. Called from `Vulkan.deinit` on
/// each surface. Waits the per-thread fence (which covers any
/// in-flight submit), destroys the fence, frees the command buffer,
/// destroys both pools, drains this thread's `buffer_pool` pending
/// list, and clears `last_target`. Every threadlocal touched is left
/// null afterwards.
///
/// Per-surface teardown only needs THIS surface's submissions to be
/// done, so it blocks on this thread's frame fence (when one exists)
/// rather than `vkDeviceWaitIdle` on the shared device, which would
/// stall every other tab/split's in-flight GPU work just to close
/// one. The device-wide wait is used only as a fallback when the
/// fence wait itself fails; the final-refcount path in
/// `Vulkan.deinit` does the unconditional device-wide waitIdle.
pub fn cleanup(dev: *const Device) void {
    fence: {
        if (frame_fence == null) break :fence;

        const waited = dev.dispatch.waitForFences(
            dev.device,
            1,
            &frame_fence,
            vk.VK_TRUE,
            std.math.maxInt(u64),
        );
        if (waited != vk.VK_SUCCESS) {
            log.warn(
                "ThreadState.cleanup: vkWaitForFences returned {}, falling back to device-wide wait",
                .{waited},
            );
            dev.waitIdle();
        }

        dev.dispatch.destroyFence(dev.device, frame_fence, null);
        frame_fence = null;
    }

    if (frame_pool) |*commands| {
        // Return the command buffer to its pool before the pool goes.
        if (frame_cb != null) {
            dev.dispatch.freeCommandBuffers(dev.device, commands.pool, 1, &frame_cb);
            frame_cb = null;
        }
        commands.deinit();
        frame_pool = null;
    }

    if (step_pool) |*descriptors| {
        descriptors.deinit();
        step_pool = null;
    }

    // Drain THIS thread's pending buffer-pool entries. The
    // frame-fence wait above proved the GPU is done with them, and it
    // has to happen from THIS thread because the pending list lives
    // in this thread's threadlocal storage -- the final-refcount
    // `drainShared` can't reach it.
    buffer_pool.drainSelf(dev);

    // `last_target` is a borrow into this thread's FrameState target
    // slot. The SwapChain teardown destroys the target; we only drop
    // our reference.
    last_target = null;
}

View File

@ -0,0 +1,352 @@
//! Host-coherent `VkBuffer` wrapper, generic over element type.
//!
//! Mirrors `src/renderer/opengl/buffer.zig`: `Buffer(T)` returns a
//! struct that holds one buffer's worth of `T`s, with init / initFill
//! / sync / syncFromArrayLists semantics that match the OpenGL
//! contract.
//!
//! Storage strategy: `HOST_VISIBLE | HOST_COHERENT` memory.
//! - HOST_VISIBLE lets us `vkMapMemory` the buffer and write directly.
//! - HOST_COHERENT means the writes are visible to the GPU without a
//! `vkFlushMappedMemoryRanges` round-trip.
//! - This is the simplest "dynamic" buffer pattern in Vulkan. It does
//! pay a small cost over device-local + staging on discrete GPUs,
//! but the renderer's per-frame buffer payloads are KBs (cell
//! instances + uniforms), not bandwidth-bound. The OpenGL backend
//! uses `dynamic_draw` for the same buffers, which behaves
//! similarly on most drivers.
//!
//! Growth policy: matches the OpenGL backend -- `sync` doubles the
//! buffer when content outgrows it, with no shrink. The buffer is
//! recreated (destroy/create) on growth because Vulkan buffers are
//! immutable in size.
const std = @import("std");
const Allocator = std.mem.Allocator;
const vulkan = @import("vulkan");
const vk = vulkan.c;
const Device = vulkan.Device;
const log = std.log.scoped(.vulkan);
/// Buffer construction parameters. The OpenGL backend's `target` /
/// `usage` enums don't map to Vulkan -- `target` (vertex vs element
/// binding point) is replaced by descriptor binding at draw time, and
/// `usage` (static_draw / dynamic_draw / etc.) is implicit in our
/// host-coherent allocation strategy. What's left is the Vulkan
/// `VkBufferUsageFlags` bitmask, which the renderer's `api.*BufferOptions`
/// methods will return differently per buffer kind (VERTEX_BUFFER_BIT
/// for instance buffers, UNIFORM_BUFFER_BIT for uniforms, etc.).
pub const Options = struct {
    /// Device the buffer is created on; its dispatch table is used
    /// for every Vulkan call made on the buffer's behalf.
    device: *const Device,
    /// `VkBufferUsageFlagBits` for the buffer.
    usage: vk.VkBufferUsageFlags,
};

/// Errors returned by buffer creation and writes.
pub const Error = error{
    /// A `vkCreate*` / `vkAllocateMemory` / `vkBindBufferMemory` /
    /// `vkMapMemory` returned a non-success status.
    VulkanFailed,
    /// `Device.findMemoryType` couldn't find a `HOST_VISIBLE | HOST_COHERENT`
    /// memory type matching the buffer's requirements. Unlikely on any
    /// real driver but worth flagging distinctly.
    NoSuitableMemoryType,
};
/// `Buffer(T)`: a `VkBuffer` + backing `VkDeviceMemory` typed to hold
/// some number of `T`s. Mirrors `opengl/buffer.zig`'s `Buffer(T)` so
/// the renderer's call sites don't need a per-backend branch.
pub fn Buffer(comptime T: type) type {
    return struct {
        const Self = @This();

        /// Underlying `VkBuffer` handle.
        buffer: vk.VkBuffer,
        /// Backing memory. Host-coherent; mappable directly.
        memory: vk.VkDeviceMemory,
        /// Options this buffer was allocated with.
        opts: Options,
        /// Current capacity, in number of `T`s.
        len: usize,

        /// Initialize a buffer with capacity for `len` `T`s. Contents
        /// are uninitialized; call `sync` to populate.
        pub fn init(opts: Options, len: usize) Error!Self {
            return try create(opts, len);
        }

        /// Initialize a buffer pre-filled with the provided data.
        ///
        /// If the fill fails, the buffer is destroyed on the spot
        /// rather than handed to the recycle pool: it has never been
        /// referenced by any command buffer, so immediate destruction
        /// is safe, and it keeps the error path correct for callers
        /// on non-renderer threads (e.g. one-shot staging uploads),
        /// whose per-thread pending list is never cycled.
        pub fn initFill(opts: Options, data: []const T) Error!Self {
            const self = try create(opts, data.len);
            errdefer self.destroyImmediate();
            try self.write(0, data);
            return self;
        }

        /// Hand the (VkBuffer, VkDeviceMemory) pair back to the
        /// process-wide pool. The pool (see `Vulkan.buffer_pool`)
        /// holds the entry until the current frame's fence has
        /// signaled (the GPU is done with our recorded references)
        /// and then makes it available to a future `Buffer.create`
        /// call. Returning to the pool solves both:
        ///   - `renderer/image.zig:draw`'s `defer buf.deinit()` no
        ///     longer use-after-frees the in-flight buffer.
        ///   - It avoids the per-frame allocation thrash that
        ///     drove the driver to SIGSEGV on image-heavy frames.
        ///
        /// MUST be called only from the renderer thread (the path
        /// whose fence will eventually retire references to this
        /// buffer in `Frame.complete`). One-shot uploads (atlas
        /// staging buffers, etc.) that already block on
        /// `vkQueueWaitIdle` post-submit must use
        /// `destroyImmediate` instead -- they don't share the
        /// renderer thread's fence cycle.
        pub fn deinit(self: Self) void {
            const dev = self.opts.device;
            const bp = @import("../Vulkan.zig").buffer_pool;
            bp.release(
                dev,
                self.buffer,
                self.memory,
                self.opts.usage,
                self.capacityBytes(),
            ) catch |err| {
                // OOM growing the pool. The buffer may still be
                // referenced by an in-flight command buffer, so we
                // wait the entire device idle before destroying --
                // expensive but correct.
                log.warn(
                    "Buffer.deinit: pool release failed ({}); falling " ++
                        "back to vkDeviceWaitIdle + destroy",
                    .{err},
                );
                _ = dev.dispatch.deviceWaitIdle(dev.device);
                self.destroyImmediate();
            };
        }

        /// Destroy the buffer immediately, bypassing the recycle
        /// pool. The caller MUST ensure no in-flight command buffer
        /// references this buffer (e.g. by having waited on a fence
        /// or `vkQueueWaitIdle` covering its submission).
        ///
        /// Used by short-lived staging buffers like
        /// `Texture.replaceRegion` whose lifetime is bounded by a
        /// `OneShot.endAndSubmit` that already drains the queue;
        /// stuffing those into the pool from a non-renderer thread
        /// would leak them (the renderer thread's `cycle` runs the
        /// pool, so an upload thread's pushes never get reused).
        pub fn destroyImmediate(self: Self) void {
            const dev = self.opts.device;
            dev.dispatch.destroyBuffer(dev.device, self.buffer, null);
            dev.dispatch.freeMemory(dev.device, self.memory, null);
        }

        /// Replace the buffer's contents. Grows (doubles) if needed --
        /// matches the OpenGL backend's behavior. Data shorter than
        /// the current capacity leaves the trailing slots untouched.
        pub fn sync(self: *Self, data: []const T) Error!void {
            if (data.len > self.len) try self.grow(data.len * 2);
            try self.write(0, data);
        }

        /// Like `sync` but pulls from multiple `ArrayList`s in
        /// sequence; returns the total number of elements written.
        pub fn syncFromArrayLists(
            self: *Self,
            lists: []const std.ArrayListUnmanaged(T),
        ) Error!usize {
            var total: usize = 0;
            for (lists) |list| total += list.items.len;
            if (total > self.len) try self.grow(total * 2);

            var off: usize = 0;
            for (lists) |list| {
                if (list.items.len == 0) continue;
                try self.write(off, list.items);
                off += list.items.len;
            }
            return total;
        }

        // ---- internals -------------------------------------------

        /// Capacity, in bytes, to report to the recycle pool. Never
        /// below 1: `create` rounds a zero-length request up to a
        /// 1-byte `VkBuffer`, and `buffer_pool.acquire` is always
        /// called with a minimum of at least 1 byte, so an entry
        /// released with capacity 0 could never be matched again and
        /// would sit in the pool until final teardown. The value never
        /// exceeds the real buffer size: fresh buffers are created
        /// with exactly `@max(1, len * @sizeOf(T))` bytes, and
        /// recycled ones derive `len` by rounding the pooled capacity
        /// (itself >= 1) down.
        fn capacityBytes(self: Self) u64 {
            return @max(1, @as(u64, self.len) * @sizeOf(T));
        }

        /// Produce a buffer able to hold `len` `T`s, either by
        /// recycling a pooled (VkBuffer, VkDeviceMemory) pair or by
        /// creating, allocating and binding a fresh one.
        fn create(opts: Options, len: usize) Error!Self {
            const dev = opts.device;

            // Vulkan requires `size > 0` for buffer creation. Round up
            // a zero request to 1 so the buffer exists and can be
            // grown later via `sync`. (OpenGL silently accepts size=0.)
            //
            // Compute byte size in u64 to avoid the usize multiply
            // overflowing on 32-bit hosts (or, theoretically, on a
            // 64-bit host with `len` near `maxInt(usize)/@sizeOf(T)`,
            // though that's astronomical for any real renderer
            // payload). `std.math.mul` returns `error.Overflow` on
            // overflow; map that onto `error.VulkanFailed` since the
            // request is unserviceable -- Vulkan can't allocate a
            // buffer that big regardless of why we computed it.
            const len_u64: u64 = @intCast(len);
            const byte_size_raw = std.math.mul(u64, len_u64, @sizeOf(T)) catch
                return error.VulkanFailed;
            const byte_size: u64 = @max(1, byte_size_raw);

            // Reach into the buffer pool first -- a previous frame's
            // released VkBuffer of matching usage+capacity is safe to
            // reuse, no allocator round trip needed. Image-draw
            // frames stabilize at ~hundreds of pool entries per
            // (usage, size) bucket.
            const bp = @import("../Vulkan.zig").buffer_pool;
            if (bp.acquire(opts.usage, byte_size)) |entry| {
                return .{
                    .buffer = entry.buffer,
                    .memory = entry.memory,
                    .opts = opts,
                    // The pooled buffer may be larger than requested;
                    // expose all of it, rounded down to whole `T`s.
                    .len = @intCast(entry.capacity / @sizeOf(T)),
                };
            }

            const info: vk.VkBufferCreateInfo = .{
                .sType = vk.VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
                .pNext = null,
                .flags = 0,
                .size = byte_size,
                .usage = opts.usage,
                .sharingMode = vk.VK_SHARING_MODE_EXCLUSIVE,
                .queueFamilyIndexCount = 0,
                .pQueueFamilyIndices = null,
            };
            var buffer: vk.VkBuffer = undefined;
            {
                const r = dev.dispatch.createBuffer(dev.device, &info, null, &buffer);
                if (r != vk.VK_SUCCESS) {
                    log.err("vkCreateBuffer failed: result={}", .{r});
                    return error.VulkanFailed;
                }
            }
            errdefer dev.dispatch.destroyBuffer(dev.device, buffer, null);

            var reqs: vk.VkMemoryRequirements = undefined;
            dev.dispatch.getBufferMemoryRequirements(dev.device, buffer, &reqs);
            const type_index = dev.findMemoryType(
                reqs.memoryTypeBits,
                vk.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                    vk.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
            ) orelse {
                log.err(
                    "no HOST_VISIBLE|HOST_COHERENT memory type for buffer (typeBits=0x{x})",
                    .{reqs.memoryTypeBits},
                );
                return error.NoSuitableMemoryType;
            };

            const alloc_info: vk.VkMemoryAllocateInfo = .{
                .sType = vk.VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
                .pNext = null,
                .allocationSize = reqs.size,
                .memoryTypeIndex = type_index,
            };
            var memory: vk.VkDeviceMemory = undefined;
            {
                const r = dev.dispatch.allocateMemory(dev.device, &alloc_info, null, &memory);
                if (r != vk.VK_SUCCESS) {
                    log.err("vkAllocateMemory (buffer) failed: result={}", .{r});
                    return error.VulkanFailed;
                }
            }
            errdefer dev.dispatch.freeMemory(dev.device, memory, null);

            {
                const r = dev.dispatch.bindBufferMemory(dev.device, buffer, memory, 0);
                if (r != vk.VK_SUCCESS) {
                    log.err("vkBindBufferMemory failed: result={}", .{r});
                    return error.VulkanFailed;
                }
            }

            return .{
                .buffer = buffer,
                .memory = memory,
                .opts = opts,
                .len = len,
            };
        }

        /// Grow the buffer to hold at least `new_len` Ts. Vulkan
        /// buffers are immutable in size, so we allocate a fresh
        /// one and then retire the old one through `deinit`, i.e.
        /// the recycle pool (it may still be referenced by the
        /// in-flight command buffer -- destroying it directly would
        /// race the GPU). Contents are discarded; callers always
        /// `sync` immediately after `grow` returns.
        ///
        /// Order is critical: `create` first, retire second. If we
        /// retired the old buffer first and `create` failed,
        /// `self.{buffer,memory}` would be left dangling at handles
        /// we no longer own, and the caller's eventual
        /// `self.deinit()` would release them a second time.
        fn grow(self: *Self, new_len: usize) Error!void {
            const replacement = try create(self.opts, new_len);
            // `self` still holds the OLD pair here. `deinit` cannot
            // fail (it falls back to wait-idle + destroy if the pool
            // can't take the entry), so the assignment below always
            // reaches a healthy state.
            self.deinit();
            self.* = replacement;
        }

        /// Copy `data` into the buffer starting at element offset
        /// `elem_off`. Host-coherent memory means the GPU sees the
        /// writes without an explicit flush. Callers must have made
        /// sure the buffer is large enough (`sync` grows first).
        fn write(self: *const Self, elem_off: usize, data: []const T) Error!void {
            if (data.len == 0) return;
            const dev = self.opts.device;

            // View the payload as raw bytes; the byte offset is
            // computed in u64 to match `create`'s overflow-safe math.
            const bytes = std.mem.sliceAsBytes(data);
            const byte_off: u64 = @as(u64, elem_off) * @sizeOf(T);

            var mapped: ?*anyopaque = null;
            {
                const r = dev.dispatch.mapMemory(
                    dev.device,
                    self.memory,
                    byte_off,
                    bytes.len,
                    0,
                    &mapped,
                );
                if (r != vk.VK_SUCCESS) {
                    log.err("vkMapMemory failed: result={}", .{r});
                    return error.VulkanFailed;
                }
            }
            defer dev.dispatch.unmapMemory(dev.device, self.memory);

            // The mapping already starts at `byte_off`, so copy to
            // the beginning of the mapped range.
            const dst: [*]u8 = @ptrCast(mapped.?);
            @memcpy(dst[0..bytes.len], bytes);
        }
    };
}
test {
    // `Buffer` is generic, so its body is only analyzed once it is
    // instantiated. Reference the declarations of a representative
    // instantiation so type errors in the generic body surface
    // during compile-check.
    std.testing.refAllDecls(Buffer(u32));
}

View File

@ -0,0 +1,189 @@
//! Process-wide pool of `(VkBuffer, VkDeviceMemory)` pairs recycled
//! across frames on the renderer thread. Solves two problems
//! together:
//!
//!   1. Lifetime: `vulkan/buffer.zig`'s `Buffer.deinit` is called
//!      mid-frame (by `renderer/image.zig:draw`'s `defer buf.deinit()`)
//!      while the command buffer that references the buffer hasn't
//!      been submitted yet. Naive immediate destroy -> use-after-free.
//!   2. Allocation thrash: a frame with N kitty-image placements
//!      would otherwise allocate N tiny VkBuffers + VkDeviceMemories
//!      per frame, every frame. NVIDIA driver SIGSEGVs after a few
//!      seconds of that.
//!
//! Multi-thread design: `pending` is THREADLOCAL (each renderer
//! thread accumulates the buffers IT released during the current
//! frame), while `ready` is process-wide and mutex-protected (any
//! thread can recycle from it). Splits/tabs run independent
//! renderer threads against the SAME shared VkDevice -- a single
//! shared `pending` list would let thread A's `Frame.complete`
//! retire buffers thread B released but whose fence hasn't
//! signaled yet, handing B's still-GPU-in-flight buffer back to a
//! new `acquire`. Per-thread pending bounds the visibility of
//! each entry to the thread that knows when its fence signals.
//!
//! Lifecycle:
//!   - `release(dev, ...)` (renderer thread) -> pushes to THAT
//!     thread's `pending`.
//!   - `cycle(dev)` (renderer thread, after `vkWaitForFences` on
//!     the SAME thread's per-frame fence) -> moves THAT thread's
//!     `pending` -> shared `ready` under the mutex.
//!   - `acquire(...)` (any thread) -> pops a matching entry from
//!     `ready` under the mutex.
//!
//! Caller responsibilities:
//! - Only call `release` from the renderer thread whose fence
//! the frame's GPU work signals; calling from a thread that
//! never reaches its own `Frame.complete` would leak entries
//! (they sit in that thread's `pending` forever). For one-shot
//! uploads from a non-renderer thread (atlas staging), use
//! `Buffer.destroyImmediate` instead, which bypasses this
//! pool entirely.
const std = @import("std");
const vulkan = @import("vulkan");
const vk = vulkan.c;
const Device = vulkan.Device;
const log = std.log.scoped(.vulkan);
/// One recyclable buffer: the Vulkan handles plus the two keys
/// `acquire` matches on.
pub const Entry = struct {
    /// The pooled `VkBuffer` handle.
    buffer: vk.VkBuffer,
    /// The `VkDeviceMemory` passed to `release` alongside `buffer`.
    memory: vk.VkDeviceMemory,
    /// Usage flags the buffer was released with; `acquire` requires
    /// an exact match.
    usage: vk.VkBufferUsageFlags,
    /// Capacity as reported by the caller of `release`; `acquire`
    /// only returns entries whose capacity is >= the requested
    /// minimum.
    capacity: u64,
};

/// Guards the process-wide `ready` list. Per-thread `pending` is
/// threadlocal and never under this mutex.
var ready_mutex: std.Thread.Mutex = .{};

/// Per-thread pending list. Entries here were released by THIS
/// thread during the current frame and are bounded by the
/// fence THIS thread will wait on in `Frame.complete`. Moved
/// to the shared `ready` list by `cycle()` after that wait
/// returns.
threadlocal var pending: std.ArrayList(Entry) = .{};

/// Process-wide ready list. Entries here are provably retired
/// (the bounding fence has signaled) and any thread may
/// `acquire` them.
var ready: std.ArrayList(Entry) = .{};
/// Queue a buffer for recycling. The buffer cannot be reused
/// until the next fence-wait (handled by `cycle`); it sits in
/// THIS thread's `pending` until then. Bounded by THIS thread's
/// per-frame fence -- see the per-thread pending rationale at
/// the top of this module.
///
/// `dev` is accepted for signature symmetry with the other pool
/// entry points but is not used here.
///
/// Returns `error.OutOfMemory` if the pending list cannot grow. In
/// that case nothing was queued and the caller still owns `buffer`
/// and `memory` and must dispose of them itself.
pub fn release(
    dev: *const Device,
    buffer: vk.VkBuffer,
    memory: vk.VkDeviceMemory,
    usage: vk.VkBufferUsageFlags,
    capacity: u64,
) std.mem.Allocator.Error!void {
    _ = dev;
    // No mutex: `pending` is threadlocal, only THIS thread
    // touches it.
    try pending.append(std.heap.smp_allocator, .{
        .buffer = buffer,
        .memory = memory,
        .usage = usage,
        .capacity = capacity,
    });
}
/// Take the first entry in the shared `ready` list whose usage flags
/// equal `usage` and whose capacity is at least `min_capacity`,
/// removing it from the list. Returns null when nothing matches.
///
/// The search is a linear first-fit scan under `ready_mutex` -- pools
/// tend to have a small number of distinct (usage, size) shapes
/// (image: 48B VERTEX, bg_image: 8B VERTEX) so this stays cheap.
/// Removal is a swap-remove, so the order of `ready` is not
/// preserved.
pub fn acquire(
    usage: vk.VkBufferUsageFlags,
    min_capacity: u64,
) ?Entry {
    ready_mutex.lock();
    defer ready_mutex.unlock();

    for (ready.items, 0..) |candidate, idx| {
        if (candidate.usage != usage) continue;
        if (candidate.capacity < min_capacity) continue;
        // `swapRemove` hands back the element it removed, so return
        // that rather than the loop capture.
        return ready.swapRemove(idx);
    }
    return null;
}
/// Move THIS thread's `pending` entries to the shared `ready` list.
/// THIS thread's fence has signaled, so the GPU is done with every
/// buffer in `pending`. Call from `Frame.complete` after
/// `vkWaitForFences`.
///
/// `dev` is needed only on the OOM fallback path: if `ready` can't
/// grow to absorb `pending`, we wait the device idle (OUTSIDE the
/// mutex -- see below) and then destroy the pending entries directly
/// so the next frame doesn't double up on a pending list that can
/// never drain.
pub fn cycle(dev: *const Device) void {
    const gpa = std.heap.smp_allocator;

    // Receives ownership of `pending`'s storage only on the OOM
    // path; on the fast path it stays empty and this deinit is a
    // no-op.
    var orphaned: std.ArrayList(Entry) = .{};
    defer orphaned.deinit(gpa);

    // `vkDeviceWaitIdle` is slow, and holding the pool mutex across
    // it would block every other renderer thread's
    // release/acquire/cycle. So the locked section only decides which
    // path we're on and, on OOM, steals the pending list into the
    // local above.
    const handed_over = handed_over: {
        ready_mutex.lock();
        defer ready_mutex.unlock();

        ready.appendSlice(gpa, pending.items) catch {
            orphaned = pending;
            pending = .{};
            break :handed_over false;
        };
        pending.clearRetainingCapacity();
        break :handed_over true;
    };
    if (handed_over) return;

    // Mutex released. Other threads can release/acquire/cycle while
    // we wait the device idle and destroy our entries.
    _ = dev.dispatch.deviceWaitIdle(dev.device);
    for (orphaned.items) |entry| {
        dev.dispatch.destroyBuffer(dev.device, entry.buffer, null);
        dev.dispatch.freeMemory(dev.device, entry.memory, null);
    }
}
/// Destroy THIS thread's `pending` entries directly and release the
/// list's backing storage. Call from the same thread's
/// `Vulkan.deinit` AFTER `vkWaitForFences` on this thread's frame
/// fence -- the bounding fence has signaled so the GPU is provably
/// done with these buffers.
///
/// Each renderer thread is responsible for cleaning up its own
/// pending list because Zig threadlocal storage is the calling
/// thread's; the final-refcount tear-down (`drainShared`) only
/// handles the process-wide `ready` list.
pub fn drainSelf(dev: *const Device) void {
    for (pending.items) |e| {
        dev.dispatch.destroyBuffer(dev.device, e.buffer, null);
        dev.dispatch.freeMemory(dev.device, e.memory, null);
    }
    // Free the list's allocation instead of retaining capacity: this
    // runs during per-thread teardown, and once the thread exits
    // nothing else can reach its threadlocal list to free it. The
    // list stays valid (empty) should this thread release again.
    pending.clearAndFree(std.heap.smp_allocator);
}
/// Destroy every buffer/memory pair in the shared `ready` list and
/// leave the list empty (its capacity is kept). Holds `ready_mutex`
/// for the whole drain.
///
/// Call only from the FINAL surface tear-down (the path that hits
/// `device_refcount == 0`) and only after every other renderer
/// thread has already run `drainSelf` on its own pending list.
pub fn drainShared(dev: *const Device) void {
    ready_mutex.lock();
    defer ready_mutex.unlock();

    var idx: usize = 0;
    while (idx < ready.items.len) : (idx += 1) {
        const entry = ready.items[idx];
        dev.dispatch.destroyBuffer(dev.device, entry.buffer, null);
        dev.dispatch.freeMemory(dev.device, entry.memory, null);
    }
    ready.clearRetainingCapacity();
}

File diff suppressed because it is too large Load Diff

View File

@ -1051,10 +1051,26 @@ fn resizeCols(
break :wrapped wrapped;
};
// `c.y` is the cursor row from BEFORE this resize. When the
// call sequence is `resizeWithoutReflow(new_rows, old_cols)` ->
// `resizeCols(new_cols)` (the `.lt` arm above), `self.rows`
// has already been reduced to the new row count by the time
// we run, so a cursor strictly past the new bottom (`c.y >=
// self.rows`) would underflow `self.rows - c.y - 1`. Clamp
// to zero remaining rows in that case: the cursor
// effectively sits on the last visible row after the
// shrink. Note: `c.y == self.rows - 1` (cursor AT the new
// bottom) does NOT underflow, but the `c.y + 1 >= self.rows`
// form still returns 0 there, matching the old
// `self.rows - c.y - 1 == 0` result.
const remaining_rows: usize = if (c.y + 1 >= self.rows)
0
else
self.rows - c.y - 1;
break :cursor .{
.tracked_pin = c.pin orelse try self.trackPin(p),
.untrack = c.pin == null,
.remaining_rows = self.rows - c.y - 1,
.remaining_rows = remaining_rows,
.wrapped_rows = wrapped,
};
} else null;

88
src/vulkan_spvgen.zig Normal file
View File

@ -0,0 +1,88 @@
//! Build-time tool: compiles one of `src/renderer/vulkan/shaders.zig`'s
//! `source.*` constants to SPIR-V and writes the bytes to stdout.
//!
//! Invoked by `src/build/VulkanSpv.zig` once per (shader_name, stage)
//! pair so the renderer can `@embedFile` the resulting .spv blobs
//! and call `Module.initFromSpirv` for built-ins instead of going
//! through `glslang.vk.compileToSpv` at runtime. The runtime path
//! is what populates glslang's per-thread `TPoolAllocator`, which
//! never releases its high-water-mark pages (Zig pthreads don't
//! run C++ thread_local destructors). heaptrack attributed ~10 MB
//! to that residual leak on the Vulkan variant, exactly the delta
//! over OpenGL (which never invokes glslang for its built-ins
//! because the GPU driver compiles GLSL natively).
//!
//! Usage:
//! vulkan_spvgen <shader_name> <stage>
//!
//! Where `shader_name` is one of the public decls of
//! `vulkan.shaders.source` (e.g. `bg_color_frag`, `cell_text_vert`)
//! and `stage` is `vertex` or `fragment`.
//!
//! On success: writes binary SPIR-V to stdout, exits 0.
//! On failure: writes a diagnostic to stderr, exits 1.
const std = @import("std");
const shaders = @import("renderer/vulkan/shaders.zig");
const glslang = @import("glslang");
/// Entry point: `vulkan_spvgen <shader_name> <vertex|fragment>`.
///
/// Looks up the named GLSL source in `shaders.source`, runs it through
/// `shaders.vulkanizeGlsl`, compiles the result with
/// `glslang.vk.compileToSpv`, and writes the SPIR-V bytes to stdout.
///
/// Invalid invocations (wrong argument count, unknown stage, unknown
/// shader name) print a diagnostic to stderr and exit with status 1.
/// Any other failure is returned as an error from `main`.
pub fn main() !void {
    var gpa: std.heap.GeneralPurposeAllocator(.{}) = .{};
    defer _ = gpa.deinit();
    const alloc = gpa.allocator();

    const args = try std.process.argsAlloc(alloc);
    defer std.process.argsFree(alloc, args);

    if (args.len != 3) {
        // argv may be empty, so don't index `args[0]` unconditionally
        // while reporting the usage error.
        const program: []const u8 = if (args.len > 0) args[0] else "vulkan_spvgen";
        std.debug.print(
            "usage: {s} <shader_name> <vertex|fragment>\n",
            .{program},
        );
        std.process.exit(1);
    }

    // Validate every argument BEFORE initializing glslang:
    // `std.process.exit` does not run `defer`s, so bailing out after
    // `glslang.init()` would skip `glslang.finalize()`.
    const name = args[1];
    const stage = std.meta.stringToEnum(shaders.Stage, args[2]) orelse {
        std.debug.print("invalid stage: {s}\n", .{args[2]});
        std.process.exit(1);
    };
    const src = findSource(name) orelse {
        std.debug.print("unknown shader: {s}\n", .{name});
        std.process.exit(1);
    };

    try glslang.init();
    defer glslang.finalize();

    // Vulkan-flavor rewrite of the GLSL (per the original notes, e.g.
    // gl_VertexID -> gl_VertexIndex and a multi-set descriptor
    // layout). Same path the runtime took before the precompile
    // change.
    const translated = try shaders.vulkanizeGlsl(alloc, src);
    defer alloc.free(translated);

    const spv = try glslang.vk.compileToSpv(
        alloc,
        translated,
        stage.vkBindingStage(),
    );
    defer alloc.free(spv);

    // Emit the compiled module exactly as it sits in memory, i.e. in
    // the host's byte order. The build step captures stdout into a
    // .spv file that the renderer `@embedFile`s at compile time.
    var buf: [4096]u8 = undefined;
    var stdout = std.fs.File.stdout().writerStreaming(&buf);
    try stdout.interface.writeAll(std.mem.sliceAsBytes(spv));
    try stdout.end();
}

/// Return the GLSL source stored in the public decl of `shaders.source`
/// called `name`, or null when there is no such decl. The decl list is
/// walked at comptime so the build step can select any built-in by a
/// runtime string.
fn findSource(name: []const u8) ?[:0]const u8 {
    inline for (@typeInfo(shaders.source).@"struct".decls) |decl| {
        if (std.mem.eql(u8, decl.name, name)) {
            return @field(shaders.source, decl.name);
        }
    }
    return null;
}