Merge pull request #17 from fuddlesworth/qt-vulkan-renderer
Vulkan renderer + Qt apprt with Wayland subsurface dmabuf presenter
pull/12846/head
commit
4d01762849
|
|
@ -74,6 +74,7 @@
|
|||
.macos = .{ .path = "./pkg/macos", .lazy = true },
|
||||
.oniguruma = .{ .path = "./pkg/oniguruma", .lazy = true },
|
||||
.opengl = .{ .path = "./pkg/opengl", .lazy = true },
|
||||
.vulkan = .{ .path = "./pkg/vulkan", .lazy = true },
|
||||
.sentry = .{ .path = "./pkg/sentry", .lazy = true },
|
||||
.simdutf = .{ .path = "./pkg/simdutf", .lazy = true },
|
||||
.wuffs = .{ .path = "./pkg/wuffs", .lazy = true },
|
||||
|
|
|
|||
|
|
@ -67,6 +67,11 @@ typedef enum {
|
|||
GHOSTTY_PLATFORM_MACOS,
|
||||
GHOSTTY_PLATFORM_IOS,
|
||||
GHOSTTY_PLATFORM_OPENGL,
|
||||
// Vulkan: fork-only platform tag. The host owns the
|
||||
// VkInstance/Device/Queue and hands them to libghostty via
|
||||
// `ghostty_platform_vulkan_s`. Frames come back to the host as
|
||||
// dmabuf fds for zero-copy compositing.
|
||||
GHOSTTY_PLATFORM_VULKAN,
|
||||
} ghostty_platform_e;
|
||||
|
||||
typedef enum {
|
||||
|
|
@ -481,10 +486,87 @@ typedef struct {
|
|||
void (*present)(void* userdata);
|
||||
} ghostty_platform_opengl_s;
|
||||
|
||||
// Vulkan host integration (fork-only). The host owns the
|
||||
// VkInstance / VkPhysicalDevice / VkDevice / VkQueue (same ownership
|
||||
// model as the OpenGL host); libghostty creates pipelines, command
|
||||
// pools, and images against that device. Frames are handed back to the
|
||||
// host as dmabuf file descriptors so a compositor-side toolkit (e.g.
|
||||
// Qt RHI via QRhiTexture) can sample them without a CPU readback.
|
||||
//
|
||||
// Handles are typed as void* here so consumers don't need the Vulkan
|
||||
// headers to compile the public C API; callers should treat them as
|
||||
// VkInstance, VkPhysicalDevice, VkDevice, VkQueue respectively.
|
||||
typedef struct {
|
||||
// Userdata passed as the first argument to every callback below.
|
||||
void* userdata;
|
||||
|
||||
// Return the address of vkGetInstanceProcAddr (as void*). libghostty
|
||||
// uses this as the loader entry point for every other Vulkan
|
||||
// function it needs.
|
||||
void* (*get_instance_proc_addr)(void* userdata, const char* name);
|
||||
|
||||
// Host-owned Vulkan handles. libghostty does not destroy these; they
|
||||
// remain owned by the host for the surface's lifetime.
|
||||
void* (*instance)(void* userdata); // VkInstance
|
||||
void* (*physical_device)(void* userdata); // VkPhysicalDevice
|
||||
void* (*device)(void* userdata); // VkDevice
|
||||
void* (*queue)(void* userdata); // VkQueue
|
||||
uint32_t (*queue_family_index)(void* userdata);
|
||||
|
||||
// Compositor-supported DRM modifiers for a given DRM_FORMAT_*
|
||||
// fourcc, as advertised by linux-dmabuf-v1's `modifier` events.
|
||||
// libghostty intersects this with what its physical device
|
||||
// supports for COLOR_ATTACHMENT to pick a tiling that the
|
||||
// compositor will actually accept on attach. Without this
|
||||
// intersection, drivers that don't expose COLOR_ATTACHMENT for
|
||||
// the LINEAR modifier (NVIDIA) can't use the direct-export path
|
||||
// and fall back to a CPU-readback path.
|
||||
//
|
||||
// Two-pass usage: call with `out=NULL, capacity=0` to query the
|
||||
// total count; allocate; call again to fill. Returns the number
|
||||
// of modifiers actually written (capped at `capacity`). May
|
||||
// return 0 if the format isn't compositor-supported or the host
|
||||
// doesn't speak linux-dmabuf-v1.
|
||||
size_t (*get_supported_modifiers)(
|
||||
void* userdata,
|
||||
uint32_t drm_format,
|
||||
uint64_t* out,
|
||||
size_t capacity);
|
||||
|
||||
// Hand off a rendered frame to the host as a dmabuf fd. The host
|
||||
// imports it (e.g. into Qt's RHI as a QRhiTexture, or attaches to
|
||||
// a wl_subsurface via linux-dmabuf-v1) and composites.
|
||||
//
|
||||
// `image_backed` is true when the dmabuf was exported from a
|
||||
// VkImage allocated with VK_EXT_image_drm_format_modifier — i.e.
|
||||
// it's directly importable as a 2D image by the compositor or any
|
||||
// GPU-side consumer. false when it was exported from a VkBuffer
|
||||
// (the legacy NVIDIA fallback path where the driver doesn't
|
||||
// advertise COLOR_ATTACHMENT for the LINEAR modifier on
|
||||
// exportable images, so libghostty renders into an OPTIMAL image
|
||||
// and copies the bytes into a linear VkBuffer for export). In the
|
||||
// !image_backed case the fd is only usable via mmap + CPU
|
||||
// readback — attempting a linux-dmabuf-v1 import will trigger an
|
||||
// `invalid_wl_buffer` protocol error.
|
||||
//
|
||||
// libghostty retains ownership of the underlying VkDeviceMemory;
|
||||
// the host must dup() the fd if it needs to hold it past the call.
|
||||
void (*present)(
|
||||
void* userdata,
|
||||
int dmabuf_fd,
|
||||
uint32_t drm_format,
|
||||
uint64_t drm_modifier,
|
||||
uint32_t width,
|
||||
uint32_t height,
|
||||
uint32_t stride,
|
||||
bool image_backed);
|
||||
} ghostty_platform_vulkan_s;
|
||||
|
||||
typedef union {
|
||||
ghostty_platform_macos_s macos;
|
||||
ghostty_platform_ios_s ios;
|
||||
ghostty_platform_opengl_s opengl;
|
||||
ghostty_platform_vulkan_s vulkan;
|
||||
} ghostty_platform_u;
|
||||
|
||||
typedef enum {
|
||||
|
|
|
|||
|
|
@ -165,5 +165,20 @@ fn buildGlslang(
|
|||
);
|
||||
}
|
||||
|
||||
// Ghastty Vulkan-friendly compile shim. Wraps glslang's C++ API
|
||||
// to expose features (auto-map bindings/locations, source/target
|
||||
// environment translation) that the upstream C API doesn't, so
|
||||
// the renderer can compile OpenGL-flavored GLSL — including
|
||||
// user-supplied custom shaders — to Vulkan-targeted SPIR-V.
|
||||
lib.addCSourceFiles(.{
|
||||
.root = b.path("override"),
|
||||
.flags = flags.items,
|
||||
.files = &.{"ghastty_vk_shim.cpp"},
|
||||
});
|
||||
lib.installHeader(
|
||||
b.path("override/ghastty_vk_shim.h"),
|
||||
"ghastty_vk_shim.h",
|
||||
);
|
||||
|
||||
return lib;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,10 @@
|
|||
// Single C import for the glslang package: the two upstream glslang C
// headers plus the Ghastty Vulkan compile shim header.
pub const c = @cImport({
|
||||
@cInclude("glslang/Include/glslang_c_interface.h");
|
||||
@cInclude("glslang/Public/resource_limits_c.h");
|
||||
// Ghastty-specific extension to glslang's C ABI: a Vulkan-
|
||||
// friendly compile entry point that wraps the C++ TShader API
|
||||
// (setAutoMapBindings / setAutoMapLocations / setEnvInput) the
|
||||
// upstream C interface doesn't expose. See
|
||||
// `pkg/glslang/override/ghastty_vk_shim.h`.
|
||||
@cInclude("ghastty_vk_shim.h");
|
||||
});
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ const shader = @import("shader.zig");
|
|||
|
||||
pub const c = @import("c.zig").c;
|
||||
pub const testing = @import("test.zig");
|
||||
pub const vk = @import("vk.zig");
|
||||
|
||||
pub const init = initpkg.init;
|
||||
pub const finalize = initpkg.finalize;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,282 @@
|
|||
// See `ghastty_vk_shim.h` for the contract.
|
||||
|
||||
#include "ghastty_vk_shim.h"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include <glslang/Include/PoolAlloc.h>
|
||||
#include <glslang/Public/ShaderLang.h>
|
||||
#include <glslang/Public/ResourceLimits.h>
|
||||
#include <SPIRV/GlslangToSpv.h>
|
||||
|
||||
// glslang's `InitializeProcess` / `FinalizeProcess` must bracket
|
||||
// any use of `glslang::TShader` / `glslang::TProgram`. The existing
|
||||
// C-API path in `pkg/glslang/init.zig` calls `glslang_initialize_process`
|
||||
// at startup, and per the glslang headers the C and C++ inits share
|
||||
// state, so we don't initialize again here — calling `InitializeProcess`
|
||||
// twice without a matching `FinalizeProcess` leaks reference counts.
|
||||
|
||||
namespace {
|
||||
|
||||
// Gather the info log and debug log of `shader` and then of `program` into
// a single string, one trailing newline per non-empty log. Either pointer
// may be null, in which case that object contributes nothing.
std::string drain_logs(glslang::TShader* shader, glslang::TProgram* program) {
    std::string collected;

    // Append one log (plus a newline) unless it is null or empty.
    const auto append_if_present = [&collected](const char* text) {
        if (text == nullptr || text[0] == '\0') {
            return;
        }
        collected += text;
        collected += "\n";
    };

    if (shader != nullptr) {
        const char* shader_info = shader->getInfoLog();
        const char* shader_debug = shader->getInfoDebugLog();
        append_if_present(shader_info);
        append_if_present(shader_debug);
    }

    if (program != nullptr) {
        const char* program_info = program->getInfoLog();
        const char* program_debug = program->getInfoDebugLog();
        append_if_present(program_info);
        append_if_present(program_debug);
    }

    return collected;
}
|
||||
|
||||
// Duplicate `s` into a malloc'd, NUL-terminated C string that the caller
// releases with free(). Returns nullptr when the allocation fails.
char* dup_to_c(const std::string& s) {
    const std::size_t length = s.size();

    void* raw = std::malloc(length + 1);
    if (raw == nullptr) {
        return nullptr;
    }

    char* copy = static_cast<char*>(raw);
    std::memcpy(copy, s.data(), length);
    copy[length] = '\0';
    return copy;
}
|
||||
|
||||
// Process-wide SPIR-V cache keyed by (source, stage). The renderer
|
||||
// builds one Vulkan.Shaders per surface (per tab/split), which calls
|
||||
// `Module.init` → `compileToSpv` for all 9 built-in shaders + every
|
||||
// user custom shader. Each compile pulls memory from glslang's
|
||||
// thread-local TPoolAllocator, which is a raw pointer in glslang's
|
||||
// TLS that is NEVER released when a renderer thread exits (Zig
|
||||
// pthread spawn doesn't run C++ thread_local destructors and there
|
||||
// is no FinalizeThread hook). With N tabs, the leaked pool pages
|
||||
// add up to tens of MB — observed via heaptrack as the dominant
|
||||
// leak source (~17 MB across 15k+ allocations from
|
||||
// glslang::TPoolAllocator::allocate).
|
||||
//
|
||||
// Cache the resulting SPIR-V instead. The built-in shaders produce
|
||||
// byte-identical SPV regardless of which surface compiles them; the
|
||||
// custom shaders only change when the user edits their config. So
|
||||
// after the first surface, every other surface's compile is a
|
||||
// cache hit with zero glslang work and zero new pool pages.
|
||||
//
|
||||
// Key format: source bytes followed by a single byte stage tag
|
||||
// (0=vertex, 1=fragment). Disambiguates the rare case where two
|
||||
// stages share identical source text.
|
||||
// Accessor for the single mutex that guards the SPIR-V cache. The mutex is
// a function-local static, so every call returns the same object.
std::mutex& spv_cache_mutex() {
    static std::mutex cache_lock;
    return cache_lock;
}
|
||||
// Accessor for the SPIR-V cache itself: cache key -> SPIR-V words. The map
// is a function-local static, so every call returns the same object.
// Callers take `spv_cache_mutex()` before touching it.
std::unordered_map<std::string, std::vector<uint32_t>>& spv_cache() {
    using SpvCache = std::unordered_map<std::string, std::vector<uint32_t>>;
    static SpvCache entries;
    return entries;
}
|
||||
|
||||
// Build the cache key for a (source, stage) pair: the NUL-terminated source
// text followed by one extra byte holding the stage value. `source` must be
// non-null.
std::string make_cache_key(const char* source, ghastty_glslang_stage_t stage) {
    const char stage_tag = static_cast<char>(stage);
    std::string key(source);
    key += stage_tag;
    return key;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
extern "C" int ghastty_glslang_compile_vulkan(
|
||||
const char* source,
|
||||
ghastty_glslang_stage_t stage,
|
||||
uint32_t** spv_out,
|
||||
size_t* spv_len_out,
|
||||
char** err_out) {
|
||||
|
||||
// Reject any null out-pointer up-front. The previous code
|
||||
// dereferenced all three unconditionally on line 1 of the
|
||||
// function body — the in-tree Zig caller (`pkg/glslang/vk.zig`)
|
||||
// always passes valid pointers, but this is a C ABI export and
|
||||
// a future consumer that omits any out-arg would crash here
|
||||
// before any error message could be reported. Returning early
|
||||
// surfaces the precondition cleanly.
|
||||
if (spv_out == nullptr || spv_len_out == nullptr || err_out == nullptr) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
*spv_out = nullptr;
|
||||
*spv_len_out = 0;
|
||||
*err_out = nullptr;
|
||||
|
||||
if (source == nullptr) {
|
||||
*err_out = dup_to_c("source pointer is null");
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Cache hit: copy SPV from the cache and return without ever
|
||||
// touching glslang. See the cache rationale comment above the
|
||||
// map for why this is critical for the multi-tab leak.
|
||||
const std::string key = make_cache_key(source, stage);
|
||||
{
|
||||
std::lock_guard<std::mutex> lg(spv_cache_mutex());
|
||||
auto it = spv_cache().find(key);
|
||||
if (it != spv_cache().end()) {
|
||||
const std::vector<uint32_t>& cached = it->second;
|
||||
const size_t bytes = cached.size() * sizeof(uint32_t);
|
||||
uint32_t* out = static_cast<uint32_t*>(std::malloc(bytes));
|
||||
if (out == nullptr) {
|
||||
*err_out = dup_to_c(
|
||||
"malloc failed for cached SPIR-V copy");
|
||||
return 1;
|
||||
}
|
||||
std::memcpy(out, cached.data(), bytes);
|
||||
*spv_out = out;
|
||||
*spv_len_out = cached.size();
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
EShLanguage lang;
|
||||
switch (stage) {
|
||||
case GHASTTY_GLSLANG_STAGE_VERTEX: lang = EShLangVertex; break;
|
||||
case GHASTTY_GLSLANG_STAGE_FRAGMENT: lang = EShLangFragment; break;
|
||||
default:
|
||||
*err_out = dup_to_c("unknown stage");
|
||||
return 1;
|
||||
}
|
||||
|
||||
glslang::TShader shader(lang);
|
||||
const char* sources[1] = { source };
|
||||
shader.setStrings(sources, 1);
|
||||
|
||||
// Source environment is OpenGL GLSL, target environment is Vulkan.
|
||||
// The cross-environment setup is what lets glslang translate
|
||||
// OpenGL-only builtins (`gl_VertexID`, `gl_InstanceID`, etc.) to
|
||||
// their Vulkan equivalents (`gl_VertexIndex`, `gl_InstanceIndex`)
|
||||
// during SPIR-V generation. Matches `glslangValidator -V` and
|
||||
// Qt's `QShaderBaker`.
|
||||
shader.setEnvInput(
|
||||
glslang::EShSourceGlsl,
|
||||
lang,
|
||||
glslang::EShClientVulkan,
|
||||
/*version*/ 100);
|
||||
shader.setEnvClient(
|
||||
glslang::EShClientVulkan,
|
||||
glslang::EShTargetVulkan_1_3);
|
||||
shader.setEnvTarget(
|
||||
glslang::EShTargetSpv,
|
||||
glslang::EShTargetSpv_1_6);
|
||||
|
||||
// Auto-map: assign descriptor bindings and shader I/O locations
|
||||
// for any `layout`-less declarations. Required for OpenGL GLSL
|
||||
// that doesn't bother with explicit locations (which Vulkan SPIR-V
|
||||
// requires).
|
||||
shader.setAutoMapBindings(true);
|
||||
shader.setAutoMapLocations(true);
|
||||
|
||||
const TBuiltInResource* resources = GetDefaultResources();
|
||||
const EShMessages messages = static_cast<EShMessages>(
|
||||
EShMsgDefault | EShMsgSpvRules | EShMsgVulkanRules);
|
||||
|
||||
if (!shader.parse(resources, /*default_version*/ 450,
|
||||
ECoreProfile, /*force_default*/ false,
|
||||
/*forward_compatible*/ true, messages)) {
|
||||
*err_out = dup_to_c(drain_logs(&shader, nullptr));
|
||||
return 1;
|
||||
}
|
||||
|
||||
glslang::TProgram program;
|
||||
program.addShader(&shader);
|
||||
if (!program.link(messages)) {
|
||||
*err_out = dup_to_c(drain_logs(&shader, &program));
|
||||
return 1;
|
||||
}
|
||||
// mapIO() is what actually applies the auto-bind / auto-map
|
||||
// resolution to the SPIR-V output. Without it the
|
||||
// `setAutoMap*(true)` calls above are no-ops.
|
||||
if (!program.mapIO()) {
|
||||
std::string s = "glslang TProgram::mapIO() failed:\n";
|
||||
s += drain_logs(&shader, &program);
|
||||
*err_out = dup_to_c(s);
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::vector<unsigned int> spv;
|
||||
glslang::GlslangToSpv(*program.getIntermediate(lang), spv);
|
||||
if (spv.empty()) {
|
||||
*err_out = dup_to_c(
|
||||
"GlslangToSpv produced no SPIR-V output");
|
||||
return 1;
|
||||
}
|
||||
|
||||
const size_t bytes = spv.size() * sizeof(uint32_t);
|
||||
uint32_t* out = static_cast<uint32_t*>(std::malloc(bytes));
|
||||
if (out == nullptr) {
|
||||
*err_out = dup_to_c("malloc failed for SPIR-V output buffer");
|
||||
return 1;
|
||||
}
|
||||
std::memcpy(out, spv.data(), bytes);
|
||||
*spv_out = out;
|
||||
*spv_len_out = spv.size();
|
||||
|
||||
// Populate the cache with the freshly-compiled SPV. Stored by
|
||||
// value (std::move into the map); the SPV vector is the same
|
||||
// data we just memcpy'd to `out` so the caller's malloc'd copy
|
||||
// and the cache entry are independent. Future calls with this
|
||||
// (source, stage) skip glslang entirely.
|
||||
{
|
||||
std::lock_guard<std::mutex> lg(spv_cache_mutex());
|
||||
spv_cache().emplace(key, std::move(spv));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Release a SPIR-V buffer with std::free, matching the std::malloc used to
// create it. A null pointer is accepted and ignored.
extern "C" void ghastty_glslang_free_spirv(uint32_t* spv) {
    void* const buffer = spv;
    std::free(buffer);
}
|
||||
|
||||
// Release an error string with std::free, matching the std::malloc used to
// create it. A null pointer is accepted and ignored.
extern "C" void ghastty_glslang_free_error(char* err) {
    void* const message = err;
    std::free(message);
}
|
||||
|
||||
extern "C" void ghastty_glslang_finalize_process(void) {
|
||||
// Drop the cached SPV blobs first. The map owns the std::vector
|
||||
// pages it holds; clearing returns them to the heap. Done before
|
||||
// FinalizeProcess so a malicious post-finalize compile attempt
|
||||
// (which would re-enter glslang on a dead process state) trips
|
||||
// glslang's own checks rather than handing out stale cache hits.
|
||||
{
|
||||
std::lock_guard<std::mutex> lg(spv_cache_mutex());
|
||||
spv_cache().clear();
|
||||
}
|
||||
// Release glslang's process-wide shared state FIRST. This deletes
|
||||
// SharedSymbolTables[v][s][p][src][stage] entries that hold
|
||||
// pointers INTO the thread pool; we want their dtors to run
|
||||
// while the pool memory is still live.
|
||||
glslang::FinalizeProcess();
|
||||
|
||||
// Now destroy this thread's TPoolAllocator entirely. popAll()
|
||||
// alone is insufficient — it returns pages to glslang's
|
||||
// internal free list but never gives them back to the system
|
||||
// allocator (verified empirically: heaptrack total leaked
|
||||
// unchanged after popAll). The pool is `new`-allocated in
|
||||
// glslang::InitializeThreadPoolAllocator, so `delete` calls
|
||||
// ~TPoolAllocator which `free()`s every page.
|
||||
//
|
||||
// heaptrack pointed the ~12 MB glslang leak at
|
||||
// TPoolAllocator::allocate calls rooted in
|
||||
// shadertoy.spirvFromGlsl on the GUI thread (since
|
||||
// ghostty_surface_new runs glslang synchronously from
|
||||
// MainWindow::newTab) — that pool's pages persist until the
|
||||
// GUI thread exits, but a Qt app's GUI thread only exits at
|
||||
// process termination, after atexit. Manual delete here gives
|
||||
// the pages back before exit.
|
||||
//
|
||||
// Safe at atexit because every renderer thread has joined
|
||||
// via Vulkan.threadExit (their pools are independent
|
||||
// threadlocals already cleaned up), the SPV cache was just
|
||||
// cleared, and FinalizeProcess just ran.
|
||||
glslang::TPoolAllocator* pool = &glslang::GetThreadPoolAllocator();
|
||||
glslang::SetThreadPoolAllocator(nullptr);
|
||||
delete pool;
|
||||
}
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
// Vulkan-targeted GLSL compilation that exposes glslang's
|
||||
// C++-only features (auto-map bindings/locations, source/target
|
||||
// environment translation for `gl_VertexID` → `gl_VertexIndex`)
|
||||
// through a C-compatible entry point.
|
||||
//
|
||||
// glslang's public C API (`glslang_c_interface.h`) doesn't expose
|
||||
// `setAutoMapBindings` / `setAutoMapLocations` / `setEnvInput` —
|
||||
// they only live on the C++ `glslang::TShader` class. The CLI
|
||||
// (`glslangValidator -V --auto-map-locations --auto-map-bindings`)
|
||||
// and Qt's `QShaderBaker` both call them internally; this shim is
|
||||
// the equivalent for libghostty.
|
||||
//
|
||||
// Used by `src/renderer/vulkan/shaders.zig` for both the renderer's
|
||||
// built-in shaders and user-supplied custom shaders. The same
|
||||
// function covers both because user-shader compilation happens at
|
||||
// runtime against `libghostty.so`, not as a build step.
|
||||
|
||||
#ifndef GHASTTY_VK_SHIM_H
|
||||
#define GHASTTY_VK_SHIM_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Shader stage selector passed to `ghastty_glslang_compile_vulkan`. The
// numeric values are assigned explicitly.
typedef enum {
|
||||
// Compile the source as a vertex shader.
GHASTTY_GLSLANG_STAGE_VERTEX = 0,
|
||||
// Compile the source as a fragment shader.
GHASTTY_GLSLANG_STAGE_FRAGMENT = 1,
|
||||
} ghastty_glslang_stage_t;
|
||||
|
||||
// Compile a null-terminated GLSL source to Vulkan-flavored SPIR-V.
|
||||
//
|
||||
// Preconditions: `spv_out`, `spv_len_out`, and `err_out` MUST all be
|
||||
// non-null. The function rejects any null out-pointer with rc=1
|
||||
// and no error string (since `err_out` is itself part of the
|
||||
// contract). `source` may be null; that produces a normal failure
|
||||
// with `*err_out` set.
|
||||
//
|
||||
// On success: returns 0. `*spv_out` points to a freshly allocated
|
||||
// array of `*spv_len_out` 32-bit SPIR-V words. Caller frees it
|
||||
// with `ghastty_glslang_free_spirv`. `*err_out` is NULL.
|
||||
//
|
||||
// On failure: returns non-zero. `*err_out` points to a freshly
|
||||
// allocated null-terminated error message (or NULL on out-arg
|
||||
// precondition violation OR on internal OOM). Caller frees it
|
||||
// with `ghastty_glslang_free_error`. `*spv_out` is NULL,
|
||||
// `*spv_len_out` is 0.
|
||||
int ghastty_glslang_compile_vulkan(
|
||||
const char* source,
|
||||
ghastty_glslang_stage_t stage,
|
||||
uint32_t** spv_out,
|
||||
size_t* spv_len_out,
|
||||
char** err_out);
|
||||
|
||||
void ghastty_glslang_free_spirv(uint32_t* spv);
|
||||
void ghastty_glslang_free_error(char* err);
|
||||
|
||||
// Release the process-wide glslang state: the per-thread
|
||||
// TPoolAllocator pages (the high-water-mark pool memory that
|
||||
// otherwise leaks for the process lifetime because Zig pthreads
|
||||
// don't run C++ thread_local destructors) AND the shim's
|
||||
// SPV cache.
|
||||
//
|
||||
// Idempotent. Call ONCE from the host's shutdown path AFTER all
|
||||
// renderer threads have joined — calling it while a renderer
|
||||
// thread might still touch glslang::TShader / TProgram is
|
||||
// undefined behavior per glslang's contract.
|
||||
//
|
||||
// libghostty's own renderer-thread teardown (Vulkan.threadExit)
|
||||
// is what serializes this safely: by the time the host's main()
|
||||
// returns from QApplication::exec(), every renderer thread has
|
||||
// already run threadExit and is joined.
|
||||
void ghastty_glslang_finalize_process(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* GHASTTY_VK_SHIM_H */
|
||||
|
|
@ -0,0 +1,90 @@
|
|||
//! Typed Zig wrapper around the Ghastty Vulkan-friendly glslang
|
||||
//! compile shim (`pkg/glslang/override/ghastty_vk_shim.h`). The shim
|
||||
//! itself is a small C entry point that wraps glslang's C++-only
|
||||
//! `setAutoMapBindings` / `setAutoMapLocations` / `setEnvInput` knobs
|
||||
//! the upstream C ABI doesn't expose.
|
||||
//!
|
||||
//! Callers use this instead of poking `glslang.c.ghastty_*` directly:
|
||||
//! the malloc/free dance for the shim's out-pointers is finicky
|
||||
//! (separate free entry points for SPIR-V and error strings, both
|
||||
//! optional, both have to be dropped on the right path) and was
|
||||
//! previously open-coded across two near-identical 25-line blocks
|
||||
//! in `src/renderer/vulkan/shaders.zig`. This module is the binding
|
||||
//! layer; the renderer just calls `compileToSpv` and gets a Zig
|
||||
//! `[]const u32` slice.
|
||||
|
||||
const std = @import("std");
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
const c = @import("c.zig").c;
|
||||
|
||||
const log = std.log.scoped(.glslang);
|
||||
|
||||
/// Shader stage accepted by `compileToSpv`.
pub const Stage = enum {
    vertex,
    fragment,

    /// Translate this stage into the matching constant of the shim's C enum.
    fn cValue(self: Stage) c.ghastty_glslang_stage_t {
        switch (self) {
            .vertex => return c.GHASTTY_GLSLANG_STAGE_VERTEX,
            .fragment => return c.GHASTTY_GLSLANG_STAGE_FRAGMENT,
        }
    }
};
|
||||
|
||||
/// Errors `compileToSpv` can return: allocator failures plus one
/// shim-level failure.
pub const Error = Allocator.Error || error{
    /// The shim reported a non-zero return code, i.e. its glslang C++
    /// pipeline (TShader parse, TProgram link, GlslangToSpv) failed. Any
    /// message the shim produced is logged with `log.err` before this
    /// error is returned; nothing allocated is handed to the caller.
    GlslangFailed,
};
|
||||
|
||||
/// Compile a null-terminated GLSL source string into a Vulkan-flavored
/// SPIR-V binary.
///
/// Success: returns a slice allocated from `alloc`, which the caller
/// releases with `alloc.free(spv)`. The shim's own malloc'd buffer is
/// copied into `alloc` and then released here, so callers never need
/// `ghastty_glslang_free_spirv`.
///
/// Failure: the shim's message (if any) is logged with `log.err`, the
/// C-side error buffer is freed, and `error.GlslangFailed` is returned.
/// `error.OutOfMemory` is returned if the copy into `alloc` fails.
pub fn compileToSpv(
    alloc: Allocator,
    source: [:0]const u8,
    stage: Stage,
) Error![]const u32 {
    // Start the out-params at null instead of `undefined`: if the shim
    // ever returns without writing them, the `err_ptr != null` check and
    // the deferred free below then see a well-defined null rather than
    // garbage.
    var spv_ptr: [*c]u32 = null;
    var spv_len: usize = 0;
    var err_ptr: [*c]u8 = null;

    const rc = c.ghastty_glslang_compile_vulkan(
        source.ptr,
        stage.cValue(),
        &spv_ptr,
        &spv_len,
        &err_ptr,
    );
    if (rc != 0) {
        if (err_ptr != null) {
            log.err("ghastty_glslang_compile_vulkan: rc={} {s}", .{
                rc,
                std.mem.span(@as([*:0]const u8, @ptrCast(err_ptr))),
            });
            c.ghastty_glslang_free_error(err_ptr);
        } else {
            log.err("ghastty_glslang_compile_vulkan: rc={} (no error string)", .{rc});
        }
        return error.GlslangFailed;
    }
    // The shim's buffer is released on every exit from here on, including
    // the allocation-failure path below.
    defer c.ghastty_glslang_free_spirv(spv_ptr);

    // Copy out of the shim's malloc into `alloc` so the caller's free path
    // matches every other allocator-owned slice.
    const owned = try alloc.alloc(u32, spv_len);
    @memcpy(owned, spv_ptr[0..spv_len]);
    return owned;
}
|
||||
|
|
@ -0,0 +1,195 @@
|
|||
//! Wrapper for `VkCommandPool` with a one-shot command-buffer helper.
|
||||
//!
|
||||
//! Initially used by `vulkan/Texture.zig` for staging-buffer uploads:
|
||||
//! allocate a transient command buffer, record an upload + layout
|
||||
//! barriers, submit, wait for completion, free.
|
||||
//!
|
||||
//! Eventually the renderer will grow a separate per-frame command
|
||||
//! pool for the main draw stream; this pool stays around for
|
||||
//! infrequent operations like atlas uploads where blocking the
|
||||
//! caller is fine. The choice keeps the API small and avoids the
|
||||
//! complication of multi-frame fence tracking for resources that
|
||||
//! will outlive the upload.
|
||||
|
||||
const Self = @This();
|
||||
|
||||
const std = @import("std");
|
||||
const vk = @import("c.zig").c;
|
||||
|
||||
const Device = @import("Device.zig");
|
||||
|
||||
const log = std.log.scoped(.vulkan);
|
||||
|
||||
pub const Error = error{
    /// One of the Vulkan calls made by this module (`vkCreateCommandPool`,
    /// `vkAllocateCommandBuffers`, `vkBeginCommandBuffer`,
    /// `vkEndCommandBuffer`, `vkQueueSubmit`, `vkQueueWaitIdle`) returned
    /// something other than `VK_SUCCESS`. The raw `VkResult` is logged
    /// before the error is returned.
    VulkanFailed,
};
|
||||
|
||||
device: *const Device,
|
||||
pool: vk.VkCommandPool,
|
||||
|
||||
/// Create a command pool on the queue family reported by
/// `device.queue_family_index`. The pool uses
/// `TRANSIENT_BIT | RESET_COMMAND_BUFFER_BIT` because every command buffer
/// allocated from it is short-lived and is freed (or reset) right after
/// submission.
///
/// Returns `error.VulkanFailed` (after logging the `VkResult`) when
/// `vkCreateCommandPool` does not succeed.
pub fn init(device: *const Device) Error!Self {
    const create_info: vk.VkCommandPoolCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .pNext = null,
        .flags = vk.VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
            vk.VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex = device.queue_family_index,
    };

    var handle: vk.VkCommandPool = undefined;
    const result = device.dispatch.createCommandPool(
        device.device,
        &create_info,
        null,
        &handle,
    );
    if (result == vk.VK_SUCCESS) {
        return .{ .device = device, .pool = handle };
    }

    log.err("vkCreateCommandPool failed: result={}", .{result});
    return error.VulkanFailed;
}
|
||||
|
||||
/// Destroy the underlying `VkCommandPool` and poison `self` so that any
/// later use is caught.
pub fn deinit(self: *Self) void {
    const dev = self.device;
    dev.dispatch.destroyCommandPool(dev.device, self.pool, null);
    self.* = undefined;
}
|
||||
|
||||
/// A single-use recording session handed out by `beginOneShot`. The caller
/// records into `cb` and then finishes the session with `endAndSubmit`.
pub const OneShot = struct {
    pool: *Self,
    cb: vk.VkCommandBuffer,

    /// End recording, submit the buffer, block until the queue is idle,
    /// and free the buffer.
    ///
    /// Commands are recorded beforehand directly through `cb` and the
    /// device dispatch table (e.g.
    /// `pool.device.dispatch.cmdPipelineBarrier(cb, …)`). The command
    /// buffer has been freed by the time this returns, on success and on
    /// every error path alike.
    pub fn endAndSubmit(self: OneShot) Error!void {
        const dev = self.pool.device;

        // One cleanup block covers every exit. Leaving the buffer
        // allocated on an error return would leak its slot until the pool
        // is destroyed, and a caller that retries after
        // `error.VulkanFailed` could eventually exhaust the pool.
        //
        // A buffer that has been submitted but not yet waited on is in
        // the PENDING state and must not be freed (UB per the spec).
        // `in_flight` records that window; if we leave while it is set,
        // drain the whole device first. Blocking once on an already
        // failing upload is preferable to leaking the buffer or freeing
        // it while pending.
        var cb_local = self.cb;
        var in_flight: bool = false;
        defer {
            if (in_flight) {
                _ = dev.dispatch.deviceWaitIdle(dev.device);
            }
            dev.dispatch.freeCommandBuffers(dev.device, self.pool.pool, 1, &cb_local);
        }

        const end_result = dev.dispatch.endCommandBuffer(self.cb);
        if (end_result != vk.VK_SUCCESS) {
            log.err("vkEndCommandBuffer failed: result={}", .{end_result});
            return error.VulkanFailed;
        }

        const submit_info: vk.VkSubmitInfo = .{
            .sType = vk.VK_STRUCTURE_TYPE_SUBMIT_INFO,
            .pNext = null,
            .waitSemaphoreCount = 0,
            .pWaitSemaphores = null,
            .pWaitDstStageMask = null,
            .commandBufferCount = 1,
            .pCommandBuffers = &self.cb,
            .signalSemaphoreCount = 0,
            .pSignalSemaphores = null,
        };

        // Goes through `Device.queueSubmit`, which provides the external
        // synchronization for the queue: atlas uploads from every
        // split/tab and the per-frame Frame.complete path all submit to
        // this same queue.
        const submit_result = dev.queueSubmit(1, &submit_info, null);
        if (submit_result != vk.VK_SUCCESS) {
            log.err("vkQueueSubmit failed: result={}", .{submit_result});
            return error.VulkanFailed;
        }
        in_flight = true;

        // Stall until the submission has completed. That is acceptable
        // for one-shot uploads (atlas resizes are rare and the caller is
        // willing to wait); per-frame submission is meant to use fences
        // instead of queueWaitIdle.
        const wait_result = dev.queueWaitIdle();
        if (wait_result != vk.VK_SUCCESS) {
            log.err("vkQueueWaitIdle failed: result={}", .{wait_result});
            return error.VulkanFailed;
        }
        in_flight = false;

        // Success: the deferred block frees the buffer. The pool itself
        // stays alive so back-to-back uploads can reuse it.
    }
};
|
||||
|
||||
/// Start a one-shot upload: allocate a single primary command buffer
/// from this pool and put it into the recording state with
/// `ONE_TIME_SUBMIT`. Finish the returned `OneShot` with
/// `OneShot.endAndSubmit`.
///
/// Fails with `error.VulkanFailed` (after logging the `VkResult`) when
/// either the allocation or `vkBeginCommandBuffer` does not succeed.
pub fn beginOneShot(self: *Self) Error!OneShot {
    const host_dev = self.device;

    var cmd_buf: vk.VkCommandBuffer = undefined;
    const allocate_info: vk.VkCommandBufferAllocateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .pNext = null,
        .commandPool = self.pool,
        .level = vk.VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = 1,
    };
    const alloc_rc = host_dev.dispatch.allocateCommandBuffers(
        host_dev.device,
        &allocate_info,
        &cmd_buf,
    );
    if (alloc_rc != vk.VK_SUCCESS) {
        log.err("vkAllocateCommandBuffers failed: result={}", .{alloc_rc});
        return error.VulkanFailed;
    }
    // The buffer exists from here on; give it back to the pool if
    // anything below bails out with an error.
    errdefer host_dev.dispatch.freeCommandBuffers(host_dev.device, self.pool, 1, &cmd_buf);

    const begin_info: vk.VkCommandBufferBeginInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .pNext = null,
        .flags = vk.VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
        .pInheritanceInfo = null,
    };
    const begin_rc = host_dev.dispatch.beginCommandBuffer(cmd_buf, &begin_info);
    if (begin_rc != vk.VK_SUCCESS) {
        log.err("vkBeginCommandBuffer failed: result={}", .{begin_rc});
        return error.VulkanFailed;
    }

    return .{ .pool = self, .cb = cmd_buf };
}
|
||||
|
||||
// Reference every declaration of this file so the test build
// semantically analyzes all of them.
test {
    const This = @This();
    std.testing.refAllDecls(This);
}
|
||||
|
|
@ -0,0 +1,168 @@
|
|||
//! Wrapper for `VkDescriptorPool` with allocation + per-set helpers.
|
||||
//!
|
||||
//! Vulkan descriptor sets are the per-pipeline resource-binding
|
||||
//! handles: a descriptor set holds references to uniform buffers,
|
||||
//! sampled images, samplers, etc., that a particular shader stage
|
||||
//! draws from. They're allocated from a pool, populated via
|
||||
//! `vkUpdateDescriptorSets`, and bound at draw time with
|
||||
//! `vkCmdBindDescriptorSets`.
|
||||
//!
|
||||
//! Lifetime model: this wrapper assumes the pool outlives all sets
|
||||
//! allocated from it (caller arranges teardown order). Sets aren't
|
||||
//! individually freed — destroying the pool reclaims everything.
|
||||
//! That matches the per-frame pool pattern the renderer will use
|
||||
//! (reset the pool at frame start; reallocate the sets for that
|
||||
//! frame).
|
||||
//!
|
||||
//! Caps are caller-provided. Pass realistic numbers — over-pooling
|
||||
//! is fine; under-pooling fails at allocation time.
|
||||
|
||||
const Self = @This();
|
||||
|
||||
const std = @import("std");
|
||||
const vk = @import("c.zig").c;
|
||||
|
||||
const Device = @import("Device.zig");
|
||||
|
||||
const log = std.log.scoped(.vulkan);
|
||||
|
||||
/// Errors returned by this wrapper's `init` and `allocate`.
pub const Error = error{
|
||||
/// `vkCreateDescriptorPool` / `vkAllocateDescriptorSets` returned
|
||||
/// a non-success status.
/// The offending `VkResult` is logged before this is returned.
|
||||
VulkanFailed,
|
||||
/// Caller passed an invalid pool configuration (e.g. `max_sets ==
|
||||
/// 0`, or every per-type cap is zero). Distinct from
|
||||
/// `VulkanFailed` so callers can tell driver-side errors from
|
||||
/// caller-side ones.
/// Only `init` returns this, and it does so before calling into Vulkan.
|
||||
InvalidPoolConfig,
|
||||
};
|
||||
|
||||
/// Construction caps. `max_sets` is the total number of descriptor
|
||||
/// sets the pool can ever vend; the per-type counts are individual
|
||||
/// resource counts pooled across all those sets.
/// `init` rejects `max_sets == 0` and the case where all three
/// per-type counts are 0. A single count left at 0 only means that
/// no pool-size entry is created for that descriptor type.
|
||||
pub const Options = struct {
|
||||
/// Device the pool is created on; also stored in the wrapper that
/// `init` returns.
device: *const Device,
|
||||
/// Forwarded as `VkDescriptorPoolCreateInfo.maxSets`.
max_sets: u32,
|
||||
/// Cap for `VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER` descriptors.
uniform_buffers: u32 = 0,
|
||||
/// Cap for `VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER` descriptors.
combined_image_samplers: u32 = 0,
|
||||
/// Cap for `VK_DESCRIPTOR_TYPE_STORAGE_BUFFER` descriptors.
storage_buffers: u32 = 0,
|
||||
};
|
||||
|
||||
device: *const Device,
|
||||
pool: vk.VkDescriptorPool,
|
||||
|
||||
/// Create a descriptor pool sized according to `opts`.
///
/// The configuration is validated first: `max_sets` must be non-zero
/// and at least one per-type cap must be non-zero, otherwise
/// `error.InvalidPoolConfig` is returned without calling into Vulkan.
/// A failing `vkCreateDescriptorPool` is logged and reported as
/// `error.VulkanFailed`.
pub fn init(opts: Options) Error!Self {
    // The spec demands `maxSets > 0`. Reject it here so the caller
    // gets a clear error rather than a driver-dependent one.
    if (opts.max_sets == 0) {
        log.err("DescriptorPool.init: max_sets must be > 0", .{});
        return error.InvalidPoolConfig;
    }

    // One candidate per descriptor type that `Options` exposes. Only
    // candidates with a non-zero count are forwarded to Vulkan.
    const candidates = [3]vk.VkDescriptorPoolSize{
        .{
            .type = vk.VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            .descriptorCount = opts.uniform_buffers,
        },
        .{
            .type = vk.VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
            .descriptorCount = opts.combined_image_samplers,
        },
        .{
            .type = vk.VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
            .descriptorCount = opts.storage_buffers,
        },
    };
    var pool_sizes: [3]vk.VkDescriptorPoolSize = undefined;
    var used: u32 = 0;
    for (candidates) |candidate| {
        if (candidate.descriptorCount == 0) continue;
        pool_sizes[used] = candidate;
        used += 1;
    }

    // `poolSizeCount` must be > 0 too: a pool that admits no
    // descriptor type is useless, and drivers differ on whether they
    // reject it at creation or only at allocation time.
    if (used == 0) {
        log.err(
            "DescriptorPool.init: at least one per-type cap must be > 0 " ++
                "(uniform_buffers, combined_image_samplers, storage_buffers)",
            .{},
        );
        return error.InvalidPoolConfig;
    }

    const create_info: vk.VkDescriptorPoolCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
        .pNext = null,
        // FREE_DESCRIPTOR_SET_BIT is deliberately not set: sets are
        // reclaimed by destroying the pool (or by
        // `vkResetDescriptorPool` for the per-frame step pool).
        .flags = 0,
        .maxSets = opts.max_sets,
        .poolSizeCount = used,
        .pPoolSizes = &pool_sizes,
    };

    const host_dev = opts.device;
    var handle: vk.VkDescriptorPool = undefined;
    const rc = host_dev.dispatch.createDescriptorPool(host_dev.device, &create_info, null, &handle);
    if (rc != vk.VK_SUCCESS) {
        log.err("vkCreateDescriptorPool failed: result={}", .{rc});
        return error.VulkanFailed;
    }

    return .{ .device = host_dev, .pool = handle };
}
|
||||
|
||||
/// Destroy the underlying `VkDescriptorPool` and then overwrite
/// `self` with `undefined` so the wrapper is not reused afterwards.
pub fn deinit(self: *Self) void {
    const host_dev = self.device;
    host_dev.dispatch.destroyDescriptorPool(host_dev.device, self.pool, null);
    self.* = undefined;
}
|
||||
|
||||
/// Take one descriptor set with the given `layout` out of this pool.
/// The returned set's contents are not initialized: fill it with
/// `vkUpdateDescriptorSets` before binding it.
///
/// A non-success `vkAllocateDescriptorSets` result is logged and
/// surfaced as `error.VulkanFailed`.
pub fn allocate(
    self: *Self,
    layout: vk.VkDescriptorSetLayout,
) Error!vk.VkDescriptorSet {
    const host_dev = self.device;

    var set_layouts = [_]vk.VkDescriptorSetLayout{layout};
    const allocate_info: vk.VkDescriptorSetAllocateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
        .pNext = null,
        .descriptorPool = self.pool,
        .descriptorSetCount = 1,
        .pSetLayouts = &set_layouts,
    };

    var descriptor_set: vk.VkDescriptorSet = undefined;
    const rc = host_dev.dispatch.allocateDescriptorSets(
        host_dev.device,
        &allocate_info,
        &descriptor_set,
    );
    if (rc == vk.VK_SUCCESS) return descriptor_set;

    log.err("vkAllocateDescriptorSets failed: result={}", .{rc});
    return error.VulkanFailed;
}
|
||||
|
||||
// Reference every declaration of this file so the test build
// semantically analyzes all of them.
test {
    const This = @This();
    std.testing.refAllDecls(This);
}
|
||||
|
|
@ -0,0 +1,679 @@
|
|||
//! Host-provided Vulkan device wrapper.
|
||||
//!
|
||||
//! libghostty does NOT call `vkCreateInstance` / `vkCreateDevice` for
|
||||
//! the Vulkan renderer: per `ghostty_platform_vulkan_s` in
|
||||
//! `include/ghostty.h`, the host (the apprt embedding libghostty —
|
||||
//! e.g. the Qt frontend) owns the entire Vulkan setup. We consume
|
||||
//! its handles via the platform callbacks, validate the version /
|
||||
//! extensions we need, and build a function-pointer dispatch table
|
||||
//! the rest of the renderer can use.
|
||||
//!
|
||||
//! Why host-owned? The host already has a Vulkan instance/device for
|
||||
//! its own compositing (Qt's RHI). Asking the host to share its
|
||||
//! device means rendered frames can be handed back as raw `VkImage`
|
||||
//! handles or dmabuf fds without a CPU readback or a second Vulkan
|
||||
//! instance fighting for the same GPU resources.
|
||||
//!
|
||||
//! Vulkan version: 1.3 (Jan 2022). Promotes dynamic rendering,
|
||||
//! sync2, extended dynamic state — all of which simplify a
|
||||
//! dirty-rect-style terminal renderer. Driver coverage is fine on
|
||||
//! every distro currently in support.
|
||||
//!
|
||||
//! Required device extensions (must be enabled on the host's
|
||||
//! VkDevice; `init` verifies that the physical device advertises each):
|
||||
//! - VK_KHR_external_memory_fd
|
||||
//! - VK_EXT_external_memory_dma_buf
|
||||
//! - VK_EXT_image_drm_format_modifier
|
||||
//!
|
||||
//! These are what let libghostty export the rendered VkImage memory
|
||||
//! as a dmabuf fd so the host can import it for zero-copy
|
||||
//! presentation (path 3 in the qt-vulkan-renderer scoping log:
|
||||
//! preserves Qt's QWidget composition model AND avoids the CPU
|
||||
//! readback the OpenGL path currently does).
|
||||
|
||||
const std = @import("std");
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
const vk = @import("c.zig").c;
|
||||
|
||||
const log = std.log.scoped(.vulkan);
|
||||
|
||||
const Device = @This();
|
||||
|
||||
/// Minimum Vulkan API version the renderer requires.
/// `init` compares the physical device's reported `apiVersion` against
/// this and fails with `error.UnsupportedVulkanVersion` when it is lower.
|
||||
pub const MIN_API_VERSION = vk.VK_API_VERSION_1_3;
|
||||
|
||||
/// Device extensions libghostty requires from the host's
|
||||
/// VkDevice setup. The host must have created its VkDevice with
|
||||
/// these enabled; `init` only verifies that the physical device
/// advertises them and does not enable anything itself.
|
||||
///
|
||||
/// `VK_EXT_image_drm_format_modifier` is what lets
|
||||
/// `vulkan/Target.zig` probe the per-modifier feature set (in
|
||||
/// particular: does `DRM_FORMAT_MOD_LINEAR` advertise
|
||||
/// `COLOR_ATTACHMENT_BIT`?) and, when supported, allocate the render
|
||||
/// image with `VkImageDrmFormatModifierExplicitCreateInfoEXT` so its
|
||||
/// memory can be exported as a dmabuf directly — no separate LINEAR
|
||||
/// `VkBuffer` and no end-of-frame `vkCmdCopyImageToBuffer`. Drivers
|
||||
/// where the modifier path can't satisfy the requested features fall
|
||||
/// back to the legacy OPTIMAL-plus-copy path inside `Target`.
|
||||
pub const REQUIRED_DEVICE_EXTENSIONS = [_][:0]const u8{
|
||||
"VK_KHR_external_memory_fd",
|
||||
"VK_EXT_external_memory_dma_buf",
|
||||
"VK_EXT_image_drm_format_modifier",
|
||||
};
|
||||
|
||||
/// Vulkan-side errors that can come out of `init` (which can
/// additionally fail with `Allocator.Error`).
|
||||
pub const Error = error{
|
||||
/// The host returned a null handle for `instance` / `device` /
|
||||
/// `queue` / `physical_device`, or `get_instance_proc_addr`
|
||||
/// failed to resolve a core Vulkan function we need to bootstrap.
/// `init` also returns this when `vkGetDeviceProcAddr` cannot
/// resolve a device-level function, and when
/// `vkEnumerateDeviceExtensionProperties` fails or reports
/// `VK_INCOMPLETE` on the fill call.
|
||||
HostHandleMissing,
|
||||
|
||||
/// The host's VkPhysicalDevice doesn't report a Vulkan API version
|
||||
/// >= MIN_API_VERSION. Detected via `vkGetPhysicalDeviceProperties`.
|
||||
UnsupportedVulkanVersion,
|
||||
|
||||
/// At least one entry in `REQUIRED_DEVICE_EXTENSIONS` was not
|
||||
/// listed in `vkEnumerateDeviceExtensionProperties` for the
|
||||
/// host's VkPhysicalDevice.
|
||||
MissingRequiredExtension,
|
||||
};
|
||||
|
||||
/// The function-pointer dispatch table libghostty resolves against the
|
||||
/// host's instance / device. We only enumerate the entry points the
|
||||
/// renderer actually uses; extending the table is the supported way
|
||||
/// for follow-up renderer code to call additional Vulkan functions.
|
||||
pub const Dispatch = struct {
|
||||
// ---- instance-level -----------------------------------------
|
||||
getPhysicalDeviceProperties: std.meta.Child(vk.PFN_vkGetPhysicalDeviceProperties),
|
||||
getPhysicalDeviceMemoryProperties: std.meta.Child(vk.PFN_vkGetPhysicalDeviceMemoryProperties),
|
||||
getPhysicalDeviceFormatProperties: std.meta.Child(vk.PFN_vkGetPhysicalDeviceFormatProperties),
|
||||
/// Used by `Target` to chain `VkDrmFormatModifierPropertiesListEXT`
|
||||
/// and enumerate which DRM modifiers the device exposes for a
|
||||
/// given format. Vulkan 1.1 promoted `vkGetPhysicalDeviceFormatProperties2`
|
||||
/// from `VK_KHR_get_physical_device_properties2` into core, so we
|
||||
/// resolve it under the non-suffixed name — `MIN_API_VERSION`
|
||||
/// (declared earlier in this file) is 1.3, well past the promotion.
|
||||
getPhysicalDeviceFormatProperties2: std.meta.Child(vk.PFN_vkGetPhysicalDeviceFormatProperties2),
|
||||
enumerateDeviceExtensionProperties: std.meta.Child(vk.PFN_vkEnumerateDeviceExtensionProperties),
|
||||
getDeviceProcAddr: std.meta.Child(vk.PFN_vkGetDeviceProcAddr),
|
||||
|
||||
// ---- device-level (resolved via getDeviceProcAddr) ----------
|
||||
// Intentionally narrow for now — every additional renderer-side
|
||||
// call adds a field here and a matching `dl.load` lookup in `init`.
|
||||
getDeviceQueue: std.meta.Child(vk.PFN_vkGetDeviceQueue),
|
||||
deviceWaitIdle: std.meta.Child(vk.PFN_vkDeviceWaitIdle),
|
||||
|
||||
// Sampler — used by `vulkan/Sampler.zig`.
|
||||
createSampler: std.meta.Child(vk.PFN_vkCreateSampler),
|
||||
destroySampler: std.meta.Child(vk.PFN_vkDestroySampler),
|
||||
|
||||
// Texture (image + memory + view) — used by `vulkan/Texture.zig`.
|
||||
createImage: std.meta.Child(vk.PFN_vkCreateImage),
|
||||
destroyImage: std.meta.Child(vk.PFN_vkDestroyImage),
|
||||
getImageMemoryRequirements: std.meta.Child(vk.PFN_vkGetImageMemoryRequirements),
|
||||
allocateMemory: std.meta.Child(vk.PFN_vkAllocateMemory),
|
||||
freeMemory: std.meta.Child(vk.PFN_vkFreeMemory),
|
||||
bindImageMemory: std.meta.Child(vk.PFN_vkBindImageMemory),
|
||||
createImageView: std.meta.Child(vk.PFN_vkCreateImageView),
|
||||
destroyImageView: std.meta.Child(vk.PFN_vkDestroyImageView),
|
||||
|
||||
// Buffer (host-visible vertex / uniform / cell-data storage) —
|
||||
// used by `vulkan/buffer.zig`.
|
||||
createBuffer: std.meta.Child(vk.PFN_vkCreateBuffer),
|
||||
destroyBuffer: std.meta.Child(vk.PFN_vkDestroyBuffer),
|
||||
getBufferMemoryRequirements: std.meta.Child(vk.PFN_vkGetBufferMemoryRequirements),
|
||||
bindBufferMemory: std.meta.Child(vk.PFN_vkBindBufferMemory),
|
||||
mapMemory: std.meta.Child(vk.PFN_vkMapMemory),
|
||||
unmapMemory: std.meta.Child(vk.PFN_vkUnmapMemory),
|
||||
|
||||
// Command pool / buffer + queue submit + recording —
|
||||
// used by `vulkan/CommandPool.zig` and (later) per-frame command
|
||||
// recording in `vulkan/Frame.zig`.
|
||||
createCommandPool: std.meta.Child(vk.PFN_vkCreateCommandPool),
|
||||
destroyCommandPool: std.meta.Child(vk.PFN_vkDestroyCommandPool),
|
||||
allocateCommandBuffers: std.meta.Child(vk.PFN_vkAllocateCommandBuffers),
|
||||
freeCommandBuffers: std.meta.Child(vk.PFN_vkFreeCommandBuffers),
|
||||
beginCommandBuffer: std.meta.Child(vk.PFN_vkBeginCommandBuffer),
|
||||
endCommandBuffer: std.meta.Child(vk.PFN_vkEndCommandBuffer),
|
||||
queueSubmit: std.meta.Child(vk.PFN_vkQueueSubmit),
|
||||
queueWaitIdle: std.meta.Child(vk.PFN_vkQueueWaitIdle),
|
||||
cmdPipelineBarrier: std.meta.Child(vk.PFN_vkCmdPipelineBarrier),
|
||||
cmdCopyBufferToImage: std.meta.Child(vk.PFN_vkCmdCopyBufferToImage),
|
||||
cmdFillBuffer: std.meta.Child(vk.PFN_vkCmdFillBuffer),
|
||||
cmdClearColorImage: std.meta.Child(vk.PFN_vkCmdClearColorImage),
|
||||
cmdBindVertexBuffers: std.meta.Child(vk.PFN_vkCmdBindVertexBuffers),
|
||||
|
||||
// Shader modules — used by `vulkan/shaders.zig`.
|
||||
createShaderModule: std.meta.Child(vk.PFN_vkCreateShaderModule),
|
||||
destroyShaderModule: std.meta.Child(vk.PFN_vkDestroyShaderModule),
|
||||
|
||||
// Graphics pipeline + descriptor set layout —
|
||||
// used by `vulkan/Pipeline.zig`.
|
||||
createDescriptorSetLayout: std.meta.Child(vk.PFN_vkCreateDescriptorSetLayout),
|
||||
destroyDescriptorSetLayout: std.meta.Child(vk.PFN_vkDestroyDescriptorSetLayout),
|
||||
createPipelineLayout: std.meta.Child(vk.PFN_vkCreatePipelineLayout),
|
||||
destroyPipelineLayout: std.meta.Child(vk.PFN_vkDestroyPipelineLayout),
|
||||
createGraphicsPipelines: std.meta.Child(vk.PFN_vkCreateGraphicsPipelines),
|
||||
destroyPipeline: std.meta.Child(vk.PFN_vkDestroyPipeline),
|
||||
|
||||
// External memory fd export — used by `vulkan/Target.zig`.
|
||||
// `vkGetMemoryFdKHR` is from `VK_KHR_external_memory_fd`; needs
|
||||
// device-level resolution like any other device function.
|
||||
getMemoryFdKHR: std.meta.Child(vk.PFN_vkGetMemoryFdKHR),
|
||||
getImageSubresourceLayout: std.meta.Child(vk.PFN_vkGetImageSubresourceLayout),
|
||||
/// From `VK_EXT_image_drm_format_modifier`. Used by
|
||||
/// `vulkan/Target.zig` after creating an image with the LIST
|
||||
/// variant of the modifier create-info to discover which
|
||||
/// modifier the driver actually chose.
|
||||
getImageDrmFormatModifierPropertiesEXT: std.meta.Child(vk.PFN_vkGetImageDrmFormatModifierPropertiesEXT),
|
||||
|
||||
// Per-frame sync (fence + command-buffer reset) — used by
|
||||
// `vulkan/Frame.zig`.
|
||||
createFence: std.meta.Child(vk.PFN_vkCreateFence),
|
||||
destroyFence: std.meta.Child(vk.PFN_vkDestroyFence),
|
||||
waitForFences: std.meta.Child(vk.PFN_vkWaitForFences),
|
||||
resetFences: std.meta.Child(vk.PFN_vkResetFences),
|
||||
resetCommandBuffer: std.meta.Child(vk.PFN_vkResetCommandBuffer),
|
||||
|
||||
// Drawing — used by `vulkan/RenderPass.zig` (and the smoke
|
||||
// test's renderTriangle helper). Vulkan 1.3 promoted
|
||||
// `vkCmdBeginRendering` / `vkCmdEndRendering` from the
|
||||
// `VK_KHR_dynamic_rendering` extension into core, so they're
|
||||
// available without an extension opt-in.
|
||||
cmdBeginRendering: std.meta.Child(vk.PFN_vkCmdBeginRendering),
|
||||
cmdEndRendering: std.meta.Child(vk.PFN_vkCmdEndRendering),
|
||||
cmdBindPipeline: std.meta.Child(vk.PFN_vkCmdBindPipeline),
|
||||
cmdSetViewport: std.meta.Child(vk.PFN_vkCmdSetViewport),
|
||||
cmdSetScissor: std.meta.Child(vk.PFN_vkCmdSetScissor),
|
||||
cmdDraw: std.meta.Child(vk.PFN_vkCmdDraw),
|
||||
cmdCopyImageToBuffer: std.meta.Child(vk.PFN_vkCmdCopyImageToBuffer),
|
||||
|
||||
// Descriptor sets — used by `vulkan/DescriptorPool.zig`. Per-
|
||||
// surface lifetime today; per-frame pooling will follow when
|
||||
// the actual renderer integration lands.
|
||||
createDescriptorPool: std.meta.Child(vk.PFN_vkCreateDescriptorPool),
|
||||
destroyDescriptorPool: std.meta.Child(vk.PFN_vkDestroyDescriptorPool),
|
||||
resetDescriptorPool: std.meta.Child(vk.PFN_vkResetDescriptorPool),
|
||||
allocateDescriptorSets: std.meta.Child(vk.PFN_vkAllocateDescriptorSets),
|
||||
updateDescriptorSets: std.meta.Child(vk.PFN_vkUpdateDescriptorSets),
|
||||
cmdBindDescriptorSets: std.meta.Child(vk.PFN_vkCmdBindDescriptorSets),
|
||||
};
|
||||
|
||||
// ---- fields ---------------------------------------------------------
|
||||
|
||||
/// Host-owned Vulkan handles. Per the `init` documentation the host
/// retains ownership of `instance` / `device` / `queue`; this struct
/// only keeps copies of the handles.
instance: vk.VkInstance,
|
||||
physical_device: vk.VkPhysicalDevice,
|
||||
device: vk.VkDevice,
|
||||
/// The queue `queueSubmit` / `queueWaitIdle` operate on.
queue: vk.VkQueue,
|
||||
/// NOTE(review): presumably the family `queue` belongs to, as supplied
/// through `HostBootstrap.queue_family_index` — confirm in `init`.
queue_family_index: u32,
|
||||
|
||||
/// The Vulkan API version the host's physical device reports. Always
|
||||
/// >= `MIN_API_VERSION` (if it were lower, `init` returns
|
||||
/// `error.UnsupportedVulkanVersion`).
|
||||
api_version: u32,
|
||||
|
||||
/// Cached `VkPhysicalDeviceMemoryProperties`. The properties are
|
||||
/// immutable for the physical device's lifetime, so we query once
|
||||
/// at `init` time instead of on every `findMemoryType` call (which
|
||||
/// happens for every Buffer/Texture/Target allocation).
|
||||
memory_properties: vk.VkPhysicalDeviceMemoryProperties,
|
||||
|
||||
/// Resolved Vulkan entry points; see `Dispatch`.
dispatch: Dispatch,
|
||||
|
||||
/// Process-wide mutex protecting access to `queue`. Vulkan requires
|
||||
/// external synchronization of `VkQueue` — `vkQueueSubmit` and
|
||||
/// `vkQueueWaitIdle` from multiple threads must not overlap. Splits
|
||||
/// and tabs share the host's single queue (one VkQueue per process),
|
||||
/// so the mutex serializes submissions across all renderer threads.
|
||||
/// Use via `Device.queueSubmit` / `Device.queueWaitIdle`.
///
/// This is a container-level `var`, so every `Device` value shares the
/// same lock.
/// NOTE(review): the lock only covers calls made through the two
/// wrappers above. Whether the host's own use of the same `VkQueue`
/// is synchronized against them is not visible here — confirm.
|
||||
var queue_mutex: std.Thread.Mutex = .{};
|
||||
|
||||
/// `vkQueueSubmit` on the host queue, performed while holding
/// `queue_mutex`. Every submission to the host queue (Frame, atlas
/// upload, etc.) MUST use this wrapper so renderer threads belonging
/// to different splits/tabs never call into the queue concurrently
/// and race the driver into a hang.
///
/// Returns the raw `VkResult` of the underlying call.
pub fn queueSubmit(
    self: *const Device,
    submit_count: u32,
    submits: [*c]const vk.VkSubmitInfo,
    fence: vk.VkFence,
) vk.VkResult {
    queue_mutex.lock();
    const rc = self.dispatch.queueSubmit(self.queue, submit_count, submits, fence);
    queue_mutex.unlock();
    return rc;
}
|
||||
|
||||
/// `vkQueueWaitIdle` on the host queue, performed while holding
/// `queue_mutex` for the same reason as `queueSubmit`. The lock is
/// held for the whole wait. Returns the raw `VkResult`.
pub fn queueWaitIdle(self: *const Device) vk.VkResult {
    queue_mutex.lock();
    const rc = self.dispatch.queueWaitIdle(self.queue);
    queue_mutex.unlock();
    return rc;
}
|
||||
|
||||
// ---- API ------------------------------------------------------------
|
||||
|
||||
/// Pre-resolved host-Vulkan handles passed into `Device.init`. Keeps
|
||||
/// `pkg/vulkan` independent of any apprt type — callers (e.g.
|
||||
/// libghostty's `src/renderer/Vulkan.zig`) translate their own
|
||||
/// platform-callback struct into this neutral shape.
|
||||
pub const HostBootstrap = struct {
|
||||
instance: vk.VkInstance,
|
||||
physical_device: vk.VkPhysicalDevice,
|
||||
device: vk.VkDevice,
|
||||
queue: vk.VkQueue,
|
||||
queue_family_index: u32,
|
||||
/// Root proc-addr resolver: a type-erased pointer to the host's
|
||||
/// `vkGetInstanceProcAddr`. `Device.init` casts it back to that
|
||||
/// function type and uses it to resolve every instance-level
/// function it needs to bootstrap the dispatch table, so it must
/// point to a function with exactly that signature.
|
||||
get_instance_proc_addr_raw: *const anyopaque,
|
||||
};
|
||||
|
||||
/// Build a `Device` from pre-resolved host handles. Performs:
|
||||
/// 1. Load the instance-level dispatch via `vkGetInstanceProcAddr`.
|
||||
/// 2. Verify `physicalDeviceProperties.apiVersion >= 1.3`.
|
||||
/// 3. Verify every entry in `REQUIRED_DEVICE_EXTENSIONS` is present
|
||||
/// on the physical device.
|
||||
/// 4. Load the device-level dispatch via `vkGetDeviceProcAddr`.
|
||||
///
|
||||
/// On success the returned `Device` is ready for the renderer to
|
||||
/// build pipelines / images / command buffers against. The host
|
||||
/// retains ownership of `instance` / `device` / `queue` — `deinit`
|
||||
/// is a no-op stub for symmetry.
|
||||
pub fn init(
|
||||
alloc: Allocator,
|
||||
boot: HostBootstrap,
|
||||
) (Error || Allocator.Error)!Device {
|
||||
const instance = boot.instance;
|
||||
const physical_device = boot.physical_device;
|
||||
const device = boot.device;
|
||||
const queue = boot.queue;
|
||||
const queue_family_index = boot.queue_family_index;
|
||||
|
||||
// ---- instance-level dispatch ---------------------------------
|
||||
// The caller-provided get_instance_proc_addr is our root entry
|
||||
// point. We resolve other functions via vkGetInstanceProcAddr
|
||||
// (instance, name); per the Vulkan spec, passing a non-null
|
||||
// instance is valid for any function that takes an instance,
|
||||
// physical device, device, or child object of any of these — i.e.
|
||||
// everything we care about.
|
||||
const get_instance_proc_addr: std.meta.Child(vk.PFN_vkGetInstanceProcAddr) =
|
||||
@ptrCast(@alignCast(boot.get_instance_proc_addr_raw));
|
||||
|
||||
const InstanceLoader = struct {
|
||||
instance: vk.VkInstance,
|
||||
get_instance_proc_addr: std.meta.Child(vk.PFN_vkGetInstanceProcAddr),
|
||||
|
||||
fn load(self: @This(), comptime T: type, name: [*:0]const u8) Error!std.meta.Child(T) {
|
||||
const fp = self.get_instance_proc_addr(self.instance, name) orelse {
|
||||
log.err("vkGetInstanceProcAddr returned null for {s}", .{name});
|
||||
return error.HostHandleMissing;
|
||||
};
|
||||
return @ptrCast(fp);
|
||||
}
|
||||
};
|
||||
const il: InstanceLoader = .{
|
||||
.instance = instance,
|
||||
.get_instance_proc_addr = get_instance_proc_addr,
|
||||
};
|
||||
|
||||
const get_physical_device_properties =
|
||||
try il.load(vk.PFN_vkGetPhysicalDeviceProperties, "vkGetPhysicalDeviceProperties");
|
||||
const get_physical_device_memory_properties =
|
||||
try il.load(vk.PFN_vkGetPhysicalDeviceMemoryProperties, "vkGetPhysicalDeviceMemoryProperties");
|
||||
const get_physical_device_format_properties =
|
||||
try il.load(vk.PFN_vkGetPhysicalDeviceFormatProperties, "vkGetPhysicalDeviceFormatProperties");
|
||||
const get_physical_device_format_properties_2 =
|
||||
try il.load(vk.PFN_vkGetPhysicalDeviceFormatProperties2, "vkGetPhysicalDeviceFormatProperties2");
|
||||
const enumerate_device_extension_properties =
|
||||
try il.load(vk.PFN_vkEnumerateDeviceExtensionProperties, "vkEnumerateDeviceExtensionProperties");
|
||||
const get_device_proc_addr =
|
||||
try il.load(vk.PFN_vkGetDeviceProcAddr, "vkGetDeviceProcAddr");
|
||||
|
||||
// ---- version check ------------------------------------------
|
||||
var props: vk.VkPhysicalDeviceProperties = std.mem.zeroes(vk.VkPhysicalDeviceProperties);
|
||||
get_physical_device_properties(physical_device, &props);
|
||||
if (props.apiVersion < MIN_API_VERSION) {
|
||||
log.err(
|
||||
"host VkPhysicalDevice reports Vulkan {}.{}.{}, need >= {}.{}.{}",
|
||||
.{
|
||||
vk.VK_API_VERSION_MAJOR(props.apiVersion),
|
||||
vk.VK_API_VERSION_MINOR(props.apiVersion),
|
||||
vk.VK_API_VERSION_PATCH(props.apiVersion),
|
||||
vk.VK_API_VERSION_MAJOR(MIN_API_VERSION),
|
||||
vk.VK_API_VERSION_MINOR(MIN_API_VERSION),
|
||||
vk.VK_API_VERSION_PATCH(MIN_API_VERSION),
|
||||
},
|
||||
);
|
||||
return error.UnsupportedVulkanVersion;
|
||||
}
|
||||
|
||||
// ---- extension check ----------------------------------------
|
||||
var ext_count: u32 = 0;
|
||||
{
|
||||
const r = enumerate_device_extension_properties(physical_device, null, &ext_count, null);
|
||||
// SUCCESS or INCOMPLETE both populate `ext_count`. INCOMPLETE
|
||||
// shouldn't happen on the count-only call (no buffer to
|
||||
// truncate) but we accept it defensively.
|
||||
if (r != vk.VK_SUCCESS and r != vk.VK_INCOMPLETE) {
|
||||
log.err("vkEnumerateDeviceExtensionProperties (count) failed: result={}", .{r});
|
||||
return error.HostHandleMissing;
|
||||
}
|
||||
}
|
||||
const exts = try alloc.alloc(vk.VkExtensionProperties, ext_count);
|
||||
defer alloc.free(exts);
|
||||
{
|
||||
const r = enumerate_device_extension_properties(physical_device, null, &ext_count, exts.ptr);
|
||||
if (r != vk.VK_SUCCESS and r != vk.VK_INCOMPLETE) {
|
||||
log.err("vkEnumerateDeviceExtensionProperties (fill) failed: result={}", .{r});
|
||||
return error.HostHandleMissing;
|
||||
}
|
||||
// VK_INCOMPLETE here means the extension list grew between
|
||||
// the count and fill calls (race with a driver hot-reload —
|
||||
// very unlikely in practice but spec-permitted). The
|
||||
// partially-filled buffer is still authoritative for the
|
||||
// entries it does contain, but a required extension not yet
|
||||
// populated would be missed. Treat as a hard fail since the
|
||||
// extension presence check below would silently pass on a
|
||||
// truncated list.
|
||||
if (r == vk.VK_INCOMPLETE) {
|
||||
log.err(
|
||||
"vkEnumerateDeviceExtensionProperties returned INCOMPLETE; " ++
|
||||
"device extension list changed between count and fill",
|
||||
.{},
|
||||
);
|
||||
return error.HostHandleMissing;
|
||||
}
|
||||
}
|
||||
|
||||
inline for (REQUIRED_DEVICE_EXTENSIONS) |required| {
|
||||
var found = false;
|
||||
for (exts) |ext| {
|
||||
const name_cstr: [*:0]const u8 = @ptrCast(&ext.extensionName);
|
||||
if (std.mem.eql(u8, std.mem.span(name_cstr), required)) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
log.err("required Vulkan device extension missing: {s}", .{required});
|
||||
return error.MissingRequiredExtension;
|
||||
}
|
||||
}
|
||||
|
||||
// ---- device-level dispatch ----------------------------------
|
||||
const DeviceLoader = struct {
|
||||
device: vk.VkDevice,
|
||||
get_device_proc_addr: std.meta.Child(vk.PFN_vkGetDeviceProcAddr),
|
||||
|
||||
fn load(self: @This(), comptime T: type, name: [*:0]const u8) Error!std.meta.Child(T) {
|
||||
const fp = self.get_device_proc_addr(self.device, name) orelse {
|
||||
log.err("vkGetDeviceProcAddr returned null for {s}", .{name});
|
||||
return error.HostHandleMissing;
|
||||
};
|
||||
return @ptrCast(fp);
|
||||
}
|
||||
};
|
||||
const dl: DeviceLoader = .{
|
||||
.device = device,
|
||||
.get_device_proc_addr = get_device_proc_addr,
|
||||
};
|
||||
|
||||
const get_device_queue =
|
||||
try dl.load(vk.PFN_vkGetDeviceQueue, "vkGetDeviceQueue");
|
||||
const device_wait_idle =
|
||||
try dl.load(vk.PFN_vkDeviceWaitIdle, "vkDeviceWaitIdle");
|
||||
const create_sampler =
|
||||
try dl.load(vk.PFN_vkCreateSampler, "vkCreateSampler");
|
||||
const destroy_sampler =
|
||||
try dl.load(vk.PFN_vkDestroySampler, "vkDestroySampler");
|
||||
const create_image =
|
||||
try dl.load(vk.PFN_vkCreateImage, "vkCreateImage");
|
||||
const destroy_image =
|
||||
try dl.load(vk.PFN_vkDestroyImage, "vkDestroyImage");
|
||||
const get_image_memory_requirements =
|
||||
try dl.load(vk.PFN_vkGetImageMemoryRequirements, "vkGetImageMemoryRequirements");
|
||||
const allocate_memory =
|
||||
try dl.load(vk.PFN_vkAllocateMemory, "vkAllocateMemory");
|
||||
const free_memory =
|
||||
try dl.load(vk.PFN_vkFreeMemory, "vkFreeMemory");
|
||||
const bind_image_memory =
|
||||
try dl.load(vk.PFN_vkBindImageMemory, "vkBindImageMemory");
|
||||
const create_image_view =
|
||||
try dl.load(vk.PFN_vkCreateImageView, "vkCreateImageView");
|
||||
const destroy_image_view =
|
||||
try dl.load(vk.PFN_vkDestroyImageView, "vkDestroyImageView");
|
||||
const create_buffer =
|
||||
try dl.load(vk.PFN_vkCreateBuffer, "vkCreateBuffer");
|
||||
const destroy_buffer =
|
||||
try dl.load(vk.PFN_vkDestroyBuffer, "vkDestroyBuffer");
|
||||
const get_buffer_memory_requirements =
|
||||
try dl.load(vk.PFN_vkGetBufferMemoryRequirements, "vkGetBufferMemoryRequirements");
|
||||
const bind_buffer_memory =
|
||||
try dl.load(vk.PFN_vkBindBufferMemory, "vkBindBufferMemory");
|
||||
const map_memory =
|
||||
try dl.load(vk.PFN_vkMapMemory, "vkMapMemory");
|
||||
const unmap_memory =
|
||||
try dl.load(vk.PFN_vkUnmapMemory, "vkUnmapMemory");
|
||||
const create_command_pool =
|
||||
try dl.load(vk.PFN_vkCreateCommandPool, "vkCreateCommandPool");
|
||||
const destroy_command_pool =
|
||||
try dl.load(vk.PFN_vkDestroyCommandPool, "vkDestroyCommandPool");
|
||||
const allocate_command_buffers =
|
||||
try dl.load(vk.PFN_vkAllocateCommandBuffers, "vkAllocateCommandBuffers");
|
||||
const free_command_buffers =
|
||||
try dl.load(vk.PFN_vkFreeCommandBuffers, "vkFreeCommandBuffers");
|
||||
const begin_command_buffer =
|
||||
try dl.load(vk.PFN_vkBeginCommandBuffer, "vkBeginCommandBuffer");
|
||||
const end_command_buffer =
|
||||
try dl.load(vk.PFN_vkEndCommandBuffer, "vkEndCommandBuffer");
|
||||
const queue_submit =
|
||||
try dl.load(vk.PFN_vkQueueSubmit, "vkQueueSubmit");
|
||||
const queue_wait_idle =
|
||||
try dl.load(vk.PFN_vkQueueWaitIdle, "vkQueueWaitIdle");
|
||||
const cmd_pipeline_barrier =
|
||||
try dl.load(vk.PFN_vkCmdPipelineBarrier, "vkCmdPipelineBarrier");
|
||||
const cmd_copy_buffer_to_image =
|
||||
try dl.load(vk.PFN_vkCmdCopyBufferToImage, "vkCmdCopyBufferToImage");
|
||||
const cmd_fill_buffer =
|
||||
try dl.load(vk.PFN_vkCmdFillBuffer, "vkCmdFillBuffer");
|
||||
const cmd_clear_color_image =
|
||||
try dl.load(vk.PFN_vkCmdClearColorImage, "vkCmdClearColorImage");
|
||||
const cmd_bind_vertex_buffers =
|
||||
try dl.load(vk.PFN_vkCmdBindVertexBuffers, "vkCmdBindVertexBuffers");
|
||||
const create_shader_module =
|
||||
try dl.load(vk.PFN_vkCreateShaderModule, "vkCreateShaderModule");
|
||||
const destroy_shader_module =
|
||||
try dl.load(vk.PFN_vkDestroyShaderModule, "vkDestroyShaderModule");
|
||||
const create_descriptor_set_layout =
|
||||
try dl.load(vk.PFN_vkCreateDescriptorSetLayout, "vkCreateDescriptorSetLayout");
|
||||
const destroy_descriptor_set_layout =
|
||||
try dl.load(vk.PFN_vkDestroyDescriptorSetLayout, "vkDestroyDescriptorSetLayout");
|
||||
const create_pipeline_layout =
|
||||
try dl.load(vk.PFN_vkCreatePipelineLayout, "vkCreatePipelineLayout");
|
||||
const destroy_pipeline_layout =
|
||||
try dl.load(vk.PFN_vkDestroyPipelineLayout, "vkDestroyPipelineLayout");
|
||||
const create_graphics_pipelines =
|
||||
try dl.load(vk.PFN_vkCreateGraphicsPipelines, "vkCreateGraphicsPipelines");
|
||||
const destroy_pipeline =
|
||||
try dl.load(vk.PFN_vkDestroyPipeline, "vkDestroyPipeline");
|
||||
const get_memory_fd_khr =
|
||||
try dl.load(vk.PFN_vkGetMemoryFdKHR, "vkGetMemoryFdKHR");
|
||||
const get_image_subresource_layout =
|
||||
try dl.load(vk.PFN_vkGetImageSubresourceLayout, "vkGetImageSubresourceLayout");
|
||||
const get_image_drm_format_modifier_properties_ext =
|
||||
try dl.load(vk.PFN_vkGetImageDrmFormatModifierPropertiesEXT, "vkGetImageDrmFormatModifierPropertiesEXT");
|
||||
const create_fence =
|
||||
try dl.load(vk.PFN_vkCreateFence, "vkCreateFence");
|
||||
const destroy_fence =
|
||||
try dl.load(vk.PFN_vkDestroyFence, "vkDestroyFence");
|
||||
const wait_for_fences =
|
||||
try dl.load(vk.PFN_vkWaitForFences, "vkWaitForFences");
|
||||
const reset_fences =
|
||||
try dl.load(vk.PFN_vkResetFences, "vkResetFences");
|
||||
const reset_command_buffer =
|
||||
try dl.load(vk.PFN_vkResetCommandBuffer, "vkResetCommandBuffer");
|
||||
const cmd_begin_rendering =
|
||||
try dl.load(vk.PFN_vkCmdBeginRendering, "vkCmdBeginRendering");
|
||||
const cmd_end_rendering =
|
||||
try dl.load(vk.PFN_vkCmdEndRendering, "vkCmdEndRendering");
|
||||
const cmd_bind_pipeline =
|
||||
try dl.load(vk.PFN_vkCmdBindPipeline, "vkCmdBindPipeline");
|
||||
const cmd_set_viewport =
|
||||
try dl.load(vk.PFN_vkCmdSetViewport, "vkCmdSetViewport");
|
||||
const cmd_set_scissor =
|
||||
try dl.load(vk.PFN_vkCmdSetScissor, "vkCmdSetScissor");
|
||||
const cmd_draw =
|
||||
try dl.load(vk.PFN_vkCmdDraw, "vkCmdDraw");
|
||||
const cmd_copy_image_to_buffer =
|
||||
try dl.load(vk.PFN_vkCmdCopyImageToBuffer, "vkCmdCopyImageToBuffer");
|
||||
const create_descriptor_pool =
|
||||
try dl.load(vk.PFN_vkCreateDescriptorPool, "vkCreateDescriptorPool");
|
||||
const destroy_descriptor_pool =
|
||||
try dl.load(vk.PFN_vkDestroyDescriptorPool, "vkDestroyDescriptorPool");
|
||||
const reset_descriptor_pool =
|
||||
try dl.load(vk.PFN_vkResetDescriptorPool, "vkResetDescriptorPool");
|
||||
const allocate_descriptor_sets =
|
||||
try dl.load(vk.PFN_vkAllocateDescriptorSets, "vkAllocateDescriptorSets");
|
||||
const update_descriptor_sets =
|
||||
try dl.load(vk.PFN_vkUpdateDescriptorSets, "vkUpdateDescriptorSets");
|
||||
const cmd_bind_descriptor_sets =
|
||||
try dl.load(vk.PFN_vkCmdBindDescriptorSets, "vkCmdBindDescriptorSets");
|
||||
|
||||
// Snapshot the memory properties once. They never change for
|
||||
// the device's lifetime, so per-allocation re-queries (which
|
||||
// findMemoryType used to do) were pure waste.
|
||||
var memory_properties: vk.VkPhysicalDeviceMemoryProperties = undefined;
|
||||
get_physical_device_memory_properties(physical_device, &memory_properties);
|
||||
|
||||
return .{
|
||||
.instance = instance,
|
||||
.physical_device = physical_device,
|
||||
.device = device,
|
||||
.queue = queue,
|
||||
.queue_family_index = queue_family_index,
|
||||
.api_version = props.apiVersion,
|
||||
.memory_properties = memory_properties,
|
||||
.dispatch = .{
|
||||
.getPhysicalDeviceProperties = get_physical_device_properties,
|
||||
.getPhysicalDeviceMemoryProperties = get_physical_device_memory_properties,
|
||||
.getPhysicalDeviceFormatProperties = get_physical_device_format_properties,
|
||||
.getPhysicalDeviceFormatProperties2 = get_physical_device_format_properties_2,
|
||||
.enumerateDeviceExtensionProperties = enumerate_device_extension_properties,
|
||||
.getDeviceProcAddr = get_device_proc_addr,
|
||||
.getDeviceQueue = get_device_queue,
|
||||
.deviceWaitIdle = device_wait_idle,
|
||||
.createSampler = create_sampler,
|
||||
.destroySampler = destroy_sampler,
|
||||
.createImage = create_image,
|
||||
.destroyImage = destroy_image,
|
||||
.getImageMemoryRequirements = get_image_memory_requirements,
|
||||
.allocateMemory = allocate_memory,
|
||||
.freeMemory = free_memory,
|
||||
.bindImageMemory = bind_image_memory,
|
||||
.createImageView = create_image_view,
|
||||
.destroyImageView = destroy_image_view,
|
||||
.createBuffer = create_buffer,
|
||||
.destroyBuffer = destroy_buffer,
|
||||
.getBufferMemoryRequirements = get_buffer_memory_requirements,
|
||||
.bindBufferMemory = bind_buffer_memory,
|
||||
.mapMemory = map_memory,
|
||||
.unmapMemory = unmap_memory,
|
||||
.createCommandPool = create_command_pool,
|
||||
.destroyCommandPool = destroy_command_pool,
|
||||
.allocateCommandBuffers = allocate_command_buffers,
|
||||
.freeCommandBuffers = free_command_buffers,
|
||||
.beginCommandBuffer = begin_command_buffer,
|
||||
.endCommandBuffer = end_command_buffer,
|
||||
.queueSubmit = queue_submit,
|
||||
.queueWaitIdle = queue_wait_idle,
|
||||
.cmdPipelineBarrier = cmd_pipeline_barrier,
|
||||
.cmdCopyBufferToImage = cmd_copy_buffer_to_image,
|
||||
.cmdFillBuffer = cmd_fill_buffer,
|
||||
.cmdClearColorImage = cmd_clear_color_image,
|
||||
.cmdBindVertexBuffers = cmd_bind_vertex_buffers,
|
||||
.createShaderModule = create_shader_module,
|
||||
.destroyShaderModule = destroy_shader_module,
|
||||
.createDescriptorSetLayout = create_descriptor_set_layout,
|
||||
.destroyDescriptorSetLayout = destroy_descriptor_set_layout,
|
||||
.createPipelineLayout = create_pipeline_layout,
|
||||
.destroyPipelineLayout = destroy_pipeline_layout,
|
||||
.createGraphicsPipelines = create_graphics_pipelines,
|
||||
.destroyPipeline = destroy_pipeline,
|
||||
.getMemoryFdKHR = get_memory_fd_khr,
|
||||
.getImageSubresourceLayout = get_image_subresource_layout,
|
||||
.getImageDrmFormatModifierPropertiesEXT = get_image_drm_format_modifier_properties_ext,
|
||||
.createFence = create_fence,
|
||||
.destroyFence = destroy_fence,
|
||||
.waitForFences = wait_for_fences,
|
||||
.resetFences = reset_fences,
|
||||
.resetCommandBuffer = reset_command_buffer,
|
||||
.cmdBeginRendering = cmd_begin_rendering,
|
||||
.cmdEndRendering = cmd_end_rendering,
|
||||
.cmdBindPipeline = cmd_bind_pipeline,
|
||||
.cmdSetViewport = cmd_set_viewport,
|
||||
.cmdSetScissor = cmd_set_scissor,
|
||||
.cmdDraw = cmd_draw,
|
||||
.cmdCopyImageToBuffer = cmd_copy_image_to_buffer,
|
||||
.createDescriptorPool = create_descriptor_pool,
|
||||
.destroyDescriptorPool = destroy_descriptor_pool,
|
||||
.resetDescriptorPool = reset_descriptor_pool,
|
||||
.allocateDescriptorSets = allocate_descriptor_sets,
|
||||
.updateDescriptorSets = update_descriptor_sets,
|
||||
.cmdBindDescriptorSets = cmd_bind_descriptor_sets,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
/// Symmetry-only: every handle is host-owned, so no Vulkan object is
|
||||
/// destroyed here. Provided so callers can `defer device.deinit()`
/// without special-casing this type.
|
||||
pub fn deinit(self: *Device) void {
|
||||
// Mark the whole struct undefined; using it after this point is a bug.
self.* = undefined;
|
||||
}
|
||||
|
||||
/// Waits for the device to go idle by calling `vkDeviceWaitIdle`
/// through the dispatch table. Meant to run ahead of renderer
/// resource teardown so no command buffer is still in flight.
///
/// Failure is not propagated: any result other than `VK_SUCCESS`
/// (`VK_ERROR_DEVICE_LOST` included) is logged as a warning, so a
/// caller that goes on to destroy resources on a dead device leaves a
/// diagnostic trail instead of crashing silently in a later
/// vkDestroy*.
pub fn waitIdle(self: *const Device) void {
    const status = self.dispatch.deviceWaitIdle(self.device);
    if (status == vk.VK_SUCCESS) return;

    log.warn("vkDeviceWaitIdle returned {}; teardown proceeding anyway", .{status});
}
|
||||
|
||||
/// Returns the index of the first memory type that is both permitted
/// by `type_bits` (a `VkMemoryRequirements.memoryTypeBits` mask) and
/// has every flag in `required_props` set, or null when no memory
/// type qualifies.
///
/// The lookup reads the `memory_properties` snapshot stored on the
/// device rather than querying Vulkan again.
///
/// Used by `vulkan/Texture.zig` (and later `vulkan/Buffer.zig`) to
/// pick an appropriate heap for a freshly created image/buffer.
pub fn findMemoryType(
    self: *const Device,
    type_bits: u32,
    required_props: vk.VkMemoryPropertyFlags,
) ?u32 {
    const mem = &self.memory_properties;

    // Only the first `memoryTypeCount` entries of the array are valid.
    for (mem.memoryTypes[0..mem.memoryTypeCount], 0..) |mem_type, idx| {
        const mask: u32 = @as(u32, 1) << @intCast(idx);
        const allowed = (type_bits & mask) != 0;
        const has_props = (mem_type.propertyFlags & required_props) == required_props;

        if (allowed and has_props) return @as(u32, @intCast(idx));
    }

    return null;
}
|
||||
|
||||
test {
|
||||
// Reference every declaration in this file so the compiler
|
||||
// type-checks all of them, catching signature mismatches against
|
||||
// the Vulkan binding even before the apprt-side wiring lands.
|
||||
// No Vulkan call is made here: the real init path needs a
|
||||
// host-provided Vulkan device and is only exercised end-to-end
|
||||
// once the Qt frontend wires up `ghostty_platform_vulkan_s`.
|
||||
std.testing.refAllDecls(@This());
|
||||
}
|
||||
|
|
@ -0,0 +1,163 @@
|
|||
//! Wrapper for `VkSampler` — the immutable filter / wrap configuration
|
||||
//! the GPU uses when sampling a texture.
|
||||
//!
|
||||
//! libghostty doesn't share samplers across textures (the OpenGL
|
||||
//! backend likewise creates one sampler per use site); we keep the
|
||||
//! same per-callsite ownership model so the renderer interface
|
||||
//! matches.
|
||||
//!
|
||||
//! Counterpart: `src/renderer/opengl/Sampler.zig`.
|
||||
|
||||
const Self = @This();
|
||||
|
||||
const std = @import("std");
|
||||
const vk = @import("c.zig").c;
|
||||
|
||||
const Device = @import("Device.zig");
|
||||
|
||||
const log = std.log.scoped(.vulkan);
|
||||
|
||||
/// Texel filter mode. Maps 1:1 to `VkFilter` (which is a `c_uint`):
/// each tag takes the value of the matching `VK_FILTER_*` constant, so
/// `@intFromEnum` can be stored directly in a `VkSamplerCreateInfo`.
|
||||
pub const Filter = enum(c_uint) {
|
||||
nearest = vk.VK_FILTER_NEAREST,
|
||||
linear = vk.VK_FILTER_LINEAR,
|
||||
};
|
||||
|
||||
/// Texture coordinate wrap mode. Maps 1:1 to `VkSamplerAddressMode`
|
||||
/// (a `c_uint`): each tag takes the value of the matching
/// `VK_SAMPLER_ADDRESS_MODE_*` constant.
|
||||
pub const AddressMode = enum(c_uint) {
|
||||
repeat = vk.VK_SAMPLER_ADDRESS_MODE_REPEAT,
|
||||
mirrored_repeat = vk.VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT,
|
||||
clamp_to_edge = vk.VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
|
||||
clamp_to_border = vk.VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
|
||||
};
|
||||
|
||||
/// Sampler construction parameters. The same shape as the OpenGL
|
||||
/// backend's `Sampler.Options` (so generic.zig can call
|
||||
/// `Sampler.init(api.samplerOptions())` against either backend), with
|
||||
/// a `device` reference so we can call `vkCreateSampler` against the
|
||||
/// host's VkDevice without threading a global through.
|
||||
pub const Options = struct {
|
||||
/// Device whose dispatch table and `VkDevice` create the sampler.
/// `init` stores this pointer in the returned `Sampler`.
device: *const Device,
|
||||
/// Minification filter. Replaced by `mag_filter` when
/// `unnormalized_coordinates` is set.
min_filter: Filter,
|
||||
/// Magnification filter.
mag_filter: Filter,
|
||||
/// Address mode for the U coordinate. Replaced by CLAMP_TO_EDGE
/// when `unnormalized_coordinates` is set.
wrap_s: AddressMode,
|
||||
/// Address mode for the V coordinate. Replaced by CLAMP_TO_EDGE
/// when `unnormalized_coordinates` is set.
wrap_t: AddressMode,
|
||||
|
|
||||
/// Vulkan-only: enable sampling with non-normalized texel
|
||||
/// coordinates (so `texture(s, p)` reads texel `p` directly
|
||||
/// rather than mapping `[0,1] x [0,1]` over the image).
|
||||
///
|
||||
/// This is what makes `sampler2D` behave like the OpenGL
|
||||
/// `sampler2DRect` the renderer's text shaders were originally
|
||||
/// authored against (after `vulkanizeGlsl` rewrites the type
|
||||
/// name). Vulkan imposes a long list of co-requirements when
|
||||
/// this is enabled — `init` forces them rather than rejecting
|
||||
/// inputs that violate them:
|
||||
///
|
||||
/// - `magFilter == minFilter` (we use `mag_filter`)
|
||||
/// - `mipmapMode = NEAREST`
|
||||
/// - `addressModeU/V` must be CLAMP_TO_EDGE / CLAMP_TO_BORDER
|
||||
/// (we force CLAMP_TO_EDGE, ignoring `wrap_s/wrap_t`)
|
||||
/// - `anisotropyEnable = FALSE`
|
||||
/// - `compareEnable = FALSE`
|
||||
/// - `minLod == maxLod == 0`
|
||||
///
|
||||
/// The bound image view must also be 1D or 2D with one mip
|
||||
/// level and one array layer — true for the glyph atlas.
|
||||
unnormalized_coordinates: bool = false,
|
||||
};
|
||||
|
||||
/// Error set returned by `init`.
pub const Error = error{
|
||||
/// `vkCreateSampler` returned a non-success status. Logged with
|
||||
/// the raw `VkResult` value before being returned.
|
||||
VulkanFailed,
|
||||
};
|
||||
|
||||
/// Handle written by `vkCreateSampler` in `init`; destroyed in `deinit`.
sampler: vk.VkSampler,
|
||||
/// Device pointer copied from `Options.device`. Its dispatch table and
/// `VkDevice` are used again in `deinit` to destroy the sampler.
device: *const Device,
|
||||
|
||||
/// Builds a `VkSampler` on the host's `VkDevice` from `opts`. The
/// returned value owns the handle until `deinit` destroys it.
///
/// With `opts.unnormalized_coordinates` set, the settings Vulkan
/// requires for unnormalized sampling are applied over the caller's
/// choices instead of rejecting them: `min_filter` is replaced by
/// `mag_filter`, the mipmap mode becomes NEAREST and the U/V address
/// modes become CLAMP_TO_EDGE.
///
/// Returns `error.VulkanFailed`, after logging the raw `VkResult`,
/// when `vkCreateSampler` does not report `VK_SUCCESS`.
pub fn init(opts: Options) Error!Self {
    const dev = opts.device;
    const unnorm = opts.unnormalized_coordinates;
    const clamp_to_edge: vk.VkSamplerAddressMode =
        vk.VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;

    // Unnormalized sampling requires magFilter == minFilter, so the
    // magnification filter wins in that mode.
    const min_filter: Filter = if (unnorm) opts.mag_filter else opts.min_filter;

    // The glyph atlases are 2D textures without mips, and with
    // `minLod == maxLod == 0` the mipmap mode has no visible effect.
    // NEAREST is mandatory for unnormalized sampling; LINEAR is kept
    // otherwise for forward-compatibility should atlas mips appear.
    const mipmap_mode: vk.VkSamplerMipmapMode = if (unnorm)
        vk.VK_SAMPLER_MIPMAP_MODE_NEAREST
    else
        vk.VK_SAMPLER_MIPMAP_MODE_LINEAR;

    // Unnormalized sampling only permits CLAMP_TO_EDGE or
    // CLAMP_TO_BORDER; the latter has no use here.
    const address_u: vk.VkSamplerAddressMode = if (unnorm)
        clamp_to_edge
    else
        @intFromEnum(opts.wrap_s);
    const address_v: vk.VkSamplerAddressMode = if (unnorm)
        clamp_to_edge
    else
        @intFromEnum(opts.wrap_t);

    const create_info: vk.VkSamplerCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .magFilter = @intFromEnum(opts.mag_filter),
        .minFilter = @intFromEnum(min_filter),
        .mipmapMode = mipmap_mode,
        .addressModeU = address_u,
        .addressModeV = address_v,
        // 2D textures never sample along W, but the field still has
        // to hold a valid value.
        .addressModeW = clamp_to_edge,
        .mipLodBias = 0,
        // Anisotropy is a per-physical-device feature; the terminal
        // grid gains nothing from it and checking the feature flag
        // would need extra host coordination. It must also be off
        // for unnormalized sampling.
        .anisotropyEnable = vk.VK_FALSE,
        .maxAnisotropy = 1,
        .compareEnable = vk.VK_FALSE,
        .compareOp = vk.VK_COMPARE_OP_ALWAYS,
        .minLod = 0,
        .maxLod = 0,
        .borderColor = vk.VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
        .unnormalizedCoordinates = @intFromBool(unnorm),
    };

    var handle: vk.VkSampler = undefined;
    const status = dev.dispatch.createSampler(dev.device, &create_info, null, &handle);
    if (status != vk.VK_SUCCESS) {
        log.err("vkCreateSampler failed: result={}", .{status});
        return error.VulkanFailed;
    }

    return .{ .sampler = handle, .device = dev };
}
|
||||
|
||||
/// Destroys the `VkSampler` through the device it was created on.
///
/// Takes `Self` by value rather than `*Self`: call sites hold the
/// sampler in a `const` binding (`const samp = try Sampler.init(...)`),
/// and a `const` value cannot supply the mutable pointer a `*Self`
/// parameter would need. CommandPool/DescriptorPool take `*Self`
/// because they live in mutable slots; the difference follows
/// container ownership, not style.
pub fn deinit(self: Self) void {
    const dev = self.device;
    dev.dispatch.destroySampler(dev.device, self.sampler, null);
}
|
||||
|
||||
test {
|
||||
// Reference every declaration in this file so each one is type-checked.
std.testing.refAllDecls(@This());
|
||||
}
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
const std = @import("std");
|
||||
|
||||
pub fn build(b: *std.Build) !void {
    // Register this package as the "vulkan" module on `b`. Consumers
    // (`src/build/SharedDeps.zig`) look it up with
    // `b.lazyDependency("vulkan", ...).module("vulkan")`, so the
    // handle returned by `addModule` is not needed here.
    //
    // Nothing else is wired up in this file: the Vulkan headers
    // (`vulkan-headers` package) are expected on the default system
    // include path, and libvulkan is linked by the top-level build.
    // Same pattern as `pkg/opengl/build.zig`.
    const root = b.path("main.zig");
    _ = b.addModule("vulkan", .{ .root_source_file = root });
}
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
// Vulkan core API + the dmabuf-related extensions the renderer relies
|
||||
// on for zero-copy presentation:
|
||||
//
|
||||
// - VK_KHR_external_memory / VK_KHR_external_memory_fd
|
||||
// - VK_EXT_external_memory_dma_buf
|
||||
// - VK_EXT_image_drm_format_modifier
|
||||
//
|
||||
// VK_USE_PLATFORM_* macros are intentionally NOT set here — the
|
||||
// renderer talks to its host purely via dmabuf fds (handed back to
|
||||
// the apprt's `ghostty_platform_vulkan_s.present` callback), so
|
||||
// libghostty never sees a wl_display or xcb_connection. That keeps
|
||||
// the binding portable and lets the host (Qt RHI) do all the
|
||||
// platform-specific compositing.
|
||||
// Raw C declarations imported from the system `vulkan/vulkan.h`.
pub const c = @cImport({
|
||||
@cInclude("vulkan/vulkan.h");
|
||||
});
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
//! Vulkan bindings.
|
||||
//!
|
||||
//! Shaped after `pkg/opengl/`: `c` is the raw C API (a thin `@cImport`
|
||||
//! wrapper around the system Vulkan headers); the per-resource files
|
||||
//! alongside provide opinionated typed wrappers the renderer
|
||||
//! consumes as primitives.
|
||||
//!
|
||||
//! The Vulkan renderer in `src/renderer/vulkan/` builds renderer
|
||||
//! policy on top of these (Pipeline / RenderPass / Frame / Target
|
||||
//! etc.); anything that's pure Vulkan-API plumbing belongs here.
|
||||
//!
|
||||
//! Vulkan core API + the dmabuf-related extensions the renderer relies
|
||||
//! on for zero-copy presentation:
|
||||
//!
|
||||
//! - VK_KHR_external_memory / VK_KHR_external_memory_fd
|
||||
//! - VK_EXT_external_memory_dma_buf
|
||||
//! - VK_EXT_image_drm_format_modifier
|
||||
//!
|
||||
//! VK_USE_PLATFORM_* macros are intentionally NOT set in `c.zig` —
|
||||
//! libghostty talks to its host purely via dmabuf fds (handed back to
|
||||
//! the apprt's `ghostty_platform_vulkan_s.present` callback), so it
|
||||
//! never sees a `wl_display` or `xcb_connection`. That keeps the
|
||||
//! binding portable and lets the host (Qt RHI) do all the
|
||||
//! platform-specific compositing.
|
||||
|
||||
/// Raw C API, re-exported from `c.zig`.
pub const c = @import("c.zig").c;
|
||||
// Typed per-resource wrappers, one file each.
pub const Device = @import("Device.zig");
|
||||
pub const Sampler = @import("Sampler.zig");
|
||||
pub const CommandPool = @import("CommandPool.zig");
|
||||
pub const DescriptorPool = @import("DescriptorPool.zig");
|
||||
|
|
@ -30,7 +30,7 @@ set(CMAKE_AUTOMOC ON)
|
|||
include(GNUInstallDirs)
|
||||
|
||||
find_package(Qt6 REQUIRED COMPONENTS Gui Widgets OpenGL DBus
|
||||
Multimedia Svg)
|
||||
Multimedia Svg WaylandClient)
|
||||
# WindowBlur + XkbTracker use qpa/qplatformnativeinterface.h to reach
|
||||
# the wl_display / wl_surface / wl_seat for native compositor calls
|
||||
# (blur, layer-shell screen pinning, raw wl_keyboard listeners). The
|
||||
|
|
@ -43,7 +43,7 @@ find_package(Qt6 REQUIRED COMPONENTS Gui Widgets OpenGL DBus
|
|||
# CMake config (older Debian) and we fall back to hand-wiring the
|
||||
# include dir below.
|
||||
set(QT_NO_PRIVATE_MODULE_WARNING ON)
|
||||
find_package(Qt6 QUIET OPTIONAL_COMPONENTS GuiPrivate)
|
||||
find_package(Qt6 QUIET OPTIONAL_COMPONENTS GuiPrivate WaylandClientPrivate)
|
||||
|
||||
# LayerShellQt: the quick terminal is a wlr-layer-shell dropdown window.
|
||||
find_package(LayerShellQt REQUIRED)
|
||||
|
|
@ -53,6 +53,11 @@ find_package(LayerShellQt REQUIRED)
|
|||
# QPA native-handle accessors.
|
||||
find_package(PkgConfig REQUIRED)
|
||||
pkg_check_modules(WAYLAND_CLIENT REQUIRED IMPORTED_TARGET wayland-client)
|
||||
# libEGL is only needed by the OpenGL variant — `EglDmabufTarget`
|
||||
# uses EGL_MESA_image_dma_buf_export to export an FBO-backed
|
||||
# texture as a dmabuf. The Vulkan variant gets dmabufs straight
|
||||
# from `VK_KHR_external_memory_fd` and never calls into EGL, so
|
||||
# the EGL pkg-config + IMPORTED_TARGET is gated below.
|
||||
# libxkbcommon: derive the unshifted Unicode codepoint for a key event
|
||||
# from its XKB keycode, so libghostty's kitty encoder finds an entry for
|
||||
# punctuation keys (Qt's ev->key() reports the SHIFTED symbol, e.g.
|
||||
|
|
@ -60,22 +65,105 @@ pkg_check_modules(WAYLAND_CLIENT REQUIRED IMPORTED_TARGET wayland-client)
|
|||
pkg_check_modules(XKBCOMMON REQUIRED IMPORTED_TARGET xkbcommon)
|
||||
find_program(WAYLAND_SCANNER wayland-scanner REQUIRED)
|
||||
|
||||
# Generate client glue for the org_kde_kwin_blur protocol.
|
||||
set(BLUR_XML "${CMAKE_CURRENT_SOURCE_DIR}/protocols/blur.xml")
|
||||
set(BLUR_HEADER "${CMAKE_CURRENT_BINARY_DIR}/blur-client-protocol.h")
|
||||
set(BLUR_CODE "${CMAKE_CURRENT_BINARY_DIR}/blur-protocol.c")
|
||||
add_custom_command(OUTPUT "${BLUR_HEADER}"
|
||||
COMMAND "${WAYLAND_SCANNER}" client-header "${BLUR_XML}" "${BLUR_HEADER}"
|
||||
DEPENDS "${BLUR_XML}" VERBATIM)
|
||||
add_custom_command(OUTPUT "${BLUR_CODE}"
|
||||
COMMAND "${WAYLAND_SCANNER}" private-code "${BLUR_XML}" "${BLUR_CODE}"
|
||||
DEPENDS "${BLUR_XML}" VERBATIM)
|
||||
# ghastty_wayland_protocol(<basename> <header_var> <code_var>)
#
# Runs `wayland-scanner` on `protocols/<basename>.xml` and generates,
# inside `CMAKE_CURRENT_BINARY_DIR`:
#
#   <basename>-client-protocol.h   (scanner mode `client-header`)
#   <basename>-protocol.c          (scanner mode `private-code`)
#
# The two generated paths are handed back through <header_var> and
# <code_var> in the caller's scope, ready to be listed as sources of
# `add_executable`.
#
# The header and the C file each get their own custom command and
# neither command depends on the other; both outputs have to appear in
# the consuming target's source list for the commands to run. This is
# the same two-commands-per-protocol layout that used to be spelled
# out by hand — only the repetition is gone.
function(ghastty_wayland_protocol basename header_var code_var)
    set(protocol_xml "${CMAKE_CURRENT_SOURCE_DIR}/protocols/${basename}.xml")
    set(generated_header "${CMAKE_CURRENT_BINARY_DIR}/${basename}-client-protocol.h")
    set(generated_code "${CMAKE_CURRENT_BINARY_DIR}/${basename}-protocol.c")

    add_custom_command(
        OUTPUT "${generated_header}"
        COMMAND "${WAYLAND_SCANNER}" client-header "${protocol_xml}" "${generated_header}"
        DEPENDS "${protocol_xml}"
        VERBATIM)

    add_custom_command(
        OUTPUT "${generated_code}"
        COMMAND "${WAYLAND_SCANNER}" private-code "${protocol_xml}" "${generated_code}"
        DEPENDS "${protocol_xml}"
        VERBATIM)

    set("${header_var}" "${generated_header}" PARENT_SCOPE)
    set("${code_var}" "${generated_code}" PARENT_SCOPE)
endfunction()
|
||||
|
||||
# Per-protocol notes:
|
||||
# - `blur` (`org_kde_kwin_blur`) — KWin background-blur.
|
||||
# - `linux-dmabuf-v1` — Vulkan present path:
|
||||
# wrap libghostty's dmabuf fd in a `wl_buffer` for the
|
||||
# wayland::SubsurfacePresenter's `wl_surface`.
|
||||
# - `viewporter` (`wp_viewporter`) — destination size in
|
||||
# surface-local coords; decouples the buffer's pixel dimensions
|
||||
# from how big the subsurface appears on screen (fractional
|
||||
# scaling).
|
||||
# - `fractional-scale-v1` (`wp_fractional_scale_v1`)
|
||||
# — compositor reports per-surface fractional scale (120ths).
|
||||
# Used as the authoritative scale for buffer sizing, avoiding
|
||||
# any sync lag with Qt's `devicePixelRatioF()` cache.
|
||||
ghastty_wayland_protocol(blur BLUR_HEADER BLUR_CODE)
|
||||
ghastty_wayland_protocol(linux-dmabuf-v1 DMABUF_HEADER DMABUF_CODE)
|
||||
ghastty_wayland_protocol(viewporter VIEWPORTER_HEADER VIEWPORTER_CODE)
|
||||
ghastty_wayland_protocol(fractional-scale-v1 FRACSCALE_HEADER FRACSCALE_CODE)
|
||||
# - `alpha-modifier-v1` (`wp_alpha_modifier_v1`)
|
||||
# — compositor-side per-surface alpha multiplier. QtWayland has no
|
||||
# built-in setWindowOpacity equivalent (the QPA plugin warns
|
||||
# "This plugin does not support setting window opacity" on every
|
||||
# call), so QuickTerminal's fade-in/out drives this protocol
|
||||
# directly. Supported on KWin, wlroots ≥0.17, Hyprland; NOT yet
|
||||
# on mutter/GNOME.
|
||||
ghastty_wayland_protocol(alpha-modifier-v1 ALPHAMOD_HEADER ALPHAMOD_CODE)
|
||||
|
||||
# libghostty is built out-of-tree by Zig.
|
||||
get_filename_component(GHOSTTY_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/.." ABSOLUTE)
|
||||
set(GHOSTTY_LIB_DIR "${GHOSTTY_ROOT}/zig-out/lib")
|
||||
set(GHOSTTY_SO "${GHOSTTY_LIB_DIR}/ghostty-internal.so")
|
||||
|
||||
# Variant: which libghostty.so this build links against. The
|
||||
# rendering backend is baked into libghostty (Zig builds with
|
||||
# `-Drenderer=opengl` vs `-Drenderer=vulkan` produce ABI-compatible
|
||||
# but functionally distinct .so's), so the variant here is purely a
|
||||
# *compile-time selector*. The binary name and install layout do
|
||||
# NOT change — `${CMAKE_INSTALL_BINDIR}/ghastty` and
|
||||
# `${CMAKE_INSTALL_LIBDIR}/libghostty.so` for both. Developers who
|
||||
# want both flavors installed at once should use distinct prefixes
|
||||
# (`cmake --install --prefix /tmp/ghastty-vulkan`).
|
||||
#
|
||||
# Set via `cmake -DGHASTTY_VARIANT=vulkan -S qt -B qt/build-vulkan`.
|
||||
set(GHASTTY_VARIANT "opengl" CACHE STRING
|
||||
"Renderer variant: opengl (default) or vulkan")
|
||||
set_property(CACHE GHASTTY_VARIANT PROPERTY STRINGS opengl vulkan)
|
||||
# Validate the cache value: STRINGS only constrains the cmake-gui
|
||||
# dropdown, not the command-line. `-DGHASTTY_VARIANT=foo` would
|
||||
# otherwise silently fall into the OpenGL branch below.
|
||||
if(NOT GHASTTY_VARIANT STREQUAL "opengl" AND
|
||||
NOT GHASTTY_VARIANT STREQUAL "vulkan")
|
||||
message(FATAL_ERROR
|
||||
"GHASTTY_VARIANT='${GHASTTY_VARIANT}' is invalid; "
|
||||
"must be 'opengl' or 'vulkan'.")
|
||||
endif()
|
||||
message(STATUS "Building variant=${GHASTTY_VARIANT}")
|
||||
|
||||
# Compile-time renderer pick. Each binary is linked against exactly
|
||||
# one libghostty.so variant (opengl or vulkan), so the renderer
|
||||
# choice is inherent to the binary — no need for a runtime env var.
|
||||
# GhosttySurface.cpp branches on GHASTTY_USE_VULKAN to spin up the
|
||||
# Vulkan host vs the OpenGL context.
|
||||
if(GHASTTY_VARIANT STREQUAL "vulkan")
|
||||
add_compile_definitions(GHASTTY_USE_VULKAN)
|
||||
endif()
|
||||
|
||||
# libEGL: needed by `opengl/EglDmabufTarget.cpp` for the OpenGL
|
||||
# variant's zero-copy present path. Vulkan-variant binaries never
|
||||
# pull in this source file (gated below) so the loader doesn't have
|
||||
# to be installed for Vulkan-only systems.
|
||||
if(GHASTTY_VARIANT STREQUAL "opengl")
|
||||
pkg_check_modules(EGL REQUIRED IMPORTED_TARGET egl)
|
||||
endif()
|
||||
|
||||
if(NOT EXISTS "${GHOSTTY_SO}")
|
||||
message(FATAL_ERROR
|
||||
"libghostty not found at ${GHOSTTY_SO}\n"
|
||||
|
|
@ -119,12 +207,40 @@ add_executable(ghastty
|
|||
src/TabWidget.cpp
|
||||
src/undo/UndoStack.cpp
|
||||
src/Util.cpp
|
||||
src/wayland/AlphaModifier.cpp
|
||||
src/wayland/SubsurfacePresenter.cpp
|
||||
src/WindowBlur.cpp
|
||||
src/XkbTracker.cpp
|
||||
"${BLUR_CODE}"
|
||||
"${BLUR_HEADER}"
|
||||
"${DMABUF_CODE}"
|
||||
"${DMABUF_HEADER}"
|
||||
"${VIEWPORTER_CODE}"
|
||||
"${VIEWPORTER_HEADER}"
|
||||
"${FRACSCALE_CODE}"
|
||||
"${FRACSCALE_HEADER}"
|
||||
"${ALPHAMOD_CODE}"
|
||||
"${ALPHAMOD_HEADER}"
|
||||
)
|
||||
|
||||
# Vulkan host glue is variant-only. Adding it to the OpenGL build
|
||||
# would force an unconditional libvulkan link on a binary that
|
||||
# never calls into Vulkan, contradicting the side-by-side
|
||||
# `~/.local/lib/libghostty.so` story that the variant block above
|
||||
# documents.
|
||||
if(GHASTTY_VARIANT STREQUAL "vulkan")
|
||||
target_sources(ghastty PRIVATE src/vulkan/Host.cpp)
|
||||
endif()
|
||||
|
||||
# `opengl/EglDmabufTarget.cpp` is OpenGL-variant only. The Vulkan
|
||||
# variant exports dmabufs straight from VkDeviceMemory via
|
||||
# VK_KHR_external_memory_fd and never calls into EGL, so excluding
|
||||
# this source file from the Vulkan binary lets it stay free of
|
||||
# libEGL too.
|
||||
if(GHASTTY_VARIANT STREQUAL "opengl")
|
||||
target_sources(ghastty PRIVATE src/opengl/EglDmabufTarget.cpp)
|
||||
endif()
|
||||
|
||||
# Embed the app icon so it is available even running from the build tree.
|
||||
qt_add_resources(ghastty "appicon"
|
||||
PREFIX "/"
|
||||
|
|
@ -151,7 +267,32 @@ target_link_libraries(ghastty PRIVATE
|
|||
"${GHOSTTY_LINK_SO}"
|
||||
)
|
||||
|
||||
# libEGL is OpenGL-variant only — gated alongside the source file
|
||||
# in the variant block above. Vulkan-variant binaries don't pull
|
||||
# in libEGL at all.
|
||||
if(GHASTTY_VARIANT STREQUAL "opengl")
|
||||
target_link_libraries(ghastty PRIVATE PkgConfig::EGL)
|
||||
endif()
|
||||
|
||||
# libvulkan is Vulkan-variant only. The OpenGL variant compiles
# nothing that references Vulkan symbols (vulkan/Host.cpp is gated
# above), so not linking libvulkan keeps OpenGL-only systems from
# needing the loader installed at runtime — matching the
# documented side-by-side variant story above.
#
# The loader is resolved through pkg-config, the same way libEGL is
# for the OpenGL variant: a missing Vulkan development package then
# fails at configure time with a clear message instead of surfacing
# as a linker error, and the imported target carries the include and
# link flags reported by `vulkan.pc`.
if(GHASTTY_VARIANT STREQUAL "vulkan")
    pkg_check_modules(VULKAN REQUIRED IMPORTED_TARGET vulkan)
    target_link_libraries(ghastty PRIVATE PkgConfig::VULKAN)
endif()
|
||||
|
||||
# Hook up the private QPA headers (see find_package above).
|
||||
#
|
||||
# Qt6::WaylandClientPrivate gives us QtWaylandClient::QWaylandWindow,
|
||||
# which we cast the QPA platform window to in GhosttySurface to call
|
||||
# `commit()` directly — that forces a parent wl_surface commit at the
|
||||
# moment our subsurface state is ready, instead of waiting on Qt's
|
||||
# backing-store flush which never fires for our translucent widget.
|
||||
if(TARGET Qt6::WaylandClientPrivate)
|
||||
target_link_libraries(ghastty PRIVATE Qt6::WaylandClientPrivate)
|
||||
endif()
|
||||
if(TARGET Qt6::GuiPrivate)
|
||||
target_link_libraries(ghastty PRIVATE Qt6::GuiPrivate)
|
||||
else()
|
||||
|
|
@ -187,6 +328,9 @@ endif()
|
|||
# actual zig-out artifact), and the .so's NEEDED entries also point
|
||||
# into zig-out/lib for transitive deps.
|
||||
# - Installed: libghostty.so lives next to the binary ($ORIGIN/../lib).
|
||||
# Same layout regardless of variant — the binary name doesn't change,
|
||||
# the .so path doesn't change. Side-by-side installs of two variants
|
||||
# need separate `--prefix`es.
|
||||
set_target_properties(ghastty PROPERTIES
|
||||
BUILD_RPATH "${GHOSTTY_LINK_DIR};${GHOSTTY_LIB_DIR}"
|
||||
INSTALL_RPATH "$ORIGIN/../${CMAKE_INSTALL_LIBDIR}"
|
||||
|
|
|
|||
|
|
@ -0,0 +1,118 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<protocol name="alpha_modifier_v1">
|
||||
<copyright>
|
||||
Copyright 2023 Xaver Hugl
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice (including the next
|
||||
paragraph) shall be included in all copies or substantial portions of the
|
||||
Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
</copyright>
|
||||
|
||||
<description summary="surface alpha modifier">
|
||||
This interface allows a client to set a factor for the alpha values on a
|
||||
surface, which can be used to offload such operations to the compositor,
|
||||
which can in turn for example offload them to KMS.
|
||||
|
||||
Warning! The protocol described in this file is currently in the testing
|
||||
phase. Backward compatible changes may be added together with the
|
||||
corresponding interface version bump. Backward incompatible changes can
|
||||
only be done by creating a new major version of the extension.
|
||||
</description>
|
||||
|
||||
<interface name="wp_alpha_modifier_v1" version="1">
|
||||
<description summary="surface alpha modifier manager">
|
||||
This interface allows a client to set a factor for the alpha values on
|
||||
a surface, which can be used to offload such operations to the
|
||||
compositor. The default factor is UINT32_MAX.
|
||||
|
||||
This interface can be used to set an arbitrary alpha value for the
|
||||
surface, allowing it to be made fully transparent by setting the factor
|
||||
to 0, fully opaque by setting it to UINT32_MAX, or any value in
|
||||
between.
|
||||
|
||||
Warning! The protocol described in this file is currently in the
|
||||
testing phase. Backward compatible changes may be added together with
|
||||
the corresponding interface version bump. Backward incompatible changes
|
||||
can only be done by creating a new major version of the extension.
|
||||
</description>
|
||||
|
||||
<request name="destroy" type="destructor">
|
||||
<description summary="destroy the alpha modifier manager object">
|
||||
Destroy the alpha modifier manager. This doesn't destroy objects
|
||||
created with the manager.
|
||||
</description>
|
||||
</request>
|
||||
|
||||
<request name="get_surface">
|
||||
<description summary="create a new alpha modifier surface object">
|
||||
Create a new alpha modifier surface object associated with the given
|
||||
wl_surface. If there is already such an object associated with the
|
||||
wl_surface, the already_constructed error will be raised.
|
||||
</description>
|
||||
<arg name="id" type="new_id" interface="wp_alpha_modifier_surface_v1"/>
|
||||
<arg name="surface" type="object" interface="wl_surface"/>
|
||||
</request>
|
||||
|
||||
<enum name="error">
|
||||
<entry name="already_constructed" value="0"
|
||||
summary="wl_surface already has an alpha modifier object associated"/>
|
||||
</enum>
|
||||
</interface>
|
||||
|
||||
<interface name="wp_alpha_modifier_surface_v1" version="1">
|
||||
<description summary="modifier object for a surface">
|
||||
This interface allows the client to set a factor for the alpha values on
|
||||
a surface, which can be used to offload such operations to the
|
||||
compositor. Multiple alpha modifiers can be attached to the same
|
||||
surface, in which case the resulting alpha will be the product of all
|
||||
the multiplicative factors.
|
||||
|
||||
The default factor is UINT32_MAX.
|
||||
</description>
|
||||
|
||||
<request name="destroy" type="destructor">
|
||||
<description summary="remove the alpha modifier from the surface">
|
||||
This destroys the object, and is equivalent to set_multiplier with
|
||||
a value of UINT32_MAX, with the same double-buffered semantics as
|
||||
set_multiplier.
|
||||
</description>
|
||||
</request>
|
||||
|
||||
<request name="set_multiplier">
|
||||
<description summary="set the alpha multiplier">
|
||||
Sets the alpha multiplier for the surface. The alpha multiplier is
|
||||
double-buffered state, see wl_surface.commit for details.
|
||||
|
||||
The default factor is UINT32_MAX.
|
||||
|
||||
This factor is applied in the compositor's blending space, as an
|
||||
additional step after the processing of per-pixel alpha values for
|
||||
the surface. It allows setting an arbitrary alpha value for the
|
||||
surface, including making the surface partially transparent even when
|
||||
all the pixels are fully opaque, or fully transparent even when the
|
||||
pixels are not.
|
||||
</description>
|
||||
<arg name="factor" type="uint" summary="the new alpha multiplier for the surface"/>
|
||||
</request>
|
||||
|
||||
<enum name="error">
|
||||
<entry name="no_surface" value="0"
|
||||
summary="wl_surface was destroyed"/>
|
||||
</enum>
|
||||
</interface>
|
||||
</protocol>
|
||||
|
|
@ -0,0 +1,102 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<protocol name="fractional_scale_v1">
|
||||
<copyright>
|
||||
Copyright © 2022 Kenny Levinsen
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice (including the next
|
||||
paragraph) shall be included in all copies or substantial portions of the
|
||||
Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
</copyright>
|
||||
|
||||
<description summary="Protocol for requesting fractional surface scales">
|
||||
This protocol allows a compositor to suggest for surfaces to render at
|
||||
fractional scales.
|
||||
|
||||
A client can submit scaled content by utilizing wp_viewport. This is done by
|
||||
creating a wp_viewport object for the surface and setting the destination
|
||||
rectangle to the surface size before the scale factor is applied.
|
||||
|
||||
The buffer size is calculated by multiplying the surface size by the
|
||||
intended scale.
|
||||
|
||||
The wl_surface buffer scale should remain set to 1.
|
||||
|
||||
If a surface has a surface-local size of 100 px by 50 px and wishes to
|
||||
submit buffers with a scale of 1.5, then a buffer of 150px by 75 px should
|
||||
be used and the wp_viewport destination rectangle should be 100 px by 50 px.
|
||||
|
||||
For toplevel surfaces, the size is rounded halfway away from zero. The
|
||||
rounding algorithm for subsurface position and size is not defined.
|
||||
</description>
|
||||
|
||||
<interface name="wp_fractional_scale_manager_v1" version="1">
|
||||
<description summary="fractional surface scale information">
|
||||
A global interface for requesting surfaces to use fractional scales.
|
||||
</description>
|
||||
|
||||
<request name="destroy" type="destructor">
|
||||
<description summary="unbind the fractional surface scale interface">
|
||||
Informs the server that the client will not be using this protocol
|
||||
object anymore. This does not affect any other objects,
|
||||
wp_fractional_scale_v1 objects included.
|
||||
</description>
|
||||
</request>
|
||||
|
||||
<enum name="error">
|
||||
<entry name="fractional_scale_exists" value="0"
|
||||
summary="the surface already has a fractional_scale object associated"/>
|
||||
</enum>
|
||||
|
||||
<request name="get_fractional_scale">
|
||||
<description summary="extend surface interface for scale information">
|
||||
Create an add-on object for the wl_surface to let the compositor
|
||||
request fractional scales. If the given wl_surface already has a
|
||||
wp_fractional_scale_v1 object associated, the fractional_scale_exists
|
||||
protocol error is raised.
|
||||
</description>
|
||||
<arg name="id" type="new_id" interface="wp_fractional_scale_v1"
|
||||
summary="the new surface scale info interface id"/>
|
||||
<arg name="surface" type="object" interface="wl_surface"
|
||||
summary="the surface"/>
|
||||
</request>
|
||||
</interface>
|
||||
|
||||
<interface name="wp_fractional_scale_v1" version="1">
|
||||
<description summary="fractional scale interface to a wl_surface">
|
||||
An additional interface to a wl_surface object which allows the compositor
|
||||
to inform the client of the preferred scale.
|
||||
</description>
|
||||
|
||||
<request name="destroy" type="destructor">
|
||||
<description summary="remove surface scale information for surface">
|
||||
Destroy the fractional scale object. When this object is destroyed,
|
||||
preferred_scale events will no longer be sent.
|
||||
</description>
|
||||
</request>
|
||||
|
||||
<event name="preferred_scale">
|
||||
<description summary="notify of new preferred scale">
|
||||
Notification of a new preferred scale for this surface that the
|
||||
compositor suggests that the client should use.
|
||||
|
||||
The sent scale is the numerator of a fraction with a denominator of 120.
|
||||
</description>
|
||||
<arg name="scale" type="uint" summary="the new preferred scale"/>
|
||||
</event>
|
||||
</interface>
|
||||
</protocol>
|
||||
|
|
@ -0,0 +1,585 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<protocol name="linux_dmabuf_v1">
|
||||
|
||||
<copyright>
|
||||
Copyright © 2014, 2015 Collabora, Ltd.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice (including the next
|
||||
paragraph) shall be included in all copies or substantial portions of the
|
||||
Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
</copyright>
|
||||
|
||||
<interface name="zwp_linux_dmabuf_v1" version="5">
|
||||
<description summary="factory for creating dmabuf-based wl_buffers">
|
||||
This interface offers ways to create generic dmabuf-based wl_buffers.
|
||||
|
||||
For more information about dmabuf, see:
|
||||
https://www.kernel.org/doc/html/next/userspace-api/dma-buf-alloc-exchange.html
|
||||
|
||||
Clients can use the get_surface_feedback request to get dmabuf feedback
|
||||
for a particular surface. If the client wants to retrieve feedback not
|
||||
tied to a surface, they can use the get_default_feedback request.
|
||||
|
||||
The following are required from clients:
|
||||
|
||||
- Clients must ensure that either all data in the dma-buf is
|
||||
coherent for all subsequent read access or that coherency is
|
||||
correctly handled by the underlying kernel-side dma-buf
|
||||
implementation.
|
||||
|
||||
- Don't make any more attachments after sending the buffer to the
|
||||
compositor. Making more attachments later increases the risk of
|
||||
the compositor not being able to use (re-import) an existing
|
||||
dmabuf-based wl_buffer.
|
||||
|
||||
The underlying graphics stack must ensure the following:
|
||||
|
||||
- The dmabuf file descriptors relayed to the server will stay valid
|
||||
for the whole lifetime of the wl_buffer. This means the server may
|
||||
at any time use those fds to import the dmabuf into any kernel
|
||||
sub-system that might accept it.
|
||||
|
||||
However, when the underlying graphics stack fails to deliver the
|
||||
promise, because of e.g. a device hot-unplug which raises internal
|
||||
errors, after the wl_buffer has been successfully created the
|
||||
compositor must not raise protocol errors to the client when dmabuf
|
||||
import later fails.
|
||||
|
||||
To create a wl_buffer from one or more dmabufs, a client creates a
|
||||
zwp_linux_buffer_params_v1 object with a zwp_linux_dmabuf_v1.create_params
|
||||
request. All planes required by the intended format are added with
|
||||
the 'add' request. Finally, a 'create' or 'create_immed' request is
|
||||
issued, which has the following outcome depending on the import success.
|
||||
|
||||
The 'create' request,
|
||||
- on success, triggers a 'created' event which provides the final
|
||||
wl_buffer to the client.
|
||||
- on failure, triggers a 'failed' event to convey that the server
|
||||
cannot use the dmabufs received from the client.
|
||||
|
||||
For the 'create_immed' request,
|
||||
- on success, the server immediately imports the added dmabufs to
|
||||
create a wl_buffer. No event is sent from the server in this case.
|
||||
- on failure, the server can choose to either:
|
||||
- terminate the client by raising a fatal error.
|
||||
- mark the wl_buffer as failed, and send a 'failed' event to the
|
||||
client. If the client uses a failed wl_buffer as an argument to any
|
||||
request, the behaviour is compositor implementation-defined.
|
||||
|
||||
For all DRM formats and unless specified in another protocol extension,
|
||||
pre-multiplied alpha is used for pixel values.
|
||||
|
||||
Unless specified otherwise in another protocol extension, implicit
|
||||
synchronization is used. In other words, compositors and clients must
|
||||
wait and signal fences implicitly passed via the DMA-BUF's reservation
|
||||
mechanism.
|
||||
</description>
|
||||
|
||||
<request name="destroy" type="destructor">
|
||||
<description summary="unbind the factory">
|
||||
Objects created through this interface, especially wl_buffers, will
|
||||
remain valid.
|
||||
</description>
|
||||
</request>
|
||||
|
||||
<request name="create_params">
|
||||
<description summary="create a temporary object for buffer parameters">
|
||||
This temporary object is used to collect multiple dmabuf handles into
|
||||
a single batch to create a wl_buffer. It can only be used once and
|
||||
should be destroyed after a 'created' or 'failed' event has been
|
||||
received.
|
||||
</description>
|
||||
<arg name="params_id" type="new_id" interface="zwp_linux_buffer_params_v1"
|
||||
summary="the new temporary"/>
|
||||
</request>
|
||||
|
||||
<event name="format" deprecated-since="4">
|
||||
<description summary="supported buffer format">
|
||||
This event advertises one buffer format that the server supports.
|
||||
All the supported formats are advertised once when the client
|
||||
binds to this interface. A roundtrip after binding guarantees
|
||||
that the client has received all supported formats.
|
||||
|
||||
For the definition of the format codes, see the
|
||||
zwp_linux_buffer_params_v1::create request.
|
||||
|
||||
Starting version 4, the format event is deprecated and must not be
|
||||
sent by compositors. Instead, use get_default_feedback or
|
||||
get_surface_feedback.
|
||||
</description>
|
||||
<arg name="format" type="uint" summary="DRM_FORMAT code"/>
|
||||
</event>
|
||||
|
||||
<event name="modifier" since="3" deprecated-since="4">
|
||||
<description summary="supported buffer format modifier">
|
||||
This event advertises the formats that the server supports, along with
|
||||
the modifiers supported for each format. All the supported modifiers
|
||||
for all the supported formats are advertised once when the client
|
||||
binds to this interface. A roundtrip after binding guarantees that
|
||||
the client has received all supported format-modifier pairs.
|
||||
|
||||
For legacy support, DRM_FORMAT_MOD_INVALID (that is, modifier_hi ==
|
||||
0x00ffffff and modifier_lo == 0xffffffff) is allowed in this event.
|
||||
It indicates that the server can support the format with an implicit
|
||||
modifier. When a plane has DRM_FORMAT_MOD_INVALID as its modifier, it
|
||||
is as if no explicit modifier is specified. The effective modifier
|
||||
will be derived from the dmabuf.
|
||||
|
||||
A compositor that sends valid modifiers and DRM_FORMAT_MOD_INVALID for
|
||||
a given format supports both explicit modifiers and implicit modifiers.
|
||||
|
||||
For the definition of the format and modifier codes, see the
|
||||
zwp_linux_buffer_params_v1::create and zwp_linux_buffer_params_v1::add
|
||||
requests.
|
||||
|
||||
Starting version 4, the modifier event is deprecated and must not be
|
||||
sent by compositors. Instead, use get_default_feedback or
|
||||
get_surface_feedback.
|
||||
</description>
|
||||
<arg name="format" type="uint" summary="DRM_FORMAT code"/>
|
||||
<arg name="modifier_hi" type="uint"
|
||||
summary="high 32 bits of layout modifier"/>
|
||||
<arg name="modifier_lo" type="uint"
|
||||
summary="low 32 bits of layout modifier"/>
|
||||
</event>
|
||||
|
||||
<!-- Version 4 additions -->
|
||||
|
||||
<request name="get_default_feedback" since="4">
|
||||
<description summary="get default feedback">
|
||||
This request creates a new zwp_linux_dmabuf_feedback_v1 object not bound
|
||||
to a particular surface. This object will deliver feedback about dmabuf
|
||||
parameters to use if the client doesn't support per-surface feedback
|
||||
(see get_surface_feedback).
|
||||
</description>
|
||||
<arg name="id" type="new_id" interface="zwp_linux_dmabuf_feedback_v1"/>
|
||||
</request>
|
||||
|
||||
<request name="get_surface_feedback" since="4">
|
||||
<description summary="get feedback for a surface">
|
||||
This request creates a new zwp_linux_dmabuf_feedback_v1 object for the
|
||||
specified wl_surface. This object will deliver feedback about dmabuf
|
||||
parameters to use for buffers attached to this surface.
|
||||
|
||||
If the surface is destroyed before the zwp_linux_dmabuf_feedback_v1 object,
|
||||
the feedback object becomes inert.
|
||||
</description>
|
||||
<arg name="id" type="new_id" interface="zwp_linux_dmabuf_feedback_v1"/>
|
||||
<arg name="surface" type="object" interface="wl_surface"/>
|
||||
</request>
|
||||
</interface>
|
||||
|
||||
<interface name="zwp_linux_buffer_params_v1" version="5">
|
||||
<description summary="parameters for creating a dmabuf-based wl_buffer">
|
||||
This temporary object is a collection of dmabufs and other
|
||||
parameters that together form a single logical buffer. The temporary
|
||||
object may eventually create one wl_buffer unless cancelled by
|
||||
destroying it before requesting 'create'.
|
||||
|
||||
Single-planar formats only require one dmabuf, however
|
||||
multi-planar formats may require more than one dmabuf. For all
|
||||
formats, an 'add' request must be called once per plane (even if the
|
||||
underlying dmabuf fd is identical).
|
||||
|
||||
You must use consecutive plane indices ('plane_idx' argument for 'add')
|
||||
from zero to the number of planes used by the drm_fourcc format code.
|
||||
All planes required by the format must be given exactly once, but can
|
||||
be given in any order. Each plane index can only be set once; subsequent
|
||||
calls with a plane index which has already been set will result in a
|
||||
plane_set error being generated.
|
||||
</description>
|
||||
|
||||
<enum name="error">
|
||||
<entry name="already_used" value="0"
|
||||
summary="the dmabuf_batch object has already been used to create a wl_buffer"/>
|
||||
<entry name="plane_idx" value="1"
|
||||
summary="plane index out of bounds"/>
|
||||
<entry name="plane_set" value="2"
|
||||
summary="the plane index was already set"/>
|
||||
<entry name="incomplete" value="3"
|
||||
summary="missing or too many planes to create a buffer"/>
|
||||
<entry name="invalid_format" value="4"
|
||||
summary="format not supported"/>
|
||||
<entry name="invalid_dimensions" value="5"
|
||||
summary="invalid width or height"/>
|
||||
<entry name="out_of_bounds" value="6"
|
||||
summary="offset + stride * height goes out of dmabuf bounds"/>
|
||||
<entry name="invalid_wl_buffer" value="7"
|
||||
summary="invalid wl_buffer resulted from importing dmabufs via
|
||||
the create_immed request on given buffer_params"/>
|
||||
</enum>
|
||||
|
||||
<request name="destroy" type="destructor">
|
||||
<description summary="delete this object, used or not">
|
||||
Cleans up the temporary data sent to the server for dmabuf-based
|
||||
wl_buffer creation.
|
||||
</description>
|
||||
</request>
|
||||
|
||||
<request name="add">
|
||||
<description summary="add a dmabuf to the temporary set">
|
||||
This request adds one dmabuf to the set in this
|
||||
zwp_linux_buffer_params_v1.
|
||||
|
||||
The 64-bit unsigned value combined from modifier_hi and modifier_lo
|
||||
is the dmabuf layout modifier. DRM AddFB2 ioctl calls this the
|
||||
fb modifier, which is defined in drm_mode.h of Linux UAPI.
|
||||
This is an opaque token. Drivers use this token to express tiling,
|
||||
compression, etc. driver-specific modifications to the base format
|
||||
defined by the DRM fourcc code.
|
||||
|
||||
Starting from version 4, the invalid_format protocol error is sent if
|
||||
the format + modifier pair was not advertised as supported.
|
||||
|
||||
Starting from version 5, the invalid_format protocol error is sent if
|
||||
all planes don't use the same modifier.
|
||||
|
||||
This request raises the PLANE_IDX error if plane_idx is too large.
|
||||
The error PLANE_SET is raised if attempting to set a plane that
|
||||
was already set.
|
||||
</description>
|
||||
<arg name="fd" type="fd" summary="dmabuf fd"/>
|
||||
<arg name="plane_idx" type="uint" summary="plane index"/>
|
||||
<arg name="offset" type="uint" summary="offset in bytes"/>
|
||||
<arg name="stride" type="uint" summary="stride in bytes"/>
|
||||
<arg name="modifier_hi" type="uint"
|
||||
summary="high 32 bits of layout modifier"/>
|
||||
<arg name="modifier_lo" type="uint"
|
||||
summary="low 32 bits of layout modifier"/>
|
||||
</request>
|
||||
|
||||
<enum name="flags" bitfield="true">
|
||||
<entry name="y_invert" value="1" summary="contents are y-inverted"/>
|
||||
<entry name="interlaced" value="2" summary="content is interlaced"/>
|
||||
<entry name="bottom_first" value="4" summary="bottom field first"/>
|
||||
</enum>
|
||||
|
||||
<request name="create">
|
||||
<description summary="create a wl_buffer from the given dmabufs">
|
||||
This asks for creation of a wl_buffer from the added dmabuf
|
||||
buffers. The wl_buffer is not created immediately but returned via
|
||||
the 'created' event if the dmabuf sharing succeeds. The sharing
|
||||
may fail at runtime for reasons a client cannot predict, in
|
||||
which case the 'failed' event is triggered.
|
||||
|
||||
The 'format' argument is a DRM_FORMAT code, as defined by the
|
||||
libdrm's drm_fourcc.h. The Linux kernel's DRM sub-system is the
|
||||
authoritative source on how the format codes should work.
|
||||
|
||||
The 'flags' is a bitfield of the flags defined in enum "flags".
|
||||
'y_invert' means that the image needs to be y-flipped.
|
||||
|
||||
Flag 'interlaced' means that the frame in the buffer is not
|
||||
progressive as usual, but interlaced. An interlaced buffer as
|
||||
supported here must always contain both top and bottom fields.
|
||||
The top field always begins on the first pixel row. The temporal
|
||||
ordering between the two fields is top field first, unless
|
||||
'bottom_first' is specified. It is undefined whether 'bottom_first'
|
||||
is ignored if 'interlaced' is not set.
|
||||
|
||||
This protocol does not convey any information about field rate,
|
||||
duration, or timing, other than the relative ordering between the
|
||||
two fields in one buffer. A compositor may have to estimate the
|
||||
intended field rate from the incoming buffer rate. It is undefined
|
||||
whether the time of receiving wl_surface.commit with a new buffer
|
||||
attached, applying the wl_surface state, wl_surface.frame callback
|
||||
trigger, presentation, or any other point in the compositor cycle
|
||||
is used to measure the frame or field times. There is no support
|
||||
for detecting missed or late frames/fields/buffers either, and
|
||||
there is no support whatsoever for cooperating with interlaced
|
||||
compositor output.
|
||||
|
||||
The composited image quality resulting from the use of interlaced
|
||||
buffers is explicitly undefined. A compositor may use elaborate
|
||||
hardware features or software to deinterlace and create progressive
|
||||
output frames from a sequence of interlaced input buffers, or it
|
||||
may produce substandard image quality. However, compositors that
|
||||
cannot guarantee reasonable image quality in all cases are recommended
|
||||
to just reject all interlaced buffers.
|
||||
|
||||
Any argument errors, including non-positive width or height,
|
||||
mismatch between the number of planes and the format, bad
|
||||
format, bad offset or stride, may be indicated by fatal protocol
|
||||
errors: INCOMPLETE, INVALID_FORMAT, INVALID_DIMENSIONS,
|
||||
OUT_OF_BOUNDS.
|
||||
|
||||
Dmabuf import errors in the server that are not obvious client
|
||||
bugs are returned via the 'failed' event as non-fatal. This
|
||||
allows attempting dmabuf sharing and falling back in the client
|
||||
if it fails.
|
||||
|
||||
This request can be sent only once in the object's lifetime, after
|
||||
which the only legal request is destroy. This object should be
|
||||
destroyed after issuing a 'create' request. Attempting to use this
|
||||
object after issuing 'create' raises ALREADY_USED protocol error.
|
||||
|
||||
It is not mandatory to issue 'create'. If a client wants to
|
||||
cancel the buffer creation, it can just destroy this object.
|
||||
</description>
|
||||
<arg name="width" type="int" summary="base plane width in pixels"/>
|
||||
<arg name="height" type="int" summary="base plane height in pixels"/>
|
||||
<arg name="format" type="uint" summary="DRM_FORMAT code"/>
|
||||
<arg name="flags" type="uint" enum="flags" summary="see enum flags"/>
|
||||
</request>
|
||||
|
||||
<event name="created">
|
||||
<description summary="buffer creation succeeded">
|
||||
This event indicates that the attempted buffer creation was
|
||||
successful. It provides the new wl_buffer referencing the dmabuf(s).
|
||||
|
||||
Upon receiving this event, the client should destroy the
|
||||
zwp_linux_buffer_params_v1 object.
|
||||
</description>
|
||||
<arg name="buffer" type="new_id" interface="wl_buffer"
|
||||
summary="the newly created wl_buffer"/>
|
||||
</event>
|
||||
|
||||
<event name="failed">
|
||||
<description summary="buffer creation failed">
|
||||
This event indicates that the attempted buffer creation has
|
||||
failed. It usually means that one of the dmabuf constraints
|
||||
has not been fulfilled.
|
||||
|
||||
Upon receiving this event, the client should destroy the
|
||||
zwp_linux_buffer_params_v1 object.
|
||||
</description>
|
||||
</event>
|
||||
|
||||
<request name="create_immed" since="2">
|
||||
<description summary="immediately create a wl_buffer from the given
|
||||
dmabufs">
|
||||
This asks for immediate creation of a wl_buffer by importing the
|
||||
added dmabufs.
|
||||
|
||||
In case of import success, no event is sent from the server, and the
|
||||
wl_buffer is ready to be used by the client.
|
||||
|
||||
Upon import failure, either of the following may happen, as seen fit
|
||||
by the implementation:
|
||||
- the client is terminated with one of the following fatal protocol
|
||||
errors:
|
||||
- INCOMPLETE, INVALID_FORMAT, INVALID_DIMENSIONS, OUT_OF_BOUNDS,
|
||||
in case of argument errors such as mismatch between the number
|
||||
of planes and the format, bad format, non-positive width or
|
||||
height, or bad offset or stride.
|
||||
- INVALID_WL_BUFFER, in case the cause for failure is unknown or
|
||||
platform specific.
|
||||
- the server creates an invalid wl_buffer, marks it as failed and
|
||||
sends a 'failed' event to the client. The result of using this
|
||||
invalid wl_buffer as an argument in any request by the client is
|
||||
defined by the compositor implementation.
|
||||
|
||||
This takes the same arguments as a 'create' request, and obeys the
|
||||
same restrictions.
|
||||
</description>
|
||||
<arg name="buffer_id" type="new_id" interface="wl_buffer"
|
||||
summary="id for the newly created wl_buffer"/>
|
||||
<arg name="width" type="int" summary="base plane width in pixels"/>
|
||||
<arg name="height" type="int" summary="base plane height in pixels"/>
|
||||
<arg name="format" type="uint" summary="DRM_FORMAT code"/>
|
||||
<arg name="flags" type="uint" enum="flags" summary="see enum flags"/>
|
||||
</request>
|
||||
</interface>
|
||||
|
||||
<interface name="zwp_linux_dmabuf_feedback_v1" version="5">
|
||||
<description summary="dmabuf feedback">
|
||||
This object advertises dmabuf parameters feedback. This includes the
|
||||
preferred devices and the supported formats/modifiers.
|
||||
|
||||
The parameters are sent once when this object is created and whenever they
|
||||
change. The done event is always sent once after all parameters have been
|
||||
sent. When a single parameter changes, all parameters are re-sent by the
|
||||
compositor.
|
||||
|
||||
Compositors can re-send the parameters when the current client buffer
|
||||
allocations are sub-optimal. Compositors should not re-send the
|
||||
parameters if re-allocating the buffers would not result in a more optimal
|
||||
configuration. In particular, compositors should avoid sending the exact
|
||||
same parameters multiple times in a row.
|
||||
|
||||
The tranche_target_device and tranche_formats events are grouped by
|
||||
tranches of preference. For each tranche, a tranche_target_device, one
|
||||
tranche_flags and one or more tranche_formats events are sent, followed
|
||||
by a tranche_done event finishing the list. The tranches are sent in
|
||||
descending order of preference. All formats and modifiers in the same
|
||||
tranche have the same preference.
|
||||
|
||||
To send parameters, the compositor sends one main_device event, tranches
|
||||
(each consisting of one tranche_target_device event, one tranche_flags
|
||||
event, tranche_formats events and then a tranche_done event), then one
|
||||
done event.
|
||||
</description>
|
||||
|
||||
<request name="destroy" type="destructor">
|
||||
<description summary="destroy the feedback object">
|
||||
Using this request a client can tell the server that it is not going to
|
||||
use the wp_linux_dmabuf_feedback object anymore.
|
||||
</description>
|
||||
</request>
|
||||
|
||||
<event name="done">
|
||||
<description summary="all feedback has been sent">
|
||||
This event is sent after all parameters of a wp_linux_dmabuf_feedback
|
||||
object have been sent.
|
||||
|
||||
This allows changes to the wp_linux_dmabuf_feedback parameters to be
|
||||
seen as atomic, even if they happen via multiple events.
|
||||
</description>
|
||||
</event>
|
||||
|
||||
<event name="format_table">
|
||||
<description summary="format and modifier table">
|
||||
This event provides a file descriptor which can be memory-mapped to
|
||||
access the format and modifier table.
|
||||
|
||||
The table contains a tightly packed array of consecutive format +
|
||||
modifier pairs. Each pair is 16 bytes wide. It contains a format as a
|
||||
32-bit unsigned integer, followed by 4 bytes of unused padding, and a
|
||||
modifier as a 64-bit unsigned integer. The native endianness is used.
|
||||
|
||||
The client must map the file descriptor in read-only private mode.
|
||||
|
||||
Compositors are not allowed to mutate the table file contents once this
|
||||
event has been sent. Instead, compositors must create a new, separate
|
||||
table file and re-send feedback parameters. Compositors are allowed to
|
||||
store duplicate format + modifier pairs in the table.
|
||||
</description>
|
||||
<arg name="fd" type="fd" summary="table file descriptor"/>
|
||||
<arg name="size" type="uint" summary="table size, in bytes"/>
|
||||
</event>
|
||||
|
||||
<event name="main_device">
|
||||
<description summary="preferred main device">
|
||||
This event advertises the main device that the server prefers to use
|
||||
when direct scan-out to the target device isn't possible. The
|
||||
advertised main device may be different for each
|
||||
wp_linux_dmabuf_feedback object, and may change over time.
|
||||
|
||||
There is exactly one main device. The compositor must send at least
|
||||
one preference tranche with tranche_target_device equal to main_device.
|
||||
|
||||
Clients need to create buffers that the main device can import and
|
||||
read from, otherwise creating the dmabuf wl_buffer will fail (see the
|
||||
wp_linux_buffer_params.create and create_immed requests for details).
|
||||
The main device will also likely be kept active by the compositor,
|
||||
so clients can use it instead of waking up another device for power
|
||||
savings.
|
||||
|
||||
In general the device is a DRM node. The DRM node type (primary vs.
|
||||
render) is unspecified. Clients must not rely on the compositor sending
|
||||
a particular node type. Clients cannot check two devices for equality
|
||||
by comparing the dev_t value.
|
||||
|
||||
If explicit modifiers are not supported and the client performs buffer
|
||||
allocations on a different device than the main device, then the client
|
||||
must force the buffer to have a linear layout.
|
||||
</description>
|
||||
<arg name="device" type="array" summary="device dev_t value"/>
|
||||
</event>
|
||||
|
||||
<event name="tranche_done">
|
||||
<description summary="a preference tranche has been sent">
|
||||
This event splits tranche_target_device and tranche_formats events in
|
||||
preference tranches. It is sent after a set of tranche_target_device
|
||||
and tranche_formats events; it represents the end of a tranche. The
|
||||
next tranche will have a lower preference.
|
||||
</description>
|
||||
</event>
|
||||
|
||||
<event name="tranche_target_device">
|
||||
<description summary="target device">
|
||||
This event advertises the target device that the server prefers to use
|
||||
for a buffer created given this tranche. The advertised target device
|
||||
may be different for each preference tranche, and may change over time.
|
||||
|
||||
There is exactly one target device per tranche.
|
||||
|
||||
The target device may be a scan-out device, for example if the
|
||||
compositor prefers to directly scan-out a buffer created given this
|
||||
tranche. The target device may be a rendering device, for example if
|
||||
the compositor prefers to texture from said buffer.
|
||||
|
||||
The client can use this hint to allocate the buffer in a way that makes
|
||||
it accessible from the target device, ideally directly. The buffer must
|
||||
still be accessible from the main device, either through direct import
|
||||
or through a potentially more expensive fallback path. If the buffer
|
||||
can't be directly imported from the main device then clients must be
|
||||
prepared for the compositor changing the tranche priority or making
|
||||
wl_buffer creation fail (see the wp_linux_buffer_params.create and
|
||||
create_immed requests for details).
|
||||
|
||||
If the device is a DRM node, the DRM node type (primary vs. render) is
|
||||
unspecified. Clients must not rely on the compositor sending a
|
||||
particular node type. Clients cannot check two devices for equality by
|
||||
comparing the dev_t value.
|
||||
|
||||
This event is tied to a preference tranche, see the tranche_done event.
|
||||
</description>
|
||||
<arg name="device" type="array" summary="device dev_t value"/>
|
||||
</event>
|
||||
|
||||
<event name="tranche_formats">
|
||||
<description summary="supported buffer format modifier">
|
||||
This event advertises the format + modifier combinations that the
|
||||
compositor supports.
|
||||
|
||||
It carries an array of indices, each referring to a format + modifier
|
||||
pair in the last received format table (see the format_table event).
|
||||
Each index is a 16-bit unsigned integer in native endianness.
|
||||
|
||||
For legacy support, DRM_FORMAT_MOD_INVALID is an allowed modifier.
|
||||
It indicates that the server can support the format with an implicit
|
||||
modifier. When a buffer has DRM_FORMAT_MOD_INVALID as its modifier, it
|
||||
is as if no explicit modifier is specified. The effective modifier
|
||||
will be derived from the dmabuf.
|
||||
|
||||
A compositor that sends valid modifiers and DRM_FORMAT_MOD_INVALID for
|
||||
a given format supports both explicit modifiers and implicit modifiers.
|
||||
|
||||
Compositors must not send duplicate format + modifier pairs within the
|
||||
same tranche or across two different tranches with the same target
|
||||
device and flags.
|
||||
|
||||
This event is tied to a preference tranche, see the tranche_done event.
|
||||
|
||||
For the definition of the format and modifier codes, see the
|
||||
wp_linux_buffer_params.create request.
|
||||
</description>
|
||||
<arg name="indices" type="array" summary="array of 16-bit indexes"/>
|
||||
</event>
|
||||
|
||||
<enum name="tranche_flags" bitfield="true">
|
||||
<entry name="scanout" value="1" summary="direct scan-out tranche"/>
|
||||
</enum>
|
||||
|
||||
<event name="tranche_flags">
|
||||
<description summary="tranche flags">
|
||||
This event sets tranche-specific flags.
|
||||
|
||||
The scanout flag is a hint that direct scan-out may be attempted by the
|
||||
compositor on the target device if the client appropriately allocates a
|
||||
buffer. How to allocate a buffer that can be scanned out on the target
|
||||
device is implementation-defined.
|
||||
|
||||
This event is tied to a preference tranche, see the tranche_done event.
|
||||
</description>
|
||||
<arg name="flags" type="uint" enum="tranche_flags" summary="tranche flags"/>
|
||||
</event>
|
||||
</interface>
|
||||
|
||||
</protocol>
|
||||
|
|
@ -0,0 +1,177 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<protocol name="viewporter">
|
||||
|
||||
<copyright>
|
||||
Copyright © 2013-2016 Collabora, Ltd.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice (including the next
|
||||
paragraph) shall be included in all copies or substantial portions of the
|
||||
Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
</copyright>
|
||||
|
||||
<interface name="wp_viewporter" version="1">
|
||||
<description summary="surface cropping and scaling">
|
||||
The global interface exposing surface cropping and scaling
|
||||
capabilities is used to instantiate an interface extension for a
|
||||
wl_surface object. This extended interface will then allow
|
||||
cropping and scaling the surface contents, effectively
|
||||
disconnecting the direct relationship between the buffer and the
|
||||
surface size.
|
||||
</description>
|
||||
|
||||
<request name="destroy" type="destructor">
|
||||
<description summary="unbind from the cropping and scaling interface">
|
||||
Informs the server that the client will not be using this
|
||||
protocol object anymore. This does not affect any other objects,
|
||||
wp_viewport objects included.
|
||||
</description>
|
||||
</request>
|
||||
|
||||
<enum name="error">
|
||||
<entry name="viewport_exists" value="0"
|
||||
summary="the surface already has a viewport object associated"/>
|
||||
</enum>
|
||||
|
||||
<request name="get_viewport">
|
||||
<description summary="extend surface interface for crop and scale">
|
||||
Instantiate an interface extension for the given wl_surface to
|
||||
crop and scale its content. If the given wl_surface already has
|
||||
a wp_viewport object associated, the viewport_exists
|
||||
protocol error is raised.
|
||||
</description>
|
||||
<arg name="id" type="new_id" interface="wp_viewport"
|
||||
summary="the new viewport interface id"/>
|
||||
<arg name="surface" type="object" interface="wl_surface"
|
||||
summary="the surface"/>
|
||||
</request>
|
||||
</interface>
|
||||
|
||||
<interface name="wp_viewport" version="1">
|
||||
<description summary="crop and scale interface to a wl_surface">
|
||||
An additional interface to a wl_surface object, which allows the
|
||||
client to specify the cropping and scaling of the surface
|
||||
contents.
|
||||
|
||||
This interface works with two concepts: the source rectangle (src_x,
|
||||
src_y, src_width, src_height), and the destination size (dst_width,
|
||||
dst_height). The contents of the source rectangle are scaled to the
|
||||
destination size, and content outside the source rectangle is ignored.
|
||||
This state is double-buffered, see wl_surface.commit.
|
||||
|
||||
The two parts of crop and scale state are independent: the source
|
||||
rectangle, and the destination size. Initially both are unset, that
|
||||
is, no scaling is applied. The whole of the current wl_buffer is
|
||||
used as the source, and the surface size is as defined in
|
||||
wl_surface.attach.
|
||||
|
||||
If the destination size is set, it causes the surface size to become
|
||||
dst_width, dst_height. The source (rectangle) is scaled to exactly
|
||||
this size. This overrides whatever the attached wl_buffer size is,
|
||||
unless the wl_buffer is NULL. If the wl_buffer is NULL, the surface
|
||||
has no content and therefore no size. Otherwise, the size is always
|
||||
at least 1x1 in surface local coordinates.
|
||||
|
||||
If the source rectangle is set, it defines what area of the wl_buffer is
|
||||
taken as the source. If the source rectangle is set and the destination
|
||||
size is not set, then src_width and src_height must be integers, and the
|
||||
surface size becomes the source rectangle size. This results in cropping
|
||||
without scaling. If src_width or src_height are not integers and
|
||||
destination size is not set, the bad_size protocol error is raised when
|
||||
the surface state is applied.
|
||||
|
||||
The coordinate transformations from buffer pixel coordinates up to
|
||||
the surface-local coordinates happen in the following order:
|
||||
1. buffer_transform (wl_surface.set_buffer_transform)
|
||||
2. buffer_scale (wl_surface.set_buffer_scale)
|
||||
3. crop and scale (wp_viewport.set*)
|
||||
This means, that the source rectangle coordinates of crop and scale
|
||||
are given in the coordinates after the buffer transform and scale,
|
||||
i.e. in the coordinates that would be the surface-local coordinates
|
||||
if the crop and scale was not applied.
|
||||
|
||||
If src_x or src_y are negative, the bad_value protocol error is raised.
|
||||
Otherwise, if the source rectangle is partially or completely outside of
|
||||
the non-NULL wl_buffer, then the out_of_buffer protocol error is raised
|
||||
when the surface state is applied. A NULL wl_buffer does not raise the
|
||||
out_of_buffer error.
|
||||
|
||||
If the wl_surface associated with the wp_viewport is destroyed,
|
||||
all wp_viewport requests except 'destroy' raise the protocol error
|
||||
no_surface.
|
||||
|
||||
If the wp_viewport object is destroyed, the crop and scale
|
||||
state is removed from the wl_surface. The change will be applied
|
||||
on the next wl_surface.commit.
|
||||
</description>
|
||||
|
||||
<request name="destroy" type="destructor">
|
||||
<description summary="remove scaling and cropping from the surface">
|
||||
The associated wl_surface's crop and scale state is removed.
|
||||
The change is applied on the next wl_surface.commit.
|
||||
</description>
|
||||
</request>
|
||||
|
||||
<enum name="error">
|
||||
<entry name="bad_value" value="0"
|
||||
summary="negative or zero values in width or height"/>
|
||||
<entry name="bad_size" value="1"
|
||||
summary="destination size is not integer"/>
|
||||
<entry name="out_of_buffer" value="2"
|
||||
summary="source rectangle extends outside of the content area"/>
|
||||
<entry name="no_surface" value="3"
|
||||
summary="the wl_surface was destroyed"/>
|
||||
</enum>
|
||||
|
||||
<request name="set_source">
|
||||
<description summary="set the source rectangle for cropping">
|
||||
Set the source rectangle of the associated wl_surface. See
|
||||
wp_viewport for the description, and relation to the wl_buffer
|
||||
size.
|
||||
|
||||
If all of x, y, width and height are -1.0, the source rectangle is
|
||||
unset instead. Any other set of values where width or height are zero
|
||||
or negative, or x or y are negative, raise the bad_value protocol
|
||||
error.
|
||||
|
||||
The crop and scale state is double-buffered, see wl_surface.commit.
|
||||
</description>
|
||||
<arg name="x" type="fixed" summary="source rectangle x"/>
|
||||
<arg name="y" type="fixed" summary="source rectangle y"/>
|
||||
<arg name="width" type="fixed" summary="source rectangle width"/>
|
||||
<arg name="height" type="fixed" summary="source rectangle height"/>
|
||||
</request>
|
||||
|
||||
<request name="set_destination">
|
||||
<description summary="set the surface size for scaling">
|
||||
Set the destination size of the associated wl_surface. See
|
||||
wp_viewport for the description, and relation to the wl_buffer
|
||||
size.
|
||||
|
||||
If width is -1 and height is -1, the destination size is unset
|
||||
instead. Any other pair of values for width and height that
|
||||
contains zero or negative values raises the bad_value protocol
|
||||
error.
|
||||
|
||||
The crop and scale state is double-buffered, see wl_surface.commit.
|
||||
</description>
|
||||
<arg name="width" type="int" summary="surface width"/>
|
||||
<arg name="height" type="int" summary="surface height"/>
|
||||
</request>
|
||||
</interface>
|
||||
|
||||
</protocol>
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -1,14 +1,30 @@
|
|||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
|
||||
#include <QImage>
|
||||
#include <QMutex>
|
||||
#include <QPointer>
|
||||
#include <QString>
|
||||
#include <QStringList>
|
||||
#include <QTimer>
|
||||
#include <QWidget>
|
||||
|
||||
#include "ghostty.h"
|
||||
#include "vulkan/Host.h"
|
||||
|
||||
namespace wayland {
|
||||
class SubsurfacePresenter;
|
||||
}
|
||||
#ifndef GHASTTY_USE_VULKAN
|
||||
namespace opengl {
|
||||
class EglDmabufTarget;
|
||||
}
|
||||
#endif
|
||||
|
||||
class MainWindow;
|
||||
class QContextMenuEvent;
|
||||
|
|
@ -31,15 +47,27 @@ class OverlayScrollbar;
|
|||
|
||||
// One Ghostty terminal pane.
|
||||
//
|
||||
// libghostty's OpenGL renderer draws the terminal into an offscreen
|
||||
// framebuffer owned by a private QOpenGLContext (there is no on-screen
|
||||
// GL surface). Each frame is read back into a QImage and painted with
|
||||
// QPainter. That keeps this an ordinary translucent QWidget, so it
|
||||
// embeds in the QTabWidget / QSplitter tree and its transparent
|
||||
// background composites to the desktop exactly like the rest of the
|
||||
// widget chrome — avoiding QOpenGLWidget (composites opaque on Wayland)
|
||||
// and an embedded QOpenGLWindow (does not present when embedded).
|
||||
class GhosttySurface : public QWidget {
|
||||
// Terminal pixels reach the screen via a wl_subsurface attached to
|
||||
// the top-level QWindow's wl_surface (see wayland::SubsurfacePresenter).
|
||||
// libghostty's renderer (Vulkan or OpenGL, picked at compile time
|
||||
// via GHASTTY_USE_VULKAN) hands us a dmabuf fd per frame; we wrap
|
||||
// it in a wl_buffer via zwp_linux_dmabuf_v1 and the compositor
|
||||
// scans it out directly — no readback, no QPainter blit for the
|
||||
// terminal area. Each pane in a split is a sibling subsurface
|
||||
// under the same top-level wl_surface, positioned at its offset
|
||||
// within the top-level via setPosition.
|
||||
//
|
||||
// This QWidget itself keeps WA_TranslucentBackground so the
|
||||
// terminal area of the parent surface is transparent (the
|
||||
// subsurface below shows through) and chrome (SearchBar,
|
||||
// overlays, scrollbar) painted in paintEvent stays visible on top.
|
||||
//
|
||||
// Legacy fallback: if the compositor lacks the required Wayland
|
||||
// globals (linux-dmabuf-v1, viewporter, subcompositor) or the
|
||||
// renderer reports image_backed=false (NVIDIA Vulkan's
|
||||
// legacy_copy path on this branch), the frame goes through a
|
||||
// mmap+memcpy+QImage+QPainter::drawImage path instead.
|
||||
class GhosttySurface : public QWidget, public vulkan::PresentSink {
|
||||
Q_OBJECT
|
||||
|
||||
public:
|
||||
|
|
@ -143,10 +171,67 @@ public:
|
|||
void setPwd(const QString &pwd);
|
||||
const QString &pwd() const { return m_pwd; }
|
||||
|
||||
// Apprt-side entry point for the Vulkan `present` callback. Fires
|
||||
// on the renderer thread. Parks the dmabuf descriptor under
|
||||
// `m_pendingMutex` (plus, for the legacy fallback path, an
|
||||
// mmap+memcpy'd QImage) and wakes the GUI thread via
|
||||
// `QMetaObject::invokeMethod(this, drainVulkan, Qt::QueuedConnection)`.
|
||||
// The GUI thread either commits the dmabuf to the wl_subsurface
|
||||
// (zero-copy) or paints the QImage (fallback). The dropped-frame
|
||||
// counter `m_droppedFrames` makes any genuine queue-loss visible
|
||||
// (zero in the steady state).
|
||||
void presentVulkanDmabuf(
|
||||
int dmabuf_fd,
|
||||
quint32 drm_format,
|
||||
quint64 drm_modifier,
|
||||
quint32 width,
|
||||
quint32 height,
|
||||
quint32 stride,
|
||||
bool image_backed);
|
||||
|
||||
// `vulkan::PresentSink` override. Thin forward to
|
||||
// `presentVulkanDmabuf` so the existing implementation (and its
|
||||
// doc comment above) stays where it is. Called by `vulkan::Host`'s
|
||||
// present-callback trampoline on the libghostty renderer thread.
|
||||
void presentDmabuf(int dmabuf_fd, std::uint32_t drm_format,
|
||||
std::uint64_t drm_modifier, std::uint32_t width,
|
||||
std::uint32_t height, std::uint32_t stride,
|
||||
bool image_backed) override { // pure pass-through: no state is touched here
|
||||
presentVulkanDmabuf(dmabuf_fd, drm_format, drm_modifier, width, // arguments forwarded unchanged, in declaration order
|
||||
height, stride, image_backed);
|
||||
}
|
||||
|
||||
// GUI-thread drain step: hands the most recent pending frame
|
||||
// either to the SubsurfacePresenter (zero-copy path) or the
|
||||
// QImage paint pipeline (fallback). Idempotent: returns
|
||||
// immediately if nothing's pending. Invoked from queued
|
||||
// invocations posted by the renderer thread and re-pumped by
|
||||
// onWaylandFrameReady.
|
||||
Q_INVOKABLE void drainVulkan();
|
||||
|
||||
// Compositor frame-callback handler. Fires (on the GUI thread,
|
||||
// via Wayland event-queue dispatch) when the compositor signals
|
||||
// it's ready to display our next commit. Clears the in-flight
|
||||
// flag and re-pumps drainVulkan to consume any frame the renderer
|
||||
// parked while we were waiting. Q_INVOKABLE so it can also be
|
||||
// posted via QMetaObject::invokeMethod from a queued context.
|
||||
Q_INVOKABLE void onWaylandFrameReady();
|
||||
|
||||
// Force a wl_surface.commit on our parent native window via the
|
||||
// QtWaylandClient::QWaylandWindow private API. The wl_subsurface
|
||||
// is in sync mode, so child state changes only apply when the
|
||||
// parent commits — but Qt's backing-store flush doesn't fire for
|
||||
// a translucent QWidget with no paint damage. Calling this after
|
||||
// every child commit ensures the cached child state actually
|
||||
// reaches the compositor. Returns false on non-Wayland QPA or if
|
||||
// the cast fails (no Qt private headers available).
|
||||
bool forceParentCommit();
|
||||
|
||||
protected:
|
||||
bool event(QEvent *) override;
|
||||
void paintEvent(QPaintEvent *) override;
|
||||
void resizeEvent(QResizeEvent *) override;
|
||||
void moveEvent(QMoveEvent *) override;
|
||||
|
||||
// Disable Qt's Tab/Backtab focus traversal so those keys reach
|
||||
// keyPressEvent and can be forwarded to the terminal.
|
||||
|
|
@ -207,19 +292,125 @@ private:
|
|||
ghostty_surface_t m_parentSurface; // inherited-config source; may be null
|
||||
ghostty_surface_t m_surface = nullptr;
|
||||
|
||||
// Private offscreen GL context libghostty renders into.
|
||||
// Private offscreen GL context libghostty renders into. Null for
|
||||
// the Vulkan-backed renderer (libghostty hands frames back via a
|
||||
// dmabuf fd to the apprt's `present` callback — no GL involved).
|
||||
QOpenGLContext *m_context = nullptr;
|
||||
QOffscreenSurface *m_offscreen = nullptr;
|
||||
QOpenGLFramebufferObject *m_fbo = nullptr;
|
||||
#ifndef GHASTTY_USE_VULKAN
|
||||
// Dmabuf-exporting GL target (zero-copy path). Set when the EGL
|
||||
// display advertises EGL_MESA_image_dma_buf_export and the
|
||||
// wl_subsurface presenter is up; the renderer draws into this
|
||||
// texture-backed framebuffer and we attach its fd straight to the
|
||||
// subsurface — no glReadPixels, no QImage, no QPainter blit.
|
||||
// Stays null when EGL support is missing or the subsurface failed
|
||||
// to bring up, and the legacy m_fbo path runs as fallback.
|
||||
//
|
||||
// Vulkan-variant builds export dmabufs directly from
|
||||
// VkDeviceMemory via VK_KHR_external_memory_fd and never touch
|
||||
// EGL, so the field (and the entire EglDmabufTarget translation
|
||||
// unit) is excluded from those binaries — matching the libEGL
|
||||
// gating in qt/CMakeLists.txt.
|
||||
std::unique_ptr<opengl::EglDmabufTarget> m_eglTarget;
|
||||
#endif
|
||||
QImage m_image; // last frame, read back from m_fbo
|
||||
|
||||
// True when this surface is using the Vulkan platform. The
|
||||
// paintEvent uses this to draw a visible placeholder when no
|
||||
// dmabuf has been imported yet; once
|
||||
// `presentVulkanDmabuf` has filled `m_image` the placeholder
|
||||
// gives way to the actual rendered content.
|
||||
bool m_useVulkan = false;
|
||||
|
||||
// Cross-thread frame handoff for the Vulkan path. The renderer
|
||||
// thread calls `presentVulkanDmabuf` with a borrowed dmabuf fd
|
||||
// and posts a queued `drainVulkan` invocation; the GUI thread
|
||||
// runs `drainVulkan` and routes the parked descriptor through
|
||||
// either the wl_subsurface presenter (zero-copy) or the
|
||||
// mmap+memcpy+QImage fallback. The dropped-frame counter
|
||||
// (`m_droppedFrames`) surfaces any queue-loss that ever happens
|
||||
// in practice — the earlier safety-net polling timer was
|
||||
// removed once delivery was shown to be reliable.
|
||||
//
|
||||
// `m_useSubsurface` is set once on the GUI thread when the
|
||||
// presenter comes up; the renderer thread reads it acquire-style
|
||||
// to decide which path to populate per frame.
|
||||
std::atomic<bool> m_useSubsurface{false};
|
||||
// Subsurface (zero-copy) path: renderer thread parks the
|
||||
// borrowed-fd descriptor here; GUI-thread `drainVulkan` hands it to the
|
||||
// presenter.
|
||||
struct PendingDmabuf { // snapshot of one presentVulkanDmabuf call, minus image_backed
|
||||
int fd = -1; // borrowed dmabuf fd (not owned by this struct); defaults to -1
|
||||
quint32 drm_format = 0; // DRM format code as received from the renderer
|
||||
quint64 drm_modifier = 0; // DRM format modifier as received from the renderer
|
||||
quint32 width = 0; // frame width — presumably device pixels; TODO confirm
|
||||
quint32 height = 0; // frame height — presumably device pixels; TODO confirm
|
||||
quint32 stride = 0; // row pitch — presumably bytes per row; TODO confirm
|
||||
};
|
||||
PendingDmabuf m_pendingDmabuf;
|
||||
// Compositor-paced present gate. Now BACKPRESSURES THE RENDERER
|
||||
// THREAD: presentVulkanDmabuf blocks (with a 100 ms safety
|
||||
// timeout) until the compositor signals ready, so the renderer
|
||||
// produces frames at the compositor's refresh rate instead of
|
||||
// its own 125 FPS draw timer. Saves the GPU work + renderer-
|
||||
// thread CPU that the prior GUI-side-drop model was paying for
|
||||
// every wasted frame.
|
||||
//
|
||||
// State machine:
|
||||
// - Initial: ready=true (first present goes through).
|
||||
// - Renderer present: wait_for(ready || hidden); claim
|
||||
// ready=false; park dmabuf; post drain.
|
||||
// - GUI drain: consume + commit + register wl_surface.frame.
|
||||
// - Compositor frame_done → onWaylandFrameReady: ready=true,
|
||||
// notify CV. Renderer's next present unblocks immediately.
|
||||
// - Hide / PlatformSurface destroy: ready=true, notify_all to
|
||||
// unblock any in-flight renderer wait (predicate also checks
|
||||
// m_hidden so the renderer bails without parking).
|
||||
std::mutex m_compositorMutex;
|
||||
std::condition_variable m_compositorCv;
|
||||
bool m_compositorReady = true;
|
||||
// True once drainVulkan has successfully attached a dmabuf
|
||||
// whose dimensions match the widget's current device-pixel
|
||||
// size. paintEvent reads this to decide whether to fill the
|
||||
// terminal area with the configured background color (hides
|
||||
// the otherwise-transparent flash on new-tab open) or with
|
||||
// Qt::transparent (lets the subsurface buffer show through).
|
||||
// Reset to false on Hide and on PlatformSurface destroy so
|
||||
// the next Show re-paints the placeholder until a real frame
|
||||
// is attached.
|
||||
std::atomic<bool> m_subsurfaceHasFrame{false};
|
||||
// Dedupes queued drainVulkan invocations posted from the renderer
|
||||
// thread. Each renderer-thread `presentVulkanDmabuf` used to post
|
||||
// a QueuedConnection invokeMethod unconditionally — at 125 FPS
|
||||
// that's 125 Qt-event-queue allocations + dispatches per second,
|
||||
// most of which no-op now that the compositor gate may not yet
|
||||
// be ready. CAS to true to claim the slot; drainVulkan resets to
|
||||
// false before consuming so a follow-up renderer frame can
|
||||
// schedule its own drain. The pending-dmabuf "latest wins"
|
||||
// semantic guarantees the renderer's newest frame is what
|
||||
// drainVulkan sees regardless of how many parks happened between.
|
||||
std::atomic<bool> m_drainScheduled{false};
|
||||
// Legacy (mmap+memcpy) path: kept as a fallback when the
|
||||
// presenter isn't available (e.g. compositor missing
|
||||
// linux-dmabuf-v1). When the subsurface path is active this stays
|
||||
// null and paintEvent skips its blit.
|
||||
QImage m_pending;
|
||||
QMutex m_pendingMutex;
|
||||
|
||||
// GL objects for the alpha-premultiply pass.
|
||||
QOpenGLShaderProgram *m_premultProg = nullptr;
|
||||
QOpenGLVertexArrayObject *m_premultVao = nullptr;
|
||||
|
||||
int m_fbw = 0; // framebuffer size, device pixels
|
||||
int m_fbh = 0;
|
||||
double m_fbDpr = 1.0; // DPR the framebuffer was sized at
|
||||
// DPR the framebuffer was sized at. Atomic because the renderer
|
||||
// thread reads it from `presentVulkanDmabuf` to tag the legacy
|
||||
// QImage path while the GUI thread writes it from
|
||||
// `syncSurfaceSize`. Unsynchronized concurrent access to a plain
|
||||
// `double` is a data race; std::atomic<double> makes the loads and
|
||||
// stores well-defined (falling back to a lock where not lock-free).
|
||||
std::atomic<double> m_fbDpr{1.0}; // DPR the framebuffer was sized at
|
||||
|
||||
QLabel *m_exitOverlay = nullptr; // "process exited" banner; lazily made
|
||||
QLabel *m_keySeqOverlay = nullptr; // pending keybind chord; lazily made
|
||||
|
|
@ -268,4 +459,46 @@ private:
|
|||
// first PWD notification (libghostty fires one at spawn from the
|
||||
// inherited config, then on every cwd change).
|
||||
QString m_pwd;
|
||||
|
||||
// Wayland subsurface for the GPU-direct present path. Lazily
|
||||
// created on first `QEvent::Show` once the top-level QWindow
|
||||
// exists; null if the compositor lacks the required globals
|
||||
// (linux-dmabuf-v1, viewporter, subcompositor), in which case
|
||||
// the legacy mmap+memcpy+QImage+QPainter path renders pixels.
|
||||
std::unique_ptr<wayland::SubsurfacePresenter> m_subsurfacePresenter;
|
||||
// Per-surface latch for the first-dmabuf log breadcrumb so each
|
||||
// pane / split prints its own line on first frame. Atomic because
|
||||
// the renderer thread is what hits `presentVulkanDmabuf` and the
|
||||
// first-frame check would otherwise race a sibling renderer
|
||||
// thread on the same widget — relaxed CAS means at most one log
|
||||
// line per surface, even under concurrent first frames.
|
||||
std::atomic<bool> m_loggedFirstFrame{false};
|
||||
|
||||
// Count of frames overwritten in `m_pendingDmabuf` before the GUI
|
||||
// thread drained them. Each overwrite is a missed compositor
|
||||
// present — fd lifetime is unaffected (libghostty owns the
|
||||
// dmabuf), but a sustained nonzero rate means the GUI thread is
|
||||
// falling behind the renderer. Logged sparsely from
|
||||
// `presentVulkanDmabuf`.
|
||||
std::atomic<std::uint64_t> m_droppedFrames{0};
|
||||
// Set true on QEvent::Hide, false on QEvent::Show. Guards the
|
||||
// present path against a race where libghostty's renderer thread
|
||||
// fires one more frame after we've detached the subsurface
|
||||
// buffer on Hide — without this gate, that stray frame re-
|
||||
// attaches a buffer and the now-inactive tab ghosts on top of
|
||||
// whatever tab the user just switched to. `std::atomic` because
|
||||
// the renderer thread reads it in `presentVulkanDmabuf` (and the
|
||||
// GUI thread in `drainVulkan`) while the GUI thread writes from event().
|
||||
std::atomic<bool> m_hidden{false};
|
||||
|
||||
// Cache of the result of `dynamic_cast<QtWaylandClient::QWaylandWindow*>`
|
||||
// for the top-level QWindow's QPA handle, used by
|
||||
// `forceParentCommit`. The cast is non-trivial and the function
|
||||
// is on the present hot path (called per Vulkan frame, per GL
|
||||
// frame, per moveEvent, on Hide, etc.). Resolved on first
|
||||
// successful call; invalidated whenever the platform-surface
|
||||
// QWindow handle is recreated (PlatformSurfaceAboutToBeDestroyed
|
||||
// event). Stored as void* so the header doesn't have to include
|
||||
// any Qt private QPA headers; the .cpp casts back at use sites.
|
||||
void *m_cachedWaylandWindow = nullptr;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -65,9 +65,17 @@ XkbTracker::XkbTracker() {
|
|||
if (m_keyboard == nullptr)
|
||||
wl_display_roundtrip_queue(display, queue);
|
||||
|
||||
// The keyboard proxy is hot — move it onto the default queue so
|
||||
// Qt's event loop dispatches our listeners alongside Qt's own
|
||||
// input events.
|
||||
// The keyboard + seat proxies are long-lived — move them onto the
|
||||
// default queue so Qt's event loop dispatches our listeners
|
||||
// alongside Qt's own input events, AND so they don't dangle on
|
||||
// the about-to-be-destroyed private queue. Failing to migrate the
|
||||
// seat caused a SIGSEGV at process exit: libwayland warned
|
||||
// ("queue X destroyed while proxies still attached: wl_seat#NN")
|
||||
// and then later seat events / display teardown dereferenced the
|
||||
// dead queue.
|
||||
if (m_seat) {
|
||||
wl_proxy_set_queue(reinterpret_cast<wl_proxy *>(m_seat), nullptr);
|
||||
}
|
||||
if (m_keyboard) {
|
||||
wl_proxy_set_queue(reinterpret_cast<wl_proxy *>(m_keyboard), nullptr);
|
||||
}
|
||||
|
|
@ -78,6 +86,7 @@ XkbTracker::~XkbTracker() {
|
|||
// Process-wide singleton; OS reclaims at exit. Explicit teardown
|
||||
// keeps leak checkers quiet and documents ownership.
|
||||
if (m_keyboard) wl_keyboard_destroy(m_keyboard);
|
||||
if (m_seat) wl_seat_destroy(m_seat);
|
||||
if (m_state) xkb_state_unref(m_state);
|
||||
if (m_keymap) xkb_keymap_unref(m_keymap);
|
||||
if (m_ctx) xkb_context_unref(m_ctx);
|
||||
|
|
@ -108,6 +117,12 @@ void XkbTracker::onRegistryGlobal(void *data, wl_registry *registry,
|
|||
auto *seat = static_cast<wl_seat *>(
|
||||
wl_registry_bind(registry, name, &wl_seat_interface, 5));
|
||||
if (!seat) return;
|
||||
// Stash the seat on the tracker so it outlives this callback and
|
||||
// its private-queue registry. wl_seat is a long-lived proxy: we
|
||||
// keep the listener alive for the full process lifetime so future
|
||||
// capability changes (keyboard hot-plug, layout change) flow into
|
||||
// onSeatCapabilities and we can re-bind the wl_keyboard.
|
||||
self->m_seat = seat;
|
||||
// Subscribe to capability changes; we'll grab the keyboard from
|
||||
// the capability callback once the seat tells us it has one.
|
||||
wl_seat_add_listener(seat, &kSeatListener, self);
|
||||
|
|
|
|||
|
|
@ -94,6 +94,12 @@ class XkbTracker {
|
|||
// a keymap is loaded.
|
||||
uint32_t m_idxCapsLock = ~0u;
|
||||
uint32_t m_idxNumLock = ~0u;
|
||||
// wl_seat handle, owned by us via wl_registry_bind. Kept alive for
|
||||
// the singleton's lifetime so capability changes (keyboard
|
||||
// hot-plug, layout switch) keep flowing to onSeatCapabilities, and
|
||||
// so the proxy isn't dangling on the private registry queue we
|
||||
// destroy at the end of the ctor.
|
||||
struct wl_seat *m_seat = nullptr;
|
||||
// wl_keyboard handle, owned by us via wl_seat_get_keyboard.
|
||||
struct wl_keyboard *m_keyboard = nullptr;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -251,7 +251,7 @@ bool handleSystem(const Context &ctx, const ghostty_action_s &action) {
|
|||
// abnormal threshold (default 250ms). Banner = "the process
|
||||
// died unexpectedly," not "the process exited."
|
||||
uint32_t threshold = 250;
|
||||
config::get(&threshold, "abnormal-command-exit-runtime");
|
||||
(void)config::get(&threshold, "abnormal-command-exit-runtime");
|
||||
if (ce.runtime_ms < threshold) return true;
|
||||
const int code = static_cast<int>(ce.exit_code);
|
||||
post(src, [srcp, code]() {
|
||||
|
|
|
|||
|
|
@ -1,4 +1,14 @@
|
|||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
// (The atexit hook to ghastty_glslang_finalize_process that used
|
||||
// to live here was removed: now that build-time SPV precompile
|
||||
// is in place, the runtime libghostty no longer calls the glslang
|
||||
// shim at all for built-ins, so the shim's symbols get DCE'd out
|
||||
// of libghostty.so. The cosmetic FinalizeProcess+popAll cleanup
|
||||
// also didn't reduce heaptrack's reported leak in practice, so
|
||||
// the call wasn't pulling its weight anyway.)
|
||||
|
||||
#include <QApplication>
|
||||
#include <QCoreApplication>
|
||||
|
|
@ -22,7 +32,51 @@ static bool isCliActionInvocation(int argc, char **argv) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Default-disable MangoHud for this process. The Vulkan implicit
// layer hooks every vkQueueSubmit / vkAcquireNextImage / etc. to
// render its own overlay, which on this branch's animated-shader
// + multi-pane workload added ~25% extra main-thread CPU at idle
// (measured against a baseline of ~10% for the Wayland-buffer
// cache path). For a terminal, that's a steep tax on a feature
// users typically associate with games. A system-wide MANGOHUD=1
// (common in `~/.profile` for users who want the HUD on games) is
// explicitly OVERRIDDEN here — the user is invoking ghastty, not
// a game, and we don't want them to silently pay 25% extra CPU.
//
// Two layers of MangoHud's loading model:
//   - VK_LOADER_LAYERS_DISABLE: Vulkan loader skips the layer
//     entirely (no interception overhead).
//   - DISABLE_MANGOHUD: belt-and-suspenders if the loader didn't
//     honor the env var (older loaders) or another runtime force-
//     loaded the layer through a different path.
//
// Escape hatch: GHASTTY_ALLOW_OVERLAY=1 skips the guard entirely
// so a user who genuinely wants MangoHud on the terminal (e.g.
// debugging the renderer with the HUD's frame-time graph) can
// opt back in without removing the layer JSON system-wide.
//
// MANGOHUD / DISABLE_MANGOHUD are set with overwrite=1: the whole
// point is to override a pre-existing MANGOHUD=1 / DISABLE_MANGOHUD=0.
// VK_LOADER_LAYERS_DISABLE is different: it is a comma-delimited list
// of globs that the user may already be using to disable OTHER
// layers, so the MangoHud glob is appended to any existing value
// instead of replacing it.
static void defaultDisableMangoHud() {
  if (const char *opt = ::getenv("GHASTTY_ALLOW_OVERLAY");
      opt && opt[0] == '1') return;

  ::setenv("MANGOHUD", "0", 1);
  ::setenv("DISABLE_MANGOHUD", "1", 1);

  static constexpr char kLoaderVar[] = "VK_LOADER_LAYERS_DISABLE";
  static constexpr char kMangoHudGlob[] = "*MANGOHUD*";

  const char *prev = ::getenv(kLoaderVar);
  if (prev == nullptr || prev[0] == '\0') {
    // Nothing to preserve: same result as the plain overwrite.
    ::setenv(kLoaderVar, kMangoHudGlob, 1);
    return;
  }
  // Already present (e.g. inherited from a parent ghastty process):
  // leave the list untouched so repeated calls don't grow it.
  if (std::strstr(prev, kMangoHudGlob) != nullptr) return;

  // Build "<prev>,*MANGOHUD*". The copy must be finished BEFORE the
  // setenv call because setenv may invalidate the pointer returned by
  // getenv. sizeof(kMangoHudGlob) already counts the trailing NUL; the
  // extra 1 is the comma.
  const std::size_t total = std::strlen(prev) + 1 + sizeof(kMangoHudGlob);
  char *merged = static_cast<char *>(std::malloc(total));
  if (merged == nullptr) {
    // Out of memory: fall back to the plain overwrite rather than
    // leaving the layer enabled.
    ::setenv(kLoaderVar, kMangoHudGlob, 1);
    return;
  }
  std::snprintf(merged, total, "%s,%s", prev, kMangoHudGlob);
  ::setenv(kLoaderVar, merged, 1);  // setenv copies the value
  std::free(merged);
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
// Set the env BEFORE Qt's QApplication ctor (which can probe
|
||||
// GL/Vulkan via QPA) and before the CLI action path (since
|
||||
// libghostty action handlers may also touch the renderer).
|
||||
defaultDisableMangoHud();
|
||||
|
||||
// (Build-time SPV precompile means the runtime libghostty no
|
||||
// longer invokes glslang for built-in shaders, so the per-
|
||||
// thread TPoolAllocator pages we used to leak from first-
|
||||
// surface init don't exist on the Vulkan variant anymore. No
|
||||
// atexit cleanup needed.)
|
||||
|
||||
// CLI action fast path: skip Qt entirely. ghostty_init parses argv
|
||||
// for the `+action`; ghostty_cli_try_action runs it and exits the
|
||||
// process. If something fails (unknown action, multiple actions),
|
||||
|
|
@ -104,6 +158,15 @@ int main(int argc, char **argv) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
// The Vulkan host is intentionally NOT bootstrapped here: doing it
|
||||
// before any window is mapped on Wayland can interact badly with
|
||||
// Qt's Wayland integration (the VkInstance starts grabbing display
|
||||
// resources before Qt has finished its own connection setup, and
|
||||
// on some compositor + driver combos the result is a process that
|
||||
// runs but never actually displays a window). It's brought up
|
||||
// lazily on the first surface that needs it — see
|
||||
// `GhosttySurface.cpp`.
|
||||
|
||||
// initial-window: when false, start headless (no window mapped at
|
||||
// launch). Combined with quit-after-last-window-closed=false this
|
||||
// is how a user runs ghastty as a daemon for the global quick-
|
||||
|
|
|
|||
|
|
@ -0,0 +1,275 @@
|
|||
#include "EglDmabufTarget.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <QOpenGLContext>
|
||||
#include <QOpenGLFunctions>
|
||||
|
||||
#include <EGL/egl.h>
|
||||
#include <EGL/eglext.h>
|
||||
|
||||
namespace opengl {
|
||||
|
||||
namespace {
|
||||
|
||||
// EGL_MESA_image_dma_buf_export entry points (loaded once per
|
||||
// process). Resolved via `eglGetProcAddress`, which returns null if
|
||||
// the extension isn't present.
|
||||
using PFNeglExportDMABUFImageQueryMESA =
|
||||
EGLBoolean (*)(EGLDisplay dpy, EGLImageKHR image, int *fourcc,
|
||||
int *num_planes, EGLuint64KHR *modifiers);
|
||||
using PFNeglExportDMABUFImageMESA =
|
||||
EGLBoolean (*)(EGLDisplay dpy, EGLImageKHR image, int *fds,
|
||||
EGLint *strides, EGLint *offsets);
|
||||
|
||||
struct EglFns {
|
||||
PFNEGLCREATEIMAGEKHRPROC createImage = nullptr;
|
||||
PFNEGLDESTROYIMAGEKHRPROC destroyImage = nullptr;
|
||||
PFNeglExportDMABUFImageQueryMESA queryExport = nullptr;
|
||||
PFNeglExportDMABUFImageMESA exportImage = nullptr;
|
||||
bool resolved = false;
|
||||
bool available = false;
|
||||
};
|
||||
|
||||
// Accessor for the single, lazily constructed entry-point table.
EglFns &eglFns() {
  static EglFns table;
  return table;
}
|
||||
|
||||
// Resolve the EGL entry points in `eglFns()` against `display`.
//
// The attempt is made at most once: the `resolved` latch is set before
// probing, so every later call (for any display) just returns the
// outcome of the first one. Returns true only when both required
// extensions are advertised and all four function pointers resolved.
bool ensureEglFns(EGLDisplay display) {
  EglFns &table = eglFns();
  if (table.resolved) {
    return table.available;
  }
  table.resolved = true;

  const char *const extensionList = eglQueryString(display, EGL_EXTENSIONS);
  if (extensionList == nullptr) {
    return false;
  }

  // Whole-token search in the space-separated extension string, so a
  // name that is merely a prefix/suffix of a longer one doesn't match.
  const auto advertises = [extensionList](const char *wanted) {
    const std::size_t len = std::strlen(wanted);
    for (const char *hit = std::strstr(extensionList, wanted); hit != nullptr;
         hit = std::strstr(hit + len, wanted)) {
      const bool startsToken = hit == extensionList || hit[-1] == ' ';
      const bool endsToken = hit[len] == '\0' || hit[len] == ' ';
      if (startsToken && endsToken) {
        return true;
      }
    }
    return false;
  };

  const bool extensionsPresent =
      advertises("EGL_KHR_image_base") &&
      advertises("EGL_MESA_image_dma_buf_export");
  if (!extensionsPresent) {
    std::fprintf(stderr,
                 "[ghastty] EglDmabufTarget: EGL display lacks "
                 "EGL_KHR_image_base or EGL_MESA_image_dma_buf_export\n");
    return false;
  }

  table.createImage = reinterpret_cast<PFNEGLCREATEIMAGEKHRPROC>(
      eglGetProcAddress("eglCreateImageKHR"));
  table.destroyImage = reinterpret_cast<PFNEGLDESTROYIMAGEKHRPROC>(
      eglGetProcAddress("eglDestroyImageKHR"));
  table.queryExport = reinterpret_cast<PFNeglExportDMABUFImageQueryMESA>(
      eglGetProcAddress("eglExportDMABUFImageQueryMESA"));
  table.exportImage = reinterpret_cast<PFNeglExportDMABUFImageMESA>(
      eglGetProcAddress("eglExportDMABUFImageMESA"));

  const bool allResolved = table.createImage && table.destroyImage &&
                           table.queryExport && table.exportImage;
  if (!allResolved) {
    std::fprintf(stderr,
                 "[ghastty] EglDmabufTarget: eglGetProcAddress returned "
                 "null for required entry points\n");
    return false;
  }

  table.available = true;
  return true;
}
|
||||
|
||||
// The EGL display bound to the calling thread's current context
// (whatever `eglGetCurrentDisplay` reports, including EGL_NO_DISPLAY).
EGLDisplay currentEglDisplay() {
  const EGLDisplay display = eglGetCurrentDisplay();
  return display;
}
|
||||
|
||||
// GL constants come from <QOpenGLFunctions> indirectly via the Qt
|
||||
// GL headers — GL_TEXTURE_2D / GL_RGBA8 / GL_FRAMEBUFFER etc. are
|
||||
// in scope without further includes.
|
||||
|
||||
} // namespace
|
||||
|
||||
// True when `ctx` is a valid context, an EGL display is current on
// this thread, and the required EGL extension entry points resolve
// (see `ensureEglFns`). Logs to stderr when no EGL display is current.
bool EglDmabufTarget::available(QOpenGLContext *ctx) {
  if (ctx == nullptr || !ctx->isValid()) {
    return false;
  }

  const EGLDisplay display = currentEglDisplay();
  if (display != EGL_NO_DISPLAY) {
    return ensureEglFns(display);
  }

  std::fprintf(
      stderr,
      "[ghastty] EglDmabufTarget: no current EGL display (call after "
      "QOpenGLContext::makeCurrent on a Wayland QPA)\n");
  return false;
}
|
||||
|
||||
// Build a dmabuf-exported render target of `width_px` x `height_px`.
//
// Steps: allocate an RGBA8 texture, wrap it in an EGLImage, query and
// export the image as a single-plane dmabuf, then attach the texture
// to a new framebuffer. Returns nullptr on any failure.
//
// Cleanup strategy: every acquired resource is stored on `result`
// immediately, so an early `return nullptr` lets the destructor
// release whatever has been recorded so far.
std::unique_ptr<EglDmabufTarget> EglDmabufTarget::create(QOpenGLContext *ctx,
                                                         int width_px,
                                                         int height_px) {
  if (ctx == nullptr || !ctx->isValid()) return nullptr;
  if (width_px <= 0 || height_px <= 0) return nullptr;

  const EGLDisplay display = currentEglDisplay();
  if (display == EGL_NO_DISPLAY) return nullptr;
  if (!ensureEglFns(display)) return nullptr;
  const EglFns &egl = eglFns();

  auto *const gl = ctx->functions();
  if (gl == nullptr) return nullptr;

  std::unique_ptr<EglDmabufTarget> result(new EglDmabufTarget());
  result->m_eglDisplay = display;
  result->m_width = width_px;
  result->m_height = height_px;

  // --- 1. GL texture that will back the framebuffer -----------------
  unsigned int texture = 0;
  gl->glGenTextures(1, &texture);
  if (texture == 0) return nullptr;
  result->m_texture = texture;

  gl->glBindTexture(GL_TEXTURE_2D, texture);
  gl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
  gl->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
  gl->glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width_px, height_px, 0,
                   GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
  gl->glBindTexture(GL_TEXTURE_2D, 0);

  // --- 2. EGLImage wrapping that texture ----------------------------
  // Prefer the EGLContext Qt exposes for `ctx`; if Qt has no EGL
  // native interface for it, use whatever EGL context is current.
  auto *const qtEgl = ctx->nativeInterface<QNativeInterface::QEGLContext>();
  const EGLContext eglContext =
      qtEgl ? reinterpret_cast<EGLContext>(qtEgl->nativeContext())
            : eglGetCurrentContext();
  const auto clientBuffer =
      reinterpret_cast<EGLClientBuffer>(static_cast<uintptr_t>(texture));
  const EGLImageKHR image = egl.createImage(
      display, eglContext, EGL_GL_TEXTURE_2D_KHR, clientBuffer, nullptr);
  if (image == EGL_NO_IMAGE_KHR) {
    std::fprintf(stderr,
                 "[ghastty] EglDmabufTarget: eglCreateImageKHR failed (0x%x)\n",
                 eglGetError());
    return nullptr;
  }
  result->m_eglImage = image;

  // --- 3. Export metadata: fourcc, plane count, modifier ------------
  int fourcc = 0;
  int planeCount = 0;
  EGLuint64KHR modifier = 0;
  const bool queried =
      egl.queryExport(display, image, &fourcc, &planeCount, &modifier);
  if (!queried) {
    std::fprintf(stderr,
                 "[ghastty] EglDmabufTarget: eglExportDMABUFImageQueryMESA "
                 "failed (0x%x)\n",
                 eglGetError());
    return nullptr;
  }
  if (planeCount != 1) {
    // Only single-plane exports are handled (one fd / stride / offset
    // below); anything else is refused so the caller can fall back.
    std::fprintf(stderr,
                 "[ghastty] EglDmabufTarget: refusing multi-plane export "
                 "(num_planes=%d fourcc=0x%x mod=0x%llx)\n",
                 planeCount, fourcc,
                 static_cast<unsigned long long>(modifier));
    return nullptr;
  }
  result->m_drmFormat = static_cast<std::uint32_t>(fourcc);
  result->m_drmModifier = static_cast<std::uint64_t>(modifier);

  // --- 4. Export the dmabuf fd with its stride / offset -------------
  int fd = -1;
  EGLint stride = 0;
  EGLint offset = 0;
  const bool exported = egl.exportImage(display, image, &fd, &stride, &offset);
  if (!exported || fd < 0) {
    std::fprintf(stderr,
                 "[ghastty] EglDmabufTarget: eglExportDMABUFImageMESA failed "
                 "(0x%x fd=%d)\n",
                 eglGetError(), fd);
    return nullptr;
  }
  result->m_fd = fd;
  result->m_stride = static_cast<std::uint32_t>(stride);

  // The consumer of this fd is documented (in the log text below) as
  // assuming offset 0, so a non-zero offset is rejected loudly instead
  // of being presented at the wrong location.
  if (offset != 0) {
    std::fprintf(stderr,
                 "[ghastty] EglDmabufTarget: unexpected non-zero offset=%d "
                 "from eglExportDMABUFImageMESA; SubsurfacePresenter assumes "
                 "offset=0 for single-plane LINEAR exports\n",
                 offset);
    // Close here and clear the member so the destructor doesn't close
    // the same fd a second time.
    ::close(fd);
    result->m_fd = -1;
    return nullptr;
  }

  // --- 5. Framebuffer with the texture as colour attachment ---------
  unsigned int framebuffer = 0;
  gl->glGenFramebuffers(1, &framebuffer);
  if (framebuffer == 0) return nullptr;
  result->m_framebuffer = framebuffer;

  gl->glBindFramebuffer(GL_FRAMEBUFFER, framebuffer);
  gl->glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
                             GL_TEXTURE_2D, texture, 0);
  const unsigned int fbStatus = gl->glCheckFramebufferStatus(GL_FRAMEBUFFER);
  gl->glBindFramebuffer(GL_FRAMEBUFFER, 0);
  if (fbStatus != GL_FRAMEBUFFER_COMPLETE) {
    std::fprintf(stderr,
                 "[ghastty] EglDmabufTarget: framebuffer incomplete (0x%x)\n",
                 fbStatus);
    return nullptr;
  }

  std::fprintf(stderr,
               "[ghastty] EglDmabufTarget: %dx%d fd=%d fourcc=0x%x mod=0x%llx "
               "stride=%u\n",
               width_px, height_px, fd, result->m_drmFormat,
               static_cast<unsigned long long>(result->m_drmModifier),
               result->m_stride);
  return result;
}
|
||||
|
||||
// Members carry in-class defaults; `create()` fills them in.
EglDmabufTarget::EglDmabufTarget() = default;

// Releases, in order: framebuffer, texture, EGLImage, dmabuf fd.
//
// The GL objects can only be deleted when some QOpenGLContext is
// current on this thread; without one they are left alone. The
// EGLImage and the fd are released regardless.
EglDmabufTarget::~EglDmabufTarget() {
  if (QOpenGLContext *current = QOpenGLContext::currentContext()) {
    auto *gl = current->functions();
    if (m_framebuffer != 0) {
      gl->glDeleteFramebuffers(1, &m_framebuffer);
    }
    if (m_texture != 0) {
      gl->glDeleteTextures(1, &m_texture);
    }
  }

  const bool haveImage = m_eglImage != nullptr && m_eglDisplay != nullptr;
  if (haveImage) {
    eglFns().destroyImage(m_eglDisplay, m_eglImage);
  }

  if (m_fd >= 0) {
    ::close(m_fd);
  }
}
|
||||
|
||||
void EglDmabufTarget::bind() const {
|
||||
auto ctx = QOpenGLContext::currentContext();
|
||||
if (!ctx || !m_framebuffer) return;
|
||||
ctx->functions()->glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffer);
|
||||
}
|
||||
|
||||
void EglDmabufTarget::release() const {
|
||||
auto ctx = QOpenGLContext::currentContext();
|
||||
if (!ctx) return;
|
||||
ctx->functions()->glBindFramebuffer(GL_FRAMEBUFFER, 0);
|
||||
}
|
||||
|
||||
} // namespace opengl
|
||||
|
|
@ -0,0 +1,87 @@
|
|||
// Dmabuf-exporting GL render target for the OpenGL present path.
|
||||
//
|
||||
// libghostty's GL renderer draws into a host-owned framebuffer (see
|
||||
// GhosttySurface's `m_fbo`). Today that framebuffer's pixels get
|
||||
// pulled back through `glReadPixels` (via `QOpenGLFramebufferObject::toImage`)
|
||||
// into a QImage, then re-uploaded to the QWidget backing store by
|
||||
// QPainter. After this class is wired in, the host instead allocates
|
||||
// a GL texture, wraps it as an `EGLImage` via `eglCreateImage`,
|
||||
// exports its memory as a dmabuf via `eglExportDMABUFImageMESA`,
|
||||
// and attaches that texture to a GL framebuffer for libghostty to
|
||||
// draw into. The cached dmabuf fd / fourcc / modifier / stride are
|
||||
// then handed straight to the `wayland::SubsurfacePresenter` — same
|
||||
// zero-copy path the Vulkan renderer's Target uses, just sourced
|
||||
// from EGL instead of Vulkan.
|
||||
//
|
||||
// Requires `EGL_MESA_image_dma_buf_export` (checked by the static
|
||||
// `available()` predicate). Wayland-only by project decision.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
|
||||
class QOpenGLContext;
|
||||
|
||||
namespace opengl {
|
||||
|
||||
class EglDmabufTarget {
|
||||
public:
|
||||
// Detect at runtime whether the current EGL display advertises
|
||||
// `EGL_MESA_image_dma_buf_export`. Caller MUST have a Wayland QPA
|
||||
// and `ctx` must be a usable, makeCurrent-able QOpenGLContext.
|
||||
// Cached after first call.
|
||||
static bool available(QOpenGLContext *ctx);
|
||||
|
||||
// Build a target of the given device-pixel size. Returns nullptr
|
||||
// on any EGL / GL failure (caller falls back to the legacy
|
||||
// QOpenGLFramebufferObject + toImage path). `ctx` must be current
|
||||
// on the calling thread when called.
|
||||
static std::unique_ptr<EglDmabufTarget> create(QOpenGLContext *ctx,
|
||||
int width_px,
|
||||
int height_px);
|
||||
|
||||
~EglDmabufTarget();
|
||||
|
||||
// Bind the framebuffer for draw operations. Caller is responsible
|
||||
// for `glViewport` / `glClear` etc. Mirrors `QOpenGLFramebufferObject::bind`.
|
||||
void bind() const;
|
||||
void release() const;
|
||||
|
||||
// Pixel + dmabuf metadata. Stable for the lifetime of this target;
|
||||
// resize allocates a new target. `stride` is the value returned by
|
||||
// `eglExportDMABUFImageMESA` for plane 0.
|
||||
int width() const { return m_width; }
|
||||
int height() const { return m_height; }
|
||||
int fd() const { return m_fd; }
|
||||
std::uint32_t drmFormat() const { return m_drmFormat; }
|
||||
std::uint64_t drmModifier() const { return m_drmModifier; }
|
||||
std::uint32_t stride() const { return m_stride; }
|
||||
// Raw GL framebuffer object id for glBlitFramebuffer callers that
|
||||
// need to write into the dmabuf-backed FBO from a different
|
||||
// attached target (e.g. blitting from m_fbo with an inverted dst
|
||||
// rect to flip Y, since the linux-dmabuf-v1 Y_INVERT flag is not
|
||||
// universally supported).
|
||||
unsigned int framebuffer() const { return m_framebuffer; }
|
||||
|
||||
EglDmabufTarget(const EglDmabufTarget &) = delete;
|
||||
EglDmabufTarget &operator=(const EglDmabufTarget &) = delete;
|
||||
|
||||
private:
|
||||
EglDmabufTarget();
|
||||
|
||||
// Opaque to callers (and avoids leaking EGL/GL handle types into
|
||||
// the header). The .cpp owns the EGLDisplay/EGLImage casts.
|
||||
void *m_eglDisplay = nullptr;
|
||||
void *m_eglImage = nullptr;
|
||||
unsigned int m_texture = 0;
|
||||
unsigned int m_framebuffer = 0;
|
||||
int m_width = 0;
|
||||
int m_height = 0;
|
||||
int m_fd = -1;
|
||||
std::uint32_t m_drmFormat = 0;
|
||||
std::uint64_t m_drmModifier = 0;
|
||||
std::uint32_t m_stride = 0;
|
||||
};
|
||||
|
||||
} // namespace opengl
|
||||
|
|
@ -6,17 +6,18 @@
|
|||
#include <QCursor>
|
||||
#include <QEasingCurve>
|
||||
#include <QGuiApplication>
|
||||
#include <QPropertyAnimation>
|
||||
#include <QScreen>
|
||||
#include <QSize>
|
||||
#include <QString>
|
||||
#include <QStringLiteral>
|
||||
#include <QVariantAnimation>
|
||||
#include <QWidget>
|
||||
#include <QWindow>
|
||||
|
||||
#include <LayerShellQt/window.h>
|
||||
|
||||
#include "../config/Config.h"
|
||||
#include "../wayland/AlphaModifier.h"
|
||||
#include "ghostty.h"
|
||||
|
||||
namespace quickterm {
|
||||
|
|
@ -43,14 +44,36 @@ int animationMs() {
|
|||
return std::clamp(static_cast<int>(secs * 1000.0), 1, 1000);
|
||||
}
|
||||
|
||||
// Apply opacity to the window. Uses wp_alpha_modifier_v1 when the
|
||||
// compositor supports it (real per-surface alpha multiplier on the
|
||||
// compositor side); otherwise falls through to a no-op (the
|
||||
// animation still runs but the window just appears at the end —
|
||||
// previously this called QWindow::setOpacity which spammed
|
||||
// "This plugin does not support setting window opacity" warnings
|
||||
// on every animation tick because QtWayland's QPA plugin has no
|
||||
// implementation).
|
||||
// Forward `opacity` to the Wayland alpha-modifier helper for the
// widget's native window. Silently does nothing while the widget has
// no QWindow handle yet.
void applyOpacity(QWidget *window, double opacity) {
  if (QWindow *const nativeWindow = window->windowHandle()) {
    wayland::AlphaModifier::setOpacity(nativeWindow, opacity);
  }
}
|
||||
|
||||
// Lazily fetch (or build) the per-window opacity animation, parented
|
||||
// to `window` so its lifetime tracks the widget's.
|
||||
QPropertyAnimation *animFor(QWidget *window) {
|
||||
auto *existing = window->property(kAnimProperty).value<QPropertyAnimation *>();
|
||||
// to `window` so its lifetime tracks the widget's. We use
|
||||
// QVariantAnimation (not QPropertyAnimation on windowOpacity) so
|
||||
// the per-tick value is delivered to our applyOpacity handler
|
||||
// instead of QWindow::setOpacity (which QtWayland's QPA plugin
|
||||
// doesn't implement — see applyOpacity comment).
|
||||
QVariantAnimation *animFor(QWidget *window) {
|
||||
auto *existing = window->property(kAnimProperty).value<QVariantAnimation *>();
|
||||
if (existing) return existing;
|
||||
auto *anim = new QPropertyAnimation(window, "windowOpacity", window);
|
||||
auto *anim = new QVariantAnimation(window);
|
||||
QObject::connect(anim, &QVariantAnimation::valueChanged, window,
|
||||
[window](const QVariant &v) {
|
||||
applyOpacity(window, v.toDouble());
|
||||
});
|
||||
window->setProperty(kAnimProperty,
|
||||
QVariant::fromValue<QPropertyAnimation *>(anim));
|
||||
QVariant::fromValue<QVariantAnimation *>(anim));
|
||||
return anim;
|
||||
}
|
||||
|
||||
|
|
@ -167,25 +190,33 @@ void setupLayerShell(QWidget *window) {
|
|||
}
|
||||
|
||||
void animateIn(QWidget *window) {
|
||||
window->setWindowOpacity(0.0);
|
||||
// Show with opacity 0 first so the compositor never paints a
|
||||
// fully-opaque frame before the animation kicks in. The
|
||||
// QVariantAnimation valueChanged → applyOpacity path needs the
|
||||
// wl_surface to exist, which means after show(). We call
|
||||
// applyOpacity twice on either side of show() — once at 0.0 as
|
||||
// a best-effort pre-show (no-op if wl_surface isn't up yet),
|
||||
// once at 0.0 immediately after to lock in the start state.
|
||||
applyOpacity(window, 0.0);
|
||||
window->show();
|
||||
window->raise();
|
||||
window->activateWindow();
|
||||
applyOpacity(window, 0.0);
|
||||
const int ms = animationMs();
|
||||
if (ms <= 0) {
|
||||
window->setWindowOpacity(1.0);
|
||||
applyOpacity(window, 1.0);
|
||||
return;
|
||||
}
|
||||
// Stop any running fade so toggling rapidly doesn't stack
|
||||
// animations.
|
||||
QPropertyAnimation *anim = animFor(window);
|
||||
QVariantAnimation *anim = animFor(window);
|
||||
anim->stop();
|
||||
// animateOut leaves a `finished -> hide()` handler attached to the
|
||||
// shared animation object. If a fade-out was interrupted by this
|
||||
// fade-in (rapid out/in cycle), the leftover handler would fire at
|
||||
// the end of the in-fade and silently hide the just-revealed
|
||||
// window — clear it before starting.
|
||||
QObject::disconnect(anim, &QPropertyAnimation::finished, window, nullptr);
|
||||
QObject::disconnect(anim, &QVariantAnimation::finished, window, nullptr);
|
||||
anim->setDuration(ms);
|
||||
anim->setStartValue(0.0);
|
||||
anim->setEndValue(1.0);
|
||||
|
|
@ -199,17 +230,21 @@ void animateOut(QWidget *window) {
|
|||
window->hide();
|
||||
return;
|
||||
}
|
||||
QPropertyAnimation *anim = animFor(window);
|
||||
QVariantAnimation *anim = animFor(window);
|
||||
anim->stop();
|
||||
anim->setDuration(ms);
|
||||
anim->setStartValue(window->windowOpacity());
|
||||
// Start from the animation's last delivered value if we have one
|
||||
// (a rapid in-then-out cycle interrupts at some intermediate
|
||||
// alpha); otherwise assume the window was fully visible.
|
||||
const QVariant cur = anim->currentValue();
|
||||
anim->setStartValue(cur.isValid() ? cur.toDouble() : 1.0);
|
||||
anim->setEndValue(0.0);
|
||||
anim->setEasingCurve(QEasingCurve::InCubic);
|
||||
// Disconnect any previous handler before reconnecting; otherwise a
|
||||
// toggle-out-then-in cycle accumulates handlers that all fire on
|
||||
// the next out.
|
||||
QObject::disconnect(anim, &QPropertyAnimation::finished, window, nullptr);
|
||||
QObject::connect(anim, &QPropertyAnimation::finished, window,
|
||||
QObject::disconnect(anim, &QVariantAnimation::finished, window, nullptr);
|
||||
QObject::connect(anim, &QVariantAnimation::finished, window,
|
||||
[window]() { window->hide(); });
|
||||
anim->start();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,267 @@
|
|||
// See `Host.h` for the contract.
|
||||
|
||||
#include "Host.h"
|
||||
|
||||
#include <array>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
|
||||
#include "../wayland/DmabufRegistry.h"
|
||||
|
||||
namespace vulkan {
|
||||
|
||||
namespace {
|
||||
|
||||
// Device extensions a physical device must expose to be usable
// (checked by `hasRequiredExtensions`).
constexpr const char *kRequiredDeviceExtensions[] = {
    // Export of device memory as a file descriptor, dmabuf flavour.
    "VK_KHR_external_memory_fd",
    "VK_EXT_external_memory_dma_buf",
    // Lets libghostty allocate render images with a chosen DRM
    // modifier (vendor-tiled where supported) and read the
    // driver-chosen layout back via
    // `vkGetImageDrmFormatModifierPropertiesEXT`. If the host's
    // VkDevice lacks it, the device-level proc-addr lookup for that
    // function returns null and Target.init fails.
    "VK_EXT_image_drm_format_modifier",
};
|
||||
|
||||
// True when `pd` advertises every name in `kRequiredDeviceExtensions`.
// A device reporting zero extensions is rejected outright.
bool hasRequiredExtensions(VkPhysicalDevice pd) {
  // Standard two-call enumeration: count first, then fetch.
  uint32_t count = 0;
  vkEnumerateDeviceExtensionProperties(pd, nullptr, &count, nullptr);
  if (count == 0) return false;

  std::vector<VkExtensionProperties> advertised(count);
  vkEnumerateDeviceExtensionProperties(pd, nullptr, &count,
                                       advertised.data());

  const auto isAdvertised = [&advertised](const char *wanted) {
    for (const VkExtensionProperties &props : advertised) {
      if (std::strcmp(props.extensionName, wanted) == 0) return true;
    }
    return false;
  };

  for (const char *required : kRequiredDeviceExtensions) {
    if (!isAdvertised(required)) return false;
  }
  return true;
}
|
||||
|
||||
// Index of the first queue family on `pd` with VK_QUEUE_GRAPHICS_BIT
// set, or std::nullopt when the device has none.
std::optional<uint32_t> findGraphicsQueueFamily(VkPhysicalDevice pd) {
  uint32_t familyCount = 0;
  vkGetPhysicalDeviceQueueFamilyProperties(pd, &familyCount, nullptr);
  if (familyCount == 0) return std::nullopt;

  std::vector<VkQueueFamilyProperties> families(familyCount);
  vkGetPhysicalDeviceQueueFamilyProperties(pd, &familyCount, families.data());

  uint32_t index = 0;
  while (index < familyCount) {
    if ((families[index].queueFlags & VK_QUEUE_GRAPHICS_BIT) != 0) {
      return index;
    }
    ++index;
  }
  return std::nullopt;
}
|
||||
|
||||
// ---- Platform callback trampolines ----------------------------------
|
||||
//
|
||||
// `ghostty_platform_vulkan_s` is a plain C ABI; the callback signatures
|
||||
// take a `void *userdata` that libghostty hands back to each callback.
|
||||
// The handle-lookup callbacks (instance / physical_device / device /
|
||||
// queue / queue_family_index / get_instance_proc_addr) ignore the
|
||||
// userdata and resolve through the process singleton — there's only
|
||||
// one Vulkan setup per process. The `present` callback DOES use the
|
||||
// userdata: it's the `GhosttySurface *` that owns the rendered
|
||||
// target, so we can hand the dmabuf back to the right widget.
|
||||
|
||||
void *cbGetInstanceProcAddr(void *ud, const char *name) {
|
||||
(void)ud;
|
||||
auto *host = Host::instance();
|
||||
if (host == nullptr) return nullptr;
|
||||
auto fp = vkGetInstanceProcAddr(host->vkInstance(), name);
|
||||
return reinterpret_cast<void *>(fp);
|
||||
}
|
||||
|
||||
void *cbInstance(void *ud) {
|
||||
(void)ud;
|
||||
auto *host = Host::instance();
|
||||
return host != nullptr ? host->vkInstance() : nullptr;
|
||||
}
|
||||
void *cbPhysicalDevice(void *ud) {
|
||||
(void)ud;
|
||||
auto *host = Host::instance();
|
||||
return host != nullptr ? host->vkPhysicalDevice() : nullptr;
|
||||
}
|
||||
void *cbDevice(void *ud) {
|
||||
(void)ud;
|
||||
auto *host = Host::instance();
|
||||
return host != nullptr ? host->vkDevice() : nullptr;
|
||||
}
|
||||
void *cbQueue(void *ud) {
|
||||
(void)ud;
|
||||
auto *host = Host::instance();
|
||||
return host != nullptr ? host->vkQueue() : nullptr;
|
||||
}
|
||||
uint32_t cbQueueFamilyIndex(void *ud) {
|
||||
(void)ud;
|
||||
auto *host = Host::instance();
|
||||
return host != nullptr ? host->vkQueueFamilyIndex() : 0;
|
||||
}
|
||||
|
||||
size_t cbGetSupportedModifiers(void *ud, uint32_t drm_format,
|
||||
uint64_t *out, size_t capacity) {
|
||||
(void)ud;
|
||||
// Lock-free read of an immutable table. The table is primed on the
|
||||
// GUI thread by `wayland::primeDmabufModifierRegistry`, called from
|
||||
// `GhosttySurface`'s ctor (Vulkan branch) BEFORE the libghostty
|
||||
// renderer thread is spawned for that surface. As long as that
|
||||
// ordering invariant holds, this read sees a fully-populated table.
|
||||
// `wayland::supportedDmabufModifiers` itself returns 0 if priming
|
||||
// hasn't happened yet, so the failure mode is fail-safe (renderer
|
||||
// gets an empty modifier list, falls back to legacy_copy mode).
|
||||
return ::wayland::supportedDmabufModifiers(drm_format, out, capacity);
|
||||
}
|
||||
|
||||
void cbPresent(
|
||||
void *ud,
|
||||
int dmabuf_fd,
|
||||
uint32_t drm_format,
|
||||
uint64_t drm_modifier,
|
||||
uint32_t width,
|
||||
uint32_t height,
|
||||
uint32_t stride,
|
||||
bool image_backed) {
|
||||
if (ud == nullptr) return;
|
||||
static_cast<PresentSink *>(ud)->presentDmabuf(
|
||||
dmabuf_fd, drm_format, drm_modifier, width, height, stride,
|
||||
image_backed);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
bool Host::init() {
|
||||
// ---- instance ---------------------------------------------------
|
||||
VkApplicationInfo appInfo{};
|
||||
appInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
|
||||
appInfo.pApplicationName = "ghastty";
|
||||
appInfo.applicationVersion = 1;
|
||||
appInfo.pEngineName = "ghastty";
|
||||
appInfo.engineVersion = 1;
|
||||
appInfo.apiVersion = VK_API_VERSION_1_3;
|
||||
|
||||
VkInstanceCreateInfo instInfo{};
|
||||
instInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
|
||||
instInfo.pApplicationInfo = &appInfo;
|
||||
if (vkCreateInstance(&instInfo, nullptr, &m_instance) != VK_SUCCESS) {
|
||||
std::fprintf(stderr, "[vulkan] vkCreateInstance failed\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// ---- physical device -------------------------------------------
|
||||
uint32_t pdCount = 0;
|
||||
vkEnumeratePhysicalDevices(m_instance, &pdCount, nullptr);
|
||||
if (pdCount == 0) {
|
||||
std::fprintf(stderr, "[vulkan] no physical devices\n");
|
||||
return false;
|
||||
}
|
||||
std::vector<VkPhysicalDevice> pds(pdCount);
|
||||
vkEnumeratePhysicalDevices(m_instance, &pdCount, pds.data());
|
||||
|
||||
for (auto pd : pds) {
|
||||
VkPhysicalDeviceProperties props;
|
||||
vkGetPhysicalDeviceProperties(pd, &props);
|
||||
if (props.apiVersion < VK_API_VERSION_1_3) continue;
|
||||
if (!hasRequiredExtensions(pd)) continue;
|
||||
auto qfi = findGraphicsQueueFamily(pd);
|
||||
if (!qfi) continue;
|
||||
m_physicalDevice = pd;
|
||||
m_queueFamilyIndex = *qfi;
|
||||
break;
|
||||
}
|
||||
if (m_physicalDevice == VK_NULL_HANDLE) {
|
||||
std::fprintf(stderr,
|
||||
"[vulkan] no suitable physical device "
|
||||
"(need Vulkan 1.3 + external_memory_fd + dma_buf)\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// ---- logical device + queue ------------------------------------
|
||||
float queuePriority = 1.0f;
|
||||
VkDeviceQueueCreateInfo qci{};
|
||||
qci.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
|
||||
qci.queueFamilyIndex = m_queueFamilyIndex;
|
||||
qci.queueCount = 1;
|
||||
qci.pQueuePriorities = &queuePriority;
|
||||
|
||||
// libghostty's Vulkan renderer uses Vulkan 1.3 dynamic rendering
|
||||
// (vkCmdBeginRendering / vkCmdEndRendering, no VkRenderPass).
|
||||
// That feature has to be explicitly enabled at device creation
|
||||
// time via VkPhysicalDeviceVulkan13Features.
|
||||
VkPhysicalDeviceVulkan13Features vk13features{};
|
||||
vk13features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES;
|
||||
vk13features.dynamicRendering = VK_TRUE;
|
||||
vk13features.synchronization2 = VK_TRUE;
|
||||
|
||||
VkDeviceCreateInfo dci{};
|
||||
dci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
|
||||
dci.pNext = &vk13features;
|
||||
dci.queueCreateInfoCount = 1;
|
||||
dci.pQueueCreateInfos = &qci;
|
||||
dci.enabledExtensionCount =
|
||||
static_cast<uint32_t>(std::size(kRequiredDeviceExtensions));
|
||||
dci.ppEnabledExtensionNames = kRequiredDeviceExtensions;
|
||||
|
||||
if (vkCreateDevice(m_physicalDevice, &dci, nullptr, &m_device) != VK_SUCCESS) {
|
||||
std::fprintf(stderr, "[vulkan] vkCreateDevice failed\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
vkGetDeviceQueue(m_device, m_queueFamilyIndex, 0, &m_queue);
|
||||
|
||||
VkPhysicalDeviceProperties props;
|
||||
vkGetPhysicalDeviceProperties(m_physicalDevice, &props);
|
||||
std::fprintf(stderr,
|
||||
"[vulkan] device ready: %s (Vulkan %u.%u.%u, qfi=%u)\n",
|
||||
props.deviceName,
|
||||
VK_API_VERSION_MAJOR(props.apiVersion),
|
||||
VK_API_VERSION_MINOR(props.apiVersion),
|
||||
VK_API_VERSION_PATCH(props.apiVersion),
|
||||
m_queueFamilyIndex);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Release the Vulkan objects owned by the host. Either handle may still
// be VK_NULL_HANDLE if init() failed part-way, so each is checked.
Host::~Host() {
    if (m_device != VK_NULL_HANDLE) {
        // Drain outstanding GPU work before tearing the device down. The
        // result is ignored on purpose: the device is destroyed either
        // way, including when it has been lost.
        vkDeviceWaitIdle(m_device);
        vkDestroyDevice(m_device, nullptr);
    }
    if (m_instance != VK_NULL_HANDLE) vkDestroyInstance(m_instance, nullptr);
}
|
||||
|
||||
// Build the C callback table handed to libghostty. `sink` becomes the
// table's userdata. Of the trampolines wired here only `cbPresent`
// reads it; the others resolve through Host::instance().
ghostty_platform_vulkan_s Host::asPlatform(PresentSink *sink) const {
    ghostty_platform_vulkan_s platform{};

    // Per-surface part: where presented frames are delivered.
    platform.userdata = sink;
    platform.present = cbPresent;

    // Process-wide handle lookups.
    platform.get_instance_proc_addr = cbGetInstanceProcAddr;
    platform.instance = cbInstance;
    platform.physical_device = cbPhysicalDevice;
    platform.device = cbDevice;
    platform.queue = cbQueue;
    platform.queue_family_index = cbQueueFamilyIndex;

    // Compositor modifier registry query.
    platform.get_supported_modifiers = cbGetSupportedModifiers;

    return platform;
}
|
||||
|
||||
// Return the process-wide host, creating it on first use. Creation is
// attempted exactly once; if init() fails, this and every later call
// return nullptr.
Host *Host::instance() {
    static std::once_flag initFlag;
    static std::unique_ptr<Host> singleton;

    std::call_once(initFlag, [] {
        std::unique_ptr<Host> attempt(new Host());
        if (!attempt->init()) {
            // `attempt` goes out of scope here and its destructor
            // releases whatever init() managed to create.
            return;
        }
        singleton.swap(attempt);
    });

    // The dmabuf modifier registry used to be primed inside this
    // `call_once` as well. That moved to `GhosttySurface`'s ctor:
    // priming is a Wayland-protocol concern, while `Host::instance()`
    // is about Vulkan setup, and doing both here coupled `Host` to a
    // Wayland concern it doesn't need.
    return singleton.get();
}
|
||||
|
||||
} // namespace vulkan
|
||||
|
|
@ -0,0 +1,97 @@
|
|||
// Vulkan host setup for the Ghastty Qt frontend.
|
||||
//
|
||||
// libghostty (when built with `-Drenderer=vulkan`) doesn't create
|
||||
// its own VkInstance / VkDevice — the host does, then hands the
|
||||
// handles down via the `ghostty_platform_vulkan_s` callback struct
|
||||
// declared in `include/ghostty.h`. This class is the Qt-side owner
|
||||
// of those handles.
|
||||
//
|
||||
// The host is process-singleton (one Vulkan instance + device shared
|
||||
// across every `GhosttySurface`), constructed lazily on first use
|
||||
// via `instance()`. Requires a physical device that supports
|
||||
// VK_KHR_external_memory_fd, VK_EXT_external_memory_dma_buf, and
|
||||
// VK_EXT_image_drm_format_modifier — all three are needed for the
|
||||
// dmabuf-as-importable-image export path libghostty's Vulkan
|
||||
// renderer uses to hand frames back to the host.
|
||||
//
|
||||
// The compositor dmabuf modifier registry that this host's
|
||||
// `get_supported_modifiers` callback reads is primed elsewhere
|
||||
// (in `GhosttySurface`'s ctor on the GUI thread, via
|
||||
// `wayland::primeDmabufModifierRegistry` from
|
||||
// `qt/src/wayland/DmabufRegistry.h`). That priming is a Wayland
|
||||
// concern and used to leak into `Host::instance`'s `call_once` —
|
||||
// which made `Host` (a Vulkan object) responsible for a
|
||||
// Wayland-protocol concern it doesn't otherwise touch.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
|
||||
#include <vulkan/vulkan.h>
|
||||
|
||||
#include "ghostty.h"
|
||||
|
||||
namespace vulkan {
|
||||
|
||||
/// Abstract destination for a presented, dmabuf-backed frame.
/// `GhosttySurface` implements it. Keeping it abstract means
/// `vulkan::Host` never needs to know the widget type. It replaces an
/// earlier cross-TU forward declaration of a free function,
/// `presentToGhosttySurface`, which tied `Host.cpp` directly to
/// `GhosttySurface.cpp`.
class PresentSink {
public:
    virtual ~PresentSink() = default;

    /// Receive one rendered frame.
    ///
    /// Invoked on the libghostty renderer thread. The implementation
    /// must marshal to whichever thread it composites on. `dmabuf_fd`
    /// is only borrowed for the duration of the call, so an
    /// implementation that wants to keep it must `dup()` it.
    virtual void presentDmabuf(int dmabuf_fd,
                               std::uint32_t drm_format,
                               std::uint64_t drm_modifier,
                               std::uint32_t width,
                               std::uint32_t height,
                               std::uint32_t stride,
                               bool image_backed) = 0;
};
|
||||
|
||||
/// Process-wide Vulkan setup. One per Ghastty process; threadsafe
|
||||
/// to call `instance()` from anywhere (constructs once via
|
||||
/// std::call_once on first access).
|
||||
class Host {
|
||||
public:
|
||||
/// Return the process-wide host, or nullptr if Vulkan can't be
|
||||
/// brought up on this system. Cached after the first call so
|
||||
/// repeated lookups are cheap.
|
||||
static Host *instance();
|
||||
|
||||
/// Build a `ghostty_platform_vulkan_s` callback struct whose
|
||||
/// `present` callback delivers frames to `sink`. `sink` must
|
||||
/// outlive the lifetime of any libghostty surface that was
|
||||
/// configured with the returned platform struct. Other callbacks
|
||||
/// (handle lookups, modifier registry) ignore `sink` and route
|
||||
/// through the process singleton.
|
||||
ghostty_platform_vulkan_s asPlatform(PresentSink *sink) const;
|
||||
|
||||
VkInstance vkInstance() const { return m_instance; }
|
||||
VkPhysicalDevice vkPhysicalDevice() const { return m_physicalDevice; }
|
||||
VkDevice vkDevice() const { return m_device; }
|
||||
VkQueue vkQueue() const { return m_queue; }
|
||||
uint32_t vkQueueFamilyIndex() const { return m_queueFamilyIndex; }
|
||||
|
||||
~Host();
|
||||
|
||||
// No copy/move — singleton.
|
||||
Host(const Host &) = delete;
|
||||
Host &operator=(const Host &) = delete;
|
||||
|
||||
private:
|
||||
Host() = default;
|
||||
bool init();
|
||||
|
||||
VkInstance m_instance = VK_NULL_HANDLE;
|
||||
VkPhysicalDevice m_physicalDevice = VK_NULL_HANDLE;
|
||||
VkDevice m_device = VK_NULL_HANDLE;
|
||||
VkQueue m_queue = VK_NULL_HANDLE;
|
||||
uint32_t m_queueFamilyIndex = 0;
|
||||
};
|
||||
|
||||
} // namespace vulkan
|
||||
|
|
@ -0,0 +1,193 @@
|
|||
#include "AlphaModifier.h"

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <mutex>
#include <unordered_map>

#include <QGuiApplication>
#include <QWindow>
#include <qpa/qplatformnativeinterface.h>

#include <wayland-client.h>

#include "alpha-modifier-v1-client-protocol.h"
|
||||
|
||||
namespace wayland {
|
||||
|
||||
namespace {
|
||||
|
||||
// Process-wide binding. Lazily initialised on first supported()/
|
||||
// setOpacity() call, then read lock-free via the atomic-by-fence
|
||||
// guarantee of `std::call_once`. Once bound it lives for the
|
||||
// process lifetime — there's no clean teardown path on Wayland
|
||||
// global teardown that would matter for a manager-style global.
|
||||
struct GlobalState {
|
||||
wl_display *display = nullptr;
|
||||
wp_alpha_modifier_v1 *manager = nullptr; // null if compositor lacks it
|
||||
bool ready = false; // call_once fired (success or failure)
|
||||
};
|
||||
|
||||
// Accessor for the single process-wide GlobalState, created on first
// call as a function-local static.
GlobalState &globalState() {
    static GlobalState state;
    return state;
}
|
||||
|
||||
// Listener: discover wp_alpha_modifier_v1 in the registry. The
|
||||
// scoped wl_event_queue we use here is destroyed before the
|
||||
// listener data goes out of scope, so the registry's child
|
||||
// proxies (none survive past this binding pass) are safe.
|
||||
void onRegistryGlobal(void *data, wl_registry *registry, uint32_t name,
|
||||
const char *interface, uint32_t /*version*/) {
|
||||
auto *g = static_cast<GlobalState *>(data);
|
||||
if (std::strcmp(interface, wp_alpha_modifier_v1_interface.name) != 0)
|
||||
return;
|
||||
// Version 1 is the only version of this staging protocol so far.
|
||||
g->manager = static_cast<wp_alpha_modifier_v1 *>(
|
||||
wl_registry_bind(registry, name, &wp_alpha_modifier_v1_interface, 1));
|
||||
}
|
||||
|
||||
// Registry listener: global removal is deliberately ignored.
void onRegistryGlobalRemove(void * /*data*/, wl_registry * /*registry*/,
                            uint32_t /*name*/) {}
|
||||
|
||||
// Callback table for the one-shot registry scan. The first entry
// handles announced globals, the second handles removals.
const wl_registry_listener kRegistryListener = {
    onRegistryGlobal,
    onRegistryGlobalRemove,
};
|
||||
|
||||
// Bind the manager global lazily on first use. Idempotent under
|
||||
// std::call_once. Mirrors the private-queue pattern in
|
||||
// XkbTracker — and like that, we migrate the bound proxy onto
|
||||
// the default queue before destroying the private queue, so
|
||||
// future calls (set_multiplier, get_surface) dispatch on Qt's
|
||||
// event loop instead of a dangling queue.
|
||||
void initOnce() {
|
||||
static std::once_flag once;
|
||||
std::call_once(once, []() {
|
||||
auto &g = globalState();
|
||||
QPlatformNativeInterface *native =
|
||||
QGuiApplication::platformNativeInterface();
|
||||
if (!native) {
|
||||
g.ready = true;
|
||||
return;
|
||||
}
|
||||
g.display = static_cast<wl_display *>(
|
||||
native->nativeResourceForIntegration("wl_display"));
|
||||
if (!g.display) {
|
||||
g.ready = true;
|
||||
return;
|
||||
}
|
||||
|
||||
wl_event_queue *queue = wl_display_create_queue(g.display);
|
||||
wl_registry *registry = wl_display_get_registry(g.display);
|
||||
wl_proxy_set_queue(reinterpret_cast<wl_proxy *>(registry), queue);
|
||||
wl_registry_add_listener(registry, &kRegistryListener, &g);
|
||||
wl_display_roundtrip_queue(g.display, queue);
|
||||
wl_registry_destroy(registry);
|
||||
|
||||
// Migrate the manager onto the default queue BEFORE destroying
|
||||
// the private one — otherwise compositor-side messages for the
|
||||
// manager (none expected for this protocol, but cleanliness
|
||||
// matters and Qt's event queue is the dispatch target we want
|
||||
// anyway) would target a destroyed queue, the same footgun that
|
||||
// produced the exit-time SIGSEGV in XkbTracker.
|
||||
if (g.manager) {
|
||||
wl_proxy_set_queue(reinterpret_cast<wl_proxy *>(g.manager), nullptr);
|
||||
}
|
||||
wl_event_queue_destroy(queue);
|
||||
g.ready = true;
|
||||
});
|
||||
}
|
||||
|
||||
// Per-wl_surface alpha modifier object cache. Cached so animation
|
||||
// ticks don't re-roundtrip get_surface every frame.
|
||||
//
|
||||
// Keyed by wl_surface* — that's stable for the wl_surface's
|
||||
// lifetime, and we explicitly drop on detach(). If a QWindow is
|
||||
// destroyed without detach() being called the wl_surface gets
|
||||
// destroyed by Qt; the cached wp_alpha_modifier_surface_v1 would
|
||||
// then be invalid on next get_surface, so callers MUST detach()
|
||||
// from the QWindow's destruction path. Map access is from the
|
||||
// GUI thread only.
|
||||
struct Cache {
|
||||
std::unordered_map<wl_surface *, wp_alpha_modifier_surface_v1 *> entries;
|
||||
};
|
||||
|
||||
// Accessor for the single process-wide Cache, created on first call as
// a function-local static.
Cache &cache() {
    static Cache instance;
    return instance;
}
|
||||
|
||||
// Look up the native wl_surface behind `window` through Qt's platform
// native interface. Returns nullptr for a null window, when no native
// interface is available, or when the "surface" lookup yields nothing.
wl_surface *surfaceFor(QWindow *window) {
    // A null window skips the platform interface lookup entirely.
    QPlatformNativeInterface *const native =
        window ? QGuiApplication::platformNativeInterface() : nullptr;
    if (native == nullptr) {
        return nullptr;
    }
    void *const handle = native->nativeResourceForWindow("surface", window);
    return static_cast<wl_surface *>(handle);
}
|
||||
|
||||
// Return the alpha-modifier object for `surface`, creating and caching
// it on first request. Returns nullptr when the manager global is not
// bound or when get_surface yields no object; nothing is cached then.
wp_alpha_modifier_surface_v1 *getOrCreate(wl_surface *surface) {
    auto &entries = cache().entries;
    if (const auto hit = entries.find(surface); hit != entries.end()) {
        return hit->second;
    }

    wp_alpha_modifier_v1 *const manager = globalState().manager;
    if (manager == nullptr) {
        return nullptr;
    }

    wp_alpha_modifier_surface_v1 *const created =
        wp_alpha_modifier_v1_get_surface(manager, surface);
    if (created != nullptr) {
        entries.emplace(surface, created);
    }
    return created;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
bool AlphaModifier::supported() {
|
||||
initOnce();
|
||||
return globalState().manager != nullptr;
|
||||
}
|
||||
|
||||
// Set `window`'s alpha multiplier and push the change to the
// compositor.
//
// `opacity` is clamped to [0.0, 1.0]; a NaN is rejected. Returns false
// when `opacity` is NaN, the protocol is unavailable, the window has no
// native wl_surface, or no per-surface modifier object could be
// obtained. Returns true once set_multiplier + commit have been issued
// and the display has been flushed.
bool AlphaModifier::setOpacity(QWindow *window, double opacity) {
    // std::clamp passes NaN straight through, and rounding a NaN gives
    // an unspecified multiplier. Refuse it instead of sending garbage.
    if (std::isnan(opacity)) return false;

    initOnce();
    auto &g = globalState();
    if (!g.manager) return false;
    wl_surface *surface = surfaceFor(window);
    if (!surface) return false;
    auto *mod = getOrCreate(surface);
    if (!mod) return false;

    // Convert [0.0, 1.0] → [0, UINT32_MAX]. Clamp first, then round to
    // the nearest integer so the endpoints map exactly (1.0 → fully
    // opaque, 0.0 → fully transparent). llround is used rather than
    // lround because `long` is only 32 bits on some ABIs and cannot
    // represent UINT32_MAX there; `long long` always can.
    const double clamped = std::clamp(opacity, 0.0, 1.0);
    const uint32_t factor = static_cast<uint32_t>(
        std::llround(clamped * static_cast<double>(UINT32_MAX)));
    wp_alpha_modifier_surface_v1_set_multiplier(mod, factor);

    // The alpha multiplier is double-buffered on the wl_surface and
    // only takes effect on the next wl_surface.commit. Commit here so
    // the caller doesn't need to know about Wayland's double-buffer
    // semantics. For Qt-managed top-level windows there is no clean Qt
    // API to force a parent commit, so the surface is committed
    // directly, the same trick used elsewhere in this code for
    // subsurface state changes.
    wl_surface_commit(surface);

    // Flush so the commit reaches the compositor immediately instead of
    // sitting in libwayland-client's send buffer until Qt's next
    // event-loop iteration. Otherwise rapid animation ticks would
    // coalesce into one frame at the end of the tick cycle, defeating
    // the smooth fade.
    wl_display_flush(g.display);
    return true;
}
|
||||
|
||||
// Destroy and forget the cached alpha-modifier object for `window`'s
// wl_surface. Does nothing when the window has no native surface or no
// object was cached for it.
void AlphaModifier::detach(QWindow *window) {
    wl_surface *const surface = surfaceFor(window);
    if (surface == nullptr) {
        return;
    }

    auto &entries = cache().entries;
    const auto found = entries.find(surface);
    if (found != entries.end()) {
        wp_alpha_modifier_surface_v1_destroy(found->second);
        entries.erase(found);
    }
}
|
||||
|
||||
} // namespace wayland
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
// Per-window alpha multiplier via wp_alpha_modifier_v1.
|
||||
//
|
||||
// QtWayland's QPA plugin doesn't implement QWindow::setOpacity (it
|
||||
// logs "This plugin does not support setting window opacity" on
|
||||
// every call). For the QuickTerminal fade-in/out we need real
|
||||
// per-surface alpha, so we drive the wp_alpha_modifier_v1 staging
|
||||
// Wayland protocol ourselves.
|
||||
//
|
||||
// Compositor support (as of 2026-05): KWin (KDE 6+), wlroots
|
||||
// (≥0.17), Hyprland — yes. mutter/GNOME — no. If the protocol
|
||||
// isn't advertised, `setOpacity` returns false and the caller can
|
||||
// either skip the animation or fall back to instant show/hide.
|
||||
//
|
||||
// Wayland-only by project decision (see feedback-qt-no-x11 memory).
|
||||
|
||||
#pragma once
|
||||
|
||||
struct wp_alpha_modifier_v1;
|
||||
struct wp_alpha_modifier_surface_v1;
|
||||
class QWindow;
|
||||
|
||||
namespace wayland {
|
||||
|
||||
class AlphaModifier {
|
||||
public:
|
||||
// Returns true if the compositor advertises wp_alpha_modifier_v1
|
||||
// and we've successfully bound it. Cheap after the first call
|
||||
// (the binding is cached process-wide). Use this to decide
|
||||
// whether to drive an opacity animation or fall through to
|
||||
// instant show/hide.
|
||||
static bool supported();
|
||||
|
||||
// Set the window's alpha multiplier in [0.0, 1.0]. Must be
|
||||
// called on the GUI thread (the thread that owns wl_display
|
||||
// dispatch). Returns false if `window`'s native wl_surface
|
||||
// isn't available yet (e.g. before first show), or if the
|
||||
// compositor doesn't support the protocol.
|
||||
//
|
||||
// The wp_alpha_modifier_surface_v1 object is created lazily per
|
||||
// wl_surface and cached for the surface's lifetime — repeated
|
||||
// calls during an animation just emit set_multiplier + commit.
|
||||
static bool setOpacity(QWindow *window, double opacity);
|
||||
|
||||
// Release the per-surface alpha modifier object for this window.
|
||||
// Call when the window is being destroyed (or before re-creating
|
||||
// its native surface). Equivalent to set_multiplier(UINT32_MAX)
|
||||
// followed by destroy on the surface object.
|
||||
static void detach(QWindow *window);
|
||||
};
|
||||
|
||||
} // namespace wayland
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
// Compositor dmabuf modifier registry.
|
||||
//
|
||||
// Process-wide read-only table of `(drm_format, [modifier])` pairs the
|
||||
// compositor advertises via `zwp_linux_dmabuf_v1`. libghostty's Vulkan
|
||||
// renderer queries this through the
|
||||
// `ghostty_platform_vulkan_s.get_supported_modifiers` callback when
|
||||
// picking a modifier the compositor will accept on attach — without
|
||||
// that intersection, drivers that don't expose `COLOR_ATTACHMENT_BIT`
|
||||
// for `LINEAR` (NVIDIA) can't get into Target's direct-export mode at
|
||||
// all and have to fall back to the legacy CPU-readback path.
|
||||
//
|
||||
// Why a header of its own instead of living on
|
||||
// `wayland::SubsurfacePresenter`? The presenter is per-widget; the
|
||||
// registry is process-wide and read-only after a one-shot prime. They
|
||||
// share `globalState()` machinery internally
|
||||
// (`SubsurfacePresenter.cpp`) but their public surfaces are unrelated
|
||||
// concerns.
|
||||
//
|
||||
// Wayland-only by project decision (the Qt frontend is Wayland-only;
|
||||
// see `feedback-qt-no-x11` memory). On non-Wayland QPA both functions
|
||||
// are no-ops — `primeDmabufModifierRegistry` returns immediately and
|
||||
// `supportedDmabufModifiers` returns 0 — so callers can stay
|
||||
// runtime-agnostic.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
namespace wayland {
|
||||
|
||||
// Eagerly discover the compositor's dmabuf modifier list on the
|
||||
// CALLING THREAD. MUST be called from the GUI thread before any
|
||||
// `supportedDmabufModifiers` reader runs (typically the libghostty
|
||||
// renderer thread). Safe to call multiple times — discovery happens
|
||||
// exactly once via the underlying `globalState`'s latched `searched`
|
||||
// flag.
|
||||
//
|
||||
// Idempotent no-op if the QPA isn't Wayland or the
|
||||
// QPlatformNativeInterface lookup fails.
|
||||
void primeDmabufModifierRegistry();
|
||||
|
||||
// Read the cached compositor-supported DRM modifiers for the given
|
||||
// DRM_FORMAT_* fourcc. Returns the number of modifiers actually
|
||||
// written to `out` (capped at `capacity`). Pass `out=nullptr,
|
||||
// capacity=0` to query the total count.
|
||||
//
|
||||
// Thread-safe for readers once `primeDmabufModifierRegistry` has
|
||||
// returned. Returns 0 if the registry hasn't been primed yet or the
|
||||
// format isn't advertised.
|
||||
std::size_t supportedDmabufModifiers(std::uint32_t drm_format,
|
||||
std::uint64_t *out,
|
||||
std::size_t capacity);
|
||||
|
||||
} // namespace wayland
|
||||
|
|
@ -0,0 +1,785 @@
|
|||
#include "SubsurfacePresenter.h"
|
||||
#include "DmabufRegistry.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <climits>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <sys/stat.h> // ::fstat — wl_buffer cache identity via st_ino
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include <QGuiApplication>
|
||||
#include <QLatin1String>
|
||||
#include <QWindow>
|
||||
#include <qpa/qplatformnativeinterface.h>
|
||||
|
||||
#include <wayland-client.h>
|
||||
|
||||
#include "fractional-scale-v1-client-protocol.h"
|
||||
#include "linux-dmabuf-v1-client-protocol.h"
|
||||
#include "viewporter-client-protocol.h"
|
||||
|
||||
namespace wayland {
|
||||
|
||||
namespace {
|
||||
|
||||
// Process-wide bindings for the Wayland globals the presenter needs,
|
||||
// plus the (format → modifiers) table the compositor advertises via
|
||||
// zwp_linux_dmabuf_v1's format/modifier events. Populated once by
|
||||
// `discoverGlobals` on the GUI thread; subsequent reads from the
|
||||
// renderer thread are safe because the table is never mutated after
|
||||
// the initial discovery completes.
|
||||
struct PresenterGlobals {
|
||||
wl_compositor *compositor = nullptr;
|
||||
wl_subcompositor *subcompositor = nullptr;
|
||||
zwp_linux_dmabuf_v1 *dmabuf = nullptr;
|
||||
wp_viewporter *viewporter = nullptr;
|
||||
wp_fractional_scale_manager_v1 *fractionalScale = nullptr;
|
||||
std::unordered_map<uint32_t, std::vector<uint64_t>> modifiers;
|
||||
bool searched = false;
|
||||
};
|
||||
|
||||
// Accessor for the single process-wide PresenterGlobals, created on
// first call as a function-local static.
PresenterGlobals &globalState() {
    static PresenterGlobals state;
    return state;
}
|
||||
|
||||
// Pre-v4 dmabuf format event. We ignore it: v3 also fires `modifier`
|
||||
// events for every (format, modifier) tuple including LINEAR — the
|
||||
// `format` event is legacy from v1/v2 when modifiers didn't exist.
|
||||
void dmabufFormat(void *, zwp_linux_dmabuf_v1 *, uint32_t /*format*/) {}
|
||||
|
||||
// `modifier` event: compositor advertises one (format, modifier) it
|
||||
// can scan out. Fires once per pair during the bind roundtrip; we
|
||||
// stash them all in the per-format vector. Only fires from inside
|
||||
// `discoverGlobals` because we keep the dmabuf proxy on a private
|
||||
// queue that's never dispatched after discovery — see the queue-
|
||||
// retention comment in `discoverGlobals`. That guarantee is what
|
||||
// lets the renderer thread read `globals.modifiers` without a
|
||||
// lock, and is also why we don't bother deduping (one bind round
|
||||
// only fires each pair once).
|
||||
void dmabufModifier(void *data, zwp_linux_dmabuf_v1 *, uint32_t format,
|
||||
uint32_t modifier_hi, uint32_t modifier_lo) {
|
||||
auto *g = static_cast<PresenterGlobals *>(data);
|
||||
const uint64_t modifier =
|
||||
(static_cast<uint64_t>(modifier_hi) << 32) | modifier_lo;
|
||||
g->modifiers[format].push_back(modifier);
|
||||
}
|
||||
|
||||
// Callback table for zwp_linux_dmabuf_v1. The first entry is the
// `format` handler, the second is the `modifier` handler.
const zwp_linux_dmabuf_v1_listener kDmabufListener = {
    &dmabufFormat,
    &dmabufModifier,
};
|
||||
|
||||
void registryGlobal(void *data, wl_registry *registry, uint32_t name,
|
||||
const char *interface, uint32_t version) {
|
||||
auto *g = static_cast<PresenterGlobals *>(data);
|
||||
if (std::strcmp(interface, wl_compositor_interface.name) == 0) {
|
||||
// Bind wl_compositor at version 3+ so child wl_surfaces we
|
||||
// create support `set_buffer_scale` (added in v3, used by the
|
||||
// presenter on HiDPI displays). Cap at v6 (the highest we've
|
||||
// tested against); if the compositor advertises less, take
|
||||
// what we get and `presentDmabuf` will skip the buffer_scale
|
||||
// call on those compositors.
|
||||
const uint32_t v = std::min<uint32_t>(version, 6u);
|
||||
g->compositor = static_cast<wl_compositor *>(
|
||||
wl_registry_bind(registry, name, &wl_compositor_interface, v));
|
||||
} else if (std::strcmp(interface, wl_subcompositor_interface.name) == 0) {
|
||||
g->subcompositor = static_cast<wl_subcompositor *>(
|
||||
wl_registry_bind(registry, name, &wl_subcompositor_interface, 1));
|
||||
} else if (std::strcmp(interface, zwp_linux_dmabuf_v1_interface.name) == 0) {
|
||||
// We want at least v3 for `create_immed` (synchronous wl_buffer
|
||||
// creation — v1/v2 have only the async `create` + `created`/
|
||||
// `failed` dance). A compositor that only advertises v1/v2
|
||||
// can't satisfy our protocol assumptions; binding at v3 against
|
||||
// such a compositor would protocol-error and tear down the
|
||||
// entire wl_display. Skip the bind in that case so the
|
||||
// legacy QImage fallback engages cleanly.
|
||||
if (version < 3) {
|
||||
std::fprintf(stderr,
|
||||
"[ghastty] wayland: linux-dmabuf-v1 advertised at "
|
||||
"version %u; need >= 3 for create_immed, falling back "
|
||||
"to QImage path\n",
|
||||
version);
|
||||
} else {
|
||||
// Cap at v3 — v4 adds the dynamic format/modifier feedback
|
||||
// dance which we don't consume.
|
||||
const uint32_t v = std::min<uint32_t>(version, 3u);
|
||||
g->dmabuf = static_cast<zwp_linux_dmabuf_v1 *>(wl_registry_bind(
|
||||
registry, name, &zwp_linux_dmabuf_v1_interface, v));
|
||||
// Add the listener immediately so the modifier events queued
|
||||
// by the bind get delivered when the dispatch loop continues.
|
||||
zwp_linux_dmabuf_v1_add_listener(g->dmabuf, &kDmabufListener, g);
|
||||
}
|
||||
} else if (std::strcmp(interface, wp_viewporter_interface.name) == 0) {
|
||||
g->viewporter = static_cast<wp_viewporter *>(
|
||||
wl_registry_bind(registry, name, &wp_viewporter_interface, 1));
|
||||
} else if (std::strcmp(
|
||||
interface, wp_fractional_scale_manager_v1_interface.name) == 0) {
|
||||
g->fractionalScale = static_cast<wp_fractional_scale_manager_v1 *>(
|
||||
wl_registry_bind(registry, name,
|
||||
&wp_fractional_scale_manager_v1_interface, 1));
|
||||
}
|
||||
}
|
||||
// Registry listener: global removal is deliberately ignored.
void registryGlobalRemove(void * /*data*/, wl_registry * /*registry*/,
                          uint32_t /*name*/) {}
|
||||
|
||||
// Callback table for the presenter's registry scan. The first entry
// handles announced globals, the second handles removals.
const wl_registry_listener kRegistryListener = {
    &registryGlobal,
    &registryGlobalRemove,
};
|
||||
|
||||
// One-shot discovery of the Wayland globals the presenter depends on.
//
// The first call binds the globals (through `registryGlobal`) on a private
// event queue, collects the compositor's dmabuf (format, modifier) table and
// marks the process-wide state as searched. Every later call returns that
// same state without touching the display again.
//
// Always returns a non-null pointer. Callers must check the individual
// proxies on the returned state: any of them may still be null if the
// compositor does not offer the global or if discovery failed part-way.
PresenterGlobals *discoverGlobals(wl_display *display) {
  PresenterGlobals &globals = globalState();
  if (globals.searched) return &globals;
  globals.searched = true;

  // Both constructors below return NULL on failure (e.g. out of memory).
  // Passing a NULL registry to wl_proxy_set_queue / wl_registry_add_listener
  // would dereference it, so bail out early and leave every global unbound;
  // tryCreate then reports the missing globals and the caller falls back.
  wl_event_queue *queue = wl_display_create_queue(display);
  if (!queue) {
    std::fprintf(stderr,
                 "[ghastty] wayland: discoverGlobals could not create an "
                 "event queue; subsurface present path disabled\n");
    return &globals;
  }
  wl_registry *registry = wl_display_get_registry(display);
  if (!registry) {
    std::fprintf(stderr,
                 "[ghastty] wayland: discoverGlobals could not obtain the "
                 "registry; subsurface present path disabled\n");
    // Nothing was ever attached to the queue, so it is safe to drop it.
    wl_event_queue_destroy(queue);
    return &globals;
  }
  wl_proxy_set_queue(reinterpret_cast<wl_proxy *>(registry), queue);
  wl_registry_add_listener(registry, &kRegistryListener, &globals);

  // Roundtrip 1: bind compositor/subcompositor/dmabuf. Inside the
  // registry callback we attach the dmabuf listener immediately, so
  // any format/modifier events that arrive in the same dispatch
  // pass fire on it. A negative return means the wl_display
  // disconnected mid-startup; subsequent tryCreate calls fall
  // through to the QImage path (g->compositor etc. stay null).
  if (wl_display_roundtrip_queue(display, queue) < 0) {
    std::fprintf(stderr,
                 "[ghastty] wayland: discoverGlobals roundtrip 1 failed; "
                 "subsurface present path disabled\n");
  }
  wl_registry_destroy(registry);

  // Roundtrip 2: belt-and-suspenders for any compositor that defers
  // the modifier events past the bind reply (most don't, but some
  // batch them). After this returns the modifier table is fully
  // populated and frozen for the process lifetime.
  if (globals.dmabuf && wl_display_roundtrip_queue(display, queue) < 0) {
    std::fprintf(stderr,
                 "[ghastty] wayland: discoverGlobals roundtrip 2 failed; "
                 "modifier table is incomplete — disabling dmabuf path\n");
    // Drop whatever modifier entries we did get. A partially-
    // populated table is dangerous: presentDmabuf would treat it
    // as authoritative, hand a "supported" modifier to the
    // compositor that the compositor may actually not accept, and
    // the resulting `invalid_format` is a FATAL protocol error
    // that kills the entire wl_display. Falling back to QImage
    // path (modifiers map empty → tryCreate's checks fail / the
    // Vulkan renderer drops to legacy_copy mode) is much safer.
    globals.modifiers.clear();
    globals.dmabuf = nullptr;
  }

  std::size_t total_mods = 0;
  for (const auto &kv : globals.modifiers) total_mods += kv.second.size();
  std::fprintf(stderr,
               "[ghastty] wayland: discovered %zu dmabuf (format,modifier) "
               "pairs across %zu formats\n",
               total_mods, globals.modifiers.size());

  // Move the bound proxies back to the default queue so Qt's main
  // dispatch drives subsequent events on them.
  //
  // EXCEPT the dmabuf proxy: its listener mutates `globals.modifiers`
  // on every `modifier` event, and the renderer thread reads that
  // map from `supportedDmabufModifiers` without locking. If we
  // moved the proxy back to the default queue, a compositor
  // restart / hot-plug fires more `modifier` events that would
  // race the reader. Keep the proxy on `queue` and intentionally
  // never dispatch that queue again, so the map stays frozen
  // post-discovery.
  if (globals.compositor)
    wl_proxy_set_queue(reinterpret_cast<wl_proxy *>(globals.compositor),
                       nullptr);
  if (globals.subcompositor)
    wl_proxy_set_queue(reinterpret_cast<wl_proxy *>(globals.subcompositor),
                       nullptr);
  if (globals.viewporter)
    wl_proxy_set_queue(reinterpret_cast<wl_proxy *>(globals.viewporter),
                       nullptr);
  if (globals.fractionalScale)
    wl_proxy_set_queue(reinterpret_cast<wl_proxy *>(globals.fractionalScale),
                       nullptr);

  // `queue` is deliberately leaked (with globals.dmabuf still attached to
  // it) for the process lifetime: its footprint is small, and destroying it
  // would put dmabuf events back on the default queue.
  return &globals;
}
|
||||
|
||||
// Fetches the wl_display exposed by Qt's platform integration.
//
// Returns nullptr when the QPA platform name does not start with "wayland",
// when Qt has no native interface, or when the integration has no
// "wl_display" resource.
wl_display *acquireWaylandDisplay() {
  const bool onWayland =
      QGuiApplication::platformName().startsWith(QLatin1String("wayland"));
  QPlatformNativeInterface *nativeIface =
      onWayland ? QGuiApplication::platformNativeInterface() : nullptr;
  if (nativeIface == nullptr) return nullptr;

  void *resource = nativeIface->nativeResourceForIntegration("wl_display");
  return static_cast<wl_display *>(resource);
}
|
||||
|
||||
// wl_buffer::release listener: the compositor is done sampling the
|
||||
// buffer for any committed surface state. We KEEP the wl_buffer
|
||||
// alive across releases — libghostty re-uses the same dmabuf fd
|
||||
// across frames until resize, so we re-attach the cached wl_buffer
|
||||
// on every present (see `m_cachedBuffer` in the header). The buffer
|
||||
// is destroyed only when (a) the dmabuf shape changes (next
|
||||
// `presentDmabuf` invalidates the cache) or (b) the presenter is
|
||||
// destroyed.
|
||||
//
|
||||
// The underlying dmabuf memory is owned by libghostty; we never
|
||||
// close that fd here (the SCM_RIGHTS transfer in
|
||||
// zwp_linux_buffer_params.add gave the compositor its own
|
||||
// reference, which lives independently of our wl_buffer).
|
||||
void bufferRelease(void *, wl_buffer *) {
|
||||
// No-op. See cache rationale above.
|
||||
}
|
||||
// Listener table attached to every wl_buffer created by `presentDmabuf`;
// its single entry is the (no-op) release handler.
const wl_buffer_listener kBufferListener = {bufferRelease};
|
||||
|
||||
// wl_callback::done listener for compositor-paced presents. Single-
|
||||
// shot per callback — the proxy is destroyed here and the
|
||||
// presenter's m_frameCallback field is cleared so the next present
|
||||
// knows to register a fresh one. After cleanup, invoke the
|
||||
// presenter's onFrameReady hook (set by GhosttySurface to pump the
|
||||
// next pending frame).
|
||||
void frameCallbackDone(void *data, wl_callback *cb, uint32_t /*time*/) {
|
||||
auto *p = static_cast<wayland::SubsurfacePresenter *>(data);
|
||||
// Defensive: if the listener fires after the proxy was destroyed
|
||||
// by ~SubsurfacePresenter (Wayland guarantees no events on a
|
||||
// destroyed proxy, so this shouldn't happen, but if a future
|
||||
// refactor destroys the presenter before flushing the queue we'd
|
||||
// rather no-op than UAF).
|
||||
if (!p) {
|
||||
wl_callback_destroy(cb);
|
||||
return;
|
||||
}
|
||||
p->onFrameCallbackDone(cb);
|
||||
}
|
||||
// Listener table for wl_surface.frame callbacks; its single entry is the
// done handler that forwards to the owning presenter.
const wl_callback_listener kFrameCallbackListener = {frameCallbackDone};
|
||||
|
||||
} // namespace
|
||||
|
||||
void primeDmabufModifierRegistry() {
|
||||
if (wl_display *display = acquireWaylandDisplay()) {
|
||||
(void)discoverGlobals(display);
|
||||
}
|
||||
}
|
||||
|
||||
std::size_t supportedDmabufModifiers(std::uint32_t drm_format,
|
||||
std::uint64_t *out,
|
||||
std::size_t capacity) {
|
||||
const PresenterGlobals &g = globalState();
|
||||
if (!g.searched) return 0;
|
||||
auto it = g.modifiers.find(drm_format);
|
||||
if (it == g.modifiers.end()) return 0;
|
||||
const std::size_t available = it->second.size();
|
||||
if (out == nullptr || capacity == 0) return available;
|
||||
const std::size_t copied = std::min(available, capacity);
|
||||
std::memcpy(out, it->second.data(), copied * sizeof(std::uint64_t));
|
||||
return copied;
|
||||
}
|
||||
|
||||
std::unique_ptr<SubsurfacePresenter>
|
||||
SubsurfacePresenter::tryCreate(QWindow *topLevel) {
|
||||
if (!topLevel) return nullptr;
|
||||
|
||||
if (!QGuiApplication::platformName().startsWith(QLatin1String("wayland"))) {
|
||||
std::fprintf(stderr,
|
||||
"[ghastty] SubsurfacePresenter: not on Wayland QPA\n");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
QPlatformNativeInterface *native = QGuiApplication::platformNativeInterface();
|
||||
if (!native) return nullptr;
|
||||
|
||||
auto *display = static_cast<wl_display *>(
|
||||
native->nativeResourceForIntegration("wl_display"));
|
||||
auto *parentSurface = static_cast<wl_surface *>(
|
||||
native->nativeResourceForWindow("surface", topLevel));
|
||||
if (!display || !parentSurface) {
|
||||
std::fprintf(stderr,
|
||||
"[ghastty] SubsurfacePresenter: missing wl_display or "
|
||||
"parent wl_surface (display=%p surface=%p)\n",
|
||||
static_cast<void *>(display),
|
||||
static_cast<void *>(parentSurface));
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
PresenterGlobals *g = discoverGlobals(display);
|
||||
if (!g->compositor || !g->subcompositor || !g->dmabuf || !g->viewporter) {
|
||||
std::fprintf(
|
||||
stderr,
|
||||
"[ghastty] SubsurfacePresenter: compositor missing required globals "
|
||||
"(compositor=%p subcompositor=%p dmabuf=%p viewporter=%p)\n",
|
||||
static_cast<void *>(g->compositor),
|
||||
static_cast<void *>(g->subcompositor), static_cast<void *>(g->dmabuf),
|
||||
static_cast<void *>(g->viewporter));
|
||||
return nullptr;
|
||||
}
|
||||
// wp_fractional_scale_manager_v1 is optional — if missing we
|
||||
// assume integer scale 1.0 and let wp_viewport.set_destination
|
||||
// still do its job. Most modern compositors support it.
|
||||
|
||||
wl_surface *child = wl_compositor_create_surface(g->compositor);
|
||||
if (!child) return nullptr;
|
||||
|
||||
wl_subsurface *sub =
|
||||
wl_subcompositor_get_subsurface(g->subcompositor, child, parentSurface);
|
||||
if (!sub) {
|
||||
wl_surface_destroy(child);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Sync mode (the wl_subsurface default): wl_surface.commit on
|
||||
// the child caches state until the parent commits, at which point
|
||||
// both apply atomically. This is what guarantees lockstep resize
|
||||
// behavior — parent grows to the new size and our matching
|
||||
// new-size buffer apply in the same compositor frame, no gap.
|
||||
//
|
||||
// Sync mode requires the parent to commit for our state to
|
||||
// apply. Qt's backing-store flush doesn't fire for our
|
||||
// translucent QWidget (paintEvent produces no damage), so
|
||||
// GhosttySurface forces the parent commit explicitly via
|
||||
// QtWaylandClient::QWaylandWindow::commit() (Qt6::WaylandClient-
|
||||
// Private) after every child commit + viewport update. See
|
||||
// `forceParentCommit` in GhosttySurface.cpp.
|
||||
//
|
||||
// The earlier desync-mode attempt avoided the Qt-private
|
||||
// dependency but couldn't deliver lockstep resize because the
|
||||
// two surfaces commit independently in that mode.
|
||||
|
||||
// Initial subsurface position: (0,0) in parent-surface coords.
|
||||
// GhosttySurface immediately calls setPosition after tryCreate
|
||||
// returns with the pane's real offset within the top-level (and
|
||||
// updates it on every moveEvent / resizeEvent).
|
||||
wl_subsurface_set_position(sub, 0, 0);
|
||||
|
||||
// Stack the subsurface BELOW the parent so Qt's child widgets
|
||||
// (SearchBar, overlays, scrollbar, exit/health/link/resize hints)
|
||||
// remain visible — they're painted into the parent's backing
|
||||
// store, and Wayland's default subsurface stacking is *above*
|
||||
// parent which would hide all of them. With place_below the
|
||||
// parent QWidget renders on top; WA_TranslucentBackground means
|
||||
// the terminal area of the parent is transparent so the
|
||||
// subsurface shows through, while the chrome painted by
|
||||
// paintEvent stays visible on top.
|
||||
wl_subsurface_place_below(sub, parentSurface);
|
||||
|
||||
// Set an empty input region so pointer/touch events fall through
|
||||
// to the parent surface (Qt's QWindow). The default input region
|
||||
// is the whole attached buffer, which would mean our subsurface
|
||||
// captures every click in the terminal area — Qt's QWidget would
|
||||
// never see contextMenuEvent (right-click menu), mouse press/
|
||||
// release, or any other pointer event in the terminal. wl_region
|
||||
// with no add_rectangle calls = empty = "no input." The region
|
||||
// can be destroyed immediately after set_input_region; the
|
||||
// compositor copies its state into the surface's pending state.
|
||||
wl_region *empty = wl_compositor_create_region(g->compositor);
|
||||
if (empty) {
|
||||
wl_surface_set_input_region(child, empty);
|
||||
wl_region_destroy(empty);
|
||||
}
|
||||
|
||||
// wp_viewport: per-surface object that lets us tell the compositor
|
||||
// the destination size in surface-local coords, independent of
|
||||
// the buffer's pixel dimensions. With fractional scaling we
|
||||
// render at, say, 960x720 device pixels into an 800x600 surface
|
||||
// area, and the viewport handles the mapping.
|
||||
wp_viewport *viewport =
|
||||
wp_viewporter_get_viewport(g->viewporter, child);
|
||||
if (!viewport) {
|
||||
wl_subsurface_destroy(sub);
|
||||
wl_surface_destroy(child);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// wp_fractional_scale_v1: subscribe to the compositor's
|
||||
// per-surface preferred scale. Optional — if the global is
|
||||
// missing we stick with default 120 (= 1.0×).
|
||||
wp_fractional_scale_v1 *frac_scale = nullptr;
|
||||
if (g->fractionalScale) {
|
||||
frac_scale = wp_fractional_scale_manager_v1_get_fractional_scale(
|
||||
g->fractionalScale, child);
|
||||
}
|
||||
|
||||
wl_display_flush(display);
|
||||
if (int err = wl_display_get_error(display); err != 0) {
|
||||
std::fprintf(stderr,
|
||||
"[ghastty] SubsurfacePresenter: wl_display error %d after "
|
||||
"subsurface creation\n",
|
||||
err);
|
||||
if (frac_scale) wp_fractional_scale_v1_destroy(frac_scale);
|
||||
wp_viewport_destroy(viewport);
|
||||
wl_subsurface_destroy(sub);
|
||||
wl_surface_destroy(child);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::fprintf(stderr,
|
||||
"[ghastty] SubsurfacePresenter: ready (parent=%p child=%p "
|
||||
"sub=%p dmabuf=%p viewport=%p frac_scale=%p)\n",
|
||||
static_cast<void *>(parentSurface), static_cast<void *>(child),
|
||||
static_cast<void *>(sub), static_cast<void *>(g->dmabuf),
|
||||
static_cast<void *>(viewport),
|
||||
static_cast<void *>(frac_scale));
|
||||
|
||||
return std::unique_ptr<SubsurfacePresenter>(new SubsurfacePresenter(
|
||||
display, child, sub, g->dmabuf, viewport, frac_scale));
|
||||
}
|
||||
|
||||
// Listener table for wp_fractional_scale_v1; its single entry routes
// preferred_scale events to the presenter's static handler.
const wp_fractional_scale_v1_listener kFractionalScaleListener = {
    SubsurfacePresenter::onPreferredScale};
|
||||
|
||||
void SubsurfacePresenter::onPreferredScale(void *data,
|
||||
wp_fractional_scale_v1 *,
|
||||
uint32_t scale) {
|
||||
auto *self = static_cast<SubsurfacePresenter *>(data);
|
||||
if (scale == 0) return; // guard against compositor bugs
|
||||
if (scale != self->m_preferredScale120) {
|
||||
std::fprintf(stderr,
|
||||
"[ghastty] SubsurfacePresenter: preferred scale %u/120 = "
|
||||
"%.3f\n",
|
||||
scale, static_cast<double>(scale) / 120.0);
|
||||
self->m_preferredScale120 = scale;
|
||||
}
|
||||
}
|
||||
|
||||
// Stores the Wayland objects handed over by `tryCreate`; the destructor
// releases the viewport, subsurface, child surface and fractional-scale
// object again. If a fractional-scale object was supplied, the
// preferred-scale listener is registered here with `this` as its data.
SubsurfacePresenter::SubsurfacePresenter(wl_display *display, wl_surface *child,
                                         wl_subsurface *sub,
                                         zwp_linux_dmabuf_v1 *dmabuf,
                                         wp_viewport *viewport,
                                         wp_fractional_scale_v1 *frac_scale)
    : m_display(display),
      m_childSurface(child),
      m_subsurface(sub),
      m_dmabuf(dmabuf),
      m_viewport(viewport),
      m_fractionalScale(frac_scale) {
  if (m_fractionalScale == nullptr) return;
  wp_fractional_scale_v1_add_listener(m_fractionalScale,
                                      &kFractionalScaleListener, this);
}
|
||||
|
||||
// Tears down every Wayland object the presenter owns, in dependency order,
// then flushes the destroy requests to the compositor.
SubsurfacePresenter::~SubsurfacePresenter() {
  // The pending frame callback goes first. Wayland guarantees no events on
  // a destroyed proxy, so later dispatches of the event queue cannot deliver
  // its done event to the dangling `this` stored as listener data.
  if (m_frameCallback != nullptr) {
    wl_callback_destroy(m_frameCallback);
    m_frameCallback = nullptr;
  }

  // The cached wl_buffer must go BEFORE the child surface because it may
  // still be attached. Destroying it is safe whether or not the compositor
  // has released it (again: no further events on a destroyed proxy).
  if (m_cachedBuffer != nullptr) {
    wl_buffer_destroy(m_cachedBuffer);
    m_cachedBuffer = nullptr;
  }

  if (m_fractionalScale != nullptr) {
    wp_fractional_scale_v1_destroy(m_fractionalScale);
  }
  if (m_viewport != nullptr) {
    wp_viewport_destroy(m_viewport);
  }
  if (m_subsurface != nullptr) {
    wl_subsurface_destroy(m_subsurface);
  }
  if (m_childSurface != nullptr) {
    wl_surface_destroy(m_childSurface);
  }
  if (m_display != nullptr) {
    wl_display_flush(m_display);
  }
}
|
||||
|
||||
// Completes a fired wl_surface.frame callback: forgets it, destroys the
// spent proxy and notifies the consumer that the compositor is ready for
// another frame.
void SubsurfacePresenter::onFrameCallbackDone(wl_callback *cb) {
  // Only clear the slot when `cb` really is the tracked callback. A mismatch
  // should not happen — the listener data routes to exactly this presenter
  // and at most one callback is outstanding — but the check keeps future
  // refactors from clobbering a newer callback.
  const bool isTracked = (cb == m_frameCallback);
  if (isTracked) {
    m_frameCallback = nullptr;
  }
  // The callback is single-shot, so the proxy is always destroyed.
  wl_callback_destroy(cb);

  // Tell the consumer (e.g. GhosttySurface) it may submit the next frame.
  // This runs on the thread that pumps Wayland events (the Qt GUI thread),
  // so the hook can touch GUI-thread state directly.
  if (m_onFrameReady) {
    m_onFrameReady();
  }
}
|
||||
|
||||
void SubsurfacePresenter::presentDmabuf(int fd, uint32_t drm_format,
|
||||
uint64_t drm_modifier, uint32_t width,
|
||||
uint32_t height, uint32_t stride,
|
||||
int dest_width, int dest_height,
|
||||
bool y_invert) {
|
||||
if (fd < 0 || !m_dmabuf || !m_childSurface || !m_viewport) return;
|
||||
if (dest_width <= 0) dest_width = 1;
|
||||
if (dest_height <= 0) dest_height = 1;
|
||||
|
||||
// System-boundary input validation. width/height/stride flow in
|
||||
// from libghostty's renderer thread and are about to be passed
|
||||
// verbatim to the compositor. linux-dmabuf-v1 protocol errors
|
||||
// (`invalid_dimensions`, `invalid_format`, etc.) are FATAL — they
|
||||
// tear down the entire wl_display, killing every window in the
|
||||
// process. We MUST reject malformed inputs locally rather than
|
||||
// letting the compositor do it.
|
||||
//
|
||||
// Specifically reject: zero dimensions or stride, or any value
|
||||
// that would silently flip negative when cast to int32_t at the
|
||||
// create_immed call below (the wayland C API takes signed ints
|
||||
// for dimensions; uint32_t >= 2^31 wraps to negative).
|
||||
constexpr uint32_t kMaxDim = static_cast<uint32_t>(INT32_MAX);
|
||||
if (width == 0 || height == 0 || stride == 0 ||
|
||||
width > kMaxDim || height > kMaxDim || stride > kMaxDim) {
|
||||
std::fprintf(stderr,
|
||||
"[ghastty] SubsurfacePresenter: rejecting dmabuf with "
|
||||
"out-of-range dimensions (w=%u h=%u stride=%u)\n",
|
||||
width, height, stride);
|
||||
return;
|
||||
}
|
||||
// Stride sanity: must be at least 4 bytes per pixel for
|
||||
// 32-bit ARGB/XRGB/etc. — the only formats this presenter
|
||||
// currently advertises support for. Tighter than the protocol's
|
||||
// minimum but matches what the compositor will accept on attach.
|
||||
if (stride < static_cast<uint64_t>(width) * 4) {
|
||||
std::fprintf(stderr,
|
||||
"[ghastty] SubsurfacePresenter: rejecting dmabuf with "
|
||||
"stride=%u too small for width=%u (need >= %llu)\n",
|
||||
stride, width,
|
||||
static_cast<unsigned long long>(static_cast<uint64_t>(width) * 4));
|
||||
return;
|
||||
}
|
||||
|
||||
// Validate the (format, modifier) pair against the compositor's
|
||||
// advertised list before handing it to `create_immed`. If the
|
||||
// pair isn't on the list, the compositor will reject the
|
||||
// subsequent `create_immed` with `invalid_format` — a FATAL
|
||||
// protocol error that kills the entire wl_display, taking down
|
||||
// every window in the process. Better to drop this single frame
|
||||
// than to take down the app.
|
||||
{
|
||||
const PresenterGlobals &g = globalState();
|
||||
const auto it = g.modifiers.find(drm_format);
|
||||
bool ok = false;
|
||||
if (it != g.modifiers.end()) {
|
||||
for (const uint64_t m : it->second) {
|
||||
if (m == drm_modifier) { ok = true; break; }
|
||||
}
|
||||
}
|
||||
if (!ok) {
|
||||
std::fprintf(stderr,
|
||||
"[ghastty] SubsurfacePresenter: refusing dmabuf "
|
||||
"(fourcc=0x%08x mod=0x%llx) — compositor doesn't "
|
||||
"advertise this (format, modifier) pair\n",
|
||||
drm_format,
|
||||
static_cast<unsigned long long>(drm_modifier));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Wrap libghostty's borrowed fd in a wl_buffer. Cached across
|
||||
// frames by (kernel inode, shape) — see m_cachedInode in the
|
||||
// header for the full rationale. fstat the dmabuf fd to get the
|
||||
// anon_inode that uniquely identifies the dma-buf object; it's
|
||||
// stable across the dup that GhosttySurface did before parking,
|
||||
// and changes only when libghostty allocates a new Target.
|
||||
// fstat failure (rare; would indicate a closed fd, which we
|
||||
// already check above via `fd < 0`) falls through to cache miss
|
||||
// → create_immed will likely fail too, but the error path there
|
||||
// already logs cleanly.
|
||||
struct stat st;
|
||||
unsigned long inode = 0;
|
||||
if (::fstat(fd, &st) == 0) inode = static_cast<unsigned long>(st.st_ino);
|
||||
const bool cache_hit = m_cachedBuffer != nullptr &&
|
||||
inode != 0 &&
|
||||
m_cachedInode == inode &&
|
||||
m_cachedWidth == width &&
|
||||
m_cachedHeight == height &&
|
||||
m_cachedStride == stride &&
|
||||
m_cachedFormat == drm_format &&
|
||||
m_cachedModifier == drm_modifier &&
|
||||
m_cachedYInvert == y_invert;
|
||||
wl_buffer *buffer = nullptr;
|
||||
if (cache_hit) {
|
||||
buffer = m_cachedBuffer;
|
||||
} else {
|
||||
// Cache miss — destroy any stale buffer first so a failed
|
||||
// create_immed below leaves the cache empty (rather than half-
|
||||
// populated with the previous buffer that no longer matches the
|
||||
// new inputs).
|
||||
if (m_cachedBuffer) {
|
||||
wl_buffer_destroy(m_cachedBuffer);
|
||||
m_cachedBuffer = nullptr;
|
||||
m_cachedInode = 0;
|
||||
}
|
||||
zwp_linux_buffer_params_v1 *params =
|
||||
zwp_linux_dmabuf_v1_create_params(m_dmabuf);
|
||||
if (!params) return;
|
||||
zwp_linux_buffer_params_v1_add(params, fd, /*plane_idx*/ 0,
|
||||
/*offset*/ 0, stride,
|
||||
static_cast<uint32_t>(drm_modifier >> 32),
|
||||
static_cast<uint32_t>(drm_modifier & 0xFFFFFFFFu));
|
||||
const uint32_t buffer_flags =
|
||||
y_invert ? ZWP_LINUX_BUFFER_PARAMS_V1_FLAGS_Y_INVERT : 0;
|
||||
buffer = zwp_linux_buffer_params_v1_create_immed(
|
||||
params, static_cast<int32_t>(width), static_cast<int32_t>(height),
|
||||
drm_format, buffer_flags);
|
||||
zwp_linux_buffer_params_v1_destroy(params);
|
||||
if (!buffer) {
|
||||
// Surface the wl_display error code if the failure was a
|
||||
// protocol-fatal error (compositor rejected the buffer with
|
||||
// `invalid_format` / `invalid_dimensions` / etc., which kills
|
||||
// the wl_display). Without this, every subsequent presentDmabuf
|
||||
// call silently no-ops on the dead display and the cause stays
|
||||
// hidden until something else logs the disconnection.
|
||||
const int wl_err = wl_display_get_error(m_display);
|
||||
std::fprintf(stderr,
|
||||
"[ghastty] SubsurfacePresenter: create_immed returned null "
|
||||
"(fd=%d %ux%u fmt=0x%x mod=0x%llx wl_display_error=%d)\n",
|
||||
fd, width, height, drm_format,
|
||||
static_cast<unsigned long long>(drm_modifier), wl_err);
|
||||
return;
|
||||
}
|
||||
// Listener data is unused — see `bufferRelease` for why this is
|
||||
// nullptr (and the no-op release semantics that make the cache
|
||||
// safe).
|
||||
wl_buffer_add_listener(buffer, &kBufferListener, nullptr);
|
||||
m_cachedBuffer = buffer;
|
||||
m_cachedInode = inode;
|
||||
m_cachedWidth = width;
|
||||
m_cachedHeight = height;
|
||||
m_cachedStride = stride;
|
||||
m_cachedFormat = drm_format;
|
||||
m_cachedModifier = drm_modifier;
|
||||
m_cachedYInvert = y_invert;
|
||||
}
|
||||
|
||||
// Tell the compositor the destination size in surface-local
|
||||
// coordinates. With fractional scaling this is the logical pixel
|
||||
// size (e.g. 800x600) while the buffer is at device pixels (e.g.
|
||||
// 960x720 for 1.2× DPR). wp_viewport handles the mapping;
|
||||
// wl_surface.set_buffer_scale is intentionally NOT used here
|
||||
// because (a) it only supports integer scales, and (b) when
|
||||
// wp_fractional_scale_v1 is active the protocol forbids using
|
||||
// set_buffer_scale to anything other than 1.
|
||||
if (dest_width != m_lastDestWidth || dest_height != m_lastDestHeight) {
|
||||
wp_viewport_set_destination(m_viewport, dest_width, dest_height);
|
||||
m_lastDestWidth = dest_width;
|
||||
m_lastDestHeight = dest_height;
|
||||
}
|
||||
|
||||
wl_surface_attach(m_childSurface, buffer, 0, 0);
|
||||
// Damage the full buffer extent — terminals tend to update large
|
||||
// dirty rects anyway (cursor blink, scroll, repaint) so a precise
|
||||
// damage region wouldn't save much, and `damage_buffer` (vs
|
||||
// `damage`) uses buffer coordinates so it's resolution-correct.
|
||||
wl_surface_damage_buffer(m_childSurface, 0, 0, static_cast<int32_t>(width),
|
||||
static_cast<int32_t>(height));
|
||||
// Register a wl_surface.frame callback BEFORE the commit so the
|
||||
// compositor knows we want to be paced. Only request a new one if
|
||||
// none is outstanding — re-requesting before the prior fires would
|
||||
// leak callbacks. The done handler clears m_frameCallback, so the
|
||||
// next call here will register fresh.
|
||||
if (!m_frameCallback) {
|
||||
m_frameCallback = wl_surface_frame(m_childSurface);
|
||||
if (m_frameCallback) {
|
||||
wl_callback_add_listener(m_frameCallback, &kFrameCallbackListener,
|
||||
this);
|
||||
}
|
||||
}
|
||||
wl_surface_commit(m_childSurface);
|
||||
|
||||
wl_display_flush(m_display);
|
||||
if (int err = wl_display_get_error(m_display); err != 0) {
|
||||
std::fprintf(
|
||||
stderr,
|
||||
"[ghastty] SubsurfacePresenter: wl_display error %d after present\n",
|
||||
err);
|
||||
}
|
||||
}
|
||||
|
||||
// Changes the viewport destination size without attaching a new buffer.
//
// Does nothing when the viewport or child surface is missing, when either
// dimension is non-positive, or when the size equals the last one applied.
// Otherwise the new destination is recorded, set on the viewport, and the
// child surface is committed and the display flushed.
//
// The compositor stretches the currently attached buffer to the new extent
// once this state applies, so the subsurface covers the new area instead of
// leaving a transparent gap until the next presentDmabuf supplies a
// properly sized buffer. NOTE(review): `tryCreate` leaves the subsurface in
// its default sync mode, so this commit presumably only takes effect on the
// next parent commit — confirm against the caller.
void SubsurfacePresenter::resizeDestination(int dest_width, int dest_height) {
  const bool haveObjects = m_viewport != nullptr && m_childSurface != nullptr;
  if (!haveObjects) return;

  const bool validSize = dest_width > 0 && dest_height > 0;
  if (!validSize) return;

  const bool unchanged =
      dest_width == m_lastDestWidth && dest_height == m_lastDestHeight;
  if (unchanged) return;

  m_lastDestWidth = dest_width;
  m_lastDestHeight = dest_height;
  wp_viewport_set_destination(m_viewport, dest_width, dest_height);

  wl_surface_commit(m_childSurface);
  wl_display_flush(m_display);
}
|
||||
|
||||
// Moves the subsurface to (x, y). Does nothing when there is no subsurface
// or the position equals the last one sent.
void SubsurfacePresenter::setPosition(int x, int y) {
  if (m_subsurface == nullptr) return;

  const bool samePosition = (x == m_lastX) && (y == m_lastY);
  if (samePosition) return;

  m_lastX = x;
  m_lastY = y;
  wl_subsurface_set_position(m_subsurface, x, y);

  // The position is double-buffered on the parent surface, so it only lands
  // once the caller triggers a parent commit (forceParentCommit on the
  // GhosttySurface side). Flushing here makes sure the request is already on
  // the wire when that happens.
  wl_display_flush(m_display);
}
|
||||
|
||||
void SubsurfacePresenter::hide() {
|
||||
if (!m_childSurface) return;
|
||||
// Attach NULL = no buffer. After commit + parent commit, the
|
||||
// subsurface contributes nothing to the compositor's frame.
|
||||
// Caller is responsible for forceParentCommit on its side.
|
||||
wl_surface_attach(m_childSurface, nullptr, 0, 0);
|
||||
wl_surface_commit(m_childSurface);
|
||||
wl_display_flush(m_display);
|
||||
}
|
||||
|
||||
void SubsurfacePresenter::flushDisplay() {
|
||||
if (m_display) wl_display_flush(m_display);
|
||||
}
|
||||
|
||||
bool SubsurfacePresenter::reattachCached() {
|
||||
if (!m_childSurface || !m_cachedBuffer) return false;
|
||||
// Re-show whatever we had attached before `hide()`. The cached
|
||||
// wl_buffer survives across hide/show because the release
|
||||
// listener no-ops (see `bufferRelease`). The dmabuf backing the
|
||||
// buffer is still alive — libghostty owns the underlying
|
||||
// VkDeviceMemory until the next Target.deinit (resize), and
|
||||
// dma-buf kernel ref-counting keeps the pages pinned regardless
|
||||
// of our client-side state.
|
||||
//
|
||||
// The content may be one frame stale (whatever was rendered just
|
||||
// before Hide), but that's better than a transparent gap while
|
||||
// the renderer thread spins up its first new frame after Show —
|
||||
// the parent surface has WA_TranslucentBackground, so without a
|
||||
// re-attach the user sees through to whatever is behind the
|
||||
// window. The renderer's next frame overwrites this within
|
||||
// DRAW_INTERVAL.
|
||||
wl_surface_attach(m_childSurface, m_cachedBuffer, 0, 0);
|
||||
wl_surface_damage_buffer(m_childSurface, 0, 0,
|
||||
static_cast<int32_t>(m_cachedWidth),
|
||||
static_cast<int32_t>(m_cachedHeight));
|
||||
// Register a frame callback so the consumer's pacing state machine
|
||||
// gets a "compositor is ready" event after this re-attach too —
|
||||
// otherwise a tab switch could leave m_compositorReady stuck false
|
||||
// (a stale frame callback from the pre-Hide commit may have been
|
||||
// discarded by the compositor on the NULL attach).
|
||||
if (!m_frameCallback) {
|
||||
m_frameCallback = wl_surface_frame(m_childSurface);
|
||||
if (m_frameCallback) {
|
||||
wl_callback_add_listener(m_frameCallback, &kFrameCallbackListener,
|
||||
this);
|
||||
}
|
||||
}
|
||||
wl_surface_commit(m_childSurface);
|
||||
wl_display_flush(m_display);
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace wayland
|
||||
|
|
@ -0,0 +1,240 @@
|
|||
// Wayland subsurface presenter for `GhosttySurface`.
|
||||
//
|
||||
// Owns one `wl_subsurface` parented to the `GhosttySurface`'s native
|
||||
// `wl_surface`, plus the `zwp_linux_dmabuf_v1` machinery for wrapping
|
||||
// libghostty's dmabuf fds in `wl_buffer`s and attaching them to that
|
||||
// subsurface. The compositor scans the buffers out directly — no
|
||||
// mmap, no memcpy, no QImage, no QPainter blit on the present path.
|
||||
//
|
||||
// The process-wide compositor modifier registry that used to share
|
||||
// this header now lives in `DmabufRegistry.h`. The implementations
|
||||
// share `globalState()` machinery in `SubsurfacePresenter.cpp` but
|
||||
// the API surfaces are disjoint: presenter is per-widget, registry
|
||||
// is process-wide and read-only.
|
||||
//
|
||||
// Wayland-only by project decision (the Qt frontend is Wayland-only;
|
||||
// see `feedback-qt-no-x11` memory). If the host isn't on a Wayland
|
||||
// QPA platform or the compositor lacks the required globals,
|
||||
// `tryCreate` returns nullptr — the caller decides whether that's a
|
||||
// fatal error.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
|
||||
// Opaque Wayland / Qt types. Forward-declared so this header does not pull
// the Wayland protocol headers or QtGui into every includer.
struct wl_buffer;
struct wl_callback;
struct wl_display;
struct wl_subsurface;
struct wl_surface;
struct zwp_linux_dmabuf_v1;
struct wp_viewport;
struct wp_fractional_scale_v1;
class QWindow;

namespace wayland {

// Presents dmabuf-backed frames on a Wayland subsurface parented to a
// top-level Qt window. Non-copyable; instances are obtained through
// `tryCreate`.
class SubsurfacePresenter {
public:
    // Build a subsurface parented to `topLevel`'s native `wl_surface`,
    // and bind the linux-dmabuf-v1 global on the same display. Pass
    // the TOP-LEVEL QWindow (e.g. `widget->window()->windowHandle()`)
    // — NOT a per-widget native QWindow. We attach all panes/splits
    // as siblings under the top-level surface and position each with
    // `setPosition`, instead of giving each pane its own QWindow
    // (which Qt's QSplitter-embedded child widgets don't handle
    // cleanly: "QWidgetWindow must be a top level window" warning,
    // and the result renders black).
    //
    // Returns nullptr if any prerequisite is missing (non-Wayland QPA,
    // null `wl_display`, `wl_subcompositor` unbindable,
    // `zwp_linux_dmabuf_v1` unbindable, etc.).
    static std::unique_ptr<SubsurfacePresenter> tryCreate(QWindow *topLevel);

    ~SubsurfacePresenter();

    SubsurfacePresenter(const SubsurfacePresenter &) = delete;
    SubsurfacePresenter &operator=(const SubsurfacePresenter &) = delete;

    // Hand a dmabuf-backed frame to the compositor: wrap the fd in a
    // `wl_buffer` via `zwp_linux_buffer_params_v1.create_immed`, attach
    // to the subsurface, damage, commit. MUST be called on the Qt GUI
    // thread (the thread that owns the wl_display dispatch); the
    // renderer thread should marshal frames through a Qt-side queue.
    //
    // libghostty owns the fd; this method does not close it. The
    // wayland client library duplicates the fd kernel-side via
    // SCM_RIGHTS, so the compositor's reference survives even after
    // libghostty reuses or closes its handle.
    //
    // `dest_width` / `dest_height` are the size of the subsurface in
    // PARENT surface-local coordinates (i.e. logical pixels). For
    // integer scales they match the buffer dimensions divided by the
    // scale; for fractional scales they're independent (set via
    // wp_viewport.set_destination, which decouples buffer dimensions
    // from surface area).
    //
    // `y_invert` requests the compositor flip the buffer vertically
    // when sampling. The OpenGL renderer's coordinate convention is
    // bottom-left origin (Y up), but Wayland/DRM samples top-down —
    // without the flag, GL frames render upside-down. Vulkan
    // rasterizes Y-down by default and passes false.
    void presentDmabuf(int fd, uint32_t drm_format, uint64_t drm_modifier,
                       uint32_t width, uint32_t height, uint32_t stride,
                       int dest_width, int dest_height,
                       bool y_invert = false);

    // Compositor-preferred fractional scale for this surface, in
    // units of 1/120 (e.g. 144 = 1.2, 180 = 1.5, 240 = 2.0). Returns
    // 120 (= 1.0) until the compositor sends its first
    // wp_fractional_scale_v1.preferred_scale event for our surface.
    //
    // Currently INFORMATIONAL only: GhosttySurface uses Qt's
    // devicePixelRatioF() for buffer sizing (which Qt derives from
    // the same protocol on Wayland), so the two values agree at
    // steady state. Exposed for diagnostics + a future direct-
    // protocol path that bypasses Qt's DPR cache lag during a
    // screen-change race.
    uint32_t preferredScale120() const noexcept { return m_preferredScale120; }

    // Stretch the existing subsurface buffer to a new destination
    // size WITHOUT attaching a new buffer. Used at the *start* of a
    // resize, before the renderer has produced a new-size frame:
    // wp_viewport.set_destination is double-buffered on the child
    // surface, so committing the child here in desync mode applies
    // the new destination immediately and the compositor stretches
    // the old buffer to fill it. Result: the parent surface can grow
    // to its new size with the subsurface already covering the new
    // area (briefly stretched), instead of leaving a one-frame
    // transparent gap where the translucent parent shows through.
    //
    // The next presentDmabuf call (with the real new-size buffer)
    // replaces the stretched content, ending the brief blur.
    //
    // Same pattern mpv's vo_dmabuf_wayland uses for its video
    // subsurface during resize.
    void resizeDestination(int dest_width, int dest_height);

    // Update the subsurface position in parent-surface-local coords.
    // For panes inside splits / tabs, position is the GhosttySurface
    // widget's offset within the top-level (`mapTo(window(),
    // QPoint(0,0))`). wl_subsurface.set_position is double-buffered
    // on the *parent* surface — caller must trigger a parent commit
    // (Qt's QtWaylandClient::QWaylandWindow::commit()) for the new
    // position to apply. No-op if the position hasn't changed.
    void setPosition(int x, int y);

    // Detach the currently-attached buffer so the subsurface becomes
    // invisible. Called when the owning GhosttySurface hides (tab
    // switch) so the inactive pane's pixels don't ghost on top of
    // whatever the active tab is showing in the same on-screen
    // region. The next presentDmabuf call re-attaches a buffer and
    // the subsurface becomes visible again.
    void hide();

    // Register a callback fired (on the GUI thread, via Wayland event
    // queue dispatch) when the compositor signals it's ready for the
    // next frame on this subsurface. Lets the caller pace presents at
    // the compositor's refresh rate instead of unconditionally
    // committing every renderer frame.
    //
    // The callback fires AT MOST ONCE per `presentDmabuf` /
    // `reattachCached` call — the underlying `wl_surface.frame`
    // request is single-shot per commit. After the callback fires,
    // the next present's commit will register a new frame_callback.
    using OnFrameReady = std::function<void()>;
    void setOnFrameReady(OnFrameReady cb) { m_onFrameReady = std::move(cb); }

    // Flush the underlying wl_display to push any queued requests
    // to the compositor. Useful after a forceParentCommit on the
    // Qt side (which queues a parent wl_surface.commit but doesn't
    // wl_display_flush), so the combined "child commit + parent
    // commit" reach the compositor in one shot rather than racing
    // Qt's next event-loop flush.
    void flushDisplay();

    // Re-attach + commit the most recently cached wl_buffer, if any.
    // Called from `QEvent::Show` so a tab-switch / re-show sees the
    // last frame immediately rather than a transparent area while
    // the renderer thread spins up its first new frame. Without this,
    // the parent surface paints through (WA_TranslucentBackground)
    // and the user sees a flash of whatever is behind the window.
    // Returns true if a cached buffer was actually re-attached;
    // false if the cache was empty (first show — caller is
    // responsible for the new-tab flash mitigation if needed).
    bool reattachCached();

    // Called from the wp_fractional_scale_v1.preferred_scale event.
    // Public so the C-style listener struct at file scope in the .cpp
    // can name it; not part of the API for other call sites.
    static void onPreferredScale(void *data, wp_fractional_scale_v1 *,
                                 uint32_t scale);

    // wl_callback::done handler body. This is a regular (non-static)
    // member: the file-scope C-style listener in the .cpp recovers the
    // owning presenter from the listener's `data` pointer and forwards
    // here, which is why it has to be public. Destroys the callback
    // proxy, clears m_frameCallback, and invokes m_onFrameReady if set.
    // Not part of the API for other call sites.
    void onFrameCallbackDone(wl_callback *cb);

private:
    SubsurfacePresenter(wl_display *display, wl_surface *child,
                        wl_subsurface *sub, zwp_linux_dmabuf_v1 *dmabuf,
                        wp_viewport *viewport,
                        wp_fractional_scale_v1 *frac_scale);

    // Wayland handles, set by the private constructor. Defaulted to
    // nullptr so no member is ever left indeterminate, regardless of how
    // the constructor's initializer list evolves.
    wl_display *m_display = nullptr;
    wl_surface *m_childSurface = nullptr;
    wl_subsurface *m_subsurface = nullptr;
    zwp_linux_dmabuf_v1 *m_dmabuf = nullptr;
    wp_viewport *m_viewport = nullptr;
    wp_fractional_scale_v1 *m_fractionalScale = nullptr;

    uint32_t m_preferredScale120 = 120; // default: 1.0×
    int m_lastDestWidth = 0;
    int m_lastDestHeight = 0;
    int m_lastX = 0;
    int m_lastY = 0;

    // Pending wl_surface.frame callback for compositor-paced presents.
    // Null between frame_done and the next presentDmabuf commit. Non-
    // null between presentDmabuf and frame_done. Single-shot — the
    // done handler destroys it and clears the field, then invokes
    // `m_onFrameReady` if set.
    wl_callback *m_frameCallback = nullptr;
    OnFrameReady m_onFrameReady;

    // wl_buffer cache keyed by dma-buf identity (kernel inode of the
    // anon_inode backing the dma-buf, which is unique per Target
    // regardless of fd-number reuse) plus the layout-relevant shape.
    // libghostty re-uses the same dmabuf across frames until the
    // next Target.deinit (resize); cache hits skip the create_immed
    // round-trip + compositor-side dmabuf import that dominated
    // GUI-thread CPU at 125 FPS.
    //
    // We can't key on the caller's fd value because GhosttySurface
    // now dups the fd on the renderer thread (to outlive libghostty's
    // close — see 22713b0d3) so the value is fresh per frame. Inode
    // identity is stable across our dup AND across libghostty's
    // close → reopen cycles, so cache invalidation matches Target
    // identity exactly: same Target → same inode → cache hit; new
    // Target → new inode → cache miss → recreate.
    //
    // Cache only stores the wl_buffer; the compositor SCM_RIGHTS-
    // dup'd the fd into its own address space at create_immed time,
    // so the cached wl_buffer doesn't need our fd to outlive the
    // call. The caller owns + closes its own dup.
    wl_buffer *m_cachedBuffer = nullptr;
    // Fixed 64-bit storage: `ino_t` can be 64 bits wide even where
    // `unsigned long` is only 32, and a truncated inode could alias two
    // distinct dma-bufs in the cache key. 0 = empty cache (anon_inode
    // ino > 0).
    uint64_t m_cachedInode = 0;
    uint32_t m_cachedWidth = 0;
    uint32_t m_cachedHeight = 0;
    uint32_t m_cachedStride = 0;
    uint32_t m_cachedFormat = 0;
    uint64_t m_cachedModifier = 0;
    bool m_cachedYInvert = false;
};

} // namespace wayland
|
||||
|
|
@ -353,6 +353,7 @@ pub const Platform = union(PlatformTag) {
|
|||
macos: MacOS,
|
||||
ios: IOS,
|
||||
opengl: OpenGL,
|
||||
vulkan: Vulkan,
|
||||
|
||||
// If our build target for libghostty is not darwin then we do
|
||||
// not include macos support at all.
|
||||
|
|
@ -395,6 +396,70 @@ pub const Platform = union(PlatformTag) {
|
|||
present: *const fn (?*anyopaque) callconv(.c) void,
|
||||
};
|
||||
|
||||
/// Host-side Vulkan integration (fork-only). As with `OpenGL` above,
/// the host — not libghostty — owns the VkInstance, VkPhysicalDevice,
/// VkDevice and VkQueue. Rendered frames travel back to the host as
/// dmabuf file descriptors, so the host can sample them without a CPU
/// readback.
///
/// Every handle crosses the boundary as `?*anyopaque` so that users of
/// the C API can build without the Vulkan headers; interpret them as
/// VkInstance, VkPhysicalDevice, VkDevice and VkQueue respectively.
pub const Vulkan = struct {
    /// Opaque host pointer handed back as the first argument of every
    /// callback in this struct.
    userdata: ?*anyopaque,

    /// Looks up `vkGetInstanceProcAddr`, returned type-erased as
    /// `?*anyopaque`. The remainder of the Vulkan loader is
    /// bootstrapped by libghostty from this one entry point.
    get_instance_proc_addr: *const fn (?*anyopaque, [*:0]const u8) callconv(.c) ?*anyopaque,

    /// Accessors for the host-owned Vulkan handles. libghostty never
    /// destroys what these return.
    instance: HandleFn,
    physical_device: HandleFn,
    device: HandleFn,
    queue: HandleFn,
    queue_family_index: *const fn (?*anyopaque) callconv(.c) u32,

    /// Reports which DRM modifiers the compositor supports for one
    /// DRM_FORMAT_* fourcc. Intended for two-pass use: first call with
    /// `out=null, capacity=0` to learn the count, then call again with
    /// a buffer of that size. Returns the number of modifiers actually
    /// written. The renderer intersects the result with the GPU's
    /// per-modifier feature set to choose a tiling the compositor will
    /// accept on attach.
    ///
    /// Arguments, in order: userdata, DRM_FORMAT_* fourcc, out buffer,
    /// capacity of the out buffer.
    get_supported_modifiers: *const fn (?*anyopaque, u32, ?[*]u64, usize) callconv(.c) usize,

    /// Delivers one rendered frame to the host as a dmabuf fd for
    /// composition. libghostty keeps ownership of the backing
    /// VkDeviceMemory, and the fd is valid only while the call is in
    /// progress — a host that needs it longer must `dup()` it.
    /// `image_backed` distinguishes an fd exported from a VkImage
    /// (directly importable as a 2D image via linux-dmabuf-v1) from
    /// one exported from a VkBuffer (only usable via mmap + CPU
    /// readback); `vulkan/Target.zig` and `include/ghostty.h` carry
    /// the full rationale.
    ///
    /// Arguments, in order: userdata, dmabuf fd, DRM_FORMAT_* fourcc,
    /// DRM modifier, width (pixels), height (pixels), stride (bytes),
    /// image_backed.
    present: *const fn (?*anyopaque, i32, u32, u64, u32, u32, u32, bool) callconv(.c) void,

    /// Shared signature of the four handle accessors above: takes the
    /// userdata pointer, returns one opaque handle.
    const HandleFn = *const fn (?*anyopaque) callconv(.c) ?*anyopaque;
};
|
||||
|
||||
// The C ABI compatible version of this union. The tag is expected
|
||||
// to be stored elsewhere.
|
||||
pub const C = extern union {
|
||||
|
|
@ -416,6 +481,35 @@ pub const Platform = union(PlatformTag) {
|
|||
release_current: ?*const fn (?*anyopaque) callconv(.c) void,
|
||||
present: ?*const fn (?*anyopaque) callconv(.c) void,
|
||||
},
|
||||
|
||||
vulkan: extern struct {
|
||||
userdata: ?*anyopaque,
|
||||
get_instance_proc_addr: ?*const fn (
|
||||
?*anyopaque,
|
||||
[*:0]const u8,
|
||||
) callconv(.c) ?*anyopaque,
|
||||
instance: ?*const fn (?*anyopaque) callconv(.c) ?*anyopaque,
|
||||
physical_device: ?*const fn (?*anyopaque) callconv(.c) ?*anyopaque,
|
||||
device: ?*const fn (?*anyopaque) callconv(.c) ?*anyopaque,
|
||||
queue: ?*const fn (?*anyopaque) callconv(.c) ?*anyopaque,
|
||||
queue_family_index: ?*const fn (?*anyopaque) callconv(.c) u32,
|
||||
get_supported_modifiers: ?*const fn (
|
||||
?*anyopaque,
|
||||
u32,
|
||||
?[*]u64,
|
||||
usize,
|
||||
) callconv(.c) usize,
|
||||
present: ?*const fn (
|
||||
?*anyopaque,
|
||||
i32,
|
||||
u32,
|
||||
u64,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
bool,
|
||||
) callconv(.c) void,
|
||||
},
|
||||
};
|
||||
|
||||
/// Initialize a Platform a tag and configuration from the C ABI.
|
||||
|
|
@ -450,6 +544,47 @@ pub const Platform = union(PlatformTag) {
|
|||
break :opengl error.PresentMustBeSet,
|
||||
} };
|
||||
},
|
||||
|
||||
.vulkan => vulkan: {
|
||||
const config = c_platform.vulkan;
|
||||
// Collapse the eight per-callback "MustBeSet"
|
||||
// variants into a single `error.MissingVulkanCallback`.
|
||||
// Pre-this, every caller of `Platform.init` had to
|
||||
// handle 8 separate error tags (or `try` swallow
|
||||
// them) — eight names that all mean "the host
|
||||
// didn't fill out one of these fields." Log which
|
||||
// one was null for diagnostics; the error tag
|
||||
// itself stays narrow.
|
||||
const which: ?[]const u8 = blk: {
|
||||
if (config.get_instance_proc_addr == null) break :blk "get_instance_proc_addr";
|
||||
if (config.instance == null) break :blk "instance";
|
||||
if (config.physical_device == null) break :blk "physical_device";
|
||||
if (config.device == null) break :blk "device";
|
||||
if (config.queue == null) break :blk "queue";
|
||||
if (config.queue_family_index == null) break :blk "queue_family_index";
|
||||
if (config.get_supported_modifiers == null) break :blk "get_supported_modifiers";
|
||||
if (config.present == null) break :blk "present";
|
||||
break :blk null;
|
||||
};
|
||||
if (which) |name| {
|
||||
std.log.scoped(.embedded).err(
|
||||
"ghostty_platform_vulkan_s.{s} is null",
|
||||
.{name},
|
||||
);
|
||||
break :vulkan error.MissingVulkanCallback;
|
||||
}
|
||||
break :vulkan .{ .vulkan = .{
|
||||
.userdata = config.userdata,
|
||||
.get_instance_proc_addr = config.get_instance_proc_addr.?,
|
||||
.instance = config.instance.?,
|
||||
.physical_device = config.physical_device.?,
|
||||
.device = config.device.?,
|
||||
.queue = config.queue.?,
|
||||
.queue_family_index = config.queue_family_index.?,
|
||||
.get_supported_modifiers = config.get_supported_modifiers.?,
|
||||
.present = config.present.?,
|
||||
} };
|
||||
},
|
||||
};
|
||||
}
|
||||
};
|
||||
|
|
@ -461,6 +596,8 @@ pub const PlatformTag = enum(c_int) {
|
|||
macos = 1,
|
||||
ios = 2,
|
||||
opengl = 3,
|
||||
// Fork-only platform tag for hosts that drive `src/renderer/Vulkan.zig`.
|
||||
vulkan = 4,
|
||||
};
|
||||
|
||||
pub const EnvVar = extern struct {
|
||||
|
|
@ -538,6 +675,25 @@ pub const Surface = struct {
|
|||
.x = @floatCast(opts.scale_factor),
|
||||
.y = @floatCast(opts.scale_factor),
|
||||
},
|
||||
// Initial surface size. Must be large enough for the
|
||||
// terminal to have at least a few cols/rows by default,
|
||||
// because the shell process is forked as part of
|
||||
// Surface.init and the PTY's winsize is whatever this
|
||||
// size translates to. Tools like fastfetch query winsize
|
||||
// (TIOCGWINSZ) on startup and lay out their kitty-image
|
||||
// escape codes based on what they see; if winsize reports
|
||||
// 0 cols × 0 rows, fastfetch sends the image with c=0
|
||||
// r=0, and `Placement.pixelSize` (graphics_storage.zig)
|
||||
// returns the image's NATIVE pixel dimensions — visible
|
||||
// to the user as a giant Kusanagi (or whatever logo)
|
||||
// filling the whole pane. 800×600 was the historic
|
||||
// default; restoring it. Race against a real wrong-size
|
||||
// first frame coinciding with the widget's device-pixel
|
||||
// size at a fractional DPR is handled separately by the
|
||||
// host apprt sending its real size as early as possible
|
||||
// (Qt: immediate ghostty_surface_set_size right after
|
||||
// ghostty_surface_new, inheriting the parent surface's
|
||||
// size for new tabs).
|
||||
.size = .{ .width = 800, .height = 600 },
|
||||
.cursor_pos = .{ .x = -1, .y = -1 },
|
||||
};
|
||||
|
|
|
|||
|
|
@ -688,6 +688,14 @@ pub const ExeEntrypoint = enum {
|
|||
webgen_config,
|
||||
webgen_actions,
|
||||
webgen_commands,
|
||||
/// Build-time tool: compiles one of the renderer's built-in
|
||||
/// GLSL shaders to SPIR-V and writes the bytes to stdout.
|
||||
/// Invoked by `src/build/VulkanSpv.zig` once per (shader, stage)
|
||||
/// pair so libghostty can `@embedFile` the resulting .spv
|
||||
/// instead of running glslang at runtime — eliminates the
|
||||
/// per-process TPoolAllocator high-water-mark leak (~10 MB)
|
||||
/// that the Vulkan path otherwise pays on first surface init.
|
||||
vulkan_spvgen,
|
||||
};
|
||||
|
||||
/// The release channel for the build.
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ const HelpStrings = @import("HelpStrings.zig");
|
|||
const MetallibStep = @import("MetallibStep.zig");
|
||||
const UnicodeTables = @import("UnicodeTables.zig");
|
||||
const GhosttyFrameData = @import("GhosttyFrameData.zig");
|
||||
const VulkanSpv = @import("VulkanSpv.zig");
|
||||
const DistResource = @import("GhosttyDist.zig").Resource;
|
||||
|
||||
config: *const Config,
|
||||
|
|
@ -18,6 +19,9 @@ metallib: ?*MetallibStep,
|
|||
unicode_tables: UnicodeTables,
|
||||
framedata: GhosttyFrameData,
|
||||
uucode_tables: std.Build.LazyPath,
|
||||
/// Vulkan-only: build-time SPIR-V blobs for the renderer's
|
||||
/// built-in shaders. Null on non-Vulkan builds.
|
||||
vulkan_spv: ?VulkanSpv,
|
||||
|
||||
/// Used to keep track of a list of file sources.
|
||||
pub const LazyPathList = std.ArrayList(std.Build.LazyPath);
|
||||
|
|
@ -37,6 +41,15 @@ pub fn init(b: *std.Build, cfg: *const Config) !SharedDeps {
|
|||
.unicode_tables = try .init(b, uucode_tables),
|
||||
.framedata = try .init(b),
|
||||
.uucode_tables = uucode_tables,
|
||||
// Vulkan-only build artifact: precompiled SPV blobs for
|
||||
// the renderer's built-in shaders. Skipping the build
|
||||
// step entirely on non-Vulkan builds avoids paying for
|
||||
// a host-target glslang link the OpenGL/Metal renderers
|
||||
// would never use.
|
||||
.vulkan_spv = if (cfg.renderer == .vulkan)
|
||||
try VulkanSpv.init(b, cfg)
|
||||
else
|
||||
null,
|
||||
|
||||
// Setup by retarget
|
||||
.options = undefined,
|
||||
|
|
@ -452,6 +465,14 @@ pub fn add(
|
|||
if (b.lazyDependency("opengl", .{})) |dep| {
|
||||
step.root_module.addImport("opengl", dep.module("opengl"));
|
||||
}
|
||||
// The Vulkan binding is only loaded when the renderer is .vulkan
|
||||
// (still in development — see `src/renderer/Vulkan.zig`). Linking
|
||||
// libvulkan happens further down in `linkSystemDeps`.
|
||||
if (self.config.renderer == .vulkan) {
|
||||
if (b.lazyDependency("vulkan", .{})) |dep| {
|
||||
step.root_module.addImport("vulkan", dep.module("vulkan"));
|
||||
}
|
||||
}
|
||||
if (b.lazyDependency("vaxis", .{})) |dep| {
|
||||
step.root_module.addImport("vaxis", dep.module("vaxis"));
|
||||
}
|
||||
|
|
@ -600,6 +621,15 @@ pub fn add(
|
|||
});
|
||||
}
|
||||
|
||||
// Link the system Vulkan loader for the Vulkan renderer. The
|
||||
// bindings themselves are in `pkg/vulkan` (added above as a Zig
|
||||
// module). On Linux this resolves to libvulkan.so via the standard
|
||||
// dynamic linker; Vulkan headers (`vulkan/vulkan.h`) come from the
|
||||
// standard system include path (`vulkan-headers` package).
|
||||
if (self.config.renderer == .vulkan) {
|
||||
step.linkSystemLibrary2("vulkan", dynamic_link_opts);
|
||||
}
|
||||
|
||||
// If we're building an exe then we have additional dependencies.
|
||||
if (step.kind != .lib) {
|
||||
// When we're targeting flatpak we ALWAYS link GTK so we
|
||||
|
|
@ -615,6 +645,7 @@ pub fn add(
|
|||
self.help_strings.addImport(step);
|
||||
self.unicode_tables.addImport(step);
|
||||
self.framedata.addImport(step);
|
||||
if (self.vulkan_spv) |*v| v.addImport(step);
|
||||
|
||||
return static_libs;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,167 @@
|
|||
//! Build-time SPV precompile for the renderer's 9 built-in
|
||||
//! shaders. Builds a host-target executable from
|
||||
//! `src/vulkan_spvgen.zig` that takes (shader_name, stage) on
|
||||
//! argv and emits SPIR-V bytes on stdout, then runs it 9 times
|
||||
//! at build time and generates a `vulkan_spv.zig` module that
|
||||
//! exposes each blob as a 4-byte-aligned `pub const X: []const u32`
|
||||
//! view over the `@embedFile("X.spv")` bytes.
|
||||
//!
|
||||
//! Why: see `src/vulkan_spvgen.zig` for the leak/perf rationale.
|
||||
//! Pre-compiling built-ins at build time lets the runtime call
|
||||
//! `Module.initFromSpirv` instead of `Module.init`, skipping
|
||||
//! glslang entirely on the per-process first-surface init that
|
||||
//! otherwise hits glslang's TLS TPoolAllocator and leaves
|
||||
//! ~10 MB of un-releasable pool pages.
|
||||
//!
|
||||
//! Mirrors `HelpStrings.zig`'s structure. Conditional: only
|
||||
//! constructed when the build is targeting the Vulkan renderer
|
||||
//! (caller gates this).
|
||||
|
||||
const VulkanSpv = @This();
|
||||
|
||||
const std = @import("std");
|
||||
const Config = @import("Config.zig");
|
||||
|
||||
/// One built-in shader: the decl name it is published under and the
/// pipeline stage string passed to the generator on argv.
const Shader = struct { name: []const u8, stage: []const u8 };

/// The renderer's 9 built-in shaders as (name, stage) pairs. The
/// length is spelled out in the type so that adding or dropping an
/// entry without updating the count fails to compile. Must stay in
/// sync with the decls of `renderer.vulkan.shaders.source` and with
/// the matching `Module.init` call sites in
/// `renderer/vulkan/shaders.zig::Shaders.init`.
const shaders: [9]Shader = .{
    Shader{ .name = "bg_color_frag", .stage = "fragment" },
    Shader{ .name = "bg_image_frag", .stage = "fragment" },
    Shader{ .name = "bg_image_vert", .stage = "vertex" },
    Shader{ .name = "cell_bg_frag", .stage = "fragment" },
    Shader{ .name = "cell_text_frag", .stage = "fragment" },
    Shader{ .name = "cell_text_vert", .stage = "vertex" },
    Shader{ .name = "full_screen_vert", .stage = "vertex" },
    Shader{ .name = "image_frag", .stage = "fragment" },
    Shader{ .name = "image_vert", .stage = "vertex" },
};
|
||||
|
||||
/// Host-target executable; built once, run 9 times.
|
||||
exe: *std.Build.Step.Compile,
|
||||
|
||||
/// LazyPath to the generated `vulkan_spv.zig` module.
|
||||
output: std.Build.LazyPath,
|
||||
|
||||
/// Set up the SPIR-V generator: a host-target `vulkan_spvgen`
/// executable, one run of it per entry in `shaders`, and a generated
/// `vulkan_spv.zig` stub that sits next to the captured `.spv` files.
/// Returns the executable together with the LazyPath of the stub.
pub fn init(b: *std.Build, cfg: *const Config) !VulkanSpv {
    const gen_exe = b.addExecutable(.{
        .name = "vulkan_spvgen",
        .root_module = b.createModule(.{
            // Rooted at main.zig, whose exe_entrypoint switch then
            // selects vulkan_spvgen.zig — the same arrangement
            // helpgen uses.
            .root_source_file = b.path("src/main.zig"),
            .target = b.graph.host,
            // Must be ReleaseFast: a Debug build emits
            // R_X86_64_PC64 relocations when linking glslang's
            // large static library, and Zig's bundled linker can't
            // process those. Release mode uses the small code
            // model + system linker.
            .optimize = .ReleaseFast,
            .strip = false,
            .omit_frame_pointer = false,
            .unwind_tables = .sync,
        }),
    });

    // build_options for the generator: the caller's Config with only
    // the entrypoint overridden.
    var gen_cfg = cfg.*;
    gen_cfg.exe_entrypoint = .vulkan_spvgen;
    const build_opts = b.addOptions();
    try gen_cfg.addOptions(build_opts);
    gen_exe.root_module.addOptions("build_options", build_opts);

    // Imports the generator needs. These mirror what SharedDeps wires
    // up for the renderer build, but target b.graph.host because the
    // tool executes on the build machine.
    if (b.lazyDependency("glslang", .{
        .target = b.graph.host,
        .optimize = .ReleaseFast,
    })) |glslang_dep| {
        gen_exe.root_module.addImport("glslang", glslang_dep.module("glslang"));
        gen_exe.linkLibrary(glslang_dep.artifact("glslang"));
    }
    // `vulkan` is a header-only Zig module: its build.zig does nothing
    // but `b.addModule(...)`, so there are no target / optimize args to
    // forward.
    if (b.lazyDependency("vulkan", .{})) |vulkan_dep| {
        gen_exe.root_module.addImport("vulkan", vulkan_dep.module("vulkan"));
    }

    // All outputs land in one WriteFiles directory: a `<name>.spv` per
    // shader (the generator's captured stdout) plus the .zig stub the
    // renderer imports as "vulkan_spv". The stub publishes each blob
    // as a `[]const u32` decl named after the shader.
    const out_dir = b.addWriteFiles();
    var stub: std.ArrayList(u8) = .empty;
    defer stub.deinit(b.allocator);
    try stub.appendSlice(b.allocator,
        \\// AUTO-GENERATED by src/build/VulkanSpv.zig — do not edit.
        \\// Re-run `zig build -Drenderer=vulkan` after editing any
        \\// of the renderer's built-in GLSL shaders.
        \\//
        \\// Each shader is exposed as `[]const u32` directly. The
        \\// underlying storage is a comptime-aligned u8 array
        \\// (`align(@alignOf(u32))`) so the bytesAsSlice cast is
        \\// safe — the previous `@alignCast` of an unaligned
        \\// @embedFile slice was UB and caused subtle SPIR-V
        \\// misinterpretation (images rendered at wrong size on
        \\// NVIDIA, which accepted the misaligned data and treated
        \\// it as a slightly different program). Module.initFromSpirv
        \\// takes []const u32 directly so callers can use these
        \\// decls without further casts.
        \\
        \\const std = @import("std");
        \\
        \\
    );

    const stub_writer = stub.writer(b.allocator);
    for (shaders) |shader| {
        const gen_run = b.addRunArtifact(gen_exe);
        gen_run.addArgs(&.{ shader.name, shader.stage });
        const spv_name = b.fmt("{s}.spv", .{shader.name});
        _ = out_dir.addCopyFile(gen_run.captureStdOut(), spv_name);

        // Each shader contributes two decls to the stub:
        //   * `<name>_raw` — the storage. A const u8 array holding
        //     the dereferenced @embedFile bytes inline, declared
        //     with align(@alignOf(u32)).
        //   * `<name>` — the public `[]const u32` view produced by
        //     bytesAsSlice, which relies on the alignment that the
        //     align() on `_raw` provides.
        try stub_writer.print(
            \\const {0s}_raw align(@alignOf(u32)) = @embedFile("{1s}").*;
            \\pub const {0s}: []const u32 = std.mem.bytesAsSlice(u32, {0s}_raw[0..]);
            \\
        ,
            .{ shader.name, spv_name },
        );
    }

    return .{
        .exe = gen_exe,
        .output = out_dir.add(
            "vulkan_spv.zig",
            try stub.toOwnedSlice(b.allocator),
        ),
    };
}
|
||||
|
||||
/// Make the generated blobs importable as `vulkan_spv` from `step`'s
/// root module (libghostty, or any other consumer), and make `step`
/// depend on the steps that produce the generated file.
pub fn addImport(self: *const VulkanSpv, step: *std.Build.Step.Compile) void {
    const stub_path = self.output;

    // Register the producing steps as dependencies of the consumer.
    stub_path.addStepDependencies(&step.step);

    step.root_module.addAnonymousImport(
        "vulkan_spv",
        .{ .root_source_file = stub_path },
    );
}
|
||||
|
|
@ -10,6 +10,7 @@ const entrypoint = switch (build_config.exe_entrypoint) {
|
|||
.webgen_config => @import("build/webgen/main_config.zig"),
|
||||
.webgen_actions => @import("build/webgen/main_actions.zig"),
|
||||
.webgen_commands => @import("build/webgen/main_commands.zig"),
|
||||
.vulkan_spvgen => @import("vulkan_spvgen.zig"),
|
||||
};
|
||||
|
||||
/// The main entrypoint for the program.
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ pub const Backend = @import("renderer/backend.zig").Backend;
|
|||
pub const GenericRenderer = @import("renderer/generic.zig").Renderer;
|
||||
pub const Metal = @import("renderer/Metal.zig");
|
||||
pub const OpenGL = @import("renderer/OpenGL.zig");
|
||||
pub const Vulkan = @import("renderer/Vulkan.zig");
|
||||
pub const WebGL = @import("renderer/WebGL.zig");
|
||||
pub const Options = @import("renderer/Options.zig");
|
||||
pub const Overlay = @import("renderer/Overlay.zig");
|
||||
|
|
@ -39,6 +40,7 @@ pub const Renderer = switch (build_config.renderer) {
|
|||
.metal => GenericRenderer(Metal),
|
||||
.opengl => GenericRenderer(OpenGL),
|
||||
.webgl => WebGL,
|
||||
.vulkan => GenericRenderer(Vulkan),
|
||||
};
|
||||
|
||||
/// The health status of a renderer. These must be shared across all
|
||||
|
|
|
|||
|
|
@ -100,9 +100,10 @@ pub fn init(alloc: Allocator, opts: rendererpkg.Options) !Metal {
|
|||
.macos => |v| v.nsview,
|
||||
.ios => |v| v.uiview,
|
||||
|
||||
// The OpenGL platform is only valid with the OpenGL
|
||||
// renderer; it cannot provide a view for Metal.
|
||||
.opengl => return error.UnsupportedPlatform,
|
||||
// The OpenGL / Vulkan platforms are only valid with
|
||||
// their respective renderers; neither provides a view
|
||||
// for Metal.
|
||||
.opengl, .vulkan => return error.UnsupportedPlatform,
|
||||
},
|
||||
},
|
||||
|
||||
|
|
@ -199,12 +200,29 @@ pub fn drawFrameEnd(self: *Metal) void {
|
|||
pub fn initShaders(
|
||||
self: *const Metal,
|
||||
alloc: Allocator,
|
||||
custom_shaders: []const [:0]const u8,
|
||||
custom_shaders: []const []const u8,
|
||||
) !shaders.Shaders {
|
||||
// `shadertoy.loadFromFiles` returns `[]const []const u8` (a unified
|
||||
// type so the SPV-target Vulkan path can share the loader); for
|
||||
// `.msl` the underlying allocation IS null-terminated
|
||||
// (`shadertoy.mslFromSpv` returns `[:0]const u8` and writes a
|
||||
// sentinel one past `.len`). Reattach the sentinel for our
|
||||
// downstream `Shaders.init` which expects `[:0]const u8`.
|
||||
// Same pattern as `OpenGL.initShaders`.
|
||||
const z_shaders = try alloc.alloc([:0]const u8, custom_shaders.len);
|
||||
defer alloc.free(z_shaders);
|
||||
for (custom_shaders, z_shaders) |bytes, *out| {
|
||||
// Sentinel guard: `@ptrCast` does NOT verify the sentinel,
|
||||
// so without this assert a future `loadFromFiles` change
|
||||
// that forgets the trailing null would surface as an
|
||||
// OOB read inside the Metal library compile.
|
||||
std.debug.assert(bytes.len == 0 or bytes.ptr[bytes.len] == 0);
|
||||
out.* = @ptrCast(bytes);
|
||||
}
|
||||
return try shaders.Shaders.init(
|
||||
alloc,
|
||||
self.device,
|
||||
custom_shaders,
|
||||
z_shaders,
|
||||
// Using an `*_srgb` pixel format makes Metal gamma encode
|
||||
// the pixels written to it *after* blending, which means
|
||||
// we get linear alpha blending rather than gamma-incorrect
|
||||
|
|
|
|||
|
|
@ -27,6 +27,11 @@ pub const custom_shader_target: shadertoy.Target = .glsl;
|
|||
// The fragCoord for OpenGL shaders is +Y = up.
|
||||
pub const custom_shader_y_is_down = false;
|
||||
|
||||
/// Custom shaders are supported (the renderer ships a working "post"
|
||||
/// pass that composites `CustomShaderState.back_texture` through the
|
||||
/// user's shader into `frame.target`).
|
||||
pub const supports_custom_shaders: bool = true;
|
||||
|
||||
/// Because OpenGL's frame completion is always
|
||||
/// sync, we have no need for multi-buffering.
|
||||
pub const swap_chain_count = 1;
|
||||
|
|
@ -211,8 +216,9 @@ pub fn surfaceInit(surface: *apprt.Surface) !void {
|
|||
try prepareContext(&gladHostLoader);
|
||||
},
|
||||
|
||||
// macOS and iOS use the Metal renderer.
|
||||
.macos, .ios => return error.UnsupportedPlatform,
|
||||
// macOS and iOS use the Metal renderer; the Vulkan platform
|
||||
// is only valid with the Vulkan renderer.
|
||||
.macos, .ios, .vulkan => return error.UnsupportedPlatform,
|
||||
},
|
||||
}
|
||||
|
||||
|
|
@ -295,12 +301,33 @@ pub fn drawFrameEnd(self: *OpenGL) void {
|
|||
pub fn initShaders(
|
||||
self: *const OpenGL,
|
||||
alloc: Allocator,
|
||||
custom_shaders: []const [:0]const u8,
|
||||
custom_shaders: []const []const u8,
|
||||
) !shaders.Shaders {
|
||||
_ = alloc;
|
||||
_ = self;
|
||||
// `shadertoy.loadFromFiles` returns `[]const []const u8` so the
|
||||
// SPV-target Vulkan path can share the loader, but for `.glsl`
|
||||
// the underlying allocation IS null-terminated
|
||||
// (`shadertoy.glslFromSpv` returns `[:0]const u8` and writes a
|
||||
// sentinel one past `.len`). Reattach the sentinel for our
|
||||
// downstream `Pipeline.init` calls that expect `[:0]const u8`.
|
||||
//
|
||||
// Use the caller-provided `alloc` (matches `Metal.initShaders`)
|
||||
// — this is a transient scratch slice torn down at function
|
||||
// exit.
|
||||
const z_shaders = try alloc.alloc([:0]const u8, custom_shaders.len);
|
||||
defer alloc.free(z_shaders);
|
||||
for (custom_shaders, z_shaders) |bytes, *out| {
|
||||
// Defense against a future `loadFromFiles` change that
|
||||
// forgets to null-terminate: assert the sentinel before we
|
||||
// pretend the slice is `[:0]const u8`. `@ptrCast` does NOT
|
||||
// verify the sentinel — without this assert, a missing
|
||||
// terminator surfaces as a downstream OOB read.
|
||||
std.debug.assert(bytes.len == 0 or bytes.ptr[bytes.len] == 0);
|
||||
out.* = @ptrCast(bytes);
|
||||
}
|
||||
return try shaders.Shaders.init(
|
||||
self.alloc,
|
||||
custom_shaders,
|
||||
alloc,
|
||||
z_shaders,
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -293,6 +293,18 @@ fn setQosClass(self: *const Thread) void {
|
|||
}
|
||||
|
||||
fn syncDrawTimer(self: *Thread) void {
|
||||
// Hidden surfaces have no business running the animation
|
||||
// draw timer — `drawFrame` would just early-return on the
|
||||
// `!flags.visible` check and we'd burn 125 wakeups/sec on
|
||||
// a no-op. With N background tabs each holding an animation
|
||||
// timer, this dominated CPU on multi-tab sessions. The
|
||||
// `.visible → true` mailbox handler re-runs `syncDrawTimer`
|
||||
// to re-arm when the tab becomes visible again.
|
||||
if (!self.flags.visible) {
|
||||
self.draw_active = false;
|
||||
return;
|
||||
}
|
||||
|
||||
skip: {
|
||||
// If our renderer supports animations and has them, then we
|
||||
// can apply draw timer based on custom shader animation configuration.
|
||||
|
|
@ -360,6 +372,12 @@ fn drainMailbox(self: *Thread) !void {
|
|||
// Visibility affects our QoS class
|
||||
self.setQosClass();
|
||||
|
||||
// Visibility also gates the animation draw timer
|
||||
// (see syncDrawTimer): hidden surfaces don't arm
|
||||
// the 125 FPS timer, visible ones do. Re-run on
|
||||
// every transition.
|
||||
self.syncDrawTimer();
|
||||
|
||||
// If we became visible then we immediately rebuild cells
|
||||
// (renderCallback skips updateFrame while invisible) and draw.
|
||||
if (v) {
|
||||
|
|
@ -623,8 +641,15 @@ fn renderCallback(
|
|||
) catch |err|
|
||||
log.warn("error rendering err={}", .{err});
|
||||
|
||||
// Draw
|
||||
t.drawFrame(false);
|
||||
// Draw. When the animation draw timer is already running
|
||||
// (custom-shader-animation engaged), it will pick up the
|
||||
// newly-updated cells at its next DRAW_INTERVAL tick — drawing
|
||||
// here too would double-up frames during animated-shader periods
|
||||
// and burn host-thread CPU (per-frame Wayland buffer attach +
|
||||
// commit on the Qt apprt) for no visible benefit. Without the
|
||||
// timer, wakeup-driven draws are the only way frames reach the
|
||||
// host, so we always draw in that case.
|
||||
if (!t.draw_active) t.drawFrame(false);
|
||||
|
||||
return .disarm;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,661 @@
|
|||
//! Vulkan graphics API for libghostty's `GenericRenderer`. Active
|
||||
//! on `-Drenderer=vulkan` builds; the host (e.g. the Qt frontend)
|
||||
//! supplies a VkInstance / VkDevice / VkQueue via the
|
||||
//! `ghostty_platform_vulkan_s` C ABI, libghostty drives all
|
||||
//! pipeline / image / command-buffer work against those handles,
|
||||
//! and rendered frames go back to the host as dmabuf fds for
|
||||
//! zero-copy compositing.
|
||||
//!
|
||||
//! Per-frame model: fence-paced submit-then-wait (one frame in
|
||||
//! flight), `Target` is the dmabuf-exportable render image,
|
||||
//! `Frame.complete` waits on the fence before handing the fd to
|
||||
//! the platform `present` callback.
|
||||
//!
|
||||
//! Submodules — pure Vulkan-API wrappers live in `pkg/vulkan/`
|
||||
//! (mirror of `pkg/opengl/`); renderer-policy modules live alongside
|
||||
//! this file under `vulkan/`.
|
||||
//!
|
||||
//! In `pkg/vulkan/` (re-exported from this file as
|
||||
//! `Vulkan.{Device,Sampler,CommandPool,DescriptorPool}`):
|
||||
//! - `Device.zig` — host-handle wrapper + dispatch table.
|
||||
//! - `Sampler.zig` — VkSampler.
|
||||
//! - `CommandPool.zig` — VkCommandPool + one-shot helper.
|
||||
//! - `DescriptorPool.zig`— per-frame descriptor pool.
|
||||
//!
|
||||
//! In `src/renderer/vulkan/`:
|
||||
//! - `Texture.zig` — VkImage + memory + view + staging upload.
|
||||
//! - `Target.zig` — dmabuf-exportable render target
|
||||
//! (direct or legacy_copy mode).
|
||||
//! - `buffer.zig` — Buffer(T) host-coherent.
|
||||
//! - `buffer_pool.zig` — cross-frame VkBuffer recycle pool
|
||||
//! (per-thread pending, shared ready).
|
||||
//! - `ThreadState.zig` — per-renderer-thread frame fence /
|
||||
//! command buffer / step pool / last-target.
|
||||
//! - `Pipeline.zig` — VkPipeline + layout (dynamic rendering).
|
||||
//! - `RenderPass.zig` — dynamic-rendering pass + step recorder.
|
||||
//! - `Frame.zig` — per-draw context (fence-paced).
|
||||
//! - `shaders.zig` — GLSL→SPIR-V→VkShaderModule + the
|
||||
//! OpenGL-GLSL → Vulkan-GLSL rewriter.
|
||||
|
||||
pub const Vulkan = @This();
|
||||
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
const Allocator = std.mem.Allocator;
|
||||
const vulkan = @import("vulkan");
|
||||
const vk = vulkan.c;
|
||||
|
||||
const apprt = @import("../apprt.zig");
|
||||
const configpkg = @import("../config.zig");
|
||||
const font = @import("../font/main.zig");
|
||||
const rendererpkg = @import("../renderer.zig");
|
||||
const shadertoy = @import("shadertoy.zig");
|
||||
|
||||
pub const GraphicsAPI = Vulkan;
|
||||
// Device-dispatch primitives live in `pkg/vulkan/` so they can be
|
||||
// reused by anything that needs a typed Vulkan binding (mirrors how
|
||||
// `pkg/opengl/` houses Buffer/Program/Texture/etc.). The renderer
|
||||
// re-exports them from this top-level so call sites continue to write
|
||||
// `Vulkan.Device`, `Vulkan.Sampler`, etc.
|
||||
pub const Device = vulkan.Device;
|
||||
pub const Sampler = vulkan.Sampler;
|
||||
pub const CommandPool = vulkan.CommandPool;
|
||||
pub const DescriptorPool = vulkan.DescriptorPool;
|
||||
|
||||
// Renderer-policy primitives stay in `src/renderer/vulkan/` (dmabuf
|
||||
// export, our pipeline + render-pass wiring, frame fence pacing, the
|
||||
// GLSL→SPIR-V loader).
|
||||
pub const Texture = @import("vulkan/Texture.zig");
|
||||
pub const Target = @import("vulkan/Target.zig");
|
||||
pub const Pipeline = @import("vulkan/Pipeline.zig");
|
||||
pub const RenderPass = @import("vulkan/RenderPass.zig");
|
||||
pub const Frame = @import("vulkan/Frame.zig");
|
||||
pub const shaders = @import("vulkan/shaders.zig");
|
||||
|
||||
const bufferpkg = @import("vulkan/buffer.zig");
|
||||
pub const Buffer = bufferpkg.Buffer;
|
||||
|
||||
// ---- comptime contract --------------------------------------------------
|
||||
|
||||
/// Custom user shaders compile to SPIR-V directly — skip the
|
||||
/// GLSL → SPIR-V → GLSL roundtrip that `.glsl` would do. The
|
||||
/// roundtrip exists for backends that consume GLSL (OpenGL, Metal
|
||||
/// via MSL), but Vulkan ingests SPIR-V natively and we already have
|
||||
/// a glslang shim for the renderer's built-in shaders. Bypassing
|
||||
/// the roundtrip halves the per-shader compile cost AND avoids the
|
||||
/// spirv-cross-emitted main() losing the upstream `gl_FragCoord.xy`
|
||||
/// pattern we hook for the Y-flip fix.
|
||||
pub const custom_shader_target: shadertoy.Target = .spv;
|
||||
|
||||
/// Custom shaders ARE now supported on the Vulkan backend.
|
||||
/// `shaders.Shaders.init` builds one post pipeline per user shader
|
||||
/// (UBO at set 0 binding 1, iChannel0 sampler at set 1 binding 0,
|
||||
/// matching `shadertoy_prefix.glsl` after `vulkanizeGlsl` rewrites
|
||||
/// the layouts). The renderer's post pass at the end of `drawFrame`
|
||||
/// chains them — first pipeline samples `back_texture` and writes
|
||||
/// `front_texture`, swap, repeat; the last one writes
|
||||
/// `frame.target` instead.
|
||||
pub const supports_custom_shaders: bool = true;
|
||||
|
||||
/// Vulkan's clip-space Y axis points down (unlike OpenGL).
|
||||
pub const custom_shader_y_is_down = true;
|
||||
|
||||
/// Extra `#define` lines `shadertoy.loadFromFile` injects into the
|
||||
/// prefix between `#version` and the rest. `GHASTTY_VULKAN`
|
||||
/// activates the Vulkan-side `gl_FragCoord` flip + `texture()`
|
||||
/// upper-left wrap so `mainImage` sees shadertoy-convention coords
|
||||
/// even though Vulkan rasterizes Y-down. OpenGL/MSL backends omit
|
||||
/// this decl entirely and pass `&.{}` from `generic.zig`.
|
||||
pub const custom_shader_extra_defines: []const []const u8 = &.{"GHASTTY_VULKAN 1"};
|
||||
|
||||
/// GLSL → GLSL rewriter `shadertoy.loadFromFile` runs after the
|
||||
/// prefix splice and before the SPIR-V compile. Plugs the
|
||||
/// `vulkanizeGlsl` pass that rewrites `layout(binding = N)` into
|
||||
/// `layout(set = S, binding = N)` so the resulting SPIR-V matches
|
||||
/// the renderer's multi-set descriptor layout. Without this, the
|
||||
/// shader's `iChannel0` lands at set 0 binding 0 while the post
|
||||
/// pipeline binds it at set 1 binding 0 → sampler returns garbage.
|
||||
pub const rewriteCustomShaderSource = shaders.vulkanizeGlsl;
|
||||
|
||||
/// Single-buffered for v1; fence-paced submit-then-wait means there's
|
||||
/// only ever one frame in flight.
|
||||
pub const swap_chain_count = 1;
|
||||
|
||||
const log = std.log.scoped(.vulkan);
|
||||
|
||||
// ---- per-surface state --------------------------------------------------
|
||||
|
||||
alloc: Allocator,
|
||||
blending: configpkg.Config.AlphaBlending,
|
||||
rt_surface: *apprt.Surface,
|
||||
|
||||
/// Process-wide Vulkan device. The host owns one VkDevice shared
|
||||
/// across every surface, so we mirror that as a single global slot
|
||||
/// (not threadlocal — the renderer thread is distinct from the main
|
||||
/// thread that constructs the surface, and threadlocal doesn't
|
||||
/// survive that boundary).
|
||||
///
|
||||
/// Initialized in `Vulkan.init` on the surface-construction thread;
|
||||
/// read by every other thread via `devicePtr` after that. The renderer
|
||||
/// holds `*const Vulkan` from `generic.zig` so we can't mutate fields
|
||||
/// on the value — same reason OpenGL uses a `threadlocal var gl_host`
|
||||
/// (though OpenGL gets away with threadlocal because the OpenGL
|
||||
/// platform callbacks are read on the same thread that set them).
|
||||
var device: ?Device = null;
|
||||
|
||||
/// Refcount of live `Vulkan` renderer instances that share `device`.
|
||||
/// Each `init` increments; each `deinit` decrements. The device is
|
||||
/// only torn down when the count returns to 0, so closing one tab
|
||||
/// (or one split) doesn't yank the VkDevice out from under the
|
||||
/// surfaces still running in other tabs. Process-wide (matches
|
||||
/// `device`'s scope). Mutated under `device_mutex` because
|
||||
/// surfaces' renderer threads run independently and may init/deinit
|
||||
/// concurrently.
|
||||
var device_refcount: usize = 0;
|
||||
var device_mutex: std.Thread.Mutex = .{};
|
||||
|
||||
/// Cross-frame buffer recycle pool. See `vulkan/buffer_pool.zig`
|
||||
/// for the full lifecycle / multi-thread contract. Re-exported so
|
||||
/// existing callers (`Vulkan.buffer_pool.cycle` etc.) keep working
|
||||
/// unchanged.
|
||||
pub const buffer_pool = @import("vulkan/buffer_pool.zig");
|
||||
|
||||
/// Per-renderer-thread state (frame command buffer, fence, descriptor
|
||||
/// pool, last-target pointer). See `vulkan/ThreadState.zig` for the
|
||||
/// lifecycle.
|
||||
const ThreadState = @import("vulkan/ThreadState.zig");
|
||||
|
||||
// ---- lifecycle ----------------------------------------------------------
|
||||
|
||||
/// Build the per-surface Vulkan state and take one reference on the
/// process-wide `device`, creating that device from the host's handles
/// when no live surface has created it yet.
///
/// The surface's platform tag is validated for EVERY surface, not only
/// the one that ends up creating the device: a surface that was not
/// created with the Vulkan platform can never obtain a render target
/// (`initTarget` rejects it), so it must not take a device reference.
///
/// Errors:
///   - `error.UnsupportedPlatform` when the surface's platform is not
///     `.vulkan`.
///   - anything `bootstrapFromPlatform` / `Device.init` return when the
///     shared device has to be created.
pub fn init(alloc: Allocator, opts: rendererpkg.Options) !Vulkan {
    const platform = switch (apprt.runtime) {
        // The Vulkan renderer is embedded-only by design: the host
        // owns the VkInstance/Device/Queue and hands them to
        // libghostty via `ghostty_platform_vulkan_s`. Compile-error
        // any other runtime so a misconfigured `-Drenderer=vulkan
        // -Dapp-runtime=gtk` build fails loudly at compile time
        // instead of crashing at first surface init. Mirrors
        // OpenGL.zig's `@compileError("unsupported app runtime for
        // OpenGL")` pattern.
        else => @compileError("unsupported app runtime for Vulkan (embedded-only)"),
        apprt.embedded => switch (opts.rt_surface.platform) {
            .vulkan => |p| p,
            // The Platform union is decided at host-call time (the
            // C ABI lets the host pick), so this arm really is a
            // runtime check — the host plugged us into a non-Vulkan
            // surface.
            .opengl, .macos, .ios => return error.UnsupportedPlatform,
        },
    };

    // Vulkan needs the device populated before the renderer's
    // `FrameState.init` starts asking for buffer/texture options.
    // Process-wide (not threadlocal): the renderer thread is distinct
    // from the main thread that constructs the surface.
    device_mutex.lock();
    defer device_mutex.unlock();

    if (device == null) {
        device = try Device.init(alloc, try bootstrapFromPlatform(platform));
        log.info(
            "Vulkan device ready (api=0x{x})",
            .{device.?.api_version},
        );
    }

    // Only count the reference once nothing above can fail, so an
    // error return never leaves the refcount bumped.
    device_refcount += 1;
    return .{
        .alloc = alloc,
        .blending = opts.config.blending,
        .rt_surface = opts.rt_surface,
    };
}
|
||||
|
||||
/// Drop this surface's reference on the shared device and tear the
/// device down when the last reference goes away. `self` is poisoned
/// on every path.
///
/// `ThreadState.cleanup` is deliberately NOT called here — it runs in
/// `threadExit` on the renderer thread, which is where the
/// `threadlocal var` state was populated. See the comment in
/// `threadExit` for the full rationale.
pub fn deinit(self: *Vulkan) void {
    // Registered first so it runs last: defers unwind in reverse
    // order, so `self` is poisoned after the mutex is released.
    defer self.* = undefined;

    device_mutex.lock();
    defer device_mutex.unlock();

    // Refcount-underflow guard. A debug assertion would compile out in
    // ReleaseFast / ReleaseSmall, and a double-deinit would then wrap
    // the unsigned counter to a huge value so the tear-down below
    // would never trigger. Log even in release builds: a stale deinit
    // is a contract violation worth surfacing.
    if (device_refcount == 0) {
        log.err("Vulkan.deinit: refcount underflow — double-deinit?", .{});
        return;
    }

    device_refcount -= 1;

    // Other surfaces still share the device: closing one of N tabs
    // must NOT pull the device out from under the others.
    if (device_refcount != 0) return;

    // Last surface: drain the shared `ready` list of the buffer pool
    // and tear the device down. The waitIdle is needed because
    // non-final deinits skipped it. Only the cross-thread `ready` list
    // is handled here; per-thread `pending` state is expected to have
    // been released on each renderer thread already (see
    // `threadExit`).
    if (device) |*d| {
        d.waitIdle();
        buffer_pool.drainShared(d);
        d.deinit();
    }
    device = null;
}
|
||||
|
||||
/// Early per-surface setup hook; intentionally does nothing for
/// Vulkan. The host hasn't finished installing the platform callbacks
/// at this point, so all device wiring is deferred to `Vulkan.init`
/// (which runs after the platform is plumbed through `opts`).
pub fn surfaceInit(_: *apprt.Surface) !void {}
|
||||
|
||||
/// Main-thread hook that runs just before the renderer thread spins
/// up; intentionally does nothing. Device construction already
/// happened in `Vulkan.init`: the renderer's FrameState init path
/// calls option getters before `threadEnter`, and those getters need
/// the device — so it has to be ready earlier than OpenGL needs it.
pub fn finalizeSurfaceInit(_: *const Vulkan, _: *apprt.Surface) !void {}
|
||||
|
||||
/// Renderer-thread entry hook; intentionally does nothing. The device
/// is brought up in `init` (the renderer's FrameState init path calls
/// option getters before `threadEnter`, and those need the device).
/// The declaration is kept so `@hasDecl(GraphicsAPI, "threadEnter")`
/// still resolves true in `generic.zig`.
pub fn threadEnter(_: *const Vulkan, _: *apprt.Surface) !void {}
|
||||
|
||||
/// Renderer-thread exit hook: releases this thread's per-thread Vulkan
/// state, then waits for the shared device to go idle. Does nothing
/// when no device exists.
pub fn threadExit(_: *const Vulkan) void {
    const d = if (device) |*shared| shared else return;

    // ThreadState.cleanup MUST run here, on the renderer thread, not
    // in Vulkan.deinit (which runs on the GUI thread AFTER the
    // renderer thread has joined — see Surface.deinit). The per-thread
    // Vulkan state lives in `threadlocal var` slots populated on this
    // thread; calling cleanup from the GUI thread would read the GUI
    // thread's empty TLS, the destroys would no-op, and the per-tab
    // DescriptorPool / VkCommandBuffer / VkFence + buffer_pool pending
    // list would leak forever. (heaptrack on a 20-tab open+close
    // session attributed ~6 MB / 42 calls of NVIDIA driver-internal
    // state to exactly this.)
    //
    // Cleanup needs the device alive: the refcount stays > 0 until
    // Vulkan.deinit decrements it on the GUI thread, so the shared
    // VkDevice is still valid here.
    ThreadState.cleanup(d);

    // Belt-and-suspenders for any non-ThreadState in-flight work this
    // thread may have submitted via the shared queue.
    d.waitIdle();
}
|
||||
|
||||
/// No-op: the Vulkan backend takes no action in this hook.
pub fn displayRealized(_: *Vulkan) void {}
|
||||
|
||||
/// No-op: the Vulkan backend takes no action in this hook.
pub fn displayUnrealized(_: *Vulkan) void {}
|
||||
|
||||
/// No-op: the Vulkan backend takes no action in this hook.
pub fn drawFrameStart(_: *Vulkan) void {}
|
||||
|
||||
/// No-op: the Vulkan backend takes no action in this hook.
pub fn drawFrameEnd(_: *Vulkan) void {}
|
||||
|
||||
/// Build the renderer's shader set against the shared device.
/// Forwards to `shaders.Shaders.init`; the receiver is unused because
/// the device comes from `devicePtr()`.
pub fn initShaders(
    _: *const Vulkan,
    alloc: Allocator,
    /// For Vulkan these are SPIR-V binaries (loaded with
    /// `shadertoy.Target = .spv`), not GLSL strings — see
    /// `custom_shader_target` above.
    custom_shaders: []const []const u8,
) !shaders.Shaders {
    const dev = devicePtr();
    return shaders.Shaders.init(alloc, dev, custom_shaders);
}
|
||||
|
||||
/// Create a render target of `width` x `height` for this surface.
///
/// Returns `error.UnsupportedPlatform` when the surface does not carry
/// the Vulkan platform callbacks; otherwise whatever `Target.init`
/// returns.
pub fn initTarget(self: *const Vulkan, width: usize, height: usize) !Target {
    // Per-surface platform: pulled from rt_surface so the `present`
    // callback's `userdata` points at THIS surface's window. Splits
    // and tabs share the process-wide Device but each owns its own
    // platform copy — without per-surface routing here, all dmabuf
    // frames would funnel through whichever surface initialized the
    // device first.
    const host_platform = surfacePlatform(self.rt_surface) orelse {
        return error.UnsupportedPlatform;
    };

    return try Target.init(.{
        .device = devicePtr(),
        // SRGB format so the hardware gamma-encodes the linear
        // premultiplied shader output at framebuffer-write time. The
        // renderer's shaders produce linear premultiplied alpha;
        // without an sRGB format the bytes in memory would be linear
        // and Qt (which expects sRGB premultiplied) would render them
        // as if they were already gamma encoded — colors would look
        // way too dark. The DRM fourcc the host sees is still
        // ARGB8888; SRGB encoding is a Vulkan-side concern only.
        .format = vk.VK_FORMAT_B8G8R8A8_SRGB,
        .width = @intCast(width),
        .height = @intCast(height),
        .platform = host_platform,
    });
}
|
||||
|
||||
/// Convert the apprt's `Platform.Vulkan` callback struct into the
/// neutral `Device.HostBootstrap` the binding expects. Every host
/// handle and the root proc-addr resolver are fetched up-front so the
/// binding stays free of any apprt type.
///
/// Returns `error.HostHandleMissing` as soon as any host callback
/// yields null. Callbacks are invoked in the order: instance, physical
/// device, device, queue, proc-addr resolver, queue family index.
fn bootstrapFromPlatform(
    platform: apprt.embedded.Platform.Vulkan,
) Device.Error!Device.HostBootstrap {
    const ud = platform.userdata;

    const host_instance = platform.instance(ud) orelse
        return error.HostHandleMissing;
    const host_physical_device = platform.physical_device(ud) orelse
        return error.HostHandleMissing;
    const host_device = platform.device(ud) orelse
        return error.HostHandleMissing;
    const host_queue = platform.queue(ud) orelse
        return error.HostHandleMissing;
    const root_resolver = platform.get_instance_proc_addr(
        ud,
        "vkGetInstanceProcAddr",
    ) orelse return error.HostHandleMissing;

    return .{
        .instance = @ptrCast(host_instance),
        .physical_device = @ptrCast(host_physical_device),
        .device = @ptrCast(host_device),
        .queue = @ptrCast(host_queue),
        .queue_family_index = platform.queue_family_index(ud),
        .get_instance_proc_addr_raw = root_resolver,
    };
}
|
||||
|
||||
/// Return the Vulkan platform callbacks carried by `rt_surface`, or
/// null when the surface was tagged with any other platform — the
/// caller is expected to reject such a surface with
/// `error.UnsupportedPlatform`.
fn surfacePlatform(rt_surface: *apprt.Surface) ?apprt.embedded.Platform.Vulkan {
    // `init()` already gates non-embedded runtimes with a
    // `@compileError`; the same gate is repeated here so this function
    // only ever has to handle the embedded platform union.
    if (apprt.runtime != apprt.embedded)
        @compileError("unsupported app runtime for Vulkan (embedded-only)");

    switch (rt_surface.platform) {
        .vulkan => |callbacks| return callbacks,
        else => return null,
    }
}
|
||||
|
||||
/// Report the surface's current size as stored on `rt_surface`.
pub fn surfaceSize(self: *const Vulkan) !struct { width: u32, height: u32 } {
    // Take one snapshot so width and height come from the same read.
    const dims = self.rt_surface.size;
    return .{
        .width = dims.width,
        .height = dims.height,
    };
}
|
||||
|
||||
/// Hand `target` to the host and remember it for later re-presents.
pub fn present(_: *Vulkan, target: *Target) !void {
    // The target is already populated by the time we get here:
    // `Frame.complete` ended the command buffer, submitted with the
    // fence, and waited for the GPU to finish before returning. So the
    // dmabuf fd is safe to hand off.
    target.present();

    // Store the target's ADDRESS (not a value copy) so
    // `presentLastTarget` can re-present it on no-op frames. A
    // subsequent `frame.resize` (which destroys the old Target and
    // overwrites the FrameState's slot with a new one) is then
    // transparently followed; a value copy would leave us holding a
    // closed fd and freed VkImage handles.
    ThreadState.last_target = target;
}
|
||||
|
||||
/// Re-present the target recorded by the most recent `present` call on
/// this thread; does nothing when no target has been recorded yet.
pub fn presentLastTarget(self: *Vulkan) !void {
    const previous = ThreadState.last_target orelse return;
    try self.present(previous);
}
|
||||
|
||||
/// Start recording a frame into `target`: lazily set up this thread's
/// per-thread resources, reset the per-frame state, and return the
/// `Frame` produced by `Frame.begin`.
///
/// If the reset or `Frame.begin` fails, the frame fence is pushed back
/// towards the signaled state before the error propagates (see the
/// `errdefer` below).
pub fn beginFrame(
    _: *const Vulkan,
    renderer: *rendererpkg.Renderer,
    target: *Target,
) !Frame {
    const dev = devicePtr();

    // Lazy per-thread resource init (no-op after the first frame on
    // this thread). Sets up the command pool + buffer + fence +
    // descriptor pool that get reused for every subsequent frame.
    try ThreadState.ensureInit(dev);

    // The fence is the load-bearing piece for tear-down correctness:
    // an error path that leaves it UNSIGNALED with no pending submit
    // would make a later unbounded wait on it never return. This
    // `errdefer` is therefore registered BEFORE `beginFrameReset`
    // (the call that resets the fence), so any failure from here on
    // re-signals it.
    errdefer {
        // An empty submit with the fence as its signal target is the
        // simplest portable way to get it signaled without recording
        // any commands. The fence may be in one of three states:
        //   1. Reset by `beginFrameReset` (the failing path): the
        //      empty submit signals it cleanly.
        //   2. Still in its prior-frame state because the reset
        //      itself failed: the empty submit re-signals once any
        //      earlier pending submit on the queue retires.
        //   3. Device lost: the submit fails too, and we fall back to
        //      waiting for the device to go idle.
        const signal_only_submit: vk.VkSubmitInfo = .{
            .sType = vk.VK_STRUCTURE_TYPE_SUBMIT_INFO,
            .pNext = null,
            .waitSemaphoreCount = 0,
            .pWaitSemaphores = null,
            .pWaitDstStageMask = null,
            .commandBufferCount = 0,
            .pCommandBuffers = null,
            .signalSemaphoreCount = 0,
            .pSignalSemaphores = null,
        };
        const submit_result = dev.queueSubmit(
            1,
            &signal_only_submit,
            ThreadState.frame_fence,
        );
        if (submit_result != vk.VK_SUCCESS) {
            log.warn(
                "beginFrame errdefer: empty queueSubmit failed " ++
                    "(result={}); waiting device idle to ensure the fence " ++
                    "doesn't hang the next deinit",
                .{submit_result},
            );
            // Last-resort safety net when the fence could not be
            // signaled through the queue.
            _ = dev.dispatch.deviceWaitIdle(dev.device);
        }
    }
    try ThreadState.beginFrameReset(dev);

    return try Frame.begin(
        .{
            .cb = ThreadState.frame_cb,
            .fence = ThreadState.frame_fence,
            .step_pool = if (ThreadState.step_pool) |*p| p else null,
        },
        dev,
        renderer,
        target,
    );
}
|
||||
|
||||
// ---- buffer / texture / sampler option getters --------------------------
|
||||
//
|
||||
// `GenericRenderer` calls these without knowing or caring about Vulkan
|
||||
// specifics; the returned `Options` structs are what each backend's
|
||||
// resource wrapper expects to be passed back to its `init`. The
|
||||
// Vulkan-flavored ones embed a `*const Device` reference plus
|
||||
// backend-specific usage flags.
|
||||
|
||||
/// Pointer to the process-wide device; panics when it has not been
/// created.
///
/// Indirected through a getter so future refactors (e.g. allocating
/// `Device` on the heap) don't ripple through every call site. The
/// device is populated in `Vulkan.init` BEFORE the renderer's
/// `FrameState.init` calls any of the option getters, so a null here
/// means device construction failed AND someone called a getter anyway
/// — a programming error, not a recoverable runtime condition.
inline fn devicePtr() *const Device {
    if (device) |*d| return d;
    @panic("Vulkan.devicePtr: device not initialized — option getter called before Vulkan.init succeeded");
}
|
||||
|
||||
/// Default buffer options. Vulkan needs an explicit usage bitmask, so
/// the default is a vertex buffer; callers that want a specific kind
/// use the per-kind getters below. The receiver is unused — the device
/// comes from the process-wide `device` via `devicePtr()`.
pub fn bufferOptions(_: *const Vulkan) bufferpkg.Options {
    const result: bufferpkg.Options = .{
        .device = devicePtr(),
        .usage = vk.VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
    };
    return result;
}
|
||||
|
||||
/// Options for per-instance data buffers: bound with vertex-buffer
/// usage on the shared device.
pub fn instanceBufferOptions(_: *const Vulkan) bufferpkg.Options {
    const result: bufferpkg.Options = .{
        .device = devicePtr(),
        .usage = vk.VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
    };
    return result;
}
|
||||
|
||||
/// Options for uniform buffers: uniform-buffer usage on the shared
/// device.
pub fn uniformBufferOptions(_: *const Vulkan) bufferpkg.Options {
    const result: bufferpkg.Options = .{
        .device = devicePtr(),
        .usage = vk.VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
    };
    return result;
}
|
||||
|
||||
/// Foreground-cell buffers use the same options as any instance
/// buffer.
pub fn fgBufferOptions(self: *const Vulkan) bufferpkg.Options {
    return instanceBufferOptions(self);
}
|
||||
|
||||
/// Options for the background-cells buffer: storage-buffer usage on
/// the shared device.
pub fn bgBufferOptions(_: *const Vulkan) bufferpkg.Options {
    // The bg cells buffer is consumed as a STORAGE BUFFER by the
    // cell_bg fragment shader (binding `bg_cells`) and the cell_text
    // vertex shader (same binding). The OpenGL backend doesn't
    // distinguish — every buffer is reusable across roles — but Vulkan
    // validates usage flags at descriptor-write time.
    const result: bufferpkg.Options = .{
        .device = devicePtr(),
        .usage = vk.VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
    };
    return result;
}
|
||||
|
||||
/// Options for image buffers; identical to `instanceBufferOptions`.
pub fn imageBufferOptions(self: *const Vulkan) bufferpkg.Options {
    return instanceBufferOptions(self);
}
|
||||
|
||||
/// Options for the background-image buffer; identical to
/// `instanceBufferOptions`.
pub fn bgImageBufferOptions(self: *const Vulkan) bufferpkg.Options {
    return instanceBufferOptions(self);
}
|
||||
|
||||
/// General-purpose texture options: `B8G8R8A8_SRGB` with sampled,
/// transfer-destination and color-attachment usage, on the device
/// returned by `devicePtr()`. The receiver is ignored.
pub fn textureOptions(_: *const Vulkan) Texture.Options {
    // Textures built from these options serve two roles in the
    // renderer: glyph atlases, which are only sampled, and the
    // custom-shader back_texture, which is sampled AND rendered into.
    // Returning the wider usage set covers both. The format is the
    // same one the renderer's `initTarget` picks (`B8G8R8A8_SRGB`), so
    // a render → sample → render chain through the custom-shader pass
    // stays in a single color format.
    const opts: Texture.Options = .{
        .device = devicePtr(),
        .format = vk.VK_FORMAT_B8G8R8A8_SRGB,
        .usage = vk.VK_IMAGE_USAGE_SAMPLED_BIT |
            vk.VK_IMAGE_USAGE_TRANSFER_DST_BIT |
            vk.VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
    };
    return opts;
}
|
||||
|
||||
/// Sampler options: linear minification and magnification with
/// clamp-to-edge wrapping on both axes, on the device returned by
/// `devicePtr()`. The receiver is ignored.
pub fn samplerOptions(_: *const Vulkan) Sampler.Options {
    const opts: Sampler.Options = .{
        .device = devicePtr(),
        .min_filter = .linear,
        .mag_filter = .linear,
        .wrap_s = .clamp_to_edge,
        .wrap_t = .clamp_to_edge,
    };
    return opts;
}
|
||||
|
||||
/// Re-export so callers can write `Vulkan.ImageTextureFormat`,
|
||||
/// mirroring `OpenGL.ImageTextureFormat` on the OpenGL side.
|
||||
/// The definition lives in `vulkan/Texture.zig` next to `Texture`;
|
||||
/// `imageTextureOptions` takes this type and calls `toVk(srgb)` on it.
|
||||
pub const ImageTextureFormat = Texture.ImageTextureFormat;
|
||||
|
||||
/// Options for an image texture. The Vulkan format comes from
/// `format.toVk(srgb)`; usage is sampled + transfer-destination, on the
/// device returned by `devicePtr()`. The receiver is ignored.
pub fn imageTextureOptions(
    _: *const Vulkan,
    format: ImageTextureFormat,
    srgb: bool,
) Texture.Options {
    const opts: Texture.Options = .{
        .device = devicePtr(),
        .format = format.toVk(srgb),
        .usage = vk.VK_IMAGE_USAGE_SAMPLED_BIT |
            vk.VK_IMAGE_USAGE_TRANSFER_DST_BIT,
    };
    return opts;
}
|
||||
|
||||
/// Create a square texture sized `atlas.size` × `atlas.size` for a
/// font atlas. Grayscale atlases map to `R8_UNORM` and BGRA atlases to
/// `B8G8R8A8_UNORM`; any other atlas format returns
/// `error.UnsupportedAtlasFormat`. Errors from `Texture.init` are
/// propagated. The receiver is ignored.
pub fn initAtlasTexture(
    _: *const Vulkan,
    atlas: *const font.Atlas,
) !Texture {
    const vk_format: vk.VkFormat = switch (atlas.format) {
        .grayscale => vk.VK_FORMAT_R8_UNORM,
        .bgra => vk.VK_FORMAT_B8G8R8A8_UNORM,
        else => return error.UnsupportedAtlasFormat,
    };

    // The atlas is square, so one edge length is used for both extents.
    const side = atlas.size;

    return try Texture.init(
        .{
            .device = devicePtr(),
            .format = vk_format,
            .usage = vk.VK_IMAGE_USAGE_SAMPLED_BIT |
                vk.VK_IMAGE_USAGE_TRANSFER_DST_BIT,
        },
        side,
        side,
        null,
    );
}
|
||||
|
|
@ -6,6 +6,11 @@ pub const Backend = enum {
|
|||
opengl,
|
||||
metal,
|
||||
webgl,
|
||||
/// Vulkan is on this fork only. Embedded-only — the host owns
|
||||
/// the VkInstance/Device/Queue and hands them in via
|
||||
/// `ghostty_platform_vulkan_s`; libghostty renders against
|
||||
/// those handles and exports the result as a dmabuf fd.
|
||||
vulkan,
|
||||
|
||||
pub fn default(
|
||||
target: std.Target,
|
||||
|
|
|
|||
|
|
@ -838,14 +838,52 @@ pub fn Renderer(comptime GraphicsAPI: type) type {
|
|||
defer arena.deinit();
|
||||
const arena_alloc = arena.allocator();
|
||||
|
||||
// Load our custom shaders
|
||||
const custom_shaders: []const [:0]const u8 = shadertoy.loadFromFiles(
|
||||
arena_alloc,
|
||||
self.config.custom_shaders,
|
||||
GraphicsAPI.custom_shader_target,
|
||||
) catch |err| err: {
|
||||
log.warn("error loading custom shaders err={}", .{err});
|
||||
break :err &.{};
|
||||
// Load our custom shaders.
|
||||
//
|
||||
// GraphicsAPI advertises whether it can actually run them
|
||||
// (`supports_custom_shaders`). The Vulkan backend currently
|
||||
// can't — its post-pass / compositor pipeline that wires
|
||||
// CustomShaderState.back_texture → frame.target through the
|
||||
// user's shader hasn't been built yet. Loading + flagging
|
||||
// `has_custom_shaders` anyway would route bg_color into the
|
||||
// back_texture and leave frame.target blank. Skip the load
|
||||
// when the backend can't consume the result, and emit a
|
||||
// one-line warning so the user knows their config item was
|
||||
// ignored.
|
||||
const can_use_custom = !@hasDecl(GraphicsAPI, "supports_custom_shaders") or
|
||||
GraphicsAPI.supports_custom_shaders;
|
||||
const custom_shaders: []const []const u8 = if (can_use_custom)
|
||||
(shadertoy.loadFromFiles(
|
||||
arena_alloc,
|
||||
self.config.custom_shaders,
|
||||
.{
|
||||
.target = GraphicsAPI.custom_shader_target,
|
||||
// Optional per-backend hooks. Resolved at
|
||||
// comptime via `@hasDecl`, so backends that
|
||||
// don't need them stay free of extra-define /
|
||||
// GLSL-rewrite logic.
|
||||
.extra_defines = if (@hasDecl(GraphicsAPI, "custom_shader_extra_defines"))
|
||||
GraphicsAPI.custom_shader_extra_defines
|
||||
else
|
||||
&.{},
|
||||
.rewrite = if (@hasDecl(GraphicsAPI, "rewriteCustomShaderSource"))
|
||||
GraphicsAPI.rewriteCustomShaderSource
|
||||
else
|
||||
null,
|
||||
},
|
||||
) catch |err| err: {
|
||||
log.warn("error loading custom shaders err={}", .{err});
|
||||
break :err &.{};
|
||||
})
|
||||
else custom: {
|
||||
if (self.config.custom_shaders.value.items.len > 0) {
|
||||
log.warn(
|
||||
"custom-shader config ignored: backend lacks " ++
|
||||
"post-pipeline support (Vulkan TODO)",
|
||||
.{},
|
||||
);
|
||||
}
|
||||
break :custom &.{};
|
||||
};
|
||||
|
||||
const has_custom_shaders = custom_shaders.len > 0;
|
||||
|
|
@ -1431,15 +1469,6 @@ pub fn Renderer(comptime GraphicsAPI: type) type {
|
|||
self: *Self,
|
||||
sync: bool,
|
||||
) !void {
|
||||
// const start = std.time.Instant.now() catch unreachable;
|
||||
// const start_micro = std.time.microTimestamp();
|
||||
// defer {
|
||||
// const end = std.time.Instant.now() catch unreachable;
|
||||
// log.warn(
|
||||
// "[drawFrame time] start_micro={} duration={}ns",
|
||||
// .{ start_micro, end.since(start) / std.time.ns_per_us },
|
||||
// );
|
||||
// }
|
||||
|
||||
// We hold the draw mutex to prevent changes to any
|
||||
// data we access while we're in the middle of drawing.
|
||||
|
|
@ -1632,6 +1661,7 @@ pub fn Renderer(comptime GraphicsAPI: type) type {
|
|||
self.images.draw(
|
||||
&self.api,
|
||||
self.shaders.pipelines.image,
|
||||
frame.uniforms,
|
||||
&pass,
|
||||
.kitty_below_bg,
|
||||
);
|
||||
|
|
@ -1648,6 +1678,7 @@ pub fn Renderer(comptime GraphicsAPI: type) type {
|
|||
self.images.draw(
|
||||
&self.api,
|
||||
self.shaders.pipelines.image,
|
||||
frame.uniforms,
|
||||
&pass,
|
||||
.kitty_below_text,
|
||||
);
|
||||
|
|
@ -1675,6 +1706,7 @@ pub fn Renderer(comptime GraphicsAPI: type) type {
|
|||
self.images.draw(
|
||||
&self.api,
|
||||
self.shaders.pipelines.image,
|
||||
frame.uniforms,
|
||||
&pass,
|
||||
.kitty_above_text,
|
||||
);
|
||||
|
|
@ -1684,6 +1716,7 @@ pub fn Renderer(comptime GraphicsAPI: type) type {
|
|||
if (self.overlay != null) self.images.draw(
|
||||
&self.api,
|
||||
self.shaders.pipelines.image,
|
||||
frame.uniforms,
|
||||
&pass,
|
||||
.overlay,
|
||||
);
|
||||
|
|
|
|||
|
|
@ -105,6 +105,7 @@ pub const State = struct {
|
|||
self: *State,
|
||||
api: *GraphicsAPI,
|
||||
pipeline: GraphicsAPI.Pipeline,
|
||||
uniforms: GraphicsAPI.Buffer(GraphicsAPI.shaders.Uniforms),
|
||||
pass: *GraphicsAPI.RenderPass,
|
||||
placement_type: DrawPlacements,
|
||||
) void {
|
||||
|
|
@ -168,6 +169,21 @@ pub const State = struct {
|
|||
|
||||
pass.step(.{
|
||||
.pipeline = pipeline,
|
||||
// Bind uniforms explicitly per image step. Without
|
||||
// this, the image pipeline relied on whatever
|
||||
// uniforms a previous (cell_bg / cell_text) step
|
||||
// happened to bind in the same render pass — works
|
||||
// if the renderer always draws cells before images,
|
||||
// but a race on first-frame init (precompiled-SPV
|
||||
// path returned from Shaders.init fast enough that
|
||||
// image.draw could fire before the cell steps had
|
||||
// populated the descriptor set) showed the image
|
||||
// shader reading garbage cell_size from a stale
|
||||
// UBO binding, producing image quads that covered
|
||||
// the entire viewport. Defensive explicit bind
|
||||
// makes the image pipeline's UBO source independent
|
||||
// of prior-step ordering.
|
||||
.uniforms = uniforms.buffer,
|
||||
.buffers = &.{buf.buffer},
|
||||
.textures = &.{texture},
|
||||
.draw = .{
|
||||
|
|
|
|||
|
|
@ -43,5 +43,10 @@ void main() {
|
|||
vec2 image_pos = (cell_size * grid_pos) + cell_offset;
|
||||
image_pos += dest_size * corner;
|
||||
|
||||
gl_Position = projection_matrix * vec4(image_pos.xy, 1.0, 1.0);
|
||||
// Z=0 (not 1) so we land in the middle of Vulkan's [0,1] NDC
|
||||
// depth range after `ortho2d`'s `-1` z scale. OpenGL accepts
|
||||
// either since there's no depth attachment, but Vulkan clips
|
||||
// NDC z<0 (which `vec4(_, _, 1.0, 1.0)` would produce) and
|
||||
// erases the entire image. Matches `cell_text.v.glsl`.
|
||||
gl_Position = projection_matrix * vec4(image_pos.xy, 0.0, 1.0);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -49,4 +49,24 @@ layout(location = 0) out vec4 _fragColor;
|
|||
#define texture2D texture
|
||||
|
||||
void mainImage( out vec4 fragColor, in vec2 fragCoord );
|
||||
void main() { mainImage (_fragColor, gl_FragCoord.xy); }
|
||||
|
||||
// Vulkan-only: wrap `texture(sampler2D, vec2)` so iChannel0
|
||||
// (back_texture, in Vulkan top-left orientation) appears to
|
||||
// the author in OpenGL/shadertoy convention (lower-left).
|
||||
// Defined BEFORE the `#define`, so the inner `texture(s, ...)`
|
||||
// call here resolves to the GLSL built-in, not back to ourselves
|
||||
// (no preprocessor recursion).
|
||||
#ifdef GHASTTY_VULKAN
|
||||
// Sample `s` with the V coordinate mirrored (v -> 1 - v).
vec4 _ghastty_tex2d(sampler2D s, vec2 uv) {
    vec2 flipped = vec2(uv.x, 1.0 - uv.y);
    return texture(s, flipped);
}
|
||||
#define texture _ghastty_tex2d
|
||||
#endif
|
||||
|
||||
// Entry point: forwards the fragment coordinate to the user's
// `mainImage`. When GHASTTY_VULKAN is defined the Y coordinate is
// mirrored against `iResolution.y` first.
void main() {
    vec2 frag_coord = gl_FragCoord.xy;
#ifdef GHASTTY_VULKAN
    frag_coord.y = iResolution.y - frag_coord.y;
#endif
    mainImage(_fragColor, frag_coord);
}
|
||||
|
|
|
|||
|
|
@ -40,16 +40,69 @@ pub const Uniforms = extern struct {
|
|||
};
|
||||
|
||||
/// The target to load shaders for.
|
||||
pub const Target = enum { glsl, msl };
|
||||
///
|
||||
/// - `.glsl`: roundtripped through SPIR-V back to GLSL via
|
||||
/// spirv-cross. Normalizes/validates the source. The OpenGL
|
||||
/// backend consumes this.
|
||||
/// - `.msl`: spirv-cross translation to Metal Shading Language.
|
||||
/// - `.spv`: raw SPIR-V binary (no spirv-cross roundtrip). The
|
||||
/// Vulkan backend consumes this — Vulkan compiles GLSL → SPIR-V
|
||||
/// itself via glslang for its built-in shaders, and feeding
|
||||
/// the user shader through GLSL→SPIR-V→GLSL→SPIR-V again costs
|
||||
/// 2× the compile work AND loses the original source structure
|
||||
/// (which broke our `gl_FragCoord` Y-flip rewrite when the
|
||||
/// spirv-cross-emitted main() didn't match the upstream prefix).
|
||||
pub const Target = enum { glsl, msl, spv };
|
||||
|
||||
/// Optional GLSL → GLSL rewriter applied between the prefix splice
|
||||
/// and the SPIR-V compile. Vulkan plugs in `vulkanizeGlsl` here so
|
||||
/// SPIR-V output uses the renderer's multi-set descriptor layout;
|
||||
/// other backends pass `null`.
|
||||
///
|
||||
/// The returned NUL-terminated slice is allocated with `alloc`.
|
||||
/// `shadertoy.loadFromFile` passes its internal arena allocator, so
|
||||
/// the result is released when that arena is torn down at function
/// exit. The only error a rewriter may return is `Allocator.Error`.
|
||||
pub const Rewriter = *const fn (
|
||||
alloc: Allocator,
|
||||
src: []const u8,
|
||||
) Allocator.Error![:0]const u8;
|
||||
|
||||
/// What `loadFromFile`/`loadFromFiles` need beyond the path itself.
|
||||
/// Keeps the function decoupled from any specific backend — every
|
||||
/// backend-flavored knob becomes an explicit field, and `shadertoy`
|
||||
/// itself reaches into no other backend's submodules.
|
||||
pub const LoadOptions = struct {
|
||||
/// Output language / format. See `Target` for the per-variant
|
||||
/// rationale.
|
||||
target: Target,
|
||||
|
||||
/// Macro bodies injected after the prefix's `#version` line.
|
||||
/// `glslFromShader` emits each entry as `#define <entry>` on its
|
||||
/// own line; when the list is empty the prefix is written
|
||||
/// unchanged. Vulkan passes `&.{"GHASTTY_VULKAN 1"}` so the
|
||||
/// prefix's `main()` flips `gl_FragCoord.y` and wraps `texture()`;
/// OpenGL/MSL pass `&.{}`.
|
||||
extra_defines: []const []const u8 = &.{},
|
||||
|
||||
/// Optional second-pass GLSL transform run between the prefix
|
||||
/// splice and the SPIR-V compile. Vulkan installs
|
||||
/// `vulkan/shaders.zig:vulkanizeGlsl` here for the multi-set
|
||||
/// descriptor layout rewrite; other backends leave it null.
|
||||
rewrite: ?Rewriter = null,
|
||||
};
|
||||
|
||||
/// Load a set of shaders from files and convert them to the target
|
||||
/// format. The shader order is preserved.
|
||||
///
|
||||
/// Result element type depends on `opts.target`: `.glsl`/`.msl`
|
||||
/// produce null-terminated UTF-8 source strings; `.spv` produces
|
||||
/// SPIR-V binary bytes (4-byte-aligned, no trailing null). We unify
|
||||
/// the return type as `[]const []const u8` and have the caller cast/
|
||||
/// reinterpret as needed.
|
||||
pub fn loadFromFiles(
|
||||
alloc_gpa: Allocator,
|
||||
paths: configpkg.RepeatablePath,
|
||||
target: Target,
|
||||
) ![]const [:0]const u8 {
|
||||
var list: std.ArrayList([:0]const u8) = .empty;
|
||||
opts: LoadOptions,
|
||||
) ![]const []const u8 {
|
||||
var list: std.ArrayList([]const u8) = .empty;
|
||||
defer list.deinit(alloc_gpa);
|
||||
errdefer for (list.items) |shader| alloc_gpa.free(shader);
|
||||
|
||||
|
|
@ -59,13 +112,19 @@ pub fn loadFromFiles(
|
|||
.required => |path| .{ path, false },
|
||||
};
|
||||
|
||||
const shader = loadFromFile(alloc_gpa, path, target) catch |err| {
|
||||
const shader = loadFromFile(alloc_gpa, path, opts) catch |err| {
|
||||
if (err == error.FileNotFound and optional) {
|
||||
continue;
|
||||
}
|
||||
|
||||
return err;
|
||||
};
|
||||
// Take ownership of `shader` immediately. If the subsequent
|
||||
// `list.append` itself OOMs, the freshly-loaded slice would
|
||||
// leak — `errdefer` at the function level only iterates
|
||||
// `list.items`, and `shader` isn't in `list` yet. Free it
|
||||
// explicitly on the error path before propagating.
|
||||
errdefer alloc_gpa.free(shader);
|
||||
log.info("loaded custom shader path={s}", .{path});
|
||||
try list.append(alloc_gpa, shader);
|
||||
}
|
||||
|
|
@ -75,11 +134,16 @@ pub fn loadFromFiles(
|
|||
|
||||
/// Load a single shader from a file and convert it to the target language
|
||||
/// ready to be used with renderers.
|
||||
///
|
||||
/// For `.glsl` / `.msl` the returned slice is a null-terminated UTF-8
|
||||
/// source string; the underlying allocation is `[:0]const u8` and
|
||||
/// callers that need the sentinel may safely cast. For `.spv` the
|
||||
/// returned slice is raw SPIR-V bytes — no terminator, 4-byte aligned.
|
||||
pub fn loadFromFile(
|
||||
alloc_gpa: Allocator,
|
||||
path: []const u8,
|
||||
target: Target,
|
||||
) ![:0]const u8 {
|
||||
opts: LoadOptions,
|
||||
) ![]const u8 {
|
||||
var arena = ArenaAllocator.init(alloc_gpa);
|
||||
defer arena.deinit();
|
||||
const alloc = arena.allocator();
|
||||
|
|
@ -97,14 +161,33 @@ pub fn loadFromFile(
|
|||
);
|
||||
};
|
||||
|
||||
// Convert to full GLSL
|
||||
const glsl: [:0]const u8 = glsl: {
|
||||
// Convert to full GLSL. `opts.extra_defines` lets a backend
|
||||
// inject `#define <body>` lines after the prefix's `#version`
|
||||
// directive — Vulkan uses this to flip `gl_FragCoord.y` and
|
||||
// wrap `texture()` for upper-left sampling so `mainImage` sees
|
||||
// shadertoy-convention coords; OpenGL/MSL pass `&.{}` and use
|
||||
// the GL-native paths unchanged.
|
||||
const glsl_raw: [:0]const u8 = glsl: {
|
||||
var stream: std.Io.Writer.Allocating = .init(alloc);
|
||||
try glslFromShader(&stream.writer, src);
|
||||
try glslFromShader(&stream.writer, src, opts.extra_defines);
|
||||
try stream.writer.writeByte(0);
|
||||
break :glsl stream.written()[0 .. stream.written().len - 1 :0];
|
||||
};
|
||||
|
||||
// Optional second-pass GLSL transform. Vulkan installs
|
||||
// `vulkanizeGlsl` here so the resulting SPIR-V uses the
|
||||
// renderer's multi-set descriptor layout (UBO=set 0,
|
||||
// samplers=set 1, storage=set 2). Without that rewrite,
|
||||
// glslang assigns everything to `set 0` and the post pipeline's
|
||||
// descriptor set layout points at the wrong slots — the
|
||||
// shader's `iChannel0` ends up at set 0 binding 0 while the
|
||||
// pipeline binds it at set 1 binding 0, sampling returns
|
||||
// garbage / zero, output is transparent.
|
||||
const glsl: [:0]const u8 = if (opts.rewrite) |f|
|
||||
try f(alloc, glsl_raw)
|
||||
else
|
||||
glsl_raw;
|
||||
|
||||
// Convert to SPIR-V
|
||||
const spirv: []const u8 = spirv: {
|
||||
var stream: std.Io.Writer.Allocating = .init(alloc);
|
||||
|
|
@ -129,12 +212,47 @@ pub fn loadFromFile(
|
|||
break :spirv list.items;
|
||||
};
|
||||
|
||||
// Convert to MSL
|
||||
return switch (target) {
|
||||
// Important: using the alloc_gpa here on purpose because this
|
||||
// is the final result that will be returned to the caller.
|
||||
// Validate the SPIR-V regardless of target. glslang has succeeded
|
||||
// at this point but a zero-length output would crash
|
||||
// `vkCreateShaderModule` on the Vulkan path AND would make
|
||||
// `glslFromSpv` / `mslFromSpv` produce empty/garbage GLSL/MSL
|
||||
// with poor diagnostics. Hoist the checks above the switch so
|
||||
// every backend gets the same defensive validation.
|
||||
if (spirv.len < 4) {
|
||||
std.log.warn(
|
||||
"shadertoy: empty SPIR-V output (size={})",
|
||||
.{spirv.len},
|
||||
);
|
||||
return error.InvalidShader;
|
||||
}
|
||||
// First 4 bytes are the SPIR-V magic word 0x07230203
|
||||
// (little-endian). Reject anything else loudly.
|
||||
const magic = std.mem.readInt(u32, spirv[0..4], .little);
|
||||
if (magic != 0x07230203) {
|
||||
std.log.warn(
|
||||
"shadertoy: SPIR-V output missing magic word " ++
|
||||
"(got 0x{x:0>8}, expected 0x07230203)",
|
||||
.{magic},
|
||||
);
|
||||
return error.InvalidShader;
|
||||
}
|
||||
|
||||
// Important: using the alloc_gpa here on purpose because this is
|
||||
// the final result that will be returned to the caller (the arena
|
||||
// gets torn down on function exit).
|
||||
return switch (opts.target) {
|
||||
.glsl => try glslFromSpv(alloc_gpa, spirv),
|
||||
.msl => try mslFromSpv(alloc_gpa, spirv),
|
||||
.spv => spv: {
|
||||
// Copy the SPIR-V binary out of the arena into a
|
||||
// 4-byte-aligned allocation under `alloc_gpa`. Vulkan
|
||||
// expects `pCode: []const u32`, so over-aligning is safe;
|
||||
// we return as `[]const u8` to share the unified return
|
||||
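// type with the GLSL/MSL paths.
// NOTE(review): because the slice is typed `[]const u8`, a caller's
// `alloc_gpa.free(slice)` reports alignment 1 rather than the `u32`
// alignment requested here. `std.mem.Allocator` expects a free to
// use the alignment of the original allocation — confirm callers
// free this slice accordingly.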
|
||||
const dst = try alloc_gpa.alignedAlloc(u8, .of(u32), spirv.len);
|
||||
@memcpy(dst, spirv);
|
||||
break :spv dst;
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -144,19 +262,97 @@ pub fn loadFromFile(
|
|||
/// mainImage function and don't define any of the uniforms. This function
|
||||
/// will convert the ShaderToy shader into a valid GLSL shader that can be
|
||||
/// compiled and linked.
|
||||
pub fn glslFromShader(writer: *std.Io.Writer, src: []const u8) !void {
|
||||
pub fn glslFromShader(
|
||||
writer: *std.Io.Writer,
|
||||
src: []const u8,
|
||||
/// Macros to inject as `#define <body>` lines after the prefix's
|
||||
/// `#version` directive (GLSL requires `#version` first, so we
|
||||
/// can't simply prepend). Empty for the default OpenGL/MSL paths;
|
||||
/// the Vulkan SPV path uses this to flag the prefix's `main()`
|
||||
/// to Y-flip `gl_FragCoord`.
|
||||
defines: []const []const u8,
|
||||
) !void {
|
||||
const prefix = @embedFile("shaders/shadertoy_prefix.glsl");
|
||||
try writer.writeAll(prefix);
|
||||
if (defines.len == 0) {
|
||||
try writer.writeAll(prefix);
|
||||
} else {
|
||||
// GLSL requires `#version` to be the first non-blank line,
|
||||
// so we can't simply prepend defines. Find the first
|
||||
// newline after `#version …` and inject defines on the
|
||||
// following line.
|
||||
//
|
||||
// The prefix is `@embedFile`'d at comptime, so its bytes
|
||||
// are known to the compiler — assert it has a newline once
|
||||
// here rather than threading branchy fallback paths
|
||||
// through the runtime. A future prefix edit that loses its
|
||||
// trailing newline will fail at comptime, not silently at
|
||||
// runtime.
|
||||
comptime {
|
||||
if (std.mem.indexOfScalar(u8, prefix, '\n') == null) {
|
||||
@compileError(
|
||||
"shadertoy_prefix.glsl must contain at least one newline " ++
|
||||
"for `#define` injection — see glslFromShader",
|
||||
);
|
||||
}
|
||||
if (!std.mem.startsWith(u8, prefix, "#version")) {
|
||||
@compileError(
|
||||
"shadertoy_prefix.glsl must start with `#version` " ++
|
||||
"(GLSL spec requirement) — see glslFromShader",
|
||||
);
|
||||
}
|
||||
}
|
||||
const first_nl = comptime std.mem.indexOfScalar(u8, prefix, '\n').?;
|
||||
try writer.writeAll(prefix[0 .. first_nl + 1]);
|
||||
for (defines) |def| {
|
||||
try writer.writeAll("#define ");
|
||||
try writer.writeAll(def);
|
||||
try writer.writeAll("\n");
|
||||
}
|
||||
try writer.writeAll(prefix[first_nl + 1 ..]);
|
||||
}
|
||||
try writer.writeAll("\n\n");
|
||||
try writer.writeAll(src);
|
||||
}
|
||||
|
||||
/// Process-wide cache of compiled SPIR-V keyed by GLSL source bytes.
|
||||
/// The C-API glslang path (`Shader.create` / `program.spirvGenerate`)
|
||||
/// used below pulls allocations from glslang's thread-local
|
||||
/// TPoolAllocator on every call — pages that are never released
|
||||
/// because Zig pthreads don't run C++ thread_local destructors. With
|
||||
/// N tabs each calling `loadFromFiles` → `loadFromFile` →
|
||||
/// `spirvFromGlsl` for the same custom shader file, that's N
|
||||
/// renderer threads each leaking a per-thread pool. Caching the SPV
|
||||
/// bytes lets every call after the first short-circuit without
|
||||
/// touching glslang.
|
||||
///
|
||||
/// Same problem and same fix as the C++ shim's spv_cache in
|
||||
/// pkg/glslang/override/ghastty_vk_shim.cpp; this one covers the
|
||||
/// C-API path that the shim doesn't see.
|
||||
var spv_cache_mutex: std.Thread.Mutex = .{};
|
||||
var spv_cache: std.StringHashMapUnmanaged([]const u8) = .empty;
|
||||
|
||||
/// Convert a GLSL shader into SPIR-V assembly.
|
||||
pub fn spirvFromGlsl(
|
||||
writer: *std.Io.Writer,
|
||||
errlog: ?*SpirvLog,
|
||||
src: [:0]const u8,
|
||||
) !void {
|
||||
// Cache check. On hit, write the cached SPV to the writer and
|
||||
// return without entering glslang. Strict-equality keying on
|
||||
// the source bytes (excluding the NUL terminator) — the input is
|
||||
// deterministically generated upstream from a stable shader
|
||||
// file + a small set of `#define` lines, so identical sources
|
||||
// produce identical SPV.
|
||||
{
|
||||
spv_cache_mutex.lock();
|
||||
defer spv_cache_mutex.unlock();
|
||||
const key: []const u8 = src[0..src.len];
|
||||
if (spv_cache.get(key)) |cached| {
|
||||
try writer.writeAll(cached);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// So we can run unit tests without fear.
|
||||
if (builtin.is_test) try glslang.testing.ensureInit();
|
||||
|
||||
|
|
@ -205,6 +401,26 @@ pub fn spirvFromGlsl(
|
|||
const ptr_u8: [*]u8 = @ptrCast(ptr);
|
||||
const slice_u8: []u8 = ptr_u8[0 .. size * 4];
|
||||
try writer.writeAll(slice_u8);
|
||||
|
||||
// Populate the cache so the next surface's compile of the same
// source short-circuits. Entries are allocated from `smp_allocator`
// and stay in the cache for the life of the process; the keys and
// values are bounded by the number of distinct shaders the user has
// configured, which is small (typically 1-3), so even at 100 KB per
// shader the total cache cost is negligible against the per-tab pool
// pages we'd otherwise leak.
//
// Caching is best-effort: any allocation failure below simply skips
// the insert and returns success, because the SPIR-V has already
// been written to `writer`. Those early exits are plain `return`s,
// not error returns, so `errdefer` would never run on them; each
// failure path therefore frees what it has allocated explicitly.
spv_cache_mutex.lock();
defer spv_cache_mutex.unlock();
const key: []const u8 = src[0..src.len];
if (!spv_cache.contains(key)) {
    const cache_alloc = std.heap.smp_allocator;
    const key_copy = cache_alloc.dupe(u8, key) catch return;
    const spv_copy = cache_alloc.dupe(u8, slice_u8) catch {
        // The key copy has no owner yet; release it before bailing.
        cache_alloc.free(key_copy);
        return;
    };
    spv_cache.put(cache_alloc, key_copy, spv_copy) catch {
        // The map did not take ownership of either copy.
        cache_alloc.free(spv_copy);
        cache_alloc.free(key_copy);
        return;
    };
}
|
||||
}
|
||||
|
||||
/// Retrieve errors from spirv compilation.
|
||||
|
|
@ -348,7 +564,7 @@ fn spvCross(
|
|||
fn testGlslZ(alloc: Allocator, src: []const u8) ![:0]const u8 {
|
||||
var buf: std.Io.Writer.Allocating = .init(alloc);
|
||||
defer buf.deinit();
|
||||
try glslFromShader(&buf.writer, src);
|
||||
try glslFromShader(&buf.writer, src, &.{});
|
||||
return try buf.toOwnedSliceSentinel(0);
|
||||
}
|
||||
|
||||
|
|
@ -424,4 +640,3 @@ test "shadertoy to glsl" {
|
|||
|
||||
const test_crt = @embedFile("shaders/test_shadertoy_crt.glsl");
|
||||
const test_invalid = @embedFile("shaders/test_shadertoy_invalid.glsl");
|
||||
const test_focus = @embedFile("shaders/test_shadertoy_focus.glsl");
|
||||
|
|
|
|||
|
|
@ -0,0 +1,242 @@
|
|||
//! Per-draw recording context. Lifecycle: `begin` → caller records
|
||||
//! commands (via the eventual `renderPass()` accessor) → `complete`.
|
||||
//!
|
||||
//! Unlike `opengl/Frame.zig` (which is a zero-state wrapper around
|
||||
//! the implicit GL context), Vulkan's Frame drives the explicit
|
||||
//! sync model: a fence is signaled when the GPU finishes the
|
||||
//! frame's submit, and `complete` waits on it before handing the
|
||||
//! dmabuf fd to the host. That's required for correctness — the
|
||||
//! host shouldn't sample memory the GPU is still writing — and
|
||||
//! acceptable for perf because terminal frames cap at ~60Hz.
|
||||
//!
|
||||
//! Ownership: the command buffer and fence are owned by the
|
||||
//! top-level renderer (`Vulkan.zig`, not yet wired) and passed into
|
||||
//! `begin` via `Options`. Frame just borrows them. The top-level
|
||||
//! is responsible for creating/destroying them and for resetting
|
||||
//! the fence to unsignaled state before `begin` (this layer would
|
||||
//! conflate ownership otherwise).
|
||||
//!
|
||||
//! Why not semaphores? With dmabuf export to the host (rather than
|
||||
//! a `VkSwapchain` we own), we have no acquire/present semaphore
|
||||
//! pair to sync against. Fence-only is the right model when
|
||||
//! libghostty hands the host a "GPU is done writing to this fd"
|
||||
//! guarantee at present time. The host's own compositor handles
|
||||
//! display sync from there.
|
||||
//!
|
||||
//! `renderPass()` will land alongside `vulkan/RenderPass.zig` in a
|
||||
//! follow-up commit. For now it's not declared — calling code that
|
||||
//! tries to record into a frame will fail to compile, which is
|
||||
//! intentional: the recording path isn't ready.
|
||||
//!
|
||||
//! Counterpart: `src/renderer/opengl/Frame.zig`.
|
||||
|
||||
const Self = @This();
|
||||
|
||||
const std = @import("std");
|
||||
const vulkan = @import("vulkan");
|
||||
const vk = vulkan.c;
|
||||
|
||||
const Device = vulkan.Device;
|
||||
const DescriptorPool = vulkan.DescriptorPool;
|
||||
const Target = @import("Target.zig");
|
||||
const RenderPass = @import("RenderPass.zig");
|
||||
|
||||
const Vulkan = @import("../Vulkan.zig");
|
||||
const Renderer = @import("../generic.zig").Renderer(Vulkan);
|
||||
const Health = @import("../../renderer.zig").Health;
|
||||
|
||||
const log = std.log.scoped(.vulkan);
|
||||
|
||||
/// Caller-provided resources for one frame. `begin` copies each field
/// into the returned frame value.
pub const Options = struct {
|
||||
/// Command buffer this frame's commands record into. Caller
|
||||
/// resets it to a fresh state before `begin` is called.
|
||||
cb: vk.VkCommandBuffer,
|
||||
|
||||
/// Fence that gets signaled when the submit completes. Caller
|
||||
/// resets it to unsignaled before `begin` is called.
|
||||
fence: vk.VkFence,
|
||||
|
||||
/// Per-frame descriptor pool. `RenderPass.step` borrows it for
|
||||
/// the per-call descriptor sets it allocates whenever a
|
||||
/// pipeline is re-used within a single pass. The pool is
|
||||
/// caller-owned (top-level `Vulkan.zig` keeps it threadlocal)
|
||||
/// and must be reset (`vkResetDescriptorPool`) by the caller
|
||||
/// before each `Frame.begin` so this frame's allocations don't
|
||||
/// pile on the previous frame's. Defaults to `null` (no pool).
|
||||
step_pool: ?*DescriptorPool = null,
|
||||
};
|
||||
|
||||
pub const Error = error{
|
||||
/// A Vulkan call returned a non-success status. `begin` returns
|
||||
/// this when `vkBeginCommandBuffer` fails. NOTE(review): `complete`
|
||||
/// returns `void` and records end/submit/wait failures in its
/// local `health` value rather than returning this error.
|
||||
VulkanFailed,
|
||||
};
|
||||
|
||||
/// Borrowed device; `begin` and `complete` issue their Vulkan calls
/// through its dispatch table and `queueSubmit`.
device: *const Device,
|
||||
/// Renderer this frame belongs to, as passed to `begin`.
renderer: *Renderer,
|
||||
/// Render target; `complete` records its present barrier into `cb`
/// before ending the command buffer.
target: *Target,
|
||||
/// Command buffer being recorded (copied from `Options.cb`).
cb: vk.VkCommandBuffer,
|
||||
/// Fence handed to the queue submit and waited on in `complete`
/// (copied from `Options.fence`).
fence: vk.VkFence,
|
||||
/// Optional per-frame descriptor pool (copied from `Options.step_pool`).
step_pool: ?*DescriptorPool = null,
|
||||
|
||||
/// Start recording a frame into `opts.cb`.
///
/// The command buffer is begun with `ONE_TIME_SUBMIT`, since every
/// frame is submitted before the next `begin` reuses the buffer. This
/// function does not reset anything itself: per `Options`, the caller
/// resets the command buffer and fence beforehand.
///
/// Returns a frame that borrows `device`, `renderer`, `target` and the
/// resources in `opts`, or `error.VulkanFailed` (after logging the
/// result code) if `vkBeginCommandBuffer` does not return `VK_SUCCESS`.
pub fn begin(
    opts: Options,
    device: *const Device,
    renderer: *Renderer,
    target: *Target,
) Error!Self {
    const info: vk.VkCommandBufferBeginInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .pNext = null,
        .flags = vk.VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
        .pInheritanceInfo = null,
    };

    const result = device.dispatch.beginCommandBuffer(opts.cb, &info);
    if (result == vk.VK_SUCCESS) {
        return .{
            .device = device,
            .renderer = renderer,
            .target = target,
            .cb = opts.cb,
            .fence = opts.fence,
            .step_pool = opts.step_pool,
        };
    }

    log.err("vkBeginCommandBuffer (frame) failed: result={}", .{result});
    return error.VulkanFailed;
}
|
||||
|
||||
/// Finish the frame: end command-buffer recording, submit it to the
/// queue with `self.fence`, block until that fence signals, recycle
/// the per-frame buffer pool, present the target to the host, and
/// finally notify the generic renderer via `frameCompleted`.
///
/// `sync` is part of the cross-backend `Frame.complete` interface
/// (parity with `opengl/Frame.zig`) and is ignored here: this backend
/// always waits on the fence, because handing the dmabuf fd to the
/// host before the GPU is done would race the GPU. The argument may
/// eventually gate the wait once a proper queue of frames is in
/// flight.
///
/// This function cannot return an error. Every failure is logged,
/// downgrades the frame to `.unhealthy` (which skips `present`), and
/// is reported to the renderer through `frameCompleted(health)`.
pub fn complete(self: *const Self, sync: bool) void {
    // `sync=false` is silently treated as `sync=true`; see the doc
    // comment above. If multi-frame pipelining ever lands, this is
    // where the param would gate the wait.
    _ = sync;
    const dev = self.device;

    // `health` becomes `.unhealthy` on any GPU-side error below. We
    // ALWAYS run `buffer_pool.cycle` and `frameCompleted` on the
    // way out — skipping them on error left every retired buffer
    // stuck in `pending` (unbounded growth) and held the renderer's
    // swap-chain semaphore forever, so the NEXT `drawFrame` would
    // hang with no diagnostic.
    var health: Health = .healthy;
    // Set once `queueSubmit` succeeded, i.e. the GPU may be touching
    // this frame's resources.
    var submitted = false;

    // Make the rendered pixels visible to the host's mmap read. In
    // `.direct` mode this is just a memory barrier; in `.legacy_copy`
    // mode it also runs `vkCmdCopyImageToBuffer`. See `Target.zig`.
    self.target.recordPresentBarrier(self.cb);

    // end -> submit -> wait. Any failure breaks out of the block so
    // the shared cleanup below still runs.
    end_cb: {
        const r = dev.dispatch.endCommandBuffer(self.cb);
        if (r != vk.VK_SUCCESS) {
            log.err("vkEndCommandBuffer (frame) failed: result={}", .{r});
            health = .unhealthy;
            break :end_cb;
        }

        const submit_info: vk.VkSubmitInfo = .{
            .sType = vk.VK_STRUCTURE_TYPE_SUBMIT_INFO,
            .pNext = null,
            .waitSemaphoreCount = 0,
            .pWaitSemaphores = null,
            .pWaitDstStageMask = null,
            .commandBufferCount = 1,
            .pCommandBuffers = &self.cb,
            .signalSemaphoreCount = 0,
            .pSignalSemaphores = null,
        };
        // Externally-synchronized via `Device.queueSubmit` — splits
        // and tabs share the host's VkQueue and Vulkan rejects
        // concurrent unsynchronized access.
        const sr = dev.queueSubmit(1, &submit_info, self.fence);
        if (sr != vk.VK_SUCCESS) {
            log.err("vkQueueSubmit (frame) failed: result={}", .{sr});
            health = .unhealthy;
            break :end_cb;
        }
        submitted = true;

        // Wait for the GPU to finish writing the target before letting
        // the host import the dmabuf. UINT64_MAX = "wait indefinitely".
        const wr = dev.dispatch.waitForFences(
            dev.device,
            1,
            &self.fence,
            vk.VK_TRUE,
            std.math.maxInt(u64),
        );
        if (wr != vk.VK_SUCCESS) {
            log.err("vkWaitForFences (frame) failed: result={}", .{wr});
            health = .unhealthy;
        }
    }

    // Recycle the per-frame Buffer pool, even on the error path.
    // The only way to be unhealthy here with `submitted` set is that
    // the submit went through but the fence wait failed (DEVICE_LOST
    // etc.), so we cannot prove the GPU is done with the buffers the
    // frame referenced. Conservatively wait for the device to go idle
    // before draining; without that, the buffers the renderer queued
    // for the frame could be recycled while still in use.
    if (health == .unhealthy and submitted) {
        const ir = dev.dispatch.deviceWaitIdle(dev.device);
        if (ir != vk.VK_SUCCESS) {
            // Nothing more we can do; record it so the failed drain
            // is visible next to the fence-wait error above.
            log.err("vkDeviceWaitIdle (frame) failed: result={}", .{ir});
        }
    }
    Vulkan.buffer_pool.cycle(dev);

    // Hand the rendered target off to the host. On the unhealthy
    // path we skip present — the dmabuf may be partially written
    // and the host should see the previous frame instead (the
    // generic renderer's no-op-frame logic re-presents
    // `last_target`).
    if (health == .healthy) {
        self.renderer.api.present(self.target) catch |err| {
            log.err("present failed: {}", .{err});
            health = .unhealthy;
        };
    }

    // Tell the generic renderer the frame is done so it releases the
    // swap-chain semaphore. Without this, `SwapChain.nextFrame()`
    // blocks the second call to `drawFrame` forever (one buffer in
    // the chain, never freed). MUST run regardless of `health`.
    self.renderer.frameCompleted(health);
}
|
||||
|
||||
/// Open a render pass that records into this frame's command buffer.
///
/// The frame's device, command buffer and per-step descriptor pool are
/// forwarded to `RenderPass.begin` together with the caller-supplied
/// `attachments`. The returned `RenderPass` accepts `step()` calls for
/// the per-pipeline draw work and is finalized with `complete()`.
pub inline fn renderPass(
    self: *const Self,
    attachments: []const RenderPass.Options.Attachment,
) RenderPass {
    const pass_opts: RenderPass.Options = .{
        .attachments = attachments,
        .step_pool = self.step_pool,
        .cb = self.cb,
        .device = self.device,
    };
    return RenderPass.begin(pass_opts);
}
|
||||
|
||||
// Reference every declaration in this file so the compiler analyzes
// them when the test suite is built.
test {
    std.testing.refAllDecls(@This());
}
|
||||
|
|
@ -0,0 +1,466 @@
|
|||
//! `VkPipeline` (graphics) + the `VkPipelineLayout` that backs it.
|
||||
//!
|
||||
//! Vulkan 1.3 with **dynamic rendering**: we use
|
||||
//! `VkPipelineRenderingCreateInfo` (chained into the pipeline create
|
||||
//! info via `pNext`) instead of constructing a `VkRenderPass` + a
|
||||
//! framebuffer per target. This removes the entire RenderPass /
|
||||
//! Framebuffer object lifecycle the OpenGL backend never had to
|
||||
//! think about — saves significant boilerplate.
|
||||
//!
|
||||
//! Wrapper scope: the renderer-level "what shaders + what attachment
|
||||
//! format" lives in `vulkan/shaders.zig`'s eventual `Shaders` struct
|
||||
//! (mirroring `opengl/shaders.zig`). This file is the bare
|
||||
//! `VkPipeline` wrapper that takes everything explicitly:
|
||||
//! pre-compiled shader modules, descriptor set layouts, push
|
||||
//! constant ranges, vertex input description, color attachment
|
||||
//! format. The renderer's pipeline-collection assembly layer is
|
||||
//! responsible for plumbing those together — Pipeline.zig has no
|
||||
//! per-shader knowledge.
|
||||
//!
|
||||
//! Counterpart: `src/renderer/opengl/Pipeline.zig`.
|
||||
|
||||
const Self = @This();
|
||||
|
||||
const std = @import("std");
|
||||
const vulkan = @import("vulkan");
|
||||
const vk = vulkan.c;
|
||||
|
||||
const Device = vulkan.Device;
|
||||
const DescriptorPool = vulkan.DescriptorPool;
|
||||
|
||||
const log = std.log.scoped(.vulkan);
|
||||
|
||||
/// How the vertex buffer advances while drawing. Chosen through
/// `VertexInput.step_fn`; `init` translates it into the binding's
/// `inputRate`.
pub const StepFunction = enum {
    /// Constant value across all vertices (no vertex input). `init`
    /// maps this to `VK_VERTEX_INPUT_RATE_VERTEX`, the same rate as
    /// `per_vertex`.
    constant,
    /// One per vertex (`VK_VERTEX_INPUT_RATE_VERTEX`).
    per_vertex,
    /// One per instance (`VK_VERTEX_INPUT_RATE_INSTANCE`).
    per_instance,
};
|
||||
|
||||
/// Vertex input description for the single vertex buffer binding
/// (`binding = 0`) that `init` configures. Pass `null` as
/// `Options.vertex_input` for shaders that don't read vertex
/// attributes (e.g. screen-quad shaders that derive position from
/// `gl_VertexIndex`).
pub const VertexInput = struct {
    /// Byte stride of the vertex buffer. `init` also copies this to
    /// `Pipeline.vertex_stride`.
    stride: u32,

    /// Whether the buffer is stepped per-vertex or per-instance.
    step_fn: StepFunction = .per_vertex,

    /// `binding = 0` attribute descriptions describing each field of
    /// the vertex struct. The caller is responsible for building
    /// these (offsets, formats) — Pipeline doesn't introspect; the
    /// slice is handed to Vulkan as-is.
    attributes: []const vk.VkVertexInputAttributeDescription,
};
|
||||
|
||||
/// Maximum descriptor sets a single pipeline can address. The
/// preprocessor in `shaders.zig` bins resources into 3 sets (UBO=0,
/// sampler=1, storage=2), so 3 is sufficient. Bump if/when a fourth
/// resource class is introduced.
///
/// This is also the length of the fixed-size `descriptor_sets` /
/// `descriptor_set_layouts` arrays on `Pipeline`; `init` returns
/// `error.VulkanFailed` if `Options.descriptor_set_layouts` is longer.
pub const MAX_DESCRIPTOR_SETS: usize = 3;
|
||||
|
||||
/// Everything `init` needs to build a pipeline. All Vulkan handles in
/// here are supplied by the caller; `init` itself only creates the
/// `VkPipelineLayout` and the `VkPipeline`.
pub const Options = struct {
    /// Device used to create the pipeline. The pointer is stored on
    /// the resulting `Pipeline` and used again by `deinit`.
    device: *const Device,

    /// Optional descriptor pool. If provided, `Pipeline.init`
    /// allocates one descriptor set per non-null entry in
    /// `descriptor_set_layouts` and stores them on
    /// `Pipeline.descriptor_sets[i]`, indexed by set number.
    /// `RenderPass.step` updates + binds them per frame.
    /// When `null`, no sets are allocated and every
    /// `Pipeline.descriptor_sets[i]` stays `null`.
    descriptor_pool: ?*DescriptorPool = null,

    /// Shader modules. The caller owns these — Pipeline does not
    /// destroy them on deinit (they're typically reused across
    /// multiple pipelines and outlive any one of them). Both stages
    /// use the entry point `"main"`.
    vertex_module: vk.VkShaderModule,
    fragment_module: vk.VkShaderModule,

    /// Optional vertex input. `null` ⇒ no vertex bindings.
    vertex_input: ?VertexInput = null,

    /// Per-set descriptor layouts. Element i corresponds to `set = i`
    /// in the shader. `null` slots are placeholders for sets the
    /// pipeline doesn't actually use — Vulkan requires the pipeline
    /// layout's `pSetLayouts` to be contiguous up to the max used
    /// set number, so we substitute `empty_set_layout` for nulls.
    /// At most `MAX_DESCRIPTOR_SETS` entries are accepted.
    descriptor_set_layouts: []const ?vk.VkDescriptorSetLayout = &.{},

    /// 0-binding placeholder layout used to fill `null` entries in
    /// `descriptor_set_layouts`. Required when any entry is null
    /// (`init` fails with `error.VulkanFailed` otherwise); can stay
    /// null when every entry is non-null. Owned by the caller
    /// (`Shaders.init` caches one and reuses it).
    empty_set_layout: vk.VkDescriptorSetLayout = null,

    /// Push constant ranges referenced by the shaders.
    push_constant_ranges: []const vk.VkPushConstantRange = &.{},

    /// Default sampler the pipeline uses for every
    /// combined-image-sampler binding the caller doesn't supply a
    /// sampler for. Lets the renderer pass plain `textures` (parallel
    /// to OpenGL's per-texture `glBindTextureUnit` model) without
    /// having to also track per-binding samplers; the pipeline knows
    /// the right sampler for its own atlases (e.g. cell_text uses
    /// unnormalized coords for `sampler2D` standing in for the old
    /// `sampler2DRect`). The handle is borrowed, not owned by
    /// `Pipeline` (`deinit` does not destroy it) — `Shaders.init`
    /// owns the lifetime.
    sampler: vk.VkSampler = null,

    /// Color attachment format. With dynamic rendering this must
    /// match the format of the image the renderer eventually targets
    /// in `vkCmdBeginRendering`. The pipeline is created with exactly
    /// one color attachment and no depth/stencil attachment.
    color_format: vk.VkFormat,

    /// Pre-multiplied-alpha source-over blending
    /// (`ONE` / `ONE_MINUS_SRC_ALPHA` for both color and alpha).
    /// Disable for the bg_color pass (full opaque background).
    blending_enabled: bool = true,

    /// Primitive topology. The renderer's shaders use TRIANGLE_STRIP
    /// for the full-screen quad and TRIANGLE_LIST for instanced cells.
    topology: vk.VkPrimitiveTopology = vk.VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
};
|
||||
|
||||
/// Errors `init` can return.
pub const Error = error{
    /// `vkCreatePipelineLayout` or `vkCreateGraphicsPipelines`
    /// returned a non-success status. Also returned when the options
    /// are rejected up front (more than `MAX_DESCRIPTOR_SETS` layouts,
    /// or a `null` layout slot with no `empty_set_layout`) and when a
    /// descriptor set allocation fails. The specific cause is logged
    /// at the failure site.
    VulkanFailed,
};
|
||||
|
||||
/// Device the pipeline was created on (borrowed from `Options.device`);
/// `deinit` uses it to destroy `pipeline` and `layout`.
device: *const Device,
/// The graphics pipeline handle. Destroyed by `deinit`.
pipeline: vk.VkPipeline,
/// The pipeline layout `pipeline` was created with. Destroyed by
/// `deinit`.
layout: vk.VkPipelineLayout,

/// Descriptor sets allocated from `opts.descriptor_pool`, indexed by
/// set number. `descriptor_sets[i]` is the set bound at `set = i` in
/// the shader; `null` means the pipeline doesn't use that set (so
/// `RenderPass.step` skips updating/binding it), or that no
/// descriptor pool was given to `init`. `set_count` is the number of
/// entries in `Options.descriptor_set_layouts` (`init` stores the
/// slice length, including any `null` placeholder slots), i.e. the
/// number of sets the pipeline layout declares.
///
/// HOT-PATH NOTE: these sets are SHARED across all `step()` calls
/// that bind this pipeline within a single command buffer, but
/// `vkCmdDraw` reads descriptors at submit time, so re-using the
/// same pipeline twice with different per-call resources would
/// cause both draws to see the LAST update's bindings.
/// `RenderPass.step` defends against this by allocating a fresh
/// per-call set from the pass's `step_pool` whenever the per-step
/// resources differ; these `descriptor_sets[i]` slots act as
/// pre-warmed defaults (used only when the call site is
/// single-step-per-pipeline like bg_color / cell_bg).
descriptor_sets: [MAX_DESCRIPTOR_SETS]vk.VkDescriptorSet = .{ null, null, null },
set_count: u32 = 0,

/// Descriptor set layouts associated with this pipeline, indexed by
/// set number. `null` matches a `null` slot in
/// `Options.descriptor_set_layouts` (the `empty_set_layout`
/// placeholder is not recorded here).
/// Stored so `RenderPass.step` can allocate per-call sets from the
/// pass's per-frame descriptor pool without round-tripping through
/// the original `Shaders.init` layout-creation code path.
descriptor_set_layouts: [MAX_DESCRIPTOR_SETS]vk.VkDescriptorSetLayout = .{ null, null, null },

/// Binding number that `Step.uniforms` writes to within set 0.
/// Defaults to 1 to match `common.glsl`'s
/// `layout(binding = 1, std140) uniform Globals`. `init` never sets
/// this, so override it on the returned value if a different shader
/// uses a different slot.
uniforms_binding: u32 = 1,

/// Fallback sampler copied from `Options.sampler`. Borrowed: `deinit`
/// does not destroy it. See `Options.sampler`.
sampler: vk.VkSampler = null,

/// Vertex buffer stride (bytes). Needed so `RenderPass.step` can
/// bind a vertex buffer with the right per-instance/per-vertex
/// stride. Defaults to 0 (no vertex buffer); set automatically when
/// `Options.vertex_input` is non-null.
vertex_stride: u32 = 0,
|
||||
|
||||
/// Build a graphics pipeline (dynamic rendering, one color attachment,
/// no depth/stencil) plus the pipeline layout backing it.
///
/// Steps, in order:
///   1. Validate `opts.descriptor_set_layouts` and flatten it into the
///      contiguous `pSetLayouts` array, substituting
///      `opts.empty_set_layout` for `null` slots.
///   2. Create the `VkPipelineLayout`.
///   3. Describe the fixed-function state (viewport and scissor are
///      dynamic) and create the `VkPipeline`.
///   4. If `opts.descriptor_pool` is set, allocate one descriptor set
///      per non-null layout.
///
/// Returns `error.VulkanFailed` when there are more than
/// `MAX_DESCRIPTOR_SETS` layouts, when a `null` slot has no
/// `empty_set_layout` to stand in for it, when either Vulkan create
/// call fails, or when a descriptor set allocation fails. The layout
/// and pipeline created so far are destroyed on the way out.
pub fn init(opts: Options) Error!Self {
    const dev = opts.device;

    if (opts.descriptor_set_layouts.len > MAX_DESCRIPTOR_SETS) {
        log.err(
            "Pipeline.init: {} descriptor sets exceeds MAX_DESCRIPTOR_SETS={}",
            .{ opts.descriptor_set_layouts.len, MAX_DESCRIPTOR_SETS },
        );
        return error.VulkanFailed;
    }

    // ---- pipeline layout ---------------------------------------
    //
    // Build a flat array of VkDescriptorSetLayout where index i is
    // the layout for set=i. Null entries in `opts.descriptor_set_layouts`
    // get substituted with `opts.empty_set_layout` — Vulkan rejects
    // VK_NULL_HANDLE in `pSetLayouts`. `Shaders.init` always supplies
    // an empty layout when any null appears.
    //
    // `dsls` is filled in the same pass: it keeps only the caller's
    // real layouts (placeholders stay null) and is what ends up on
    // `Self` so `RenderPass.step` can allocate fresh per-call sets
    // from a per-frame pool without re-creating layouts.
    //
    // Both arrays are sized from `MAX_DESCRIPTOR_SETS` so bumping the
    // constant needs no edits here.
    var flat_dsls: [MAX_DESCRIPTOR_SETS]vk.VkDescriptorSetLayout = .{null} ** MAX_DESCRIPTOR_SETS;
    var dsls: [MAX_DESCRIPTOR_SETS]vk.VkDescriptorSetLayout = .{null} ** MAX_DESCRIPTOR_SETS;
    for (opts.descriptor_set_layouts, 0..) |maybe_dsl, i| {
        if (maybe_dsl) |dsl| {
            flat_dsls[i] = dsl;
            dsls[i] = dsl;
        } else if (opts.empty_set_layout != null) {
            flat_dsls[i] = opts.empty_set_layout;
        } else {
            log.err(
                "Pipeline.init: set {} is null but no empty_set_layout was provided",
                .{i},
            );
            return error.VulkanFailed;
        }
    }
    const layout_info: vk.VkPipelineLayoutCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .setLayoutCount = @intCast(opts.descriptor_set_layouts.len),
        .pSetLayouts = if (opts.descriptor_set_layouts.len > 0) &flat_dsls else null,
        .pushConstantRangeCount = @intCast(opts.push_constant_ranges.len),
        .pPushConstantRanges = if (opts.push_constant_ranges.len > 0)
            opts.push_constant_ranges.ptr
        else
            null,
    };
    var layout: vk.VkPipelineLayout = undefined;
    {
        const r = dev.dispatch.createPipelineLayout(dev.device, &layout_info, null, &layout);
        if (r != vk.VK_SUCCESS) {
            log.err("vkCreatePipelineLayout failed: result={}", .{r});
            return error.VulkanFailed;
        }
    }
    // From here on, any failure must release the layout.
    errdefer dev.dispatch.destroyPipelineLayout(dev.device, layout, null);

    // ---- shader stages -----------------------------------------
    // Vertex + fragment, both entering at "main".
    const stages: [2]vk.VkPipelineShaderStageCreateInfo = .{
        .{
            .sType = vk.VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
            .pNext = null,
            .flags = 0,
            .stage = vk.VK_SHADER_STAGE_VERTEX_BIT,
            .module = opts.vertex_module,
            .pName = "main",
            .pSpecializationInfo = null,
        },
        .{
            .sType = vk.VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
            .pNext = null,
            .flags = 0,
            .stage = vk.VK_SHADER_STAGE_FRAGMENT_BIT,
            .module = opts.fragment_module,
            .pName = "main",
            .pSpecializationInfo = null,
        },
    };

    // ---- vertex input -------------------------------------------
    // `vi_binding` lives in function scope (not inside the `if`) so
    // the pointer stored in `vertex_input` is still valid when the
    // pipeline is created below.
    var vi_binding: vk.VkVertexInputBindingDescription = undefined;
    const vertex_input: vk.VkPipelineVertexInputStateCreateInfo = if (opts.vertex_input) |vi| blk: {
        vi_binding = .{
            .binding = 0,
            .stride = vi.stride,
            .inputRate = switch (vi.step_fn) {
                .constant, .per_vertex => vk.VK_VERTEX_INPUT_RATE_VERTEX,
                .per_instance => vk.VK_VERTEX_INPUT_RATE_INSTANCE,
            },
        };
        break :blk .{
            .sType = vk.VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
            .pNext = null,
            .flags = 0,
            .vertexBindingDescriptionCount = 1,
            .pVertexBindingDescriptions = &vi_binding,
            .vertexAttributeDescriptionCount = @intCast(vi.attributes.len),
            .pVertexAttributeDescriptions = vi.attributes.ptr,
        };
    } else .{
        .sType = vk.VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .vertexBindingDescriptionCount = 0,
        .pVertexBindingDescriptions = null,
        .vertexAttributeDescriptionCount = 0,
        .pVertexAttributeDescriptions = null,
    };

    // ---- input assembly + viewport (dynamic) + raster + ms ------
    const input_assembly: vk.VkPipelineInputAssemblyStateCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .topology = opts.topology,
        .primitiveRestartEnable = vk.VK_FALSE,
    };
    // Counts only: the actual viewport/scissor rectangles are dynamic
    // state (see `dynamic_states` below), so the pointers stay null.
    const viewport_state: vk.VkPipelineViewportStateCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .viewportCount = 1,
        .pViewports = null,
        .scissorCount = 1,
        .pScissors = null,
    };
    const rasterization: vk.VkPipelineRasterizationStateCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .depthClampEnable = vk.VK_FALSE,
        .rasterizerDiscardEnable = vk.VK_FALSE,
        .polygonMode = vk.VK_POLYGON_MODE_FILL,
        .cullMode = vk.VK_CULL_MODE_NONE,
        .frontFace = vk.VK_FRONT_FACE_COUNTER_CLOCKWISE,
        .depthBiasEnable = vk.VK_FALSE,
        .depthBiasConstantFactor = 0,
        .depthBiasClamp = 0,
        .depthBiasSlopeFactor = 0,
        .lineWidth = 1.0,
    };
    const multisample: vk.VkPipelineMultisampleStateCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .rasterizationSamples = vk.VK_SAMPLE_COUNT_1_BIT,
        .sampleShadingEnable = vk.VK_FALSE,
        .minSampleShading = 0,
        .pSampleMask = null,
        .alphaToCoverageEnable = vk.VK_FALSE,
        .alphaToOneEnable = vk.VK_FALSE,
    };

    // ---- color blend --------------------------------------------
    // Pre-multiplied alpha source-over: out = src + dst*(1-src.a).
    // Same as the OpenGL backend's default blend (and what the
    // shaders are written to produce).
    const blend_attachment: vk.VkPipelineColorBlendAttachmentState = .{
        .blendEnable = if (opts.blending_enabled) vk.VK_TRUE else vk.VK_FALSE,
        .srcColorBlendFactor = vk.VK_BLEND_FACTOR_ONE,
        .dstColorBlendFactor = vk.VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
        .colorBlendOp = vk.VK_BLEND_OP_ADD,
        .srcAlphaBlendFactor = vk.VK_BLEND_FACTOR_ONE,
        .dstAlphaBlendFactor = vk.VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
        .alphaBlendOp = vk.VK_BLEND_OP_ADD,
        .colorWriteMask = vk.VK_COLOR_COMPONENT_R_BIT |
            vk.VK_COLOR_COMPONENT_G_BIT |
            vk.VK_COLOR_COMPONENT_B_BIT |
            vk.VK_COLOR_COMPONENT_A_BIT,
    };
    const blend_state: vk.VkPipelineColorBlendStateCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .logicOpEnable = vk.VK_FALSE,
        .logicOp = vk.VK_LOGIC_OP_COPY,
        .attachmentCount = 1,
        .pAttachments = &blend_attachment,
        .blendConstants = .{ 0, 0, 0, 0 },
    };

    // ---- dynamic state -----------------------------------------
    const dynamic_states = [_]vk.VkDynamicState{
        vk.VK_DYNAMIC_STATE_VIEWPORT,
        vk.VK_DYNAMIC_STATE_SCISSOR,
    };
    const dynamic_state: vk.VkPipelineDynamicStateCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .dynamicStateCount = @intCast(dynamic_states.len),
        .pDynamicStates = &dynamic_states,
    };

    // ---- dynamic rendering info (chained via pNext) ------------
    // Local copy so there is an addressable format to point at.
    var color_format = opts.color_format;
    const rendering_info: vk.VkPipelineRenderingCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
        .pNext = null,
        .viewMask = 0,
        .colorAttachmentCount = 1,
        .pColorAttachmentFormats = &color_format,
        .depthAttachmentFormat = vk.VK_FORMAT_UNDEFINED,
        .stencilAttachmentFormat = vk.VK_FORMAT_UNDEFINED,
    };

    // ---- assemble + create -------------------------------------
    const pipeline_info: vk.VkGraphicsPipelineCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
        .pNext = &rendering_info,
        .flags = 0,
        .stageCount = stages.len,
        .pStages = &stages,
        .pVertexInputState = &vertex_input,
        .pInputAssemblyState = &input_assembly,
        .pTessellationState = null,
        .pViewportState = &viewport_state,
        .pRasterizationState = &rasterization,
        .pMultisampleState = &multisample,
        .pDepthStencilState = null,
        .pColorBlendState = &blend_state,
        .pDynamicState = &dynamic_state,
        .layout = layout,
        // renderPass / subpass intentionally null — dynamic rendering.
        .renderPass = null,
        .subpass = 0,
        .basePipelineHandle = null,
        .basePipelineIndex = -1,
    };
    var pipeline: vk.VkPipeline = undefined;
    {
        const r = dev.dispatch.createGraphicsPipelines(
            dev.device,
            null, // pipeline cache
            1,
            &pipeline_info,
            null,
            &pipeline,
        );
        if (r != vk.VK_SUCCESS) {
            log.err("vkCreateGraphicsPipelines failed: result={}", .{r});
            return error.VulkanFailed;
        }
    }
    // A failed descriptor allocation below must release the pipeline too.
    errdefer dev.dispatch.destroyPipeline(dev.device, pipeline, null);

    // Allocate one descriptor set per non-null entry in
    // `opts.descriptor_set_layouts`. Null entries are placeholders
    // (the shader's set=i isn't actually used) — nothing to allocate.
    // Without a pool every slot simply stays null.
    var dsets: [MAX_DESCRIPTOR_SETS]vk.VkDescriptorSet = .{null} ** MAX_DESCRIPTOR_SETS;
    if (opts.descriptor_pool) |pool_ptr| {
        for (opts.descriptor_set_layouts, 0..) |maybe_dsl, i| {
            const dsl = maybe_dsl orelse continue;
            dsets[i] = pool_ptr.allocate(dsl) catch |err| {
                log.err(
                    "Pipeline.init: descriptor set {} allocation failed: {}",
                    .{ i, err },
                );
                return error.VulkanFailed;
            };
        }
    }

    return .{
        .device = dev,
        .pipeline = pipeline,
        .layout = layout,
        .descriptor_sets = dsets,
        .descriptor_set_layouts = dsls,
        .set_count = @intCast(opts.descriptor_set_layouts.len),
        .sampler = opts.sampler,
        .vertex_stride = if (opts.vertex_input) |vi| vi.stride else 0,
    };
}
|
||||
|
||||
/// Destroy the two Vulkan objects `init` created: the pipeline first,
/// then its pipeline layout. Shader modules, descriptor set layouts,
/// descriptor sets and the sampler are not touched here.
pub fn deinit(self: *const Self) void {
    const handle = self.device.device;
    self.device.dispatch.destroyPipeline(handle, self.pipeline, null);
    self.device.dispatch.destroyPipelineLayout(handle, self.layout, null);
}
|
||||
|
||||
// Reference every declaration in this file so the compiler analyzes
// them when the test suite is built.
test {
    std.testing.refAllDecls(@This());
}
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
# Vulkan renderer backend
|
||||
|
||||
This directory holds the **renderer-policy** Vulkan files for libghostty.
|
||||
Pure Vulkan-API wrappers (Device dispatch table, Sampler, CommandPool,
|
||||
DescriptorPool) live in `pkg/vulkan/`, mirroring how `pkg/opengl/`
|
||||
relates to `src/renderer/opengl/`.
|
||||
|
||||
## File layout
|
||||
|
||||
Renderer policy (this directory):
|
||||
|
||||
| File | OpenGL counterpart | Notes |
|
||||
| ------------------- | ------------------------- | ------------------------------------------------------------------ |
|
||||
| `Target.zig` | `opengl/Target.zig` | Render image + dmabuf export (direct or legacy_copy mode). |
|
||||
| `Texture.zig` | `opengl/Texture.zig` | `VkImage` + `VkImageView` + upload helpers for the glyph atlas. |
|
||||
| `buffer.zig` | `opengl/buffer.zig` | `Buffer(T)` host-coherent. |
|
||||
| `buffer_pool.zig` | (none — GL implicit) | Cross-frame `VkBuffer` recycle pool, per-thread pending list. |
|
||||
| `ThreadState.zig` | (none — GL implicit) | Per-renderer-thread frame fence / CB / descriptor pool / last-tgt. |
|
||||
| `Pipeline.zig` | `opengl/Pipeline.zig` | Graphics pipeline + pipeline layout creation (descriptor set layouts are supplied by the caller). |
|
||||
| `RenderPass.zig` | `opengl/RenderPass.zig` | Dynamic-rendering pass + step recorder. |
|
||||
| `Frame.zig` | `opengl/Frame.zig` | Per-draw command buffer + fence-paced submit-then-wait. |
|
||||
| `shaders.zig` | `opengl/shaders.zig` | GLSL → SPIR-V via glslang + the OpenGL-GLSL → Vulkan-GLSL rewrite. |
|
||||
|
||||
Pure Vulkan-API wrappers (in `pkg/vulkan/`):
|
||||
|
||||
| File | OpenGL counterpart | Notes |
|
||||
| --------------------- | ------------------------ | ------------------------------------------------------------------ |
|
||||
| `Device.zig` | (no analogue — GL ctx) | Host-provided VkInstance/Device/Queue + function dispatch table. |
|
||||
| `Sampler.zig` | `pkg/opengl/Sampler.zig` | `VkSampler` (linear for atlases, nearest for cells). |
|
||||
| `CommandPool.zig` | (none) | `VkCommandPool` + one-shot record/submit helper. |
|
||||
| `DescriptorPool.zig` | (none) | Per-frame `VkDescriptorPool`. |
|
||||
|
||||
The renderer's top-level lives one directory up at `../Vulkan.zig`
|
||||
and is the single module imported by `src/renderer.zig` when
|
||||
`build_config.renderer == .vulkan`. It re-exports the `pkg/vulkan/`
|
||||
types as `Vulkan.Device`, `Vulkan.Sampler`, etc., so call sites use a
|
||||
single `Vulkan.*` namespace regardless of where each type physically
|
||||
lives.
|
||||
|
||||
## Why dmabuf, not Vulkan swapchains?
|
||||
|
||||
The Qt frontend wants to keep `GhosttySurface` as a `QWidget` so that
|
||||
splits (`QSplitter`), tabs (`QTabWidget`), and translucent composition
|
||||
keep working. That rules out `QVulkanWindow`. Instead libghostty
|
||||
exports the rendered `VkImage` memory as a dmabuf fd
|
||||
(`VK_KHR_external_memory_fd` + `VK_EXT_image_drm_format_modifier`); the
|
||||
Qt side imports it via `zwp_linux_dmabuf_v1` and attaches it to a
|
||||
`wl_subsurface` parented to the top-level `wl_surface`. The compositor
|
||||
scans the buffer out directly — no readback, no QImage round trip.
|
||||
|
|
@ -0,0 +1,673 @@
|
|||
//! Per-pass recording helper for `vkCmdBeginRendering` /
|
||||
//! `vkCmdEndRendering` (Vulkan 1.3 dynamic rendering — no
|
||||
//! `VkRenderPass` object needed) plus the per-`step` resource
|
||||
//! binding + draw-call emission.
|
||||
//!
|
||||
//! `begin` transitions the attachment from its current layout to
|
||||
//! `COLOR_ATTACHMENT_OPTIMAL` and opens a rendering scope with the
|
||||
//! caller's clear color. `step` updates the pipeline's descriptor
|
||||
//! sets from the Step's resources and records a draw call;
|
||||
//! `complete` closes the rendering scope and transitions the
|
||||
//! attachment to its consumer-facing layout (SHADER_READ_ONLY for
|
||||
//! intermediate textures, GENERAL for the dmabuf-backed target).
|
||||
//!
|
||||
//! Counterpart: `src/renderer/opengl/RenderPass.zig`.
|
||||
|
||||
const Self = @This();
|
||||
|
||||
const std = @import("std");
|
||||
const vulkan = @import("vulkan");
|
||||
const vk = vulkan.c;
|
||||
|
||||
const Device = vulkan.Device;
|
||||
const DescriptorPool = vulkan.DescriptorPool;
|
||||
const Sampler = vulkan.Sampler;
|
||||
const Pipeline = @import("Pipeline.zig");
|
||||
const Target = @import("Target.zig");
|
||||
const Texture = @import("Texture.zig");
|
||||
const bufferpkg = @import("buffer.zig");
|
||||
|
||||
const log = std.log.scoped(.vulkan);
|
||||
|
||||
/// Primitive topology. Variant names match `pkg/opengl/primitives.zig`'s
/// `gl.Primitive` so the renderer's call sites in `generic.zig` (e.g.
/// `.draw = .{ .type = .triangle, ... }`) work against either backend
/// without per-backend branching. Mapped to `VkPrimitiveTopology` at
/// command-recording time.
pub const Primitive = enum {
    point,
    line,
    line_strip,
    triangle,
    triangle_strip,

    /// Translate to the matching `VK_PRIMITIVE_TOPOLOGY_*` value.
    /// `point`, `line` and `triangle` map to the `*_LIST` topologies;
    /// the `*_strip` variants map to the `*_STRIP` ones.
    pub fn toVk(self: Primitive) vk.VkPrimitiveTopology {
        return switch (self) {
            .point => vk.VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
            .line => vk.VK_PRIMITIVE_TOPOLOGY_LINE_LIST,
            .line_strip => vk.VK_PRIMITIVE_TOPOLOGY_LINE_STRIP,
            .triangle => vk.VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
            .triangle_strip => vk.VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
        };
    }
};
|
||||
|
||||
/// Inputs for opening a render pass. `Frame.renderPass` fills these in
/// from the enclosing frame plus the caller's attachments.
pub const Options = struct {
    /// Device + dispatch table for recording commands.
    device: *const Device,
    /// Caller-recorded command buffer to emit commands into. Provided
    /// by the enclosing `Frame`.
    cb: vk.VkCommandBuffer,

    /// Per-frame descriptor pool. Used by `step` to allocate fresh
    /// descriptor sets on the SECOND and later step() calls that
    /// bind the same pipeline within this pass — without it,
    /// mutating the pipeline's static `descriptor_sets[i]` for the
    /// second call would overwrite the first call's bindings before
    /// the GPU has read them (vkCmdDraw reads at submit time).
    /// Optional: passes that never re-use a pipeline (bg_color,
    /// cell_bg, cell_text) work without it.
    step_pool: ?*DescriptorPool = null,

    /// Color attachments for the pass. With dynamic rendering each
    /// attachment is a render target + optional clear color.
    attachments: []const Attachment,

    /// One color attachment: what to render into, and optionally the
    /// color to clear it to first.
    pub const Attachment = struct {
        // Held by value to match the OpenGL backend's Attachment
        // shape (so `generic.zig`'s call sites remain identical).
        // Vulkan's `Texture` and `Target` carry a `layout` field
        // that mutates across passes — `RenderPass.begin` reads it
        // to emit the right source-layout barrier, and
        // `RenderPass.complete` updates the value-copy here. Because
        // the value is a copy, that update doesn't propagate back
        // to the caller; the call sites in `generic.zig` are
        // intentionally fine with that — they always pass the
        // CURRENT `frame.target` / `state.{front,back}_texture`
        // (whose `layout` was last updated by the previous pass's
        // `recordPresentBarrier` / pipeline-end barrier in
        // `Target.recordPresentBarrier` / `Texture.replaceRegion`)
        // when constructing a new pass.
        target: union(enum) {
            texture: Texture,
            target: Target,
        },
        /// RGBA clear color; `null` means no clear color was supplied
        /// for this attachment.
        clear_color: ?[4]f32 = null,
    };
};
|
||||
|
||||
/// Describes one rendering step within the pass: which pipeline to
/// bind, which resources (uniforms / vertex buffers / textures /
/// samplers) to bind, and the draw call to issue.
pub const Step = struct {
    /// Pipeline to bind for this step (held by value).
    pipeline: Pipeline,
    /// Uniform buffer for this step. Per the `last_uniforms` field doc,
    /// a step that leaves this `null` reuses the last uniform buffer
    /// seen in the pass.
    uniforms: ?vk.VkBuffer = null,
    /// Buffers to bind; `null` entries are allowed.
    /// NOTE(review): presumably indexed by binding slot — confirm
    /// against `step`.
    buffers: []const ?vk.VkBuffer = &.{},
    /// Textures to bind; `null` entries are allowed.
    textures: []const ?Texture = &.{},
    /// Samplers to bind; `null` entries are allowed.
    /// NOTE(review): presumably the pipeline's fallback `sampler` is
    /// used where none is given (see `Pipeline.Options.sampler`) —
    /// confirm against `step`.
    samplers: []const ?Sampler = &.{},
    /// The draw call to issue.
    draw: Draw,

    /// Parameters of the draw call.
    pub const Draw = struct {
        /// Primitive topology of the draw.
        type: Primitive,
        /// Number of vertices per instance.
        vertex_count: usize,
        /// Number of instances; defaults to a single instance.
        instance_count: usize = 1,
    };
};
|
||||
|
||||
/// Error set for render-pass recording.
pub const Error = error{
    /// Reserved for command-recording failures. Currently unused —
    /// the recorder relies on Vulkan's silent-failure model
    /// (record bad input → validation flags it / next submit
    /// returns DEVICE_LOST), but the slot stays open in case a
    /// future step wants to fail-fast at record time.
    VulkanFailed,
};
|
||||
|
||||
/// Attachment list captured from `Options` by `begin`. Only element 0
/// is consulted by `begin` and `complete`.
attachments: []const Options.Attachment,
|
||||
/// Command buffer every command of this pass is recorded into.
cb: vk.VkCommandBuffer,
|
||||
/// Device whose `dispatch` table is used to issue the Vulkan calls.
device: *const Device,
|
||||
/// Optional pool from which `step` allocates per-draw descriptor sets
/// when a pipeline is used more than once within the pass.
step_pool: ?*DescriptorPool = null,
|
||||
/// Number of draws `step` has actually recorded (skipped or dropped
/// steps are not counted).
step_number: usize = 0,
|
||||
|
||||
/// VkPipeline handles already used by an earlier `step` in this
|
||||
/// pass. On second-and-later use of the same pipeline we allocate
|
||||
/// a fresh per-call descriptor set from `step_pool` instead of
|
||||
/// mutating `pipeline.descriptor_sets[i]` (vkCmdDraw reads at
|
||||
/// submit time, so re-updating the same set in place would
|
||||
/// overwrite the prior call's bindings before the GPU has read
|
||||
/// them). Tracks at most `MAX_SEEN_PIPELINES` distinct handles;
|
||||
/// past that, `markPipelineUsed` reports every new pipeline as
|
||||
/// re-used. Entries are recorded whether or not a `step_pool` exists.
|
||||
seen_pipelines: [MAX_SEEN_PIPELINES]vk.VkPipeline = .{null} ** MAX_SEEN_PIPELINES,
|
||||
/// Number of valid leading entries in `seen_pipelines`.
seen_pipelines_len: usize = 0,
|
||||
|
||||
/// Last `Step.uniforms` value seen in this pass. The OpenGL backend
|
||||
/// keeps the bound UBO across draw calls implicitly (GL state
|
||||
/// persists), and the renderer's image/overlay draw calls in
|
||||
/// `image.zig` don't pass `uniforms` at all — they expect the
|
||||
/// previously-bound UBO to still be live. Vulkan needs explicit
|
||||
/// descriptor-set updates per pipeline, so we cache the last UBO
|
||||
/// buffer here and reuse it when a step doesn't supply one. Starts
|
||||
/// as null for every pass (`begin` leaves it at the default).
|
||||
last_uniforms: ?vk.VkBuffer = null,
|
||||
|
||||
/// Cap on the number of distinct pipelines we'll track per pass
|
||||
/// for "first-use vs re-use" detection. The renderer's pass shape
|
||||
/// is: bg_color (1), cell_bg (1), cell_text (1), bg_image (1),
|
||||
/// image (varies). 8 is generous; we degrade gracefully to "always
|
||||
/// allocate fresh" past this cap.
|
||||
const MAX_SEEN_PIPELINES: usize = 8;
|
||||
|
||||
/// Open a render pass on `opts.cb`.
///
/// Commands are recorded in this order:
///   1. an image barrier moving the attachment from its tracked layout
///      into `COLOR_ATTACHMENT_OPTIMAL`;
///   2. `vkCmdBeginRendering` with `loadOp = CLEAR`, using the
///      attachment's `clear_color` or opaque black when it is null;
///   3. dynamic viewport (Y-flipped) and scissor covering the whole
///      attachment.
///
/// Only `attachments[0]` is consulted — the renderer always passes
/// exactly one attachment per pass, mirroring how the OpenGL backend
/// uses `RenderPass.Options.attachments`. With an empty attachment
/// list nothing is recorded and the bare pass object is returned.
pub fn begin(opts: Options) Self {
    const pass: Self = .{
        .attachments = opts.attachments,
        .cb = opts.cb,
        .device = opts.device,
        .step_pool = opts.step_pool,
    };
    if (opts.attachments.len == 0) return pass;

    const dev = opts.device;
    const first_attachment = opts.attachments[0];

    // Flatten the two attachment kinds into one description so the
    // rest of the function doesn't care which one it was handed.
    const Resolved = struct {
        view: vk.VkImageView,
        image: vk.VkImage,
        width: u32,
        height: u32,
        layout: vk.VkImageLayout,
    };
    const resolved: Resolved = switch (first_attachment.target) {
        .texture => |tex| .{
            .view = tex.view,
            .image = tex.image,
            .width = @intCast(tex.width),
            .height = @intCast(tex.height),
            .layout = tex.layout,
        },
        .target => |tgt| .{
            .view = tgt.view,
            .image = tgt.image,
            .width = tgt.width,
            .height = tgt.height,
            .layout = tgt.layout,
        },
    };

    // ---- transition to COLOR_ATTACHMENT_OPTIMAL ---------------------
    //
    // The attachment's tracked layout drives the source half of the
    // barrier, so a re-used image is transitioned truthfully:
    //   - COLOR_ATTACHMENT_OPTIMAL: wait on the earlier color writes;
    //   - TRANSFER_SRC_OPTIMAL (a `.legacy_copy` target after
    //     `recordCopyToDmabuf`): the read has already retired, but we
    //     conservatively name it;
    //   - SHADER_READ_ONLY_OPTIMAL (a texture after the previous
    //     pass's `complete`): the prior fragment-stage read;
    //   - anything else (UNDEFINED for a fresh image, GENERAL for a
    //     `.direct` target after `recordDirectBarrier`): empty source
    //     mask at TOP_OF_PIPE — GENERAL's visibility was already
    //     chained into HOST by the present barrier.
    // We could always claim UNDEFINED because `loadOp = CLEAR`
    // discards the contents anyway, but passing the real old layout
    // keeps validation honest.
    const SrcSync = struct {
        access: vk.VkAccessFlags,
        stage: vk.VkPipelineStageFlags,
    };
    const src_sync: SrcSync = switch (resolved.layout) {
        vk.VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL => .{
            .access = vk.VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
            .stage = vk.VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
        },
        vk.VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL => .{
            .access = vk.VK_ACCESS_TRANSFER_READ_BIT,
            .stage = vk.VK_PIPELINE_STAGE_TRANSFER_BIT,
        },
        vk.VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL => .{
            .access = vk.VK_ACCESS_SHADER_READ_BIT,
            .stage = vk.VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
        },
        else => .{
            .access = 0,
            .stage = vk.VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
        },
    };
    const to_color_attachment: vk.VkImageMemoryBarrier = .{
        .sType = vk.VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .pNext = null,
        .srcAccessMask = src_sync.access,
        .dstAccessMask = vk.VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
        .oldLayout = resolved.layout,
        .newLayout = vk.VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
        .srcQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .image = resolved.image,
        .subresourceRange = .{
            .aspectMask = vk.VK_IMAGE_ASPECT_COLOR_BIT,
            .baseMipLevel = 0,
            .levelCount = 1,
            .baseArrayLayer = 0,
            .layerCount = 1,
        },
    };
    dev.dispatch.cmdPipelineBarrier(
        opts.cb,
        src_sync.stage,
        vk.VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
        0, // dependency flags
        0, // memory barriers
        null,
        0, // buffer barriers
        null,
        1, // image barriers
        &to_color_attachment,
    );

    // ---- open the dynamic-rendering scope ---------------------------
    const whole_image: vk.VkRect2D = .{
        .offset = .{ .x = 0, .y = 0 },
        .extent = .{ .width = resolved.width, .height = resolved.height },
    };
    const clear_rgba: [4]f32 = first_attachment.clear_color orelse .{ 0, 0, 0, 1 };
    const color_attachment: vk.VkRenderingAttachmentInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
        .pNext = null,
        .imageView = resolved.view,
        .imageLayout = vk.VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
        .resolveMode = vk.VK_RESOLVE_MODE_NONE,
        .resolveImageView = null,
        .resolveImageLayout = vk.VK_IMAGE_LAYOUT_UNDEFINED,
        // The renderer redraws every cell each frame, so the previous
        // contents are never needed; CLEAR also spares tiled GPUs a
        // full attachment load.
        .loadOp = vk.VK_ATTACHMENT_LOAD_OP_CLEAR,
        .storeOp = vk.VK_ATTACHMENT_STORE_OP_STORE,
        .clearValue = .{ .color = .{ .float32 = clear_rgba } },
    };
    const rendering_info: vk.VkRenderingInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_RENDERING_INFO,
        .pNext = null,
        .flags = 0,
        .renderArea = whole_image,
        .layerCount = 1,
        .viewMask = 0,
        .colorAttachmentCount = 1,
        .pColorAttachments = &color_attachment,
        .pDepthAttachment = null,
        .pStencilAttachment = null,
    };
    dev.dispatch.cmdBeginRendering(opts.cb, &rendering_info);

    // ---- dynamic state: viewport + scissor --------------------------
    //
    // The viewport is Y-flipped (negative height, origin at the bottom
    // edge — Vulkan 1.1 / maintenance1) for EVERY attachment kind:
    //   - The renderer's OpenGL-style projection matrices (Y-up clip
    //     space, `ortho2d` with bottom > top) would otherwise draw
    //     everything upside-down.
    //   - `cell_text` is projection-driven while `cell_bg` derives its
    //     grid position from `gl_FragCoord.xy / cell_size`; both must
    //     see the same orientation or row 0 lands at opposite ends
    //     (the mismatch seen on the custom-shader back_texture path).
    //   - The dmabuf `Target` needs the flip regardless, since Qt maps
    //     it origin-upper-left. With back_texture and frame.target both
    //     flipped, an upper-left `gl_FragCoord` in the post pass maps to
    //     texel y=0, which is what shadertoy-style
    //     `texture(iChannel0, fragCoord / iResolution)` expects.
    // `gl_FragCoord` itself still reports origin-upper-left, matching
    // `cell_bg.f.glsl`'s `layout(origin_upper_left)` request.
    const width_f: f32 = @floatFromInt(resolved.width);
    const height_f: f32 = @floatFromInt(resolved.height);
    const flipped_viewport: vk.VkViewport = .{
        .x = 0,
        .y = height_f,
        .width = width_f,
        .height = -height_f,
        .minDepth = 0,
        .maxDepth = 1,
    };
    dev.dispatch.cmdSetViewport(opts.cb, 0, 1, &flipped_viewport);
    dev.dispatch.cmdSetScissor(opts.cb, 0, 1, &whole_image);

    return pass;
}
|
||||
|
||||
/// Record one draw of the pass.
///
/// Writes the step's resources into descriptor sets, binds them and
/// the pipeline, and emits `vkCmdDraw`. The resource layout mirrors
/// the OpenGL backend so `generic.zig` call sites work unchanged:
///
///   - `uniforms`        → set 0, binding `pipeline.uniforms_binding`
///                         (UBO; the Globals block from `common.glsl`).
///                         When omitted, the most recent UBO supplied
///                         earlier in this pass is used instead.
///   - `buffers[0]`      → vertex buffer at binding 0, but only when the
///                         pipeline has a non-zero `vertex_stride`
///                         (OpenGL's "0th buffer is the VBO" convention).
///   - `buffers[i]`, i≥1 → set 2, binding `i` (storage buffer).
///   - `textures[i]`     → set 1, binding `i` (combined image sampler),
///                         sampled with `samplers[i]` when provided and
///                         otherwise with the pipeline's own fallback
///                         `pipeline.sampler`.
///
/// The step is skipped when:
///   - the pipeline handle is null (shader not wired up, or e.g. a
///     custom-shader compile failure left it half-initialised) — a
///     warning is logged so the missing draw is visible rather than a
///     silently blank surface;
///   - `draw.vertex_count` is 0;
///   - the pipeline was already used in this pass and no fresh
///     descriptor sets can be obtained (no `step_pool`, or the pool is
///     exhausted) — an error is logged and the draw is dropped.
pub fn step(self: *Self, s: Step) void {
    const pipe = &s.pipeline;

    if (pipe.pipeline == null) {
        log.warn("RenderPass.step: skipping draw — pipeline not constructed", .{});
        return;
    }
    if (s.draw.vertex_count == 0) return;

    const dev = self.device;

    // ---- vertex buffer (buffers[0]) ---------------------------------
    const wants_vbo = pipe.vertex_stride > 0 and s.buffers.len > 0;
    if (wants_vbo) if (s.buffers[0]) |vbo| {
        const bufs = [_]vk.VkBuffer{vbo};
        const offsets = [_]vk.VkDeviceSize{0};
        dev.dispatch.cmdBindVertexBuffers(self.cb, 0, 1, &bufs, &offsets);
    };

    // ---- choose the descriptor sets to write into -------------------
    //
    // First use of a pipeline within the pass: update its pre-allocated
    // `descriptor_sets` in place. That is cheap and covers the common
    // one-draw-per-pipeline case (bg_color / cell_bg / cell_text).
    //
    // Any later use of the same pipeline needs fresh sets. Descriptor
    // contents are read at SUBMIT time, so rewriting the static sets
    // would make every earlier draw on this pipeline see the LAST
    // update's UBO / textures / storage buffers — e.g. every kitty image
    // rendered with the final image's texture. Fresh sets come from
    // `step_pool`, which the enclosing Frame owns and resets per frame.
    var effective_sets: [Pipeline.MAX_DESCRIPTOR_SETS]vk.VkDescriptorSet =
        pipe.descriptor_sets;
    if (self.markPipelineUsed(pipe.pipeline)) {
        // Without a pool (test harness, smoke test) there is no safe
        // way to re-use the pipeline; drop the draw rather than corrupt
        // the ones already recorded.
        const pool = self.step_pool orelse {
            log.err(
                "RenderPass.step: pipeline re-used but no step_pool " ++
                    "available; dropping draw to avoid corrupting prior draws",
                .{},
            );
            return;
        };
        for (pipe.descriptor_set_layouts, 0..) |maybe_dsl, i| {
            if (i >= pipe.set_count) break;
            const dsl = maybe_dsl orelse continue;
            // On exhaustion we also drop the draw: falling back to the
            // static set would reintroduce exactly the corruption the
            // pool exists to prevent. One missing image beats every
            // image showing the last image's bindings.
            effective_sets[i] = pool.allocate(dsl) catch |err| {
                log.err(
                    "RenderPass.step: per-call descriptor set " ++
                        "allocation for set {} failed ({}); dropping draw " ++
                        "(step_pool exhausted — increase STEP_POOL_MAX_SETS)",
                    .{ i, err },
                );
                return;
            };
        }
    }

    // ---- write descriptors ------------------------------------------
    //
    // One vkUpdateDescriptorSets call per binding keeps this simple; the
    // per-frame total is a UBO plus a handful of buffers and samplers,
    // so batching would not be measurable.

    // Set 0: UBO. Remember a newly supplied buffer, then use whatever
    // is current — image/overlay draws pass no `uniforms` and rely on
    // the previous one persisting, as bound GL state would.
    if (s.uniforms) |supplied| self.last_uniforms = supplied;
    if (effective_sets[0] != null) if (self.last_uniforms) |ubo_buffer| {
        writeWholeBufferDescriptor(
            dev,
            effective_sets[0],
            pipe.uniforms_binding,
            vk.VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            ubo_buffer,
        );
    };

    // Set 1: combined image samplers. Only slots that hold a texture
    // can produce a write, so walking `textures` covers every case.
    if (effective_sets[1] != null) {
        for (s.textures, 0..) |tex_opt, slot| {
            const tex = tex_opt orelse continue;

            // Explicit per-slot sampler wins; otherwise the pipeline's
            // fallback; with neither, the slot is left untouched.
            const sampler_handle: vk.VkSampler = pick: {
                if (slot < s.samplers.len) {
                    if (s.samplers[slot]) |samp| break :pick samp.sampler;
                }
                if (pipe.sampler == null) continue;
                break :pick pipe.sampler;
            };

            const image_info: vk.VkDescriptorImageInfo = .{
                .sampler = sampler_handle,
                .imageView = tex.view,
                .imageLayout = vk.VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
            };
            const write: vk.VkWriteDescriptorSet = .{
                .sType = vk.VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                .pNext = null,
                .dstSet = effective_sets[1],
                .dstBinding = @intCast(slot),
                .dstArrayElement = 0,
                .descriptorCount = 1,
                .descriptorType = vk.VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
                .pImageInfo = &image_info,
                .pBufferInfo = null,
                .pTexelBufferView = null,
            };
            dev.dispatch.updateDescriptorSets(dev.device, 1, &write, 0, null);
        }
    }

    // Set 2: storage buffers. Slot 0 belongs to the vertex buffer
    // handled above, so storage bindings start at 1.
    if (effective_sets[2] != null) {
        var slot: usize = 1;
        while (slot < s.buffers.len) : (slot += 1) {
            const buf = s.buffers[slot] orelse continue;
            writeWholeBufferDescriptor(
                dev,
                effective_sets[2],
                @intCast(slot),
                vk.VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                buf,
            );
        }
    }

    // ---- bind descriptor sets ---------------------------------------
    //
    // `vkCmdBindDescriptorSets` takes a contiguous run of non-null
    // handles starting at `firstSet`. Pipelines such as cell_bg use
    // sets 0 and 2 but not 1, so issue one call per maximal run.
    var start: usize = 0;
    while (start < pipe.set_count) {
        if (effective_sets[start] == null) {
            start += 1;
            continue;
        }
        var run_len: usize = 1;
        while (start + run_len < pipe.set_count and
            effective_sets[start + run_len] != null) run_len += 1;

        dev.dispatch.cmdBindDescriptorSets(
            self.cb,
            vk.VK_PIPELINE_BIND_POINT_GRAPHICS,
            pipe.layout,
            @intCast(start),
            @intCast(run_len),
            &effective_sets[start],
            0, // dynamic offsets
            null,
        );
        start += run_len;
    }

    // ---- pipeline + draw --------------------------------------------
    dev.dispatch.cmdBindPipeline(
        self.cb,
        vk.VK_PIPELINE_BIND_POINT_GRAPHICS,
        pipe.pipeline,
    );
    dev.dispatch.cmdDraw(
        self.cb,
        @intCast(s.draw.vertex_count),
        @intCast(s.draw.instance_count),
        0, // first vertex
        0, // first instance
    );
    self.step_number += 1;
}

/// Point binding `dst_binding` of `dst_set` at the whole of `buf`
/// (offset 0, `VK_WHOLE_SIZE`) as a descriptor of `descriptor_type`.
fn writeWholeBufferDescriptor(
    dev: *const Device,
    dst_set: vk.VkDescriptorSet,
    dst_binding: u32,
    descriptor_type: vk.VkDescriptorType,
    buf: vk.VkBuffer,
) void {
    const buffer_info: vk.VkDescriptorBufferInfo = .{
        .buffer = buf,
        .offset = 0,
        .range = vk.VK_WHOLE_SIZE,
    };
    const write: vk.VkWriteDescriptorSet = .{
        .sType = vk.VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
        .pNext = null,
        .dstSet = dst_set,
        .dstBinding = dst_binding,
        .dstArrayElement = 0,
        .descriptorCount = 1,
        .descriptorType = descriptor_type,
        .pImageInfo = null,
        .pBufferInfo = &buffer_info,
        .pTexelBufferView = null,
    };
    dev.dispatch.updateDescriptorSets(dev.device, 1, &write, 0, null);
}
|
||||
|
||||
/// Record that `pipeline` is being drawn with in this pass and tell
/// the caller whether it had been used before.
///
/// Returns `false` only the first time a handle is seen, which is
/// `step`'s cue that updating the pipeline's static descriptor sets in
/// place is safe. Every later call for the same handle returns `true`,
/// so `step` allocates fresh sets from `step_pool` instead of
/// clobbering bindings an earlier draw still depends on.
///
/// Once `MAX_SEEN_PIPELINES` distinct handles are tracked, unknown
/// handles are not recorded and are reported as `true` as well. That
/// is deliberately conservative: quietly falling back to in-place
/// updates is the very bug this bookkeeping exists to prevent.
fn markPipelineUsed(self: *Self, pipeline: vk.VkPipeline) bool {
    var idx: usize = 0;
    while (idx < self.seen_pipelines_len) : (idx += 1) {
        if (self.seen_pipelines[idx] == pipeline) return true;
    }

    // Not seen yet: remember it if there is room, otherwise fail safe.
    const next_slot = self.seen_pipelines_len;
    if (next_slot >= MAX_SEEN_PIPELINES) return true;

    self.seen_pipelines[next_slot] = pipeline;
    self.seen_pipelines_len = next_slot + 1;
    return false;
}
|
||||
|
||||
/// Close the rendering scope opened by `begin` and transition the
/// first attachment out of `COLOR_ATTACHMENT_OPTIMAL` into the layout
/// its next consumer expects:
///
///   - `.texture` (the custom-shader back_texture, sampled by the post
///     pass) → `SHADER_READ_ONLY_OPTIMAL`, made visible to
///     fragment-shader reads. This keeps the layout declared in the
///     descriptor write truthful; otherwise validation reports
///     VUID-vkCmdDraw-imageLayout-00344 and some drivers can mishandle
///     sampling from an out-of-spec layout.
///   - `.target` (the dmabuf-backed `frame.target`) → `GENERAL`. The
///     next operation is `Target.recordPresentBarrier`, which expects
///     GENERAL on entry and either keeps it (`.direct`) or moves on to
///     `TRANSFER_SRC_OPTIMAL` (`.legacy_copy`).
///
/// Does nothing when the pass has no attachments (`begin` recorded
/// nothing in that case either). The layout tracked on the attachment
/// value is not modified here.
pub fn complete(self: *const Self) void {
    if (self.attachments.len == 0) return;

    const dev = self.device;
    dev.dispatch.cmdEndRendering(self.cb);

    // Destination half of the barrier, chosen by attachment kind.
    const Handoff = struct {
        image: vk.VkImage,
        layout: vk.VkImageLayout,
        stage: vk.VkPipelineStageFlags,
        access: vk.VkAccessFlags,
    };
    const handoff: Handoff = switch (self.attachments[0].target) {
        .texture => |tex| .{
            .image = tex.image,
            .layout = vk.VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
            .stage = vk.VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
            .access = vk.VK_ACCESS_SHADER_READ_BIT,
        },
        .target => |tgt| .{
            .image = tgt.image,
            .layout = vk.VK_IMAGE_LAYOUT_GENERAL,
            .stage = vk.VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
            .access = 0,
        },
    };

    const from_color_attachment: vk.VkImageMemoryBarrier = .{
        .sType = vk.VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .pNext = null,
        .srcAccessMask = vk.VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
        .dstAccessMask = handoff.access,
        .oldLayout = vk.VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
        .newLayout = handoff.layout,
        .srcQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .image = handoff.image,
        .subresourceRange = .{
            .aspectMask = vk.VK_IMAGE_ASPECT_COLOR_BIT,
            .baseMipLevel = 0,
            .levelCount = 1,
            .baseArrayLayer = 0,
            .layerCount = 1,
        },
    };
    dev.dispatch.cmdPipelineBarrier(
        self.cb,
        vk.VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
        handoff.stage,
        0, // dependency flags
        0, // memory barriers
        null,
        0, // buffer barriers
        null,
        1, // image barriers
        &from_color_attachment,
    );
}
|
||||
|
||||
// Smoke test: references every declaration of this file through
// `std.testing.refAllDecls`.
test {
|
||||
std.testing.refAllDecls(@This());
|
||||
}
|
||||
|
|
@ -0,0 +1,914 @@
|
|||
//! Render target: a `VkImage` whose memory is exported as a dmabuf
|
||||
//! fd so the host (Qt) can present it via
|
||||
//! `ghostty_platform_vulkan_s.present` without a CPU readback round
|
||||
//! trip through libghostty.
|
||||
//!
|
||||
//! Two construction modes, picked at `init` time after probing
|
||||
//! `VK_EXT_image_drm_format_modifier`:
|
||||
//!
|
||||
//! - `.direct` — the render image itself is allocated with
|
||||
//! `VkImageDrmFormatModifierExplicitCreateInfoEXT`
|
||||
//! (single plane; the modifier is the one `pickModifier` selected — `DRM_FORMAT_MOD_LINEAR` or a vendor-tiled one). Its `VkDeviceMemory`
|
||||
//! is what we `vkGetMemoryFdKHR` and hand to the host. No second
|
||||
//! allocation, no end-of-frame copy. Used when the driver
|
||||
//! advertises `COLOR_ATTACHMENT_BIT | TRANSFER_SRC_BIT |
|
||||
//! SAMPLED_BIT` for a modifier the host also accepts, as reported in
|
||||
//! `VkDrmFormatModifierPropertiesEXT.drmFormatModifierTilingFeatures`.
|
||||
//!
|
||||
//! - `.legacy_copy` — fallback for drivers (notably NVIDIA at time
|
||||
//! of writing) that don't expose `COLOR_ATTACHMENT_BIT` for
|
||||
//! LINEAR via either the legacy `vkGetPhysicalDeviceFormatProperties`
|
||||
//! query or the modifier-extension query. Allocates an OPTIMAL-
|
||||
//! tiled render image plus a separate dmabuf-exported LINEAR
|
||||
//! `VkBuffer`, and inserts a `vkCmdCopyImageToBuffer` at the end
|
||||
//! of each frame. Behavior identical to the pre-modifier-path
|
||||
//! code.
|
||||
//!
|
||||
//! Why two modes? NVIDIA's `linearTilingFeatures` for BGRA8 doesn't
|
||||
//! include `COLOR_ATTACHMENT_BIT`, so a LINEAR `VkImage` silently
|
||||
//! rasterizes nothing (confirmed via
|
||||
//! `vkGetPhysicalDeviceFormatProperties`: linearTilingFeatures=0x1dc03
|
||||
//! for `B8G8R8A8_UNORM`). The modifier-extension query is a separate
|
||||
//! channel and *may* expose different feature bits per modifier — so
|
||||
//! we always probe. Where the probe says yes, we drop the redundant
|
||||
//! buffer + copy; where it says no, we keep working.
|
||||
//!
|
||||
//! Ownership: libghostty owns the image, any buffer, all memory, and
|
||||
//! the dmabuf fd for the lifetime of the `Target`. The fd is passed
|
||||
//! to the host via `present` as a borrow; the host must `dup()` if
|
||||
//! it needs to hold it past the call. `deinit` closes the fd and
|
||||
//! frees all the memory.
|
||||
//!
|
||||
//! Counterpart: `src/renderer/opengl/Target.zig`.
|
||||
|
||||
const Self = @This();
|
||||
|
||||
const std = @import("std");
|
||||
const vk = @import("vulkan").c;
|
||||
|
||||
const apprt = @import("../../apprt.zig");
|
||||
const Device = @import("vulkan").Device;
|
||||
|
||||
const log = std.log.scoped(.vulkan);
|
||||
|
||||
/// DRM modifier sentinel for "linear, no tiling". Matches
|
||||
/// `DRM_FORMAT_MOD_LINEAR` from `<drm/drm_fourcc.h>`. Hardcoded so we
|
||||
/// don't pull in libdrm headers just for a single constant.
/// `init` compares the picked modifier against it to label its log line.
|
||||
pub const DRM_FORMAT_MOD_LINEAR: u64 = 0;
|
||||
|
||||
/// Upper bound for the number of DRM format modifiers we ever expect
|
||||
/// a driver to expose for a single format. Real-world drivers expose
|
||||
/// well under 20 (mostly LINEAR + a handful of vendor tiled variants);
|
||||
/// 64 gives us comfortable headroom with a ~1.5 KiB stack buffer and
|
||||
/// avoids allocator threading through the per-surface init path.
/// Also the capacity handed to the host's `get_supported_modifiers`
/// callback in `pickModifier`; the returned count is clamped to it.
|
||||
const MAX_MODIFIERS: usize = 64;
|
||||
|
||||
/// Which dmabuf-export strategy this `Target` settled on. See the
|
||||
/// module-level doc comment for the full rationale.
|
||||
pub const Tiling = enum {
|
||||
/// Render image's own memory is exported as the dmabuf. Single
|
||||
/// plane, using the modifier `init` picked (LINEAR or vendor-tiled).
/// No separate buffer, no copy.
|
||||
direct,
|
||||
|
||||
/// OPTIMAL render image + separate LINEAR `VkBuffer` dmabuf
|
||||
/// target. End-of-frame `vkCmdCopyImageToBuffer`. Chosen by
|
||||
/// `init` when `pickModifier` finds no usable modifier (returns
|
||||
/// null).
|
||||
legacy_copy,
|
||||
};
|
||||
|
||||
/// Construction parameters for `init`.
pub const Options = struct {
|
||||
/// Device the target's Vulkan objects are created against; also
/// queried by `pickModifier`.
device: *const Device,
|
||||
/// Vulkan format of the render image. `init` converts it to a DRM
/// fourcc via `vkFormatToDrmFourcc`.
format: vk.VkFormat,
|
||||
// Target dimensions — presumably in pixels; TODO confirm.
width: u32,
|
||||
height: u32,
|
||||
/// Extra `VkImageUsageFlagBits` for the render image, beyond the
|
||||
/// defaults (`COLOR_ATTACHMENT_BIT | SAMPLED_BIT |
|
||||
/// TRANSFER_SRC_BIT`). Rarely needed.
|
||||
extra_usage: vk.VkImageUsageFlags = 0,
|
||||
|
||||
/// Per-surface platform callbacks. The host's process-wide
|
||||
/// VkDevice is shared across splits/tabs, but each surface gets
|
||||
/// its own platform copy with the right `userdata`, so
|
||||
/// `present()` reaches the right window — and `pickModifier`
|
||||
/// asks the right host (compositor and host can in principle
|
||||
/// differ across surfaces, e.g. mixed-DPI multi-screen).
|
||||
platform: apprt.embedded.Platform.Vulkan,
|
||||
};
|
||||
|
||||
/// Errors `init` (and `pickModifier`) can return.
pub const Error = error{
|
||||
// NOTE(review): presumably a failed Vulkan call — confirm at the
// return sites, which are outside this view.
VulkanFailed,
|
||||
// NOTE(review): presumably no memory type satisfied an allocation's
// requirements — confirm.
NoSuitableMemoryType,
|
||||
// NOTE(review): presumably raised by `vkFormatToDrmFourcc` for a
// format with no DRM fourcc mapping — confirm.
UnsupportedFormat,
|
||||
};
|
||||
|
||||
/// Device this target was created against (from `Options.device`).
device: *const Device,
|
||||
|
||||
/// Per-surface platform — see `Options.platform`.
|
||||
platform: apprt.embedded.Platform.Vulkan,
|
||||
|
||||
/// Which present strategy this target uses. Decides whether
|
||||
/// `recordPresentBarrier` emits a copy.
|
||||
tiling: Tiling,
|
||||
|
||||
// ---- render image ---------------------------------------------------
|
||||
// In `.direct` mode this image's memory is the dmabuf; in
|
||||
// `.legacy_copy` mode it's internal OPTIMAL memory we copy out of.
// `RenderPass.begin` / `complete` read `image` and `view` when this
// target is used as an attachment.
|
||||
image: vk.VkImage,
|
||||
image_memory: vk.VkDeviceMemory,
|
||||
view: vk.VkImageView,
|
||||
|
||||
// ---- dmabuf buffer (legacy mode only) -------------------------------
|
||||
// `null` in `.direct` mode — the image's memory is the dmabuf.
|
||||
dmabuf_buffer: ?vk.VkBuffer,
|
||||
dmabuf_memory: ?vk.VkDeviceMemory,
|
||||
|
||||
// Format and dimensions of the render image. `RenderPass.begin` uses
// `width` / `height` for the render area, viewport and scissor.
format: vk.VkFormat,
|
||||
width: u32,
|
||||
height: u32,
|
||||
|
||||
// dmabuf description handed to the host. Per the module doc the fd is
// owned by this `Target` and only borrowed by `present`.
fd: i32,
|
||||
// DRM fourcc derived from `format` (see `vkFormatToDrmFourcc` in `init`).
drm_format: u32,
|
||||
// Modifier of the exported memory — presumably the one `init` picked
// in `.direct` mode and LINEAR in `.legacy_copy`; TODO confirm.
drm_modifier: u64,
|
||||
// Presumably the row pitch of the exported memory in bytes; TODO confirm.
stride: u32,
|
||||
|
||||
/// Current layout of the render image. Tracked so
|
||||
/// `recordPresentBarrier` knows what oldLayout to use in its barrier.
|
||||
/// The renderer transitions it elsewhere too (RenderPass).
|
||||
layout: vk.VkImageLayout = vk.VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
|
||||
/// Create a render target, choosing the dmabuf export strategy.
///
/// `pickModifier` is asked for a single-plane DRM modifier that both
/// the host and the GPU accept with color-attachment, transfer-source
/// and sampled-image support. If it finds one, the target is built in
/// `.direct` mode via `initDirect`; otherwise it falls back to
/// `initLegacyCopy` (OPTIMAL render image plus a LINEAR buffer copy).
/// The decision is logged either way.
///
/// Returns an error when the format has no DRM fourcc, or when
/// modifier selection or the chosen constructor fails.
pub fn init(opts: Options) Error!Self {
    const drm_format = try vkFormatToDrmFourcc(opts.format);

    // What the render image must be usable for.
    const required_features: vk.VkFormatFeatureFlags =
        @as(vk.VkFormatFeatureFlags, vk.VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) |
        vk.VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
        vk.VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;

    const maybe_modifier = try pickModifier(
        opts.device,
        opts.platform,
        opts.format,
        drm_format,
        required_features,
    );

    // No modifier in the intersection: take the copy path.
    const modifier = maybe_modifier orelse {
        log.warn(
            "Target: no usable single-plane modifier with COLOR_ATTACHMENT " ++
                "in compositor ∩ GPU intersection; falling back to " ++
                "OPTIMAL render + LINEAR-buffer copy",
            .{},
        );
        return try initLegacyCopy(opts, drm_format);
    };

    const tag: []const u8 = switch (modifier) {
        DRM_FORMAT_MOD_LINEAR => "LINEAR",
        else => "vendor-tiled",
    };
    log.info(
        "Target: direct dmabuf export ({s} modifier 0x{x}) {}x{}",
        .{ tag, modifier, opts.width, opts.height },
    );
    return try initDirect(opts, drm_format, modifier);
}
|
||||
|
||||
/// Choose the DRM format modifier for `.direct` mode, or `null` when
/// none qualifies (the caller then uses `.legacy_copy`).
///
/// A modifier qualifies when all of the following hold:
///   - the host's `get_supported_modifiers` callback lists it for
///     `drm_format` (what the compositor will accept on attach),
///   - the GPU reports it for `format` through the
///     `VkDrmFormatModifierPropertiesListEXT` query,
///   - it is single-plane, and
///   - its tiling features contain every bit of `required_features`.
///
/// Preference order: the first qualifying non-LINEAR (vendor-tiled)
/// modifier in the GPU's list, then LINEAR, then `null`.
///
/// Both sides of the intersection matter: a modifier the GPU can render
/// to is useless if the compositor rejects the buffer, and a modifier
/// the compositor likes is useless if the driver does not expose
/// COLOR_ATTACHMENT for it.
fn pickModifier(
    dev: *const Device,
    platform: apprt.embedded.Platform.Vulkan,
    format: vk.VkFormat,
    drm_format: u32,
    required_features: vk.VkFormatFeatureFlags,
) Error!?u64 {
    // ---- Compositor side -------------------------------------------
    // One call into the host with a fixed-capacity buffer. An empty
    // answer is treated as "no intersection": even if the GPU could
    // export something, the compositor attach would not succeed.
    var host_mods: [MAX_MODIFIERS]u64 = undefined;
    const host_returned = platform.get_supported_modifiers(
        platform.userdata,
        drm_format,
        &host_mods,
        MAX_MODIFIERS,
    );

    // The ABI says the host returns at most the capacity we passed,
    // but the host is foreign code: clamp so a bad return value cannot
    // turn into an out-of-bounds slice.
    const host_count: usize = @min(host_returned, MAX_MODIFIERS);
    if (host_count == 0) {
        log.warn(
            "host advertises no dmabuf modifiers for format 0x{x}; " ++
                "cannot use direct mode",
            .{drm_format},
        );
        return null;
    }
    const accepted = host_mods[0..host_count];

    // ---- GPU side --------------------------------------------------
    // Standard two-call enumeration: first call with a null array to
    // learn the count, second call to fill `gpu_mods`.
    var gpu_mods: [MAX_MODIFIERS]vk.VkDrmFormatModifierPropertiesEXT = undefined;
    var mod_list: vk.VkDrmFormatModifierPropertiesListEXT = .{
        .sType = vk.VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT,
        .pNext = null,
        .drmFormatModifierCount = 0,
        .pDrmFormatModifierProperties = null,
    };
    var props2: vk.VkFormatProperties2 = .{
        .sType = vk.VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
        .pNext = &mod_list,
        .formatProperties = std.mem.zeroes(vk.VkFormatProperties),
    };
    dev.dispatch.getPhysicalDeviceFormatProperties2(dev.physical_device, format, &props2);

    if (mod_list.drmFormatModifierCount == 0) return null;
    if (mod_list.drmFormatModifierCount > MAX_MODIFIERS) {
        log.warn(
            "GPU modifier list truncated: driver reports {}, MAX_MODIFIERS={}",
            .{ mod_list.drmFormatModifierCount, MAX_MODIFIERS },
        );
        mod_list.drmFormatModifierCount = MAX_MODIFIERS;
    }
    mod_list.pDrmFormatModifierProperties = &gpu_mods[0];
    dev.dispatch.getPhysicalDeviceFormatProperties2(dev.physical_device, format, &props2);

    // ---- Intersection ----------------------------------------------
    var linear_ok: bool = false;
    var first_tiled: ?u64 = null;
    for (gpu_mods[0..mod_list.drmFormatModifierCount]) |candidate| {
        // The present callback carries a single fd and a single
        // stride, so modifiers that need more than one memory plane
        // cannot be described to the host.
        if (candidate.drmFormatModifierPlaneCount != 1) continue;

        const features = candidate.drmFormatModifierTilingFeatures;
        if ((features & required_features) != required_features) continue;

        // Keep only what the compositor also accepts.
        const modifier = candidate.drmFormatModifier;
        if (std.mem.indexOfScalar(u64, accepted, modifier) == null) continue;

        if (modifier == DRM_FORMAT_MOD_LINEAR) {
            linear_ok = true;
        } else if (first_tiled == null) {
            first_tiled = modifier;
        }
    }

    if (first_tiled) |m| return m;
    if (linear_ok) return DRM_FORMAT_MOD_LINEAR;
    return null;
}
|
||||
|
||||
/// `.direct` mode: allocate the render image with
/// `VK_EXT_image_drm_format_modifier` so its own memory can be
/// exported as the dmabuf.
///
/// Two create-info variants, selected by `chosen_mod`:
///   - LINEAR: EXPLICIT create-info with `rowPitch = width * 4`.
///   - anything else (vendor-tiled): LIST create-info holding only
///     `chosen_mod`; the driver computes its own internal layout.
///
/// After creation the modifier the driver actually used is read back
/// (non-LINEAR only) and plane 0's `rowPitch` is reported as `stride`.
///
/// Errors: `error.VulkanFailed` for any failing Vulkan call,
/// `error.NoSuitableMemoryType` when no DEVICE_LOCAL type fits, and
/// `error.UnsupportedFormat` when the row pitch does not fit the u32
/// stride of the present ABI. Every resource created so far is
/// released on error.
fn initDirect(opts: Options, drm_format: u32, chosen_mod: u64) Error!Self {
    const dev = opts.device;

    const image_usage = @as(vk.VkImageUsageFlags, vk.VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) |
        vk.VK_IMAGE_USAGE_SAMPLED_BIT |
        vk.VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
        opts.extra_usage;

    const bytes_per_pixel: u32 = 4;
    const row_pitch: vk.VkDeviceSize = @as(vk.VkDeviceSize, opts.width) * bytes_per_pixel;

    // ---- 1. Image: modifier-aware, externally-shareable -----------
    const plane_layout: vk.VkSubresourceLayout = .{
        .offset = 0,
        .size = 0, // ignored for EXPLICIT create-info
        .rowPitch = row_pitch,
        .arrayPitch = 0,
        .depthPitch = 0,
    };
    const explicit_create: vk.VkImageDrmFormatModifierExplicitCreateInfoEXT = .{
        .sType = vk.VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
        .pNext = null,
        .drmFormatModifier = DRM_FORMAT_MOD_LINEAR,
        .drmFormatModifierPlaneCount = 1,
        .pPlaneLayouts = &plane_layout,
    };
    // Single-entry list: the driver "picks" the only option and works
    // out the opaque tiling layout on its own.
    const list_mod = chosen_mod;
    const list_create: vk.VkImageDrmFormatModifierListCreateInfoEXT = .{
        .sType = vk.VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT,
        .pNext = null,
        .drmFormatModifierCount = 1,
        .pDrmFormatModifiers = &list_mod,
    };
    const mod_pnext: ?*const anyopaque = if (chosen_mod == DRM_FORMAT_MOD_LINEAR)
        @ptrCast(&explicit_create)
    else
        @ptrCast(&list_create);
    const ext_image_info: vk.VkExternalMemoryImageCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
        .pNext = mod_pnext,
        .handleTypes = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
    };
    const image_info: vk.VkImageCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
        .pNext = &ext_image_info,
        .flags = 0,
        .imageType = vk.VK_IMAGE_TYPE_2D,
        .format = opts.format,
        .extent = .{ .width = opts.width, .height = opts.height, .depth = 1 },
        .mipLevels = 1,
        .arrayLayers = 1,
        .samples = vk.VK_SAMPLE_COUNT_1_BIT,
        .tiling = vk.VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT,
        .usage = image_usage,
        .sharingMode = vk.VK_SHARING_MODE_EXCLUSIVE,
        .queueFamilyIndexCount = 0,
        .pQueueFamilyIndices = null,
        .initialLayout = vk.VK_IMAGE_LAYOUT_UNDEFINED,
    };
    var image: vk.VkImage = undefined;
    if (dev.dispatch.createImage(dev.device, &image_info, null, &image) != vk.VK_SUCCESS) {
        log.err("vkCreateImage (Target direct, mod=0x{x}) failed", .{chosen_mod});
        return error.VulkanFailed;
    }
    errdefer dev.dispatch.destroyImage(dev.device, image, null);

    // ---- 2. Image memory: exportable ---------------------------------
    var image_reqs: vk.VkMemoryRequirements = undefined;
    dev.dispatch.getImageMemoryRequirements(dev.device, image, &image_reqs);

    // The consumer imports the fd as an image rather than mapping it
    // (`present` passes `image_backed = true` for this mode), so there
    // is no reason to ask for host-visible memory here.
    const image_mem_idx = dev.findMemoryType(
        image_reqs.memoryTypeBits,
        vk.VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
    ) orelse {
        log.err(
            "no DEVICE_LOCAL memory type for direct dmabuf image " ++
                "(mod=0x{x} typeBits=0x{x})",
            .{ chosen_mod, image_reqs.memoryTypeBits },
        );
        return error.NoSuitableMemoryType;
    };

    // Dedicated allocation. The Vulkan spec requires
    // `VkMemoryDedicatedAllocateInfo` in the chain whenever the
    // exported handle type is reported as dedicated-only for this
    // image, and it is always permitted when `allocationSize` equals
    // the image's required size (which it does below). Requesting it
    // unconditionally avoids a separate capability query.
    // NOTE(review): assumes the host device is Vulkan 1.1+ (or has
    // VK_KHR_dedicated_allocation enabled) — confirm against the host.
    var dedicated_info = std.mem.zeroes(vk.VkMemoryDedicatedAllocateInfo);
    dedicated_info.sType = vk.VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO;
    dedicated_info.image = image;

    const export_info: vk.VkExportMemoryAllocateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
        .pNext = &dedicated_info,
        .handleTypes = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
    };
    const image_alloc: vk.VkMemoryAllocateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = &export_info,
        .allocationSize = image_reqs.size,
        .memoryTypeIndex = image_mem_idx,
    };
    var image_memory: vk.VkDeviceMemory = undefined;
    if (dev.dispatch.allocateMemory(dev.device, &image_alloc, null, &image_memory) != vk.VK_SUCCESS) {
        log.err("vkAllocateMemory (Target direct image) failed", .{});
        return error.VulkanFailed;
    }
    errdefer dev.dispatch.freeMemory(dev.device, image_memory, null);
    if (dev.dispatch.bindImageMemory(dev.device, image, image_memory, 0) != vk.VK_SUCCESS) {
        log.err("vkBindImageMemory (Target direct image) failed", .{});
        return error.VulkanFailed;
    }

    // ---- 3. View ---------------------------------------------------
    const view = try createView(dev, image, opts.format);
    errdefer dev.dispatch.destroyImageView(dev.device, view, null);

    // ---- 4. Export memory as dmabuf fd -----------------------------
    const fd = try exportDmabufFd(dev, image_memory);
    errdefer std.posix.close(fd);

    // ---- 5. Confirm the actual modifier + plane layout -------------
    // With a one-entry LIST the driver should report `chosen_mod`
    // back. A mismatch is only logged; the modifier the driver
    // reports is what gets handed to the host. A failed query leaves
    // `actual_mod` at `chosen_mod`.
    var actual_mod = chosen_mod;
    if (chosen_mod != DRM_FORMAT_MOD_LINEAR) {
        var mod_props: vk.VkImageDrmFormatModifierPropertiesEXT = .{
            .sType = vk.VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
            .pNext = null,
            .drmFormatModifier = 0,
        };
        if (dev.dispatch.getImageDrmFormatModifierPropertiesEXT(
            dev.device,
            image,
            &mod_props,
        ) == vk.VK_SUCCESS) {
            actual_mod = mod_props.drmFormatModifier;
            if (actual_mod != chosen_mod) {
                log.warn(
                    "driver chose modifier 0x{x}, we asked for 0x{x}",
                    .{ actual_mod, chosen_mod },
                );
            }
        }
    }

    // Plane 0 layout: `rowPitch` is what we report as `stride`.
    var subres: vk.VkImageSubresource = .{
        .aspectMask = vk.VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT,
        .mipLevel = 0,
        .arrayLayer = 0,
    };
    var layout: vk.VkSubresourceLayout = undefined;
    dev.dispatch.getImageSubresourceLayout(dev.device, image, &subres, &layout);

    // `present` forwards fd / modifier / size / stride but no plane
    // offset, so a non-zero offset cannot reach the consumer. Surface
    // it in the log rather than letting it pass silently.
    if (layout.offset != 0) {
        log.warn(
            "Target.initDirect: plane 0 offset {} is non-zero but is not " ++
                "conveyed to the host",
            .{layout.offset},
        );
    }

    // `rowPitch` is a u64 while the present callback takes a u32
    // stride. Fail explicitly instead of letting `@intCast` trip a
    // safety panic on an oversized value.
    if (layout.rowPitch > std.math.maxInt(u32)) {
        log.err(
            "Target.initDirect: rowPitch {} > u32 max; refusing direct mode",
            .{layout.rowPitch},
        );
        return error.UnsupportedFormat;
    }
    const stride: u32 = @intCast(layout.rowPitch);

    return .{
        .device = dev,
        .platform = opts.platform,
        .tiling = .direct,
        .image = image,
        .image_memory = image_memory,
        .view = view,
        .dmabuf_buffer = null,
        .dmabuf_memory = null,
        .format = opts.format,
        .width = opts.width,
        .height = opts.height,
        .fd = fd,
        .drm_format = drm_format,
        .drm_modifier = actual_mod,
        .stride = stride,
    };
}
|
||||
|
||||
/// `.legacy_copy` mode: an OPTIMAL-tiled render image in device-local
/// memory plus a separate, tightly packed `VkBuffer` whose memory is
/// exported as the dmabuf. `recordCopyToDmabuf` copies the image into
/// that buffer at the end of each frame.
///
/// The reported layout is always LINEAR with `stride = width * 4`.
///
/// Errors: `error.UnsupportedFormat` when `width * 4` does not fit the
/// u32 stride of the present ABI, `error.NoSuitableMemoryType` when no
/// matching memory type exists, `error.VulkanFailed` for any failing
/// Vulkan call. Every resource created so far is released on error.
fn initLegacyCopy(opts: Options, drm_format: u32) Error!Self {
    const dev = opts.device;

    // 4 bytes/pixel, packed (no per-row padding). The multiply is
    // checked so an absurd width is reported as an error here — the
    // same way `initDirect` guards its stride — instead of tripping an
    // overflow panic (or wrapping in unchecked builds) before Vulkan
    // ever sees the extent.
    const bytes_per_pixel: u32 = 4;
    const stride: u32 = std.math.mul(u32, opts.width, bytes_per_pixel) catch {
        log.err(
            "Target.initLegacyCopy: width {} overflows u32 stride; refusing",
            .{opts.width},
        );
        return error.UnsupportedFormat;
    };
    const buffer_size: vk.VkDeviceSize = @as(vk.VkDeviceSize, stride) * opts.height;

    // ---- 1. Render image: OPTIMAL tiling, internal memory ----------
    const image_usage = @as(vk.VkImageUsageFlags, vk.VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) |
        vk.VK_IMAGE_USAGE_SAMPLED_BIT |
        vk.VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
        opts.extra_usage;
    const image_info: vk.VkImageCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .imageType = vk.VK_IMAGE_TYPE_2D,
        .format = opts.format,
        .extent = .{ .width = opts.width, .height = opts.height, .depth = 1 },
        .mipLevels = 1,
        .arrayLayers = 1,
        .samples = vk.VK_SAMPLE_COUNT_1_BIT,
        .tiling = vk.VK_IMAGE_TILING_OPTIMAL,
        .usage = image_usage,
        .sharingMode = vk.VK_SHARING_MODE_EXCLUSIVE,
        .queueFamilyIndexCount = 0,
        .pQueueFamilyIndices = null,
        .initialLayout = vk.VK_IMAGE_LAYOUT_UNDEFINED,
    };
    var image: vk.VkImage = undefined;
    if (dev.dispatch.createImage(dev.device, &image_info, null, &image) != vk.VK_SUCCESS) {
        log.err("vkCreateImage (Target legacy render) failed", .{});
        return error.VulkanFailed;
    }
    errdefer dev.dispatch.destroyImage(dev.device, image, null);

    var image_reqs: vk.VkMemoryRequirements = undefined;
    dev.dispatch.getImageMemoryRequirements(dev.device, image, &image_reqs);
    const image_mem_idx = dev.findMemoryType(
        image_reqs.memoryTypeBits,
        vk.VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
    ) orelse return error.NoSuitableMemoryType;
    const image_alloc: vk.VkMemoryAllocateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = null,
        .allocationSize = image_reqs.size,
        .memoryTypeIndex = image_mem_idx,
    };
    var image_memory: vk.VkDeviceMemory = undefined;
    if (dev.dispatch.allocateMemory(dev.device, &image_alloc, null, &image_memory) != vk.VK_SUCCESS) {
        log.err("vkAllocateMemory (Target legacy render image) failed", .{});
        return error.VulkanFailed;
    }
    errdefer dev.dispatch.freeMemory(dev.device, image_memory, null);
    if (dev.dispatch.bindImageMemory(dev.device, image, image_memory, 0) != vk.VK_SUCCESS) {
        log.err("vkBindImageMemory (Target legacy render image) failed", .{});
        return error.VulkanFailed;
    }

    // ---- 2. View ---------------------------------------------------
    const view = try createView(dev, image, opts.format);
    errdefer dev.dispatch.destroyImageView(dev.device, view, null);

    // ---- 3. Dmabuf buffer: LINEAR pixel data, external memory -----
    const ext_buffer_info: vk.VkExternalMemoryBufferCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
        .pNext = null,
        .handleTypes = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
    };
    const buffer_info: vk.VkBufferCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext = &ext_buffer_info,
        .flags = 0,
        .size = buffer_size,
        .usage = vk.VK_BUFFER_USAGE_TRANSFER_DST_BIT,
        .sharingMode = vk.VK_SHARING_MODE_EXCLUSIVE,
        .queueFamilyIndexCount = 0,
        .pQueueFamilyIndices = null,
    };
    var dmabuf_buffer: vk.VkBuffer = undefined;
    if (dev.dispatch.createBuffer(dev.device, &buffer_info, null, &dmabuf_buffer) != vk.VK_SUCCESS) {
        log.err("vkCreateBuffer (Target dmabuf) failed", .{});
        return error.VulkanFailed;
    }
    errdefer dev.dispatch.destroyBuffer(dev.device, dmabuf_buffer, null);

    var buf_reqs: vk.VkMemoryRequirements = undefined;
    dev.dispatch.getBufferMemoryRequirements(dev.device, dmabuf_buffer, &buf_reqs);

    // The host reads this buffer through an mmap of the fd, so prefer
    // HOST_CACHED: without it the mapping may be write-combined and
    // host reads become very slow. HOST_COHERENT is requested in both
    // attempts so no explicit flush/invalidate is needed between the
    // GPU write and the host read. If no cached type exists, settle
    // for the uncached coherent one.
    const host_flags_cached =
        @as(vk.VkMemoryPropertyFlags, vk.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) |
        vk.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
        vk.VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
    const host_flags_uncached =
        @as(vk.VkMemoryPropertyFlags, vk.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) |
        vk.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
    const dmabuf_mem_idx = dev.findMemoryType(buf_reqs.memoryTypeBits, host_flags_cached) orelse
        dev.findMemoryType(buf_reqs.memoryTypeBits, host_flags_uncached) orelse
        {
            log.err(
                "no HOST_VISIBLE memory type for dmabuf (typeBits=0x{x})",
                .{buf_reqs.memoryTypeBits},
            );
            return error.NoSuitableMemoryType;
        };
    const export_info: vk.VkExportMemoryAllocateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
        .pNext = null,
        .handleTypes = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
    };
    const buf_alloc: vk.VkMemoryAllocateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = &export_info,
        .allocationSize = buf_reqs.size,
        .memoryTypeIndex = dmabuf_mem_idx,
    };
    var dmabuf_memory: vk.VkDeviceMemory = undefined;
    if (dev.dispatch.allocateMemory(dev.device, &buf_alloc, null, &dmabuf_memory) != vk.VK_SUCCESS) {
        log.err("vkAllocateMemory (Target dmabuf) failed", .{});
        return error.VulkanFailed;
    }
    errdefer dev.dispatch.freeMemory(dev.device, dmabuf_memory, null);
    if (dev.dispatch.bindBufferMemory(dev.device, dmabuf_buffer, dmabuf_memory, 0) != vk.VK_SUCCESS) {
        log.err("vkBindBufferMemory (Target dmabuf) failed", .{});
        return error.VulkanFailed;
    }

    // ---- 4. Export the buffer memory as the dmabuf fd --------------
    const fd = try exportDmabufFd(dev, dmabuf_memory);
    errdefer std.posix.close(fd);

    return .{
        .device = dev,
        .platform = opts.platform,
        .tiling = .legacy_copy,
        .image = image,
        .image_memory = image_memory,
        .view = view,
        .dmabuf_buffer = dmabuf_buffer,
        .dmabuf_memory = dmabuf_memory,
        .format = opts.format,
        .width = opts.width,
        .height = opts.height,
        .fd = fd,
        .drm_format = drm_format,
        .drm_modifier = DRM_FORMAT_MOD_LINEAR,
        .stride = stride,
    };
}
|
||||
|
||||
/// Create a 2D color view over `image`: identity swizzle, mip level 0,
/// array layer 0. Returns `error.VulkanFailed` (after logging) when
/// `vkCreateImageView` does not succeed.
fn createView(
    dev: *const Device,
    image: vk.VkImage,
    format: vk.VkFormat,
) Error!vk.VkImageView {
    // No channel remapping: every component reads its own channel.
    const identity: vk.VkComponentMapping = .{
        .r = vk.VK_COMPONENT_SWIZZLE_IDENTITY,
        .g = vk.VK_COMPONENT_SWIZZLE_IDENTITY,
        .b = vk.VK_COMPONENT_SWIZZLE_IDENTITY,
        .a = vk.VK_COMPONENT_SWIZZLE_IDENTITY,
    };
    // The target has a single mip level and a single layer.
    const whole_color: vk.VkImageSubresourceRange = .{
        .aspectMask = vk.VK_IMAGE_ASPECT_COLOR_BIT,
        .baseMipLevel = 0,
        .levelCount = 1,
        .baseArrayLayer = 0,
        .layerCount = 1,
    };
    const create_info: vk.VkImageViewCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .image = image,
        .viewType = vk.VK_IMAGE_VIEW_TYPE_2D,
        .format = format,
        .components = identity,
        .subresourceRange = whole_color,
    };

    var handle: vk.VkImageView = undefined;
    const result = dev.dispatch.createImageView(dev.device, &create_info, null, &handle);
    if (result == vk.VK_SUCCESS) return handle;

    log.err("vkCreateImageView (Target) failed", .{});
    return error.VulkanFailed;
}
|
||||
|
||||
/// Export `memory` as a dmabuf file descriptor via `vkGetMemoryFdKHR`.
///
/// On success the returned fd belongs to the caller, which is
/// responsible for closing it. A non-success result or a negative fd
/// is logged and reported as `error.VulkanFailed`.
fn exportDmabufFd(dev: *const Device, memory: vk.VkDeviceMemory) Error!i32 {
    const request: vk.VkMemoryGetFdInfoKHR = .{
        .sType = vk.VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
        .pNext = null,
        .memory = memory,
        .handleType = vk.VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
    };

    // Start from an invalid fd so a call that "succeeds" without
    // writing the output is still caught by the check below.
    var out_fd: c_int = -1;
    const result = dev.dispatch.getMemoryFdKHR(dev.device, &request, &out_fd);
    const exported = result == vk.VK_SUCCESS and out_fd >= 0;
    if (!exported) {
        log.err("vkGetMemoryFdKHR (Target) failed: fd={}", .{out_fd});
        return error.VulkanFailed;
    }
    return out_fd;
}
|
||||
|
||||
/// Release everything the target owns: the exported fd, the optional
/// dmabuf buffer and its memory (present only in `.legacy_copy`), then
/// the image view, the image and the image memory. `self` is left
/// undefined afterwards and must not be used again.
pub fn deinit(self: *Self) void {
    const dev = self.device;

    // A negative fd means there is nothing to close.
    if (self.fd >= 0) {
        std.posix.close(self.fd);
    }

    // Legacy-copy resources; both are null in `.direct` mode.
    if (self.dmabuf_buffer) |buffer| {
        dev.dispatch.destroyBuffer(dev.device, buffer, null);
    }
    if (self.dmabuf_memory) |memory| {
        dev.dispatch.freeMemory(dev.device, memory, null);
    }

    // View first, then the image it refers to, then the backing memory.
    dev.dispatch.destroyImageView(dev.device, self.view, null);
    dev.dispatch.destroyImage(dev.device, self.image, null);
    dev.dispatch.freeMemory(dev.device, self.image_memory, null);

    self.* = undefined;
}
|
||||
|
||||
/// Record the end-of-frame commands that publish the rendered pixels
/// to the exported dmabuf. What gets recorded depends on `self.tiling`:
///
///   - `.direct`: a single image memory barrier
///     (`recordDirectBarrier`). The render image's own memory is the
///     dmabuf, so nothing is copied; the image stays in GENERAL.
///
///   - `.legacy_copy`: transition the render image to
///     `TRANSFER_SRC_OPTIMAL`, `vkCmdCopyImageToBuffer` into the dmabuf
///     buffer, then a buffer memory barrier for host-read visibility
///     (`recordCopyToDmabuf`).
///
/// Must be recorded after all RenderPass work and before
/// `vkEndCommandBuffer`.
pub fn recordPresentBarrier(self: *Self, cb: vk.VkCommandBuffer) void {
    switch (self.tiling) {
        .direct => {
            self.recordDirectBarrier(cb);
        },
        .legacy_copy => {
            self.recordCopyToDmabuf(cb);
        },
    }
}
|
||||
|
||||
/// `.direct` end-of-frame barrier: make color-attachment writes to the
/// render image available with a `COLOR_ATTACHMENT_OUTPUT → HOST`
/// stage dependency and `COLOR_ATTACHMENT_WRITE → HOST_READ` access.
/// The layout is GENERAL on both sides, so this is a pure memory
/// dependency with no layout change. Records `GENERAL` in
/// `self.layout` afterwards.
///
/// NOTE(review): `initDirect` and `present` describe the direct-mode
/// consumer as importing the fd as an image rather than mmap'ing it;
/// confirm that a HOST-stage destination is the intended scope for
/// that consumer.
fn recordDirectBarrier(self: *Self, cb: vk.VkCommandBuffer) void {
    const dev = self.device;

    // Whole image: one mip level, one array layer, color aspect.
    const whole_color: vk.VkImageSubresourceRange = .{
        .aspectMask = vk.VK_IMAGE_ASPECT_COLOR_BIT,
        .baseMipLevel = 0,
        .levelCount = 1,
        .baseArrayLayer = 0,
        .layerCount = 1,
    };
    const publish: vk.VkImageMemoryBarrier = .{
        .sType = vk.VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .pNext = null,
        .srcAccessMask = vk.VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
        .dstAccessMask = vk.VK_ACCESS_HOST_READ_BIT,
        // Same layout before and after: no transition is performed.
        .oldLayout = vk.VK_IMAGE_LAYOUT_GENERAL,
        .newLayout = vk.VK_IMAGE_LAYOUT_GENERAL,
        // No queue-family ownership transfer.
        .srcQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .image = self.image,
        .subresourceRange = whole_color,
    };

    const src_stage = vk.VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    const dst_stage = vk.VK_PIPELINE_STAGE_HOST_BIT;
    dev.dispatch.cmdPipelineBarrier(
        cb,
        src_stage,
        dst_stage,
        0, // dependency flags
        0, // no global memory barriers
        null,
        0, // no buffer barriers
        null,
        1, // one image barrier
        &publish,
    );

    self.layout = vk.VK_IMAGE_LAYOUT_GENERAL;
}
|
||||
|
||||
/// `.legacy_copy` end-of-frame sequence:
///   1. image barrier `GENERAL → TRANSFER_SRC_OPTIMAL`
///      (color-attachment writes → transfer reads),
///   2. `vkCmdCopyImageToBuffer` of the full image into the dmabuf
///      buffer, tightly packed,
///   3. buffer barrier `TRANSFER_WRITE → HOST_READ` so a host read of
///      the mapping after the submission completes sees the bytes.
///
/// Requires `self.dmabuf_buffer` to be set (it is in `.legacy_copy`).
/// Records `TRANSFER_SRC_OPTIMAL` in `self.layout` afterwards.
fn recordCopyToDmabuf(self: *Self, cb: vk.VkCommandBuffer) void {
    const dev = self.device;
    const dst_buffer = self.dmabuf_buffer.?;

    // ---- 1. Render image → transfer source -------------------------
    // The old layout is hard-coded to GENERAL, i.e. this relies on the
    // render pass leaving the image in GENERAL when it completes.
    const to_transfer_src: vk.VkImageMemoryBarrier = .{
        .sType = vk.VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .pNext = null,
        .srcAccessMask = vk.VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
        .dstAccessMask = vk.VK_ACCESS_TRANSFER_READ_BIT,
        .oldLayout = vk.VK_IMAGE_LAYOUT_GENERAL,
        .newLayout = vk.VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
        .srcQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .image = self.image,
        .subresourceRange = .{
            .aspectMask = vk.VK_IMAGE_ASPECT_COLOR_BIT,
            .baseMipLevel = 0,
            .levelCount = 1,
            .baseArrayLayer = 0,
            .layerCount = 1,
        },
    };
    dev.dispatch.cmdPipelineBarrier(
        cb,
        vk.VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
        vk.VK_PIPELINE_STAGE_TRANSFER_BIT,
        0, // dependency flags
        0, // no global memory barriers
        null,
        0, // no buffer barriers
        null,
        1, // one image barrier
        &to_transfer_src,
    );

    // ---- 2. Copy image → buffer ------------------------------------
    // Row length / image height of 0 mean "tightly packed", i.e. the
    // buffer rows follow `imageExtent` with no padding, matching the
    // `width * 4` stride reported for this mode.
    const copy_region: vk.VkBufferImageCopy = .{
        .bufferOffset = 0,
        .bufferRowLength = 0,
        .bufferImageHeight = 0,
        .imageSubresource = .{
            .aspectMask = vk.VK_IMAGE_ASPECT_COLOR_BIT,
            .mipLevel = 0,
            .baseArrayLayer = 0,
            .layerCount = 1,
        },
        .imageOffset = .{ .x = 0, .y = 0, .z = 0 },
        .imageExtent = .{ .width = self.width, .height = self.height, .depth = 1 },
    };
    dev.dispatch.cmdCopyImageToBuffer(
        cb,
        self.image,
        vk.VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
        dst_buffer,
        1,
        &copy_region,
    );

    // ---- 3. Buffer → host visibility -------------------------------
    // HOST_READ_BIT is the destination access and HOST_BIT the
    // destination stage. (External fd consumers may need an explicit
    // release barrier; for an mmap-based read after a fence wait this
    // covers the GPU side.)
    const to_host: vk.VkBufferMemoryBarrier = .{
        .sType = vk.VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
        .pNext = null,
        .srcAccessMask = vk.VK_ACCESS_TRANSFER_WRITE_BIT,
        .dstAccessMask = vk.VK_ACCESS_HOST_READ_BIT,
        .srcQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
        .buffer = dst_buffer,
        .offset = 0,
        .size = vk.VK_WHOLE_SIZE,
    };
    dev.dispatch.cmdPipelineBarrier(
        cb,
        vk.VK_PIPELINE_STAGE_TRANSFER_BIT,
        vk.VK_PIPELINE_STAGE_HOST_BIT,
        0, // dependency flags
        0, // no global memory barriers
        null,
        1, // one buffer barrier
        &to_host,
        0, // no image barriers
        null,
    );

    // Remember where the image was left so later code inspecting
    // `layout` does not see stale state.
    self.layout = vk.VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
}
|
||||
|
||||
/// Hand the current frame to the host through the platform's
/// `present` callback, passing the dmabuf fd together with its DRM
/// format, modifier, dimensions and stride.
///
/// The final argument (`image_backed`) is true in `.direct` mode,
/// where the fd was exported from a `VkImage` and can be imported by a
/// 2D-image consumer, and false in `.legacy_copy`, where it was
/// exported from a `VkBuffer` and is meant to be read via mmap.
pub fn present(self: *const Self) void {
    // The platform is per-surface: its `userdata` identifies the
    // surface this target belongs to, so the frame reaches the right
    // window.
    const host = self.platform;
    const image_backed = self.tiling == .direct;

    host.present(
        host.userdata,
        self.fd,
        self.drm_format,
        self.drm_modifier,
        self.width,
        self.height,
        self.stride,
        image_backed,
    );
}
|
||||
|
||||
/// Map a Vulkan format to the DRM fourcc code reported to the host.
///
/// Supported: the 8-bit BGRA formats (UNORM and SRGB) map to `AR24`,
/// the 8-bit RGBA formats (UNORM and SRGB) map to `AB24`. Anything
/// else yields `error.UnsupportedFormat`.
fn vkFormatToDrmFourcc(format: vk.VkFormat) Error!u32 {
    const pack = struct {
        /// Little-endian fourcc: the first character ends up in the
        /// lowest byte.
        fn code(c0: u8, c1: u8, c2: u8, c3: u8) u32 {
            const b0 = @as(u32, c0);
            const b1 = @as(u32, c1) << 8;
            const b2 = @as(u32, c2) << 16;
            const b3 = @as(u32, c3) << 24;
            return b0 | b1 | b2 | b3;
        }
    };

    switch (format) {
        vk.VK_FORMAT_B8G8R8A8_UNORM,
        vk.VK_FORMAT_B8G8R8A8_SRGB,
        => return pack.code('A', 'R', '2', '4'),

        vk.VK_FORMAT_R8G8B8A8_UNORM,
        vk.VK_FORMAT_R8G8B8A8_SRGB,
        => return pack.code('A', 'B', '2', '4'),

        else => return error.UnsupportedFormat,
    }
}
|
||||
|
||||
// Reference every declaration of this file so that all of them are
// semantically analyzed when the tests are built.
test {
    const This = @This();
    std.testing.refAllDecls(This);
}
|
||||
|
|
@ -0,0 +1,430 @@
|
|||
//! Wrapper for `VkImage` + `VkDeviceMemory` + `VkImageView` with a
|
||||
//! staging-buffer upload path.
|
||||
//!
|
||||
//! Holds a 2D image, the backing device-local memory, and a view
|
||||
//! configured for color sampling. All three handles are libghostty-
|
||||
//! owned and destroyed in `deinit`.
|
||||
//!
|
||||
//! Uploads go through a temporary `Buffer(u8)` staging buffer
|
||||
//! (`HOST_VISIBLE | HOST_COHERENT | TRANSFER_SRC`) and a per-call
|
||||
//! `CommandPool` that drives the layout-transition →
|
||||
//! `vkCmdCopyBufferToImage` → layout-transition sequence. Both
|
||||
//! resources are destroyed by the time `replaceRegion` returns — the
|
||||
//! upload is synchronous from the caller's perspective. That's the
|
||||
//! right tradeoff for atlas resizes (rare; the renderer can afford
|
||||
//! the stall) but won't fit the eventual per-frame upload path,
|
||||
//! which will reuse a long-lived `CommandPool` and fence-paced
|
||||
//! submission.
|
||||
//!
|
||||
//! Layout tracking: a single `layout: VkImageLayout` field records
|
||||
//! whether the image currently sits in `UNDEFINED` (fresh) or
|
||||
//! `SHADER_READ_ONLY_OPTIMAL` (after at least one upload). The
|
||||
//! barrier sequence in `replaceRegion` reads this field to pick the
|
||||
//! right `srcAccessMask` / `srcStageMask`.
|
||||
//!
|
||||
//! Counterpart: `src/renderer/opengl/Texture.zig`.
|
||||
|
||||
const Self = @This();
|
||||
|
||||
const std = @import("std");
|
||||
const vulkan = @import("vulkan");
|
||||
const vk = vulkan.c;
|
||||
|
||||
const Device = vulkan.Device;
|
||||
const CommandPool = vulkan.CommandPool;
|
||||
const bufferpkg = @import("buffer.zig");
|
||||
|
||||
const log = std.log.scoped(.vulkan);
|
||||
|
||||
/// Channel-layout hint for image uploads, matching the
/// `ImageTextureFormat` in `opengl/OpenGL.zig`.
/// `Vulkan.imageTextureOptions` uses it to choose a `VkFormat` for kitty
/// graphics and background-image uploads. It is declared beside `Texture`
/// rather than at the renderer top level so that the policy it carries
/// (SRGB vs. UNORM for color channels) can be read in a single place.
pub const ImageTextureFormat = enum {
    gray,
    rgba,
    bgra,

    /// Translate the hint into a concrete `VkFormat`.
    ///
    /// `srgb` selects the `_SRGB` variant for the two color layouts and
    /// is ignored for `gray`.
    pub fn toVk(self: ImageTextureFormat, srgb: bool) vk.VkFormat {
        switch (self) {
            // Single-channel R8: carries no color, so no gamma variant.
            .gray => return vk.VK_FORMAT_R8_UNORM,

            // Color layouts follow `srgb`. For images authored in sRGB
            // (the usual case for kitty graphics) the SRGB format makes
            // the sampler linearize on read, so `texture()` yields linear
            // values which the renderer's `unlinearize()` re-encodes for
            // the sRGB framebuffer. Picking UNORM instead skips that
            // decode: `unlinearize` would encode already-encoded bytes
            // and the SRGB framebuffer would encode them yet again,
            // which shows up as washed-out kitty graphics.
            .rgba => {
                if (srgb) return vk.VK_FORMAT_R8G8B8A8_SRGB;
                return vk.VK_FORMAT_R8G8B8A8_UNORM;
            },
            .bgra => {
                if (srgb) return vk.VK_FORMAT_B8G8R8A8_SRGB;
                return vk.VK_FORMAT_B8G8R8A8_UNORM;
            },
        }
    }
};
|
||||
|
||||
/// Parameters for `init`. These are Vulkan-native: where the OpenGL
/// backend keeps separate `format` / `internal_format` values, a single
/// `VkFormat` covers both here.
pub const Options = struct {
    /// Device that the image, its memory, and its view are created on.
    device: *const Device,

    /// Pixel format of the image and its view. Typical values:
    ///   - `VK_FORMAT_R8G8B8A8_UNORM`: color atlases, render target.
    ///   - `VK_FORMAT_R8G8B8A8_SRGB`: sRGB color atlases.
    ///   - `VK_FORMAT_R8_UNORM`: grayscale glyph atlas.
    format: vk.VkFormat,

    /// `VkImageUsageFlagBits` mask for the image. Typical values:
    ///   - Atlas: `SAMPLED | TRANSFER_DST`
    ///   - Render target: `COLOR_ATTACHMENT | SAMPLED` (plus the
    ///     external-memory flags added by the export path)
    /// `init` always ORs in `TRANSFER_DST_BIT`, so the upload path works
    /// even when the caller leaves it out.
    usage: vk.VkImageUsageFlags,

    /// Aspect mask used for the image view. Color by default; a depth
    /// image would override it.
    aspect: vk.VkImageAspectFlags = vk.VK_IMAGE_ASPECT_COLOR_BIT,
};
|
||||
|
||||
pub const Error = error{
    /// One of the `vkCreate*` / `vkAllocate*` calls reported a status
    /// other than success. The raw `VkResult` is written to the log.
    VulkanFailed,
    /// `findMemoryType` found no `DEVICE_LOCAL` memory type that
    /// satisfies the image's requirements. In practice this is not
    /// recoverable; typical Vulkan devices expose at least one.
    NoSuitableMemoryType,
};
|
||||
|
||||
/// The 2D image handle.
image: vk.VkImage,
/// Device memory bound to `image`.
memory: vk.VkDeviceMemory,
/// View over `image`, created with `aspect`.
view: vk.VkImageView,
/// Pixel format shared by the image and its view.
format: vk.VkFormat,
/// Aspect mask given at creation time (COLOR_BIT for renderable
/// textures, DEPTH_BIT for depth attachments, ...). It is kept on the
/// texture so the barriers and the copy in `replaceRegion` use exactly
/// the aspect the view was built with; a hardcoded COLOR_BIT there was
/// a silent validation error for every non-color caller.
aspect: vk.VkImageAspectFlags,
/// Image width as passed to `init`.
width: usize,
/// Image height as passed to `init`.
height: usize,
/// Device the handles above were created on; used again to destroy them.
device: *const Device,

/// Layout the image is currently in. It begins as `UNDEFINED`; the
/// first `replaceRegion` moves it to `SHADER_READ_ONLY_OPTIMAL` and
/// later calls leave it there. `replaceRegion` reads this value to
/// choose the source side of its first barrier.
layout: vk.VkImageLayout = vk.VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
|
||||
/// Build a 2D texture: image, backing `DEVICE_LOCAL` memory, and view.
///
/// When `data` is non-null it is uploaded through `replaceRegion` over
/// the full `width` x `height` extent, which leaves the image in
/// `SHADER_READ_ONLY_OPTIMAL`. When `data` is null the image stays in
/// `UNDEFINED` and the caller is expected to transition it later
/// (typically via `replaceRegion`, or by using it as a render target).
///
/// On any failure every handle created so far is released before the
/// error is returned.
pub fn init(
    opts: Options,
    width: usize,
    height: usize,
    data: ?[]const u8,
) Error!Self {
    const gpu = opts.device;

    // ---- image ---------------------------------------------------
    const image_ci: vk.VkImageCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .imageType = vk.VK_IMAGE_TYPE_2D,
        .format = opts.format,
        .extent = .{
            .width = @intCast(width),
            .height = @intCast(height),
            .depth = 1,
        },
        .mipLevels = 1,
        .arrayLayers = 1,
        .samples = vk.VK_SAMPLE_COUNT_1_BIT,
        .tiling = vk.VK_IMAGE_TILING_OPTIMAL,
        // TRANSFER_DST is always added so `replaceRegion` can be used
        // on any texture, whatever usage the caller asked for.
        .usage = opts.usage | @as(vk.VkImageUsageFlags, vk.VK_IMAGE_USAGE_TRANSFER_DST_BIT),
        .sharingMode = vk.VK_SHARING_MODE_EXCLUSIVE,
        .queueFamilyIndexCount = 0,
        .pQueueFamilyIndices = null,
        .initialLayout = vk.VK_IMAGE_LAYOUT_UNDEFINED,
    };
    var image: vk.VkImage = undefined;
    const image_rc = gpu.dispatch.createImage(gpu.device, &image_ci, null, &image);
    if (image_rc != vk.VK_SUCCESS) {
        log.err("vkCreateImage failed: result={}", .{image_rc});
        return error.VulkanFailed;
    }
    errdefer gpu.dispatch.destroyImage(gpu.device, image, null);

    // ---- memory --------------------------------------------------
    var mem_reqs: vk.VkMemoryRequirements = undefined;
    gpu.dispatch.getImageMemoryRequirements(gpu.device, image, &mem_reqs);

    const type_index = if (gpu.findMemoryType(
        mem_reqs.memoryTypeBits,
        vk.VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
    )) |found| found else {
        log.err(
            "no DEVICE_LOCAL memory type found for image (typeBits=0x{x})",
            .{mem_reqs.memoryTypeBits},
        );
        return error.NoSuitableMemoryType;
    };

    const memory_ai: vk.VkMemoryAllocateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = null,
        .allocationSize = mem_reqs.size,
        .memoryTypeIndex = type_index,
    };
    var memory: vk.VkDeviceMemory = undefined;
    const alloc_rc = gpu.dispatch.allocateMemory(gpu.device, &memory_ai, null, &memory);
    if (alloc_rc != vk.VK_SUCCESS) {
        log.err("vkAllocateMemory failed: result={}", .{alloc_rc});
        return error.VulkanFailed;
    }
    errdefer gpu.dispatch.freeMemory(gpu.device, memory, null);

    const bind_rc = gpu.dispatch.bindImageMemory(gpu.device, image, memory, 0);
    if (bind_rc != vk.VK_SUCCESS) {
        log.err("vkBindImageMemory failed: result={}", .{bind_rc});
        return error.VulkanFailed;
    }

    // ---- view ----------------------------------------------------
    const view_ci: vk.VkImageViewCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .image = image,
        .viewType = vk.VK_IMAGE_VIEW_TYPE_2D,
        .format = opts.format,
        .components = .{
            .r = vk.VK_COMPONENT_SWIZZLE_IDENTITY,
            .g = vk.VK_COMPONENT_SWIZZLE_IDENTITY,
            .b = vk.VK_COMPONENT_SWIZZLE_IDENTITY,
            .a = vk.VK_COMPONENT_SWIZZLE_IDENTITY,
        },
        .subresourceRange = .{
            .aspectMask = opts.aspect,
            .baseMipLevel = 0,
            .levelCount = 1,
            .baseArrayLayer = 0,
            .layerCount = 1,
        },
    };
    var view: vk.VkImageView = undefined;
    const view_rc = gpu.dispatch.createImageView(gpu.device, &view_ci, null, &view);
    if (view_rc != vk.VK_SUCCESS) {
        log.err("vkCreateImageView failed: result={}", .{view_rc});
        return error.VulkanFailed;
    }
    errdefer gpu.dispatch.destroyImageView(gpu.device, view, null);

    // `layout` keeps its `UNDEFINED` default until an upload happens.
    var texture: Self = .{
        .image = image,
        .memory = memory,
        .view = view,
        .format = opts.format,
        .aspect = opts.aspect,
        .width = width,
        .height = height,
        .device = gpu,
    };

    // Optional initial upload of the full extent. A failure here runs
    // the errdefers above, so nothing leaks.
    if (data) |pixels| {
        try texture.replaceRegion(0, 0, width, height, pixels);
    }
    return texture;
}
|
||||
|
||||
/// Release the three Vulkan handles owned by this texture: the view
/// first, then the image, then the memory that backed it.
// NOTE(review): nothing here waits on the GPU; presumably the caller
// guarantees no in-flight work still references the texture. Confirm.
pub fn deinit(self: Self) void {
    const gpu = self.device;
    const handle = gpu.device;

    gpu.dispatch.destroyImageView(handle, self.view, null);
    gpu.dispatch.destroyImage(handle, self.image, null);
    gpu.dispatch.freeMemory(handle, self.memory, null);
}
|
||||
|
||||
/// Replace a region of the texture with the provided data. Performs:
///   1. Allocate a host-coherent staging buffer holding `data`.
///   2. One-shot command buffer:
///        a. Barrier: current layout → TRANSFER_DST_OPTIMAL.
///        b. `vkCmdCopyBufferToImage`.
///        c. Barrier: TRANSFER_DST_OPTIMAL → SHADER_READ_ONLY_OPTIMAL.
///   3. Submit + `vkQueueWaitIdle`.
///   4. Free staging buffer + command pool.
///
/// Parameters:
///   - `x`, `y`: texel offset of the region's origin inside the image.
///   - `width`, `height`: texel extent of the region.
///   - `data`: source bytes, read as tightly packed rows
///     (`bufferRowLength` / `bufferImageHeight` are 0).
///
/// Errors: whatever staging-buffer creation, command-pool creation,
/// `beginOneShot`, or `endAndSubmit` report. `self.layout` is only
/// updated after a successful submit.
///
/// On success, `self.layout` is `SHADER_READ_ONLY_OPTIMAL`.
pub fn replaceRegion(
    self: *Self,
    x: usize,
    y: usize,
    width: usize,
    height: usize,
    data: []const u8,
) Error!void {
    // Empty-data / zero-region call: full no-op (does NOT transition
    // the image layout). Callers passing nothing-to-upload are
    // saying just that; transitioning anyway would issue a one-shot
    // command-buffer + queueWaitIdle for no reason and would surprise
    // a caller relying on the texture's current layout being
    // preserved. If a caller ever needs a layout-only transition,
    // add a separate `transitionToShaderRead` API rather than
    // overloading replaceRegion's empty-data path.
    if (data.len == 0 or width == 0 or height == 0) return;
    const dev = self.device;

    // ---- staging buffer -----------------------------------------
    var staging = try bufferpkg.Buffer(u8).initFill(.{
        .device = dev,
        .usage = vk.VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
    }, data);
    // `destroyImmediate` instead of `deinit`: replaceRegion runs
    // synchronously on the calling thread (typically the main /
    // app-init thread, NOT the renderer thread), and
    // `OneShot.endAndSubmit` below calls `vkQueueWaitIdle` so the
    // staging buffer is provably done with the GPU before this
    // defer fires. Routing it into `Vulkan.buffer_pool` from a
    // non-renderer thread would leak it forever — the pool's
    // `cycle()` runs only on the renderer thread.
    defer staging.destroyImmediate();

    // ---- command pool (one-shot) --------------------------------
    var pool = try CommandPool.init(dev);
    defer pool.deinit();
    const session = try pool.beginOneShot();

    // ---- barrier: current → TRANSFER_DST_OPTIMAL ----------------
    // The source side depends on where the image currently sits: a
    // previously uploaded image may still be read by fragment
    // shaders; any other layout has nothing to wait for.
    const old_layout = self.layout;
    const src_access: vk.VkAccessFlags = switch (old_layout) {
        vk.VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL => vk.VK_ACCESS_SHADER_READ_BIT,
        else => 0,
    };
    const src_stage: vk.VkPipelineStageFlags = switch (old_layout) {
        vk.VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL => vk.VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
        else => vk.VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
    };
    {
        const barrier: vk.VkImageMemoryBarrier = .{
            .sType = vk.VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
            .pNext = null,
            .srcAccessMask = src_access,
            .dstAccessMask = vk.VK_ACCESS_TRANSFER_WRITE_BIT,
            .oldLayout = old_layout,
            .newLayout = vk.VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
            .srcQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
            .image = self.image,
            .subresourceRange = .{
                .aspectMask = self.aspect,
                .baseMipLevel = 0,
                .levelCount = 1,
                .baseArrayLayer = 0,
                .layerCount = 1,
            },
        };
        dev.dispatch.cmdPipelineBarrier(
            session.cb,
            src_stage,
            vk.VK_PIPELINE_STAGE_TRANSFER_BIT,
            0, // dependencyFlags
            0,
            null, // memory barriers
            0,
            null, // buffer memory barriers
            1,
            &barrier,
        );
    }

    // ---- vkCmdCopyBufferToImage ---------------------------------
    {
        const region: vk.VkBufferImageCopy = .{
            .bufferOffset = 0,
            .bufferRowLength = 0, // tightly packed
            .bufferImageHeight = 0,
            .imageSubresource = .{
                .aspectMask = self.aspect,
                .mipLevel = 0,
                .baseArrayLayer = 0,
                .layerCount = 1,
            },
            .imageOffset = .{
                .x = @intCast(x),
                .y = @intCast(y),
                .z = 0,
            },
            .imageExtent = .{
                .width = @intCast(width),
                .height = @intCast(height),
                .depth = 1,
            },
        };
        dev.dispatch.cmdCopyBufferToImage(
            session.cb,
            staging.buffer,
            self.image,
            vk.VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
            1,
            // Pointer to the single copy region declared above.
            &region,
        );
    }

    // ---- barrier: TRANSFER_DST → SHADER_READ_ONLY ---------------
    {
        const barrier: vk.VkImageMemoryBarrier = .{
            .sType = vk.VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
            .pNext = null,
            .srcAccessMask = vk.VK_ACCESS_TRANSFER_WRITE_BIT,
            .dstAccessMask = vk.VK_ACCESS_SHADER_READ_BIT,
            .oldLayout = vk.VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
            .newLayout = vk.VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
            .srcQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = vk.VK_QUEUE_FAMILY_IGNORED,
            .image = self.image,
            .subresourceRange = .{
                .aspectMask = self.aspect,
                .baseMipLevel = 0,
                .levelCount = 1,
                .baseArrayLayer = 0,
                .layerCount = 1,
            },
        };
        dev.dispatch.cmdPipelineBarrier(
            session.cb,
            vk.VK_PIPELINE_STAGE_TRANSFER_BIT,
            vk.VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
            0,
            0,
            null,
            0,
            null,
            1,
            &barrier,
        );
    }

    try session.endAndSubmit();
    // Record the new layout only once the submit has succeeded.
    self.layout = vk.VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
}
|
||||
|
||||
// Anonymous test: hands this file's own type to `refAllDecls` so its
// declarations are referenced when the test suite is built.
test {
    const This = @This();
    std.testing.refAllDecls(This);
}
|
||||
|
|
@ -0,0 +1,232 @@
|
|||
//! Per-renderer-thread Vulkan state. Lifecycle:
|
||||
//!
|
||||
//! - first `Vulkan.beginFrame` on a thread → `ensureInit(dev)`
|
||||
//! lazily creates a `CommandPool`, a single command buffer
|
||||
//! allocated from it, a fence (created signaled), and a
|
||||
//! `DescriptorPool` sized for one frame's worst-case usage.
|
||||
//! All four are reused across frames; `beginFrameReset` resets
|
||||
//! the command buffer, descriptor pool, and fence every frame.
|
||||
//! - `Vulkan.deinit` on a surface (one per renderer thread) →
|
||||
//! `cleanup(dev)` waits the per-thread fence, frees CB,
|
||||
//! destroys pool + fence, drops the cached `last_target`
|
||||
//! pointer, and drains the per-thread `buffer_pool` pending
|
||||
//! list (which is bounded by the same fence we just waited).
|
||||
//!
|
||||
//! Why threadlocal? Splits/tabs share the host's process-wide
|
||||
//! `VkDevice`, but each renderer thread submits independently and
|
||||
//! its fence-paced single-frame-in-flight model needs its own
|
||||
//! fence + command buffer to avoid stomping the previous frame's
|
||||
//! still-in-flight work. Threadlocal also matches the lifetime of
|
||||
//! the buffer-pool's per-thread `pending` list (both are bounded
|
||||
//! by the same `Frame.complete` fence wait).
|
||||
//!
|
||||
//! `last_target` lives here too because it's logically per-thread:
|
||||
//! `presentLastTarget` re-presents whatever the renderer thread
|
||||
//! handed to `present` last, and pointing at another thread's
|
||||
//! target would route a different surface's frames to this
|
||||
//! thread's window.
|
||||
|
||||
const std = @import("std");
|
||||
const vulkan = @import("vulkan");
|
||||
const vk = vulkan.c;
|
||||
|
||||
const Device = vulkan.Device;
|
||||
const CommandPool = vulkan.CommandPool;
|
||||
const DescriptorPool = vulkan.DescriptorPool;
|
||||
const Target = @import("Target.zig");
|
||||
const buffer_pool = @import("buffer_pool.zig");
|
||||
|
||||
const log = std.log.scoped(.vulkan);
|
||||
|
||||
/// Capacity limits for the per-frame `step_pool`. They are sized for
/// the heaviest pass shape (a kitty image with N placements plus the
/// post pipelines): one set per (image_step × MAX_DESCRIPTOR_SETS),
/// plus a few of the renderer's other pipelines stepped once each.
/// 256 is generous, since real frames settle well below it. Should a
/// frame exhaust the pool anyway, `RenderPass.step` logs a warning and
/// falls back to the pipeline's static set.
pub const STEP_POOL_MAX_SETS: u32 = 256;
/// Uniform-buffer descriptor budget handed to `DescriptorPool.init`.
pub const STEP_POOL_UNIFORM_BUFFERS: u32 = 256;
/// Combined-image-sampler descriptor budget handed to `DescriptorPool.init`.
pub const STEP_POOL_COMBINED_IMAGE_SAMPLERS: u32 = 256;
/// Storage-buffer descriptor budget handed to `DescriptorPool.init`.
pub const STEP_POOL_STORAGE_BUFFERS: u32 = 256;
|
||||
|
||||
pub const Error = error{
    /// `vkAllocateCommandBuffers` or `vkCreateFence` reported a status
    /// other than success. Folded into one error so the lazy-init path
    /// in `ensureInit` exposes a single error type to its callers.
    VulkanFailed,
    /// `DescriptorPool.init` refused the caps it was given (for
    /// example `max_sets == 0`). Listed here so the callers' error set
    /// lines up.
    InvalidPoolConfig,
} || std.mem.Allocator.Error;
|
||||
|
||||
/// The target most recently handed to `present`; `presentLastTarget`
/// re-presents it when the renderer decides there is nothing new to
/// draw. This is deliberately a POINTER into the FrameState's `target`
/// slot, not a copy. `frame.resize` calls `target.deinit()` on the old
/// Target and writes a new one into the same slot, so a copied value
/// would be left holding a closed fd and freed
/// VkImage/VkBuffer/VkDeviceMemory handles, and Qt's mmap of that
/// closed fd could end up reading whatever a later open() reused the
/// descriptor for. Going through the pointer always re-presents the
/// target that is live right now.
pub threadlocal var last_target: ?*Target = null;

/// Command pool backing this thread's (this surface's) frame command
/// buffer. `ensureInit` creates it on first use and `cleanup` destroys
/// it.
pub threadlocal var frame_pool: ?CommandPool = null;

/// The one command buffer allocated out of `frame_pool`, recorded
/// again every frame. `beginFrameReset` calls `vkResetCommandBuffer`
/// on it to discard the previous recording.
pub threadlocal var frame_cb: vk.VkCommandBuffer = null;

/// Fence that signals once a frame's submit has finished. The caller
/// waits on it in `Frame.complete` before the target dmabuf is handed
/// to the host.
pub threadlocal var frame_fence: vk.VkFence = null;

/// Descriptor pool from which `RenderPass.step` allocates fresh
/// descriptor sets whenever one pipeline is bound more than once
/// within a pass (vkCmdDraw reads descriptors at submit time, so
/// reusing the pipeline's static set would silently corrupt earlier
/// draws). `beginFrameReset` resets it so one frame's allocations do
/// not stack on top of the last frame's; per-pass usage is bounded by
/// a small constant, see the `STEP_POOL_*` caps.
pub threadlocal var step_pool: ?DescriptorPool = null;
|
||||
|
||||
/// Create this thread's frame resources on first use: a command pool,
/// one primary command buffer, a fence, and the descriptor pool. Once
/// everything exists, further calls do nothing.
///
/// Failure contract: if creating the pool/buffer/fence group fails
/// part-way, none of the three threadlocals is written, so the next
/// call starts over from scratch. Committing them one at a time could
/// leave `frame_pool != null and frame_cb == null`; the
/// `frame_pool == null` guard would then skip re-init forever and lock
/// the thread out of the renderer.
pub fn ensureInit(dev: *const Device) Error!void {
    if (frame_pool == null) {
        // Build into locals and publish to the threadlocals only when
        // all three exist. The errdefers unwind a partial build.
        var new_pool = try CommandPool.init(dev);
        errdefer new_pool.deinit();

        const cb_info: vk.VkCommandBufferAllocateInfo = .{
            .sType = vk.VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
            .pNext = null,
            .commandPool = new_pool.pool,
            .level = vk.VK_COMMAND_BUFFER_LEVEL_PRIMARY,
            .commandBufferCount = 1,
        };
        var new_cb: vk.VkCommandBuffer = null;
        const alloc_rc = dev.dispatch.allocateCommandBuffers(dev.device, &cb_info, &new_cb);
        if (alloc_rc != vk.VK_SUCCESS) return error.VulkanFailed;
        errdefer dev.dispatch.freeCommandBuffers(dev.device, new_pool.pool, 1, &new_cb);

        const fence_ci: vk.VkFenceCreateInfo = .{
            .sType = vk.VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
            .pNext = null,
            // Start out signaled so the very first `Frame.complete`
            // is not resetting a fence that never signaled.
            .flags = vk.VK_FENCE_CREATE_SIGNALED_BIT,
        };
        var new_fence: vk.VkFence = null;
        const fence_rc = dev.dispatch.createFence(dev.device, &fence_ci, null, &new_fence);
        if (fence_rc != vk.VK_SUCCESS) return error.VulkanFailed;

        // The fence needs no errdefer: nothing below in this block can
        // fail, and the three assignments land together from the
        // caller's point of view. (The `step_pool` setup is a separate
        // step with its own failure handling.)
        frame_pool = new_pool;
        frame_cb = new_cb;
        frame_fence = new_fence;
    }

    // The descriptor pool is independent of the group above. If it
    // fails, the group stays committed and only `step_pool` remains
    // null, which the next call retries.
    if (step_pool != null) return;
    step_pool = try DescriptorPool.init(.{
        .device = dev,
        .max_sets = STEP_POOL_MAX_SETS,
        .uniform_buffers = STEP_POOL_UNIFORM_BUFFERS,
        .combined_image_samplers = STEP_POOL_COMBINED_IMAGE_SAMPLERS,
        .storage_buffers = STEP_POOL_STORAGE_BUFFERS,
    });
}
|
||||
|
||||
/// Per-frame reset run at the top of `beginFrame`: the command buffer
/// first, then the descriptor pool (when one exists), and the fence
/// last. Any non-success result aborts with `error.VulkanFailed`.
///
/// The caller must install an `errdefer` that re-signals the fence;
/// otherwise a failure here could leave the next `Vulkan.deinit` stuck
/// in `waitForFences(UINT64_MAX)`. The comment in `Vulkan.beginFrame`
/// has the full rationale.
pub fn beginFrameReset(dev: *const Device) error{VulkanFailed}!void {
    const cb_rc = dev.dispatch.resetCommandBuffer(frame_cb, 0);
    if (cb_rc != vk.VK_SUCCESS) return error.VulkanFailed;

    if (step_pool) |*descriptors| {
        const pool_rc = dev.dispatch.resetDescriptorPool(dev.device, descriptors.pool, 0);
        if (pool_rc != vk.VK_SUCCESS) return error.VulkanFailed;
    }

    const fence_rc = dev.dispatch.resetFences(dev.device, 1, &frame_fence);
    if (fence_rc != vk.VK_SUCCESS) return error.VulkanFailed;
}
|
||||
|
||||
/// Destroy the calling thread's state; `Vulkan.deinit` calls this once
/// per surface. The steps are: wait on this thread's fence (which
/// covers any submit still in flight), destroy the fence, free the
/// command buffer, destroy both pools, drain this thread's
/// `buffer_pool` pending list (bounded by that same fence wait), and
/// clear `last_target`.
///
/// Closing one surface only requires that surface's own submissions to
/// have finished, so the wait is on this thread's frame fence (when it
/// exists) rather than `vkDeviceWaitIdle` on the shared device, which
/// would stall the in-flight GPU work of every other tab/split. The
/// device-wide idle wait belongs to the final-refcount path in
/// `Vulkan.deinit`.
pub fn cleanup(dev: *const Device) void {
    if (frame_fence != null) {
        const no_timeout = std.math.maxInt(u64);
        const wait_rc = dev.dispatch.waitForFences(
            dev.device,
            1,
            &frame_fence,
            vk.VK_TRUE,
            no_timeout,
        );
        if (wait_rc != vk.VK_SUCCESS) {
            // The fence wait failed; fall back to idling the whole
            // device before anything is destroyed.
            log.warn(
                "ThreadState.cleanup: vkWaitForFences returned {}, falling back to device-wide wait",
                .{wait_rc},
            );
            dev.waitIdle();
        }
        dev.dispatch.destroyFence(dev.device, frame_fence, null);
        frame_fence = null;
    }

    if (frame_pool) |*cmd_pool| {
        // The command buffer is freed back into its pool before the
        // pool itself is destroyed.
        if (frame_cb != null) {
            dev.dispatch.freeCommandBuffers(dev.device, cmd_pool.pool, 1, &frame_cb);
            frame_cb = null;
        }
        cmd_pool.deinit();
        frame_pool = null;
    }

    if (step_pool) |*descriptors| {
        descriptors.deinit();
        step_pool = null;
    }

    // Drain the pending buffer-pool entries that belong to THIS
    // thread. The fence wait above showed the GPU has finished with
    // them. It has to happen on this thread because the pending list
    // lives in this thread's threadlocal storage, out of reach of the
    // final-refcount drainShared.
    buffer_pool.drainSelf(dev);

    // `last_target` merely borrows this thread's FrameState target
    // slot. The SwapChain teardown destroys the target itself, so only
    // the reference is dropped here.
    last_target = null;
}
|
||||
|
|
@ -0,0 +1,352 @@
|
|||
//! Host-coherent `VkBuffer` wrapper, generic over element type.
|
||||
//!
|
||||
//! Mirrors `src/renderer/opengl/buffer.zig`: `Buffer(T)` returns a
|
||||
//! struct that holds one buffer's worth of `T`s, with init / initFill
|
||||
//! / sync / syncFromArrayLists semantics that match the OpenGL
|
||||
//! contract.
|
||||
//!
|
||||
//! Storage strategy: `HOST_VISIBLE | HOST_COHERENT` memory.
|
||||
//! - HOST_VISIBLE lets us `vkMapMemory` the buffer and write directly.
|
||||
//! - HOST_COHERENT means the writes are visible to the GPU without a
|
||||
//! `vkFlushMappedMemoryRanges` round-trip.
|
||||
//! - This is the simplest "dynamic" buffer pattern in Vulkan. It does
|
||||
//! pay a small cost over device-local + staging on discrete GPUs,
|
||||
//! but the renderer's per-frame buffer payloads are KBs (cell
|
||||
//! instances + uniforms), not bandwidth-bound. The OpenGL backend
|
||||
//! uses `dynamic_draw` for the same buffers, which behaves
|
||||
//! similarly on most drivers.
|
||||
//!
|
||||
//! Growth policy: matches the OpenGL backend — `sync` doubles the
|
||||
//! buffer when content outgrows it, with no shrink. The buffer is
|
||||
//! recreated (destroy/create) on growth because Vulkan buffers are
|
||||
//! immutable in size.
|
||||
|
||||
const std = @import("std");
|
||||
const Allocator = std.mem.Allocator;
|
||||
const vulkan = @import("vulkan");
|
||||
const vk = vulkan.c;
|
||||
|
||||
const Device = vulkan.Device;
|
||||
|
||||
const log = std.log.scoped(.vulkan);
|
||||
|
||||
/// Parameters for creating a buffer. The OpenGL backend's `target` and
/// `usage` enums have no Vulkan equivalent: `target` (vertex vs.
/// element binding point) gives way to descriptor binding at draw
/// time, and `usage` (static_draw / dynamic_draw / ...) is implied by
/// the host-coherent allocation strategy used here. That leaves the
/// Vulkan `VkBufferUsageFlags` bitmask, which the renderer's
/// `api.*BufferOptions` methods return per buffer kind
/// (VERTEX_BUFFER_BIT for instance buffers, UNIFORM_BUFFER_BIT for
/// uniforms, and so on).
pub const Options = struct {
    /// Device the buffer is created on.
    device: *const Device,
    /// `VkBufferUsageFlagBits` mask for the buffer.
    usage: vk.VkBufferUsageFlags,
};
|
||||
|
||||
pub const Error = error{
    /// One of `vkCreate*`, `vkAllocateMemory`, `vkBindBufferMemory`, or
    /// `vkMapMemory` reported a status other than success.
    VulkanFailed,
    /// `Device.findMemoryType` found no `HOST_VISIBLE | HOST_COHERENT`
    /// memory type that satisfies the buffer's requirements. Not
    /// expected on a real driver, but kept as its own error so it can
    /// be told apart.
    NoSuitableMemoryType,
};
|
||||
|
||||
/// `Buffer(T)`: a `VkBuffer` + backing `VkDeviceMemory` typed to hold
|
||||
/// some number of `T`s. Mirrors `opengl/buffer.zig`'s `Buffer(T)` so
|
||||
/// the renderer's call sites don't need a per-backend branch.
|
||||
pub fn Buffer(comptime T: type) type {
|
||||
return struct {
|
||||
const Self = @This();
|
||||
|
||||
/// Underlying `VkBuffer` handle.
|
||||
buffer: vk.VkBuffer,
|
||||
/// Backing memory. Host-coherent; mappable directly.
|
||||
memory: vk.VkDeviceMemory,
|
||||
/// Options this buffer was allocated with.
|
||||
opts: Options,
|
||||
/// Current capacity, in number of `T`s.
|
||||
len: usize,
|
||||
|
||||
/// Initialize a buffer with capacity for `len` `T`s. Contents
|
||||
/// are uninitialized; call `sync` to populate.
|
||||
pub fn init(opts: Options, len: usize) Error!Self {
|
||||
return try create(opts, len);
|
||||
}
|
||||
|
||||
/// Initialize a buffer pre-filled with the provided data.
|
||||
pub fn initFill(opts: Options, data: []const T) Error!Self {
|
||||
var self = try create(opts, data.len);
|
||||
errdefer self.deinit();
|
||||
try self.write(0, data);
|
||||
return self;
|
||||
}
|
||||
|
||||
/// Hand the (VkBuffer, VkDeviceMemory) pair back to the
|
||||
/// process-wide pool. The pool (see `Vulkan.buffer_pool`)
|
||||
/// holds the entry until the current frame's fence has
|
||||
/// signaled (the GPU is done with our recorded references)
|
||||
/// and then makes it available to a future `Buffer.create`
|
||||
/// call. Returning to the pool solves both:
|
||||
/// - `renderer/image.zig:draw`'s `defer buf.deinit()` no
|
||||
/// longer use-after-frees the in-flight buffer.
|
||||
/// - It avoids the per-frame allocation thrash that
|
||||
/// drove the driver to SIGSEGV on image-heavy frames.
|
||||
///
|
||||
/// MUST be called only from the renderer thread (the path
|
||||
/// whose fence will eventually retire references to this
|
||||
/// buffer in `Frame.complete`). One-shot uploads (atlas
|
||||
/// staging buffers, etc.) that already block on
|
||||
/// `vkQueueWaitIdle` post-submit must use
|
||||
/// `destroyImmediate` instead — they don't share the
|
||||
/// renderer thread's fence cycle.
|
||||
pub fn deinit(self: Self) void {
|
||||
const dev = self.opts.device;
|
||||
const bp = @import("../Vulkan.zig").buffer_pool;
|
||||
const capacity_bytes: u64 = @as(u64, self.len) * @sizeOf(T);
|
||||
bp.release(
|
||||
dev,
|
||||
self.buffer,
|
||||
self.memory,
|
||||
self.opts.usage,
|
||||
capacity_bytes,
|
||||
) catch |err| {
|
||||
// OOM growing the pool. The buffer may still be
|
||||
// referenced by an in-flight command buffer, so we
|
||||
// wait the entire device idle before destroying —
|
||||
// expensive but correct.
|
||||
log.warn(
|
||||
"Buffer.deinit: pool release failed ({}); falling " ++
|
||||
"back to vkDeviceWaitIdle + destroy",
|
||||
.{err},
|
||||
);
|
||||
_ = dev.dispatch.deviceWaitIdle(dev.device);
|
||||
dev.dispatch.destroyBuffer(dev.device, self.buffer, null);
|
||||
dev.dispatch.freeMemory(dev.device, self.memory, null);
|
||||
};
|
||||
}
|
||||
|
||||
/// Destroy the buffer immediately, bypassing the recycle
|
||||
/// pool. The caller MUST ensure no in-flight command buffer
|
||||
/// references this buffer (e.g. by having waited on a fence
|
||||
/// or `vkQueueWaitIdle` covering its submission).
|
||||
///
|
||||
/// Used by short-lived staging buffers like
|
||||
/// `Texture.replaceRegion` whose lifetime is bounded by a
|
||||
/// `OneShot.endAndSubmit` that already drains the queue;
|
||||
/// stuffing those into the pool from a non-renderer thread
|
||||
/// would leak them (the renderer thread's `cycle` runs the
|
||||
/// pool, so an upload thread's pushes never get reused).
|
||||
pub fn destroyImmediate(self: Self) void {
    const owner = self.opts.device;
    const vk_device = owner.device;
    // Buffer handle first, then the memory that was bound to it.
    owner.dispatch.destroyBuffer(vk_device, self.buffer, null);
    owner.dispatch.freeMemory(vk_device, self.memory, null);
}
|
||||
|
||||
/// Replace the buffer's contents. Grows (doubles) if needed —
|
||||
/// matches the OpenGL backend's behavior. Data shorter than
|
||||
/// the current capacity leaves the trailing slots untouched.
|
||||
pub fn sync(self: *Self, data: []const T) Error!void {
    // Over-allocate to twice the requested length so a steadily
    // growing payload does not reallocate on every call.
    const too_small = data.len > self.len;
    if (too_small) {
        try self.grow(data.len * 2);
    }
    return self.write(0, data);
}
|
||||
|
||||
/// Like `sync` but pulls from multiple `ArrayList`s in
|
||||
/// sequence; returns the total number of elements written.
|
||||
pub fn syncFromArrayLists(
    self: *Self,
    lists: []const std.ArrayListUnmanaged(T),
) Error!usize {
    // First pass: how many elements are there across all lists?
    var count: usize = 0;
    for (lists) |l| {
        count += l.items.len;
    }

    // Same doubling policy as `sync`.
    if (count > self.len) {
        try self.grow(count * 2);
    }

    // Second pass: lay the lists out back to back, skipping empties.
    var cursor: usize = 0;
    for (lists) |l| {
        const items = l.items;
        if (items.len != 0) {
            try self.write(cursor, items);
            cursor += items.len;
        }
    }
    return count;
}
|
||||
|
||||
// ---- internals -------------------------------------------
|
||||
|
||||
/// Produce a buffer able to hold `len` elements of `T`. A retired
/// pool entry with matching usage and sufficient capacity is reused
/// when available; otherwise a new VkBuffer is created, backed by
/// freshly allocated HOST_VISIBLE | HOST_COHERENT memory, and bound.
///
/// Returns `error.VulkanFailed` if the byte size overflows `u64` or
/// any Vulkan call fails, and `error.NoSuitableMemoryType` if the
/// device offers no matching memory type.
fn create(opts: Options, len: usize) Error!Self {
    const dev = opts.device;

    // Do the size math in u64 with an overflow check; an overflowing
    // request cannot be serviced, so it is reported as a Vulkan
    // failure. Vulkan also rejects zero-sized buffers, so a request
    // for zero elements is rounded up to one byte.
    const elem_count: u64 = @intCast(len);
    const raw_bytes = std.math.mul(u64, elem_count, @sizeOf(T)) catch
        return error.VulkanFailed;
    const byte_size: u64 = @max(1, raw_bytes);

    // Prefer recycling: a pool hit skips buffer creation and memory
    // allocation entirely. The recycled entry may be larger than
    // requested, so `len` is derived from its recorded capacity.
    const pool = @import("../Vulkan.zig").buffer_pool;
    if (pool.acquire(opts.usage, byte_size)) |recycled| {
        return .{
            .buffer = recycled.buffer,
            .memory = recycled.memory,
            .opts = opts,
            .len = @intCast(recycled.capacity / @sizeOf(T)),
        };
    }

    // Pool miss: create the buffer object.
    const buffer_info: vk.VkBufferCreateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext = null,
        .flags = 0,
        .size = byte_size,
        .usage = opts.usage,
        .sharingMode = vk.VK_SHARING_MODE_EXCLUSIVE,
        .queueFamilyIndexCount = 0,
        .pQueueFamilyIndices = null,
    };
    var buffer: vk.VkBuffer = undefined;
    const create_rc = dev.dispatch.createBuffer(dev.device, &buffer_info, null, &buffer);
    if (create_rc != vk.VK_SUCCESS) {
        log.err("vkCreateBuffer failed: result={}", .{create_rc});
        return error.VulkanFailed;
    }
    errdefer dev.dispatch.destroyBuffer(dev.device, buffer, null);

    // Ask the driver what backing memory the buffer needs.
    var reqs: vk.VkMemoryRequirements = undefined;
    dev.dispatch.getBufferMemoryRequirements(dev.device, buffer, &reqs);

    // Host-visible so `write` can map it; host-coherent so no
    // explicit flush is required after writing.
    const host_flags = vk.VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
        vk.VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
    const type_index = dev.findMemoryType(reqs.memoryTypeBits, host_flags) orelse {
        log.err(
            "no HOST_VISIBLE|HOST_COHERENT memory type for buffer (typeBits=0x{x})",
            .{reqs.memoryTypeBits},
        );
        return error.NoSuitableMemoryType;
    };

    const alloc_info: vk.VkMemoryAllocateInfo = .{
        .sType = vk.VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = null,
        .allocationSize = reqs.size,
        .memoryTypeIndex = type_index,
    };
    var memory: vk.VkDeviceMemory = undefined;
    const alloc_rc = dev.dispatch.allocateMemory(dev.device, &alloc_info, null, &memory);
    if (alloc_rc != vk.VK_SUCCESS) {
        log.err("vkAllocateMemory (buffer) failed: result={}", .{alloc_rc});
        return error.VulkanFailed;
    }
    errdefer dev.dispatch.freeMemory(dev.device, memory, null);

    // Attach the allocation to the buffer at offset 0.
    const bind_rc = dev.dispatch.bindBufferMemory(dev.device, buffer, memory, 0);
    if (bind_rc != vk.VK_SUCCESS) {
        log.err("vkBindBufferMemory failed: result={}", .{bind_rc});
        return error.VulkanFailed;
    }

    return .{
        .buffer = buffer,
        .memory = memory,
        .opts = opts,
        .len = len,
    };
}
|
||||
|
||||
/// Grow the buffer to hold at least `new_len` Ts. Vulkan
|
||||
/// buffers are immutable in size, so we allocate a fresh
|
||||
/// one and then route the old one through the recycle pool
|
||||
/// (it may still be referenced by the in-flight command
|
||||
/// buffer — destroying it directly would race the GPU same
|
||||
/// as `deinit` would). Contents are discarded; callers
|
||||
/// always `sync` immediately after `grow` returns.
|
||||
///
|
||||
/// Order is critical: `create` first, `release` second.
|
||||
/// If we released the old buffer first and `create`
|
||||
/// failed, `self.{buffer,memory}` would be left dangling
|
||||
/// at freed handles, and the caller's eventual
|
||||
/// `self.deinit()` would double-destroy via the pool.
|
||||
fn grow(self: *Self, new_len: usize) Error!void {
    const dev = self.opts.device;
    // Build the replacement BEFORE touching the old pair: if `create`
    // fails we return with `self` still pointing at valid handles.
    const replacement = try create(self.opts, new_len);

    // From here on `self.{buffer,memory}` are the OLD pair. Retire
    // them through the pool, since an in-flight command buffer may
    // still reference them.
    const bp = @import("../Vulkan.zig").buffer_pool;
    // Clamp to the same 1-byte floor `create` allocates and requests
    // with; a capacity-0 entry could never be matched by `acquire`
    // and would sit in the pool until final tear-down.
    const capacity_bytes: u64 = @max(1, @as(u64, self.len) * @sizeOf(T));
    bp.release(
        dev,
        self.buffer,
        self.memory,
        self.opts.usage,
        capacity_bytes,
    ) catch |err| {
        // Same fallback as `deinit`: the pool could not record the
        // entry, so drain the device and destroy the old pair
        // directly. Logged so this slow path is visible.
        log.warn(
            "Buffer.grow: pool release failed ({}); falling " ++
                "back to vkDeviceWaitIdle + destroy",
            .{err},
        );
        _ = dev.dispatch.deviceWaitIdle(dev.device);
        dev.dispatch.destroyBuffer(dev.device, self.buffer, null);
        dev.dispatch.freeMemory(dev.device, self.memory, null);
    };
    // Either way the old pair is gone; adopt the new one.
    self.* = replacement;
}
|
||||
|
||||
/// Copy `data` into the buffer starting at element offset
|
||||
/// `elem_off`. Host-coherent memory means the GPU sees the
|
||||
/// writes without an explicit flush.
|
||||
fn write(self: *const Self, elem_off: usize, data: []const T) Error!void {
    if (data.len == 0) return;
    // Callers grow the buffer before writing; a write past the
    // recorded element capacity would map beyond the allocation.
    std.debug.assert(elem_off <= self.len and data.len <= self.len - elem_off);

    const dev = self.opts.device;
    // View the payload as bytes. Its length is a `usize` that cannot
    // overflow because the slice already exists in memory.
    const src = std.mem.sliceAsBytes(data);
    // Widen BEFORE multiplying so the offset is computed in u64, the
    // same overflow discipline `create` applies to the buffer size.
    const byte_off: u64 = @as(u64, elem_off) * @sizeOf(T);
    const byte_size: u64 = src.len;

    var mapped: ?*anyopaque = null;
    {
        const r = dev.dispatch.mapMemory(
            dev.device,
            self.memory,
            byte_off,
            byte_size,
            0,
            &mapped,
        );
        if (r != vk.VK_SUCCESS) {
            log.err("vkMapMemory failed: result={}", .{r});
            return error.VulkanFailed;
        }
    }
    defer dev.dispatch.unmapMemory(dev.device, self.memory);

    // The mapping already starts at `byte_off`, so copy to its base.
    // Slice bounds use `src.len` (usize) rather than the u64 size so
    // this also compiles on targets where usize is narrower than u64.
    const dst: [*]u8 = @ptrCast(mapped.?);
    @memcpy(dst[0..src.len], src);
}
|
||||
};
|
||||
}
|
||||
|
||||
test {
    // Instantiate the generic with a concrete element type and
    // reference every declaration, so type errors inside the generic
    // body are reported when the tests are compiled.
    const Instantiated = Buffer(u32);
    std.testing.refAllDecls(Instantiated);
}
|
||||
|
|
@ -0,0 +1,189 @@
|
|||
//! Process-wide pool of `(VkBuffer, VkDeviceMemory)` pairs recycled
|
||||
//! across frames on the renderer thread. Solves two problems
|
||||
//! together:
|
||||
//!
|
||||
//! 1. Lifetime: `vulkan/buffer.zig`'s `Buffer.deinit` is called
|
||||
//! mid-frame (by `renderer/image.zig:draw`'s `defer buf.deinit()`)
|
||||
//! while the command buffer that references the buffer hasn't
|
||||
//! been submitted yet. Naive immediate destroy → use-after-free.
|
||||
//! 2. Allocation thrash: a frame with N kitty-image placements
|
||||
//! would otherwise allocate N tiny VkBuffers + VkDeviceMemories
|
||||
//! per frame, every frame. NVIDIA driver SIGSEGVs after a few
|
||||
//! seconds of that.
|
||||
//!
|
||||
//! Multi-thread design: `pending` is THREADLOCAL (each renderer
|
||||
//! thread accumulates the buffers IT released during the current
|
||||
//! frame), while `ready` is process-wide and mutex-protected (any
|
||||
//! thread can recycle from it). Splits/tabs run independent
|
||||
//! renderer threads against the SAME shared VkDevice — a single
|
||||
//! shared `pending` list would let thread A's `Frame.complete`
|
||||
//! retire buffers thread B released but whose fence hasn't
|
||||
//! signaled yet, handing B's still-GPU-in-flight buffer back to a
|
||||
//! new `acquire`. Per-thread pending bounds the visibility of
|
||||
//! each entry to the thread that knows when its fence signals.
|
||||
//!
|
||||
//! Lifecycle:
|
||||
//! - `release(dev, …)` (renderer thread) pushes to THAT thread's
|
||||
//! `pending`.
|
||||
//! - `cycle(dev)` (renderer thread, after `vkWaitForFences` on
|
||||
//! the SAME thread's per-frame fence) moves THAT thread's
|
||||
//! `pending` → shared `ready` under the mutex.
|
||||
//! - `acquire(…)` (any thread) pops a matching entry from `ready`
|
||||
//! under the mutex.
|
||||
//!
|
||||
//! Caller responsibilities:
|
||||
//! - Only call `release` from the renderer thread whose fence
|
||||
//! the frame's GPU work signals; calling from a thread that
|
||||
//! never reaches its own `Frame.complete` would leak entries
|
||||
//! (they sit in that thread's `pending` forever). For one-shot
|
||||
//! uploads from a non-renderer thread (atlas staging), use
|
||||
//! `Buffer.destroyImmediate` instead, which bypasses this
|
||||
//! pool entirely.
|
||||
|
||||
const std = @import("std");
|
||||
const vulkan = @import("vulkan");
|
||||
const vk = vulkan.c;
|
||||
|
||||
const Device = vulkan.Device;
|
||||
|
||||
const log = std.log.scoped(.vulkan);
|
||||
|
||||
/// One recyclable buffer tracked by the pool: the Vulkan handles plus
/// the two keys `acquire` matches on.
pub const Entry = struct {
    /// The buffer handle being recycled.
    buffer: vk.VkBuffer,
    /// The device memory handle released together with `buffer`.
    memory: vk.VkDeviceMemory,
    /// Usage flags recorded at release time; `acquire` only returns an
    /// entry whose flags are exactly equal to the requested ones.
    usage: vk.VkBufferUsageFlags,
    /// Capacity reported by the releaser; `acquire` requires this to be
    /// at least the requested minimum.
    capacity: u64,
};
|
||||
|
||||
/// Guards the process-wide `ready` list. Per-thread `pending` is
|
||||
/// threadlocal and never under this mutex.
|
||||
var ready_mutex: std.Thread.Mutex = .{};
|
||||
|
||||
/// Per-thread pending list. Entries here were released by THIS
|
||||
/// thread during the current frame and are bounded by the
|
||||
/// fence THIS thread will wait on in `Frame.complete`. Moved
|
||||
/// to the shared `ready` list by `cycle()` after that wait
|
||||
/// returns.
|
||||
threadlocal var pending: std.ArrayList(Entry) = .{};
|
||||
|
||||
/// Process-wide ready list. Entries here are provably retired
|
||||
/// (the bounding fence has signaled) and any thread may
|
||||
/// `acquire` them.
|
||||
var ready: std.ArrayList(Entry) = .{};
|
||||
|
||||
/// Queue a buffer for recycling. The buffer cannot be reused
|
||||
/// until the next fence-wait (handled by `cycle`); it sits in
|
||||
/// THIS thread's `pending` until then. Bounded by THIS thread's
|
||||
/// per-frame fence — see the per-thread pending rationale at
|
||||
/// the top of this module.
|
||||
pub fn release(
    dev: *const Device,
    buffer: vk.VkBuffer,
    memory: vk.VkDeviceMemory,
    usage: vk.VkBufferUsageFlags,
    capacity: u64,
) !void {
    // The device is not needed to queue an entry.
    _ = dev;
    const retired: Entry = .{
        .buffer = buffer,
        .memory = memory,
        .usage = usage,
        .capacity = capacity,
    };
    // `pending` is threadlocal, so no lock is taken here.
    return pending.append(std.heap.smp_allocator, retired);
}
|
||||
|
||||
/// Pop a `ready` entry whose usage matches and whose capacity is
|
||||
/// >= the requested size. Linear scan — pools tend to have a
|
||||
/// small number of distinct (usage, size) shapes (image: 48B
|
||||
/// VERTEX, bg_image: 8B VERTEX) so this stays cheap.
|
||||
pub fn acquire(
    usage: vk.VkBufferUsageFlags,
    min_capacity: u64,
) ?Entry {
    ready_mutex.lock();
    defer ready_mutex.unlock();

    // First fit: take the earliest entry with identical usage flags
    // and enough room. Order within `ready` is not preserved.
    for (ready.items, 0..) |candidate, idx| {
        if (candidate.usage != usage) continue;
        if (candidate.capacity < min_capacity) continue;
        return ready.swapRemove(idx);
    }
    return null;
}
|
||||
|
||||
/// Move THIS thread's `pending` entries to the shared `ready` —
|
||||
/// THIS thread's fence has signaled, so the GPU is done with
|
||||
/// every buffer in `pending`. Call from `Frame.complete` after
|
||||
/// `vkWaitForFences`.
|
||||
///
|
||||
/// `dev` is needed only on the OOM fallback path: if `ready`
|
||||
/// can't grow to absorb `pending`, we wait the device idle
|
||||
/// (OUTSIDE the mutex — see below) and then destroy the pending
|
||||
/// entries directly so the next frame doesn't double up on a
|
||||
/// pending list that can never drain.
|
||||
pub fn cycle(dev: *const Device) void {
    // Nothing released since the last cycle: skip the process-wide
    // lock entirely so frames that retire no buffers don't contend
    // with other renderer threads.
    if (pending.items.len == 0) return;

    // Fast path: publish THIS thread's `pending` to the shared
    // `ready` under the lock, then reset pending. On allocation
    // failure the entries must be destroyed instead, but
    // `vkDeviceWaitIdle` is slow and must not run while holding the
    // pool mutex, so the list is moved into a local first.
    var oom_pending: std.ArrayList(Entry) = .{};
    defer oom_pending.deinit(std.heap.smp_allocator);
    {
        ready_mutex.lock();
        defer ready_mutex.unlock();
        if (ready.appendSlice(std.heap.smp_allocator, pending.items)) {
            pending.clearRetainingCapacity();
            return;
        } else |err| {
            log.warn(
                "buffer_pool.cycle: growing ready list failed ({}); " ++
                    "destroying {} pending buffers after vkDeviceWaitIdle",
                .{ err, pending.items.len },
            );
            // Take ownership of the pending storage; the threadlocal
            // list restarts empty. The deferred `deinit` above frees
            // the moved allocation.
            oom_pending = pending;
            pending = .{};
        }
    }

    // Mutex released: other threads can release/acquire/cycle while
    // we drain the device and destroy our entries.
    _ = dev.dispatch.deviceWaitIdle(dev.device);
    for (oom_pending.items) |e| {
        dev.dispatch.destroyBuffer(dev.device, e.buffer, null);
        dev.dispatch.freeMemory(dev.device, e.memory, null);
    }
}
|
||||
|
||||
/// Destroy THIS thread's `pending` entries directly. Call from
|
||||
/// the same thread's `Vulkan.deinit` AFTER `vkWaitForFences`
|
||||
/// on this thread's frame fence — the bounding fence has
|
||||
/// signaled so the GPU is provably done with these buffers.
|
||||
///
|
||||
/// Each renderer thread is responsible for cleaning up its own
|
||||
/// pending list because Zig threadlocal storage is the calling
|
||||
/// thread's; the final-refcount tear-down (`drainShared`) only
|
||||
/// handles the process-wide `ready` list.
|
||||
pub fn drainSelf(dev: *const Device) void {
    for (pending.items) |e| {
        dev.dispatch.destroyBuffer(dev.device, e.buffer, null);
        dev.dispatch.freeMemory(dev.device, e.memory, null);
    }
    // This is a tear-down path, so give the list's backing storage
    // back to the allocator instead of retaining it; a threadlocal
    // list that only gets cleared would keep its allocation after
    // the thread is done with it. The list stays valid and empty,
    // so a later `release` on this thread simply reallocates.
    pending.clearAndFree(std.heap.smp_allocator);
}
|
||||
|
||||
/// Destroy every entry in the shared `ready` list. Call only
|
||||
/// from the FINAL surface tear-down (the path that hits
|
||||
/// `device_refcount == 0`) and only after every other renderer
|
||||
/// thread has already run `drainSelf` on its own pending list.
|
||||
pub fn drainShared(dev: *const Device) void {
    ready_mutex.lock();
    defer ready_mutex.unlock();
    for (ready.items) |e| {
        dev.dispatch.destroyBuffer(dev.device, e.buffer, null);
        dev.dispatch.freeMemory(dev.device, e.memory, null);
    }
    // Final tear-down: release the list's backing storage as well
    // rather than retaining capacity that may never be used again.
    // The list remains valid and empty if the pool is used again.
    ready.clearAndFree(std.heap.smp_allocator);
}
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -1051,10 +1051,26 @@ fn resizeCols(
|
|||
break :wrapped wrapped;
|
||||
};
|
||||
|
||||
// `c.y` is the cursor row from BEFORE this resize. When the
|
||||
// call sequence is `resizeWithoutReflow(new_rows, old_cols)`
|
||||
// → `resizeCols(new_cols)` (the `.lt` arm above), `self.rows`
|
||||
// has already been reduced to the new row count by the time
|
||||
// we run, so a cursor strictly past the new bottom (`c.y >=
|
||||
// self.rows`) would underflow `self.rows - c.y - 1`. Clamp
|
||||
// to zero remaining rows in that case — the cursor
|
||||
// effectively sits on the last visible row after the
|
||||
// shrink. Note: `c.y == self.rows - 1` (cursor AT the new
|
||||
// bottom) does NOT underflow, but the `c.y + 1 >= self.rows`
|
||||
// form still returns 0 there, matching the old
|
||||
// `self.rows - c.y - 1 == 0` result.
|
||||
const remaining_rows: usize = if (c.y + 1 >= self.rows)
|
||||
0
|
||||
else
|
||||
self.rows - c.y - 1;
|
||||
break :cursor .{
|
||||
.tracked_pin = c.pin orelse try self.trackPin(p),
|
||||
.untrack = c.pin == null,
|
||||
.remaining_rows = self.rows - c.y - 1,
|
||||
.remaining_rows = remaining_rows,
|
||||
.wrapped_rows = wrapped,
|
||||
};
|
||||
} else null;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,88 @@
|
|||
//! Build-time tool: compiles one of `src/renderer/vulkan/shaders.zig`'s
|
||||
//! `source.*` constants to SPIR-V and writes the bytes to stdout.
|
||||
//!
|
||||
//! Invoked by `src/build/VulkanSpv.zig` once per (shader_name, stage)
|
||||
//! pair so the renderer can `@embedFile` the resulting .spv blobs
|
||||
//! and call `Module.initFromSpirv` for built-ins instead of going
|
||||
//! through `glslang.vk.compileToSpv` at runtime. The runtime path
|
||||
//! is what populates glslang's per-thread `TPoolAllocator`, which
|
||||
//! never releases its high-water-mark pages (Zig pthreads don't
|
||||
//! run C++ thread_local destructors) — heaptrack attributed ~10 MB
|
||||
//! to that residual leak on the Vulkan variant, exactly the delta
|
||||
//! over OpenGL (which never invokes glslang for its built-ins
|
||||
//! because the GPU driver compiles GLSL natively).
|
||||
//!
|
||||
//! Usage:
|
||||
//! vulkan_spvgen <shader_name> <stage>
|
||||
//!
|
||||
//! Where `shader_name` is one of the public decls of
|
||||
//! `vulkan.shaders.source` (e.g. `bg_color_frag`, `cell_text_vert`)
|
||||
//! and `stage` is `vertex` or `fragment`.
|
||||
//!
|
||||
//! On success: writes binary SPIR-V to stdout, exits 0.
|
||||
//! On failure: writes a diagnostic to stderr, exits 1.
|
||||
|
||||
const std = @import("std");
|
||||
const shaders = @import("renderer/vulkan/shaders.zig");
|
||||
const glslang = @import("glslang");
|
||||
|
||||
/// Entry point for `vulkan_spvgen <shader_name> <stage>`: looks up the
/// named shader source, rewrites it for Vulkan, compiles it to SPIR-V
/// and writes the binary to stdout. Bad usage, an unknown stage or an
/// unknown shader name prints a message to stderr and exits with 1.
pub fn main() !void {
    var gpa_state: std.heap.GeneralPurposeAllocator(.{}) = .{};
    defer _ = gpa_state.deinit();
    const alloc = gpa_state.allocator();

    const argv = try std.process.argsAlloc(alloc);
    defer std.process.argsFree(alloc, argv);

    // Exactly two operands are expected after the program name.
    if (argv.len != 3) {
        std.debug.print(
            "usage: {s} <shader_name> <vertex|fragment>\n",
            .{argv[0]},
        );
        std.process.exit(1);
    }
    const shader_name = argv[1];
    const stage_arg = argv[2];

    const stage = std.meta.stringToEnum(shaders.Stage, stage_arg) orelse {
        std.debug.print("invalid stage: {s}\n", .{stage_arg});
        std.process.exit(1);
    };

    try glslang.init();
    defer glslang.finalize();

    // Map the argv string onto a declaration of `shaders.source`. The
    // loop over declarations is unrolled at compile time; the string
    // comparison against `shader_name` happens at run time.
    const glsl: [:0]const u8 = lookup: {
        inline for (@typeInfo(shaders.source).@"struct".decls) |decl| {
            if (std.mem.eql(u8, decl.name, shader_name)) {
                break :lookup @field(shaders.source, decl.name);
            }
        }
        std.debug.print("unknown shader: {s}\n", .{shader_name});
        std.process.exit(1);
    };

    // Apply the Vulkan-flavor GLSL rewrite before compiling.
    const vulkan_glsl = try shaders.vulkanizeGlsl(alloc, glsl);
    defer alloc.free(vulkan_glsl);

    const spv = try glslang.vk.compileToSpv(
        alloc,
        vulkan_glsl,
        stage.vkBindingStage(),
    );
    defer alloc.free(spv);

    // Emit the SPIR-V in the host's in-memory byte order; the build
    // step captures stdout into the .spv file.
    var out_buf: [4096]u8 = undefined;
    var out = std.fs.File.stdout().writerStreaming(&out_buf);
    try out.interface.writeAll(std.mem.sliceAsBytes(spv));
    try out.end();
}
|
||||
Loading…
Reference in New Issue