qt/wayland: zero-copy dmabuf present via wl_subsurface

The SubsurfacePresenter now binds zwp_linux_dmabuf_v1 (vendored XML;
hermetic build), wraps libghostty's dmabuf fd in a wl_buffer via
create_immed, then attaches it to the subsurface, damages it, and commits. The
compositor scans the buffer out directly — no mmap, no memcpy, no
QImage, no QPainter blit on the terminal pixels. paintEvent skips
its blit when the subsurface path is active so the translucent
QWidget background lets the subsurface show through; chrome (split
dim, bell flash, resize overlay) still paints on top.

Frame delivery is one QMetaObject::invokeMethod(Qt::QueuedConnection)
call per present, with a 2 ms QTimer as a safety net in case a queued
call is ever missed. (The prior 16 ms poll was a leftover from the QImage
path; it capped presentation at about 60 Hz and added up to one frame of
latency.)

C ABI: ghostty_platform_vulkan_s.present grows a bool `image_backed`
parameter. NVIDIA in legacy_copy mode exports the dmabuf from a
VkBuffer that linux-dmabuf-v1 cannot import as a 2D image —
attempting it would trigger an `invalid_wl_buffer` protocol error,
which is fatal for the wl_display connection. Target.present sets
the flag based on Target.tiling; the host takes the subsurface path
only when it is set and falls back to the QImage/QPainter path otherwise.

Verified on NVIDIA RTX 2080 (legacy_copy → image_backed=0 →
path=qimage → no protocol error). Subsurface presenter still
constructs and would activate on AMD/Intel hardware where Phase 1's
direct mode succeeds. Subsequent phases will add vendor-tiled
modifier support so NVIDIA can use the zero-copy path too.

Co-Authored-By: claude-flow <ruv@ruv.net>
pull/12846/head
Nathan 2026-05-24 22:56:07 -05:00
parent 4a890b96bd
commit 9a7a31ac37
10 changed files with 956 additions and 135 deletions

View File

@ -514,7 +514,21 @@ typedef struct {
uint32_t (*queue_family_index)(void* userdata);
// Hand off a rendered frame to the host as a dmabuf fd. The host
// imports it (e.g. into Qt's RHI as a QRhiTexture) and composites.
// imports it (e.g. into Qt's RHI as a QRhiTexture, or attaches to
// a wl_subsurface via linux-dmabuf-v1) and composites.
//
// `image_backed` is true when the dmabuf was exported from a
// VkImage allocated with VK_EXT_image_drm_format_modifier — i.e.
// it's directly importable as a 2D image by the compositor or any
// GPU-side consumer. false when it was exported from a VkBuffer
// (the legacy NVIDIA fallback path where the driver doesn't
// advertise COLOR_ATTACHMENT for the LINEAR modifier on
// exportable images, so libghostty renders into an OPTIMAL image
// and copies the bytes into a linear VkBuffer for export). In the
// !image_backed case the fd is only usable via mmap + CPU
// readback — attempting a linux-dmabuf-v1 import will trigger an
// `invalid_wl_buffer` protocol error.
//
// libghostty retains ownership of the underlying VkDeviceMemory;
// the host must dup() the fd if it needs to hold it past the call.
void (*present)(
@ -524,7 +538,8 @@ typedef struct {
uint64_t drm_modifier,
uint32_t width,
uint32_t height,
uint32_t stride);
uint32_t stride,
bool image_backed);
} ghostty_platform_vulkan_s;
typedef union {

View File

@ -71,6 +71,21 @@ add_custom_command(OUTPUT "${BLUR_CODE}"
COMMAND "${WAYLAND_SCANNER}" private-code "${BLUR_XML}" "${BLUR_CODE}"
DEPENDS "${BLUR_XML}" VERBATIM)
# Generate client glue for the linux-dmabuf-v1 protocol (used by the
# Vulkan present path: wrap libghostty's dmabuf fd in a wl_buffer and
# attach it to the wayland::SubsurfacePresenter's wl_surface). Vendored
# in qt/protocols/ so the build doesn't depend on
# /usr/share/wayland-protocols being installed.
#
# Input is the vendored XML in the source tree; both generated files are
# written to the build tree so the source tree stays clean.
set(DMABUF_XML "${CMAKE_CURRENT_SOURCE_DIR}/protocols/linux-dmabuf-v1.xml")
set(DMABUF_HEADER "${CMAKE_CURRENT_BINARY_DIR}/linux-dmabuf-v1-client-protocol.h")
set(DMABUF_CODE "${CMAKE_CURRENT_BINARY_DIR}/linux-dmabuf-v1-protocol.c")
# Client-side header. DEPENDS on the XML so it is regenerated whenever
# the vendored protocol file changes.
add_custom_command(OUTPUT "${DMABUF_HEADER}"
COMMAND "${WAYLAND_SCANNER}" client-header "${DMABUF_XML}" "${DMABUF_HEADER}"
DEPENDS "${DMABUF_XML}" VERBATIM)
# Protocol glue source, generated with the same `private-code` scanner
# mode as the blur protocol's glue. Like the header, it is regenerated
# when the XML changes.
add_custom_command(OUTPUT "${DMABUF_CODE}"
COMMAND "${WAYLAND_SCANNER}" private-code "${DMABUF_XML}" "${DMABUF_CODE}"
DEPENDS "${DMABUF_XML}" VERBATIM)
# libghostty is built out-of-tree by Zig.
get_filename_component(GHOSTTY_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/.." ABSOLUTE)
set(GHOSTTY_LIB_DIR "${GHOSTTY_ROOT}/zig-out/lib")
@ -152,6 +167,8 @@ add_executable(ghastty
src/XkbTracker.cpp
"${BLUR_CODE}"
"${BLUR_HEADER}"
"${DMABUF_CODE}"
"${DMABUF_HEADER}"
)
# Embed the app icon so it is available even running from the build tree.

View File

@ -0,0 +1,585 @@
<?xml version="1.0" encoding="UTF-8"?>
<protocol name="linux_dmabuf_v1">
<copyright>
Copyright © 2014, 2015 Collabora, Ltd.
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
</copyright>
<interface name="zwp_linux_dmabuf_v1" version="5">
<description summary="factory for creating dmabuf-based wl_buffers">
This interface offers ways to create generic dmabuf-based wl_buffers.
For more information about dmabuf, see:
https://www.kernel.org/doc/html/next/userspace-api/dma-buf-alloc-exchange.html
Clients can use the get_surface_feedback request to get dmabuf feedback
for a particular surface. If the client wants to retrieve feedback not
tied to a surface, they can use the get_default_feedback request.
The following are required from clients:
- Clients must ensure that either all data in the dma-buf is
coherent for all subsequent read access or that coherency is
correctly handled by the underlying kernel-side dma-buf
implementation.
- Don't make any more attachments after sending the buffer to the
compositor. Making more attachments later increases the risk of
the compositor not being able to use (re-import) an existing
dmabuf-based wl_buffer.
The underlying graphics stack must ensure the following:
- The dmabuf file descriptors relayed to the server will stay valid
for the whole lifetime of the wl_buffer. This means the server may
at any time use those fds to import the dmabuf into any kernel
sub-system that might accept it.
However, when the underlying graphics stack fails to deliver the
promise, because of e.g. a device hot-unplug which raises internal
errors, after the wl_buffer has been successfully created the
compositor must not raise protocol errors to the client when dmabuf
import later fails.
To create a wl_buffer from one or more dmabufs, a client creates a
zwp_linux_buffer_params_v1 object with a zwp_linux_dmabuf_v1.create_params
request. All planes required by the intended format are added with
the 'add' request. Finally, a 'create' or 'create_immed' request is
issued, which has the following outcome depending on the import success.
The 'create' request,
- on success, triggers a 'created' event which provides the final
wl_buffer to the client.
- on failure, triggers a 'failed' event to convey that the server
cannot use the dmabufs received from the client.
For the 'create_immed' request,
- on success, the server immediately imports the added dmabufs to
create a wl_buffer. No event is sent from the server in this case.
- on failure, the server can choose to either:
- terminate the client by raising a fatal error.
- mark the wl_buffer as failed, and send a 'failed' event to the
client. If the client uses a failed wl_buffer as an argument to any
request, the behaviour is compositor implementation-defined.
For all DRM formats and unless specified in another protocol extension,
pre-multiplied alpha is used for pixel values.
Unless specified otherwise in another protocol extension, implicit
synchronization is used. In other words, compositors and clients must
wait and signal fences implicitly passed via the DMA-BUF's reservation
mechanism.
</description>
<request name="destroy" type="destructor">
<description summary="unbind the factory">
Objects created through this interface, especially wl_buffers, will
remain valid.
</description>
</request>
<request name="create_params">
<description summary="create a temporary object for buffer parameters">
This temporary object is used to collect multiple dmabuf handles into
a single batch to create a wl_buffer. It can only be used once and
should be destroyed after a 'created' or 'failed' event has been
received.
</description>
<arg name="params_id" type="new_id" interface="zwp_linux_buffer_params_v1"
summary="the new temporary"/>
</request>
<event name="format" deprecated-since="4">
<description summary="supported buffer format">
This event advertises one buffer format that the server supports.
All the supported formats are advertised once when the client
binds to this interface. A roundtrip after binding guarantees
that the client has received all supported formats.
For the definition of the format codes, see the
zwp_linux_buffer_params_v1::create request.
Starting version 4, the format event is deprecated and must not be
sent by compositors. Instead, use get_default_feedback or
get_surface_feedback.
</description>
<arg name="format" type="uint" summary="DRM_FORMAT code"/>
</event>
<event name="modifier" since="3" deprecated-since="4">
<description summary="supported buffer format modifier">
This event advertises the formats that the server supports, along with
the modifiers supported for each format. All the supported modifiers
for all the supported formats are advertised once when the client
binds to this interface. A roundtrip after binding guarantees that
the client has received all supported format-modifier pairs.
For legacy support, DRM_FORMAT_MOD_INVALID (that is, modifier_hi ==
0x00ffffff and modifier_lo == 0xffffffff) is allowed in this event.
It indicates that the server can support the format with an implicit
modifier. When a plane has DRM_FORMAT_MOD_INVALID as its modifier, it
is as if no explicit modifier is specified. The effective modifier
will be derived from the dmabuf.
A compositor that sends valid modifiers and DRM_FORMAT_MOD_INVALID for
a given format supports both explicit modifiers and implicit modifiers.
For the definition of the format and modifier codes, see the
zwp_linux_buffer_params_v1::create and zwp_linux_buffer_params_v1::add
requests.
Starting version 4, the modifier event is deprecated and must not be
sent by compositors. Instead, use get_default_feedback or
get_surface_feedback.
</description>
<arg name="format" type="uint" summary="DRM_FORMAT code"/>
<arg name="modifier_hi" type="uint"
summary="high 32 bits of layout modifier"/>
<arg name="modifier_lo" type="uint"
summary="low 32 bits of layout modifier"/>
</event>
<!-- Version 4 additions -->
<request name="get_default_feedback" since="4">
<description summary="get default feedback">
This request creates a new wp_linux_dmabuf_feedback object not bound
to a particular surface. This object will deliver feedback about dmabuf
parameters to use if the client doesn't support per-surface feedback
(see get_surface_feedback).
</description>
<arg name="id" type="new_id" interface="zwp_linux_dmabuf_feedback_v1"/>
</request>
<request name="get_surface_feedback" since="4">
<description summary="get feedback for a surface">
This request creates a new wp_linux_dmabuf_feedback object for the
specified wl_surface. This object will deliver feedback about dmabuf
parameters to use for buffers attached to this surface.
If the surface is destroyed before the wp_linux_dmabuf_feedback object,
the feedback object becomes inert.
</description>
<arg name="id" type="new_id" interface="zwp_linux_dmabuf_feedback_v1"/>
<arg name="surface" type="object" interface="wl_surface"/>
</request>
</interface>
<interface name="zwp_linux_buffer_params_v1" version="5">
<description summary="parameters for creating a dmabuf-based wl_buffer">
This temporary object is a collection of dmabufs and other
parameters that together form a single logical buffer. The temporary
object may eventually create one wl_buffer unless cancelled by
destroying it before requesting 'create'.
Single-planar formats only require one dmabuf, however
multi-planar formats may require more than one dmabuf. For all
formats, an 'add' request must be called once per plane (even if the
underlying dmabuf fd is identical).
You must use consecutive plane indices ('plane_idx' argument for 'add')
from zero to the number of planes used by the drm_fourcc format code.
All planes required by the format must be given exactly once, but can
be given in any order. Each plane index can only be set once; subsequent
calls with a plane index which has already been set will result in a
plane_set error being generated.
</description>
<enum name="error">
<entry name="already_used" value="0"
summary="the dmabuf_batch object has already been used to create a wl_buffer"/>
<entry name="plane_idx" value="1"
summary="plane index out of bounds"/>
<entry name="plane_set" value="2"
summary="the plane index was already set"/>
<entry name="incomplete" value="3"
summary="missing or too many planes to create a buffer"/>
<entry name="invalid_format" value="4"
summary="format not supported"/>
<entry name="invalid_dimensions" value="5"
summary="invalid width or height"/>
<entry name="out_of_bounds" value="6"
summary="offset + stride * height goes out of dmabuf bounds"/>
<entry name="invalid_wl_buffer" value="7"
summary="invalid wl_buffer resulted from importing dmabufs via
the create_immed request on given buffer_params"/>
</enum>
<request name="destroy" type="destructor">
<description summary="delete this object, used or not">
Cleans up the temporary data sent to the server for dmabuf-based
wl_buffer creation.
</description>
</request>
<request name="add">
<description summary="add a dmabuf to the temporary set">
This request adds one dmabuf to the set in this
zwp_linux_buffer_params_v1.
The 64-bit unsigned value combined from modifier_hi and modifier_lo
is the dmabuf layout modifier. DRM AddFB2 ioctl calls this the
fb modifier, which is defined in drm_mode.h of Linux UAPI.
This is an opaque token. Drivers use this token to express tiling,
compression, etc. driver-specific modifications to the base format
defined by the DRM fourcc code.
Starting from version 4, the invalid_format protocol error is sent if
the format + modifier pair was not advertised as supported.
Starting from version 5, the invalid_format protocol error is sent if
all planes don't use the same modifier.
This request raises the PLANE_IDX error if plane_idx is too large.
The error PLANE_SET is raised if attempting to set a plane that
was already set.
</description>
<arg name="fd" type="fd" summary="dmabuf fd"/>
<arg name="plane_idx" type="uint" summary="plane index"/>
<arg name="offset" type="uint" summary="offset in bytes"/>
<arg name="stride" type="uint" summary="stride in bytes"/>
<arg name="modifier_hi" type="uint"
summary="high 32 bits of layout modifier"/>
<arg name="modifier_lo" type="uint"
summary="low 32 bits of layout modifier"/>
</request>
<enum name="flags" bitfield="true">
<entry name="y_invert" value="1" summary="contents are y-inverted"/>
<entry name="interlaced" value="2" summary="content is interlaced"/>
<entry name="bottom_first" value="4" summary="bottom field first"/>
</enum>
<request name="create">
<description summary="create a wl_buffer from the given dmabufs">
This asks for creation of a wl_buffer from the added dmabuf
buffers. The wl_buffer is not created immediately but returned via
the 'created' event if the dmabuf sharing succeeds. The sharing
may fail at runtime for reasons a client cannot predict, in
which case the 'failed' event is triggered.
The 'format' argument is a DRM_FORMAT code, as defined by the
libdrm's drm_fourcc.h. The Linux kernel's DRM sub-system is the
authoritative source on how the format codes should work.
The 'flags' is a bitfield of the flags defined in enum "flags".
'y_invert' means that the image needs to be y-flipped.
Flag 'interlaced' means that the frame in the buffer is not
progressive as usual, but interlaced. An interlaced buffer as
supported here must always contain both top and bottom fields.
The top field always begins on the first pixel row. The temporal
ordering between the two fields is top field first, unless
'bottom_first' is specified. It is undefined whether 'bottom_first'
is ignored if 'interlaced' is not set.
This protocol does not convey any information about field rate,
duration, or timing, other than the relative ordering between the
two fields in one buffer. A compositor may have to estimate the
intended field rate from the incoming buffer rate. It is undefined
whether the time of receiving wl_surface.commit with a new buffer
attached, applying the wl_surface state, wl_surface.frame callback
trigger, presentation, or any other point in the compositor cycle
is used to measure the frame or field times. There is no support
for detecting missed or late frames/fields/buffers either, and
there is no support whatsoever for cooperating with interlaced
compositor output.
The composited image quality resulting from the use of interlaced
buffers is explicitly undefined. A compositor may use elaborate
hardware features or software to deinterlace and create progressive
output frames from a sequence of interlaced input buffers, or it
may produce substandard image quality. However, compositors that
cannot guarantee reasonable image quality in all cases are recommended
to just reject all interlaced buffers.
Any argument errors, including non-positive width or height,
mismatch between the number of planes and the format, bad
format, bad offset or stride, may be indicated by fatal protocol
errors: INCOMPLETE, INVALID_FORMAT, INVALID_DIMENSIONS,
OUT_OF_BOUNDS.
Dmabuf import errors in the server that are not obvious client
bugs are returned via the 'failed' event as non-fatal. This
allows attempting dmabuf sharing and falling back in the client
if it fails.
This request can be sent only once in the object's lifetime, after
which the only legal request is destroy. This object should be
destroyed after issuing a 'create' request. Attempting to use this
object after issuing 'create' raises ALREADY_USED protocol error.
It is not mandatory to issue 'create'. If a client wants to
cancel the buffer creation, it can just destroy this object.
</description>
<arg name="width" type="int" summary="base plane width in pixels"/>
<arg name="height" type="int" summary="base plane height in pixels"/>
<arg name="format" type="uint" summary="DRM_FORMAT code"/>
<arg name="flags" type="uint" enum="flags" summary="see enum flags"/>
</request>
<event name="created">
<description summary="buffer creation succeeded">
This event indicates that the attempted buffer creation was
successful. It provides the new wl_buffer referencing the dmabuf(s).
Upon receiving this event, the client should destroy the
zwp_linux_buffer_params_v1 object.
</description>
<arg name="buffer" type="new_id" interface="wl_buffer"
summary="the newly created wl_buffer"/>
</event>
<event name="failed">
<description summary="buffer creation failed">
This event indicates that the attempted buffer creation has
failed. It usually means that one of the dmabuf constraints
has not been fulfilled.
Upon receiving this event, the client should destroy the
zwp_linux_buffer_params_v1 object.
</description>
</event>
<request name="create_immed" since="2">
<description summary="immediately create a wl_buffer from the given
dmabufs">
This asks for immediate creation of a wl_buffer by importing the
added dmabufs.
In case of import success, no event is sent from the server, and the
wl_buffer is ready to be used by the client.
Upon import failure, either of the following may happen, as seen fit
by the implementation:
- the client is terminated with one of the following fatal protocol
errors:
- INCOMPLETE, INVALID_FORMAT, INVALID_DIMENSIONS, OUT_OF_BOUNDS,
in case of argument errors such as mismatch between the number
of planes and the format, bad format, non-positive width or
height, or bad offset or stride.
- INVALID_WL_BUFFER, in case the cause for failure is unknown or
platform specific.
- the server creates an invalid wl_buffer, marks it as failed and
sends a 'failed' event to the client. The result of using this
invalid wl_buffer as an argument in any request by the client is
defined by the compositor implementation.
This takes the same arguments as a 'create' request, and obeys the
same restrictions.
</description>
<arg name="buffer_id" type="new_id" interface="wl_buffer"
summary="id for the newly created wl_buffer"/>
<arg name="width" type="int" summary="base plane width in pixels"/>
<arg name="height" type="int" summary="base plane height in pixels"/>
<arg name="format" type="uint" summary="DRM_FORMAT code"/>
<arg name="flags" type="uint" enum="flags" summary="see enum flags"/>
</request>
</interface>
<interface name="zwp_linux_dmabuf_feedback_v1" version="5">
<description summary="dmabuf feedback">
This object advertises dmabuf parameters feedback. This includes the
preferred devices and the supported formats/modifiers.
The parameters are sent once when this object is created and whenever they
change. The done event is always sent once after all parameters have been
sent. When a single parameter changes, all parameters are re-sent by the
compositor.
Compositors can re-send the parameters when the current client buffer
allocations are sub-optimal. Compositors should not re-send the
parameters if re-allocating the buffers would not result in a more optimal
configuration. In particular, compositors should avoid sending the exact
same parameters multiple times in a row.
The tranche_target_device and tranche_formats events are grouped by
tranches of preference. For each tranche, a tranche_target_device, one
tranche_flags and one or more tranche_formats events are sent, followed
by a tranche_done event finishing the list. The tranches are sent in
descending order of preference. All formats and modifiers in the same
tranche have the same preference.
To send parameters, the compositor sends one main_device event, tranches
(each consisting of one tranche_target_device event, one tranche_flags
event, tranche_formats events and then a tranche_done event), then one
done event.
</description>
<request name="destroy" type="destructor">
<description summary="destroy the feedback object">
Using this request a client can tell the server that it is not going to
use the wp_linux_dmabuf_feedback object anymore.
</description>
</request>
<event name="done">
<description summary="all feedback has been sent">
This event is sent after all parameters of a wp_linux_dmabuf_feedback
object have been sent.
This allows changes to the wp_linux_dmabuf_feedback parameters to be
seen as atomic, even if they happen via multiple events.
</description>
</event>
<event name="format_table">
<description summary="format and modifier table">
This event provides a file descriptor which can be memory-mapped to
access the format and modifier table.
The table contains a tightly packed array of consecutive format +
modifier pairs. Each pair is 16 bytes wide. It contains a format as a
32-bit unsigned integer, followed by 4 bytes of unused padding, and a
modifier as a 64-bit unsigned integer. The native endianness is used.
The client must map the file descriptor in read-only private mode.
Compositors are not allowed to mutate the table file contents once this
event has been sent. Instead, compositors must create a new, separate
table file and re-send feedback parameters. Compositors are allowed to
store duplicate format + modifier pairs in the table.
</description>
<arg name="fd" type="fd" summary="table file descriptor"/>
<arg name="size" type="uint" summary="table size, in bytes"/>
</event>
<event name="main_device">
<description summary="preferred main device">
This event advertises the main device that the server prefers to use
when direct scan-out to the target device isn't possible. The
advertised main device may be different for each
wp_linux_dmabuf_feedback object, and may change over time.
There is exactly one main device. The compositor must send at least
one preference tranche with tranche_target_device equal to main_device.
Clients need to create buffers that the main device can import and
read from, otherwise creating the dmabuf wl_buffer will fail (see the
wp_linux_buffer_params.create and create_immed requests for details).
The main device will also likely be kept active by the compositor,
so clients can use it instead of waking up another device for power
savings.
In general the device is a DRM node. The DRM node type (primary vs.
render) is unspecified. Clients must not rely on the compositor sending
a particular node type. Clients cannot check two devices for equality
by comparing the dev_t value.
If explicit modifiers are not supported and the client performs buffer
allocations on a different device than the main device, then the client
must force the buffer to have a linear layout.
</description>
<arg name="device" type="array" summary="device dev_t value"/>
</event>
<event name="tranche_done">
<description summary="a preference tranche has been sent">
This event splits tranche_target_device and tranche_formats events in
preference tranches. It is sent after a set of tranche_target_device
and tranche_formats events; it represents the end of a tranche. The
next tranche will have a lower preference.
</description>
</event>
<event name="tranche_target_device">
<description summary="target device">
This event advertises the target device that the server prefers to use
for a buffer created given this tranche. The advertised target device
may be different for each preference tranche, and may change over time.
There is exactly one target device per tranche.
The target device may be a scan-out device, for example if the
compositor prefers to directly scan-out a buffer created given this
tranche. The target device may be a rendering device, for example if
the compositor prefers to texture from said buffer.
The client can use this hint to allocate the buffer in a way that makes
it accessible from the target device, ideally directly. The buffer must
still be accessible from the main device, either through direct import
or through a potentially more expensive fallback path. If the buffer
can't be directly imported from the main device then clients must be
prepared for the compositor changing the tranche priority or making
wl_buffer creation fail (see the wp_linux_buffer_params.create and
create_immed requests for details).
If the device is a DRM node, the DRM node type (primary vs. render) is
unspecified. Clients must not rely on the compositor sending a
particular node type. Clients cannot check two devices for equality by
comparing the dev_t value.
This event is tied to a preference tranche, see the tranche_done event.
</description>
<arg name="device" type="array" summary="device dev_t value"/>
</event>
<event name="tranche_formats">
<description summary="supported buffer format modifier">
This event advertises the format + modifier combinations that the
compositor supports.
It carries an array of indices, each referring to a format + modifier
pair in the last received format table (see the format_table event).
Each index is a 16-bit unsigned integer in native endianness.
For legacy support, DRM_FORMAT_MOD_INVALID is an allowed modifier.
It indicates that the server can support the format with an implicit
modifier. When a buffer has DRM_FORMAT_MOD_INVALID as its modifier, it
is as if no explicit modifier is specified. The effective modifier
will be derived from the dmabuf.
A compositor that sends valid modifiers and DRM_FORMAT_MOD_INVALID for
a given format supports both explicit modifiers and implicit modifiers.
Compositors must not send duplicate format + modifier pairs within the
same tranche or across two different tranches with the same target
device and flags.
This event is tied to a preference tranche, see the tranche_done event.
For the definition of the format and modifier codes, see the
wp_linux_buffer_params.create request.
</description>
<arg name="indices" type="array" summary="array of 16-bit indexes"/>
</event>
<enum name="tranche_flags" bitfield="true">
<entry name="scanout" value="1" summary="direct scan-out tranche"/>
</enum>
<event name="tranche_flags">
<description summary="tranche flags">
This event sets tranche-specific flags.
The scanout flag is a hint that direct scan-out may be attempted by the
compositor on the target device if the client appropriately allocates a
buffer. How to allocate a buffer that can be scanned out on the target
device is implementation-defined.
This event is tied to a preference tranche, see the tranche_done event.
</description>
<arg name="flags" type="uint" enum="tranche_flags" summary="tranche flags"/>
</event>
</interface>
</protocol>

View File

@ -127,21 +127,18 @@ GhosttySurface::GhosttySurface(ghostty_app_t app, MainWindow *owner,
sc.platform_tag = GHOSTTY_PLATFORM_VULKAN;
sc.platform.vulkan = vk_host->asPlatform(this);
// Polling timer on the GUI thread: every 16ms, check if the
// renderer thread parked a new frame in `m_pending` and swap
// it into `m_image` for paintEvent to pick up.
// GUI-thread frame drain. The renderer thread wakes us per frame
// via QMetaObject::invokeMethod (Qt::QueuedConnection) on each
// present — see `presentVulkanDmabuf`. The 2 ms timer is a
// safety net: if `invokeMethod` ever fails to deliver (the
// earlier QImage-handoff diagnostics suggested this could
// happen), the next tick drains the parked frame within at most
// 2 ms. Idle case has negligible CPU cost because `drainVulkan`
// returns immediately when nothing is pending.
m_vulkanPollTimer = new QTimer(this);
m_vulkanPollTimer->setInterval(16); // ≈60 Hz
connect(m_vulkanPollTimer, &QTimer::timeout, this, [this]() {
QImage frame;
{
QMutexLocker lock(&m_pendingMutex);
if (m_pending.isNull()) return;
frame = std::move(m_pending);
}
m_image = std::move(frame);
update();
});
m_vulkanPollTimer->setInterval(2);
connect(m_vulkanPollTimer, &QTimer::timeout, this,
[this]() { drainVulkan(); });
m_vulkanPollTimer->start();
} else {
sc.platform_tag = GHOSTTY_PLATFORM_OPENGL;
@ -324,9 +321,18 @@ bool GhosttySurface::event(QEvent *e) {
// WA_NativeWindow ensures windowHandle() is non-null even if
// GhosttySurface is embedded in a non-native parent.
setAttribute(Qt::WA_NativeWindow);
if (auto *h = windowHandle())
if (auto *h = windowHandle()) {
m_subsurfacePresenter =
wayland::SubsurfacePresenter::tryCreate(h);
if (m_subsurfacePresenter && m_useVulkan) {
// Flip the Vulkan present path over to the zero-copy
// wl_subsurface route. Release-style store pairs with
// the renderer thread's acquire-load — once it observes
// true, it stops parking QImages and just hands us the
// dmabuf descriptor for compositor handoff.
m_useSubsurface.store(true, std::memory_order_release);
}
}
}
} else if (e->type() == QEvent::Hide) {
ghostty_surface_set_occlusion(m_surface, false);
@ -424,6 +430,14 @@ void GhosttySurface::renderTerminal() {
}
void GhosttySurface::paintEvent(QPaintEvent *) {
// Subsurface zero-copy path: the wl_subsurface IS the terminal
// pixels — they reach the compositor without ever touching our
// QPainter. With `WA_TranslucentBackground` set, the QWidget
// paints transparent over the subsurface so chrome (dim overlay,
// bell flash, resize hint) still composites on top.
const bool subsurfaceActive =
m_useSubsurface.load(std::memory_order_acquire) && m_subsurfacePresenter;
// No frame yet — leave the widget background untouched. With
// `WA_TranslucentBackground` set the area is transparent until
// the first frame imports, matching the OpenGL path. New surfaces
@ -431,18 +445,20 @@ void GhosttySurface::paintEvent(QPaintEvent *) {
// thread has emitted its first frame; the gap is short enough
// that flashing a debug placeholder is more jarring than the
// brief see-through.
if (m_image.isNull()) return;
if (!subsurfaceActive && m_image.isNull()) return;
QPainter painter(this);
// Blit the framebuffer 1:1. m_image carries the device pixel ratio, so
// the QPointF overload draws it at its true logical size. When in
// sync that exactly fills the widget; mid-resize, the previous frame
// stays at its real size in the top-left corner (rather than being
// stretched to the new widget rect, which the user dislikes more
// than the transient gap).
// CompositionMode_Source replaces the transparent widget pixels with
// the terminal image, alpha included, so its translucency is kept.
painter.setCompositionMode(QPainter::CompositionMode_Source);
painter.drawImage(QPointF(0, 0), m_image);
if (!subsurfaceActive) {
// Blit the framebuffer 1:1. m_image carries the device pixel ratio, so
// the QPointF overload draws it at its true logical size. When in
// sync that exactly fills the widget; mid-resize, the previous frame
// stays at its real size in the top-left corner (rather than being
// stretched to the new widget rect, which the user dislikes more
// than the transient gap).
// CompositionMode_Source replaces the transparent widget pixels with
// the terminal image, alpha included, so its translucency is kept.
painter.setCompositionMode(QPainter::CompositionMode_Source);
painter.drawImage(QPointF(0, 0), m_image);
}
// Unfocused-split dimming: a translucent fill over an inactive pane.
// Only split panes (a QSplitter parent) are dimmed, matching GTK.
@ -1343,13 +1359,34 @@ void GhosttySurface::presentVulkanDmabuf(
quint64 drm_modifier,
quint32 width,
quint32 height,
quint32 stride) {
// Called from the renderer thread. We mmap the dmabuf, copy the
// bytes into a QImage, and hand the QImage to the GUI thread for
// paint via `QMetaObject::invokeMethod`. The fd is a borrow (per
// the `ghostty_platform_vulkan_s` contract); libghostty closes it
// when the underlying memory is freed.
(void)drm_modifier; // LINEAR for v1; not used here.
quint32 stride,
bool image_backed) {
// Called from the renderer thread. Two paths, picked per frame
// based on whether the wl_subsurface presenter is up:
//
// Subsurface (zero-copy): park the dmabuf metadata; GUI thread
// wraps the fd in a wl_buffer and attach/commits to our
// wl_subsurface. The compositor scans it out directly.
//
// Fallback (legacy mmap+memcpy): map the fd, copy into a
// QImage, GUI thread paints via QPainter. Used when the
// subsurface presenter failed to come up (e.g. compositor
// missing linux-dmabuf-v1).
//
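// The fd is a borrow per the `ghostty_platform_vulkan_s` contract;
// libghostty closes it when the underlying memory is freed. In
// the subsurface path the wayland client lib SCM_RIGHTS-dups the
// fd so the compositor's reference outlives our park-and-drain.
// NOTE(review): that transfer only happens once the GUI thread runs
// `drainVulkan`, i.e. after this callback has already returned. If
// the borrow is only guaranteed for the duration of the call, the
// parked fd should be dup()'d here and closed after the hand-off —
// confirm against the libghostty `present` contract.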
// The subsurface path requires `image_backed` (i.e. the renderer
// is in `.direct` mode and the fd points at a VkImage). When the
// renderer falls back to `.legacy_copy` — NVIDIA today, the fd is
// a VkBuffer — linux-dmabuf-v1 import would fail with
// `invalid_wl_buffer` and that's a fatal protocol error on the
// wl_display. So we gate per-frame and stay on the QImage path
// when the fd isn't compositor-importable.
const bool useSubsurface =
image_backed && m_useSubsurface.load(std::memory_order_acquire);
// One-shot breadcrumb so logs confirm the dmabuf hand-off is
// wired. Subsequent frames are silent so we don't spam stderr.
@ -1357,15 +1394,31 @@ void GhosttySurface::presentVulkanDmabuf(
if (!logged_first) {
logged_first = true;
std::fprintf(stderr,
"[ghastty] first Vulkan dmabuf frame: fd=%d %ux%u stride=%u fourcc=0x%08x mod=0x%lx\n",
"[ghastty] first Vulkan dmabuf frame: fd=%d %ux%u stride=%u "
"fourcc=0x%08x mod=0x%lx image_backed=%d path=%s\n",
dmabuf_fd, width, height, stride, drm_format,
static_cast<unsigned long>(drm_modifier));
static_cast<unsigned long>(drm_modifier), image_backed ? 1 : 0,
useSubsurface ? "subsurface" : "qimage");
}
// sanity check the size before we allocate / mmap.
if (dmabuf_fd < 0 || width == 0 || height == 0 || stride < width * 4)
return;
if (useSubsurface) {
// Subsurface path. Park the descriptor under the mutex (so
// a concurrent drainVulkan sees a consistent snapshot) and
// wake the GUI thread.
{
QMutexLocker lock(&m_pendingMutex);
m_pendingDmabuf = PendingDmabuf{
dmabuf_fd, drm_format, drm_modifier, width, height, stride,
};
}
QMetaObject::invokeMethod(this, "drainVulkan", Qt::QueuedConnection);
return;
}
// Fallback: mmap + memcpy into a QImage.
const size_t bytes = static_cast<size_t>(stride) * height;
void *mapped = ::mmap(nullptr, bytes, PROT_READ, MAP_SHARED, dmabuf_fd, 0);
if (mapped == MAP_FAILED) {
@ -1373,19 +1426,12 @@ void GhosttySurface::presentVulkanDmabuf(
dmabuf_fd, std::strerror(errno));
return;
}
// QImage holds the pixel data by copying when constructed with
// `Format_ARGB32_Premultiplied` from a buffer with explicit stride.
// We then detach (copy()) so the QImage survives the unmap.
//
// drm_format ARGB8888 (0x34325241 = "AR24") matches QImage's
// ARGB32 byte order on little-endian (B,G,R,A in memory).
//
// We use the *premultiplied* variant because the renderer's
// fragment shaders output premultiplied alpha and the render
// target is `VK_FORMAT_B8G8R8A8_SRGB` (hardware gamma-encodes the
// linear shader output at framebuffer-write time). The bytes
// landing in this buffer are therefore sRGB-encoded premultiplied
// ARGB — exactly what Format_ARGB32_Premultiplied expects.
// ARGB32 byte order on little-endian (B,G,R,A in memory). The
// renderer's fragment shaders output premultiplied alpha into
// `VK_FORMAT_B8G8R8A8_SRGB`, so the buffer is sRGB-encoded
// premultiplied ARGB — exactly what Format_ARGB32_Premultiplied
// expects.
(void)drm_format;
const QImage stamped(
static_cast<const uchar *>(mapped),
@ -1396,20 +1442,45 @@ void GhosttySurface::presentVulkanDmabuf(
QImage owned = stamped.copy();
::munmap(mapped, bytes);
// Tell QPainter the image's pixels are device pixels at the same
// DPR the framebuffer was sized at. Without this, `drawImage` would
// treat the image as logical pixels and re-scale to framebuffer
// pixels on a HiDPI display (DPR>1) — glyphs come out 2× too big.
// `m_fbDpr` is the DPR `syncSurfaceSize` used when telling
// libghostty the framebuffer size, so it matches what the renderer
// actually drew.
if (m_fbDpr > 0) owned.setDevicePixelRatio(m_fbDpr);
// Stash for the GUI-thread polling timer to pick up.
{
QMutexLocker lock(&m_pendingMutex);
m_pending = std::move(owned);
}
QMetaObject::invokeMethod(this, "drainVulkan", Qt::QueuedConnection);
}
// GUI-thread drain step: consumes whichever frame the renderer thread
// parked most recently and hands it to the matching present path.
//
//  * Subsurface (zero-copy): if the presenter is active and a dmabuf
//    descriptor is parked, take it under `m_pendingMutex` and commit
//    it to the wl_subsurface outside the lock, so a renderer-thread
//    `presentVulkanDmabuf` parking the next frame doesn't block on
//    wl_display_flush.
//  * Fallback (QImage): otherwise move a parked QImage into `m_image`
//    and schedule a repaint.
//
// The two paths are chosen per frame by `presentVulkanDmabuf`
// (`image_backed && m_useSubsurface`), so a QImage can be parked even
// while the subsurface flag is set. An empty dmabuf slot therefore
// must NOT end the drain: fall through and check `m_pending` too,
// otherwise non-image-backed frames are parked but never consumed.
//
// Idempotent: returns without side effects when nothing is pending.
void GhosttySurface::drainVulkan() {
  const bool subsurfaceReady =
      m_useSubsurface.load(std::memory_order_acquire) && m_subsurfacePresenter;

  if (subsurfaceReady) {
    PendingDmabuf frame;
    bool haveDmabuf = false;
    {
      QMutexLocker lock(&m_pendingMutex);
      if (m_pendingDmabuf.fd >= 0) {
        frame = m_pendingDmabuf;
        m_pendingDmabuf.fd = -1;  // mark consumed
        haveDmabuf = true;
      }
    }
    if (haveDmabuf) {
      // Wayland buffer scale is integral; round the widget DPR and
      // never go below 1.
      const int scale =
          std::max(1, static_cast<int>(std::lround(devicePixelRatioF())));
      // NOTE(review): `frame.fd` is the renderer's borrowed fd, used
      // here after `presentVulkanDmabuf` returned — confirm the borrow
      // is still valid at this point (or dup() it when parking).
      m_subsurfacePresenter->presentDmabuf(frame.fd, frame.drm_format,
                                           frame.drm_modifier, frame.width,
                                           frame.height, frame.stride, scale);
      return;
    }
    // No dmabuf parked: the renderer may be delivering fallback
    // (non-image-backed) frames, so keep going and drain `m_pending`.
  }

  // Fallback: hand the QImage to paintEvent.
  // NOTE(review): paintEvent skips its blit while the subsurface flag
  // is set (`subsurfaceActive`), so that gating also needs to account
  // for fallback frames before they become visible.
  QImage frame;
  {
    QMutexLocker lock(&m_pendingMutex);
    if (m_pending.isNull()) return;
    frame = std::move(m_pending);
  }
  m_image = std::move(frame);
  update();
}
// Trampoline so `Host.cpp` doesn't need to include the full
@ -1425,10 +1496,12 @@ void presentToGhosttySurface(
uint64_t drm_modifier,
uint32_t width,
uint32_t height,
uint32_t stride) {
uint32_t stride,
bool image_backed) {
if (surface == nullptr) return;
static_cast<GhosttySurface *>(surface)->presentVulkanDmabuf(
dmabuf_fd, drm_format, drm_modifier, width, height, stride);
dmabuf_fd, drm_format, drm_modifier, width, height, stride,
image_backed);
}
} // namespace vulkan

View File

@ -150,20 +150,30 @@ public:
void setPwd(const QString &pwd);
const QString &pwd() const { return m_pwd; }
// Apprt-side entry point for the Vulkan `present` callback.
// libghostty hands us a dmabuf fd pointing at the rendered
// VkImage's memory; we mmap it (LINEAR tiling means the bytes
// are directly readable as BGRA), copy the pixels into a QImage,
// and schedule a repaint. Thread-safe: the callback fires from
// the renderer thread; the QImage handoff goes through
// `QMetaObject::invokeMethod` to the GUI thread.
// Apprt-side entry point for the Vulkan `present` callback. Fires
// on the renderer thread. Parks the frame under `m_pendingMutex` —
// either the dmabuf descriptor (subsurface path) or an
// mmap+memcpy'd QImage (legacy fallback path) — and wakes the GUI
// thread via
// `QMetaObject::invokeMethod(this, "drainVulkan", Qt::QueuedConnection)`.
// The GUI thread either commits the dmabuf to the wl_subsurface
// (zero-copy) or paints the QImage (fallback). A 2 ms safety-net
// poll catches anything `invokeMethod` ever fails to deliver.
Q_INVOKABLE void presentVulkanDmabuf(
int dmabuf_fd,
quint32 drm_format,
quint64 drm_modifier,
quint32 width,
quint32 height,
quint32 stride);
quint32 stride,
bool image_backed);
// GUI-thread drain step: hands the most recent pending frame
// either to the SubsurfacePresenter (zero-copy path) or the
// QImage paint pipeline (fallback). Idempotent: returns
// immediately if nothing's pending. Invoked from the polling
// safety net AND from queued invocations triggered by the
// renderer thread.
Q_INVOKABLE void drainVulkan();
protected:
bool event(QEvent *) override;
@ -244,15 +254,35 @@ private:
// gives way to the actual rendered content.
bool m_useVulkan = false;
// Cross-thread frame handoff for the Vulkan path. `presentVulkanDmabuf`
// (renderer thread) writes a freshly-imported QImage to `m_pending`
// under `m_pendingMutex`; a 16 ms `QTimer` on the GUI thread checks
// `m_pending`, atomically swaps it into `m_image`, and triggers a
// repaint. The polling timer is the simplest reliable cross-thread
// path we could land — the obvious Qt mechanisms
// (QMetaObject::invokeMethod / postEvent) were both not firing
// their queued lambdas under the renderer-thread → GUI-thread
// handoff, see the commit message for diagnostics.
// Cross-thread frame handoff for the Vulkan path. The renderer
// thread calls `presentVulkanDmabuf` with a borrowed dmabuf fd,
// parks the frame under `m_pendingMutex`, and wakes the GUI thread
// with a queued `drainVulkan` invocation
// (QMetaObject::invokeMethod, Qt::QueuedConnection). `drainVulkan`
// routes the frame through the wl_subsurface (zero-copy) when the
// SubsurfacePresenter is available, or falls back to the
// mmap+memcpy+QImage path otherwise. `m_vulkanPollTimer` (2 ms) is
// only a safety net in case a queued invocation is ever missed.
//
// `m_useSubsurface` is set once on the GUI thread when the
// presenter comes up; the renderer thread reads it acquire-style
// to decide which path to populate per frame.
std::atomic<bool> m_useSubsurface{false};
// Subsurface (zero-copy) path: the renderer thread parks the
// borrowed-fd frame descriptor in `m_pendingDmabuf` (guarded by
// `m_pendingMutex`); the GUI-thread drain (`drainVulkan`) takes it
// and hands it to the presenter. `fd == -1` means "nothing parked".
struct PendingDmabuf {
  int fd{-1};               // dmabuf file descriptor; -1 = empty slot
  quint32 drm_format{0};    // DRM fourcc of the buffer
  quint64 drm_modifier{0};  // DRM format modifier
  quint32 width{0};         // buffer width in pixels
  quint32 height{0};        // buffer height in pixels
  quint32 stride{0};        // row pitch in bytes
};
PendingDmabuf m_pendingDmabuf;
// Legacy (mmap+memcpy) path: kept as a fallback when the
// presenter isn't available (e.g. compositor missing
// linux-dmabuf-v1). When the subsurface path is active this stays
// null and paintEvent skips its blit.
QImage m_pending;
QMutex m_pendingMutex;
QTimer *m_vulkanPollTimer = nullptr;

View File

@ -22,7 +22,8 @@ void presentToGhosttySurface(
uint64_t drm_modifier,
uint32_t width,
uint32_t height,
uint32_t stride);
uint32_t stride,
bool image_backed);
namespace {
@ -114,10 +115,11 @@ void cbPresent(
uint64_t drm_modifier,
uint32_t width,
uint32_t height,
uint32_t stride) {
uint32_t stride,
bool image_backed) {
if (ud == nullptr) return;
::vulkan::presentToGhosttySurface(ud, dmabuf_fd, drm_format,
drm_modifier, width, height, stride);
::vulkan::presentToGhosttySurface(ud, dmabuf_fd, drm_format, drm_modifier,
width, height, stride, image_backed);
}
} // namespace

View File

@ -10,6 +10,8 @@
#include <wayland-client.h>
#include "linux-dmabuf-v1-client-protocol.h"
namespace wayland {
namespace {
@ -21,6 +23,7 @@ namespace {
struct PresenterGlobals {
wl_compositor *compositor = nullptr;
wl_subcompositor *subcompositor = nullptr;
zwp_linux_dmabuf_v1 *dmabuf = nullptr;
bool searched = false;
};
@ -33,6 +36,14 @@ void registryGlobal(void *data, wl_registry *registry, uint32_t name,
} else if (std::strcmp(interface, wl_subcompositor_interface.name) == 0) {
g->subcompositor = static_cast<wl_subcompositor *>(
wl_registry_bind(registry, name, &wl_subcompositor_interface, 1));
} else if (std::strcmp(interface, zwp_linux_dmabuf_v1_interface.name) == 0) {
// v3 has `create_immed`, which we want (synchronous wl_buffer
// creation — the v2 async `create` + `created`/`failed` event
// dance would add a layer of callback machinery for no real win
// in our renderer's strict-fd-validity scenario). v4 adds the
// dynamic format/modifier feedback dance; we don't need it yet.
g->dmabuf = static_cast<zwp_linux_dmabuf_v1 *>(wl_registry_bind(
registry, name, &zwp_linux_dmabuf_v1_interface, 3));
}
}
// Registry removal callback. Intentionally empty: nothing in this
// file reacts to a global going away. (Presumably installed as the
// `global_remove` entry next to `registryGlobal`; the listener table
// is not visible in this hunk.)
void registryGlobalRemove(void *, wl_registry *, uint32_t) {}
@ -63,20 +74,32 @@ PresenterGlobals *discoverGlobals(wl_display *display) {
if (globals.subcompositor)
wl_proxy_set_queue(reinterpret_cast<wl_proxy *>(globals.subcompositor),
nullptr);
if (globals.dmabuf)
wl_proxy_set_queue(reinterpret_cast<wl_proxy *>(globals.dmabuf), nullptr);
wl_event_queue_destroy(queue);
return &globals;
}
// Handler for wl_buffer::release. Once the compositor reports it no
// longer samples the buffer, the client-side proxy has no further use
// and is destroyed right away. Only the proxy goes away here: the
// dmabuf memory itself belongs to libghostty and its fd is never
// closed from this callback (the fd passed through
// zwp_linux_buffer_params.add gives the compositor its own reference,
// independent of our wl_buffer).
void bufferRelease(void * /*userdata*/, wl_buffer *released) {
  wl_buffer_destroy(released);
}
// Listener table attached to every wl_buffer that `presentDmabuf`
// creates; its single entry routes the buffer's release event to
// `bufferRelease`, which destroys the proxy.
const wl_buffer_listener kBufferListener = {
bufferRelease,
};
} // namespace
std::unique_ptr<SubsurfacePresenter>
SubsurfacePresenter::tryCreate(QWindow *parent) {
if (!parent) return nullptr;
// The Qt frontend is Wayland-only; if we're not on Wayland, the
// native-interface lookups below would return null anyway, but
// bail explicitly so the log message is useful.
if (!QGuiApplication::platformName().startsWith(QLatin1String("wayland"))) {
std::fprintf(stderr,
"[ghastty] SubsurfacePresenter: not on Wayland QPA\n");
@ -100,13 +123,13 @@ SubsurfacePresenter::tryCreate(QWindow *parent) {
}
PresenterGlobals *g = discoverGlobals(display);
if (!g->compositor || !g->subcompositor) {
if (!g->compositor || !g->subcompositor || !g->dmabuf) {
std::fprintf(stderr,
"[ghastty] SubsurfacePresenter: compositor lacks "
"wl_compositor or wl_subcompositor (compositor=%p "
"subcompositor=%p)\n",
"[ghastty] SubsurfacePresenter: compositor missing required "
"globals (compositor=%p subcompositor=%p dmabuf=%p)\n",
static_cast<void *>(g->compositor),
static_cast<void *>(g->subcompositor));
static_cast<void *>(g->subcompositor),
static_cast<void *>(g->dmabuf));
return nullptr;
}
@ -126,18 +149,13 @@ SubsurfacePresenter::tryCreate(QWindow *parent) {
// for the parent's next commit. `set_desync` is what allows that.
wl_subsurface_set_desync(sub);
// Subsurface covers the parent at the origin. Phase 3 will keep
// this in sync on resize; for Phase 2 it doesn't matter because
// we never attach a buffer.
// Subsurface covers the parent at the origin. Phase 4 will keep
// this in sync on splits/tabs/etc.; for now the GhosttySurface
// forces WA_NativeWindow so its QWindow IS the terminal's native
// wayland surface and (0,0) is correct.
wl_subsurface_set_position(sub, 0, 0);
// Flush so the compositor sees the subsurface creation. We do NOT
// commit the child surface — per protocol an uncommitted subsurface
// with no attached buffer contributes nothing to the parent's
// display, which is exactly the no-behavior-change state we want
// for Phase 2.
wl_display_flush(display);
if (int err = wl_display_get_error(display); err != 0) {
std::fprintf(stderr,
"[ghastty] SubsurfacePresenter: wl_display error %d after "
@ -149,18 +167,22 @@ SubsurfacePresenter::tryCreate(QWindow *parent) {
}
std::fprintf(stderr,
"[ghastty] SubsurfacePresenter: subsurface ready (parent=%p "
"child=%p sub=%p)\n",
static_cast<void *>(parentSurface),
static_cast<void *>(child), static_cast<void *>(sub));
"[ghastty] SubsurfacePresenter: ready (parent=%p child=%p "
"sub=%p dmabuf=%p)\n",
static_cast<void *>(parentSurface), static_cast<void *>(child),
static_cast<void *>(sub), static_cast<void *>(g->dmabuf));
return std::unique_ptr<SubsurfacePresenter>(
new SubsurfacePresenter(display, child, sub));
new SubsurfacePresenter(display, child, sub, g->dmabuf));
}
SubsurfacePresenter::SubsurfacePresenter(wl_display *display, wl_surface *child,
wl_subsurface *sub)
: m_display(display), m_childSurface(child), m_subsurface(sub) {}
wl_subsurface *sub,
zwp_linux_dmabuf_v1 *dmabuf)
: m_display(display),
m_childSurface(child),
m_subsurface(sub),
m_dmabuf(dmabuf) {}
SubsurfacePresenter::~SubsurfacePresenter() {
if (m_subsurface) wl_subsurface_destroy(m_subsurface);
@ -168,4 +190,60 @@ SubsurfacePresenter::~SubsurfacePresenter() {
if (m_display) wl_display_flush(m_display);
}
// Wrap one dmabuf-backed frame in a wl_buffer and commit it to the
// child surface.
//
//   fd            dmabuf file descriptor (borrowed; never closed here)
//   drm_format    DRM fourcc of the buffer
//   drm_modifier  DRM format modifier, split into hi/lo 32-bit halves
//                 for the protocol request
//   width/height  buffer size in pixels
//   stride        row pitch in bytes (single plane, offset 0)
//   buffer_scale  Wayland buffer scale; values below 1 are clamped to 1
//
// Silently returns when the presenter is not usable or the frame
// geometry cannot be expressed in the protocol. Argument errors on
// `create_immed` are protocol errors on the whole wl_display
// connection (which Qt shares), so obviously bad geometry is rejected
// client-side instead of being sent.
void SubsurfacePresenter::presentDmabuf(int fd, uint32_t drm_format,
                                        uint64_t drm_modifier, uint32_t width,
                                        uint32_t height, uint32_t stride,
                                        int buffer_scale) {
  if (fd < 0 || !m_dmabuf || !m_childSurface) return;

  // The protocol carries width/height as int32 and defines an
  // invalid_dimensions error; zero or >INT32_MAX values must never
  // reach the wire. A zero stride cannot describe a real plane either.
  constexpr uint32_t kMaxDimension = 0x7FFFFFFFu;
  if (width == 0 || height == 0 || stride == 0 || width > kMaxDimension ||
      height > kMaxDimension) {
    return;
  }

  if (buffer_scale < 1) buffer_scale = 1;

  // Wrap libghostty's borrowed fd in a wl_buffer.
  zwp_linux_buffer_params_v1 *params =
      zwp_linux_dmabuf_v1_create_params(m_dmabuf);
  if (!params) return;
  zwp_linux_buffer_params_v1_add(
      params, fd, /*plane_idx*/ 0, /*offset*/ 0, stride,
      static_cast<uint32_t>(drm_modifier >> 32),
      static_cast<uint32_t>(drm_modifier & 0xFFFFFFFFu));
  wl_buffer *buffer = zwp_linux_buffer_params_v1_create_immed(
      params, static_cast<int32_t>(width), static_cast<int32_t>(height),
      drm_format, /*flags*/ 0);
  // The params object is single-use; drop it whether or not a buffer
  // came back.
  zwp_linux_buffer_params_v1_destroy(params);
  if (!buffer) {
    std::fprintf(stderr,
                 "[ghastty] SubsurfacePresenter: create_immed returned null "
                 "(fd=%d %ux%u fmt=0x%x mod=0x%llx)\n",
                 fd, width, height, drm_format,
                 static_cast<unsigned long long>(drm_modifier));
    return;
  }
  // `bufferRelease` destroys the proxy once the compositor releases it.
  wl_buffer_add_listener(buffer, &kBufferListener, this);

  // Set buffer scale only when it changes — calling on every present
  // is harmless but the compositor's bookkeeping is cheaper if we
  // skip the redundant request.
  if (buffer_scale != m_lastBufferScale) {
    wl_surface_set_buffer_scale(m_childSurface, buffer_scale);
    m_lastBufferScale = buffer_scale;
  }

  wl_surface_attach(m_childSurface, buffer, 0, 0);
  // Damage the full buffer extent — terminals tend to update large
  // dirty rects anyway (cursor blink, scroll, repaint) so a precise
  // damage region wouldn't save much, and `damage_buffer` (vs
  // `damage`) uses buffer coordinates so it's resolution-correct
  // regardless of buffer_scale.
  wl_surface_damage_buffer(m_childSurface, 0, 0, static_cast<int32_t>(width),
                           static_cast<int32_t>(height));
  wl_surface_commit(m_childSurface);

  // Push the requests out now rather than waiting for the next
  // event-loop flush, then surface any connection-level error.
  wl_display_flush(m_display);
  if (int err = wl_display_get_error(m_display); err != 0) {
    std::fprintf(
        stderr,
        "[ghastty] SubsurfacePresenter: wl_display error %d after present\n",
        err);
  }
}
} // namespace wayland

View File

@ -1,67 +1,75 @@
// Wayland subsurface presenter for `GhosttySurface`.
//
// Scaffolding for the GPU-direct present path (issue: Phase 2 of the
// dmabuf-as-importable-surface rework). This class owns one
// `wl_subsurface` parented to the `GhosttySurface`'s native
// `wl_surface`. Its eventual job is to receive dmabuf fds from
// libghostty's renderer, wrap each one in a `wl_buffer` via
// `zwp_linux_dmabuf_v1`, and attach it to the subsurface so the
// compositor scans it out directly — bypassing the current mmap +
// memcpy + QImage + QPainter pipeline.
//
// In Phase 2 (this commit) the presenter only creates and tears down
// the subsurface. No buffer is ever attached; the existing
// `presentVulkanDmabuf` path keeps running unchanged. The proof this
// scaffolding works is that `ghastty-vulkan` still launches and
// renders identically with no Wayland protocol errors.
// Owns one `wl_subsurface` parented to the `GhosttySurface`'s native
// `wl_surface`, plus the `zwp_linux_dmabuf_v1` machinery for wrapping
// libghostty's dmabuf fds in `wl_buffer`s and attaching them to that
// subsurface. The compositor scans the buffers out directly — no
// mmap, no memcpy, no QImage, no QPainter blit on the present path.
//
// Wayland-only by project decision (the Qt frontend is Wayland-only;
// see `feedback-qt-no-x11` memory). If the host isn't on a Wayland
// QPA platform or the compositor lacks `wl_subcompositor`,
// `tryCreate` returns nullptr — Phase 2 silently ignores that
// because nothing consumes the presenter yet; Phase 3 will treat it
// as fatal.
// QPA platform or the compositor lacks the required globals,
// `tryCreate` returns nullptr — the caller decides whether that's a
// fatal error.
#pragma once
#include <cstdint>
#include <memory>
struct wl_display;
struct wl_subsurface;
struct wl_surface;
struct zwp_linux_dmabuf_v1;
class QWindow;
namespace wayland {
class SubsurfacePresenter {
public:
// Build a subsurface parented to `parent`'s native `wl_surface`.
// Build a subsurface parented to `parent`'s native `wl_surface`,
// and bind the linux-dmabuf-v1 global on the same display.
// Returns nullptr if any prerequisite is missing (non-Wayland QPA,
// null `wl_display`, `wl_subcompositor` unbindable, etc.).
// null `wl_display`, `wl_subcompositor` unbindable,
// `zwp_linux_dmabuf_v1` unbindable, etc.).
//
// Forces `Qt::WA_NativeWindow` on the caller is the *caller's*
// Forcing `Qt::WA_NativeWindow` on the caller is the *caller's*
// responsibility — `tryCreate` only reads `parent->surfaceHandle`.
static std::unique_ptr<SubsurfacePresenter> tryCreate(QWindow *parent);
~SubsurfacePresenter();
// Phase-3 accessors: when the present path moves to dmabuf-attach,
// the caller will need the child `wl_surface` to attach buffers to
// and the `wl_display` to flush. Exposed now so the API surface
// doesn't churn between phases.
wl_surface *childSurface() const { return m_childSurface; }
wl_display *display() const { return m_display; }
// Hand a dmabuf-backed frame to the compositor: wrap the fd in a
// `wl_buffer` via `zwp_linux_buffer_params_v1.create_immed`, attach
// to the subsurface, damage, commit. MUST be called on the Qt GUI
// thread (the thread that owns the wl_display dispatch); the
// renderer thread should marshal frames through a Qt-side queue.
//
// libghostty owns the fd; this method does not close it. The
// wayland client library duplicates the fd kernel-side via
// SCM_RIGHTS, so the compositor's reference survives even after
// libghostty reuses or closes its handle.
//
// `buffer_scale` is the Wayland buffer scale factor (1 for stock
// DPI, 2 for HiDPI, etc.) — set on the child surface so the
// compositor scales the buffer correctly relative to the parent's
// surface-local coordinates.
void presentDmabuf(int fd, uint32_t drm_format, uint64_t drm_modifier,
uint32_t width, uint32_t height, uint32_t stride,
int buffer_scale);
SubsurfacePresenter(const SubsurfacePresenter &) = delete;
SubsurfacePresenter &operator=(const SubsurfacePresenter &) = delete;
private:
SubsurfacePresenter(wl_display *display, wl_surface *child,
wl_subsurface *sub);
wl_subsurface *sub, zwp_linux_dmabuf_v1 *dmabuf);
wl_display *m_display;
wl_surface *m_childSurface;
wl_subsurface *m_subsurface;
zwp_linux_dmabuf_v1 *m_dmabuf;
int m_lastBufferScale = 0;
};
} // namespace wayland

View File

@ -428,7 +428,12 @@ pub const Platform = union(PlatformTag) {
/// host imports it for composition; libghostty retains
/// ownership of the underlying VkDeviceMemory and the fd is
/// valid only for the duration of the call (host must `dup()`
/// if it needs to hold the fd longer).
/// if it needs to hold the fd longer). `image_backed` tells
/// the host whether the fd was exported from a VkImage
/// (directly importable as a 2D image via linux-dmabuf-v1)
/// or from a VkBuffer (only usable via mmap + CPU readback);
/// see `vulkan/Target.zig` and `include/ghostty.h` for the
/// full rationale.
present: *const fn (
?*anyopaque,
i32, // dmabuf fd
@ -437,6 +442,7 @@ pub const Platform = union(PlatformTag) {
u32, // width (pixels)
u32, // height (pixels)
u32, // stride (bytes)
bool, // image_backed
) callconv(.c) void,
};
@ -481,6 +487,7 @@ pub const Platform = union(PlatformTag) {
u32,
u32,
u32,
bool,
) callconv(.c) void,
},
};

View File

@ -747,6 +747,11 @@ pub fn present(self: *const Self) void {
// Fall back to the device's singleton copy when no platform was
// attached (only the smoke test does this).
const platform = if (self.platform) |p| p else self.device.platform;
// `image_backed` is the host's signal that this fd is importable
// by a 2D-image consumer (Wayland linux-dmabuf-v1, Vulkan
// external image, etc.). True in `.direct` mode where the fd was
// exported from a VkImage; false in `.legacy_copy` where it was
// exported from a VkBuffer and can only be read via mmap.
platform.present(
platform.userdata,
self.fd,
@ -755,6 +760,7 @@ pub fn present(self: *const Self) void {
self.width,
self.height,
self.stride,
self.tiling == .direct,
);
}