qt/wayland: pace presents via wl_surface.frame + dedupe queued drains
Each visible pane was committing to the compositor at the renderer's
own rate (125 FPS with custom-shader-animation engaged), regardless
of display refresh. The compositor's per-commit work — dmabuf
import, page-flip scheduling, atomic kernel commit — scales
linearly with our commit rate, so N visible panes paid N × the
overshoot.
Switch the GUI thread to compositor-paced presents via
wl_surface.frame callbacks:
- SubsurfacePresenter registers a wl_callback after each commit
(presentDmabuf + reattachCached); the done handler invokes a
user-set OnFrameReady hook on the GUI thread.
- GhosttySurface gates drainVulkan on m_compositorReady; consume
+ commit flips it false. onWaylandFrameReady (wired into the
presenter) flips it true and re-pumps drainVulkan.
- Renderer still produces at 125 FPS into m_pendingDmabuf with
"latest wins" semantics — intermediate frames between
compositor refreshes get overwritten in the slot, not committed.
Also dedupe queued drainVulkan invocations from presentVulkanDmabuf
via an atomic m_drainScheduled flag. At 125 renderer FPS the
unconditional invokeMethod was 125 Qt-event-queue allocations +
dispatches/sec on the GUI thread, most no-op now that the gate
may be closed. CAS-once: false→true winner posts, others skip;
drainVulkan resets to false before consuming so any frame parked
between clear-and-consume still schedules its own drain.
Wires the gate back to ready on PlatformSurface destroy (no more
frame_done coming for a destroyed presenter) so the rebuilt
presenter's first present after Show goes through immediately.
Co-Authored-By: claude-flow <ruv@ruv.net>
pull/12846/head
parent
1b1c913ba4
commit
e78d3f7beb
|
|
@ -513,6 +513,10 @@ bool GhosttySurface::event(QEvent *e) {
|
|||
}
|
||||
#endif
|
||||
m_subsurfacePresenter.reset();
|
||||
// Presenter is gone — no frame_done callback will arrive.
|
||||
// Reset the gate so the rebuilt presenter's first present
|
||||
// (on next Show) goes through immediately.
|
||||
m_compositorReady = true;
|
||||
}
|
||||
// SurfaceCreated is handled implicitly: the next QEvent::Show
|
||||
// (which Qt always fires after the platform surface comes up)
|
||||
|
|
@ -575,6 +579,16 @@ bool GhosttySurface::event(QEvent *e) {
|
|||
// moveEvent updates it on layout changes.
|
||||
const QPoint pos = mapTo(window(), QPoint(0, 0));
|
||||
m_subsurfacePresenter->setPosition(pos.x(), pos.y());
|
||||
// Wire compositor-paced presents: the presenter requests
|
||||
// a wl_surface.frame callback on every commit; when the
|
||||
// compositor signals ready, onWaylandFrameReady flips
|
||||
// m_compositorReady and re-pumps drainVulkan.
|
||||
m_subsurfacePresenter->setOnFrameReady(
|
||||
[this]() { onWaylandFrameReady(); });
|
||||
// Fresh presenter starts in "ready to present" state —
|
||||
// first present goes through immediately; subsequent
|
||||
// presents wait for the frame callback.
|
||||
m_compositorReady = true;
|
||||
if (m_useVulkan) {
|
||||
m_useSubsurface.store(true, std::memory_order_release);
|
||||
} else {
|
||||
|
|
@ -1812,7 +1826,18 @@ void GhosttySurface::presentVulkanDmabuf(
|
|||
static_cast<unsigned long long>(count));
|
||||
}
|
||||
}
|
||||
QMetaObject::invokeMethod(this, "drainVulkan", Qt::QueuedConnection);
|
||||
// Dedupe queued drainVulkan: only post if no prior post is
|
||||
// still pending. drainVulkan clears m_drainScheduled before
|
||||
// checking the pending dmabuf, so a renderer frame parked
|
||||
// between "clear" and "consume" still kicks a fresh queued
|
||||
// drain. The atomic CAS is wait-free; the false→true winner
|
||||
// posts, others skip.
|
||||
bool was_scheduled = false;
|
||||
if (m_drainScheduled.compare_exchange_strong(
|
||||
was_scheduled, true, std::memory_order_acq_rel)) {
|
||||
QMetaObject::invokeMethod(this, "drainVulkan",
|
||||
Qt::QueuedConnection);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -1872,10 +1897,35 @@ void GhosttySurface::presentVulkanDmabuf(
|
|||
static_cast<unsigned long long>(count));
|
||||
}
|
||||
}
|
||||
QMetaObject::invokeMethod(this, "drainVulkan", Qt::QueuedConnection);
|
||||
// Same dedupe as the subsurface path: at most one queued drain
|
||||
// pending at a time. drainVulkan resets the flag before consuming.
|
||||
bool was_scheduled = false;
|
||||
if (m_drainScheduled.compare_exchange_strong(
|
||||
was_scheduled, true, std::memory_order_acq_rel)) {
|
||||
QMetaObject::invokeMethod(this, "drainVulkan", Qt::QueuedConnection);
|
||||
}
|
||||
}
|
||||
|
||||
// Handler for the presenter's frame-ready notification (wired up via
// setOnFrameReady when the subsurface presenter is created).
//
// Reopens the compositor-paced present gate and then runs drainVulkan
// synchronously so a frame the renderer parked while the gate was
// closed is consumed right away. When nothing is parked the drain has
// nothing to present, and the next renderer-driven queued drainVulkan
// will find the gate already open.
void GhosttySurface::onWaylandFrameReady() {
    // Gate open: the next drain is allowed to commit.
    m_compositorReady = true;

    // Direct call rather than a queued invoke — pump immediately.
    drainVulkan();
}
|
||||
|
||||
void GhosttySurface::drainVulkan() {
|
||||
// Release the dedupe slot FIRST so a renderer frame parked while
|
||||
// this drain runs can immediately schedule its own queued drain
|
||||
// (instead of the next post being silently dropped). The atomic
|
||||
// ordering: clear-before-consume means a presentVulkanDmabuf that
|
||||
// races us still wins the CAS and posts a follow-up drain, so no
|
||||
// parked frame is forgotten.
|
||||
m_drainScheduled.store(false, std::memory_order_release);
|
||||
|
||||
// Subsurface (zero-copy) path: take the parked dmabuf descriptor
|
||||
// under the mutex, then dispatch it to the presenter outside the
|
||||
// lock so a renderer-thread `presentVulkanDmabuf` parking the
|
||||
|
|
@ -1893,6 +1943,15 @@ void GhosttySurface::drainVulkan() {
|
|||
}
|
||||
if (m_useSubsurface.load(std::memory_order_acquire) &&
|
||||
m_subsurfacePresenter) {
|
||||
// Compositor-paced gate. If the compositor hasn't signaled
|
||||
// ready yet (we're mid-flight on the previous commit), leave
|
||||
// the parked descriptor in m_pendingDmabuf — onWaylandFrameReady
|
||||
// will re-run drainVulkan when the wl_surface.frame callback
|
||||
// fires. The renderer may overwrite m_pendingDmabuf with a
|
||||
// newer frame in the meantime; that's fine, "latest wins" is
|
||||
// the right semantic for terminal output that hasn't been
|
||||
// displayed yet.
|
||||
if (!m_compositorReady) return;
|
||||
PendingDmabuf frame;
|
||||
{
|
||||
QMutexLocker lock(&m_pendingMutex);
|
||||
|
|
@ -1912,6 +1971,9 @@ void GhosttySurface::drainVulkan() {
|
|||
// parent wl_surface.commit so the cached state applies and the
|
||||
// frame becomes visible.
|
||||
forceParentCommit();
|
||||
// Mark the gate closed until the compositor's wl_surface.frame
|
||||
// callback fires (onWaylandFrameReady).
|
||||
m_compositorReady = false;
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -207,6 +207,14 @@ public:
|
|||
// renderer thread.
|
||||
Q_INVOKABLE void drainVulkan();
|
||||
|
||||
// Compositor frame-callback handler. Fires (on the GUI thread,
|
||||
// via Wayland event-queue dispatch) when the compositor signals
|
||||
// it's ready to display our next commit. Reopens the present gate
|
||||
// (m_compositorReady) and re-pumps drainVulkan to consume any frame the renderer
|
||||
// parked while we were waiting. Q_INVOKABLE so it can also be
|
||||
// posted via QMetaObject::invokeMethod from a queued context.
|
||||
Q_INVOKABLE void onWaylandFrameReady();
|
||||
|
||||
// Force a wl_surface.commit on our parent native window via the
|
||||
// QtWaylandClient::QWaylandWindow private API. The wl_subsurface
|
||||
// is in sync mode, so child state changes only apply when the
|
||||
|
|
@ -339,6 +347,26 @@ private:
|
|||
quint32 stride = 0;
|
||||
};
|
||||
PendingDmabuf m_pendingDmabuf;
|
||||
// Compositor-paced present gate. True when we can issue the next
|
||||
// wl_subsurface commit; flipped false after a present and back to
|
||||
// true on the wl_surface.frame callback (onWaylandFrameReady). The
|
||||
// renderer thread keeps producing frames at its own rate (125 FPS
|
||||
// with custom-shader-animation), but only the latest parked frame
|
||||
// reaches the compositor on each refresh — drops every-other (or
|
||||
// more) frame to match compositor refresh, halving Wayland-commit
|
||||
// CPU on the GUI thread. GUI-thread only, no atomic.
|
||||
bool m_compositorReady = true;
|
||||
// Dedupes queued drainVulkan invocations posted from the renderer
|
||||
// thread. Each renderer-thread `presentVulkanDmabuf` used to post
|
||||
// a QueuedConnection invokeMethod unconditionally — at 125 FPS
|
||||
// that's 125 Qt-event-queue allocations + dispatches per second,
|
||||
// most of which no-op now that the compositor gate may not yet
|
||||
// be ready. CAS to true to claim the slot; drainVulkan resets to
|
||||
// false before consuming so a follow-up renderer frame can
|
||||
// schedule its own drain. The pending-dmabuf "latest wins"
|
||||
// semantic guarantees the renderer's newest frame is what
|
||||
// drainVulkan sees regardless of how many parks happened between.
|
||||
std::atomic<bool> m_drainScheduled{false};
|
||||
// Legacy (mmap+memcpy) path: kept as a fallback when the
|
||||
// presenter isn't available (e.g. compositor missing
|
||||
// linux-dmabuf-v1). When the subsurface path is active this stays
|
||||
|
|
|
|||
|
|
@ -239,6 +239,29 @@ const wl_buffer_listener kBufferListener = {
|
|||
bufferRelease,
|
||||
};
|
||||
|
||||
// wl_callback::done listener for compositor-paced presents. Single-
|
||||
// shot per callback — the proxy is destroyed here and the
|
||||
// presenter's m_frameCallback field is cleared so the next present
|
||||
// knows to register a fresh one. After cleanup, invoke the
|
||||
// presenter's onFrameReady hook (set by GhosttySurface to pump the
|
||||
// next pending frame).
|
||||
void frameCallbackDone(void *data, wl_callback *cb, uint32_t /*time*/) {
|
||||
auto *p = static_cast<wayland::SubsurfacePresenter *>(data);
|
||||
// Defensive: if the listener fires after the proxy was destroyed
|
||||
// by ~SubsurfacePresenter (Wayland guarantees no events on a
|
||||
// destroyed proxy, so this shouldn't happen, but if a future
|
||||
// refactor destroys the presenter before flushing the queue we'd
|
||||
// rather no-op than UAF).
|
||||
if (!p) {
|
||||
wl_callback_destroy(cb);
|
||||
return;
|
||||
}
|
||||
p->onFrameCallbackDone(cb);
|
||||
}
|
||||
// Listener table attached to every wl_surface.frame callback the
// presenter requests; its only entry is the `done` handler.
const wl_callback_listener kFrameCallbackListener{frameCallbackDone};
|
||||
|
||||
} // namespace
|
||||
|
||||
void primeDmabufModifierRegistry() {
|
||||
|
|
@ -445,6 +468,14 @@ SubsurfacePresenter::SubsurfacePresenter(wl_display *display, wl_surface *child,
|
|||
}
|
||||
|
||||
SubsurfacePresenter::~SubsurfacePresenter() {
|
||||
// Destroy the pending frame callback first: subsequent dispatches
|
||||
// of the wl_event_queue won't deliver its done event (Wayland
|
||||
// guarantees no events on a destroyed proxy), so the dangling
|
||||
// `this` pointer in the listener data can't fire.
|
||||
if (m_frameCallback) {
|
||||
wl_callback_destroy(m_frameCallback);
|
||||
m_frameCallback = nullptr;
|
||||
}
|
||||
// Destroy the cached wl_buffer BEFORE the child surface — the
|
||||
// buffer may still be attached. wl_buffer_destroy is safe whether
|
||||
// or not the compositor has released it (Wayland guarantees no
|
||||
|
|
@ -460,6 +491,22 @@ SubsurfacePresenter::~SubsurfacePresenter() {
|
|||
if (m_display) wl_display_flush(m_display);
|
||||
}
|
||||
|
||||
// Completes a wl_surface.frame round trip for this presenter.
//
// `cb` is the single-shot callback proxy whose done event just fired.
// The tracked slot is released only when `cb` is the callback we have
// on record (a mismatch is not expected, but the slot is left alone in
// that case), the proxy is always destroyed, and finally the consumer's
// onFrameReady hook — if one was installed via setOnFrameReady — is
// invoked so it can present the next frame.
void SubsurfacePresenter::onFrameCallbackDone(wl_callback *cb) {
    const bool isTrackedCallback = (m_frameCallback == cb);
    if (isTrackedCallback) {
        // Slot is free again: the next present registers a fresh callback.
        m_frameCallback = nullptr;
    }

    // The proxy is spent regardless of whether it matched our slot.
    wl_callback_destroy(cb);

    // Nothing to notify when no hook has been installed.
    if (!m_onFrameReady) {
        return;
    }
    // Per the hook's contract this runs on the thread dispatching Wayland
    // events, so the consumer may touch its GUI-thread state directly.
    m_onFrameReady();
}
|
||||
|
||||
void SubsurfacePresenter::presentDmabuf(int fd, uint32_t drm_format,
|
||||
uint64_t drm_modifier, uint32_t width,
|
||||
uint32_t height, uint32_t stride,
|
||||
|
|
@ -622,6 +669,18 @@ void SubsurfacePresenter::presentDmabuf(int fd, uint32_t drm_format,
|
|||
// `damage`) uses buffer coordinates so it's resolution-correct.
|
||||
wl_surface_damage_buffer(m_childSurface, 0, 0, static_cast<int32_t>(width),
|
||||
static_cast<int32_t>(height));
|
||||
// Register a wl_surface.frame callback BEFORE the commit so the
|
||||
// compositor knows we want to be paced. Only request a new one if
|
||||
// none is outstanding — re-requesting before the prior fires would
|
||||
// leak callbacks. The done handler clears m_frameCallback, so the
|
||||
// next call here will register fresh.
|
||||
if (!m_frameCallback) {
|
||||
m_frameCallback = wl_surface_frame(m_childSurface);
|
||||
if (m_frameCallback) {
|
||||
wl_callback_add_listener(m_frameCallback, &kFrameCallbackListener,
|
||||
this);
|
||||
}
|
||||
}
|
||||
wl_surface_commit(m_childSurface);
|
||||
|
||||
wl_display_flush(m_display);
|
||||
|
|
@ -695,6 +754,18 @@ void SubsurfacePresenter::reattachCached() {
|
|||
wl_surface_damage_buffer(m_childSurface, 0, 0,
|
||||
static_cast<int32_t>(m_cachedWidth),
|
||||
static_cast<int32_t>(m_cachedHeight));
|
||||
// Register a frame callback so the consumer's pacing state machine
|
||||
// gets a "compositor is ready" event after this re-attach too —
|
||||
// otherwise a tab switch could leave m_compositorReady stuck false
|
||||
// (a stale frame callback from the pre-Hide commit may have been
|
||||
// discarded by the compositor on the NULL attach).
|
||||
if (!m_frameCallback) {
|
||||
m_frameCallback = wl_surface_frame(m_childSurface);
|
||||
if (m_frameCallback) {
|
||||
wl_callback_add_listener(m_frameCallback, &kFrameCallbackListener,
|
||||
this);
|
||||
}
|
||||
}
|
||||
wl_surface_commit(m_childSurface);
|
||||
wl_display_flush(m_display);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,9 +22,11 @@
|
|||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
|
||||
struct wl_buffer;
|
||||
struct wl_callback;
|
||||
struct wl_display;
|
||||
struct wl_subsurface;
|
||||
struct wl_surface;
|
||||
|
|
@ -129,6 +131,19 @@ public:
|
|||
// the subsurface becomes visible again.
|
||||
void hide();
|
||||
|
||||
// Register a callback fired (on the GUI thread, via Wayland event
|
||||
// queue dispatch) when the compositor signals it's ready for the
|
||||
// next frame on this subsurface. Lets the caller pace presents at
|
||||
// the compositor's refresh rate instead of unconditionally
|
||||
// committing every renderer frame.
|
||||
//
|
||||
// The callback fires AT MOST ONCE per `presentDmabuf` /
|
||||
// `reattachCached` call — the underlying `wl_surface.frame`
|
||||
// request is single-shot per commit. After the callback fires,
|
||||
// the next present's commit will register a new frame_callback.
|
||||
using OnFrameReady = std::function<void()>;
|
||||
void setOnFrameReady(OnFrameReady cb) { m_onFrameReady = std::move(cb); }
|
||||
|
||||
// Re-attach + commit the most recently cached wl_buffer, if any.
|
||||
// Called from `QEvent::Show` so a tab-switch / re-show sees the
|
||||
// last frame immediately rather than a transparent area while
|
||||
|
|
@ -146,6 +161,15 @@ public:
|
|||
static void onPreferredScale(void *data, wp_fractional_scale_v1 *,
|
||||
uint32_t scale);
|
||||
|
||||
// wl_callback::done dispatch from the file-scope listener. Public
|
||||
// for the same reason as onPreferredScale: C-style Wayland
|
||||
// listeners need a static-callable entry point and we route the
|
||||
// result back into the owning presenter via the listener's `data`
|
||||
// pointer. Destroys the callback proxy, clears m_frameCallback,
|
||||
// and invokes m_onFrameReady if set. Not part of the API for
|
||||
// other call sites.
|
||||
void onFrameCallbackDone(wl_callback *cb);
|
||||
|
||||
SubsurfacePresenter(const SubsurfacePresenter &) = delete;
|
||||
SubsurfacePresenter &operator=(const SubsurfacePresenter &) = delete;
|
||||
|
||||
|
|
@ -167,6 +191,14 @@ private:
|
|||
int m_lastX = 0;
|
||||
int m_lastY = 0;
|
||||
|
||||
// Pending wl_surface.frame callback for compositor-paced presents.
|
||||
// Null between frame_done and the next presentDmabuf / reattachCached
|
||||
// commit; non-null from that commit until frame_done. Single-shot — the
|
||||
// done handler destroys it and clears the field, then invokes
|
||||
// `m_onFrameReady` if set.
|
||||
wl_callback *m_frameCallback = nullptr;
|
||||
OnFrameReady m_onFrameReady;
|
||||
|
||||
// wl_buffer cache. libghostty re-uses the same dmabuf fd across
|
||||
// frames until the next Target.deinit (i.e. until a resize), so
|
||||
// we can wrap the fd in a wl_buffer ONCE and re-attach it every
|
||||
|
|
|
|||
Loading…
Reference in New Issue