diff --git a/qt/src/wayland/SubsurfacePresenter.cpp b/qt/src/wayland/SubsurfacePresenter.cpp index 792be3c4b..b5703dd16 100644 --- a/qt/src/wayland/SubsurfacePresenter.cpp +++ b/qt/src/wayland/SubsurfacePresenter.cpp @@ -6,6 +6,7 @@ #include #include #include +#include <sys/stat.h> // ::fstat — wl_buffer cache identity via st_ino #include #include @@ -578,15 +579,21 @@ void SubsurfacePresenter::presentDmabuf(int fd, uint32_t drm_format, } // Wrap libghostty's borrowed fd in a wl_buffer. Cached across - // frames: libghostty re-uses the same dmabuf fd until the next - // Target.deinit (a resize), so the shape inputs below stay stable - // for hundreds-to-thousands of consecutive frames at an animated- - // shader frame rate. Pre-cache, every present round-tripped - // `create_immed` to the compositor (Wayland sync call + compositor- - // side dmabuf import) and destroyed the buffer on release — ~half - // the GUI-thread CPU at 125 FPS. + // frames by (kernel inode, shape) — see m_cachedInode in the + // header for the full rationale. fstat the dmabuf fd to get the + // anon_inode that uniquely identifies the dma-buf object; it's + // stable across the dup that GhosttySurface did before parking, + // and changes only when libghostty allocates a new Target. + // fstat failure (rare; would indicate a closed or invalid fd — + // the `fd < 0` check above only rejects negative values) leaves + // inode at 0, which forces a cache miss → create_immed will + // likely fail too, but the error path there already logs cleanly. 
+ struct stat st; + unsigned long inode = 0; + if (::fstat(fd, &st) == 0) inode = static_cast<unsigned long>(st.st_ino); const bool cache_hit = m_cachedBuffer != nullptr && - m_cachedFd == fd && + inode != 0 && + m_cachedInode == inode && m_cachedWidth == width && m_cachedHeight == height && m_cachedStride == stride && @@ -604,7 +611,7 @@ void SubsurfacePresenter::presentDmabuf(int fd, uint32_t drm_format, if (m_cachedBuffer) { wl_buffer_destroy(m_cachedBuffer); m_cachedBuffer = nullptr; - m_cachedFd = -1; + m_cachedInode = 0; } zwp_linux_buffer_params_v1 *params = zwp_linux_dmabuf_v1_create_params(m_dmabuf); @@ -639,7 +646,7 @@ void SubsurfacePresenter::presentDmabuf(int fd, uint32_t drm_format, // safe). wl_buffer_add_listener(buffer, &kBufferListener, nullptr); m_cachedBuffer = buffer; - m_cachedFd = fd; + m_cachedInode = inode; m_cachedWidth = width; m_cachedHeight = height; m_cachedStride = stride; diff --git a/qt/src/wayland/SubsurfacePresenter.h b/qt/src/wayland/SubsurfacePresenter.h index 60658425b..086834023 100644 --- a/qt/src/wayland/SubsurfacePresenter.h +++ b/qt/src/wayland/SubsurfacePresenter.h @@ -199,16 +199,28 @@ private: wl_callback *m_frameCallback = nullptr; OnFrameReady m_onFrameReady; - // wl_buffer cache. libghostty re-uses the same dmabuf fd across - // frames until the next Target.deinit (i.e. until a resize), so - // we can wrap the fd in a wl_buffer ONCE and re-attach it every - // frame instead of round-tripping `create_immed` per present. - // create_immed costs a Wayland round-trip + compositor-side - // dmabuf import; at 125 FPS (animated post shader) with multiple - // panes this was ~half of the GUI-thread CPU at idle. Invalidate - // the cache when any of the dmabuf-shape inputs change. + // wl_buffer cache keyed by dma-buf identity (kernel inode of the + // anon_inode backing the dma-buf, which is unique per Target + // regardless of fd-number reuse) plus the layout-relevant shape. 
+ // libghostty re-uses the same dmabuf across frames until the + // next Target.deinit (resize); cache hits skip the create_immed + // round-trip + compositor-side dmabuf import that dominated + // GUI-thread CPU at 125 FPS. + // + // We can't key on the caller's fd value because GhosttySurface + // now dups the fd on the renderer thread (to outlive libghostty's + // close — see 22713b0d3) so the value is fresh per frame. Inode + // identity is stable across our dup AND across libghostty's + // close → reopen cycles, so cache invalidation matches Target + // identity exactly: same Target → same inode → cache hit; new + // Target → new inode → cache miss → recreate. + // + // The cache stores only the wl_buffer; the compositor received + // its own SCM_RIGHTS duplicate of the fd at create_immed time, + // so the cached wl_buffer doesn't need our fd to outlive the + // call. The caller owns and closes its own dup. wl_buffer *m_cachedBuffer = nullptr; - int m_cachedFd = -1; + unsigned long m_cachedInode = 0; // 0 = empty cache (anon_inode ino > 0) uint32_t m_cachedWidth = 0; uint32_t m_cachedHeight = 0; uint32_t m_cachedStride = 0;