diff --git a/include/ghostty.h b/include/ghostty.h index fbfe3ee2c..9332def7e 100644 --- a/include/ghostty.h +++ b/include/ghostty.h @@ -1094,6 +1094,7 @@ GHOSTTY_API void ghostty_app_keyboard_changed(ghostty_app_t); GHOSTTY_API void ghostty_app_open_config(ghostty_app_t); GHOSTTY_API void ghostty_app_update_config(ghostty_app_t, ghostty_config_t); GHOSTTY_API bool ghostty_app_needs_confirm_quit(ghostty_app_t); +GHOSTTY_API void ghostty_app_quit(ghostty_app_t, uint64_t); GHOSTTY_API bool ghostty_app_has_global_keybinds(ghostty_app_t); GHOSTTY_API void ghostty_app_set_color_scheme(ghostty_app_t, ghostty_color_scheme_e); diff --git a/macos/Sources/App/macOS/AppDelegate.swift b/macos/Sources/App/macOS/AppDelegate.swift index 9700a31ae..60542d70b 100644 --- a/macos/Sources/App/macOS/AppDelegate.swift +++ b/macos/Sources/App/macOS/AppDelegate.swift @@ -362,14 +362,31 @@ class AppDelegate: NSObject, return derivedConfig.shouldQuitAfterLastWindowClosed } + /// Initiates graceful application termination by giving every child + /// process a chance to exit before the app terminates. + /// + /// The wait happens inside libghostty: it sends SIGHUP to all processes + /// in parallel, then waits up to `timeout` (total) for them to exit. + /// This blocks the main thread, which is acceptable here because we've + /// already committed to terminating. + private func terminateGracefully( + timeout: TimeInterval = 0.5 + ) -> NSApplication.TerminateReply { + if let app = ghostty.app { + Ghostty.logger.debug("waiting for child processes to exit") + ghostty_app_quit(app, UInt64(max(0, timeout * 1000))) + } + return .terminateNow + } + func applicationShouldTerminate(_ sender: NSApplication) -> NSApplication.TerminateReply { let windows = NSApplication.shared.windows - if windows.isEmpty { return .terminateNow } + if windows.isEmpty { return terminateGracefully() } // If we've already accepted to install an update, then we don't need to // confirm quit. The user is already expecting the update to happen. if updateController.isInstalling { - return .terminateNow + return terminateGracefully() } // This probably isn't fully safe. The isEmpty check above is aspirational, it doesn't @@ -380,7 +397,7 @@ class AppDelegate: NSObject, // here because I don't want to remove it in a patch release cycle but we should // target removing it soon. if (windows.allSatisfy { !$0.isVisible }) { - return .terminateNow + return terminateGracefully() } // If the user is shutting down, restarting, or logging out, we don't confirm quit. @@ -393,8 +410,7 @@ class AppDelegate: NSObject, if let why = event.attributeDescriptor(forKeyword: keyword) { switch why.typeCodeValue { case kAEShutDown, kAERestart, kAEReallyLogOut: - return .terminateNow - + return terminateGracefully() default: break } @@ -402,7 +418,7 @@ class AppDelegate: NSObject, } // If our app says we don't need to confirm, we can exit now. - if !ghostty.needsConfirmQuit { return .terminateNow } + if !ghostty.needsConfirmQuit { return terminateGracefully() } return terminate() } @@ -1301,7 +1317,7 @@ extension AppDelegate { .filter { !$0.windowCanBeClosedWithoutConfirmation() } guard !controllersNeedConfirmation.isEmpty else { - return .terminateNow + return terminateGracefully() } if controllersNeedConfirmation.count == 1 { @@ -1313,6 +1329,7 @@ extension AppDelegate { ) if [.OK, .alertFirstButtonReturn].contains(response) { + await MainActor.run { _ = self.terminateGracefully() } await NSApp.reply(toApplicationShouldTerminate: true) } else { await NSApp.reply(toApplicationShouldTerminate: false) @@ -1334,7 +1351,7 @@ extension AppDelegate { reviewWindows(controllersNeedConfirmation) return .terminateLater case .alertSecondButtonReturn: - return .terminateNow + return terminateGracefully() default: return .terminateCancel } @@ -1360,6 +1377,7 @@ extension AppDelegate { return } } + await MainActor.run { _ = self.terminateGracefully() } await NSApp.reply(toApplicationShouldTerminate: true) } } diff --git a/src/App.zig b/src/App.zig index 93ee7dea1..3200baa96 100644 --- a/src/App.zig +++ b/src/App.zig @@ -102,6 +102,40 @@ pub fn init( }; } +/// Send SIGHUP to every surface's child process and wait up to timeout_ms +/// (total, across all surfaces) for them to exit. Intended to be called +/// once during app shutdown so that children get a chance to clean up +/// (e.g., shells writing history) before the apprt tears down each +/// surface. Surfaces and resources are not freed here; that happens via +/// the normal apprt deinit path afterwards. +/// +/// We drain the mailbox during the wait so that child_exited messages +/// posted by IO threads are observed; otherwise we'd block the same +/// main thread that the message pump runs on. +pub fn quit(self: *App, rt_app: *apprt.App, timeout_ms: u64) void { + // Hang up all children in parallel so they shut down concurrently + // rather than one-at-a-time during sequential surface deinits. + for (self.surfaces.items) |surface| surface.core().hangupProcess(); + + const deadline: i64 = std.time.milliTimestamp() +| @as(i64, @intCast(timeout_ms)); + while (std.time.milliTimestamp() < deadline) { + self.drainMailbox(rt_app) catch |err| + log.warn("error draining mailbox during quit err={}", .{err}); + + var all_exited = true; + for (self.surfaces.items) |surface| { + if (!surface.core().child_exited) { + all_exited = false; + break; + } + } + if (all_exited) return; + std.Thread.sleep(10 * std.time.ns_per_ms); + } + + log.warn("not all child processes exited within {}ms", .{timeout_ms}); +} + pub fn deinit(self: *App) void { // Clean up all our surfaces for (self.surfaces.items) |surface| surface.deinit(); diff --git a/src/Surface.zig b/src/Surface.zig index 99c740c89..e8b16109b 100644 --- a/src/Surface.zig +++ b/src/Surface.zig @@ -843,6 +843,17 @@ pub fn close(self: *Surface) void { self.rt_surface.close(self.needsConfirmQuit()); } +/// Send SIGHUP to the child process without waiting for it to exit. +/// child_exited will be set once the child process has stopped. +/// +/// Used during app shutdown to hang up all children in parallel before +/// any individual surface starts its (bounded) wait in deinit. +pub fn hangupProcess(self: *Surface) void { + switch (self.io.backend) { + .exec => |*exec| exec.subprocess.hangup(), + } +} + /// Returns a mailbox that can be used to send messages to this surface. inline fn surfaceMailbox(self: *Surface) Mailbox { return .{ diff --git a/src/apprt/embedded.zig b/src/apprt/embedded.zig index 7310159cc..59d0fddb8 100644 --- a/src/apprt/embedded.zig +++ b/src/apprt/embedded.zig @@ -1442,6 +1442,15 @@ pub const CAPI = struct { core_app.destroy(); } + /// Begin a graceful application shutdown by sending SIGHUP to every + /// surface's child process and waiting up to timeout_ms (total, across + /// all children) for them to exit. Intended to be called once during + /// app termination so children can clean up (e.g., shells writing + /// history) before the apprt tears down each surface. + export fn ghostty_app_quit(v: *App, timeout_ms: u64) void { + v.core_app.quit(v, timeout_ms); + } + /// Update the focused state of the app. export fn ghostty_app_set_focus( app: *App, diff --git a/src/termio/Exec.zig b/src/termio/Exec.zig index 87d47807c..26f58570a 100644 --- a/src/termio/Exec.zig +++ b/src/termio/Exec.zig @@ -34,6 +34,13 @@ const log = std.log.scoped(.io_exec); /// The termios poll rate in milliseconds. const TERMIOS_POLL_MS = 200; +/// Default deadline for waiting on the child to exit after SIGHUP, in +/// milliseconds. SIGHUP delivery is effectively instant and well-behaved +/// shells exit within microseconds; the deadline only matters when a child +/// is hung (e.g., in trapped SIGHUP and refusing to exit) and we don't +/// want that to stall application shutdown. +const stop_deadline_ms: u64 = 500; + /// If we build with flatpak support then we have to keep track of /// a potential execution on the host. const FlatpakHostCommand = if (!build_config.flatpak) struct { @@ -1053,7 +1060,7 @@ const Subprocess = struct { else => return err, } }; - errdefer killCommand(&cmd) catch |err| { + errdefer killCommand(&cmd, stop_deadline_ms) catch |err| { log.warn("error killing command during cleanup err={}", .{err}); }; log.info("started subcommand path={s} pid={?}", .{ self.args[0], cmd.pid }); @@ -1086,17 +1093,51 @@ const Subprocess = struct { self.process = null; } - /// Stop the subprocess. This is safe to call anytime. This will wait - /// for the subprocess to register that it has been signalled, but not - /// for it to terminate, so it will not block. + /// Send SIGHUP to the subprocess without waiting for it to exit. + /// + /// Calling this before stop() lets multiple subprocesses begin shutting + /// down in parallel rather than serially: hangup all, then deinit each + /// (which only has to wait for exit, not also for signal delivery). + /// + /// This is safe to call multiple times and will do nothing if the + /// process has already been stopped. + pub fn hangup(self: *Subprocess) void { + switch (self.process orelse return) { + .fork_exec => |*cmd| { + if (cmd.pid) |pid| switch (builtin.os.tag) { + .windows => { + if (windows.kernel32.TerminateProcess(pid, 0) == 0) { + log.err( + "error terminating command: {}", + .{windows.kernel32.GetLastError()}, + ); + } + }, + + else => _ = hangupPid(pid) catch |err| + log.err("error sending SIGHUP to command: {}", .{err}), + }; + }, + + .flatpak => |*cmd| if (comptime build_config.flatpak) { + killCommandFlatpak(cmd) catch |err| + log.err("error sending SIGHUP to command: {}", .{err}); + }, + } + } + + /// Stop the subprocess. This is safe to call anytime. Sends SIGHUP and + /// waits up to stop_deadline_ms for the child to exit before returning. + /// Calling hangup() first is harmless and lets the child start exiting + /// sooner, which shortens this wait in practice. /// This does not close the pty. pub fn stop(self: *Subprocess) void { switch (self.process orelse return) { .fork_exec => |*cmd| { // Note: this will also wait for the command to exit, so // DO NOT call cmd.wait - killCommand(cmd) catch |err| - log.err("error sending SIGHUP to command, may hang: {}", .{err}); + killCommand(cmd, stop_deadline_ms) catch |err| + log.err("error stopping command: {}", .{err}); }, .flatpak => |*cmd| if (comptime build_config.flatpak) { @@ -1134,9 +1175,10 @@ const Subprocess = struct { } /// Kill the underlying subprocess. This sends a SIGHUP to the child - /// process. This also waits for the command to exit and will return the - /// exit code. - fn killCommand(command: *Command) !void { + /// process and then waits up to deadline_ms for it to exit. Returns + /// regardless of whether the child has actually exited once the + /// deadline has passed; a stuck child shouldn't stall teardown. + fn killCommand(command: *Command, deadline_ms: u64) !void { if (command.pid) |pid| { switch (builtin.os.tag) { .windows => { @@ -1147,36 +1189,47 @@ const Subprocess = struct { _ = try command.wait(false); }, - else => try killPid(pid), + else => try killPid(pid, deadline_ms), } } } - fn killPid(pid: c.pid_t) !void { - const pgid = getpgid(pid) orelse return; + /// Send SIGHUP to a process group. Returns true on success, false if + /// the process group could not be found (already exited). + fn hangupPid(pid: c.pid_t) !bool { + const pgid = getpgid(pid) orelse return false; + + switch (posix.errno(c.killpg(pgid, c.SIGHUP))) { + .SUCCESS => log.debug("process group killed pgid={}", .{pgid}), + else => |err| { + // killpg returns EPERM on Darwin even when delivery succeeds + // (see https://openradar.appspot.com/radar?id=4970011239673856). + if ((comptime builtin.target.os.tag.isDarwin()) and + err == .PERM) + { + log.debug("killpg failed with EPERM, expected on Darwin and ignoring", .{}); + return true; + } + + log.warn("error killing process group pgid={} err={}", .{ pgid, err }); + return error.KillFailed; + }, + } + return true; + } + + fn killPid(pid: c.pid_t, deadline_ms: u64) !void { + const deadline: i64 = std.time.milliTimestamp() +| @as(i64, @intCast(deadline_ms)); // It is possible to send a killpg between the time that // our child process calls setsid but before or simultaneous // to calling execve. In this case, the direct child dies // but grandchildren survive. To work around this, we loop // and repeatedly kill the process group until all - // descendents are well and truly dead. We will not rest - // until the entire family tree is obliterated. + // descendents are well and truly dead, or until our deadline + // expires. while (true) { - switch (posix.errno(c.killpg(pgid, c.SIGHUP))) { - .SUCCESS => log.debug("process group killed pgid={}", .{pgid}), - else => |err| killpg: { - if ((comptime builtin.target.os.tag.isDarwin()) and - err == .PERM) - { - log.debug("killpg failed with EPERM, expected on Darwin and ignoring", .{}); - break :killpg; - } - - log.warn("error killing process group pgid={} err={}", .{ pgid, err }); - return error.KillFailed; - }, - } + if (!try hangupPid(pid)) return; // See Command.zig wait for why we specify WNOHANG. // The gist is that it lets us detect when children @@ -1185,6 +1238,12 @@ const Subprocess = struct { const res = posix.waitpid(pid, std.c.W.NOHANG); log.debug("waitpid result={}", .{res.pid}); if (res.pid != 0) break; + + if (std.time.milliTimestamp() >= deadline) { + log.warn("child pid={} did not exit within {}ms", .{ pid, deadline_ms }); + break; + } + std.Thread.sleep(10 * std.time.ns_per_ms); } }