- commit
- 12a19ee
- parent
- 35f56e6
- author
- Ian Tay
- date
- 2026-03-08 12:54:06 -0400 EDT
fix: use platform-correct O_NONBLOCK for fcntl; fix PTY write and double-close

The hardcoded value 0o4000 is Linux-specific; on macOS O_NONBLOCK is 0x4. posix.SOCK.NONBLOCK is for socket()/accept4(), not fcntl(F_SETFL) — it only worked on Linux by coincidence where both constants share the same value. On macOS, non-blocking mode was never actually set on the PTY, client socket, or stdin.

Setting O_NONBLOCK correctly exposes two latent issues, also fixed here:

- PTY writes in handleInput/handleRun did `_ = try posix.write()`, discarding short-write counts and propagating WouldBlock as an error that crashed the daemon. Added ptyWrite(), which retries short writes and, on WouldBlock (kernel buffer full), logs a warning and drops the remainder instead of blocking — the daemon is single-threaded, so waiting for POLLOUT could deadlock against a shell that is itself blocked writing to a PTY output buffer the daemon is not draining.
- ensureSession's errdefer + defer both closed server_sock_fd when daemonLoop errored, causing EBADF (posix.close treats this as unreachable → panic in safe builds). Restructured so spawnPty failure is handled by an explicit catch; the defer is the sole owner after.

Additionally, O_NONBLOCK is set on the open file description (shared with the parent shell), so stdin's original flags must be restored on exit to avoid leaving the parent shell's stdin in non-blocking mode.
1 files changed,
+51,
-16
+51,
-16
1@@ -33,6 +33,9 @@ fn zmxLogFn(
2 var sigwinch_received: std.atomic.Value(bool) = std.atomic.Value(bool).init(false);
3 var sigterm_received: std.atomic.Value(bool) = std.atomic.Value(bool).init(false);
4
5+// https://github.com/ziglang/zig/blob/738d2be9d6b6ef3ff3559130c05159ef53336224/lib/std/posix.zig#L3505
6+const O_NONBLOCK: usize = 1 << @bitOffsetOf(posix.O, "NONBLOCK");
7+
8 pub fn main() !void {
9 // use c_allocator to avoid "reached unreachable code" panic in DebugAllocator when forking
10 const alloc = std.heap.c_allocator;
11@@ -334,7 +337,7 @@ const Daemon = struct {
12
13 // make pty non-blocking
14 const flags = try posix.fcntl(master_fd, posix.F.GETFL, 0);
15- _ = try posix.fcntl(master_fd, posix.F.SETFL, flags | @as(u32, 0o4000));
16+ _ = try posix.fcntl(master_fd, posix.F.SETFL, flags | O_NONBLOCK);
17 return master_fd;
18 }
19
20@@ -372,12 +375,19 @@ const Daemon = struct {
21 defer self.alloc.free(session_log_path);
22 try log_system.init(self.alloc, session_log_path);
23
24- errdefer {
25+ // If spawnPty fails, clean up here. Once it succeeds,
26+ // the inner block's defer takes ownership of cleanup to
27+ // avoid double-closing server_sock_fd on daemonLoop error.
28+ const pty_fd = self.spawnPty() catch |err| {
29 posix.close(server_sock_fd);
30 dir.deleteFile(self.session_name) catch {};
31- }
32- const pty_fd = try self.spawnPty();
33+ return err;
34+ };
35+
36 defer {
37+ self.handleKill();
38+ self.deinit();
39+ _ = posix.waitpid(self.pid, 0);
40 posix.close(pty_fd);
41 posix.close(server_sock_fd);
42 std.log.info("deleting socket file session_name={s}", .{self.session_name});
43@@ -385,10 +395,8 @@ const Daemon = struct {
44 std.log.warn("failed to delete socket file err={s}", .{@errorName(err)});
45 };
46 }
47+
48 try daemonLoop(self, server_sock_fd, pty_fd);
49- self.handleKill();
50- _ = posix.waitpid(self.pid, 0);
51- self.deinit();
52 return .{ .created = true, .is_daemon = true };
53 }
54 posix.close(server_sock_fd);
55@@ -399,10 +407,33 @@ const Daemon = struct {
56 return .{ .created = false, .is_daemon = false };
57 }
58
59- pub fn handleInput(self: *Daemon, pty_fd: i32, payload: []const u8) !void {
60+ /// Best-effort write to the (non-blocking) PTY fd. Retries short writes
61+ /// until complete, but on WouldBlock (kernel buffer full) gives up and
62+ /// drops the remainder — the daemon is single-threaded, so blocking here
63+ /// to wait for POLLOUT would deadlock against a shell that's itself
64+ /// blocked writing echo to a full PTY output buffer that we're not
65+ /// draining. Dropping is the same trade-off the old code made implicitly
66+ /// (short writes were silently truncated), just without the crash.
67+ fn ptyWrite(pty_fd: i32, data: []const u8) void {
68+ var remaining = data;
69+ while (remaining.len > 0) {
70+ const n = posix.write(pty_fd, remaining) catch |err| {
71+ if (err == error.WouldBlock) {
72+ std.log.warn("pty write dropped {d}/{d} bytes (buffer full)", .{ remaining.len, data.len });
73+ } else {
74+ std.log.warn("pty write failed, {d} bytes lost: {s}", .{ remaining.len, @errorName(err) });
75+ }
76+ return;
77+ };
78+ if (n == 0) return;
79+ remaining = remaining[n..];
80+ }
81+ }
82+
83+ pub fn handleInput(self: *Daemon, pty_fd: i32, payload: []const u8) void {
84 _ = self;
85 if (payload.len > 0) {
86- _ = try posix.write(pty_fd, payload);
87+ ptyWrite(pty_fd, payload);
88 }
89 }
90
91@@ -575,7 +606,7 @@ const Daemon = struct {
92 self.is_task_mode = true;
93
94 if (payload.len > 0) {
95- _ = try posix.write(pty_fd, payload);
96+ ptyWrite(pty_fd, payload);
97 }
98 try ipc.appendMessage(self.alloc, &client.write_buf, .Ack, "");
99 client.has_pending_output = true;
100@@ -1071,8 +1102,9 @@ fn clientLoop(client_sock_fd: i32) !void {
101 setupSigwinchHandler();
102
103 // Make socket non-blocking to avoid blocking on writes
104- const sock_flags = try posix.fcntl(client_sock_fd, posix.F.GETFL, 0);
105- _ = try posix.fcntl(client_sock_fd, posix.F.SETFL, sock_flags | posix.SOCK.NONBLOCK);
106+ var sock_flags = try posix.fcntl(client_sock_fd, posix.F.GETFL, 0);
107+ sock_flags |= O_NONBLOCK;
108+ _ = try posix.fcntl(client_sock_fd, posix.F.SETFL, sock_flags);
109
110 // Buffer for outgoing socket writes
111 var sock_write_buf = try std.ArrayList(u8).initCapacity(alloc, 4096);
112@@ -1093,9 +1125,12 @@ fn clientLoop(client_sock_fd: i32) !void {
113
114 const stdin_fd = posix.STDIN_FILENO;
115
116- // Make stdin non-blocking
117- const flags = try posix.fcntl(stdin_fd, posix.F.GETFL, 0);
118- _ = try posix.fcntl(stdin_fd, posix.F.SETFL, flags | posix.SOCK.NONBLOCK);
119+ // Make stdin non-blocking. O_NONBLOCK is set on the open file description,
120+ // which is shared with the parent shell; restore on exit to avoid
121+ // corrupting the parent's stdin.
122+ const stdin_orig_flags = try posix.fcntl(stdin_fd, posix.F.GETFL, 0);
123+ _ = try posix.fcntl(stdin_fd, posix.F.SETFL, stdin_orig_flags | O_NONBLOCK);
124+ defer _ = posix.fcntl(stdin_fd, posix.F.SETFL, stdin_orig_flags) catch {};
125
126 while (true) {
127 // Check for pending SIGWINCH
128@@ -1369,7 +1404,7 @@ fn daemonLoop(daemon: *Daemon, server_sock_fd: i32, pty_fd: i32) !void {
129
130 while (client.read_buf.next()) |msg| {
131 switch (msg.header.tag) {
132- .Input => try daemon.handleInput(pty_fd, msg.payload),
133+ .Input => daemon.handleInput(pty_fd, msg.payload),
134 .Init => try daemon.handleInit(client, pty_fd, &term, msg.payload),
135 .Resize => try daemon.handleResize(pty_fd, &term, msg.payload),
136 .Detach => {