Skip to content

Commit b7cb275

Browse files
authored
fix: stop tearing down non-TTY processes on SSH session end (#18673)
(possibly temporary) fix for #18519 Matches OpenSSH for non-tty sessions, where we don't actively terminate the process. Adds explicit tracking to the SSH server for these processes so that if we are shutting down we terminate them: this ensures that we can shut down quickly to allow shutdown scripts to run. It also ensures our tests don't leak system resources.
1 parent 9ccaf86 commit b7cb275

File tree

3 files changed

+54
-18
lines changed

3 files changed

+54
-18
lines changed

agent/agentssh/agentssh.go

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ type Server struct {
129129
listeners map[net.Listener]struct{}
130130
conns map[net.Conn]struct{}
131131
sessions map[ssh.Session]struct{}
132+
processes map[*os.Process]struct{}
132133
closing chan struct{}
133134
// Wait for goroutines to exit, waited without
134135
// a lock on mu but protected by closing.
@@ -188,6 +189,7 @@ func NewServer(ctx context.Context, logger slog.Logger, prometheusRegistry *prom
188189
fs: fs,
189190
conns: make(map[net.Conn]struct{}),
190191
sessions: make(map[ssh.Session]struct{}),
192+
processes: make(map[*os.Process]struct{}),
191193
logger: logger,
192194

193195
config: config,
@@ -606,7 +608,10 @@ func (s *Server) startNonPTYSession(logger slog.Logger, session ssh.Session, mag
606608
// otherwise context cancellation will not propagate properly
607609
// and SSH server close may be delayed.
608610
cmd.SysProcAttr = cmdSysProcAttr()
609-
cmd.Cancel = cmdCancel(session.Context(), logger, cmd)
611+
612+
// to match OpenSSH, we don't actually tear a non-TTY command down, even if the session ends.
613+
// c.f. https://github.com/coder/coder/issues/18519#issuecomment-3019118271
614+
cmd.Cancel = nil
610615

611616
cmd.Stdout = session
612617
cmd.Stderr = session.Stderr()
@@ -629,6 +634,16 @@ func (s *Server) startNonPTYSession(logger slog.Logger, session ssh.Session, mag
629634
s.metrics.sessionErrors.WithLabelValues(magicTypeLabel, "no", "start_command").Add(1)
630635
return xerrors.Errorf("start: %w", err)
631636
}
637+
638+
// Since we don't cancel the process when the session stops, we still need to tear it down if we are closing. So
639+
// track it here.
640+
if !s.trackProcess(cmd.Process, true) {
641+
// must be closing
642+
err = cmdCancel(logger, cmd.Process)
643+
return xerrors.Errorf("failed to track process: %w", err)
644+
}
645+
defer s.trackProcess(cmd.Process, false)
646+
632647
sigs := make(chan ssh.Signal, 1)
633648
session.Signals(sigs)
634649
defer func() {
@@ -1089,6 +1104,27 @@ func (s *Server) trackSession(ss ssh.Session, add bool) (ok bool) {
10891104
return true
10901105
}
10911106

1107+
// trackCommand registers the process with the server. If the server is
1108+
// closing, the process is not registered and should be closed.
1109+
//
1110+
//nolint:revive
1111+
func (s *Server) trackProcess(p *os.Process, add bool) (ok bool) {
1112+
s.mu.Lock()
1113+
defer s.mu.Unlock()
1114+
if add {
1115+
if s.closing != nil {
1116+
// Server closed.
1117+
return false
1118+
}
1119+
s.wg.Add(1)
1120+
s.processes[p] = struct{}{}
1121+
return true
1122+
}
1123+
s.wg.Done()
1124+
delete(s.processes, p)
1125+
return true
1126+
}
1127+
10921128
// Close the server and all active connections. Server can be re-used
10931129
// after Close is done.
10941130
func (s *Server) Close() error {
@@ -1128,6 +1164,10 @@ func (s *Server) Close() error {
11281164
_ = c.Close()
11291165
}
11301166

1167+
for p := range s.processes {
1168+
_ = cmdCancel(s.logger, p)
1169+
}
1170+
11311171
s.logger.Debug(ctx, "closing SSH server")
11321172
err := s.srv.Close()
11331173

agent/agentssh/exec_other.go

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ package agentssh
44

55
import (
66
"context"
7-
"os/exec"
7+
"os"
88
"syscall"
99

1010
"cdr.dev/slog"
@@ -16,9 +16,7 @@ func cmdSysProcAttr() *syscall.SysProcAttr {
1616
}
1717
}
1818

19-
func cmdCancel(ctx context.Context, logger slog.Logger, cmd *exec.Cmd) func() error {
20-
return func() error {
21-
logger.Debug(ctx, "cmdCancel: sending SIGHUP to process and children", slog.F("pid", cmd.Process.Pid))
22-
return syscall.Kill(-cmd.Process.Pid, syscall.SIGHUP)
23-
}
19+
func cmdCancel(logger slog.Logger, p *os.Process) error {
20+
logger.Debug(context.Background(), "cmdCancel: sending SIGHUP to process and children", slog.F("pid", p.Pid))
21+
return syscall.Kill(-p.Pid, syscall.SIGHUP)
2422
}

agent/agentssh/exec_windows.go

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ package agentssh
22

33
import (
44
"context"
5-
"os/exec"
5+
"os"
66
"syscall"
77

88
"cdr.dev/slog"
@@ -12,14 +12,12 @@ func cmdSysProcAttr() *syscall.SysProcAttr {
1212
return &syscall.SysProcAttr{}
1313
}
1414

15-
func cmdCancel(ctx context.Context, logger slog.Logger, cmd *exec.Cmd) func() error {
16-
return func() error {
17-
logger.Debug(ctx, "cmdCancel: killing process", slog.F("pid", cmd.Process.Pid))
18-
// Windows doesn't support sending signals to process groups, so we
19-
// have to kill the process directly. In the future, we may want to
20-
// implement a more sophisticated solution for process groups on
21-
// Windows, but for now, this is a simple way to ensure that the
22-
// process is terminated when the context is cancelled.
23-
return cmd.Process.Kill()
24-
}
15+
func cmdCancel(logger slog.Logger, p *os.Process) error {
16+
logger.Debug(context.Background(), "cmdCancel: killing process", slog.F("pid", p.Pid))
17+
// Windows doesn't support sending signals to process groups, so we
18+
// have to kill the process directly. In the future, we may want to
19+
// implement a more sophisticated solution for process groups on
20+
// Windows, but for now, this is a simple way to ensure that the
21+
// process is terminated when the context is cancelled.
22+
return p.Kill()
2523
}

0 commit comments

Comments
 (0)