Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 44 additions & 34 deletions src/tmux-manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1153,19 +1153,23 @@ export class TmuxManager extends EventEmitter implements TerminalMultiplexer {
}

try {
const psOutput = execSync(`ps -o rss=,pcpu= -p ${session.pid} 2>/dev/null || echo "0 0"`, {
encoding: 'utf-8',
timeout: EXEC_TIMEOUT_MS,
}).trim();
const psOutput = (
await execAsync(`ps -o rss=,pcpu= -p ${session.pid} 2>/dev/null || echo "0 0"`, {
encoding: 'utf-8',
timeout: EXEC_TIMEOUT_MS,
})
).stdout.trim();

const [rss, cpu] = psOutput.split(/\s+/).map((x) => parseFloat(x) || 0);

let childCount = 0;
try {
const childOutput = execSync(`pgrep -P ${session.pid} | wc -l`, {
encoding: 'utf-8',
timeout: EXEC_TIMEOUT_MS,
}).trim();
const childOutput = (
await execAsync(`pgrep -P ${session.pid} | wc -l`, {
encoding: 'utf-8',
timeout: EXEC_TIMEOUT_MS,
})
).stdout.trim();
childCount = parseInt(childOutput, 10) || 0;
} catch {
// No children or command failed
Expand Down Expand Up @@ -1202,13 +1206,15 @@ export class TmuxManager extends EventEmitter implements TerminalMultiplexer {
// Step 1: Get descendant PIDs
const descendantMap = new Map<number, number[]>();

const pgrepOutput = execSync(
`for p in ${sessionPids.join(' ')}; do children=$(pgrep -P $p 2>/dev/null | tr '\\n' ','); echo "$p:$children"; done`,
{
encoding: 'utf-8',
timeout: EXEC_TIMEOUT_MS,
}
).trim();
const pgrepOutput = (
await execAsync(
`for p in ${sessionPids.join(' ')}; do children=$(pgrep -P $p 2>/dev/null | tr '\\n' ','); echo "$p:$children"; done`,
{
encoding: 'utf-8',
timeout: EXEC_TIMEOUT_MS,
}
)
).stdout.trim();

for (const line of pgrepOutput.split('\n')) {
const [pidStr, childrenStr] = line.split(':');
Expand All @@ -1233,10 +1239,12 @@ export class TmuxManager extends EventEmitter implements TerminalMultiplexer {
// Step 3: Single ps call
const pidArray = Array.from(allPids);
if (pidArray.length > 0) {
const psOutput = execSync(`ps -o pid=,rss=,pcpu= -p ${pidArray.join(',')} 2>/dev/null || true`, {
encoding: 'utf-8',
timeout: EXEC_TIMEOUT_MS,
}).trim();
const psOutput = (
await execAsync(`ps -o pid=,rss=,pcpu= -p ${pidArray.join(',')} 2>/dev/null || true`, {
encoding: 'utf-8',
timeout: EXEC_TIMEOUT_MS,
})
).stdout.trim();

const processStats = new Map<number, { rss: number; cpu: number }>();
for (const line of psOutput.split('\n')) {
Expand Down Expand Up @@ -1324,11 +1332,11 @@ export class TmuxManager extends EventEmitter implements TerminalMultiplexer {
clearInterval(this.mouseSyncInterval);
}

this.mouseSyncInterval = setInterval(() => {
this.mouseSyncInterval = setInterval(async () => {
if (IS_TEST_MODE) return;

for (const session of this.sessions.values()) {
const panes = this.listPanes(session.muxName);
const panes = await this.listPanes(session.muxName);
const count = panes.length;
if (count === 0) continue;

Expand All @@ -1337,12 +1345,12 @@ export class TmuxManager extends EventEmitter implements TerminalMultiplexer {

// Pane count changed — toggle mouse mode
if (count > 1) {
if (this.enableMouseMode(session.muxName)) {
if (await this.enableMouseMode(session.muxName)) {
this.lastPaneCount.set(session.muxName, count);
}
// If enableMouseMode fails, DON'T update lastPaneCount — retry next poll
} else {
if (this.disableMouseMode(session.muxName)) {
if (await this.disableMouseMode(session.muxName)) {
this.lastPaneCount.set(session.muxName, count);
}
}
Expand Down Expand Up @@ -1473,15 +1481,15 @@ export class TmuxManager extends EventEmitter implements TerminalMultiplexer {
* Allows clicking to select panes in agent team split-pane layouts.
* When mouse mode is on, tmux intercepts mouse events (slow selection, no browser copy).
*/
enableMouseMode(muxName: string): boolean {
async enableMouseMode(muxName: string): Promise<boolean> {
if (IS_TEST_MODE) return true;
if (!isValidMuxName(muxName)) {
console.error('[TmuxManager] Invalid session name in enableMouseMode:', muxName);
return false;
}

try {
execSync(`${this.tmux()} set-option -t "${muxName}" mouse on`, {
await execAsync(`${this.tmux()} set-option -t "${muxName}" mouse on`, {
encoding: 'utf-8',
timeout: EXEC_TIMEOUT_MS,
});
Expand All @@ -1497,15 +1505,15 @@ export class TmuxManager extends EventEmitter implements TerminalMultiplexer {
* Disable mouse mode for an existing tmux session.
* Restores native xterm.js text selection and browser clipboard copy.
*/
disableMouseMode(muxName: string): boolean {
async disableMouseMode(muxName: string): Promise<boolean> {
if (IS_TEST_MODE) return true;
if (!isValidMuxName(muxName)) {
console.error('[TmuxManager] Invalid session name in disableMouseMode:', muxName);
return false;
}

try {
execSync(`${this.tmux()} set-option -t "${muxName}" mouse off`, {
await execAsync(`${this.tmux()} set-option -t "${muxName}" mouse off`, {
encoding: 'utf-8',
timeout: EXEC_TIMEOUT_MS,
});
Expand All @@ -1522,9 +1530,9 @@ export class TmuxManager extends EventEmitter implements TerminalMultiplexer {
* Called by TeamWatcher when teammates spawn/despawn panes.
* Uses `tmux list-panes` for bulletproof detection — counts actual panes, not config.
*/
syncMouseMode(muxName: string): boolean {
async syncMouseMode(muxName: string): Promise<boolean> {
if (IS_TEST_MODE) return true;
const panes = this.listPanes(muxName);
const panes = await this.listPanes(muxName);
if (panes.length > 1) {
return this.enableMouseMode(muxName);
} else {
Expand All @@ -1536,18 +1544,20 @@ export class TmuxManager extends EventEmitter implements TerminalMultiplexer {
* List all panes in a tmux session.
* Returns structured info for each pane.
*/
listPanes(muxName: string): PaneInfo[] {
async listPanes(muxName: string): Promise<PaneInfo[]> {
if (IS_TEST_MODE) return [];
if (!isValidMuxName(muxName)) {
console.error('[TmuxManager] Invalid session name in listPanes:', muxName);
return [];
}

try {
const output = execSync(
`${this.tmux()} list-panes -t "${muxName}" -F '#{pane_id}:#{pane_index}:#{pane_pid}:#{pane_width}:#{pane_height}'`,
{ encoding: 'utf-8', timeout: EXEC_TIMEOUT_MS }
).trim();
const output = (
await execAsync(
`${this.tmux()} list-panes -t "${muxName}" -F '#{pane_id}:#{pane_index}:#{pane_pid}:#{pane_width}:#{pane_height}'`,
{ encoding: 'utf-8', timeout: EXEC_TIMEOUT_MS }
)
).stdout.trim();

return output
.split('\n')
Expand Down
54 changes: 54 additions & 0 deletions src/utils/event-loop-monitor.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/**
* @fileoverview Event-loop lag monitor.
*
* Node is single-threaded: any synchronous work (e.g. a blocking `execSync`)
* freezes the whole event loop, so the HTTP server stops answering on its port
* while the process stays alive and other ports are unaffected. Such stalls
* self-heal and never restart the process, so a periodic loopback healthcheck
* misses them entirely — they leave no trace.
*
* This monitor samples how late a fixed-interval timer actually fires versus when
* it was scheduled; the excess is time the loop was blocked. When that exceeds a
* threshold it logs the measured stall, turning otherwise-invisible "port briefly
* unreachable" incidents into a timestamped, quantified log line.
*
* @module utils/event-loop-monitor
*/

/**
 * Handle returned by `startEventLoopMonitor` for controlling a running monitor.
 */
export interface EventLoopMonitorHandle {
/** Stop sampling by clearing the monitor's underlying interval timer. */
stop(): void;
}

/**
 * Begin periodically measuring how late the event loop services a timer.
 *
 * A repeating timer is armed every `sampleMs`; on each tick the wall time since
 * the previous tick is compared with `sampleMs`. Whatever exceeds it is treated
 * as time the loop spent blocked, and is reported when it reaches `thresholdMs`.
 *
 * @param sampleMs Sampling period in ms; also the expected gap between ticks.
 * @param thresholdMs Minimum measured lag (ms, rounded) that produces a report.
 * @param log Receiver for report lines; falls back to `console.warn`.
 * @returns A handle whose `stop()` cancels the sampling timer.
 */
export function startEventLoopMonitor(
  sampleMs = 1000,
  thresholdMs = 1000,
  log: (msg: string) => void = (m) => console.warn(m)
): EventLoopMonitorHandle {
  let previousTick = performance.now();

  const sample = (): void => {
    const currentTick = performance.now();
    // Anything past the requested period is time the timer had to wait its turn.
    const overshootMs = Math.round(currentTick - previousTick - sampleMs);
    const stalled = overshootMs >= thresholdMs;
    if (stalled) {
      const stamp = new Date().toISOString();
      log(`[EventLoopLag] event loop blocked ~${overshootMs}ms (at ${stamp})`);
    }
    // Re-baseline only after reporting, so the next tick is measured from this one.
    previousTick = currentTick;
  };

  const interval = setInterval(sample, sampleMs);

  // The monitor alone must not hold the process open.
  interval.unref?.();

  const stop = (): void => {
    clearInterval(interval);
  };

  return { stop };
}
2 changes: 2 additions & 0 deletions src/utils/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
export { BufferAccumulator } from './buffer-accumulator.js';
export { CleanupManager } from './cleanup-manager.js';
export { Debouncer, KeyedDebouncer } from './debouncer.js';
export { startEventLoopMonitor } from './event-loop-monitor.js';
export type { EventLoopMonitorHandle } from './event-loop-monitor.js';
export { StaleExpirationMap } from './stale-expiration-map.js';
export {
ANSI_ESCAPE_PATTERN_FULL,
Expand Down
13 changes: 12 additions & 1 deletion src/web/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,8 @@ import {
type ImageDetectedEvent,
DEFAULT_NICE_CONFIG,
} from '../types.js';
import { CleanupManager, KeyedDebouncer, StaleExpirationMap } from '../utils/index.js';
import { CleanupManager, KeyedDebouncer, StaleExpirationMap, startEventLoopMonitor } from '../utils/index.js';
import type { EventLoopMonitorHandle } from '../utils/index.js';
import { MAX_CONCURRENT_SESSIONS, MAX_SSE_CLIENTS } from '../config/map-limits.js';
import { SseEvent } from './sse-events.js';
import type { ScheduledRun } from './ports/index.js';
Expand Down Expand Up @@ -231,6 +232,7 @@ export class WebServer extends EventEmitter {
private teamWatcher: TeamWatcher = new TeamWatcher();
private _orchestratorLoop: import('../orchestrator-loop.js').OrchestratorLoop | null = null;
private _pasteImageGcStop: (() => void) | null = null;
private _eventLoopMonitor: EventLoopMonitorHandle | null = null;
private teamWatcherHandlers: {
teamCreated: (config: unknown) => void;
teamUpdated: (config: unknown) => void;
Expand Down Expand Up @@ -1551,6 +1553,10 @@ export class WebServer extends EventEmitter {
// older than 7 days from each live session's .claude-images/ hourly.
if (!this.testMode) {
this._pasteImageGcStop = startPasteImageGc({ sessions: this.sessions });
// Surface event-loop stalls (e.g. a slow synchronous tmux/ps call) so the
// intermittent ":3000 briefly unreachable, process never restarts" class of
// incident leaves a quantified log line instead of vanishing silently.
this._eventLoopMonitor = startEventLoopMonitor();
}

await this.app.listen({ port: this.port, host: '0.0.0.0' });
Expand Down Expand Up @@ -1911,6 +1917,11 @@ export class WebServer extends EventEmitter {
this._pasteImageGcStop = null;
}

if (this._eventLoopMonitor) {
this._eventLoopMonitor.stop();
this._eventLoopMonitor = null;
}

// Dispose all managed timers (intervals + resettable timeouts)
this.cleanup.dispose();

Expand Down