diff --git a/internal/aiagents/hook/runtime.go b/internal/aiagents/hook/runtime.go index 57ce2b0..3c07995 100644 --- a/internal/aiagents/hook/runtime.go +++ b/internal/aiagents/hook/runtime.go @@ -27,7 +27,6 @@ import ( "github.com/step-security/dev-machine-guard/internal/aiagents/identity" "github.com/step-security/dev-machine-guard/internal/aiagents/ingest" "github.com/step-security/dev-machine-guard/internal/aiagents/redact" - "github.com/step-security/dev-machine-guard/internal/aiagents/state" "github.com/step-security/dev-machine-guard/internal/executor" ) @@ -116,16 +115,6 @@ func (rt *Runtime) Run(parent context.Context, hookType event.HookEvent) error { var ev *event.Event defer func() { rt.emitDecidedResponse(ev, decision) }() - // Server-driven kill switch. The reconciler writes ~/.stepsecurity/ - // hooks-state.json on every telemetry tick; a UI flip propagates - // to disabled here in O(1) microseconds and short-circuits the - // hot path before any enrichment, identity probe, or upload runs. - // Missing/corrupt cache returns Default()=enabled so first-run - // after install continues to work. - if cur, _ := state.Read(); !cur.Hooks.Enabled { - return nil - } - cfg, _ := ingest.Snapshot() id := identity.Resolve(ctx, rt.Exec, cfg.CustomerID) upload := rt.resolveUpload() diff --git a/internal/aiagents/hook/runtime_test.go b/internal/aiagents/hook/runtime_test.go index 449e6f6..0e240bf 100644 --- a/internal/aiagents/hook/runtime_test.go +++ b/internal/aiagents/hook/runtime_test.go @@ -6,7 +6,6 @@ import ( "encoding/json" "errors" "io" - "path/filepath" "strings" "sync" "testing" @@ -14,7 +13,6 @@ import ( cc "github.com/step-security/dev-machine-guard/internal/aiagents/adapter/claudecode" "github.com/step-security/dev-machine-guard/internal/aiagents/event" - "github.com/step-security/dev-machine-guard/internal/aiagents/state" "github.com/step-security/dev-machine-guard/internal/executor" ) @@ -422,48 +420,6 @@ func TestRunUploadFailureFailsOpen(t *testing.T) { // When no UploadEvent is wired (any runtime without enterprise config), // the runtime must still complete — just with no upload attempt. -// withDisabledStateCache writes a disabled state file and restores -// the override on cleanup. Returns the path for assertions. -func withDisabledStateCache(t *testing.T) { - t.Helper() - dir := t.TempDir() - path := dir + string(filepath.Separator) + state.CacheFilename - restore := state.SetCachePathForTest(path) - t.Cleanup(restore) - s := state.Default() - s.Hooks.Enabled = false - s.Source = state.SourcePoll - if err := state.Write(s); err != nil { - t.Fatalf("seed disabled cache: %v", err) - } -} - -func TestRunHonorsDisabledStateCache(t *testing.T) { - withDisabledStateCache(t) - - stdin := strings.NewReader(`{ - "session_id":"abc","cwd":"/tmp","tool_name":"Bash", - "tool_input":{"command":"npm install lodash","cwd":"/tmp"} - }`) - var stdout, stderr bytes.Buffer - rt, cap := newRuntime(t, stdin, &stdout, &stderr) - - if err := rt.Run(context.Background(), event.HookPreToolUse); err != nil { - t.Fatalf("Run: %v", err) - } - // Allow response still emitted (fail-open contract). - if !strings.HasPrefix(strings.TrimSpace(stdout.String()), "{") { - t.Errorf("stdout should still be the allow JSON: %q", stdout.String()) - } - // No upload, no enrichment, no error log lines. - if len(cap.events) != 0 { - t.Fatalf("disabled cache must short-circuit upload; got %d events", len(cap.events)) - } - if len(cap.errs) != 0 { - t.Fatalf("disabled cache must not log errors; got %v", cap.errs) - } -} - func TestRunSkipsUploadWithoutSeam(t *testing.T) { stdin := strings.NewReader(`{ "session_id":"s","cwd":"/tmp","tool_name":"Bash","tool_input":{"command":"ls"} diff --git a/internal/aiagents/state/cache.go b/internal/aiagents/state/cache.go deleted file mode 100644 index 8e6e886..0000000 --- a/internal/aiagents/state/cache.go +++ /dev/null @@ -1,126 +0,0 @@ -package state - -import ( - "encoding/json" - "os" - "path/filepath" -) - -// CacheFilename is the basename of the cache file. Lives under -// ~/.stepsecurity/ alongside config.json and ai-agent-hook-errors.jsonl. -const CacheFilename = "hooks-state.json" - -const ( - cacheFileMode os.FileMode = 0o600 - cacheParentDirMode os.FileMode = 0o700 -) - -// cachePathOverride lets tests redirect reads/writes to a tempdir. -// Production leaves it empty. Mutating from outside this package is -// a test-only concern; same pattern as cli.errorLogPathOverride. -var cachePathOverride string - -// SetCachePathForTest redirects CachePath() to the given absolute path -// and returns a restore function. Test-only; production code never -// calls this. Living on the package surface (rather than as a -// build-tagged file) keeps cross-package tests in hook/* and main_test -// able to drive the override without an internal-import trick. -func SetCachePathForTest(p string) (restore func()) { - prev := cachePathOverride - cachePathOverride = p - return func() { cachePathOverride = prev } -} - -// CachePath returns the absolute cache path, honoring the test -// override when set. -func CachePath() string { - if cachePathOverride != "" { - return cachePathOverride - } - home, err := os.UserHomeDir() - if err != nil || home == "" { - return "" - } - return filepath.Join(home, ".stepsecurity", CacheFilename) -} - -// Read returns (state, true) on a successful parse. Any I/O or parse -// error returns (Default(), false) — Read never surfaces an error -// because the hot path must remain fail-open. -func Read() (State, bool) { - path := CachePath() - if path == "" { - return Default(), false - } - // #nosec G304 -- path is CachePath(): either a test override set by - // SetCachePathForTest, or os.UserHomeDir() joined with the package - // constant CacheFilename. Never derived from external input. - b, err := os.ReadFile(path) - if err != nil { - return Default(), false - } - var s State - if err := json.Unmarshal(b, &s); err != nil { - return Default(), false - } - if s.SchemaVersion == 0 { - // Forward-compat tolerance: missing schema_version reads as the - // current version. A future breaking change would gate on a - // specific value here. - s.SchemaVersion = SchemaVersion - } - return s, true -} - -// Write atomically replaces the cache file. No backups are kept — the -// cache is rewritten on every reconcile tick, and orphaned backups -// would accumulate trash. Parent dir is created with 0o700. -func Write(s State) error { - path := CachePath() - if path == "" { - return errNoHomeDir - } - data, err := json.MarshalIndent(s, "", " ") - if err != nil { - return err - } - data = append(data, '\n') - - parent := filepath.Dir(path) - if err := os.MkdirAll(parent, cacheParentDirMode); err != nil { - return err - } - - tmp, err := os.CreateTemp(parent, "."+CacheFilename+".tmp-*") - if err != nil { - return err - } - tmpPath := tmp.Name() - defer func() { - if _, statErr := os.Stat(tmpPath); statErr == nil { - _ = os.Remove(tmpPath) - } - }() - - if _, err := tmp.Write(data); err != nil { - _ = tmp.Close() - return err - } - if err := tmp.Sync(); err != nil { - _ = tmp.Close() - return err - } - if err := tmp.Close(); err != nil { - return err - } - if err := os.Chmod(tmpPath, cacheFileMode); err != nil { - return err - } - return os.Rename(tmpPath, path) -} - -type cacheError string - -func (e cacheError) Error() string { return string(e) } - -const errNoHomeDir = cacheError("state: cannot resolve home directory") diff --git a/internal/aiagents/state/cache_test.go b/internal/aiagents/state/cache_test.go deleted file mode 100644 index 59d8760..0000000 --- a/internal/aiagents/state/cache_test.go +++ /dev/null @@ -1,122 +0,0 @@ -package state - -import ( - "encoding/json" - "os" - "path/filepath" - "runtime" - "testing" - "time" -) - -// withTempCache redirects CachePath to a tempdir for the duration of -// the test. Returns the absolute path the cache will be written to. -func withTempCache(t *testing.T) string { - t.Helper() - dir := t.TempDir() - p := filepath.Join(dir, CacheFilename) - prev := cachePathOverride - cachePathOverride = p - t.Cleanup(func() { cachePathOverride = prev }) - return p -} - -func TestReadMissingFileReturnsDefault(t *testing.T) { - withTempCache(t) - s, ok := Read() - if ok { - t.Fatal("Read of missing file should report ok=false") - } - if !s.Hooks.Enabled { - t.Fatal("missing-file Read must yield Default (enabled)") - } -} - -func TestWriteThenReadRoundTrip(t *testing.T) { - withTempCache(t) - in := State{ - SchemaVersion: SchemaVersion, - FetchedAt: time.Date(2026, 5, 14, 8, 0, 0, 0, time.UTC), - Source: SourcePoll, - Hooks: Hooks{Enabled: false}, - } - if err := Write(in); err != nil { - t.Fatalf("Write: %v", err) - } - out, ok := Read() - if !ok { - t.Fatal("Read after Write should report ok=true") - } - if out.Hooks.Enabled != false || out.Source != SourcePoll { - t.Fatalf("round-trip mismatch: %+v", out) - } - if !out.FetchedAt.Equal(in.FetchedAt) { - t.Fatalf("FetchedAt drift: got %v want %v", out.FetchedAt, in.FetchedAt) - } -} - -func TestReadMalformedReturnsDefault(t *testing.T) { - path := withTempCache(t) - if err := os.WriteFile(path, []byte("not json"), 0o600); err != nil { - t.Fatalf("seed: %v", err) - } - s, ok := Read() - if ok { - t.Fatal("malformed file should report ok=false") - } - if !s.Hooks.Enabled { - t.Fatal("malformed Read must yield Default (enabled)") - } -} - -func TestWriteFileMode(t *testing.T) { - if runtime.GOOS == "windows" { - t.Skip("file mode bits not meaningful on Windows") - } - path := withTempCache(t) - if err := Write(Default()); err != nil { - t.Fatalf("Write: %v", err) - } - info, err := os.Stat(path) - if err != nil { - t.Fatalf("stat: %v", err) - } - if got := info.Mode().Perm(); got != cacheFileMode { - t.Fatalf("mode = %o, want %o", got, cacheFileMode) - } -} - -func TestWriteReplacesExistingFile(t *testing.T) { - path := withTempCache(t) - if err := os.WriteFile(path, []byte(`{"hooks":{"enabled":true}}`), 0o600); err != nil { - t.Fatalf("seed: %v", err) - } - next := Default() - next.Hooks.Enabled = false - if err := Write(next); err != nil { - t.Fatalf("Write: %v", err) - } - out, ok := Read() - if !ok || out.Hooks.Enabled { - t.Fatalf("expected disabled after rewrite, got %+v (ok=%v)", out, ok) - } -} - -func TestReadForwardCompatMissingSchemaVersion(t *testing.T) { - path := withTempCache(t) - raw := map[string]any{"hooks": map[string]any{"enabled": false}} - b, _ := json.Marshal(raw) - if err := os.WriteFile(path, b, 0o600); err != nil { - t.Fatalf("seed: %v", err) - } - out, ok := Read() - if !ok { - t.Fatal("legacy-shape file should still parse") - } - if out.SchemaVersion != SchemaVersion { - t.Fatalf("schema_version should default to %d, got %d", SchemaVersion, out.SchemaVersion) - } - if out.Hooks.Enabled { - t.Fatal("legacy disabled value should round-trip") - } -} diff --git a/internal/aiagents/state/doc.go b/internal/aiagents/state/doc.go index ae3fde6..d3844b5 100644 --- a/internal/aiagents/state/doc.go +++ b/internal/aiagents/state/doc.go @@ -1,20 +1,14 @@ -// Package state owns the server-driven hook enable/disable cache. +// Package state owns the server-driven hook enable/disable pull path. // // Flow: // -// scheduled tick / install ──▶ Reconciler.Reconcile -// │ -// ├─ Fetcher.Fetch (GET /developer-mdm-agent/features) -// ├─ cache.Write (~/.stepsecurity/hooks-state.json) -// └─ InstallFn / UninstallFn (idempotent) +// scheduled tick ──▶ Reconciler.Reconcile +// │ +// ├─ Fetcher.Fetch (GET /developer-mdm-agent/features) +// └─ InstallFn / UninstallFn (idempotent) // -// _hook hot path ──▶ cache.Read ──▶ short-circuit to allow if disabled -// -// The cache file is the single source of truth for the hot path. Both -// the polling reconciler (this package) and any future WebSocket -// transport are expected to converge on the same file, so the hot path -// never has to know which transport is active. -// -// Defaults: cache missing or unparseable ⇒ Default() (enabled). Hot path -// is fail-open by contract; a corrupt cache must not break the agent. +// The presence of the managed hook entry in the agent's settings file +// (~/.claude/settings.json, ~/.codex/config.toml) is the single source +// of truth. The hot path runs iff the entry is present; there is no +// separate on-disk enable flag for it to consult. package state diff --git a/internal/aiagents/state/reconciler.go b/internal/aiagents/state/reconciler.go index 03cfaf5..7b2ac3e 100644 --- a/internal/aiagents/state/reconciler.go +++ b/internal/aiagents/state/reconciler.go @@ -5,7 +5,6 @@ import ( "errors" "fmt" "io" - "time" "github.com/step-security/dev-machine-guard/internal/executor" ) @@ -29,22 +28,14 @@ type Reconciler struct { Stderr io.Writer InstallFn HookCommandFn UninstallFn HookCommandFn - Now func() time.Time } -// Reconcile fetches desired state, writes the cache, and converges -// settings to match by calling InstallFn / UninstallFn. Both are -// idempotent so we don't need to detect the current state — install -// is a no-op when entries are already in place, uninstall is a no-op -// when no DMG-owned entries exist. -// -// Order: cache write first, then settings reconciliation. If the -// settings reconciliation fails, the cache still reflects the desired -// state — the hot path honors the new value immediately, and the next -// tick retries the settings change. -// -// Errors are returned to the caller for logging via cli.AppendError; -// Reconcile itself never panics into the caller's hot path. +// Reconcile fetches desired state and calls InstallFn / UninstallFn to +// converge settings.json (Claude Code) and config.toml (Codex) to +// match. Both seams are idempotent — install is a no-op when entries +// are already present, uninstall is a no-op when none are. The +// presence of the managed entry in the agent's settings file is the +// single source of truth; no on-disk state is kept by this package. func (r *Reconciler) Reconcile(ctx context.Context) error { if r.Fetcher == nil { return errors.New("state: nil fetcher") @@ -55,19 +46,6 @@ func (r *Reconciler) Reconcile(ctx context.Context) error { return fmt.Errorf("state: fetch: %w", err) } - now := time.Now().UTC - if r.Now != nil { - now = r.Now - } - next := Default() - next.FetchedAt = now() - next.Source = SourcePoll - next.Hooks.Enabled = res.Enabled - - if err := Write(next); err != nil { - return fmt.Errorf("state: write cache: %w", err) - } - switch { case res.Enabled: if r.InstallFn == nil { diff --git a/internal/aiagents/state/reconciler_test.go b/internal/aiagents/state/reconciler_test.go index 7439c6e..0dc5ec3 100644 --- a/internal/aiagents/state/reconciler_test.go +++ b/internal/aiagents/state/reconciler_test.go @@ -5,7 +5,6 @@ import ( "errors" "io" "testing" - "time" "github.com/step-security/dev-machine-guard/internal/executor" ) @@ -33,9 +32,7 @@ func (r *callRec) fn(name string) HookCommandFn { } } -func newReconciler(t *testing.T, fetch FetchResult, fetchErr error, exitCode int) (*Reconciler, *callRec) { - t.Helper() - withTempCache(t) +func newReconciler(_ *testing.T, fetch FetchResult, fetchErr error, exitCode int) (*Reconciler, *callRec) { rec := &callRec{exit: exitCode} return &Reconciler{ Exec: executor.NewMock(), @@ -46,11 +43,10 @@ func newReconciler(t *testing.T, fetch FetchResult, fetchErr error, exitCode int Stderr: io.Discard, InstallFn: rec.fn("install"), UninstallFn: rec.fn("uninstall"), - Now: func() time.Time { return time.Date(2026, 5, 14, 8, 0, 0, 0, time.UTC) }, }, rec } -func TestReconcileEnabledCallsInstallAndWritesCache(t *testing.T) { +func TestReconcileEnabledCallsInstall(t *testing.T) { r, rec := newReconciler(t, FetchResult{Enabled: true}, nil, 0) if err := r.Reconcile(context.Background()); err != nil { t.Fatalf("Reconcile: %v", err) @@ -58,13 +54,6 @@ func TestReconcileEnabledCallsInstallAndWritesCache(t *testing.T) { if len(rec.calls) != 1 || rec.calls[0] != "install" { t.Fatalf("calls = %v, want [install]", rec.calls) } - s, ok := Read() - if !ok { - t.Fatal("cache should be written") - } - if !s.Hooks.Enabled || s.Source != SourcePoll { - t.Fatalf("cache = %+v", s) - } } func TestReconcileDisabledCallsUninstall(t *testing.T) { @@ -75,49 +64,26 @@ func TestReconcileDisabledCallsUninstall(t *testing.T) { if len(rec.calls) != 1 || rec.calls[0] != "uninstall" { t.Fatalf("calls = %v, want [uninstall]", rec.calls) } - s, _ := Read() - if s.Hooks.Enabled { - t.Fatal("cache should record disabled") - } } -func TestReconcileFetchErrorPreservesCache(t *testing.T) { +func TestReconcileFetchErrorSkipsConverge(t *testing.T) { r, rec := newReconciler(t, FetchResult{}, errors.New("network down"), 0) - // Seed prior cache so we can verify it's untouched. - prior := Default() - prior.Hooks.Enabled = false - prior.Source = SourcePoll - if err := Write(prior); err != nil { - t.Fatalf("seed: %v", err) - } - if err := r.Reconcile(context.Background()); err == nil { t.Fatal("Reconcile should surface fetch error") } if len(rec.calls) != 0 { t.Fatalf("no install/uninstall on fetch error; got %v", rec.calls) } - s, ok := Read() - if !ok || s.Hooks.Enabled || s.Source != SourcePoll { - t.Fatalf("cache should be untouched, got %+v ok=%v", s, ok) - } } func TestReconcileInstallFailureSurfacesError(t *testing.T) { r, _ := newReconciler(t, FetchResult{Enabled: true}, nil, 1) - err := r.Reconcile(context.Background()) - if err == nil { + if err := r.Reconcile(context.Background()); err == nil { t.Fatal("non-zero install exit should surface as error") } - // Cache should still be written — settings retry is the next tick's job. - s, ok := Read() - if !ok || !s.Hooks.Enabled { - t.Fatalf("cache should still reflect desired state, got %+v ok=%v", s, ok) - } } func TestReconcileNilFetcherIsError(t *testing.T) { - withTempCache(t) r := &Reconciler{} if err := r.Reconcile(context.Background()); err == nil { t.Fatal("nil fetcher should error") diff --git a/internal/aiagents/state/state.go b/internal/aiagents/state/state.go deleted file mode 100644 index 4781c5c..0000000 --- a/internal/aiagents/state/state.go +++ /dev/null @@ -1,39 +0,0 @@ -package state - -import "time" - -// SchemaVersion is the wire/disk version of the cache file. Bump only -// on a breaking shape change; older daemons keep parsing v1. -const SchemaVersion = 1 - -// Source values record where the cache write came from. Diagnostic -// only — the hot path never branches on Source. -const ( - SourcePoll = "poll" - SourceManual = "manual" - SourceInstall = "install" - SourceWebsocket = "websocket" // reserved -) - -// State is the on-disk cache shape. JSON keys are the wire format. -type State struct { - SchemaVersion int `json:"schema_version"` - FetchedAt time.Time `json:"fetched_at"` - Source string `json:"source,omitempty"` - Hooks Hooks `json:"hooks"` -} - -// Hooks carries the feature toggles the hot path reads. Today there's -// only Enabled; per-agent granularity goes here when the contract -// grows. -type Hooks struct { - Enabled bool `json:"enabled"` -} - -// Default is the in-memory fallback for any read failure. Enabled is -// true: the hot path only runs because settings carry a DMG entry, so -// defaulting to disabled would silently turn off first-run after -// install (before the reconciler has had a chance to write the cache). -func Default() State { - return State{SchemaVersion: SchemaVersion, Hooks: Hooks{Enabled: true}} -} diff --git a/internal/aiagents/state/state_test.go b/internal/aiagents/state/state_test.go deleted file mode 100644 index cacc30b..0000000 --- a/internal/aiagents/state/state_test.go +++ /dev/null @@ -1,12 +0,0 @@ -package state - -import "testing" - -func TestDefaultIsEnabled(t *testing.T) { - if !Default().Hooks.Enabled { - t.Fatal("Default() must be enabled; otherwise first-run after install breaks") - } - if Default().SchemaVersion != SchemaVersion { - t.Fatalf("Default schema_version = %d, want %d", Default().SchemaVersion, SchemaVersion) - } -}