diff --git a/cmd/stepsecurity-dev-machine-guard/main.go b/cmd/stepsecurity-dev-machine-guard/main.go index ba3edbd..d23442a 100644 --- a/cmd/stepsecurity-dev-machine-guard/main.go +++ b/cmd/stepsecurity-dev-machine-guard/main.go @@ -6,7 +6,9 @@ import ( "fmt" "io" "os" + "path/filepath" "runtime" + "strings" "time" aiagentscli "github.com/step-security/dev-machine-guard/internal/aiagents/cli" @@ -20,7 +22,9 @@ import ( "github.com/step-security/dev-machine-guard/internal/executor" "github.com/step-security/dev-machine-guard/internal/launchd" "github.com/step-security/dev-machine-guard/internal/output" + "github.com/step-security/dev-machine-guard/internal/paths" "github.com/step-security/dev-machine-guard/internal/progress" + "github.com/step-security/dev-machine-guard/internal/progress/filelog" "github.com/step-security/dev-machine-guard/internal/scan" "github.com/step-security/dev-machine-guard/internal/schtasks" "github.com/step-security/dev-machine-guard/internal/systemd" @@ -84,6 +88,40 @@ func main() { exec := executor.NewReal() + // Install dir resolution (see internal/paths.Home for the canonical + // chain): --install-dir CLI flag > $STEPSECURITY_HOME env var > + // install_dir config field > ~/.stepsecurity default. The CLI flag + // wins because it is the most explicit per-invocation override the + // operator can supply. We feed it to paths via SetOverride; an + // explicit `--install-dir=` (empty) is preserved and short-circuits + // the path computation below to disable file logging for this run. + // + // The capture is installed before the logger so every subsequent + // stderr write — including the pipe-tee in + // internal/telemetry/logcapture.go, which nests inside this one — + // flows through to disk. + if cfg.InstallDirSet { + paths.SetOverride(cfg.InstallDir) // may be "" = disabled + } + installDir := paths.Home() + disabled := cfg.InstallDirSet && cfg.InstallDir == "" + logFilePath := "" + if !disabled && installDir != "" { + logFilePath = filepath.Join(installDir, filelog.Filename) + // Pre-rotate BOTH files unconditionally. In interactive mode the + // stderr rotation is redundant with filelog.Start's own rotation + // pass (Start re-checks and no-ops on a missing path); in service + // mode StartIfEligible early-returns and Start never runs, so this + // explicit call is the only thing keeping agent.error.log bounded + // when the OS-level scheduler redirect is writing it. agent.log + // has the same property — the agent never writes it directly, so + // the only opportunity to cap it is at startup. + filelog.RotateIfOverCap(logFilePath, filelog.DefaultMaxBytes) + filelog.RotateIfOverCap(filepath.Join(installDir, filelog.StdoutFilename), filelog.DefaultMaxBytes) + } + capture, captureErr := filelog.StartIfEligible(logFilePath, filelog.DefaultMaxBytes) + defer func() { _ = capture.Stop() }() + // Log level resolution: default info → config file → CLI flag → --verbose → JSON override. level := progress.LevelInfo if config.LogLevel != "" { @@ -104,6 +142,23 @@ func main() { level = progress.LevelError } log := progress.NewLogger(level) + if captureErr != nil { + // Non-fatal: a read-only $HOME shouldn't block the run. + log.Warn("file logging disabled: %v", captureErr) + } + + // Migration heads-up: if the operator has moved the install dir but + // the legacy ~/.stepsecurity still has agent state, surface that so + // they can decide whether to copy over old diagnostic files. Don't + // auto-move — too risky for v1 (silent overwrites, races with other + // processes, perms changes). Just point at the leftovers. + legacy := paths.LegacyHome() + if !disabled && legacy != "" && installDir != "" && installDir != legacy { + if leftovers := findLegacyLeftovers(legacy); len(leftovers) > 0 { + log.Warn("install dir is %s but the legacy default %s still has files: %s — copy them over manually if you want their history.", + installDir, legacy, strings.Join(leftovers, ", ")) + } + } log.Debug("resolved log level: %s (config=%q cli=%q verbose=%v output=%s)", level, config.LogLevel, cfg.LogLevel, cfg.Verbose, cfg.OutputFormat) log.Debug("config loaded: enterprise=%v api_endpoint=%q scan_freq=%q search_dirs=%v log_level=%q", @@ -350,6 +405,28 @@ func scanJSONEncoder(w io.Writer) *json.Encoder { return enc } +// findLegacyLeftovers checks the legacy ~/.stepsecurity dir for agent +// files the operator may have moved (intentionally) to a new install +// dir. Returns basenames of present diagnostic files (config.json is +// excluded — it must stay at the legacy path as the bootstrap, so its +// presence there is expected and not a leftover to migrate). +func findLegacyLeftovers(legacy string) []string { + candidates := []string{ + "agent.error.log", + "agent.error.log.prev", + "agent.log", + "agent.log.prev", + "ai-agent-hook-errors.jsonl", + } + var found []string + for _, name := range candidates { + if _, err := os.Stat(filepath.Join(legacy, name)); err == nil { + found = append(found, name) + } + } + return found +} + // runHookStateReconcile polls agent-api for the desired AI-agent hook // state and reconciles local hook installation to match. Silent no-op // in community mode (enterprise config missing) — the existing scan diff --git a/internal/aiagents/cli/.stepsecurity/ai-agent-hook-errors.jsonl b/internal/aiagents/cli/.stepsecurity/ai-agent-hook-errors.jsonl new file mode 100644 index 0000000..bb005ea --- /dev/null +++ b/internal/aiagents/cli/.stepsecurity/ai-agent-hook-errors.jsonl @@ -0,0 +1 @@ +{"ts":"2026-05-21T16:38:14.309922Z","stage":"install","code":"no_home","message":"should be silently dropped"} diff --git a/internal/aiagents/cli/errlog.go b/internal/aiagents/cli/errlog.go index 9973738..e5512cb 100644 --- a/internal/aiagents/cli/errlog.go +++ b/internal/aiagents/cli/errlog.go @@ -7,6 +7,7 @@ import ( "time" "github.com/step-security/dev-machine-guard/internal/aiagents/redact" + "github.com/step-security/dev-machine-guard/internal/paths" ) // ErrorLogFilename is the basename of the per-user errors log. It lives @@ -99,9 +100,9 @@ func errorLogPath() string { if errorLogPathOverride != "" { return errorLogPathOverride } - home, err := os.UserHomeDir() - if err != nil || home == "" { + dir := paths.Home() + if dir == "" { return "" } - return filepath.Join(home, ".stepsecurity", ErrorLogFilename) + return filepath.Join(dir, ErrorLogFilename) } diff --git a/internal/cli/cli.go b/internal/cli/cli.go index 4cfa828..7caded8 100644 --- a/internal/cli/cli.go +++ b/internal/cli/cli.go @@ -24,6 +24,8 @@ type Config struct { ColorMode string // "auto", "always", "never" Verbose bool // --verbose (shortcut for --log-level=debug) LogLevel string // "" = unset; one of "error", "warn", "info", "debug" + InstallDir string // --install-dir=DIR base install directory; all non-bootstrap files (logs, hook errors, binary placement) live under this dir. "" w/ InstallDirSet=true means "explicitly disabled" (no file logging). + InstallDirSet bool // true if --install-dir was passed (empty value = disable file logging for this run) EnableNPMScan *bool // nil=auto, true/false=explicit EnableBrewScan *bool // nil=auto, true/false=explicit EnablePythonScan *bool // nil=auto, true/false=explicit @@ -220,6 +222,16 @@ func Parse(args []string) (*Config, error) { default: return nil, fmt.Errorf("invalid log level: %s (must be error, warn, info, or debug)", level) } + case strings.HasPrefix(arg, "--install-dir="): + cfg.InstallDir = strings.TrimPrefix(arg, "--install-dir=") + cfg.InstallDirSet = true + case arg == "--install-dir": + i++ + if i >= len(args) { + return nil, fmt.Errorf("--install-dir requires a directory argument (use --install-dir= to disable file logging)") + } + cfg.InstallDir = args[i] + cfg.InstallDirSet = true case arg == "-v" || arg == "--version" || arg == "version": _, _ = fmt.Fprintf(os.Stdout, "StepSecurity Dev Machine Guard v%s\n", buildinfo.VersionString()) os.Exit(0) @@ -290,11 +302,21 @@ func parseHooks(args []string) (*Config, error) { return nil, fmt.Errorf("unsupported agent: %s (supported: %s)", name, strings.Join(supportedHookAgents, ", ")) } cfg.HooksAgent = name + case strings.HasPrefix(arg, "--install-dir="): + cfg.InstallDir = strings.TrimPrefix(arg, "--install-dir=") + cfg.InstallDirSet = true + case arg == "--install-dir": + i++ + if i >= len(rest) { + return nil, fmt.Errorf("--install-dir requires a directory argument (use --install-dir= to disable file logging)") + } + cfg.InstallDir = rest[i] + cfg.InstallDirSet = true case arg == "-h" || arg == "--help": printHooksHelp() os.Exit(0) default: - return nil, fmt.Errorf("unknown option for `hooks %s`: %s (only --agent is accepted)", verb, arg) + return nil, fmt.Errorf("unknown option for `hooks %s`: %s (only --agent and --install-dir are accepted)", verb, arg) } } @@ -316,6 +338,9 @@ Subcommands: Options: --agent Target a specific agent (default: every detected agent). Supported: %s + --install-dir=DIR Base directory the agent puts its files under + (default: ~/.stepsecurity). Pass --install-dir= (empty) + to disable file logging. Equivalent to $STEPSECURITY_HOME. Examples: %s hooks install # install for every detected agent @@ -361,6 +386,14 @@ Options: --npmrc Run ONLY the npm config audit (verbose pretty view; --json supported) --pipconfig Run ONLY the pip config audit (verbose pretty view; --json supported) --log-level=LEVEL Log level: error | warn | info | debug (default: info) + --install-dir=DIR Base directory the agent puts ALL its files under + (logs, hook errors, binary placement via loader). + Default: ~/.stepsecurity. The diagnostic log file is + /agent.error.log, rotated at 5 MiB to .prev. + Equivalent to STEPSECURITY_HOME env var. Pass + --install-dir= (empty) to disable file logging for + this run. Note: config.json itself always lives at + ~/.stepsecurity/config.json for bootstrap. --verbose Shortcut for --log-level=debug --color=WHEN Color mode: auto | always | never (default: auto) -v, --version Show version diff --git a/internal/cli/cli_test.go b/internal/cli/cli_test.go index a9cd5b8..21c8cf4 100644 --- a/internal/cli/cli_test.go +++ b/internal/cli/cli_test.go @@ -269,7 +269,9 @@ func TestParse_HooksAgentMissingValue(t *testing.T) { } } -// DMG global flags must not leak into the hooks group. +// DMG global flags must not leak into the hooks group. --install-dir +// is the deliberate exception — when hooks fail, the customer needs the +// same on-disk diagnostic file every other command produces. func TestParse_HooksRejectsGlobalFlags(t *testing.T) { cases := [][]string{ {"hooks", "install", "--json"}, @@ -287,6 +289,80 @@ func TestParse_HooksRejectsGlobalFlags(t *testing.T) { } } +func TestParse_InstallDir_EqualsForm(t *testing.T) { + cfg, err := Parse([]string{"--install-dir=/opt/sec"}) + if err != nil { + t.Fatal(err) + } + if cfg.InstallDir != "/opt/sec" { + t.Errorf("InstallDir = %q, want /opt/sec", cfg.InstallDir) + } + if !cfg.InstallDirSet { + t.Error("InstallDirSet should be true after --install-dir=") + } +} + +func TestParse_InstallDir_SpaceForm(t *testing.T) { + cfg, err := Parse([]string{"--install-dir", "/opt/sec"}) + if err != nil { + t.Fatal(err) + } + if cfg.InstallDir != "/opt/sec" { + t.Errorf("InstallDir = %q, want /opt/sec", cfg.InstallDir) + } + if !cfg.InstallDirSet { + t.Error("InstallDirSet should be true after --install-dir ") + } +} + +func TestParse_InstallDir_EmptyValueDisables(t *testing.T) { + cfg, err := Parse([]string{"--install-dir="}) + if err != nil { + t.Fatal(err) + } + if cfg.InstallDir != "" { + t.Errorf("InstallDir = %q, want empty (disabled)", cfg.InstallDir) + } + if !cfg.InstallDirSet { + t.Error("InstallDirSet should be true (explicit empty is opt-out)") + } +} + +func TestParse_InstallDir_SpaceFormMissingValue(t *testing.T) { + _, err := Parse([]string{"--install-dir"}) + if err == nil { + t.Error("expected error for --install-dir without value (use --install-dir= to disable)") + } +} + +func TestParse_InstallDir_AbsentLeavesUnset(t *testing.T) { + cfg, err := Parse([]string{}) + if err != nil { + t.Fatal(err) + } + if cfg.InstallDir != "" || cfg.InstallDirSet { + t.Errorf("absent --install-dir should yield InstallDir=%q InstallDirSet=%v", cfg.InstallDir, cfg.InstallDirSet) + } +} + +func TestParseHooks_AcceptsInstallDir(t *testing.T) { + cfg, err := Parse([]string{"hooks", "install", "--install-dir=/opt/sec"}) + if err != nil { + t.Fatalf("hooks install --install-dir rejected: %v", err) + } + if cfg.InstallDir != "/opt/sec" { + t.Errorf("InstallDir = %q, want /opt/sec", cfg.InstallDir) + } + + cfg, err = Parse([]string{"hooks", "uninstall", "--install-dir", "/opt/u"}) + if err != nil { + t.Fatalf("hooks uninstall --install-dir rejected: %v", err) + } + if cfg.InstallDir != "/opt/u" { + t.Errorf("InstallDir = %q, want /opt/u", cfg.InstallDir) + } +} + // The `_hook` runtime is intentionally not handled by Parse — main.go // intercepts it before any init runs to honor the fail-open contract. // See internal/aiagents/cli/hook_test.go for handler-level tests and diff --git a/internal/config/config.go b/internal/config/config.go index 61a6fb5..d72f36e 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -23,6 +23,7 @@ var ( OutputFormat string // "" means default (pretty) HTMLOutputFile string // "" means not set LogLevel string // "" means default (info); one of error/warn/info/debug + InstallDir string // "" means default (~/.stepsecurity); non-empty makes the agent put all its files (logs, hook errors, future state) under this directory. Bootstrap config.json itself stays at the legacy location. Per-run opt-out is the CLI flag --install-dir=. Resolution: --install-dir flag > STEPSECURITY_HOME env > this field > default — see internal/paths. ) // ConfigFile is the JSON structure persisted to ~/.stepsecurity/config.json. @@ -39,6 +40,7 @@ type ConfigFile struct { OutputFormat string `json:"output_format,omitempty"` HTMLOutputFile string `json:"html_output_file,omitempty"` LogLevel string `json:"log_level,omitempty"` + InstallDir string `json:"install_dir,omitempty"` } // userConfigDir returns ~/.stepsecurity — the per-user config location. @@ -93,6 +95,26 @@ func WriteConfigFilePath() string { return filepath.Join(writeConfigDir(), "config.json") } +// LegacyDirName is the basename of the per-user agent directory under +// $HOME. config.json always lives here so the agent can bootstrap; +// other files (logs, hook errors, the binary) may be relocated via the +// resolved install dir — see internal/paths. +const LegacyDirName = ".stepsecurity" + +// LegacyDir returns the per-user agent directory (~/.stepsecurity), used +// as the reference point for the install-dir migration warning in main: +// if the operator has moved the install dir but this directory still +// holds diagnostic files, the agent surfaces a heads-up. Returns "" when +// $HOME can't be resolved. +// +// Distinct from ConfigFilePath / WriteConfigFilePath above: those follow +// the machine-vs-user resolution that lets MSI-deployed installs share +// config with a scheduled task running as a logged-in user. LegacyDir is +// always per-user, regardless of elevation. +func LegacyDir() string { + return userConfigDir() +} + // Load reads the config file and applies values to the package-level variables. // Values already set (not placeholders) are not overridden — build-time values take precedence. func Load() { @@ -142,6 +164,9 @@ func Load() { if cfg.LogLevel != "" && LogLevel == "" { LogLevel = cfg.LogLevel } + if cfg.InstallDir != "" && InstallDir == "" { + InstallDir = cfg.InstallDir + } } // IsEnterpriseMode returns true if valid enterprise credentials are configured. @@ -296,6 +321,15 @@ func RunConfigure() error { existing.LogLevel = "info" } + // Install directory override (empty = ~/.stepsecurity). All + // non-bootstrap files live under this directory: agent.log, + // agent.error.log (+ .prev rotation), ai-agent-hook-errors.jsonl, + // and the binary itself when placed via the loader script. + // Bootstrap config.json keeps living at the legacy ~/.stepsecurity + // path so the agent can always find it. To temporarily override + // for one run, pass --install-dir=PATH or set $STEPSECURITY_HOME. + existing.InstallDir = promptValue(reader, "Install Directory (blank = default)", existing.InstallDir) + // Save if err := save(existing); err != nil { return fmt.Errorf("saving configuration: %w", err) @@ -422,6 +456,7 @@ func ShowConfigure() { fmt.Printf(" %-24s %s\n", "HTML Output File:", displayValue(cfg.HTMLOutputFile)) } fmt.Printf(" %-24s %s\n", "Log Level:", displayLogLevel(cfg.LogLevel)) + fmt.Printf(" %-24s %s\n", "Install Directory:", displayInstallDir(cfg.InstallDir)) } func displayValue(v string) string { @@ -494,6 +529,13 @@ func displayLogLevel(level string) string { } } +func displayInstallDir(v string) string { + if v == "" { + return "~/.stepsecurity (default)" + } + return v +} + func isPlaceholder(v string) bool { return strings.Contains(v, "{{") } diff --git a/internal/config/config_test.go b/internal/config/config_test.go index f9c3886..d8a0bf6 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -1,8 +1,8 @@ package config import ( + "bytes" "encoding/json" - "os" "path/filepath" "testing" ) @@ -47,48 +47,63 @@ func TestIsPlaceholder(t *testing.T) { } func TestSaveAndLoad(t *testing.T) { - // This test exercises the ConfigFile JSON marshal/unmarshal contract - // against a plain temp file — it does NOT cover save()/ConfigFilePath(), - // which depend on $HOME resolution and (on Windows) elevation checks. - // See config_nonint_test.go for tests that go through those helpers. - tmpDir := t.TempDir() - tmpConfigPath := filepath.Join(tmpDir, "config.json") - - cfg := &ConfigFile{ + // Drives the actual save()/Load() round-trip by pointing $HOME at a + // temp dir, so LegacyDir() / ConfigFilePath() resolve into the test + // sandbox. Verifies the on-disk JSON layout matches the in-memory + // package vars after a Load, plus omitempty behaviour for absent + // fields. Avoids machine-wide / elevation paths — those are covered + // in config_nonint_test.go's Windows-specific tests. + tmpHome := t.TempDir() + t.Setenv("HOME", tmpHome) + // On Windows, os.UserHomeDir() also checks USERPROFILE; keep them in + // sync so the test runs cross-platform. + t.Setenv("USERPROFILE", tmpHome) + + want := &ConfigFile{ CustomerID: "test-customer", APIEndpoint: "https://api.example.com", APIKey: "sk-test-key", ScanFrequencyHours: "4", SearchDirs: []string{"/tmp", "/opt/code"}, + InstallDir: "/opt/stepsecurity", } - - data, err := json.MarshalIndent(cfg, "", " ") - if err != nil { - t.Fatal(err) + if err := save(want); err != nil { + t.Fatalf("save: %v", err) } - if err := os.WriteFile(tmpConfigPath, data, 0o600); err != nil { - t.Fatal(err) + if got := ConfigFilePath(); got != filepath.Join(tmpHome, ".stepsecurity", "config.json") { + t.Errorf("ConfigFilePath = %q, want under %s", got, tmpHome) } - // Read it back - readData, err := os.ReadFile(tmpConfigPath) - if err != nil { - t.Fatal(err) - } + // Reset package-level vars to placeholders so Load actually populates + // them (Load is intentionally idempotent — only sets unset values). + prev := struct { + CustomerID, APIKey, InstallDir string + SearchDirs []string + }{CustomerID, APIKey, InstallDir, SearchDirs} + CustomerID = "{{CUSTOMER_ID}}" + APIKey = "{{API_KEY}}" + InstallDir = "" + SearchDirs = nil + t.Cleanup(func() { + CustomerID = prev.CustomerID + APIKey = prev.APIKey + InstallDir = prev.InstallDir + SearchDirs = prev.SearchDirs + }) - var loaded ConfigFile - if err := json.Unmarshal(readData, &loaded); err != nil { - t.Fatal(err) - } + Load() - if loaded.CustomerID != "test-customer" { - t.Errorf("customer_id: expected test-customer, got %s", loaded.CustomerID) + if CustomerID != "test-customer" { + t.Errorf("CustomerID after Load = %q, want test-customer", CustomerID) } - if loaded.APIKey != "sk-test-key" { - t.Errorf("api_key: expected sk-test-key, got %s", loaded.APIKey) + if APIKey != "sk-test-key" { + t.Errorf("APIKey after Load = %q, want sk-test-key", APIKey) } - if len(loaded.SearchDirs) != 2 { - t.Errorf("search_dirs: expected 2 dirs, got %d", len(loaded.SearchDirs)) + if InstallDir != "/opt/stepsecurity" { + t.Errorf("InstallDir after Load = %q, want /opt/stepsecurity", InstallDir) + } + if len(SearchDirs) != 2 || SearchDirs[0] != "/tmp" || SearchDirs[1] != "/opt/code" { + t.Errorf("SearchDirs after Load = %v, want [/tmp /opt/code]", SearchDirs) } } @@ -116,3 +131,32 @@ func TestConfigFile_JSON(t *testing.T) { t.Error("empty api_endpoint should be omitted") } } + +func TestConfigFile_InstallDir_JSONRoundTrip(t *testing.T) { + in := ConfigFile{InstallDir: "/opt/stepsecurity"} + data, err := json.Marshal(in) + if err != nil { + t.Fatal(err) + } + if !bytes.Contains(data, []byte(`"install_dir":"/opt/stepsecurity"`)) { + t.Errorf("install_dir not serialized: %s", data) + } + + var out ConfigFile + if err := json.Unmarshal(data, &out); err != nil { + t.Fatal(err) + } + if out.InstallDir != "/opt/stepsecurity" { + t.Errorf("InstallDir round-trip = %q, want /opt/stepsecurity", out.InstallDir) + } + + // Empty InstallDir is omitted from JSON (omitempty). + empty := ConfigFile{} + data, err = json.Marshal(empty) + if err != nil { + t.Fatal(err) + } + if bytes.Contains(data, []byte("install_dir")) { + t.Errorf("empty install_dir should be omitted: %s", data) + } +} diff --git a/internal/launchd/launchd.go b/internal/launchd/launchd.go index 42233a4..5f8bdcd 100644 --- a/internal/launchd/launchd.go +++ b/internal/launchd/launchd.go @@ -10,6 +10,7 @@ import ( "github.com/step-security/dev-machine-guard/internal/config" "github.com/step-security/dev-machine-guard/internal/executor" + "github.com/step-security/dev-machine-guard/internal/paths" "github.com/step-security/dev-machine-guard/internal/progress" ) @@ -51,10 +52,15 @@ func Install(exec executor.Executor, log *progress.Logger) error { plistPath := daemonPlistPath logDir := systemLogDir + // Non-root LaunchAgent: log dir follows the configured install dir + // (defaults to ~/.stepsecurity). The plist will also bake + // STEPSECURITY_HOME so scheduler-invoked runs of the binary land in + // the same place. Root LaunchDaemon stays on the system path + // (/var/log/stepsecurity) — service-mode root daemons are a + // well-known macOS convention and we don't reroute them yet. if !exec.IsRoot() { plistPath = agentPlistPath() - homeDir, _ := os.UserHomeDir() - logDir = homeDir + "/.stepsecurity" + logDir = paths.Home() } // Ensure directories exist @@ -83,13 +89,25 @@ func Install(exec executor.Executor, log *progress.Logger) error { } } + // StepSecurityHome is baked into the plist so when launchd invokes + // the binary on its own schedule, paths.Home() resolves to the same + // directory the operator configured at install time — without + // depending on config.json or the operator's shell environment. + // Empty when paths.Home() can't resolve (no $HOME for root daemons, + // say); the binary then falls back through its own resolution chain. + stepHome := "" + if !exec.IsRoot() { + stepHome = paths.Home() + } + // Generate plist plistData := plistTemplateData{ - Label: label, - BinaryPath: binaryPath, - IntervalSeconds: intervalSeconds, - LogDir: logDir, - UserHome: userHome, + Label: label, + BinaryPath: binaryPath, + IntervalSeconds: intervalSeconds, + LogDir: logDir, + UserHome: userHome, + StepSecurityHome: stepHome, } f, err := os.Create(plistPath) @@ -179,11 +197,12 @@ func isConfigured(ctx context.Context, exec executor.Executor) bool { } type plistTemplateData struct { - Label string - BinaryPath string - IntervalSeconds int - LogDir string - UserHome string // non-empty when running as root; baked into plist as HOME env var + Label string + BinaryPath string + IntervalSeconds int + LogDir string + UserHome string // non-empty when running as root; baked into plist as HOME env var + StepSecurityHome string // non-empty for LaunchAgent; sets STEPSECURITY_HOME so paths.Home() matches install-time choice } const plistTmpl = ` @@ -200,11 +219,13 @@ const plistTmpl = ` StartInterval {{.IntervalSeconds}} RunAtLoad - {{if .UserHome}} + {{if or .UserHome .StepSecurityHome}} EnvironmentVariables - + {{if .UserHome}} HOME - {{.UserHome}} + {{.UserHome}}{{end}}{{if .StepSecurityHome}} + STEPSECURITY_HOME + {{.StepSecurityHome}}{{end}} {{end}} StandardOutPath {{.LogDir}}/agent.log diff --git a/internal/paths/paths.go b/internal/paths/paths.go new file mode 100644 index 0000000..086b1e3 --- /dev/null +++ b/internal/paths/paths.go @@ -0,0 +1,61 @@ +// Package paths owns the single source of truth for "where does the +// agent put its files." It resolves a base directory (the install dir) +// from a layered set of sources so callers can stop deriving +// ~/.stepsecurity independently: +// +// 1. --install-dir CLI flag (set by main via SetOverride) +// 2. $STEPSECURITY_HOME environment variable (set by service unit / loader) +// 3. install_dir config field (loaded by internal/config) +// 4. ~/.stepsecurity (legacy default) +// +// config.json itself stays at the legacy location regardless — see +// internal/config.LegacyDir — so the agent can always bootstrap. All +// other files (logs, hook errors, the binary placed by the loader) live +// under Home(). +package paths + +import ( + "os" + + "github.com/step-security/dev-machine-guard/internal/config" +) + +// HomeEnvVar is the environment variable consulted in resolution +// step 2. Service installers bake this into their unit files so +// scheduler-invoked runs see the same install dir as interactive ones. +const HomeEnvVar = "STEPSECURITY_HOME" + +// cliOverride captures the --install-dir CLI flag value (step 1). +// Set once at startup by main; never mutated thereafter. +var cliOverride string + +// SetOverride installs the CLI-flag value. Called by main after +// cli.Parse and before any code that calls Home() — see +// cmd/stepsecurity-dev-machine-guard/main.go. +func SetOverride(s string) { + cliOverride = s +} + +// Home returns the resolved install dir. Falls back to LegacyHome when +// nothing else is set. Empty string is possible only when the home +// directory itself cannot be resolved. +func Home() string { + if cliOverride != "" { + return cliOverride + } + if v := os.Getenv(HomeEnvVar); v != "" { + return v + } + if config.InstallDir != "" { + return config.InstallDir + } + return LegacyHome() +} + +// LegacyHome returns ~/.stepsecurity. Exposed for the migration check +// in main and for ShowConfigure displays. Mirrors config.LegacyDir but +// kept here so callers can grab the legacy path without taking a +// package dependency on config. +func LegacyHome() string { + return config.LegacyDir() +} diff --git a/internal/paths/paths_test.go b/internal/paths/paths_test.go new file mode 100644 index 0000000..1b61cbf --- /dev/null +++ b/internal/paths/paths_test.go @@ -0,0 +1,97 @@ +package paths + +import ( + "os" + "strings" + "testing" + + "github.com/step-security/dev-machine-guard/internal/config" +) + +// withOverride temporarily replaces the CLI override and restores it on +// cleanup. The override is package-level mutable state, so tests serialise +// through this helper. +func withOverride(t *testing.T, v string) { + t.Helper() + prev := cliOverride + cliOverride = v + t.Cleanup(func() { cliOverride = prev }) +} + +func withEnv(t *testing.T, key, value string) { + t.Helper() + prev, had := os.LookupEnv(key) + t.Setenv(key, value) + t.Cleanup(func() { + if had { + os.Setenv(key, prev) + } else { + os.Unsetenv(key) + } + }) +} + +func withConfigInstallDir(t *testing.T, v string) { + t.Helper() + prev := config.InstallDir + config.InstallDir = v + t.Cleanup(func() { config.InstallDir = prev }) +} + +func TestHome_DefaultsToLegacy(t *testing.T) { + withOverride(t, "") + withEnv(t, HomeEnvVar, "") + withConfigInstallDir(t, "") + + got := Home() + legacy := LegacyHome() + if legacy == "" { + t.Skip("home dir unresolved in this environment") + } + if got != legacy { + t.Errorf("Home() = %q, want LegacyHome() %q", got, legacy) + } + if !strings.HasSuffix(got, ".stepsecurity") { + t.Errorf("Home() = %q, expected suffix .stepsecurity", got) + } +} + +func TestHome_ConfigOverridesLegacy(t *testing.T) { + withOverride(t, "") + withEnv(t, HomeEnvVar, "") + withConfigInstallDir(t, "/from/config") + + if got := Home(); got != "/from/config" { + t.Errorf("Home() = %q, want /from/config", got) + } +} + +func TestHome_EnvVarOverridesConfig(t *testing.T) { + withOverride(t, "") + withConfigInstallDir(t, "/from/config") + withEnv(t, HomeEnvVar, "/from/env") + + if got := Home(); got != "/from/env" { + t.Errorf("Home() = %q, want /from/env (env > config)", got) + } +} + +func TestHome_CLIOverridesEnv(t *testing.T) { + withConfigInstallDir(t, "/from/config") + withEnv(t, HomeEnvVar, "/from/env") + withOverride(t, "/from/cli") + + if got := Home(); got != "/from/cli" { + t.Errorf("Home() = %q, want /from/cli (cli > env > config)", got) + } +} + +func TestSetOverride_Sticks(t *testing.T) { + withOverride(t, "") + SetOverride("/sticky") + t.Cleanup(func() { SetOverride("") }) + + if got := Home(); got != "/sticky" { + t.Errorf("Home() = %q, want /sticky", got) + } +} diff --git a/internal/progress/filelog/filelog.go b/internal/progress/filelog/filelog.go new file mode 100644 index 0000000..bc6ec2a --- /dev/null +++ b/internal/progress/filelog/filelog.go @@ -0,0 +1,237 @@ +// Package filelog tees the agent's stderr stream to a file on disk so +// that when agent-api is unreachable (network failure, firewall, expired +// credentials), the customer can be asked to share the local log file +// with support. +// +// Mechanism: Start() opens an os.Pipe, swaps os.Stderr for the pipe's +// write end, and spawns a goroutine that reads from the pipe and writes +// to both the original stderr file and the on-disk log file. Stop() +// closes the write end, waits for the goroutine to drain, restores +// os.Stderr, and closes the file. This mirrors the in-memory capture +// already used by internal/telemetry/logcapture.go, with a file in +// place of the bytes.Buffer. +// +// The file is opened with O_APPEND|O_SYNC. O_SYNC matters because +// os.Exit (called from many sites in main.go) skips defers, so the tee +// goroutine may not flush before the process dies — O_SYNC makes each +// individual Write durable at the cost of a few extra ms per run. +// +// Rotation is single-file and per-open: if the existing file exceeds +// maxBytes at Start time, it is renamed to path+".prev" so one prior +// run survives. This is a diagnostic snapshot, not an audit log; the +// 5 MiB cap matches the precedent set by +// internal/aiagents/cli/errlog.go. +// +// In service mode (launchd LaunchAgent, systemd user units, Windows +// Task Scheduler) the OS-level redirect is already writing the agent's +// stderr to a file. StartIfEligible detects this via ShouldSkip +// (os.Stderr.Stat reports a regular file) and returns (nil, nil) +// without opening anything, so the OS redirect remains the sole writer +// and no lines are duplicated. +package filelog + +import ( + "errors" + "fmt" + "io" + "os" + "path/filepath" + "sync" + "sync/atomic" + + "github.com/step-security/dev-machine-guard/internal/paths" +) + +// DefaultMaxBytes is the size cap that triggers single-file rotation at +// Start time. Matches internal/aiagents/cli/errlog.go's MaxErrorLogBytes +// for consistency with the existing on-disk-log convention. +const DefaultMaxBytes int64 = 5 * 1024 * 1024 + +// Filename is the basename of the stderr log file the agent writes +// into the configured base directory. Exported so callers (main, +// configure prompts) can show the full resolved path to the user +// without duplicating the literal. +const Filename = "agent.error.log" + +// StdoutFilename is the basename of the stdout log file the OS-level +// scheduler redirect writes to in service mode. The agent itself does +// not write here, but RotateIfOverCap is applied to it at startup so +// it can't grow unbounded. +const StdoutFilename = "agent.log" + +const ( + fileMode os.FileMode = 0o600 + dirMode os.FileMode = 0o700 +) + +// Capture owns the swapped os.Stderr, the pipe ends, the log file, and +// the tee goroutine's lifecycle. A nil receiver is safe to call Stop on +// — that simplifies main.go where StartIfEligible may return nil. +type Capture struct { + origErr *os.File + pipeRead *os.File + pipeWrite *os.File + file *os.File + done chan struct{} + stopped atomic.Bool + stopMu sync.Mutex +} + +// DefaultDir returns the resolved install dir (paths.Home()) — that is +// the directory the agent writes agent.error.log into by default. +// Empty string if the home directory cannot be resolved; callers treat +// empty as "disabled." +func DefaultDir() string { + return paths.Home() +} + +// DefaultPath is a convenience for DefaultDir() + Filename. +func DefaultPath() string { + dir := DefaultDir() + if dir == "" { + return "" + } + return filepath.Join(dir, Filename) +} + +// ShouldSkip reports whether the in-process file writer should be +// skipped because the given stream is already redirected to a regular +// file by the OS (service mode). Terminals report ModeCharDevice and +// pipes report ModeNamedPipe; only regular files trigger the skip. +// +// Errors from Stat are treated as "no skip" — better to log to a fresh +// file than to log nowhere. +func ShouldSkip(stderr *os.File) bool { + if stderr == nil { + return false + } + info, err := stderr.Stat() + if err != nil { + return false + } + return info.Mode().IsRegular() +} + +// RotateIfOverCap renames path to path+".prev" when its current size +// exceeds maxBytes, freeing the original name for a fresh log. Used by +// Start before opening the stderr file, and by main at startup for the +// stdout-redirect file (agent.log) so neither log can grow unbounded. +// Best-effort: any stat/rename failure is swallowed so a missing file +// or a cross-device rename doesn't take down the agent. +func RotateIfOverCap(path string, maxBytes int64) { + if path == "" || maxBytes <= 0 { + return + } + info, err := os.Stat(path) + if err != nil || info.Size() <= maxBytes { + return + } + // Windows os.Rename fails when the destination already exists, so + // rotation would silently stop working from the second rotation + // onwards once a .prev was on disk. POSIX rename atomically + // overwrites, so this Remove is a no-op there. Ignore the error — + // if .prev can't be removed (permissions, in use), the Rename below + // will fail, and we drop the rotation for this run rather than + // taking down the agent. + _ = os.Remove(path + ".prev") + _ = os.Rename(path, path+".prev") +} + +// Start installs the stderr tee, opening (and rotating) the log file +// and swapping os.Stderr in a single best-effort sequence. On any +// failure before os.Stderr is reassigned, the global state is left +// untouched and an error is returned. +// +// path must be non-empty. To install conditionally based on path / +// service-mode detection, use StartIfEligible instead. +func Start(path string, maxBytes int64) (*Capture, error) { + if path == "" { + return nil, errors.New("filelog: empty path") + } + + if err := os.MkdirAll(filepath.Dir(path), dirMode); err != nil { + return nil, fmt.Errorf("filelog: mkdir parent: %w", err) + } + + RotateIfOverCap(path, maxBytes) + + // #nosec G304 -- path is operator-controlled: it comes from the + // cascade default (~/.stepsecurity/agent.error.log) → config file → + // --log-file CLI flag, all owned by the user invoking the binary. + // Same threat model as internal/aiagents/cli/errlog.go's append. + f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_APPEND|os.O_SYNC, fileMode) + if err != nil { + return nil, fmt.Errorf("filelog: open %s: %w", path, err) + } + + r, w, err := os.Pipe() + if err != nil { + _ = f.Close() + return nil, fmt.Errorf("filelog: pipe: %w", err) + } + + c := &Capture{ + origErr: os.Stderr, + pipeRead: r, + pipeWrite: w, + file: f, + done: make(chan struct{}), + } + os.Stderr = w + + go c.teeLoop() + return c, nil +} + +// StartIfEligible is the main.go-facing entrypoint. It returns +// (nil, nil) when capture should be skipped (empty path, or +// ShouldSkip(os.Stderr) returns true), and the result of Start +// otherwise. Callers can `defer cap.Stop()` unconditionally because +// Stop on a nil receiver is a no-op. +func StartIfEligible(path string, maxBytes int64) (*Capture, error) { + if path == "" { + return nil, nil + } + if ShouldSkip(os.Stderr) { + return nil, nil + } + return Start(path, maxBytes) +} + +// Stop closes the pipe write end, waits for the tee goroutine to +// finish draining, restores os.Stderr, and closes the file. Idempotent +// and safe to call on a nil receiver. +func (c *Capture) Stop() error { + if c == nil { + return nil + } + c.stopMu.Lock() + defer c.stopMu.Unlock() + if !c.stopped.CompareAndSwap(false, true) { + return nil + } + + _ = c.pipeWrite.Close() + <-c.done + + os.Stderr = c.origErr + _ = c.pipeRead.Close() + return c.file.Close() +} + +func (c *Capture) teeLoop() { + defer close(c.done) + dst := io.MultiWriter(c.origErr, c.file) + buf := make([]byte, 4096) + for { + n, err := c.pipeRead.Read(buf) + if n > 0 { + // Best-effort: a failed write to dst (file full, disk + // removed) must not stall the agent. + _, _ = dst.Write(buf[:n]) + } + if err != nil { + return + } + } +} diff --git a/internal/progress/filelog/filelog_test.go b/internal/progress/filelog/filelog_test.go new file mode 100644 index 0000000..960a585 --- /dev/null +++ b/internal/progress/filelog/filelog_test.go @@ -0,0 +1,328 @@ +package filelog + +import ( + "bytes" + "fmt" + "os" + "path/filepath" + "runtime" + "strings" + "sync" + "testing" +) + +// withStderr swaps os.Stderr for the duration of the test and restores +// it on cleanup. Centralises the dance so test failures don't leak the +// swap into other tests in the same package run. +func withStderr(t *testing.T, f *os.File) { + t.Helper() + orig := os.Stderr + os.Stderr = f + t.Cleanup(func() { os.Stderr = orig }) +} + +func TestDefaultDir(t *testing.T) { + got := DefaultDir() + if got == "" { + t.Skip("home dir unresolved in this environment") + } + if !strings.HasSuffix(got, ".stepsecurity") { + t.Errorf("DefaultDir() = %q, expected suffix .stepsecurity", got) + } +} + +func TestDefaultPath(t *testing.T) { + got := DefaultPath() + if got == "" { + t.Skip("home dir unresolved in this environment") + } + if !strings.HasSuffix(got, filepath.Join(".stepsecurity", Filename)) { + t.Errorf("DefaultPath() = %q, expected suffix .stepsecurity/%s", got, Filename) + } +} + +func TestStartStopWritesFile(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "agent.error.log") + + origStderr := os.Stderr + t.Cleanup(func() { os.Stderr = origStderr }) + + cap, err := Start(path, DefaultMaxBytes) + if err != nil { + t.Fatalf("Start: %v", err) + } + + if _, err := fmt.Fprintln(os.Stderr, "hello world"); err != nil { + t.Fatalf("Fprintln: %v", err) + } + + if err := cap.Stop(); err != nil { + t.Fatalf("Stop: %v", err) + } + + data, err := os.ReadFile(path) + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + if !bytes.Contains(data, []byte("hello world")) { + t.Errorf("file missing payload: %q", data) + } + + info, err := os.Stat(path) + if err != nil { + t.Fatalf("Stat: %v", err) + } + // File mode is best-effort on Windows; only assert on POSIX. + if runtime.GOOS != "windows" { + if mode := info.Mode().Perm(); mode != fileMode { + t.Errorf("file mode = %#o, want %#o", mode, fileMode) + } + } +} + +func TestRotateIfOverCap_OverwritesExistingPrev(t *testing.T) { + // Regression guard: on Windows, os.Rename fails when the destination + // already exists, so a stale .prev would block all subsequent + // rotations. RotateIfOverCap removes the prior .prev first. + dir := t.TempDir() + path := filepath.Join(dir, "agent.error.log") + prevPath := path + ".prev" + + if err := os.WriteFile(prevPath, []byte("STALE prior rotation"), fileMode); err != nil { + t.Fatalf("seed .prev: %v", err) + } + if err := os.WriteFile(path, bytes.Repeat([]byte("y"), 200), fileMode); err != nil { + t.Fatalf("seed current: %v", err) + } + + RotateIfOverCap(path, 100) + + got, err := os.ReadFile(prevPath) + if err != nil { + t.Fatalf(".prev gone after rotation: %v", err) + } + // .prev now holds the FRESH old content (200 bytes of 'y'), not the + // stale "STALE prior rotation" string. + if bytes.Equal(got, []byte("STALE prior rotation")) { + t.Errorf(".prev still has stale content; rotation failed to overwrite") + } + if len(got) != 200 { + t.Errorf(".prev size = %d, want 200 (fresh rotated content)", len(got)) + } + if _, err := os.Stat(path); !os.IsNotExist(err) { + t.Errorf("original path should be gone after rename: stat err = %v", err) + } +} + +func TestRotateIfOverCap_NoopUnderCap(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "agent.error.log") + if err := os.WriteFile(path, []byte("small"), fileMode); err != nil { + t.Fatalf("seed: %v", err) + } + RotateIfOverCap(path, 1024) + if _, err := os.Stat(path + ".prev"); !os.IsNotExist(err) { + t.Errorf(".prev unexpectedly created when file was under cap") + } +} + +func TestStartRotatesWhenOverCap(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "agent.error.log") + + // Pre-populate with content exceeding our cap. + oldContent := bytes.Repeat([]byte("x"), 200) + if err := os.WriteFile(path, oldContent, fileMode); err != nil { + t.Fatalf("seed: %v", err) + } + + origStderr := os.Stderr + t.Cleanup(func() { os.Stderr = origStderr }) + + cap, err := Start(path, 100) // cap = 100 bytes, seeded with 200 + if err != nil { + t.Fatalf("Start: %v", err) + } + if err := cap.Stop(); err != nil { + t.Fatalf("Stop: %v", err) + } + + prev, err := os.ReadFile(path + ".prev") + if err != nil { + t.Fatalf("missing .prev after rotation: %v", err) + } + if !bytes.Equal(prev, oldContent) { + t.Errorf(".prev content mismatch: got %d bytes, want %d", len(prev), len(oldContent)) + } + + cur, err := os.ReadFile(path) + if err != nil { + t.Fatalf("missing current file after rotation: %v", err) + } + if len(cur) != 0 { + t.Errorf("current file should be empty after rotation, got %d bytes", len(cur)) + } +} + +func TestStartReturnsErrOnUnwritableDir(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("chmod 0o500 semantics differ on Windows") + } + if os.Geteuid() == 0 { + t.Skip("root bypasses directory permissions") + } + + dir := t.TempDir() + readonly := filepath.Join(dir, "ro") + if err := os.Mkdir(readonly, 0o500); err != nil { + t.Fatalf("mkdir: %v", err) + } + path := filepath.Join(readonly, "child", "agent.error.log") + + origStderr := os.Stderr + t.Cleanup(func() { os.Stderr = origStderr }) + + cap, err := Start(path, DefaultMaxBytes) + if err == nil { + t.Fatalf("expected error opening under unwritable dir") + } + if cap != nil { + t.Errorf("expected nil Capture on error, got %v", cap) + } + if os.Stderr != origStderr { + t.Errorf("os.Stderr was mutated despite Start failure") + } +} + +func TestConcurrentWritesIntact(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "agent.error.log") + + origStderr := os.Stderr + t.Cleanup(func() { os.Stderr = origStderr }) + + cap, err := Start(path, DefaultMaxBytes) + if err != nil { + t.Fatalf("Start: %v", err) + } + + const goroutines = 8 + const linesPer = 100 + + var wg sync.WaitGroup + for g := range goroutines { + wg.Add(1) + go func(g int) { + defer wg.Done() + for i := range linesPer { + fmt.Fprintf(os.Stderr, "g%d-line-%03d\n", g, i) + } + }(g) + } + wg.Wait() + + if err := cap.Stop(); err != nil { + t.Fatalf("Stop: %v", err) + } + + data, err := os.ReadFile(path) + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + for g := range goroutines { + for i := range linesPer { + line := fmt.Sprintf("g%d-line-%03d\n", g, i) + if !bytes.Contains(data, []byte(line)) { + t.Errorf("missing line: %q", strings.TrimSpace(line)) + } + } + } +} + +func TestShouldSkipDetectsRegularFile(t *testing.T) { + tmp, err := os.CreateTemp("", "filelog-skip-*") + if err != nil { + t.Fatalf("CreateTemp: %v", err) + } + t.Cleanup(func() { + tmp.Close() + os.Remove(tmp.Name()) + }) + if !ShouldSkip(tmp) { + t.Error("regular file should trigger skip") + } + + r, w, err := os.Pipe() + if err != nil { + t.Fatalf("Pipe: %v", err) + } + t.Cleanup(func() { r.Close(); w.Close() }) + if ShouldSkip(r) { + t.Error("pipe must not trigger skip") + } + + if ShouldSkip(nil) { + t.Error("nil must not trigger skip") + } +} + +func TestStartIfEligibleSkipsForRegularFile(t *testing.T) { + dir := t.TempDir() + logPath := filepath.Join(dir, "agent.error.log") + + fakeStderr, err := os.Create(filepath.Join(dir, "fake-stderr")) + if err != nil { + t.Fatalf("Create: %v", err) + } + t.Cleanup(func() { fakeStderr.Close() }) + withStderr(t, fakeStderr) + + cap, err := StartIfEligible(logPath, DefaultMaxBytes) + if err != nil { + t.Fatalf("StartIfEligible: %v", err) + } + if cap != nil { + t.Errorf("expected nil Capture when stderr is regular file, got %v", cap) + _ = cap.Stop() + } + if _, err := os.Stat(logPath); !os.IsNotExist(err) { + t.Errorf("log file should not have been created: stat err = %v", err) + } +} + +func TestStartIfEligibleSkipsForEmptyPath(t *testing.T) { + cap, err := StartIfEligible("", DefaultMaxBytes) + if err != nil { + t.Fatalf("StartIfEligible: %v", err) + } + if cap != nil { + t.Errorf("expected nil Capture for empty path, got %v", cap) + } +} + +func TestStopIsIdempotent(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "agent.error.log") + + origStderr := os.Stderr + t.Cleanup(func() { os.Stderr = origStderr }) + + cap, err := Start(path, DefaultMaxBytes) + if err != nil { + t.Fatalf("Start: %v", err) + } + if err := cap.Stop(); err != nil { + t.Fatalf("first Stop: %v", err) + } + if err := cap.Stop(); err != nil { + t.Errorf("second Stop returned error: %v", err) + } +} + +func TestStopOnNilReceiverIsNoOp(t *testing.T) { + var c *Capture + if err := c.Stop(); err != nil { + t.Errorf("Stop on nil receiver returned error: %v", err) + } +} diff --git a/internal/schtasks/schtasks.go b/internal/schtasks/schtasks.go index 032b74c..7025ead 100644 --- a/internal/schtasks/schtasks.go +++ b/internal/schtasks/schtasks.go @@ -8,6 +8,7 @@ import ( "github.com/step-security/dev-machine-guard/internal/config" "github.com/step-security/dev-machine-guard/internal/executor" + "github.com/step-security/dev-machine-guard/internal/paths" "github.com/step-security/dev-machine-guard/internal/progress" ) @@ -57,8 +58,21 @@ func Install(exec executor.Executor, log *progress.Logger) error { } } - args := buildCreateArgs(binaryPath, logDir, hours, exec.IsRoot()) - log.Debug("schtasks create: binary=%q log_dir=%q hours=%d is_admin=%v", binaryPath, logDir, hours, exec.IsRoot()) + // Bake STEPSECURITY_HOME into the task command. Admin/INTERACTIVE + // tasks (machine-wide installs) anchor at C:\ProgramData\StepSecurity + // so the scheduled task and the MSI's earlier write_config agree on + // the dir, regardless of which interactive user is logged on at fire + // time. Non-admin tasks track paths.Home() (whatever the user's own + // install dir resolves to). + var stepHome string + if exec.IsRoot() { + stepHome = `C:\ProgramData\StepSecurity` + } else { + stepHome = paths.Home() + } + + args := buildCreateArgs(binaryPath, logDir, stepHome, hours, exec.IsRoot()) + log.Debug("schtasks create: binary=%q log_dir=%q step_home=%q hours=%d is_admin=%v", binaryPath, logDir, stepHome, hours, exec.IsRoot()) _, stderr, exitCode, err := exec.Run(ctx, "schtasks", args...) log.Debug("schtasks /create: exit_code=%d err=%v", exitCode, err) @@ -110,9 +124,16 @@ func isConfigured(ctx context.Context, exec executor.Executor) bool { return exitCode == 0 } -func buildCreateArgs(binaryPath, logDir string, hours int, isAdmin bool) []string { - taskCmd := fmt.Sprintf(`cmd /c "\"%s\" send-telemetry >> \"%s\agent.log\" 2>> \"%s\agent.error.log\""`, - binaryPath, logDir, logDir) +func buildCreateArgs(binaryPath, logDir, stepHome string, hours int, isAdmin bool) []string { + // `set "VAR=value"` is the safe cmd.exe form when the value may + // contain spaces or `&` / `|` / `<` / `>` metacharacters — without + // the quotes, cmd.exe would split on the first space and the + // remainder would be treated as a separate command. Inside the outer + // `cmd /c "..."` quotes the inner double-quotes are escaped as `""`. + // stepHome ultimately comes from $HOME (e.g. `C:\Users\John Doe`), + // so quoting is load-bearing here. + taskCmd := fmt.Sprintf(`cmd /c "set ""STEPSECURITY_HOME=%s"" && \"%s\" send-telemetry >> \"%s\agent.log\" 2>> \"%s\agent.error.log\""`, + stepHome, binaryPath, logDir, logDir) args := []string{"/create", "/tn", taskName, "/tr", taskCmd, "/sc", "HOURLY", "/mo", strconv.Itoa(hours), "/f"} if isAdmin { @@ -131,6 +152,12 @@ func resolveLogDir(exec executor.Executor) string { if exec.IsRoot() { return `C:\ProgramData\StepSecurity` } + if dir := paths.Home(); dir != "" { + return dir + } + // paths.Home() resolves $HOME via os.UserHomeDir, which can fail on + // systems where HOME/USERPROFILE aren't set; fall back to the + // executor's known-good lookup before resorting to ProgramData. homeDir, _ := exec.CurrentUser() if homeDir != nil { return homeDir.HomeDir + `\.stepsecurity` diff --git a/internal/schtasks/schtasks_test.go b/internal/schtasks/schtasks_test.go index 34afc11..6df61b0 100644 --- a/internal/schtasks/schtasks_test.go +++ b/internal/schtasks/schtasks_test.go @@ -73,6 +73,11 @@ func TestInstall_CreateFails(t *testing.T) { } func TestResolveLogDir_NonAdmin(t *testing.T) { + // paths.Home() is the primary source post-refactor. Drive it via + // STEPSECURITY_HOME so the test exercises the same code path that + // the launchd/systemd installers feed. + t.Setenv("STEPSECURITY_HOME", `C:\Users\testuser\.stepsecurity`) + mock := executor.NewMock() mock.SetGOOS("windows") mock.SetIsRoot(false) @@ -98,7 +103,7 @@ func TestResolveLogDir_Admin(t *testing.T) { } func TestBuildCreateArgs_CustomFrequency(t *testing.T) { - args := buildCreateArgs(`C:\agent.exe`, `C:\logs`, 6, false) + args := buildCreateArgs(`C:\agent.exe`, `C:\logs`, `C:\logs`, 6, false) // Find the /mo argument and check its value foundMo := false @@ -116,7 +121,7 @@ func TestBuildCreateArgs_CustomFrequency(t *testing.T) { } func TestBuildCreateArgs_Admin(t *testing.T) { - args := buildCreateArgs(`C:\agent.exe`, `C:\logs`, 4, true) + args := buildCreateArgs(`C:\agent.exe`, `C:\logs`, `C:\ProgramData\StepSecurity`, 4, true) foundRU := false for i, a := range args { @@ -133,7 +138,7 @@ func TestBuildCreateArgs_Admin(t *testing.T) { } func TestBuildCreateArgs_NonAdmin(t *testing.T) { - args := buildCreateArgs(`C:\agent.exe`, `C:\logs`, 4, false) + args := buildCreateArgs(`C:\agent.exe`, `C:\logs`, `C:\logs`, 4, false) for _, a := range args { if a == "/ru" { diff --git a/internal/systemd/systemd.go b/internal/systemd/systemd.go index 3a15d37..8d61b92 100644 --- a/internal/systemd/systemd.go +++ b/internal/systemd/systemd.go @@ -11,6 +11,7 @@ import ( "github.com/step-security/dev-machine-guard/internal/config" "github.com/step-security/dev-machine-guard/internal/executor" + "github.com/step-security/dev-machine-guard/internal/paths" "github.com/step-security/dev-machine-guard/internal/progress" ) @@ -41,7 +42,7 @@ func Install(exec executor.Executor, log *progress.Logger) error { } homeDir, _ := os.UserHomeDir() - logDir := filepath.Join(homeDir, ".stepsecurity") + logDir := paths.Home() if err := os.MkdirAll(logDir, 0o755); err != nil { return fmt.Errorf("creating log directory: %w", err) } @@ -52,9 +53,10 @@ func Install(exec executor.Executor, log *progress.Logger) error { } data := unitTemplateData{ - BinaryPath: systemdEscape(binaryPath), - LogDir: systemdEscape(logDir), - Hours: hours, + BinaryPath: systemdEscape(binaryPath), + LogDir: systemdEscape(logDir), + Hours: hours, + StepSecurityHome: logDir, // unescaped; systemd Environment= handles its own quoting } // Write service unit @@ -180,9 +182,10 @@ func writeTemplate(path, tmplStr string, data unitTemplateData) error { } type unitTemplateData struct { - BinaryPath string // systemd-escaped (spaces replaced with \x20) - LogDir string - Hours int + BinaryPath string // systemd-escaped (spaces replaced with \x20) + LogDir string + Hours int + StepSecurityHome string // baked into Environment= so paths.Home() resolves under timer-invoked runs } // systemdEscape escapes a file path for use in systemd unit files. @@ -191,12 +194,20 @@ func systemdEscape(path string) string { return strings.ReplaceAll(path, " ", `\x20`) } +// Environment="VAR=value" with the value-bearing form quoted so paths +// containing spaces (e.g. /opt/Step Security/) survive systemd's +// whitespace splitting. Per systemd.exec(5), the entire "VAR=value" can +// be wrapped in either single or double quotes; we use double quotes +// for consistency with the rest of the unit and only need to worry +// about embedded `"` characters in the value, which would themselves be +// unusual in a filesystem path. const serviceTmpl = `[Unit] Description=StepSecurity Dev Machine Guard scan [Service] Type=oneshot ExecStart={{.BinaryPath}} send-telemetry +Environment="STEPSECURITY_HOME={{.StepSecurityHome}}" StandardOutput=append:{{.LogDir}}/agent.log StandardError=append:{{.LogDir}}/agent.error.log ` diff --git a/internal/telemetry/logcapture.go b/internal/telemetry/logcapture.go index 31ede93..1b25652 100644 --- a/internal/telemetry/logcapture.go +++ b/internal/telemetry/logcapture.go @@ -11,6 +11,14 @@ import ( // LogCapture captures all stderr output during telemetry execution. // The captured output is base64-encoded and included in the execution_logs payload. +// +// Nesting with internal/progress/filelog: when filelog is active, +// os.Stderr is already the filelog pipe's write end. StartCapture saves +// that value as origErr, swaps os.Stderr to its own pipe, and on +// Finalize restores os.Stderr = origErr — re-enabling the filelog tee. +// Do not change Finalize to assign os.Stderr to the "real" stderr +// directly; that would orphan filelog mid-run and lose the suffix of +// the log file. type LogCapture struct { buf bytes.Buffer mu sync.Mutex