Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,16 @@ jobs:
steps:
- uses: actions/checkout@v6

# Migration-safety gate (truehomie-db DROP hardening, task D3): any
# destructive DDL (DROP TABLE/DATABASE/SCHEMA/COLUMN/ROLE/USER, TRUNCATE,
# DELETE FROM without WHERE) in a migrations/*.sql file fails CI unless
# the file carries an explicit `-- destructive: acknowledged <reason>`
# marker. Self-test runs first so a regression in the gate itself reds CI.
- name: Migration safety gate
run: |
bash scripts/check-migration-safety.sh --self-test
bash scripts/check-migration-safety.sh

- name: Checkout proto sibling (replace ../proto)
uses: actions/checkout@v6
with:
Expand Down
69 changes: 69 additions & 0 deletions internal/backend/mongo/dropguard_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package mongo

// dropguard_test.go — name-convention guard tests for the Mongo Deprovision
// path (truehomie hardening, task D3). The invariant: a reserved/system naming
// token is refused BEFORE any connection, and a reserved derived candidate name
// is never passed to dropUser/dropDatabase.

import (
"context"
"errors"
"testing"
"time"

"instant.dev/provisioner/internal/dropguard"
)

// TestLocalDeprovision_RefusedToken_NeverConnects points the backend at an
// unreachable URI: a refused token must return dropguard.ErrRefused instantly,
// proving the guard runs before connect (a connect attempt would instead
// return a server-selection error after the timeout).
func TestLocalDeprovision_RefusedToken_NeverConnects(t *testing.T) {
b := newLocalBackend("mongodb://127.0.0.1:1", "127.0.0.1:1")
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
defer cancel()
for _, tok := range []string{"postgres", "instant_customers", "", "admin", "a b"} {
err := b.Deprovision(ctx, tok, "")
if !errors.Is(err, dropguard.ErrRefused) {
t.Fatalf("Deprovision(%q): expected dropguard.ErrRefused before connect, got %v", tok, err)
}
}
}

// TestLocalDeprovision_ReservedLegacyCandidate_SkippedNotFatal uses a token
// whose 12-char legacy truncation collides with a reserved identifier
// ("instant_customersabc"[:12] == "instant_cust"). The legacy candidate must
// be SKIPPED (refused, logged) while the canonical candidate proceeds — the
// deprovision still succeeds.
func TestLocalDeprovision_ReservedLegacyCandidate_SkippedNotFatal(t *testing.T) {
uri, ok := liveMongoURI(t)
if !ok {
t.Skip("CUSTOMER_MONGO_URL unreachable; skipping")
}
b := newLocalBackend(uri, hostFromURI(uri))
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
if err := b.Deprovision(ctx, "instant_customersabc", ""); err != nil {
t.Fatalf("Deprovision with reserved legacy candidate: want nil (skip), got %v", err)
}
}

// TestLocalDeprovision_ReservedCanonicalDB_ReturnsError uses a token that
// passes the token guard ("ad-min") but whose canonical dash-stripped form is
// the reserved identifier "admin" — so the CANONICAL candidates (usr_admin,
// db_admin) embed a reserved tail. A refused canonical DB candidate is a hard
// error (unlike a legacy candidate, which is skipped): the deprovision must
// fail loudly rather than silently leak the real database.
func TestLocalDeprovision_ReservedCanonicalDB_ReturnsError(t *testing.T) {
uri, ok := liveMongoURI(t)
if !ok {
t.Skip("CUSTOMER_MONGO_URL unreachable; skipping")
}
b := newLocalBackend(uri, hostFromURI(uri))
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
err := b.Deprovision(ctx, "ad-min", "")
if !errors.Is(err, dropguard.ErrRefused) {
t.Fatalf("Deprovision(%q): expected dropguard.ErrRefused for reserved canonical db candidate, got %v", "ad-min", err)
}
}
41 changes: 35 additions & 6 deletions internal/backend/mongo/mongo.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"go.mongodb.org/mongo-driver/mongo"
"go.mongodb.org/mongo-driver/mongo/options"

"instant.dev/provisioner/internal/dropguard"
"instant.dev/provisioner/internal/poolident"
)

Expand Down Expand Up @@ -196,6 +197,25 @@ func (b *LocalBackend) StorageBytes(ctx context.Context, token, providerResource
// Deprovision drops the user and database for the given token.
// Drops user first, then drops the database.
func (b *LocalBackend) Deprovision(ctx context.Context, token, providerResourceID string) error {
// P0-2: a pool-claimed database/user are named from the pool token, not
// the request token. Resolve the canonical naming token from
// provider_resource_id so dropUser/dropDatabase target the real infra
// instead of no-op'ing on db_<real-token>, which would leak it forever.
namingToken := poolident.NamingToken(token, providerResourceID)

// Name-convention guard (truehomie hardening, task D3): every candidate
// user/database name below derives from this token; refuse the whole
// deprovision — before even connecting — when the token is empty,
// malformed, or a reserved system identifier. Per-candidate names are
// additionally validated in the loops so a future naming.go refactor
// stays covered.
if guardErr := dropguard.CheckNamingToken(namingToken); guardErr != nil {
slog.Error("provisioner.drop.refused",
"event", "provisioner.drop.refused", "site", "nosql.Deprovision",
"token", token, "naming_token", namingToken, "error", guardErr)
return fmt.Errorf("nosql.Deprovision: %w", guardErr)
}

client, err := b.connect(ctx, options.Client().ApplyURI(b.adminURI).
SetServerSelectionTimeout(connectTimeout))
if err != nil {
Expand All @@ -207,18 +227,18 @@ func (b *LocalBackend) Deprovision(ctx context.Context, token, providerResourceI
}
}()

// P0-2: a pool-claimed database/user are named from the pool token, not
// the request token. Resolve the canonical naming token from
// provider_resource_id so dropUser/dropDatabase target the real infra
// instead of no-op'ing on db_<real-token>, which would leak it forever.
namingToken := poolident.NamingToken(token, providerResourceID)

// Drop every candidate user. The resource was provisioned under exactly
// one scheme, but we cannot know which without a stored name, so we drop
// the canonical name and every legacy form. dropUser on a non-existent
// user is harmless (logged, non-fatal).
adminDB := client.Database("admin")
for _, username := range legacyMongoUserNames(namingToken) {
if guardErr := dropguard.CheckUserName(username); guardErr != nil {
slog.Error("provisioner.drop.refused",
"event", "provisioner.drop.refused", "site", "nosql.Deprovision",
"token", token, "user", username, "error", guardErr)
continue
}
if r := adminDB.RunCommand(ctx, bson.D{{Key: "dropUser", Value: username}}); r.Err() != nil {
slog.Debug("nosql.Deprovision: dropUser miss (continuing)", "token", token, "user", username, "error", r.Err())
}
Expand All @@ -229,6 +249,15 @@ func (b *LocalBackend) Deprovision(ctx context.Context, token, providerResourceI
// the canonical name still propagates.
canonicalDB := mongoDBName(namingToken)
for _, dbName := range legacyMongoDBNames(namingToken) {
if guardErr := dropguard.CheckDatabaseName(dbName); guardErr != nil {
slog.Error("provisioner.drop.refused",
"event", "provisioner.drop.refused", "site", "nosql.Deprovision",
"token", token, "db", dbName, "error", guardErr)
if dbName == canonicalDB {
return fmt.Errorf("nosql.Deprovision: %w", guardErr)
}
continue
}
if dropErr := client.Database(dbName).Drop(ctx); dropErr != nil {
if dbName == canonicalDB {
return fmt.Errorf("nosql.Deprovision: drop database %s: %w", dbName, dropErr)
Expand Down
7 changes: 7 additions & 0 deletions internal/backend/postgres/dedicated.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,13 @@ func (p *DedicatedProvider) deprovisionLocal(ctx context.Context, token string)
dbName := "dedicated_db_" + token
username := "dedicated_usr_" + token

// Name-convention guard (truehomie hardening, task D3): validate the FINAL
// constructed identifiers before the destructive statements run. Refusal is
// an error — a wrong-name bug must never reach DROP.
if guardErr := validateDropTargets("db.dedicated.deprovisionLocal", token, dbName, username); guardErr != nil {
return fmt.Errorf("db.dedicated.deprovisionLocal: %w", guardErr)
}

adminDSN := p.localAdminDSN()
conn, err := pgxConnect(ctx, adminDSN)
if err != nil {
Expand Down
31 changes: 31 additions & 0 deletions internal/backend/postgres/dropcheck.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package postgres

// dropcheck.go — the postgres-package seam for the dropguard name-convention
// guard (truehomie hardening, task D3). Every DROP DATABASE / DROP USER this
// package executes validates its FINAL constructed identifiers here first, so
// a bug constructing a wrong target (empty token, system database, admin role)
// is refused before the destructive statement runs.

import (
"log/slog"

"instant.dev/provisioner/internal/dropguard"
)

// validateDropTargets refuses a (dbName, username) pair that does not match the
// per-tenant naming convention. On refusal it emits the structured
// `provisioner.drop.refused` audit event (same event name as the server
// chokepoint, so one NR/grep surface covers every refusal site) and returns the
// dropguard error for the caller to wrap. site names the calling function.
func validateDropTargets(site, token, dbName, username string) error {
err := dropguard.CheckDatabaseName(dbName)
if err == nil {
err = dropguard.CheckUserName(username)
}
if err != nil {
slog.Error("provisioner.drop.refused",
"event", "provisioner.drop.refused", "site", site,
"token", token, "db", dbName, "user", username, "error", err)
}
return err
}
87 changes: 87 additions & 0 deletions internal/backend/postgres/dropcheck_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
package postgres

// dropcheck_test.go — unit tests for the name-convention guard on every DROP
// DATABASE / DROP USER site in this package (truehomie hardening, task D3).
// The invariant: a wrong-name bug must never reach a destructive statement,
// and legitimate per-tenant names must pass untouched.

import (
"context"
"errors"
"testing"

"instant.dev/provisioner/internal/dropguard"
)

func TestValidateDropTargets_OK(t *testing.T) {
if err := validateDropTargets("test.site", "tok",
"db_96edf9eed8ed42929036b63298ec5b2b",
"usr_96edf9eed8ed42929036b63298ec5b2b"); err != nil {
t.Fatalf("validateDropTargets: unexpected refusal: %v", err)
}
}

func TestValidateDropTargets_RefusesBadDatabase(t *testing.T) {
err := validateDropTargets("test.site", "tok", "instant_customers", "usr_ok123")
if !errors.Is(err, dropguard.ErrRefused) {
t.Fatalf("expected ErrRefused for system database, got %v", err)
}
}

func TestValidateDropTargets_RefusesBadUser(t *testing.T) {
// Database valid, user invalid — exercises the second check independently.
err := validateDropTargets("test.site", "tok", "db_ok123", "instanode_admin")
if !errors.Is(err, dropguard.ErrRefused) {
t.Fatalf("expected ErrRefused for admin role, got %v", err)
}
}

// TestLocalDeprovision_RefusedToken_NeverConnects asserts the guard refuses a
// reserved/system naming token BEFORE any admin connection or DROP statement.
// The backend is constructed with a nil router: if the guard let the call
// through, the router deref would panic — passing proves the early return.
func TestLocalDeprovision_RefusedToken_NeverConnects(t *testing.T) {
b := &LocalBackend{}
for _, tok := range []string{"postgres", "instant_customers", "", "x y"} {
err := b.Deprovision(context.Background(), tok, "")
if !errors.Is(err, dropguard.ErrRefused) {
t.Fatalf("Deprovision(%q): expected ErrRefused, got %v", tok, err)
}
}
}

func TestCleanupProvisionPartial_RefusedNames_ExecutesNothing(t *testing.T) {
conn := &fakePGConn{}
cleanupProvisionPartial(conn, "instant_customers", "usr_ok123")
if len(conn.execCalls) != 0 {
t.Fatalf("rollback executed %d statements for a refused database name: %v", len(conn.execCalls), conn.execCalls)
}
cleanupProvisionPartial(conn, "db_ok123", "instanode_admin")
if len(conn.execCalls) != 0 {
t.Fatalf("rollback executed %d statements for a refused user name: %v", len(conn.execCalls), conn.execCalls)
}
}

func TestCleanupProvisionPartial_ValidNames_StillExecutesDrops(t *testing.T) {
// Behaviour preservation: the legitimate rollback path still runs both
// IF EXISTS drops.
conn := &fakePGConn{}
cleanupProvisionPartial(conn, "db_ok123", "usr_ok123")
if len(conn.execCalls) != 2 {
t.Fatalf("rollback: want 2 statements (DROP DATABASE + DROP USER), got %d: %v", len(conn.execCalls), conn.execCalls)
}
}

// TestDedicatedDeprovisionLocal_RefusedToken_NeverConnects mirrors the local
// test for the dedicated provider: a reserved token is refused before the
// admin DSN is ever dialed (empty adminDSN would otherwise attempt a real
// connection to the default customers URL).
func TestDedicatedDeprovisionLocal_RefusedToken_NeverConnects(t *testing.T) {
p := &DedicatedProvider{}
for _, tok := range []string{"postgres", "", "a b"} {
err := p.deprovisionLocal(context.Background(), tok)
if !errors.Is(err, dropguard.ErrRefused) {
t.Fatalf("deprovisionLocal(%q): expected ErrRefused, got %v", tok, err)
}
}
}
16 changes: 16 additions & 0 deletions internal/backend/postgres/local.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,14 @@ func (b *LocalBackend) Provision(ctx context.Context, token, tier string, connLi
// even if the incoming request context is on the verge of its deadline. Both
// DROPs use IF EXISTS so partial states (db created, user not) are safe.
func cleanupProvisionPartial(conn pgConn, dbName, username string) {
// Name-convention guard (truehomie hardening, task D3): this rollback path
// does NOT pass through the server's guardedDrop chokepoint, so it
// validates its own targets. A wrong-name bug must SKIP the drops, never
// execute them — the cost of a refusal is a leaked just-created db, not a
// destroyed customer one.
if validateDropTargets("db.local.cleanupProvisionPartial", "", dbName, username) != nil {
return
}
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
if _, err := conn.Exec(ctx, fmt.Sprintf("DROP DATABASE IF EXISTS %q WITH (FORCE)", dbName)); err != nil {
Expand Down Expand Up @@ -307,6 +315,14 @@ func (b *LocalBackend) Deprovision(ctx context.Context, token, providerResourceI
dbName := dbNamePrefix + namingToken
username := userNamePrefix + namingToken

// Name-convention guard (truehomie hardening, task D3): validate the FINAL
// constructed identifiers right before the destructive statements run, so a
// future refactor that builds them differently is still covered. Refusal is
// an error (the caller's deprovision fails loudly) — never a wrong drop.
if guardErr := validateDropTargets("db.local.Deprovision", token, dbName, username); guardErr != nil {
return fmt.Errorf("db.local.Deprovision: %w", guardErr)
}

adminURL := b.router.AdminURLForResource(poolident.BasePRID(providerResourceID))
conn, err := pgxConnect(ctx, adminURL)
if err != nil {
Expand Down
10 changes: 10 additions & 0 deletions internal/backend/redis/dedicated.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import (
"log/slog"

goredis "github.com/redis/go-redis/v9"

"instant.dev/provisioner/internal/dropguard"
)

// DedicatedProvider provisions a dedicated Redis instance.
Expand Down Expand Up @@ -228,6 +230,14 @@ func (p *DedicatedProvider) deprovisionLocal(ctx context.Context, token, provide
probes = append(probes, legacy)
}
for _, username := range probes {
// Name-convention guard (truehomie hardening, task D3): refuse a
// DELUSER of any non-tenant-shaped name (e.g. "default", admin users).
if guardErr := dropguard.CheckUserName(username); guardErr != nil {
slog.Error("provisioner.drop.refused",
"event", "provisioner.drop.refused", "site", "cache.dedicated.deprovisionLocal",
"token", token, "user", username, "error", guardErr)
continue
}
if err := p.rdb.Do(ctx, "ACL", "DELUSER", username).Err(); err != nil {
// Best-effort — user may not exist.
slog.Warn("cache.dedicated.deprovisionLocal: ACL DELUSER (non-fatal)",
Expand Down
Loading
Loading