diff --git a/.env.example b/.env.example
index 4685eb8..b0f0471 100644
--- a/.env.example
+++ b/.env.example
@@ -148,6 +148,16 @@ ADMIN_TOKEN=
 # Read by: Application.kt
 SENTRY_DSN=
 
+# Better Stack log ingest. Vector sidecar scrapes every docker container
+# on the host and ships NDJSON to https://<BETTERSTACK_INGEST_HOST>/ (set
+# the bare hostname, no scheme) with the bearer token. Both values come
+# from Better Stack → Sources → <your source> → Connect tab.
+# Leave both blank for local dev — Vector will fail to start without them,
+# which is fine because it is only defined in docker-compose.prod.yml.
+# Read by: ops/vector.toml (via docker-compose.prod.yml)
+BETTERSTACK_SOURCE_TOKEN=
+BETTERSTACK_INGEST_HOST=
+
 # =====================================================================
 # Workers / token rotation tuning
 # =====================================================================
diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml
index ac509b1..1669b6e 100644
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@@ -162,6 +162,32 @@ services:
         max-size: "50m"
         max-file: "5"
 
+  # Centralized log shipper. Scrapes every container on the host via the
+  # docker socket (read-only mount) and ships NDJSON to Better Stack. One
+  # agent covers both the free-tier stack and the paid-backend stack
+  # because docker socket sees all containers regardless of which compose
+  # file launched them. Token sourced from .env on the VPS.
+  vector:
+    image: timberio/vector:0.45.0-alpine
+    restart: unless-stopped
+    mem_limit: 256m
+    # Without an explicit --config, Vector loads its default
+    # /etc/vector/vector.yaml and never reads the TOML mounted below.
+    command: ["--config", "/etc/vector/vector.toml"]
+    environment:
+      BETTERSTACK_SOURCE_TOKEN: ${BETTERSTACK_SOURCE_TOKEN}
+      BETTERSTACK_INGEST_HOST: ${BETTERSTACK_INGEST_HOST}
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock:ro
+      - ./ops/vector.toml:/etc/vector/vector.toml:ro
+    # Vector's own logs go to local docker (json-file). If we shipped
+    # them through itself we'd bill for our own chatter and create a loop.
+    logging:
+      driver: "json-file"
+      options:
+        max-size: "20m"
+        max-file: "3"
+
 volumes:
   pgdata:
   msdata:
diff --git a/ops/vector.toml b/ops/vector.toml
new file mode 100644
index 0000000..60d228e
--- /dev/null
+++ b/ops/vector.toml
@@ -0,0 +1,88 @@
+# Vector config — scrapes every docker container on the VPS and ships
+# structured logs to Better Stack. One agent for the whole host so both
+# the free-tier stack (docker-compose.prod.yml in this repo) and the
+# paid-backend stack (separate compose at /opt/paid-backend) are covered
+# by mounting the host's docker socket read-only.
+#
+# Bearer token is sourced from the BETTERSTACK_SOURCE_TOKEN environment
+# variable that docker compose injects from the VPS .env file. Token
+# never appears in this file or git.
+#
+# KNOWN LIMITATION (docker.sock attack surface):
+# Mounting /var/run/docker.sock into this container — even with :ro — gives
+# Vector full Docker API access (the :ro flag restricts the socket FILE,
+# not the API operations the daemon will perform on its behalf). A Vector
+# container compromise would let an attacker enumerate, exec into, or
+# replace any container on the host. Mitigated for now by:
+# (1) pinning the Vector image to an exact tag (no :latest),
+# (2) reviewing image releases before bumping the tag,
+# (3) the VPS being single-tenant.
+# Upgrade path: front the socket with `tecnativa/docker-socket-proxy`
+# limiting the daemon API to GET /containers + GET /events + log streams
+# only. Adds one container, ~5 MB RAM. Defer until traffic justifies the
+# extra moving piece.
+
+# ── Source: every container's stdout/stderr ──────────────────────────────────
+[sources.docker]
+type = "docker_logs"
+# Skip Vector's own logs so we don't bill ourselves for our own chatter.
+# exclude_containers matches by PREFIX (the container name starts with the
+# entry), not by substring. Docker Compose names containers
+# `<project>-<service>-<index>` (e.g. `github-store-backend-vector-1`;
+# Compose v1 used `_` separators), so a bare `"vector"` entry only matches
+# a container whose name itself starts with `vector`. The project-prefixed
+# entries below cover the Compose-managed names for both stacks.
+exclude_containers = [
+    "vector",
+    "github-store-backend-vector",
+    "github-store-backend_vector",
+    "paid-backend-vector",
+    "paid-backend_vector",
+]
+
+# ── Transform: tag each event with backend identity ──────────────────────────
+# Adds a `service` field derived from the container name so Better Stack
+# filters can distinguish free-tier (app, postgres, meilisearch, caddy)
+# from paid-backend (paid-app, paid-postgres, …).
+[transforms.tagged]
+type = "remap"
+inputs = ["docker"]
+source = '''
+  .service = .container_name
+  .host = "komi-vps"
+'''
+
+# ── Sink: Better Stack HTTP ingest ───────────────────────────────────────────
+# NDJSON over HTTPS. Their docs:
+# https://betterstack.com/docs/logs/http-rest-api/
+[sinks.betterstack]
+type = "http"
+inputs = ["tagged"]
+uri = "https://${BETTERSTACK_INGEST_HOST}/"
+method = "post"
+encoding.codec = "json"
+framing.method = "newline_delimited"
+compression = "gzip"
+
+[sinks.betterstack.auth]
+strategy = "bearer"
+token = "${BETTERSTACK_SOURCE_TOKEN}"
+
+# Retry transient delivery failures: up to 5 attempts per request, waiting
+# at most 30 s between attempts.
+[sinks.betterstack.request]
+retry_attempts = 5
+retry_max_duration_secs = 30
+
+# The body is newline-delimited JSON (see framing.method above), so label it
+# as NDJSON rather than as a single JSON document.
+[sinks.betterstack.request.headers]
+Content-Type = "application/x-ndjson"
+
+# Bounded in-memory buffer: rides out a brief Better Stack outage, but once
+# 5000 events are queued new events are dropped (drop_newest) instead of
+# back-pressuring the docker source. Nothing is persisted across restarts.
+[sinks.betterstack.buffer]
+type = "memory"
+max_events = 5000
+when_full = "drop_newest"