diff --git a/.gitignore b/.gitignore index d8fe975a7bd..fd70ee1ce5c 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,13 @@ requestdata.json # screenshots) written during local UI debugging. Not source. .playwright-mcp/ model-picker-open.png + +# Live cluster dumps from `kubectl get -o yaml > …`. NEVER commit: +# Darwin's ConfigMap currently contains real secrets in plaintext (Slack +# tokens, GEN_AI client secret, Jira token, Opsgenie key, etc.) — those +# values would be committed verbatim if temp/ ever got tracked. Real +# secret values for the new k8s/ layout live in gitignored *.env files +# under overlays/. +darwin-kubernetes/temp/ +k8s/overlays/*/secrets.env +k8s/overlays/*/*.secrets.env diff --git a/AGENTS.md b/AGENTS.md index cb86cb4cd8c..93fb7815714 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -64,7 +64,7 @@ moved on substantially. This table is the explicit map. | Indexing runtime | Celery `docfetching` + `docprocessing` workers | **Dask `LocalCluster`** in `update.py` (Celery only does maintenance) | | Number of Celery workers | Eight specialized workers (primary, light, heavy, kg_processing, monitoring, beat, etc.) | One worker + beat, spawned by `dev_run_background_jobs.py` | | Celery task definition | `@shared_task` under `background/celery/tasks/` | `@celery_app.task` in `background/celery/celery_app.py` | -| Celery broker | Redis | SQLAlchemy/Postgres (`sqla+postgresql+psycopg2://…`) | +| Celery broker | Redis | SQLAlchemy/Postgres by default; **optionally Redis** via `CELERY_BROKER_REDIS_ENABLED=true` (logical DB `CELERY_REDIS_DB_NUMBER`, default 1). Prod enables it to keep Celery's queue traffic off Postgres. Indexing is still Dask either way. | | Error handling | `raise OnyxError(OnyxErrorCode.X, …)` everywhere; no `HTTPException` | Plain `HTTPException(status_code=…, detail=…)` is the norm here. `OnyxError` doesn't exist. 
| | FastAPI return types | "Don't use `response_model=`, just type the function" | Both styles exist in this fork (the typed-return-annotation form is the majority — `response_model=` only appears once in `connector.py:560`). New endpoints should use the typed-return form. Don't strip the existing `response_model=` without checking serialization behavior. | | LLM call instrumentation | Every call must open a `LLMFlow`-tagged span via `traced_llm_call(...)` | No tracing system. `LLMFlow` doesn't exist. | @@ -75,6 +75,7 @@ moved on substantially. This table is the explicit map. | Test buckets | `backend/tests/{unit,external_dependency_unit,integration}` + Playwright e2e | No comparable structure here. Most code lacks tests; add tests with the change if practical, otherwise note in PR. | | Plan template | The "Creating a Plan" section in their `CLAUDE.md` (Issues / Notes / Strategy / Tests) | Useful template; can be borrowed for non-trivial changes here too. | | Frontend stack | Next.js 15+, React 18+ | Next.js 14.2.x (App Router), React 18 | +| K8s manifest path | `deployment/kubernetes/*` is what upstream documents | **`darwin-kubernetes/*` is the source of truth for the Darwin prod cluster.** `deployment/kubernetes/*` is upstream legacy / scratch — Darwin doesn't apply from there. New manifests for Darwin go in `darwin-kubernetes/`. See critical fact §9. | **Rule of thumb when reading upstream code or upstream guidance:** assume it doesn't apply unless you can verify the same construct exists here. @@ -164,8 +165,9 @@ web/src/ deployment/docker_compose/ docker-compose.dev.yml ← local stack (relational_db + index/Vespa + api_server + web_server + model_server + - background + nginx). Note: no Redis - here — Celery uses Postgres as its broker. + background + nginx). Celery brokers on + Postgres by default, or Redis when + CELERY_BROKER_REDIS_ENABLED=true. 
``` --- @@ -340,6 +342,96 @@ auto-parse entirely with a raw `requests.get` against the `/drives/{drive_id}/items/{item_id}/content` endpoint using the bearer token. Don't reintroduce the lossy re-serialization. +### 9. `darwin-kubernetes/` is the source of truth for the Darwin cluster + +The repo has two parallel k8s manifest trees and they are **not** kept +in sync: + +| Path | What it is | When to touch | +|---|---|---| +| `darwin-kubernetes/*.yaml` | **The actual manifests applied to Darwin's AKS cluster (the `darwin` kube context).** Image registry is `sfbrdevhelmweacr.azurecr.io/...`, configmap is `env-configmap`, secrets is `danswer-secrets`, indexing pods have `indexcpu`-pool affinity + `darwin/indexing` toleration, env vars come from the Darwin configmap. | **Edit here for any prod-affecting change**, including new deployments. | +| `deployment/kubernetes/*.yaml` | Upstream-style manifests inherited from Onyx / authored to match the OSS docker-compose. Generic image (`danswer/danswer-backend:latest`), no Azure-specific affinity / tolerations, no Darwin-specific configmap wiring. | Reference only — not deployed to Darwin. Useful for seeing the "upstream shape" of a new component before adapting it to `darwin-kubernetes/`. | + +When upstream (or a branch like `feature/backgroundscaling`) adds a +new manifest in `deployment/kubernetes/`, the corresponding +`darwin-kubernetes/` version must be hand-ported with: + +- Image: `sfbrdevhelmweacr.azurecr.io/danswer/danswer-backend:<tag>` +- `envFrom: configMapRef name: env-configmap` +- POSTGRES_USER / POSTGRES_PASSWORD via `secretKeyRef name: danswer-secrets` +- REDIS_PASSWORD via `secretKeyRef name: danswer-secrets, optional: true` + (so unauth'd in-cluster Redis still works) +- For indexing-related pods: `nodeAffinity` on `agentpool=indexcpu` + + `tolerations` for `darwin/indexing/NoSchedule` + `dynamic-pvc` / + `file-connector-pvc` volume mounts. 
+ +A drop-in port that misses any of these will boot in Darwin but +mis-route, miss secrets, or end up on the wrong node pool. The +existing `darwin-kubernetes/background-deployment.yaml` and +`api_server-service-deployment.yaml` are the canonical templates for +the conventions. + +### 10. NEVER use `:latest` (or a floating tag) for Vespa — pin the exact version + +**This caused a full prod outage.** Vespa's config server refuses an +auto-upgrade spanning more than ~30 releases (`VersionState +.verifyVersionIntervalForUpgrade` → `Cannot upgrade from X to Y ... +interval too large`). If a manifest change bumps the Vespa image to a +much newer version, **every Vespa StatefulSet rolls and the config +server crash-loops on bootstrap**, taking the whole cluster down +(config tier → no quorum → cluster-wide `upstream connect error / +connection refused` 503s on search AND the api-server's +`ensure_indices_exist`). + +What triggered it: an image spec of bare `vespaengine/vespa` (which +pulls `:latest` at pull time) was changed to an explicit +`vespaengine/vespa:latest`, and on the next `kubectl apply` `:latest` +had moved 30+ releases ahead of the running version. + +Rules: +- **Pin Vespa to the exact version the cluster runs.** As of this + writing that is **`8.600.35`** — it's the on-disk format the content + nodes' index (1.6M+ docs, 100Gi PVCs) is written in. See the pinned + `images:` entry + comment in `k8s/overlays/{prod,local}/kustomization.yaml`. +- **Upgrades are STEPWISE and deliberate** — at most ~30 releases per + hop, applied as an ordered operation, never a bare tag bump. Do NOT + set `VESPA_SKIP_UPGRADE_CHECK=true` to force a big jump on prod; it + risks the index format. 
+- **Upgrade with `k8s/scripts/vespa-upgrade.sh <version> [ns]`, NOT a + kustomize apply.** Ordering across the 5 StatefulSets (configserver → + admin → content one-ordinal-at-a-time → feed → query, health-gated + between each) is impossible to express declaratively — a `kubectl + apply` rolls them all at once. The manifests support the script via + **per-role logical image names** (`vespa-configserver`, `vespa-admin`, + `vespa-content`, `vespa-feed`, `vespa-query` in `k8s/base/vespa/`) so + versions move independently, plus readiness probes on content/admin + with `publishNotReadyAddresses: true` on `vespa-internal` (peer + discovery must not be readiness-gated). Run `DRY_RUN=1` first. After a + successful upgrade, sync the per-role `newTag`s in the overlays. +- **`k8s/scripts/guarded-apply.sh <overlay>` is the everyday-apply safety + net, not the upgrade tool.** The guard reads the live running Vespa + version, compares it to what the overlay would deploy, and refuses a + >30-minor upgrade / major change / floating tag (and warns on big + downgrades) before it can reach the cluster. It checks against *live* + (not the repo's previous pin) because config drifts out of git. But it + still rolls all roles at once — for an actual version change use + `vespa-upgrade.sh`. +- This applies to any version-stateful StatefulSet, but Vespa is the + one that bites. + +**Recovery if it happens again** (data is safe — it lives on the +content PVCs, untouched): set all 5 Vespa StatefulSets' image back to +the running version (`kubectl set image statefulset/vespa-* ...`), +delete the config-server pods to recreate on the correct version, wait +for `:19071/state/v1/health` → 200, then restart the api-server so +`ensure_indices_exist` redeploys the schema. (Clearing the +config-server ZooKeeper state via `vespa-configserver-remove-state` is +only needed if the ZK state is genuinely corrupt — the version +mismatch alone does NOT require it.) 
Vespa nodes also have **no +liveness probes by design** (an aggressive one kills slow-but-healthy +nodes); readiness probes on the Service-backed nodes +(configserver/query/feed) gate traffic during the slow bootstrap. + --- ## Common workflows diff --git a/MIGRATION.md b/MIGRATION.md new file mode 100644 index 00000000000..a2e321437fe --- /dev/null +++ b/MIGRATION.md @@ -0,0 +1,404 @@ +# Migration Guide + +This branch combines three independent slices of work: + +1. **Background indexing scaling** — Dask scheduler topology, split out + into separate k8s deployments +2. **Redis caching + rate limiting** — read-through KV cache, per-user + request rate limiter, persona-list cache with write-through + invalidation +3. **Assistants UX rework** — Manage Assistants + Assistant Gallery + pages, seed script for local UX testing + +> **TL;DR — everything new is default OFF.** Deploying this branch +> as-is does **not** change runtime behaviour for the chat path or the +> background workers. You opt in per feature by setting env vars. + +The only mandatory deltas at deploy time are: +- Two new Python deps (`redis`, `bokeh`) installed automatically when + the backend image rebuilds against the new `requirements/default.txt`. +- A few non-secret env vars added to the configmap (all defaulting to + empty/false — safe). + +Everything else (Redis pod, cache enablement, rate limits, new +background topology) is opt-in. + +--- + +## 1. 
What's in this branch — quick map + +### Backend / infra + +| Slice | Files | Default state | +|---|---|---| +| Redis foundation + KV cache | `backend/danswer/redis/redis_pool.py`, `dynamic_configs/store.py`, `factory.py`, `configs/app_configs.py` | `REDIS_KV_CACHE_ENABLED=""` → OFF | +| Per-user request rate limiter | `backend/danswer/server/middleware/request_rate_limit.py`, wired on `/send-message` + `/stream-answer-with-quote` | `REQUEST_RATE_LIMIT_ENABLED=""` → OFF | +| Persona list cache | `backend/danswer/db/persona_cache.py`, `db/persona.py` (write-through invalidation), `ee/danswer/db/user_group.py` | `PERSONA_CACHE_ENABLED=""` → OFF | +| Dask scheduler topology | `backend/danswer/background/update.py`, new `deployment/kubernetes/*` manifests | Existing single `background` pod still runs; new manifests not applied unless you `kubectl apply` them | + +### Frontend / UX + +| Page | What changed | Risk | +|---|---|---| +| `/assistants/mine` (Manage) | Drag-and-drop reorder, default pin, visibility toggle, search, bulk actions, undo toast | Cosmetic — backend unchanged | +| `/assistants/gallery` (Browse) | Sections, filter chips, sort, column picker (persists in localStorage), doc-set names | Cosmetic — backend unchanged | + +### Tooling + +| Item | Purpose | +|---|---| +| `backend/scripts/seed_assistants.py` | Local dev seed of ~50 assistants for UX testing | +| `REDIS_CACHING_PLAN.md` | Design rationale; not load-bearing for deploy | + +--- + +## 2. New Python dependencies + +`backend/requirements/default.txt` adds two pins: + +``` +redis==5.0.8 # Redis client for the caching/rate-limit layer +bokeh>=2.4.2,<3.0 # Dask scheduler dashboard at :8787 (bg-scaling commit) +``` + +**Action**: rebuild the backend image. If you `pip install -r` in a +venv, also re-run that. + +--- + +## 3. New env vars (env-configmap) + +All default to empty/false. 
Add to `darwin-kubernetes/env-configmap.yaml` +(already done on this branch — verify and apply): + +```yaml +# Redis connection (only used when one of the *_ENABLED flags below is true) +REDIS_HOST: "redis" # in-cluster service name +REDIS_PORT: "6379" +REDIS_DB_NUMBER: "0" +REDIS_SSL: "" + +# Feature flags — default OFF +REDIS_KV_CACHE_ENABLED: "" # set to "true" to enable read-through KV cache +REDIS_KV_CACHE_TTL_SECONDS: "86400" +REQUEST_RATE_LIMIT_ENABLED: "" # set to "true" to enable +REQUEST_RATE_LIMIT_PER_MINUTE: "" # set a number (e.g. "20") to cap; 0/empty = no per-min cap +REQUEST_RATE_LIMIT_PER_HOUR: "" # set a number (e.g. "300") to cap; 0/empty = no per-hour cap +``` + +`PERSONA_CACHE_ENABLED` / `PERSONA_CACHE_TTL_SECONDS` live in +`backend/danswer/configs/app_configs.py` defaults — you can override +via env if you want to enable, but they're not in the configmap by +default. Add a line if you plan to enable. + +--- + +## 4. New secrets + +`darwin-kubernetes/secrets.yaml` gains one optional key: + +```yaml +stringData: + redis_password: "" # empty for unauth'd in-cluster Redis +``` + +The `api_server` and `background` deployments reference it with +`optional: true`, so an absent or empty value is fine for the unauth'd +in-cluster Redis StatefulSet. + +--- + +## 5. New Kubernetes manifests + +### 5a. Redis StatefulSet (always needed if any Redis flag is on) + +```bash +kubectl apply -f darwin-kubernetes/redis-statefulset.yaml +``` + +What it ships: a single-replica Redis 7.2-alpine, cache-only config +(no AOF, no RDB snapshots, `maxmemory 256mb`, `allkeys-lru`), exposed +as the `redis` ClusterIP Service on 6379. Pod restart drops the cache +— that's intentional; the source of truth is Postgres, and counters +self-heal as windows expire. + +### 5b. 
Dask scaling topology (opt-in, Darwin manifests ready) + +The bg-scaling commit added 5 upstream-style manifests under +`deployment/kubernetes/` (legacy / reference tree; **not** what +Darwin applies from — see AGENTS.md "Critical fact §9"). A later +commit on this branch ported each one to Darwin conventions under +`darwin-kubernetes/`, with the right image registry, configmap / +secrets wiring, REDIS_PASSWORD (optional), indexcpu node affinity, +darwin/indexing toleration, and PVCs: + +- `darwin-kubernetes/background-beat-deployment.yaml` +- `darwin-kubernetes/background-celery-deployment.yaml` +- `darwin-kubernetes/background-indexer-scheduler-deployment.yaml` +- `darwin-kubernetes/dask-scheduler-service-deployment.yaml` +- `darwin-kubernetes/dask-worker-deployment.yaml` + +Plus `deployment/docker_compose/docker-compose.dask-distributed.yml` +(compose variant, for local reproduction of the remote-scheduler +topology — not part of the prod deploy). + +Darwin currently runs `darwin-kubernetes/background-deployment.yaml` +(a single combined beat+celery+indexer pod via supervisord). **The new +manifests are NOT applied automatically** by `kubectl apply -f +darwin-kubernetes/` because the combined deployment is still in place +— you apply each new file explicitly when you want to switch. + +To switch Darwin to the split topology: + +```bash +# 1. Apply the new five (order doesn't matter; they self-discover +# the scheduler Service once it's up). +kubectl apply -f darwin-kubernetes/dask-scheduler-service-deployment.yaml +kubectl apply -f darwin-kubernetes/dask-worker-deployment.yaml +kubectl apply -f darwin-kubernetes/background-beat-deployment.yaml +kubectl apply -f darwin-kubernetes/background-celery-deployment.yaml +kubectl apply -f darwin-kubernetes/background-indexer-scheduler-deployment.yaml + +# 2. Wait for all five to be Ready. +kubectl get pods -l 'app in (background-beat,background-celery,background-indexer-scheduler,dask-scheduler,dask-worker)' + +# 3. 
Once healthy + you've seen an indexing attempt dispatch through +# the new dask-scheduler-service (check the indexer-scheduler +# pod logs), scale the old combined deployment to 0: +kubectl scale deploy/background-deployment --replicas=0 + +# 4. If anything goes wrong, scale back up: +kubectl scale deploy/background-deployment --replicas=1 +# The split pods keep running after this, so BOTH sets are doing the +# work until you also scale the split deployments back to 0 — do that +# promptly (see the duplicate-beat warning below). +``` + +Both deployments can coexist briefly during cutover, but **do NOT +run both at non-zero replicas long-term** — two beat schedulers on +the same Postgres broker fire every crontab task twice. + +--- + +## 6. Deployment order + +Safe to roll out **in this order, defaults OFF**: + +1. Apply the configmap (no behaviour change — flags default OFF): + ```bash + kubectl apply -f darwin-kubernetes/env-configmap.yaml + ``` +2. Apply the (possibly updated) secrets: + ```bash + kubectl apply -f darwin-kubernetes/secrets.yaml + ``` +3. Apply the Redis StatefulSet: + ```bash + kubectl apply -f darwin-kubernetes/redis-statefulset.yaml + ``` +4. Rebuild + push the backend image (so it has `redis` and `bokeh` deps). +5. Rebuild + push the web image (so the UX rewrites ship). +6. Roll out the deployments: + ```bash + kubectl rollout restart deploy/api-server-deployment deploy/background-deployment deploy/web-server-deployment + ``` +7. Wait for health, then verify (§7). + +**At this point nothing has changed for users beyond the reworked +Assistants pages** — Redis is up but +nothing is using it, and the api_server / background pods just have +new dependencies + new env vars they're ignoring. + +--- + +## 7. 
Verification checklist (after deploy, BEFORE flipping flags) + +- [ ] All pods healthy: `kubectl get pods -l 'app in (api-server,background,redis)'` +- [ ] Redis responds: `kubectl exec svc/redis -- redis-cli PING` → `PONG` +- [ ] api_server logs show no errors importing the new modules +- [ ] `/api/health` returns 200 +- [ ] Open `/assistants/mine` — drag handles shown on visible (non-hidden) rows, default-pin shows on first row, search input present +- [ ] Open `/assistants/gallery` — see Yours / Featured sections (and Shared if applicable), filter chips, sort dropdown, columns dropdown +- [ ] Send a chat message — succeeds (proves rate limiter, even though OFF, didn't break the dependency wiring) +- [ ] Background indexer still picks up new indexing attempts (bg-scaling change in `update.py`) + +--- + +## 8. Enabling features (per environment, in any order) + +Each flag is independent. Flipping one doesn't require the others. + +### 8a. Redis KV cache (settings, tokens, invited users) + +```bash +kubectl patch configmap env-configmap --type merge -p '{"data":{"REDIS_KV_CACHE_ENABLED":"true"}}' +kubectl rollout restart deploy/api-server-deployment +``` + +**Smoke test:** change an admin setting in pod A, verify it's visible +on pod B within seconds (not TTL). + +### 8b. Per-user request rate limit + +Pick window values per your traffic shape. Recommended at "few +hundred users" scale: + +```bash +kubectl patch configmap env-configmap --type merge -p \ + '{"data":{"REQUEST_RATE_LIMIT_ENABLED":"true","REQUEST_RATE_LIMIT_PER_MINUTE":"20","REQUEST_RATE_LIMIT_PER_HOUR":"300"}}' +kubectl rollout restart deploy/api-server-deployment +``` + +**Smoke test:** send 21 chat messages in <60s — the 21st returns 429 +with `Retry-After` header. + +### 8c. 
Persona list cache + +```bash +# add to the configmap: +PERSONA_CACHE_ENABLED: "true" +PERSONA_CACHE_TTL_SECONDS: "86400" + +kubectl apply -f darwin-kubernetes/env-configmap.yaml +kubectl rollout restart deploy/api-server-deployment deploy/background-deployment +``` + +> Background pod also gets restarted because `ee/danswer/db/user_group.py` +> mutations from there must bust the cache. + +**Smoke test:** load `/assistants/mine`, edit one assistant's name in +the admin UI, refresh — the name updates immediately (not on TTL). + +### 8d. Bg-scaling Dask topology + +Not enabled by env flag — it's a deployment-shape change. Out of +scope for this PR's flip-a-switch flow; if/when adopted, see §5b. + +--- + +## 9. Rollback + +Each feature flag flips off independently. The two emergency knobs: + +- **Disable a feature flag** (takes effect once the api-server pods + restart, since they read the configmap at startup): + ```bash + kubectl patch configmap env-configmap --type merge -p '{"data":{"REDIS_KV_CACHE_ENABLED":""}}' + kubectl rollout restart deploy/api-server-deployment + ``` +- **Redis pod dies entirely** — every Redis call in this codebase is + wrapped fail-open. The app falls back to direct Postgres reads (cache), + permissive (rate limit), or no invalidation (persona cache; + worst-case 24h staleness via TTL). **No outage.** Logs will be noisy + with `Redis GET/SET/DEL failed: …` warnings — that's the signal that + Redis needs attention. + +To roll back the **code** entirely: revert the merge commit, redeploy. +All features default OFF means even without revert, setting all +`*_ENABLED=""` returns the app to pre-PR behaviour. + +--- + +## 10. Known footguns + +### 10a. ~~Bg-scaling k8s manifests don't have `REDIS_PASSWORD` wired~~ — RESOLVED + +**Closed for the Darwin path.** The 5 ported manifests under +`darwin-kubernetes/` (added in `19335e31`) all wire `REDIS_PASSWORD` +via `secretKeyRef` with `optional: true`, matching the existing +`darwin-kubernetes/background-deployment.yaml` pattern. 
So persona- +cache invalidation from any future Celery / indexer-scheduler / +dask-worker task path will work correctly once you switch to the +split topology. + +The upstream `deployment/kubernetes/*` files are still missing +`REDIS_PASSWORD` env wiring, but **Darwin doesn't apply from that +tree** — it's reference-only (see AGENTS.md "Critical fact §9"). +Leave them alone unless/until you adopt the upstream-style +deployment shape outside Darwin. + +### 10b. `backend/scripts/seed_assistants.py` bypasses persona-cache invalidation + +The seed script writes rows via raw `session.add(Persona(...))` rather +than going through `upsert_persona()`, so `invalidate_personas_all()` +never fires. + +- **Today's impact: none** while `PERSONA_CACHE_ENABLED` is left at its + default (OFF). +- **If you seed with the cache enabled**, `/persona` will keep + returning the pre-seed list until either a real mutation flows + through the proper code path or the 24h TTL kicks in. Manual fix: + ```bash + kubectl exec svc/redis -- redis-cli DEL danswer:personas:all:not_deleted + ``` +- **One-line code fix** if this becomes a recurring problem: import + `invalidate_personas_all` and call it at the end of `main()` in + `seed_assistants.py`. + +### 10c. `update.py` indexing scheduler change needs human eyes + +CLAUDE.md flags `update.py` / scheduler changes for manual confirmation +(past breakage was silent — worker died with no logs). The bg-scaling +commit modifies the Dask scheduler / submission path; verify locally +via `python scripts/dev_run_background_jobs.py` and confirm the worker +boots cleanly + dispatches an indexing attempt without errors. + +### 10d. Frontend's `chosen_assistants` array can hold stale ids after seed wipe + +If you run `python -m scripts.seed_assistants --clear` after seeding, +deleted persona ids may remain in your `User.chosen_assistants` array. 
+This is harmless — `get_personas` filters out non-existent ids — but +will be cleaned up by the next preference write (any Manage page +reorder / hide / show action). + +--- + +## 11. Manual tests recommended before merge + +These need eyes — automated coverage doesn't catch them: + +- [ ] **Background worker boots cleanly** on the rebased branch + (CLAUDE.md gate). `python scripts/dev_run_background_jobs.py`, + confirm clean startup and an indexing attempt dispatches. +- [ ] **Seed 50 assistants locally**, open `/assistants/mine` and + `/assistants/gallery`, exercise: drag-reorder, set default, hide via + toggle, click a hidden row (toggle should pulse), search, bulk + select, undo from a toast, switch column count in gallery, refresh + page → column choice persisted. +- [ ] **With KV cache enabled**, edit a setting on pod A while pod B + is serving — second pod sees the new value without waiting for TTL. +- [ ] **With rate limit enabled**, exceed the per-minute cap; verify + 429 with `Retry-After` header. +- [ ] **With persona cache enabled**, edit an assistant via admin UI; + `/persona` reflects the edit immediately. + +--- + +## 12. 
Branch contents at-a-glance + +17 commits on top of `feature/darwin` (which now includes the merged +`rajiv/add-claude` work — PR #45): + +``` +[BG-scale] darwin-kubernetes: port split-background manifests + lock convention in AGENTS.md +[BG-scale] Scale indexing via remote Dask scheduler topology + +[Docs] docs: add MIGRATION.md covering Redis / bg-scaling / UX + +[UX] Gallery: column picker as dropdown to match Sort +[UX] Gallery: user-controllable column count (segmented control, persists) +[UX] Show document-set names on assistant cards (was: count only) +[UX] Parameterize gallery grid column count (default 3) +[UX] Remove tools chip from Manage Assistants page +[UX] Assistants UX polish: toggle highlight + gallery declutter +[UX] Add backend/scripts/seed_assistants.py for local UX testing +[UX] Assistant Gallery page UX overhaul +[UX] Manage Assistants page UX overhaul + +[Redis] Persona list cache with explicit write-through invalidation +[Redis] P2: per-user request rate limiter on chat/query endpoints +[Redis] P1: Redis foundation + read-through KV cache +[Redis] docs: add Redis caching & scaling plan +``` + +Total: **51 files changed, +6372 / −499**. 63 unit tests pass. diff --git a/REDIS_CACHING_PLAN.md b/REDIS_CACHING_PLAN.md new file mode 100644 index 00000000000..754a3540222 --- /dev/null +++ b/REDIS_CACHING_PLAN.md @@ -0,0 +1,243 @@ +# Redis Caching & Scaling Plan + +**Goal:** expose the chat interface to a few hundred users. Evaluate and +introduce Redis-based caching where it makes sense, alongside the scaling +work that actually gates that user count. + +**Status:** plan only — no code yet. Follows the fork's plan template +(Issues / Important Notes / Strategy / Tests). Treat each phase as an +independently shippable PR. 
+ +--- + +## Context & key findings + +- **This fork has zero Redis today.** The only references in the repo are + comments in `db/index_attempt.py` and `db/retention.py` explaining how + the fork *avoids* Redis (Postgres advisory locks instead of fences, + Postgres as the Celery broker). Adding Redis is **net-new infrastructure**, + which AGENTS.md flags as a substantial dependency, not a drive-by. +- **The real near-term scaling ceiling is the DB connection pool, not the + DB's query throughput.** `db/engine.py:72` sets `pool_size=40, + max_overflow=10` → 50 connections per api_server process. `get_session` + (`db/engine.py:94`) yields one session held for the **whole request**, and + `/send-message` (`server/query_and_chat/chat_backend.py:276`, + `handle_new_chat_message`) returns a `StreamingResponse` — so a connection + is pinned for the entire LLM stream (10–60s). At a few hundred users this + exhausts the pool before query volume ever stresses Postgres. **No cache + fixes this.** +- **A rate limiter already exists but is the wrong kind.** + `server/query_and_chat/token_limit.py::check_token_rate_limits` enforces a + *token-budget* limit (global, DB-backed, EE-overridable). It is not a + *request-rate* limiter and `any_rate_limit_exists()` is gated by a + per-process `@lru_cache` (`token_limit.py:122`) that won't reflect changes + across replicas. +- **The highest-leverage cache seam already exists:** the + `DynamicConfigStore` abstraction (`dynamic_configs/interface.py`, + `store.py`, `factory.py`) is the fork's equivalent of upstream's + `PgRedisKVStore`. Wrapping it gives transparent, write-through-invalidated + caching for everything routed through it with zero call-site changes. + +### Non-goals (explicitly out of scope) + +- **Do not** move the Celery broker to Redis (stays on Postgres — deliberate + divergence). +- **Do not** replace indexing advisory-lock fences with Redis fences. 
+- **Do not** cache chat sessions / messages (too mutable; correctness risk). +- **Do not** cache LLM/embedding *responses* (semantic/correctness risk). +- **Do not** add tenant key-prefixing — this fork is single-tenant. + +--- + +## Phase summary + +| Phase | What | Caching? | Gates the user count? | +|---|---|---|---| +| **P0** | Connection-pool / session-holding fix + multi-replica | No | **Yes — do first** | +| **P1** | Redis foundation + `DynamicConfigStore` read-through cache | Yes (flagship) | Enables the rest | +| **P2** | Redis-backed per-user request rate limiting | No (protection) | Yes, for cost/abuse | +| **P3** | Per-chat-turn config caches (LLM provider, embedding settings) | Yes | Measured add-on | +| **Opt** | Document sets, connector OAuth/API caches, Redis sessions | Yes | Situational | + +--- + +## P0 — Connection pool & session lifetime (prerequisite, not caching) + +### Issues to address +At a few hundred users, concurrent streaming chats pin all 50 connections +per process; unrelated (even cached) requests then queue. This is the first +thing that breaks. + +### Important notes +- `handle_new_chat_message` holds `Depends(get_session)` for the full + `StreamingResponse`. The fix is to scope DB work to *before* the stream + starts (load everything needed, commit the user message), then run the + stream without a pinned pooled connection, opening short-lived sessions + only for the final persistence write. +- This touches the core chat path — **per CLAUDE.md, confirm with the human + and verify the worker/stream boots cleanly** before/after. High blast + radius; ship as its own PR with manual load verification. +- Independently: run **multiple api_server replicas** (k8s) behind nginx, + and size `pool_size` against Postgres `max_connections` ÷ replica count. + +### Implementation strategy +1. Audit `handle_new_chat_message` and the `process_message` generator for + what truly needs the session during streaming vs. before it. +2. 
Introduce a pattern where the streaming generator uses + `get_session_context_manager()` for short writes rather than the + request-scoped `Depends(get_session)`. +3. Bump replica count in + `darwin-kubernetes/api_server-service-deployment.yaml`; re-tune pool. + +### Tests +- Load test: N concurrent streaming chats (N > pool size) — confirm + non-chat endpoints (settings, session list) stay responsive. +- Verify no `QueuePool limit ... connection timed out` under load. + +--- + +## P1 — Redis foundation + DynamicConfigStore cache (flagship) + +### Issues to address +Cache the highest-frequency, fires-on-every-page reads (settings, tokens, +invited users) at one central, low-risk seam, with correct cross-replica +invalidation. + +### Important notes +- Mirror upstream's `PgRedisKVStore` *shape* but fit this fork's interface: + `store(key, val, encrypt)` / `load(key)` / `delete(key)` raising + `ConfigNotFoundError` (`dynamic_configs/interface.py`). +- Write-through invalidation is **free** here — the same `store()`/`delete()` + that writes Postgres updates/clears Redis, so all replicas see changes. +- Single-tenant → plain key prefix (e.g. `danswer_kv:`), no tenant wrapper. +- Redis must be **fail-open**: if Redis is down, fall back to Postgres so an + outage degrades latency, not availability. + +### Implementation strategy +1. **Dependency:** add `redis==<version>` (an exact pin) to `backend/requirements/default.txt`. +2. **Config:** add `REDIS_HOST/REDIS_PORT/REDIS_PASSWORD/REDIS_DB_NUMBER` + to `configs/app_configs.py` via the existing `os.environ.get` pattern; + add a `REDIS_KEY_VALUE_CACHE_TTL` (default ~1 day, mirroring upstream). +3. **Client module:** new `backend/danswer/redis/redis_pool.py` — a + `ConnectionPool` singleton + `get_redis_client()`. (Upstream's + `redis_pool.py` is the template, minus IAM/tenant code.) +4. 
**Cache layer:** add `CachedDynamicConfigStore` (decorator/wrapper around + `PostgresBackedDynamicConfigStore`) in `dynamic_configs/store.py`, or add + Redis read-through directly to the PG store. `load()` checks Redis → + misses fall to PG and repopulate; `store()`/`delete()` write PG then + set/clear Redis. Route it via `dynamic_configs/factory.py` + (`get_dynamic_config_store`) behind a `DYNAMIC_CONFIG_STORE` value so it's + toggleable. +5. **Deployment:** Redis statefulset + service in `darwin-kubernetes/`; + redis service in `deployment/docker_compose/docker-compose.dev.yml`; + wire env in `env-configmap.yaml` + password in `secrets.yaml`. + +### What this transparently caches +Everything through `get_dynamic_config_store()`: app settings +(`server/settings/store.py`, key `danswer_settings`), Slack bot tokens, +invited users, telemetry id, Gmail/GDrive connector-auth blobs. + +### Tests +- Unit: `load` hits Redis on 2nd call (mock PG, assert one PG query); + `store`/`delete` invalidate; Redis-down path falls back to PG (fail-open). +- Integration: change settings via the admin endpoint → second replica (or + fresh client) reads the new value without a TTL wait. + +--- + +## P2 — Redis-backed per-user request rate limiting (protection) + +### Issues to address +A few hundred users on chat = real risk of runaway LLM **cost** and hitting +the **provider's** rate limits. Need per-user request-rate limiting that is +correct across replicas (in-memory counters let through ~N× at N pods). + +### Important notes +- This **complements**, does not replace, the existing token-budget limiter + in `token_limit.py`. Keep that; add request-rate limiting on top. +- Also fixes the latent multi-replica issue: the per-process `@lru_cache` on + `any_rate_limit_exists()` (`token_limit.py:122`) can be made Redis-backed + or given a short TTL so all pods agree. +- No rate-limit middleware exists today (only `latency_logging.py`) — this is + net-new. 
fastapi 0.109.2 is compatible with `fastapi-limiter` or a small + custom `incr`+`expire` limiter. + +### Implementation strategy +1. Add a Redis counter limiter: key `ratelimit:msg:{user_id}:{bucket}`, + atomic `incr` + `expire(window, NX)` (or a small Lua script for + multi-tier limits). Reuse the P1 `redis_pool` client. +2. Apply at the chat entrypoint (`/send-message`) as a dependency, before any + LLM work; raise `HTTPException(429)` (this fork uses `HTTPException`, not + `OnyxError`). +3. Make limits env-configurable in `configs/app_configs.py` (per-user + per-minute / per-hour). Default off via env so it's opt-in per environment. + +### Tests +- Unit: counter increments/expires; exceeds → 429. +- Integration: two clients simulating two replicas share the same limit + (single Redis), confirm the aggregate cap holds. + +--- + +## P3 — Per-chat-turn config caches (measured add-on) + +### Issues to address +Fired on every chat turn × hundreds of users → meaningful aggregate even +though each query is cheap. + +### Important notes +- **Cache the serialized Pydantic snapshot, not the ORM object** — these + return SQLAlchemy models with lazy relationships; caching the ORM instance + risks `DetachedInstanceError` / stale relationship reads. +- Invalidation is **not** free here (unlike P1) — must add explicit + bust/refresh calls inside the relevant `db/` mutation functions. This is + the added surface area; only do it after P0/P1 and after measuring. + +### Implementation strategy +- **Default LLM provider:** cache `db/llm.py::fetch_default_provider` / + `fetch_existing_llm_providers`; invalidate in the provider create/update/ + delete paths in `db/llm.py` and the admin endpoint. +- **Current embedding/search settings:** cache + `db/embedding_model.py::get_current_db_embedding_model`; invalidate on + index-swap (when a new `EmbeddingModel` becomes `PRESENT`). +- Use short TTLs as a backstop even with explicit invalidation. 
+ +### Tests +- Unit: cached fetch returns snapshot; mutation path clears it. +- Integration: change default provider → chat picks it up without restart. + +--- + +## Optional / deferred + +| Item | Where | Note | +|---|---|---| +| **Document sets** | `db/document_set.py::fetch_document_sets` | Global key in this fork (base version ignores `user_id`); write-through on the ~5 mutation fns. Admin-page frequency, modest win. | +| **Connector OAuth / external-API caches** | per-connector (cf. upstream Confluence/Slack) | Only if those connectors are active; cuts external rate-limit pressure. Short TTL. | +| **Redis auth sessions** | `auth/users.py` (fastapi-users RedisStrategy) | Offloads per-request auth from Postgres; bigger change + security/invalidation care. Defer until auth DB load shows up. `SESSION_EXPIRE_TIME_SECONDS` already exists. | +| **Personas list** | — | **Skip backend cache** (per-user + group-membership invalidation trap). Use frontend (SWR) caching instead. | + +--- + +## Cross-cutting + +### New files / touched files +- New: `backend/danswer/redis/redis_pool.py`, Redis k8s manifests. +- Touched: `requirements/default.txt`, `configs/app_configs.py`, + `dynamic_configs/store.py`, `dynamic_configs/factory.py`, + `docker-compose.dev.yml`, `darwin-kubernetes/{env-configmap,secrets}.yaml`, + `api_server-service-deployment.yaml` (replicas). P2/P3 touch + `chat_backend.py`, `token_limit.py`, `db/llm.py`, `db/embedding_model.py`. + +### Restart / bounce list (per CLAUDE.md) +- New env vars / requirements → rebuild + restart api_server (`dapi`), + background jobs (`dbe`), Slack listener (`dsl`). +- `redis` dependency add → `pip install` in the venv before running. + +### Open questions for the human +1. **P0 session-refactor sign-off** — high blast radius on the chat path; + confirm approach + manual load verification before merge. +2. Redis deployment shape in `darwin-kubernetes` — single statefulset vs. + managed Redis? 
Persistence needed (cache-only ⇒ probably not)? +3. Default rate-limit values for P2 (per-user/min, per-user/hour). +4. Sequencing: is P0 acceptable to do in parallel with P1, or strictly first? diff --git a/backend/.gitignore b/backend/.gitignore index 6b3219cc30e..f166ca65e72 100644 --- a/backend/.gitignore +++ b/backend/.gitignore @@ -9,3 +9,9 @@ api_keys.py vespa-app.zip dynamic_config_storage/ celerybeat-schedule* + +# Pywikibot drops these in cwd when the mediawiki connector test runs. +# Local debugging artifacts, not source. +apicache/ +throttle.ctrl +.pytest_cache/ diff --git a/backend/alembic/versions/b2c3d4e5f6a7_analytics_user_daily_stats.py b/backend/alembic/versions/b2c3d4e5f6a7_analytics_user_daily_stats.py new file mode 100644 index 00000000000..bb86201ef3c --- /dev/null +++ b/backend/alembic/versions/b2c3d4e5f6a7_analytics_user_daily_stats.py @@ -0,0 +1,58 @@ +"""Analytics per-user daily stats table (durable leaderboard source) + +Durable per-user-per-day chat activity counts so the "top users by +activity" leaderboard survives chat retention and spans full history. +Upserted daily by the rollup BEFORE the retention sweep (see +db/models.py::AnalyticsUserDailyStats and db/analytics_rollup.py). + +Composite PK (user_id, date). No FK to `user` — email is joined live, so +a deleted user drops off the leaderboard without erasing history. + +Revision ID: b2c3d4e5f6a7 +Revises: e7f8a9b0c1d2 +Create Date: 2026-05-31 + +""" +from alembic import op +import sqlalchemy as sa +import fastapi_users_db_sqlalchemy + + +# revision identifiers, used by Alembic. 
+revision = "b2c3d4e5f6a7" +down_revision = "e7f8a9b0c1d2" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.create_table( + "analytics_user_daily_stats", + sa.Column( + "user_id", fastapi_users_db_sqlalchemy.generics.GUID(), nullable=False + ), + sa.Column("date", sa.Date(), nullable=False), + sa.Column("message_count", sa.Integer(), server_default="0", nullable=False), + sa.Column("like_count", sa.Integer(), server_default="0", nullable=False), + sa.Column("dislike_count", sa.Integer(), server_default="0", nullable=False), + sa.Column( + "rolled_up_at", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.PrimaryKeyConstraint("user_id", "date"), + ) + op.create_index( + "ix_analytics_user_daily_stats_date", + "analytics_user_daily_stats", + ["date"], + ) + + +def downgrade() -> None: + op.drop_index( + "ix_analytics_user_daily_stats_date", + table_name="analytics_user_daily_stats", + ) + op.drop_table("analytics_user_daily_stats") diff --git a/backend/alembic/versions/c3d4e5f6a7b8_analytics_persona_daily_stats.py b/backend/alembic/versions/c3d4e5f6a7b8_analytics_persona_daily_stats.py new file mode 100644 index 00000000000..ab3cf393b61 --- /dev/null +++ b/backend/alembic/versions/c3d4e5f6a7b8_analytics_persona_daily_stats.py @@ -0,0 +1,57 @@ +"""Analytics per-assistant daily stats table + +Durable per-assistant-per-day chat activity counts so the "most-used +assistants" leaderboard (and an approximate datasets-in-use view derived +via persona__document_set) survives chat retention and spans full history. +Upserted daily by the rollup BEFORE the retention sweep (see +db/models.py::AnalyticsPersonaDailyStats and db/analytics_rollup.py). + +Composite PK (persona_id, date). No FK to `persona` — name joined live, so +a deleted assistant drops off without erasing history. 
+ +Revision ID: c3d4e5f6a7b8 +Revises: b2c3d4e5f6a7 +Create Date: 2026-05-31 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "c3d4e5f6a7b8" +down_revision = "b2c3d4e5f6a7" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.create_table( + "analytics_persona_daily_stats", + sa.Column("persona_id", sa.Integer(), nullable=False), + sa.Column("date", sa.Date(), nullable=False), + sa.Column("session_count", sa.Integer(), server_default="0", nullable=False), + sa.Column("message_count", sa.Integer(), server_default="0", nullable=False), + sa.Column("like_count", sa.Integer(), server_default="0", nullable=False), + sa.Column("dislike_count", sa.Integer(), server_default="0", nullable=False), + sa.Column( + "rolled_up_at", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.PrimaryKeyConstraint("persona_id", "date"), + ) + op.create_index( + "ix_analytics_persona_daily_stats_date", + "analytics_persona_daily_stats", + ["date"], + ) + + +def downgrade() -> None: + op.drop_index( + "ix_analytics_persona_daily_stats_date", + table_name="analytics_persona_daily_stats", + ) + op.drop_table("analytics_persona_daily_stats") diff --git a/backend/alembic/versions/d4e5f6a7b8c9_file_store_object_key.py b/backend/alembic/versions/d4e5f6a7b8c9_file_store_object_key.py new file mode 100644 index 00000000000..f2c4e5c88d8 --- /dev/null +++ b/backend/alembic/versions/d4e5f6a7b8c9_file_store_object_key.py @@ -0,0 +1,34 @@ +"""file_store: add object_key, make lobj_oid nullable (object-store backend) + +Lets the file_store table locate bytes either in a Postgres large object +(lobj_oid) OR an object-storage blob (object_key). Both nullable so +PostgresBackedFileStore and AzureBlobFileStore coexist during migration. +See db/models.py::PGFileStore and file_store/file_store.py. 
+ +Revision ID: d4e5f6a7b8c9 +Revises: c3d4e5f6a7b8 +Create Date: 2026-06-01 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "d4e5f6a7b8c9" +down_revision = "c3d4e5f6a7b8" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.add_column("file_store", sa.Column("object_key", sa.String(), nullable=True)) + op.alter_column("file_store", "lobj_oid", existing_type=sa.Integer(), nullable=True) + + +def downgrade() -> None: + # NOTE: only safe if no rows rely on object_key (all bytes back in lobjs). + op.alter_column( + "file_store", "lobj_oid", existing_type=sa.Integer(), nullable=False + ) + op.drop_column("file_store", "object_key") diff --git a/backend/alembic/versions/e5f6a7b8c9d0_document_indexed_content_hash.py b/backend/alembic/versions/e5f6a7b8c9d0_document_indexed_content_hash.py new file mode 100644 index 00000000000..bd6c0d21a61 --- /dev/null +++ b/backend/alembic/versions/e5f6a7b8c9d0_document_indexed_content_hash.py @@ -0,0 +1,35 @@ +"""document: add indexed_content_hash (skip re-index of unchanged content) + +Stores the sha256 of a document's indexed content as of the last successful +Vespa write. The indexing pipeline skips the expensive Vespa clear-and-rewrite +when a connector re-emits a document whose content is unchanged even though its +doc_updated_at advanced (e.g. Salesforce LastModifiedDate churn re-pulling the +whole corpus every poll). Nullable: existing rows fall back to the +doc_updated_at skip until they're next indexed. See +db/models.py::Document and indexing/indexing_pipeline.py::get_doc_ids_to_update. + +Revision ID: e5f6a7b8c9d0 +Revises: d4e5f6a7b8c9 +Create Date: 2026-06-03 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision = "e5f6a7b8c9d0" +down_revision = "d4e5f6a7b8c9" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.add_column( + "document", + sa.Column("indexed_content_hash", sa.String(), nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("document", "indexed_content_hash") diff --git a/backend/alembic/versions/e7f8a9b0c1d2_analytics_user_first_seen.py b/backend/alembic/versions/e7f8a9b0c1d2_analytics_user_first_seen.py new file mode 100644 index 00000000000..52dc15a6743 --- /dev/null +++ b/backend/alembic/versions/e7f8a9b0c1d2_analytics_user_first_seen.py @@ -0,0 +1,56 @@ +"""Analytics user-first-seen table (chat adoption curve) + +Durable per-user "first date this user used chat" aggregate so the +adoption curve on the admin Analytics page survives chat retention +deletes. Populated incrementally by the rollup BEFORE the retention sweep +(see db/models.py::AnalyticsUserFirstSeen and db/analytics_rollup.py). + +No FK to `user` on purpose — deleting a user must not erase the historical +fact that they once adopted chat, nor cascade into this aggregate (mirrors +analytics_daily_rollup). + +Revision ID: e7f8a9b0c1d2 +Revises: c8a4e2f9d1b3 +Create Date: 2026-05-31 + +""" +from alembic import op +import sqlalchemy as sa +import fastapi_users_db_sqlalchemy + + +# revision identifiers, used by Alembic. 
+revision = "e7f8a9b0c1d2" +down_revision = "c8a4e2f9d1b3" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.create_table( + "analytics_user_first_seen", + sa.Column( + "user_id", fastapi_users_db_sqlalchemy.generics.GUID(), nullable=False + ), + sa.Column("first_seen_date", sa.Date(), nullable=False), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.PrimaryKeyConstraint("user_id"), + ) + op.create_index( + "ix_analytics_user_first_seen_first_seen_date", + "analytics_user_first_seen", + ["first_seen_date"], + ) + + +def downgrade() -> None: + op.drop_index( + "ix_analytics_user_first_seen_first_seen_date", + table_name="analytics_user_first_seen", + ) + op.drop_table("analytics_user_first_seen") diff --git a/backend/danswer/background/celery/celery_app.py b/backend/danswer/background/celery/celery_app.py index cce46a9b9f2..ed21fc40576 100644 --- a/backend/danswer/background/celery/celery_app.py +++ b/backend/danswer/background/celery/celery_app.py @@ -14,7 +14,13 @@ from danswer.background.task_utils import name_cc_cleanup_task from danswer.background.task_utils import name_cc_prune_task from danswer.background.task_utils import name_document_set_sync_task +from danswer.configs.app_configs import CELERY_BROKER_REDIS_ENABLED +from danswer.configs.app_configs import CELERY_REDIS_DB_NUMBER from danswer.configs.app_configs import JOB_TIMEOUT +from danswer.configs.app_configs import REDIS_HOST +from danswer.configs.app_configs import REDIS_PASSWORD +from danswer.configs.app_configs import REDIS_PORT +from danswer.configs.app_configs import REDIS_SSL from danswer.connectors.factory import instantiate_connector from danswer.connectors.models import InputType from danswer.db.connector_credential_pair import get_connector_credential_pair @@ -22,7 +28,7 @@ from danswer.db.connector_credential_pair import release_deletion_lock from danswer.db.connector_credential_pair 
import try_acquire_deletion_lock from danswer.db.deletion_attempt import check_deletion_attempt_is_allowed -from danswer.db.document import get_documents_for_connector_credential_pair +from danswer.db.document import get_document_ids_for_connector_credential_pair from danswer.db.document import prepare_to_modify_documents from danswer.db.document_set import delete_document_set from danswer.db.document_set import fetch_document_sets @@ -41,10 +47,31 @@ logger = setup_logger() -connection_string = build_connection_string(db_api=SYNC_DB_API) -celery_broker_url = f"sqla+{connection_string}" -celery_backend_url = f"db+{connection_string}" +if CELERY_BROKER_REDIS_ENABLED: + # Redis broker + result backend. Removes Celery's queue traffic from + # Postgres (the default sqla+/db+ transport polls and writes the DB). + # A dedicated logical DB (CELERY_REDIS_DB_NUMBER) keeps Celery's keys + # off the cache/rate-limit DB. Task status is tracked in our own + # task_queue_jobs table, not this backend, so it's safe to relocate. + _redis_scheme = "rediss" if REDIS_SSL else "redis" + _redis_auth = f":{REDIS_PASSWORD}@" if REDIS_PASSWORD else "" + _redis_url = ( + f"{_redis_scheme}://{_redis_auth}{REDIS_HOST}:{REDIS_PORT}" + f"/{CELERY_REDIS_DB_NUMBER}" + ) + celery_broker_url = _redis_url + celery_backend_url = _redis_url +else: + connection_string = build_connection_string(db_api=SYNC_DB_API) + celery_broker_url = f"sqla+{connection_string}" + celery_backend_url = f"db+{connection_string}" celery_app = Celery(__name__, broker=celery_broker_url, backend=celery_backend_url) +# Retry the broker connection during worker startup instead of crashing if the +# broker isn't reachable yet. Matters now that Redis can be the broker (a hard +# dependency) — the worker may boot before Redis is ready. Also silences the +# Celery 5.3 CPendingDeprecationWarning about this becoming the explicit +# default in 6.0. 
+celery_app.conf.broker_connection_retry_on_startup = True _SYNC_BATCH_SIZE = 100 @@ -171,14 +198,13 @@ def prune_documents_task(connector_id: int, credential_id: int) -> None: runnable_connector ) - all_indexed_document_ids = { - doc.id - for doc in get_documents_for_connector_credential_pair( + all_indexed_document_ids = set( + get_document_ids_for_connector_credential_pair( db_session=db_session, connector_id=connector_id, credential_id=credential_id, ) - } + ) doc_ids_to_remove = list(all_indexed_document_ids - all_connector_doc_ids) @@ -248,7 +274,7 @@ def _sync_document_batch(document_ids: list[str], db_session: Session) -> None: try: cursor = None while True: - document_batch, cursor = fetch_documents_for_document_set_paginated( + document_id_batch, cursor = fetch_documents_for_document_set_paginated( document_set_id=document_set_id, db_session=db_session, current_only=False, @@ -256,7 +282,7 @@ def _sync_document_batch(document_ids: list[str], db_session: Session) -> None: limit=_SYNC_BATCH_SIZE, ) _sync_document_batch( - document_ids=[document.id for document in document_batch], + document_ids=list(document_id_batch), db_session=db_session, ) if cursor is None: diff --git a/backend/danswer/background/update.py b/backend/danswer/background/update.py index 8d6f819fd52..d00aadcf108 100755 --- a/backend/danswer/background/update.py +++ b/backend/danswer/background/update.py @@ -1,4 +1,5 @@ import logging +import os import time from datetime import datetime from typing import Any @@ -506,9 +507,35 @@ def update_loop(delay: int = 10, num_workers: int = NUM_INDEXING_WORKERS) -> Non model_server_port=MODEL_SERVER_PORT, ) + # Pick the indexing-execution backend in priority order: + # + # 1. DASK_SCHEDULER_ADDRESS — production mode in K8s. Indexing + # work is dispatched to a remote Dask scheduler service that + # fans out to a horizontally-scalable pool of `dask-worker` + # pods. 
This is the only mode that supports scaling indexing + # concurrency by adding pods (vs the in-pod LocalCluster which + # is bounded by the host's RAM). + # 2. DASK_JOB_CLIENT_ENABLED — legacy in-process Dask LocalCluster. + # All workers in the same Python process. Used in dev and in + # pre-distributed-mode prod deployments. + # 3. SimpleJobClient — bare ProcessPoolExecutor-style fallback. + # Used by some local dev flows that don't want the Dask + # overhead. + # + # The remote-scheduler path uses two named queues — `primary` for + # the active embedding model and `secondary` for the in-flight + # secondary index during model swaps — so a single dask-scheduler + # service serves both without code changes elsewhere. client_primary: Client | SimpleJobClient client_secondary: Client | SimpleJobClient - if DASK_JOB_CLIENT_ENABLED: + dask_scheduler_address = os.environ.get("DASK_SCHEDULER_ADDRESS") + if dask_scheduler_address: + logger.info("Connecting to remote Dask scheduler at %s", dask_scheduler_address) + client_primary = Client(dask_scheduler_address) + client_secondary = Client(dask_scheduler_address) + if LOG_LEVEL.lower() == "debug": + client_primary.register_worker_plugin(ResourceLogger()) + elif DASK_JOB_CLIENT_ENABLED: cluster_primary = LocalCluster( n_workers=num_workers, threads_per_worker=1, diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py index 321a8bdca01..6b5b82906bf 100644 --- a/backend/danswer/configs/app_configs.py +++ b/backend/danswer/configs/app_configs.py @@ -142,6 +142,38 @@ POSTGRES_PORT = os.environ.get("POSTGRES_PORT") or "5432" POSTGRES_DB = os.environ.get("POSTGRES_DB") or "postgres" +# SQLAlchemy connection-pool sizing, PER PROCESS. Max connections a single +# process can open to Postgres is POSTGRES_POOL_SIZE + POSTGRES_POOL_OVERFLOW. 
+# The cluster-wide total is (that) × (replicas of every pod that imports the +# engine: api-server, background, model servers if they touch the DB), and it +# must stay under Postgres `max_connections` with headroom. Defaults preserve +# the previous hardcoded 40+10; override DOWN per deployment as you scale +# replicas (e.g. a small api-server pool when running many replicas). +POSTGRES_POOL_SIZE = int(os.environ.get("POSTGRES_POOL_SIZE") or 40) +POSTGRES_POOL_OVERFLOW = int(os.environ.get("POSTGRES_POOL_OVERFLOW") or 10) + +# File store backend — where uploaded files / chat attachments / connector +# blobs live. Default "PostgresBackedFileStore" (Postgres large objects). +# Set to "AzureBlobFileStore" to offload the BYTES to Azure Blob Storage +# (metadata stays in the file_store table): keeps the DB/WAL/backups lean and +# stops file reads from holding a Postgres connection for the whole stream. +FILE_STORE_TYPE = os.environ.get("FILE_STORE_TYPE") or "PostgresBackedFileStore" +# Only used when FILE_STORE_TYPE=AzureBlobFileStore (secret — set in +# danswer-secrets). Container is auto-created on first use if absent. +AZURE_BLOB_CONNECTION_STRING = os.environ.get("AZURE_BLOB_CONNECTION_STRING") or "" +AZURE_BLOB_CONTAINER = os.environ.get("AZURE_BLOB_CONTAINER") or "danswer-files" + +# Chat file-upload limits. A chat-attached doc is stuffed WHOLE into the LLM +# prompt (no retrieval), so it's bounded by the model context window. Two +# guards: a cheap byte cap (all types), and a token cap on the extracted text +# (the real protection — rejects docs that would overflow). The token budget +# is CHAT_FILE_MAX_TOKEN_FRACTION of the model's max input tokens, leaving room +# for the system prompt, history, and the response. 
+CHAT_FILE_MAX_SIZE_MB = int(os.environ.get("CHAT_FILE_MAX_SIZE_MB") or 25) +CHAT_FILE_MAX_TOKEN_FRACTION = float( + os.environ.get("CHAT_FILE_MAX_TOKEN_FRACTION") or 0.5 +) + ##### # Connector Configs @@ -174,6 +206,15 @@ WEB_CONNECTOR_OAUTH_CLIENT_SECRET = os.environ.get("WEB_CONNECTOR_OAUTH_CLIENT_SECRET") WEB_CONNECTOR_OAUTH_TOKEN_URL = os.environ.get("WEB_CONNECTOR_OAUTH_TOKEN_URL") WEB_CONNECTOR_VALIDATE_URLS = os.environ.get("WEB_CONNECTOR_VALIDATE_URLS") +# Hard cap on pages visited in a single recursive web crawl. Bounds runtime so a +# large site can't run for hours and get killed mid-run (which marked the whole +# attempt FAILED). 0 = unlimited; unset/empty falls back to the 5000 default. +WEB_CONNECTOR_MAX_PAGES = int(os.environ.get("WEB_CONNECTOR_MAX_PAGES") or 5000) +# Per-page navigation timeout (ms) and retry count for transient fetch failures. +WEB_CONNECTOR_PAGE_TIMEOUT_MS = int( + os.environ.get("WEB_CONNECTOR_PAGE_TIMEOUT_MS") or 30000 +) +WEB_CONNECTOR_MAX_RETRIES = int(os.environ.get("WEB_CONNECTOR_MAX_RETRIES") or 3) HTML_BASED_CONNECTOR_TRANSFORM_LINKS_STRATEGY = os.environ.get( "HTML_BASED_CONNECTOR_TRANSFORM_LINKS_STRATEGY", @@ -307,6 +348,95 @@ ) +##### +# Redis (cache + rate limiting) +##### +# Connection details. All env-driven; safe defaults for local dev. +REDIS_HOST = os.environ.get("REDIS_HOST") or "localhost" +REDIS_PORT = int(os.environ.get("REDIS_PORT") or 6379) +REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD") or "" +REDIS_DB_NUMBER = int(os.environ.get("REDIS_DB_NUMBER") or 0) +REDIS_SSL = os.environ.get("REDIS_SSL", "").lower() == "true" +REDIS_POOL_MAX_CONNECTIONS = int(os.environ.get("REDIS_POOL_MAX_CONNECTIONS") or 50) +REDIS_HEALTH_CHECK_INTERVAL = int(os.environ.get("REDIS_HEALTH_CHECK_INTERVAL") or 60) +REDIS_SOCKET_TIMEOUT_SECONDS = int(os.environ.get("REDIS_SOCKET_TIMEOUT_SECONDS") or 3) + +# Celery broker + result backend on Redis (instead of the default +# SQLAlchemy/Postgres transport). 
Default OFF so local dev without Redis +# still boots on the Postgres broker. When ON, Celery stops polling/writing +# Postgres for its queue, removing that load from the DB. Uses a SEPARATE +# Redis logical DB from the cache (CELERY_REDIS_DB_NUMBER, default 1) so +# Celery's keys never collide with cache/rate-limit keys on REDIS_DB_NUMBER. +# Task STATUS is unaffected — this fork tracks it in its own task_queue_jobs +# table, not Celery's result backend. +CELERY_BROKER_REDIS_ENABLED = ( + os.environ.get("CELERY_BROKER_REDIS_ENABLED", "").lower() == "true" +) +CELERY_REDIS_DB_NUMBER = int(os.environ.get("CELERY_REDIS_DB_NUMBER") or 1) + +# Read-through KV cache layered atop PostgresBackedDynamicConfigStore. +# When false (default), the store behaves exactly as before; when true, +# reads check Redis first and writes/deletes invalidate Redis. Fail-open: +# Redis errors degrade to direct Postgres, never an outage. +REDIS_KV_CACHE_ENABLED = os.environ.get("REDIS_KV_CACHE_ENABLED", "").lower() == "true" +# TTL (seconds) for KV entries cached in Redis (1 day default). +REDIS_KV_CACHE_TTL_SECONDS = int(os.environ.get("REDIS_KV_CACHE_TTL_SECONDS") or 86400) + +# Per-user request-rate limiter (Redis-backed). Default OFF — complements +# the token-budget limiter in token_limit.py with a request-count cap that +# is correct across api_server replicas. +REQUEST_RATE_LIMIT_ENABLED = ( + os.environ.get("REQUEST_RATE_LIMIT_ENABLED", "").lower() == "true" +) +# Per-minute and per-hour message-send caps per (user|ip). 0 disables that +# window (so you can enforce only one of them if you prefer). +REQUEST_RATE_LIMIT_PER_MINUTE = int( + os.environ.get("REQUEST_RATE_LIMIT_PER_MINUTE") or 0 +) +REQUEST_RATE_LIMIT_PER_HOUR = int(os.environ.get("REQUEST_RATE_LIMIT_PER_HOUR") or 0) + +# Per-user persona ("assistant") list cache. Caches the global persona list +# + per-user group memberships in Redis; permission filter runs in Python +# at request time. 
Explicit write-through invalidation lives in the +# db/persona.py and ee/.../user_group.py mutation paths — the TTL below is +# only a long-tail safety net for missed busts. Default OFF. +PERSONA_CACHE_ENABLED = os.environ.get("PERSONA_CACHE_ENABLED", "").lower() == "true" +PERSONA_CACHE_TTL_SECONDS = int( + os.environ.get("PERSONA_CACHE_TTL_SECONDS") or 86400 # 24 h backstop +) + +# Basic connector/cc-pair info cache (the /manage/indexing-status read the +# chat page uses to derive available source types). That read does a +# per-cc-pair document-count aggregation that measured ~300ms on the live +# DB and runs on every chat page load — the page's slowest fan-out call. +# Pure TTL cache, global (same for all users), fail-open. No explicit +# invalidation: the data (which connectors exist + have indexed docs) +# changes slowly and brief staleness is harmless (it only feeds the source- +# filter list + the "sources incomplete" setup modal), so a short TTL is +# the whole strategy. Default OFF. +CC_PAIR_INFO_CACHE_ENABLED = ( + os.environ.get("CC_PAIR_INFO_CACHE_ENABLED", "").lower() == "true" +) +CC_PAIR_INFO_CACHE_TTL_SECONDS = int( + os.environ.get("CC_PAIR_INFO_CACHE_TTL_SECONDS") or 60 +) + +# Global document-set list cache (the /document-set read on the chat-page +# bundle). In Danswer MIT document sets aren't permission-filtered (every user +# sees all), so one shared global list is correct — 200 concurrent first-loads +# collapse to one DB query. MIT-scoped with no EE dependency: if a deployment +# enables EE (per-user filtering), the cache bypasses to a direct DB read so it +# can't leak sets across users. Write-through: every doc-set mutation busts the +# key; the TTL is a short backstop (staleness is cosmetic — documents stay +# permission-enforced at search time). Default OFF. 
+DOCUMENT_SET_CACHE_ENABLED = ( + os.environ.get("DOCUMENT_SET_CACHE_ENABLED", "").lower() == "true" +) +DOCUMENT_SET_CACHE_TTL_SECONDS = int( + os.environ.get("DOCUMENT_SET_CACHE_TTL_SECONDS") or 300 +) + + ##### # Enterprise Edition Configs ##### diff --git a/backend/danswer/connectors/danswer_jira/connector.py b/backend/danswer/connectors/danswer_jira/connector.py index 06b5a132bc2..83a9c2aa96a 100644 --- a/backend/danswer/connectors/danswer_jira/connector.py +++ b/backend/danswer/connectors/danswer_jira/connector.py @@ -1,4 +1,5 @@ import os +import re from collections.abc import Iterable from datetime import datetime from datetime import timezone @@ -17,6 +18,7 @@ from danswer.connectors.danswer_jira.utils import extract_text_from_content from danswer.connectors.danswer_jira.utils import get_comment_strs from danswer.connectors.interfaces import GenerateDocumentsOutput +from danswer.connectors.interfaces import IdConnector from danswer.connectors.interfaces import LoadConnector from danswer.connectors.interfaces import PollConnector from danswer.connectors.interfaces import SecondsSinceUnixEpoch @@ -31,6 +33,36 @@ JIRA_API_VERSION = os.environ.get("JIRA_API_VERSION") or "3" _JIRA_FULL_PAGE_SIZE = 50 +# Matches the first ORDER BY keyword pair anywhere in the filter (case-insensitive; not anchored to the end). +_JQL_ORDER_BY_RE = re.compile(r"\border\s+by\b", re.IGNORECASE) + + +def _add_time_window_to_jql( + jira_filter: str, start_date_str: str, end_date_str: str +) -> str: + """Add the poll's `updated` time window to a user-supplied JQL filter. + + JQL requires all WHERE conditions to come BEFORE any `ORDER BY`. Naively + appending `AND updated >= ...` to a filter that ends in `ORDER BY ...` + produces invalid JQL — Jira rejects it with HTTP 400 "Expecting ',' but got + 'AND'". So if the filter has a trailing ORDER BY, inject the window in front + of it; otherwise just append. 
+ """ + window = f"updated >= '{start_date_str}' AND updated <= '{end_date_str}'" + jira_filter = jira_filter.strip() + + match = _JQL_ORDER_BY_RE.search(jira_filter) + if match: + where_part = jira_filter[: match.start()].rstrip() + order_part = jira_filter[match.start() :].strip() + if where_part: + return f"{where_part} AND {window} {order_part}" + return f"{window} {order_part}" + + if jira_filter: + return f"{jira_filter} AND {window}" + return window + def _paginate_jql_search( jira_client: JIRA, @@ -66,78 +98,92 @@ def fetch_jira_issues_batch( jql=jql, max_results=batch_size, ): - if labels_to_skip: - if any(label in issue.fields.labels for label in labels_to_skip): + # Per-issue error tolerance: a single malformed issue (odd field shape, + # missing data, etc.) should be logged and skipped, NOT abort the whole + # connector run. Previously one bad ticket failed the entire attempt. + issue_key = getattr(issue, "key", "") + try: + if labels_to_skip and any( + label in issue.fields.labels for label in labels_to_skip + ): logger.info( - f"Skipping {issue.key} because it has a label to skip. Found " + f"Skipping {issue_key} because it has a label to skip. Found " f"labels: {issue.fields.labels}. Labels to skip: {labels_to_skip}." ) continue - description = ( - issue.fields.description or "" - if JIRA_API_VERSION == "2" - else extract_text_from_content(issue.raw["fields"].get("description")) - ) - comments = get_comment_strs( - issue=issue, - comment_email_blacklist=comment_email_blacklist, - ) - ticket_content = f"{description}\n" + "\n".join( - [f"Comment: {comment}" for comment in comments if comment] - ) - - # Check ticket size - if len(ticket_content.encode("utf-8")) > JIRA_CONNECTOR_MAX_TICKET_SIZE: - logger.info( - f"Skipping {issue.key} because it exceeds the maximum size of " - f"{JIRA_CONNECTOR_MAX_TICKET_SIZE} bytes." 
+ description = ( + issue.fields.description or "" + if JIRA_API_VERSION == "2" + else extract_text_from_content(issue.raw["fields"].get("description")) + ) + comments = get_comment_strs( + issue=issue, + comment_email_blacklist=comment_email_blacklist, + ) + ticket_content = f"{description}\n" + "\n".join( + [f"Comment: {comment}" for comment in comments if comment] ) - continue - page_url = f"{jira_client.client_info()}/browse/{issue.key}" + # Check ticket size + if len(ticket_content.encode("utf-8")) > JIRA_CONNECTOR_MAX_TICKET_SIZE: + logger.info( + f"Skipping {issue_key} because it exceeds the maximum size of " + f"{JIRA_CONNECTOR_MAX_TICKET_SIZE} bytes." + ) + continue - people = set() - try: - creator = best_effort_get_field_from_issue(issue, "creator") - if basic_expert_info := best_effort_basic_expert_info(creator): - people.add(basic_expert_info) - except Exception: - # Author should exist but if not, doesn't matter - pass + page_url = f"{jira_client.client_info()}/browse/{issue_key}" + + people = set() + for role in ("creator", "reporter", "assignee"): + try: + field_value = best_effort_get_field_from_issue(issue, role) + if basic_expert_info := best_effort_basic_expert_info(field_value): + people.add(basic_expert_info) + except Exception: + # role may be absent on some issues; not critical + pass + + metadata_dict: dict[str, Any] = {} + if priority := best_effort_get_field_from_issue(issue, "priority"): + metadata_dict["priority"] = priority.name + if status := best_effort_get_field_from_issue(issue, "status"): + metadata_dict["status"] = status.name + if resolution := best_effort_get_field_from_issue(issue, "resolution"): + metadata_dict["resolution"] = resolution.name + if labels := best_effort_get_field_from_issue(issue, "labels"): + metadata_dict["label"] = labels + if issuetype := best_effort_get_field_from_issue(issue, "issuetype"): + metadata_dict["issuetype"] = issuetype.name + if reporter := best_effort_get_field_from_issue(issue, "reporter"): 
+ if reporter_name := getattr(reporter, "displayName", None): + metadata_dict["reporter"] = reporter_name + if project := best_effort_get_field_from_issue(issue, "project"): + if project_key := getattr(project, "key", None): + metadata_dict["project"] = project_key + + doc = Document( + id=page_url, + sections=[Section(link=page_url, text=ticket_content)], + source=DocumentSource.JIRA, + semantic_identifier=f"{issue_key}: {issue.fields.summary}", + title=f"{issue_key} {issue.fields.summary}", + doc_updated_at=time_str_to_utc(issue.fields.updated), + primary_owners=list(people) or None, + # TODO add secondary_owners (commenters) if needed + metadata=metadata_dict, + ) + except Exception as e: + logger.exception( + f"Failed to process Jira issue {issue_key}, skipping it: {e}" + ) + continue - try: - assignee = best_effort_get_field_from_issue(issue, "assignee") - if basic_expert_info := best_effort_basic_expert_info(assignee): - people.add(basic_expert_info) - except Exception: - # Author should exist but if not, doesn't matter - pass - - metadata_dict = {} - if priority := best_effort_get_field_from_issue(issue, "priority"): - metadata_dict["priority"] = priority.name - if status := best_effort_get_field_from_issue(issue, "status"): - metadata_dict["status"] = status.name - if resolution := best_effort_get_field_from_issue(issue, "resolution"): - metadata_dict["resolution"] = resolution.name - if labels := best_effort_get_field_from_issue(issue, "labels"): - metadata_dict["label"] = labels - - yield Document( - id=page_url, - sections=[Section(link=page_url, text=ticket_content)], - source=DocumentSource.JIRA, - semantic_identifier=f"{issue.key}: {issue.fields.summary}", - title=f"{issue.key} {issue.fields.summary}", - doc_updated_at=time_str_to_utc(issue.fields.updated), - primary_owners=list(people) or None, - # TODO add secondary_owners (commenters) if needed - metadata=metadata_dict, - ) + yield doc -class JiraConnector(LoadConnector, PollConnector): +class 
JiraConnector(LoadConnector, PollConnector, IdConnector): def __init__( self, jira_base_url: str, @@ -186,7 +232,11 @@ def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None return None def load_from_state(self) -> GenerateDocumentsOutput: - jql = f"project = {self.quoted_jira_project}" + # Full (unbounded) load = the configured filter with no time window. + # Previously this referenced self.quoted_jira_project, which __init__ + # never sets — an AttributeError on any call (notably the prune path, + # which falls back to load_from_state for non-IdConnectors). + jql = self.jira_filter document_batch = [] for doc in fetch_jira_issues_batch( @@ -216,11 +266,7 @@ def poll_source( "%Y-%m-%d %H:%M" ) - jql = ( - f"{self.jira_filter} AND " - f"updated >= '{start_date_str}' AND " - f"updated <= '{end_date_str}'" - ) + jql = _add_time_window_to_jql(self.jira_filter, start_date_str, end_date_str) document_batch = [] for doc in fetch_jira_issues_batch( @@ -237,6 +283,27 @@ def poll_source( yield document_batch + def retrieve_all_source_ids(self) -> set[str]: + """ID-only listing for the prune path. Returns the document ids (same + `{base_url}/browse/{issue_key}` form used at index time) for every issue matching + the filter, fetching ONLY the `key` field.
Implementing IdConnector lets + pruning detect deleted issues cheaply, instead of loading every full + document just to read its id (and instead of hitting the old + load_from_state, which was broken).""" + if self.jira_client is None: + raise ConnectorMissingCredentialError("Jira") + + base = self.jira_client.client_info() + all_ids: set[str] = set() + for issue in _paginate_jql_search( + jira_client=self.jira_client, + jql=self.jira_filter, + max_results=_JIRA_FULL_PAGE_SIZE, + fields="key", + ): + all_ids.add(f"{base}/browse/{issue.key}") + return all_ids + if __name__ == "__main__": import os diff --git a/backend/danswer/connectors/factory.py b/backend/danswer/connectors/factory.py index 5bedb9b4245..4f33eb38d67 100644 --- a/backend/danswer/connectors/factory.py +++ b/backend/danswer/connectors/factory.py @@ -64,6 +64,15 @@ def identify_connector_class( DocumentSource.SLACK: { InputType.LOAD_STATE: SlackLoadConnector, InputType.POLL: SlackPollConnector, + # Slack is the only dict-mapped source, so unlike single-class + # connectors it needs an explicit PRUNE entry — without it the + # prune task fails with "Connector not found for source=SLACK" on + # every run. Use the POLL connector (NOT SlackLoadConnector, which + # requires an `export_path_str` and reads a Slack export file — + # incompatible with an API connector's config). For pruning, + # extract_ids_from_runnable_connector calls poll_source(epoch, now) + # to enumerate every current message id (no cheaper Slack listing). 
+ InputType.PRUNE: SlackPollConnector, }, DocumentSource.GITHUB: GithubConnector, DocumentSource.GITHUB_FILES: GithubFilesConnector, diff --git a/backend/danswer/connectors/file/connector.py b/backend/danswer/connectors/file/connector.py index 44d824d75b0..608b67506be 100644 --- a/backend/danswer/connectors/file/connector.py +++ b/backend/danswer/connectors/file/connector.py @@ -42,7 +42,12 @@ def _read_files_and_metadata( metadata: dict[str, Any] = {} directory_path = os.path.dirname(file_name) - file_content = get_default_file_store(db_session).read_file(file_name, mode="b") + # use_tempfile=True: stream the file into a SpooledTemporaryFile (spills to + # disk past 30MB) instead of BytesIO(read())-ing the whole thing into RAM. + # Without this, a large uploaded file OOM-crashes the indexing process. + file_content = get_default_file_store(db_session).read_file( + file_name, mode="b", use_tempfile=True + ) if extension == ".zip": for file_info, file, metadata in load_files_from_zip( diff --git a/backend/danswer/connectors/google_site/connector.py b/backend/danswer/connectors/google_site/connector.py index 9cfcf224e3f..658a14ebc4d 100644 --- a/backend/danswer/connectors/google_site/connector.py +++ b/backend/danswer/connectors/google_site/connector.py @@ -70,8 +70,10 @@ def load_from_state(self) -> GenerateDocumentsOutput: documents: list[Document] = [] with Session(get_sqlalchemy_engine()) as db_session: + # use_tempfile=True: stream the (potentially large) site zip to a + # temp file instead of loading it fully into memory (OOM risk). 
file_content_io = get_default_file_store(db_session).read_file( - self.zip_path, mode="b" + self.zip_path, mode="b", use_tempfile=True ) # load the HTML files diff --git a/backend/danswer/connectors/models.py b/backend/danswer/connectors/models.py index 37ed2e22bd5..0c00b8607e2 100644 --- a/backend/danswer/connectors/models.py +++ b/backend/danswer/connectors/models.py @@ -1,3 +1,4 @@ +import hashlib from datetime import datetime from enum import Enum from typing import Any @@ -131,6 +132,36 @@ def get_metadata_str_attributes(self) -> list[str] | None: attributes.append(k + INDEX_SEPARATOR + v) return attributes + def get_content_hash(self) -> str: + """Stable hash of the fields that determine this document's INDEXED + representation: section text/links, title, semantic identifier, + metadata, and owners. + + Used by the indexing pipeline to skip re-indexing a document whose + content is unchanged even though its `doc_updated_at` advanced — e.g. + a Salesforce automation bumps LastModifiedDate on records whose indexed + fields didn't actually change, which otherwise forces a full (and + expensive) Vespa clear-and-rewrite of every record on every poll. + + Deliberately EXCLUDES doc_updated_at: a newer timestamp alone must not + force a re-index. Uses \\x1f (unit separator) as the field delimiter so + adjacent fields can't collide. Metadata keys are sorted for a deterministic + order; owners are hashed in the order their lists hold them (not sorted).
+ """ + parts: list[str] = [self.semantic_identifier or "", self.title or ""] + for section in self.sections: + parts.append(section.link or "") + parts.append(section.text) + for key in sorted(self.metadata or {}): + value = self.metadata[key] + if isinstance(value, list): + parts.append(f"{key}={'|'.join(value)}") + else: + parts.append(f"{key}={value}") + for owner in (self.primary_owners or []) + (self.secondary_owners or []): + parts.append(f"{owner.display_name or ''}<{owner.email or ''}>") + return hashlib.sha256("\x1f".join(parts).encode("utf-8")).hexdigest() + class Document(DocumentBase): id: str # This must be unique or during indexing/reindexing, chunks will be overwritten diff --git a/backend/danswer/connectors/web/connector.py b/backend/danswer/connectors/web/connector.py index 9238467abbc..b1e3ff36715 100644 --- a/backend/danswer/connectors/web/connector.py +++ b/backend/danswer/connectors/web/connector.py @@ -1,6 +1,8 @@ import io import ipaddress +import random import socket +import time from datetime import datetime from datetime import timezone from enum import Enum @@ -19,9 +21,12 @@ from requests_oauthlib import OAuth2Session # type:ignore from danswer.configs.app_configs import INDEX_BATCH_SIZE +from danswer.configs.app_configs import WEB_CONNECTOR_MAX_PAGES +from danswer.configs.app_configs import WEB_CONNECTOR_MAX_RETRIES from danswer.configs.app_configs import WEB_CONNECTOR_OAUTH_CLIENT_ID from danswer.configs.app_configs import WEB_CONNECTOR_OAUTH_CLIENT_SECRET from danswer.configs.app_configs import WEB_CONNECTOR_OAUTH_TOKEN_URL +from danswer.configs.app_configs import WEB_CONNECTOR_PAGE_TIMEOUT_MS from danswer.configs.app_configs import WEB_CONNECTOR_VALIDATE_URLS from danswer.configs.constants import DocumentSource from danswer.connectors.interfaces import GenerateDocumentsOutput @@ -36,6 +41,29 @@ logger = setup_logger() +# Many docs sites / WAFs (Cloudflare etc.) 
403 or rate-limit the default +# headless-Chromium / bare-requests user agent. Present as a normal browser. +DEFAULT_USER_AGENT = ( + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" +) + + +def _is_browser_dead(exc: Exception) -> bool: + """Heuristic: did the exception kill the browser/context (vs. just this + page)? Only then is a full Playwright restart warranted; otherwise we retry + with a fresh page on the existing browser.""" + msg = str(exc).lower() + return any( + marker in msg + for marker in ( + "browser has been closed", + "browser closed", + "crash", + "target closed", + ) + ) + class WEB_CONNECTOR_VALID_SETTINGS(str, Enum): # Given a base site, index everything under that path @@ -123,7 +151,7 @@ def start_playwright() -> Tuple[Playwright, BrowserContext]: playwright = sync_playwright().start() browser = playwright.chromium.launch(headless=True) - context = browser.new_context() + context = browser.new_context(user_agent=DEFAULT_USER_AGENT) if ( WEB_CONNECTOR_OAUTH_CLIENT_ID @@ -309,9 +337,23 @@ def load_from_state(self, is_polling: bool = False) -> GenerateDocumentsOutput: at_least_one_doc = False last_error = None + # One upfront connectivity check. This used to run per page — a full + # extra GET for every URL (doubling network work) that ALSO 403'd on + # bot-protected sites Playwright loads fine, and a failure tore down the + # whole browser. Once, on the base URL, is enough. + check_internet_connection(base_url) + playwright, context = start_playwright() restart_playwright = False + pages_visited = 0 while to_visit: + if WEB_CONNECTOR_MAX_PAGES and pages_visited >= WEB_CONNECTOR_MAX_PAGES: + logger.info( + f"Reached WEB_CONNECTOR_MAX_PAGES ({WEB_CONNECTOR_MAX_PAGES}); " + f"stopping crawl with {len(to_visit)} URL(s) still queued." 
+ ) + break + current_url = to_visit.pop() if current_url in visited_links: continue @@ -325,18 +367,23 @@ def load_from_state(self, is_polling: bool = False) -> GenerateDocumentsOutput: continue logger.info(f"Visiting {current_url}") - - try: - check_internet_connection(current_url) - if restart_playwright: - playwright, context = start_playwright() - restart_playwright = False - - if current_url.split(".")[-1] == "pdf": - # PDF files are not checked for links - response = requests.get(current_url) + pages_visited += 1 + + # Reinit the browser if a previous batch/crash flagged it. Done for + # every page (as before) so the browser is always live when we reach + # the batch-yield / final-stop below. + if restart_playwright: + playwright, context = start_playwright() + restart_playwright = False + + # --- PDF: fetch directly (no browser). timeout so a hung download + # can't stall the whole attempt. Matches the original: PDFs don't + # trigger the per-batch flush. --- + if current_url.split(".")[-1] == "pdf": + try: + response = requests.get(current_url, timeout=60) + response.raise_for_status() page_text = pdf_to_text(file=io.BytesIO(response.content)) - doc_batch.append( Document( id=current_url, @@ -346,57 +393,91 @@ def load_from_state(self, is_polling: bool = False) -> GenerateDocumentsOutput: metadata={}, ) ) - continue - - page = context.new_page() - page_response = page.goto(current_url) - final_page = page.url - if final_page != current_url: - logger.info(f"Redirected to {final_page}") - protected_url_check(final_page) - current_url = final_page - if current_url in visited_links: - logger.info("Redirected page already indexed") - continue - visited_links.add(current_url) - - content = page.content() - soup = BeautifulSoup(content, "html.parser") - - # Only get internal links if we're not in polling mode and recursive is enabled - if self.recursive and not is_polling: - internal_links = get_internal_links(base_url, current_url, soup) - for link in 
internal_links: - if link not in visited_links: - to_visit.append(link) - - if page_response and str(page_response.status)[0] in ("4", "5"): - last_error = f"Skipped indexing {current_url} due to HTTP {page_response.status} response" - logger.info(last_error) - continue - - parsed_html = web_html_cleanup(soup, self.mintlify_cleanup) - - doc_batch.append( - Document( - id=current_url, - sections=[ - Section(link=current_url, text=parsed_html.cleaned_text) - ], - source=DocumentSource.WEB, - semantic_identifier=parsed_html.title or current_url, - metadata={}, - ) - ) - - page.close() - except Exception as e: - last_error = f"Failed to fetch '{current_url}': {e}" - logger.error(last_error) - playwright.stop() - restart_playwright = True + except Exception as e: + last_error = f"Failed to fetch PDF '{current_url}': {e}" + logger.error(last_error) continue + # --- HTML via Playwright, with retries. A single page error retries + # with a FRESH PAGE on the same browser (exponential backoff); only + # a browser-level crash restarts Playwright. One bad page no longer + # tears down the browser or fails the attempt. --- + page_doc: Document | None = None + for attempt in range(WEB_CONNECTOR_MAX_RETRIES): + if attempt > 0: + time.sleep(min(2**attempt + random.uniform(0, 1), 10)) + try: + page = context.new_page() + try: + page_response = page.goto( + current_url, + timeout=WEB_CONNECTOR_PAGE_TIMEOUT_MS, + # 'domcontentloaded' (DOM parsed) instead of the + # default 'load' (waits for every image/font/etc.) — + # far faster and enough for text extraction. 
+ wait_until="domcontentloaded", + ) + final_page = page.url + if final_page != current_url: + logger.info(f"Redirected to {final_page}") + protected_url_check(final_page) + current_url = final_page + if current_url in visited_links: + logger.info("Redirected page already indexed") + break + visited_links.add(current_url) + + content = page.content() + soup = BeautifulSoup(content, "html.parser") + + if self.recursive and not is_polling: + for link in get_internal_links(base_url, current_url, soup): + if link not in visited_links: + to_visit.append(link) + + if page_response and str(page_response.status)[0] in ( + "4", + "5", + ): + last_error = ( + f"Skipped indexing {current_url} due to HTTP " + f"{page_response.status} response" + ) + logger.info(last_error) + break # a real 4xx/5xx — don't retry + + parsed_html = web_html_cleanup(soup, self.mintlify_cleanup) + page_doc = Document( + id=current_url, + sections=[ + Section(link=current_url, text=parsed_html.cleaned_text) + ], + source=DocumentSource.WEB, + semantic_identifier=parsed_html.title or current_url, + metadata={}, + ) + break # success + finally: + page.close() + except Exception as e: + last_error = ( + f"Failed to fetch '{current_url}' " + f"(attempt {attempt + 1}/{WEB_CONNECTOR_MAX_RETRIES}): {e}" + ) + logger.warning(last_error) + if _is_browser_dead(e): + # Browser/context crashed — restart it so the next + # attempt (and subsequent pages) have a live browser. + try: + playwright.stop() + except Exception: + pass + playwright, context = start_playwright() + # else: transient page error — retry with a fresh page. 
+ + if page_doc is not None: + doc_batch.append(page_doc) + if len(doc_batch) >= self.batch_size: playwright.stop() restart_playwright = True diff --git a/backend/danswer/db/analytics.py b/backend/danswer/db/analytics.py index 4af60b7a80d..bafcd5c0fec 100644 --- a/backend/danswer/db/analytics.py +++ b/backend/danswer/db/analytics.py @@ -26,12 +26,19 @@ from sqlalchemy.orm import Session from danswer.configs.constants import MessageType +from danswer.db.models import AnalyticsPersonaDailyStats +from danswer.db.models import AnalyticsUserDailyStats +from danswer.db.models import AnalyticsUserFirstSeen from danswer.db.models import ChatMessage from danswer.db.models import ChatMessageFeedback from danswer.db.models import ChatSession from danswer.db.models import Connector from danswer.db.models import ConnectorCredentialPair from danswer.db.models import Document +from danswer.db.models import DocumentSet +from danswer.db.models import Persona +from danswer.db.models import Persona__DocumentSet +from danswer.db.models import User def fetch_query_analytics( @@ -137,6 +144,153 @@ def fetch_per_user_query_analytics( return db_session.execute(stmt).all() # type: ignore +def fetch_user_adoption( + start: datetime.datetime, + end: datetime.datetime, + db_session: Session, +) -> list[tuple[datetime.date, int, int]]: + """Per-day ``(date, new_users, cumulative_users)`` from the durable + ``analytics_user_first_seen`` table — the chat adoption curve. + + Read from the aggregate, NOT raw chat, so it spans the full history + regardless of RETENTION_DAYS_CHAT. ``cumulative_users`` folds in users + whose first-seen predates ``start`` so the running total is continuous. + Only days on which at least one user first appeared are returned. 
+ """ + start_date = start.date() + end_date = end.date() + rows = db_session.execute( + select( + AnalyticsUserFirstSeen.first_seen_date, + func.count().label("new_users"), + ) + .where(AnalyticsUserFirstSeen.first_seen_date <= end_date) + .group_by(AnalyticsUserFirstSeen.first_seen_date) + .order_by(AnalyticsUserFirstSeen.first_seen_date) + ).all() + + out: list[tuple[datetime.date, int, int]] = [] + cumulative = 0 + for day, new_users in rows: + cumulative += int(new_users) + if day >= start_date: + out.append((day, int(new_users), cumulative)) + return out + + +def fetch_per_user_chat_stats( + start: datetime.datetime, + end: datetime.datetime, + db_session: Session, + limit: int = 100, +) -> Sequence[tuple[UUID, str, int, int, int, datetime.date]]: + """Top ``limit`` users by message volume over ``[start, end]``, with + like/dislike tallies and last-active date, joined to ``user`` for email. + + Reads the durable ``analytics_user_daily_stats`` aggregate (upserted + daily by the rollup), NOT raw chat — so it spans the full history + regardless of RETENTION_DAYS_CHAT. Inner join on ``user`` drops + anonymous sessions and deleted users (whose counts persist in the + aggregate but shouldn't surface by email). + """ + start_date = start.date() + end_date = end.date() + stmt = ( + # SA's select() overloads don't type a 6-col sum/max projection; + # the runtime is fine (the existing analytics selects do the same). 
+ select( # type: ignore[call-overload] + User.id, + User.email, + func.coalesce(func.sum(AnalyticsUserDailyStats.message_count), 0), + func.coalesce(func.sum(AnalyticsUserDailyStats.like_count), 0), + func.coalesce(func.sum(AnalyticsUserDailyStats.dislike_count), 0), + func.max(AnalyticsUserDailyStats.date), + ) + .select_from(AnalyticsUserDailyStats) + .join(User, User.id == AnalyticsUserDailyStats.user_id) + .where(AnalyticsUserDailyStats.date >= start_date) + .where(AnalyticsUserDailyStats.date <= end_date) + .group_by(User.id, User.email) + .order_by(func.sum(AnalyticsUserDailyStats.message_count).desc()) + .limit(limit) + ) + return db_session.execute(stmt).all() # type: ignore + + +def fetch_persona_usage( + start: datetime.datetime, + end: datetime.datetime, + db_session: Session, + limit: int = 100, +) -> Sequence[tuple[int, str, int, int, int, int, datetime.date]]: + """Top ``limit`` assistants by message volume over ``[start, end]`` from + the durable ``analytics_persona_daily_stats`` aggregate — spans full + history. Joined to ``persona`` for the name (a deleted assistant drops + off). 
Returns (persona_id, name, sessions, messages, likes, dislikes, + last_active).""" + start_date = start.date() + end_date = end.date() + stmt = ( + select( # type: ignore[call-overload] + Persona.id, + Persona.name, + func.coalesce(func.sum(AnalyticsPersonaDailyStats.session_count), 0), + func.coalesce(func.sum(AnalyticsPersonaDailyStats.message_count), 0), + func.coalesce(func.sum(AnalyticsPersonaDailyStats.like_count), 0), + func.coalesce(func.sum(AnalyticsPersonaDailyStats.dislike_count), 0), + func.max(AnalyticsPersonaDailyStats.date), + ) + .select_from(AnalyticsPersonaDailyStats) + .join(Persona, Persona.id == AnalyticsPersonaDailyStats.persona_id) + .where(AnalyticsPersonaDailyStats.date >= start_date) + .where(AnalyticsPersonaDailyStats.date <= end_date) + .group_by(Persona.id, Persona.name) + .order_by(func.sum(AnalyticsPersonaDailyStats.message_count).desc()) + .limit(limit) + ) + return db_session.execute(stmt).all() # type: ignore + + +def fetch_document_set_usage( + start: datetime.datetime, + end: datetime.datetime, + db_session: Session, + limit: int = 100, +) -> Sequence[tuple[int, str, int]]: + """APPROXIMATE "datasets in use" over ``[start, end]``: each assistant's + message volume attributed to every document set currently attached to it + (via persona__document_set). + + This is availability-weighted, not retrieval-truth: an assistant's + messages are counted toward ALL its document sets (so totals can exceed + the real query count), and it uses CURRENT attachments (membership drift + isn't historical). There is no per-query record of which document set + actually served a result, so this is the best durable signal without new + instrumentation. Returns (document_set_id, name, attributed_messages). 
+ """ + start_date = start.date() + end_date = end.date() + stmt = ( + select( + DocumentSet.id, + DocumentSet.name, + func.coalesce(func.sum(AnalyticsPersonaDailyStats.message_count), 0), + ) + .select_from(AnalyticsPersonaDailyStats) + .join( + Persona__DocumentSet, + Persona__DocumentSet.persona_id == AnalyticsPersonaDailyStats.persona_id, + ) + .join(DocumentSet, DocumentSet.id == Persona__DocumentSet.document_set_id) + .where(AnalyticsPersonaDailyStats.date >= start_date) + .where(AnalyticsPersonaDailyStats.date <= end_date) + .group_by(DocumentSet.id, DocumentSet.name) + .order_by(func.sum(AnalyticsPersonaDailyStats.message_count).desc()) + .limit(limit) + ) + return db_session.execute(stmt).all() # type: ignore + + def fetch_danswerbot_analytics( start: datetime.datetime, end: datetime.datetime, diff --git a/backend/danswer/db/analytics_rollup.py b/backend/danswer/db/analytics_rollup.py index 9fa8abc5637..eaa0cf0a634 100644 --- a/backend/danswer/db/analytics_rollup.py +++ b/backend/danswer/db/analytics_rollup.py @@ -48,6 +48,7 @@ from sqlalchemy import cast from sqlalchemy import Date from sqlalchemy import func +from sqlalchemy import literal from sqlalchemy import or_ from sqlalchemy import select from sqlalchemy import text @@ -57,6 +58,9 @@ from danswer.configs.constants import MessageType from danswer.db.engine import get_sqlalchemy_engine from danswer.db.models import AnalyticsDailyRollup +from danswer.db.models import AnalyticsPersonaDailyStats +from danswer.db.models import AnalyticsUserDailyStats +from danswer.db.models import AnalyticsUserFirstSeen from danswer.db.models import ChatMessage from danswer.db.models import ChatMessageFeedback from danswer.db.models import ChatSession @@ -309,6 +313,176 @@ def upsert_rollup_for_date( return metrics +def capture_first_seen_for_date( + db_session: Session, target_date: datetime.date +) -> None: + """Record ``first_seen_date`` for every user active on ``target_date`` + who isn't already in 
``analytics_user_first_seen``. + + INSERT … SELECT … ON CONFLICT (user_id) DO NOTHING: a user already + present keeps their stored date, so first-seen never moves forward. + Because :func:`run_rollup` walks dates ascending, the earliest date in + the processed window on which a user appears is the one recorded — and + for the full backfill that's their true first-ever day. Once written, + the row is immune to chat retention deletes (this is the whole point: + the adoption curve must outlive the raw chat_message rows).""" + start, end = _day_bounds(target_date) + active_user_ids = ( + select( + ChatSession.user_id.label("user_id"), + literal(target_date, Date).label("first_seen_date"), + ) + .select_from(ChatMessage) + .join(ChatSession, ChatSession.id == ChatMessage.chat_session_id) + .where(ChatMessage.time_sent >= start) + .where(ChatMessage.time_sent < end) + .where(ChatMessage.message_type == MessageType.ASSISTANT) + .where(ChatSession.user_id.is_not(None)) + .distinct() + ) + stmt = ( + pg_insert(AnalyticsUserFirstSeen.__table__) + .from_select(["user_id", "first_seen_date"], active_user_ids) + .on_conflict_do_nothing(index_elements=[AnalyticsUserFirstSeen.user_id]) + ) + db_session.execute(stmt) + db_session.commit() + + +def upsert_user_daily_stats_for_date( + db_session: Session, target_date: datetime.date +) -> None: + """Upsert one row per active user for ``target_date`` into + ``analytics_user_daily_stats`` (message / like / dislike counts). + + Single INSERT … SELECT … ON CONFLICT (user_id, date) DO UPDATE, so a + re-run over the sliding window recomputes that day's per-user counts + (reflecting late feedback). 
Once written the rows outlive the raw + chat_message rows that retention deletes — the leaderboard reads this + aggregate, so it spans full history rather than the last + RETENTION_DAYS_CHAT.""" + start, end = _day_bounds(target_date) + per_user = ( + select( + ChatSession.user_id.label("user_id"), + literal(target_date, Date).label("date"), + # distinct: the feedback outerjoin can fan out a message into + # multiple rows (a message may have >1 feedback row). + func.count(func.distinct(ChatMessage.id)).label("message_count"), + func.coalesce( + func.sum(case((ChatMessageFeedback.is_positive, 1), else_=0)), 0 + ).label("like_count"), + func.coalesce( + func.sum( + case( + (ChatMessageFeedback.is_positive == False, 1), # noqa: E712 + else_=0, + ) + ), + 0, + ).label("dislike_count"), + ) + .select_from(ChatMessage) + .join(ChatSession, ChatSession.id == ChatMessage.chat_session_id) + .outerjoin( + ChatMessageFeedback, + ChatMessageFeedback.chat_message_id == ChatMessage.id, + ) + .where(ChatMessage.time_sent >= start) + .where(ChatMessage.time_sent < end) + .where(ChatMessage.message_type == MessageType.ASSISTANT) + .where(ChatSession.user_id.is_not(None)) + .group_by(ChatSession.user_id) + ) + stmt = pg_insert(AnalyticsUserDailyStats.__table__).from_select( + ["user_id", "date", "message_count", "like_count", "dislike_count"], + per_user, + ) + stmt = stmt.on_conflict_do_update( + index_elements=[ + AnalyticsUserDailyStats.user_id, + AnalyticsUserDailyStats.date, + ], + set_={ + "message_count": stmt.excluded.message_count, + "like_count": stmt.excluded.like_count, + "dislike_count": stmt.excluded.dislike_count, + "rolled_up_at": func.now(), + }, + ) + db_session.execute(stmt) + db_session.commit() + + +def upsert_persona_daily_stats_for_date( + db_session: Session, target_date: datetime.date +) -> None: + """Upsert one row per assistant (persona) active on ``target_date`` into + ``analytics_persona_daily_stats``. 
+ + Same durable/idempotent contract as the per-user variant. ``persona_id`` + lives on chat_session, so this is a clean group-by. ``message_count`` + uses COUNT(DISTINCT message) because the feedback outerjoin can fan a + message into multiple rows.""" + start, end = _day_bounds(target_date) + per_persona = ( + select( + ChatSession.persona_id.label("persona_id"), + literal(target_date, Date).label("date"), + func.count(func.distinct(ChatSession.id)).label("session_count"), + func.count(func.distinct(ChatMessage.id)).label("message_count"), + func.coalesce( + func.sum(case((ChatMessageFeedback.is_positive, 1), else_=0)), 0 + ).label("like_count"), + func.coalesce( + func.sum( + case( + (ChatMessageFeedback.is_positive == False, 1), # noqa: E712 + else_=0, + ) + ), + 0, + ).label("dislike_count"), + ) + .select_from(ChatMessage) + .join(ChatSession, ChatSession.id == ChatMessage.chat_session_id) + .outerjoin( + ChatMessageFeedback, + ChatMessageFeedback.chat_message_id == ChatMessage.id, + ) + .where(ChatMessage.time_sent >= start) + .where(ChatMessage.time_sent < end) + .where(ChatMessage.message_type == MessageType.ASSISTANT) + .group_by(ChatSession.persona_id) + ) + stmt = pg_insert(AnalyticsPersonaDailyStats.__table__).from_select( + [ + "persona_id", + "date", + "session_count", + "message_count", + "like_count", + "dislike_count", + ], + per_persona, + ) + stmt = stmt.on_conflict_do_update( + index_elements=[ + AnalyticsPersonaDailyStats.persona_id, + AnalyticsPersonaDailyStats.date, + ], + set_={ + "session_count": stmt.excluded.session_count, + "message_count": stmt.excluded.message_count, + "like_count": stmt.excluded.like_count, + "dislike_count": stmt.excluded.dislike_count, + "rolled_up_at": func.now(), + }, + ) + db_session.execute(stmt) + db_session.commit() + + # --------------------------------------------------------------------------- # Batch operations — sliding window (daily task) + full backfill # 
--------------------------------------------------------------------------- @@ -429,6 +603,12 @@ def run_rollup(today: datetime.date | None = None) -> int: current = start while current <= today: upsert_rollup_for_date(db_session, current) + # Capture first-seen + per-user and per-persona daily stats in the same + # ascending pass, before retention can delete the day's chat rows (rollup + # runs 07:30, sweep 08:00). + capture_first_seen_for_date(db_session, current) + upsert_user_daily_stats_for_date(db_session, current) + upsert_persona_daily_stats_for_date(db_session, current) current += datetime.timedelta(days=1) n += 1 @@ -458,6 +638,11 @@ def backfill_all_rollups(start_date: datetime.date, end_date: datetime.date) -> current = start_date while current <= end_date: upsert_rollup_for_date(db_session, current) + # Walk ascending so each user's first_seen_date is their true + # first-ever active day across all currently-available history. + capture_first_seen_for_date(db_session, current) + upsert_user_daily_stats_for_date(db_session, current) + upsert_persona_daily_stats_for_date(db_session, current) current += datetime.timedelta(days=1) n += 1 if n % 30 == 0: diff --git a/backend/danswer/db/chat.py b/backend/danswer/db/chat.py index 11a4cfec047..4c791bcfc2e 100644 --- a/backend/danswer/db/chat.py +++ b/backend/danswer/db/chat.py @@ -98,14 +98,18 @@ def delete_search_doc_message_relationship( def delete_orphaned_search_docs(db_session: Session) -> None: - orphaned_docs = ( - db_session.query(SearchDoc) + # Delete SearchDoc rows no longer referenced by any chat_message__search_doc. + # Previously this fetched every orphan as a full ORM row (incl. blurb/ + # content) just to delete it in a loop; a single bulk DELETE over the same + # set avoids materializing them. Orphans have no association rows by + # definition, so there is nothing for an ORM cascade to handle (matches the + # raw-SQL orphan cleanup in db/retention.py). 
+ orphan_ids = ( + select(SearchDoc.id) .outerjoin(ChatMessage__SearchDoc) - .filter(ChatMessage__SearchDoc.chat_message_id.is_(None)) - .all() + .where(ChatMessage__SearchDoc.chat_message_id.is_(None)) ) - for doc in orphaned_docs: - db_session.delete(doc) + db_session.execute(delete(SearchDoc).where(SearchDoc.id.in_(orphan_ids))) db_session.commit() diff --git a/backend/danswer/db/connector.py b/backend/danswer/db/connector.py index 2e4b1ed4c3e..8046b074fea 100644 --- a/backend/danswer/db/connector.py +++ b/backend/danswer/db/connector.py @@ -189,6 +189,13 @@ def fetch_latest_index_attempt_by_connector( return [] for connector in connectors: + # NOTE: legacy Query.first() DOES emit LIMIT 1, so this ordered query + # over the large index_attempt table is safe despite running per + # connector. If you ever migrate this to the 2.x style + # `db_session.execute(select(...).order_by(...)).scalars().first()`, + # you MUST add `.limit(1)` — Result.first() does NOT add LIMIT and would + # materialize the connector's entire attempt history (the exact bug that + # was fixed in db/index_attempt.py::get_last_attempt). 
latest_index_attempt = ( db_session.query(IndexAttempt) .filter(IndexAttempt.connector_id == connector.id) diff --git a/backend/danswer/db/connector_credential_pair.py b/backend/danswer/db/connector_credential_pair.py index 4fa2f8a0a09..f5ba20ccebc 100644 --- a/backend/danswer/db/connector_credential_pair.py +++ b/backend/danswer/db/connector_credential_pair.py @@ -5,6 +5,7 @@ from sqlalchemy import desc from sqlalchemy import select from sqlalchemy import text +from sqlalchemy.orm import joinedload from sqlalchemy.orm import Session from danswer.db.connector import fetch_connector_by_id @@ -77,11 +78,20 @@ def release_deletion_lock( def get_connector_credential_pairs( - db_session: Session, include_disabled: bool = True + db_session: Session, + include_disabled: bool = True, + eager_load_connector: bool = False, ) -> list[ConnectorCredentialPair]: stmt = select(ConnectorCredentialPair) if not include_disabled: stmt = stmt.where(ConnectorCredentialPair.connector.disabled == False) # noqa + # Callers that read `cc_pair.connector.*` for every row (e.g. the + # basic indexing-status endpoint on the chat page, which derives + # available source types) MUST set this — otherwise the lazy + # relationship fires one query per cc-pair (an N+1 that, at a few + # hundred cc-pairs against a remote Postgres, dominates page load). 
+ if eager_load_connector: + stmt = stmt.options(joinedload(ConnectorCredentialPair.connector)) results = db_session.scalars(stmt) return list(results.all()) diff --git a/backend/danswer/db/document.py b/backend/danswer/db/document.py index befb8675748..03302561113 100644 --- a/backend/danswer/db/document.py +++ b/backend/danswer/db/document.py @@ -45,6 +45,29 @@ def get_documents_for_connector_credential_pair( return db_session.scalars(stmt).all() +def get_document_ids_for_connector_credential_pair( + db_session: Session, connector_id: int, credential_id: int +) -> list[str]: + """Same document set as get_documents_for_connector_credential_pair, but + selects ONLY the id column. + + Callers that just need the set of indexed document ids (e.g. the prune task, + which diffs them against the connector's current docs) were materializing + full DbDocument ORM rows for the connector's ENTIRE corpus just to read + `.id` — hundreds of MB on large connectors. Same WHERE + DISTINCT, so the + returned id set is identical.""" + initial_doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where( + and_( + DocumentByConnectorCredentialPair.connector_id == connector_id, + DocumentByConnectorCredentialPair.credential_id == credential_id, + ) + ) + stmt = ( + select(DbDocument.id).where(DbDocument.id.in_(initial_doc_ids_stmt)).distinct() + ) + return list(db_session.scalars(stmt).all()) + + def get_documents_by_ids( document_ids: list[str], db_session: Session, @@ -223,14 +246,28 @@ def upsert_document_by_connector_credential_pair( def update_docs_updated_at( ids_to_new_updated_at: dict[str, datetime], db_session: Session, + ids_to_new_content_hash: dict[str, str] | None = None, ) -> None: - doc_ids = list(ids_to_new_updated_at.keys()) + """Record post-successful-index state on the document rows. + + `ids_to_new_content_hash` (optional) stores the sha256 of the indexed + content so a later run can skip re-indexing unchanged docs. 
Default None + keeps the original updated-at-only behavior for any other caller. + """ + ids_to_new_content_hash = ids_to_new_content_hash or {} + doc_ids = list(set(ids_to_new_updated_at) | set(ids_to_new_content_hash)) + if not doc_ids: + return + documents_to_update = ( db_session.query(DbDocument).filter(DbDocument.id.in_(doc_ids)).all() ) for document in documents_to_update: - document.doc_updated_at = ids_to_new_updated_at[document.id] + if document.id in ids_to_new_updated_at: + document.doc_updated_at = ids_to_new_updated_at[document.id] + if document.id in ids_to_new_content_hash: + document.indexed_content_hash = ids_to_new_content_hash[document.id] db_session.commit() diff --git a/backend/danswer/db/document_set.py b/backend/danswer/db/document_set.py index 51064f78e2f..cd68527a7b2 100644 --- a/backend/danswer/db/document_set.py +++ b/backend/danswer/db/document_set.py @@ -9,6 +9,7 @@ from sqlalchemy import select from sqlalchemy.orm import Session +from danswer.db.document_set_cache import invalidate_document_sets_all from danswer.db.models import ConnectorCredentialPair from danswer.db.models import Document from danswer.db.models import DocumentByConnectorCredentialPair @@ -130,6 +131,9 @@ def insert_document_set( ) db_session.commit() + # Write-through: drop the global document-set list cache (no-op if + # disabled). After commit so a concurrent reader can't refill it with stale data. 
+ invalidate_document_sets_all() except: db_session.rollback() raise @@ -194,6 +198,7 @@ def update_document_set( ] db_session.add_all(ds_cc_pairs) db_session.commit() + invalidate_document_sets_all() # write-through bust (see insert_document_set) except: db_session.rollback() raise @@ -214,6 +219,7 @@ def mark_document_set_as_synced(document_set_id: int, db_session: Session) -> No db_session=db_session, document_set_id=document_set_id, is_current=False ) db_session.commit() + invalidate_document_sets_all() # write-through bust (background sync changes membership) def delete_document_set( @@ -225,6 +231,7 @@ def delete_document_set( ) db_session.delete(document_set_row) db_session.commit() + invalidate_document_sets_all() # write-through bust (see insert_document_set) def mark_document_set_as_to_be_deleted( @@ -265,6 +272,7 @@ def mark_document_set_as_to_be_deleted( # are no more relationships to cc pairs document_set_row.is_up_to_date = False db_session.commit() + invalidate_document_sets_all() # write-through bust (see insert_document_set) except: db_session.rollback() raise @@ -388,9 +396,12 @@ def fetch_documents_for_document_set_paginated( current_only: bool = True, last_document_id: str | None = None, limit: int = 100, -) -> tuple[Sequence[Document], str | None]: +) -> tuple[Sequence[str], str | None]: + # Selects only Document.id — the sole caller (document-set sync) uses just + # the ids, and the keyset cursor is the last id. Selecting full Document + # ORM rows per batch was needless materialization. 
stmt = ( - select(Document) + select(Document.id) .join( DocumentByConnectorCredentialPair, DocumentByConnectorCredentialPair.id == Document.id, @@ -426,8 +437,8 @@ def fetch_documents_for_document_set_paginated( ) stmt = stmt.distinct() - documents = db_session.scalars(stmt).all() - return documents, documents[-1].id if documents else None + document_ids = db_session.scalars(stmt).all() + return document_ids, document_ids[-1] if document_ids else None def fetch_document_sets_for_documents( @@ -486,6 +497,7 @@ def get_or_create_document_set_by_name( db_session.add(new_doc_set) db_session.commit() + invalidate_document_sets_all() # write-through bust (see insert_document_set) return new_doc_set diff --git a/backend/danswer/db/document_set_cache.py b/backend/danswer/db/document_set_cache.py new file mode 100644 index 00000000000..db1e7f8990e --- /dev/null +++ b/backend/danswer/db/document_set_cache.py @@ -0,0 +1,193 @@ +"""Global document-set list cache, Redis-backed (MIT-scoped). + +The chat-page bundle fires ``GET /document-set`` → +``server/features/document_set/api.py::list_document_sets`` → +``db/document_set.py::fetch_user_document_sets`` on *every* page load. +That read is a multi-join (DocumentSet ⋈ cc-pair mapping ⋈ +ConnectorCredentialPair). At a few hundred users clicking around chat it +adds avoidable DB-pool pressure. + +In Danswer **MIT**, document sets are *not* permission-filtered — every +user sees the same full list (they're organizational; the documents +themselves are permission-enforced at search time). So one **global** +cached list is correct for everyone, and 200 concurrent first-loads +collapse to a single DB query. + +This module has **no dependency on the EE package** (different license). 
+It only reads the MIT-core flag ``global_version.get_is_ee_version()`` to +stay safe: if a deployment enables EE, ``fetch_user_document_sets`` starts +filtering per user, at which point a shared global list would leak sets +across users — so under EE we simply **bypass the cache** and read the DB +directly. Nothing here imports ``ee.*``; the global build uses the +``user_id=None`` path, which the core resolves to the MIT base query +without going through the versioned (EE) dispatch at all. + +**Invalidation:** write-through. Every committing mutation in +``db/document_set.py`` calls :func:`invalidate_document_sets_all` after +commit (a single ``DEL`` of the global key). The +``DOCUMENT_SET_CACHE_TTL_SECONDS`` backstop heals any missed bust; +staleness is cosmetic (names/membership in the UI list). + +**Fail-open**: any Redis error logs and falls through to a direct DB +build. **Default OFF**: ``DOCUMENT_SET_CACHE_ENABLED=false``. +""" +from __future__ import annotations + +import json +from typing import Any +from typing import cast +from uuid import UUID + +from sqlalchemy.orm import Session + +from danswer.configs.app_configs import DOCUMENT_SET_CACHE_ENABLED +from danswer.configs.app_configs import DOCUMENT_SET_CACHE_TTL_SECONDS +from danswer.redis.redis_pool import DANSWER_REDIS_KEY_PREFIX +from danswer.redis.redis_pool import get_redis_client +from danswer.server.features.document_set.models import DocumentSet +from danswer.utils.logger import setup_logger +from danswer.utils.variable_functionality import global_version + + +logger = setup_logger() + + +# Single shared key — the full document-set list, identical for all users in +# MIT. Any document-set mutation must invalidate it. 
+_DOC_SETS_ALL_KEY = DANSWER_REDIS_KEY_PREFIX + "document_sets:all" + + +# --------------------------------------------------------------------------- +# Public API — read path +# --------------------------------------------------------------------------- + + +def get_document_sets_for_user_cached( + user_id: UUID | None, db_session: Session +) -> list[DocumentSet]: + """Return the ``DocumentSet`` list for ``user_id``. + + * Cache disabled, OR EE enabled (per-user filtering) → direct DB build + for this user. The EE bypass avoids serving one user's filtered list + to another; we never import EE, only check the MIT-core version flag. + * MIT + enabled → the shared global list (built once, reused by all). + """ + if not DOCUMENT_SET_CACHE_ENABLED or global_version.get_is_ee_version(): + return _build(user_id, db_session) + + hit, cached = _safe_get(_DOC_SETS_ALL_KEY) + if hit and isinstance(cached, list): + try: + return [DocumentSet.parse_obj(d) for d in cached] + except Exception as e: + # Schema drift since the entry was cached — treat as a miss. + logger.warning( + "Cached document-set list failed DocumentSet parse, refilling: %s", e + ) + + # Build the global list via the user_id=None path — in the core this is + # the MIT base query (all sets), and it never touches the versioned/EE + # dispatch. + result = _build(None, db_session) + _safe_set(_DOC_SETS_ALL_KEY, [json.loads(ds.json()) for ds in result]) + return result + + +# --------------------------------------------------------------------------- +# Public API — invalidation +# --------------------------------------------------------------------------- + + +def invalidate_document_sets_all() -> None: + """Drop the cached global document-set list. + + Call *after* ``db_session.commit()`` in any document-set mutation. + Cheap no-op when the cache is disabled. 
+ """ + if not DOCUMENT_SET_CACHE_ENABLED: + return + try: + get_redis_client().delete(_DOC_SETS_ALL_KEY) + except Exception as e: + # Fail-open — the TTL backstop heals it. Loud log for a persistent + # Redis outage. + logger.warning("invalidate_document_sets_all: Redis DEL failed: %s", e) + + +# --------------------------------------------------------------------------- +# Internals +# --------------------------------------------------------------------------- + + +def _build(user_id: UUID | None, db_session: Session) -> list[DocumentSet]: + """Build the ``DocumentSet`` list exactly as the endpoint did. + + Local imports keep this module free of an import cycle: ``db.document_set`` + imports :func:`invalidate_document_sets_all` from here at module load. + """ + from danswer.db.document_set import fetch_user_document_sets + from danswer.server.documents.models import ConnectorCredentialPairDescriptor + from danswer.server.documents.models import ConnectorSnapshot + from danswer.server.documents.models import CredentialSnapshot + + document_set_info = fetch_user_document_sets(user_id=user_id, db_session=db_session) + return [ + DocumentSet( + id=document_set_db_model.id, + name=document_set_db_model.name, + description=document_set_db_model.description, + contains_non_public=any(not cc_pair.is_public for cc_pair in cc_pairs), + cc_pair_descriptors=[ + ConnectorCredentialPairDescriptor( + id=cc_pair.id, + name=cc_pair.name, + connector=ConnectorSnapshot.from_connector_db_model( + cc_pair.connector + ), + credential=CredentialSnapshot.from_credential_db_model( + cc_pair.credential + ), + ) + for cc_pair in cc_pairs + ], + is_up_to_date=document_set_db_model.is_up_to_date, + is_public=document_set_db_model.is_public, + users=[user.id for user in document_set_db_model.users], + groups=[group.id for group in document_set_db_model.groups], + ) + for document_set_db_model, cc_pairs in document_set_info + ] + + +# ---- Fail-open Redis helpers (mirror persona_cache.py) ---- 
+ + +def _safe_get(key: str) -> tuple[bool, Any]: + """Return ``(hit, value)``; ``hit=False`` covers miss AND any Redis or + decode error — the caller treats them all as "go to the DB".""" + try: + # decode_responses=False on the pool → bytes | None. The cast just + # collapses redis-py's sync/async overload union for mypy. + raw = cast("bytes | None", get_redis_client().get(key)) + except Exception as e: + logger.warning("document_set_cache: Redis GET failed for %s: %s", key, e) + return (False, None) + if raw is None: + return (False, None) + try: + return (True, json.loads(raw)) + except (TypeError, ValueError) as e: + logger.warning("document_set_cache: corrupt entry at %s, ignoring: %s", key, e) + return (False, None) + + +def _safe_set(key: str, val: Any) -> None: + try: + payload = json.dumps(val) + except (TypeError, ValueError) as e: + logger.warning("document_set_cache: skipping non-JSON value at %s: %s", key, e) + return + try: + get_redis_client().set(key, payload, ex=DOCUMENT_SET_CACHE_TTL_SECONDS) + except Exception as e: + logger.warning("document_set_cache: Redis SET failed for %s: %s", key, e) diff --git a/backend/danswer/db/embedding_model.py b/backend/danswer/db/embedding_model.py index ae2b98d514f..3a449e92d21 100644 --- a/backend/danswer/db/embedding_model.py +++ b/backend/danswer/db/embedding_model.py @@ -47,6 +47,7 @@ def get_current_db_embedding_model(db_session: Session) -> EmbeddingModel: select(EmbeddingModel) .where(EmbeddingModel.status == IndexModelStatus.PRESENT) .order_by(EmbeddingModel.id.desc()) + .limit(1) # .scalars().first() doesn't add LIMIT; table is tiny, be explicit ) result = db_session.execute(query) latest_model = result.scalars().first() @@ -62,6 +63,7 @@ def get_secondary_db_embedding_model(db_session: Session) -> EmbeddingModel | No select(EmbeddingModel) .where(EmbeddingModel.status == IndexModelStatus.FUTURE) .order_by(EmbeddingModel.id.desc()) + .limit(1) # .scalars().first() doesn't add LIMIT; table is tiny, be 
explicit ) result = db_session.execute(query) latest_model = result.scalars().first() diff --git a/backend/danswer/db/engine.py b/backend/danswer/db/engine.py index 14174f20e6d..1c3c0a69cae 100644 --- a/backend/danswer/db/engine.py +++ b/backend/danswer/db/engine.py @@ -16,6 +16,8 @@ from danswer.configs.app_configs import POSTGRES_DB from danswer.configs.app_configs import POSTGRES_HOST from danswer.configs.app_configs import POSTGRES_PASSWORD +from danswer.configs.app_configs import POSTGRES_POOL_OVERFLOW +from danswer.configs.app_configs import POSTGRES_POOL_SIZE from danswer.configs.app_configs import POSTGRES_PORT from danswer.configs.app_configs import POSTGRES_USER from danswer.utils.logger import setup_logger @@ -69,8 +71,8 @@ def get_sqlalchemy_engine() -> Engine: _SYNC_ENGINE = create_engine( connection_string, - pool_size=40, - max_overflow=10, + pool_size=POSTGRES_POOL_SIZE, + max_overflow=POSTGRES_POOL_OVERFLOW, pool_pre_ping=True, connect_args=keepalive_kwargs, ) @@ -82,7 +84,9 @@ def get_sqlalchemy_async_engine() -> AsyncEngine: if _ASYNC_ENGINE is None: connection_string = build_connection_string() _ASYNC_ENGINE = create_async_engine( - connection_string, pool_size=40, max_overflow=10 + connection_string, + pool_size=POSTGRES_POOL_SIZE, + max_overflow=POSTGRES_POOL_OVERFLOW, ) return _ASYNC_ENGINE diff --git a/backend/danswer/db/index_attempt.py b/backend/danswer/db/index_attempt.py index 6bb188360d2..fcd4a18b938 100644 --- a/backend/danswer/db/index_attempt.py +++ b/backend/danswer/db/index_attempt.py @@ -6,6 +6,7 @@ from sqlalchemy import desc from sqlalchemy import func from sqlalchemy import or_ +from sqlalchemy import Select from sqlalchemy import select from sqlalchemy import text from sqlalchemy import update @@ -238,6 +239,15 @@ def get_last_attempt( # Note, the below is using time_created instead of time_updated stmt = stmt.order_by(desc(IndexAttempt.time_created)) + # LIMIT 1 in SQL — NOT just Result.first(). 
`execute(stmt).scalars().first()` + # does not add a LIMIT, so without this the DB returns the cc-pair's ENTIRE + # attempt history (psycopg2 buffers it all client-side, the ORM materializes + # every row) and we throw all but one away. The indexing scheduler calls this + # once per cc-pair every loop, so with a large index_attempt table that spiked + # the scheduler to multi-GB per cycle (OOMKilled). With LIMIT 1 the DB returns + # one row. See update.py::create_indexing_jobs. + stmt = stmt.limit(1) + return db_session.execute(stmt).scalars().first() @@ -292,7 +302,12 @@ def get_index_attempts_for_cc_pair( cc_pair_identifier: ConnectorCredentialPairIdentifier, only_current: bool = True, disinclude_finished: bool = False, + limit: int | None = None, ) -> Sequence[IndexAttempt]: + # `limit` is optional and defaults to None (unbounded — unchanged behavior). + # IndexAttempt rows carry large Text columns (error_msg, full_exception_trace), + # so callers that only need existence or a recent slice should pass a limit + # rather than materialize a busy cc-pair's entire history. 
stmt = select(IndexAttempt).where( and_( IndexAttempt.connector_id == cc_pair_identifier.connector_id, @@ -311,6 +326,52 @@ def get_index_attempts_for_cc_pair( ) stmt = stmt.order_by(IndexAttempt.time_created.desc()) + if limit is not None: + stmt = stmt.limit(limit) + return db_session.execute(stmt).scalars().all() + + +def _cc_pair_index_attempts_base_stmt( + cc_pair_identifier: ConnectorCredentialPairIdentifier, + only_current: bool, +) -> Select: + """Shared WHERE/JOIN for the cc-pair index-attempt queries (count + + paginated fetch) so they always agree on what counts as 'in scope'.""" + stmt = select(IndexAttempt).where( + and_( + IndexAttempt.connector_id == cc_pair_identifier.connector_id, + IndexAttempt.credential_id == cc_pair_identifier.credential_id, + ) + ) + if only_current: + stmt = stmt.join(EmbeddingModel).where( + EmbeddingModel.status == IndexModelStatus.PRESENT + ) + return stmt + + +def count_index_attempts_for_cc_pair( + db_session: Session, + cc_pair_identifier: ConnectorCredentialPairIdentifier, + only_current: bool = True, +) -> int: + base = _cc_pair_index_attempts_base_stmt(cc_pair_identifier, only_current) + count_stmt = select(func.count()).select_from(base.subquery()) + return db_session.execute(count_stmt).scalar_one() + + +def get_paginated_index_attempts_for_cc_pair( + db_session: Session, + cc_pair_identifier: ConnectorCredentialPairIdentifier, + page: int, + page_size: int, + only_current: bool = True, +) -> Sequence[IndexAttempt]: + """One page of a cc-pair's index attempts, newest first. `page` is 0-based. 
+ Server-side LIMIT/OFFSET so the API never materializes the full history.""" + stmt = _cc_pair_index_attempts_base_stmt(cc_pair_identifier, only_current) + stmt = stmt.order_by(IndexAttempt.time_created.desc()) + stmt = stmt.limit(page_size).offset(max(page, 0) * page_size) return db_session.execute(stmt).scalars().all() diff --git a/backend/danswer/db/models.py b/backend/danswer/db/models.py index 4a07d4c2887..f99b0da8f24 100644 --- a/backend/danswer/db/models.py +++ b/backend/danswer/db/models.py @@ -11,6 +11,7 @@ from fastapi_users_db_sqlalchemy import SQLAlchemyBaseOAuthAccountTableUUID from fastapi_users_db_sqlalchemy import SQLAlchemyBaseUserTableUUID from fastapi_users_db_sqlalchemy.access_token import SQLAlchemyBaseAccessTokenTableUUID +from fastapi_users_db_sqlalchemy.generics import GUID from sqlalchemy import Boolean from sqlalchemy import Date from sqlalchemy import DateTime @@ -327,6 +328,13 @@ class Document(Base): doc_updated_at: Mapped[datetime.datetime | None] = mapped_column( DateTime(timezone=True), nullable=True ) + # sha256 of the document's INDEXED content (sections/title/metadata/owners, + # NOT doc_updated_at) as of the last SUCCESSFUL index into Vespa. Lets the + # indexing pipeline skip the expensive Vespa clear-and-rewrite when a + # connector re-emits a document whose timestamp advanced but whose content + # is identical (e.g. Salesforce LastModifiedDate churn). Nullable: rows + # indexed before this column existed fall back to the doc_updated_at skip. 
+ indexed_content_hash: Mapped[str | None] = mapped_column(String, nullable=True) # The following are not attached to User because the account/email may not be known # within Danswer # Something like the document creator @@ -1177,7 +1185,12 @@ class PGFileStore(Base): file_origin: Mapped[FileOrigin] = mapped_column(Enum(FileOrigin, native_enum=False)) file_type: Mapped[str] = mapped_column(String, default="text/plain") file_metadata: Mapped[JSON_ro] = mapped_column(postgresql.JSONB(), nullable=True) - lobj_oid: Mapped[int] = mapped_column(Integer, nullable=False) + # Exactly one of these locates the bytes: + # lobj_oid — Postgres large object (PostgresBackedFileStore) + # object_key — Blob/object key (AzureBlobFileStore); metadata stays here + # Both nullable so the two backends coexist during migration. + lobj_oid: Mapped[int | None] = mapped_column(Integer, nullable=True) + object_key: Mapped[str | None] = mapped_column(String, nullable=True) """ @@ -1453,7 +1466,7 @@ class AnalyticsDailyRollup(Base): retention deletes. `chat_message` / `chat_session` rows older than RETENTION_DAYS_CHAT - (default 30d) are purged by the daily retention sweep. The analytics + (default 90d) are purged by the daily retention sweep. The analytics endpoints used to read directly from those tables, so any date range older than ~30 days returned zeros. This rollup table is computed BEFORE the retention sweep each day (Celery beat at 07:30 UTC, sweep @@ -1502,3 +1515,107 @@ class AnalyticsDailyRollup(Base): server_default=func.now(), onupdate=func.now(), ) + + +class AnalyticsUserFirstSeen(Base): + """Durable record of the first UTC date each user used chat (asked a + question). Powers the adoption curve ("how many distinct users have ever + tried chat") on the admin Analytics page. + + Populated incrementally by the analytics rollup (BEFORE the retention + sweep), one row per user, ever — ``first_seen_date`` is written once and + never moves forward (INSERT ... ON CONFLICT DO NOTHING). 
This is what + makes adoption survive chat retention: once chat_message rows age out of + RETENTION_DAYS_CHAT they're deleted, so "first time we saw user X" can no + longer be recomputed from raw data — it must be captured here while the + data still exists. + + Deliberately NO foreign key to ``user`` (mirrors AnalyticsDailyRollup's + no-FK stance): deleting a user must not erase the historical fact that + they once adopted chat, and must not cascade into this aggregate. + """ + + __tablename__ = "analytics_user_first_seen" + + user_id: Mapped[UUID] = mapped_column(GUID(), primary_key=True) + first_seen_date: Mapped[datetime.date] = mapped_column( + Date, nullable=False, index=True + ) + created_at: Mapped[datetime.datetime] = mapped_column( + DateTime(timezone=True), nullable=False, server_default=func.now() + ) + + +class AnalyticsUserDailyStats(Base): + """Durable per-user-per-day chat activity counts. Powers the "top users + by activity" leaderboard on the admin Analytics page. + + Same durability contract as AnalyticsDailyRollup / AnalyticsUserFirstSeen: + upserted daily by the rollup (one row per active user per UTC day) BEFORE + the retention sweep, then kept indefinitely. Reading the leaderboard from + this aggregate — instead of raw chat_message — means it spans the full + history regardless of RETENTION_DAYS_CHAT, not just the last window. + + Idempotent: the rollup re-upserts the sliding recompute window with + ON CONFLICT (user_id, date) DO UPDATE, so late-arriving feedback is + reflected. No FK to `user` (see AnalyticsUserFirstSeen) — the email is + joined live at query time, so a deleted user simply drops off the + leaderboard without erasing the historical counts. 
+ """ + + __tablename__ = "analytics_user_daily_stats" + + user_id: Mapped[UUID] = mapped_column(GUID(), primary_key=True) + date: Mapped[datetime.date] = mapped_column(Date, primary_key=True) + message_count: Mapped[int] = mapped_column( + Integer, nullable=False, default=0, server_default="0" + ) + like_count: Mapped[int] = mapped_column( + Integer, nullable=False, default=0, server_default="0" + ) + dislike_count: Mapped[int] = mapped_column( + Integer, nullable=False, default=0, server_default="0" + ) + rolled_up_at: Mapped[datetime.datetime] = mapped_column( + DateTime(timezone=True), + nullable=False, + server_default=func.now(), + onupdate=func.now(), + ) + + +class AnalyticsPersonaDailyStats(Base): + """Durable per-assistant-per-day chat activity counts. Powers the + "most-used assistants" leaderboard, and (by joining persona__document_set + at query time) an approximate "datasets in use" view. + + Same durability contract as the other analytics rollups: upserted daily + BEFORE the retention sweep, kept indefinitely, so usage spans the full + history regardless of RETENTION_DAYS_CHAT. No FK to `persona` — the name + is joined live, so a deleted assistant drops off the leaderboard without + erasing historical counts. ``session_count`` is distinct chat sessions + that had at least one assistant reply that day. 
+ """ + + __tablename__ = "analytics_persona_daily_stats" + + persona_id: Mapped[int] = mapped_column(Integer, primary_key=True) + date: Mapped[datetime.date] = mapped_column(Date, primary_key=True) + session_count: Mapped[int] = mapped_column( + Integer, nullable=False, default=0, server_default="0" + ) + message_count: Mapped[int] = mapped_column( + Integer, nullable=False, default=0, server_default="0" + ) + like_count: Mapped[int] = mapped_column( + Integer, nullable=False, default=0, server_default="0" + ) + dislike_count: Mapped[int] = mapped_column( + Integer, nullable=False, default=0, server_default="0" + ) + rolled_up_at: Mapped[datetime.datetime] = mapped_column( + DateTime(timezone=True), + nullable=False, + server_default=func.now(), + onupdate=func.now(), + ) diff --git a/backend/danswer/db/persona.py b/backend/danswer/db/persona.py index 946a3f897e4..e39eb47306b 100644 --- a/backend/danswer/db/persona.py +++ b/backend/danswer/db/persona.py @@ -24,6 +24,7 @@ from danswer.db.models import Tool from danswer.db.models import User from danswer.db.models import User__UserGroup +from danswer.db.persona_cache import invalidate_personas_all from danswer.search.enums import RecencyBiasSetting from danswer.server.features.persona.models import CreatePersonaRequest from danswer.server.features.persona.models import PersonaSnapshot @@ -48,6 +49,7 @@ def make_persona_private( db_session.add(Persona__User(persona_id=persona_id, user_id=user_uuid)) db_session.commit() + invalidate_personas_all() # Persona__User membership changed # May cause error if someone switches down to MIT from EE if group_ids: @@ -218,6 +220,7 @@ def mark_persona_as_deleted( ) persona.deleted = True db_session.commit() + invalidate_personas_all() def mark_persona_as_not_deleted( @@ -231,6 +234,7 @@ def mark_persona_as_not_deleted( if persona.deleted: persona.deleted = False db_session.commit() + invalidate_personas_all() else: raise ValueError(f"Persona with ID {persona_id} is not deleted.") 
@@ -246,6 +250,7 @@ def mark_delete_persona_by_name( db_session.execute(stmt) db_session.commit() + invalidate_personas_all() def update_all_personas_display_priority( @@ -262,6 +267,7 @@ def update_all_personas_display_priority( persona.display_priority = display_priority_map[persona.id] db_session.commit() + invalidate_personas_all() def upsert_prompt( @@ -430,9 +436,14 @@ def upsert_persona( if commit: db_session.commit() + invalidate_personas_all() else: # flush the session so that the persona has an ID db_session.flush() + # No bust here — caller hasn't committed. They are responsible for + # invalidating after their final commit, OR they're being called + # by a wrapper like create_update_persona whose subsequent steps + # (make_persona_private) will commit and bust. return persona @@ -460,6 +471,7 @@ def delete_old_default_personas( db_session.execute(stmt) db_session.commit() + invalidate_personas_all() def update_persona_visibility( @@ -470,6 +482,7 @@ def update_persona_visibility( persona = get_persona_by_id(persona_id=persona_id, user=None, db_session=db_session) persona.is_visible = is_visible db_session.commit() + invalidate_personas_all() def check_user_can_edit_persona(user: User | None, persona: Persona) -> None: @@ -636,6 +649,7 @@ def delete_persona_by_name( db_session.execute(stmt) db_session.commit() + invalidate_personas_all() def get_persona_with_docset_and_prompts( diff --git a/backend/danswer/db/persona_cache.py b/backend/danswer/db/persona_cache.py new file mode 100644 index 00000000000..81905150234 --- /dev/null +++ b/backend/danswer/db/persona_cache.py @@ -0,0 +1,299 @@ +"""Per-user persona ("assistant") list cache, Redis-backed. + +The "Manage Assistants" tile in the chat UI fires +``GET /persona`` → ``server/features/persona/api.py::list_personas`` → +``db/persona.py::get_personas(user_id, …)``. 
That query is a multi-OR +permission filter over ``Persona`` joined with ``Persona__User``, +``Persona__UserGroup`` and ``User__UserGroup`` (the user's groups). It +runs once per user per page-load; at hundreds of users opening chat +around the same time, the burst hits the DB connection pool harder +than it deserves to. + +This module shifts the work to Redis with a **global cache + Python +filter** shape: + + ``personas:all:not_deleted`` — JSON list of every non-deleted + ``PersonaSnapshot``. Shared + across users, so 200 concurrent + first-clicks become ~1 DB + query rather than 200. + + ``personas:groups:{user_id}`` — JSON list of the user's group + ids; cheap indexed + lookup but worth caching since + it's hit on every persona-list + call. + +Because ``PersonaSnapshot`` already carries the permission inputs +(``is_public``, ``users``, ``groups``), the filter runs in Python on +the cached list: + + persona.is_public + OR user_id in {u.id for u in persona.users} # direct grant + OR (user_group_ids ∩ set(persona.groups)) # group grant + +This mirrors the SQL OR-block in :func:`danswer.db.persona.get_personas` +exactly — the parity is locked down by tests. + +**Invalidation:** explicit, invalidate-on-write. Every mutation that affects +``Persona`` / ``Persona__User`` / ``Persona__UserGroup`` calls +:func:`invalidate_personas_all` after commit; every change to +``User__UserGroup`` calls :func:`invalidate_user_groups(user_id)`. The +``PERSONA_CACHE_TTL_SECONDS`` (24 h default) is *only* a long-tail +safety net for missed busts — the primary mechanism is explicit. + +**Fail-open**: any Redis error logs and falls through to a direct DB +read. A Redis outage degrades latency, not availability. + +**Default OFF**: ``PERSONA_CACHE_ENABLED=false`` keeps the existing +direct-DB path. Enable per environment once Redis is reachable.
+""" +from __future__ import annotations + +import json +from typing import Any +from uuid import UUID + +from sqlalchemy import select +from sqlalchemy.orm import Session + +from danswer.configs.app_configs import PERSONA_CACHE_ENABLED +from danswer.configs.app_configs import PERSONA_CACHE_TTL_SECONDS +from danswer.db.models import User__UserGroup +from danswer.redis.redis_pool import DANSWER_REDIS_KEY_PREFIX +from danswer.redis.redis_pool import get_redis_client +from danswer.server.features.persona.models import PersonaSnapshot +from danswer.utils.logger import setup_logger + + +logger = setup_logger() + + +# Single shared key for the "all non-deleted personas" snapshot list. Any +# Persona / Persona__User / Persona__UserGroup mutation must invalidate this. +_PERSONAS_ALL_KEY = DANSWER_REDIS_KEY_PREFIX + "personas:all:not_deleted" + +# Per-user namespace for cached group memberships. User__UserGroup mutations +# must invalidate the affected user(s). +_USER_GROUPS_KEY_PREFIX = DANSWER_REDIS_KEY_PREFIX + "personas:groups:" + + +# --------------------------------------------------------------------------- +# Public API — read path +# --------------------------------------------------------------------------- + + +def get_personas_for_user_cached( + user_id: UUID | None, + db_session: Session, + include_deleted: bool = False, +) -> list[PersonaSnapshot]: + """Return the persona list visible to ``user_id`` as ``PersonaSnapshot``s. + + Routing: + + * Cache disabled OR ``include_deleted=True`` → direct DB read via the + existing :func:`danswer.db.persona.get_personas`. The + ``include_deleted`` case is rare admin-only and we deliberately + don't cache it — keeping the cache key set small avoids accidental + mis-keying on the hot path. + * ``user_id is None`` (admin call) → return the global cached list + unfiltered. + * Authenticated user → load the global list + user's groups from + cache (or DB on miss), apply the Python permission filter. 
+ """ + # Local import to keep this module importable from `db.persona` itself + # without a circular import — `get_personas` is the fallback only. + from danswer.db.persona import get_personas + + if not PERSONA_CACHE_ENABLED or include_deleted: + personas = get_personas( + user_id=user_id, + db_session=db_session, + include_deleted=include_deleted, + ) + return [PersonaSnapshot.from_model(p) for p in personas] + + all_snapshots = _get_all_personas_cached(db_session) + if user_id is None: + # Admin / no-auth path: no permission filter needed. + return all_snapshots + + user_group_ids = _get_user_group_ids_cached(user_id, db_session) + return _filter_personas_for_user(all_snapshots, user_id, user_group_ids) + + +# --------------------------------------------------------------------------- +# Public API — invalidation +# --------------------------------------------------------------------------- + + +def invalidate_personas_all() -> None: + """Drop the cached global persona list. + + Call this *after* ``db_session.commit()`` in any mutation that + changes ``Persona``, ``Persona__User``, or ``Persona__UserGroup``. + Before-commit invalidation has a stale-cache-fill race: a concurrent + reader between bust and commit would refill the cache with the + pre-mutation snapshot. + + Cheap when the cache is disabled — short-circuits before any Redis + call so mutation paths don't pay an ambient cost. + """ + if not PERSONA_CACHE_ENABLED: + return + try: + get_redis_client().delete(_PERSONAS_ALL_KEY) + except Exception as e: + # Fail-open — TTL safety net will heal eventually. Loud log so + # the dashboard catches a persistent Redis outage. + logger.warning("invalidate_personas_all: Redis DEL failed: %s", e) + + +def invalidate_user_groups(user_id: UUID) -> None: + """Drop ``user_id``'s cached group-membership list. + + Call this when ``User__UserGroup`` rows for the user are inserted + or removed. Same after-commit ordering rule as + :func:`invalidate_personas_all`. 
+ """ + if not PERSONA_CACHE_ENABLED: + return + try: + get_redis_client().delete(_USER_GROUPS_KEY_PREFIX + str(user_id)) + except Exception as e: + logger.warning( + "invalidate_user_groups(user_id=%s): Redis DEL failed: %s", + user_id, + e, + ) + + +# --------------------------------------------------------------------------- +# Internals +# --------------------------------------------------------------------------- + + +def _get_all_personas_cached(db_session: Session) -> list[PersonaSnapshot]: + """Load all non-deleted personas as ``PersonaSnapshot``s. + + Cache hit: deserialize the stored JSON straight back into Pydantic + models — no DB call. + Cache miss / Redis error: fall through to the existing + :func:`get_personas` (with ``user_id=None``) so the source of truth + is reused. + """ + hit, cached = _safe_get(_PERSONAS_ALL_KEY) + if hit and isinstance(cached, list): + try: + return [PersonaSnapshot.parse_obj(d) for d in cached] + except Exception as e: + # Pydantic schema drift (e.g. a new required field was added + # since the entry was cached) — treat as a miss so the next + # read repopulates with the current schema. + logger.warning( + "Cached persona list failed PersonaSnapshot parse, refilling: %s", + e, + ) + + from danswer.db.persona import get_personas + + personas = get_personas( + user_id=None, + db_session=db_session, + include_deleted=False, + ) + snapshots = [PersonaSnapshot.from_model(p) for p in personas] + + # Round-trip through PersonaSnapshot.json() so nested types (UUID, + # enums, datetimes) get the same serializer Pydantic uses on the wire. + payload: list[Any] = [json.loads(s.json()) for s in snapshots] + _safe_set(_PERSONAS_ALL_KEY, payload) + return snapshots + + +def _get_user_group_ids_cached(user_id: UUID, db_session: Session) -> list[int]: + """Return the ``user_group_id``s the user belongs to. 
+ + Tiny indexed lookup — caching wins not on per-call latency but on + aggregate, since it's hit on every persona-list call across all + users. + """ + key = _USER_GROUPS_KEY_PREFIX + str(user_id) + hit, cached = _safe_get(key) + if hit and isinstance(cached, list): + return [int(x) for x in cached] + + rows = db_session.scalars( + select(User__UserGroup.user_group_id).where(User__UserGroup.user_id == user_id) + ).all() + group_ids = [int(r) for r in rows] + _safe_set(key, group_ids) + return group_ids + + +def _filter_personas_for_user( + personas: list[PersonaSnapshot], + user_id: UUID, + user_group_ids: list[int], +) -> list[PersonaSnapshot]: + """Apply the same OR-filter ``get_personas`` runs in SQL. + + SQL: + Persona.is_public + OR Persona.id IN (Persona__User where user_id = U) + OR Persona.id IN (Persona__UserGroup + where user_group_id IN (U's user_group_ids)) + + The parity vs SQL is covered by tests with representative permission + shapes; if you change one side, change the other. + """ + user_group_set = set(user_group_ids) + out: list[PersonaSnapshot] = [] + for p in personas: + if p.is_public: + out.append(p) + continue + if any(u.id == user_id for u in p.users): + out.append(p) + continue + if user_group_set.intersection(p.groups): + out.append(p) + continue + return out + + +# ---- Fail-open Redis helpers (mirror the P1 cache module's posture) ---- + + +def _safe_get(key: str) -> tuple[bool, Any]: + """Return ``(hit, value)``. ``hit=False`` covers miss AND any Redis + or decode error — the caller treats them all as "go to the DB".
+ """ + try: + raw = get_redis_client().get(key) + except Exception as e: + logger.warning("persona_cache: Redis GET failed for %s: %s", key, e) + return (False, None) + if raw is None: + return (False, None) + try: + return (True, json.loads(raw)) + except (TypeError, ValueError) as e: + logger.warning("persona_cache: corrupt entry at %s, ignoring: %s", key, e) + return (False, None) + + +def _safe_set(key: str, val: Any) -> None: + try: + payload = json.dumps(val) + except (TypeError, ValueError) as e: + # Defensive — _get_all_personas_cached/_get_user_group_ids_cached + # only ever cache JSON-clean values. If this fires the cache is + # silently skipped and the inner read still served the caller. + logger.warning("persona_cache: skipping non-JSON value at %s: %s", key, e) + return + try: + get_redis_client().set(key, payload, ex=PERSONA_CACHE_TTL_SECONDS) + except Exception as e: + logger.warning("persona_cache: Redis SET failed for %s: %s", key, e) diff --git a/backend/danswer/db/pg_file_store.py b/backend/danswer/db/pg_file_store.py index 1333dcd6cee..685c3469ee9 100644 --- a/backend/danswer/db/pg_file_store.py +++ b/backend/danswer/db/pg_file_store.py @@ -102,8 +102,10 @@ def delete_lobj_by_name( logger.info(f"no file with name {lobj_name} found") return - pg_conn = get_pg_conn_from_session(db_session) - pg_conn.lobject(pgfilestore.lobj_oid).unlink() + # Only unlink a Postgres large object; object-store rows have no lobj. + if pgfilestore.lobj_oid is not None: + pg_conn = get_pg_conn_from_session(db_session) + pg_conn.lobject(pgfilestore.lobj_oid).unlink() delete_pgfilestore_by_file_name(lobj_name, db_session) db_session.commit() @@ -114,25 +116,35 @@ def upsert_pgfilestore( display_name: str | None, file_origin: FileOrigin, file_type: str, - lobj_oid: int, db_session: Session, + lobj_oid: int | None = None, + object_key: str | None = None, commit: bool = False, file_metadata: dict | None = None, ) -> PGFileStore: + """Upsert a file_store metadata row. 
The bytes live in EITHER a Postgres + large object (``lobj_oid``) or an object-storage blob (``object_key``); + pass exactly one. Both backends share this metadata row.""" pgfilestore = db_session.query(PGFileStore).filter_by(file_name=file_name).first() if pgfilestore: - try: - # This should not happen in normal execution - delete_lobj_by_id(lobj_oid=pgfilestore.lobj_oid, db_session=db_session) - except Exception: - # If the delete fails as well, the large object doesn't exist anyway and even if it - # fails to delete, it's not too terrible as most files sizes are insignificant - logger.error( - f"Failed to delete large object with oid {pgfilestore.lobj_oid}" - ) + # Clean up the previous backing bytes only if it was a Postgres lobj + # (object-store blobs are managed by the AzureBlobFileStore itself). + if pgfilestore.lobj_oid is not None: + try: + delete_lobj_by_id(lobj_oid=pgfilestore.lobj_oid, db_session=db_session) + except Exception: + # Best-effort — if the lobj is already gone that's fine. + logger.error( + f"Failed to delete large object with oid {pgfilestore.lobj_oid}" + ) pgfilestore.lobj_oid = lobj_oid + pgfilestore.object_key = object_key + pgfilestore.display_name = display_name or file_name + pgfilestore.file_origin = file_origin + pgfilestore.file_type = file_type + pgfilestore.file_metadata = file_metadata else: pgfilestore = PGFileStore( file_name=file_name, @@ -141,6 +153,7 @@ def upsert_pgfilestore( file_type=file_type, file_metadata=file_metadata, lobj_oid=lobj_oid, + object_key=object_key, ) db_session.add(pgfilestore) diff --git a/backend/danswer/db/retention.py b/backend/danswer/db/retention.py index e5cf43699d4..4b210a97c37 100644 --- a/backend/danswer/db/retention.py +++ b/backend/danswer/db/retention.py @@ -87,7 +87,7 @@ def _env_int(name: str, default: int, minimum: int = 0) -> int: # based + keep-last-N pruning by setting RETENTION_DAYS_INDEX_ATTEMPT to a # positive integer; the executor short-circuits when days <= 0. 
RETENTION_DAYS_INDEX_ATTEMPT = _env_int("RETENTION_DAYS_INDEX_ATTEMPT", 0) -RETENTION_DAYS_CHAT = _env_int("RETENTION_DAYS_CHAT", 30) +RETENTION_DAYS_CHAT = _env_int("RETENTION_DAYS_CHAT", 90) RETENTION_DAYS_USAGE_REPORTS = _env_int("RETENTION_DAYS_USAGE_REPORTS", 90) RETENTION_DAYS_PERMISSION_SYNC = _env_int("RETENTION_DAYS_PERMISSION_SYNC", 30) RETENTION_KEEP_LAST_N_INDEX_ATTEMPTS = _env_int( diff --git a/backend/danswer/db/tag.py b/backend/danswer/db/tag.py index 66418b948e7..a539f2471a5 100644 --- a/backend/danswer/db/tag.py +++ b/backend/danswer/db/tag.py @@ -110,6 +110,7 @@ def get_tags_by_value_prefix_for_source_types( tag_value_prefix: str | None, sources: list[DocumentSource] | None, db_session: Session, + limit: int | None = None, ) -> list[Tag]: query = select(Tag) @@ -119,6 +120,12 @@ def get_tags_by_value_prefix_for_source_types( if sources: query = query.where(Tag.source.in_(sources)) + # Optional bound (default None = unbounded, unchanged). When no prefix is + # given this would otherwise load every Tag row for the source(s); the knob + # lets callers cap it without changing existing behavior. 
+ if limit is not None: + query = query.limit(limit) + result = db_session.execute(query) tags = result.scalars().all() diff --git a/backend/danswer/document_index/vespa/index.py b/backend/danswer/document_index/vespa/index.py index 24156d11aae..8605a9bdd86 100644 --- a/backend/danswer/document_index/vespa/index.py +++ b/backend/danswer/document_index/vespa/index.py @@ -248,16 +248,33 @@ def _delete_vespa_doc_chunks( doc_chunk_ids = _get_vespa_chunk_ids_by_document_id( document_id=document_id, index_name=index_name ) + if not doc_chunk_ids: + return - for chunk_id in doc_chunk_ids: - try: - res = http_client.delete( - f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{chunk_id}" - ) - res.raise_for_status() - except httpx.HTTPStatusError as e: - logger.error(f"Failed to delete chunk, details: {e.response.text}") - raise + def _delete_chunk(chunk_id: str) -> None: + res = http_client.delete( + f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{chunk_id}" + ) + res.raise_for_status() + + # Delete a document's chunks concurrently rather than one blocking HTTP + # round-trip at a time — sequential per-chunk DELETEs were a large part of + # the per-document re-index cost for multi-chunk docs. Bounded local pool + # (capped low so that, combined with the per-document executor in + # _delete_vespa_docs, total in-flight requests stay reasonable); a fresh + # ThreadPoolExecutor here can't deadlock against that outer pool since its + # threads are independent. httpx.Client is safe for concurrent use. The + # @retry on this function still covers transient failures. 
+ with concurrent.futures.ThreadPoolExecutor( + max_workers=min(len(doc_chunk_ids), 8) + ) as executor: + futures = [executor.submit(_delete_chunk, cid) for cid in doc_chunk_ids] + for future in concurrent.futures.as_completed(futures): + try: + future.result() + except httpx.HTTPStatusError as e: + logger.error(f"Failed to delete chunk, details: {e.response.text}") + raise def _delete_vespa_docs( diff --git a/backend/danswer/dynamic_configs/factory.py b/backend/danswer/dynamic_configs/factory.py index 44b6e096b6d..60e46950df0 100644 --- a/backend/danswer/dynamic_configs/factory.py +++ b/backend/danswer/dynamic_configs/factory.py @@ -1,15 +1,33 @@ from danswer.configs.app_configs import DYNAMIC_CONFIG_STORE +from danswer.configs.app_configs import REDIS_KV_CACHE_ENABLED +from danswer.configs.app_configs import REDIS_KV_CACHE_TTL_SECONDS from danswer.dynamic_configs.interface import DynamicConfigStore from danswer.dynamic_configs.store import FileSystemBackedDynamicConfigStore from danswer.dynamic_configs.store import PostgresBackedDynamicConfigStore +from danswer.dynamic_configs.store import RedisCachedDynamicConfigStore def get_dynamic_config_store() -> DynamicConfigStore: + """Resolve the configured KV store. + + The Postgres-backed store is the source of truth. When + ``REDIS_KV_CACHE_ENABLED`` is true, we transparently wrap it with a + read-through / write-through Redis cache — call sites are unchanged. + Wrapping is additive on top of the configured backend rather than a + distinct ``DYNAMIC_CONFIG_STORE`` value so "which backend" and "do I + cache" are independently controllable. 
+ """ dynamic_config_store_type = DYNAMIC_CONFIG_STORE if dynamic_config_store_type == FileSystemBackedDynamicConfigStore.__name__: raise NotImplementedError("File based config store no longer supported") if dynamic_config_store_type == PostgresBackedDynamicConfigStore.__name__: - return PostgresBackedDynamicConfigStore() + inner: DynamicConfigStore = PostgresBackedDynamicConfigStore() + if REDIS_KV_CACHE_ENABLED: + return RedisCachedDynamicConfigStore( + inner=inner, + ttl_seconds=REDIS_KV_CACHE_TTL_SECONDS, + ) + return inner # TODO: change exception type raise Exception("Unknown dynamic config store type") diff --git a/backend/danswer/dynamic_configs/store.py b/backend/danswer/dynamic_configs/store.py index ee4ac3d09ae..59df2c807dd 100644 --- a/backend/danswer/dynamic_configs/store.py +++ b/backend/danswer/dynamic_configs/store.py @@ -1,11 +1,14 @@ import json import os +from collections.abc import Callable from collections.abc import Iterator from contextlib import contextmanager from pathlib import Path from typing import cast from filelock import FileLock +from redis import Redis +from redis import RedisError from sqlalchemy.orm import Session from danswer.db.engine import SessionFactory @@ -13,6 +16,12 @@ from danswer.dynamic_configs.interface import ConfigNotFoundError from danswer.dynamic_configs.interface import DynamicConfigStore from danswer.dynamic_configs.interface import JSON_ro +from danswer.redis.redis_pool import DANSWER_REDIS_KEY_PREFIX +from danswer.redis.redis_pool import get_redis_client +from danswer.utils.logger import setup_logger + + +logger = setup_logger() FILE_LOCK_TIMEOUT = 10 @@ -99,3 +108,119 @@ def delete(self, key: str) -> None: if result == 0: raise ConfigNotFoundError session.commit() + + +class RedisCachedDynamicConfigStore(DynamicConfigStore): + """Read-through / write-through Redis cache over an inner ``DynamicConfigStore``. 
+ + Mirrors the shape of upstream Onyx's ``PgRedisKVStore`` but composed + via wrapping rather than inheritance so the inner store stays + single-purpose and the cache layer can wrap *any* future backend. + + Semantics: + * ``load``: probe Redis; on hit, return; on miss, read inner and + repopulate Redis (with TTL). NOTE(review): ``load`` re-caches whatever + the inner store returns, even for keys stored with ``encrypt=True`` — confirm. + * ``store``: write the inner store first (source of truth), then + refresh Redis (or invalidate if ``encrypt=True``). The + inner-first order means an inner-store failure aborts before Redis + is touched, so Redis never holds a value the source of truth lacks. + * ``delete``: delete inner first, then Redis. Same ordering reason. + + Fail-open: every Redis operation is wrapped — a Redis outage degrades + latency, not availability. Wrap-then-log-then-fall-through is the + rule throughout. + + Single-tenant: keys are namespaced by :data:`_KEY_PREFIX` only (no + tenant id), reflecting this fork's divergence from upstream. + """ + + _KEY_PREFIX = DANSWER_REDIS_KEY_PREFIX + "kv:" + + def __init__( + self, + inner: DynamicConfigStore, + ttl_seconds: int, + client_factory: Callable[[], Redis] | None = None, + ) -> None: + self._inner = inner + self._ttl = ttl_seconds + # Indirection lets tests inject a fake client without monkey- + # patching the global pool. Production wiring uses the default. + self._client_factory = client_factory or get_redis_client + + # ---- DynamicConfigStore surface ---- + + def store(self, key: str, val: JSON_ro, encrypt: bool = False) -> None: + self._inner.store(key, val, encrypt=encrypt) + if encrypt: + # Never hold plaintext of an encrypted value in Redis — that + # would silently defeat the encryption-at-rest guarantee. + # Also invalidates any stale plaintext entry from before + # the value was switched to encrypted.
+ self._safe_redis_delete(key) + return + self._safe_redis_set(key, val) + + def load(self, key: str) -> JSON_ro: + hit, cached = self._safe_redis_get(key) + if hit: + return cached + # May raise ConfigNotFoundError — propagate without caching the miss. + # (Negative caching has its own correctness traps; skip for now.) + val = self._inner.load(key) + self._safe_redis_set(key, val) + return val + + def delete(self, key: str) -> None: + self._inner.delete(key) + self._safe_redis_delete(key) + + # ---- private Redis helpers (all fail-open) ---- + + def _redis_key(self, key: str) -> str: + return self._KEY_PREFIX + key + + def _safe_redis_get(self, key: str) -> tuple[bool, JSON_ro]: + """Return ``(hit, value)``. ``hit=False`` means cache miss OR + Redis error — caller treats both the same (read inner). + """ + try: + raw = self._client_factory().get(self._redis_key(key)) + except RedisError as e: + logger.warning("Redis GET failed for kv key=%s: %s", key, e) + return (False, None) + if raw is None: + return (False, None) + try: + return (True, cast(JSON_ro, json.loads(raw))) + except (TypeError, ValueError) as e: + # Corrupt or legacy-format entry — treat as a miss so the + # next read repopulates from the inner store. + logger.warning("Corrupt Redis kv entry for key=%s, ignoring: %s", key, e) + return (False, None) + + def _safe_redis_set(self, key: str, val: JSON_ro) -> None: + try: + payload = json.dumps(val) + except (TypeError, ValueError) as e: + # Caller stored a value the inner store accepts but JSON + # doesn't — log and skip the cache (inner still holds truth). 
+ logger.warning( + "Skipping Redis cache for non-JSON-serialisable key=%s: %s", key, e + ) + return + try: + self._client_factory().set( + self._redis_key(key), + payload, + ex=self._ttl, + ) + except RedisError as e: + logger.warning("Redis SET failed for kv key=%s: %s", key, e) + + def _safe_redis_delete(self, key: str) -> None: + try: + self._client_factory().delete(self._redis_key(key)) + except RedisError as e: + logger.warning("Redis DEL failed for kv key=%s: %s", key, e) diff --git a/backend/danswer/file_store/file_store.py b/backend/danswer/file_store/file_store.py index 9bc4c41d361..3c1c71c6d99 100644 --- a/backend/danswer/file_store/file_store.py +++ b/backend/danswer/file_store/file_store.py @@ -1,9 +1,16 @@ +import threading from abc import ABC from abc import abstractmethod +from io import BytesIO +from tempfile import SpooledTemporaryFile +from typing import Any from typing import IO from sqlalchemy.orm import Session +from danswer.configs.app_configs import AZURE_BLOB_CONNECTION_STRING +from danswer.configs.app_configs import AZURE_BLOB_CONTAINER +from danswer.configs.app_configs import FILE_STORE_TYPE from danswer.configs.constants import FileOrigin from danswer.db.models import PGFileStore from danswer.db.pg_file_store import create_populate_lobj @@ -12,6 +19,10 @@ from danswer.db.pg_file_store import get_pgfilestore_by_file_name from danswer.db.pg_file_store import read_lobj from danswer.db.pg_file_store import upsert_pgfilestore +from danswer.file_store.constants import MAX_IN_MEMORY_SIZE +from danswer.utils.logger import setup_logger + +logger = setup_logger() class FileStore(ABC): @@ -106,6 +117,14 @@ def read_file( file_record = get_pgfilestore_by_file_name( file_name=file_name, db_session=self.db_session ) + if file_record.lobj_oid is None: + # Row was written by an object-store backend — can't read it as a + # large object. Indicates FILE_STORE_TYPE was changed without + # migrating, or the wrong backend is active. 
+ raise RuntimeError( + f"File '{file_name}' has no Postgres large object " + f"(object_key={file_record.object_key!r}); is FILE_STORE_TYPE correct?" + ) return read_lobj( lobj_oid=file_record.lobj_oid, db_session=self.db_session, @@ -125,7 +144,156 @@ def delete_file(self, file_name: str) -> None: file_record = get_pgfilestore_by_file_name( file_name=file_name, db_session=self.db_session ) - delete_lobj_by_id(file_record.lobj_oid, db_session=self.db_session) + if file_record.lobj_oid is not None: + delete_lobj_by_id(file_record.lobj_oid, db_session=self.db_session) + delete_pgfilestore_by_file_name( + file_name=file_name, db_session=self.db_session + ) + self.db_session.commit() + except Exception: + self.db_session.rollback() + raise + + +# --- Azure Blob backend ----------------------------------------------------- +# The azure SDK is an OPTIONAL dependency: file_store.py is imported app-wide, +# so we must NOT import azure at module load. It's lazily imported only when +# the Azure backend is actually constructed (and the package is present in the +# image). The container client is a process-wide lazy singleton. +_az_container_client: Any = None +_az_lock = threading.Lock() + + +def _parse_azure_conn_str(conn_str: str) -> dict[str, str]: + """Parse an Azure Storage connection string into its parts. Split on the + FIRST '=' per segment so values containing '=' (AccountKey ends with '==', + BlobEndpoint has '://') survive intact.""" + return dict(seg.split("=", 1) for seg in conn_str.split(";") if "=" in seg) + + +def _get_azure_container_client() -> Any: + global _az_container_client + if _az_container_client is None: + with _az_lock: + if _az_container_client is None: + if not AZURE_BLOB_CONNECTION_STRING: + raise RuntimeError( + "FILE_STORE_TYPE=AzureBlobFileStore but " + "AZURE_BLOB_CONNECTION_STRING is unset." + ) + # Lazy import — optional dependency (azure-storage-blob). 
+ try: + from azure.storage.blob import BlobServiceClient # type: ignore + except ImportError as e: + raise RuntimeError( + "FILE_STORE_TYPE=AzureBlobFileStore requires the " + "azure-storage-blob package (it's in requirements; " + "rebuild the image or `pip install azure-storage-blob`)." + ) from e + + svc = BlobServiceClient.from_connection_string( + AZURE_BLOB_CONNECTION_STRING + ) + cc = svc.get_container_client(AZURE_BLOB_CONTAINER) + try: + cc.create_container() + except Exception: + # Already exists (or no create permission) — fine. + pass + _az_container_client = cc + return _az_container_client + + +class AzureBlobFileStore(FileStore): + """File store that keeps the BYTES in Azure Blob Storage and the METADATA + row in Postgres (``file_store`` table, ``object_key`` column). + + Why hybrid: the metadata is small and queryable, but the blob bytes are + what bloat Postgres and (via read_lobj) pin a DB connection for the whole + read. Moving only the bytes off-DB fixes both — reads stream straight from + Blob and never hold a Postgres connection. + + Reads fall back to the Postgres large object when a row hasn't been + migrated yet (``object_key is None`` but ``lobj_oid`` set), so the cutover + is graceful: flip FILE_STORE_TYPE, new files go to Blob, old files keep + working until the migration script moves them. + """ + + def __init__(self, db_session: Session): + self.db_session = db_session + + def save_file( + self, + file_name: str, + content: IO, + display_name: str | None, + file_origin: FileOrigin, + file_type: str, + file_metadata: dict | None = None, + ) -> None: + object_key = file_name # file_name is already the unique identifier + try: + # upload_blob streams `content` in chunks — no whole-file-in-memory. 
+ _get_azure_container_client().upload_blob( + name=object_key, data=content, overwrite=True + ) + upsert_pgfilestore( + file_name=file_name, + display_name=display_name or file_name, + file_origin=file_origin, + file_type=file_type, + object_key=object_key, + lobj_oid=None, + db_session=self.db_session, + file_metadata=file_metadata, + ) + self.db_session.commit() + except Exception: + self.db_session.rollback() + raise + + def read_file( + self, file_name: str, mode: str | None = None, use_tempfile: bool = False + ) -> IO: + record = get_pgfilestore_by_file_name( + file_name=file_name, db_session=self.db_session + ) + if record.object_key is None: + # Not yet migrated — read from the legacy Postgres large object. + if record.lobj_oid is None: + raise RuntimeError( + f"File '{file_name}' has neither object_key nor lobj_oid." + ) + return read_lobj( + lobj_oid=record.lobj_oid, + db_session=self.db_session, + mode=mode, + use_tempfile=use_tempfile, + ) + + downloader = _get_azure_container_client().download_blob(record.object_key) + if use_tempfile: + temp_file: IO = SpooledTemporaryFile(max_size=MAX_IN_MEMORY_SIZE) + downloader.readinto(temp_file) + temp_file.seek(0) + return temp_file + return BytesIO(downloader.readall()) + + def delete_file(self, file_name: str) -> None: + try: + record = get_pgfilestore_by_file_name( + file_name=file_name, db_session=self.db_session + ) + if record.object_key is not None: + try: + _get_azure_container_client().delete_blob(record.object_key) + except Exception: + logger.error( + f"Failed to delete blob {record.object_key}; " + "removing the metadata row anyway." 
+ ) + elif record.lobj_oid is not None: + delete_lobj_by_id(record.lobj_oid, db_session=self.db_session) delete_pgfilestore_by_file_name( file_name=file_name, db_session=self.db_session ) @@ -134,7 +302,99 @@ def delete_file(self, file_name: str) -> None: self.db_session.rollback() raise + def generate_upload_sas_url(self, file_name: str, expiry_minutes: int = 30) -> str: + """Mint a short-lived, write/create-scoped SAS URL so a client can PUT + bytes DIRECTLY to Blob (bypassing the server). Used by the chat + direct-upload flow. The blob is `file_name`; record the metadata row + afterward with :meth:`register_object`.""" + import datetime + + from azure.storage.blob import BlobSasPermissions # type: ignore + from azure.storage.blob import BlobServiceClient # type: ignore + from azure.storage.blob import generate_blob_sas # type: ignore + + # Let the SDK parse the connection string — it's authoritative about + # the blob endpoint (handles Azurite, custom endpoints, and key + # casing/ordering that a hand-rolled parse trips on). + try: + svc = BlobServiceClient.from_connection_string(AZURE_BLOB_CONNECTION_STRING) + except Exception as e: + # Surface a SAFE diagnostic (key NAMES + length only, never the + # secret value). The usual cause: the shell split the value on a + # ';' so the process only got the first segment. + keys = sorted(_parse_azure_conn_str(AZURE_BLOB_CONNECTION_STRING).keys()) + raise RuntimeError( + f"AZURE_BLOB_CONNECTION_STRING is malformed: the process received " + f"a value of length {len(AZURE_BLOB_CONNECTION_STRING)} with keys " + f"{keys}. If that's just ['DefaultEndpointsProtocol'] it was " + f"truncated at the first ';' — single-quote the value where it's " + f"set AND restart the api-server in that shell (it reads the env " + f"once at startup)." + ) from e + blob_endpoint = svc.url.rstrip("/") + + # Account-key connection string → mint a short-lived, scoped, per-blob + # SAS (write+create only, expiry below). 
This is the secure shape: the + # browser only ever gets a one-blob, minutes-long token — never a broad + # account/service SAS. Prefer the SDK-parsed credential (also handles + # `UseDevelopmentStorage=true`), fall back to a case-insensitive parse. + account_key = getattr(getattr(svc, "credential", None), "account_key", None) + if not account_key: + cfg = { + k.lower(): v + for k, v in _parse_azure_conn_str(AZURE_BLOB_CONNECTION_STRING).items() + } + account_key = cfg.get("accountkey") + if not account_key: + raise RuntimeError( + "AZURE_BLOB_CONNECTION_STRING has no AccountKey. Direct chat " + "uploads require the account-key connection string (Storage " + "account → Access keys → Connection string) so the server can " + "mint a scoped, short-lived per-blob upload SAS. SAS-token / " + "managed-identity connection strings aren't supported here." + ) + + sas = generate_blob_sas( + account_name=svc.account_name, + container_name=AZURE_BLOB_CONTAINER, + blob_name=file_name, + account_key=account_key, + permission=BlobSasPermissions(write=True, create=True), + expiry=datetime.datetime.utcnow() + + datetime.timedelta(minutes=expiry_minutes), + ) + return f"{blob_endpoint}/{AZURE_BLOB_CONTAINER}/{file_name}?{sas}" + + def register_object( + self, + file_name: str, + display_name: str | None, + file_origin: FileOrigin, + file_type: str, + file_metadata: dict | None = None, + ) -> None: + """Record the metadata row for a blob uploaded out-of-band (e.g. a + client direct-to-Blob SAS upload). 
Does NOT touch the bytes — they're + already in the container under `file_name`.""" + upsert_pgfilestore( + file_name=file_name, + display_name=display_name or file_name, + file_origin=file_origin, + file_type=file_type, + object_key=file_name, + lobj_oid=None, + db_session=self.db_session, + file_metadata=file_metadata, + commit=True, + ) + def get_default_file_store(db_session: Session) -> FileStore: - # The only supported file store now is the Postgres File Store + """Resolve the configured file-store backend (FILE_STORE_TYPE). + + Default is Postgres large objects. AzureBlobFileStore offloads the bytes + to Azure Blob Storage (metadata stays in Postgres) — opt-in per env. + """ + if FILE_STORE_TYPE == AzureBlobFileStore.__name__: + return AzureBlobFileStore(db_session=db_session) return PostgresBackedFileStore(db_session=db_session) diff --git a/backend/danswer/indexing/indexing_pipeline.py b/backend/danswer/indexing/indexing_pipeline.py index 2506b4715ac..98b4f70e85e 100644 --- a/backend/danswer/indexing/indexing_pipeline.py +++ b/backend/danswer/indexing/indexing_pipeline.py @@ -91,20 +91,39 @@ def upsert_documents_in_db( def get_doc_ids_to_update( documents: list[Document], db_docs: list[DBDocument] ) -> list[Document]: - """Figures out which documents actually need to be updated. If a document is already present - and the `updated_at` hasn't changed, we shouldn't need to do anything with it.""" - id_update_time_map = { - doc.id: doc.doc_updated_at for doc in db_docs if doc.doc_updated_at - } + """Figures out which documents actually need to be (re)indexed. + + Two skip conditions, checked per already-present document: + + 1. Content-hash match: if the stored `indexed_content_hash` equals the + document's current content hash, the indexed representation is identical + and we skip — even if `doc_updated_at` advanced. This is the important + one for sources that bump their modified-timestamp without changing + content (e.g. 
Salesforce LastModifiedDate churn re-pulling the whole + corpus every poll). Benefits ALL connectors, not just Salesforce. + 2. Timestamp fallback: for rows with no stored hash yet (indexed before + this existed), keep the original behavior — skip if `doc_updated_at` + isn't newer than what's stored. + """ + id_to_db_doc = {doc.id: doc for doc in db_docs} updatable_docs: list[Document] = [] for doc in documents: - if ( - doc.id in id_update_time_map - and doc.doc_updated_at - and doc.doc_updated_at <= id_update_time_map[doc.id] - ): - continue + db_doc = id_to_db_doc.get(doc.id) + if db_doc is not None: + # (1) content unchanged — skip regardless of timestamp + if ( + db_doc.indexed_content_hash is not None + and db_doc.indexed_content_hash == doc.get_content_hash() + ): + continue + # (2) fallback: no newer content per the source timestamp + if ( + doc.doc_updated_at is not None + and db_doc.doc_updated_at is not None + and doc.doc_updated_at <= db_doc.doc_updated_at + ): + continue updatable_docs.append(doc) return updatable_docs @@ -140,6 +159,19 @@ def index_doc_batch( ) updatable_ids = [doc.id for doc in updatable_docs] + # Visibility into the content-hash / timestamp skip: how many docs in this + # batch were unchanged and therefore skip the expensive embed + Vespa + # clear-and-rewrite. Aggregated across an attempt's batches this confirms, + # in prod logs, that a churny source (e.g. Salesforce LastModifiedDate) + # is no longer re-indexing unchanged records. Only logged when >0 to keep + # steady-state logs quiet. + num_skipped = len(documents) - len(updatable_docs) + if num_skipped: + logger.info( + f"Skipping {num_skipped}/{len(documents)} documents in batch " + "(unchanged since last successful index — no re-embed / re-index)." 
+ ) + # Create records in the source of truth about these documents, # does not include doc_updated_at which is also used to indicate a successful update upsert_documents_in_db( @@ -203,15 +235,25 @@ def index_doc_batch( doc for doc in updatable_docs if doc.id in successful_doc_ids ] - # Update the time of latest version of the doc successfully indexed + # Record post-success state: the latest updated-at (skip docs that + # don't carry one) AND the content hash (for every successful doc, so a + # later run can skip re-indexing it if its content is unchanged). The + # hash is stored only here — after a confirmed Vespa write — so it + # always reflects what's actually in the index. ids_to_new_updated_at = {} for doc in successful_docs: if doc.doc_updated_at is None: continue ids_to_new_updated_at[doc.id] = doc.doc_updated_at + ids_to_new_content_hash = { + doc.id: doc.get_content_hash() for doc in successful_docs + } + update_docs_updated_at( - ids_to_new_updated_at=ids_to_new_updated_at, db_session=db_session + ids_to_new_updated_at=ids_to_new_updated_at, + ids_to_new_content_hash=ids_to_new_content_hash, + db_session=db_session, ) return len([r for r in insertion_records if r.already_existed is False]), len( diff --git a/backend/danswer/redis/__init__.py b/backend/danswer/redis/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/backend/danswer/redis/redis_pool.py b/backend/danswer/redis/redis_pool.py new file mode 100644 index 00000000000..567bb0a5160 --- /dev/null +++ b/backend/danswer/redis/redis_pool.py @@ -0,0 +1,110 @@ +"""Process-local Redis client and connection pool. + +This fork is single-tenant, so there is no per-tenant key prefixing +(upstream Onyx's ``TenantRedisClient`` is intentionally not ported). +Instead, all keys written by this codebase must namespace themselves +under :data:`DANSWER_REDIS_KEY_PREFIX` so a shared Redis (or a +later multi-app deployment) does not collide. 
+ +The pool is lazily built on first use and reused for the life of the +process. The :func:`get_redis_client` helper hands out a thin ``Redis`` +wrapper around the shared pool — cheap to call repeatedly, no need to +cache the result at call sites. + +Errors are NOT swallowed here. Callers that want to fail open (e.g. the +KV cache layer, the rate limiter) wrap their own try/except — that +choice belongs to the caller, not the connection helper. +""" +from __future__ import annotations + +import threading +from typing import Any + +import redis +from redis import ConnectionPool +from redis import Redis + +from danswer.configs.app_configs import REDIS_DB_NUMBER +from danswer.configs.app_configs import REDIS_HEALTH_CHECK_INTERVAL +from danswer.configs.app_configs import REDIS_HOST +from danswer.configs.app_configs import REDIS_PASSWORD +from danswer.configs.app_configs import REDIS_POOL_MAX_CONNECTIONS +from danswer.configs.app_configs import REDIS_PORT +from danswer.configs.app_configs import REDIS_SOCKET_TIMEOUT_SECONDS +from danswer.configs.app_configs import REDIS_SSL +from danswer.utils.logger import setup_logger + + +logger = setup_logger() + +# Every key written by this codebase MUST start with this prefix. Sub-modules +# append their own namespace (e.g. ``DANSWER_REDIS_KEY_PREFIX + "kv:"`` in the +# KV cache). Keeping the namespace centralised here avoids the +# "two callers picked the same key by accident" footgun. +DANSWER_REDIS_KEY_PREFIX = "danswer:" + + +_pool: ConnectionPool | None = None +_pool_lock = threading.Lock() + + +def _build_pool() -> ConnectionPool: + """Construct the connection pool from the current env-driven config. + + Kept private so callers can't accidentally instantiate parallel pools. 
+ """ + kwargs: dict[str, Any] = { + "host": REDIS_HOST, + "port": REDIS_PORT, + "db": REDIS_DB_NUMBER, + "max_connections": REDIS_POOL_MAX_CONNECTIONS, + "health_check_interval": REDIS_HEALTH_CHECK_INTERVAL, + "socket_timeout": REDIS_SOCKET_TIMEOUT_SECONDS, + "socket_connect_timeout": REDIS_SOCKET_TIMEOUT_SECONDS, + "socket_keepalive": True, + "retry_on_timeout": True, + # We store JSON / counters as bytes; consumers decode as needed. + # decode_responses=False keeps us out of accidental str/bytes mixups. + "decode_responses": False, + } + if REDIS_PASSWORD: + kwargs["password"] = REDIS_PASSWORD + if REDIS_SSL: + # SSLConnection picks up REDIS_SSL_CA_CERTS / REDIS_SSL_CERT_REQS + # from env via the redis-py default — extend here if needed. + kwargs["connection_class"] = redis.SSLConnection + + logger.info( + "Building Redis ConnectionPool host=%s port=%s db=%s ssl=%s max=%s", + REDIS_HOST, + REDIS_PORT, + REDIS_DB_NUMBER, + REDIS_SSL, + REDIS_POOL_MAX_CONNECTIONS, + ) + return ConnectionPool(**kwargs) + + +def get_redis_client() -> Redis: + """Return a thin Redis client backed by the shared, lazily-built pool. + + Safe to call from any thread; uses double-checked locking so the pool + is constructed exactly once per process. + """ + global _pool + if _pool is None: + with _pool_lock: + if _pool is None: + _pool = _build_pool() + return Redis(connection_pool=_pool) + + +def reset_pool_for_tests() -> None: + """Drop the cached pool so the next ``get_redis_client`` rebuilds it. + + Tests only — never call this in production code. Lets a test mutate + env vars (host/port/etc.) and observe the effect on the next call. 
+ """ + global _pool + with _pool_lock: + _pool = None diff --git a/backend/danswer/server/analytics/api.py b/backend/danswer/server/analytics/api.py index dbd773a5a25..31a7fd8086e 100644 --- a/backend/danswer/server/analytics/api.py +++ b/backend/danswer/server/analytics/api.py @@ -21,8 +21,12 @@ import danswer.db.models as db_models from danswer.auth.users import current_admin_user from danswer.db.analytics import fetch_docs_per_source +from danswer.db.analytics import fetch_document_set_usage +from danswer.db.analytics import fetch_per_user_chat_stats +from danswer.db.analytics import fetch_persona_usage from danswer.db.analytics import fetch_slack_bot_channel_stats from danswer.db.analytics import fetch_total_docs_indexed +from danswer.db.analytics import fetch_user_adoption from danswer.db.analytics_rollup import fetch_danswerbot_analytics_from_rollup from danswer.db.analytics_rollup import fetch_query_analytics_from_rollup from danswer.db.analytics_rollup import fetch_user_analytics_from_rollup @@ -106,6 +110,152 @@ def get_user_analytics( ] +class UserAdoptionResponse(BaseModel): + # Users who first used chat on this date. + new_users: int + # Running total of distinct users who had ever used chat as of this date. 
+ cumulative_users: int + date: datetime.date + + +@router.get("/admin/user-adoption") +def get_user_adoption_analytics( + start: datetime.datetime | None = None, + end: datetime.datetime | None = None, + _: db_models.User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> list[UserAdoptionResponse]: + """Chat adoption curve: new + cumulative distinct users per day, served + from the durable `analytics_user_first_seen` table (survives chat + retention).""" + rows = fetch_user_adoption( + start=start or (datetime.datetime.utcnow() - datetime.timedelta(days=90)), + end=end or datetime.datetime.utcnow(), + db_session=db_session, + ) + return [ + UserAdoptionResponse( + new_users=new_users, cumulative_users=cumulative_users, date=date + ) + for date, new_users, cumulative_users in rows + ] + + +class PerUserChatStatsResponse(BaseModel): + user_id: str + email: str + total_messages: int + total_likes: int + total_dislikes: int + last_active: datetime.date + + +@router.get("/admin/per-user") +def get_per_user_analytics( + start: datetime.datetime | None = None, + end: datetime.datetime | None = None, + limit: int = 100, + _: db_models.User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> list[PerUserChatStatsResponse]: + """Top users by message volume over the range, from the durable + analytics_user_daily_stats aggregate — spans full history (survives + chat retention).""" + rows = fetch_per_user_chat_stats( + start=start or (datetime.datetime.utcnow() - datetime.timedelta(days=90)), + end=end or datetime.datetime.utcnow(), + db_session=db_session, + limit=limit, + ) + return [ + PerUserChatStatsResponse( + user_id=str(user_id), + email=email, + total_messages=int(total_messages), + total_likes=int(total_likes), + total_dislikes=int(total_dislikes), + last_active=last_active, + ) + for user_id, email, total_messages, total_likes, total_dislikes, last_active in rows + ] + + +class 
PersonaUsageResponse(BaseModel): + persona_id: int + name: str + sessions: int + messages: int + likes: int + dislikes: int + last_active: datetime.date + + +@router.get("/admin/persona-usage") +def get_persona_usage_analytics( + start: datetime.datetime | None = None, + end: datetime.datetime | None = None, + limit: int = 100, + _: db_models.User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> list[PersonaUsageResponse]: + """Most-used assistants over the range, from the durable + analytics_persona_daily_stats aggregate (spans full history).""" + rows = fetch_persona_usage( + start=start or (datetime.datetime.utcnow() - datetime.timedelta(days=90)), + end=end or datetime.datetime.utcnow(), + db_session=db_session, + limit=limit, + ) + return [ + PersonaUsageResponse( + persona_id=persona_id, + name=name, + sessions=int(sessions), + messages=int(messages), + likes=int(likes), + dislikes=int(dislikes), + last_active=last_active, + ) + for persona_id, name, sessions, messages, likes, dislikes, last_active in rows + ] + + +class DocumentSetUsageResponse(BaseModel): + document_set_id: int + name: str + # APPROXIMATE: assistant message volume attributed to every document set + # attached to the assistant (see fetch_document_set_usage). Not a + # per-query retrieval count. 
+ attributed_messages: int + + +@router.get("/admin/document-set-usage") +def get_document_set_usage_analytics( + start: datetime.datetime | None = None, + end: datetime.datetime | None = None, + limit: int = 100, + _: db_models.User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> list[DocumentSetUsageResponse]: + """Approximate datasets-in-use over the range, derived from assistant + usage × current persona→document-set attachments (see the db function's + caveats).""" + rows = fetch_document_set_usage( + start=start or (datetime.datetime.utcnow() - datetime.timedelta(days=90)), + end=end or datetime.datetime.utcnow(), + db_session=db_session, + limit=limit, + ) + return [ + DocumentSetUsageResponse( + document_set_id=document_set_id, + name=name, + attributed_messages=int(attributed_messages), + ) + for document_set_id, name, attributed_messages in rows + ] + + class DanswerbotAnalyticsResponse(BaseModel): total_queries: int auto_resolved: int diff --git a/backend/danswer/server/documents/cc_pair.py b/backend/danswer/server/documents/cc_pair.py index 2a67efe2b56..735d51fb85e 100644 --- a/backend/danswer/server/documents/cc_pair.py +++ b/backend/danswer/server/documents/cc_pair.py @@ -14,11 +14,14 @@ from danswer.db.connector_credential_pair import remove_credential_from_connector from danswer.db.document import get_document_cnts_for_cc_pairs from danswer.db.engine import get_session +from danswer.db.index_attempt import count_index_attempts_for_cc_pair from danswer.db.index_attempt import get_index_attempts_for_cc_pair +from danswer.db.index_attempt import get_paginated_index_attempts_for_cc_pair from danswer.db.models import User from danswer.server.documents.models import CCPairFullInfo from danswer.server.documents.models import ConnectorCredentialPairIdentifier from danswer.server.documents.models import ConnectorCredentialPairMetadata +from danswer.server.documents.models import PaginatedIndexAttempts from 
danswer.server.models import StatusResponse router = APIRouter(prefix="/manage", dependencies=[Depends(validate_api_key)]) @@ -45,7 +48,15 @@ def get_cc_pair_full_info( credential_id=cc_pair.credential_id, ) - index_attempts = get_index_attempts_for_cc_pair( + # Only the latest attempt + a count are needed for the detail page; the + # full history is served (paginated) by the endpoint below. + latest_index_attempts = get_index_attempts_for_cc_pair( + db_session=db_session, + cc_pair_identifier=cc_pair_identifier, + limit=1, + ) + latest_index_attempt = latest_index_attempts[0] if latest_index_attempts else None + num_index_attempts = count_index_attempts_for_cc_pair( db_session=db_session, cc_pair_identifier=cc_pair_identifier, ) @@ -68,12 +79,59 @@ def get_cc_pair_full_info( return CCPairFullInfo.from_models( cc_pair_model=cc_pair, - index_attempt_models=list(index_attempts), + latest_index_attempt=latest_index_attempt, + num_index_attempts=num_index_attempts, latest_deletion_attempt=latest_deletion_attempt, num_docs_indexed=documents_indexed, ) +@router.get("/admin/cc-pair/{cc_pair_id}/index-attempts") +def get_cc_pair_index_attempts( + cc_pair_id: int, + page: int = 0, + page_size: int = 10, + _: User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> PaginatedIndexAttempts: + cc_pair = get_connector_credential_pair_from_id( + cc_pair_id=cc_pair_id, + db_session=db_session, + ) + if cc_pair is None: + raise HTTPException( + status_code=400, + detail=f"Connector with ID {cc_pair_id} not found. 
Has it been deleted?", + ) + + page = max(page, 0) + page_size = min(max(page_size, 1), 100) # clamp to a sane range + + cc_pair_identifier = ConnectorCredentialPairIdentifier( + connector_id=cc_pair.connector_id, + credential_id=cc_pair.credential_id, + ) + + total_count = count_index_attempts_for_cc_pair( + db_session=db_session, + cc_pair_identifier=cc_pair_identifier, + ) + index_attempts = get_paginated_index_attempts_for_cc_pair( + db_session=db_session, + cc_pair_identifier=cc_pair_identifier, + page=page, + page_size=page_size, + ) + total_pages = max((total_count + page_size - 1) // page_size, 1) + + return PaginatedIndexAttempts.from_models( + index_attempt_models=list(index_attempts), + page=page, + total_pages=total_pages, + total_count=total_count, + ) + + class CCPairRenameRequest(BaseModel): name: str diff --git a/backend/danswer/server/documents/connector.py b/backend/danswer/server/documents/connector.py index 799083eaaa9..03e35cdb604 100644 --- a/backend/danswer/server/documents/connector.py +++ b/backend/danswer/server/documents/connector.py @@ -1,3 +1,4 @@ +import json import os import uuid from typing import cast @@ -15,6 +16,8 @@ from danswer.auth.users import current_admin_user from danswer.auth.users import current_user from danswer.background.task_utils import name_cc_cleanup_task +from danswer.configs.app_configs import CC_PAIR_INFO_CACHE_ENABLED +from danswer.configs.app_configs import CC_PAIR_INFO_CACHE_TTL_SECONDS from danswer.configs.app_configs import ENABLED_CONNECTOR_TYPES from danswer.configs.constants import DocumentSource from danswer.configs.constants import FileOrigin @@ -71,6 +74,8 @@ from danswer.db.tasks import get_latest_tasks_by_names from danswer.dynamic_configs.interface import ConfigNotFoundError from danswer.file_store.file_store import get_default_file_store +from danswer.redis.redis_pool import DANSWER_REDIS_KEY_PREFIX +from danswer.redis.redis_pool import get_redis_client from danswer.server.documents.models import 
AuthStatus from danswer.server.documents.models import AuthUrl from danswer.server.documents.models import ConnectorBase @@ -90,11 +95,14 @@ from danswer.server.documents.models import RunConnectorRequest from danswer.server.documents.models import UpdateIndexAttemptPriorityRequest from danswer.server.models import StatusResponse +from danswer.utils.logger import setup_logger _GMAIL_CREDENTIAL_ID_COOKIE_NAME = "gmail_credential_id" _GOOGLE_DRIVE_CREDENTIAL_ID_COOKIE_NAME = "google_drive_credential_id" +logger = setup_logger() + router = APIRouter(prefix="/manage", dependencies=[Depends(validate_api_key)]) @@ -670,6 +678,9 @@ def connector_run_once( ), only_current=True, disinclude_finished=True, + # Used only for truthiness ("any unfinished attempt?"); one row is + # enough — don't materialize the full set just to test existence. + limit=1, db_session=db_session, ) ] @@ -869,12 +880,16 @@ class BasicCCPairInfo(BaseModel): source: DocumentSource -@router.get("/indexing-status") -def get_basic_connector_indexing_status( - _: User = Depends(current_user), - db_session: Session = Depends(get_session), -) -> list[BasicCCPairInfo]: - cc_pairs = get_connector_credential_pairs(db_session) +_CC_PAIR_INFO_CACHE_KEY = DANSWER_REDIS_KEY_PREFIX + "cc_pair_basic_info" + + +def _build_basic_cc_pair_info(db_session: Session) -> list[BasicCCPairInfo]: + # eager_load_connector: the return comprehension reads + # `cc_pair.connector.source` for every cc-pair. Without eager loading + # that's an N+1 (one query per cc-pair) — at ~hundreds of cc-pairs + # against a remote Postgres that was seconds; eager loading collapses + # it to a couple of queries. 
+ cc_pairs = get_connector_credential_pairs(db_session, eager_load_connector=True) cc_pair_identifiers = [ ConnectorCredentialPairIdentifier( connector_id=cc_pair.connector_id, credential_id=cc_pair.credential_id @@ -901,3 +916,43 @@ def get_basic_connector_indexing_status( for cc_pair in cc_pairs if cc_pair.connector.source != DocumentSource.INGESTION_API ] + + +@router.get("/indexing-status") +def get_basic_connector_indexing_status( + _: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> list[BasicCCPairInfo]: + # This is the chat page's slowest fan-out call: the per-cc-pair + # document-count aggregation in _build_basic_cc_pair_info measured + # ~300ms on the live DB and runs on every chat page load. The result + # is identical for all users and changes slowly (only when connectors + # are added/removed or an indexing run completes), so we front it with + # a short-TTL global Redis cache. Fail-open: any Redis error falls + # straight through to a direct DB build. Default OFF + # (CC_PAIR_INFO_CACHE_ENABLED). + if not CC_PAIR_INFO_CACHE_ENABLED: + return _build_basic_cc_pair_info(db_session) + + try: + # decode_responses=False on the pool → bytes | None. The cast just + # collapses redis-py's sync/async overload union for mypy. 
+ raw = cast("bytes | None", get_redis_client().get(_CC_PAIR_INFO_CACHE_KEY)) + except Exception as e: + logger.warning("cc-pair-info cache GET failed, using DB: %s", e) + raw = None + if raw is not None: + try: + return [BasicCCPairInfo(**d) for d in json.loads(raw)] + except Exception as e: + logger.warning("cc-pair-info cache entry corrupt, rebuilding: %s", e) + + result = _build_basic_cc_pair_info(db_session) + try: + payload = json.dumps([json.loads(item.json()) for item in result]) + get_redis_client().set( + _CC_PAIR_INFO_CACHE_KEY, payload, ex=CC_PAIR_INFO_CACHE_TTL_SECONDS + ) + except Exception as e: + logger.warning("cc-pair-info cache SET failed (DB result still served): %s", e) + return result diff --git a/backend/danswer/server/documents/models.py b/backend/danswer/server/documents/models.py index 9726958b350..a5337100043 100644 --- a/backend/danswer/server/documents/models.py +++ b/backend/danswer/server/documents/models.py @@ -131,14 +131,20 @@ class CCPairFullInfo(BaseModel): num_docs_indexed: int connector: ConnectorSnapshot credential: CredentialSnapshot - index_attempts: list[IndexAttemptSnapshot] + # The full index-attempt history is paginated via a dedicated endpoint + # (GET /admin/cc-pair/{id}/index-attempts) — embedding it here loaded a + # busy cc-pair's entire history (thousands of rows w/ full tracebacks) on + # every page view. The detail page only needs the latest attempt + a count. 
+ latest_index_attempt: IndexAttemptSnapshot | None + num_index_attempts: int latest_deletion_attempt: DeletionAttemptSnapshot | None @classmethod def from_models( cls, cc_pair_model: ConnectorCredentialPair, - index_attempt_models: list[IndexAttempt], + latest_index_attempt: IndexAttempt | None, + num_index_attempts: int, latest_deletion_attempt: DeletionAttemptSnapshot | None, num_docs_indexed: int, # not ideal, but this must be computed separately ) -> "CCPairFullInfo": @@ -152,11 +158,38 @@ def from_models( credential=CredentialSnapshot.from_credential_db_model( cc_pair_model.credential ), + latest_index_attempt=( + IndexAttemptSnapshot.from_index_attempt_db_model(latest_index_attempt) + if latest_index_attempt is not None + else None + ), + num_index_attempts=num_index_attempts, + latest_deletion_attempt=latest_deletion_attempt, + ) + + +class PaginatedIndexAttempts(BaseModel): + index_attempts: list[IndexAttemptSnapshot] + page: int + total_pages: int + total_count: int + + @classmethod + def from_models( + cls, + index_attempt_models: list[IndexAttempt], + page: int, + total_pages: int, + total_count: int, + ) -> "PaginatedIndexAttempts": + return cls( index_attempts=[ - IndexAttemptSnapshot.from_index_attempt_db_model(index_attempt_model) - for index_attempt_model in index_attempt_models + IndexAttemptSnapshot.from_index_attempt_db_model(m) + for m in index_attempt_models ], - latest_deletion_attempt=latest_deletion_attempt, + page=page, + total_pages=total_pages, + total_count=total_count, ) diff --git a/backend/danswer/server/features/document_set/api.py b/backend/danswer/server/features/document_set/api.py index 3cdaf7b9c21..de49346f969 100644 --- a/backend/danswer/server/features/document_set/api.py +++ b/backend/danswer/server/features/document_set/api.py @@ -8,15 +8,12 @@ from danswer.auth.users import current_user from danswer.db.document_set import check_document_sets_are_public from danswer.db.document_set import fetch_all_document_sets -from 
danswer.db.document_set import fetch_user_document_sets from danswer.db.document_set import insert_document_set from danswer.db.document_set import mark_document_set_as_to_be_deleted from danswer.db.document_set import update_document_set +from danswer.db.document_set_cache import get_document_sets_for_user_cached from danswer.db.engine import get_session from danswer.db.models import User -from danswer.server.documents.models import ConnectorCredentialPairDescriptor -from danswer.server.documents.models import ConnectorSnapshot -from danswer.server.documents.models import CredentialSnapshot from danswer.server.features.document_set.models import CheckDocSetPublicRequest from danswer.server.features.document_set.models import CheckDocSetPublicResponse from danswer.server.features.document_set.models import DocumentSet @@ -92,35 +89,13 @@ def list_document_sets( user: User | None = Depends(current_user), db_session: Session = Depends(get_session), ) -> list[DocumentSet]: - document_set_info = fetch_user_document_sets( + # Read-through Redis cache (per user, fail-open, default OFF). On the + # chat-page bundle this fires on every load; the cache collapses a + # user's repeat loads to one DB build per TTL. The build logic lives in + # the cache module so cached/uncached paths stay identical. 
+ return get_document_sets_for_user_cached( user_id=user.id if user else None, db_session=db_session ) - return [ - DocumentSet( - id=document_set_db_model.id, - name=document_set_db_model.name, - description=document_set_db_model.description, - contains_non_public=any([not cc_pair.is_public for cc_pair in cc_pairs]), - cc_pair_descriptors=[ - ConnectorCredentialPairDescriptor( - id=cc_pair.id, - name=cc_pair.name, - connector=ConnectorSnapshot.from_connector_db_model( - cc_pair.connector - ), - credential=CredentialSnapshot.from_credential_db_model( - cc_pair.credential - ), - ) - for cc_pair in cc_pairs - ], - is_up_to_date=document_set_db_model.is_up_to_date, - is_public=document_set_db_model.is_public, - users=[user.id for user in document_set_db_model.users], - groups=[group.id for group in document_set_db_model.groups], - ) - for document_set_db_model, cc_pairs in document_set_info - ] @router.get("/document-set-public") diff --git a/backend/danswer/server/features/persona/api.py b/backend/danswer/server/features/persona/api.py index cd7deb5321a..73f0cd6e3e8 100644 --- a/backend/danswer/server/features/persona/api.py +++ b/backend/danswer/server/features/persona/api.py @@ -18,6 +18,7 @@ from danswer.db.persona import update_all_personas_display_priority from danswer.db.persona import update_persona_shared_users from danswer.db.persona import update_persona_visibility +from danswer.db.persona_cache import get_personas_for_user_cached from danswer.llm.answering.prompts.utils import build_dummy_prompt from danswer.server.features.persona.models import CreatePersonaRequest from danswer.server.features.persona.models import PersonaSnapshot @@ -163,13 +164,15 @@ def list_personas( db_session: Session = Depends(get_session), include_deleted: bool = False, ) -> list[PersonaSnapshot]: + # Routes through the Redis-backed cache when PERSONA_CACHE_ENABLED; + # otherwise behaves exactly as before (direct DB read + serialize). 
+ # The cache handles the include_deleted=True case by falling through. user_id = user.id if user is not None else None - return [ - PersonaSnapshot.from_model(persona) - for persona in get_personas( - user_id=user_id, include_deleted=include_deleted, db_session=db_session - ) - ] + return get_personas_for_user_cached( + user_id=user_id, + db_session=db_session, + include_deleted=include_deleted, + ) @basic_router.get("/{persona_id}") diff --git a/backend/danswer/server/middleware/__init__.py b/backend/danswer/server/middleware/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/backend/danswer/server/middleware/request_rate_limit.py b/backend/danswer/server/middleware/request_rate_limit.py new file mode 100644 index 00000000000..a4a07a75465 --- /dev/null +++ b/backend/danswer/server/middleware/request_rate_limit.py @@ -0,0 +1,179 @@ +"""Per-user request-rate limiter — Redis-backed, multi-window, fail-open. + +Why this exists, in one sentence: the existing +``danswer.server.query_and_chat.token_limit.check_token_rate_limits`` is a +**token-budget** limiter (sum of tokens over a window, DB-backed). It +caps cost, not request volume, and its in-process ``@lru_cache`` short- +circuit (``any_rate_limit_exists``) is per-pod, so two replicas can +disagree on whether limits are configured at all. This module is the +**request-rate** complement: a per-user (or per-IP for anonymous) cap on +the number of /send-message calls per minute / per hour, with Redis as +the shared counter so the cap holds across replicas. + +Design notes: + +* **Fixed-window buckets.** ``bucket = floor(time() / window)``. Simpler + and cheaper than sliding-window log; the trade-off is that a user + can burst up to ``2 * limit`` across a window boundary. Acceptable + for the protection target (abuse / runaway cost), not for strict SLA + enforcement. 
+* **Atomic ``INCR`` + ``EXPIRE NX``.** The expiry is set only on the + first increment of the bucket so the window boundary is preserved + across concurrent requests racing for the first slot. Without ``NX``, + every request would push the expiry forward and the bucket would + never reset. +* **Fail-open.** Any Redis error allows the request through with a log. + Refusing the chat path because the *rate limiter* is down is a worse + outcome than serving a few extra requests during a Redis blip. +* **Default OFF.** Even when Redis is up, the limiter does nothing + until ``REQUEST_RATE_LIMIT_ENABLED=true`` AND at least one window + limit (per-minute or per-hour) is > 0. This is a protection feature, + not an always-on guard. +* **Anonymous callers are keyed by IP** (X-Forwarded-For first hop, + falling back to the socket peer). If the IP can't be determined we + silently skip — no key, nothing to limit. +""" +from __future__ import annotations + +import time + +from fastapi import Depends +from fastapi import HTTPException +from fastapi import Request + +from danswer.auth.users import current_user +from danswer.configs.app_configs import REQUEST_RATE_LIMIT_ENABLED +from danswer.configs.app_configs import REQUEST_RATE_LIMIT_PER_HOUR +from danswer.configs.app_configs import REQUEST_RATE_LIMIT_PER_MINUTE +from danswer.db.models import User +from danswer.redis.redis_pool import DANSWER_REDIS_KEY_PREFIX +from danswer.redis.redis_pool import get_redis_client +from danswer.utils.logger import setup_logger + + +logger = setup_logger() + + +# All counters live under this prefix so a global FLUSHDB-by-prefix on +# this namespace is trivial in incident response. Sub-key shape: +# {actor}:{label}:{bucket} +# where actor is "u:" or "ip:", label is "min" or "hour", +# and bucket is floor(unix_seconds / window). 
+_KEY_PREFIX = DANSWER_REDIS_KEY_PREFIX + "ratelimit:msg:" + +_MIN_WINDOW_SECONDS = 60 +_HOUR_WINDOW_SECONDS = 3600 + + +def check_message_request_rate_limit( + request: Request, + user: User | None = Depends(current_user), +) -> None: + """FastAPI dependency that 429s a caller over their per-window cap. + + Cheap fast-path when disabled — no Redis call, no env reads beyond + the module-level constants. Safe to attach to every chat / query + endpoint; the cost when off is one truthiness check. + """ + if not REQUEST_RATE_LIMIT_ENABLED: + return + if REQUEST_RATE_LIMIT_PER_MINUTE <= 0 and REQUEST_RATE_LIMIT_PER_HOUR <= 0: + # Nothing to enforce — saves the Redis round-trip when an + # operator enabled the flag but hasn't picked window values yet. + return + + actor = _actor_key(user, request) + if actor is None: + return # no key material; nothing we can fairly attribute + + # Order matters: enforce the tighter window first. If the per-minute + # cap trips, the 429 is raised before the per-hour counter is touched, + # so a request rejected by the minute window does not count against + # the hour window. Requests that pass the minute check count in both. + if REQUEST_RATE_LIMIT_PER_MINUTE > 0: + _enforce_window( + actor=actor, + label="min", + window_seconds=_MIN_WINDOW_SECONDS, + limit=REQUEST_RATE_LIMIT_PER_MINUTE, + ) + if REQUEST_RATE_LIMIT_PER_HOUR > 0: + _enforce_window( + actor=actor, + label="hour", + window_seconds=_HOUR_WINDOW_SECONDS, + limit=REQUEST_RATE_LIMIT_PER_HOUR, + ) + + +def _actor_key(user: User | None, request: Request) -> str | None: + """Identifier the limit is attributed to. + + Authenticated users are keyed by uuid (stable, survives IP changes). + Anonymous traffic is keyed by the first X-Forwarded-For hop, falling + back to the socket peer when the header is absent. If neither yields + an address we return None and skip — better to under-enforce. Note: + behind a proxy that sends no XFF, the socket peer is the proxy's IP. 
+ """ + if user is not None: + return f"u:{user.id}" + + xff = request.headers.get("x-forwarded-for", "") + if xff: + client_ip = xff.split(",", 1)[0].strip() + elif request.client is not None: + client_ip = request.client.host + else: + client_ip = "" + + if not client_ip: + return None + return f"ip:{client_ip}" + + +def _enforce_window(*, actor: str, label: str, window_seconds: int, limit: int) -> None: + """Increment-and-check one window for one actor. + + Raises ``HTTPException(429)`` if the post-increment count exceeds + ``limit``. The Retry-After header tells the caller exactly how long + until the current bucket rolls over — handy for clients that back + off intelligently. + """ + bucket = int(time.time() // window_seconds) + key = f"{_KEY_PREFIX}{actor}:{label}:{bucket}" + + try: + client = get_redis_client() + pipe = client.pipeline() + pipe.incr(key, 1) + # ``nx=True`` here means "set expiry only if no expiry yet" so + # the first increment of the bucket fixes the window boundary. + # Without it, every increment pushes expiry forward and the + # bucket never resets. + pipe.expire(key, window_seconds, nx=True) + result = pipe.execute() + count = int(result[0]) + except Exception as e: + # Fail-open: better to let a request through than to wedge the + # chat path because Redis is unhappy. Loud log so it's obvious + # in the dashboard, but no exception propagation. + logger.warning( + "Rate-limit check skipped due to Redis error (actor=%s window=%s): %s", + actor, + label, + e, + ) + return + + if count > limit: + # Seconds remaining in the current bucket — tells the caller + # when to retry without us needing to look up the TTL. + retry_after = window_seconds - (int(time.time()) % window_seconds) + raise HTTPException( + status_code=429, + detail=( + f"Request rate limit exceeded " + f"({limit} per {label}). Retry in {retry_after}s." 
+ ), + headers={"Retry-After": str(retry_after)}, + ) diff --git a/backend/danswer/server/query_and_chat/chat_backend.py b/backend/danswer/server/query_and_chat/chat_backend.py index 4e5a1bb2138..c8eff35bfba 100644 --- a/backend/danswer/server/query_and_chat/chat_backend.py +++ b/backend/danswer/server/query_and_chat/chat_backend.py @@ -1,5 +1,6 @@ import io import uuid +from typing import cast from fastapi import APIRouter from fastapi import Depends @@ -15,6 +16,9 @@ from danswer.auth.users import current_user from danswer.chat.chat_utils import create_chat_chain from danswer.chat.process_message import stream_chat_message +from danswer.configs.app_configs import CHAT_FILE_MAX_SIZE_MB +from danswer.configs.app_configs import CHAT_FILE_MAX_TOKEN_FRACTION +from danswer.configs.app_configs import FILE_STORE_TYPE from danswer.configs.app_configs import WEB_DOMAIN from danswer.configs.constants import FileOrigin from danswer.configs.constants import MessageType @@ -37,6 +41,7 @@ from danswer.document_index.document_index_utils import get_both_index_names from danswer.document_index.factory import get_default_document_index from danswer.file_processing.extract_file_text import extract_file_text +from danswer.file_store.file_store import AzureBlobFileStore from danswer.file_store.file_store import get_default_file_store from danswer.file_store.models import ChatFileType from danswer.file_store.models import FileDescriptor @@ -47,9 +52,13 @@ from danswer.llm.factory import get_default_llms from danswer.llm.headers import get_litellm_additional_request_headers from danswer.llm.utils import get_default_llm_tokenizer +from danswer.llm.utils import get_max_input_tokens from danswer.secondary_llm_flows.chat_session_naming import ( get_renamed_conversation_name, ) +from danswer.server.middleware.request_rate_limit import ( + check_message_request_rate_limit, +) from danswer.server.query_and_chat.models import ChatFeedbackRequest from danswer.server.query_and_chat.models import 
ChatMessageIdentifier from danswer.server.query_and_chat.models import ChatRenameRequest @@ -74,6 +83,52 @@ # api_router = APIRouter(prefix="/chat", dependencies=[Depends(validate_api_key)]) +# --- Chat file-upload limits ---------------------------------------------- +# A chat-attached doc is stuffed WHOLE into the LLM prompt (no retrieval), so +# the real ceiling is the model context window. _reject_if_text_too_long is +# the meaningful guard; the byte cap is a cheap pre-filter. +def _max_chat_file_tokens() -> int: + """CHAT_FILE_MAX_TOKEN_FRACTION of the default LLM's max input tokens. + Falls back to a conservative default if the model map can't be resolved, + so a lookup failure never blocks uploads.""" + try: + llm, _ = get_default_llms() + max_input = get_max_input_tokens( + model_name=llm.config.model_name, + model_provider=llm.config.model_provider, + ) + except Exception: + max_input = 128_000 + return int(max_input * CHAT_FILE_MAX_TOKEN_FRACTION) + + +def _reject_if_file_too_large(size: int | None, filename: str | None) -> None: + if size and size > CHAT_FILE_MAX_SIZE_MB * 1024 * 1024: + raise HTTPException( + status_code=400, + detail=( + f"File '{filename or ''}' is {size // (1024 * 1024)}MB; the " + f"upload limit is {CHAT_FILE_MAX_SIZE_MB}MB." + ), + ) + + +def _reject_if_text_too_long(text: str, filename: str | None) -> None: + n_tokens = len(get_default_llm_tokenizer().encode(text)) + budget = _max_chat_file_tokens() + if n_tokens > budget: + raise HTTPException( + status_code=400, + detail=( + f"Document '{filename or ''}' is too large to chat with " + f"(~{n_tokens:,} tokens; limit {budget:,}). The whole document " + "is sent to the model, so it must fit the context window — " + "upload a smaller excerpt, or add it as a connector to search " + "over it instead." 
+ ), + ) + + @router.get("/get-user-chat-sessions") def get_user_chat_sessions( user: User | None = Depends(current_user), @@ -278,6 +333,10 @@ def handle_new_chat_message( chat_message_req: CreateChatMessageRequest, request: Request, user: User | None = Depends(current_user), + # Request-rate cap (Redis-backed, default off) runs BEFORE the + # token-budget check — cheap fast-path means a 429'd caller never + # touches the DB-backed token-usage query. + _rate_limit: None = Depends(check_message_request_rate_limit), _: None = Depends(check_token_rate_limits), ) -> StreamingResponse: """This endpoint is both used for all the following purposes: @@ -515,6 +574,10 @@ def upload_files_for_chat( ) raise HTTPException(status_code=400, detail=error_detail) + # Byte cap, all types (cheap pre-filter; the token gate below is the + # real protection for text/docs). + _reject_if_file_too_large(file.size, file.filename) + if ( file.content_type in image_content_types and file.size @@ -550,6 +613,13 @@ def upload_files_for_chat( # to re-extract it every time we send a message if file_type == ChatFileType.DOC: extracted_text = extract_file_text(file_name=file.filename, file=file.file) + # Token gate: the extracted text gets stuffed whole into the prompt. + # Reject (and drop the just-stored raw file) if it can't fit. + try: + _reject_if_text_too_long(extracted_text, file.filename) + except HTTPException: + file_store.delete_file(file_id) + raise text_file_id = str(uuid.uuid4()) file_store.save_file( file_name=text_file_id, @@ -563,6 +633,17 @@ def upload_files_for_chat( # message file_info.append((text_file_id, file.filename, ChatFileType.PLAIN_TEXT)) else: + if file_type == ChatFileType.PLAIN_TEXT: + # Plain text is stuffed as-is — token-gate it too (read the + # just-stored copy back so we don't depend on stream position). 
+ raw = file_store.read_file(file_id, mode="b", use_tempfile=True) + try: + _reject_if_text_too_long( + raw.read().decode("utf-8", errors="ignore"), file.filename + ) + except HTTPException: + file_store.delete_file(file_id) + raise file_info.append((file_id, file.filename, file_type)) return { @@ -573,6 +654,147 @@ def upload_files_for_chat( } +# --- Direct-to-Blob upload (SAS) ------------------------------------------- +# Lets the browser PUT files straight to Azure Blob, bypassing the server +# (faster + offloads api-server bandwidth). Two steps: mint a SAS URL, then +# confirm so the server records metadata (+ extracts text for docs). Only +# active when the Azure file store is configured; otherwise the client falls +# back to the two-hop POST /chat/file above. + +_IMAGE_CONTENT_TYPES = {"image/jpeg", "image/png", "image/webp"} +_DOCUMENT_CONTENT_TYPES = { + "application/pdf", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "application/vnd.openxmlformats-officedocument.presentationml.presentation", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "message/rfc822", + "application/epub+zip", +} + + +def _chat_file_type_for(content_type: str | None) -> ChatFileType: + if content_type in _IMAGE_CONTENT_TYPES: + return ChatFileType.IMAGE + if content_type in _DOCUMENT_CONTENT_TYPES: + return ChatFileType.DOC + return ChatFileType.PLAIN_TEXT + + +class ChatFileUploadUrlItem(BaseModel): + name: str + content_type: str | None = None + # Client-reported size — byte-gated here; the authoritative content gate + # is the token check at /file/confirm (after extraction). + size: int | None = None + + +class ChatFileUploadUrlRequest(BaseModel): + files: list[ChatFileUploadUrlItem] + + +class ChatFileUploadUrlResponseItem(BaseModel): + file_id: str + upload_url: str + content_type: str | None = None + + +class ChatFileUploadUrlResponse(BaseModel): + # False → the active file store can't do direct uploads (e.g. 
Postgres); + # the client should fall back to the two-hop POST /chat/file. + direct_upload: bool + files: list[ChatFileUploadUrlResponseItem] = [] + + +@router.post("/file/upload-url") +def get_chat_file_upload_urls( + req: ChatFileUploadUrlRequest, + db_session: Session = Depends(get_session), + _: User | None = Depends(current_user), +) -> ChatFileUploadUrlResponse: + """Mint short-lived SAS URLs so the client PUTs files DIRECTLY to Blob.""" + if FILE_STORE_TYPE != AzureBlobFileStore.__name__: + return ChatFileUploadUrlResponse(direct_upload=False) + store = cast(AzureBlobFileStore, get_default_file_store(db_session)) + items: list[ChatFileUploadUrlResponseItem] = [] + for f in req.files: + _reject_if_file_too_large(f.size, f.name) + file_id = str(uuid.uuid4()) + items.append( + ChatFileUploadUrlResponseItem( + file_id=file_id, + upload_url=store.generate_upload_sas_url(file_id), + content_type=f.content_type, + ) + ) + return ChatFileUploadUrlResponse(direct_upload=True, files=items) + + +class ChatFileConfirmItem(BaseModel): + file_id: str + name: str | None = None + content_type: str | None = None + + +class ChatFileConfirmRequest(BaseModel): + files: list[ChatFileConfirmItem] + + +@router.post("/file/confirm") +def confirm_chat_file_uploads( + req: ChatFileConfirmRequest, + db_session: Session = Depends(get_session), + _: User | None = Depends(current_user), +) -> dict[str, list[FileDescriptor]]: + """After the client direct-uploads to Blob, record each file's metadata + row and (for docs) extract text server-side — mirrors the tail of + upload_files_for_chat. 
Returns the FileDescriptors to attach to a message.""" + store = cast(AzureBlobFileStore, get_default_file_store(db_session)) + file_info: list[tuple[str, str | None, ChatFileType]] = [] + for f in req.files: + file_type = _chat_file_type_for(f.content_type) + store.register_object( + file_name=f.file_id, + display_name=f.name, + file_origin=FileOrigin.CHAT_UPLOAD, + file_type=f.content_type or file_type.value, + ) + if file_type == ChatFileType.DOC: + raw = store.read_file(f.file_id, mode="b", use_tempfile=True) + extracted_text = extract_file_text(file_name=f.name, file=raw) + # Token gate (stuffed whole into the prompt). On reject, drop the + # orphan blob the client already uploaded. + try: + _reject_if_text_too_long(extracted_text, f.name) + except HTTPException: + store.delete_file(f.file_id) + raise + text_file_id = str(uuid.uuid4()) + store.save_file( + file_name=text_file_id, + content=io.BytesIO(extracted_text.encode()), + display_name=f.name, + file_origin=FileOrigin.CHAT_UPLOAD, + file_type="text/plain", + ) + file_info.append((text_file_id, f.name, ChatFileType.PLAIN_TEXT)) + else: + if file_type == ChatFileType.PLAIN_TEXT: + raw = store.read_file(f.file_id, mode="b", use_tempfile=True) + try: + _reject_if_text_too_long( + raw.read().decode("utf-8", errors="ignore"), f.name + ) + except HTTPException: + store.delete_file(f.file_id) + raise + file_info.append((f.file_id, f.name, file_type)) + return { + "files": [ + {"id": fid, "type": ftype, "name": fname} for fid, fname, ftype in file_info + ] + } + + @router.get("/file/{file_id:path}") def fetch_chat_file( file_id: str, diff --git a/backend/danswer/server/query_and_chat/query_backend.py b/backend/danswer/server/query_and_chat/query_backend.py index ff632e0613a..57f49e143b6 100644 --- a/backend/danswer/server/query_and_chat/query_backend.py +++ b/backend/danswer/server/query_and_chat/query_backend.py @@ -1,6 +1,7 @@ from fastapi import APIRouter from fastapi import Depends from fastapi import 
HTTPException +from fastapi import Request from fastapi.responses import StreamingResponse from sqlalchemy.orm import Session @@ -23,6 +24,9 @@ from danswer.search.utils import chunks_or_sections_to_search_docs from danswer.secondary_llm_flows.query_validation import get_query_answerability from danswer.secondary_llm_flows.query_validation import stream_query_answerability +from danswer.server.middleware.request_rate_limit import ( + check_message_request_rate_limit, +) from danswer.server.query_and_chat.models import AdminSearchRequest from danswer.server.query_and_chat.models import AdminSearchResponse from danswer.server.query_and_chat.models import HelperResponse @@ -89,6 +93,9 @@ def get_tags( # If this is empty or None, then tags for all sources are considered sources: list[DocumentSource] | None = None, allow_prefix: bool = True, # This is currently the only option + # Optional cap on tags returned. Default None preserves the existing + # unbounded behavior; a client can pass a limit to bound the response. + limit: int | None = None, _: User = Depends(current_user), db_session: Session = Depends(get_session), ) -> TagResponse: @@ -98,6 +105,7 @@ def get_tags( db_tags = get_tags_by_value_prefix_for_source_types( tag_value_prefix=match_pattern, sources=sources, + limit=limit, db_session=db_session, ) server_tags = [ @@ -150,7 +158,11 @@ def stream_query_validation( @basic_router.post("/stream-answer-with-quote") def get_answer_with_quote( query_request: DirectQARequest, + request: Request, user: User = Depends(current_user), + # Mirrors /chat/send-message: request-rate cap first (cheap when + # off), token-budget check second. 
+ _rate_limit: None = Depends(check_message_request_rate_limit), _: None = Depends(check_token_rate_limits), ) -> StreamingResponse: query = query_request.messages[0].message diff --git a/backend/danswer/server/settings/models.py b/backend/danswer/server/settings/models.py index 3f00eb85794..15e3d86b4ac 100644 --- a/backend/danswer/server/settings/models.py +++ b/backend/danswer/server/settings/models.py @@ -18,6 +18,10 @@ class Settings(BaseModel): # stored value wins, so flip it in Admin → Settings on existing deployments. default_page: PageType = PageType.CHAT maximum_chat_retention_days: int | None = None + # Env-driven (CHAT_FILE_MAX_SIZE_MB), injected in load_settings — surfaced + # here so the chat UI pre-checks against the SAME value the backend enforces + # instead of a hardcoded duplicate. + chat_file_max_size_mb: int = 25 def check_validity(self) -> None: chat_page_enabled = self.chat_page_enabled diff --git a/backend/danswer/server/settings/store.py b/backend/danswer/server/settings/store.py index ead1e3652a9..29293afaab8 100644 --- a/backend/danswer/server/settings/store.py +++ b/backend/danswer/server/settings/store.py @@ -1,5 +1,6 @@ from typing import cast +from danswer.configs.app_configs import CHAT_FILE_MAX_SIZE_MB from danswer.dynamic_configs.factory import get_dynamic_config_store from danswer.dynamic_configs.interface import ConfigNotFoundError from danswer.server.settings.models import Settings @@ -16,6 +17,10 @@ def load_settings() -> Settings: settings = Settings() dynamic_config_store.store(_SETTINGS_KEY, settings.dict()) + # Env-controlled, not admin-stored — always reflect the current env so the + # chat UI pre-check matches the backend's CHAT_FILE_MAX_SIZE_MB. 
+ settings.chat_file_max_size_mb = CHAT_FILE_MAX_SIZE_MB + return settings diff --git a/backend/ee/danswer/db/user_group.py b/backend/ee/danswer/db/user_group.py index 0451db9b633..74c79ddf20c 100644 --- a/backend/ee/danswer/db/user_group.py +++ b/backend/ee/danswer/db/user_group.py @@ -14,6 +14,7 @@ from danswer.db.models import User__UserGroup from danswer.db.models import UserGroup from danswer.db.models import UserGroup__ConnectorCredentialPair +from danswer.db.persona_cache import invalidate_user_groups from danswer.server.documents.models import ConnectorCredentialPairIdentifier from ee.danswer.server.user_group.models import UserGroupCreate from ee.danswer.server.user_group.models import UserGroupUpdate @@ -180,6 +181,10 @@ def insert_user_group(db_session: Session, user_group: UserGroupCreate) -> UserG ) db_session.commit() + # New User__UserGroup rows for these users — bust their cached group lists + # so the next persona-list call sees the new group's persona grants. + for affected_user_id in user_group.user_ids: + invalidate_user_groups(affected_user_id) return db_user_group @@ -221,9 +226,10 @@ def update_user_group( cc_pairs_updated = set([cc_pair.id for cc_pair in existing_cc_pairs]) != set( user_group.cc_pair_ids ) - users_updated = set([user.id for user in db_user_group.users]) != set( - user_group.user_ids - ) + # Snapshot existing members BEFORE the cleanup mutation, so we know + # which users to invalidate. The new member set is on the request. + existing_user_ids = {user.id for user in db_user_group.users} + users_updated = existing_user_ids != set(user_group.user_ids) if users_updated: _cleanup_user__user_group_relationships__no_commit( @@ -249,6 +255,12 @@ def update_user_group( db_user_group.is_up_to_date = False db_session.commit() + if users_updated: + # Bust both removed (existing - new) and added (new - existing) users. 
+ # Symmetric difference would be enough, but unioning both sides is + # cheap and avoids missing edge cases when membership reshuffles. + for affected_user_id in existing_user_ids | set(user_group.user_ids): + invalidate_user_groups(affected_user_id) return db_user_group @@ -275,6 +287,11 @@ def prepare_user_group_for_deletion(db_session: Session, user_group_id: int) -> _check_user_group_is_modifiable(db_user_group) + # Snapshot current members before cleanup so we can bust their caches + # after commit. The cleanup helper deletes the User__UserGroup rows, + # so reading after cleanup would give an empty set. + affected_user_ids = [user.id for user in db_user_group.users] + _cleanup_user__user_group_relationships__no_commit( db_session=db_session, user_group_id=user_group_id ) @@ -288,6 +305,8 @@ def prepare_user_group_for_deletion(db_session: Session, user_group_id: int) -> db_user_group.is_up_to_date = False db_user_group.is_up_for_deletion = True db_session.commit() + for affected_user_id in affected_user_ids: + invalidate_user_groups(affected_user_id) def _cleanup_user_group__cc_pair_relationships__no_commit( diff --git a/backend/requirements/default.txt b/backend/requirements/default.txt index 8391736906f..cc13447c8fa 100644 --- a/backend/requirements/default.txt +++ b/backend/requirements/default.txt @@ -2,8 +2,15 @@ aiohttp==3.9.4 alembic==1.10.4 asyncpg==0.27.0 atlassian-python-api==3.37.0 +# Optional file-store backend (FILE_STORE_TYPE=AzureBlobFileStore). Imported +# lazily in file_store.py, so only needed when that backend is selected. +azure-storage-blob==12.19.1 bcrypt==4.0.1 # pin: passlib 1.7.4 reads bcrypt.__about__, removed in bcrypt 4.1+ beautifulsoup4==4.12.2 +# bokeh powers the Dask scheduler dashboard at :8787. Required by +# Dask 2023.8.1 when the scheduler is started without --no-dashboard. +# Pin matches Dask's `dashboard` extra (bokeh!=3.0.*,>=2.4.2). 
+bokeh>=2.4.2,<3.0 boto3==1.34.84 celery==5.3.4 chardet==5.2.0 @@ -61,6 +68,7 @@ retry==0.9.2 # This pulls in py which is in CVE-2022-42969, must remove py from rfc3986==1.5.0 rt==3.1.2 simple-salesforce==1.12.6 +redis==5.0.8 slack-sdk==3.20.2 SQLAlchemy[mypy]==2.0.15 starlette==0.36.3 diff --git a/backend/scripts/backfill_analytics_rollup.py b/backend/scripts/backfill_analytics_rollup.py index 61571674436..217a4f268d7 100644 --- a/backend/scripts/backfill_analytics_rollup.py +++ b/backend/scripts/backfill_analytics_rollup.py @@ -1,8 +1,11 @@ -"""Populate `analytics_daily_rollup` from existing chat data. +"""Populate `analytics_daily_rollup` AND `analytics_user_first_seen` from +existing chat data. Run this ONCE after deploying the rollup feature, before the next chat retention sweep deletes any old data. After this completes, the daily -Celery beat task (`run_analytics_rollup_task`) keeps the table fresh. +Celery beat task (`run_analytics_rollup_task`) keeps both tables fresh. +Walking history ascending means each user's first_seen_date is their true +first-ever active day (within the data that still exists). Usage: diff --git a/backend/scripts/dev_run_dask_distributed.py b/backend/scripts/dev_run_dask_distributed.py new file mode 100644 index 00000000000..f63d0af1dd5 --- /dev/null +++ b/backend/scripts/dev_run_dask_distributed.py @@ -0,0 +1,298 @@ +"""Dev helper that spawns the full Dask-Distributed background stack +as plain subprocesses. 
+ +Mirrors `dev_run_background_jobs.py` (one parent Python process, child +processes for each background role, Ctrl-C tears down the tree) but +with the prod-shape topology: + + dask-scheduler TCP RPC + dashboard + │ + ├── dask-worker × N actual indexing executors + ├── indexer-scheduler runs update.py polling loop, submits + │ to dask-scheduler instead of an + │ in-process LocalCluster + ├── celery-worker unchanged + └── celery-beat unchanged + +Use this when you want to reproduce production indexing behavior +locally without K8s or Docker. For day-to-day connector-code work, +keep using `dev_run_background_jobs.py` — it's faster to start and +the LocalCluster mode is sufficient for most testing. + +Usage: + cd backend + PYTHONPATH=$(pwd) python scripts/dev_run_dask_distributed.py + # ...or with custom worker count: + PYTHONPATH=$(pwd) python scripts/dev_run_dask_distributed.py \\ + --num-workers 4 +""" +from __future__ import annotations + +import argparse +import os +import signal +import socket +import subprocess +import sys +import threading +import time + + +SCHEDULER_HOST = "127.0.0.1" + + +def monitor_process(process_name: str, process: subprocess.Popen) -> None: + """Stream a child's stdout/stderr to our own stdout with a label.""" + assert process.stdout is not None + while True: + output = process.stdout.readline() + if output: + print(f"{process_name}: {output.strip()}", flush=True) + if process.poll() is not None: + break + + +def wait_for_port(host: str, port: int, timeout: float = 30.0) -> bool: + """Poll a TCP port until something accepts connections, or timeout. + + Used to gate dask-worker spawn on the scheduler being reachable — + without this, workers crash with `ConnectionRefusedError` and have + to retry on their own backoff. 
+ """ + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + try: + with socket.create_connection((host, port), timeout=1.0): + return True + except OSError: + time.sleep(0.5) + return False + + +def spawn( + name: str, + cmd: list[str], + env: dict[str, str] | None = None, +) -> tuple[subprocess.Popen, threading.Thread]: + """Start a subprocess + a thread tailing its output.""" + process = subprocess.Popen( + cmd, + env=env, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ) + thread = threading.Thread(target=monitor_process, args=(name, process), daemon=True) + thread.start() + return process, thread + + +def run( + num_workers: int, + scheduler_port: int, + dashboard_port: int, + no_celery: bool, + no_indexer: bool, +) -> int: + # Children inherit our env (Postgres / Vespa / GenAI / model-server + # creds etc.) plus a guaranteed PYTHONPATH=. so that subprocess'd + # `dask worker` can import `danswer.*` when deserializing the + # run_indexing_entrypoint callable. + base_env = os.environ.copy() + base_env["PYTHONPATH"] = "." + + scheduler_addr = f"tcp://{SCHEDULER_HOST}:{scheduler_port}" + children: list[subprocess.Popen] = [] + + def shutdown(*_args: object) -> None: + print("\n[dev_run_dask_distributed] Caught signal; shutting down…") + for proc in children: + if proc.poll() is None: + try: + proc.terminate() + except Exception: + pass + # Give them a moment to terminate cleanly before SIGKILL. + deadline = time.monotonic() + 5.0 + for proc in children: + timeout = max(0.1, deadline - time.monotonic()) + try: + proc.wait(timeout=timeout) + except subprocess.TimeoutExpired: + try: + proc.kill() + except Exception: + pass + sys.exit(0) + + signal.signal(signal.SIGINT, shutdown) + signal.signal(signal.SIGTERM, shutdown) + + # 1. Dask scheduler — must come up first so workers don't have to + # back off + retry. 
The bind host is 127.0.0.1 not 0.0.0.0 + # because this is a dev-only helper; nothing should reach it from + # outside the host. + print(f"[dev_run_dask_distributed] starting dask-scheduler on {scheduler_addr}") + sched_proc, _ = spawn( + "DASK-SCHED", + [ + "dask", + "scheduler", + "--host", + SCHEDULER_HOST, + "--port", + str(scheduler_port), + "--dashboard-address", + f":{dashboard_port}", + ], + env=base_env, + ) + children.append(sched_proc) + + if not wait_for_port(SCHEDULER_HOST, scheduler_port, timeout=30.0): + print( + f"[dev_run_dask_distributed] scheduler did not bind {scheduler_addr} " + "within 30s; aborting." + ) + shutdown() + return 1 + print( + "[dev_run_dask_distributed] scheduler is up. " + f"Dashboard: http://{SCHEDULER_HOST}:{dashboard_port}" + ) + + # 2. Dask workers — N processes, each one thread / one worker, so + # each gets its own RSS envelope. Same pattern as the K8s + # `dask-worker-deployment.yaml`. + worker_env = base_env.copy() + worker_env["CURRENT_PROCESS_IS_AN_INDEXING_JOB"] = "true" + for i in range(num_workers): + proc, _ = spawn( + f"DASK-WORKER-{i}", + [ + "dask", + "worker", + scheduler_addr, + "--nworkers=1", + "--nthreads=1", + "--memory-limit=4GB", + ], + env=worker_env, + ) + children.append(proc) + print(f"[dev_run_dask_distributed] started {num_workers} dask-worker(s)") + + # 3. Indexer-scheduler — runs the update.py polling loop and + # submits work to the scheduler we just started. + if not no_indexer: + indexer_env = base_env.copy() + indexer_env["DASK_SCHEDULER_ADDRESS"] = scheduler_addr + indexer_env["CURRENT_PROCESS_IS_AN_INDEXING_JOB"] = "true" + proc, _ = spawn( + "INDEXER", + ["python", "danswer/background/update.py"], + env=indexer_env, + ) + children.append(proc) + print("[dev_run_dask_distributed] started indexer-scheduler") + + # 4. Celery worker + beat — unchanged from dev_run_background_jobs.py. 
+ # Indexing isn't routed through Celery in this fork, so these + # exist solely to handle prune / sync / retention / cleanup / etc. + if not no_celery: + worker_proc, _ = spawn( + "CELERY-WORKER", + [ + "celery", + "-A", + "ee.danswer.background.celery.celery_app", + "worker", + "--pool=threads", + "--concurrency=10", + "--loglevel=INFO", + ], + env=base_env, + ) + children.append(worker_proc) + + beat_proc, _ = spawn( + "CELERY-BEAT", + [ + "celery", + "-A", + "ee.danswer.background.celery.celery_app", + "beat", + "--loglevel=INFO", + ], + env=base_env, + ) + children.append(beat_proc) + print("[dev_run_dask_distributed] started celery worker + beat") + + print( + "[dev_run_dask_distributed] all processes launched. " + "Ctrl-C to tear down the whole tree." + ) + + # Block forever, watching for any child to die. If the scheduler + # or indexer goes down we don't try to recover here (it's a dev + # helper, not a supervisor) — just exit and let the dev see why. + try: + while True: + for proc in children: + if proc.poll() is not None: + print( + f"[dev_run_dask_distributed] child process exited " + f"with code {proc.returncode}; tearing down." 
+ ) + shutdown() + return proc.returncode or 1 + time.sleep(1.0) + except KeyboardInterrupt: + shutdown() + return 0 + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--num-workers", + type=int, + default=2, + help="Number of dask-worker subprocesses (default: 2)", + ) + parser.add_argument( + "--scheduler-port", + type=int, + default=8786, + help="Dask scheduler RPC port (default: 8786)", + ) + parser.add_argument( + "--dashboard-port", + type=int, + default=8787, + help="Dask scheduler dashboard port (default: 8787)", + ) + parser.add_argument( + "--no-celery", + action="store_true", + help="Skip Celery worker + beat (useful when only testing indexing)", + ) + parser.add_argument( + "--no-indexer", + action="store_true", + help="Skip the indexer-scheduler (useful when bringing your own " + "by running update.py manually with DASK_SCHEDULER_ADDRESS set)", + ) + args = parser.parse_args() + return run( + num_workers=args.num_workers, + scheduler_port=args.scheduler_port, + dashboard_port=args.dashboard_port, + no_celery=args.no_celery, + no_indexer=args.no_indexer, + ) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/backend/scripts/migrate_file_store_to_azure_blob.py b/backend/scripts/migrate_file_store_to_azure_blob.py new file mode 100644 index 00000000000..3232731b60c --- /dev/null +++ b/backend/scripts/migrate_file_store_to_azure_blob.py @@ -0,0 +1,99 @@ +"""Migrate file bytes from Postgres large objects → Azure Blob Storage. + +For every `file_store` row that still has its bytes in a Postgres large +object (`lobj_oid` set, `object_key` NULL), this streams the lobj up to the +Azure Blob container, points the row at the blob (`object_key`), clears +`lobj_oid`, and frees the large object. + +Idempotent — already-migrated rows (object_key set) are skipped, so it's +safe to re-run / resume. Reads use a spooled temp file, so a huge file +won't OOM the migrator. 
+ +Requires the Azure backend to be configured in the environment: + FILE_STORE_TYPE=AzureBlobFileStore (not strictly required, but matches prod) + AZURE_BLOB_CONNECTION_STRING=... (the storage account connection string) + AZURE_BLOB_CONTAINER=danswer-files + +Usage: + cd backend + PYTHONPATH=$(pwd) python scripts/migrate_file_store_to_azure_blob.py + PYTHONPATH=$(pwd) python scripts/migrate_file_store_to_azure_blob.py --dry-run + +Cutover: deploy the image (with azure-storage-blob) + the migration that +adds object_key, set the secret, flip FILE_STORE_TYPE=AzureBlobFileStore, +then run this once. Reads of un-migrated rows fall back to the lobj in the +meantime, so there's no hard ordering requirement — but run it promptly so +the lobjs (and the DB bloat) actually go away. +""" +from __future__ import annotations + +import argparse +import sys + +from sqlalchemy import select +from sqlalchemy.orm import Session + +from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.models import PGFileStore +from danswer.db.pg_file_store import delete_lobj_by_id +from danswer.db.pg_file_store import read_lobj +from danswer.file_store.file_store import _get_azure_container_client + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--dry-run", + action="store_true", + help="List what would migrate; upload/modify nothing.", + ) + args = parser.parse_args() + + engine = get_sqlalchemy_engine() + with Session(engine) as db_session: + rows = db_session.scalars( + select(PGFileStore) + .where(PGFileStore.lobj_oid.isnot(None)) + .where(PGFileStore.object_key.is_(None)) + ).all() + print(f"{len(rows)} file(s) to migrate (lobj → blob).") + if args.dry_run: + for r in rows: + print(f" would migrate: {r.file_name} (lobj_oid={r.lobj_oid})") + return 0 + + container = _get_azure_container_client() + migrated = 0 + for r in rows: + old_lobj = r.lobj_oid + # Spooled temp file → bounded memory even for large blobs. 
+ stream = read_lobj( + lobj_oid=old_lobj, db_session=db_session, use_tempfile=True + ) + container.upload_blob(name=r.file_name, data=stream, overwrite=True) + + # Point the row at the blob, then free the lobj. Commit the row + # first so a crash leaves it readable from the blob (the lobj + # delete is best-effort cleanup). + r.object_key = r.file_name + r.lobj_oid = None + db_session.commit() + try: + delete_lobj_by_id(old_lobj, db_session=db_session) + db_session.commit() + except Exception as e: + print( + f" WARN: uploaded {r.file_name} but failed to free lobj {old_lobj}: {e}" + ) + db_session.rollback() + + migrated += 1 + if migrated % 50 == 0: + print(f" migrated {migrated}/{len(rows)}…") + + print(f"Done. Migrated {migrated} file(s) to Azure Blob.") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/backend/scripts/seed_assistants.py b/backend/scripts/seed_assistants.py new file mode 100644 index 00000000000..8f900dbe142 --- /dev/null +++ b/backend/scripts/seed_assistants.py @@ -0,0 +1,393 @@ +"""Seed N varied personas/assistants into the local DB for UX testing. + +WARNING — local dev tool only. Runs whatever `DATABASE_URL` / `POSTGRES_*` +env vars point at. NEVER point this at a prod Postgres. If `POSTGRES_HOST` +contains anything that smells like prod (configured to error below), the +script aborts. + +Produces a realistic mix for exercising the redesigned gallery page: + + ~30% "Yours" — owned by the target user (private) + ~20% "Shared with you" — owned by another user, target user in users[] + ~50% "Featured" — public (is_public=True, no specific owner) + +Each row gets a random subset of available tools / document sets so the +{n} tools / {n} sources chips render with variety. Half of "Yours" land +in the user's chosen_assistants picker, half do not — so the "Already +added" / "Available to add" filter chips have content on both sides. 
+ +Usage (from repo root): + + cd backend + source ../.venv/bin/activate + python -m scripts.seed_assistants --email you@example.com --count 50 + + # Wipe just the seeded rows (by name prefix) and re-seed: + python -m scripts.seed_assistants --clear + python -m scripts.seed_assistants --email you@example.com --count 50 + +Notes: + * Re-running without --clear stacks more rows. Use --prefix to namespace. + * If --email isn't supplied, picks the first admin user in the DB. + * If only one user exists, the "Shared with you" tier is folded into + "Featured" since there's no one else to own them. +""" +from __future__ import annotations + +import argparse +import os +import random +import sys +from collections.abc import Sequence + +from sqlalchemy import select +from sqlalchemy.orm import Session + +from danswer.auth.schemas import UserRole +from danswer.db.engine import SessionFactory +from danswer.db.models import DocumentSet +from danswer.db.models import Persona +from danswer.db.models import Persona__User +from danswer.db.models import Tool +from danswer.db.models import User +from danswer.search.enums import RecencyBiasSetting + + +# --- safety: don't blast prod by accident --------------------------------- + +# If POSTGRES_HOST contains any of these substrings, bail. Extend as +# needed. The whole point is: this script generates fake data; you only +# want it on your own laptop's Postgres. 
+_PROD_HOST_FINGERPRINTS = ( + "azure.com", # Azure managed Postgres (darwin uses one) + "amazonaws.com", + "rds.", + "gcp.", + ".cloud.", + "prod", + "production", +) + + +def _abort_if_pointed_at_prod() -> None: + host = (os.environ.get("POSTGRES_HOST") or "").lower() + for marker in _PROD_HOST_FINGERPRINTS: + if marker in host: + print( + f"REFUSING TO RUN: POSTGRES_HOST={host!r} looks like a prod DB.\n" + f"Point POSTGRES_HOST at localhost / your dev container first.", + file=sys.stderr, + ) + sys.exit(2) + + +# --- content pools -------------------------------------------------------- + +# 60 distinct names so we can cover the requested ~50 without dup. +_NAMES: list[str] = [ + "Research Pal", + "Code Reviewer", + "SQL Helper", + "Email Drafter", + "Bug Triage", + "API Documenter", + "Test Writer", + "Meeting Summarizer", + "Slack Digest", + "Stand-up Buddy", + "Customer Insights", + "Onboarding Guide", + "Roadmap Reviewer", + "Incident Reporter", + "Refactor Assistant", + "Release Notes", + "Spec Reader", + "RFC Writer", + "PR Summarizer", + "Postmortem Helper", + "Design Critic", + "Architecture Sketch", + "Security Reviewer", + "Threat Modeler", + "Compliance Auditor", + "Pricing Analyst", + "Sales Enabler", + "Renewal Scout", + "Churn Predictor", + "Marketing Riff", + "Blog Draftsman", + "Tweet Polisher", + "Tagline Brewer", + "FAQ Generator", + "Support Tier-1", + "Escalation Helper", + "Runbook Walker", + "Migration Planner", + "Schema Diff Reader", + "Index Tuner", + "Query Explainer", + "Log Whisperer", + "Metric Hunter", + "Alert Wrangler", + "Dashboard Builder", + "Hire Brief", + "Interview Scribe", + "Skill Mapper", + "Doc Search", + "Wiki Pal", + "Note Taker", + "Action-Items Finder", + "Standup Cliff-Notes", + "Investor FAQ", + "Roadblock Spotter", + "OKR Reviewer", + "Quarterly Recap", + "Pitch Sharpener", + "Customer-Reply Drafter", + "Demo Outline", +] + +# 30 description templates — varied tones / scopes so the cards don't all +# read the 
same. +_DESCRIPTIONS: list[str] = [ + "Answers questions about our codebase using semantic search across the indexed repos.", + "Drafts polished customer-facing emails in the company's voice.", + "Summarizes long Slack threads and surfaces decisions and action items.", + "Reads design docs and points out the assumptions and the risky bits.", + "Generates SQL against the analytics warehouse from a plain-English question.", + "Triages new bug reports — classifies severity, finds duplicates, and assigns.", + "Writes release notes from a list of merged PR titles.", + "Cross-references Jira tickets and surfaces blocked dependencies.", + "Helps onboard new engineers by answering 'where does X live?' questions.", + "Reviews pull requests for naming, structure, and style consistency.", + "Drafts incident postmortems from log excerpts and timeline notes.", + "Translates marketing copy into different audience voices.", + "Walks runbooks step by step, asking before each destructive action.", + "Reads the customer-success knowledge base and answers tier-1 tickets.", + "Explains an unfamiliar SQL query — joins, CTEs, window functions.", + "Reviews quarterly OKR drafts for measurability and ambition.", + "Builds the outline of a sales demo from a list of pain points.", + "Tightens taglines — shorter, sharper, fewer adjectives.", + "Sketches an architecture diagram outline from a design doc.", + "Surfaces churn-risk signals from a list of recent customer emails.", + "Answers HR / benefits FAQ from the employee handbook.", + "Reads RFCs and writes the executive summary at the top.", + "Indexes API documentation and answers 'how do I do X' questions.", + "Drafts response templates for support tickets matching common patterns.", + "Generates test cases for a function or endpoint from its signature.", + "Reviews threat models against OWASP top-10 categories.", + "Plans data migrations — pre-checks, batch sizing, rollback steps.", + "Reads incident-channel logs and produces a concise 
five-line summary.", + "Brainstorms blog post angles given a working title.", + "Helps interviewers stay structured — drafts notes, scores, follow-ups.", +] + + +# --- helpers -------------------------------------------------------------- + + +def _pick(rng: random.Random, items: Sequence, k_min: int, k_max: int) -> list: + """Return between k_min and k_max random items (without replacement). + + Tolerates `items` being shorter than k_max — caps at available length. + """ + if not items: + return [] + upper = min(k_max, len(items)) + k = rng.randint(k_min, upper) + if k <= 0: + return [] + return rng.sample(list(items), k) + + +def _resolve_target_user(session: Session, email: str | None) -> User | None: + if email: + user = session.scalar(select(User).where(User.email == email)) + if user is None: + print(f"No user with email {email!r} found.", file=sys.stderr) + return user + # No email given — prefer an admin user, fall back to any user. + admin = session.scalar(select(User).where(User.role == UserRole.ADMIN).limit(1)) + if admin is not None: + return admin + return session.scalar(select(User).limit(1)) + + +def _pick_other_user(session: Session, target_user_id) -> User | None: + """Find a user other than the target to own the "shared with you" rows.""" + return session.scalar(select(User).where(User.id != target_user_id).limit(1)) + + +def _clear(session: Session, prefix: str) -> int: + """Soft-delete by name prefix is risky if a real persona shares the + prefix. We assert prefix is non-empty and unmistakably synthetic. + """ + if not prefix or len(prefix) < 3: + print( + f"Refusing to clear with suspiciously short prefix {prefix!r}.", + file=sys.stderr, + ) + sys.exit(2) + personas = session.scalars( + select(Persona).where(Persona.name.startswith(prefix)) + ).all() + n = 0 + for p in personas: + # Hard delete — these are synthetic seed rows, not user data. + # Junction rows clean up via cascade configured on the model. 
+ session.delete(p) + n += 1 + session.commit() + return n + + +# --- main ----------------------------------------------------------------- + + +def main() -> None: + _abort_if_pointed_at_prod() + + ap = argparse.ArgumentParser(description=__doc__.splitlines()[0]) + ap.add_argument("--count", type=int, default=50, help="How many to create.") + ap.add_argument( + "--email", + help="Target user email — the 'me' for testing. Default: first admin user.", + ) + ap.add_argument( + "--prefix", + default="[seed] ", + help="Name prefix so seeded rows are easy to spot / clear. (default: '[seed] ')", + ) + ap.add_argument( + "--clear", + action="store_true", + help="Delete previously seeded personas (by --prefix) and exit.", + ) + ap.add_argument( + "--seed", + type=int, + default=42, + help="RNG seed — same seed = same data each run. Default: 42.", + ) + args = ap.parse_args() + + with SessionFactory() as session: + if args.clear: + n = _clear(session, args.prefix) + print(f"Cleared {n} seeded personas (prefix={args.prefix!r}).") + return + + target_user = _resolve_target_user(session, args.email) + if target_user is None: + print( + "No users in DB. Sign in to the app first so a user row " + "exists, then re-run.", + file=sys.stderr, + ) + sys.exit(1) + + other_user = _pick_other_user(session, target_user.id) + tools = list(session.scalars(select(Tool)).all()) + doc_sets = list(session.scalars(select(DocumentSet)).all()) + + rng = random.Random(args.seed) + + if args.count > len(_NAMES): + print( + f"--count={args.count} exceeds {len(_NAMES)} unique names; " + f"will cycle with numeric suffixes.", + file=sys.stderr, + ) + + # Yours: ~30%, Shared: ~20% (only if other_user exists), rest Featured. + yours_n = max(1, args.count * 30 // 100) + shared_n = args.count * 20 // 100 if other_user is not None else 0 + featured_n = args.count - yours_n - shared_n + + # Track which Yours rows land in the user's picker (half do). + # We'll mutate chosen_assistants at the end of the run. 
+ new_chosen_ids: list[int] = [] + + created = 0 + for i in range(args.count): + base_name = _NAMES[i % len(_NAMES)] + suffix = "" if i < len(_NAMES) else f" #{i // len(_NAMES) + 1}" + name = f"{args.prefix}{base_name}{suffix}" + desc = rng.choice(_DESCRIPTIONS) + persona_tools = _pick(rng, tools, 0, 3) + persona_docs = _pick(rng, doc_sets, 0, 2) + + if i < yours_n: + owner_id = target_user.id + is_public = False + shared_target = None + elif i < yours_n + shared_n: + # Owned by someone else, granted to target user via Persona__User. + owner_id = other_user.id if other_user else None + is_public = False + shared_target = target_user.id + else: + # Public / featured — no specific owner. + owner_id = None + is_public = True + shared_target = None + + persona = Persona( + name=name, + description=desc, + user_id=owner_id, + is_public=is_public, + # Required scalars on Persona — pick sensible defaults so + # the row is queryable by get_personas without errors. + llm_relevance_filter=False, + llm_filter_extraction=False, + recency_bias=RecencyBiasSetting.AUTO, + default_persona=False, + is_visible=True, + deleted=False, + num_chunks=None, + llm_model_provider_override=None, + llm_model_version_override=None, + starter_messages=None, + tools=persona_tools, + document_sets=persona_docs, + ) + session.add(persona) + session.flush() # populate persona.id + + if shared_target is not None: + session.add(Persona__User(persona_id=persona.id, user_id=shared_target)) + + # Half of "Yours" auto-land in the picker; the other half are + # available-to-add. Featured rows never auto-add (the user can + # add them from the gallery). Shared rows auto-add so the user + # sees their permitted assistants in chat immediately. + if i < yours_n and i % 2 == 0: + new_chosen_ids.append(persona.id) + elif yours_n <= i < yours_n + shared_n: + new_chosen_ids.append(persona.id) + + created += 1 + + # Merge with the target user's existing chosen_assistants (if any). 
+ # We APPEND so we don't disturb whatever order they already have. + if new_chosen_ids: + existing = list(target_user.chosen_assistants or []) + target_user.chosen_assistants = existing + new_chosen_ids + + session.commit() + + print(f"Created {created} personas under prefix {args.prefix!r}.") + print(f" Target user : {target_user.email}") + if other_user is not None: + print(f" Shared-from user : {other_user.email}") + print(f" Yours : {yours_n}") + print(f" Shared with you : {shared_n}") + print(f" Featured / public : {featured_n}") + print(f" Auto-added to picker: {len(new_chosen_ids)}") + print() + print("Open /assistants/gallery to see them. Run with --clear to wipe.") + + +if __name__ == "__main__": + main() diff --git a/backend/scripts/test_dask_distributed_e2e.py b/backend/scripts/test_dask_distributed_e2e.py new file mode 100644 index 00000000000..4ce389e4854 --- /dev/null +++ b/backend/scripts/test_dask_distributed_e2e.py @@ -0,0 +1,517 @@ +"""End-to-end test for the Dask-Distributed background topology. + +Spawns a real `dask scheduler` + N `dask worker` subprocesses on the +local machine, exercises the topology with synthetic tasks via the +`distributed.Client` API, and asserts the behaviors that matter for +the indexing-scaling design: + + POSITIVE + P1 All N workers register with the scheduler within a bounded + time window. + P2 M concurrent tasks run in parallel across workers — wall + time is bounded by ceil(M/N) × per_task_seconds, not M × + per_task_seconds. (This is THE assertion proving "multiple + workers pick work in parallel".) + P3 Tasks fan out across at least 2 distinct workers when + M > 1. (Catches a degenerate scheduler that pins everything + to one worker.) + + NEGATIVE + N1 Worker death mid-task — surviving workers continue accepting + new submissions; cluster doesn't deadlock. + N2 Connecting to a non-existent scheduler fails fast with a + clear error rather than hanging indefinitely. 
+ N3 Scheduler death — Client.submit() against a dead scheduler + raises within a bounded time, doesn't hang. + +The test is self-contained (no Postgres/Vespa/model-server needed) +and uses random ports per run so concurrent invocations don't +collide. Pass --runs N to repeat the whole suite N times — useful +for catching flakes. + +Usage: + cd backend + PYTHONPATH=$(pwd) python scripts/test_dask_distributed_e2e.py [--runs N] [--workers M] + +Exits 0 if every run passes, non-zero otherwise. +""" +from __future__ import annotations + +import argparse +import os +import socket +import subprocess +import sys +import time +from collections.abc import Iterator +from contextlib import closing +from contextlib import contextmanager +from pathlib import Path + +from dask.distributed import Client + + +# Path to the `dask` CLI that ships with the same venv we're running +# under. `sys.executable` always points at the active python, even +# when the venv was invoked directly without `source activate` (which +# leaves `.venv/bin` off PATH). Falling back to the bare name lets the +# test still work if `dask` is on PATH for some other reason. 
+_VENV_BIN = Path(sys.executable).parent +_DASK_CLI = str(_VENV_BIN / "dask") if (_VENV_BIN / "dask").exists() else "dask" + + +def _subprocess_env() -> dict[str, str]: + """Env for dask child processes — prepend the venv's bin so the + `dask` CLI (and anything else it shells out to) is resolvable.""" + env = os.environ.copy() + env["PATH"] = f"{_VENV_BIN}{os.pathsep}{env.get('PATH', '')}" + return env + + +_PASS = "\033[32mPASS\033[0m" +_FAIL = "\033[31mFAIL\033[0m" +_INFO = "\033[33mINFO\033[0m" + + +# --------------------------------------------------------------------------- +# Output helpers +# --------------------------------------------------------------------------- + + +def section(title: str) -> None: + print(f"\n=== {title} ===") + + +def ok(msg: str) -> None: + print(f" [{_PASS}] {msg}") + + +def fail(msg: str) -> None: + print(f" [{_FAIL}] {msg}") + + +def info(msg: str) -> None: + print(f" [{_INFO}] {msg}") + + +# --------------------------------------------------------------------------- +# Subprocess plumbing +# --------------------------------------------------------------------------- + + +def find_free_port() -> int: + """Pick a random unused TCP port. Used to avoid 8786 collisions + when the user runs multiple suites concurrently or alongside a + real dev stack.""" + with closing(socket.socket()) as s: + s.bind(("127.0.0.1", 0)) + return s.getsockname()[1] + + +def wait_for_port(host: str, port: int, timeout: float) -> bool: + """Poll until something accepts on host:port, or timeout.""" + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + try: + with closing(socket.create_connection((host, port), timeout=1.0)): + return True + except OSError: + time.sleep(0.2) + return False + + +def start_scheduler(port: int) -> subprocess.Popen: + """Start a dask scheduler bound to localhost. 
Dashboard is set to + a random ephemeral port so it doesn't fight with anything.""" + proc = subprocess.Popen( + [ + _DASK_CLI, + "scheduler", + "--host", + "127.0.0.1", + "--port", + str(port), + "--dashboard-address", + ":0", + ], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + env=_subprocess_env(), + ) + if not wait_for_port("127.0.0.1", port, timeout=20.0): + proc.kill() + raise RuntimeError(f"scheduler did not bind 127.0.0.1:{port} within 20s") + return proc + + +def start_worker(scheduler_addr: str) -> subprocess.Popen: + return subprocess.Popen( + [ + _DASK_CLI, + "worker", + scheduler_addr, + "--nworkers=1", + "--nthreads=1", + "--memory-limit=1GB", + ], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + env=_subprocess_env(), + ) + + +def kill(proc: subprocess.Popen, grace_seconds: float = 3.0) -> None: + if proc.poll() is not None: + return + proc.terminate() + try: + proc.wait(timeout=grace_seconds) + except subprocess.TimeoutExpired: + proc.kill() + + +@contextmanager +def cluster(num_workers: int) -> Iterator[tuple[str, list[subprocess.Popen]]]: + """Bring up a scheduler + N workers, hand back (addr, worker_procs). + Tears everything down on exit, even on exception.""" + sched_port = find_free_port() + scheduler_addr = f"tcp://127.0.0.1:{sched_port}" + sched_proc = start_scheduler(sched_port) + workers: list[subprocess.Popen] = [] + try: + for _ in range(num_workers): + workers.append(start_worker(scheduler_addr)) + yield scheduler_addr, workers + finally: + for w in workers: + kill(w) + kill(sched_proc) + + +def wait_for_workers(client: Client, expected: int, timeout: float) -> int: + """Poll the scheduler until it reports `expected` workers (or + timeout). 
Returns the actual count seen at the end.""" + deadline = time.monotonic() + timeout + last = 0 + while time.monotonic() < deadline: + last = len(client.scheduler_info()["workers"]) + if last >= expected: + return last + time.sleep(0.5) + return last + + +# --------------------------------------------------------------------------- +# Synthetic tasks (run inside dask-worker subprocesses) +# --------------------------------------------------------------------------- + + +def _sleep_task(duration: float) -> str: + """Sleep + return the worker's hostname so we can verify + distribution. Defined at module level so Dask can pickle it.""" + import socket as _socket + import time as _time + + _time.sleep(duration) + return _socket.gethostname() + + +def _quick_task(x: int) -> int: + """Trivial task to verify task plumbing without sleeping.""" + return x * 2 + + +# --------------------------------------------------------------------------- +# Test phases +# --------------------------------------------------------------------------- + + +def phase_setup(num_workers: int, scheduler_addr: str) -> tuple[Client, bool]: + """P1: every worker registers with the scheduler within a bounded + window. Returns (client, ok_flag).""" + section("Phase 1 — workers register with scheduler") + try: + client = Client(scheduler_addr, timeout=10) + except Exception as e: + fail(f"could not connect to scheduler: {e}") + return None, False # type: ignore[return-value] + seen = wait_for_workers(client, expected=num_workers, timeout=20.0) + if seen >= num_workers: + ok(f"scheduler reports {seen} worker(s) registered") + return client, True + fail( + f"only {seen}/{num_workers} workers registered after 20s; " + "did the workers crash on startup?" + ) + return client, False + + +def phase_parallelism(client: Client, num_workers: int) -> bool: + """P2: M concurrent tasks should run in parallel. 
+ + With M = 2 × num_workers and per-task sleep = 3s, sequential time + is M × 3 = 6×num_workers seconds; parallel time is 2 × 3 = 6 + seconds (plus scheduler overhead). We bound the wall time at + `ceil(M/N) × per_task + slack` and assert. + """ + section("Phase 2 — concurrent tasks run in parallel") + per_task = 3.0 + num_tasks = num_workers * 2 + expected_parallel_time = (num_tasks / num_workers) * per_task + # Allow generous overhead: scheduler dispatch, Python startup, + # GC, CI noise. 5s slack is plenty in practice. + upper_bound = expected_parallel_time + 5.0 + + start = time.monotonic() + futures = [ + client.submit(_sleep_task, per_task, pure=False) for _ in range(num_tasks) + ] + # gather() blocks until all are done; raises if any failed. + try: + results = client.gather(futures) + except Exception as e: + fail(f"gather() raised: {e}") + return False + elapsed = time.monotonic() - start + + if elapsed <= upper_bound: + ok( + f"{num_tasks} tasks × {per_task}s each finished in " + f"{elapsed:.1f}s (bound {upper_bound:.1f}s)" + ) + info( + f"sequential lower bound would be {num_tasks * per_task:.1f}s; " + f"parallelism is real." + ) + info(f"task return values (worker hostnames): {sorted(set(results))[:5]!r}") + return True + fail( + f"{num_tasks} tasks took {elapsed:.1f}s, expected <{upper_bound:.1f}s. " + "Tasks may be running sequentially — check that --nthreads=1 " + "isn't pinning everything to one worker." + ) + return False + + +def phase_distribution(client: Client) -> bool: + """P3: tasks land on at least 2 distinct workers.""" + section("Phase 3 — tasks distribute across workers") + futures = [client.submit(_sleep_task, 0.2, pure=False) for _ in range(20)] + results = client.gather(futures) + distinct_workers = set(results) + # `_sleep_task` returns hostname; in a single-host test all + # workers share a hostname. So instead of hostname-cardinality + # we ask the scheduler directly which workers ran tasks. 
+ who_has = client.scheduler_info()["workers"] + workers_used = set() + for fut in futures: + try: + who = client.who_has(fut).get(fut.key, ()) + workers_used.update(who) + except Exception: + pass + used_count = len(workers_used) if workers_used else len(distinct_workers) + if used_count >= 2: + ok(f"work spread across {used_count} workers (out of {len(who_has)})") + return True + info( + f"only {used_count} worker(s) used — possibly all tasks finished too " + "fast for the scheduler to spread, or the cluster is single-worker." + ) + # Don't hard-fail this with N=1 worker (degenerate); only fail if + # we expected spread. + return len(who_has) < 2 or False + + +def phase_worker_death( + client: Client, workers: list[subprocess.Popen], scheduler_addr: str +) -> bool: + """N1: kill a worker mid-task; surviving workers continue + accepting submissions and the scheduler doesn't deadlock.""" + section("Phase 4 — worker death does not deadlock the cluster") + if len(workers) < 2: + info("skipping — need ≥2 workers for this test") + return True + + # Submit a long task on each worker so at least one is busy when + # we kill it. + busy_futures = [ + client.submit(_sleep_task, 4.0, pure=False) for _ in range(len(workers)) + ] + time.sleep(0.5) # let the scheduler dispatch them + + # Pick the first live worker and kill it. + victim = None + for w in workers: + if w.poll() is None: + victim = w + break + if victim is None: + fail("no live workers to kill") + return False + info(f"killing worker pid={victim.pid} mid-task") + kill(victim) + + # The future on the killed worker will likely raise. We don't + # care which one fails; we care that the cluster STAYS USABLE. + # gather() with errors='skip' returns successful ones. + for f in busy_futures: + try: + f.result(timeout=10.0) + except Exception: + pass + + # Cluster usable test: submit a trivial task, gather, must + # succeed within a few seconds on a surviving worker. 
+ try: + result = client.submit(_quick_task, 21, pure=False).result(timeout=10.0) + except Exception as e: + fail(f"cluster unusable after worker death: {e}") + return False + if result == 42: + ok("cluster still serves new submissions after a worker died") + return True + fail(f"unexpected result {result} from quick task") + return False + + +def phase_unreachable_scheduler() -> bool: + """N2: connecting to a non-existent scheduler fails fast.""" + section("Phase 5 — connecting to a dead scheduler fails fast") + bogus_port = find_free_port() # nothing listening here + bogus_addr = f"tcp://127.0.0.1:{bogus_port}" + start = time.monotonic() + try: + # Short timeout — we'd rather see "couldn't connect" than hang. + Client(bogus_addr, timeout=3) + except Exception as e: + elapsed = time.monotonic() - start + if elapsed < 8.0: + ok( + f"Client({bogus_addr}) raised {type(e).__name__} in " + f"{elapsed:.1f}s (bounded as expected)" + ) + return True + fail(f"Client raised but took {elapsed:.1f}s — too slow for a fail-fast") + return False + fail("Client connected to a non-existent scheduler — expected an exception") + return False + + +def phase_scheduler_death(client: Client, sched_killer) -> bool: + """N3: scheduler death is observable to the client. After + sched_killer() runs, client.submit() must error within a bounded + time rather than hanging.""" + section("Phase 6 — scheduler death surfaces to client without hanging") + sched_killer() + # Give the client a moment to notice the dropped connection. 
+ time.sleep(2.0) + start = time.monotonic() + try: + f = client.submit(_quick_task, 1, pure=False) + f.result(timeout=10.0) + except Exception as e: + elapsed = time.monotonic() - start + ok( + f"submit/result against dead scheduler raised {type(e).__name__} " + f"in {elapsed:.1f}s (bounded)" + ) + return True + fail("submit/result succeeded against a dead scheduler — unexpected") + return False + + +# --------------------------------------------------------------------------- +# Driver +# --------------------------------------------------------------------------- + + +def run_once(num_workers: int) -> bool: + """One full pass of all phases. Returns True iff everything passed.""" + print(f"\n{'#' * 60}\n# Run start — {num_workers} workers\n{'#' * 60}") + sched_port = find_free_port() + scheduler_addr = f"tcp://127.0.0.1:{sched_port}" + sched_proc = start_scheduler(sched_port) + worker_procs: list[subprocess.Popen] = [] + overall_ok = True + client: Client | None = None + try: + for _ in range(num_workers): + worker_procs.append(start_worker(scheduler_addr)) + + client, ok_setup = phase_setup(num_workers, scheduler_addr) + if not ok_setup: + return False + + if not phase_parallelism(client, num_workers): + overall_ok = False + + if not phase_distribution(client): + overall_ok = False + + if not phase_worker_death(client, worker_procs, scheduler_addr): + overall_ok = False + + if not phase_unreachable_scheduler(): + overall_ok = False + + # Scheduler-death must run last — it kills the scheduler we + # were using and we'd have to restart it for any subsequent + # phase. 
+ def _kill_scheduler() -> None: + kill(sched_proc) + + if not phase_scheduler_death(client, _kill_scheduler): + overall_ok = False + + finally: + if client is not None: + try: + client.close() + except Exception: + pass + for w in worker_procs: + kill(w) + kill(sched_proc) + return overall_ok + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--runs", + type=int, + default=1, + help="How many times to repeat the full suite (default: 1)", + ) + parser.add_argument( + "--workers", + type=int, + default=3, + help="Number of dask-worker subprocesses per run (default: 3)", + ) + args = parser.parse_args() + + failures: list[int] = [] + for i in range(1, args.runs + 1): + print(f"\n{'=' * 60}\n=== Run {i}/{args.runs}\n{'=' * 60}") + try: + if not run_once(args.workers): + failures.append(i) + except Exception as e: + fail(f"run {i} crashed: {type(e).__name__}: {e}") + failures.append(i) + + print() + if not failures: + print(f"[{_PASS}] dask-distributed e2e: {args.runs} run(s), all passed") + return 0 + print( + f"[{_FAIL}] dask-distributed e2e: {len(failures)}/{args.runs} run(s) failed: " + f"{failures}" + ) + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/backend/tests/integration/__init__.py b/backend/tests/integration/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/backend/tests/integration/danswer/__init__.py b/backend/tests/integration/danswer/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/backend/tests/integration/danswer/file_store/__init__.py b/backend/tests/integration/danswer/file_store/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/backend/tests/integration/danswer/file_store/test_azure_blob_file_store.py b/backend/tests/integration/danswer/file_store/test_azure_blob_file_store.py new file mode 100644 index 00000000000..aef57b42e38 --- /dev/null +++ 
b/backend/tests/integration/danswer/file_store/test_azure_blob_file_store.py @@ -0,0 +1,99 @@ +"""Integration test for the Azure Blob file store — big-file round-trip. + +Verifies the real AzureBlobFileStore end-to-end against a live Blob endpoint +(Azurite emulator locally, or a real storage account): a LARGE file is +streamed up, streamed back down, and its bytes must match — exercising the +streaming/spool paths that the OOM crash exposed. Metadata lives in the +Postgres `file_store` table, so a reachable + migrated DB is also required. + +This test is SKIPPED unless AZURE_BLOB_CONNECTION_STRING is set, so it never +runs (or breaks) in environments without Blob configured. + +Run it (locally, against Azurite): + + # 1. Start Azurite (Azure Storage emulator): + docker run -d -p 10000:10000 mcr.microsoft.com/azure-storage/azurite \ + azurite-blob --blobHost 0.0.0.0 + # 2. Install the optional dep into your venv: + pip install azure-storage-blob==12.19.1 + # 3. Point the test at Azurite (well-known dev connection string) + your + # local Postgres (must have run `alembic upgrade head` for object_key): + export AZURE_BLOB_CONNECTION_STRING="DefaultEndpointsProtocol=http;\ +AccountName=devstoreaccount1;\ +AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;\ +BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;" + export AZURE_BLOB_CONTAINER=danswer-files-test + # 4. 
Run: + PYTHONPATH=$(pwd) pytest tests/integration/danswer/file_store/test_azure_blob_file_store.py -v +""" +import hashlib +import os +import uuid +from io import BytesIO + +import pytest +from sqlalchemy.orm import Session + +from danswer.configs.constants import FileOrigin +from danswer.db.engine import get_sqlalchemy_engine + +pytestmark = pytest.mark.skipif( + not os.environ.get("AZURE_BLOB_CONNECTION_STRING"), + reason="AZURE_BLOB_CONNECTION_STRING unset — Azure Blob integration test skipped.", +) + +# 40 MB — comfortably above MAX_IN_MEMORY_SIZE (30 MB), so the read path must +# spill to the spooled temp file rather than holding it all in memory. +BIG_SIZE = 40 * 1024 * 1024 + + +def _sha256(stream) -> str: + h = hashlib.sha256() + for chunk in iter(lambda: stream.read(1024 * 1024), b""): + h.update(chunk) + return h.hexdigest() + + +def test_azure_blob_big_file_round_trip() -> None: + from danswer.file_store.file_store import AzureBlobFileStore + + file_name = f"integration-test/big-{uuid.uuid4()}.bin" + content = os.urandom(BIG_SIZE) + expected = hashlib.sha256(content).hexdigest() + + with Session(get_sqlalchemy_engine()) as db_session: + store = AzureBlobFileStore(db_session=db_session) + try: + # --- streaming upload --- + store.save_file( + file_name=file_name, + content=BytesIO(content), + display_name="big upload integration test", + file_origin=FileOrigin.OTHER, + file_type="application/octet-stream", + ) + + # --- streaming download (use_tempfile=True → spools to disk) --- + got = store.read_file(file_name, mode="b", use_tempfile=True) + assert _sha256(got) == expected, "round-tripped bytes differ (streamed)" + + # --- in-memory download path too --- + got2 = store.read_file(file_name, mode="b") + assert got2.read() == content, "round-tripped bytes differ (in-memory)" + + # --- metadata row points at Blob, not a lobj --- + from danswer.db.pg_file_store import get_pgfilestore_by_file_name + + record = get_pgfilestore_by_file_name(file_name, 
db_session) + assert record.object_key is not None + assert record.lobj_oid is None + finally: + # Always clean up the blob + metadata row. + try: + store.delete_file(file_name) + except Exception: + pass + + # --- deletion removed it --- + with pytest.raises(Exception): + store.read_file(file_name, mode="b") diff --git a/backend/tests/unit/danswer/db/test_persona_cache.py b/backend/tests/unit/danswer/db/test_persona_cache.py new file mode 100644 index 00000000000..a2160da450a --- /dev/null +++ b/backend/tests/unit/danswer/db/test_persona_cache.py @@ -0,0 +1,490 @@ +"""Unit tests for ``danswer.db.persona_cache``. + +What we lock down here: + +1. **Filter parity vs SQL.** The Python filter in + ``_filter_personas_for_user`` must match the OR-block in + :func:`danswer.db.persona.get_personas` for every representative + permission shape — public, direct-user grant, group grant, and the + negative (none of the above). If either filter drifts, users see the + wrong assistants. Each case here corresponds 1:1 to an SQL branch. + +2. **Read path** with cache enabled: + - Miss → DB call → Redis SET (with TTL) + - Hit → no DB call (the perf promise) + - Per-user-groups miss/hit independently of the global personas miss/hit + +3. **Read path** with cache disabled: + - Always reads the DB; never touches Redis. + - ``include_deleted=True`` always falls through to DB even when the + cache is otherwise enabled — we deliberately don't cache that + less-common shape. + +4. **Invalidation:** + - ``invalidate_personas_all`` deletes the right Redis key. + - ``invalidate_user_groups(uid)`` deletes the per-user key. + - Both short-circuit (no Redis call) when the cache is disabled — + mutation paths shouldn't pay ambient cost in the off state. + +5. **Fail-open** on Redis errors: + - GET error → treated as miss → DB read → no crash. + - SET / DELETE errors swallowed with a log; calling code sees nothing. 
+ +Redis is stubbed with a tiny in-memory fake; the inner DB function and +``PersonaSnapshot.from_model`` are patched. No real Postgres or Redis. +""" +from __future__ import annotations + +import unittest +import uuid +from typing import Any +from unittest.mock import MagicMock +from unittest.mock import patch + +from danswer.db import persona_cache as pc + + +# ---------- shared fakes ---------- + + +class _FakeRedis: + """In-memory Redis fake covering get/set/delete only. + + Stores bytes the same way redis-py does so the cache module's + JSON encode/decode actually runs in tests. + """ + + def __init__(self) -> None: + self.store: dict[str, bytes] = {} + self.get_calls: list[str] = [] + self.set_calls: list[tuple[str, Any, int | None]] = [] + self.delete_calls: list[str] = [] + + def get(self, key: str) -> bytes | None: + self.get_calls.append(key) + return self.store.get(key) + + def set(self, key: str, value: Any, ex: int | None = None) -> bool: + self.set_calls.append((key, value, ex)) + if isinstance(value, str): + self.store[key] = value.encode("utf-8") + elif isinstance(value, bytes): + self.store[key] = value + else: + self.store[key] = str(value).encode("utf-8") + return True + + def delete(self, *keys: str) -> int: + removed = 0 + for k in keys: + self.delete_calls.append(k) + if k in self.store: + del self.store[k] + removed += 1 + return removed + + +class _FakePersonaSnapshot: + """Stand-in for ``PersonaSnapshot`` for filter tests only. + + The real Pydantic model has ~20 required fields; the filter function + touches just three of them. We use a duck-typed mock so test cases + stay focused on permission semantics, not Pydantic field plumbing. 
+ """ + + def __init__( + self, + *, + persona_id: int, + is_public: bool, + user_ids_with_access: list[uuid.UUID], + group_ids_with_access: list[int], + ) -> None: + self.id = persona_id + self.is_public = is_public + # Match PersonaSnapshot.users: list[MinimalUserSnapshot] (has .id) + self.users = [MagicMock(id=uid) for uid in user_ids_with_access] + # Match PersonaSnapshot.groups: list[int] + self.groups = group_ids_with_access + + +# ---------- filter parity ---------- + + +class TestFilterParityVsSqlOrBlock(unittest.TestCase): + """One test per SQL branch in get_personas's OR-filter. + + SQL (paraphrased): + Persona.is_public + OR Persona.id IN (Persona__User where user_id = U) + OR Persona.id IN (Persona__UserGroup where group_id IN ) + + Each case below isolates one branch; the last asserts the negative. + """ + + def setUp(self) -> None: + self.user_id = uuid.uuid4() + self.other_user_id = uuid.uuid4() + self.user_group_ids = [10, 20] + + def test_public_persona_always_visible(self) -> None: + """Branch 1: ``is_public`` → visible regardless of grants. This + is the most-traveled path and must stay correct even when the + user has no direct or group grant.""" + p = _FakePersonaSnapshot( + persona_id=1, + is_public=True, + user_ids_with_access=[], + group_ids_with_access=[], + ) + result = pc._filter_personas_for_user([p], self.user_id, self.user_group_ids) + self.assertEqual([x.id for x in result], [1]) + + def test_direct_user_grant_visible(self) -> None: + """Branch 2: not public, but the user is in the persona's + ``users`` list. 
Mirrors a row in Persona__User.""" + p = _FakePersonaSnapshot( + persona_id=2, + is_public=False, + user_ids_with_access=[self.user_id], + group_ids_with_access=[], + ) + result = pc._filter_personas_for_user([p], self.user_id, self.user_group_ids) + self.assertEqual([x.id for x in result], [2]) + + def test_group_grant_visible_if_user_in_one_of_those_groups(self) -> None: + """Branch 3: not public, no direct grant, but a group the user + belongs to has access. Mirrors a row in Persona__UserGroup + joined with User__UserGroup.""" + p = _FakePersonaSnapshot( + persona_id=3, + is_public=False, + user_ids_with_access=[], + group_ids_with_access=[20, 999], # 20 is one of the user's groups + ) + result = pc._filter_personas_for_user([p], self.user_id, self.user_group_ids) + self.assertEqual([x.id for x in result], [3]) + + def test_no_access_hidden(self) -> None: + """Negative: not public, not in users, no overlapping group → + must be filtered out. If any branch leaks into this case we have + a permission bug.""" + p = _FakePersonaSnapshot( + persona_id=4, + is_public=False, + user_ids_with_access=[self.other_user_id], # different user + group_ids_with_access=[999, 888], # no overlap with [10, 20] + ) + result = pc._filter_personas_for_user([p], self.user_id, self.user_group_ids) + self.assertEqual(result, []) + + def test_mixed_list_returns_only_visible(self) -> None: + """A realistic mix: 4 personas, only the first 3 should pass + the filter (one per branch + one denied). 
Verifies that the + denial path doesn't accidentally short-circuit later visible + items in the list.""" + personas = [ + _FakePersonaSnapshot( + persona_id=1, + is_public=True, + user_ids_with_access=[], + group_ids_with_access=[], + ), + _FakePersonaSnapshot( + persona_id=2, + is_public=False, + user_ids_with_access=[self.user_id], + group_ids_with_access=[], + ), + _FakePersonaSnapshot( + persona_id=3, + is_public=False, + user_ids_with_access=[], + group_ids_with_access=[10], + ), + _FakePersonaSnapshot( + persona_id=4, + is_public=False, + user_ids_with_access=[self.other_user_id], + group_ids_with_access=[888], + ), + ] + result = pc._filter_personas_for_user( + personas, self.user_id, self.user_group_ids + ) + self.assertEqual(sorted(x.id for x in result), [1, 2, 3]) + + def test_user_with_no_groups_still_sees_public_and_direct_grants(self) -> None: + """Edge case: user belongs to zero groups. The group branch + contributes nothing, but public + direct grants must still + work — otherwise zero-group users get a broken assistant list.""" + personas = [ + _FakePersonaSnapshot( + persona_id=1, + is_public=True, + user_ids_with_access=[], + group_ids_with_access=[], + ), + _FakePersonaSnapshot( + persona_id=2, + is_public=False, + user_ids_with_access=[self.user_id], + group_ids_with_access=[], + ), + _FakePersonaSnapshot( + persona_id=3, + is_public=False, + user_ids_with_access=[], + group_ids_with_access=[10], + ), + ] + result = pc._filter_personas_for_user(personas, self.user_id, []) + self.assertEqual(sorted(x.id for x in result), [1, 2]) + + +# ---------- read path ---------- + + +class TestUserGroupCache(unittest.TestCase): + """The per-user group-ids cache: cheap query, big aggregate win.""" + + def test_miss_then_hit_only_one_db_read(self) -> None: + """First call hits the DB, subsequent calls within TTL serve + from Redis. 
Locks in the central performance promise of the + per-user side of the cache.""" + fake = _FakeRedis() + db_session = MagicMock() + rows = MagicMock() + rows.all.return_value = [10, 20, 30] + db_session.scalars.return_value = rows + user_id = uuid.uuid4() + + with patch.object(pc, "PERSONA_CACHE_ENABLED", True), patch.object( + pc, "PERSONA_CACHE_TTL_SECONDS", 60 + ), patch.object(pc, "get_redis_client", return_value=fake): + first = pc._get_user_group_ids_cached(user_id, db_session) + second = pc._get_user_group_ids_cached(user_id, db_session) + + self.assertEqual(first, [10, 20, 30]) + self.assertEqual(second, [10, 20, 30]) + self.assertEqual( + db_session.scalars.call_count, + 1, + "second lookup must come from Redis, not the DB", + ) + + def test_set_uses_configured_ttl(self) -> None: + """The TTL is the safety net for missed busts — if it isn't + applied, a stale entry could live forever after a missed + invalidation. Lock down that ``ex=`` is the configured value. + """ + fake = _FakeRedis() + db_session = MagicMock() + rows = MagicMock() + rows.all.return_value = [] + db_session.scalars.return_value = rows + + with patch.object(pc, "PERSONA_CACHE_ENABLED", True), patch.object( + pc, "PERSONA_CACHE_TTL_SECONDS", 1234 + ), patch.object(pc, "get_redis_client", return_value=fake): + pc._get_user_group_ids_cached(uuid.uuid4(), db_session) + + self.assertEqual(len(fake.set_calls), 1) + _key, _val, ex = fake.set_calls[0] + self.assertEqual(ex, 1234) + + +# ---------- routing / disabled mode ---------- + + +class TestGetPersonasForUserCached(unittest.TestCase): + def test_disabled_falls_through_to_get_personas(self) -> None: + """With the flag off, the wrapper must NOT call Redis at all — + it must behave exactly like the previous direct-DB code path. + Important so enabling/disabling the feature is a clean toggle. 
+ """ + db_session = MagicMock() + snap = MagicMock() + + with patch.object(pc, "PERSONA_CACHE_ENABLED", False), patch( + "danswer.db.persona.get_personas", return_value=[MagicMock()] + ) as mock_get_personas, patch( + "danswer.db.persona_cache.PersonaSnapshot.from_model", return_value=snap + ), patch.object( + pc, "get_redis_client" + ) as mock_client: + result = pc.get_personas_for_user_cached( + user_id=uuid.uuid4(), db_session=db_session + ) + + self.assertEqual(result, [snap]) + mock_get_personas.assert_called_once() + mock_client.assert_not_called() + + def test_include_deleted_true_bypasses_cache_even_when_enabled(self) -> None: + """We deliberately don't cache the ``include_deleted=True`` shape — + keeps the cache key set small and avoids accidental mis-keying. + Locks down that this path skips Redis entirely. + """ + db_session = MagicMock() + with patch.object(pc, "PERSONA_CACHE_ENABLED", True), patch( + "danswer.db.persona.get_personas", return_value=[] + ) as mock_get_personas, patch( + "danswer.db.persona_cache.PersonaSnapshot.from_model" + ), patch.object( + pc, "get_redis_client" + ) as mock_client: + pc.get_personas_for_user_cached( + user_id=uuid.uuid4(), + db_session=db_session, + include_deleted=True, + ) + + mock_get_personas.assert_called_once() + # include_deleted=True was passed through to the DB read + self.assertTrue(mock_get_personas.call_args.kwargs["include_deleted"]) + mock_client.assert_not_called() + + def test_admin_call_returns_unfiltered_global_cache(self) -> None: + """``user_id=None`` is the admin / no-auth case. The cache + already holds the full list with no permission filter, so we + skip the Python filter step. Locks down the fast path for + admin endpoints that share the same cache. 
+ """ + all_snaps = [ + _FakePersonaSnapshot( + persona_id=i, + is_public=False, + user_ids_with_access=[], + group_ids_with_access=[], + ) + for i in [1, 2, 3] + ] + with patch.object(pc, "PERSONA_CACHE_ENABLED", True), patch.object( + pc, "_get_all_personas_cached", return_value=all_snaps + ) as mock_get_all, patch.object( + pc, "_get_user_group_ids_cached" + ) as mock_get_groups: + result = pc.get_personas_for_user_cached( + user_id=None, db_session=MagicMock() + ) + + self.assertEqual([x.id for x in result], [1, 2, 3]) + mock_get_all.assert_called_once() + # Critically, we did NOT look up groups for the admin path. + mock_get_groups.assert_not_called() + + +# ---------- invalidation ---------- + + +class TestInvalidation(unittest.TestCase): + def test_invalidate_personas_all_deletes_right_key(self) -> None: + """The bust call must target ``personas:all:not_deleted``. Any + drift between this key and the SET key in the read path would + produce a stuck cache.""" + fake = _FakeRedis() + with patch.object(pc, "PERSONA_CACHE_ENABLED", True), patch.object( + pc, "get_redis_client", return_value=fake + ): + pc.invalidate_personas_all() + self.assertIn("danswer:personas:all:not_deleted", fake.delete_calls) + + def test_invalidate_user_groups_deletes_per_user_key(self) -> None: + """Each user gets their own key. The bust must include the + user_id in string form (UUIDs are not JSON-stringified + consistently otherwise).""" + fake = _FakeRedis() + uid = uuid.uuid4() + with patch.object(pc, "PERSONA_CACHE_ENABLED", True), patch.object( + pc, "get_redis_client", return_value=fake + ): + pc.invalidate_user_groups(uid) + self.assertIn(f"danswer:personas:groups:{uid}", fake.delete_calls) + + def test_invalidate_when_disabled_short_circuits(self) -> None: + """When the flag is off, mutation paths must not pay a Redis + round-trip cost. 
Without this, every assistant edit would touch + Redis even on a deployment that's opted out.""" + with patch.object(pc, "PERSONA_CACHE_ENABLED", False), patch.object( + pc, "get_redis_client" + ) as mock_client: + pc.invalidate_personas_all() + pc.invalidate_user_groups(uuid.uuid4()) + mock_client.assert_not_called() + + def test_redis_error_during_bust_is_swallowed(self) -> None: + """If the bust call fails (Redis down, network blip), we don't + want to roll back the user's mutation — the DB write already + committed. Loud log, no exception.""" + bad = MagicMock() + bad.delete.side_effect = RuntimeError("redis exploded") + with patch.object(pc, "PERSONA_CACHE_ENABLED", True), patch.object( + pc, "get_redis_client", return_value=bad + ): + # Both must complete without raising. + pc.invalidate_personas_all() + pc.invalidate_user_groups(uuid.uuid4()) + + +# ---------- fail-open on read ---------- + + +class TestFailOpenOnRedisRead(unittest.TestCase): + def test_redis_get_error_treated_as_miss(self) -> None: + """Redis GET exploding (timeout, conn refused) must NOT + propagate — the wrapper falls through to a direct DB read so + a Redis outage degrades latency, not availability. + + We stub _safe_set out: the set path's round-trip serialization + (s.json() → json.loads) requires a real PersonaSnapshot. Here + we're verifying the GET-error fallback, not the SET path. + """ + bad = MagicMock() + bad.get.side_effect = RuntimeError("connection refused") + db_session = MagicMock() + snap = MagicMock(is_public=True, users=[], groups=[]) + # The cache module round-trips via json.loads(s.json()) before + # the SET — needs a real JSON string here. 
+ snap.json.return_value = '{"id":1,"is_public":true}' + + with patch.object(pc, "PERSONA_CACHE_ENABLED", True), patch.object( + pc, "get_redis_client", return_value=bad + ), patch("danswer.db.persona.get_personas", return_value=[MagicMock()]), patch( + "danswer.db.persona_cache.PersonaSnapshot.from_model", return_value=snap + ), patch.object( + pc, "_safe_set" + ): + # Must not raise; must return the DB result. + result = pc._get_all_personas_cached(db_session) + + self.assertEqual(result, [snap]) + + def test_corrupt_cache_entry_treated_as_miss(self) -> None: + """Non-JSON bytes under our key (legacy format, manual SET, + schema migration race) must not crash. Fall through to DB and + overwrite the corrupt entry on the next SET. (Same _safe_set + stub rationale as above.) + """ + fake = _FakeRedis() + fake.store["danswer:personas:all:not_deleted"] = b"not-json-at-all" + db_session = MagicMock() + snap = MagicMock(is_public=True, users=[], groups=[]) + # The cache module round-trips via json.loads(s.json()) before + # the SET — needs a real JSON string here. 
+ snap.json.return_value = '{"id":1,"is_public":true}' + + with patch.object(pc, "PERSONA_CACHE_ENABLED", True), patch.object( + pc, "get_redis_client", return_value=fake + ), patch("danswer.db.persona.get_personas", return_value=[MagicMock()]), patch( + "danswer.db.persona_cache.PersonaSnapshot.from_model", return_value=snap + ), patch.object( + pc, "_safe_set" + ): + result = pc._get_all_personas_cached(db_session) + + self.assertEqual(result, [snap]) + + +if __name__ == "__main__": + unittest.main() diff --git a/backend/tests/unit/danswer/dynamic_configs/__init__.py b/backend/tests/unit/danswer/dynamic_configs/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/backend/tests/unit/danswer/dynamic_configs/test_redis_cached_store.py b/backend/tests/unit/danswer/dynamic_configs/test_redis_cached_store.py new file mode 100644 index 00000000000..0da4077086f --- /dev/null +++ b/backend/tests/unit/danswer/dynamic_configs/test_redis_cached_store.py @@ -0,0 +1,353 @@ +"""Unit tests for ``RedisCachedDynamicConfigStore`` — the read-through / +write-through Redis cache wrapper around any ``DynamicConfigStore``. + +The behaviour we lock down here is the contract the rest of the app +relies on: + + 1. **Read-through:** first ``load`` reads the inner store and + repopulates Redis; subsequent ``load``s hit Redis only and never + touch the inner store. + 2. **Write-through:** ``store`` writes the inner store first, then + refreshes Redis with the new value + TTL so other replicas see + the change without waiting for the TTL to expire. + 3. **Delete invalidates:** ``delete`` removes the inner row and clears + Redis. Inner store is removed first — a Redis success that arrives + before an inner failure must not leave Redis caching a value the + source of truth no longer has. + 4. **Fail-open:** any ``RedisError`` is logged and silently swallowed. + ``GET`` failures become misses; ``SET``/``DEL`` failures don't + bubble up. 
The point is that a Redis outage degrades latency, not + availability. + 5. **Encrypted values are never cached plaintext.** ``store(..., encrypt=True)`` + invalidates the Redis entry rather than writing plaintext into it, + so the encryption-at-rest guarantee isn't silently bypassed. + 6. **Cache miss vs None:** the wrapper distinguishes "Redis returned + ``nil``" (miss) from "Redis returned the JSON literal ``null``" + (cached None value). Both look like Python ``None`` if you're + careless; we test that a cached ``None`` is served from Redis + without re-hitting the inner store. + +The inner store and the Redis client are mocks — no real Postgres or +Redis required. +""" +from __future__ import annotations + +import json +import unittest +from typing import Any +from unittest.mock import MagicMock + +from redis import RedisError + +from danswer.dynamic_configs.interface import ConfigNotFoundError +from danswer.dynamic_configs.store import RedisCachedDynamicConfigStore + + +# We can't import the real prefix without importing redis_pool, which +# imports app_configs and is fine — but expressing it here documents +# the on-disk key shape we expect. +_EXPECTED_PREFIX = "danswer:kv:" + + +def _make_inner() -> MagicMock: + """Inner DynamicConfigStore mock with the methods we exercise.""" + inner = MagicMock() + inner.store = MagicMock() + inner.load = MagicMock() + inner.delete = MagicMock() + return inner + + +def _make_redis() -> MagicMock: + """In-memory fake Redis covering get/set/delete only. + + Stores raw bytes the same way redis-py does, so JSON encode/decode + in the wrapper actually runs. 
+ """ + storage: dict[str, bytes] = {} + + fake = MagicMock() + + def fake_get(key: str) -> bytes | None: + return storage.get(key) + + def fake_set(key: str, value: Any, ex: int | None = None) -> bool: + if isinstance(value, str): + storage[key] = value.encode("utf-8") + elif isinstance(value, bytes): + storage[key] = value + else: + storage[key] = str(value).encode("utf-8") + # ``ex`` is observed via the mock for the TTL assertion below. + return True + + def fake_delete(*keys: str) -> int: + removed = 0 + for k in keys: + if k in storage: + del storage[k] + removed += 1 + return removed + + fake.get.side_effect = fake_get + fake.set.side_effect = fake_set + fake.delete.side_effect = fake_delete + fake._storage = storage # expose for assertions + return fake + + +class TestRedisCachedDynamicConfigStore(unittest.TestCase): + # ------------- read-through ------------- + + def test_load_miss_then_hit_only_one_inner_read(self) -> None: + """First ``load`` is a Redis miss → falls through to the inner + store and repopulates Redis. The second ``load`` must serve from + Redis alone — the inner store must NOT be touched again. This + is the central performance promise of the cache. + """ + inner = _make_inner() + inner.load.return_value = {"feature_flag": True} + redis = _make_redis() + store = RedisCachedDynamicConfigStore( + inner=inner, ttl_seconds=60, client_factory=lambda: redis + ) + + first = store.load("settings") + second = store.load("settings") + + self.assertEqual(first, {"feature_flag": True}) + self.assertEqual(second, {"feature_flag": True}) + self.assertEqual( + inner.load.call_count, + 1, + "second load must come from Redis, not the inner store", + ) + + def test_load_populates_redis_with_ttl(self) -> None: + """On a miss, the wrapper must SET into Redis with an expiry — + otherwise the cache would never evict and a value written by + another pod would be served stale forever. 
+ """ + inner = _make_inner() + inner.load.return_value = {"a": 1} + redis = _make_redis() + store = RedisCachedDynamicConfigStore( + inner=inner, ttl_seconds=120, client_factory=lambda: redis + ) + + store.load("k1") + + redis.set.assert_called_once() + args, kwargs = redis.set.call_args + self.assertEqual(args[0], _EXPECTED_PREFIX + "k1") + # JSON-serialised payload, with the TTL kwarg matching the ctor. + self.assertEqual(json.loads(args[1]), {"a": 1}) + self.assertEqual(kwargs.get("ex"), 120) + + def test_load_propagates_not_found_without_caching_miss(self) -> None: + """If the inner store has nothing, the wrapper must raise + ``ConfigNotFoundError`` and NOT cache the absence — negative + caching has its own correctness gotchas (a later ``store`` would + race with the stale "missing" entry), and the plan deliberately + defers it. + """ + inner = _make_inner() + inner.load.side_effect = ConfigNotFoundError + redis = _make_redis() + store = RedisCachedDynamicConfigStore( + inner=inner, ttl_seconds=60, client_factory=lambda: redis + ) + + with self.assertRaises(ConfigNotFoundError): + store.load("absent") + # No SET — we didn't cache the miss. + redis.set.assert_not_called() + + def test_cached_none_is_distinguished_from_miss(self) -> None: + """``None`` is a legal stored value (the KV store can hold a + JSON ``null``). We must serve a cached ``null`` without falling + through to the inner store — otherwise every read of a None + value is effectively uncached. 
+ """ + inner = _make_inner() + inner.load.return_value = None + redis = _make_redis() + store = RedisCachedDynamicConfigStore( + inner=inner, ttl_seconds=60, client_factory=lambda: redis + ) + + first = store.load("nullable") # miss → inner → cache + second = store.load("nullable") # hit + + self.assertIsNone(first) + self.assertIsNone(second) + self.assertEqual( + inner.load.call_count, + 1, + "second load of a cached None must hit Redis, not the inner store", + ) + + # ------------- write-through / invalidation ------------- + + def test_store_writes_inner_then_refreshes_redis(self) -> None: + """``store`` must write the inner store first (source of truth), + then refresh Redis. Order matters: a Redis success after an + inner failure would leave Redis ahead of the source of truth. + """ + inner = _make_inner() + redis = _make_redis() + call_order: list[str] = [] + inner.store.side_effect = lambda *a, **kw: call_order.append("inner") + # Wrap the existing side_effect to record set ordering. + original_set = redis.set.side_effect + + def recording_set(*a: Any, **kw: Any) -> Any: + call_order.append("redis") + return original_set(*a, **kw) + + redis.set.side_effect = recording_set + + store = RedisCachedDynamicConfigStore( + inner=inner, ttl_seconds=30, client_factory=lambda: redis + ) + store.store("settings", {"v": 7}) + + inner.store.assert_called_once_with("settings", {"v": 7}, encrypt=False) + self.assertEqual( + call_order, ["inner", "redis"], "inner store must be written first" + ) + # Subsequent load returns the new value from Redis only. + inner.load.reset_mock() + result = store.load("settings") + self.assertEqual(result, {"v": 7}) + inner.load.assert_not_called() + + def test_encrypted_store_invalidates_redis(self) -> None: + """``encrypt=True`` means "Postgres holds this encrypted." 
We + must NOT mirror plaintext into Redis (which has no encryption + guarantee), and we must invalidate any prior plaintext entry + in case the value was just switched to encrypted. + """ + inner = _make_inner() + redis = _make_redis() + # Pre-seed a stale plaintext entry to confirm it gets cleared. + redis._storage[_EXPECTED_PREFIX + "secret"] = b'"old"' + + store = RedisCachedDynamicConfigStore( + inner=inner, ttl_seconds=60, client_factory=lambda: redis + ) + store.store("secret", "new-value", encrypt=True) + + inner.store.assert_called_once_with("secret", "new-value", encrypt=True) + redis.set.assert_not_called() # no plaintext mirror + redis.delete.assert_called_once_with(_EXPECTED_PREFIX + "secret") + self.assertNotIn(_EXPECTED_PREFIX + "secret", redis._storage) + + def test_delete_clears_inner_and_redis(self) -> None: + """``delete`` clears both layers. Inner first — same ordering + invariant as ``store``: Redis must never be cleaner than the + source of truth. + """ + inner = _make_inner() + redis = _make_redis() + redis._storage[_EXPECTED_PREFIX + "k"] = b'{"x":1}' + call_order: list[str] = [] + inner.delete.side_effect = lambda *a, **kw: call_order.append("inner") + original_delete = redis.delete.side_effect + + def recording_delete(*a: Any, **kw: Any) -> Any: + call_order.append("redis") + return original_delete(*a, **kw) + + redis.delete.side_effect = recording_delete + + store = RedisCachedDynamicConfigStore( + inner=inner, ttl_seconds=60, client_factory=lambda: redis + ) + store.delete("k") + + inner.delete.assert_called_once_with("k") + self.assertEqual(call_order, ["inner", "redis"]) + self.assertNotIn(_EXPECTED_PREFIX + "k", redis._storage) + + # ------------- fail-open behaviour ------------- + + def test_redis_get_error_falls_through_to_inner(self) -> None: + """Redis ``GET`` exploding (timeout, conn refused, network + partition) must NOT propagate. 
The wrapper degrades to a plain + read against the inner store so a Redis outage costs latency, + not availability. + """ + inner = _make_inner() + inner.load.return_value = "from-postgres" + redis = MagicMock() + redis.get.side_effect = RedisError("connection refused") + + store = RedisCachedDynamicConfigStore( + inner=inner, ttl_seconds=60, client_factory=lambda: redis + ) + result = store.load("k") + + self.assertEqual(result, "from-postgres") + inner.load.assert_called_once_with("k") + + def test_redis_set_error_does_not_break_store(self) -> None: + """SET failing must not bubble out of ``store`` — the inner + write already succeeded, returning an error to the caller would + lie about the durability of the write. + """ + inner = _make_inner() + redis = MagicMock() + redis.set.side_effect = RedisError("OOM") + + store = RedisCachedDynamicConfigStore( + inner=inner, ttl_seconds=60, client_factory=lambda: redis + ) + # Must not raise. + store.store("k", {"v": 1}) + inner.store.assert_called_once() + + def test_corrupt_cache_entry_treated_as_miss(self) -> None: + """If something else wrote non-JSON bytes under our key (legacy + format, manual ``SET``, race during a schema change), the next + read must not crash — it must fall through to the inner store + and overwrite the corrupt entry on the next SET. + """ + inner = _make_inner() + inner.load.return_value = "ok" + redis = _make_redis() + redis._storage[_EXPECTED_PREFIX + "k"] = b"not-json-at-all" + + store = RedisCachedDynamicConfigStore( + inner=inner, ttl_seconds=60, client_factory=lambda: redis + ) + result = store.load("k") + + self.assertEqual(result, "ok") + inner.load.assert_called_once_with("k") + # Wrapper repopulated Redis with the good value. 
+ self.assertEqual(json.loads(redis._storage[_EXPECTED_PREFIX + "k"]), "ok") + + def test_non_json_serialisable_value_skips_cache_but_inner_still_written( + self, + ) -> None: + """If a caller hands us a Python object json can't serialise + (sets, complex numbers, etc.), the inner store still gets it — + Redis just silently skips the cache write. The inner is the + source of truth; the cache is best-effort. + """ + inner = _make_inner() + redis = _make_redis() + store = RedisCachedDynamicConfigStore( + inner=inner, ttl_seconds=60, client_factory=lambda: redis + ) + + # set() is not JSON-serialisable. + store.store("k", {1, 2, 3}) # type: ignore[arg-type] + + inner.store.assert_called_once() + redis.set.assert_not_called() + + +if __name__ == "__main__": + unittest.main() diff --git a/backend/tests/unit/danswer/indexing/__init__.py b/backend/tests/unit/danswer/indexing/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/backend/tests/unit/danswer/indexing/test_get_doc_ids_to_update.py b/backend/tests/unit/danswer/indexing/test_get_doc_ids_to_update.py new file mode 100644 index 00000000000..5556285018f --- /dev/null +++ b/backend/tests/unit/danswer/indexing/test_get_doc_ids_to_update.py @@ -0,0 +1,73 @@ +"""Unit tests for get_doc_ids_to_update — the content-hash + timestamp skip +logic that decides which documents actually need (re)indexing. + +Covers the new content-hash skip (so timestamp churn like Salesforce's +LastModifiedDate doesn't force a full re-index) AND the backward-compatible +fallback to the original doc_updated_at behavior for rows with no stored hash. 
+""" +from datetime import datetime +from datetime import timezone +from types import SimpleNamespace + +from danswer.configs.constants import DocumentSource +from danswer.connectors.models import Document +from danswer.connectors.models import Section +from danswer.indexing.indexing_pipeline import get_doc_ids_to_update + + +OLD = datetime(2024, 1, 1, tzinfo=timezone.utc) +NEW = datetime(2024, 6, 1, tzinfo=timezone.utc) + + +def _doc(doc_id: str, text: str, updated_at: datetime | None = NEW) -> Document: + return Document( + id=doc_id, + sections=[Section(text=text, link=None)], + source=DocumentSource.SALESFORCE, + semantic_identifier=doc_id, + metadata={}, + doc_updated_at=updated_at, + ) + + +def _db_doc(doc_id: str, content_hash: str | None, updated_at: datetime | None): + # get_doc_ids_to_update only reads .id, .indexed_content_hash, .doc_updated_at + return SimpleNamespace( + id=doc_id, indexed_content_hash=content_hash, doc_updated_at=updated_at + ) + + +def _ids(docs: list[Document]) -> set[str]: + return {d.id for d in docs} + + +def test_new_document_is_updatable() -> None: + doc = _doc("a", "hello") + assert _ids(get_doc_ids_to_update([doc], db_docs=[])) == {"a"} + + +def test_unchanged_content_is_skipped_even_when_timestamp_advances() -> None: + # The Salesforce case: LastModifiedDate moved forward but content is identical. + doc = _doc("a", "hello", updated_at=NEW) + db = _db_doc("a", content_hash=doc.get_content_hash(), updated_at=OLD) + assert get_doc_ids_to_update([doc], db_docs=[db]) == [] + + +def test_changed_content_is_updatable() -> None: + doc = _doc("a", "new text", updated_at=NEW) + stale_hash = _doc("a", "old text").get_content_hash() + db = _db_doc("a", content_hash=stale_hash, updated_at=OLD) + assert _ids(get_doc_ids_to_update([doc], db_docs=[db])) == {"a"} + + +def test_backcompat_null_hash_skips_when_not_newer() -> None: + # Pre-existing row (no stored hash): original updated_at behavior applies. 
+ doc = _doc("a", "hello", updated_at=OLD) + db = _db_doc("a", content_hash=None, updated_at=NEW) + assert get_doc_ids_to_update([doc], db_docs=[db]) == [] + + +def test_backcompat_null_hash_updates_when_newer() -> None: + doc = _doc("a", "hello", updated_at=NEW) + db = _db_doc("a", content_hash=None, updated_at=OLD) + assert _ids(get_doc_ids_to_update([doc], db_docs=[db])) == {"a"} diff --git a/backend/tests/unit/danswer/redis_layer/__init__.py b/backend/tests/unit/danswer/redis_layer/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/backend/tests/unit/danswer/redis_layer/test_redis_pool.py b/backend/tests/unit/danswer/redis_layer/test_redis_pool.py new file mode 100644 index 00000000000..a50e8d8bd7b --- /dev/null +++ b/backend/tests/unit/danswer/redis_layer/test_redis_pool.py @@ -0,0 +1,87 @@ +"""Unit tests for ``danswer.redis.redis_pool``. + +We exercise only what can be verified without a real Redis server: + + 1. The pool is a process-wide singleton — calling ``get_redis_client`` + repeatedly does not build a new ``ConnectionPool`` each time. + 2. ``reset_pool_for_tests`` forces the next ``get_redis_client`` to + rebuild — important so other tests can swap env vars and observe + the change. + 3. The global key prefix is the documented value. If this ever + changes silently it would orphan every cached entry in production + on the next deploy; lock it down with a string equality assertion. + +Live socket-level behaviour (pool sizing, TCP timeouts, SSL handshake) +is intentionally out of scope here — those need an integration test +against a real Redis. +""" +from __future__ import annotations + +import unittest +from unittest.mock import patch + +from danswer.redis import redis_pool + + +class TestRedisPool(unittest.TestCase): + def setUp(self) -> None: + # Each test starts with a fresh, unbuilt pool so the singleton + # state from earlier tests can't bleed in. 
+ redis_pool.reset_pool_for_tests() + + def tearDown(self) -> None: + redis_pool.reset_pool_for_tests() + + def test_prefix_is_stable(self) -> None: + """The on-the-wire key prefix is part of the persistence + contract — every cached entry in production starts with it. + Renaming it requires intentional migration, not a drive-by edit. + """ + self.assertEqual(redis_pool.DANSWER_REDIS_KEY_PREFIX, "danswer:") + + def test_pool_built_lazily_and_reused(self) -> None: + """``get_redis_client`` must build the pool on first use and + reuse it after. We assert this by counting calls to the pool + constructor under a patch. + """ + with patch.object( + redis_pool, "ConnectionPool", wraps=redis_pool.ConnectionPool + ) as mock_pool: + client_a = redis_pool.get_redis_client() + client_b = redis_pool.get_redis_client() + client_c = redis_pool.get_redis_client() + + self.assertEqual( + mock_pool.call_count, + 1, + "ConnectionPool should be constructed exactly once across " + "repeated get_redis_client() calls", + ) + # Different Redis() instances are fine — they share the pool. + self.assertIs( + client_a.connection_pool, + client_b.connection_pool, + "all clients must share the singleton pool", + ) + self.assertIs(client_b.connection_pool, client_c.connection_pool) + + def test_reset_for_tests_drops_singleton(self) -> None: + """After ``reset_pool_for_tests`` the next ``get_redis_client`` + must rebuild — otherwise tests can't observe config changes. 
+ """ + with patch.object( + redis_pool, "ConnectionPool", wraps=redis_pool.ConnectionPool + ) as mock_pool: + redis_pool.get_redis_client() + redis_pool.reset_pool_for_tests() + redis_pool.get_redis_client() + + self.assertEqual( + mock_pool.call_count, + 2, + "reset_pool_for_tests should force the next call to rebuild", + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/backend/tests/unit/danswer/server/__init__.py b/backend/tests/unit/danswer/server/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/backend/tests/unit/danswer/server/middleware/__init__.py b/backend/tests/unit/danswer/server/middleware/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/backend/tests/unit/danswer/server/middleware/test_request_rate_limit.py b/backend/tests/unit/danswer/server/middleware/test_request_rate_limit.py new file mode 100644 index 00000000000..95c06699b78 --- /dev/null +++ b/backend/tests/unit/danswer/server/middleware/test_request_rate_limit.py @@ -0,0 +1,344 @@ +"""Unit tests for the Redis-backed per-user request rate limiter. + +What we lock down here is the contract a chat endpoint relies on when it +attaches ``Depends(check_message_request_rate_limit)``: + + 1. **Default off:** with the feature flag down OR both window limits + at 0, the dependency must short-circuit before touching Redis. + This matters because the dependency is mounted on the hot path of + every chat message — any cost in the off case is paid on every + request forever. + 2. **Per-window enforcement:** the Nth request through the same + bucket exceeds the cap and 429s; the same caller in the next + bucket gets a fresh window. + 3. **Per-user isolation:** two distinct users must not share counters + even if their requests interleave in the same bucket. + 4. 
**Anonymous keying by IP:** unauth'd callers are bucketed by + X-Forwarded-For first hop (matching the ingress shape), falling + back to the socket peer; otherwise the dependency skips. + 5. **EXPIRE NX semantics:** the first ``INCR`` of a bucket sets the + TTL; subsequent ``INCR`` calls must NOT extend it (a sliding TTL + would make the bucket never reset and effectively cap *forever* + after the first burst). + 6. **Fail-open:** any Redis error allows the request through. The + limiter is protection, not authorization — a Redis blip is not a + reason to wedge the chat path. + 7. **Retry-After header:** a 429 carries seconds-until-bucket-rollover + so well-behaved clients can back off precisely. + +Redis is mocked at the ``get_redis_client`` boundary; the FastAPI +``Request`` and ``User`` are dummy objects. No HTTP layer, no real +Redis — pure dependency-function tests. +""" +from __future__ import annotations + +import unittest +import uuid +from typing import Any +from unittest.mock import MagicMock +from unittest.mock import patch + +from fastapi import HTTPException + +from danswer.server.middleware import request_rate_limit as rrl + + +# ---------- shared fakes ---------- + + +class _FakePipeline: + """Minimal stand-in for redis.client.Pipeline. + + We only need .incr, .expire, .execute — that's the full surface + used in _enforce_window. We also remember every .expire(..., nx=) + call so the NX-semantics test can inspect it. 
+ """ + + def __init__(self, storage: dict[str, int], expiry: dict[str, bool]) -> None: + self._storage = storage + self._expiry = expiry + self._ops: list[tuple[str, Any, Any]] = [] + + def incr(self, key: str, amount: int = 1) -> "_FakePipeline": + self._ops.append(("incr", key, amount)) + return self + + def expire(self, key: str, seconds: int, nx: bool = False) -> "_FakePipeline": + self._ops.append(("expire", key, (seconds, nx))) + return self + + def execute(self) -> list[Any]: + results: list[Any] = [] + for op, key, arg in self._ops: + if op == "incr": + self._storage[key] = self._storage.get(key, 0) + int(arg) + results.append(self._storage[key]) + elif op == "expire": + seconds, nx = arg + if nx and self._expiry.get(key): + results.append(False) # already has TTL — refused + else: + self._expiry[key] = True + results.append(True) + self._ops.clear() + return results + + +class _FakeRedis: + """Fake Redis client exposing only the methods the limiter uses.""" + + def __init__(self) -> None: + self._counters: dict[str, int] = {} + self._has_expiry: dict[str, bool] = {} + self.expire_calls: list[tuple[str, int, bool]] = [] + + def pipeline(self) -> _FakePipeline: + pipe = _FakePipeline(self._counters, self._has_expiry) + # Wrap pipe.expire to record every call for inspection. 
+ original_expire = pipe.expire + + def recording_expire(key: str, seconds: int, nx: bool = False) -> Any: + self.expire_calls.append((key, seconds, nx)) + return original_expire(key, seconds, nx=nx) + + pipe.expire = recording_expire # type: ignore[method-assign] + return pipe + + +def _make_request( + headers: dict[str, str] | None = None, peer_host: str | None = None +) -> MagicMock: + """Minimal Starlette Request stand-in.""" + req = MagicMock() + req.headers = headers or {} + req.client = MagicMock(host=peer_host) if peer_host is not None else None + return req + + +def _make_user(uid: uuid.UUID | None = None) -> MagicMock: + user = MagicMock() + user.id = uid or uuid.uuid4() + return user + + +# ---------- tests ---------- + + +class TestRequestRateLimitDisabled(unittest.TestCase): + """When disabled, the dependency must do nothing — not even + construct a Redis client. The hot path can't afford ambient cost + that callers thought they'd avoided by turning the flag off. + """ + + def test_flag_off_short_circuits_before_redis(self) -> None: + request = _make_request() + user = _make_user() + with patch.object(rrl, "REQUEST_RATE_LIMIT_ENABLED", False), patch.object( + rrl, "get_redis_client" + ) as mock_client: + rrl.check_message_request_rate_limit(request=request, user=user) + mock_client.assert_not_called() + + def test_both_windows_zero_short_circuits_before_redis(self) -> None: + """Flag on but no limits configured = nothing to enforce. The + operator probably enabled the flag and hasn't picked numbers + yet; we must not pay the Redis round-trip in that interim + state. 
+ """ + request = _make_request() + user = _make_user() + with patch.object(rrl, "REQUEST_RATE_LIMIT_ENABLED", True), patch.object( + rrl, "REQUEST_RATE_LIMIT_PER_MINUTE", 0 + ), patch.object(rrl, "REQUEST_RATE_LIMIT_PER_HOUR", 0), patch.object( + rrl, "get_redis_client" + ) as mock_client: + rrl.check_message_request_rate_limit(request=request, user=user) + mock_client.assert_not_called() + + +class TestRequestRateLimitEnforcement(unittest.TestCase): + def _patch_enabled(self, per_min: int = 0, per_hour: int = 0) -> Any: + """Helper: turn the limiter on with the given window caps.""" + return _MultiPatch( + (rrl, "REQUEST_RATE_LIMIT_ENABLED", True), + (rrl, "REQUEST_RATE_LIMIT_PER_MINUTE", per_min), + (rrl, "REQUEST_RATE_LIMIT_PER_HOUR", per_hour), + ) + + def test_within_limit_allows_request(self) -> None: + """Under the cap = no 429. Sanity, but also makes sure the + ``count > limit`` boundary is strict (the Nth allowed request + is the *limit*-th, not limit-minus-one). + """ + fake = _FakeRedis() + request = _make_request() + user = _make_user() + with self._patch_enabled(per_min=3), patch.object( + rrl, "get_redis_client", return_value=fake + ): + for _ in range(3): + rrl.check_message_request_rate_limit(request=request, user=user) + # No exception raised — all three under the cap of 3. + + def test_request_above_cap_raises_429_with_retry_after(self) -> None: + """The (limit+1)-th call in a bucket must 429, and the response + must carry Retry-After. Clients without Retry-After back off + with guesswork; we should hand them the exact answer. 
+ """ + fake = _FakeRedis() + request = _make_request() + user = _make_user() + with self._patch_enabled(per_min=2), patch.object( + rrl, "get_redis_client", return_value=fake + ): + rrl.check_message_request_rate_limit(request=request, user=user) + rrl.check_message_request_rate_limit(request=request, user=user) + with self.assertRaises(HTTPException) as ctx: + rrl.check_message_request_rate_limit(request=request, user=user) + self.assertEqual(ctx.exception.status_code, 429) + retry_after = ctx.exception.headers and ctx.exception.headers.get("Retry-After") + self.assertIsNotNone(retry_after) + self.assertTrue(retry_after.isdigit()) # type: ignore[union-attr] + # 0 < retry_after <= window. (Equal to window iff time landed + # exactly on the boundary — possible but rare, allow it.) + self.assertGreaterEqual(int(retry_after), 0) # type: ignore[arg-type] + self.assertLessEqual(int(retry_after), 60) # type: ignore[arg-type] + + def test_two_users_have_independent_counters(self) -> None: + """Distinct user UUIDs must NOT share a bucket. If they did, a + loud user could 429 a quiet one. + """ + fake = _FakeRedis() + request = _make_request() + alice = _make_user() + bob = _make_user() + with self._patch_enabled(per_min=1), patch.object( + rrl, "get_redis_client", return_value=fake + ): + rrl.check_message_request_rate_limit(request=request, user=alice) + # Bob's first request must succeed even though Alice already + # used her one allowed call in this bucket. + rrl.check_message_request_rate_limit(request=request, user=bob) + # Alice's second request hits her cap — should 429. + with self.assertRaises(HTTPException) as ctx: + rrl.check_message_request_rate_limit(request=request, user=alice) + self.assertEqual(ctx.exception.status_code, 429) + + def test_next_bucket_resets_count(self) -> None: + """When time advances past the window boundary, the bucket key + changes (it's keyed by ``floor(time / window)``) and the new + bucket starts at 0. 
Without this, the limit is forever rather + than per-window. + """ + fake = _FakeRedis() + request = _make_request() + user = _make_user() + with self._patch_enabled(per_min=1), patch.object( + rrl, "get_redis_client", return_value=fake + ): + with patch.object(rrl.time, "time", return_value=1_000_000.0): + rrl.check_message_request_rate_limit(request=request, user=user) + # Same bucket -> over cap. + with self.assertRaises(HTTPException): + rrl.check_message_request_rate_limit(request=request, user=user) + # Jump 90s — new minute bucket. + with patch.object(rrl.time, "time", return_value=1_000_000.0 + 90): + rrl.check_message_request_rate_limit(request=request, user=user) + + def test_expire_uses_nx_so_ttl_is_set_only_once(self) -> None: + """Every ``INCR`` is paired with ``EXPIRE`` — but if NX weren't + set, each increment would push the expiry forward and the + bucket would never roll over. Lock down ``nx=True`` so a future + refactor doesn't accidentally make every limited window become + a permanent ban after the first burst. + """ + fake = _FakeRedis() + request = _make_request() + user = _make_user() + with self._patch_enabled(per_min=10), patch.object( + rrl, "get_redis_client", return_value=fake + ): + for _ in range(3): + rrl.check_message_request_rate_limit(request=request, user=user) + # All EXPIRE calls used nx=True. (At least one happened.) + self.assertGreater(len(fake.expire_calls), 0) + for _key, _seconds, nx in fake.expire_calls: + self.assertTrue( + nx, "EXPIRE must use NX so TTL isn't extended on every INCR" + ) + + def test_anonymous_user_keyed_by_xff_first_hop(self) -> None: + """Anonymous traffic keys on the first XFF hop (the real client + IP behind nginx), not on the LB's own peer address. Otherwise + every anonymous request would share one bucket. + """ + fake = _FakeRedis() + # Two distinct anonymous IPs in XFF. 
+ req_a = _make_request(headers={"x-forwarded-for": "10.1.1.1, 10.0.0.1"}) + req_b = _make_request(headers={"x-forwarded-for": "10.1.1.2, 10.0.0.1"}) + with self._patch_enabled(per_min=1), patch.object( + rrl, "get_redis_client", return_value=fake + ): + rrl.check_message_request_rate_limit(request=req_a, user=None) + # Different XFF first hop => different bucket, allowed. + rrl.check_message_request_rate_limit(request=req_b, user=None) + # Same XFF as req_a => second hit, exceeds cap. + with self.assertRaises(HTTPException): + rrl.check_message_request_rate_limit(request=req_a, user=None) + + def test_anonymous_with_no_ip_skips_silently(self) -> None: + """If neither XFF nor a client peer is present, we have nothing + to attribute the request to. Skipping is the only honest + option — bucketing everyone under "" would silently flatten + every anonymous client into one counter. + """ + fake = _FakeRedis() + request = _make_request(headers={}, peer_host=None) + with self._patch_enabled(per_min=1), patch.object( + rrl, "get_redis_client", return_value=fake + ) as mock_client: + # Call twice — both must pass; the limiter must not even + # have constructed a key to enforce against. + rrl.check_message_request_rate_limit(request=request, user=None) + rrl.check_message_request_rate_limit(request=request, user=None) + mock_client.assert_not_called() + + def test_redis_error_fails_open(self) -> None: + """A pipeline that explodes (timeout, broken connection, + whatever) must NOT raise out of the dependency. The chat path + keeps serving — a request slipped past the limiter is better + than a chat outage caused by the limiter itself. 
+ """ + bad_client = MagicMock() + bad_client.pipeline.side_effect = RuntimeError("redis exploded") + request = _make_request() + user = _make_user() + with self._patch_enabled(per_min=1), patch.object( + rrl, "get_redis_client", return_value=bad_client + ): + # Two calls back-to-back — neither raises, because the + # limiter swallows the Redis error. + rrl.check_message_request_rate_limit(request=request, user=user) + rrl.check_message_request_rate_limit(request=request, user=user) + + +class _MultiPatch: + """Context manager that applies several ``patch.object`` patches at + once. Used to make per-test "turn on the limiter with these + windows" blocks readable. + """ + + def __init__(self, *patches: tuple[Any, str, Any]) -> None: + self._patches = [patch.object(obj, attr, val) for obj, attr, val in patches] + + def __enter__(self) -> None: + for p in self._patches: + p.start() + + def __exit__(self, *exc: Any) -> None: + for p in reversed(self._patches): + p.stop() + + +if __name__ == "__main__": + unittest.main() diff --git a/darwin-kubernetes/api_server-service-deployment.yaml b/darwin-kubernetes/api_server-service-deployment.yaml deleted file mode 100644 index 2959e1409a2..00000000000 --- a/darwin-kubernetes/api_server-service-deployment.yaml +++ /dev/null @@ -1,86 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: api-server-service -spec: - selector: - app: api-server - ports: - - name: api-server-port - protocol: TCP - port: 80 - targetPort: 8080 - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: api-server-deployment -spec: - replicas: 1 - selector: - matchLabels: - app: api-server - template: - metadata: - labels: - app: api-server - spec: - containers: - - name: api-server - image: sfbrdevhelmweacr.azurecr.io/danswer/danswer-backend:vha-5 - imagePullPolicy: IfNotPresent - command: - - "/bin/sh" - - "-c" - - | - #sleep 1000000 && - alembic upgrade heads && - echo "Starting Danswer Api Server" && - uvicorn 
danswer.main:app --host 0.0.0.0 --port 8080 - ports: - - containerPort: 8080 - # There are some extra values since this is shared between services - # There are no conflicts though, extra env variables are simply ignored - env: - - name: POSTGRES_USER - valueFrom: - secretKeyRef: - name: danswer-secrets - key: postgres_user - - name: POSTGRES_PASSWORD - valueFrom: - secretKeyRef: - name: danswer-secrets - key: postgres_password - # --- Microsoft / Entra ID OIDC --- - - name: OAUTH_CLIENT_ID - valueFrom: - secretKeyRef: - name: danswer-secrets - key: oauth_client_id - - name: OAUTH_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: danswer-secrets - key: oauth_client_secret - - name: USER_AUTH_SECRET - valueFrom: - secretKeyRef: - name: danswer-secrets - key: user_auth_secret - envFrom: - - configMapRef: - name: env-configmap - volumeMounts: - - name: dynamic-storage - mountPath: /home/storage - - name: file-connector-storage - mountPath: /home/file_connector_storage - volumes: - - name: dynamic-storage - persistentVolumeClaim: - claimName: dynamic-pvc - - name: file-connector-storage - persistentVolumeClaim: - claimName: file-connector-pvc diff --git a/darwin-kubernetes/background-deployment.yaml b/darwin-kubernetes/background-deployment.yaml deleted file mode 100644 index 538cf007bd4..00000000000 --- a/darwin-kubernetes/background-deployment.yaml +++ /dev/null @@ -1,63 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: background-deployment -spec: - replicas: 1 - strategy: - type: Recreate - selector: - matchLabels: - app: background - template: - metadata: - labels: - app: background - spec: - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: agentpool - operator: In - values: - - indexcpu - containers: - - name: background - image: sfbrdevhelmweacr.azurecr.io/danswer/danswer-backend:vha-5 - imagePullPolicy: IfNotPresent - command: ["/usr/bin/supervisord"] - # There are 
some extra values since this is shared between services - # There are no conflicts though, extra env variables are simply ignored - env: - - name: POSTGRES_USER - valueFrom: - secretKeyRef: - key: postgres_user - name: danswer-secrets - - name: POSTGRES_PASSWORD - valueFrom: - secretKeyRef: - key: postgres_password - name: danswer-secrets - envFrom: - - configMapRef: - name: env-configmap - volumeMounts: - - name: dynamic-storage - mountPath: /home/storage - - name: file-connector-storage - mountPath: /home/file_connector_storage - tolerations: - - effect: NoSchedule - key: darwin - operator: Equal - value: indexing - volumes: - - name: dynamic-storage - persistentVolumeClaim: - claimName: dynamic-pvc - - name: file-connector-storage - persistentVolumeClaim: - claimName: file-connector-pvc diff --git a/darwin-kubernetes/backup/persistent-volumes.yaml.bkp b/darwin-kubernetes/backup/persistent-volumes.yaml.bkp deleted file mode 100644 index 700a6ebffd6..00000000000 --- a/darwin-kubernetes/backup/persistent-volumes.yaml.bkp +++ /dev/null @@ -1,21 +0,0 @@ -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: dynamic-pvc -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 5Gi ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: file-connector-pvc -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 5Gi diff --git a/darwin-kubernetes/backup/postgres-service-deployment.yaml.bkp b/darwin-kubernetes/backup/postgres-service-deployment.yaml.bkp deleted file mode 100644 index f33efa2bafd..00000000000 --- a/darwin-kubernetes/backup/postgres-service-deployment.yaml.bkp +++ /dev/null @@ -1,57 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: relational-db-service -spec: - selector: - app: relational-db - ports: - - protocol: TCP - port: 5432 - targetPort: 5432 - clusterIP: None ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: relational-db-statefulset -spec: - serviceName: 
relational-db-service - replicas: 1 - selector: - matchLabels: - app: relational-db - template: - metadata: - labels: - app: relational-db - spec: - containers: - - name: relational-db - image: postgres:15.2-alpine - env: - - name: POSTGRES_USER - valueFrom: - secretKeyRef: - name: danswer-secrets - key: postgres_user - - name: POSTGRES_PASSWORD - valueFrom: - secretKeyRef: - name: danswer-secrets - key: postgres_password - ports: - - containerPort: 5432 - volumeMounts: - - mountPath: /var/lib/postgresql/data - name: db-storage - subPath: postgres - volumeClaimTemplates: - - metadata: - name: db-storage - spec: - accessModes: ["ReadWriteOnce"] - resources: - requests: - # Adjust the storage request size as needed. - storage: 1Gi diff --git a/darwin-kubernetes/backup/psql.yaml.bkp b/darwin-kubernetes/backup/psql.yaml.bkp deleted file mode 100644 index f0c85154156..00000000000 --- a/darwin-kubernetes/backup/psql.yaml.bkp +++ /dev/null @@ -1,22 +0,0 @@ -apiVersion: v1 -kind: Pod -metadata: - name: postgresql-client - labels: - app: postgresql-client - annotations: - cluster-autoscaler.kubernetes.io/safe-to-evict: "true" -spec: - securityContext: - runAsNonRoot: true - supplementalGroups: [ 10001] - fsGroup: 10001 - containers: - - name: postgresql-client - image: andreswebs/postgresql-client - imagePullPolicy: Always - securityContext: - runAsUser: 1000 - stdin: true - tty: true - command: ["/bin/sh"] diff --git a/darwin-kubernetes/backup/vespa-service-deployment.yaml b/darwin-kubernetes/backup/vespa-service-deployment.yaml deleted file mode 100644 index 2841ea27545..00000000000 --- a/darwin-kubernetes/backup/vespa-service-deployment.yaml +++ /dev/null @@ -1,63 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: document-index-service -spec: - selector: - app: vespa - ports: - - name: vespa-tenant-port - protocol: TCP - port: 19071 - targetPort: 19071 - - name: vespa-port - protocol: TCP - port: 8081 - targetPort: 8081 - type: LoadBalancer ---- -apiVersion: 
apps/v1 -kind: StatefulSet -metadata: - name: vespa - labels: - app: vespa -spec: - replicas: 1 - serviceName: vespa - selector: - matchLabels: - app: vespa - template: - metadata: - labels: - app: vespa - spec: - containers: - - name: vespa - image: vespaengine/vespa:8.277.17 - imagePullPolicy: IfNotPresent - securityContext: - privileged: true - runAsUser: 0 - ports: - - containerPort: 19071 - - containerPort: 8081 - readinessProbe: - httpGet: - path: /state/v1/health - port: 19071 - scheme: HTTP - volumeMounts: - - name: vespa-storage - mountPath: /opt/vespa/var/ - volumeClaimTemplates: - - metadata: - name: vespa-storage - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - # Adjust the storage request size as needed. - storage: 200Gi diff --git a/darwin-kubernetes/env-configmap.yaml b/darwin-kubernetes/env-configmap.yaml deleted file mode 100644 index eb0cfb25312..00000000000 --- a/darwin-kubernetes/env-configmap.yaml +++ /dev/null @@ -1,96 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: env-configmap -data: - # Auth Setting, also check the secrets file - AUTH_TYPE: "oidc" # Microsoft / Entra ID OIDC (oauth_client_id/secret + user_auth_secret in danswer-secrets) - # Entra OIDC discovery doc; tenant id is the path segment. - OPENID_CONFIG_URL: "https://login.microsoftonline.com/d8353d2a-b153-4d17-8827-902c51f72357/v2.0/.well-known/openid-configuration" - # Comma-separated emails granted ADMIN on first sign-in (replaces the old Istio admin allowlist). 
- DEFAULT_ADMIN_EMAILS: "user1@uipath.com,user2@uipath.com" - ENCRYPTION_KEY_SECRET: "" # This should not be specified directly in the yaml, this is just for reference - SESSION_EXPIRE_TIME_SECONDS: "86400" # 1 Day Default - VALID_EMAIL_DOMAINS: "" # Can be something like danswer.ai, as an extra double-check - SMTP_SERVER: "" # For sending verification emails, if unspecified then defaults to 'smtp.gmail.com' - SMTP_PORT: "" # For sending verification emails, if unspecified then defaults to '587' - SMTP_USER: "" # 'your-email@company.com' - SMTP_PASS: "" # 'your-gmail-password' - EMAIL_FROM: "" # 'your-email@company.com' SMTP_USER missing used instead - # Gen AI Settings - GEN_AI_MODEL_PROVIDER: "custom" - GEN_AI_API_ENDPOINT: "https://alpha.uipath.com/llmgateway_/openai/deployments/gpt-4o-mini-2024-07-18/chat/completions?api-version=2024-06-01" - GEN_AI_IDENTITY_ENDPOINT: "https://alpha.uipath.com/identity_/connect/token" - GEN_AI_CLIENT_ID: "XXX" - GEN_AI_CLIENT_SECRET: "XXX" - GEN_AI_MODEL_VERSION: "" - FAST_GEN_AI_MODEL_VERSION: "" - GEN_AI_API_KEY: "" - GEN_AI_API_VERSION: "" - GEN_AI_LLM_PROVIDER_TYPE: "" - GEN_AI_MAX_TOKENS: "" - QA_TIMEOUT: "60" - MAX_CHUNKS_FED_TO_CHAT: "" - DISABLE_LLM_FILTER_EXTRACTION: "true" - DISABLE_LLM_CHUNK_FILTER: "true" - DISABLE_LLM_CHOOSE_SEARCH: "true" - DISABLE_LLM_QUERY_REPHRASE: "" - # Query Options - DOC_TIME_DECAY: "" - HYBRID_ALPHA: "" - EDIT_KEYWORD_QUERY: "" - MULTILINGUAL_QUERY_EXPANSION: "" - QA_PROMPT_OVERRIDE: "" - # Other Services - POSTGRES_HOST: "darwin-postgres.postgres.database.azure.com" - VESPA_CONFIG_SERVER_HOST: "vespa-configserver" - VESPA_HOST: "vespa-query" - VESPA_PORT: "8080" - VESPA_FEED_HOST: "vespa-feed" - VESPA_FEED_PORT: "8080" - # Don't change the NLP models unless you know what you're doing - DOCUMENT_ENCODER_MODEL: "" - NORMALIZE_EMBEDDINGS: "" - ASYM_QUERY_PREFIX: "" - ASYM_PASSAGE_PREFIX: "" - ENABLE_RERANKING_REAL_TIME_FLOW: "" - ENABLE_RERANKING_ASYNC_FLOW: "" - MODEL_SERVER_HOST: 
"inference-model-server-service" - MODEL_SERVER_PORT: "" - INDEXING_MODEL_SERVER_HOST: "indexing-model-server-service" - MIN_THREADS_ML_MODELS: "" - # Indexing Configs - NUM_INDEXING_WORKERS: "4" - ENABLED_CONNECTOR_TYPES: "" - DISABLE_INDEX_UPDATE_ON_SWAP: "" - DASK_JOB_CLIENT_ENABLED: "true" - CONTINUE_ON_CONNECTOR_FAILURE: "" - EXPERIMENTAL_CHECKPOINTING_ENABLED: "" - CONFLUENCE_CONNECTOR_LABELS_TO_SKIP: "" - JIRA_API_VERSION: "" - WEB_CONNECTOR_VALIDATE_URLS: "" - GONG_CONNECTOR_START_TIME: "" - NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP: "" - # DanswerBot SlackBot Configs - DANSWER_BOT_SLACK_APP_TOKEN: "" - DANSWER_BOT_SLACK_BOT_TOKEN: "" - DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER: "" - DANSWER_BOT_DISPLAY_ERROR_MSGS: "" - DANSWER_BOT_RESPOND_EVERY_CHANNEL: "" - DANSWER_BOT_DISABLE_COT: "" # Currently unused - OPSGENIE_API_KEY: "" - NOTIFY_SLACKBOT_NO_ANSWER: "" - # Logging - # Optional Telemetry, please keep it on (nothing sensitive is collected)? <3 - # https://docs.danswer.dev/more/telemetry - DISABLE_TELEMETRY: "true" - LOG_LEVEL: "info" - LOG_ALL_MODEL_INTERACTIONS: "" - LOG_VESPA_TIMING_INFORMATION: "" - # Shared or Non-backend Related - INTERNAL_URL: "http://api-server-service:80" # for web server - # MUST be the externally-reachable https origin — builds the OIDC redirect_uri - # and makes the session cookie Secure. Mismatch => AADSTS50011 redirect error. 
- WEB_DOMAIN: "https://darwin.westeurope.cloudapp.azure.com" # for web server and api server - DOMAIN: "darwin.westeurope.cloudapp.azure.com" # for nginx - APPLY_MIGRATIONS: "true" diff --git a/darwin-kubernetes/index_model_server-statefulset.yaml b/darwin-kubernetes/index_model_server-statefulset.yaml deleted file mode 100644 index 2fc1154f29e..00000000000 --- a/darwin-kubernetes/index_model_server-statefulset.yaml +++ /dev/null @@ -1,71 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: indexing-model-server-service -spec: - selector: - app: indexing-model-server - ports: - - name: indexing-model-server-port - protocol: TCP - port: 9000 - targetPort: 9000 - type: ClusterIP ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: indexing-model-server-statefulset -spec: - replicas: 2 - selector: - matchLabels: - app: indexing-model-server - name: indexing-model-server - serviceName: indexing-model-server-service - template: - metadata: - labels: - app: indexing-model-server - name: indexing-model-server - spec: - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: agentpool - operator: In - values: - - indexcpu - containers: - - name: indexing-model-server - image: danswer/danswer-model-server:v0.3.94 - imagePullPolicy: IfNotPresent - command: [ "uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000" ] - ports: - - containerPort: 9000 - envFrom: - - configMapRef: - name: env-configmap - env: - - name: INDEXING_ONLY - value: "True" - volumeMounts: - - name: indexing-model-storage - mountPath: /root/.cache - tolerations: - - effect: NoSchedule - key: darwin - operator: Equal - value: indexing - volumeClaimTemplates: - - metadata: - name: indexing-model-storage - spec: - accessModes: [ "ReadWriteOnce" ] - resources: - requests: - storage: 10Gi ---- - diff --git a/darwin-kubernetes/indexing_model_server-service-deployment.yaml 
b/darwin-kubernetes/indexing_model_server-service-deployment.yaml deleted file mode 100644 index 8cb8e0fe1dd..00000000000 --- a/darwin-kubernetes/indexing_model_server-service-deployment.yaml +++ /dev/null @@ -1,59 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: indexing-model-server-service -spec: - selector: - app: indexing-model-server - ports: - - name: indexing-model-server-port - protocol: TCP - port: 9000 - targetPort: 9000 - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: indexing-model-server-deployment -spec: - replicas: 1 - selector: - matchLabels: - app: indexing-model-server - template: - metadata: - labels: - app: indexing-model-server - spec: - containers: - - name: indexing-model-server - image: danswer/danswer-model-server:v0.3.94 - imagePullPolicy: IfNotPresent - command: [ "uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000" ] - ports: - - containerPort: 9000 - envFrom: - - configMapRef: - name: env-configmap - env: - - name: INDEXING_ONLY - value: "True" - volumeMounts: - - name: indexing-model-storage - mountPath: /root/.cache - volumes: - - name: indexing-model-storage - persistentVolumeClaim: - claimName: indexing-model-pvc ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: indexing-model-pvc -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 10Gi diff --git a/darwin-kubernetes/indexing_model_server-service-deployment_gpu.yaml b/darwin-kubernetes/indexing_model_server-service-deployment_gpu.yaml deleted file mode 100644 index 96bd9f04f69..00000000000 --- a/darwin-kubernetes/indexing_model_server-service-deployment_gpu.yaml +++ /dev/null @@ -1,69 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: indexing-model-server-service -spec: - selector: - app: indexing-model-server - ports: - - name: indexing-model-server-port - protocol: TCP - port: 9000 - targetPort: 9000 - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment 
-metadata: - name: indexing-model-server-deployment -spec: - replicas: 1 - selector: - matchLabels: - app: indexing-model-server - template: - metadata: - labels: - app: indexing-model-server - spec: - containers: - - name: indexing-model-server - image: danswer/danswer-model-server:v0.3.94 - imagePullPolicy: IfNotPresent - command: [ "uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000" ] - ports: - - containerPort: 9000 - envFrom: - - configMapRef: - name: env-configmap - env: - - name: INDEXING_ONLY - value: "True" - volumeMounts: - - name: indexing-model-storage - mountPath: /root/.cache - resources: - requests: - nvidia.com/gpu: 1 - limits: - nvidia.com/gpu: 1 - volumes: - - name: indexing-model-storage - persistentVolumeClaim: - claimName: indexing-model-pvc - tolerations: - - effect: NoSchedule - key: sku - operator: Equal - value: gpu ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: indexing-model-pvc -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 10Gi diff --git a/darwin-kubernetes/indexing_model_statefulset.yaml b/darwin-kubernetes/indexing_model_statefulset.yaml deleted file mode 100644 index 78f9984d04f..00000000000 --- a/darwin-kubernetes/indexing_model_statefulset.yaml +++ /dev/null @@ -1,69 +0,0 @@ -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: indexing-model-server-statefulset -spec: - replicas: 2 - selector: - matchLabels: - app: indexing-model-server - name: indexing-model-server - serviceName: indexing-model-server-service - template: - metadata: - labels: - app: indexing-model-server - name: indexing-model-server - spec: - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: agentpool - operator: In - values: - - indexcpu - containers: - - name: indexing-model-server - image: danswer/danswer-model-server:v0.3.94 - imagePullPolicy: IfNotPresent - command: [ "uvicorn", "model_server.main:app", 
"--host", "0.0.0.0", "--port", "9000" ] - ports: - - containerPort: 9000 - envFrom: - - configMapRef: - name: env-configmap - env: - - name: INDEXING_ONLY - value: "True" - volumeMounts: - - name: indexing-model-storage - mountPath: /root/.cache - tolerations: - - effect: NoSchedule - key: darwin - operator: Equal - value: indexing - volumeClaimTemplates: - - metadata: - name: indexing-model-storage - spec: - accessModes: [ "ReadWriteOnce" ] - resources: - requests: - storage: 10Gi ---- -apiVersion: v1 -kind: Service -metadata: - name: indexing-model-server-service -spec: - selector: - app: indexing-model-server - ports: - - name: indexing-model-server-port - protocol: TCP - port: 9000 - targetPort: 9000 - type: ClusterIP diff --git a/darwin-kubernetes/inference_model_server-service-deployment.yaml b/darwin-kubernetes/inference_model_server-service-deployment.yaml deleted file mode 100644 index db7788efca3..00000000000 --- a/darwin-kubernetes/inference_model_server-service-deployment.yaml +++ /dev/null @@ -1,70 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: inference-model-server-service -spec: - selector: - app: inference-model-server - ports: - - name: inference-model-server-port - protocol: TCP - port: 9000 - targetPort: 9000 - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: inference-model-server-deployment -spec: - replicas: 1 - selector: - matchLabels: - app: inference-model-server - template: - metadata: - labels: - app: inference-model-server - spec: - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: agentpool - operator: In - values: - - indexcpu - containers: - - name: inference-model-server - image: danswer/danswer-model-server:v0.3.94 - imagePullPolicy: IfNotPresent - command: [ "uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000" ] - ports: - - containerPort: 9000 - envFrom: - - configMapRef: - name: 
env-configmap - volumeMounts: - - name: inference-model-storage - mountPath: /root/.cache - volumes: - - name: inference-model-storage - persistentVolumeClaim: - claimName: inference-model-pvc - tolerations: - - effect: NoSchedule - key: darwin - operator: Equal - value: indexing ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: inference-model-pvc -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 3Gi diff --git a/darwin-kubernetes/inference_model_server-statefulset.yaml b/darwin-kubernetes/inference_model_server-statefulset.yaml deleted file mode 100644 index 24b0ce72cf7..00000000000 --- a/darwin-kubernetes/inference_model_server-statefulset.yaml +++ /dev/null @@ -1,66 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: inference-model-server-service -spec: - selector: - app: inference-model-server - ports: - - name: inference-model-server-port - protocol: TCP - port: 9000 - targetPort: 9000 - type: ClusterIP ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: inference-model-server-deployment -spec: - replicas: 1 - selector: - matchLabels: - app: inference-model-server - name: inference-model-server - serviceName: inference-model-server-service - template: - metadata: - labels: - app: inference-model-server - name: inference-model-server - spec: - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: agentpool - operator: In - values: - - indexcpu - containers: - - name: inference-model-server - image: danswer/danswer-model-server:v0.3.94 - imagePullPolicy: IfNotPresent - command: [ "uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000" ] - ports: - - containerPort: 9000 - envFrom: - - configMapRef: - name: env-configmap - volumeMounts: - - name: inference-model-storage - mountPath: /root/.cache - tolerations: - - effect: NoSchedule - key: darwin - operator: Equal - value: indexing - volumeClaimTemplates: - - 
metadata: - name: inference-model-storage - spec: - accessModes: [ "ReadWriteOnce" ] - resources: - requests: - storage: 3Gi diff --git a/darwin-kubernetes/nginx-configmap.yaml b/darwin-kubernetes/nginx-configmap.yaml deleted file mode 100644 index 08b945d599c..00000000000 --- a/darwin-kubernetes/nginx-configmap.yaml +++ /dev/null @@ -1,44 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: nginx-configmap -data: - nginx.conf: | - upstream api_server { - server api-server-service:80 fail_timeout=0; - } - - upstream web_server { - server web-server-service:80 fail_timeout=0; - } - - server { - listen 80; - server_name $$DOMAIN; - - client_max_body_size 5G; # Maximum upload size - - location ~ ^/api(.*)$ { - rewrite ^/api(/.*)$ $1 break; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - proxy_set_header X-Forwarded-Host $host; - proxy_set_header Host $host; - proxy_http_version 1.1; - proxy_buffering off; - proxy_redirect off; - proxy_pass http://api_server; - } - - location / { - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - proxy_set_header X-Forwarded-Host $host; - proxy_set_header Host $host; - proxy_http_version 1.1; - proxy_redirect off; - proxy_pass http://web_server; - } - } diff --git a/darwin-kubernetes/nginx-service-deployment.yaml b/darwin-kubernetes/nginx-service-deployment.yaml deleted file mode 100644 index 27b14794ee3..00000000000 --- a/darwin-kubernetes/nginx-service-deployment.yaml +++ /dev/null @@ -1,55 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: nginx-service -spec: - selector: - app: nginx - ports: - - name: http - protocol: TCP - port: 80 - targetPort: 80 - - name: danswer - protocol: TCP - port: 3000 - targetPort: 80 - type: LoadBalancer ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nginx-deployment 
-spec: - replicas: 1 - selector: - matchLabels: - app: nginx - template: - metadata: - labels: - app: nginx - spec: - containers: - - name: nginx - image: nginx:1.23.4-alpine - ports: - - containerPort: 80 - env: - - name: DOMAIN - valueFrom: - configMapKeyRef: - name: env-configmap - key: DOMAIN - volumeMounts: - - name: nginx-conf - mountPath: /etc/nginx/conf.d - command: - - /bin/sh - - -c - - | - while :; do sleep 6h & wait $$!; nginx -s reload; done & nginx -g "daemon off;" - volumes: - - name: nginx-conf - configMap: - name: nginx-configmap diff --git a/darwin-kubernetes/secrets.yaml b/darwin-kubernetes/secrets.yaml deleted file mode 100644 index 352ffd16d6a..00000000000 --- a/darwin-kubernetes/secrets.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Real secret values must NOT be committed. Fill the placeholders below and -# apply out-of-band (or move to a sealed-secret / external secret manager). -apiVersion: v1 -kind: Secret -metadata: - name: danswer-secrets -type: Opaque -stringData: - # --- Postgres --- - postgres_user: "postgres" - postgres_password: "" - - # --- Microsoft / Entra ID OIDC --- - # Application (client) ID — an identifier, not a secret. - oauth_client_id: "xxxx" - # Entra client secret — ROTATE the one shared in chat and paste the new one. - oauth_client_secret: "" - # Signs the fastapi-users session + OAuth state JWT. Generate once with: - # openssl rand -hex 32 - # MUST be identical across all replicas and stable across restarts/rollouts, - # or in-flight logins fail and existing sessions are invalidated. - user_auth_secret: "" diff --git a/deployment/docker_compose/docker-compose.dask-distributed.yml b/deployment/docker_compose/docker-compose.dask-distributed.yml new file mode 100644 index 00000000000..505edea9521 --- /dev/null +++ b/deployment/docker_compose/docker-compose.dask-distributed.yml @@ -0,0 +1,96 @@ +# Opt-in compose overlay that simulates the production +# Dask-Distributed indexing topology locally. 
+# +# Why use this: +# - Reproduce a production-mode bug without going to K8s. +# - Watch the Dask scheduler dashboard at http://localhost:8787 +# while triggering re-indexes from the admin UI. +# - Verify that scaling `dask-worker` replicas actually parallelizes +# indexing across pods. +# +# Why NOT use this for daily dev work: +# - The default docker-compose.dev.yml already runs `update.py` in +# a single container's LocalCluster — fast, simple, all you need +# for connector-code work. +# +# Usage: +# docker compose \ +# -f deployment/docker_compose/docker-compose.dev.yml \ +# -f deployment/docker_compose/docker-compose.dask-distributed.yml \ +# up +# +# # scale workers up/down at any time: +# docker compose ... up -d --scale dask-worker=5 + +services: + dask-scheduler: + image: danswer/danswer-backend:latest + build: + context: ../../backend + dockerfile: Dockerfile + command: + - dask + - scheduler + - --host=0.0.0.0 + - --port=8786 + - --dashboard-address=:8787 + ports: + - "8786:8786" # scheduler RPC — Client connects here + - "8787:8787" # web dashboard — visit http://localhost:8787 + restart: always + + dask-worker: + image: danswer/danswer-backend:latest + build: + context: ../../backend + dockerfile: Dockerfile + # `--nworkers 1 --nthreads 1` per pod, matching the K8s + # manifest. Scale this service's `replicas` (or `--scale` flag) + # to add concurrent indexing capacity. + command: + - dask + - worker + - tcp://dask-scheduler:8786 + - --nworkers=1 + - --nthreads=1 + - --memory-limit=4GB + depends_on: + - dask-scheduler + - relational_db + - index + - indexing_model_server + deploy: + replicas: 2 + restart: always + environment: + # PYTHONPATH so the worker can import danswer.* when + # deserializing the run_indexing_entrypoint callable. + - PYTHONPATH=/app + - CURRENT_PROCESS_IS_AN_INDEXING_JOB=true + # Inherit everything the existing `background` service uses so + # connector credentials, model-server endpoints, Vespa hosts, + # GenAI keys, etc. 
all work identically inside dask-worker. + - POSTGRES_HOST=relational_db + - VESPA_HOST=index + - VESPA_PORT=8081 + - INDEXING_MODEL_SERVER_HOST=indexing_model_server + - INDEXING_MODEL_SERVER_PORT=9000 + - MODEL_SERVER_HOST=inference_model_server + - MODEL_SERVER_PORT=9000 + - GEN_AI_MODEL_PROVIDER=${GEN_AI_MODEL_PROVIDER:-} + - GEN_AI_API_KEY=${GEN_AI_API_KEY:-} + - GEN_AI_API_ENDPOINT=${GEN_AI_API_ENDPOINT:-} + - GEN_AI_IDENTITY_ENDPOINT=${GEN_AI_IDENTITY_ENDPOINT:-} + - GEN_AI_CLIENT_ID=${GEN_AI_CLIENT_ID:-} + - GEN_AI_CLIENT_SECRET=${GEN_AI_CLIENT_SECRET:-} + + # Override the existing `background` service to point its + # update.py loop at the remote scheduler instead of an in-process + # LocalCluster. Everything else about the service stays the same + # (supervisord, celery beat, celery worker still run in this + # container — only the indexing dispatch path is rerouted). + background: + environment: + - DASK_SCHEDULER_ADDRESS=tcp://dask-scheduler:8786 + depends_on: + - dask-scheduler diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml index 294b22deff1..491cbe4cefe 100644 --- a/deployment/docker_compose/docker-compose.dev.yml +++ b/deployment/docker_compose/docker-compose.dev.yml @@ -314,6 +314,31 @@ services: - db_volume:/var/lib/postgresql/data + # Cache + per-user request rate limiting. Cache-only — no persistence; an LRU + # eviction policy bounds memory so a runaway producer can't OOM the node. + # Not the Celery broker by default (Postgres is); opt in with CELERY_BROKER_REDIS_ENABLED=true. 
+ redis: + image: redis:7.2-alpine + restart: always + command: + - redis-server + - --appendonly + - "no" + - --save + - "" + - --maxmemory + - "256mb" + - --maxmemory-policy + - allkeys-lru + ports: + - "6379:6379" + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 10s + timeout: 3s + retries: 5 + + # This container name cannot have an underscore in it due to Vespa expectations of the URL index: image: vespaengine/vespa:8.277.17 diff --git a/deployment/helm/.gitignore b/deployment/helm/.gitignore deleted file mode 100644 index b442275d6b5..00000000000 --- a/deployment/helm/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -### Helm ### -# Chart dependencies -**/charts/*.tgz diff --git a/deployment/helm/.helmignore b/deployment/helm/.helmignore deleted file mode 100644 index 0e8a0eb36f4..00000000000 --- a/deployment/helm/.helmignore +++ /dev/null @@ -1,23 +0,0 @@ -# Patterns to ignore when building packages. -# This supports shell glob matching, relative path matching, and -# negation (prefixed with !). Only one pattern per line. 
-.DS_Store -# Common VCS dirs -.git/ -.gitignore -.bzr/ -.bzrignore -.hg/ -.hgignore -.svn/ -# Common backup files -*.swp -*.bak -*.tmp -*.orig -*~ -# Various IDEs -.project -.idea/ -*.tmproj -.vscode/ diff --git a/deployment/helm/Chart.lock b/deployment/helm/Chart.lock deleted file mode 100644 index 918b44f6ebf..00000000000 --- a/deployment/helm/Chart.lock +++ /dev/null @@ -1,12 +0,0 @@ -dependencies: -- name: postgresql - repository: https://charts.bitnami.com/bitnami - version: 14.3.1 -- name: vespa - repository: https://unoplat.github.io/vespa-helm-charts - version: 0.2.3 -- name: nginx - repository: oci://registry-1.docker.io/bitnamicharts - version: 15.14.0 -digest: sha256:ab17b5d2c3883055cb4a26bf530043521be5220c24f804e954bb428273d16ba8 -generated: "2024-05-24T16:55:30.598279-07:00" diff --git a/deployment/helm/Chart.yaml b/deployment/helm/Chart.yaml deleted file mode 100644 index 7763f33bec5..00000000000 --- a/deployment/helm/Chart.yaml +++ /dev/null @@ -1,35 +0,0 @@ -apiVersion: v2 -name: danswer-stack -description: A Helm chart for Kubernetes -home: https://www.danswer.ai/ -sources: - - "https://github.com/danswer-ai/danswer" -type: application -version: 0.2.0 -appVersion: "latest" -annotations: - category: Productivity - licenses: MIT - images: | - - name: webserver - image: docker.io/danswer/danswer-web-server:latest - - name: background - image: docker.io/danswer/danswer-backend:latest - - name: vespa - image: vespaengine/vespa:8.277.17 -dependencies: - - name: postgresql - version: 14.3.1 - repository: https://charts.bitnami.com/bitnami - condition: postgresql.enabled - - name: vespa - version: 0.2.3 - repository: https://unoplat.github.io/vespa-helm-charts - condition: vespa.enabled - - name: nginx - version: 15.14.0 - repository: oci://registry-1.docker.io/bitnamicharts - condition: nginx.enabled - - - \ No newline at end of file diff --git a/deployment/helm/templates/_helpers.tpl b/deployment/helm/templates/_helpers.tpl deleted file mode 100644 
index 483a5b5e5af..00000000000 --- a/deployment/helm/templates/_helpers.tpl +++ /dev/null @@ -1,83 +0,0 @@ -{{/* -Expand the name of the chart. -*/}} -{{- define "danswer-stack.name" -}} -{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* -Create a default fully qualified app name. -We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). -If release name contains chart name it will be used as a full name. -*/}} -{{- define "danswer-stack.fullname" -}} -{{- if .Values.fullnameOverride }} -{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- $name := default .Chart.Name .Values.nameOverride }} -{{- if contains $name .Release.Name }} -{{- .Release.Name | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} -{{- end }} -{{- end }} -{{- end }} - -{{/* -Create chart name and version as used by the chart label. -*/}} -{{- define "danswer-stack.chart" -}} -{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* -Common labels -*/}} -{{- define "danswer-stack.labels" -}} -helm.sh/chart: {{ include "danswer-stack.chart" . }} -{{ include "danswer-stack.selectorLabels" . }} -{{- if .Chart.AppVersion }} -app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} -{{- end }} -app.kubernetes.io/managed-by: {{ .Release.Service }} -{{- end }} - -{{/* -Selector labels -*/}} -{{- define "danswer-stack.selectorLabels" -}} -app.kubernetes.io/name: {{ include "danswer-stack.name" . }} -app.kubernetes.io/instance: {{ .Release.Name }} -{{- end }} - -{{/* -Create the name of the service account to use -*/}} -{{- define "danswer-stack.serviceAccountName" -}} -{{- if .Values.serviceAccount.create }} -{{- default (include "danswer-stack.fullname" .) 
.Values.serviceAccount.name }} -{{- else }} -{{- default "default" .Values.serviceAccount.name }} -{{- end }} -{{- end }} - -{{/* -Set secret name -*/}} -{{- define "danswer-stack.secretName" -}} -{{- default (default "danswer-secrets" .Values.auth.secretName) .Values.auth.existingSecret }} -{{- end }} - -{{/* -Create env vars from secrets -*/}} -{{- define "danswer-stack.envSecrets" -}} - {{- range $name, $key := .Values.auth.secretKeys }} -- name: {{ $name | upper | replace "-" "_" | quote }} - valueFrom: - secretKeyRef: - name: {{ include "danswer-stack.secretName" $ }} - key: {{ default $name $key }} - {{- end }} -{{- end }} - diff --git a/deployment/helm/templates/api-deployment.yaml b/deployment/helm/templates/api-deployment.yaml deleted file mode 100644 index 7f10bffafd0..00000000000 --- a/deployment/helm/templates/api-deployment.yaml +++ /dev/null @@ -1,59 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "danswer-stack.fullname" . }}-api-deployment - labels: - {{- include "danswer-stack.labels" . | nindent 4 }} -spec: - {{- if not .Values.api.autoscaling.enabled }} - replicas: {{ .Values.api.replicaCount }} - {{- end }} - selector: - matchLabels: - {{- include "danswer-stack.selectorLabels" . | nindent 6 }} - {{- if .Values.api.deploymentLabels }} - {{- toYaml .Values.api.deploymentLabels | nindent 6 }} - {{- end }} - template: - metadata: - {{- with .Values.api.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - {{- include "danswer-stack.labels" . | nindent 8 }} - {{- with .Values.api.podLabels }} - {{- toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with .Values.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - serviceAccountName: {{ include "danswer-stack.serviceAccountName" . 
}} - securityContext: - {{- toYaml .Values.api.podSecurityContext | nindent 8 }} - containers: - - name: api-server - securityContext: - {{- toYaml .Values.api.securityContext | nindent 12 }} - image: "{{ .Values.api.image.repository }}:{{ .Values.api.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.api.image.pullPolicy }} - command: - - "/bin/sh" - - "-c" - - | - alembic upgrade head && - echo "Starting Danswer Api Server" && - uvicorn danswer.main:app --host 0.0.0.0 --port 8080 - ports: - - name: api-server-port - containerPort: {{ .Values.api.service.port }} - protocol: TCP - resources: - {{- toYaml .Values.api.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ .Values.config.envConfigMapName }} - env: - {{- include "danswer-stack.envSecrets" . | nindent 12}} diff --git a/deployment/helm/templates/api-hpa.yaml b/deployment/helm/templates/api-hpa.yaml deleted file mode 100644 index 378c39715ad..00000000000 --- a/deployment/helm/templates/api-hpa.yaml +++ /dev/null @@ -1,32 +0,0 @@ -{{- if .Values.api.autoscaling.enabled }} -apiVersion: autoscaling/v2 -kind: HorizontalPodAutoscaler -metadata: - name: {{ include "danswer-stack.fullname" . }}-api - labels: - {{- include "danswer-stack.labels" . | nindent 4 }} -spec: - scaleTargetRef: - apiVersion: apps/v1 - kind: Deployment - name: {{ include "danswer-stack.fullname" . 
}} - minReplicas: {{ .Values.api.autoscaling.minReplicas }} - maxReplicas: {{ .Values.api.autoscaling.maxReplicas }} - metrics: - {{- if .Values.api.autoscaling.targetCPUUtilizationPercentage }} - - type: Resource - resource: - name: cpu - target: - type: Utilization - averageUtilization: {{ .Values.api.autoscaling.targetCPUUtilizationPercentage }} - {{- end }} - {{- if .Values.api.autoscaling.targetMemoryUtilizationPercentage }} - - type: Resource - resource: - name: memory - target: - type: Utilization - averageUtilization: {{ .Values.api.autoscaling.targetMemoryUtilizationPercentage }} - {{- end }} -{{- end }} diff --git a/deployment/helm/templates/api-service.yaml b/deployment/helm/templates/api-service.yaml deleted file mode 100644 index 1fd74d4ddf5..00000000000 --- a/deployment/helm/templates/api-service.yaml +++ /dev/null @@ -1,22 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - # INTERNAL_URL env variable depends on this, don't change without changing INTERNAL_URL - name: {{ include "danswer-stack.fullname" . }}-api-service - labels: - {{- include "danswer-stack.labels" . | nindent 4 }} - {{- if .Values.api.deploymentLabels }} - {{- toYaml .Values.api.deploymentLabels | nindent 4 }} - {{- end }} -spec: - type: {{ .Values.api.service.type }} - ports: - - port: {{ .Values.api.service.port }} - targetPort: api-server-port - protocol: TCP - name: api-server-port - selector: - {{- include "danswer-stack.selectorLabels" . | nindent 4 }} - {{- if .Values.api.deploymentLabels }} - {{- toYaml .Values.api.deploymentLabels | nindent 4 }} - {{- end }} diff --git a/deployment/helm/templates/background-deployment.yaml b/deployment/helm/templates/background-deployment.yaml deleted file mode 100644 index 3cd65a99af4..00000000000 --- a/deployment/helm/templates/background-deployment.yaml +++ /dev/null @@ -1,51 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "danswer-stack.fullname" . 
}}-background - labels: - {{- include "danswer-stack.labels" . | nindent 4 }} -spec: - {{- if not .Values.background.autoscaling.enabled }} - replicas: {{ .Values.background.replicaCount }} - {{- end }} - selector: - matchLabels: - {{- include "danswer-stack.selectorLabels" . | nindent 6 }} - {{- if .Values.background.deploymentLabels }} - {{- toYaml .Values.background.deploymentLabels | nindent 6 }} - {{- end }} - template: - metadata: - {{- with .Values.background.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - {{- include "danswer-stack.labels" . | nindent 8 }} - {{- with .Values.background.podLabels }} - {{- toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with .Values.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - serviceAccountName: {{ include "danswer-stack.serviceAccountName" . }} - securityContext: - {{- toYaml .Values.background.podSecurityContext | nindent 8 }} - containers: - - name: background - securityContext: - {{- toYaml .Values.background.securityContext | nindent 12 }} - image: "{{ .Values.background.image.repository }}:{{ .Values.background.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.background.image.pullPolicy }} - command: ["/usr/bin/supervisord"] - resources: - {{- toYaml .Values.background.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ .Values.config.envConfigMapName }} - env: - - name: ENABLE_MINI_CHUNK - value: "{{ .Values.background.enableMiniChunk }}" - {{- include "danswer-stack.envSecrets" . | nindent 12}} diff --git a/deployment/helm/templates/background-hpa.yaml b/deployment/helm/templates/background-hpa.yaml deleted file mode 100644 index 009daf10f05..00000000000 --- a/deployment/helm/templates/background-hpa.yaml +++ /dev/null @@ -1,32 +0,0 @@ -{{- if .Values.background.autoscaling.enabled }} -apiVersion: autoscaling/v2 -kind: HorizontalPodAutoscaler -metadata: - name: {{ include "danswer-stack.fullname" . 
}}-background - labels: - {{- include "danswer-stack.labels" . | nindent 4 }} -spec: - scaleTargetRef: - apiVersion: apps/v1 - kind: Deployment - name: {{ include "danswer-stack.fullname" . }} - minReplicas: {{ .Values.background.autoscaling.minReplicas }} - maxReplicas: {{ .Values.background.autoscaling.maxReplicas }} - metrics: - {{- if .Values.background.autoscaling.targetCPUUtilizationPercentage }} - - type: Resource - resource: - name: cpu - target: - type: Utilization - averageUtilization: {{ .Values.background.autoscaling.targetCPUUtilizationPercentage }} - {{- end }} - {{- if .Values.background.autoscaling.targetMemoryUtilizationPercentage }} - - type: Resource - resource: - name: memory - target: - type: Utilization - averageUtilization: {{ .Values.background.autoscaling.targetMemoryUtilizationPercentage }} - {{- end }} -{{- end }} diff --git a/deployment/helm/templates/configmap.yaml b/deployment/helm/templates/configmap.yaml deleted file mode 100755 index 8119ae0459c..00000000000 --- a/deployment/helm/templates/configmap.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ .Values.config.envConfigMapName }} - labels: - {{- include "danswer-stack.labels" . | nindent 4 }} -data: - INTERNAL_URL: "http://{{ include "danswer-stack.fullname" . }}-api-service:{{ .Values.api.service.port | default 8080 }}" - POSTGRES_HOST: {{ .Release.Name }}-postgresql - VESPA_HOST: "document-index-service" - MODEL_SERVER_HOST: "{{ include "danswer-stack.fullname" . }}-inference-model-service" - INDEXING_MODEL_SERVER_HOST: "{{ include "danswer-stack.fullname" . 
}}-indexing-model-service" -{{- range $key, $value := .Values.configMap }} - {{ $key }}: "{{ $value }}" -{{- end }} \ No newline at end of file diff --git a/deployment/helm/templates/danswer-secret.yaml b/deployment/helm/templates/danswer-secret.yaml deleted file mode 100644 index 6b2aa317204..00000000000 --- a/deployment/helm/templates/danswer-secret.yaml +++ /dev/null @@ -1,11 +0,0 @@ -{{- if not .Values.auth.existingSecret -}} -apiVersion: v1 -kind: Secret -metadata: - name: {{ include "danswer-stack.secretName" . }} -type: Opaque -stringData: - {{- range $name, $value := .Values.auth.secrets }} - {{ $name }}: {{ $value | quote }} - {{- end }} -{{- end }} \ No newline at end of file diff --git a/deployment/helm/templates/indexing-model-deployment.yaml b/deployment/helm/templates/indexing-model-deployment.yaml deleted file mode 100644 index cc88aefb79a..00000000000 --- a/deployment/helm/templates/indexing-model-deployment.yaml +++ /dev/null @@ -1,51 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "danswer-stack.fullname" . }}-indexing-model - labels: - {{- include "danswer-stack.labels" . | nindent 4 }} -spec: - replicas: 1 - selector: - matchLabels: - {{- include "danswer-stack.selectorLabels" . | nindent 6 }} - {{- if .Values.indexCapability.deploymentLabels }} - {{- toYaml .Values.indexCapability.deploymentLabels | nindent 6 }} - {{- end }} - template: - metadata: - {{- with .Values.indexCapability.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - {{- include "danswer-stack.labels" . | nindent 8 }} - {{- with .Values.indexCapability.podLabels }} - {{- toYaml . 
| nindent 8 }} - {{- end }} - spec: - containers: - - name: indexing-model-server - image: danswer/danswer-model-server:latest - imagePullPolicy: IfNotPresent - command: [ "uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000", "--limit-concurrency", "10" ] - ports: - - containerPort: 9000 - envFrom: - - configMapRef: - name: {{ .Values.config.envConfigMapName }} - env: - - name: INDEXING_ONLY - value: "{{ default "True" .Values.indexCapability.indexingOnly }}" - {{- include "danswer-stack.envSecrets" . | nindent 10}} - volumeMounts: - {{- range .Values.indexCapability.volumeMounts }} - - name: {{ .name }} - mountPath: {{ .mountPath }} - {{- end }} - volumes: - {{- range .Values.indexCapability.volumes }} - - name: {{ .name }} - persistentVolumeClaim: - claimName: {{ .persistentVolumeClaim.claimName }} - {{- end }} diff --git a/deployment/helm/templates/indexing-model-pvc.yaml b/deployment/helm/templates/indexing-model-pvc.yaml deleted file mode 100644 index e5825557d5b..00000000000 --- a/deployment/helm/templates/indexing-model-pvc.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ .Values.indexCapability.indexingModelPVC.name }} -spec: - accessModes: - - {{ .Values.indexCapability.indexingModelPVC.accessMode | quote }} - resources: - requests: - storage: {{ .Values.indexCapability.indexingModelPVC.storage | quote }} \ No newline at end of file diff --git a/deployment/helm/templates/indexing-model-service.yaml b/deployment/helm/templates/indexing-model-service.yaml deleted file mode 100644 index fbbeb6bee86..00000000000 --- a/deployment/helm/templates/indexing-model-service.yaml +++ /dev/null @@ -1,18 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: {{ include "danswer-stack.fullname" . }}-indexing-model-service - labels: - {{- include "danswer-stack.labels" . | nindent 4 }} -spec: - selector: - {{- include "danswer-stack.selectorLabels" . 
| nindent 4 }} - {{- if .Values.indexCapability.deploymentLabels }} - {{- toYaml .Values.indexCapability.deploymentLabels | nindent 4 }} - {{- end }} - ports: - - name: {{ .Values.indexCapability.service.name }} - protocol: TCP - port: {{ .Values.indexCapability.service.port }} - targetPort: {{ .Values.indexCapability.service.port }} - type: {{ .Values.indexCapability.service.type }} \ No newline at end of file diff --git a/deployment/helm/templates/inference-model-deployment.yaml b/deployment/helm/templates/inference-model-deployment.yaml deleted file mode 100644 index 43caddd29c3..00000000000 --- a/deployment/helm/templates/inference-model-deployment.yaml +++ /dev/null @@ -1,45 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "danswer-stack.fullname" . }}-inference-model - labels: - {{- range .Values.inferenceCapability.deployment.labels }} - {{ .key }}: {{ .value }} - {{- end }} -spec: - replicas: {{ .Values.inferenceCapability.deployment.replicas }} - selector: - matchLabels: - {{- range .Values.inferenceCapability.deployment.labels }} - {{ .key }}: {{ .value }} - {{- end }} - template: - metadata: - labels: - {{- range .Values.inferenceCapability.podLabels }} - {{ .key }}: {{ .value }} - {{- end }} - spec: - containers: - - name: {{ .Values.inferenceCapability.service.name }} - image: {{ .Values.inferenceCapability.deployment.image.repository }}:{{ .Values.inferenceCapability.deployment.image.tag }} - imagePullPolicy: {{ .Values.inferenceCapability.deployment.image.pullPolicy }} - command: {{ toYaml .Values.inferenceCapability.deployment.command | nindent 14 }} - ports: - - containerPort: {{ .Values.inferenceCapability.service.port }} - envFrom: - - configMapRef: - name: {{ .Values.config.envConfigMapName }} - env: - {{- include "danswer-stack.envSecrets" . 
| nindent 12}} - volumeMounts: - {{- range .Values.inferenceCapability.deployment.volumeMounts }} - - name: {{ .name }} - mountPath: {{ .mountPath }} - {{- end }} - volumes: - {{- range .Values.inferenceCapability.deployment.volumes }} - - name: {{ .name }} - persistentVolumeClaim: - claimName: {{ .persistentVolumeClaim.claimName }} - {{- end }} diff --git a/deployment/helm/templates/inference-model-pvc.yaml b/deployment/helm/templates/inference-model-pvc.yaml deleted file mode 100644 index fe47fa879a0..00000000000 --- a/deployment/helm/templates/inference-model-pvc.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: {{ .Values.inferenceCapability.pvc.name }} -spec: - accessModes: - {{- toYaml .Values.inferenceCapability.pvc.accessModes | nindent 4 }} - resources: - requests: - storage: {{ .Values.inferenceCapability.pvc.storage }} diff --git a/deployment/helm/templates/inference-model-service.yaml b/deployment/helm/templates/inference-model-service.yaml deleted file mode 100644 index 74433ac11da..00000000000 --- a/deployment/helm/templates/inference-model-service.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: {{ include "danswer-stack.fullname" . 
}}-inference-model-service -spec: - type: {{ .Values.inferenceCapability.service.type }} - ports: - - port: {{ .Values.inferenceCapability.service.port }} - targetPort: {{ .Values.inferenceCapability.service.port }} - protocol: TCP - name: {{ .Values.inferenceCapability.service.name }} - selector: - {{- range .Values.inferenceCapability.deployment.labels }} - {{ .key }}: {{ .value }} - {{- end }} diff --git a/deployment/helm/templates/nginx-conf.yaml b/deployment/helm/templates/nginx-conf.yaml deleted file mode 100644 index 81ecbaaa2f6..00000000000 --- a/deployment/helm/templates/nginx-conf.yaml +++ /dev/null @@ -1,44 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: danswer-nginx-conf -data: - nginx.conf: | - upstream api_server { - server {{ include "danswer-stack.fullname" . }}-api-service:{{ .Values.api.service.port }} fail_timeout=0; - } - - upstream web_server { - server {{ include "danswer-stack.fullname" . }}-webserver:{{ .Values.webserver.service.port }} fail_timeout=0; - } - - server { - listen 1024; - server_name $$DOMAIN; - - client_max_body_size 5G; # Maximum upload size - - location ~ ^/api(.*)$ { - rewrite ^/api(/.*)$ $1 break; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - proxy_set_header X-Forwarded-Host $host; - proxy_set_header Host $host; - proxy_http_version 1.1; - proxy_buffering off; - proxy_redirect off; - proxy_pass http://api_server; - } - - location / { - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - proxy_set_header X-Forwarded-Host $host; - proxy_set_header Host $host; - proxy_http_version 1.1; - proxy_redirect off; - proxy_pass http://web_server; - } - } diff --git a/deployment/helm/templates/serviceaccount.yaml b/deployment/helm/templates/serviceaccount.yaml deleted file mode 100644 index afd351217ba..00000000000 
--- a/deployment/helm/templates/serviceaccount.yaml +++ /dev/null @@ -1,13 +0,0 @@ -{{- if .Values.serviceAccount.create -}} -apiVersion: v1 -kind: ServiceAccount -metadata: - name: {{ include "danswer-stack.serviceAccountName" . }} - labels: - {{- include "danswer-stack.labels" . | nindent 4 }} - {{- with .Values.serviceAccount.annotations }} - annotations: - {{- toYaml . | nindent 4 }} - {{- end }} -automountServiceAccountToken: {{ .Values.serviceAccount.automount }} -{{- end }} diff --git a/deployment/helm/templates/tests/test-connection.yaml b/deployment/helm/templates/tests/test-connection.yaml deleted file mode 100644 index 60fbd1054c1..00000000000 --- a/deployment/helm/templates/tests/test-connection.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: Pod -metadata: - name: "{{ include "danswer-stack.fullname" . }}-test-connection" - labels: - {{- include "danswer-stack.labels" . | nindent 4 }} - annotations: - "helm.sh/hook": test -spec: - containers: - - name: wget - image: busybox - command: ['wget'] - args: ['{{ include "danswer-stack.fullname" . }}:{{ .Values.webserver.service.port }}'] - restartPolicy: Never diff --git a/deployment/helm/templates/webserver-deployment.yaml b/deployment/helm/templates/webserver-deployment.yaml deleted file mode 100644 index c3505248fc6..00000000000 --- a/deployment/helm/templates/webserver-deployment.yaml +++ /dev/null @@ -1,60 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "danswer-stack.fullname" . }}-webserver - labels: - {{- include "danswer-stack.labels" . | nindent 4 }} -spec: - {{- if not .Values.webserver.autoscaling.enabled }} - replicas: {{ .Values.webserver.replicaCount }} - {{- end }} - selector: - matchLabels: - {{- include "danswer-stack.selectorLabels" . 
| nindent 6 }} - {{- if .Values.webserver.deploymentLabels }} - {{- toYaml .Values.webserver.deploymentLabels | nindent 6 }} - {{- end }} - template: - metadata: - {{- with .Values.webserver.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - {{- include "danswer-stack.labels" . | nindent 8 }} - {{- with .Values.webserver.podLabels }} - {{- toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with .Values.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - serviceAccountName: {{ include "danswer-stack.serviceAccountName" . }} - securityContext: - {{- toYaml .Values.webserver.podSecurityContext | nindent 8 }} - containers: - - name: web-server - securityContext: - {{- toYaml .Values.webserver.securityContext | nindent 12 }} - image: "{{ .Values.webserver.image.repository }}:{{ .Values.webserver.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.webserver.image.pullPolicy }} - ports: - - name: http - containerPort: {{ .Values.webserver.service.port }} - protocol: TCP - resources: - {{- toYaml .Values.webserver.resources | nindent 12 }} - envFrom: - - configMapRef: - name: {{ .Values.config.envConfigMapName }} - env: - {{- include "danswer-stack.envSecrets" . | nindent 12}} - {{- with .Values.webserver.volumeMounts }} - volumeMounts: - {{- toYaml . | nindent 12 }} - {{- end }} - {{- with .Values.webserver.volumes }} - volumes: - {{- toYaml . | nindent 8 }} - {{- end }} diff --git a/deployment/helm/templates/webserver-hpa.yaml b/deployment/helm/templates/webserver-hpa.yaml deleted file mode 100644 index b46820a7fac..00000000000 --- a/deployment/helm/templates/webserver-hpa.yaml +++ /dev/null @@ -1,32 +0,0 @@ -{{- if .Values.webserver.autoscaling.enabled }} -apiVersion: autoscaling/v2 -kind: HorizontalPodAutoscaler -metadata: - name: {{ include "danswer-stack.fullname" . }}-webserver - labels: - {{- include "danswer-stack.labels" . 
| nindent 4 }} -spec: - scaleTargetRef: - apiVersion: apps/v1 - kind: Deployment - name: {{ include "danswer-stack.fullname" . }} - minReplicas: {{ .Values.webserver.autoscaling.minReplicas }} - maxReplicas: {{ .Values.webserver.autoscaling.maxReplicas }} - metrics: - {{- if .Values.webserver.autoscaling.targetCPUUtilizationPercentage }} - - type: Resource - resource: - name: cpu - target: - type: Utilization - averageUtilization: {{ .Values.webserver.autoscaling.targetCPUUtilizationPercentage }} - {{- end }} - {{- if .Values.webserver.autoscaling.targetMemoryUtilizationPercentage }} - - type: Resource - resource: - name: memory - target: - type: Utilization - averageUtilization: {{ .Values.webserver.autoscaling.targetMemoryUtilizationPercentage }} - {{- end }} -{{- end }} diff --git a/deployment/helm/templates/webserver-service.yaml b/deployment/helm/templates/webserver-service.yaml deleted file mode 100644 index 3e33566fce1..00000000000 --- a/deployment/helm/templates/webserver-service.yaml +++ /dev/null @@ -1,21 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: {{ include "danswer-stack.fullname" . }}-webserver - labels: - {{- include "danswer-stack.labels" . | nindent 4 }} - {{- if .Values.webserver.deploymentLabels }} - {{- toYaml .Values.webserver.deploymentLabels | nindent 4 }} - {{- end }} -spec: - type: {{ .Values.webserver.service.type }} - ports: - - port: {{ .Values.webserver.service.port }} - targetPort: http - protocol: TCP - name: http - selector: - {{- include "danswer-stack.selectorLabels" . | nindent 4 }} - {{- if .Values.webserver.deploymentLabels }} - {{- toYaml .Values.webserver.deploymentLabels | nindent 4 }} - {{- end }} diff --git a/deployment/helm/values.yaml b/deployment/helm/values.yaml deleted file mode 100644 index 53b82ddc6b9..00000000000 --- a/deployment/helm/values.yaml +++ /dev/null @@ -1,457 +0,0 @@ -# Default values for danswer-stack. -# This is a YAML-formatted file. -# Declare variables to be passed into your templates. 
- -imagePullSecrets: [] -nameOverride: "" -fullnameOverride: "" - -inferenceCapability: - service: - name: inference-model-server-service - type: ClusterIP - port: 9000 - pvc: - name: inference-model-pvc - accessModes: - - ReadWriteOnce - storage: 3Gi - deployment: - name: inference-model-server-deployment - replicas: 1 - labels: - - key: app - value: inference-model-server - image: - repository: danswer/danswer-model-server - tag: latest - pullPolicy: IfNotPresent - command: ["uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000"] - port: 9000 - volumeMounts: - - name: inference-model-storage - mountPath: /root/.cache - volumes: - - name: inference-model-storage - persistentVolumeClaim: - claimName: inference-model-pvc - podLabels: - - key: app - value: inference-model-server - -indexCapability: - service: - type: ClusterIP - port: 9000 - name: indexing-model-server-port - deploymentLabels: - app: indexing-model-server - podLabels: - app: indexing-model-server - indexingOnly: "True" - podAnnotations: {} - volumeMounts: - - name: indexing-model-storage - mountPath: /root/.cache - volumes: - - name: indexing-model-storage - persistentVolumeClaim: - claimName: indexing-model-storage - indexingModelPVC: - name: indexing-model-storage - accessMode: "ReadWriteOnce" - storage: "3Gi" - -config: - envConfigMapName: env-configmap - -serviceAccount: - # Specifies whether a service account should be created - create: false - # Automatically mount a ServiceAccount's API credentials? - automount: true - # Annotations to add to the service account - annotations: {} - # The name of the service account to use. 
- # If not set and create is true, a name is generated using the fullname template - name: "" - -postgresql: - primary: - persistence: - size: 5Gi - enabled: true - auth: - existingSecret: danswer-secrets - secretKeys: - adminPasswordKey: postgres_password #overwriting as postgres typically expects 'postgres-password' - -nginx: - containerPorts: - http: 1024 - extraEnvVars: - - name: DOMAIN - value: localhost - service: - ports: - http: 80 - danswer: 3000 - targetPort: - http: http - danswer: http - - existingServerBlockConfigmap: danswer-nginx-conf - -webserver: - replicaCount: 1 - image: - repository: danswer/danswer-web-server - pullPolicy: IfNotPresent - # Overrides the image tag whose default is the chart appVersion. - tag: "" - deploymentLabels: - app: web-server - podAnnotations: {} - podLabels: - app: web-server - podSecurityContext: {} - # fsGroup: 2000 - - securityContext: {} - # capabilities: - # drop: - # - ALL - # readOnlyRootFilesystem: true - # runAsNonRoot: true - # runAsUser: 1000 - - service: - type: ClusterIP - port: 3000 - - resources: {} - # We usually recommend not to specify default resources and to leave this as a conscious - # choice for the user. This also increases chances charts run on environments with little - # resources, such as Minikube. If you do want to specify resources, uncomment the following - # lines, adjust them as necessary, and remove the curly braces after 'resources:'. - # limits: - # cpu: 100m - # memory: 128Mi - # requests: - # cpu: 100m - # memory: 128Mi - - autoscaling: - enabled: false - minReplicas: 1 - maxReplicas: 100 - targetCPUUtilizationPercentage: 80 - # targetMemoryUtilizationPercentage: 80 - - # Additional volumes on the output Deployment definition. - volumes: [] - # - name: foo - # secret: - # secretName: mysecret - # optional: false - - # Additional volumeMounts on the output Deployment definition. 
- volumeMounts: [] - # - name: foo - # mountPath: "/etc/foo" - # readOnly: true - - nodeSelector: {} - tolerations: [] - affinity: {} - -api: - replicaCount: 1 - image: - repository: danswer/danswer-backend - pullPolicy: IfNotPresent - # Overrides the image tag whose default is the chart appVersion. - tag: "" - deploymentLabels: - app: api-server - podAnnotations: {} - podLabels: - scope: danswer-backend - app: api-server - - podSecurityContext: {} - # fsGroup: 2000 - - securityContext: {} - # capabilities: - # drop: - # - ALL - # readOnlyRootFilesystem: true - # runAsNonRoot: true - # runAsUser: 1000 - - service: - type: ClusterIP - port: 8080 - - resources: {} - # We usually recommend not to specify default resources and to leave this as a conscious - # choice for the user. This also increases chances charts run on environments with little - # resources, such as Minikube. If you do want to specify resources, uncomment the following - # lines, adjust them as necessary, and remove the curly braces after 'resources:'. - # requests: - # cpu: 1000m # Requests 1 CPU core - # memory: 1Gi # Requests 1 GiB of memory - # limits: - # cpu: 2000m # Limits to 2 CPU cores - # memory: 2Gi # Limits to 2 GiB of memory - - autoscaling: - enabled: false - minReplicas: 1 - maxReplicas: 100 - targetCPUUtilizationPercentage: 80 - # targetMemoryUtilizationPercentage: 80 - - # Additional volumes on the output Deployment definition. - volumes: [] - # - name: foo - # secret: - # secretName: mysecret - # optional: false - - # Additional volumeMounts on the output Deployment definition. - volumeMounts: [] - # - name: foo - # mountPath: "/etc/foo" - # readOnly: true - - nodeSelector: {} - tolerations: [] - - -background: - replicaCount: 1 - image: - repository: danswer/danswer-backend - pullPolicy: IfNotPresent - # Overrides the image tag whose default is the chart appVersion. 
- tag: latest - podAnnotations: {} - podLabels: - scope: danswer-backend - app: background - deploymentLabels: - app: background - podSecurityContext: {} - # fsGroup: 2000 - - securityContext: {} - # capabilities: - # drop: - # - ALL - # readOnlyRootFilesystem: true - # runAsNonRoot: true - # runAsUser: 1000 - enableMiniChunk: "true" - resources: {} - # We usually recommend not to specify default resources and to leave this as a conscious - # choice for the user. This also increases chances charts run on environments with little - # resources, such as Minikube. If you do want to specify resources, uncomment the following - # lines, adjust them as necessary, and remove the curly braces after 'resources:'. - # requests: - # cpu: 1000m # Requests 1 CPU core - # memory: 1Gi # Requests 1 GiB of memory - # limits: - # cpu: 2000m # Limits to 2 CPU cores - # memory: 2Gi # Limits to 2 GiB of memory - - autoscaling: - enabled: false - minReplicas: 1 - maxReplicas: 100 - targetCPUUtilizationPercentage: 80 - # targetMemoryUtilizationPercentage: 80 - - # Additional volumes on the output Deployment definition. - volumes: [] - # - name: foo - # secret: - # secretName: mysecret - # optional: false - - # Additional volumeMounts on the output Deployment definition. - volumeMounts: [] - # - name: foo - # mountPath: "/etc/foo" - # readOnly: true - - nodeSelector: {} - tolerations: [] - -vespa: - replicaCount: 1 - image: - repository: vespa - pullPolicy: IfNotPresent - tag: "8.277.17" - podAnnotations: {} - podLabels: - app: vespa - app.kubernetes.io/instance: danswer - app.kubernetes.io/name: vespa - enabled: true - - podSecurityContext: {} - # fsGroup: 2000 - - securityContext: - privileged: true - runAsUser: 0 - # capabilities: - # drop: - # - ALL - # readOnlyRootFilesystem: true - # runAsNonRoot: true - # runAsUser: 1000 - - resources: - # The Vespa Helm chart specifies default resources, which are quite modest. 
We override - # them here to increase chances of the chart running successfully. - requests: - cpu: 1500m - memory: 4000Mi - limits: - cpu: 1500m - memory: 4000Mi - - nodeSelector: {} - tolerations: [] - affinity: {} - - -#ingress: -# enabled: false -# className: "" -# annotations: {} -# # kubernetes.io/ingress.class: nginx -# # kubernetes.io/tls-acme: "true" -# hosts: -# - host: chart-example.local -# paths: -# - path: / -# pathType: ImplementationSpecific -# tls: [] -# # - secretName: chart-example-tls -# # hosts: -# # - chart-example.local - -persistence: - vespa: - enabled: true - existingClaim: "" - storageClassName: "" - accessModes: - - ReadWriteOnce - size: 5Gi - -auth: - # for storing smtp, oauth, slack, and other secrets - # keys are lowercased version of env vars (e.g. SMTP_USER -> smtp_user) - existingSecret: "" # danswer-secrets - # optionally override the secret keys to reference in the secret - secretKeys: - postgres_password: "postgres_password" - smtp_pass: "" - oauth_client_id: "" - oauth_client_secret: "" - oauth_cookie_secret: "" - gen_ai_api_key: "" - danswer_bot_slack_app_token: "" - danswer_bot_slack_bot_token: "" - opsgenie_api_key: "" - # will be overridden by the existingSecret if set - secretName: "danswer-secrets" - # set values as strings, they will be base64 encoded - secrets: - postgres_password: "postgres" - smtp_pass: "" - oauth_client_id: "" - oauth_client_secret: "" - oauth_cookie_secret: "" - gen_ai_api_key: "" - danswer_bot_slack_app_token: "" - danswer_bot_slack_bot_token: "" - opsgenie_api_key: "" - -configMap: - AUTH_TYPE: "disabled" # Change this for production uses unless Danswer is only accessible behind VPN - SESSION_EXPIRE_TIME_SECONDS: "86400" # 1 Day Default - VALID_EMAIL_DOMAINS: "" # Can be something like danswer.ai, as an extra double-check - SMTP_SERVER: "" # For sending verification emails, if unspecified then defaults to 'smtp.gmail.com' - SMTP_PORT: "" # For sending verification emails, if unspecified then 
defaults to '587' - SMTP_USER: "" # 'your-email@company.com' - # SMTP_PASS: "" # 'your-gmail-password' - EMAIL_FROM: "" # 'your-email@company.com' SMTP_USER missing used instead - # Gen AI Settings - GEN_AI_MODEL_PROVIDER: "" - GEN_AI_MODEL_VERSION: "" - FAST_GEN_AI_MODEL_VERSION: "" - # GEN_AI_API_KEY: "" - GEN_AI_API_ENDPOINT: "" - GEN_AI_API_VERSION: "" - GEN_AI_LLM_PROVIDER_TYPE: "" - GEN_AI_MAX_TOKENS: "" - QA_TIMEOUT: "60" - MAX_CHUNKS_FED_TO_CHAT: "" - DISABLE_LLM_FILTER_EXTRACTION: "" - DISABLE_LLM_CHUNK_FILTER: "" - DISABLE_LLM_CHOOSE_SEARCH: "" - DISABLE_LLM_QUERY_REPHRASE: "" - # Query Options - DOC_TIME_DECAY: "" - HYBRID_ALPHA: "" - EDIT_KEYWORD_QUERY: "" - MULTILINGUAL_QUERY_EXPANSION: "" - LANGUAGE_HINT: "" - LANGUAGE_CHAT_NAMING_HINT: "" - QA_PROMPT_OVERRIDE: "" - # Internet Search Tool - BING_API_KEY: "" - # Don't change the NLP models unless you know what you're doing - DOCUMENT_ENCODER_MODEL: "" - NORMALIZE_EMBEDDINGS: "" - ASYM_QUERY_PREFIX: "" - ASYM_PASSAGE_PREFIX: "" - ENABLE_RERANKING_REAL_TIME_FLOW: "" - ENABLE_RERANKING_ASYNC_FLOW: "" - MODEL_SERVER_PORT: "" - MIN_THREADS_ML_MODELS: "" - # Indexing Configs - NUM_INDEXING_WORKERS: "" - DISABLE_INDEX_UPDATE_ON_SWAP: "" - DASK_JOB_CLIENT_ENABLED: "" - CONTINUE_ON_CONNECTOR_FAILURE: "" - EXPERIMENTAL_CHECKPOINTING_ENABLED: "" - CONFLUENCE_CONNECTOR_LABELS_TO_SKIP: "" - JIRA_API_VERSION: "" - GONG_CONNECTOR_START_TIME: "" - NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP: "" - # DanswerBot SlackBot Configs - # DANSWER_BOT_SLACK_APP_TOKEN: "" - # DANSWER_BOT_SLACK_BOT_TOKEN: "" - DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER: "" - DANSWER_BOT_DISPLAY_ERROR_MSGS: "" - DANSWER_BOT_RESPOND_EVERY_CHANNEL: "" - DANSWER_BOT_DISABLE_COT: "" # Currently unused - NOTIFY_SLACKBOT_NO_ANSWER: "" - # Logging - # Optional Telemetry, please keep it on (nothing sensitive is collected)? 
<3 - # https://docs.danswer.dev/more/telemetry - DISABLE_TELEMETRY: "" - LOG_LEVEL: "" - LOG_ALL_MODEL_INTERACTIONS: "" - LOG_VESPA_TIMING_INFORMATION: "" - # Shared or Non-backend Related - WEB_DOMAIN: "http://localhost:3000" # for web server and api server - DOMAIN: "localhost" # for nginx diff --git a/deployment/kubernetes/analytics-bootstrap-job.yaml b/deployment/kubernetes/analytics-bootstrap-job.yaml deleted file mode 100644 index e3ff12d2d2e..00000000000 --- a/deployment/kubernetes/analytics-bootstrap-job.yaml +++ /dev/null @@ -1,142 +0,0 @@ -# One-time Kubernetes Job for the Darwin analytics rollup bootstrap. -# -# What it does (in order): -# 1. `alembic upgrade heads` — applies any pending migrations, -# including this PR's `c8a4e2f9d1b3_analytics_daily_rollup`. Idempotent: -# already-applied revisions are skipped. Safe to re-run. -# 2. `scripts/backfill_analytics_rollup.py` — walks every historical -# date that still has chat data, computes the daily metrics, and -# writes them into `analytics_daily_rollup` (also seeds the -# `analytics_rollup_state` checkpoint in `key_value_store`). -# Idempotent via INSERT…ON CONFLICT(date) DO UPDATE. -# -# Why a Job and not a Deployment / Pod: -# - Deployment auto-restarts on container exit — wrong for one-time -# work; the migration would loop. -# - Bare Pod doesn't track success / failure cleanly. -# - Job has run-to-completion semantics + retry-on-failure + -# TTL-after-finish for auto-cleanup. Standard K8s pattern. -# -# When to apply: -# - ONCE, after the new backend image (with this PR's code) is rolled -# out to the api-server and background-deployment, and BEFORE the -# next 08:00 UTC retention sweep on a fresh DB. If retention runs -# first, it deletes chat data older than 30 days and the backfill -# will then write zero counts for those days. -# - Re-applying is safe (both steps are idempotent), but normally -# unnecessary — the daily Celery beat task takes over. -# -# How to apply: -# 1. 
Update IMAGE_TAG to the tag containing the merged PR code -# (currently the api-server runs vha-119; replace as needed). -# 2. kubectl apply -f deployment/kubernetes/analytics-bootstrap-job.yaml -# 3. Watch logs: -# kubectl logs -n darwin -f job/darwin-analytics-bootstrap -# 4. Verify completion: -# kubectl get -n darwin job/darwin-analytics-bootstrap -# # COMPLETIONS should read 1/1 -# 5. Verify the rollup table: -# kubectl exec -n darwin -- psql ... \ -# -c "SELECT count(*), max(rolled_up_at) FROM analytics_daily_rollup;" -# -# How to clean up: nothing required — `ttlSecondsAfterFinished: 3600` -# auto-deletes the Job and its Pod 1 hour after success. Manual delete: -# kubectl delete -n darwin job/darwin-analytics-bootstrap -# -# Behaviour on failure: `backoffLimit: 3` retries up to 3 times -# (with exponential backoff). After that the Job is marked Failed and -# you can inspect logs of the failed Pod via `kubectl logs ...`. -apiVersion: batch/v1 -kind: Job -metadata: - name: darwin-analytics-bootstrap - namespace: darwin - labels: - app: darwin-analytics-bootstrap - purpose: one-time-migration -spec: - # Auto-cleanup the Job + its completed Pod 1 hour after success. - # Tune higher if you want more time to inspect logs. - ttlSecondsAfterFinished: 3600 - # Retry the whole pipeline up to 3 times on failure (each step is - # idempotent so retries are safe). 4xx/5xx pods are inspectable until - # ttlSecondsAfterFinished kicks in. - backoffLimit: 3 - # Hard kill if the Job runs longer than 30 minutes — backfill on a - # large chat history can take a few minutes; 30m is a generous ceiling. - activeDeadlineSeconds: 1800 - template: - metadata: - labels: - app: darwin-analytics-bootstrap - spec: - # OnFailure → if the container exits non-zero, kubelet restarts - # it within the same Pod (faster than scheduling a new Pod). - # Combined with backoffLimit above for the cross-Pod retry. 
- restartPolicy: OnFailure - containers: - - name: bootstrap - # IMPORTANT: bump this to the image tag that includes this PR's - # backend code (the analytics_rollup module + new migration). - # The api-server deployment is currently on vha-119; you'll - # likely roll a vha-120 (or similar) once the PR merges. - image: sfbrdevhelmweacr.azurecr.io/danswer/danswer-backend:vha-121 - imagePullPolicy: IfNotPresent - command: - - /bin/sh - - -c - - | - # `pipefail` isn't supported by the image's /bin/sh - # (BusyBox ash / dash). The script has no pipes anyway, - # so plain `-eu` is sufficient: any non-zero exit aborts. - set -eu - echo "=== Step 1/2: alembic upgrade heads ===" - alembic upgrade heads - echo - echo "=== Step 2/2: backfill analytics_daily_rollup ===" - # PYTHONPATH=. is needed because the script imports - # `danswer.*` and the image's WORKDIR is the backend/ dir. - PYTHONPATH=. python scripts/backfill_analytics_rollup.py - echo - echo "=== Bootstrap complete ===" - # Same Postgres creds as the api-server / background pods. - env: - - name: POSTGRES_USER - valueFrom: - secretKeyRef: - key: postgres_user - name: danswer-secrets - - name: POSTGRES_PASSWORD - valueFrom: - secretKeyRef: - key: postgres_password - name: danswer-secrets - # Same shared config as the api-server. The backfill reads - # POSTGRES_HOST, encryption keys, etc. from here. - envFrom: - - configMapRef: - name: env-configmap - # PVCs match the api-server. Strictly speaking the backfill - # doesn't write to either, but mirroring the api-server config - # avoids surprises if anything in the import chain reads from - # /home/storage or /home/file_connector_storage. - volumeMounts: - - mountPath: /home/storage - name: dynamic-storage - - mountPath: /home/file_connector_storage - name: file-connector-storage - # Modest resource ask — backfill is mostly DB I/O. 
- resources: - requests: - cpu: "100m" - memory: "256Mi" - limits: - cpu: "1" - memory: "1Gi" - volumes: - - name: dynamic-storage - persistentVolumeClaim: - claimName: dynamic-pvc - - name: file-connector-storage - persistentVolumeClaim: - claimName: file-connector-pvc diff --git a/deployment/kubernetes/api_server-service-deployment.yaml b/deployment/kubernetes/api_server-service-deployment.yaml deleted file mode 100644 index eeac5fecc96..00000000000 --- a/deployment/kubernetes/api_server-service-deployment.yaml +++ /dev/null @@ -1,57 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: api-server-service -spec: - selector: - app: api-server - ports: - - name: api-server-port - protocol: TCP - port: 80 - targetPort: 8080 - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: api-server-deployment -spec: - replicas: 1 - selector: - matchLabels: - app: api-server - template: - metadata: - labels: - app: api-server - spec: - containers: - - name: api-server - image: danswer/danswer-backend:latest - imagePullPolicy: IfNotPresent - command: - - "/bin/sh" - - "-c" - - | - alembic upgrade head && - echo "Starting Danswer Api Server" && - uvicorn danswer.main:app --host 0.0.0.0 --port 8080 - ports: - - containerPort: 8080 - # There are some extra values since this is shared between services - # There are no conflicts though, extra env variables are simply ignored - env: - - name: OAUTH_CLIENT_ID - valueFrom: - secretKeyRef: - name: danswer-secrets - key: google_oauth_client_id - - name: OAUTH_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: danswer-secrets - key: google_oauth_client_secret - envFrom: - - configMapRef: - name: env-configmap diff --git a/deployment/kubernetes/background-deployment.yaml b/deployment/kubernetes/background-deployment.yaml deleted file mode 100644 index 18521b0f5ad..00000000000 --- a/deployment/kubernetes/background-deployment.yaml +++ /dev/null @@ -1,24 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: 
- name: background-deployment -spec: - replicas: 1 - selector: - matchLabels: - app: background - template: - metadata: - labels: - app: background - spec: - containers: - - name: background - image: danswer/danswer-backend:latest - imagePullPolicy: IfNotPresent - command: ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"] - # There are some extra values since this is shared between services - # There are no conflicts though, extra env variables are simply ignored - envFrom: - - configMapRef: - name: env-configmap diff --git a/deployment/kubernetes/env-configmap.yaml b/deployment/kubernetes/env-configmap.yaml deleted file mode 100644 index ebfcc9deb81..00000000000 --- a/deployment/kubernetes/env-configmap.yaml +++ /dev/null @@ -1,107 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: env-configmap -data: - # Auth Setting, also check the secrets file - AUTH_TYPE: "disabled" # Change this for production uses unless Danswer is only accessible behind VPN - ENCRYPTION_KEY_SECRET: "" # This should not be specified directly in the yaml, this is just for reference - SESSION_EXPIRE_TIME_SECONDS: "86400" # 1 Day Default - VALID_EMAIL_DOMAINS: "" # Can be something like danswer.ai, as an extra double-check - SMTP_SERVER: "" # For sending verification emails, if unspecified then defaults to 'smtp.gmail.com' - SMTP_PORT: "" # For sending verification emails, if unspecified then defaults to '587' - SMTP_USER: "" # 'your-email@company.com' - SMTP_PASS: "" # 'your-gmail-password' - EMAIL_FROM: "" # 'your-email@company.com' SMTP_USER missing used instead - # Gen AI Settings - GEN_AI_MODEL_PROVIDER: "" - GEN_AI_MODEL_VERSION: "" - FAST_GEN_AI_MODEL_VERSION: "" - GEN_AI_API_KEY: "" - GEN_AI_API_ENDPOINT: "" - GEN_AI_API_VERSION: "" - GEN_AI_LLM_PROVIDER_TYPE: "" - GEN_AI_MAX_TOKENS: "" - QA_TIMEOUT: "60" - MAX_CHUNKS_FED_TO_CHAT: "" - DISABLE_LLM_FILTER_EXTRACTION: "" - DISABLE_LLM_CHUNK_FILTER: "" - DISABLE_LLM_CHOOSE_SEARCH: "" - 
DISABLE_LLM_QUERY_REPHRASE: "" - # Query Options - DOC_TIME_DECAY: "" - HYBRID_ALPHA: "" - EDIT_KEYWORD_QUERY: "" - MULTILINGUAL_QUERY_EXPANSION: "" - LANGUAGE_HINT: "" - LANGUAGE_CHAT_NAMING_HINT: "" - QA_PROMPT_OVERRIDE: "" - # Other Services - POSTGRES_HOST: "relational-db-service" - VESPA_HOST: "document-index-service" - # Internet Search Tool - BING_API_KEY: "" - # Don't change the NLP models unless you know what you're doing - DOCUMENT_ENCODER_MODEL: "" - NORMALIZE_EMBEDDINGS: "" - ASYM_QUERY_PREFIX: "" - ASYM_PASSAGE_PREFIX: "" - ENABLE_RERANKING_REAL_TIME_FLOW: "" - ENABLE_RERANKING_ASYNC_FLOW: "" - MODEL_SERVER_HOST: "inference-model-server-service" - MODEL_SERVER_PORT: "" - INDEXING_MODEL_SERVER_HOST: "indexing-model-server-service" - MIN_THREADS_ML_MODELS: "" - # Indexing Configs - NUM_INDEXING_WORKERS: "" - # Per-DocumentSource concurrency cap when NUM_INDEXING_WORKERS > 1. - # Default 1 = at most one indexing attempt per source type at a time - # (prevents a single PAT/credential from getting rate-limited). - # 0 = uncapped. Enforced scheduler-side in update.py. - INDEXING_PER_SOURCE_CAP: "" - ENABLED_CONNECTOR_TYPES: "" - DISABLE_INDEX_UPDATE_ON_SWAP: "" - DASK_JOB_CLIENT_ENABLED: "" - CONTINUE_ON_CONNECTOR_FAILURE: "" - EXPERIMENTAL_CHECKPOINTING_ENABLED: "" - CONFLUENCE_CONNECTOR_LABELS_TO_SKIP: "" - JIRA_API_VERSION: "" - WEB_CONNECTOR_VALIDATE_URLS: "" - GONG_CONNECTOR_START_TIME: "" - NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP: "" - # DB Retention (daily Celery beat at 08:00 UTC; backend/danswer/db/retention.py). - # All defaults are sensible — override only when you need a tighter window. 
- RETENTION_DAYS_KOMBU: "" # default 7 (Celery broker queue) - RETENTION_DAYS_TASK_QUEUE: "" # default 30 (terminal task_queue_jobs only) - RETENTION_DAYS_INDEX_ATTEMPT: "" # default 0 = disabled (opt-in to keep history) - RETENTION_KEEP_LAST_N_INDEX_ATTEMPTS: "" # default 20 per (cc-pair, embedding model) - RETENTION_DAYS_CHAT: "" # default 30 (chat_session + chat_message + LO blobs) - RETENTION_DAYS_PERMISSION_SYNC: "" # default 30 (terminal permission_sync_run only) - RETENTION_DAYS_USAGE_REPORTS: "" # default 90 (usage_reports + file_store + LO blobs) - RETENTION_BATCH_SIZE: "" # default 5000 rows per DELETE - RETENTION_MAX_BATCHES: "" # default 200 batches per policy per run - # Analytics rollup (daily Celery beat at 07:30 UTC, 30 min before retention; - # backend/danswer/db/analytics_rollup.py). The lookback is the late-feedback - # grace period — MUST be < RETENTION_DAYS_CHAT to avoid recomputing days - # whose source rows have already been deleted. - ANALYTICS_LATE_FEEDBACK_BUFFER_DAYS: "" # default 2 - # DanswerBot SlackBot Configs - DANSWER_BOT_SLACK_APP_TOKEN: "" - DANSWER_BOT_SLACK_BOT_TOKEN: "" - DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER: "" - DANSWER_BOT_DISPLAY_ERROR_MSGS: "" - DANSWER_BOT_RESPOND_EVERY_CHANNEL: "" - DANSWER_BOT_DISABLE_COT: "" # Currently unused - OPSGENIE_API_KEY: "" - NOTIFY_SLACKBOT_NO_ANSWER: "" - # Logging - # Optional Telemetry, please keep it on (nothing sensitive is collected)? 
<3 - # https://docs.danswer.dev/more/telemetry - DISABLE_TELEMETRY: "" - LOG_LEVEL: "" - LOG_ALL_MODEL_INTERACTIONS: "" - LOG_VESPA_TIMING_INFORMATION: "" - # Shared or Non-backend Related - INTERNAL_URL: "http://api-server-service:80" # for web server - WEB_DOMAIN: "http://localhost:3000" # for web server and api server - DOMAIN: "localhost" # for nginx diff --git a/deployment/kubernetes/indexing_model_server-service-deployment.yaml b/deployment/kubernetes/indexing_model_server-service-deployment.yaml deleted file mode 100644 index d44b52e9289..00000000000 --- a/deployment/kubernetes/indexing_model_server-service-deployment.yaml +++ /dev/null @@ -1,59 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: indexing-model-server-service -spec: - selector: - app: indexing-model-server - ports: - - name: indexing-model-server-port - protocol: TCP - port: 9000 - targetPort: 9000 - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: indexing-model-server-deployment -spec: - replicas: 1 - selector: - matchLabels: - app: indexing-model-server - template: - metadata: - labels: - app: indexing-model-server - spec: - containers: - - name: indexing-model-server - image: danswer/danswer-model-server:latest - imagePullPolicy: IfNotPresent - command: [ "uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000" ] - ports: - - containerPort: 9000 - envFrom: - - configMapRef: - name: env-configmap - env: - - name: INDEXING_ONLY - value: "True" - volumeMounts: - - name: indexing-model-storage - mountPath: /root/.cache - volumes: - - name: indexing-model-storage - persistentVolumeClaim: - claimName: indexing-model-pvc ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: indexing-model-pvc -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 3Gi diff --git a/deployment/kubernetes/nginx-configmap.yaml b/deployment/kubernetes/nginx-configmap.yaml deleted file mode 100644 index 08b945d599c..00000000000 
--- a/deployment/kubernetes/nginx-configmap.yaml +++ /dev/null @@ -1,44 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: nginx-configmap -data: - nginx.conf: | - upstream api_server { - server api-server-service:80 fail_timeout=0; - } - - upstream web_server { - server web-server-service:80 fail_timeout=0; - } - - server { - listen 80; - server_name $$DOMAIN; - - client_max_body_size 5G; # Maximum upload size - - location ~ ^/api(.*)$ { - rewrite ^/api(/.*)$ $1 break; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - proxy_set_header X-Forwarded-Host $host; - proxy_set_header Host $host; - proxy_http_version 1.1; - proxy_buffering off; - proxy_redirect off; - proxy_pass http://api_server; - } - - location / { - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - proxy_set_header X-Forwarded-Host $host; - proxy_set_header Host $host; - proxy_http_version 1.1; - proxy_redirect off; - proxy_pass http://web_server; - } - } diff --git a/deployment/kubernetes/nginx-service-deployment.yaml b/deployment/kubernetes/nginx-service-deployment.yaml deleted file mode 100644 index 27b14794ee3..00000000000 --- a/deployment/kubernetes/nginx-service-deployment.yaml +++ /dev/null @@ -1,55 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: nginx-service -spec: - selector: - app: nginx - ports: - - name: http - protocol: TCP - port: 80 - targetPort: 80 - - name: danswer - protocol: TCP - port: 3000 - targetPort: 80 - type: LoadBalancer ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nginx-deployment -spec: - replicas: 1 - selector: - matchLabels: - app: nginx - template: - metadata: - labels: - app: nginx - spec: - containers: - - name: nginx - image: nginx:1.23.4-alpine - ports: - - containerPort: 80 - env: - - name: DOMAIN - valueFrom: - 
configMapKeyRef: - name: env-configmap - key: DOMAIN - volumeMounts: - - name: nginx-conf - mountPath: /etc/nginx/conf.d - command: - - /bin/sh - - -c - - | - while :; do sleep 6h & wait $$!; nginx -s reload; done & nginx -g "daemon off;" - volumes: - - name: nginx-conf - configMap: - name: nginx-configmap diff --git a/deployment/kubernetes/postgres-service-deployment.yaml b/deployment/kubernetes/postgres-service-deployment.yaml deleted file mode 100644 index 17330204c1e..00000000000 --- a/deployment/kubernetes/postgres-service-deployment.yaml +++ /dev/null @@ -1,57 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: relational-db-service -spec: - selector: - app: relational-db - ports: - - protocol: TCP - port: 5432 - targetPort: 5432 - clusterIP: None ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: relational-db-statefulset -spec: - serviceName: relational-db-service - replicas: 1 - selector: - matchLabels: - app: relational-db - template: - metadata: - labels: - app: relational-db - spec: - containers: - - name: relational-db - image: postgres:15.2-alpine - env: - - name: POSTGRES_USER - valueFrom: - secretKeyRef: - name: danswer-secrets - key: postgres_user - - name: POSTGRES_PASSWORD - valueFrom: - secretKeyRef: - name: danswer-secrets - key: postgres_password - ports: - - containerPort: 5432 - volumeMounts: - - mountPath: /var/lib/postgresql/data - name: db-storage - subPath: postgres - volumeClaimTemplates: - - metadata: - name: db-storage - spec: - accessModes: ["ReadWriteOnce"] - resources: - requests: - # Adjust the storage request size as needed. 
- storage: 5Gi diff --git a/deployment/kubernetes/secrets.yaml b/deployment/kubernetes/secrets.yaml deleted file mode 100644 index c135a29f676..00000000000 --- a/deployment/kubernetes/secrets.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# The values in this file should be changed -apiVersion: v1 -kind: Secret -metadata: - name: danswer-secrets -type: Opaque -data: - postgres_user: cG9zdGdyZXM= # "postgres" base64 encoded - postgres_password: cGFzc3dvcmQ= # "password" base64 encoded - google_oauth_client_id: ZXhhbXBsZS1jbGllbnQtaWQ= # "example-client-id" base64 encoded. You will need to provide this, use echo -n "your-client-id" | base64 - google_oauth_client_secret: example_google_oauth_secret # "example-client-secret" base64 encoded. You will need to provide this, use echo -n "your-client-id" | base64 diff --git a/deployment/kubernetes/vespa-service-deployment.yaml b/deployment/kubernetes/vespa-service-deployment.yaml deleted file mode 100644 index 5016258b757..00000000000 --- a/deployment/kubernetes/vespa-service-deployment.yaml +++ /dev/null @@ -1,63 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: document-index-service -spec: - selector: - app: vespa - ports: - - name: vespa-tenant-port - protocol: TCP - port: 19071 - targetPort: 19071 - - name: vespa-port - protocol: TCP - port: 8081 - targetPort: 8081 - type: LoadBalancer ---- -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: vespa - labels: - app: vespa -spec: - replicas: 1 - serviceName: vespa - selector: - matchLabels: - app: vespa - template: - metadata: - labels: - app: vespa - spec: - containers: - - name: vespa - image: vespaengine/vespa:8.277.17 - imagePullPolicy: IfNotPresent - securityContext: - privileged: true - runAsUser: 0 - ports: - - containerPort: 19071 - - containerPort: 8081 - readinessProbe: - httpGet: - path: /state/v1/health - port: 19071 - scheme: HTTP - volumeMounts: - - name: vespa-storage - mountPath: /opt/vespa/var/ - volumeClaimTemplates: - - metadata: - name: 
vespa-storage - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - # Adjust the storage request size as needed. - storage: 5Gi diff --git a/deployment/kubernetes/web_server-service-deployment.yaml b/deployment/kubernetes/web_server-service-deployment.yaml deleted file mode 100644 index b19b8e37986..00000000000 --- a/deployment/kubernetes/web_server-service-deployment.yaml +++ /dev/null @@ -1,38 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: web-server-service -spec: - selector: - app: web-server - ports: - - protocol: TCP - port: 80 - targetPort: 3000 - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: web-server-deployment -spec: - replicas: 1 - selector: - matchLabels: - app: web-server - template: - metadata: - labels: - app: web-server - spec: - containers: - - name: web-server - image: danswer/danswer-web-server:latest - imagePullPolicy: IfNotPresent - ports: - - containerPort: 3000 - # There are some extra values since this is shared between services - # There are no conflicts though, extra env variables are simply ignored - envFrom: - - configMapRef: - name: env-configmap diff --git a/k8s/README.md b/k8s/README.md new file mode 100644 index 00000000000..3af163867d4 --- /dev/null +++ b/k8s/README.md @@ -0,0 +1,706 @@ +# `k8s/` — Kustomize-based Kubernetes manifests + +``` +k8s/ +├── base/ Environment-neutral manifests +├── overlays/ +│ ├── prod/ Darwin AKS production +│ └── local/ Local dev (Rancher Desktop, Docker Desktop, kind) +└── optional/ Opt-in kustomize components (split-background + Dask) +``` + +## Quick start + +```bash +# Preview what would be applied: +kubectl kustomize k8s/overlays/prod + +# Apply via the guarded wrapper (verify context first!). 
It diffs vs live +# and REFUSES a Vespa version jump >30 minor releases (which would crash +# the cluster — see "Vespa version guard" below and AGENTS.md §10): +kubectl config current-context # → 'darwin' for prod +k8s/scripts/guarded-apply.sh prod + +# Local: +kubectl config use-context rancher-desktop # or docker-desktop / kind / etc. +k8s/scripts/guarded-apply.sh local +``` + +> Raw `kubectl apply -k k8s/overlays/prod` still works, but prefer +> `guarded-apply.sh` — it runs the Vespa version check + `kubectl diff` +> before applying. Raw apply has no guard; that's how the Vespa outage +> happened. +> +> **Vespa is no longer part of the app overlays.** `apply -k overlays/prod` +> deploys the app only and never touches Vespa. Vespa has its own overlays +> (`overlays/prod-vespa`, `overlays/local-vespa`) and is applied deliberately: +> `k8s/scripts/guarded-apply.sh prod-vespa` (or `kubectl apply -k +> k8s/overlays/prod-vespa`). Version upgrades still go through +> `k8s/scripts/vespa-upgrade.sh`. See "Upgrade Vespa" below. + +## What lives where + +### `base/` + +One file per logical service. Each file contains the resources that *belong +together* (e.g. `nginx.yaml` has the Deployment + Service + ConfigMap for +nginx). Image references use *logical names* (`danswer-backend`, not +`sfbrdevhelmweacr.azurecr.io/danswer/danswer-backend:vha-138`) — the overlay's +`images:` block rewrites them at render time. + +`base/kustomization.yaml` lists every resource. To add a new service, drop +its YAML in here and add it to that list. + +The **Vespa** cluster lives in its own subfolder, `base/vespa/`, with its own +`kustomization.yaml`. It is **deliberately NOT referenced from +`base/kustomization.yaml`** — so the app overlays (`prod`/`local`) don't deploy +it. 
It's a stateful subsystem with a distinct lifecycle (pinned version, +per-role image names, ordered upgrade procedure — see "Upgrade Vespa" below), +and re-applying it on every routine app rollout risks rolling the StatefulSets +on any manifest drift. Instead it has dedicated overlays — `overlays/prod-vespa` +and `overlays/local-vespa` — that point at `base/vespa/` and carry the pinned +Vespa images + namespace. Apply Vespa via those, never via the app overlay. + +**Base does NOT contain:** +- `env-configmap` — generated by each overlay from `env.properties` +- `danswer-secrets` — generated by each overlay from `secrets.env` (gitignored) + +### `overlays/prod/` + +Single source of truth for the Darwin production cluster: + +| File | What it controls | +|---|---| +| `kustomization.yaml` | image tags, replica counts, generator wiring, namespace (`darwin`) | +| `env.properties` | every non-secret env var (committed) | +| `secrets.env.example` | template for `secrets.env` (committed, placeholders) | +| `secrets.env` | real prod secrets (**gitignored — never commit**) | + +To change a deployed image tag: edit `kustomization.yaml`'s `images:` block. +To change a config value: edit `env.properties`. To rotate a secret: edit +`secrets.env` and reapply. **One file per concern, in one place.** + +### `overlays/local/` + +Same shape as `prod/` but with local-dev defaults: AUTH_TYPE=disabled, +DOMAIN=localhost, smaller replica counts, latest image tags, hosts pointed at +`host.docker.internal` so external Postgres / Vespa running on the host +machine (e.g. via docker-compose) are reachable. + +### `optional/` + +Opt-in kustomize **components**. Each is a directory with its own +`kustomization.yaml` of `kind: Component`. Image refs inside use the +same logical names as base (`danswer-backend`), so when an overlay +opts in, the overlay's `images:` block parameterizes them — identical +to base. 
(They are NOT meant for standalone `kubectl apply -f`; the +logical image name only resolves through an overlay.) + +Two kinds of thing live under `optional/`: + +- **Components** (`kind: Component`) — opt-in *into a danswer overlay* via + its `components:` block. Image refs use the same logical names as base, + so the overlay's `images:` block parameterizes them. NOT for standalone + `kubectl apply -f`. +- **Standalone installs** (`kind: Kustomization`) — cluster-scoped + infrastructure applied on their own with `kubectl apply -k`, not pulled + into an overlay. + +| Path | Kind | What it ships | When to use | +|---|---|---|---| +| `background-scaling/` | Component | `background-lite` (beat + celery + slack-listener co-located, 1 pod) + `background-indexer-scheduler` + remote Dask (`dask-scheduler`, `dask-worker`), replacing the combined `background` deployment in base | Horizontal scaling of background/indexing tasks | +| `keda-indexing-autoscale/` | Component | A KEDA `ScaledObject` + `TriggerAuthentication` that autoscales `dask-worker` on indexing backlog | Bursty indexing; scale workers to 0 when idle (needs KEDA + background-scaling) | +| `keda/` | Standalone | The KEDA operator itself (CRDs + operator), pinned, into the `keda` namespace | Prereq for `keda-indexing-autoscale` — install once per cluster | + +**The "flag" for opting a *component* in** is a single line in the +overlay's `kustomization.yaml` `components:` block (see "Apply an optional +component" below). To add another: create a new directory under +`optional/` with a `kind: Component` `kustomization.yaml`, use logical +image names in its manifests, and reference it from the overlay's +`components:`. + +## First-time setup + +```bash +# Bootstrap your prod secrets file from the template: +cp k8s/overlays/prod/secrets.env.example k8s/overlays/prod/secrets.env +$EDITOR k8s/overlays/prod/secrets.env # fill in real values + +# (Same for local if you'll use the local overlay.) 
+cp k8s/overlays/local/secrets.env.example k8s/overlays/local/secrets.env +$EDITOR k8s/overlays/local/secrets.env + +# Verify the render before applying: +kubectl kustomize k8s/overlays/prod | less + +# Apply the app (does NOT include Vespa): +k8s/scripts/guarded-apply.sh prod + +# On a FRESH cluster, also apply Vespa once (separate overlay — guarded): +k8s/scripts/guarded-apply.sh prod-vespa +``` + +## Common workflows + +### Bump a backend image tag in prod + +1. Edit `k8s/overlays/prod/kustomization.yaml`, change `images: - name: danswer-backend` → `newTag: <new-tag>`. +2. Commit + push. +3. On your machine with the `darwin` kube context active: + ```bash + k8s/scripts/guarded-apply.sh prod # or: kubectl apply -k k8s/overlays/prod + ``` + +### Upgrade Vespa (DANGER — use the script, never a bare apply) + +Vespa is version-stateful: the config server refuses an auto-upgrade +spanning **>30 minor releases**, and forcing it past that risks the +on-disk index format. A careless bump = cluster-wide outage (this +happened — AGENTS.md §10). The five Vespa StatefulSets also must roll in +a specific order, one at a time, health-checked between steps — ordering +that **cannot** be expressed in kustomize (it's declarative; a `kubectl +apply` of a bumped tag rolls every role at once, unordered). + +So Vespa upgrades go through a dedicated script, **not** `guarded-apply.sh` +and **not** a hand-edit of the overlay tags. The manifests support this in +two ways: each StatefulSet uses a **per-role logical image name** +(`vespa-configserver`, `vespa-admin`, `vespa-content`, `vespa-feed`, +`vespa-query` — see `base/vespa/`), so versions can move independently; +and `content`/`admin` have readiness probes (with +`publishNotReadyAddresses: true` on `vespa-internal` so peer discovery is +never gated) so the rollout waits for each pod to be healthy. + +```bash +# 1. Dry-run first — reads the live version, prints the ordered plan, changes nothing: +DRY_RUN=1 k8s/scripts/vespa-upgrade.sh 8.620.43 darwin + +# 2. Run it.
Upgrades in order: configserver → admin → content (one +# ordinal at a time, via partition stepping) → feed → query, waiting +# for /state/v1/health 200 between each. Refuses >30-minor / major / +# downgrade jumps (FORCE=1 to override). Single hop per run — for a +# bigger jump, run repeatedly with intermediate versions +# (8.600 → 8.630 → 8.660 → …), each image must exist + be format-compatible. +k8s/scripts/vespa-upgrade.sh 8.620.43 darwin + +# 3. After success, sync the per-role vespa newTag values in +# k8s/overlays/{prod,local}-vespa/kustomization.yaml to the new version, +# so git ≈ live (the script reminds you). +``` + +The script uses `kubectl exec → localhost` for health checks (the cluster +runs Istio; external probes hit mTLS), and stops immediately if any pod +fails to come back healthy, leaving the cluster mid-upgrade for you to +investigate rather than barrelling into the next role. + +### Vespa version guard (`k8s/scripts/guarded-apply.sh`) + +The wrapper reads the **live** running Vespa version and the version your +overlay would deploy, and: +- **refuses** an upgrade that jumps >30 minor releases (Vespa's limit), +- **refuses** a major-version change (needs a dedicated migration), +- **refuses** a floating/unparseable tag (`:latest`), +- **warns + requires `FORCE=1`** on a large downgrade (legit only when + recovering to the on-disk version), +- otherwise runs `kubectl diff` then `kubectl apply -k`. + +It checks against **live**, not the repo's previous pin, on purpose — +config can drift out of git, and live is the only truth that matters at +apply time. + +`guarded-apply.sh` is a **safety net for everyday applies** (config/image +changes), not the Vespa upgrade tool — it would refuse a dangerous Vespa +jump, but it still rolls all roles at once with no ordering. To actually +upgrade Vespa, use `vespa-upgrade.sh` (see "Upgrade Vespa" above). + +### Add a new env var (non-secret) + +1. 
Append a `KEY=value` line in `k8s/overlays/prod/env.properties`. +2. (Same for `overlays/local/env.properties` with the local value.) +3. `kubectl apply -k k8s/overlays/prod` — kustomize regenerates the + `env-configmap` with the new value. +4. **Restart the consumers — the apply does NOT do this for you.** We set + `disableNameSuffixHash: true`, so the ConfigMap keeps a stable name + and its content change does NOT trigger a pod rollout. `envFrom` reads + env only at pod start, so running pods keep the old values until + restarted. See "Which workloads to restart after a config change" below. + +### Add a new secret + +1. Add the key to `k8s/overlays/prod/secrets.env.example` with a placeholder. +2. Add the same key to your local `k8s/overlays/prod/secrets.env` with the real value (gitignored). +3. If the secret needs to be exposed to the app, either: + - Add an `env` entry with `secretKeyRef` to the relevant deployment in `base/`, or + - Rely on the `envFrom: secretRef: danswer-secrets` wiring already in `api-server.yaml` and `background.yaml` — every key in `secrets.env` is automatically exposed as an env var matching the key name. + +### Enable the Redis cache + per-user rate limiter + +Redis ships in `base/` (deployed in every environment). 
To turn the +features on, flip the flags in `k8s/overlays/prod/env.properties`: + +``` +REDIS_KV_CACHE_ENABLED=true # read-through cache on settings/tokens/invited-users +REQUEST_RATE_LIMIT_ENABLED=true # per-USER request cap (20/min, 300/hr below) +REQUEST_RATE_LIMIT_PER_MINUTE=20 # per authenticated user (per-IP for anon), not global +REQUEST_RATE_LIMIT_PER_HOUR=300 +PERSONA_CACHE_ENABLED=true # global persona-list cache + per-user group cache +CC_PAIR_INFO_CACHE_ENABLED=true # chat-page connector indexing-status (~300ms read), 60s global TTL +DOCUMENT_SET_CACHE_ENABLED=true # per-user document-set list (chat bundle), write-through busted, 300s backstop +``` + +Then apply **and restart the consumers** (the apply alone won't — see the +note below on `disableNameSuffixHash`): + +```bash +k8s/scripts/guarded-apply.sh prod # or: kubectl apply -k k8s/overlays/prod +kubectl rollout restart deploy/api-server-deployment deploy/background-deployment -n darwin +kubectl rollout status deploy/api-server-deployment -n darwin +``` + +Redis (`base/redis.yaml`) is already running, so the pods connect on +restart. If Redis were down, the code is fail-open — it degrades to +direct Postgres / permissive, never an outage. + +### Verify Redis caching is actually working + +After enabling + restarting, confirm the cache is being populated and hit. +All commands run against the in-cluster Redis (`redis-0`, no auth). 
The +cache uses these key namespaces: + +| Key | Feature | +|---|---| +| `danswer:kv:` | KV cache (settings, tokens, invited users) — `REDIS_KV_CACHE_ENABLED` | +| `danswer:personas:all:not_deleted` | Assistants list (global) — `PERSONA_CACHE_ENABLED` | +| `danswer:personas:groups:` | Per-user group cache — `PERSONA_CACHE_ENABLED` | +| `danswer:ratelimit:msg:::` | Per-user request counters — `REQUEST_RATE_LIMIT_ENABLED` | +| `danswer:cc_pair_basic_info` | Chat-page connector indexing-status — `CC_PAIR_INFO_CACHE_ENABLED` | +| `danswer:document_sets:all` | Global document-set list (chat bundle, MIT) — `DOCUMENT_SET_CACHE_ENABLED` | + +**1. Are the cache keys present?** (fastest "is it on" check — use `--scan`, never `KEYS`, on a live Redis) +```bash +kubectl exec -n darwin redis-0 -c redis -- redis-cli --scan --pattern 'danswer:*' +``` +Seeing `danswer:personas:all:not_deleted` means the assistants API +(`GET /persona`) has cached. No `danswer:*` keys at all = the flags +didn't take effect (pods not restarted? flag not `true`?). + +**2. Is it being hit?** (hit/miss ratio — cluster-wide, but proves reads hit cache) +```bash +kubectl exec -n darwin redis-0 -c redis -- redis-cli INFO stats | grep keyspace +# keyspace_hits should climb far faster than keyspace_misses +``` + +**3. Inspect a specific entry** (TTL counting down + real payload): +```bash +kubectl exec -n darwin redis-0 -c redis -- redis-cli TTL danswer:personas:all:not_deleted # ~86400, decreasing +kubectl exec -n darwin redis-0 -c redis -- redis-cli STRLEN danswer:personas:all:not_deleted # bytes of cached JSON +``` + +**4. Watch a live request hit the cache** (definitive — run, then load Manage Assistants in the UI): +```bash +kubectl exec -it -n darwin redis-0 -c redis -- redis-cli MONITOR +# hit: "GET" "danswer:personas:all:not_deleted" +# miss: "GET" ... 
(nil) then "SET" "danswer:personas:all:not_deleted" "[...]" "EX" "86400" +``` +⚠️ Stop `MONITOR` quickly (Ctrl-C) — it echoes every command and is heavy on a busy Redis. + +**5. Force a miss→refill** (proves the read-through path; safe — just one extra Postgres read): +```bash +kubectl exec -n darwin redis-0 -c redis -- redis-cli DEL danswer:personas:all:not_deleted +# load the assistants page once, then: +kubectl exec -n darwin redis-0 -c redis -- redis-cli EXISTS danswer:personas:all:not_deleted # 1 = repopulated +``` + +**6. Verify write-through invalidation** (no stale assistant lists): +```bash +kubectl exec -n darwin redis-0 -c redis -- redis-cli TTL danswer:personas:all:not_deleted # note it exists +# rename an assistant in the admin UI, then re-check: +kubectl exec -n darwin redis-0 -c redis -- redis-cli TTL danswer:personas:all:not_deleted # -2 = busted by the mutation +# next page load refills it with the new name. +``` + +**Note:** the cache code is **silent on success** and only logs on Redis +errors (fail-open). So there's nothing in the api-server logs confirming +a hit — Redis-side inspection above is the only way to observe it. A +`Redis GET/SET/DEL failed` warning in api-server/background logs means +Redis is unreachable and the app is silently falling back to Postgres. + +### Which workloads to restart after a config change + +Because of `disableNameSuffixHash: true`, an `env-configmap` change does +**not** auto-roll pods (see Footguns). After any `env.properties` edit, +restart the pods that actually consume the changed vars: + +| Changed vars | Restart | Why | +|---|---|---| +| Redis flags (`REDIS_*`, `REQUEST_RATE_LIMIT_*`, `PERSONA_CACHE_*`, `CC_PAIR_INFO_CACHE_*`, `DOCUMENT_SET_CACHE_*`) | `api-server` + `background` | api-server runs the rate limiter + reads the caches; background reads config (Slack/OAuth tokens) and busts the persona + document-set caches on mutations (incl. 
the doc-set sync task) | +| Celery broker (`CELERY_BROKER_REDIS_ENABLED`, `CELERY_REDIS_DB_NUMBER`) | `background` | the Celery worker + beat (in the `background` pod) read the broker URL at startup. **Restart worker AND beat together** so they don't split across two brokers mid-flight | +| DB pool (`POSTGRES_POOL_SIZE`, `POSTGRES_POOL_OVERFLOW`) | `api-server` + `background` | engine pool is built once per process at first DB use; both pods build their own | +| LLM / search / connector vars (`GEN_AI_*`, `QA_TIMEOUT`, `MULTILINGUAL_*`, etc.) | `api-server` + `background` | both run the chat/search/index paths | +| Model-server vars (`DOCUMENT_ENCODER_MODEL`, `NORMALIZE_EMBEDDINGS`, …) | `inference-model-server` + `indexing-model-server` | only the model servers read these | +| `WEB_DOMAIN` / `INTERNAL_URL` / frontend | `web-server` (+ `api-server` for `WEB_DOMAIN` / OIDC redirect) | | + +```bash +# The common case (Redis features, or any backend env change): +kubectl rollout restart deploy/api-server-deployment deploy/background-deployment -n darwin +``` + +> **Split-background topology** (the `background-scaling` component): there +> is no `background-deployment` — restart the split pods that run danswer +> backend code instead: +> `kubectl rollout restart deploy/background-lite-deployment deploy/background-indexer-scheduler-deployment deploy/dask-worker-deployment -n darwin`. +> The model servers, `nginx`, and `web-server` do **not** use the Redis +> features — no need to restart them for a Redis flag flip. + +### Celery on Redis + Postgres pool sizing + +Two knobs that reduce/contain Postgres connection pressure (the real ceiling +for chat at scale — DB sessions are held through the LLM stream): + +- **`CELERY_BROKER_REDIS_ENABLED=true`** (prod on, local off) moves Celery's + broker + result backend from Postgres to Redis (logical DB + `CELERY_REDIS_DB_NUMBER`, default `1` — separate from the cache/rate-limit + DB `0`). 
This stops the Celery worker + beat from polling/writing Postgres + for their queue. Task **status** is unaffected: this fork tracks it in its + own `task_queue_jobs` table, not the Celery backend. Indexing is still Dask. + Note: unlike the **cache** (fail-open), the **broker** is a hard dependency + — if Redis is down, Celery maintenance tasks (prune / doc-set sync / + user-group sync / deletion / analytics / retention) won't run until it's + back. Chat and indexing are unaffected (they don't use Celery). The queue is also not durable: the base `redis.yaml` runs Redis with persistence off (`--appendonly no`, `--save ''`) and `--maxmemory-policy allkeys-lru`, so queued Celery messages are lost on a Redis restart and can be evicted under memory pressure. +- **`POSTGRES_POOL_SIZE` / `POSTGRES_POOL_OVERFLOW`** (default 40 / 10) size + the SQLAlchemy pool **per process**. A pod's max connections is + `(size + overflow)` per engine, and api-server uses both a sync and an + async engine — so up to `2 × (size + overflow)` per api-server pod. + Cluster total = that × replicas of every DB-touching pod, and must stay + under Azure Postgres `max_connections` with headroom for boot migrations. + **Lower these as you add api-server replicas.** + +Verify the Celery queue is on Redis (not Postgres) after enabling: +```bash +# Celery keys live in DB 1; you should see kombu/celery keys appear here: +kubectl exec -n darwin redis-0 -c redis -- redis-cli -n 1 --scan --pattern '*' | head +# And the old Postgres broker tables should stop growing (kombu_message). +``` + +### File store: offload bytes to Azure Blob + +By default uploaded files / chat attachments / connector blobs are stored +as **Postgres large objects**. At scale that bloats the DB/WAL/backups, and +every read pins a Postgres connection for the whole file stream (competes +with the chat connection pool). The `AzureBlobFileStore` backend keeps the +small **metadata row** in Postgres but moves the **bytes** to Azure Blob. + +Cutover (graceful — un-migrated files keep reading from their lobj): + +```bash +# 1. The image must include azure-storage-blob (it's in requirements now) — +# rebuild/redeploy the backend image. +# 2.
alembic upgrade head (adds file_store.object_key, makes lobj_oid nullable) +# 3. Put the ACCOUNT-KEY connection string in secrets.env (Storage account → +# Access keys → Connection string — the one with AccountName + AccountKey): +# AZURE_BLOB_CONNECTION_STRING=... (see secrets.env.example) +# Direct upload needs the account KEY so the server can mint scoped +# per-blob SAS (see below); a SAS-token connection string is rejected. +# 4. Flip the backend in env.properties: +# FILE_STORE_TYPE=AzureBlobFileStore +# AZURE_BLOB_CONTAINER=danswer-files +# 5. Apply + restart api-server & background (configmap won't auto-roll). +# 6. Migrate existing lobjs → Blob (idempotent; safe to resume): +cd backend && PYTHONPATH=$(pwd) python scripts/migrate_file_store_to_azure_blob.py +# (--dry-run first to see the count) +``` + +Notes: +- Steps 4–5 can precede 6: new uploads go to Blob immediately, and reads of + not-yet-migrated files transparently fall back to the lobj. Run the + migration promptly so the lobjs (and the bloat) actually go away. +- `azure-storage-blob` is **lazy-imported** — the app runs fine without it + unless `FILE_STORE_TYPE=AzureBlobFileStore` is set, so the dep/flag are + decoupled. +- Default (`PostgresBackedFileStore`) is unchanged; this is fully opt-in. +- **Setting the connection string in a shell?** Single-quote it: + `export AZURE_BLOB_CONNECTION_STRING='...AccountKey=...==;...'`. The `;` + separators are shell command separators — unquoted, the value is silently + truncated at the first `;` (you'll see `KeyError: 'AccountName'` / + "Connection string missing required connection details"). Verify with + `python -c "import os;print(repr(os.environ.get('AZURE_BLOB_CONNECTION_STRING')))"`. +- The account key is the storage account's **master credential** — keep it + in `secrets.env` / `danswer-secrets` only (never `env.properties` or git), + and **rotate it** if it's ever exposed (Access keys → Rotate). 
+ +#### Direct-to-Blob chat uploads (requires Storage CORS) + +When the Azure backend is active, chat file uploads go **straight from the +browser to Blob** via a short-lived SAS URL (`POST /chat/file/upload-url` → +browser `PUT` → `POST /chat/file/confirm`), bypassing the api-server — much +faster and it shows a real progress bar. On the Postgres backend the client +auto-falls-back to the two-hop server upload. + +The api-server mints the SAS from the **account key** in the connection +string: a per-blob, write+create-only, 30-minute token. The master key never +leaves the server; the browser only ever sees a token scoped to one blob. +This is why step 3 above requires the account-key connection string — a +SAS-token connection string has no key to sign with and is rejected with a +clear error. + +For the browser `PUT` to succeed, the storage account needs **CORS rules** +allowing each web origin (one-time, per account). Add a rule for **every** +origin that will upload — prod *and* local dev if you test against this +account: + +```bash +az storage cors add --services b \ + --methods PUT OPTIONS GET \ + --origins https://darwin.westeurope.cloudapp.azure.com http://localhost:3000 \ + --allowed-headers '*' --exposed-headers '*' --max-age 3600 \ + --account-name "$AZURE_STORAGE_ACCOUNT" --account-key "$AZURE_STORAGE_KEY" +``` + +Troubleshooting direct uploads: +- **"network error during upload"** in the browser = missing/incorrect CORS + rule (the preflight `OPTIONS` fails). First thing to check. The two-hop + server path doesn't need CORS, so this only bites the Blob backend. +- The target container is **auto-created** on first use (the account key has + create permission), so you don't have to pre-create it.
+ +#### Chat upload limits + +Chat-attached files are stuffed **whole** into the LLM prompt (the search +tool is disabled when files are attached), so they're bounded by two env +gates — enforced on the backend and pre-checked in the browser: + +- `CHAT_FILE_MAX_SIZE_MB` (default `25`) — hard byte cap; oversize uploads + are rejected with a message. Surfaced to the web client as + `Settings.chat_file_max_size_mb` so it can reject before uploading. +- `CHAT_FILE_MAX_TOKEN_FRACTION` (default `0.5`) — after text extraction, a + file whose token count exceeds this fraction of the model's input window + is rejected (it would crowd out the actual conversation). + +### Apply an optional component (split-background + Dask) + +Optional features are kustomize components, opted into from the overlay +so they inherit its image tags / namespace / generated config. The +component carries its own replica counts (in +`optional/background-scaling/kustomization.yaml`) and env-neutral +manifests; the overlay adds image tags and any env-specific scheduling. + +**To enable it, two edits to the overlay's `kustomization.yaml` are +REQUIRED (steps 1–2); step 3 is prod-only and optional.** + +```yaml +# 1. REQUIRED — pull the component in: +components: + - ../../optional/background-scaling + +# 2. REQUIRED — scale the base combined `background` deployment to 0, or +# you run two Celery beat schedulers on one broker (every periodic task +# fires twice). This is the ONLY entry you must add to the overlay's +# replicas: block; the split deployments' counts come from the component +# (see "Replica counts" below). +replicas: + - name: background-deployment + count: 0 + +# 3. OPTIONAL (prod only) — the component manifests are env-neutral — no node +# affinity. To pin the indexing-side pods to the Darwin indexcpu pool, +# add a patch. 
Skip this on local (no such node pool): +patches: + - target: + kind: Deployment + labelSelector: "app in (background-lite,background-indexer-scheduler,dask-scheduler,dask-worker)" + patch: |- + - op: add + path: /spec/template/spec/affinity + value: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - {key: agentpool, operator: In, values: [indexcpu]} + - op: add + path: /spec/template/spec/tolerations + value: + - {effect: NoSchedule, key: darwin, operator: Equal, value: indexing} +``` + +Then preview, verify, and apply — these are the commands to deploy the +background-scaling topology (there is no standalone `kubectl apply -f` +for `optional/background-scaling/`: its manifests use the logical +`danswer-backend` image name, which only resolves through the overlay's +`images:` block): + +```bash +# Preview the rendered split-background + Dask pods: +kubectl kustomize k8s/overlays/prod | grep -E "name: (background|dask)" + +# Diff against the live cluster before committing to it: +kubectl diff -k k8s/overlays/prod + +# Apply (verify context first!): +kubectl config current-context # → 'darwin' for prod +kubectl apply -k k8s/overlays/prod + +# Watch the new pods come up: +kubectl rollout status deploy/dask-scheduler-deployment -n darwin +kubectl rollout status deploy/background-lite-deployment -n darwin +``` + +kustomize applies everything together; the new pods reference the same +overlay-generated `env-configmap` / `danswer-secrets`, and their +`danswer-backend` image is rewritten to the overlay's pinned tag.
+ +#### Replica counts + +The four split deployments get their counts from the **`replicas:` block in +`optional/background-scaling/kustomization.yaml`** — that's the single +source of truth: + +```yaml +replicas: + - name: background-lite-deployment # singleton — beat + slack; never >1 + count: 1 + - name: background-indexer-scheduler-deployment # singleton — the update.py loop + count: 1 + - name: dask-scheduler-deployment # singleton + count: 1 + - name: dask-worker-deployment # ← THE indexing-throughput knob + count: 2 +``` + +The `replicas: N` you see inside each deployment YAML is just a manifest +**default** — kustomize's `replicas:` transformer overrides it at render +time, so editing the YAML directly has no effect through kustomize. To +scale indexing, change `dask-worker-deployment`'s count here. (You can also +override any of these from the *overlay's* own `replicas:` block — the +overlay is applied last and wins — handy if you want a different +dask-worker count per environment without editing the shared component.) + +`background-lite`, `background-indexer-scheduler`, and `dask-scheduler` are +**hard singletons** (`count: 1`); raising them double-runs beat / the Slack +websocket / the scheduler loop. Only `dask-worker` scales. + +#### Volumes / PVCs — the split deployments mount NONE + +None of the four split deployments mount `dynamic-pvc` or +`file-connector-pvc`. In this fork the file store (File-connector uploads) +is **Postgres-backed** (`PGFileStore` large objects) and the dynamic config +store is Postgres-backed too — nothing reads `/home/storage` or +`/home/file_connector_storage` (grep the code: zero references). The mounts +were upstream carryover. The base `api-server` / `background` deployments +also no longer mount them. The `dynamic-pvc` / `file-connector-pvc` claims +are still **defined** in `persistent-volumes.yaml` (so the live volumes +aren't deleted) — they're simply unmounted everywhere now. 
+ +(Aside: `dynamic-pvc` is declared `ReadWriteOnce` yet was mounted by +api-server + background on different nodes in prod — that "worked" only +because `azurefile-csi` is an SMB share, not a block device, so it ignores +the single-attach restriction. The RWO label was misleading-for-usage, not +an active outage risk. Moot now that nothing mounts it.) + +Rollback: remove the `components:` line (and the patch), set +`background-deployment` back to `count: 1`, re-apply. (The split pods are +pruned on the next apply only if you pass `--prune` together with a label selector (`kubectl apply -k k8s/overlays/prod --prune -l SELECTOR`) or `--all` — a bare `--prune` is rejected; otherwise delete +them by label.) Do NOT run the combined `background` deployment and +`background-lite` at non-zero replicas simultaneously — both run a celery +beat, and two beats on one broker fire every periodic task twice. + +### KEDA indexing autoscale (autoscale dask-worker on backlog) + +`optional/keda-indexing-autoscale/` autoscales `dask-worker-deployment` +based on real indexing demand read from Postgres, instead of a fixed +replica count. Use it when indexing load is bursty and you'd rather not +pay for idle workers. + +**How the metric works.** A KEDA PostgreSQL scaler runs this every 30s: + +```sql +SELECT COALESCE(SUM(LEAST(1, cnt)), 0) FROM ( + SELECT con.source, COUNT(*) cnt + FROM index_attempt ia JOIN connector con ON ia.connector_id = con.id + WHERE ia.status IN ('NOT_STARTED','IN_PROGRESS') + GROUP BY con.source) s +``` + +It returns the number of attempts that can run **concurrently** right now +— respecting `INDEXING_PER_SOURCE_CAP` (default 1, one per source). It is +deliberately **not** a raw pending count: 10 queued attempts of the same +source still only run one at a time, so spinning up 10 workers would waste +9. `targetQueryValue: 1` → desired replicas = the metric. + +**Why it's safe to scale down:** the metric counts `IN_PROGRESS` too, so +the replica count never drops below the number of running jobs. Kubernetes, not KEDA, picks *which* pod is removed on a scale-down, so a +busy worker can still be the one terminated — see the recommended companion change below. Scale-to-0 happens only when nothing is queued or +running.
(`status` is stored UPPERCASE — `native_enum=False` — verified +against the live DB.) + +**Prerequisites + how to enable:** +1. Install the KEDA operator cluster-wide (CRDs + operator, into its own + `keda` namespace — pinned, no Helm): + ```bash + kubectl apply --server-side -k k8s/optional/keda + kubectl get pods -n keda # operator + metrics-apiserver Running + ``` + (Installed once per cluster, independent of the danswer overlays. + `--server-side` is required — KEDA's CRDs are too large for client-side + apply. To bump KEDA, edit the version in `k8s/optional/keda/kustomization.yaml`.) +2. Opt in **after** background-scaling, and **remove** `dask-worker-deployment` + from the background-scaling `replicas:` block (KEDA owns that count now — + leaving a static replicas entry fights the autoscaler): + ```yaml + # k8s/overlays/prod/kustomization.yaml + components: + - ../../optional/background-scaling + - ../../optional/keda-indexing-autoscale + ``` +3. The scaler's `host`/`userName`/`dbName` in `scaledobject.yaml` are the + Darwin prod Postgres coords — make sure they match the overlay's + `POSTGRES_*`. The password comes from `danswer-secrets` via a + `TriggerAuthentication` (no duplication). +4. `k8s/scripts/guarded-apply.sh prod`, then watch: + ```bash + kubectl get scaledobject,hpa -n darwin + kubectl get pods -n darwin -l app=dask-worker -w + ``` + +**Tuning:** `maxReplicaCount` (default 4) should be ≈ your number of +distinct active source types (more is wasted under `PER_SOURCE_CAP=1`). +`minReplicaCount: 0` saves idle cost but adds ~30s-2min cold start on the +first index after idle — set to 1 to keep a worker warm. + +**Recommended companion change** (in `background-scaling/dask-worker.yaml`): +give the worker a `terminationGracePeriodSeconds` and a preStop that +retires the Dask worker, so a scale-down lets the current index attempt +finish instead of being killed mid-run. 
Even without it, a killed attempt +is retried by the indexing pipeline (no data loss, just rework). + +## Conventions + +- **`k8s/overlays/*/secrets.env` is gitignored.** Never commit it — it + holds real secret values. Commit `secrets.env.example` instead. +- **The `deployment/kubernetes/*` tree is upstream Onyx reference only.** + Not applied to Darwin. See AGENTS.md "Critical fact §9". + +## Footguns + +- **`disableNameSuffixHash: true`** is set in both overlays. Without it, + kustomize appends a content hash to generated ConfigMap/Secret names + (`env-configmap-abc123`), which would break deployments referencing + `env-configmap`/`danswer-secrets` by their plain names. Don't remove. + **Consequence:** because the name is stable, a ConfigMap/Secret content + change does NOT trigger an automatic pod rollout (the hash-suffix + behavior is exactly what would). After any `env.properties` / + `secrets.env` change you must **manually `kubectl rollout restart`** the + consuming workloads — see "Which workloads to restart after a config + change". (The trade-off is deliberate: stable names so the optional + components + secretKeyRefs resolve, at the cost of manual restarts.) +- **`behavior: create`** on the configMapGenerator means "create new", not + "merge with an existing one in base". Base intentionally ships no + ConfigMap with this name; the overlay owns it. If you ever add one to + base, switch to `behavior: merge` or `replace` to avoid name clash. +- **Image refs in base must match `images:` entries by *name*.** If you + add a new image to base/ (e.g. `redis-exporter`), you must also add it + to the `images:` block in each overlay — otherwise the literal string + ships unchanged. +- **`secrets.env` writes a Secret of type `Opaque`** with the key=value + pairs as base64-encoded data fields. If you need a different secret + type (e.g. `kubernetes.io/dockerconfigjson` for pull secrets), generate + it separately or override in the overlay. 
diff --git a/k8s/base/api-server.yaml b/k8s/base/api-server.yaml new file mode 100644 index 00000000000..b8175131e03 --- /dev/null +++ b/k8s/base/api-server.yaml @@ -0,0 +1,112 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: api-server-deployment +spec: + replicas: 1 + selector: + matchLabels: + app: api-server + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + labels: + app: api-server + spec: + containers: + - command: + - /bin/sh + - -c + - 'alembic upgrade heads && + + echo "Starting Danswer Api Server" && + + uvicorn danswer.main:app --host 0.0.0.0 --port 8080 + + ' + env: + - name: POSTGRES_USER + valueFrom: + secretKeyRef: + key: postgres_user + name: danswer-secrets + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + key: postgres_password + name: danswer-secrets + - name: OAUTH_CLIENT_ID + valueFrom: + secretKeyRef: + key: oauth_client_id + name: danswer-secrets + - name: OAUTH_CLIENT_SECRET + valueFrom: + secretKeyRef: + key: oauth_client_secret + name: danswer-secrets + - name: USER_AUTH_SECRET + valueFrom: + secretKeyRef: + key: user_auth_secret + name: danswer-secrets + envFrom: + - configMapRef: + name: env-configmap + - secretRef: + name: danswer-secrets + image: danswer-backend + imagePullPolicy: IfNotPresent + name: api-server + ports: + - containerPort: 8080 + protocol: TCP + # startupProbe: the container runs `alembic upgrade heads` BEFORE + # uvicorn, so /health isn't up until migrations finish. This gives + # migrations + boot up to ~5 min (30 × 10s) before the readiness + # probe starts counting (no liveness probe is defined). It also transitively gates on Postgres: + # no migrations → no HTTP → never passes startup → pod isn't Ready, and once the ~5 min budget is spent the kubelet restarts the container (even mid-migration) — raise failureThreshold if a migration can run longer. + startupProbe: + httpGet: + path: /health + port: 8080 + periodSeconds: 10 + failureThreshold: 30 + timeoutSeconds: 5 + # readinessProbe: gates the api-server Service so it never routes + # to a pod still booting.
Checks the app's OWN /health — NOT Vespa + # or Redis (those are partial/optional deps; coupling the API's + # availability to them would amplify outages — see the Vespa + # incident). No liveness probe by design: an aggressive liveness on + # a slow-starting api-server could kill it mid-migration. + readinessProbe: + httpGet: + path: /health + port: 8080 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + resources: {} + # No volumes: the file store (uploads) and dynamic config are + # Postgres-backed (or Azure Blob for files) — nothing reads + # /home/storage or /home/file_connector_storage (grep the code: zero + # references). The dynamic-pvc / file-connector-pvc claims still exist + # in persistent-volumes.yaml; they're just no longer mounted here. +--- +apiVersion: v1 +kind: Service +metadata: + name: api-server-service +spec: + ports: + - name: api-server-port + port: 80 + protocol: TCP + targetPort: 8080 + selector: + app: api-server + type: ClusterIP diff --git a/k8s/base/background.yaml b/k8s/base/background.yaml new file mode 100644 index 00000000000..964dc6d3230 --- /dev/null +++ b/k8s/base/background.yaml @@ -0,0 +1,46 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: background-deployment +spec: + replicas: 1 + selector: + matchLabels: + app: background + strategy: + type: Recreate + template: + metadata: + labels: + app: background + spec: + containers: + - command: + - /usr/bin/supervisord + env: + - name: POSTGRES_USER + valueFrom: + secretKeyRef: + key: postgres_user + name: danswer-secrets + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + key: postgres_password + name: danswer-secrets + envFrom: + - configMapRef: + name: env-configmap + - secretRef: + name: danswer-secrets + image: danswer-backend + imagePullPolicy: IfNotPresent + name: background + resources: + requests: + cpu: '2' + memory: 10Gi + # No volumes — vestigial like the split deployments: the file store and + # dynamic config are Postgres-backed (files 
optionally Azure Blob), so + # nothing reads /home/storage or /home/file_connector_storage. The PVCs + # remain defined in persistent-volumes.yaml, just unmounted here. diff --git a/k8s/base/indexing-model-server.yaml b/k8s/base/indexing-model-server.yaml new file mode 100644 index 00000000000..363eac06f61 --- /dev/null +++ b/k8s/base/indexing-model-server.yaml @@ -0,0 +1,78 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: indexing-model-server-statefulset +spec: + persistentVolumeClaimRetentionPolicy: + whenDeleted: Retain + whenScaled: Retain + podManagementPolicy: OrderedReady + replicas: 2 + selector: + matchLabels: + app: indexing-model-server + name: indexing-model-server + serviceName: indexing-model-server-service + template: + metadata: + labels: + app: indexing-model-server + name: indexing-model-server + spec: + containers: + - command: + - uvicorn + - model_server.main:app + - --host + - 0.0.0.0 + - --port + - '9000' + env: + - name: INDEXING_ONLY + value: 'True' + envFrom: + - configMapRef: + name: env-configmap + image: danswer-model-server + imagePullPolicy: IfNotPresent + name: indexing-model-server + ports: + - containerPort: 9000 + protocol: TCP + resources: {} + volumeMounts: + - mountPath: /root/.cache + name: indexing-model-storage + updateStrategy: + rollingUpdate: + partition: 0 + type: RollingUpdate + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + creationTimestamp: null + name: indexing-model-storage + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + volumeMode: Filesystem + status: + phase: Pending +--- +apiVersion: v1 +kind: Service +metadata: + name: indexing-model-server-service +spec: + ports: + - name: indexing-model-server-port + port: 9000 + protocol: TCP + targetPort: 9000 + selector: + app: indexing-model-server + type: ClusterIP diff --git a/deployment/kubernetes/inference_model_server-service-deployment.yaml b/k8s/base/inference-model-server.yaml 
similarity index 58% rename from deployment/kubernetes/inference_model_server-service-deployment.yaml rename to k8s/base/inference-model-server.yaml index 790dc633db8..68b9cd66edc 100644 --- a/deployment/kubernetes/inference_model_server-service-deployment.yaml +++ b/k8s/base/inference-model-server.yaml @@ -1,17 +1,3 @@ -apiVersion: v1 -kind: Service -metadata: - name: inference-model-server-service -spec: - selector: - app: inference-model-server - ports: - - name: inference-model-server-port - protocol: TCP - port: 9000 - targetPort: 9000 - type: ClusterIP ---- apiVersion: apps/v1 kind: Deployment metadata: @@ -21,36 +7,52 @@ spec: selector: matchLabels: app: inference-model-server + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate template: metadata: labels: app: inference-model-server spec: containers: - - name: inference-model-server - image: danswer/danswer-model-server:latest - imagePullPolicy: IfNotPresent - command: [ "uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000" ] - ports: - - containerPort: 9000 + - command: + - uvicorn + - model_server.main:app + - --host + - 0.0.0.0 + - --port + - '9000' envFrom: - configMapRef: name: env-configmap + image: danswer-model-server + imagePullPolicy: IfNotPresent + name: inference-model-server + ports: + - containerPort: 9000 + protocol: TCP + resources: {} volumeMounts: - - name: inference-model-storage - mountPath: /root/.cache + - mountPath: /root/.cache + name: inference-model-storage volumes: - name: inference-model-storage persistentVolumeClaim: claimName: inference-model-pvc --- apiVersion: v1 -kind: PersistentVolumeClaim +kind: Service metadata: - name: inference-model-pvc + name: inference-model-server-service spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 3Gi + ports: + - name: inference-model-server-port + port: 9000 + protocol: TCP + targetPort: 9000 + selector: + app: inference-model-server + type: ClusterIP diff 
--git a/k8s/base/kustomization.yaml b/k8s/base/kustomization.yaml new file mode 100644 index 00000000000..77e204e9ed0 --- /dev/null +++ b/k8s/base/kustomization.yaml @@ -0,0 +1,32 @@ +# Base manifests — environment-neutral. Overlays under k8s/overlays/{prod,local} +# layer on: +# - Image registry + tag (via `images:` in the overlay kustomization) +# - Replica counts (via `replicas:` in the overlay kustomization) +# - The env-configmap ConfigMap (via configMapGenerator from env.properties) +# - The danswer-secrets Secret (via secretGenerator from secrets.env — gitignored) +# +# Image refs in base use logical names (e.g. `danswer-backend` not +# `sfbrdevhelmweacr.azurecr.io/danswer/danswer-backend:vha-138`). Kustomize +# rewrites them via the overlay's `images:` block. +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + # Core danswer app + - api-server.yaml + - background.yaml + - web-server.yaml + - nginx.yaml + - inference-model-server.yaml + - indexing-model-server.yaml + - persistent-volumes.yaml + - redis.yaml # cache + rate limiting; both prod and local deploy it + + # NOTE: Vespa is intentionally NOT part of base. It is a stateful subsystem + # with a PINNED version and an ordered, health-gated upgrade procedure, so + # re-applying it on every routine app rollout (`kubectl apply -k + # overlays/{prod,local}`) is dangerous — a drifted manifest could roll the + # StatefulSets. Vespa manifests live in base/vespa/ and are applied + # DELIBERATELY via their own overlay: `kubectl apply -k + # k8s/overlays/{prod,local}-vespa`. Version upgrades go through + # k8s/scripts/vespa-upgrade.sh. See k8s/README.md. 
diff --git a/k8s/base/nginx.yaml b/k8s/base/nginx.yaml new file mode 100644 index 00000000000..6e56438f2fd --- /dev/null +++ b/k8s/base/nginx.yaml @@ -0,0 +1,86 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nginx-deployment +spec: + replicas: 1 + selector: + matchLabels: + app: nginx + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + labels: + app: nginx + spec: + containers: + - command: + - /bin/sh + - -c + - 'while :; do sleep 6h & wait $$!; nginx -s reload; done & nginx -g "daemon + off;" + + ' + env: + - name: DOMAIN + valueFrom: + configMapKeyRef: + key: DOMAIN + name: env-configmap + image: nginx + imagePullPolicy: IfNotPresent + name: nginx + ports: + - containerPort: 80 + protocol: TCP + resources: {} + volumeMounts: + - mountPath: /etc/nginx/conf.d + name: nginx-conf + volumes: + - configMap: + defaultMode: 420 + name: nginx-configmap + name: nginx-conf +--- +apiVersion: v1 +kind: Service +metadata: + name: nginx-service +spec: + ports: + - name: http + port: 80 + protocol: TCP + targetPort: 80 + - name: danswer + port: 3000 + protocol: TCP + targetPort: 80 + selector: + app: nginx + type: ClusterIP +--- +apiVersion: v1 +data: + nginx.conf: "upstream api_server {\n server api-server-service:80 fail_timeout=0;\n\ + }\n\nupstream web_server {\n server web-server-service:80 fail_timeout=0;\n\ + }\n\nserver {\n listen 80;\n server_name $$DOMAIN;\n\n client_max_body_size\ + \ 5G; # Maximum upload size\n\n location ~ ^/api(.*)$ {\n rewrite\ + \ ^/api(/.*)$ $1 break;\n proxy_set_header X-Real-IP $remote_addr;\n \ + \ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n proxy_set_header\ + \ X-Forwarded-Proto $scheme;\n proxy_set_header X-Forwarded-Host $host;\n\ + \ proxy_set_header Host $host;\n proxy_http_version 1.1;\n \ + \ proxy_buffering off;\n proxy_redirect off;\n proxy_pass http://api_server;\n\ + \ }\n\n location / {\n proxy_set_header X-Real-IP $remote_addr;\n\ + 
\ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n \ + \ proxy_set_header X-Forwarded-Proto $scheme;\n proxy_set_header X-Forwarded-Host\ + \ $host;\n proxy_set_header Host $host;\n proxy_http_version 1.1;\n\ + \ proxy_redirect off;\n proxy_pass http://web_server;\n }\n}\n" +kind: ConfigMap +metadata: + name: nginx-configmap diff --git a/darwin-kubernetes/persistent-volumes.yaml b/k8s/base/persistent-volumes.yaml similarity index 90% rename from darwin-kubernetes/persistent-volumes.yaml rename to k8s/base/persistent-volumes.yaml index ad83d5a3640..f6fc5fbcb32 100644 --- a/darwin-kubernetes/persistent-volumes.yaml +++ b/k8s/base/persistent-volumes.yaml @@ -5,7 +5,7 @@ metadata: spec: storageClassName: azurefile-csi-premium accessModes: - - ReadWriteOnce + - ReadWriteOnce resources: requests: storage: 5Gi @@ -17,7 +17,7 @@ metadata: spec: storageClassName: azurefile-csi-premium accessModes: - - ReadWriteMany + - ReadWriteMany resources: requests: storage: 5Gi diff --git a/k8s/base/redis.yaml b/k8s/base/redis.yaml new file mode 100644 index 00000000000..14b69e17612 --- /dev/null +++ b/k8s/base/redis.yaml @@ -0,0 +1,66 @@ +apiVersion: v1 +kind: Service +metadata: + name: redis + labels: + app: redis +spec: + selector: + app: redis + ports: + - name: redis + port: 6379 + targetPort: 6379 + type: ClusterIP +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: redis +spec: + serviceName: redis + replicas: 1 + selector: + matchLabels: + app: redis + template: + metadata: + labels: + app: redis + spec: + containers: + - name: redis + image: redis:7.2-alpine + ports: + - containerPort: 6379 + name: redis + args: + - --appendonly + - 'no' + - --save + - '' + - --maxmemory + - 512mb + - --maxmemory-policy + - allkeys-lru + # Container limit sits comfortably above --maxmemory because Redis + # uses extra RSS beyond the data limit for client output buffers, + # COW pages during BGSAVE, and fragmentation (~1.3-1.5x is typical; + # we set ~2x for safety on 
a single-replica setup where OOM = downtime). + resources: + requests: + memory: 256Mi + cpu: 50m + limits: + memory: 1Gi + cpu: 500m + readinessProbe: + tcpSocket: + port: 6379 + initialDelaySeconds: 5 + periodSeconds: 10 + livenessProbe: + tcpSocket: + port: 6379 + initialDelaySeconds: 15 + periodSeconds: 20 diff --git a/k8s/base/vespa/kustomization.yaml b/k8s/base/vespa/kustomization.yaml new file mode 100644 index 00000000000..21c1926871d --- /dev/null +++ b/k8s/base/vespa/kustomization.yaml @@ -0,0 +1,24 @@ +# Vespa search-index cluster — grouped into its own kustomization because it +# is a distinct stateful subsystem with a lifecycle unlike the rest of base: +# - the image version is PINNED and must never float (:latest took prod +# down once — see AGENTS.md "Critical facts §10"); +# - upgrades must step minor versions within Vespa's allowed hop AND roll +# the StatefulSets in a specific order. That ordering can't live in +# declarative manifests — it's in k8s/scripts/vespa-upgrade.sh. +# +# Each StatefulSet uses a PER-ROLE logical image name (vespa-configserver, +# vespa-admin, vespa-content, vespa-feed, vespa-query) rather than one shared +# `vespa` name. They all resolve to vespaengine/vespa at the same pinned tag +# via the overlay `images:` block, but the per-role split is what lets the +# upgrade script move one role's version at a time. 
+apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - vespa-config.yaml + - vespa-internal-service.yaml + - vespa-configserver.yaml + - vespa-admin.yaml + - vespa-content.yaml + - vespa-feed.yaml + - vespa-query.yaml diff --git a/k8s/base/vespa/vespa-admin.yaml b/k8s/base/vespa/vespa-admin.yaml new file mode 100644 index 00000000000..6687fc05324 --- /dev/null +++ b/k8s/base/vespa/vespa-admin.yaml @@ -0,0 +1,53 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: vespa-admin +spec: + persistentVolumeClaimRetentionPolicy: + whenDeleted: Retain + whenScaled: Retain + podManagementPolicy: OrderedReady + replicas: 1 + selector: + matchLabels: + app: vespa-admin + name: vespa-internal + serviceName: vespa-internal + template: + metadata: + labels: + app: vespa-admin + name: vespa-internal + spec: + containers: + - args: + - services + envFrom: + - configMapRef: + name: vespa-config + # Per-role logical image name — see vespa-configserver.yaml. + image: vespa-admin + imagePullPolicy: Always + name: vespa-admin + # Readiness on the metrics proxy's /state/v1/health (port 19092 — + # node-agnostic, same as content). Gates rollout; readiness-only (no + # liveness). See the content-node note re: publishNotReadyAddresses. 
+ readinessProbe: + httpGet: + path: /state/v1/health + port: 19092 + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 6 + resources: + limits: + memory: 2G + requests: + memory: 1G + securityContext: + runAsUser: 1000 + updateStrategy: + rollingUpdate: + partition: 0 + type: RollingUpdate diff --git a/k8s/base/vespa/vespa-config.yaml b/k8s/base/vespa/vespa-config.yaml new file mode 100644 index 00000000000..ffd2ec26527 --- /dev/null +++ b/k8s/base/vespa/vespa-config.yaml @@ -0,0 +1,6 @@ +apiVersion: v1 +data: + VESPA_CONFIGSERVERS: vespa-configserver-0.vespa-internal.darwin.svc.cluster.local,vespa-configserver-1.vespa-internal.darwin.svc.cluster.local,vespa-configserver-2.vespa-internal.darwin.svc.cluster.local +kind: ConfigMap +metadata: + name: vespa-config diff --git a/k8s/base/vespa/vespa-configserver.yaml b/k8s/base/vespa/vespa-configserver.yaml new file mode 100644 index 00000000000..06457987cea --- /dev/null +++ b/k8s/base/vespa/vespa-configserver.yaml @@ -0,0 +1,152 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: vespa-configserver +spec: + persistentVolumeClaimRetentionPolicy: + whenDeleted: Retain + whenScaled: Retain + podManagementPolicy: OrderedReady + replicas: 3 + selector: + matchLabels: + app: vespa-configserver + name: vespa-internal + serviceName: vespa-internal + template: + metadata: + labels: + app: vespa-configserver + name: vespa-internal + spec: + containers: + - args: + - configserver,services + envFrom: + - configMapRef: + name: vespa-config + # Per-role logical image name (overlay images: resolves it to + # vespaengine/vespa at the pinned tag). Split per role so the upgrade script can + # move one StatefulSet's version at a time — see vespa-upgrade.sh.
+ image: vespa-configserver + imagePullPolicy: Always + name: vespa-configserver + # Gates the vespa-configserver Service so consumers (api-server's + # ensure_indices_exist, other Vespa nodes) only route here once the + # config server actually serves — not the instant the container + # starts. Readiness-only: a slow/booting config server is pulled + # from rotation but never restarted (an aggressive liveness probe + # could kill a healthy-but-slow node and cause a restart loop). + # initialDelay is generous because config-server bootstrap (ZK + + # app load) takes 1-2 min. + readinessProbe: + httpGet: + path: /state/v1/health + port: 19071 + initialDelaySeconds: 45 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 6 + resources: + limits: + memory: 4096M + requests: + memory: 4096M + securityContext: + runAsUser: 1000 + volumeMounts: + - mountPath: /opt/vespa/var + name: vespa-var1 + - mountPath: /opt/vespa/logs + name: vespa-logs1 + - mountPath: /workspace + name: vespa-workspace1 + initContainers: + - command: + - sh + - -c + - chown -R 1000 /opt/vespa/var + image: busybox + imagePullPolicy: Always + name: chown-var + resources: {} + securityContext: + runAsUser: 0 + volumeMounts: + - mountPath: /opt/vespa/var + name: vespa-var1 + - command: + - sh + - -c + - chown -R 1000 /opt/vespa/logs + image: busybox + imagePullPolicy: Always + name: chown-logs + resources: {} + securityContext: + runAsUser: 0 + volumeMounts: + - mountPath: /opt/vespa/logs + name: vespa-logs1 + updateStrategy: + rollingUpdate: + partition: 0 + type: RollingUpdate + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + creationTimestamp: null + name: vespa-var1 + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi + volumeMode: Filesystem + status: + phase: Pending + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + creationTimestamp: null + name: vespa-logs1 + spec: + accessModes: + - ReadWriteOnce + resources: 
+ requests: + storage: 5Gi + volumeMode: Filesystem + status: + phase: Pending + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + creationTimestamp: null + name: vespa-workspace1 + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + volumeMode: Filesystem + status: + phase: Pending +--- +apiVersion: v1 +kind: Service +metadata: + name: vespa-configserver +spec: + ports: + - name: vespa-tenant-port + nodePort: 31439 + port: 19071 + protocol: TCP + targetPort: 19071 + selector: + app: vespa-configserver + type: NodePort diff --git a/k8s/base/vespa/vespa-content.yaml b/k8s/base/vespa/vespa-content.yaml new file mode 100644 index 00000000000..f4486b5a228 --- /dev/null +++ b/k8s/base/vespa/vespa-content.yaml @@ -0,0 +1,92 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: vespa-content +spec: + persistentVolumeClaimRetentionPolicy: + whenDeleted: Retain + whenScaled: Retain + podManagementPolicy: OrderedReady + replicas: 3 + selector: + matchLabels: + app: vespa-content + name: vespa-internal + serviceName: vespa-internal + template: + metadata: + labels: + app: vespa-content + name: vespa-internal + spec: + containers: + - args: + - services + envFrom: + - configMapRef: + name: vespa-config + # Per-role logical image name — see vespa-configserver.yaml. + image: vespa-content + imagePullPolicy: Always + name: vespa-content + # Readiness on the metrics proxy's /state/v1/health (port 19092 — + # verified serving 200 on the live content nodes; this port exists on + # every Vespa node type, unlike the containers' 8080). Gates rollout + # so an upgrade won't move to the next content pod until this one is + # back up. Readiness-only — NO liveness (a slow content node loading + # its index must not be killed). Crucially, vespa-internal sets + # publishNotReadyAddresses: true, so a not-ready content pod is still + # resolvable for peer discovery — this probe gates rollout WITHOUT + # dropping the node from the cluster. 
initialDelay is generous because + # content nodes load buckets on boot. + readinessProbe: + httpGet: + path: /state/v1/health + port: 19092 + initialDelaySeconds: 45 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 6 + resources: + limits: + memory: 12G + requests: + memory: 6G + securityContext: + runAsUser: 1000 + volumeMounts: + - mountPath: /opt/vespa/var + name: vespa-var + initContainers: + - command: + - sh + - -c + - chown -R 1000 /opt/vespa/var + image: busybox + imagePullPolicy: Always + name: chown-var + resources: {} + securityContext: + runAsUser: 0 + volumeMounts: + - mountPath: /opt/vespa/var + name: vespa-var + updateStrategy: + rollingUpdate: + partition: 0 + type: RollingUpdate + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + creationTimestamp: null + name: vespa-var + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Gi + volumeMode: Filesystem + status: + phase: Pending diff --git a/k8s/base/vespa/vespa-feed.yaml b/k8s/base/vespa/vespa-feed.yaml new file mode 100644 index 00000000000..dc9f4b27ba3 --- /dev/null +++ b/k8s/base/vespa/vespa-feed.yaml @@ -0,0 +1,69 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: vespa-feed-container +spec: + persistentVolumeClaimRetentionPolicy: + whenDeleted: Retain + whenScaled: Retain + podManagementPolicy: OrderedReady + replicas: 2 + selector: + matchLabels: + app: vespa-feed-container + name: vespa-internal + serviceName: vespa-internal + template: + metadata: + labels: + app: vespa-feed-container + name: vespa-internal + spec: + containers: + - args: + - services + envFrom: + - configMapRef: + name: vespa-config + # Per-role logical image name — see vespa-configserver.yaml. + image: vespa-feed + imagePullPolicy: Always + name: vespa-feed-container + # Gates the vespa-feed Service so indexing/feed only routes here + # once the feed container serves. Readiness-only (see configserver + # note). 
+ readinessProbe: + httpGet: + path: /state/v1/health + port: 8080 + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 6 + resources: + limits: + memory: 4G + requests: + memory: 1500M + securityContext: + runAsUser: 1000 + updateStrategy: + rollingUpdate: + partition: 0 + type: RollingUpdate +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app: vespa + name: vespa-feed +spec: + ports: + - name: api + port: 8080 + protocol: TCP + targetPort: 8080 + selector: + app: vespa-feed-container + type: ClusterIP diff --git a/k8s/base/vespa/vespa-internal-service.yaml b/k8s/base/vespa/vespa-internal-service.yaml new file mode 100644 index 00000000000..76086f9caa5 --- /dev/null +++ b/k8s/base/vespa/vespa-internal-service.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + name: vespa-internal + name: vespa-internal +spec: + clusterIP: None + clusterIPs: + - None + # Peer discovery (VESPA_CONFIGSERVERS, stable pod DNS like + # vespa-content-0.vespa-internal...) MUST NOT be gated by pod readiness: + # nodes need to resolve each other to *become* ready, and the content/admin + # readiness probes added for rollout-gating would otherwise drop a slow or + # booting node from this headless service's DNS and disrupt the cluster. + # Publishing not-ready addresses decouples discovery from readiness. 
+ publishNotReadyAddresses: true + selector: + name: vespa-internal + type: ClusterIP diff --git a/k8s/base/vespa/vespa-query.yaml b/k8s/base/vespa/vespa-query.yaml new file mode 100644 index 00000000000..224c8212ba5 --- /dev/null +++ b/k8s/base/vespa/vespa-query.yaml @@ -0,0 +1,71 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: vespa-query-container +spec: + persistentVolumeClaimRetentionPolicy: + whenDeleted: Retain + whenScaled: Retain + podManagementPolicy: OrderedReady + replicas: 2 + selector: + matchLabels: + app: vespa-query-container + name: vespa-internal + serviceName: vespa-internal + template: + metadata: + labels: + app: vespa-query-container + name: vespa-internal + spec: + containers: + - args: + - services + envFrom: + - configMapRef: + name: vespa-config + # Per-role logical image name — see vespa-configserver.yaml. + image: vespa-query + imagePullPolicy: Always + name: vespa-query-container + # Gates the vespa-query Service — the path the app hits for search. + # This is the probe that directly prevents the "503 / connection + # refused" window: the Service won't route a query here until + # /state/v1/health is up. Readiness-only (no liveness — see the + # configserver note). 
+ readinessProbe: + httpGet: + path: /state/v1/health + port: 8080 + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 6 + resources: + limits: + memory: 4G + requests: + memory: 1500M + securityContext: + runAsUser: 1000 + updateStrategy: + rollingUpdate: + partition: 0 + type: RollingUpdate +--- +apiVersion: v1 +kind: Service +metadata: + labels: + app: vespa + name: vespa-query +spec: + ports: + - name: api + port: 8080 + protocol: TCP + targetPort: 8080 + selector: + app: vespa-query-container + type: ClusterIP diff --git a/darwin-kubernetes/web_server-service-deployment.yaml b/k8s/base/web-server.yaml similarity index 71% rename from darwin-kubernetes/web_server-service-deployment.yaml rename to k8s/base/web-server.yaml index faa2e230e04..42f6ea6ab44 100644 --- a/darwin-kubernetes/web_server-service-deployment.yaml +++ b/k8s/base/web-server.yaml @@ -1,16 +1,3 @@ -apiVersion: v1 -kind: Service -metadata: - name: web-server-service -spec: - selector: - app: web-server - ports: - - protocol: TCP - port: 80 - targetPort: 3000 - type: ClusterIP ---- apiVersion: apps/v1 kind: Deployment metadata: @@ -20,30 +7,51 @@ spec: selector: matchLabels: app: web-server + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate template: metadata: labels: app: web-server spec: containers: - - name: web-server - image: sfbrdevhelmweacr.azurecr.io/danswer/danswer-web-server:latest - imagePullPolicy: IfNotPresent - ports: - - containerPort: 3000 - # There are some extra values since this is shared between services - # There are no conflicts though, extra env variables are simply ignored - env: + - env: - name: POSTGRES_USER valueFrom: secretKeyRef: - name: danswer-secrets key: postgres_user + name: danswer-secrets - name: POSTGRES_PASSWORD valueFrom: secretKeyRef: - name: danswer-secrets key: postgres_password + name: danswer-secrets envFrom: - configMapRef: name: env-configmap + image: danswer-web-server + 
imagePullPolicy: IfNotPresent + name: web-server + ports: + - containerPort: 3000 + protocol: TCP + resources: + requests: + cpu: 500m + memory: 4Gi +--- +apiVersion: v1 +kind: Service +metadata: + name: web-server-service +spec: + ports: + - port: 80 + protocol: TCP + targetPort: 3000 + selector: + app: web-server + type: ClusterIP diff --git a/k8s/optional/background-scaling/background-indexer-scheduler.yaml b/k8s/optional/background-scaling/background-indexer-scheduler.yaml new file mode 100644 index 00000000000..6be92c2b148 --- /dev/null +++ b/k8s/optional/background-scaling/background-indexer-scheduler.yaml @@ -0,0 +1,59 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: background-indexer-scheduler-deployment +spec: + strategy: + type: Recreate + selector: + matchLabels: + app: background-indexer-scheduler + template: + metadata: + labels: + app: background-indexer-scheduler + spec: + containers: + - name: indexer-scheduler + image: danswer-backend + imagePullPolicy: IfNotPresent + command: + - python + - danswer/background/update.py + env: + - name: DASK_SCHEDULER_ADDRESS + value: tcp://dask-scheduler-service:8786 + - name: CURRENT_PROCESS_IS_AN_INDEXING_JOB + value: 'true' + - name: POSTGRES_USER + valueFrom: + secretKeyRef: + key: postgres_user + name: danswer-secrets + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + key: postgres_password + name: danswer-secrets + envFrom: + - configMapRef: + name: env-configmap + - secretRef: + name: danswer-secrets + # No PVCs — the scheduler only polls Postgres + submits Dask futures; + # the file store / dynamic config are Postgres-backed (see README). + # + # Sized for the FIXED scheduler. The old ~7.5Gi/cycle spikes were a + # query bug, not real work: get_last_attempt() ran an unbounded + # ORDER BY (no LIMIT) once per cc-pair, materializing each cc-pair's + # FULL index_attempt history just to take the newest. Fixed with + # LIMIT 1 in db/index_attempt.py. 
Verified on the fixed image + # (vha-140): RSS sits FLAT at ~430Mi across update cycles, no spike. + # 512Mi request / 2Gi limit = comfortable headroom over that. + resources: + requests: + cpu: 200m + memory: 512Mi + limits: + cpu: 500m + memory: 2Gi diff --git a/k8s/optional/background-scaling/background-lite.yaml b/k8s/optional/background-scaling/background-lite.yaml new file mode 100644 index 00000000000..4af4927cca5 --- /dev/null +++ b/k8s/optional/background-scaling/background-lite.yaml @@ -0,0 +1,159 @@ +# background-lite — the low-traffic singletons that don't scale with +# indexing load, co-located in one pod as three containers: +# - celery-worker maintenance tasks (prune/sync/deletion/analytics); +# fixed pool of 10 threads (--concurrency=10, not --autoscale) +# - celery-beat periodic-task scheduler (singleton) +# - slack-listener Slack Socket Mode websocket (singleton) +# +# This replaces three separate deployments (background-celery, +# background-beat, slack-listener) — none of them benefits from being its +# own Deployment for the indexing-scaling goal, and collapsing them trims +# the pod count and per-deployment resource reservations. +# +# Indexing is deliberately NOT here — that's the Dask path +# (background-indexer-scheduler → dask-scheduler → dask-worker), which is +# the part that actually scales. +# +# SINGLETON: replicas managed in kustomization.yaml at count: 1. The pod +# contains celery-beat (dup beats double-fire periodic tasks) and the +# Slack websocket listener (dup double-processes events), so this pod +# must never run more than one replica. Each container restarts +# independently if it crashes; the pod is rescheduled as a unit. +# +# Env-neutral like the rest of the component (logical danswer-backend +# image; config/secrets via env-configmap + danswer-secrets). The Slack +# tokens reach the listener through the envFrom secretRef.
+apiVersion: apps/v1 +kind: Deployment +metadata: + name: background-lite-deployment +spec: + # MUST stay 1 — see the singleton note above. Set in the component + # kustomization replicas: block too; kept here as the manifest default. + replicas: 1 + strategy: + # Recreate (not RollingUpdate): celery-beat must never overlap with a + # second beat during a rollout, or periodic tasks double-fire. + type: Recreate + selector: + matchLabels: + app: background-lite + template: + metadata: + labels: + app: background-lite + spec: + containers: + # --- celery worker (maintenance tasks) --- + - name: celery-worker + image: danswer-backend + imagePullPolicy: IfNotPresent + command: + - celery + - -A + - danswer.background.celery.celery_run:celery_app + - worker + - --pool=threads + # Fixed thread count, NOT --autoscale. Autoscale calls pool.grow()/shrink() + # which the threads TaskPool doesn't implement -> on the first task burst + # the worker dies with "AttributeError: 'TaskPool' object has no attribute + # 'grow'" and CrashLoopBackOffs. Autoscale is prefork-only. These + # maintenance tasks are I/O-bound, so a fixed pool of threads is fine. + - --concurrency=10 + - --loglevel=INFO + env: + - name: POSTGRES_USER + valueFrom: + secretKeyRef: + key: postgres_user + name: danswer-secrets + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + key: postgres_password + name: danswer-secrets + envFrom: + - configMapRef: + name: env-configmap + - secretRef: + name: danswer-secrets + # No PVCs: the file store (File connector uploads) and dynamic config + # are Postgres-backed in this fork (PGFileStore lobjs + + # PostgresBackedDynamicConfigStore). Nothing here reads /home/storage + # or /home/file_connector_storage. See README "Apply an optional + # component" for why the split deployments mount no volumes. 
+ resources: + requests: + cpu: 200m + memory: 512Mi + limits: + cpu: '1' + memory: 2Gi + # --- celery beat (periodic scheduler) --- + - name: celery-beat + image: danswer-backend + imagePullPolicy: IfNotPresent + command: + - celery + - -A + - danswer.background.celery.celery_run:celery_app + - beat + - --loglevel=INFO + env: + - name: POSTGRES_USER + valueFrom: + secretKeyRef: + key: postgres_user + name: danswer-secrets + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + key: postgres_password + name: danswer-secrets + envFrom: + - configMapRef: + name: env-configmap + - secretRef: + name: danswer-secrets + # beat is light at runtime, but `celery -A ... beat` still imports the + # whole danswer app (langchain/llama-index/tokenizers), which alone + # exceeds 256Mi → OOMKilled on startup. Stable at a 1Gi limit; request + # doubled (256Mi→512Mi) for a guaranteed floor and limit lifted to 2Gi + # for headroom. + resources: + requests: + cpu: 50m + memory: 512Mi + limits: + cpu: 200m + memory: 2Gi + # --- slack bot listener --- + - name: slack-listener + image: danswer-backend + imagePullPolicy: IfNotPresent + command: + - python + - danswer/danswerbot/slack/listener.py + env: + - name: POSTGRES_USER + valueFrom: + secretKeyRef: + key: postgres_user + name: danswer-secrets + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + key: postgres_password + name: danswer-secrets + envFrom: + - configMapRef: + name: env-configmap + - secretRef: + name: danswer-secrets + resources: + requests: + cpu: 100m + memory: 512Mi + limits: + cpu: 500m + memory: 1Gi diff --git a/k8s/optional/background-scaling/dask-scheduler.yaml b/k8s/optional/background-scaling/dask-scheduler.yaml new file mode 100644 index 00000000000..7358402359d --- /dev/null +++ b/k8s/optional/background-scaling/dask-scheduler.yaml @@ -0,0 +1,70 @@ +apiVersion: v1 +kind: Service +metadata: + name: dask-scheduler-service +spec: + selector: + app: dask-scheduler + ports: + - name: rpc + protocol: TCP + 
port: 8786 + targetPort: 8786 + - name: dashboard + protocol: TCP + port: 8787 + targetPort: 8787 + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dask-scheduler-deployment +spec: + strategy: + type: Recreate + selector: + matchLabels: + app: dask-scheduler + template: + metadata: + labels: + app: dask-scheduler + spec: + containers: + - name: scheduler + image: danswer-backend + imagePullPolicy: IfNotPresent + command: + - dask + - scheduler + - --host=0.0.0.0 + - --port=8786 + - --dashboard-address=:8787 + ports: + - name: rpc + containerPort: 8786 + - name: dashboard + containerPort: 8787 + envFrom: + - configMapRef: + name: env-configmap + - secretRef: + name: danswer-secrets + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + readinessProbe: + tcpSocket: + port: 8786 + initialDelaySeconds: 5 + periodSeconds: 10 + livenessProbe: + tcpSocket: + port: 8786 + initialDelaySeconds: 15 + periodSeconds: 20 diff --git a/k8s/optional/background-scaling/dask-worker.yaml b/k8s/optional/background-scaling/dask-worker.yaml new file mode 100644 index 00000000000..27ed4553533 --- /dev/null +++ b/k8s/optional/background-scaling/dask-worker.yaml @@ -0,0 +1,73 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dask-worker-deployment +spec: + selector: + matchLabels: + app: dask-worker + template: + metadata: + labels: + app: dask-worker + spec: + containers: + - name: worker + image: danswer-backend + imagePullPolicy: IfNotPresent + # Wait for the Dask scheduler to accept connections before starting the + # worker, then exec it. A worker that boots before the scheduler is + # ready fails to register its Nanny and exits 1 -> CrashLoopBackOff + # until pod ordering happens to work out. + # + # This retry loop is environment-agnostic — plain TCP connect, no + # dependency on any mesh or platform feature; it behaves the same with + # or without istio. 
It lives in the MAIN container (not an + # initContainer) for exactly that portability: under istio, + # initContainers run before the sidecar proxy, so their traffic to a + # mesh Service is blackholed until envoy is up — an init-based wait + # would hang there. The main-container loop also naturally waits out + # envoy when istio IS present. `exec` replaces the shell with the + # worker process, so signals/termination reach it directly. + command: + - /bin/sh + - -c + - | + until python -c "import socket; socket.create_connection(('dask-scheduler-service', 8786), timeout=3)" 2>/dev/null; do + echo "waiting for dask-scheduler-service:8786..." + sleep 2 + done + echo "dask-scheduler reachable; starting worker" + exec dask worker tcp://dask-scheduler-service:8786 --nworkers=1 --nthreads=1 --memory-limit=4GB + env: + - name: PYTHONPATH + value: /app + - name: CURRENT_PROCESS_IS_AN_INDEXING_JOB + value: 'true' + - name: POSTGRES_USER + valueFrom: + secretKeyRef: + key: postgres_user + name: danswer-secrets + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + key: postgres_password + name: danswer-secrets + envFrom: + - configMapRef: + name: env-configmap + - secretRef: + name: danswer-secrets + # No PVCs — indexing reads File-connector uploads from PGFileStore + # (Postgres large objects), not /home/file_connector_storage; the + # dynamic config store is Postgres-backed too. Connector temp files + # (if any) go to the container's /tmp. This also keeps each worker + # off the RWO dynamic-pvc, which can't multi-attach. See README. + resources: + requests: + cpu: "1" + memory: 4Gi + limits: + cpu: '2' + memory: 8Gi diff --git a/k8s/optional/background-scaling/kustomization.yaml b/k8s/optional/background-scaling/kustomization.yaml new file mode 100644 index 00000000000..a09f2e26d27 --- /dev/null +++ b/k8s/optional/background-scaling/kustomization.yaml @@ -0,0 +1,50 @@ +# Kustomize Component — split-background + remote-Dask indexing topology.
+# +# Replaces the single combined `background` deployment (in base) with FOUR +# deployments: +# - background-lite beat + celery + slack-listener, co-located +# (3 containers, 1 pod) — the low-traffic +# singletons that don't scale with indexing +# - background-indexer-scheduler the update.py polling loop, in remote-Dask mode +# - dask-scheduler Dask scheduler Service + Deployment +# - dask-worker Dask worker pods (scale these for indexing throughput) +# +# Parameterization mirrors base: +# - Image refs use the logical name `danswer-backend`; the including +# overlay's `images:` block rewrites them (no images: needed here). +# - Secrets/config load identically to base: explicit POSTGRES_USER / +# POSTGRES_PASSWORD via secretKeyRef, plus +# `envFrom: [configMapRef env-configmap, secretRef danswer-secrets]`. +# - Replica counts live in the `replicas:` block below (one place). +# dask-worker is the knob you turn for more indexing throughput. +# - Manifests are environment-neutral (NO node affinity / tolerations). +# Darwin's indexcpu-pool scheduling is added by the prod overlay when +# it opts in (see k8s/README.md → "Apply an optional component"). +# +# Only meaningful when pulled into an overlay via its `components:` field: +# +# # k8s/overlays/prod/kustomization.yaml +# components: +# - ../../optional/background-scaling +# +# When you opt in, also set base `background-deployment` to count: 0 in the +# overlay's replicas: block so you don't run two Celery beat schedulers on +# the same broker. 
+apiVersion: kustomize.config.k8s.io/v1alpha1 +kind: Component + +resources: + - background-lite.yaml + - background-indexer-scheduler.yaml + - dask-scheduler.yaml + - dask-worker.yaml + +replicas: + - name: background-lite-deployment + count: 1 # singleton — beat + slack websocket; never >1 + - name: background-indexer-scheduler-deployment + count: 1 # singleton — the update.py polling loop + - name: dask-scheduler-deployment + count: 1 # singleton bookkeeping process + - name: dask-worker-deployment + count: 2 # max concurrent indexing jobs (1 connector per pod); raise if attempts backlog diff --git a/k8s/optional/keda-indexing-autoscale/kustomization.yaml b/k8s/optional/keda-indexing-autoscale/kustomization.yaml new file mode 100644 index 00000000000..9efad85be4a --- /dev/null +++ b/k8s/optional/keda-indexing-autoscale/kustomization.yaml @@ -0,0 +1,19 @@ +# Kustomize Component — KEDA autoscaling for the Dask indexing workers. +# +# Opt in from an overlay (after the background-scaling component, since it +# scales that component's dask-worker-deployment): +# +# # k8s/overlays/prod/kustomization.yaml +# components: +# - ../../optional/background-scaling +# - ../../optional/keda-indexing-autoscale +# +# AND remove dask-worker-deployment from the background-scaling component's +# replicas: block — KEDA owns that count once this is active. +# +# Requires the KEDA operator installed cluster-wide (the keda.sh CRDs). +apiVersion: kustomize.config.k8s.io/v1alpha1 +kind: Component + +resources: + - scaledobject.yaml diff --git a/k8s/optional/keda-indexing-autoscale/scaledobject.yaml b/k8s/optional/keda-indexing-autoscale/scaledobject.yaml new file mode 100644 index 00000000000..04cf0ef37c5 --- /dev/null +++ b/k8s/optional/keda-indexing-autoscale/scaledobject.yaml @@ -0,0 +1,86 @@ +# KEDA autoscaler for the Dask indexing workers. +# +# Scales dask-worker-deployment 0..N based on how much indexing work is +# actually runnable right now, read straight from Postgres. 
+# +# PREREQUISITES (see README "KEDA indexing autoscale"): +# 1. KEDA operator installed cluster-wide. +# 2. The background-scaling component is opted in (dask-worker exists). +# 3. dask-worker is NOT also pinned by a static replicas: entry — KEDA +# owns its replica count (remove it from the background-scaling +# kustomization replicas: block, or they fight). +# +# Password comes from danswer-secrets via TriggerAuthentication; the +# host/user/db below are the Darwin prod Postgres coordinates and must +# match the overlay's POSTGRES_* (this is a prod-scaling feature). +--- +apiVersion: keda.sh/v1alpha1 +kind: TriggerAuthentication +metadata: + name: keda-indexing-pg-auth +spec: + secretTargetRef: + - parameter: password + name: danswer-secrets + key: postgres_password +--- +apiVersion: keda.sh/v1alpha1 +kind: ScaledObject +metadata: + name: dask-worker-indexing +spec: + scaleTargetRef: + name: dask-worker-deployment + # Scale to zero when nothing is indexing — saves the idle 4Gi/worker. + # Cold-start cost on the first index after idle is ~30s-2min (pod start + # + Dask worker connect); fine for background indexing. Set to 1 if you + # want a worker always warm. + minReplicaCount: 0 + # Ceiling on burst. Useful concurrency is bounded by + # (distinct active source types × INDEXING_PER_SOURCE_CAP), so there's + # no point setting this far above your number of source types. + maxReplicaCount: 4 + pollingInterval: 60 # how often the query runs (seconds) + cooldownPeriod: 300 # wait 5m after metric hits 0 before scaling to 0 + advanced: + horizontalPodAutoscalerConfig: + behavior: + scaleDown: + # Damp scale-down so finishing jobs don't cause thrash. + stabilizationWindowSeconds: 300 + triggers: + - type: postgresql + authenticationRef: + name: keda-indexing-pg-auth + metadata: + host: darwin-postgres.postgres.database.azure.com + port: "5432" + userName: postgres + dbName: postgres + sslmode: require + # Desired replicas = ceil(query_result / targetQueryValue). 
+ targetQueryValue: "1" + # The metric = number of indexing attempts that can run CONCURRENTLY + # right now, respecting INDEXING_PER_SOURCE_CAP (default 1 → one per source). + # NOT a raw pending count: 10 queued attempts of the same source + # still only run 1 at a time, so we must not spin up 10 workers. + # + # Includes IN_PROGRESS on purpose: that keeps replicas >= running + # jobs, so KEDA never scales a busy worker away — scale-to-0 only + # happens when there is genuinely no work. + # + # Status is stored UPPERCASE (Enum(..., native_enum=False) stores + # the member NAME, not its value) — verified against the live DB. + # IndexAttempt links directly to connector (this fork has + # connector_id on index_attempt, not connector_credential_pair_id). + # + # If you raise INDEXING_PER_SOURCE_CAP above 1, change LEAST(1,...) + # to LEAST(N,...) (N = the new cap) to match. + query: >- + SELECT COALESCE(SUM(LEAST(1, cnt)), 0) FROM ( + SELECT con.source, COUNT(*) AS cnt + FROM index_attempt ia + JOIN connector con ON ia.connector_id = con.id + WHERE ia.status IN ('NOT_STARTED', 'IN_PROGRESS') + GROUP BY con.source + ) s diff --git a/k8s/optional/keda/kustomization.yaml b/k8s/optional/keda/kustomization.yaml new file mode 100644 index 00000000000..d939623dfbd --- /dev/null +++ b/k8s/optional/keda/kustomization.yaml @@ -0,0 +1,30 @@ +# KEDA operator install — cluster-scoped infrastructure (CRDs + RBAC + +# the operator/metrics-apiserver Deployments). Installed ONCE per cluster, +# independent of the danswer overlays — this is NOT a kustomize Component +# pulled into prod/local; apply it on its own. +# +# No Helm (per repo preference). We just reference KEDA's official release +# bundle, PINNED to an exact version. GitHub release assets are immutable, +# so the URL is effectively a content pin — never use a moving ref +# (same lesson as the Vespa :latest incident; see AGENTS.md §10).
+# +# KEDA's bundle creates and installs into its OWN namespace `keda` +# (the resources carry `namespace: keda` internally) — do NOT add a +# `namespace:` here, that would try to re-namespace the cluster-scoped +# CRDs and break the install. +# +# Install / upgrade: +# kubectl apply --server-side -k k8s/optional/keda +# # --server-side: KEDA's CRDs are large and exceed the client-side +# # last-applied-configuration annotation size limit. +# +# Verify: +# kubectl get pods -n keda +# kubectl get crd | grep keda.sh # scaledobjects, triggerauthentications, ... +# +# To bump KEDA: change the version in the URL below, re-apply --server-side. +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - https://github.com/kedacore/keda/releases/download/v2.14.0/keda-2.14.0.yaml diff --git a/k8s/overlays/local-vespa/kustomization.yaml b/k8s/overlays/local-vespa/kustomization.yaml new file mode 100644 index 00000000000..f75ee49031a --- /dev/null +++ b/k8s/overlays/local-vespa/kustomization.yaml @@ -0,0 +1,36 @@ +# Vespa apply target for LOCAL dev — deliberately SEPARATE from the app +# overlay (Vespa was removed from base/, so `kubectl apply -k +# k8s/overlays/local` no longer touches it). Apply Vespa with: +# +# kubectl apply -k k8s/overlays/local-vespa # context: local cluster +# +# Vespa is resource-heavy; skip this on machines where you point at a remote +# Vespa or don't need search locally. +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - ../../base/vespa + +namespace: default + +# Pinned, same as prod — never :latest for Vespa (see prod-vespa overlay note). 
+images: + - name: vespa-configserver + newName: vespaengine/vespa + newTag: "8.600.35" + - name: vespa-admin + newName: vespaengine/vespa + newTag: "8.600.35" + - name: vespa-content + newName: vespaengine/vespa + newTag: "8.600.35" + - name: vespa-feed + newName: vespaengine/vespa + newTag: "8.600.35" + - name: vespa-query + newName: vespaengine/vespa + newTag: "8.600.35" + - name: busybox + newName: busybox + newTag: "1.36.1" diff --git a/k8s/overlays/local/env.properties b/k8s/overlays/local/env.properties new file mode 100644 index 00000000000..b75f1d918ed --- /dev/null +++ b/k8s/overlays/local/env.properties @@ -0,0 +1,142 @@ +# Local-dev env-configmap values. +# +# Assumes Postgres + Vespa are reachable at `host.docker.internal` (so you +# can run them via docker-compose alongside this k8s cluster on Rancher +# Desktop / Docker Desktop). If you run them in-cluster instead, swap the +# *_HOST values. + +# --- Auth: disabled for local dev --- +AUTH_TYPE=disabled +SESSION_EXPIRE_TIME_SECONDS=86400 +OPENID_CONFIG_URL= +DEFAULT_ADMIN_EMAILS= +VALID_EMAIL_DOMAINS= +APPLY_MIGRATIONS=true + +# --- Domain: localhost --- +DOMAIN=localhost +WEB_DOMAIN=http://localhost:3000 +INTERNAL_URL=http://api-server-service:80 + +# --- External services reachable on the host --- +POSTGRES_HOST=host.docker.internal +VESPA_HOST=host.docker.internal +VESPA_CONFIG_SERVER_HOST=host.docker.internal +VESPA_FEED_HOST=host.docker.internal +VESPA_PORT=8080 +VESPA_FEED_PORT=8080 + +# --- Model servers (in-cluster) --- +MODEL_SERVER_HOST=inference-model-server-service +INDEXING_MODEL_SERVER_HOST=indexing-model-server-service +MODEL_SERVER_PORT= +MIN_THREADS_ML_MODELS= +DOCUMENT_ENCODER_MODEL= +NORMALIZE_EMBEDDINGS= +ASYM_QUERY_PREFIX= +ASYM_PASSAGE_PREFIX= +ENABLE_RERANKING_REAL_TIME_FLOW= +ENABLE_RERANKING_ASYNC_FLOW= + +# --- LLM --- +GEN_AI_MODEL_PROVIDER=custom +GEN_AI_VENDOR=openai +GEN_AI_MODEL_NAME=gpt-4o-2024-11-20 +GEN_AI_MODEL_VERSION= +FAST_GEN_AI_MODEL_VERSION= 
+GEN_AI_API_VERSION= +GEN_AI_LLM_PROVIDER_TYPE= +GEN_AI_MAX_TOKENS= +GEN_AI_ACCOUNT_ID= +GEN_AI_TENANT_ID= +GEN_AI_API_ENDPOINT= +GEN_AI_IDENTITY_ENDPOINT= + +# --- Query options --- +QA_TIMEOUT=60 +MAX_CHUNKS_FED_TO_CHAT= +DISABLE_LLM_FILTER_EXTRACTION=true +DISABLE_LLM_CHUNK_FILTER=true +DISABLE_LLM_CHOOSE_SEARCH=true +DISABLE_LLM_QUERY_REPHRASE=false +DOC_TIME_DECAY= +HYBRID_ALPHA=0.8 +EDIT_KEYWORD_QUERY= +MULTILINGUAL_QUERY_EXPANSION= +QA_PROMPT_OVERRIDE= +LANGUAGE_HINT= +DISABLE_GENERATIVE_AI= + +# --- Indexing (smaller for local) --- +NUM_INDEXING_WORKERS=1 +ENABLED_CONNECTOR_TYPES= +DISABLE_INDEX_UPDATE_ON_SWAP= +DASK_JOB_CLIENT_ENABLED=false +CONTINUE_ON_CONNECTOR_FAILURE=true +EXPERIMENTAL_CHECKPOINTING_ENABLED= +CONFLUENCE_CONNECTOR_LABELS_TO_SKIP= +JIRA_API_VERSION= +JIRA_SERVER_URL= +WEB_CONNECTOR_VALIDATE_URLS= +GONG_CONNECTOR_START_TIME= +NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP= + +# --- Slack bot disabled locally --- +DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER= +DANSWER_BOT_DISPLAY_ERROR_MSGS= +DANSWER_BOT_RESPOND_EVERY_CHANNEL= +DANSWER_BOT_DISABLE_COT= +NOTIFY_SLACKBOT_NO_ANSWER= + +# --- SMTP unused locally --- +SMTP_SERVER= +SMTP_PORT= +SMTP_USER= +EMAIL_FROM= + +# --- Telemetry / logging (more verbose for local debugging) --- +DISABLE_TELEMETRY=true +LOG_LEVEL=debug +LOG_ALL_MODEL_INTERACTIONS=true +LOG_VESPA_TIMING_INFORMATION=true + +# --- Redis --- +# Points at the in-cluster Redis StatefulSet deployed by this overlay +# (see kustomization.yaml resources: ../../optional/redis.yaml). +REDIS_HOST=redis +REDIS_PORT=6379 +REDIS_DB_NUMBER=0 +REDIS_SSL= +REDIS_KV_CACHE_ENABLED= +REDIS_KV_CACHE_TTL_SECONDS=86400 +REQUEST_RATE_LIMIT_ENABLED= +REQUEST_RATE_LIMIT_PER_MINUTE= +REQUEST_RATE_LIMIT_PER_HOUR= +PERSONA_CACHE_ENABLED= +PERSONA_CACHE_TTL_SECONDS=86400 +# Chat-page connector indexing-status cache (see prod env.properties for the +# rationale). Empty = off; set to true to enable the short global TTL cache. 
+CC_PAIR_INFO_CACHE_ENABLED= +CC_PAIR_INFO_CACHE_TTL_SECONDS=60 +# Per-user document-set list cache (see prod env.properties). Empty = off. +DOCUMENT_SET_CACHE_ENABLED= +DOCUMENT_SET_CACHE_TTL_SECONDS=300 +# Celery broker on Redis (logical DB 1). Empty = off → falls back to the +# Postgres broker, which is fine for local. Set to true to mirror prod. +CELERY_BROKER_REDIS_ENABLED= +CELERY_REDIS_DB_NUMBER=1 + +# --- Postgres connection pool (per process; empty = code defaults 40+10) --- +POSTGRES_POOL_SIZE= +POSTGRES_POOL_OVERFLOW= + +# Prune indexing-run history: terminal attempts older than 30d, always keeping +# the last 20 per cc-pair. Empty = code default (0 = off). Mirrors prod. +RETENTION_DAYS_INDEX_ATTEMPT=30 + +# --- File store (default Postgres; see prod env.properties to use Azure Blob) --- +FILE_STORE_TYPE=PostgresBackedFileStore +AZURE_BLOB_CONTAINER=danswer-files +# Chat upload limits (see prod env.properties for rationale). +CHAT_FILE_MAX_SIZE_MB=25 +CHAT_FILE_MAX_TOKEN_FRACTION=0.5 diff --git a/k8s/overlays/local/kustomization.yaml b/k8s/overlays/local/kustomization.yaml new file mode 100644 index 00000000000..770c2c341e6 --- /dev/null +++ b/k8s/overlays/local/kustomization.yaml @@ -0,0 +1,60 @@ +# Local-dev overlay → Rancher Desktop / Docker Desktop / kind / any local cluster. +# +# Apply: kubectl apply -k k8s/overlays/local +# Preview: kubectl kustomize k8s/overlays/local +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - ../../base + +namespace: default + +# Local-dev image refs. `latest` is fine here — for prod we pin (see prod +# overlay). 
+images: + - name: danswer-backend + newName: danswer/danswer-backend + newTag: latest + - name: danswer-web-server + newName: danswer/danswer-web-server + newTag: latest + - name: danswer-model-server + newName: danswer/danswer-model-server + newTag: v0.3.94 + - name: nginx + newName: nginx + newTag: 1.23.4-alpine + # Vespa images live in the separate Vespa overlay (k8s/overlays/local-vespa) + # now that Vespa is applied independently of the app. + +replicas: + - name: api-server-deployment + count: 1 + - name: background-deployment + count: 1 + - name: web-server-deployment + count: 1 + - name: nginx-deployment + count: 1 + - name: inference-model-server-deployment + count: 1 + +configMapGenerator: + - name: env-configmap + behavior: create + envs: + - env.properties + +secretGenerator: + - name: danswer-secrets + envs: + - secrets.env + +generatorOptions: + disableNameSuffixHash: true + +# No affinity/toleration patches needed — the live-cluster dump that +# seeded base/ doesn't use indexcpu-pool affinity. If you re-introduce +# Darwin-specific scheduling in base/ later, add JSON patches here to +# strip them for local. diff --git a/k8s/overlays/local/secrets.env.example b/k8s/overlays/local/secrets.env.example new file mode 100644 index 00000000000..feebcca690c --- /dev/null +++ b/k8s/overlays/local/secrets.env.example @@ -0,0 +1,44 @@ +# Template for the local secrets.env file. Copy to secrets.env and fill in +# whatever you actually need for local dev. secrets.env is gitignored. +# +# Most local-dev runs can leave everything blank — AUTH_TYPE=disabled means +# nothing checks the OAuth secrets, and external integrations (Slack, Jira, +# Opsgenie) just no-op when their tokens are empty. 
+ +# --- Database --- +postgres_user=postgres +postgres_password=password + +# --- OIDC / Entra (unused when AUTH_TYPE=disabled) --- +oauth_client_id= +oauth_client_secret= +user_auth_secret=local-dev-secret-not-for-production + +# --- Google OAuth (legacy / unused locally) --- +google_oauth_client_id= +google_oauth_client_secret= + +# --- Redis (unauth'd locally) --- +redis_password= + +# --- Encryption key (generate one if you'll test encrypted connector creds) --- +ENCRYPTION_KEY_SECRET= + +# --- Slack bot (leave empty unless testing the bot) --- +DANSWER_BOT_SLACK_APP_TOKEN= +DANSWER_BOT_SLACK_BOT_TOKEN= + +# --- LLM credentials --- +GEN_AI_API_KEY= +GEN_AI_CLIENT_ID= +GEN_AI_CLIENT_SECRET= + +# --- Jira (leave empty unless testing the connector) --- +JIRA_API_TOKEN= +JIRA_EMAIL= + +# --- Opsgenie --- +OPSGENIE_API_KEY= + +# --- SMTP --- +SMTP_PASS= diff --git a/k8s/overlays/prod-vespa/kustomization.yaml b/k8s/overlays/prod-vespa/kustomization.yaml new file mode 100644 index 00000000000..39602a483f7 --- /dev/null +++ b/k8s/overlays/prod-vespa/kustomization.yaml @@ -0,0 +1,48 @@ +# Vespa apply target for PROD — deliberately SEPARATE from the app overlay. +# +# `kubectl apply -k k8s/overlays/prod` no longer touches Vespa (Vespa was +# removed from base/). Apply Vespa only when you actually intend to, with: +# +# kubectl apply -k k8s/overlays/prod-vespa # context: darwin +# +# Version upgrades do NOT go through a blanket apply — use the ordered, +# health-gated k8s/scripts/vespa-upgrade.sh, then sync the tags below so git +# ≈ cluster. +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - ../../base/vespa + +namespace: darwin + +# All roles pinned to the SAME version the cluster actually runs. +# NEVER use :latest — it pulled 8.696.20, a >30-release jump from 8.600.35, +# which Vespa's config server refuses (incompatible-upgrade guard) → +# cluster-wide outage. 
8.600.35 is the deployed on-disk version (the content +# nodes' index is in this format). +# +# Do NOT hand-edit these tags to a higher version and `kubectl apply` — that +# bumps all roles at once with no ordering or version-stepping. Upgrades go +# through k8s/scripts/vespa-upgrade.sh (ordered, health-gated, ≤30 releases +# per hop); afterwards, sync these tags to match live. +images: + - name: vespa-configserver + newName: vespaengine/vespa + newTag: "8.600.35" + - name: vespa-admin + newName: vespaengine/vespa + newTag: "8.600.35" + - name: vespa-content + newName: vespaengine/vespa + newTag: "8.600.35" + - name: vespa-feed + newName: vespaengine/vespa + newTag: "8.600.35" + - name: vespa-query + newName: vespaengine/vespa + newTag: "8.600.35" + - name: busybox + # Pinned (was :latest). Only an init chown helper, but same drift hygiene. + newName: busybox + newTag: "1.36.1" diff --git a/k8s/overlays/prod/env.properties b/k8s/overlays/prod/env.properties new file mode 100644 index 00000000000..331889a5472 --- /dev/null +++ b/k8s/overlays/prod/env.properties @@ -0,0 +1,179 @@ +# Non-secret env-configmap values for the Darwin production cluster. +# Real secrets (Slack tokens, API tokens, OAuth client secret, etc.) live in +# secrets.env (gitignored — see secrets.env.example for the template). + +# --- Auth --- +AUTH_TYPE=oidc +SESSION_EXPIRE_TIME_SECONDS=86400 +OPENID_CONFIG_URL=https://login.microsoftonline.com/d8353d2a-b153-4d17-8827-902c51f72357/v2.0/.well-known/openid-configuration +DEFAULT_ADMIN_EMAILS=user1@uipath.com,user2@uipath.com +VALID_EMAIL_DOMAINS= +APPLY_MIGRATIONS=true + +# --- Domain --- +DOMAIN=darwin.westeurope.cloudapp.azure.com +WEB_DOMAIN=https://darwin.westeurope.cloudapp.azure.com +INTERNAL_URL=http://api-server-service:80 + +# --- Database --- +POSTGRES_HOST=darwin-postgres.postgres.database.azure.com +# SQLAlchemy pool sizing PER PROCESS. 
Max DB connections one pod can hold = +# (size + overflow) per engine; api-server uses BOTH a sync and an async +# engine, so a single api-server pod can hold up to 2×(size+overflow). +# Cluster total = that × replicas of every DB-touching pod (api-server + +# background), and must stay under Azure Postgres max_connections (SKU- +# dependent) with headroom for migrations on boot. These match the previous +# hardcoded 40+10 — LOWER them as you add api-server replicas (e.g. 20+10 at +# a few replicas) so replicas × pool stays under the cap. +POSTGRES_POOL_SIZE=40 +POSTGRES_POOL_OVERFLOW=10 +# Chat data retention: delete chat_session/chat_message older than this many +# days (daily retention sweep, 08:00 UTC). Analytics survive the purge — +# they're pre-aggregated into analytics_daily_rollup + analytics_user_first_seen +# at 07:30 UTC, before the sweep. The rollup recompute window auto-caps just +# under this value so it never re-reads deleted days. +RETENTION_DAYS_CHAT=90 + +# Indexing-run history. Default is OFF (kept forever). The index_attempt table +# had grown to ~518k rows; the scheduler reads the latest attempt per cc-pair +# every loop, so unbounded history is real cost. This prunes TERMINAL attempts +# older than 30d while always keeping the last 20 per (connector, credential, +# embedding model) — so every connector retains recent debug history. Runs in +# the same 08:00 UTC sweep. Tune keep-N via RETENTION_KEEP_LAST_N_INDEX_ATTEMPTS +# (default 20). +RETENTION_DAYS_INDEX_ATTEMPT=30 + +# File-store backend. Default keeps bytes in Postgres (large objects). To +# offload bytes to Azure Blob (keeps the DB lean; stops file reads holding a +# PG connection): set FILE_STORE_TYPE=AzureBlobFileStore, put +# AZURE_BLOB_CONNECTION_STRING in secrets.env, then run +# scripts/migrate_file_store_to_azure_blob.py once. Until that switch the Azure settings are inert — NOTE: this overlay already sets FILE_STORE_TYPE=AzureBlobFileStore below, so the steps above apply to prod; the commented-out PostgresBackedFileStore line is the default to revert to.
+#FILE_STORE_TYPE=PostgresBackedFileStore +FILE_STORE_TYPE=AzureBlobFileStore +AZURE_BLOB_CONTAINER=danswer-files +# Chat upload limits: a chat-attached doc is stuffed whole into the prompt, so +# it must fit the model context window. Byte cap (all types) + a token cap = +# fraction of the model's max input tokens (rejects docs that would overflow). +CHAT_FILE_MAX_SIZE_MB=25 +CHAT_FILE_MAX_TOKEN_FRACTION=0.5 + +# --- Vespa search index --- +VESPA_HOST=vespa-query +VESPA_CONFIG_SERVER_HOST=vespa-configserver +VESPA_FEED_HOST=vespa-feed +VESPA_PORT=8080 +VESPA_FEED_PORT=8080 + +# --- Model servers --- +MODEL_SERVER_HOST=inference-model-server-service +INDEXING_MODEL_SERVER_HOST=indexing-model-server-service +MODEL_SERVER_PORT= +MIN_THREADS_ML_MODELS= +DOCUMENT_ENCODER_MODEL= +NORMALIZE_EMBEDDINGS= +ASYM_QUERY_PREFIX= +ASYM_PASSAGE_PREFIX= +ENABLE_RERANKING_REAL_TIME_FLOW= +ENABLE_RERANKING_ASYNC_FLOW= + +# --- LLM --- +GEN_AI_MODEL_PROVIDER=custom +GEN_AI_VENDOR=openai +GEN_AI_MODEL_NAME=gpt-4o-2024-11-20 +GEN_AI_MODEL_VERSION= +FAST_GEN_AI_MODEL_VERSION= +GEN_AI_API_VERSION= +GEN_AI_LLM_PROVIDER_TYPE= +GEN_AI_MAX_TOKENS= +GEN_AI_ACCOUNT_ID=bc2ddac5-57bc-40e6-93fe-3b319b60ce36 +GEN_AI_TENANT_ID=e367ca54-053b-4b86-89a2-6b9e89e85e7a +GEN_AI_API_ENDPOINT=https://alpha.uipath.com/bc2ddac5-57bc-40e6-93fe-3b319b60ce36/e367ca54-053b-4b86-89a2-6b9e89e85e7a/llmgateway_/api/raw/vendor/openai/model/gpt-4.1-mini-2025-04-14/completions +GEN_AI_IDENTITY_ENDPOINT=https://alpha.uipath.com/identity_/connect/token + +# --- Query options --- +QA_TIMEOUT=60 +MAX_CHUNKS_FED_TO_CHAT= +DISABLE_LLM_FILTER_EXTRACTION=true +DISABLE_LLM_CHUNK_FILTER=true +DISABLE_LLM_CHOOSE_SEARCH=true +DISABLE_LLM_QUERY_REPHRASE=false +DOC_TIME_DECAY= +HYBRID_ALPHA=0.8 +EDIT_KEYWORD_QUERY= +# Disabled: the expansion path uses a hardcoded 5s LLM timeout +# (query_expansion.py) which gpt-4o behind the gateway routinely exceeds, +# causing ReadTimeouts. 
Re-enable only with a genuinely fast model wired +# to FAST_GEN_AI_MODEL_VERSION. +MULTILINGUAL_QUERY_EXPANSION= +QA_PROMPT_OVERRIDE= +LANGUAGE_HINT=IMPORTANT: Always respond in English regardless of the language of the query or documents. +DISABLE_GENERATIVE_AI= + +# --- Indexing --- +NUM_INDEXING_WORKERS=2 +ENABLED_CONNECTOR_TYPES= +DISABLE_INDEX_UPDATE_ON_SWAP= +DASK_JOB_CLIENT_ENABLED=true +CONTINUE_ON_CONNECTOR_FAILURE=true +EXPERIMENTAL_CHECKPOINTING_ENABLED= +CONFLUENCE_CONNECTOR_LABELS_TO_SKIP= +JIRA_API_VERSION= +JIRA_SERVER_URL=https://uipath.atlassian.net +WEB_CONNECTOR_VALIDATE_URLS= +GONG_CONNECTOR_START_TIME= +NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP= + +# --- Slack bot (flags only — tokens are in secrets.env) --- +DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER= +DANSWER_BOT_DISPLAY_ERROR_MSGS= +DANSWER_BOT_RESPOND_EVERY_CHANNEL= +DANSWER_BOT_DISABLE_COT= +NOTIFY_SLACKBOT_NO_ANSWER= + +# --- SMTP (non-secret bits) --- +SMTP_SERVER= +SMTP_PORT= +SMTP_USER= +EMAIL_FROM= + +# --- Telemetry / logging --- +DISABLE_TELEMETRY=true +LOG_LEVEL=info +LOG_ALL_MODEL_INTERACTIONS=true +LOG_VESPA_TIMING_INFORMATION= + +# --- Redis (cache + rate limiting; from this branch's Redis work) --- +# Prod deploys the in-cluster Redis StatefulSet (see the components: block +# in kustomization.yaml referencing ../../optional/redis). REDIS_HOST is +# the Service name. redis_password in secrets.env is empty by default +# because the in-cluster Redis runs without requirepass; populate the +# secret + add `--requirepass $(REDIS_PASSWORD)` to the StatefulSet args +# if you want auth. +REDIS_HOST=redis +REDIS_PORT=6379 +REDIS_DB_NUMBER=0 +REDIS_SSL= +# Celery brokers + stores results on Redis instead of Postgres — removes +# Celery's queue polling/writes from the DB. Uses logical DB 1 (cache/rate +# limit use REDIS_DB_NUMBER=0), so the two never collide. 
+CELERY_BROKER_REDIS_ENABLED=true +CELERY_REDIS_DB_NUMBER=1 +REDIS_KV_CACHE_ENABLED=true +REQUEST_RATE_LIMIT_ENABLED=true +REQUEST_RATE_LIMIT_PER_MINUTE=20 +REQUEST_RATE_LIMIT_PER_HOUR=300 +REDIS_KV_CACHE_TTL_SECONDS=86400 +PERSONA_CACHE_ENABLED=true +PERSONA_CACHE_TTL_SECONDS=86400 +# Chat-page connector indexing-status cache. /manage/indexing-status runs a +# per-cc-pair doc-count aggregation (~300ms on the live DB) on every chat +# page load; the result is identical for all users and slow-changing, so a +# short global TTL cache fronts it. Pure TTL (no explicit invalidation) — +# staleness is at most CC_PAIR_INFO_CACHE_TTL_SECONDS and harmless. +CC_PAIR_INFO_CACHE_ENABLED=true +CC_PAIR_INFO_CACHE_TTL_SECONDS=60 +# Per-user document-set list cache (the /document-set read on the chat-page +# bundle). Write-through busted on every doc-set mutation; TTL is a short +# backstop. Staleness is cosmetic (documents stay permission-enforced). +DOCUMENT_SET_CACHE_ENABLED=true +DOCUMENT_SET_CACHE_TTL_SECONDS=300 diff --git a/k8s/overlays/prod/kustomization.yaml b/k8s/overlays/prod/kustomization.yaml new file mode 100644 index 00000000000..7e492172b1c --- /dev/null +++ b/k8s/overlays/prod/kustomization.yaml @@ -0,0 +1,82 @@ +# Production overlay → Darwin AKS cluster (kubectl context: `darwin`). +# +# Single source of truth for the prod environment: +# - env.properties non-secret config (committed) +# - secrets.env real secrets (gitignored; copy from .example) +# - images: below which image tags this env runs +# - replicas: below deployment scaling +# +# Apply: kubectl apply -k k8s/overlays/prod +# Preview: kubectl kustomize k8s/overlays/prod +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - ../../base + +components: + - ../../optional/background-scaling + +namespace: darwin + +# Image refs are logical names in base/. Kustomize rewrites them to the +# concrete registry+tag below. 
Bump a `newTag` here to roll out a new +# image; no manifest edits required. +images: + - name: danswer-backend + newName: sfbrdevhelmweacr.azurecr.io/danswer/danswer-backend + newTag: vha-147 + - name: danswer-web-server + newName: sfbrdevhelmweacr.azurecr.io/danswer/danswer-web-server + newTag: vha-77 + - name: danswer-model-server + newName: danswer/danswer-model-server + newTag: v0.3.94 + - name: nginx + newName: nginx + newTag: 1.23.4-alpine + # Vespa images live in the separate Vespa overlay (k8s/overlays/prod-vespa) + # now that Vespa is applied independently of the app — see that file and + # k8s/README.md. + +replicas: + - name: api-server-deployment + count: 2 + - name: background-deployment + count: 0 + - name: web-server-deployment + count: 1 + - name: nginx-deployment + count: 1 + - name: inference-model-server-deployment + count: 1 + - name: background-lite-deployment # singleton — beat + slack; never >1 + count: 1 + - name: background-indexer-scheduler-deployment # singleton — the update.py loop + count: 1 + - name: dask-scheduler-deployment # singleton + count: 1 + - name: dask-worker-deployment # ← THE indexing-throughput knob + count: 2 + +# env-configmap: regenerated from env.properties. `behavior: create` because +# base doesn't ship a ConfigMap with this name — the overlay owns it. +configMapGenerator: + - name: env-configmap + behavior: create + envs: + - env.properties + +# danswer-secrets: regenerated from secrets.env (gitignored). Same name as the +# existing live Secret — deployments referencing `danswer-secrets` work +# unchanged. +secretGenerator: + - name: danswer-secrets + envs: + - secrets.env + +# Without this kustomize appends a content-hash suffix to generated names +# (e.g. danswer-secrets-abc123). Disabled so deployments' explicit +# secretKeyRef name=danswer-secrets continues to resolve. 
+generatorOptions: + disableNameSuffixHash: true diff --git a/k8s/overlays/prod/secrets.env.example b/k8s/overlays/prod/secrets.env.example new file mode 100644 index 00000000000..8f3837df1d7 --- /dev/null +++ b/k8s/overlays/prod/secrets.env.example @@ -0,0 +1,60 @@ +# Template for the production secrets.env file. Copy to secrets.env and fill in +# real values. secrets.env is gitignored — never commit real values. +# +# IMPORTANT — values marked LEAKED were previously stored as plaintext +# in the live env-configmap (visible to anyone with cluster read access). +# Rotate every LEAKED value before flipping to the new layout. + +# --- Database (already in the existing danswer-secrets) --- +postgres_user=postgres +postgres_password=REPLACE_ME + +# --- OIDC / Entra (already in the existing danswer-secrets) --- +oauth_client_id=REPLACE_ME +oauth_client_secret=REPLACE_ME +# Signs the fastapi-users session + OAuth state JWT. Generate once: +# openssl rand -hex 32 +# MUST be identical across all replicas and stable across restarts. +user_auth_secret=REPLACE_ME + +# --- Google OAuth (legacy; already in the existing danswer-secrets) --- +google_oauth_client_id= +google_oauth_client_secret= + +# --- Redis (added by this branch; optional) --- +# Leave empty for the unauth'd in-cluster Redis StatefulSet. +redis_password= + +# --- Encryption key (was leaked as empty in live configmap — generate one) --- +# Used to encrypt connector credentials at rest. 
+# Generate once: openssl rand -hex 32 +ENCRYPTION_KEY_SECRET=REPLACE_ME + +# --- Slack bot (LEAKED in live configmap as plaintext — ROTATE these) --- +DANSWER_BOT_SLACK_APP_TOKEN=REPLACE_ME +DANSWER_BOT_SLACK_BOT_TOKEN=REPLACE_ME + +# --- LLM gateway credentials (LEAKED — ROTATE) --- +GEN_AI_API_KEY= +GEN_AI_CLIENT_ID=REPLACE_ME +GEN_AI_CLIENT_SECRET=REPLACE_ME + +# --- Jira (LEAKED — ROTATE) --- +JIRA_API_TOKEN=REPLACE_ME +JIRA_EMAIL=REPLACE_ME + +# --- Opsgenie (LEAKED — ROTATE) --- +OPSGENIE_API_KEY=REPLACE_ME + +# --- SMTP --- +SMTP_PASS= + +# --- Azure Blob file store (only when FILE_STORE_TYPE=AzureBlobFileStore) --- +# Storage-account connection string. Injected as the AZURE_BLOB_CONNECTION_STRING +# env var via envFrom. MUST be the ACCOUNT-KEY string (AccountName + AccountKey) +# — the server signs per-blob upload SAS with that key; a SAS-token string is +# rejected. Get it with: +# az storage account show-connection-string -n STORAGE_ACCOUNT_NAME -g RESOURCE_GROUP -o tsv +# Setting it in a shell instead? Single-quote it — the ';' separators are shell +# command separators and silently truncate the value otherwise. +AZURE_BLOB_CONNECTION_STRING= diff --git a/k8s/scripts/build-deploy.sh b/k8s/scripts/build-deploy.sh new file mode 100755 index 00000000000..fdb0a515bf7 --- /dev/null +++ b/k8s/scripts/build-deploy.sh @@ -0,0 +1,295 @@ +#!/usr/bin/env bash +# +# build-deploy.sh {build|push|deploy|verify} [component ...] +# +# One command for the backend/web image lifecycle against the Darwin prod +# overlay. Stages are CUMULATIVE — each does everything the lighter stage +# does, then one more thing: +# +# build bump tag(s) from kustomization.yaml, docker build (linux/amd64) +# push build + docker tag + docker push to the ACR +# deploy push + rewrite kustomization.yaml newTag(s) + kubectl apply -k +# verify (standalone) compare LIVE cluster image tags vs the manifest, +# and report pod health (running / restarts / crashloops) +# +# Components default to BOTH (backend web).
Restrict with positional args: +# build-deploy.sh push backend # only the backend image +# build-deploy.sh deploy web # only the web image +# build-deploy.sh build # both +# +# The next tag for each component is computed from the CURRENT newTag in +# k8s/overlays/prod/kustomization.yaml (vha-N -> vha-N+1). The manifest is the +# source of truth and is only EDITED at the `deploy` stage — `build`/`push` +# produce/push the next-tag image without touching the committed manifest, so +# you can build/push first and deploy later (or on another machine). +# +# This is your manual flow, automated: +# docker build -f ./backend/Dockerfile ./backend -t danswer/danswer-backend:latest --platform linux/amd64 +# docker build -f ./web/Dockerfile ./web -t danswer/danswer-web-server:latest --platform=linux/amd64 --load +# docker tag danswer/danswer-backend:latest $REGISTRY/danswer-backend:vha-N +# docker tag danswer/danswer-web-server:latest $REGISTRY/danswer-web-server:vha-M +# docker push $REGISTRY/danswer-backend:vha-N +# docker push $REGISTRY/danswer-web-server:vha-M +# +# Safety: +# - `deploy` refuses unless the kubectl context is the prod cluster +# ($PROD_CONTEXT) — the prod overlay targets it. Override with FORCE=1. +# - The manifest tag bump is NOT git-committed; the script reminds you. +# - DRY_RUN=1 prints every docker/kubectl command instead of running it. +# +# Registry auth (push/deploy stages): +# Credentials are read from the environment — export them in ~/.zshrc: +# export ACR_USERNAME=YOUR_ACR_USERNAME +# export ACR_PASSWORD=YOUR_ACR_PASSWORD +# The script does `docker login` with them (via --password-stdin, never +# echoed). If either is unset it first tries those two export lines in ~/.zshrc; if still unset, push/deploy EXIT immediately. +# +# Disk pre-req (build stage): +# Before building, if the Docker disk is >= DISK_THRESHOLD% (default 80) full, +# it reclaims space (build cache -> dangling images -> unused images >7d) +# instead of letting the build fail with "no space left on device".
+# Tune with DISK_THRESHOLD=90; bypass with SKIP_DISK_CHECK=1. +# +set -euo pipefail + +# ---- config --------------------------------------------------------------- +REGISTRY="sfbrdevhelmweacr.azurecr.io/danswer" +REGISTRY_HOST="${REGISTRY%%/*}" # sfbrdevhelmweacr.azurecr.io (login target) +PROD_CONTEXT="darwin" +NAMESPACE="darwin" +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +KUSTOMIZATION="$REPO_ROOT/k8s/overlays/prod/kustomization.yaml" +OVERLAY_DIR="$REPO_ROOT/k8s/overlays/prod" + +# Per-component config. Functions (not associative arrays) so this runs on the +# stock macOS bash 3.2 too — `declare -A` is bash 4+ only. Each helper takes a component name (backend|web) as $1 and echoes one value; there is no default arm, so any other name prints nothing. +# backend: ./backend/Dockerfile ctx ./backend local tag danswer/danswer-backend +# web: ./web/Dockerfile ctx ./web local tag danswer/danswer-web-server +img_logical() { case "$1" in backend) echo danswer-backend;; web) echo danswer-web-server;; esac; } +img_local() { case "$1" in backend) echo danswer/danswer-backend;; web) echo danswer/danswer-web-server;; esac; } +img_dockerfile() { case "$1" in backend) echo "$REPO_ROOT/backend/Dockerfile";; web) echo "$REPO_ROOT/web/Dockerfile";; esac; } +img_context() { case "$1" in backend) echo "$REPO_ROOT/backend";; web) echo "$REPO_ROOT/web";; esac; } +# web build adds --load (matches your manual command); backend does not.
+img_build_extra() { case "$1" in web) echo "--load";; *) echo "";; esac; } +# which live deployment to read the running tag from, for `verify` +img_verify_deploy(){ case "$1" in backend) echo api-server-deployment;; web) echo web-server-deployment;; esac; } + +# ---- logging -------------------------------------------------------------- +log() { printf '\033[1;34m==>\033[0m %s\n' "$*"; } +ok() { printf '\033[1;32m ok\033[0m %s\n' "$*"; } +warn() { printf '\033[1;33m !\033[0m %s\n' "$*" >&2; } +die() { printf '\033[1;31mERR\033[0m %s\n' "$*" >&2; exit 1; } +run() { if [ "${DRY_RUN:-0}" = "1" ]; then printf '\033[2m $ %s\033[0m\n' "$*"; else "$@"; fi; } + +# ---- registry login ------------------------------------------------------- +# Credentials come from the environment — export them in ~/.zshrc: +# export ACR_USERNAME=YOUR_ACR_USERNAME +# export ACR_PASSWORD=YOUR_ACR_PASSWORD +# When you run this script from your zsh shell they're already inherited. As a +# fallback (e.g. invoked from a non-zsh context) we pull just those two exports +# out of ~/.zshrc rather than sourcing the whole file (zsh syntax can break +# under bash). The matched lines are eval'd verbatim, so keep them plain assignments. Never echoed; piped via --password-stdin. +registry_login() { + if [ -z "${ACR_USERNAME:-}" ] || [ -z "${ACR_PASSWORD:-}" ]; then + if [ -f "$HOME/.zshrc" ]; then + eval "$(grep -E '^[[:space:]]*export[[:space:]]+(ACR_USERNAME|ACR_PASSWORD)=' "$HOME/.zshrc" 2>/dev/null || true)" + fi + fi + if [ -z "${ACR_USERNAME:-}" ] || [ -z "${ACR_PASSWORD:-}" ]; then + die "ACR_USERNAME/ACR_PASSWORD not set — add them to ~/.zshrc (export ACR_USERNAME=..., export ACR_PASSWORD=...) and retry."
+ fi + log "docker login $REGISTRY_HOST as $ACR_USERNAME" + if [ "${DRY_RUN:-0}" = "1" ]; then + printf '\033[2m $ docker login %s -u %s --password-stdin <<< $ACR_PASSWORD\033[0m\n' "$REGISTRY_HOST" "$ACR_USERNAME" + return 0 + fi + printf '%s' "$ACR_PASSWORD" | docker login "$REGISTRY_HOST" -u "$ACR_USERNAME" --password-stdin \ + || die "docker login to $REGISTRY_HOST failed — check ACR_USERNAME/ACR_PASSWORD in ~/.zshrc" + ok "logged in to $REGISTRY_HOST" +} + +# ---- disk pre-req --------------------------------------------------------- +# Before building, make sure there's room — a full Docker disk fails the build +# with "no space left on device" partway through. If usage >= DISK_THRESHOLD%, +# reclaim space with a graduated prune (cheapest/safest first) rather than +# letting the build die. On Docker Desktop (mac) the build runs in a Linux VM; +# DockerRootDir isn't a host path, so we fall back to df of the host root as a +# proxy — pruning the build cache / unused images still frees the VM's disk, +# which is what actually fills up.
+# Echo the integer "percent used" of the filesystem holding path $1 (POSIX df
+# output, second line, Capacity column). Echoes nothing if df fails.
+disk_used_pct() { df -P "$1" 2>/dev/null | awk 'NR==2{gsub(/%/,"",$5); print $5+0}'; }
+# Pre-build disk check: if usage is at or above DISK_THRESHOLD (default 80),
+# prune Docker data in increasingly aggressive steps. Never fails the build on
+# its own; only an invalid DISK_THRESHOLD is fatal.
+ensure_disk_space() {
+  [ "${SKIP_DISK_CHECK:-0}" = "1" ] && { warn "SKIP_DISK_CHECK=1 — skipping disk pre-req"; return 0; }
+  command -v docker >/dev/null 2>&1 || { warn "docker not found — skipping disk check"; return 0; }
+  local threshold="${DISK_THRESHOLD:-80}" root target used
+  # A non-numeric threshold (e.g. "90%") makes the `[ … -lt … ]` test below
+  # error out, which skips the "disk ok" return and falls straight through to
+  # the prune commands. Reject it instead of pruning by accident.
+  case "$threshold" in ''|*[!0-9]*) die "DISK_THRESHOLD must be an integer percent, got '$threshold'";; esac
+  root="$(docker info -f '{{.DockerRootDir}}' 2>/dev/null || true)"
+  target="/"; [ -n "$root" ] && [ -d "$root" ] && target="$root"
+  used="$(disk_used_pct "$target")"; used="${used:-0}"
+  log "disk pre-req: $target at ${used}% used (threshold ${threshold}%)"
+  docker system df 2>/dev/null || true
+  [ "$used" -lt "$threshold" ] && { ok "disk ok — no cleanup needed"; return 0; }
+
+  warn "disk >= ${threshold}% — reclaiming Docker space before build"
+  run docker builder prune -f || true # build cache — usually the biggest, fully safe
+  run docker image prune -f || true # dangling (untagged) images — safe
+  used="$(disk_used_pct "$target")"; used="${used:-0}"
+  if [ "$used" -ge "$threshold" ]; then
+    warn "still ${used}% — pruning unused images older than 7d"
+    run docker image prune -af --filter "until=168h" || true # unused tagged images >7d old
+    used="$(disk_used_pct "$target")"; used="${used:-0}"
+  fi
+  if [ "$used" -ge "$threshold" ]; then
+    warn "still ${used}% after cleanup — build may hit 'no space left on device'."
+    warn "free space manually, or re-run with a higher DISK_THRESHOLD / SKIP_DISK_CHECK=1."
+  else
+    ok "reclaimed space — now ${used}% used"
+  fi
+}
+
+# ---- arg parsing ----------------------------------------------------------
+# $1 is the stage (required); the remaining args are components, defaulting to
+# both. `shift || true` tolerates being called with no arguments under set -e.
+STAGE="${1:-}"; shift || true
+case "$STAGE" in build|push|deploy|verify) ;; *)
+  die "usage: build-deploy.sh <build|push|deploy|verify> [backend|web ...]"; esac
+
+COMPONENTS=("$@")
+[ "${#COMPONENTS[@]}" -eq 0 ] && COMPONENTS=(backend web)
+for c in "${COMPONENTS[@]}"; do
+  [ -n "$(img_logical "$c")" ] || die "unknown component '$c' (expected: backend web)"
+done
+
+# ---- kustomization tag helpers --------------------------------------------
+# read the newTag for a logical image name out of kustomization.yaml
+# (echoes nothing when the image entry or its newTag line is not found)
+read_tag() {
+  local logical="$1"
+  awk -v img="$logical" '
+    $1=="-" && $2=="name:" && $3==img {inblock=1; next}
+    inblock && $1=="newTag:" {print $2; exit}
+    inblock && $1=="-" {inblock=0}
+  ' "$KUSTOMIZATION"
+}
+# vha-146 -> vha-147 ; refuses anything not matching vha-<N>
+next_tag() {
+  local cur="$1"
+  [[ "$cur" =~ ^vha-([0-9]+)$ ]] || die "tag '$cur' is not vha-<N> — refusing to auto-increment; bump it manually."
+  # Force base 10: without `10#`, a zero-padded number is read as octal
+  # (vha-010 would become vha-9, and vha-008 is an arithmetic error).
+  echo "vha-$(( 10#${BASH_REMATCH[1]} + 1 ))"
+}
+# rewrite the newTag line that follows `- name: <logical>` in place
+set_tag() {
+  local logical="$1" tag="$2" tmp
+  if [ "${DRY_RUN:-0}" = "1" ]; then
+    printf '\033[2m $ set newTag %s -> %s in kustomization.yaml\033[0m\n' "$logical" "$tag"
+    return 0
+  fi
+  tmp="$(mktemp)"
+  # Every list item (`- …`) re-evaluates inblock, mirroring read_tag: if the
+  # matched image entry has no newTag line, the NEXT image's newTag must not
+  # be rewritten by mistake.
+  awk -v img="$logical" -v newtag="$tag" '
+    $1=="-" { inblock = ($2=="name:" && $3==img) }
+    inblock && $1=="newTag:" { sub(/newTag:.*/, "newTag: " newtag); inblock=0 }
+    {print}
+  ' "$KUSTOMIZATION" > "$tmp" && mv "$tmp" "$KUSTOMIZATION"
+}
+
+# ---- verify (standalone) --------------------------------------------------
+# Read-only: compares each component's live image tag with the manifest's
+# newTag, lists unhealthy/restarted pods, and exits 1 on drift or when a live
+# image could not be read.
+if [ "$STAGE" = "verify" ]; then
+  ctx="$(kubectl config current-context 2>/dev/null || true)"
+  log "kubectl context: ${ctx:-} (expected prod: $PROD_CONTEXT)"
+  [ "$ctx" = "$PROD_CONTEXT" ] || warn "not on prod context — live values below are from '$ctx'."
+  rc=0
+  for c in "${COMPONENTS[@]}"; do
+    logical="$(img_logical "$c")"; deploy="$(img_verify_deploy "$c")"
+    manifest_tag="$(read_tag "$logical")"
+    live_img="$(kubectl get deploy "$deploy" -n "$NAMESPACE" \
+      -o jsonpath='{.spec.template.spec.containers[0].image}' 2>/dev/null || true)"
+    live_tag="${live_img##*:}"
+    if [ -z "$live_img" ]; then
+      warn "$c: could not read live image from deploy/$deploy"
+      rc=1
+    elif [ "$live_tag" = "$manifest_tag" ]; then
+      ok "$c: live=$live_tag == manifest=$manifest_tag"
+    else
+      warn "$c: live=$live_tag != manifest=$manifest_tag (cluster does not match the manifest)"
+      rc=1
+    fi
+  done
+  log "pod health in ns/$NAMESPACE (non-Running / restarts):"
+  # Informational only — pod state never changes the exit code (hence `|| true`).
+  kubectl get pods -n "$NAMESPACE" --no-headers 2>/dev/null | awk '
+    { ready=$2; status=$3; restarts=$4; name=$1
+      if (status!="Running" && status!="Completed") { print " ! " name " " status " ready=" ready }
+      else if (restarts+0 > 0) { print " ~ " name " restarts=" restarts }
+    }' || true
+  if [ "$rc" -eq 0 ]; then
+    ok "verify: cluster matches manifest"
+  else
+    warn "verify: drift or unreadable — see above"
+  fi
+  exit "$rc"
+fi
+
+# next tag for a component, computed fresh from the manifest each call (no
+# associative-array state — keeps this bash-3.2 safe).
+component_next_tag() {
+  local cur; cur="$(read_tag "$(img_logical "$1")")"
+  [ -n "$cur" ] || die "could not read current tag for '$1' in kustomization"
+  next_tag "$cur"
+}
+
+# Refuse to continue unless kubectl points at the prod context (FORCE=1
+# downgrades the refusal to a warning). Sets the global `ctx`.
+require_prod_context() {
+  ctx="$(kubectl config current-context 2>/dev/null || true)"
+  if [ "$ctx" != "$PROD_CONTEXT" ]; then
+    [ "${FORCE:-0}" = "1" ] || die "kubectl context is '$ctx', expected prod '$PROD_CONTEXT'. Switch context or set FORCE=1."
+    warn "context '$ctx' != '$PROD_CONTEXT' but FORCE=1 set — proceeding."
+  fi
+}
+
+# ---- preview next tags for the requested components -----------------------
+# The next tag is computed in an `if` so a bad tag is actually detected here:
+# inside a printf argument the failure of next_tag is ignored by `set -e`, and
+# the script used to run the whole docker build before dying at push.
+log "computing next tags from $(basename "$KUSTOMIZATION"):"
+for c in "${COMPONENTS[@]}"; do
+  cur="$(read_tag "$(img_logical "$c")")"
+  if nxt="$(next_tag "$cur" 2>/dev/null)"; then
+    printf ' %-8s %s -> %s\n' "$c" "$cur" "$nxt"
+  elif [ "$STAGE" = "build" ]; then
+    # build never uses the tag, so keep going as before — just say so clearly.
+    warn "$c: current tag '$cur' cannot be auto-incremented — fine for 'build', but push/deploy will refuse it."
+  else
+    die "$c: cannot compute the next tag from current tag '$cur' in $KUSTOMIZATION — fix the manifest before building."
+  fi
+done
+
+# Fail fast: a deploy on the wrong context should stop before a long build and
+# a registry push, not after them. Checked again right before the apply.
+if [ "$STAGE" = "deploy" ]; then require_prod_context; fi
+
+# ---- build ----------------------------------------------------------------
+ensure_disk_space
+log "BUILD (linux/amd64)"
+cd "$REPO_ROOT"
+for c in "${COMPONENTS[@]}"; do
+  local_tag="$(img_local "$c"):latest"
+  log "build $c -> $local_tag"
+  # shellcheck disable=SC2046,SC2086
+  run docker build -f "$(img_dockerfile "$c")" "$(img_context "$c")" \
+    -t "$local_tag" --platform linux/amd64 $(img_build_extra "$c")
+  ok "built $local_tag"
+done
+[ "$STAGE" = "build" ] && { ok "build complete (no push/deploy)"; exit 0; }
+
+# ---- push -----------------------------------------------------------------
+log "PUSH -> $REGISTRY"
+registry_login # docker login using $ACR_USERNAME/$ACR_PASSWORD (see helper)
+for c in "${COMPONENTS[@]}"; do
+  local_tag="$(img_local "$c"):latest"
+  remote_tag="$REGISTRY/$(img_logical "$c"):$(component_next_tag "$c")"
+  run docker tag "$local_tag" "$remote_tag"
+  log "push $remote_tag"
+  if ! run docker push "$remote_tag"; then
+    warn "push failed — check ACR_USERNAME/ACR_PASSWORD in ~/.zshrc, or run: az acr login --name ${REGISTRY_HOST%%.*}"
+    die "aborting at push for $c"
+  fi
+  ok "pushed $remote_tag"
+done
+[ "$STAGE" = "push" ] && { ok "push complete (manifest NOT modified; run 'deploy' to roll out)"; exit 0; }
+
+# ---- deploy ---------------------------------------------------------------
+log "DEPLOY"
+# Re-check: the context may have been switched while the build was running.
+require_prod_context
+# Capture the tags BEFORE editing — set_tag mutates the manifest, so
+# component_next_tag would read the already-bumped value on a second call.
+APPLIED=()
+for c in "${COMPONENTS[@]}"; do
+  nxt="$(component_next_tag "$c")"
+  set_tag "$(img_logical "$c")" "$nxt"
+  APPLIED+=("$c=$nxt")
+  ok "kustomization newTag $(img_logical "$c") -> $nxt"
+done
+log "kubectl apply -k $OVERLAY_DIR (ns=$NAMESPACE, context=$ctx)"
+run kubectl apply -k "$OVERLAY_DIR"
+ok "applied. Rollout status:"
+for c in "${COMPONENTS[@]}"; do
+  d="$(img_verify_deploy "$c")"
+  run kubectl rollout status "deploy/$d" -n "$NAMESPACE" --timeout=180s || \
+    warn "rollout for $d did not complete in time — check manually"
+done
+warn "manifest tag bump is NOT committed. Commit it:"
+# Both git commands carry -C so the hint works from any working directory.
+printf ' git -C %s add %s && git -C %s commit -m "k8s(prod): bump %s"\n' \
+  "$REPO_ROOT" "k8s/overlays/prod/kustomization.yaml" "$REPO_ROOT" "${APPLIED[*]}"
+ok "deploy complete"
diff --git a/k8s/scripts/guarded-apply.sh b/k8s/scripts/guarded-apply.sh
new file mode 100755
index 00000000000..1b006459ac2
--- /dev/null
+++ b/k8s/scripts/guarded-apply.sh
@@ -0,0 +1,90 @@
+#!/usr/bin/env bash
+#
+# guarded-apply.sh <overlay> [namespace]
+#
+# Wrapper around `kubectl apply -k k8s/overlays/<overlay>` that refuses to
+# apply a Vespa version pin which would jump too far from the version the
+# cluster is ACTUALLY running.
+#
+# Vespa is applied via its OWN overlays (prod-vespa / local-vespa); the app
+# overlays (prod / local) no longer contain Vespa. So this guard only kicks
+# in for an overlay that actually renders Vespa — for an app overlay it just
+# diffs + applies. Run `guarded-apply.sh prod-vespa` to apply Vespa safely.
+#
+# Why this exists: Vespa's config server refuses an auto-upgrade spanning
+# more than MAX_UPGRADE_HOP minor releases (see AGENTS.md "Critical facts
+# §10"). A bare tag bump that crosses that gap crash-loops the config
+# server and takes the whole cluster down. This guard catches it BEFORE
+# the apply reaches the cluster.
+#
+# It checks against the LIVE running version (not the repo's previous pin)
+# on purpose — config can drift out of git, so live is the only truth that
+# matters at apply time.
+#
+# Usage:
+#   k8s/scripts/guarded-apply.sh prod
+#   k8s/scripts/guarded-apply.sh local default
+#   FORCE=1 k8s/scripts/guarded-apply.sh prod # override the guard (you accept the risk)
+#
+set -euo pipefail
+
+OVERLAY="${1:?usage: guarded-apply.sh <overlay> [namespace]}"
+DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/overlays/${OVERLAY}"
+# Default namespace per overlay (prod / prod-vespa → darwin, else → default).
+if [ "${2:-}" != "" ]; then NS="$2"; elif [[ "$OVERLAY" == prod* ]]; then NS="darwin"; else NS="default"; fi
+MAX_UPGRADE_HOP=30 # Vespa's documented limit (minor releases per upgrade)
+
+[ -d "$DIR" ] || { echo "ERROR: overlay dir not found: $DIR"; exit 2; }
+
+minor() { echo "$1" | cut -d. -f2; }
+major() { echo "$1" | cut -d. -f1; }
+
+RENDERED=$(kubectl kustomize "$DIR") || { echo "ERROR: could not render overlay '$OVERLAY' — refusing to apply without the Vespa guard." >&2; exit 2; } # fail CLOSED: an empty render used to read as "no Vespa"
+
+# Does this overlay deploy Vespa at all? App overlays (prod/local) don't —
+# Vespa is applied via the separate *-vespa overlays — so the guard is moot
+# for them. `has_vespa` is any vespaengine/vespa image (any tag, even :latest).
+# `|| true`: under `set -euo pipefail` a no-match grep exits 1, which would
+# abort the whole script before applying. App overlays legitimately have no
+# Vespa, so a non-match is expected, not an error.
+has_vespa=$(echo "$RENDERED" | grep -oE 'image: *vespaengine/vespa:' | head -1 || true)
+
+# New Vespa version this overlay would deploy (pinned X.Y.Z only; first match in the render).
+new_ver=$(echo "$RENDERED" \
+  | grep -oE 'image: *vespaengine/vespa:[0-9]+\.[0-9]+\.[0-9]+' | head -1 | sed -E 's/.*://' || true)
+
+abort() { echo "REFUSING TO APPLY. $1"; echo "Override with FORCE=1 if you understand the risk."; [ "${FORCE:-0}" = "1" ] && { echo "FORCE=1 set — proceeding anyway."; return 0; }; exit 1; }
+
+if [ -z "$has_vespa" ]; then
+  echo "No Vespa in overlay '$OVERLAY' — skipping the Vespa version guard (Vespa is applied via the *-vespa overlays)."
+else
+  # Current running version: the image tag on a live Vespa StatefulSet.
+  cur_ver=$(kubectl get statefulset vespa-content -n "$NS" \
+    -o jsonpath='{.spec.template.spec.containers[0].image}' 2>/dev/null \
+    | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' || true)
+
+  echo "Vespa version guard: current(live)=${cur_ver:-} new(overlay)=${new_ver:-} ns=$NS"
+
+  if [ -z "$new_ver" ]; then
+    abort "Overlay deploys Vespa but on an unparseable/floating tag (e.g. :latest) — pin an explicit vespaengine/vespa:X.Y.Z."
+  elif [ -z "$cur_ver" ]; then
+    echo "WARNING: could not read the live Vespa version (no cluster access, or vespa-content not deployed yet). Skipping the gap check — verify manually."
+  elif [ "$(major "$cur_ver")" != "$(major "$new_ver")" ]; then
+    abort "Vespa MAJOR version change $cur_ver -> $new_ver. Major upgrades need a dedicated migration, not this guard. "
+  else
+    gap=$(( 10#$(minor "$new_ver") - 10#$(minor "$cur_ver") )) # 10#: a zero-padded minor must not be parsed as octal
+    if [ "$gap" -gt "$MAX_UPGRADE_HOP" ]; then
+      abort "Vespa UPGRADE $cur_ver -> $new_ver is $gap minor releases (> $MAX_UPGRADE_HOP). Vespa will refuse this and crash-loop the config server. Do a STEPWISE upgrade (<=$MAX_UPGRADE_HOP per hop). "
+    elif [ "$gap" -lt "-$MAX_UPGRADE_HOP" ]; then
+      echo "WARNING: large DOWNGRADE $cur_ver -> $new_ver (${gap} minors). This is OK only if $new_ver matches the on-disk index format (e.g. recovering after an accidental upgrade). If unsure, STOP."
+      [ "${FORCE:-0}" = "1" ] || { echo "Re-run with FORCE=1 to confirm the downgrade."; exit 1; }
+    else
+      echo "OK: Vespa $cur_ver -> $new_ver is within the $MAX_UPGRADE_HOP-release limit."
+    fi
+  fi
+fi
+
+echo "--- kubectl diff (review before apply) ---"
+kubectl diff -k "$DIR" || { rc=$?; [ "$rc" -eq 1 ] || { echo "ERROR: kubectl diff failed (exit $rc) — not applying." >&2; exit "$rc"; }; } # exit 1 = differences found (expected); >1 = real error
+echo "--- applying ---"
+kubectl apply -k "$DIR"
diff --git a/k8s/scripts/vespa-upgrade.sh b/k8s/scripts/vespa-upgrade.sh
new file mode 100755
index 00000000000..0f22097af5c
--- /dev/null
+++ b/k8s/scripts/vespa-upgrade.sh
@@ -0,0 +1,166 @@
+#!/usr/bin/env bash
+#
+# vespa-upgrade.sh <target-version> [namespace]
+#
+# Performs an ORDERED, HEALTH-GATED, single-hop Vespa upgrade across the five
+# StatefulSets. This logic deliberately lives in a script, NOT the manifests:
+# kustomize is declarative and cannot sequence a multi-StatefulSet, version-
+# stepped, health-gated rollout. A plain `kubectl apply` of a bumped tag rolls
+# every role at once with no ordering — exactly what took prod down once.
+# +# What it does, in order (Vespa's recommended sequence): +# 1. guard — refuse major-version / >MAX_HOP / downgrade jumps (as +# guarded-apply.sh does), checked against the LIVE version. +# 2. config servers (vespa-configserver) rollout, gated on readiness +# 3. admin/controller (vespa-admin) rollout, gated on readiness +# 4. content nodes (vespa-content) ONE ORDINAL AT A TIME via +# updateStrategy.partition stepping, with an explicit health +# check between each — this is the data tier, so we never let +# two content nodes be mid-upgrade at once. +# 5. feed containers (vespa-feed-container) rollout, gated on readiness +# 6. query containers (vespa-query-container) rollout, gated on readiness +# +# Health is checked from INSIDE each pod (kubectl exec → localhost), not via +# port-forward, because the cluster runs Istio and external probes hit mTLS. +# +# Single hop only: this refuses jumps Vespa itself refuses (> MAX_HOP minor +# releases). For a larger upgrade, run it repeatedly with intermediate +# versions (e.g. 8.600.35 -> 8.630.x -> 8.660.x -> ...). Each intermediate +# image must exist on the registry and be on-disk-format compatible. +# +# Usage: +# k8s/scripts/vespa-upgrade.sh 8.620.43 # prod (ns darwin) +# k8s/scripts/vespa-upgrade.sh 8.620.43 darwin +# DRY_RUN=1 k8s/scripts/vespa-upgrade.sh 8.620.43 # print actions, change nothing +# YES=1 k8s/scripts/vespa-upgrade.sh 8.620.43 # skip the confirm prompt +# FORCE=1 k8s/scripts/vespa-upgrade.sh 8.620.43 # override the version guard +# +# After a successful run, update the per-role vespa newTag values in the +# overlay kustomization so git ≈ live. +set -euo pipefail + +TARGET="${1:?usage: vespa-upgrade.sh [namespace]}" +NS="${2:-darwin}" +REGISTRY_IMAGE="vespaengine/vespa" +MAX_HOP=30 # Vespa's documented per-upgrade minor-release limit + +DRY_RUN="${DRY_RUN:-0}" +YES="${YES:-0}" +FORCE="${FORCE:-0}" + +# Per-role lookups via case (portable to bash 3.2, which macOS still ships — +# no associative arrays). 
StatefulSet name, container name, and health port. +ROLES_ORDER="configserver admin content feed query" +ss_of() { case "$1" in + configserver) echo vespa-configserver ;; admin) echo vespa-admin ;; + content) echo vespa-content ;; feed) echo vespa-feed-container ;; + query) echo vespa-query-container ;; *) die "unknown role $1" ;; esac; } +container_of() { ss_of "$1"; } # container name == StatefulSet name for every role +port_of() { case "$1" in + configserver) echo 19071 ;; admin) echo 19092 ;; content) echo 19092 ;; + feed) echo 8080 ;; query) echo 8080 ;; *) die "unknown role $1" ;; esac; } + +run() { echo "+ $*"; [ "$DRY_RUN" = "1" ] || "$@"; } +die() { echo "ERROR: $*" >&2; exit 1; } +minor() { echo "$1" | cut -d. -f2; } +major() { echo "$1" | cut -d. -f1; } + +[[ "$TARGET" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]] || die "target must be X.Y.Z, got '$TARGET'" + +echo "Context: $(kubectl config current-context) namespace: $NS" + +# --- version guard (against the LIVE content-node image, the source of truth) --- +CUR=$(kubectl get statefulset "$(ss_of content)" -n "$NS" \ + -o jsonpath='{.spec.template.spec.containers[0].image}' 2>/dev/null \ + | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' || true) +[ -n "$CUR" ] || die "could not read live Vespa version from $(ss_of content) in ns $NS (cluster access? deployed?)" + +echo "Current(live)=$CUR Target=$TARGET" +if [ "$CUR" = "$TARGET" ]; then echo "Already at $TARGET — nothing to do."; exit 0; fi +[ "$(major "$CUR")" = "$(major "$TARGET")" ] || \ + die "major-version change $CUR -> $TARGET needs a dedicated migration, not this script." +HOP=$(( $(minor "$TARGET") - $(minor "$CUR") )) +if [ "$HOP" -lt 0 ]; then + [ "$FORCE" = "1" ] || die "DOWNGRADE $CUR -> $TARGET. Only valid for outage recovery to the on-disk format. Re-run with FORCE=1 if you mean it." + echo "WARNING: downgrade $CUR -> $TARGET (FORCE=1)." +elif [ "$HOP" -gt "$MAX_HOP" ]; then + [ "$FORCE" = "1" ] || die "$CUR -> $TARGET is $HOP minors (> $MAX_HOP). 
Vespa will refuse it and crash-loop the config server. Upgrade STEPWISE via intermediate versions. (FORCE=1 to override — not advised.)" + echo "WARNING: $HOP-minor hop exceeds $MAX_HOP (FORCE=1)." +else + echo "OK: $HOP-minor hop is within the $MAX_HOP limit." +fi + +echo +echo "Plan (ns=$NS): set ${REGISTRY_IMAGE}:${TARGET} on, in order:" +for r in $ROLES_ORDER; do echo " - $r ($(ss_of "$r"))"; done +echo +if [ "$DRY_RUN" != "1" ] && [ "$YES" != "1" ]; then + read -r -p "Proceed against context '$(kubectl config current-context)' / ns '$NS'? [y/N] " ans + [ "$ans" = "y" ] || [ "$ans" = "Y" ] || die "aborted by user." +fi + +# Poll /state/v1/health on a specific pod from inside the vespa container. +health_ok() { + local pod="$1" container="$2" port="$3" + local code + code=$(kubectl exec -n "$NS" "$pod" -c "$container" -- \ + sh -c "curl -s -m 5 -o /dev/null -w '%{http_code}' http://localhost:${port}/state/v1/health" 2>/dev/null || echo "000") + [ "$code" = "200" ] +} + +wait_pod_healthy() { + local pod="$1" container="$2" port="$3" tries=60 + echo " waiting for $pod to be Ready + health 200 on :$port ..." + [ "$DRY_RUN" = "1" ] && { echo " (dry-run: skip wait)"; return 0; } + kubectl wait --for=condition=ready "pod/$pod" -n "$NS" --timeout=600s + for ((i=0;i>> [$r] $ss -> ${REGISTRY_IMAGE}:${TARGET}" + run kubectl set image "statefulset/$ss" "$c=${REGISTRY_IMAGE}:${TARGET}" -n "$NS" + echo " rolling out (one pod at a time, gated by readiness probe)..." 
+ [ "$DRY_RUN" = "1" ] || kubectl rollout status "statefulset/$ss" -n "$NS" --timeout=900s +} + +upgrade_content() { # data tier: one ordinal at a time via partition stepping + local ss c port n + ss="$(ss_of content)"; c="$(container_of content)"; port="$(port_of content)" + n=$(kubectl get statefulset "$ss" -n "$NS" -o jsonpath='{.spec.replicas}') + echo ">>> [content] $ss ($n replicas) -> ${REGISTRY_IMAGE}:${TARGET}, ONE ordinal at a time" + # Freeze updates (partition above all ordinals), set image, then release + # ordinals from highest to lowest, verifying health between each. + run kubectl patch "statefulset/$ss" -n "$NS" --type merge \ + -p "{\"spec\":{\"updateStrategy\":{\"rollingUpdate\":{\"partition\":$n}}}}" + run kubectl set image "statefulset/$ss" "$c=${REGISTRY_IMAGE}:${TARGET}" -n "$NS" + for ((ord=n-1; ord>=0; ord--)); do + echo " -- releasing content ordinal $ord" + run kubectl patch "statefulset/$ss" -n "$NS" --type merge \ + -p "{\"spec\":{\"updateStrategy\":{\"rollingUpdate\":{\"partition\":$ord}}}}" + wait_pod_healthy "${ss}-${ord}" "$c" "$port" + done + echo " content tier fully upgraded." +} + +for r in $ROLES_ORDER; do + if [ "$r" = "content" ]; then upgrade_content; else upgrade_simple "$r"; fi + # Verify every pod of this role before advancing to the next role. + if [ "$DRY_RUN" != "1" ]; then + for pod in $(kubectl get pods -n "$NS" -l "app=$(ss_of "$r")" -o name | sed 's#pod/##'); do + health_ok "$pod" "$(container_of "$r")" "$(port_of "$r")" \ + || die "$pod unhealthy after upgrade — STOPPING before next role." + done + fi + echo "<<< [$r] done." + echo +done + +echo "Vespa upgrade $CUR -> $TARGET complete across all roles." +echo "NOW: update the per-role vespa newTag values to \"$TARGET\" in" +echo " k8s/overlays/{prod,local}-vespa/kustomization.yaml so git matches live." 
diff --git a/web/src/app/admin/analytics/page.tsx b/web/src/app/admin/analytics/page.tsx index 03adc9f21c9..adf36fb99c1 100644 --- a/web/src/app/admin/analytics/page.tsx +++ b/web/src/app/admin/analytics/page.tsx @@ -10,6 +10,17 @@ import { DateRangePickerValue, Grid, Metric, + Tab, + TabGroup, + TabList, + TabPanel, + TabPanels, + Table, + TableBody, + TableCell, + TableHead, + TableHeaderCell, + TableRow, Text, Title, } from "@tremor/react"; @@ -62,6 +73,37 @@ interface SlackChannelsResponse { enabled_channels: number; } +interface UserAdoptionRow { + new_users: number; + cumulative_users: number; + date: string; +} + +interface PerUserChatStatsRow { + user_id: string; + email: string; + total_messages: number; + total_likes: number; + total_dislikes: number; + last_active: string; +} + +interface PersonaUsageRow { + persona_id: number; + name: string; + sessions: number; + messages: number; + likes: number; + dislikes: number; + last_active: string; +} + +interface DocumentSetUsageRow { + document_set_id: number; + name: string; + attributed_messages: number; +} + type Granularity = "day" | "month"; const DEFAULT_LOOKBACK_DAYS = 30; @@ -161,6 +203,38 @@ export default function AnalyticsPage() { swrOpts ); + const { + data: adoptionData, + isLoading: adoptionLoading, + error: adoptionErr, + } = useSWR( + buildURL("/analytics/admin/user-adoption", range), + errorHandlingFetcher, + swrOpts + ); + + const { data: perUserData, error: perUserErr } = useSWR< + PerUserChatStatsRow[] + >( + buildURL("/analytics/admin/per-user", range), + errorHandlingFetcher, + swrOpts + ); + + const { data: personaData, error: personaErr } = useSWR( + buildURL("/analytics/admin/persona-usage", range), + errorHandlingFetcher, + swrOpts + ); + + const { data: docSetUsageData, error: docSetUsageErr } = useSWR< + DocumentSetUsageRow[] + >( + buildURL("/analytics/admin/document-set-usage", range), + errorHandlingFetcher, + swrOpts + ); + // Snapshot endpoints — independent of date range, 
refresh on mount only. const { data: totalDocs, error: totalDocsErr } = useSWR( buildURL("/analytics/admin/total-docs"), @@ -184,11 +258,16 @@ export default function AnalyticsPage() { const isInitialLoading = (queryLoading && !queryData) || (userLoading && !userData) || - (botLoading && !botData); + (botLoading && !botData) || + (adoptionLoading && !adoptionData); const hasError = queryErr || userErr || botErr || + adoptionErr || + perUserErr || + personaErr || + docSetUsageErr || totalDocsErr || docsBySourceErr || slackChannelsErr; @@ -238,13 +317,44 @@ export default function AnalyticsPage() { ? Math.round((totalAutoResolved / totalBotQueries) * 100) : null; + // Cumulative is monotonic, so the latest row in the range carries the + // running total of distinct users who have ever tried chat. + const adoptionRows = adoptionData ?? []; + const totalUsersEverTried = + adoptionRows.length > 0 + ? adoptionRows[adoptionRows.length - 1].cumulative_users + : 0; + const newUsersInRange = adoptionRows.reduce((s, r) => s + r.new_users, 0); + return { totalQueries, peakActiveUsers, autoResolvePct, positivity, + totalUsersEverTried, + newUsersInRange, }; - }, [queryData, userData, botData]); + }, [queryData, userData, botData, adoptionData]); + + // Adoption series: new users per period + the cumulative curve. + const adoptionDaily = useMemo( + () => + (adoptionData ?? []).map((r) => ({ + date: r.date, + "New Users": r.new_users, + "Cumulative Users": r.cumulative_users, + })), + [adoptionData] + ); + // Monthly: New Users sum within the month; Cumulative takes the peak + // (= end-of-month value) since summing a running total is meaningless. + const adoptionChartData = useMemo( + () => + granularity === "day" + ? adoptionDaily + : bucketToMonth(adoptionDaily, new Set(["Cumulative Users"])), + [adoptionDaily, granularity] + ); // Combined query-performance series: queries (from /query) overlaid // with active users (from /user). Date join is on ISO date string. 
@@ -293,6 +403,14 @@ export default function AnalyticsPage() { [docsBySource] ); + const docSetUsageBars = useMemo( + () => + (docSetUsageData ?? []) + .filter((r) => r.attributed_messages > 0) + .map((r) => ({ name: r.name, value: r.attributed_messages })), + [docSetUsageData] + ); + return (
} title="Analytics" /> @@ -330,133 +448,306 @@ export default function AnalyticsPage() { {isInitialLoading ? ( ) : ( - <> - {/* Top row: range-scoped KPIs */} - - - Total Queries (range) - {kpis.totalQueries.toLocaleString()} - - - Peak Daily Active Users - {kpis.peakActiveUsers.toLocaleString()} - - - Auto-Resolution Rate (Slack) - - {kpis.autoResolvePct !== null ? `${kpis.autoResolvePct}%` : "—"} - - - - - {/* Snapshot KPIs — current state, independent of date range */} - - - Total Docs Indexed - - {totalDocs - ? totalDocs.total_docs_indexed.toLocaleString() - : "—"} - - - {totalDocs - ? `${totalDocs.unique_docs.toLocaleString()} unique` - : ""} - - - - Slack Channels Enabled - - {slackChannels - ? slackChannels.enabled_channels.toLocaleString() - : "—"} - - - {slackChannels - ? `across ${slackChannels.total_configs} config(s)` - : ""} - - - - Positive Feedback % - - {kpis.positivity !== null ? `${kpis.positivity}%` : "—"} - - over selected date range - - - Sources Active - - {docsBySource ? docsBySourceBars.length.toLocaleString() : "—"} - - - {docsBySource ? `of ${docsBySource.length} configured` : ""} - - - - - - - Users and Query Trend - - {granularity === "day" - ? "Daily" - : "Monthly (Active Users = peak day)"}{" "} - assistant replies overlaid with active users - - - - - - Feedback Trend - - {granularity === "day" ? "Daily" : "Monthly"} thumbs up vs - thumbs down - - - - - - - Docs Indexed by Source - Snapshot — sum across all cc-pairs per source type - {docsBySourceBars.length > 0 ? ( - n.toLocaleString()} - /> - ) : ( - No documents indexed yet. - )} - - + + + Overview + User Activity + + + + {/* Top row: range-scoped KPIs */} + + + Total Queries (range) + {kpis.totalQueries.toLocaleString()} + + + Peak Daily Active Users + {kpis.peakActiveUsers.toLocaleString()} + + + Auto-Resolution Rate (Slack) + + {kpis.autoResolvePct !== null + ? 
`${kpis.autoResolvePct}%` + : "—"} + + + + + {/* Snapshot KPIs — current state, independent of date range */} + + + Total Docs Indexed + + {totalDocs + ? totalDocs.total_docs_indexed.toLocaleString() + : "—"} + + + {totalDocs + ? `${totalDocs.unique_docs.toLocaleString()} unique` + : ""} + + + + Slack Channels Enabled + + {slackChannels + ? slackChannels.enabled_channels.toLocaleString() + : "—"} + + + {slackChannels + ? `across ${slackChannels.total_configs} config(s)` + : ""} + + + + Positive Feedback % + + {kpis.positivity !== null ? `${kpis.positivity}%` : "—"} + + over selected date range + + + Sources Active + + {docsBySource + ? docsBySourceBars.length.toLocaleString() + : "—"} + + + {docsBySource ? `of ${docsBySource.length} configured` : ""} + + + + + + + Users and Query Trend + + {granularity === "day" + ? "Daily" + : "Monthly (Active Users = peak day)"}{" "} + assistant replies overlaid with active users + + + + + + Feedback Trend + + {granularity === "day" ? "Daily" : "Monthly"} thumbs up vs + thumbs down + + + + + + + Docs Indexed by Source + Snapshot — sum across all cc-pairs per source type + {docsBySourceBars.length > 0 ? ( + n.toLocaleString()} + /> + ) : ( + No documents indexed yet. + )} + + + + + + Chat Adoption + + {granularity === "day" ? "Daily" : "Monthly"} new users + overlaid with the cumulative number who have ever tried chat + +
+
+ Users who ever tried chat + {kpis.totalUsersEverTried.toLocaleString()} +
+
+ New users (range) + {kpis.newUsersInRange.toLocaleString()} +
+
+ +
+ + + Top Users by Activity + + Most active users over the selected range, by assistant + replies. From the durable daily aggregate, so it spans full + history even after old chats are purged by retention. + + {perUserData && perUserData.length > 0 ? ( +
+ + + + User + + Messages + + + Likes + + + Dislikes + + + Last Active + + + + + {perUserData.map((u) => ( + + {u.email} + + {u.total_messages.toLocaleString()} + + + {u.total_likes.toLocaleString()} + + + {u.total_dislikes.toLocaleString()} + + + {u.last_active} + + + ))} + +
+
+ ) : ( + + No chat activity in this date range. + + )} +
+ + + + Most-Used Assistants + + By assistant replies over the selected range. Durable + aggregate — spans full history. + + {personaData && personaData.length > 0 ? ( +
+ + + + Assistant + + Messages + + + Sessions + + + Likes + + + Dislikes + + + + + {personaData.map((p) => ( + + {p.name} + + {p.messages.toLocaleString()} + + + {p.sessions.toLocaleString()} + + + {p.likes.toLocaleString()} + + + {p.dislikes.toLocaleString()} + + + ))} + +
+
+ ) : ( + + No assistant activity in this date range. + + )} +
+ + + Datasets in Use (approximate) + + Assistant usage attributed to each document set attached to + the assistant. Approximate — counts an assistant's + messages toward all its datasets and uses current + attachments, not per-query retrieval. + + {docSetUsageBars.length > 0 ? ( + n.toLocaleString()} + /> + ) : ( + + No dataset usage in this date range. + + )} + +
+
+
+
)}
); diff --git a/web/src/app/admin/connector/[ccPairId]/IndexingAttemptsTable.tsx b/web/src/app/admin/connector/[ccPairId]/IndexingAttemptsTable.tsx index 7481a6ee342..07744c47e19 100644 --- a/web/src/app/admin/connector/[ccPairId]/IndexingAttemptsTable.tsx +++ b/web/src/app/admin/connector/[ccPairId]/IndexingAttemptsTable.tsx @@ -12,7 +12,7 @@ import { Divider, } from "@tremor/react"; import { IndexAttemptStatus } from "@/components/Status"; -import { CCPairFullInfo } from "./types"; +import { CCPairFullInfo, PaginatedIndexAttempts } from "./types"; import { useState } from "react"; import { PageSelector } from "@/components/PageSelector"; import { localizeAndPrettify } from "@/lib/time"; @@ -20,18 +20,39 @@ import { getDocsProcessedPerMinute } from "@/lib/indexAttempt"; import { Modal } from "@/components/Modal"; import { CheckmarkIcon, CopyIcon } from "@/components/icons/icons"; import { updateIndexAttemptPriority } from "@/lib/connector"; -import { mutate } from "swr"; -import { buildCCPairInfoUrl } from "./lib"; +import useSWR, { mutate } from "swr"; +import { errorHandlingFetcher } from "@/lib/fetcher"; +import { buildCCPairInfoUrl, buildIndexAttemptsUrl } from "./lib"; import { usePopup } from "@/components/admin/connectors/Popup"; +import { ThreeDotsLoader } from "@/components/Loading"; +import { ErrorCallout } from "@/components/ErrorCallout"; const NUM_IN_PAGE = 8; export function IndexingAttemptsTable({ ccPair }: { ccPair: CCPairFullInfo }) { const [page, setPage] = useState(1); + // Server-side pagination: fetch one page at a time (page is 0-based on the + // API). Changing `page` re-keys the SWR fetch. Avoids loading a busy + // cc-pair's entire attempt history (thousands of rows w/ full tracebacks). 
+ const indexAttemptsUrl = buildIndexAttemptsUrl( + ccPair.id, + page - 1, + NUM_IN_PAGE + ); + const { + data: indexAttemptsData, + isLoading, + error, + mutate: mutateIndexAttempts, + } = useSWR(indexAttemptsUrl, errorHandlingFetcher); + + const indexAttempts = indexAttemptsData?.index_attempts ?? []; + const totalPages = indexAttemptsData?.total_pages ?? 1; + const [indexAttemptTracePopupId, setIndexAttemptTracePopupId] = useState< number | null >(null); - const indexAttemptToDisplayTraceFor = ccPair.index_attempts.find( + const indexAttemptToDisplayTraceFor = indexAttempts.find( (indexAttempt) => indexAttempt.id === indexAttemptTracePopupId ); const [copyClicked, setCopyClicked] = useState(false); @@ -56,9 +77,24 @@ export function IndexingAttemptsTable({ ccPair }: { ccPair: CCPairFullInfo }) { }); } setTimeout(() => setPopup(null), 3000); + // Refresh the current page of attempts + the detail (latest attempt). + mutateIndexAttempts(); mutate(buildCCPairInfoUrl(ccPair.id)); } + if (error) { + return ( + + ); + } + + if (!indexAttemptsData && isLoading) { + return ; + } + return ( <> {indexAttemptToDisplayTraceFor && @@ -114,114 +150,112 @@ export function IndexingAttemptsTable({ ccPair }: { ccPair: CCPairFullInfo }) { - {ccPair.index_attempts - .slice(NUM_IN_PAGE * (page - 1), NUM_IN_PAGE * page) - .map((indexAttempt) => { - const docsPerMinute = - getDocsProcessedPerMinute(indexAttempt)?.toFixed(2); - const priority = indexAttempt.indexing_priority ?? 0; - const isNotStarted = indexAttempt.status === "not_started"; - const isUpdating = updatingPriorityId === indexAttempt.id; - return ( - - - {indexAttempt.time_started - ? localizeAndPrettify(indexAttempt.time_started) - : "-"} - - - - {docsPerMinute && ( -
- {docsPerMinute} docs / min -
- )} -
- - {isNotStarted ? ( -
- - 0 - ? "text-xs font-semibold px-2 py-0.5 rounded bg-emerald-100 text-emerald-800" - : "text-xs px-2 py-0.5 text-subtle" - } - > - {priority} - - -
- ) : priority > 0 ? ( - + {indexAttempts.map((indexAttempt) => { + const docsPerMinute = + getDocsProcessedPerMinute(indexAttempt)?.toFixed(2); + const priority = indexAttempt.indexing_priority ?? 0; + const isNotStarted = indexAttempt.status === "not_started"; + const isUpdating = updatingPriorityId === indexAttempt.id; + return ( + + + {indexAttempt.time_started + ? localizeAndPrettify(indexAttempt.time_started) + : "-"} + + + + {docsPerMinute && ( +
+ {docsPerMinute} docs / min +
+ )} +
+ + {isNotStarted ? ( +
+ + 0 + ? "text-xs font-semibold px-2 py-0.5 rounded bg-emerald-100 text-emerald-800" + : "text-xs px-2 py-0.5 text-subtle" + } + > {priority} - ) : ( - - - )} - - -
-
-
{indexAttempt.new_docs_indexed}
- {indexAttempt.docs_removed_from_index > 0 && ( -
- (also removed {indexAttempt.docs_removed_from_index}{" "} - docs that were detected as deleted in the source) -
- )} -
+
-
- {indexAttempt.total_docs_indexed} - -
- - {indexAttempt.error_msg || "-"} - - {indexAttempt.full_exception_trace && ( -
{ - setIndexAttemptTracePopupId(indexAttempt.id); - }} - className="mt-2 text-link cursor-pointer select-none" - > - View Full Trace + ) : priority > 0 ? ( + + {priority} + + ) : ( + - + )} + + +
+
+
{indexAttempt.new_docs_indexed}
+ {indexAttempt.docs_removed_from_index > 0 && ( +
+ (also removed {indexAttempt.docs_removed_from_index}{" "} + docs that were detected as deleted in the source)
)}
- - - ); - })} +
+
+ {indexAttempt.total_docs_indexed} + +
+ + {indexAttempt.error_msg || "-"} + + {indexAttempt.full_exception_trace && ( +
{ + setIndexAttemptTracePopupId(indexAttempt.id); + }} + className="mt-2 text-link cursor-pointer select-none" + > + View Full Trace +
+ )} +
+
+ + ); + })} - {ccPair.index_attempts.length > NUM_IN_PAGE && ( + {totalPages > 1 && (
{ setPage(newPage); diff --git a/web/src/app/admin/connector/[ccPairId]/lib.ts b/web/src/app/admin/connector/[ccPairId]/lib.ts index e83f3d406d0..d5492ea0400 100644 --- a/web/src/app/admin/connector/[ccPairId]/lib.ts +++ b/web/src/app/admin/connector/[ccPairId]/lib.ts @@ -1,3 +1,11 @@ export function buildCCPairInfoUrl(ccPairId: string | number) { return `/api/manage/admin/cc-pair/${ccPairId}`; } + +export function buildIndexAttemptsUrl( + ccPairId: string | number, + page: number, + pageSize: number +) { + return `/api/manage/admin/cc-pair/${ccPairId}/index-attempts?page=${page}&page_size=${pageSize}`; +} diff --git a/web/src/app/admin/connector/[ccPairId]/page.tsx b/web/src/app/admin/connector/[ccPairId]/page.tsx index 2d3f9458c38..a1f9c194246 100644 --- a/web/src/app/admin/connector/[ccPairId]/page.tsx +++ b/web/src/app/admin/connector/[ccPairId]/page.tsx @@ -52,7 +52,7 @@ function Main({ ccPairId }: { ccPairId: number }) { ); } - const lastIndexAttempt = ccPair.index_attempts[0]; + const lastIndexAttempt = ccPair.latest_index_attempt; const isDeleting = isCurrentlyDeleting(ccPair.latest_deletion_attempt); // figure out if we need to artificially deflate the number of docs indexed. @@ -61,7 +61,7 @@ function Main({ ccPairId }: { ccPairId: number }) { // there is a mismatch between these two numbers which may confuse users. const totalDocsIndexed = lastIndexAttempt?.status === "in_progress" && - ccPair.index_attempts.length === 1 + ccPair.num_index_attempts === 1 ? 
lastIndexAttempt.total_docs_indexed : ccPair.num_docs_indexed; diff --git a/web/src/app/admin/connector/[ccPairId]/types.ts b/web/src/app/admin/connector/[ccPairId]/types.ts index ab4921180cf..6f9ff8d3048 100644 --- a/web/src/app/admin/connector/[ccPairId]/types.ts +++ b/web/src/app/admin/connector/[ccPairId]/types.ts @@ -11,6 +11,16 @@ export interface CCPairFullInfo { num_docs_indexed: number; connector: Connector; credential: Credential; - index_attempts: IndexAttemptSnapshot[]; + // Full attempt history is fetched (paginated) separately; the detail page + // only needs the most-recent attempt + a total count. + latest_index_attempt: IndexAttemptSnapshot | null; + num_index_attempts: number; latest_deletion_attempt: DeletionAttemptSnapshot | null; } + +export interface PaginatedIndexAttempts { + index_attempts: IndexAttemptSnapshot[]; + page: number; + total_pages: number; + total_count: number; +} diff --git a/web/src/app/admin/settings/interfaces.ts b/web/src/app/admin/settings/interfaces.ts index 02372ce2c74..c1f4da5b2cc 100644 --- a/web/src/app/admin/settings/interfaces.ts +++ b/web/src/app/admin/settings/interfaces.ts @@ -3,6 +3,8 @@ export interface Settings { search_page_enabled: boolean; default_page: "search" | "chat"; maximum_chat_retention_days: number | null; + // Byte cap for chat file uploads (mirrors backend CHAT_FILE_MAX_SIZE_MB). + chat_file_max_size_mb?: number; } export interface EnterpriseSettings { diff --git a/web/src/app/assistants/gallery/AssistantsGallery.tsx b/web/src/app/assistants/gallery/AssistantsGallery.tsx index 64c96af2cff..cfae8b122b0 100644 --- a/web/src/app/assistants/gallery/AssistantsGallery.tsx +++ b/web/src/app/assistants/gallery/AssistantsGallery.tsx @@ -1,204 +1,790 @@ "use client"; +/** + * Assistant Gallery — redesigned UX. 
+ * + * Why this rewrite: with 50+ assistants and growing, the old flat 2-column + * grid had no hierarchy, no status signal (added vs not), and no real + * filtering — every card looked identical regardless of whether it was + * yours, shared, public, or already in your picker. This page is now + * structured around three questions: "is this mine?" (sections), "have + * I added this?" (availability filter + muted card style), and "what does + * it do?" (denser layout + document-set name chips). + * + * Changes packed in (numbers map to the design proposal): + * + * 1. Muted card style for already-added assistants; the per-card + * "✓ In your picker" chip was later dropped. Eye finds the un-added ones fast. + * 2. Three implicit sections: Yours / Shared with you / Featured. + * Empty sections hide; section headers carry counts. + * 3. Filter chips above the grid: availability (All / Available / Added). + * The proposal's auto-generated per-tool chips were later removed along + * with the rest of the tool-related UI. + * 4. Owner display: name-from-email fallback (split on '@'), with a + * "Built-in" badge for default_persona assistants — kills the + * fork-specific "Author: Darwin" magic string. + * 6. Responsive grid: 1 col on mobile, 2 / 3 / 4 by breakpoint. + * 7. Header matches the Manage page: title + subtitle on the left, + * "Back to my assistants" as a text link, "Create new" button + * top-right. The giant centered button + paragraph are gone. + * 8. Sort dropdown: Featured (API order) / A → Z / Newly added. + * 9. Search matches name, description, and document-set names (not tool + * names). Empty-search-result has a real empty state with a Clear button. + * 10. Document-set name chips, capped with a "+N more" pill (no tool counts). + * Add/Remove buttons replace the heavy Tremor color="green/red" + * with flat buttons matching the design system. + * 11. Design tokens fixed — search input uses border-border / + * focus-ring-accent like the rest of the app. 
+ * + * What is intentionally NOT here: + * - #5 (detail drawer / modal) — deferred per the proposal; revisit + * after seeing how users use the new gallery. + * - Bulk select — adding 5 assistants at once isn't a real use case. + * + * All mutations are optimistic + undoable, mirroring the Manage page. + */ + +import { useEffect, useMemo, useState } from "react"; import { Persona } from "@/app/admin/assistants/interfaces"; -import { AssistantIcon } from "@/components/assistants/AssistantIcon"; import { User } from "@/lib/types"; -import { Button } from "@tremor/react"; -import Link from "next/link"; -import { useState } from "react"; -import { FiMinus, FiPlus, FiX } from "react-icons/fi"; -import { NavigationButton } from "../NavigationButton"; -import { AssistantsPageTitle } from "../AssistantsPageTitle"; +import { AssistantIcon } from "@/components/assistants/AssistantIcon"; +import { Bubble } from "@/components/Bubble"; +import { usePopup } from "@/components/admin/connectors/Popup"; import { addAssistantToList, + reorderAssistantList, removeAssistantFromList, } from "@/lib/assistants/updateAssistantPreferences"; -import { usePopup } from "@/components/admin/connectors/Popup"; +import { checkUserOwnsAssistant } from "@/lib/assistants/checkOwnership"; +import { AssistantsPageTitle } from "../AssistantsPageTitle"; +import Link from "next/link"; import { useRouter } from "next/navigation"; -import { ToolsDisplay } from "../ToolsDisplay"; +import { FiBookmark, FiPlus, FiSearch, FiX } from "react-icons/fi"; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +type Availability = "all" | "available" | "added"; +type SortMode = "featured" | "name-asc" | "recent"; + +interface SectionDef { + key: string; + label: string; + assistants: Persona[]; +} + +// --------------------------------------------------------------------------- +// Column-count 
parameterisation +// --------------------------------------------------------------------------- +// +// Each row in this table is a complete, static Tailwind class string so the +// purge step actually emits the classes. You CAN'T compute these at runtime +// (`md:grid-cols-${n}` won't survive purge). Add a row here to support a new +// column count. Each row scales 1-col on mobile up to N at the widest +// breakpoint, with one breakpoint per added column so cards stay roomy on +// medium screens. +const GRID_CLASSES: Record = { + 1: "grid-cols-1", + 2: "grid-cols-1 sm:grid-cols-2", + 3: "grid-cols-1 md:grid-cols-2 2xl:grid-cols-3", + 4: "grid-cols-1 md:grid-cols-2 lg:grid-cols-3 2xl:grid-cols-4", + 5: "grid-cols-1 md:grid-cols-2 lg:grid-cols-3 xl:grid-cols-4 2xl:grid-cols-5", +}; + +const DEFAULT_COLUMNS = 3; + +// Values exposed in the in-page column picker. The control lets users +// override the prop-derived default at runtime; the persisted choice +// lives in localStorage so it survives reloads. +// +// Below the smallest md breakpoint everything is 1-col regardless of +// this value (see GRID_CLASSES), so we don't bother exposing 1. +const COLUMN_PICKER_OPTIONS = [2, 3, 4]; +const COLUMNS_STORAGE_KEY = "danswer:assistants-gallery:columns"; + +// How many doc-set name chips to render before collapsing the rest into +// a "+N more" pill. Three keeps each card's scope visible without +// blowing the card width at narrower column counts. +const MAX_VISIBLE_DOC_SETS = 3; + +// --------------------------------------------------------------------------- +// Small helpers +// --------------------------------------------------------------------------- + +/** Best-effort author display name. Names aren't on MinimalUserSnapshot; + * we have email only. Split on '@' so "foo.bar@example.com" → "foo.bar" + * rather than dumping the full email at the user. 
*/ +function ownerDisplayName(persona: Persona): string | null { + if (persona.default_persona) return null; // Built-in badge shown instead. + const email = persona.owner?.email; + if (!email) return null; + const local = email.split("@")[0]; + // Replace dots/underscores with spaces and trim — usually closer to + // "First Last" than the raw local-part. + return local.replace(/[._]/g, " ").trim() || email; +} + +// --------------------------------------------------------------------------- +// Single card +// --------------------------------------------------------------------------- + +interface CardProps { + assistant: Persona; + user: User | null; + isAdded: boolean; + onAdd: (a: Persona) => void; + onRemove: (a: Persona) => void; +} + +function GalleryCard({ assistant, user, isAdded, onAdd, onRemove }: CardProps) { + // Tool-related UI was intentionally removed from this page (filter + // chips + per-card counts) — the gallery is for browsing assistants, + // and tool execution isn't reliable enough to advertise. + const author = ownerDisplayName(assistant); + const isBuiltIn = assistant.default_persona; + + return ( +
+ {/* Header: icon + name (+ built-in badge if applicable). The + prior absolute top-right "In your picker" badge was dropped — + the muted card style + the Remove button in the footer + already signal "added"; the badge ate horizontal space and + crowded the title at narrower widths. */} +
+ +
+
+

+ {assistant.name} +

+ {isBuiltIn && ( + + Built-in + + )} +
+
+
+ + {/* Description — primary signal of "should I pick this?". */} + {assistant.description && ( +

+ {assistant.description} +

+ )} + + {/* Knowledge-scope chips — name the document sets the + assistant points at (counts alone don't help a chooser + decide). Cap at MAX_VISIBLE_DOC_SETS with a "+N more" + tooltip so a long list doesn't blow the card width. Tools + were removed entirely (see card-level comment). */} + {assistant.document_sets && assistant.document_sets.length > 0 && ( +
+ {assistant.document_sets.slice(0, MAX_VISIBLE_DOC_SETS).map((ds) => ( + +
+ + + {ds.name} + +
+
+ ))} + {assistant.document_sets.length > MAX_VISIBLE_DOC_SETS && ( + + d.name) + .join(", ")} + > + +{assistant.document_sets.length - MAX_VISIBLE_DOC_SETS} more + + + )} +
+ )} + + {/* Footer row: author (or built-in subtle text) + Add/Remove */} +
+
+ {isBuiltIn ? ( + Bundled assistant + ) : author ? ( + by {author} + ) : ( + // Public assistant with no owner record — rare; surface + // gracefully without the old "Author: Darwin" magic string. + Public + )} +
+ + {/* Add/Remove — flat, matches design system. Tremor's color="green" + for "Add to my list" was visually shoutier than the action. */} + {user && + (isAdded ? ( + + ) : ( + + ))} +
+
+ ); +} + +// --------------------------------------------------------------------------- +// Filter chip — small reusable toggle pill +// --------------------------------------------------------------------------- + +function FilterChip({ + active, + onClick, + children, + badge, +}: { + active: boolean; + onClick: () => void; + children: React.ReactNode; + badge?: number; +}) { + return ( + + ); +} + +// --------------------------------------------------------------------------- +// Main +// --------------------------------------------------------------------------- export function AssistantsGallery({ assistants, user, + columns: initialColumns = DEFAULT_COLUMNS, }: { assistants: Persona[]; user: User | null; + /** + * Initial max columns at the widest breakpoint. Acts as the default + * if the user has no stored preference yet; once the user picks via + * the in-page column control that choice (in localStorage) wins. + * Responsive scaling below the widest breakpoint is fixed + * (see GRID_CLASSES). Supported values: 1–5; out-of-range silently + * falls back to DEFAULT_COLUMNS so a bad prop can't break the page. + */ + columns?: number; }) { - function filterAssistants(assistants: Persona[], query: string): Persona[] { - return assistants.filter( - (assistant) => - assistant.name.toLowerCase().includes(query.toLowerCase()) || - assistant.description.toLowerCase().includes(query.toLowerCase()) - ); - } - const router = useRouter(); - const [searchQuery, setSearchQuery] = useState(""); + // User-chosen column count. `null` until the localStorage read in + // the effect below; SSR + first paint use the prop value so we + // don't get a hydration mismatch. After mount, the stored choice + // (if any) overrides the prop. 
+ const [userColumns, setUserColumns] = useState(null); + useEffect(() => { + try { + const raw = window.localStorage.getItem(COLUMNS_STORAGE_KEY); + if (raw == null) return; + const n = Number.parseInt(raw, 10); + if (Number.isFinite(n) && n in GRID_CLASSES) { + setUserColumns(n); + } + } catch { + // localStorage can throw in some sandboxed contexts (Safari + // private mode in the past, certain iframe configs). Fall + // through to the prop default — the picker still works for + // the session, just doesn't persist. + } + }, []); + + const effectiveColumns = userColumns ?? initialColumns; + const gridClass = + GRID_CLASSES[effectiveColumns] ?? GRID_CLASSES[DEFAULT_COLUMNS]; + + const changeColumns = (n: number) => { + setUserColumns(n); + try { + window.localStorage.setItem(COLUMNS_STORAGE_KEY, String(n)); + } catch { + // See above — silently OK to skip persistence. + } + }; const { popup, setPopup } = usePopup(); - const allAssistantIds = assistants.map((assistant) => assistant.id); - const filteredAssistants = filterAssistants(assistants, searchQuery); + // Mirrors the Manage page: no preference = every accessible assistant + // is "in the picker" by default. + const initialChosen: number[] = + user?.preferences?.chosen_assistants ?? assistants.map((a) => a.id); + const [chosenAssistants, setChosenAssistants] = + useState(initialChosen); + const chosenSet = useMemo( + () => new Set(chosenAssistants), + [chosenAssistants] + ); + + // ---- filter / sort state ------------------------------------------------- + + const [search, setSearch] = useState(""); + const [availability, setAvailability] = useState("all"); + const [sortMode, setSortMode] = useState("featured"); + + // ---- derived: filtered + sorted list ------------------------------------- + // Tool-related filtering was removed from this page — the gallery is + // about assistants, not their internals. Search now only matches + // name + description + document-set names. 
+ + const filtered: Persona[] = useMemo(() => { + const q = search.trim().toLowerCase(); + const out = assistants.filter((a) => { + if (q) { + const hay = [ + a.name, + a.description ?? "", + ...(a.document_sets ?? []).map((d) => d.name), + ] + .join(" ") + .toLowerCase(); + if (!hay.includes(q)) return false; + } + if (availability === "added" && !chosenSet.has(a.id)) return false; + if (availability === "available" && chosenSet.has(a.id)) return false; + return true; + }); + + if (sortMode === "name-asc") { + out.sort((a, b) => a.name.localeCompare(b.name)); + } else if (sortMode === "recent") { + // No created_at on Persona; id desc is a fair proxy ("newer ids + // were created later"). + out.sort((a, b) => b.id - a.id); + } + // "featured" = preserve API order (admins curate via display_priority). + return out; + }, [assistants, search, availability, sortMode, chosenSet]); + + // ---- derived: sections --------------------------------------------------- + + const sections: SectionDef[] = useMemo(() => { + const yours: Persona[] = []; + const shared: Persona[] = []; + const featured: Persona[] = []; + + for (const a of filtered) { + const ownedByUser = checkUserOwnsAssistant(user, a); + const sharedWithUser = + user != null && + !ownedByUser && + !a.is_public && + (a.users ?? []).some((u) => u.id === user.id); + + if (ownedByUser && !a.default_persona) { + yours.push(a); + } else if (sharedWithUser) { + shared.push(a); + } else { + // Public OR built-in OR (accessible via group permission). All + // surface here as "Featured & Built-in" — visually equivalent + // from a chooser's POV. 
+ featured.push(a); + } + } + + const out: SectionDef[] = []; + if (yours.length > 0) + out.push({ key: "yours", label: "Yours", assistants: yours }); + if (shared.length > 0) + out.push({ key: "shared", label: "Shared with you", assistants: shared }); + if (featured.length > 0) + out.push({ + key: "featured", + label: "Featured & Built-in", + assistants: featured, + }); + return out; + }, [filtered, user]); + + // ---- counts for filter chips --------------------------------------------- + + const counts = useMemo(() => { + const q = search.trim().toLowerCase(); + let all = 0; + let added = 0; + let available = 0; + for (const a of assistants) { + if (q) { + const hay = [ + a.name, + a.description ?? "", + ...(a.document_sets ?? []).map((d) => d.name), + ] + .join(" ") + .toLowerCase(); + if (!hay.includes(q)) continue; + } + all++; + if (chosenSet.has(a.id)) added++; + else available++; + } + return { all, added, available }; + }, [assistants, search, chosenSet]); + + // ---- optimistic add/remove (mirrors Manage page persistOrder) ----------- + + const persistChosen = async ( + next: number[], + { + successMsg, + undoToOrder, + }: { successMsg?: string; undoToOrder?: number[] } = {} + ): Promise => { + const prev = chosenAssistants; + setChosenAssistants(next); + const ok = await reorderAssistantList(next); + if (!ok) { + setChosenAssistants(prev); + setPopup({ + message: "Couldn't update your assistant list — please try again.", + type: "error", + }); + return false; + } + if (successMsg) { + setPopup({ + message: successMsg, + type: "success", + undo: + undoToOrder !== undefined + ? 
{ + onClick: async () => { + await persistChosen(undoToOrder); + }, + } + : undefined, + }); + } + router.refresh(); + return true; + }; + + const handleAdd = async (a: Persona) => { + if (!user) return; + if (chosenSet.has(a.id)) return; // already added — no-op + const prev = chosenAssistants; + const next = [...prev, a.id]; + // Use addAssistantToList specifically (idempotent) rather than the + // generic reorder helper — both PATCH the same endpoint, but this + // signals intent at the call-site. + setChosenAssistants(next); + const ok = await addAssistantToList(a.id, prev); + if (!ok) { + setChosenAssistants(prev); + setPopup({ + message: `Couldn't add "${a.name}". Try again?`, + type: "error", + }); + return; + } + setPopup({ + message: `"${a.name}" added to your picker.`, + type: "success", + undo: { + onClick: async () => { + await persistChosen(prev); + }, + }, + }); + router.refresh(); + }; + + const handleRemove = async (a: Persona) => { + if (!user) return; + if (chosenAssistants.length === 1 && chosenAssistants[0] === a.id) { + setPopup({ + message: + "You need at least one visible assistant — can't remove the last one.", + type: "error", + }); + return; + } + const prev = chosenAssistants; + const next = prev.filter((id) => id !== a.id); + setChosenAssistants(next); + const ok = await removeAssistantFromList(a.id, prev); + if (!ok) { + setChosenAssistants(prev); + setPopup({ + message: `Couldn't remove "${a.name}". 
Try again?`, + type: "error", + }); + return; + } + setPopup({ + message: `"${a.name}" removed from your picker.`, + type: "success", + undo: { + onClick: async () => { + await persistChosen(prev); + }, + }, + }); + router.refresh(); + }; + + // ---- handlers for filter UI ---------------------------------------------- + + const clearAllFilters = () => { + setSearch(""); + setAvailability("all"); + }; + + const hasAnyFilter = search.trim() !== "" || availability !== "all"; + + // ---- render ------------------------------------------------------------- return ( <> {popup} -
- Assistant Gallery -
- - View Your Assistants + +
+ {/* Header — matches the Manage page rebuild */} +
+
+ Assistant Gallery +

+ Browse every assistant available to you. Add the ones you want to + your chat picker. +

+
+ + Create new
-

- Discover and create custom assistants that combine instructions, extra - knowledge, and any combination of tools. -

+
+ + ← Back to my assistants + +
-
+ {/* Search */} +
+ setSearchQuery(e.target.value)} + type="search" + placeholder="Search by name, description, tool, or source…" + value={search} + onChange={(e) => setSearch(e.target.value)} className=" - w-full - p-2 - border - border-gray-300 - rounded - focus:outline-none - focus:ring-2 - focus:ring-blue-500 - " + w-full pl-10 pr-3 py-2 + rounded-md border border-border bg-background + focus:outline-none focus:ring-2 focus:ring-accent + " />
-
- {filteredAssistants.map((assistant) => ( -
+ setAvailability("all")} + badge={counts.all} + > + All + + setAvailability("available")} + badge={counts.available} + > + Available to add + + setAvailability("added")} + badge={counts.added} + > + Already added + + + {/* View controls — columns + sort — live at the right end of + the filter row so "narrow the list" (left) and "shape + the view" (right) are visually separated. */} +
+ {/* Column picker. Hidden below md since the layout falls + back to a single column there regardless. Pure + client-side state + localStorage — no fetch, no + router.refresh(), no DB hit. */} +
+ + +
+ + +
+
+ + {/* Empty state when filters exclude everything */} + {sections.length === 0 && ( +
+

No assistants match.

+

+ {hasAnyFilter + ? "Try a different filter, or clear all to see everything." + : "There are no assistants available to you yet."} +

+ {hasAnyFilter && ( + - ) : ( - - )} -
- )} -
- {assistant.tools.length > 0 && ( - - )} -

{assistant.description}

-

- Author: {assistant.owner?.email || "Darwin"} -

+ > + Clear all filters + + )} +
+ )} + + {/* Sections */} + {sections.map((section) => ( +
+
+

+ {section.label} +

+ + ({section.assistants.length}) +
- ))} -
+
+ {section.assistants.map((assistant) => ( + + ))} +
+ + ))}
); diff --git a/web/src/app/assistants/mine/AssistantsList.tsx b/web/src/app/assistants/mine/AssistantsList.tsx index 576a4c74add..33027fa1ed1 100644 --- a/web/src/app/assistants/mine/AssistantsList.tsx +++ b/web/src/app/assistants/mine/AssistantsList.tsx @@ -1,367 +1,964 @@ "use client"; -import { useState } from "react"; +/** + * Manage Assistants — redesigned UX. + * + * What changed vs the prior "move up / move down inside a 3-dot popover": + * + * 1. Drag-and-drop reorder via @dnd-kit (already in package.json), with + * a grab handle on each visible row. Up/down arrows removed. + * 2. Explicit "set as default" pin icon on each row. Filled = current + * default; the default row also gets an accent border. Ordering and + * default are now orthogonal. + * 3. Visibility is a row-level toggle, not a popover item. The page + * shows ONE list with a divider; hidden rows render under the + * "Hidden" divider at reduced opacity. + * 4. Client-side search filters by name + description + tool name. + * 5. Description font-weight bumped; tool chips moved behind a hover + * reveal so the visual hierarchy answers "should I pick this?". + * A "{n} sources" chip surfaces document-set count, which used to + * be hidden in expanded mode only. + * 6. Bulk select column + action bar (Show / Hide / Remove) appears + * only when something is selected. + * 7. Header: single title + 1-line subtitle + Create button top-right, + * "Browse all available" as a text link. Cut the giant tile pair. + * 8. Undo toast on reorder / default-change / visibility-toggle. + * Reuses the extended Popup component (`undo` field on PopupSpec). + * + * Everything is optimistic: local `chosenOrder` state mutates first, the + * PATCH runs after, and a failure rolls back + shows an error toast. 
+ * + * NOTE: this file replaces the old up/down arrow flow entirely; the + * `moveAssistantUp` / `moveAssistantDown` helpers in + * `lib/assistants/updateAssistantPreferences.ts` are kept for any other + * callers but no longer used here. + */ + +import { useMemo, useRef, useState } from "react"; import { MinimalUserSnapshot, User } from "@/lib/types"; import { Persona } from "@/app/admin/assistants/interfaces"; -import { Divider, Text } from "@tremor/react"; +import { Text } from "@tremor/react"; import { - FiArrowDown, - FiArrowUp, + FiBookmark, FiEdit2, - FiMoreHorizontal, + FiEye, + FiEyeOff, FiPlus, FiSearch, - FiX, FiShare2, + FiStar, + FiTrash2, } from "react-icons/fi"; -import Link from "next/link"; -import { orderAssistantsForUser } from "@/lib/assistants/orderAssistants"; +import { MdDragIndicator } from "react-icons/md"; import { - addAssistantToList, - moveAssistantDown, - moveAssistantUp, - removeAssistantFromList, -} from "@/lib/assistants/updateAssistantPreferences"; + DndContext, + DragEndEvent, + PointerSensor, + closestCenter, + useSensor, + useSensors, +} from "@dnd-kit/core"; +import { restrictToVerticalAxis } from "@dnd-kit/modifiers"; +import { + SortableContext, + arrayMove, + useSortable, + verticalListSortingStrategy, +} from "@dnd-kit/sortable"; +import { CSS } from "@dnd-kit/utilities"; +import Link from "next/link"; +import { useRouter } from "next/navigation"; +import useSWR from "swr"; +import { errorHandlingFetcher } from "@/lib/fetcher"; import { AssistantIcon } from "@/components/assistants/AssistantIcon"; -import { DefaultPopover } from "@/components/popover/DefaultPopover"; +import { Bubble } from "@/components/Bubble"; import { PopupSpec, usePopup } from "@/components/admin/connectors/Popup"; -import { useRouter } from "next/navigation"; -import { NavigationButton } from "../NavigationButton"; -import { AssistantsPageTitle } from "../AssistantsPageTitle"; import { checkUserOwnsAssistant } from "@/lib/assistants/checkOwnership"; 
+import { + bulkAddToList, + bulkRemoveFromList, + reorderAssistantList, + setDefaultAssistant, +} from "@/lib/assistants/updateAssistantPreferences"; import { AssistantSharingModal } from "./AssistantSharingModal"; import { AssistantSharedStatusDisplay } from "../AssistantSharedStatus"; -import useSWR from "swr"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ToolsDisplay } from "../ToolsDisplay"; +import { AssistantsPageTitle } from "../AssistantsPageTitle"; -function AssistantListItem({ - assistant, - user, - allAssistantIds, - allUsers, - isFirst, - isLast, - isVisible, - setPopup, +// How many doc-set name chips to render before collapsing the rest +// into a "+N more" pill. Three keeps the row scannable on most widths +// without losing the most-relevant scope at a glance. +const MAX_VISIBLE_DOC_SETS = 3; + +// --------------------------------------------------------------------------- +// Small inline switch — avoids pulling in a new component library for one +// toggle. role="switch" gives screen readers the right semantics. +// --------------------------------------------------------------------------- + +function Toggle({ + checked, + onChange, + ariaLabel, + highlight = false, }: { + checked: boolean; + onChange: (next: boolean) => void; + ariaLabel: string; + // When true, draw a transient ring around the switch to direct the + // eye. Used by hidden rows so clicking the (faded) row body points + // the user at the action that brings the assistant back. + highlight?: boolean; +}) { + return ( + + ); +} + +// --------------------------------------------------------------------------- +// Single row — used both inside the sortable visible section AND in the +// hidden section. `isSortable` toggles drag affordances; the rest of the +// row is identical so the visual stays consistent across the divider. 
+// --------------------------------------------------------------------------- + +interface RowProps { assistant: Persona; user: User | null; - allUsers: MinimalUserSnapshot[]; - allAssistantIds: number[]; - isFirst: boolean; - isLast: boolean; + isDefault: boolean; isVisible: boolean; - setPopup: (popupSpec: PopupSpec | null) => void; -}) { - const router = useRouter(); - const [showSharingModal, setShowSharingModal] = useState(false); + isSelected: boolean; + onToggleSelect: (id: number) => void; + onSetDefault: (id: number) => void; + onToggleVisibility: (id: number, makeVisible: boolean) => void; + onShareClick: (id: number) => void; +} - const currentChosenAssistants = user?.preferences?.chosen_assistants; +function RowContent({ + assistant, + user, + isDefault, + isVisible, + isSelected, + onToggleSelect, + onSetDefault, + onToggleVisibility, + onShareClick, + // From useSortable when in sortable context; null otherwise. + dragHandleProps, +}: RowProps & { + dragHandleProps: + | (React.HTMLAttributes & { ref?: any }) + | null; +}) { const isOwnedByUser = checkUserOwnsAssistant(user, assistant); + const canEdit = isOwnedByUser; + const canShare = isOwnedByUser && !assistant.is_public; + + // Doc-set names are surfaced as small chips; tools count was removed + // intentionally — see the chip JSX below for the why. + + // Click-on-hidden-row affordance: a click anywhere on the row body + // (not on an interactive control) draws a transient ring around the + // visibility toggle to point at the action. Doesn't auto-enable — + // surprising a user reading the description into enabling it would + // be worse than the discoverability gap we're fixing. 
+ const [highlightToggle, setHighlightToggle] = useState(false); + const highlightTimeoutRef = useRef | null>( + null + ); + const flashToggle = () => { + if (highlightTimeoutRef.current) { + clearTimeout(highlightTimeoutRef.current); + } + setHighlightToggle(true); + highlightTimeoutRef.current = setTimeout( + () => setHighlightToggle(false), + 1200 + ); + }; return ( - <> - { - setShowSharingModal(false); - router.refresh(); - }} - show={showSharingModal} - /> -
+ {/* Bulk-select checkbox. Hidden until hover or when something is + already selected on the page (the parent shows the action bar + based on that). Keyboard users always have it via focus. */} + onToggleSelect(assistant.id)} className=" - bg-background-emphasis - rounded-lg - shadow-md - p-4 - mb-4 - flex - justify-between - items-center + h-4 w-4 cursor-pointer + opacity-30 group-hover:opacity-100 focus:opacity-100 + checked:opacity-100 + transition-opacity " + /> + + {/* Drag handle — only meaningful for visible rows. Hidden rows + have no position to drag to. */} + {dragHandleProps ? ( + + ) : ( + // Reserve the slot so visible/hidden rows line up vertically. +
+ )} + + {/* CONTENT ZONE — fades on hidden rows. Action controls below + stay at full opacity so they remain the bright, clickable + targets on a dimmed row. */} +
-
-
- -

+ + +
+
+

{assistant.name}

+ {isDefault && ( + + Default + + )}
- {assistant.tools.length > 0 && ( - + + {/* Description bumped — used to be text-sm with no weight; now + it's the primary signal of what the assistant is for. */} + {assistant.description && ( +
+ {assistant.description} +
)} -
{assistant.description}
-
+ + {/* Sharing status, e.g. "Shared with 3 people". */} +
+ + {/* Knowledge-scope chips — show which document sets the + assistant points at, not just the count. With many sets, + show the first few and a "+N more" with the rest in a + tooltip so the row stays scannable. Tools chip was + intentionally removed: tool execution isn't reliable yet + and surfacing tool counts misleads users into picking an + assistant for a capability that may not work in practice. */} + {assistant.document_sets && assistant.document_sets.length > 0 && ( +
+ {assistant.document_sets + .slice(0, MAX_VISIBLE_DOC_SETS) + .map((ds) => ( + +
+ + + {ds.name} + +
+
+ ))} + {assistant.document_sets.length > MAX_VISIBLE_DOC_SETS && ( + + d.name) + .join(", ")} + > + +{assistant.document_sets.length - MAX_VISIBLE_DOC_SETS}{" "} + more + + + )} +
+ )}
- {isOwnedByUser && ( -
- {!assistant.is_public && ( -
setShowSharingModal(true)} - > - -
- )} - - - -
+
+ {/* End CONTENT ZONE. Actions below sit OUTSIDE the opacity + wrapper so they remain at full opacity on hidden rows — the + toggle must be the bright, clickable focus when the rest of + the row is dimmed. */} + + {/* Right-side actions. Order matters for scannability: default + pin first (most-used), visibility toggle, then ownership + actions (edit/share). stopPropagation prevents the row-body + flash-toggle handler from firing when the user clicks an + action directly. */} +
e.stopPropagation()} + > + {/* Pin / default. Only meaningful for visible rows — pinning a + hidden one would have to unhide it too; we surface that via + the visibility toggle instead. */} + {isVisible && ( + )} - - -
+ + {/* Visibility — switch instead of a buried popover item. + `highlight` is set by the row-body click handler on hidden + rows so a click anywhere on the (faded) row body draws the + eye to the action that brings the assistant back. */} + onToggleVisibility(assistant.id, next)} + highlight={highlightToggle} + ariaLabel={ + isVisible + ? `Hide ${assistant.name} from the picker` + : `Show ${assistant.name} in the picker` } - side="bottom" - align="start" - sideOffset={5} - > - {[ - ...(!isFirst - ? [ -
{ - const success = await moveAssistantUp( - assistant.id, - currentChosenAssistants || allAssistantIds - ); - if (success) { - setPopup({ - message: `"${assistant.name}" has been moved up.`, - type: "success", - }); - router.refresh(); - } else { - setPopup({ - message: `"${assistant.name}" could not be moved up.`, - type: "error", - }); - } - }} - > - Move Up -
, - ] - : []), - ...(!isLast - ? [ -
{ - const success = await moveAssistantDown( - assistant.id, - currentChosenAssistants || allAssistantIds - ); - if (success) { - setPopup({ - message: `"${assistant.name}" has been moved down.`, - type: "success", - }); - router.refresh(); - } else { - setPopup({ - message: `"${assistant.name}" could not be moved down.`, - type: "error", - }); - } - }} - > - Move Down -
, - ] - : []), - isVisible ? ( -
{ - if ( - currentChosenAssistants && - currentChosenAssistants.length === 1 - ) { - setPopup({ - message: `Cannot remove "${assistant.name}" - you must have at least one assistant.`, - type: "error", - }); - return; - } - - const success = await removeAssistantFromList( - assistant.id, - currentChosenAssistants || allAssistantIds - ); - if (success) { - setPopup({ - message: `"${assistant.name}" has been removed from your list.`, - type: "success", - }); - router.refresh(); - } else { - setPopup({ - message: `"${assistant.name}" could not be removed from your list.`, - type: "error", - }); - } - }} - > - {isOwnedByUser ? "Hide" : "Remove"} -
- ) : ( -
{ - const success = await addAssistantToList( - assistant.id, - currentChosenAssistants || allAssistantIds - ); - if (success) { - setPopup({ - message: `"${assistant.name}" has been added to your list.`, - type: "success", - }); - router.refresh(); - } else { - setPopup({ - message: `"${assistant.name}" could not be added to your list.`, - type: "error", - }); - } - }} - > - Add -
- ), - ]} - + /> + + {canShare && ( + + )} + {canEdit && ( + + + + )}

- +
+ ); +} + +// Sortable row — wraps RowContent and wires up @dnd-kit's transform/listeners. +function SortableAssistantRow(props: RowProps) { + const { + attributes, + listeners, + setNodeRef, + setActivatorNodeRef, + transform, + transition, + isDragging, + } = useSortable({ id: props.assistant.id }); + + const style: React.CSSProperties = { + transform: CSS.Transform.toString(transform), + transition, + opacity: isDragging ? 0.5 : 1, + }; + + return ( +
+ +
); } +// Static row — used for the hidden section (no DnD). +function StaticAssistantRow(props: RowProps) { + return ; +} + +// --------------------------------------------------------------------------- +// Bulk action bar — appears only when something is selected. The hide/show +// split mirrors the per-row visibility toggle; "Remove" matches the prior +// "Hide / Remove" semantic (removes from chosen_assistants regardless of +// ownership). +// --------------------------------------------------------------------------- + +function BulkActionsBar({ + selectedCount, + onClearSelection, + onShow, + onHide, + onRemove, +}: { + selectedCount: number; + onClearSelection: () => void; + onShow: () => void; + onHide: () => void; + onRemove: () => void; +}) { + return ( +
+ {selectedCount} selected + + + + +
+ ); +} + +// --------------------------------------------------------------------------- +// Main list. State model: +// - `chosenOrder`: the user's chosen_assistants array (ordered, visible) +// - hidden = every assistant the user has access to that's NOT in chosenOrder +// - selected: bulk-action set; orthogonal to visible/hidden +// - search: pure client-side filter applied to both groups before render +// +// All mutations are optimistic — update local state, fire PATCH; on error +// roll back and surface a toast. router.refresh() runs on success so the +// rest of the app (chat picker etc.) sees the new order. +// --------------------------------------------------------------------------- + interface AssistantsListProps { user: User | null; assistants: Persona[]; } export function AssistantsList({ user, assistants }: AssistantsListProps) { - const filteredAssistants = orderAssistantsForUser(assistants, user); - const ownedButHiddenAssistants = assistants.filter( - (assistant) => - checkUserOwnsAssistant(user, assistant) && - user?.preferences?.chosen_assistants && - !user?.preferences?.chosen_assistants?.includes(assistant.id) - ); - const allAssistantIds = assistants.map((assistant) => assistant.id); - + const router = useRouter(); const { popup, setPopup } = usePopup(); - const { data: users } = useSWR( + // When the user has no preference yet, treat every accessible + // assistant as "visible by default" — matches the previous behavior. + const initialChosen: number[] = + user?.preferences?.chosen_assistants ?? assistants.map((a) => a.id); + + const [chosenOrder, setChosenOrder] = useState(initialChosen); + const [search, setSearch] = useState(""); + const [selected, setSelected] = useState>(new Set()); + const [sharingAssistantId, setSharingAssistantId] = useState( + null + ); + + // Pulled from /api/users; used by the share modal. Same pattern as the + // pre-rewrite component. 
+ const { data: allUsers } = useSWR( "/api/users", errorHandlingFetcher ); + // Derived: id-keyed lookup, visible/hidden splits, search-filtered. + const assistantsById = useMemo( + () => new Map(assistants.map((a) => [a.id, a])), + [assistants] + ); + const chosenSet = useMemo(() => new Set(chosenOrder), [chosenOrder]); + + const visibleAssistants: Persona[] = useMemo(() => { + const out: Persona[] = []; + for (const id of chosenOrder) { + const a = assistantsById.get(id); + if (a) out.push(a); + } + return out; + }, [chosenOrder, assistantsById]); + + const hiddenAssistants: Persona[] = useMemo( + () => assistants.filter((a) => !chosenSet.has(a.id)), + [assistants, chosenSet] + ); + + const matchesSearch = (a: Persona) => { + if (!search.trim()) return true; + const q = search.trim().toLowerCase(); + if (a.name.toLowerCase().includes(q)) return true; + if (a.description?.toLowerCase().includes(q)) return true; + return (a.tools ?? []).some((t) => t.name.toLowerCase().includes(q)); + }; + + const filteredVisible = visibleAssistants.filter(matchesSearch); + const filteredHidden = hiddenAssistants.filter(matchesSearch); + + // The default is just position 0 of chosen_assistants. If the user has + // no preference at all, there's no notion of "default yet" — leave it + // unset so no row shows the accent until the user picks. + const defaultId = + user?.preferences?.chosen_assistants && chosenOrder.length > 0 + ? 
chosenOrder[0] + : null; + + // ---- persistence with optimistic + undo -------------------------------- + + const persistOrder = async ( + nextOrder: number[], + { + successMsg, + undoToOrder, + }: { successMsg?: string; undoToOrder?: number[] } = {} + ): Promise => { + const prev = chosenOrder; + setChosenOrder(nextOrder); + const ok = await reorderAssistantList(nextOrder); + if (!ok) { + setChosenOrder(prev); + setPopup({ + message: "Couldn't update your assistant list — please try again.", + type: "error", + }); + return false; + } + if (successMsg) { + setPopup({ + message: successMsg, + type: "success", + undo: + undoToOrder !== undefined + ? { + onClick: async () => { + await persistOrder(undoToOrder); + }, + } + : undefined, + }); + } + // Refresh the SSR-fetched data so other parts of the app see the + // new order (chat picker, sidebar, etc.). + router.refresh(); + return true; + }; + + // ---- handlers ---------------------------------------------------------- + + const handleDragEnd = (event: DragEndEvent) => { + const { active, over } = event; + if (!over || active.id === over.id) return; + const oldIndex = chosenOrder.indexOf(Number(active.id)); + const newIndex = chosenOrder.indexOf(Number(over.id)); + if (oldIndex < 0 || newIndex < 0) return; + const next = arrayMove(chosenOrder, oldIndex, newIndex); + void persistOrder(next, { + successMsg: "Order updated.", + undoToOrder: chosenOrder, + }); + }; + + const handleSetDefault = async (id: number) => { + if (chosenOrder[0] === id) return; + const prev = chosenOrder; + const ok = await persistOrder( + [id, ...chosenOrder.filter((x) => x !== id)], + { + successMsg: `Default assistant updated.`, + undoToOrder: prev, + } + ); + if (!ok) { + // persistOrder already showed the error toast. + } else { + // setDefaultAssistant also handles the case where id wasn't in + // chosen_assistants; persistOrder above already prepended it. 
+ void setDefaultAssistant(id, prev); // best-effort idempotent confirmation + } + }; + + const handleToggleVisibility = async (id: number, makeVisible: boolean) => { + const prev = chosenOrder; + if (makeVisible) { + // Add to end so reorder isn't surprising. + const next = [...chosenOrder, id]; + const assistant = assistantsById.get(id); + await persistOrder(next, { + successMsg: assistant + ? `"${assistant.name}" added to your picker.` + : "Added to your picker.", + undoToOrder: prev, + }); + } else { + if (chosenOrder.length === 1 && chosenOrder[0] === id) { + setPopup({ + message: + "You need at least one visible assistant — can't hide the last one.", + type: "error", + }); + return; + } + const next = chosenOrder.filter((x) => x !== id); + const assistant = assistantsById.get(id); + await persistOrder(next, { + successMsg: assistant + ? `"${assistant.name}" hidden from your picker.` + : "Hidden from your picker.", + undoToOrder: prev, + }); + } + }; + + const handleToggleSelect = (id: number) => { + setSelected((curr) => { + const next = new Set(curr); + if (next.has(id)) next.delete(id); + else next.add(id); + return next; + }); + }; + + const clearSelection = () => setSelected(new Set()); + + const handleBulkShow = async () => { + const ids = Array.from(selected); + const prev = chosenOrder; + const ok = await bulkAddToList(ids, chosenOrder); + if (!ok) { + setPopup({ message: "Couldn't show selected.", type: "error" }); + return; + } + // Mirror the optimistic update locally — the helper PATCHed the + // server; we just need to align local state. + const existing = new Set(chosenOrder); + const toAppend = ids.filter((id) => !existing.has(id)); + setChosenOrder([...chosenOrder, ...toAppend]); + setPopup({ + message: `${ids.length} assistant${ids.length === 1 ? 
"" : "s"} shown.`, + type: "success", + undo: { + onClick: async () => { + await persistOrder(prev); + }, + }, + }); + clearSelection(); + router.refresh(); + }; + + const handleBulkHide = async () => { + const ids = Array.from(selected); + // Don't let the user hide every visible row at once. + const remaining = chosenOrder.filter((id) => !ids.includes(id)); + if (remaining.length === 0 && chosenOrder.length > 0) { + setPopup({ + message: "Can't hide every visible assistant — keep at least one.", + type: "error", + }); + return; + } + const prev = chosenOrder; + const ok = await bulkRemoveFromList(ids, chosenOrder); + if (!ok) { + setPopup({ message: "Couldn't hide selected.", type: "error" }); + return; + } + setChosenOrder(remaining); + setPopup({ + message: `${ids.length} assistant${ids.length === 1 ? "" : "s"} hidden.`, + type: "success", + undo: { + onClick: async () => { + await persistOrder(prev); + }, + }, + }); + clearSelection(); + router.refresh(); + }; + + // "Remove" is the same backend op as Hide today — both just remove the + // ids from chosen_assistants. The label distinction is a UX hint: Hide + // is reversible by toggling the switch back on (or Undo); Remove + // implies "I don't want to see this any more." Functionally identical + // until we have a true "remove access" path. + const handleBulkRemove = handleBulkHide; + + // ---- DnD plumbing ------------------------------------------------------- + + // 6px activation distance: a click on the handle shouldn't immediately + // start a drag. Helps especially for the click-and-then-undo flow. + const sensors = useSensors( + useSensor(PointerSensor, { activationConstraint: { distance: 6 } }) + ); + + // ---- render ------------------------------------------------------------- + + const sharingAssistant = + sharingAssistantId != null + ? (assistantsById.get(sharingAssistantId) ?? null) + : null; + return ( <> {popup} -
- My Assistants - -
- - -
- - Create New Assistant -
-
- - - -
- - View Available Assistants -
-
+ {sharingAssistant && ( + { + setSharingAssistantId(null); + router.refresh(); + }} + show + /> + )} + +
+ {/* Header: title + 1-line subtitle + create button + browse link. + Cut the two-tile nav block and the explanatory paragraph. */} +
+
+ My Assistants + + Choose which assistants appear in the chat picker, set your + default, and reorder by dragging. + +
+ + Create
-

- Assistants allow you to customize your experience for a specific - purpose. Specifically, they combine instructions, extra knowledge, and - any combination of tools. -

- - - -

Active Assistants

- - - The order the assistants appear below will be the order they appear in - the Assistants dropdown. The first assistant listed will be your - default assistant when you start a new chat. - - -
- {filteredAssistants.map((assistant, index) => ( - - ))} +
+ + Browse all available assistants +
- {ownedButHiddenAssistants.length > 0 && ( - <> - - -

Your Hidden Assistants

+ {/* Search */} +
+ + setSearch(e.target.value)} + className=" + w-full pl-10 pr-3 py-2 + rounded-md border border-border bg-background + focus:outline-none focus:ring-2 focus:ring-accent + " + /> +
- - Assistants you've created that aren't currently visible - in the Assistants selector. - + {/* Bulk actions — only when something selected */} + {selected.size > 0 && ( + + )} -
- {ownedButHiddenAssistants.map((assistant, index) => ( - 0 ? ( + + a.id)} + strategy={verticalListSortingStrategy} + > + {filteredVisible.map((assistant) => ( + ))} + + + ) : ( + + )} + + {/* Hidden section — only show divider/header if there's anything */} + {filteredHidden.length > 0 && ( + <> +
+
+ + Hidden ({filteredHidden.length}) + +
+ {filteredHidden.map((assistant) => ( + + ))} )} + + {/* Search produced nothing at all */} + {filteredVisible.length === 0 && filteredHidden.length === 0 && ( + + )}
); } + +function EmptyState({ title, body }: { title: string; body: string }) { + return ( +
+

{title}

+

{body}

+
+ ); +} diff --git a/web/src/app/chat/ChatPage.tsx b/web/src/app/chat/ChatPage.tsx index c9deff70cbb..dd0d34cb341 100644 --- a/web/src/app/chat/ChatPage.tsx +++ b/web/src/app/chat/ChatPage.tsx @@ -1018,19 +1018,35 @@ export function ChatPage({ return; } + // Client-side byte pre-check, reading the SAME limit the backend enforces + // (CHAT_FILE_MAX_SIZE_MB, surfaced via settings; falls back to 25 if + // absent). The backend is still authoritative — a doc can additionally be + // rejected on the token gate after extraction. + const MAX_FILE_SIZE_MB = settings?.settings?.chat_file_max_size_mb ?? 25; + const tooLarge = acceptedFiles.find( + (file) => file.size > MAX_FILE_SIZE_MB * 1024 * 1024 + ); + if (tooLarge) { + setPopup({ + type: "error", + message: `"${tooLarge.name}" is too large (max ${MAX_FILE_SIZE_MB}MB).`, + }); + return; + } + const tempFileDescriptors = acceptedFiles.map((file) => ({ id: uuidv4(), type: file.type.startsWith("image/") ? ChatFileType.IMAGE : ChatFileType.DOCUMENT, isUploading: true, + progress: 0, })); - // only show loading spinner for reasonably large files - const totalSize = acceptedFiles.reduce((sum, file) => sum + file.size, 0); - if (totalSize > 50 * 1024) { - setCurrentMessageFiles((prev) => [...prev, ...tempFileDescriptors]); - } + // Always show the previews (with a progress bar) so the user can see the + // upload is actually happening — and so the send button stays gated until + // it finishes (see ChatInputBar's anyFilesUploading). + setCurrentMessageFiles((prev) => [...prev, ...tempFileDescriptors]); const removeTempFiles = (prev: FileDescriptor[]) => { return prev.filter( @@ -1038,7 +1054,18 @@ export function ChatPage({ ); }; - uploadFilesForChat(acceptedFiles).then(([files, error]) => { + // Per-file upload progress → update the matching temp descriptor. 
+ const updateProgress = (index: number, percent: number) => { + const tempId = tempFileDescriptors[index]?.id; + if (!tempId) return; + setCurrentMessageFiles((prev) => + prev.map((file) => + file.id === tempId ? { ...file, progress: percent } : file + ) + ); + }; + + uploadFilesForChat(acceptedFiles, updateProgress).then(([files, error]) => { if (error) { setCurrentMessageFiles((prev) => removeTempFiles(prev)); setPopup({ diff --git a/web/src/app/chat/files/InputBarPreview.tsx b/web/src/app/chat/files/InputBarPreview.tsx index 8eee7bbf9cd..57512c4f612 100644 --- a/web/src/app/chat/files/InputBarPreview.tsx +++ b/web/src/app/chat/files/InputBarPreview.tsx @@ -56,6 +56,8 @@ export function InputBarPreview({ absolute inset-0 flex + flex-col + gap-1 items-center justify-center bg-black @@ -65,6 +67,11 @@ export function InputBarPreview({ " > + {file.progress != null && ( + + {file.progress}% + + )}
)} {renderContent()} diff --git a/web/src/app/chat/folders/FolderManagement.tsx b/web/src/app/chat/folders/FolderManagement.tsx index 1dd87ccd99f..6a147c30311 100644 --- a/web/src/app/chat/folders/FolderManagement.tsx +++ b/web/src/app/chat/folders/FolderManagement.tsx @@ -12,8 +12,12 @@ export async function createFolder(folderName: string): Promise { if (!response.ok) { throw new Error("Failed to create folder"); } - const data = await response.json(); - return data.folder_id; + // The backend endpoint (POST /folder) returns the new folder id as a + // bare integer, not an object — so parse it directly. (`data.folder_id` + // was always undefined; harmless until the create handler started using + // the returned id for optimistic insertion.) + const folderId = await response.json(); + return folderId; } // Function to add a chat session to a folder diff --git a/web/src/app/chat/input/ChatInputBar.tsx b/web/src/app/chat/input/ChatInputBar.tsx index 3d5ff9dd968..3e5763bcbec 100644 --- a/web/src/app/chat/input/ChatInputBar.tsx +++ b/web/src/app/chat/input/ChatInputBar.tsx @@ -79,6 +79,13 @@ export function ChatInputBar({ } }, [message]); + // Block sending while any attached file is still uploading — otherwise the + // message references a file_id whose file_store row doesn't exist yet, and + // the backend errors ("File by name ... does not exist"). Send re-enables + // automatically once the upload(s) finish (the per-file spinner clears). + const anyFilesUploading = files.some((file) => file.isUploading); + const canSubmit = !!message && !isStreaming && !anyFilesUploading; + const handlePaste = (event: React.ClipboardEvent) => { const items = event.clipboardData?.items; if (items) { @@ -217,7 +224,9 @@ export function ChatInputBar({ {filteredPersonas.map((currentPersona, index) => (
{ if (!isStreaming) { - if (message) { + if (canSubmit) { onSubmit(); } } else { @@ -434,8 +457,8 @@ export function ChatInputBar({
diff --git a/web/src/app/chat/interfaces.ts b/web/src/app/chat/interfaces.ts index 902f5b86553..ae3a7dd3e37 100644 --- a/web/src/app/chat/interfaces.ts +++ b/web/src/app/chat/interfaces.ts @@ -32,6 +32,8 @@ export interface FileDescriptor { name?: string | null; // FE only isUploading?: boolean; + // FE only — upload progress 0-100 while isUploading + progress?: number; } export interface ToolCallMetadata { diff --git a/web/src/app/chat/lib.tsx b/web/src/app/chat/lib.tsx index 70d1fc59b84..606b206f37b 100644 --- a/web/src/app/chat/lib.tsx +++ b/web/src/app/chat/lib.tsx @@ -569,24 +569,118 @@ export function buildChatUrl( return "/chat"; } -export async function uploadFilesForChat( - files: File[] +// PUT one file straight to Azure Blob via a SAS URL, reporting progress. +function putToBlobWithProgress( + url: string, + file: File, + onProgress: (percent: number) => void +): Promise { + return new Promise((resolve, reject) => { + const xhr = new XMLHttpRequest(); + xhr.open("PUT", url); + xhr.setRequestHeader("x-ms-blob-type", "BlockBlob"); + if (file.type) xhr.setRequestHeader("Content-Type", file.type); + xhr.upload.onprogress = (e) => { + if (e.lengthComputable) + onProgress(Math.round((e.loaded / e.total) * 100)); + }; + xhr.onload = () => + xhr.status >= 200 && xhr.status < 300 + ? resolve() + : reject(new Error(`Blob upload failed (${xhr.status})`)); + xhr.onerror = () => reject(new Error("network error during upload")); + xhr.send(file); + }); +} + +// Fallback: two-hop upload through the api-server (Postgres file store, or +// when direct upload is unavailable). XHR so we still get a progress bar. 
+function uploadViaServer( + files: File[], + onProgress?: (index: number, percent: number) => void ): Promise<[FileDescriptor[], string | null]> { - const formData = new FormData(); - files.forEach((file) => { - formData.append("files", file); + return new Promise((resolve) => { + const formData = new FormData(); + files.forEach((file) => formData.append("files", file)); + const xhr = new XMLHttpRequest(); + xhr.open("POST", "/api/chat/file"); + xhr.upload.onprogress = (e) => { + if (e.lengthComputable) { + const percent = Math.round((e.loaded / e.total) * 100); + files.forEach((_, i) => onProgress?.(i, percent)); + } + }; + xhr.onload = () => + xhr.status >= 200 && xhr.status < 300 + ? resolve([ + JSON.parse(xhr.responseText).files as FileDescriptor[], + null, + ]) + : resolve([[], `Failed to upload files (${xhr.status})`]); + xhr.onerror = () => resolve([[], "network error during upload"]); + xhr.send(formData); }); +} - const response = await fetch("/api/chat/file", { +export async function uploadFilesForChat( + files: File[], + onProgress?: (index: number, percent: number) => void +): Promise<[FileDescriptor[], string | null]> { + // 1. Ask the server for direct-to-Blob upload URLs (Azure backend only). + const urlResp = await fetch("/api/chat/file/upload-url", { method: "POST", - body: formData, + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + files: files.map((f) => ({ + name: f.name, + content_type: f.type || null, + size: f.size, + })), + }), }); - if (!response.ok) { - return [[], `Failed to upload files - ${(await response.json()).detail}`]; + if (!urlResp.ok) { + return [[], `Failed to start upload - ${(await urlResp.json()).detail}`]; + } + const urlJson = await urlResp.json(); + + // 2a. Not Azure → fall back to the two-hop server upload. + if (!urlJson.direct_upload) { + return uploadViaServer(files, onProgress); + } + + // 2b. Azure → PUT each file directly to Blob (bypasses the server). 
+ const items: { file_id: string; upload_url: string }[] = urlJson.files; + try { + await Promise.all( + items.map((item, i) => + putToBlobWithProgress(item.upload_url, files[i], (p) => + onProgress?.(i, p) + ) + ) + ); + } catch (e) { + return [[], `Failed to upload files - ${e}`]; } - const responseJson = await response.json(); - return [responseJson.files as FileDescriptor[], null]; + // 3. Confirm so the server records metadata (+ extracts doc text). + const confirmResp = await fetch("/api/chat/file/confirm", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + files: items.map((item, i) => ({ + file_id: item.file_id, + name: files[i].name, + content_type: files[i].type || null, + })), + }), + }); + if (!confirmResp.ok) { + return [ + [], + `Failed to finalize upload - ${(await confirmResp.json()).detail}`, + ]; + } + return [(await confirmResp.json()).files as FileDescriptor[], null]; } export function useScrollonStream({ diff --git a/web/src/app/chat/sessionSidebar/ChatSessionDisplay.tsx b/web/src/app/chat/sessionSidebar/ChatSessionDisplay.tsx index 782b35a8c66..20c76145fa7 100644 --- a/web/src/app/chat/sessionSidebar/ChatSessionDisplay.tsx +++ b/web/src/app/chat/sessionSidebar/ChatSessionDisplay.tsx @@ -95,6 +95,14 @@ export function ChatSessionDisplay({ scroll={false} draggable="true" onDragStart={(event) => { + // This row is a (an
), so the browser treats the + // drag as a *link* drag and auto-attaches the URL (text/uri-list). + // That's what makes some browsers (Arc/Edge/Safari) offer "open in + // split view" when you drag toward the edge. Clear that default + // link payload and mark this as a move so only our folder DnD + // applies. + event.dataTransfer.clearData(); + event.dataTransfer.effectAllowed = "move"; event.dataTransfer.setData( CHAT_SESSION_ID_KEY, chatSession.id.toString() @@ -103,6 +111,36 @@ export function ChatSessionDisplay({ FOLDER_ID_KEY, chatSession.folder_id?.toString() || "" ); + + // Replace the browser's default drag image (a translucent clone + // of this full-width row, which trails awkwardly across the + // sidebar) with a compact chip showing the chat name. Built + // off-screen, snapshotted by setDragImage, then removed. + const chip = document.createElement("div"); + chip.textContent = chatName || `Chat ${chatSession.id}`; + Object.assign(chip.style, { + position: "fixed", + top: "-1000px", + left: "-1000px", + maxWidth: "200px", + overflow: "hidden", + whiteSpace: "nowrap", + textOverflow: "ellipsis", + padding: "4px 10px", + borderRadius: "6px", + fontSize: "12px", + fontWeight: "500", + color: "#fff", + background: "rgba(30, 30, 30, 0.92)", + boxShadow: "0 2px 6px rgba(0, 0, 0, 0.25)", + pointerEvents: "none", + }); + document.body.appendChild(chip); + event.dataTransfer.setDragImage(chip, 12, 12); + // Remove once the browser has snapshotted it for the drag. 
+ setTimeout(() => { + if (chip.parentNode) chip.parentNode.removeChild(chip); + }, 0); }} > diff --git a/web/src/app/chat/sessionSidebar/ChatSidebar.tsx b/web/src/app/chat/sessionSidebar/ChatSidebar.tsx index c145f7b4b8a..b6e094f6393 100644 --- a/web/src/app/chat/sessionSidebar/ChatSidebar.tsx +++ b/web/src/app/chat/sessionSidebar/ChatSidebar.tsx @@ -1,7 +1,13 @@ "use client"; -import { FiBook, FiEdit, FiFolderPlus, FiPlusSquare } from "react-icons/fi"; -import { useContext, useEffect, useRef, useState } from "react"; +import { + FiBook, + FiEdit, + FiFolderPlus, + FiLoader, + FiPlusSquare, +} from "react-icons/fi"; +import { useContext, useEffect, useRef, useState, useTransition } from "react"; import Link from "next/link"; import Image from "next/image"; import { useRouter } from "next/navigation"; @@ -38,6 +44,13 @@ export const ChatSidebar = ({ const router = useRouter(); const { popup, setPopup } = usePopup(); + // Navigating to "Manage Assistants" awaits the heavy fetchChatData + // bundle server-side. useTransition keeps the *current* page (with this + // sidebar) mounted and visible throughout — so it reads as an in-app + // transition, not a blank reload — while isPending drives an inline + // spinner on the button so the click clearly registers. + const [isNavigatingAssistants, startAssistantsNav] = useTransition(); + const currentChatId = currentChatSession?.id; // prevent the NextJS Router cache from causing the chat sidebar to not @@ -46,6 +59,15 @@ export const ChatSidebar = ({ router.refresh(); }, [currentChatId]); + // Local mirror of the server-provided folders so we can show a newly + // created folder instantly, without a full `router.refresh()` (which + // re-runs the entire heavy fetchChatData bundle just to add one empty + // folder). Re-synced whenever the server prop changes. 
+ const [localFolders, setLocalFolders] = useState(folders); + useEffect(() => { + setLocalFolders(folders); + }, [folders]); + const combinedSettings = useContext(SettingsContext); if (!combinedSettings) { return null; @@ -118,8 +140,22 @@ export const ChatSidebar = ({ onClick={() => createFolder("New Folder") .then((folderId) => { - console.log(`Folder created with ID: ${folderId}`); - router.refresh(); + // Append the new (empty) folder to local state instead + // of router.refresh() — instant, no full refetch. The + // create POST itself is a single fast INSERT. + setLocalFolders((prev) => [ + ...prev, + { + folder_id: folderId, + folder_name: "New Folder", + display_priority: + prev.reduce( + (max, f) => Math.max(max, f.display_priority), + -1 + ) + 1, + chat_sessions: [], + }, + ]); }) .catch((error) => { console.error("Failed to create folder:", error); @@ -137,20 +173,30 @@ export const ChatSidebar = ({
- - +
+ + startAssistantsNav(() => router.push("/assistants/mine")) + } + >
- Manage Assistants + {isNavigatingAssistants ? ( + + ) : ( + + )} + {isNavigatingAssistants ? "Loading…" : "Manage Assistants"}
- +
diff --git a/web/src/components/admin/connectors/Popup.tsx b/web/src/components/admin/connectors/Popup.tsx index adfc0665c25..2bcc73b7d5b 100644 --- a/web/src/components/admin/connectors/Popup.tsx +++ b/web/src/components/admin/connectors/Popup.tsx @@ -3,15 +3,59 @@ import { useRef, useState } from "react"; export interface PopupSpec { message: string; type: "success" | "error"; + // Optional undo affordance. When present, the popup renders an "Undo" + // button next to the message; clicking it invokes onClick and dismisses + // the popup. The popup also stays on screen longer when undoable + // (default 4s → 6s) so the user has time to react. + undo?: { + label?: string; // defaults to "Undo" + onClick: () => Promise | void; + }; } -export const Popup: React.FC = ({ message, type }) => ( +export const Popup: React.FC< + PopupSpec & { onUndo?: () => void; onDismiss?: () => void } +> = ({ message, type, undo, onUndo, onDismiss }) => (
- {message} + {message} + {undo && ( + + )} + {onDismiss && ( + + )}
); @@ -27,13 +71,24 @@ export const usePopup = () => { } setPopup(popupSpec); - timeoutRef.current = setTimeout(() => { - setPopup(null); - }, 4000); + if (popupSpec) { + // Undoable popups stay on screen a bit longer — users need time to + // notice the affordance and click it. 6s vs 4s for plain toasts. + const ms = popupSpec.undo ? 6000 : 4000; + timeoutRef.current = setTimeout(() => { + setPopup(null); + }, ms); + } }; return { - popup: popup && , + popup: popup && ( + setPopupWithExpiration(null)} + onDismiss={() => setPopupWithExpiration(null)} + /> + ), setPopup: setPopupWithExpiration, }; }; diff --git a/web/src/lib/assistants/updateAssistantPreferences.ts b/web/src/lib/assistants/updateAssistantPreferences.ts index f902e561cf4..05b0b3e5302 100644 --- a/web/src/lib/assistants/updateAssistantPreferences.ts +++ b/web/src/lib/assistants/updateAssistantPreferences.ts @@ -1,3 +1,8 @@ +// PATCH the user's full `chosen_assistants` array. This single endpoint +// drives every preference mutation below — visibility, ordering, default +// — because the backend treats the array as both "which assistants are +// visible in the picker" (membership) AND "in what order" (positions), +// with position 0 = default. async function updateUserAssistantList( chosenAssistants: number[] ): Promise { @@ -60,3 +65,51 @@ export async function moveAssistantDown( } return false; } + +// --------------------------------------------------------------------------- +// Used by the new Manage Assistants UX +// --------------------------------------------------------------------------- + +/** Replace the user's full chosen_assistants list (drag-reorder, bulk ops). */ +export async function reorderAssistantList( + newOrder: number[] +): Promise { + return updateUserAssistantList(newOrder); +} + +/** + * Move `assistantId` to position 0 so it becomes the user's default. If the + * id isn't in the list it's prepended (i.e. set-as-default also unhides it). 
+ */ +export async function setDefaultAssistant( + assistantId: number, + chosenAssistants: number[] +): Promise { + const withoutTarget = chosenAssistants.filter((id) => id !== assistantId); + return updateUserAssistantList([assistantId, ...withoutTarget]); +} + +/** Bulk: hide a set of assistant ids (remove them from chosen_assistants). */ +export async function bulkRemoveFromList( + assistantIds: number[], + chosenAssistants: number[] +): Promise { + const toRemove = new Set(assistantIds); + return updateUserAssistantList( + chosenAssistants.filter((id) => !toRemove.has(id)) + ); +} + +/** Bulk: add a set of assistant ids to the visible list (appended at the end). */ +export async function bulkAddToList( + assistantIds: number[], + chosenAssistants: number[] +): Promise { + const existing = new Set(chosenAssistants); + const toAppend = assistantIds.filter((id) => !existing.has(id)); + if (toAppend.length === 0) { + // Nothing to do, but report success so the UI can clear its selection. + return true; + } + return updateUserAssistantList([...chosenAssistants, ...toAppend]); +}