diff --git a/.gitignore b/.gitignore
index d8fe975a7bd..fd70ee1ce5c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -29,3 +29,13 @@ requestdata.json
 # screenshots) written during local UI debugging. Not source.
 .playwright-mcp/
 model-picker-open.png
+
+# Live cluster dumps from `kubectl get -o yaml > …`. NEVER commit:
+# Darwin's ConfigMap currently contains real secrets in plaintext (Slack
+# tokens, GEN_AI client secret, Jira token, Opsgenie key, etc.) — those
+# values would be committed verbatim if temp/ ever got tracked. Real
+# secret values for the new k8s/ layout live in gitignored *.env files
+# under overlays/.
+darwin-kubernetes/temp/
+k8s/overlays/*/secrets.env
+k8s/overlays/*/*.secrets.env
diff --git a/AGENTS.md b/AGENTS.md
index cb86cb4cd8c..93fb7815714 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -64,7 +64,7 @@ moved on substantially. This table is the explicit map.
 | Indexing runtime | Celery `docfetching` + `docprocessing` workers | **Dask `LocalCluster`** in `update.py` (Celery only does maintenance) |
 | Number of Celery workers | Eight specialized workers (primary, light, heavy, kg_processing, monitoring, beat, etc.) | One worker + beat, spawned by `dev_run_background_jobs.py` |
 | Celery task definition | `@shared_task` under `background/celery/tasks/` | `@celery_app.task` in `background/celery/celery_app.py` |
-| Celery broker | Redis | SQLAlchemy/Postgres (`sqla+postgresql+psycopg2://…`) |
+| Celery broker | Redis | SQLAlchemy/Postgres by default; **optionally Redis** via `CELERY_BROKER_REDIS_ENABLED=true` (logical DB `CELERY_REDIS_DB_NUMBER`, default 1). Prod enables it to keep Celery's queue traffic off Postgres. Indexing is still Dask either way. |
 | Error handling | `raise OnyxError(OnyxErrorCode.X, …)` everywhere; no `HTTPException` | Plain `HTTPException(status_code=…, detail=…)` is the norm here. `OnyxError` doesn't exist. |
 | FastAPI return types | "Don't use `response_model=`, just type the function" | Both styles exist in this fork (the typed-return-annotation form is the majority — `response_model=` only appears once in `connector.py:560`). New endpoints should use the typed-return form. Don't strip the existing `response_model=` without checking serialization behavior. |
 | LLM call instrumentation | Every call must open a `LLMFlow`-tagged span via `traced_llm_call(...)` | No tracing system. `LLMFlow` doesn't exist. |
@@ -75,6 +75,7 @@ moved on substantially. This table is the explicit map.
 | Test buckets | `backend/tests/{unit,external_dependency_unit,integration}` + Playwright e2e | No comparable structure here. Most code lacks tests; add tests with the change if practical, otherwise note in PR. |
 | Plan template | The "Creating a Plan" section in their `CLAUDE.md` (Issues / Notes / Strategy / Tests) | Useful template; can be borrowed for non-trivial changes here too. |
 | Frontend stack | Next.js 15+, React 18+ | Next.js 14.2.x (App Router), React 18 |
+| K8s manifest path | `deployment/kubernetes/*` is what upstream documents | **`darwin-kubernetes/*` is the source of truth for the Darwin prod cluster.** `deployment/kubernetes/*` is upstream legacy / scratch — Darwin doesn't apply from there. New manifests for Darwin go in `darwin-kubernetes/`. See critical fact §9. |
 
 **Rule of thumb when reading upstream code or upstream guidance:** assume
 it doesn't apply unless you can verify the same construct exists here.
@@ -164,8 +165,9 @@ web/src/
 deployment/docker_compose/
   docker-compose.dev.yml           ← local stack (relational_db + index/Vespa +
                                      api_server + web_server + model_server +
-                                     background + nginx). Note: no Redis
-                                     here — Celery uses Postgres as its broker.
+                                     background + nginx). Celery brokers on
+                                     Postgres by default, or Redis when
+                                     CELERY_BROKER_REDIS_ENABLED=true.
 ```
 
 ---
@@ -340,6 +342,96 @@ auto-parse entirely with a raw `requests.get` against the
 `/drives/{drive_id}/items/{item_id}/content` endpoint using the bearer
 token. Don't reintroduce the lossy re-serialization.
 
+### 9. `darwin-kubernetes/` is the source of truth for the Darwin cluster
+
+The repo has two parallel k8s manifest trees and they are **not** kept
+in sync:
+
+| Path | What it is | When to touch |
+|---|---|---|
+| `darwin-kubernetes/*.yaml` | **The actual manifests applied to Darwin's AKS cluster (the `darwin` kube context).** Image registry is `sfbrdevhelmweacr.azurecr.io/...`, configmap is `env-configmap`, secrets is `danswer-secrets`, indexing pods have `indexcpu`-pool affinity + `darwin/indexing` toleration, env vars come from the Darwin configmap. | **Edit here for any prod-affecting change**, including new deployments. |
+| `deployment/kubernetes/*.yaml` | Upstream-style manifests inherited from Onyx / authored to match the OSS docker-compose. Generic image (`danswer/danswer-backend:latest`), no Azure-specific affinity / tolerations, no Darwin-specific configmap wiring. | Reference only — not deployed to Darwin. Useful for seeing the "upstream shape" of a new component before adapting it to `darwin-kubernetes/`. |
+
+When upstream (or a branch like `feature/backgroundscaling`) adds a
+new manifest in `deployment/kubernetes/`, the corresponding
+`darwin-kubernetes/` version must be hand-ported with:
+
+- Image: `sfbrdevhelmweacr.azurecr.io/danswer/danswer-backend:<tag>`
+- `envFrom: configMapRef name: env-configmap`
+- POSTGRES_USER / POSTGRES_PASSWORD via `secretKeyRef name: danswer-secrets`
+- REDIS_PASSWORD via `secretKeyRef name: danswer-secrets, optional: true`
+  (so unauth'd in-cluster Redis still works)
+- For indexing-related pods: `nodeAffinity` on `agentpool=indexcpu` +
+  `tolerations` for `darwin/indexing/NoSchedule` + `dynamic-pvc` /
+  `file-connector-pvc` volume mounts.
+
+A drop-in port that misses any of these will boot in Darwin but
+mis-route, miss secrets, or end up on the wrong node pool. The
+existing `darwin-kubernetes/background-deployment.yaml` and
+`api_server-service-deployment.yaml` are the canonical templates for
+the conventions.
+
+### 10. NEVER use `:latest` (or a floating tag) for Vespa — pin the exact version
+
+**This caused a full prod outage.** Vespa's config server refuses an
+auto-upgrade spanning more than ~30 releases
+(`VersionState.verifyVersionIntervalForUpgrade` → `Cannot upgrade from
+X to Y ... interval too large`). If a manifest change bumps the Vespa image to a
+much newer version, **every Vespa StatefulSet rolls and the config
+server crash-loops on bootstrap**, taking the whole cluster down
+(config tier → no quorum → cluster-wide `upstream connect error /
+connection refused` 503s on search AND the api-server's
+`ensure_indices_exist`).
+
+What triggered it: an image spec of bare `vespaengine/vespa` (which
+pulls `:latest` at pull time) was changed to an explicit
+`vespaengine/vespa:latest`, and on the next `kubectl apply` `:latest`
+had moved 30+ releases ahead of the running version.
+
+Rules:
+- **Pin Vespa to the exact version the cluster runs.** As of this
+  writing that is **`8.600.35`** — it's the on-disk format the content
+  nodes' index (1.6M+ docs, 100Gi PVCs) is written in. See the pinned
+  `images:` entry + comment in `k8s/overlays/{prod,local}/kustomization.yaml`.
+- **Upgrades are STEPWISE and deliberate** — at most ~30 releases per
+  hop, applied as an ordered operation, never a bare tag bump. Do NOT
+  set `VESPA_SKIP_UPGRADE_CHECK=true` to force a big jump on prod; it
+  risks the index format.
+- **Upgrade with `k8s/scripts/vespa-upgrade.sh <target> [ns]`, NOT a
+  kustomize apply.** Ordering across the 5 StatefulSets (configserver →
+  admin → content one-ordinal-at-a-time → feed → query, health-gated
+  between each) is impossible to express declaratively — a `kubectl
+  apply` rolls them all at once. The manifests support the script via
+  **per-role logical image names** (`vespa-configserver`, `vespa-admin`,
+  `vespa-content`, `vespa-feed`, `vespa-query` in `k8s/base/vespa/`) so
+  versions move independently, plus readiness probes on content/admin
+  with `publishNotReadyAddresses: true` on `vespa-internal` (peer
+  discovery must not be readiness-gated). Run `DRY_RUN=1` first. After a
+  successful upgrade, sync the per-role `newTag`s in the overlays.
+- **`k8s/scripts/guarded-apply.sh <overlay>` is the everyday-apply safety
+  net, not the upgrade tool.** The guard reads the live running Vespa
+  version, compares it to what the overlay would deploy, and refuses an
+  upgrade of more than 30 minor releases / major change / floating tag (and warns on big
+  downgrades) before it can reach the cluster. It checks against *live*
+  (not the repo's previous pin) because config drifts out of git. But it
+  still rolls all roles at once — for an actual version change use
+  `vespa-upgrade.sh`.
+- This applies to any version-stateful StatefulSet, but Vespa is the
+  one that bites.
+
+**Recovery if it happens again** (data is safe — it lives on the
+content PVCs, untouched): set all 5 Vespa StatefulSets' image back to
+the running version (`kubectl set image statefulset/vespa-* ...`),
+delete the config-server pods to recreate on the correct version, wait
+for `:19071/state/v1/health` → 200, then restart the api-server so
+`ensure_indices_exist` redeploys the schema. (Clearing the
+config-server ZooKeeper state via `vespa-configserver-remove-state` is
+only needed if the ZK state is genuinely corrupt — the version
+mismatch alone does NOT require it.) Vespa nodes also have **no
+liveness probes by design** (an aggressive one kills slow-but-healthy
+nodes); readiness probes on the Service-backed nodes
+(configserver/query/feed) gate traffic during the slow bootstrap.
+
 ---
 
 ## Common workflows
diff --git a/MIGRATION.md b/MIGRATION.md
new file mode 100644
index 00000000000..a2e321437fe
--- /dev/null
+++ b/MIGRATION.md
@@ -0,0 +1,404 @@
+# Migration Guide
+
+This branch combines three independent slices of work:
+
+1. **Background indexing scaling** — Dask scheduler topology, split out
+   into separate k8s deployments
+2. **Redis caching + rate limiting** — read-through KV cache, per-user
+   request rate limiter, persona-list cache with write-through
+   invalidation
+3. **Assistants UX rework** — Manage Assistants + Assistant Gallery
+   pages, seed script for local UX testing
+
+> **TL;DR — everything new is default OFF.** Deploying this branch
+> as-is does **not** change runtime behaviour for the chat path or the
+> background workers. You opt in per feature by setting env vars.
+
+The only mandatory deltas at deploy time are:
+- Two new Python deps (`redis`, `bokeh`) installed automatically when
+  the backend image rebuilds against the new `requirements/default.txt`.
+- A few non-secret env vars added to the configmap (all defaulting to
+  empty/false — safe).
+
+Everything else (Redis pod, cache enablement, rate limits, new
+background topology) is opt-in.
+
+---
+
+## 1. What's in this branch — quick map
+
+### Backend / infra
+
+| Slice | Files | Default state |
+|---|---|---|
+| Redis foundation + KV cache | `backend/danswer/redis/redis_pool.py`, `dynamic_configs/store.py`, `factory.py`, `configs/app_configs.py` | `REDIS_KV_CACHE_ENABLED=""` → OFF |
+| Per-user request rate limiter | `backend/danswer/server/middleware/request_rate_limit.py`, wired on `/send-message` + `/stream-answer-with-quote` | `REQUEST_RATE_LIMIT_ENABLED=""` → OFF |
+| Persona list cache | `backend/danswer/db/persona_cache.py`, `db/persona.py` (write-through invalidation), `ee/danswer/db/user_group.py` | `PERSONA_CACHE_ENABLED=""` → OFF |
+| Dask scheduler topology | `backend/danswer/background/update.py`, new `deployment/kubernetes/*` manifests | Existing single `background` pod still runs; new manifests not applied unless you `kubectl apply` them |
+
+### Frontend / UX
+
+| Page | What changed | Risk |
+|---|---|---|
+| `/assistants/mine` (Manage) | Drag-and-drop reorder, default pin, visibility toggle, search, bulk actions, undo toast | Cosmetic — backend unchanged |
+| `/assistants/gallery` (Browse) | Sections, filter chips, sort, column picker (persists in localStorage), doc-set names | Cosmetic — backend unchanged |
+
+### Tooling
+
+| Item | Purpose |
+|---|---|
+| `backend/scripts/seed_assistants.py` | Local dev seed of ~50 assistants for UX testing |
+| `REDIS_CACHING_PLAN.md` | Design rationale; not load-bearing for deploy |
+
+---
+
+## 2. New Python dependencies
+
+`backend/requirements/default.txt` adds two pins:
+
+```
+redis==5.0.8                    # Redis client for the caching/rate-limit layer
+bokeh>=2.4.2,<3.0               # Dask scheduler dashboard at :8787 (bg-scaling commit)
+```
+
+**Action**: rebuild the backend image. If you `pip install -r` in a
+venv, also re-run that.
+
+---
+
+## 3. New env vars (env-configmap)
+
+All default to empty/false. Add to `darwin-kubernetes/env-configmap.yaml`
+(already done on this branch — verify and apply):
+
+```yaml
+# Redis connection (only used when one of the *_ENABLED flags below is true)
+REDIS_HOST: "redis"               # in-cluster service name
+REDIS_PORT: "6379"
+REDIS_DB_NUMBER: "0"
+REDIS_SSL: ""
+
+# Feature flags — default OFF
+REDIS_KV_CACHE_ENABLED: ""        # set to "true" to enable read-through KV cache
+REDIS_KV_CACHE_TTL_SECONDS: "86400"
+REQUEST_RATE_LIMIT_ENABLED: ""    # set to "true" to enable
+REQUEST_RATE_LIMIT_PER_MINUTE: "" # set a number (e.g. "20") to cap; 0/empty = no per-min cap
+REQUEST_RATE_LIMIT_PER_HOUR: ""   # set a number (e.g. "300") to cap; 0/empty = no per-hour cap
+```
+
+`PERSONA_CACHE_ENABLED` / `PERSONA_CACHE_TTL_SECONDS` live in
+`backend/danswer/configs/app_configs.py` defaults — you can override
+via env if you want to enable, but they're not in the configmap by
+default. Add a line if you plan to enable.
+
+---
+
+## 4. New secrets
+
+`darwin-kubernetes/secrets.yaml` gains one optional key:
+
+```yaml
+stringData:
+  redis_password: ""              # empty for unauth'd in-cluster Redis
+```
+
+The `api_server` and `background` deployments reference it with
+`optional: true`, so an absent or empty value is fine for the unauth'd
+in-cluster Redis StatefulSet.
+
+---
+
+## 5. New Kubernetes manifests
+
+### 5a. Redis StatefulSet (always needed if any Redis flag is on)
+
+```bash
+kubectl apply -f darwin-kubernetes/redis-statefulset.yaml
+```
+
+What it ships: a single-replica Redis 7.2-alpine, cache-only config
+(no AOF, no RDB snapshots, `maxmemory 256mb`, `allkeys-lru`), exposed
+as the `redis` ClusterIP Service on 6379. Pod restart drops the cache
+— that's intentional; the source of truth is Postgres, and counters
+self-heal as windows expire.
+
+### 5b. Dask scaling topology (opt-in, Darwin manifests ready)
+
+The bg-scaling commit added 5 upstream-style manifests under
+`deployment/kubernetes/` (legacy / reference tree; **not** what
+Darwin applies from — see AGENTS.md "Critical fact §9"). A later
+commit on this branch ported each one to Darwin conventions under
+`darwin-kubernetes/`, with the right image registry, configmap /
+secrets wiring, REDIS_PASSWORD (optional), indexcpu node affinity,
+darwin/indexing toleration, and PVCs:
+
+- `darwin-kubernetes/background-beat-deployment.yaml`
+- `darwin-kubernetes/background-celery-deployment.yaml`
+- `darwin-kubernetes/background-indexer-scheduler-deployment.yaml`
+- `darwin-kubernetes/dask-scheduler-service-deployment.yaml`
+- `darwin-kubernetes/dask-worker-deployment.yaml`
+
+Plus `deployment/docker_compose/docker-compose.dask-distributed.yml`
+(compose variant, for local reproduction of the remote-scheduler
+topology — not part of the prod deploy).
+
+Darwin currently runs `darwin-kubernetes/background-deployment.yaml`
+(a single combined beat+celery+indexer pod via supervisord). **The new
+manifests are NOT applied automatically** by `kubectl apply -f
+darwin-kubernetes/` because the combined deployment is still in place
+— you apply each new file explicitly when you want to switch.
+
+To switch Darwin to the split topology:
+
+```bash
+# 1. Apply the new five (order doesn't matter; they self-discover
+#    the scheduler Service once it's up).
+kubectl apply -f darwin-kubernetes/dask-scheduler-service-deployment.yaml
+kubectl apply -f darwin-kubernetes/dask-worker-deployment.yaml
+kubectl apply -f darwin-kubernetes/background-beat-deployment.yaml
+kubectl apply -f darwin-kubernetes/background-celery-deployment.yaml
+kubectl apply -f darwin-kubernetes/background-indexer-scheduler-deployment.yaml
+
+# 2. Wait for all five to be Ready.
+kubectl get pods -l 'app in (background-beat,background-celery,background-indexer-scheduler,dask-scheduler,dask-worker)'
+
+# 3. Once healthy + you've seen an indexing attempt dispatch through
+#    the new dask-scheduler-service (check the indexer-scheduler
+#    pod logs), scale the old combined deployment to 0:
+kubectl scale deploy/background-deployment --replicas=0
+
+# 4. If anything goes wrong, scale back up:
+kubectl scale deploy/background-deployment --replicas=1
+#    The split pods keep running after this, so scale them to 0 as well —
+#    otherwise both sets are live and every beat task fires twice.
+```
+
+Both deployments can coexist briefly during cutover, but **do NOT
+run both at non-zero replicas long-term** — two beat schedulers on
+the same Postgres broker fire every crontab task twice.
+
+---
+
+## 6. Deployment order
+
+Safe to roll out **in this order, defaults OFF**:
+
+1. Apply the configmap (no behaviour change — flags default OFF):
+   ```bash
+   kubectl apply -f darwin-kubernetes/env-configmap.yaml
+   ```
+2. Apply the (possibly updated) secrets:
+   ```bash
+   kubectl apply -f darwin-kubernetes/secrets.yaml
+   ```
+3. Apply the Redis StatefulSet:
+   ```bash
+   kubectl apply -f darwin-kubernetes/redis-statefulset.yaml
+   ```
+4. Rebuild + push the backend image (so it has `redis` and `bokeh` deps).
+5. Rebuild + push the web image (so the UX rewrites ship).
+6. Roll out the deployments:
+   ```bash
+   kubectl rollout restart deploy/api-server-deployment deploy/background-deployment deploy/web-server-deployment
+   ```
+7. Wait for health, then verify (§7).
+
+**At this point nothing has changed for users** — Redis is up but
+nothing is using it, and the api_server / background pods just have
+new dependencies + new env vars they're ignoring.
+
+---
+
+## 7. Verification checklist (after deploy, BEFORE flipping flags)
+
+- [ ] All pods healthy: `kubectl get pods -l 'app in (api-server,background,redis)'`
+- [ ] Redis responds: `kubectl exec svc/redis -- redis-cli PING` → `PONG`
+- [ ] api_server logs show no errors importing the new modules
+- [ ] `/api/health` returns 200
+- [ ] Open `/assistants/mine` — drag handles visible on visible rows, default-pin shows on first row, search input present
+- [ ] Open `/assistants/gallery` — sees Yours / Featured sections (and Shared if applicable), filter chips, sort dropdown, columns dropdown
+- [ ] Send a chat message — succeeds (proves rate limiter, even though OFF, didn't break the dependency wiring)
+- [ ] Background indexer still picks up new indexing attempts (bg-scaling change in `update.py`)
+
+---
+
+## 8. Enabling features (per environment, in any order)
+
+Each flag is independent. Flipping one doesn't require the others.
+
+### 8a. Redis KV cache (settings, tokens, invited users)
+
+```bash
+kubectl patch configmap env-configmap --type merge -p '{"data":{"REDIS_KV_CACHE_ENABLED":"true"}}'
+kubectl rollout restart deploy/api-server-deployment
+```
+
+**Smoke test:** change an admin setting in pod A, verify it's visible
+on pod B within seconds (not TTL).
+
+### 8b. Per-user request rate limit
+
+Pick window values per your traffic shape. Recommended at "few
+hundred users" scale:
+
+```bash
+kubectl patch configmap env-configmap --type merge -p '{"data":{
+  "REQUEST_RATE_LIMIT_ENABLED":"true",
+  "REQUEST_RATE_LIMIT_PER_MINUTE":"20",
+  "REQUEST_RATE_LIMIT_PER_HOUR":"300"}}'
+kubectl rollout restart deploy/api-server-deployment
+```
+
+**Smoke test:** send 21 chat messages in <60s — the 21st returns 429
+with `Retry-After` header.
+
+### 8c. Persona list cache
+
+```bash
+# add to the configmap:
+PERSONA_CACHE_ENABLED: "true"
+PERSONA_CACHE_TTL_SECONDS: "86400"
+
+kubectl apply -f darwin-kubernetes/env-configmap.yaml
+kubectl rollout restart deploy/api-server-deployment deploy/background-deployment
+```
+
+> The background pod is restarted too: `ee/danswer/db/user_group.py`
+> mutations that run there must also bust the cache.
+
+**Smoke test:** load `/assistants/mine`, edit one assistant's name in
+the admin UI, refresh — the name updates immediately (not on TTL).
+
+### 8d. Bg-scaling Dask topology
+
+Not enabled by env flag — it's a deployment-shape change. Out of
+scope for this PR's flip-a-switch flow; if/when adopted, see §5b.
+
+---
+
+## 9. Rollback
+
+Each feature flag flips off independently. The two emergency knobs:
+
+- **Disable a feature flag** (takes effect once the pods restart — the
+  configmap is only read into the environment at pod start):
+  ```bash
+  kubectl patch configmap env-configmap --type merge -p '{"data":{"REDIS_KV_CACHE_ENABLED":""}}'
+  kubectl rollout restart deploy/api-server-deployment
+  ```
+- **Redis pod dies entirely** — every Redis call in this codebase is
+  wrapped fail-open. The app falls back to direct Postgres reads (cache),
+  permissive (rate limit), or no invalidation (persona cache;
+  worst-case 24h staleness via TTL). **No outage.** Logs will be noisy
+  with `Redis GET/SET/DEL failed: …` warnings — that's the signal that
+  Redis needs attention.
+
+To roll back the **code** entirely: revert the merge commit, redeploy.
+All features default OFF means even without revert, setting all
+`*_ENABLED=""` returns the app to pre-PR behaviour.
+
+---
+
+## 10. Known footguns
+
+### 10a. ~~Bg-scaling k8s manifests don't have `REDIS_PASSWORD` wired~~ — RESOLVED
+
+**Closed for the Darwin path.** The 5 ported manifests under
+`darwin-kubernetes/` (added in `19335e31`) all wire `REDIS_PASSWORD`
+via `secretKeyRef` with `optional: true`, matching the existing
+`darwin-kubernetes/background-deployment.yaml` pattern. So persona-
+cache invalidation from any future Celery / indexer-scheduler /
+dask-worker task path will work correctly once you switch to the
+split topology.
+
+The upstream `deployment/kubernetes/*` files are still missing
+`REDIS_PASSWORD` env wiring, but **Darwin doesn't apply from that
+tree** — it's reference-only (see AGENTS.md "Critical fact §9").
+Leave them alone unless/until you adopt the upstream-style
+deployment shape outside Darwin.
+
+### 10b. `backend/scripts/seed_assistants.py` bypasses persona-cache invalidation
+
+The seed script writes rows via raw `session.add(Persona(...))` rather
+than going through `upsert_persona()`, so `invalidate_personas_all()`
+never fires.
+
+- **Today's impact: none** if `PERSONA_CACHE_ENABLED` is the default
+  OFF.
+- **If you seed with the cache enabled**, `/persona` will keep
+  returning the pre-seed list until either a real mutation flows
+  through the proper code path or the 24h TTL kicks in. Manual fix:
+  ```bash
+  kubectl exec svc/redis -- redis-cli DEL danswer:personas:all:not_deleted
+  ```
+- **One-line code fix** if this becomes a recurring problem: import
+  `invalidate_personas_all` and call it at the end of `main()` in
+  `seed_assistants.py`.
+
+### 10c. `update.py` indexing scheduler change needs human eyes
+
+CLAUDE.md flags `update.py` / scheduler changes for manual confirmation
+(past breakage was silent — worker died with no logs). The bg-scaling
+commit modifies the Dask scheduler / submission path; verify locally
+via `python scripts/dev_run_background_jobs.py` and confirm the worker
+boots cleanly + dispatches an indexing attempt without errors.
+
+### 10d. Frontend's `chosen_assistants` array can hold stale ids after seed wipe
+
+If you run `python -m scripts.seed_assistants --clear` after seeding,
+deleted persona ids may remain in your `User.chosen_assistants` array.
+This is harmless — `get_personas` filters out non-existent ids — and
+the stale ids are cleaned up by the next preference write (any Manage
+page reorder / hide / show action).
+
+---
+
+## 11. Manual tests recommended before merge
+
+These need eyes — automated coverage doesn't catch them:
+
+- [ ] **Background worker boots cleanly** on the rebased branch
+  (CLAUDE.md gate). `python scripts/dev_run_background_jobs.py`,
+  confirm clean startup and an indexing attempt dispatches.
+- [ ] **Seed 50 assistants locally**, open `/assistants/mine` and
+  `/assistants/gallery`, exercise: drag-reorder, set default, hide via
+  toggle, click a hidden row (toggle should pulse), search, bulk
+  select, undo from a toast, switch column count in gallery, refresh
+  page → column choice persisted.
+- [ ] **With KV cache enabled**, edit a setting on pod A while pod B
+  is serving — second pod sees the new value without waiting for TTL.
+- [ ] **With rate limit enabled**, exceed the per-minute cap; verify
+  429 with `Retry-After` header.
+- [ ] **With persona cache enabled**, edit an assistant via admin UI;
+  `/persona` reflects the edit immediately.
+
+---
+
+## 12. Branch contents at-a-glance
+
+17 commits on top of `feature/darwin` (which now includes the merged
+`rajiv/add-claude` work — PR #45):
+
+```
+[BG-scale] darwin-kubernetes: port split-background manifests + lock convention in AGENTS.md
+[BG-scale] Scale indexing via remote Dask scheduler topology
+
+[Docs]     docs: add MIGRATION.md covering Redis / bg-scaling / UX
+
+[UX]       Gallery: column picker as dropdown to match Sort
+[UX]       Gallery: user-controllable column count (segmented control, persists)
+[UX]       Show document-set names on assistant cards (was: count only)
+[UX]       Parameterize gallery grid column count (default 3)
+[UX]       Remove tools chip from Manage Assistants page
+[UX]       Assistants UX polish: toggle highlight + gallery declutter
+[UX]       Add backend/scripts/seed_assistants.py for local UX testing
+[UX]       Assistant Gallery page UX overhaul
+[UX]       Manage Assistants page UX overhaul
+
+[Redis]    Persona list cache with explicit write-through invalidation
+[Redis]    P2: per-user request rate limiter on chat/query endpoints
+[Redis]    P1: Redis foundation + read-through KV cache
+[Redis]    docs: add Redis caching & scaling plan
+```
+
+Total: **51 files changed, +6372 / −499**. 63 unit tests pass.
diff --git a/REDIS_CACHING_PLAN.md b/REDIS_CACHING_PLAN.md
new file mode 100644
index 00000000000..754a3540222
--- /dev/null
+++ b/REDIS_CACHING_PLAN.md
@@ -0,0 +1,243 @@
+# Redis Caching & Scaling Plan
+
+**Goal:** expose the chat interface to a few hundred users. Evaluate and
+introduce Redis-based caching where it makes sense, alongside the scaling
+work that actually gates that user count.
+
+**Status:** plan only — no code yet. Follows the fork's plan template
+(Issues / Important Notes / Strategy / Tests). Treat each phase as an
+independently shippable PR.
+
+---
+
+## Context & key findings
+
+- **This fork has zero Redis today.** The only references in the repo are
+  comments in `db/index_attempt.py` and `db/retention.py` explaining how
+  the fork *avoids* Redis (Postgres advisory locks instead of fences,
+  Postgres as the Celery broker). Adding Redis is **net-new infrastructure**,
+  which AGENTS.md flags as a substantial dependency, not a drive-by.
+- **The real near-term scaling ceiling is the DB connection pool, not the
+  DB's query throughput.** `db/engine.py:72` sets `pool_size=40,
+  max_overflow=10` → 50 connections per api_server process. `get_session`
+  (`db/engine.py:94`) yields one session held for the **whole request**, and
+  `/send-message` (`server/query_and_chat/chat_backend.py:276`,
+  `handle_new_chat_message`) returns a `StreamingResponse` — so a connection
+  is pinned for the entire LLM stream (10–60s). At a few hundred users this
+  exhausts the pool before query volume ever stresses Postgres. **No cache
+  fixes this.**
+- **A rate limiter already exists but is the wrong kind.**
+  `server/query_and_chat/token_limit.py::check_token_rate_limits` enforces a
+  *token-budget* limit (global, DB-backed, EE-overridable). It is not a
+  *request-rate* limiter and `any_rate_limit_exists()` is gated by a
+  per-process `@lru_cache` (`token_limit.py:122`) that won't reflect changes
+  across replicas.
+- **The highest-leverage cache seam already exists:** the
+  `DynamicConfigStore` abstraction (`dynamic_configs/interface.py`,
+  `store.py`, `factory.py`) is the fork's equivalent of upstream's
+  `PgRedisKVStore`. Wrapping it gives transparent, write-through-invalidated
+  caching for everything routed through it with zero call-site changes.
+
+### Non-goals (explicitly out of scope)
+
+- **Do not** move the Celery broker to Redis (stays on Postgres — deliberate
+  divergence).
+- **Do not** replace indexing advisory-lock fences with Redis fences.
+- **Do not** cache chat sessions / messages (too mutable; correctness risk).
+- **Do not** cache LLM/embedding *responses* (semantic/correctness risk).
+- **Do not** add tenant key-prefixing — this fork is single-tenant.
+
+---
+
+## Phase summary
+
+| Phase | What | Caching? | Gates the user count? |
+|---|---|---|---|
+| **P0** | Connection-pool / session-holding fix + multi-replica | No | **Yes — do first** |
+| **P1** | Redis foundation + `DynamicConfigStore` read-through cache | Yes (flagship) | Enables the rest |
+| **P2** | Redis-backed per-user request rate limiting | No (protection) | Yes, for cost/abuse |
+| **P3** | Per-chat-turn config caches (LLM provider, embedding settings) | Yes | Measured add-on |
+| **Opt** | Document sets, connector OAuth/API caches, Redis sessions | Yes | Situational |
+
+---
+
+## P0 — Connection pool & session lifetime (prerequisite, not caching)
+
+### Issues to address
+At a few hundred users, concurrent streaming chats pin all 50 connections
+per process; unrelated (even cached) requests then queue. This is the first
+thing that breaks.
+
+### Important notes
+- `handle_new_chat_message` holds `Depends(get_session)` for the full
+  `StreamingResponse`. The fix is to scope DB work to *before* the stream
+  starts (load everything needed, commit the user message), then run the
+  stream without a pinned pooled connection, opening short-lived sessions
+  only for the final persistence write.
+- This touches the core chat path — **per CLAUDE.md, confirm with the human
+  and verify the worker/stream boots cleanly** before/after. High blast
+  radius; ship as its own PR with manual load verification.
+- Independently: run **multiple api_server replicas** (k8s) behind nginx and
+  size `pool_size + max_overflow` to Postgres `max_connections` ÷ replica count.
+
+### Implementation strategy
+1. Audit `handle_new_chat_message` and the `process_message` generator for
+   what truly needs the session during streaming vs. before it.
+2. Introduce a pattern where the streaming generator uses
+   `get_session_context_manager()` for short writes rather than the
+   request-scoped `Depends(get_session)`.
+3. Bump replica count in
+   `darwin-kubernetes/api_server-service-deployment.yaml`; re-tune pool.
+
+### Tests
+- Load test: N concurrent streaming chats (N > pool size) — confirm
+  non-chat endpoints (settings, session list) stay responsive.
+- Verify no `QueuePool limit ... connection timed out` under load.
+
+---
+
+## P1 — Redis foundation + DynamicConfigStore cache (flagship)
+
+### Issues to address
+Cache the highest-frequency, fires-on-every-page reads (settings, tokens,
+invited users) at one central, low-risk seam, with correct cross-replica
+invalidation.
+
+### Important notes
+- Mirror upstream's `PgRedisKVStore` *shape* but fit this fork's interface:
+  `store(key, val, encrypt)` / `load(key)` / `delete(key)` raising
+  `ConfigNotFoundError` (`dynamic_configs/interface.py`).
+- Write-through invalidation is **free** here — the same `store()`/`delete()`
+  that writes Postgres updates/clears Redis, so all replicas see changes.
+- Single-tenant → plain key prefix (e.g. `danswer_kv:`), no tenant wrapper.
+- Redis must be **fail-open**: if Redis is down, fall back to Postgres so an
+  outage degrades latency, not availability.
+
+### Implementation strategy
+1. **Dependency:** add `redis==<pin>` to `backend/requirements/default.txt`.
+2. **Config:** add `REDIS_HOST/REDIS_PORT/REDIS_PASSWORD/REDIS_DB_NUMBER`
+   to `configs/app_configs.py` via the existing `os.environ.get` pattern;
+   add a `REDIS_KEY_VALUE_CACHE_TTL` (default ~1 day, mirroring upstream).
+3. **Client module:** new `backend/danswer/redis/redis_pool.py` — a
+   `ConnectionPool` singleton + `get_redis_client()`. (Upstream's
+   `redis_pool.py` is the template, minus IAM/tenant code.)
+4. **Cache layer:** add `CachedDynamicConfigStore` (decorator/wrapper around
+   `PostgresBackedDynamicConfigStore`) in `dynamic_configs/store.py`, or add
+   Redis read-through directly to the PG store. `load()` checks Redis →
+   misses fall to PG and repopulate; `store()`/`delete()` write PG then
+   set/clear Redis. Route it via `dynamic_configs/factory.py`
+   (`get_dynamic_config_store`) behind a `DYNAMIC_CONFIG_STORE` value so it's
+   toggleable.
+5. **Deployment:** Redis statefulset + service in `darwin-kubernetes/`;
+   redis service in `deployment/docker_compose/docker-compose.dev.yml`;
+   wire env in `env-configmap.yaml` + password in `secrets.yaml`.
+
+### What this transparently caches
+Everything through `get_dynamic_config_store()`: app settings
+(`server/settings/store.py`, key `danswer_settings`), Slack bot tokens,
+invited users, telemetry id, Gmail/GDrive connector-auth blobs.
+
+### Tests
+- Unit: `load` hits Redis on 2nd call (mock PG, assert one PG query);
+  `store`/`delete` invalidate; Redis-down path falls back to PG (fail-open).
+- Integration: change settings via the admin endpoint → second replica (or
+  fresh client) reads the new value without a TTL wait.
+
+---
+
+## P2 — Redis-backed per-user request rate limiting (protection)
+
+### Issues to address
+A few hundred users on chat = real risk of runaway LLM **cost** and hitting
+the **provider's** rate limits. Need per-user request-rate limiting that is
+correct across replicas (in-memory counters let through ~N× at N pods).
+
+### Important notes
+- This **complements**, does not replace, the existing token-budget limiter
+  in `token_limit.py`. Keep that; add request-rate limiting on top.
+- Also fixes the latent multi-replica issue: the per-process `@lru_cache` on
+  `any_rate_limit_exists()` (`token_limit.py:122`) can be made Redis-backed
+  or given a short TTL so all pods agree.
+- No rate-limit middleware exists today (only `latency_logging.py`) — this is
+  net-new. fastapi 0.109.2 is compatible with `fastapi-limiter` or a small
+  custom `incr`+`expire` limiter.
+
+### Implementation strategy
+1. Add a Redis counter limiter: key `ratelimit:msg:{user_id}:{bucket}`,
+   atomic `incr` + `expire(window, NX)` (or a small Lua script for
+   multi-tier limits). Reuse the P1 `redis_pool` client.
+2. Apply at the chat entrypoint (`/send-message`) as a dependency, before any
+   LLM work; raise `HTTPException(429)` (this fork uses `HTTPException`, not
+   `OnyxError`).
+3. Make limits env-configurable in `configs/app_configs.py` (per-user
+   per-minute / per-hour). Default off via env so it's opt-in per environment.
+
+### Tests
+- Unit: counter increments/expires; exceeds → 429.
+- Integration: two clients simulating two replicas share the same limit
+  (single Redis), confirm the aggregate cap holds.
+
+---
+
+## P3 — Per-chat-turn config caches (measured add-on)
+
+### Issues to address
+LLM-provider and embedding-settings lookups fire on every chat turn; across
+hundreds of users that is a meaningful aggregate load, though each is cheap.
+
+### Important notes
+- **Cache the serialized Pydantic snapshot, not the ORM object** — these
+  return SQLAlchemy models with lazy relationships; caching the ORM instance
+  risks `DetachedInstanceError` / stale relationship reads.
+- Invalidation is **not** free here (unlike P1) — must add explicit
+  bust/refresh calls inside the relevant `db/` mutation functions. This is
+  the added surface area; only do it after P0/P1 and after measuring.
+
+### Implementation strategy
+- **Default LLM provider:** cache `db/llm.py::fetch_default_provider` /
+  `fetch_existing_llm_providers`; invalidate in the provider create/update/
+  delete paths in `db/llm.py` and the admin endpoint.
+- **Current embedding/search settings:** cache
+  `db/embedding_model.py::get_current_db_embedding_model`; invalidate on
+  index-swap (when a new `EmbeddingModel` becomes `PRESENT`).
+- Use short TTLs as a backstop even with explicit invalidation.
+
+### Tests
+- Unit: cached fetch returns snapshot; mutation path clears it.
+- Integration: change default provider → chat picks it up without restart.
+
+---
+
+## Optional / deferred
+
+| Item | Where | Note |
+|---|---|---|
+| **Document sets** | `db/document_set.py::fetch_document_sets` | Global key in this fork (base version ignores `user_id`); write-through on the ~5 mutation fns. Admin-page frequency, modest win. |
+| **Connector OAuth / external-API caches** | per-connector (cf. upstream Confluence/Slack) | Only if those connectors are active; cuts external rate-limit pressure. Short TTL. |
+| **Redis auth sessions** | `auth/users.py` (fastapi-users RedisStrategy) | Offloads per-request auth from Postgres; bigger change + security/invalidation care. Defer until auth DB load shows up. `SESSION_EXPIRE_TIME_SECONDS` already exists. |
+| **Personas list** | — | **Skip backend cache** (per-user + group-membership invalidation trap). Use frontend (SWR) caching instead. |
+
+---
+
+## Cross-cutting
+
+### New files / touched files
+- New: `backend/danswer/redis/redis_pool.py`, Redis k8s manifests.
+- Touched: `requirements/default.txt`, `configs/app_configs.py`,
+  `dynamic_configs/store.py`, `dynamic_configs/factory.py`,
+  `docker-compose.dev.yml`, `darwin-kubernetes/{env-configmap,secrets}.yaml`,
+  `api_server-service-deployment.yaml` (replicas). P2/P3 touch
+  `chat_backend.py`, `token_limit.py`, `db/llm.py`, `db/embedding_model.py`.
+
+### Restart / bounce list (per CLAUDE.md)
+- New env vars / requirements → rebuild + restart api_server (`dapi`),
+  background jobs (`dbe`), Slack listener (`dsl`).
+- `redis` dependency add → `pip install` in the venv before running.
+
+### Open questions for the human
+1. **P0 session-refactor sign-off** — high blast radius on the chat path;
+   confirm approach + manual load verification before merge.
+2. Redis deployment shape in `darwin-kubernetes` — single statefulset vs.
+   managed Redis? Persistence needed (cache-only ⇒ probably not)?
+3. Default rate-limit values for P2 (per-user/min, per-user/hour).
+4. Sequencing: is P0 acceptable to do in parallel with P1, or strictly first?
diff --git a/backend/.gitignore b/backend/.gitignore
index 6b3219cc30e..f166ca65e72 100644
--- a/backend/.gitignore
+++ b/backend/.gitignore
@@ -9,3 +9,9 @@ api_keys.py
 vespa-app.zip
 dynamic_config_storage/
 celerybeat-schedule*
+
+# Pywikibot drops these in cwd when the mediawiki connector test runs.
+# Local debugging artifacts, not source.
+apicache/
+throttle.ctrl
+.pytest_cache/
diff --git a/backend/alembic/versions/b2c3d4e5f6a7_analytics_user_daily_stats.py b/backend/alembic/versions/b2c3d4e5f6a7_analytics_user_daily_stats.py
new file mode 100644
index 00000000000..bb86201ef3c
--- /dev/null
+++ b/backend/alembic/versions/b2c3d4e5f6a7_analytics_user_daily_stats.py
@@ -0,0 +1,58 @@
+"""Analytics per-user daily stats table (durable leaderboard source)
+
+Durable per-user-per-day chat activity counts so the "top users by
+activity" leaderboard survives chat retention and spans full history.
+Upserted daily by the rollup BEFORE the retention sweep (see
+db/models.py::AnalyticsUserDailyStats and db/analytics_rollup.py).
+
+Composite PK (user_id, date). No FK to `user` — email is joined live, so
+a deleted user drops off the leaderboard without erasing history.
+
+Revision ID: b2c3d4e5f6a7
+Revises: e7f8a9b0c1d2
+Create Date: 2026-05-31
+
+"""
+from alembic import op
+import sqlalchemy as sa
+import fastapi_users_db_sqlalchemy
+
+
+# revision identifiers, used by Alembic.
+revision = "b2c3d4e5f6a7"
+down_revision = "e7f8a9b0c1d2"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    """Create `analytics_user_daily_stats` and index it on `date`."""
+    table = "analytics_user_daily_stats"
+    guid = fastapi_users_db_sqlalchemy.generics.GUID()
+    # Daily tallies: NOT NULL integers with a server-side default of 0.
+    counters = [
+        sa.Column(name, sa.Integer(), server_default="0", nullable=False)
+        for name in ("message_count", "like_count", "dislike_count")
+    ]
+    op.create_table(
+        table,
+        sa.Column("user_id", guid, nullable=False),
+        sa.Column("date", sa.Date(), nullable=False),
+        *counters,
+        sa.Column(
+            "rolled_up_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.PrimaryKeyConstraint("user_id", "date"),
+    )
+    op.create_index("ix_analytics_user_daily_stats_date", table, ["date"])
+
+
+def downgrade() -> None:
+    """Undo `upgrade`: drop the `date` index, then the table."""
+    table = "analytics_user_daily_stats"
+    # Index first, table second — the reverse of the creation order.
+    op.drop_index("ix_analytics_user_daily_stats_date", table_name=table)
+    op.drop_table(table)
diff --git a/backend/alembic/versions/c3d4e5f6a7b8_analytics_persona_daily_stats.py b/backend/alembic/versions/c3d4e5f6a7b8_analytics_persona_daily_stats.py
new file mode 100644
index 00000000000..ab3cf393b61
--- /dev/null
+++ b/backend/alembic/versions/c3d4e5f6a7b8_analytics_persona_daily_stats.py
@@ -0,0 +1,57 @@
+"""Analytics per-assistant daily stats table
+
+Durable per-assistant-per-day chat activity counts so the "most-used
+assistants" leaderboard (and an approximate datasets-in-use view derived
+via persona__document_set) survives chat retention and spans full history.
+Upserted daily by the rollup BEFORE the retention sweep (see
+db/models.py::AnalyticsPersonaDailyStats and db/analytics_rollup.py).
+
+Composite PK (persona_id, date). No FK to `persona` — name joined live, so
+a deleted assistant drops off without erasing history.
+
+Revision ID: c3d4e5f6a7b8
+Revises: b2c3d4e5f6a7
+Create Date: 2026-05-31
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "c3d4e5f6a7b8"
+down_revision = "b2c3d4e5f6a7"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    """Create `analytics_persona_daily_stats` and index it on `date`."""
+    table = "analytics_persona_daily_stats"
+    # Daily tallies: NOT NULL integers with a server-side default of 0.
+    counters = [
+        sa.Column(name, sa.Integer(), server_default="0", nullable=False)
+        for name in ("session_count", "message_count", "like_count", "dislike_count")
+    ]
+    op.create_table(
+        table,
+        sa.Column("persona_id", sa.Integer(), nullable=False),
+        sa.Column("date", sa.Date(), nullable=False),
+        *counters,
+        sa.Column(
+            "rolled_up_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.PrimaryKeyConstraint("persona_id", "date"),
+    )
+    op.create_index("ix_analytics_persona_daily_stats_date", table, ["date"])
+
+
+def downgrade() -> None:
+    """Undo `upgrade`: drop the `date` index, then the table."""
+    table = "analytics_persona_daily_stats"
+    # Index first, table second — the reverse of the creation order.
+    op.drop_index("ix_analytics_persona_daily_stats_date", table_name=table)
+    op.drop_table(table)
diff --git a/backend/alembic/versions/d4e5f6a7b8c9_file_store_object_key.py b/backend/alembic/versions/d4e5f6a7b8c9_file_store_object_key.py
new file mode 100644
index 00000000000..f2c4e5c88d8
--- /dev/null
+++ b/backend/alembic/versions/d4e5f6a7b8c9_file_store_object_key.py
@@ -0,0 +1,34 @@
+"""file_store: add object_key, make lobj_oid nullable (object-store backend)
+
+Lets the file_store table locate bytes either in a Postgres large object
+(lobj_oid) OR an object-storage blob (object_key). Both nullable so
+PostgresBackedFileStore and AzureBlobFileStore coexist during migration.
+See db/models.py::PGFileStore and file_store/file_store.py.
+
+Revision ID: d4e5f6a7b8c9
+Revises: c3d4e5f6a7b8
+Create Date: 2026-06-01
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "d4e5f6a7b8c9"
+down_revision = "c3d4e5f6a7b8"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:  # add object_key; let lobj_oid be NULL for blob-backed rows
+    op.add_column("file_store", sa.Column("object_key", sa.String(), nullable=True))
+    op.alter_column("file_store", "lobj_oid", existing_type=sa.Integer(), nullable=True)
+
+
+def downgrade() -> None:
+    """Re-impose NOT NULL on `lobj_oid`, then drop `object_key`."""
+    # NOTE: only safe if no rows rely on object_key (all bytes back in lobjs).
+    oid_type = sa.Integer()
+    op.alter_column("file_store", "lobj_oid", existing_type=oid_type, nullable=False)
+    op.drop_column("file_store", "object_key")
diff --git a/backend/alembic/versions/e5f6a7b8c9d0_document_indexed_content_hash.py b/backend/alembic/versions/e5f6a7b8c9d0_document_indexed_content_hash.py
new file mode 100644
index 00000000000..bd6c0d21a61
--- /dev/null
+++ b/backend/alembic/versions/e5f6a7b8c9d0_document_indexed_content_hash.py
@@ -0,0 +1,35 @@
+"""document: add indexed_content_hash (skip re-index of unchanged content)
+
+Stores the sha256 of a document's indexed content as of the last successful
+Vespa write. The indexing pipeline skips the expensive Vespa clear-and-rewrite
+when a connector re-emits a document whose content is unchanged even though its
+doc_updated_at advanced (e.g. Salesforce LastModifiedDate churn re-pulling the
+whole corpus every poll). Nullable: existing rows fall back to the
+doc_updated_at skip until they're next indexed. See
+db/models.py::Document and indexing/indexing_pipeline.py::get_doc_ids_to_update.
+
+Revision ID: e5f6a7b8c9d0
+Revises: d4e5f6a7b8c9
+Create Date: 2026-06-03
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "e5f6a7b8c9d0"
+down_revision = "d4e5f6a7b8c9"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    """Add the nullable `indexed_content_hash` column to `document`."""
+    # Nullable: rows that predate this migration simply have no hash yet.
+    hash_column = sa.Column("indexed_content_hash", sa.String(), nullable=True)
+    op.add_column("document", hash_column)
+
+
+def downgrade() -> None:  # drop the column; any stored hashes are discarded
+    op.drop_column("document", "indexed_content_hash")
diff --git a/backend/alembic/versions/e7f8a9b0c1d2_analytics_user_first_seen.py b/backend/alembic/versions/e7f8a9b0c1d2_analytics_user_first_seen.py
new file mode 100644
index 00000000000..52dc15a6743
--- /dev/null
+++ b/backend/alembic/versions/e7f8a9b0c1d2_analytics_user_first_seen.py
@@ -0,0 +1,56 @@
+"""Analytics user-first-seen table (chat adoption curve)
+
+Durable per-user "first date this user used chat" aggregate so the
+adoption curve on the admin Analytics page survives chat retention
+deletes. Populated incrementally by the rollup BEFORE the retention sweep
+(see db/models.py::AnalyticsUserFirstSeen and db/analytics_rollup.py).
+
+No FK to `user` on purpose — deleting a user must not erase the historical
+fact that they once adopted chat, nor cascade into this aggregate (mirrors
+analytics_daily_rollup).
+
+Revision ID: e7f8a9b0c1d2
+Revises: c8a4e2f9d1b3
+Create Date: 2026-05-31
+
+"""
+from alembic import op
+import sqlalchemy as sa
+import fastapi_users_db_sqlalchemy
+
+
+# revision identifiers, used by Alembic.
+revision = "e7f8a9b0c1d2"
+down_revision = "c8a4e2f9d1b3"
+branch_labels: None = None
+depends_on: None = None
+
+
+def upgrade() -> None:
+    """Create `analytics_user_first_seen` and index it on `first_seen_date`."""
+    table = "analytics_user_first_seen"
+    # No FK to `user`: deleting a user must not cascade into this aggregate.
+    guid = fastapi_users_db_sqlalchemy.generics.GUID()
+    op.create_table(
+        table,
+        sa.Column("user_id", guid, nullable=False),
+        sa.Column("first_seen_date", sa.Date(), nullable=False),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=False,
+        ),
+        sa.PrimaryKeyConstraint("user_id"),
+    )
+    op.create_index(
+        "ix_analytics_user_first_seen_first_seen_date", table, ["first_seen_date"]
+    )
+
+
+def downgrade() -> None:
+    """Undo `upgrade`: drop the `first_seen_date` index, then the table."""
+    table = "analytics_user_first_seen"
+    # Index first, table second — the reverse of the creation order.
+    op.drop_index("ix_analytics_user_first_seen_first_seen_date", table_name=table)
+    op.drop_table(table)
diff --git a/backend/danswer/background/celery/celery_app.py b/backend/danswer/background/celery/celery_app.py
index cce46a9b9f2..ed21fc40576 100644
--- a/backend/danswer/background/celery/celery_app.py
+++ b/backend/danswer/background/celery/celery_app.py
@@ -14,7 +14,13 @@
 from danswer.background.task_utils import name_cc_cleanup_task
 from danswer.background.task_utils import name_cc_prune_task
 from danswer.background.task_utils import name_document_set_sync_task
+from danswer.configs.app_configs import CELERY_BROKER_REDIS_ENABLED
+from danswer.configs.app_configs import CELERY_REDIS_DB_NUMBER
 from danswer.configs.app_configs import JOB_TIMEOUT
+from danswer.configs.app_configs import REDIS_HOST
+from danswer.configs.app_configs import REDIS_PASSWORD
+from danswer.configs.app_configs import REDIS_PORT
+from danswer.configs.app_configs import REDIS_SSL
 from danswer.connectors.factory import instantiate_connector
 from danswer.connectors.models import InputType
 from danswer.db.connector_credential_pair import get_connector_credential_pair
@@ -22,7 +28,7 @@
 from danswer.db.connector_credential_pair import release_deletion_lock
 from danswer.db.connector_credential_pair import try_acquire_deletion_lock
 from danswer.db.deletion_attempt import check_deletion_attempt_is_allowed
-from danswer.db.document import get_documents_for_connector_credential_pair
+from danswer.db.document import get_document_ids_for_connector_credential_pair
 from danswer.db.document import prepare_to_modify_documents
 from danswer.db.document_set import delete_document_set
 from danswer.db.document_set import fetch_document_sets
@@ -41,10 +47,31 @@
 
 logger = setup_logger()
 
-connection_string = build_connection_string(db_api=SYNC_DB_API)
-celery_broker_url = f"sqla+{connection_string}"
-celery_backend_url = f"db+{connection_string}"
+if CELERY_BROKER_REDIS_ENABLED:
+    # Redis broker + result backend. Takes Celery's queue traffic off Postgres
+    # (the default sqla+/db+ transport polls and writes the DB); the dedicated
+    # logical DB (CELERY_REDIS_DB_NUMBER) keeps its keys off the cache DB. Task
+    # status lives in our task_queue_jobs table, not this backend, so moving it
+    # is safe. The password is percent-encoded (raw "@", "/", "#" corrupt the
+    # URL); rediss:// adds ssl_cert_reqs, which Celery's Redis backend requires.
+    from urllib.parse import quote as _quote
+    _redis_scheme = "rediss" if REDIS_SSL else "redis"
+    _redis_auth = f":{_quote(REDIS_PASSWORD, safe='')}@" if REDIS_PASSWORD else ""
+    _redis_tls = "?ssl_cert_reqs=required" if REDIS_SSL else ""
+    _redis_url = f"{_redis_scheme}://{_redis_auth}{REDIS_HOST}:{REDIS_PORT}"
+    _redis_url += f"/{CELERY_REDIS_DB_NUMBER}{_redis_tls}"
+    celery_broker_url = celery_backend_url = _redis_url
+else:
+    connection_string = build_connection_string(db_api=SYNC_DB_API)
+    celery_broker_url = f"sqla+{connection_string}"
+    celery_backend_url = f"db+{connection_string}"
 celery_app = Celery(__name__, broker=celery_broker_url, backend=celery_backend_url)
+# Retry the broker connection during worker startup instead of crashing if the
+# broker isn't reachable yet. Matters now that Redis can be the broker (a hard
+# dependency) — the worker may boot before Redis is ready. Also silences the
+# Celery 5.3 CPendingDeprecationWarning that, from 6.0, startup retries will no
+# longer follow `broker_connection_retry` unless this is set explicitly.
+celery_app.conf.broker_connection_retry_on_startup = True
 
 
 _SYNC_BATCH_SIZE = 100
@@ -171,14 +198,13 @@ def prune_documents_task(connector_id: int, credential_id: int) -> None:
                 runnable_connector
             )
 
-            all_indexed_document_ids = {
-                doc.id
-                for doc in get_documents_for_connector_credential_pair(
+            all_indexed_document_ids = set(
+                get_document_ids_for_connector_credential_pair(
                     db_session=db_session,
                     connector_id=connector_id,
                     credential_id=credential_id,
                 )
-            }
+            )
 
             doc_ids_to_remove = list(all_indexed_document_ids - all_connector_doc_ids)
 
@@ -248,7 +274,7 @@ def _sync_document_batch(document_ids: list[str], db_session: Session) -> None:
         try:
             cursor = None
             while True:
-                document_batch, cursor = fetch_documents_for_document_set_paginated(
+                document_id_batch, cursor = fetch_documents_for_document_set_paginated(
                     document_set_id=document_set_id,
                     db_session=db_session,
                     current_only=False,
@@ -256,7 +282,7 @@ def _sync_document_batch(document_ids: list[str], db_session: Session) -> None:
                     limit=_SYNC_BATCH_SIZE,
                 )
                 _sync_document_batch(
-                    document_ids=[document.id for document in document_batch],
+                    document_ids=list(document_id_batch),
                     db_session=db_session,
                 )
                 if cursor is None:
diff --git a/backend/danswer/background/update.py b/backend/danswer/background/update.py
index 8d6f819fd52..d00aadcf108 100755
--- a/backend/danswer/background/update.py
+++ b/backend/danswer/background/update.py
@@ -1,4 +1,5 @@
 import logging
+import os
 import time
 from datetime import datetime
 from typing import Any
@@ -506,9 +507,35 @@ def update_loop(delay: int = 10, num_workers: int = NUM_INDEXING_WORKERS) -> Non
         model_server_port=MODEL_SERVER_PORT,
     )
 
+    # Pick the indexing-execution backend in priority order:
+    #
+    # 1. DASK_SCHEDULER_ADDRESS — production mode in K8s. Indexing
+    #    work is dispatched to a remote Dask scheduler service that
+    #    fans out to a horizontally-scalable pool of `dask-worker`
+    #    pods. This is the only mode that supports scaling indexing
+    #    concurrency by adding pods (vs the in-pod LocalCluster which
+    #    is bounded by the host's RAM).
+    # 2. DASK_JOB_CLIENT_ENABLED — legacy in-process Dask LocalCluster.
+    #    All workers in the same Python process. Used in dev and in
+    #    pre-distributed-mode prod deployments.
+    # 3. SimpleJobClient — bare ProcessPoolExecutor-style fallback.
+    #    Used by some local dev flows that don't want the Dask
+    #    overhead.
+    #
+    # The remote-scheduler path opens two clients against the same scheduler —
+    # `client_primary` for the active embedding model and `client_secondary` for
+    # the in-flight secondary index during model swaps — so a single
+    # dask-scheduler service serves both without code changes elsewhere.
     client_primary: Client | SimpleJobClient
     client_secondary: Client | SimpleJobClient
-    if DASK_JOB_CLIENT_ENABLED:
+    dask_scheduler_address = os.environ.get("DASK_SCHEDULER_ADDRESS")
+    if dask_scheduler_address:
+        logger.info("Connecting to remote Dask scheduler at %s", dask_scheduler_address)
+        client_primary = Client(dask_scheduler_address)
+        client_secondary = Client(dask_scheduler_address)
+        if LOG_LEVEL.lower() == "debug":
+            client_primary.register_worker_plugin(ResourceLogger())
+    elif DASK_JOB_CLIENT_ENABLED:
         cluster_primary = LocalCluster(
             n_workers=num_workers,
             threads_per_worker=1,
diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py
index 321a8bdca01..6b5b82906bf 100644
--- a/backend/danswer/configs/app_configs.py
+++ b/backend/danswer/configs/app_configs.py
@@ -142,6 +142,38 @@
 POSTGRES_PORT = os.environ.get("POSTGRES_PORT") or "5432"
 POSTGRES_DB = os.environ.get("POSTGRES_DB") or "postgres"
 
+# SQLAlchemy connection-pool sizing, PER PROCESS. Max connections a single
+# process can open to Postgres is POSTGRES_POOL_SIZE + POSTGRES_POOL_OVERFLOW.
+# The cluster-wide total is (that) × (replicas of every pod that imports the
+# engine: api-server, background, model servers if they touch the DB), and it
+# must stay under Postgres `max_connections` with headroom. Unset/empty values
+# fall back to the previous hardcoded 40+10; override DOWN per deployment as
+# you scale replicas (e.g. a small api-server pool when running many replicas).
+POSTGRES_POOL_SIZE = int(os.environ.get("POSTGRES_POOL_SIZE") or 40)
+POSTGRES_POOL_OVERFLOW = int(os.environ.get("POSTGRES_POOL_OVERFLOW") or 10)
+
+# File store backend — where uploaded files / chat attachments / connector
+# blobs live. Default "PostgresBackedFileStore" (Postgres large objects).
+# Set to "AzureBlobFileStore" to offload the BYTES to Azure Blob Storage
+# (metadata stays in the file_store table): keeps the DB/WAL/backups lean and
+# stops file reads from holding a Postgres connection for the whole stream.
+FILE_STORE_TYPE = os.environ.get("FILE_STORE_TYPE") or "PostgresBackedFileStore"
+# Only used when FILE_STORE_TYPE=AzureBlobFileStore (secret — set in
+# danswer-secrets). Container is auto-created on first use if absent.
+AZURE_BLOB_CONNECTION_STRING = os.environ.get("AZURE_BLOB_CONNECTION_STRING") or ""
+AZURE_BLOB_CONTAINER = os.environ.get("AZURE_BLOB_CONTAINER") or "danswer-files"
+
+# Chat file-upload limits. A chat-attached doc is stuffed WHOLE into the LLM
+# prompt (no retrieval), so it's bounded by the model context window. Two
+# guards: a cheap byte cap in whole MB (parsed with int(), all file types), and
+# a token cap on the extracted text (the real protection — rejects docs that
+# would overflow): CHAT_FILE_MAX_TOKEN_FRACTION (default 0.5) of the model's max
+# input tokens, leaving room for the system prompt, history, and the response.
+CHAT_FILE_MAX_SIZE_MB = int(os.environ.get("CHAT_FILE_MAX_SIZE_MB") or 25)
+CHAT_FILE_MAX_TOKEN_FRACTION = float(
+    os.environ.get("CHAT_FILE_MAX_TOKEN_FRACTION") or 0.5
+)
+
 
 #####
 # Connector Configs
@@ -174,6 +206,15 @@
 WEB_CONNECTOR_OAUTH_CLIENT_SECRET = os.environ.get("WEB_CONNECTOR_OAUTH_CLIENT_SECRET")
 WEB_CONNECTOR_OAUTH_TOKEN_URL = os.environ.get("WEB_CONNECTOR_OAUTH_TOKEN_URL")
 WEB_CONNECTOR_VALIDATE_URLS = os.environ.get("WEB_CONNECTOR_VALIDATE_URLS")
+# Hard cap on pages visited in a single recursive web crawl. Bounds runtime so a
+# large site can't run for hours and get killed mid-run (which marked the whole
+# attempt FAILED). 0 = unlimited; unset/empty falls back to the default (5000).
+WEB_CONNECTOR_MAX_PAGES = int(os.environ.get("WEB_CONNECTOR_MAX_PAGES") or 5000)
+# Per-page navigation timeout (ms) and retry count for transient fetch failures.
+WEB_CONNECTOR_PAGE_TIMEOUT_MS = int(
+    os.environ.get("WEB_CONNECTOR_PAGE_TIMEOUT_MS") or 30000
+)
+WEB_CONNECTOR_MAX_RETRIES = int(os.environ.get("WEB_CONNECTOR_MAX_RETRIES") or 3)
 
 HTML_BASED_CONNECTOR_TRANSFORM_LINKS_STRATEGY = os.environ.get(
     "HTML_BASED_CONNECTOR_TRANSFORM_LINKS_STRATEGY",
@@ -307,6 +348,95 @@
 )
 
 
+#####
+# Redis (cache + rate limiting)
+#####
+# Connection details. All env-driven; safe defaults for local dev.
+REDIS_HOST = os.environ.get("REDIS_HOST") or "localhost"
+REDIS_PORT = int(os.environ.get("REDIS_PORT") or 6379)
+REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD") or ""
+REDIS_DB_NUMBER = int(os.environ.get("REDIS_DB_NUMBER") or 0)
+REDIS_SSL = os.environ.get("REDIS_SSL", "").lower() == "true"
+REDIS_POOL_MAX_CONNECTIONS = int(os.environ.get("REDIS_POOL_MAX_CONNECTIONS") or 50)
+REDIS_HEALTH_CHECK_INTERVAL = int(os.environ.get("REDIS_HEALTH_CHECK_INTERVAL") or 60)
+REDIS_SOCKET_TIMEOUT_SECONDS = int(os.environ.get("REDIS_SOCKET_TIMEOUT_SECONDS") or 3)
+
+# Celery broker + result backend on Redis (instead of the default
+# SQLAlchemy/Postgres transport). Default OFF so local dev without Redis
+# still boots on the Postgres broker. When ON, Celery stops polling/writing
+# Postgres for its queue, removing that load from the DB. Uses a SEPARATE
+# Redis logical DB from the cache (CELERY_REDIS_DB_NUMBER, default 1) so
+# Celery's keys never collide with cache/rate-limit keys on REDIS_DB_NUMBER.
+# Task STATUS is unaffected — this fork tracks it in its own task_queue_jobs
+# table, not Celery's result backend.
+CELERY_BROKER_REDIS_ENABLED = (
+    os.environ.get("CELERY_BROKER_REDIS_ENABLED", "").lower() == "true"
+)
+CELERY_REDIS_DB_NUMBER = int(os.environ.get("CELERY_REDIS_DB_NUMBER") or 1)
+
+# Read-through KV cache layered atop PostgresBackedDynamicConfigStore.
+# When false (default), the store behaves exactly as before; when true,
+# reads check Redis first and writes/deletes invalidate Redis. Fail-open:
+# Redis errors degrade to direct Postgres, never an outage.
+REDIS_KV_CACHE_ENABLED = os.environ.get("REDIS_KV_CACHE_ENABLED", "").lower() == "true"
+# TTL (seconds) for KV entries cached in Redis (1 day default).
+REDIS_KV_CACHE_TTL_SECONDS = int(os.environ.get("REDIS_KV_CACHE_TTL_SECONDS") or 86400)
+
+# Per-user request-rate limiter (Redis-backed). Default OFF — complements
+# the token-budget limiter in token_limit.py with a request-count cap that
+# is correct across api_server replicas.
+REQUEST_RATE_LIMIT_ENABLED = (
+    os.environ.get("REQUEST_RATE_LIMIT_ENABLED", "").lower() == "true"
+)
+# Per-minute and per-hour message-send caps per (user|ip). 0 disables that
+# window (so you can enforce only one of them if you prefer).
+REQUEST_RATE_LIMIT_PER_MINUTE = int(
+    os.environ.get("REQUEST_RATE_LIMIT_PER_MINUTE") or 0
+)
+REQUEST_RATE_LIMIT_PER_HOUR = int(os.environ.get("REQUEST_RATE_LIMIT_PER_HOUR") or 0)
+
+# Per-user persona ("assistant") list cache. Caches the global persona list
+# + per-user group memberships in Redis; permission filter runs in Python
+# at request time. Explicit write-through invalidation lives in the
+# db/persona.py and ee/.../user_group.py mutation paths — the TTL below is
+# only a long-tail safety net for missed busts. Default OFF.
+PERSONA_CACHE_ENABLED = os.environ.get("PERSONA_CACHE_ENABLED", "").lower() == "true"
+PERSONA_CACHE_TTL_SECONDS = int(
+    os.environ.get("PERSONA_CACHE_TTL_SECONDS") or 86400  # 24 h backstop
+)
+
+# Basic connector/cc-pair info cache (the /manage/indexing-status read the
+# chat page uses to derive available source types). That read does a
+# per-cc-pair document-count aggregation that measured ~300ms on the live
+# DB and runs on every chat page load — the page's slowest fan-out call.
+# Pure TTL cache, global (same for all users), fail-open. No explicit
+# invalidation: the data (which connectors exist + have indexed docs)
+# changes slowly and brief staleness is harmless (it only feeds the source-
+# filter list + the "sources incomplete" setup modal), so a short TTL is
+# the whole strategy. Default OFF.
+CC_PAIR_INFO_CACHE_ENABLED = (
+    os.environ.get("CC_PAIR_INFO_CACHE_ENABLED", "").lower() == "true"
+)
+CC_PAIR_INFO_CACHE_TTL_SECONDS = int(
+    os.environ.get("CC_PAIR_INFO_CACHE_TTL_SECONDS") or 60
+)
+
+# Global document-set list cache (the /document-set read on the chat-page
+# bundle). In Danswer MIT document sets aren't permission-filtered (every user
+# sees all), so one shared global list is correct — 200 concurrent first-loads
+# collapse to one DB query. MIT-scoped with no EE dependency: if a deployment
+# enables EE (per-user filtering), the cache bypasses to a direct DB read so it
+# can't leak sets across users. Write-through: every doc-set mutation busts the
+# key; the TTL is a short backstop (staleness is cosmetic — documents stay
+# permission-enforced at search time). Default OFF.
+DOCUMENT_SET_CACHE_ENABLED = (
+    os.environ.get("DOCUMENT_SET_CACHE_ENABLED", "").lower() == "true"
+)
+DOCUMENT_SET_CACHE_TTL_SECONDS = int(
+    os.environ.get("DOCUMENT_SET_CACHE_TTL_SECONDS") or 300
+)
+
+
 #####
 # Enterprise Edition Configs
 #####
diff --git a/backend/danswer/connectors/danswer_jira/connector.py b/backend/danswer/connectors/danswer_jira/connector.py
index 06b5a132bc2..83a9c2aa96a 100644
--- a/backend/danswer/connectors/danswer_jira/connector.py
+++ b/backend/danswer/connectors/danswer_jira/connector.py
@@ -1,4 +1,5 @@
 import os
+import re
 from collections.abc import Iterable
 from datetime import datetime
 from datetime import timezone
@@ -17,6 +18,7 @@
 from danswer.connectors.danswer_jira.utils import extract_text_from_content
 from danswer.connectors.danswer_jira.utils import get_comment_strs
 from danswer.connectors.interfaces import GenerateDocumentsOutput
+from danswer.connectors.interfaces import IdConnector
 from danswer.connectors.interfaces import LoadConnector
 from danswer.connectors.interfaces import PollConnector
 from danswer.connectors.interfaces import SecondsSinceUnixEpoch
@@ -31,6 +33,36 @@
 JIRA_API_VERSION = os.environ.get("JIRA_API_VERSION") or "3"
 _JIRA_FULL_PAGE_SIZE = 50
 
+# Matches ORDER BY anywhere in the JQL (case-insensitive); it is not quote-aware.
+_JQL_ORDER_BY_RE = re.compile(r"\border\s+by\b", re.IGNORECASE)
+
+
+def _add_time_window_to_jql(
+    jira_filter: str, start_date_str: str, end_date_str: str
+) -> str:
+    """Add the poll's `updated` time window to a user-supplied JQL filter.
+
+    JQL requires all WHERE conditions to come BEFORE any `ORDER BY`, so the
+    window is injected in front of the first ORDER BY match, else appended.
+    The user's conditions are parenthesized first: AND binds tighter than OR in
+    JQL, so a bare `a OR b AND <window>` would apply the window to `b` only and
+    re-fetch every issue matching `a` on each poll.
+    """
+    window = f"updated >= '{start_date_str}' AND updated <= '{end_date_str}'"
+    jira_filter = jira_filter.strip()
+
+    match = _JQL_ORDER_BY_RE.search(jira_filter)
+    if match:
+        where_part = jira_filter[: match.start()].rstrip()
+        order_part = jira_filter[match.start() :].strip()
+        if where_part:
+            return f"({where_part}) AND {window} {order_part}"
+        return f"{window} {order_part}"
+
+    if jira_filter:
+        return f"({jira_filter}) AND {window}"
+    return window
+
 
 def _paginate_jql_search(
     jira_client: JIRA,
@@ -66,78 +98,92 @@ def fetch_jira_issues_batch(
         jql=jql,
         max_results=batch_size,
     ):
-        if labels_to_skip:
-            if any(label in issue.fields.labels for label in labels_to_skip):
+        # Per-issue error tolerance: a single malformed issue (odd field shape,
+        # missing data, etc.) should be logged and skipped, NOT abort the whole
+        # connector run. Previously one bad ticket failed the entire attempt.
+        issue_key = getattr(issue, "key", "<unknown>")
+        try:
+            if labels_to_skip and any(
+                label in issue.fields.labels for label in labels_to_skip
+            ):
                 logger.info(
-                    f"Skipping {issue.key} because it has a label to skip. Found "
+                    f"Skipping {issue_key} because it has a label to skip. Found "
                     f"labels: {issue.fields.labels}. Labels to skip: {labels_to_skip}."
                 )
                 continue
 
-        description = (
-            issue.fields.description or ""
-            if JIRA_API_VERSION == "2"
-            else extract_text_from_content(issue.raw["fields"].get("description"))
-        )
-        comments = get_comment_strs(
-            issue=issue,
-            comment_email_blacklist=comment_email_blacklist,
-        )
-        ticket_content = f"{description}\n" + "\n".join(
-            [f"Comment: {comment}" for comment in comments if comment]
-        )
-
-        # Check ticket size
-        if len(ticket_content.encode("utf-8")) > JIRA_CONNECTOR_MAX_TICKET_SIZE:
-            logger.info(
-                f"Skipping {issue.key} because it exceeds the maximum size of "
-                f"{JIRA_CONNECTOR_MAX_TICKET_SIZE} bytes."
+            description = (
+                issue.fields.description or ""
+                if JIRA_API_VERSION == "2"
+                else extract_text_from_content(issue.raw["fields"].get("description"))
+            )
+            comments = get_comment_strs(
+                issue=issue,
+                comment_email_blacklist=comment_email_blacklist,
+            )
+            ticket_content = f"{description}\n" + "\n".join(
+                [f"Comment: {comment}" for comment in comments if comment]
             )
-            continue
 
-        page_url = f"{jira_client.client_info()}/browse/{issue.key}"
+            # Check ticket size
+            if len(ticket_content.encode("utf-8")) > JIRA_CONNECTOR_MAX_TICKET_SIZE:
+                logger.info(
+                    f"Skipping {issue_key} because it exceeds the maximum size of "
+                    f"{JIRA_CONNECTOR_MAX_TICKET_SIZE} bytes."
+                )
+                continue
 
-        people = set()
-        try:
-            creator = best_effort_get_field_from_issue(issue, "creator")
-            if basic_expert_info := best_effort_basic_expert_info(creator):
-                people.add(basic_expert_info)
-        except Exception:
-            # Author should exist but if not, doesn't matter
-            pass
+            page_url = f"{jira_client.client_info()}/browse/{issue_key}"
+
+            people = set()
+            for role in ("creator", "reporter", "assignee"):
+                try:
+                    field_value = best_effort_get_field_from_issue(issue, role)
+                    if basic_expert_info := best_effort_basic_expert_info(field_value):
+                        people.add(basic_expert_info)
+                except Exception:
+                    # role may be absent on some issues; not critical
+                    pass
+
+            metadata_dict: dict[str, Any] = {}
+            if priority := best_effort_get_field_from_issue(issue, "priority"):
+                metadata_dict["priority"] = priority.name
+            if status := best_effort_get_field_from_issue(issue, "status"):
+                metadata_dict["status"] = status.name
+            if resolution := best_effort_get_field_from_issue(issue, "resolution"):
+                metadata_dict["resolution"] = resolution.name
+            if labels := best_effort_get_field_from_issue(issue, "labels"):
+                metadata_dict["label"] = labels
+            if issuetype := best_effort_get_field_from_issue(issue, "issuetype"):
+                metadata_dict["issuetype"] = issuetype.name
+            if reporter := best_effort_get_field_from_issue(issue, "reporter"):
+                if reporter_name := getattr(reporter, "displayName", None):
+                    metadata_dict["reporter"] = reporter_name
+            if project := best_effort_get_field_from_issue(issue, "project"):
+                if project_key := getattr(project, "key", None):
+                    metadata_dict["project"] = project_key
+
+            doc = Document(
+                id=page_url,
+                sections=[Section(link=page_url, text=ticket_content)],
+                source=DocumentSource.JIRA,
+                semantic_identifier=f"{issue_key}: {issue.fields.summary}",
+                title=f"{issue_key} {issue.fields.summary}",
+                doc_updated_at=time_str_to_utc(issue.fields.updated),
+                primary_owners=list(people) or None,
+                # TODO add secondary_owners (commenters) if needed
+                metadata=metadata_dict,
+            )
+        except Exception as e:
+            logger.exception(
+                f"Failed to process Jira issue {issue_key}, skipping it: {e}"
+            )
+            continue
 
-        try:
-            assignee = best_effort_get_field_from_issue(issue, "assignee")
-            if basic_expert_info := best_effort_basic_expert_info(assignee):
-                people.add(basic_expert_info)
-        except Exception:
-            # Author should exist but if not, doesn't matter
-            pass
-
-        metadata_dict = {}
-        if priority := best_effort_get_field_from_issue(issue, "priority"):
-            metadata_dict["priority"] = priority.name
-        if status := best_effort_get_field_from_issue(issue, "status"):
-            metadata_dict["status"] = status.name
-        if resolution := best_effort_get_field_from_issue(issue, "resolution"):
-            metadata_dict["resolution"] = resolution.name
-        if labels := best_effort_get_field_from_issue(issue, "labels"):
-            metadata_dict["label"] = labels
-
-        yield Document(
-            id=page_url,
-            sections=[Section(link=page_url, text=ticket_content)],
-            source=DocumentSource.JIRA,
-            semantic_identifier=f"{issue.key}: {issue.fields.summary}",
-            title=f"{issue.key} {issue.fields.summary}",
-            doc_updated_at=time_str_to_utc(issue.fields.updated),
-            primary_owners=list(people) or None,
-            # TODO add secondary_owners (commenters) if needed
-            metadata=metadata_dict,
-        )
+        yield doc
 
 
-class JiraConnector(LoadConnector, PollConnector):
+class JiraConnector(LoadConnector, PollConnector, IdConnector):
     def __init__(
         self,
         jira_base_url: str,
@@ -186,7 +232,11 @@ def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None
         return None
 
     def load_from_state(self) -> GenerateDocumentsOutput:
-        jql = f"project = {self.quoted_jira_project}"
+        # Full (unbounded) load = the configured filter with no time window.
+        # Previously this referenced self.quoted_jira_project, which __init__
+        # never sets — an AttributeError on any call (notably the prune path,
+        # which falls back to load_from_state for non-IdConnectors).
+        jql = self.jira_filter
 
         document_batch = []
         for doc in fetch_jira_issues_batch(
@@ -216,11 +266,7 @@ def poll_source(
             "%Y-%m-%d %H:%M"
         )
 
-        jql = (
-            f"{self.jira_filter} AND "
-            f"updated >= '{start_date_str}' AND "
-            f"updated <= '{end_date_str}'"
-        )
+        jql = _add_time_window_to_jql(self.jira_filter, start_date_str, end_date_str)
 
         document_batch = []
         for doc in fetch_jira_issues_batch(
@@ -237,6 +283,27 @@ def poll_source(
 
         yield document_batch
 
+    def retrieve_all_source_ids(self) -> set[str]:
+        """Enumerate the document ids of every issue matching the configured
+        filter, for the prune path. Only the `key` field is requested, and each
+        id is built in the same `<base>/browse/<KEY>` form used at index time, so
+        pruning can spot deleted issues without loading full documents.
+
+        Raises ConnectorMissingCredentialError if no Jira client is loaded."""
+        client = self.jira_client
+        if client is None:
+            raise ConnectorMissingCredentialError("Jira")
+
+        browse_prefix = f"{client.client_info()}/browse/"
+        matching_issues = _paginate_jql_search(
+            jira_client=client,
+            jql=self.jira_filter,
+            max_results=_JIRA_FULL_PAGE_SIZE,
+            fields="key",
+        )
+
+        return {f"{browse_prefix}{issue.key}" for issue in matching_issues}
+
 
 if __name__ == "__main__":
     import os
diff --git a/backend/danswer/connectors/factory.py b/backend/danswer/connectors/factory.py
index 5bedb9b4245..4f33eb38d67 100644
--- a/backend/danswer/connectors/factory.py
+++ b/backend/danswer/connectors/factory.py
@@ -64,6 +64,15 @@ def identify_connector_class(
         DocumentSource.SLACK: {
             InputType.LOAD_STATE: SlackLoadConnector,
             InputType.POLL: SlackPollConnector,
+            # Slack is the only dict-mapped source, so unlike single-class
+            # connectors it needs an explicit PRUNE entry — without it the
+            # prune task fails with "Connector not found for source=SLACK" on
+            # every run. Use the POLL connector (NOT SlackLoadConnector, which
+            # requires an `export_path_str` and reads a Slack export file —
+            # incompatible with an API connector's config). For pruning,
+            # extract_ids_from_runnable_connector calls poll_source(epoch, now)
+            # to enumerate every current message id (no cheaper Slack listing).
+            InputType.PRUNE: SlackPollConnector,
         },
         DocumentSource.GITHUB: GithubConnector,
         DocumentSource.GITHUB_FILES: GithubFilesConnector,
diff --git a/backend/danswer/connectors/file/connector.py b/backend/danswer/connectors/file/connector.py
index 44d824d75b0..608b67506be 100644
--- a/backend/danswer/connectors/file/connector.py
+++ b/backend/danswer/connectors/file/connector.py
@@ -42,7 +42,12 @@ def _read_files_and_metadata(
     metadata: dict[str, Any] = {}
     directory_path = os.path.dirname(file_name)
 
-    file_content = get_default_file_store(db_session).read_file(file_name, mode="b")
+    # use_tempfile=True: stream the file into a SpooledTemporaryFile (spills to
+    # disk past 30MB) instead of BytesIO(read())-ing the whole thing into RAM.
+    # Without this, a large uploaded file OOM-crashes the indexing process.
+    file_content = get_default_file_store(db_session).read_file(
+        file_name, mode="b", use_tempfile=True
+    )
 
     if extension == ".zip":
         for file_info, file, metadata in load_files_from_zip(
diff --git a/backend/danswer/connectors/google_site/connector.py b/backend/danswer/connectors/google_site/connector.py
index 9cfcf224e3f..658a14ebc4d 100644
--- a/backend/danswer/connectors/google_site/connector.py
+++ b/backend/danswer/connectors/google_site/connector.py
@@ -70,8 +70,10 @@ def load_from_state(self) -> GenerateDocumentsOutput:
         documents: list[Document] = []
 
         with Session(get_sqlalchemy_engine()) as db_session:
+            # use_tempfile=True: stream the (potentially large) site zip to a
+            # temp file instead of loading it fully into memory (OOM risk).
             file_content_io = get_default_file_store(db_session).read_file(
-                self.zip_path, mode="b"
+                self.zip_path, mode="b", use_tempfile=True
             )
 
         # load the HTML files
diff --git a/backend/danswer/connectors/models.py b/backend/danswer/connectors/models.py
index 37ed2e22bd5..0c00b8607e2 100644
--- a/backend/danswer/connectors/models.py
+++ b/backend/danswer/connectors/models.py
@@ -1,3 +1,4 @@
+import hashlib
 from datetime import datetime
 from enum import Enum
 from typing import Any
@@ -131,6 +132,36 @@ def get_metadata_str_attributes(self) -> list[str] | None:
                 attributes.append(k + INDEX_SEPARATOR + v)
         return attributes
 
+    def get_content_hash(self) -> str:
+        """Stable hash of the fields that determine this document's INDEXED
+        representation: section text/links, title, semantic identifier,
+        metadata, and owners.
+
+        Used by the indexing pipeline to skip re-indexing a document whose
+        content is unchanged even though its `doc_updated_at` advanced — e.g.
+        a Salesforce automation bumps LastModifiedDate on records whose indexed
+        fields didn't actually change, which otherwise forces a full (and
+        expensive) Vespa clear-and-rewrite of every record on every poll.
+
+        Deliberately EXCLUDES doc_updated_at: a newer timestamp alone must not
+        force a re-index. Uses \\x1f (unit separator) as the field delimiter so
+        adjacent fields can't collide. Metadata keys and each owner list are
+        sorted so the hash is stable when a connector emits them in any order.
+        """
+        parts: list[str] = [self.semantic_identifier or "", self.title or ""]
+        for section in self.sections:
+            parts.append(section.link or "")
+            parts.append(section.text)
+        for key in sorted(self.metadata or {}):
+            value = self.metadata[key]
+            if isinstance(value, list):
+                parts.append(f"{key}={'|'.join(value)}")
+            else:
+                parts.append(f"{key}={value}")
+        for owners in (self.primary_owners or [], self.secondary_owners or []):
+            parts += sorted(f"{o.display_name or ''}<{o.email or ''}>" for o in owners)
+        return hashlib.sha256("\x1f".join(parts).encode("utf-8")).hexdigest()
+
 
 class Document(DocumentBase):
     id: str  # This must be unique or during indexing/reindexing, chunks will be overwritten
diff --git a/backend/danswer/connectors/web/connector.py b/backend/danswer/connectors/web/connector.py
index 9238467abbc..b1e3ff36715 100644
--- a/backend/danswer/connectors/web/connector.py
+++ b/backend/danswer/connectors/web/connector.py
@@ -1,6 +1,8 @@
 import io
 import ipaddress
+import random
 import socket
+import time
 from datetime import datetime
 from datetime import timezone
 from enum import Enum
@@ -19,9 +21,12 @@
 from requests_oauthlib import OAuth2Session  # type:ignore
 
 from danswer.configs.app_configs import INDEX_BATCH_SIZE
+from danswer.configs.app_configs import WEB_CONNECTOR_MAX_PAGES
+from danswer.configs.app_configs import WEB_CONNECTOR_MAX_RETRIES
 from danswer.configs.app_configs import WEB_CONNECTOR_OAUTH_CLIENT_ID
 from danswer.configs.app_configs import WEB_CONNECTOR_OAUTH_CLIENT_SECRET
 from danswer.configs.app_configs import WEB_CONNECTOR_OAUTH_TOKEN_URL
+from danswer.configs.app_configs import WEB_CONNECTOR_PAGE_TIMEOUT_MS
 from danswer.configs.app_configs import WEB_CONNECTOR_VALIDATE_URLS
 from danswer.configs.constants import DocumentSource
 from danswer.connectors.interfaces import GenerateDocumentsOutput
@@ -36,6 +41,29 @@
 
 logger = setup_logger()
 
+# Many docs sites / WAFs (Cloudflare etc.) 403 or rate-limit the default
+# headless-Chromium / bare-requests user agent. Present as a normal browser.
+DEFAULT_USER_AGENT = (
+    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
+    "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+)
+
+
+def _is_browser_dead(exc: Exception) -> bool:
+    """Heuristic: True when `exc` looks like the whole browser/context died, so
+    Playwright must be restarted; False for errors confined to a single page."""
+    lowered = str(exc).lower()
+    dead_markers = (
+        "browser has been closed",
+        "browser closed",
+        "crash",
+        "target closed",
+    )
+    for marker in dead_markers:
+        if marker in lowered:
+            return True
+    return False
+
 
 class WEB_CONNECTOR_VALID_SETTINGS(str, Enum):
     # Given a base site, index everything under that path
@@ -123,7 +151,7 @@ def start_playwright() -> Tuple[Playwright, BrowserContext]:
     playwright = sync_playwright().start()
     browser = playwright.chromium.launch(headless=True)
 
-    context = browser.new_context()
+    context = browser.new_context(user_agent=DEFAULT_USER_AGENT)
 
     if (
         WEB_CONNECTOR_OAUTH_CLIENT_ID
@@ -309,9 +337,23 @@ def load_from_state(self, is_polling: bool = False) -> GenerateDocumentsOutput:
         at_least_one_doc = False
         last_error = None
 
+        # One upfront connectivity check. This used to run per page — a full
+        # extra GET for every URL (doubling network work) that ALSO 403'd on
+        # bot-protected sites Playwright loads fine, and a failure tore down the
+        # whole browser. Once, on the base URL, is enough.
+        check_internet_connection(base_url)
+
         playwright, context = start_playwright()
         restart_playwright = False
+        pages_visited = 0
         while to_visit:
+            if WEB_CONNECTOR_MAX_PAGES and pages_visited >= WEB_CONNECTOR_MAX_PAGES:
+                logger.info(
+                    f"Reached WEB_CONNECTOR_MAX_PAGES ({WEB_CONNECTOR_MAX_PAGES}); "
+                    f"stopping crawl with {len(to_visit)} URL(s) still queued."
+                )
+                break
+
             current_url = to_visit.pop()
             if current_url in visited_links:
                 continue
@@ -325,18 +367,23 @@ def load_from_state(self, is_polling: bool = False) -> GenerateDocumentsOutput:
                 continue
 
             logger.info(f"Visiting {current_url}")
-
-            try:
-                check_internet_connection(current_url)
-                if restart_playwright:
-                    playwright, context = start_playwright()
-                    restart_playwright = False
-
-                if current_url.split(".")[-1] == "pdf":
-                    # PDF files are not checked for links
-                    response = requests.get(current_url)
+            pages_visited += 1
+
+            # Reinit the browser if a previous batch/crash flagged it. Done for
+            # every page (as before) so the browser is always live when we reach
+            # the batch-yield / final-stop below.
+            if restart_playwright:
+                playwright, context = start_playwright()
+                restart_playwright = False
+
+            # --- PDF: fetch directly (no browser). timeout so a hung download
+            # can't stall the whole attempt. Matches the original: PDFs don't
+            # trigger the per-batch flush. ---
+            if current_url.split(".")[-1] == "pdf":
+                try:
+                    response = requests.get(current_url, timeout=60)
+                    response.raise_for_status()
                     page_text = pdf_to_text(file=io.BytesIO(response.content))
-
                     doc_batch.append(
                         Document(
                             id=current_url,
@@ -346,57 +393,91 @@ def load_from_state(self, is_polling: bool = False) -> GenerateDocumentsOutput:
                             metadata={},
                         )
                     )
-                    continue
-
-                page = context.new_page()
-                page_response = page.goto(current_url)
-                final_page = page.url
-                if final_page != current_url:
-                    logger.info(f"Redirected to {final_page}")
-                    protected_url_check(final_page)
-                    current_url = final_page
-                    if current_url in visited_links:
-                        logger.info("Redirected page already indexed")
-                        continue
-                    visited_links.add(current_url)
-
-                content = page.content()
-                soup = BeautifulSoup(content, "html.parser")
-
-                # Only get internal links if we're not in polling mode and recursive is enabled
-                if self.recursive and not is_polling:
-                    internal_links = get_internal_links(base_url, current_url, soup)
-                    for link in internal_links:
-                        if link not in visited_links:
-                            to_visit.append(link)
-
-                if page_response and str(page_response.status)[0] in ("4", "5"):
-                    last_error = f"Skipped indexing {current_url} due to HTTP {page_response.status} response"
-                    logger.info(last_error)
-                    continue
-
-                parsed_html = web_html_cleanup(soup, self.mintlify_cleanup)
-
-                doc_batch.append(
-                    Document(
-                        id=current_url,
-                        sections=[
-                            Section(link=current_url, text=parsed_html.cleaned_text)
-                        ],
-                        source=DocumentSource.WEB,
-                        semantic_identifier=parsed_html.title or current_url,
-                        metadata={},
-                    )
-                )
-
-                page.close()
-            except Exception as e:
-                last_error = f"Failed to fetch '{current_url}': {e}"
-                logger.error(last_error)
-                playwright.stop()
-                restart_playwright = True
+                except Exception as e:
+                    last_error = f"Failed to fetch PDF '{current_url}': {e}"
+                    logger.error(last_error)
                 continue
 
+            # --- HTML via Playwright, with retries. A single page error retries
+            # with a FRESH PAGE on the same browser (exponential backoff); only
+            # a browser-level crash restarts Playwright. One bad page no longer
+            # tears down the browser or fails the attempt. ---
+            page_doc: Document | None = None
+            for attempt in range(WEB_CONNECTOR_MAX_RETRIES):
+                if attempt > 0:
+                    time.sleep(min(2**attempt + random.uniform(0, 1), 10))
+                try:
+                    page = context.new_page()
+                    try:
+                        page_response = page.goto(
+                            current_url,
+                            timeout=WEB_CONNECTOR_PAGE_TIMEOUT_MS,
+                            # 'domcontentloaded' (DOM parsed) instead of the
+                            # default 'load' (waits for every image/font/etc.) —
+                            # far faster and enough for text extraction.
+                            wait_until="domcontentloaded",
+                        )
+                        final_page = page.url
+                        if final_page != current_url:
+                            logger.info(f"Redirected to {final_page}")
+                            protected_url_check(final_page)
+                            current_url = final_page
+                            if current_url in visited_links:
+                                logger.info("Redirected page already indexed")
+                                break
+                            visited_links.add(current_url)
+
+                        content = page.content()
+                        soup = BeautifulSoup(content, "html.parser")
+
+                        if self.recursive and not is_polling:
+                            for link in get_internal_links(base_url, current_url, soup):
+                                if link not in visited_links:
+                                    to_visit.append(link)
+
+                        if page_response and str(page_response.status)[0] in (
+                            "4",
+                            "5",
+                        ):
+                            last_error = (
+                                f"Skipped indexing {current_url} due to HTTP "
+                                f"{page_response.status} response"
+                            )
+                            logger.info(last_error)
+                            break  # a real 4xx/5xx — don't retry
+
+                        parsed_html = web_html_cleanup(soup, self.mintlify_cleanup)
+                        page_doc = Document(
+                            id=current_url,
+                            sections=[
+                                Section(link=current_url, text=parsed_html.cleaned_text)
+                            ],
+                            source=DocumentSource.WEB,
+                            semantic_identifier=parsed_html.title or current_url,
+                            metadata={},
+                        )
+                        break  # success
+                    finally:
+                        page.close()
+                except Exception as e:
+                    last_error = (
+                        f"Failed to fetch '{current_url}' "
+                        f"(attempt {attempt + 1}/{WEB_CONNECTOR_MAX_RETRIES}): {e}"
+                    )
+                    logger.warning(last_error)
+                    if _is_browser_dead(e):
+                        # Browser/context crashed — restart it so the next
+                        # attempt (and subsequent pages) have a live browser.
+                        try:
+                            playwright.stop()
+                        except Exception:
+                            pass
+                        playwright, context = start_playwright()
+                    # else: transient page error — retry with a fresh page.
+
+            if page_doc is not None:
+                doc_batch.append(page_doc)
+
             if len(doc_batch) >= self.batch_size:
                 playwright.stop()
                 restart_playwright = True
diff --git a/backend/danswer/db/analytics.py b/backend/danswer/db/analytics.py
index 4af60b7a80d..bafcd5c0fec 100644
--- a/backend/danswer/db/analytics.py
+++ b/backend/danswer/db/analytics.py
@@ -26,12 +26,19 @@
 from sqlalchemy.orm import Session
 
 from danswer.configs.constants import MessageType
+from danswer.db.models import AnalyticsPersonaDailyStats
+from danswer.db.models import AnalyticsUserDailyStats
+from danswer.db.models import AnalyticsUserFirstSeen
 from danswer.db.models import ChatMessage
 from danswer.db.models import ChatMessageFeedback
 from danswer.db.models import ChatSession
 from danswer.db.models import Connector
 from danswer.db.models import ConnectorCredentialPair
 from danswer.db.models import Document
+from danswer.db.models import DocumentSet
+from danswer.db.models import Persona
+from danswer.db.models import Persona__DocumentSet
+from danswer.db.models import User
 
 
 def fetch_query_analytics(
@@ -137,6 +144,153 @@ def fetch_per_user_query_analytics(
     return db_session.execute(stmt).all()  # type: ignore
 
 
+def fetch_user_adoption(
+    start: datetime.datetime,
+    end: datetime.datetime,
+    db_session: Session,
+) -> list[tuple[datetime.date, int, int]]:
+    """Chat adoption curve: one ``(date, new_users, cumulative_users)``
+    tuple per day in ``[start, end]`` on which some user was first seen.
+
+    Reads the ``analytics_user_first_seen`` aggregate, not raw chat rows.
+    The running total also counts every first-seen row dated before
+    ``start``, so ``cumulative_users`` is continuous across windows. Days
+    on which nobody was first seen are not returned.
+    """
+    first_seen = AnalyticsUserFirstSeen.first_seen_date
+    daily_counts_stmt = (
+        select(first_seen, func.count().label("new_users"))
+        .where(first_seen <= end.date())
+        .group_by(first_seen)
+        .order_by(first_seen)
+    )
+    window_start = start.date()
+
+    curve: list[tuple[datetime.date, int, int]] = []
+    running_total = 0
+    # Walk the whole history in date order so the total stays continuous,
+    # but only emit the days that fall inside the requested window.
+    for first_seen_day, joined in db_session.execute(daily_counts_stmt).all():
+        added = int(joined)
+        running_total += added
+        if first_seen_day >= window_start:
+            curve.append((first_seen_day, added, running_total))
+    return curve
+
+
+def fetch_per_user_chat_stats(
+    start: datetime.datetime,
+    end: datetime.datetime,
+    db_session: Session,
+    limit: int = 100,
+) -> Sequence[tuple[UUID, str, int, int, int, datetime.date]]:
+    """Top ``limit`` users by message volume over ``[start, end]``.
+
+    Each row is ``(user_id, email, messages, likes, dislikes, last_active)``
+    summed from ``analytics_user_daily_stats`` (not raw chat rows), where
+    ``last_active`` is the latest stats date inside the window.
+
+    The inner join on ``user`` means stats rows with no matching ``user``
+    row are left out of the result. Ties on message volume are broken by
+    ``User.id`` so which rows survive the ``limit`` cut is deterministic.
+    """
+    stats = AnalyticsUserDailyStats
+    stmt = (
+        # SA's select() overloads don't type a 6-col sum/max projection;
+        # the runtime is fine (the existing analytics selects do the same).
+        select(  # type: ignore[call-overload]
+            User.id,
+            User.email,
+            func.coalesce(func.sum(stats.message_count), 0),
+            func.coalesce(func.sum(stats.like_count), 0),
+            func.coalesce(func.sum(stats.dislike_count), 0),
+            func.max(stats.date),
+        )
+        .select_from(stats)
+        .join(User, User.id == stats.user_id)
+        .where(stats.date >= start.date())
+        .where(stats.date <= end.date())
+        .group_by(User.id, User.email)
+        .order_by(func.sum(stats.message_count).desc(), User.id)
+        .limit(limit)
+    )
+    return db_session.execute(stmt).all()  # type: ignore
+
+
+def fetch_persona_usage(
+    start: datetime.datetime,
+    end: datetime.datetime,
+    db_session: Session,
+    limit: int = 100,
+) -> Sequence[tuple[int, str, int, int, int, int, datetime.date]]:
+    """Top ``limit`` assistants by message volume over ``[start, end]``,
+    summed from ``analytics_persona_daily_stats``. Rows are ``(persona_id,
+    name, sessions, messages, likes, dislikes, last_active)``; stats with no
+    matching ``persona`` row drop out of the inner join. ``sessions`` sums
+    per-day session counts, so a session active on N days counts N times.
+    Ties are broken by ``Persona.id`` so the ``limit`` cut-off is stable."""
+    stats = AnalyticsPersonaDailyStats
+    stmt = (
+        select(  # type: ignore[call-overload]
+            Persona.id,
+            Persona.name,
+            func.coalesce(func.sum(stats.session_count), 0),
+            func.coalesce(func.sum(stats.message_count), 0),
+            func.coalesce(func.sum(stats.like_count), 0),
+            func.coalesce(func.sum(stats.dislike_count), 0),
+            func.max(stats.date),
+        )
+        .select_from(stats)
+        .join(Persona, Persona.id == stats.persona_id)
+        .where(stats.date >= start.date())
+        .where(stats.date <= end.date())
+        .group_by(Persona.id, Persona.name)
+        .order_by(func.sum(stats.message_count).desc(), Persona.id)
+        .limit(limit)
+    )
+    return db_session.execute(stmt).all()  # type: ignore
+
+
+def fetch_document_set_usage(
+    start: datetime.datetime,
+    end: datetime.datetime,
+    db_session: Session,
+    limit: int = 100,
+) -> Sequence[tuple[int, str, int]]:
+    """APPROXIMATE "datasets in use" over ``[start, end]``: each assistant's
+    message volume is credited to every document set attached to it through
+    ``persona__document_set`` when the query runs.
+
+    Availability-weighted, not retrieval-truth: an assistant with several
+    document sets adds its full message count to each of them, so the
+    totals can exceed the real message count, and only current attachments
+    are used (past membership is not tracked). Rows are
+    ``(document_set_id, name, attributed_messages)``, highest first; ties
+    are broken by ``DocumentSet.id`` so the ``limit`` cut-off is stable.
+    """
+    stats = AnalyticsPersonaDailyStats
+    attachment = Persona__DocumentSet
+    attributed = func.sum(stats.message_count)
+    stmt = (
+        select(
+            DocumentSet.id,
+            DocumentSet.name,
+            func.coalesce(attributed, 0),
+        )
+        .select_from(stats)
+        .join(attachment, attachment.persona_id == stats.persona_id)
+        .join(DocumentSet, DocumentSet.id == attachment.document_set_id)
+        .where(stats.date >= start.date())
+        .where(stats.date <= end.date())
+        .group_by(DocumentSet.id, DocumentSet.name)
+        # Secondary key: without it PostgreSQL may return tied rows in any
+        # order, which makes the LIMIT cut-off arbitrary.
+        .order_by(attributed.desc(), DocumentSet.id)
+        .limit(limit)
+    )
+    return db_session.execute(stmt).all()  # type: ignore
+
+
 def fetch_danswerbot_analytics(
     start: datetime.datetime,
     end: datetime.datetime,
diff --git a/backend/danswer/db/analytics_rollup.py b/backend/danswer/db/analytics_rollup.py
index 9fa8abc5637..eaa0cf0a634 100644
--- a/backend/danswer/db/analytics_rollup.py
+++ b/backend/danswer/db/analytics_rollup.py
@@ -48,6 +48,7 @@
 from sqlalchemy import cast
 from sqlalchemy import Date
 from sqlalchemy import func
+from sqlalchemy import literal
 from sqlalchemy import or_
 from sqlalchemy import select
 from sqlalchemy import text
@@ -57,6 +58,9 @@
 from danswer.configs.constants import MessageType
 from danswer.db.engine import get_sqlalchemy_engine
 from danswer.db.models import AnalyticsDailyRollup
+from danswer.db.models import AnalyticsPersonaDailyStats
+from danswer.db.models import AnalyticsUserDailyStats
+from danswer.db.models import AnalyticsUserFirstSeen
 from danswer.db.models import ChatMessage
 from danswer.db.models import ChatMessageFeedback
 from danswer.db.models import ChatSession
@@ -309,6 +313,176 @@ def upsert_rollup_for_date(
     return metrics
 
 
+def capture_first_seen_for_date(
+    db_session: Session, target_date: datetime.date
+) -> None:
+    """Insert an ``analytics_user_first_seen`` row dated ``target_date`` for
+    each user who has an assistant message in a session on that day.
+
+    Runs as INSERT … SELECT … ON CONFLICT (user_id) DO NOTHING, so a user
+    who already has a row keeps the stored date. The recorded date is thus
+    the first one this function is called with for that user: callers must
+    walk dates oldest-first for it to be the earliest active day. Sessions
+    without a ``user_id`` are skipped. Commits the session."""
+    day_start, day_end = _day_bounds(target_date)
+    first_seen_candidates = (
+        select(
+            ChatSession.user_id.label("user_id"),
+            literal(target_date, Date).label("first_seen_date"),
+        )
+        .select_from(ChatMessage)
+        .join(ChatSession, ChatSession.id == ChatMessage.chat_session_id)
+        .where(
+            ChatMessage.time_sent >= day_start,
+            ChatMessage.time_sent < day_end,
+            ChatMessage.message_type == MessageType.ASSISTANT,
+            ChatSession.user_id.is_not(None),
+        )
+        .distinct()
+    )
+    insert_stmt = (
+        pg_insert(AnalyticsUserFirstSeen.__table__)
+        .from_select(["user_id", "first_seen_date"], first_seen_candidates)
+        .on_conflict_do_nothing(index_elements=[AnalyticsUserFirstSeen.user_id])
+    )
+    db_session.execute(insert_stmt)
+    db_session.commit()
+
+
+def upsert_user_daily_stats_for_date(
+    db_session: Session, target_date: datetime.date
+) -> None:
+    """Upsert one ``analytics_user_daily_stats`` row per user who has an
+    assistant message on ``target_date``: message, like and dislike counts.
+
+    A single INSERT … SELECT … ON CONFLICT (user_id, date) DO UPDATE, so
+    calling it again for the same date overwrites that day's counts and
+    ``rolled_up_at`` with freshly computed values (feedback added since the
+    previous run is picked up). Users with no assistant message in the day
+    produce no row in the SELECT, so any row they already have is left
+    untouched. Sessions without a ``user_id`` are skipped. Commits."""
+    start, end = _day_bounds(target_date)
+    per_user = (
+        select(
+            ChatSession.user_id.label("user_id"),
+            literal(target_date, Date).label("date"),
+            # distinct: the feedback outerjoin can fan out a message into
+            # multiple rows (a message may have >1 feedback row).
+            func.count(func.distinct(ChatMessage.id)).label("message_count"),
+            func.coalesce(
+                func.sum(case((ChatMessageFeedback.is_positive, 1), else_=0)), 0
+            ).label("like_count"),
+            func.coalesce(
+                func.sum(
+                    case(
+                        (ChatMessageFeedback.is_positive == False, 1),  # noqa: E712
+                        else_=0,
+                    )
+                ),
+                0,
+            ).label("dislike_count"),
+        )
+        .select_from(ChatMessage)
+        .join(ChatSession, ChatSession.id == ChatMessage.chat_session_id)
+        .outerjoin(
+            ChatMessageFeedback,
+            ChatMessageFeedback.chat_message_id == ChatMessage.id,
+        )
+        .where(ChatMessage.time_sent >= start)
+        .where(ChatMessage.time_sent < end)
+        .where(ChatMessage.message_type == MessageType.ASSISTANT)
+        .where(ChatSession.user_id.is_not(None))
+        .group_by(ChatSession.user_id)
+    )
+    stmt = pg_insert(AnalyticsUserDailyStats.__table__).from_select(
+        ["user_id", "date", "message_count", "like_count", "dislike_count"],
+        per_user,
+    )
+    stmt = stmt.on_conflict_do_update(
+        index_elements=[
+            AnalyticsUserDailyStats.user_id,
+            AnalyticsUserDailyStats.date,
+        ],
+        set_={
+            "message_count": stmt.excluded.message_count,
+            "like_count": stmt.excluded.like_count,
+            "dislike_count": stmt.excluded.dislike_count,
+            "rolled_up_at": func.now(),
+        },
+    )
+    db_session.execute(stmt)
+    db_session.commit()
+
+
+def upsert_persona_daily_stats_for_date(
+    db_session: Session, target_date: datetime.date
+) -> None:
+    """Upsert one ``analytics_persona_daily_stats`` row per assistant
+    (persona) that has an assistant message on ``target_date``.
+
+    Groups by ``chat_session.persona_id`` and, unlike the per-user variant,
+    does not filter out sessions without a ``user_id``. Both counts use
+    COUNT(DISTINCT …) because the feedback outer join can repeat a message
+    (and its session) once per feedback row. Re-runs overwrite; commits."""
+    start, end = _day_bounds(target_date)
+    per_persona = (
+        select(
+            ChatSession.persona_id.label("persona_id"),
+            literal(target_date, Date).label("date"),
+            func.count(func.distinct(ChatSession.id)).label("session_count"),
+            func.count(func.distinct(ChatMessage.id)).label("message_count"),
+            func.coalesce(
+                func.sum(case((ChatMessageFeedback.is_positive, 1), else_=0)), 0
+            ).label("like_count"),
+            func.coalesce(
+                func.sum(
+                    case(
+                        (ChatMessageFeedback.is_positive == False, 1),  # noqa: E712
+                        else_=0,
+                    )
+                ),
+                0,
+            ).label("dislike_count"),
+        )
+        .select_from(ChatMessage)
+        .join(ChatSession, ChatSession.id == ChatMessage.chat_session_id)
+        .outerjoin(
+            ChatMessageFeedback,
+            ChatMessageFeedback.chat_message_id == ChatMessage.id,
+        )
+        .where(ChatMessage.time_sent >= start)
+        .where(ChatMessage.time_sent < end)
+        .where(ChatMessage.message_type == MessageType.ASSISTANT)
+        .group_by(ChatSession.persona_id)
+    )
+    stmt = pg_insert(AnalyticsPersonaDailyStats.__table__).from_select(
+        [
+            "persona_id",
+            "date",
+            "session_count",
+            "message_count",
+            "like_count",
+            "dislike_count",
+        ],
+        per_persona,
+    )
+    stmt = stmt.on_conflict_do_update(
+        index_elements=[
+            AnalyticsPersonaDailyStats.persona_id,
+            AnalyticsPersonaDailyStats.date,
+        ],
+        set_={
+            "session_count": stmt.excluded.session_count,
+            "message_count": stmt.excluded.message_count,
+            "like_count": stmt.excluded.like_count,
+            "dislike_count": stmt.excluded.dislike_count,
+            "rolled_up_at": func.now(),
+        },
+    )
+    db_session.execute(stmt)
+    db_session.commit()
+
+
 # ---------------------------------------------------------------------------
 # Batch operations — sliding window (daily task) + full backfill
 # ---------------------------------------------------------------------------
@@ -429,6 +603,12 @@ def run_rollup(today: datetime.date | None = None) -> int:
         current = start
         while current <= today:
             upsert_rollup_for_date(db_session, current)
+            # Capture first-seen plus per-user and per-persona daily stats in
+            # the same ascending pass, before retention can delete the day's
+            # chat rows (rollup runs 07:30, sweep 08:00).
+            capture_first_seen_for_date(db_session, current)
+            upsert_user_daily_stats_for_date(db_session, current)
+            upsert_persona_daily_stats_for_date(db_session, current)
             current += datetime.timedelta(days=1)
             n += 1
 
@@ -458,6 +638,11 @@ def backfill_all_rollups(start_date: datetime.date, end_date: datetime.date) ->
         current = start_date
         while current <= end_date:
             upsert_rollup_for_date(db_session, current)
+            # Walk ascending so each user's first_seen_date is their true
+            # first-ever active day across all currently-available history.
+            capture_first_seen_for_date(db_session, current)
+            upsert_user_daily_stats_for_date(db_session, current)
+            upsert_persona_daily_stats_for_date(db_session, current)
             current += datetime.timedelta(days=1)
             n += 1
             if n % 30 == 0:
diff --git a/backend/danswer/db/chat.py b/backend/danswer/db/chat.py
index 11a4cfec047..4c791bcfc2e 100644
--- a/backend/danswer/db/chat.py
+++ b/backend/danswer/db/chat.py
@@ -98,14 +98,18 @@ def delete_search_doc_message_relationship(
 
 
 def delete_orphaned_search_docs(db_session: Session) -> None:
-    orphaned_docs = (
-        db_session.query(SearchDoc)
+    # Delete SearchDoc rows no longer referenced by any chat_message__search_doc.
+    # The outer join leaves chat_message_id NULL exactly for SearchDocs with no
+    # association row; those ids feed one bulk DELETE, so no orphan is loaded
+    # as an ORM object (the old code fetched each full row just to delete it).
+    # Orphans have no association rows by definition, so there is nothing for
+    # an ORM cascade to handle. Commits the session.
+    orphan_ids = (
+        select(SearchDoc.id)
         .outerjoin(ChatMessage__SearchDoc)
-        .filter(ChatMessage__SearchDoc.chat_message_id.is_(None))
-        .all()
+        .where(ChatMessage__SearchDoc.chat_message_id.is_(None))
     )
-    for doc in orphaned_docs:
-        db_session.delete(doc)
+    db_session.execute(delete(SearchDoc).where(SearchDoc.id.in_(orphan_ids)))
     db_session.commit()
 
 
diff --git a/backend/danswer/db/connector.py b/backend/danswer/db/connector.py
index 2e4b1ed4c3e..8046b074fea 100644
--- a/backend/danswer/db/connector.py
+++ b/backend/danswer/db/connector.py
@@ -189,6 +189,13 @@ def fetch_latest_index_attempt_by_connector(
         return []
 
     for connector in connectors:
+        # NOTE: legacy Query.first() DOES emit LIMIT 1, so this ordered query
+        # over the large index_attempt table is safe despite running per
+        # connector. If you ever migrate this to the 2.x style
+        # `db_session.execute(select(...).order_by(...)).scalars().first()`,
+        # you MUST add `.limit(1)` — Result.first() does NOT add LIMIT and would
+        # materialize the connector's entire attempt history (the exact bug that
+        # was fixed in db/index_attempt.py::get_last_attempt).
         latest_index_attempt = (
             db_session.query(IndexAttempt)
             .filter(IndexAttempt.connector_id == connector.id)
diff --git a/backend/danswer/db/connector_credential_pair.py b/backend/danswer/db/connector_credential_pair.py
index 4fa2f8a0a09..f5ba20ccebc 100644
--- a/backend/danswer/db/connector_credential_pair.py
+++ b/backend/danswer/db/connector_credential_pair.py
@@ -5,6 +5,7 @@
 from sqlalchemy import desc
 from sqlalchemy import select
 from sqlalchemy import text
+from sqlalchemy.orm import joinedload
 from sqlalchemy.orm import Session
 
 from danswer.db.connector import fetch_connector_by_id
@@ -77,11 +78,20 @@ def release_deletion_lock(
 
 
 def get_connector_credential_pairs(
-    db_session: Session, include_disabled: bool = True
+    db_session: Session,
+    include_disabled: bool = True,
+    eager_load_connector: bool = False,
 ) -> list[ConnectorCredentialPair]:
+    """Return all cc-pairs, or only those with an enabled connector.
+
+    ``eager_load_connector`` joined-loads ``cc_pair.connector`` in this query;
+    set it when reading the connector of every row to avoid per-row lazy loads."""
     stmt = select(ConnectorCredentialPair)
     if not include_disabled:
-        stmt = stmt.where(ConnectorCredentialPair.connector.disabled == False)  # noqa
+        # Filter via the relationship; it has no ``.disabled`` attribute itself.
+        stmt = stmt.where(ConnectorCredentialPair.connector.has(disabled=False))
+    if eager_load_connector:
+        stmt = stmt.options(joinedload(ConnectorCredentialPair.connector))
     results = db_session.scalars(stmt)
     return list(results.all())
 
diff --git a/backend/danswer/db/document.py b/backend/danswer/db/document.py
index befb8675748..03302561113 100644
--- a/backend/danswer/db/document.py
+++ b/backend/danswer/db/document.py
@@ -45,6 +45,29 @@ def get_documents_for_connector_credential_pair(
     return db_session.scalars(stmt).all()
 
 
+def get_document_ids_for_connector_credential_pair(
+    db_session: Session, connector_id: int, credential_id: int
+) -> list[str]:
+    """Return the ids of the documents indexed for one connector /
+    credential pair, without loading the document rows.
+
+    Intended as the id-only counterpart of
+    get_documents_for_connector_credential_pair, for callers (e.g. the
+    prune task) that only need the set of indexed ids.
+    """
+    pair_doc = DocumentByConnectorCredentialPair
+    ids_for_pair = select(pair_doc.id).where(
+        pair_doc.connector_id == connector_id,
+        pair_doc.credential_id == credential_id,
+    )
+    # Filter the document table by those ids rather than returning them
+    # directly, so only ids that have a document row come back.
+    matching_ids = (
+        select(DbDocument.id).where(DbDocument.id.in_(ids_for_pair)).distinct()
+    )
+    return list(db_session.scalars(matching_ids).all())
+
+
 def get_documents_by_ids(
     document_ids: list[str],
     db_session: Session,
@@ -223,14 +246,28 @@ def upsert_document_by_connector_credential_pair(
 def update_docs_updated_at(
     ids_to_new_updated_at: dict[str, datetime],
     db_session: Session,
+    ids_to_new_content_hash: dict[str, str] | None = None,
 ) -> None:
-    doc_ids = list(ids_to_new_updated_at.keys())
+    """Record post-successful-index state on the document rows and commit.
+
+    `ids_to_new_content_hash` (optional) stores the sha256 of the indexed
+    content so a later run can skip unchanged docs; None keeps the old
+    updated-at-only behavior. Ids with no document row are ignored.
+    """
+    ids_to_new_content_hash = ids_to_new_content_hash or {}
+    doc_ids = list(set(ids_to_new_updated_at) | set(ids_to_new_content_hash))
+    if not doc_ids:
+        return
+
     documents_to_update = (
         db_session.query(DbDocument).filter(DbDocument.id.in_(doc_ids)).all()
     )
 
     for document in documents_to_update:
-        document.doc_updated_at = ids_to_new_updated_at[document.id]
+        if document.id in ids_to_new_updated_at:
+            document.doc_updated_at = ids_to_new_updated_at[document.id]
+        if document.id in ids_to_new_content_hash:
+            document.indexed_content_hash = ids_to_new_content_hash[document.id]
 
     db_session.commit()
 
diff --git a/backend/danswer/db/document_set.py b/backend/danswer/db/document_set.py
index 51064f78e2f..cd68527a7b2 100644
--- a/backend/danswer/db/document_set.py
+++ b/backend/danswer/db/document_set.py
@@ -9,6 +9,7 @@
 from sqlalchemy import select
 from sqlalchemy.orm import Session
 
+from danswer.db.document_set_cache import invalidate_document_sets_all
 from danswer.db.models import ConnectorCredentialPair
 from danswer.db.models import Document
 from danswer.db.models import DocumentByConnectorCredentialPair
@@ -130,6 +131,9 @@ def insert_document_set(
         )
 
         db_session.commit()
+        # Write-through: drop the global document-set list cache (no-op if
+        # disabled). Done after commit so a refill after the DEL reads new rows.
+        invalidate_document_sets_all()
     except:
         db_session.rollback()
         raise
@@ -194,6 +198,7 @@ def update_document_set(
         ]
         db_session.add_all(ds_cc_pairs)
         db_session.commit()
+        invalidate_document_sets_all()  # write-through bust (see insert_document_set)
     except:
         db_session.rollback()
         raise
@@ -214,6 +219,7 @@ def mark_document_set_as_synced(document_set_id: int, db_session: Session) -> No
         db_session=db_session, document_set_id=document_set_id, is_current=False
     )
     db_session.commit()
+    invalidate_document_sets_all()  # write-through bust (background sync changes membership)
 
 
 def delete_document_set(
@@ -225,6 +231,7 @@ def delete_document_set(
     )
     db_session.delete(document_set_row)
     db_session.commit()
+    invalidate_document_sets_all()  # write-through bust (see insert_document_set)
 
 
 def mark_document_set_as_to_be_deleted(
@@ -265,6 +272,7 @@ def mark_document_set_as_to_be_deleted(
         # are no more relationships to cc pairs
         document_set_row.is_up_to_date = False
         db_session.commit()
+        invalidate_document_sets_all()  # write-through bust (see insert_document_set)
     except:
         db_session.rollback()
         raise
@@ -388,9 +396,12 @@ def fetch_documents_for_document_set_paginated(
     current_only: bool = True,
     last_document_id: str | None = None,
     limit: int = 100,
-) -> tuple[Sequence[Document], str | None]:
+) -> tuple[Sequence[str], str | None]:
+    # Selects only Document.id — the sole caller (document-set sync) uses just
+    # the ids, and the keyset cursor is the last id. Selecting full Document
+    # ORM rows per batch was needless materialization.
     stmt = (
-        select(Document)
+        select(Document.id)
         .join(
             DocumentByConnectorCredentialPair,
             DocumentByConnectorCredentialPair.id == Document.id,
@@ -426,8 +437,8 @@ def fetch_documents_for_document_set_paginated(
         )
     stmt = stmt.distinct()
 
-    documents = db_session.scalars(stmt).all()
-    return documents, documents[-1].id if documents else None
+    document_ids = db_session.scalars(stmt).all()
+    return document_ids, document_ids[-1] if document_ids else None
 
 
 def fetch_document_sets_for_documents(
@@ -486,6 +497,7 @@ def get_or_create_document_set_by_name(
 
     db_session.add(new_doc_set)
     db_session.commit()
+    invalidate_document_sets_all()  # write-through bust (see insert_document_set)
 
     return new_doc_set
 
diff --git a/backend/danswer/db/document_set_cache.py b/backend/danswer/db/document_set_cache.py
new file mode 100644
index 00000000000..db1e7f8990e
--- /dev/null
+++ b/backend/danswer/db/document_set_cache.py
@@ -0,0 +1,193 @@
+"""Global document-set list cache, Redis-backed (MIT-scoped).
+
+The chat-page bundle fires ``GET /document-set`` →
+``server/features/document_set/api.py::list_document_sets`` →
+``db/document_set.py::fetch_user_document_sets`` on *every* page load.
+That read is a multi-join (DocumentSet ⋈ cc-pair mapping ⋈
+ConnectorCredentialPair). At a few hundred users clicking around chat it
+adds avoidable DB-pool pressure.
+
+In Danswer **MIT**, document sets are *not* permission-filtered — every
+user sees the same full list (they're organizational; the documents
+themselves are permission-enforced at search time). So one **global**
+cached list is correct for everyone, and 200 concurrent first-loads
+collapse to a single DB query.
+
+This module has **no dependency on the EE package** (different license).
+It only reads the MIT-core flag ``global_version.get_is_ee_version()`` to
+stay safe: if a deployment enables EE, ``fetch_user_document_sets`` starts
+filtering per user, at which point a shared global list would leak sets
+across users — so under EE we simply **bypass the cache** and read the DB
+directly. Nothing here imports ``ee.*``; the global build uses the
+``user_id=None`` path, which the core resolves to the MIT base query
+without going through the versioned (EE) dispatch at all.
+
+**Invalidation:** write-through. Every committing mutation in
+``db/document_set.py`` calls :func:`invalidate_document_sets_all` after
+commit (a single ``DEL`` of the global key). The
+``DOCUMENT_SET_CACHE_TTL_SECONDS`` backstop heals any missed bust;
+staleness is cosmetic (names/membership in the UI list).
+
+**Fail-open**: any Redis error logs and falls through to a direct DB
+build. **Default OFF**: ``DOCUMENT_SET_CACHE_ENABLED=false``.
+"""
+from __future__ import annotations
+
+import json
+from typing import Any
+from typing import cast
+from uuid import UUID
+
+from sqlalchemy.orm import Session
+
+from danswer.configs.app_configs import DOCUMENT_SET_CACHE_ENABLED
+from danswer.configs.app_configs import DOCUMENT_SET_CACHE_TTL_SECONDS
+from danswer.redis.redis_pool import DANSWER_REDIS_KEY_PREFIX
+from danswer.redis.redis_pool import get_redis_client
+from danswer.server.features.document_set.models import DocumentSet
+from danswer.utils.logger import setup_logger
+from danswer.utils.variable_functionality import global_version
+
+
+logger = setup_logger()
+
+
+# Single shared key — the full document-set list, identical for all users in
+# MIT. Any document-set mutation must invalidate it.
+_DOC_SETS_ALL_KEY = DANSWER_REDIS_KEY_PREFIX + "document_sets:all"
+
+
+# ---------------------------------------------------------------------------
+# Public API — read path
+# ---------------------------------------------------------------------------
+
+
+def get_document_sets_for_user_cached(
+    user_id: UUID | None, db_session: Session
+) -> list[DocumentSet]:
+    """Return the ``DocumentSet`` list to show ``user_id``.
+
+    * Cache disabled or EE version flag set → built straight from the DB
+      for ``user_id``; Redis is not touched.
+    * Otherwise → the shared list stored under the single global key,
+      rebuilt with ``user_id=None`` and re-cached on a miss or when the
+      cached payload no longer parses. ``user_id`` is ignored here.
+    """
+    use_cache = DOCUMENT_SET_CACHE_ENABLED and not global_version.get_is_ee_version()
+    if not use_cache:
+        return _build(user_id, db_session)
+
+    found, payload = _safe_get(_DOC_SETS_ALL_KEY)
+    if found and isinstance(payload, list):
+        try:
+            return [DocumentSet.parse_obj(entry) for entry in payload]
+        except Exception as e:
+            # Schema drift since the entry was cached — treat as a miss.
+            logger.warning(
+                "Cached document-set list failed DocumentSet parse, refilling: %s", e
+            )
+
+    # Build with user_id=None so the cached list is not specific to the caller.
+    fresh = _build(None, db_session)
+    _safe_set(_DOC_SETS_ALL_KEY, [json.loads(ds.json()) for ds in fresh])
+    return fresh
+
+
+# ---------------------------------------------------------------------------
+# Public API — invalidation
+# ---------------------------------------------------------------------------
+
+
+def invalidate_document_sets_all() -> None:
+    """Delete the global document-set cache key from Redis.
+
+    Intended to run right after a document-set mutation commits. Does
+    nothing when the cache is disabled; a Redis failure is logged, not raised.
+    """
+    if DOCUMENT_SET_CACHE_ENABLED:
+        try:
+            redis_client = get_redis_client()
+            redis_client.delete(_DOC_SETS_ALL_KEY)
+        except Exception as e:
+            # Fail-open: log and carry on. Entries are written with a TTL, so
+            # a key this DEL failed to remove still expires on its own.
+            logger.warning("invalidate_document_sets_all: Redis DEL failed: %s", e)
+
+
+# ---------------------------------------------------------------------------
+# Internals
+# ---------------------------------------------------------------------------
+
+
+def _build(user_id: UUID | None, db_session: Session) -> list[DocumentSet]:
+    """Fetch the document sets for ``user_id`` and convert each, with its
+    cc-pairs, into the API ``DocumentSet`` model.
+
+    Imports are local: ``db.document_set`` imports this module at load
+    time, so importing it back at module level would create a cycle."""
+    from danswer.db.document_set import fetch_user_document_sets
+    from danswer.server.documents.models import ConnectorCredentialPairDescriptor
+    from danswer.server.documents.models import ConnectorSnapshot
+    from danswer.server.documents.models import CredentialSnapshot
+
+    snapshots: list[DocumentSet] = []
+    rows = fetch_user_document_sets(user_id=user_id, db_session=db_session)
+    for db_set, pairs in rows:
+        has_private = any(not pair.is_public for pair in pairs)
+        descriptors = [
+            ConnectorCredentialPairDescriptor(
+                id=pair.id,
+                name=pair.name,
+                connector=ConnectorSnapshot.from_connector_db_model(pair.connector),
+                credential=CredentialSnapshot.from_credential_db_model(pair.credential),
+            )
+            for pair in pairs
+        ]
+        snapshots.append(
+            DocumentSet(
+                id=db_set.id,
+                name=db_set.name,
+                description=db_set.description,
+                contains_non_public=has_private,
+                cc_pair_descriptors=descriptors,
+                is_up_to_date=db_set.is_up_to_date,
+                is_public=db_set.is_public,
+                users=[user.id for user in db_set.users],
+                groups=[group.id for group in db_set.groups],
+            )
+        )
+    return snapshots
+
+
+# ---- Fail-open Redis helpers (mirror persona_cache.py) ----
+
+
+def _safe_get(key: str) -> tuple[bool, Any]:
+    """Fetch and JSON-decode ``key``. Returns ``(True, value)`` on success and
+    ``(False, None)`` for a missing key, a Redis error or undecodable data."""
+    try:
+        # cast: narrows redis-py's sync/async ``get`` union to bytes | None.
+        raw = cast("bytes | None", get_redis_client().get(key))
+    except Exception as e:
+        logger.warning("document_set_cache: Redis GET failed for %s: %s", key, e)
+        return (False, None)
+    if raw is None:
+        return (False, None)
+    try:
+        decoded = json.loads(raw)
+    except (TypeError, ValueError) as e:
+        logger.warning("document_set_cache: corrupt entry at %s, ignoring: %s", key, e)
+        return (False, None)
+    return (True, decoded)
+
+
+def _safe_set(key: str, val: Any) -> None:  # best-effort; failures only log
+    try:
+        encoded = json.dumps(val)
+    except (TypeError, ValueError) as e:
+        logger.warning("document_set_cache: skipping non-JSON value at %s: %s", key, e)
+    else:
+        try:
+            get_redis_client().set(key, encoded, ex=DOCUMENT_SET_CACHE_TTL_SECONDS)
+        except Exception as e:
+            logger.warning("document_set_cache: Redis SET failed for %s: %s", key, e)
diff --git a/backend/danswer/db/embedding_model.py b/backend/danswer/db/embedding_model.py
index ae2b98d514f..3a449e92d21 100644
--- a/backend/danswer/db/embedding_model.py
+++ b/backend/danswer/db/embedding_model.py
@@ -47,6 +47,7 @@ def get_current_db_embedding_model(db_session: Session) -> EmbeddingModel:
         select(EmbeddingModel)
         .where(EmbeddingModel.status == IndexModelStatus.PRESENT)
         .order_by(EmbeddingModel.id.desc())
+        .limit(1)  # .scalars().first() doesn't add LIMIT; table is tiny, be explicit
     )
     result = db_session.execute(query)
     latest_model = result.scalars().first()
@@ -62,6 +63,7 @@ def get_secondary_db_embedding_model(db_session: Session) -> EmbeddingModel | No
         select(EmbeddingModel)
         .where(EmbeddingModel.status == IndexModelStatus.FUTURE)
         .order_by(EmbeddingModel.id.desc())
+        .limit(1)  # .scalars().first() doesn't add LIMIT; table is tiny, be explicit
     )
     result = db_session.execute(query)
     latest_model = result.scalars().first()
diff --git a/backend/danswer/db/engine.py b/backend/danswer/db/engine.py
index 14174f20e6d..1c3c0a69cae 100644
--- a/backend/danswer/db/engine.py
+++ b/backend/danswer/db/engine.py
@@ -16,6 +16,8 @@
 from danswer.configs.app_configs import POSTGRES_DB
 from danswer.configs.app_configs import POSTGRES_HOST
 from danswer.configs.app_configs import POSTGRES_PASSWORD
+from danswer.configs.app_configs import POSTGRES_POOL_OVERFLOW
+from danswer.configs.app_configs import POSTGRES_POOL_SIZE
 from danswer.configs.app_configs import POSTGRES_PORT
 from danswer.configs.app_configs import POSTGRES_USER
 from danswer.utils.logger import setup_logger
@@ -69,8 +71,8 @@ def get_sqlalchemy_engine() -> Engine:
 
         _SYNC_ENGINE = create_engine(
             connection_string,
-            pool_size=40,
-            max_overflow=10,
+            pool_size=POSTGRES_POOL_SIZE,
+            max_overflow=POSTGRES_POOL_OVERFLOW,
             pool_pre_ping=True,
             connect_args=keepalive_kwargs,
         )
@@ -82,7 +84,9 @@ def get_sqlalchemy_async_engine() -> AsyncEngine:
     if _ASYNC_ENGINE is None:
         connection_string = build_connection_string()
         _ASYNC_ENGINE = create_async_engine(
-            connection_string, pool_size=40, max_overflow=10
+            connection_string,
+            pool_size=POSTGRES_POOL_SIZE,
+            max_overflow=POSTGRES_POOL_OVERFLOW,
         )
     return _ASYNC_ENGINE
 
diff --git a/backend/danswer/db/index_attempt.py b/backend/danswer/db/index_attempt.py
index 6bb188360d2..fcd4a18b938 100644
--- a/backend/danswer/db/index_attempt.py
+++ b/backend/danswer/db/index_attempt.py
@@ -6,6 +6,7 @@
 from sqlalchemy import desc
 from sqlalchemy import func
 from sqlalchemy import or_
+from sqlalchemy import Select
 from sqlalchemy import select
 from sqlalchemy import text
 from sqlalchemy import update
@@ -238,6 +239,15 @@ def get_last_attempt(
     # Note, the below is using time_created instead of time_updated
     stmt = stmt.order_by(desc(IndexAttempt.time_created))
 
+    # LIMIT 1 in SQL — NOT just Result.first(). `execute(stmt).scalars().first()`
+    # does not add a LIMIT, so without this the DB returns the cc-pair's ENTIRE
+    # attempt history (psycopg2 buffers it all client-side, the ORM materializes
+    # every row) and we throw all but one away. The indexing scheduler calls this
+    # once per cc-pair every loop, so with a large index_attempt table that spiked
+    # the scheduler to multi-GB per cycle (OOMKilled). With LIMIT 1 the DB returns
+    # one row. See update.py::create_indexing_jobs.
+    stmt = stmt.limit(1)
+
     return db_session.execute(stmt).scalars().first()
 
 
@@ -292,7 +302,12 @@ def get_index_attempts_for_cc_pair(
     cc_pair_identifier: ConnectorCredentialPairIdentifier,
     only_current: bool = True,
     disinclude_finished: bool = False,
+    limit: int | None = None,
 ) -> Sequence[IndexAttempt]:
+    # `limit` is optional and defaults to None (unbounded — unchanged behavior).
+    # IndexAttempt rows carry large Text columns (error_msg, full_exception_trace),
+    # so callers that only need existence or a recent slice should pass a limit
+    # rather than materialize a busy cc-pair's entire history.
     stmt = select(IndexAttempt).where(
         and_(
             IndexAttempt.connector_id == cc_pair_identifier.connector_id,
@@ -311,6 +326,52 @@ def get_index_attempts_for_cc_pair(
         )
 
     stmt = stmt.order_by(IndexAttempt.time_created.desc())
+    if limit is not None:
+        stmt = stmt.limit(limit)
+    return db_session.execute(stmt).scalars().all()
+
+
+def _cc_pair_index_attempts_base_stmt(
+    cc_pair_identifier: ConnectorCredentialPairIdentifier,
+    only_current: bool,
+) -> Select:
+    """Build the unordered, unlimited SELECT over one cc-pair's index attempts.
+    With ``only_current`` it also joins EmbeddingModel and keeps PRESENT only."""
+    belongs_to_cc_pair = and_(
+        IndexAttempt.connector_id == cc_pair_identifier.connector_id,
+        IndexAttempt.credential_id == cc_pair_identifier.credential_id,
+    )
+    scoped = select(IndexAttempt).where(belongs_to_cc_pair)
+    if not only_current:
+        return scoped
+    # Restrict to attempts made against the embedding model marked PRESENT.
+    return scoped.join(EmbeddingModel).where(
+        EmbeddingModel.status == IndexModelStatus.PRESENT
+    )
+
+
+def count_index_attempts_for_cc_pair(
+    db_session: Session,
+    cc_pair_identifier: ConnectorCredentialPairIdentifier,
+    only_current: bool = True,
+) -> int:
+    scoped = _cc_pair_index_attempts_base_stmt(cc_pair_identifier, only_current)
+    total = select(func.count()).select_from(scoped.subquery())  # COUNT(*) over scope
+    return db_session.execute(total).scalar_one()
+
+
+def get_paginated_index_attempts_for_cc_pair(
+    db_session: Session,
+    cc_pair_identifier: ConnectorCredentialPairIdentifier,
+    page: int,
+    page_size: int,
+    only_current: bool = True,
+) -> Sequence[IndexAttempt]:
+    """One page of a cc-pair's index attempts, newest first. `page` is 0-based.
+    Server-side LIMIT/OFFSET (never the full history); `id` is the sort tiebreak."""
+    stmt = _cc_pair_index_attempts_base_stmt(cc_pair_identifier, only_current)
+    stmt = stmt.order_by(IndexAttempt.time_created.desc(), IndexAttempt.id.desc())
+    stmt = stmt.limit(page_size).offset(max(page, 0) * page_size)
     return db_session.execute(stmt).scalars().all()
 
 
diff --git a/backend/danswer/db/models.py b/backend/danswer/db/models.py
index 4a07d4c2887..f99b0da8f24 100644
--- a/backend/danswer/db/models.py
+++ b/backend/danswer/db/models.py
@@ -11,6 +11,7 @@
 from fastapi_users_db_sqlalchemy import SQLAlchemyBaseOAuthAccountTableUUID
 from fastapi_users_db_sqlalchemy import SQLAlchemyBaseUserTableUUID
 from fastapi_users_db_sqlalchemy.access_token import SQLAlchemyBaseAccessTokenTableUUID
+from fastapi_users_db_sqlalchemy.generics import GUID
 from sqlalchemy import Boolean
 from sqlalchemy import Date
 from sqlalchemy import DateTime
@@ -327,6 +328,13 @@ class Document(Base):
     doc_updated_at: Mapped[datetime.datetime | None] = mapped_column(
         DateTime(timezone=True), nullable=True
     )
+    # sha256 of the document's INDEXED content (sections/title/metadata/owners,
+    # NOT doc_updated_at) as of the last SUCCESSFUL index into Vespa. Lets the
+    # indexing pipeline skip the expensive Vespa clear-and-rewrite when a
+    # connector re-emits a document whose timestamp advanced but whose content
+    # is identical (e.g. Salesforce LastModifiedDate churn). Nullable: rows
+    # indexed before this column existed fall back to the doc_updated_at skip.
+    indexed_content_hash: Mapped[str | None] = mapped_column(String, nullable=True)
     # The following are not attached to User because the account/email may not be known
     # within Danswer
     # Something like the document creator
@@ -1177,7 +1185,12 @@ class PGFileStore(Base):
     file_origin: Mapped[FileOrigin] = mapped_column(Enum(FileOrigin, native_enum=False))
     file_type: Mapped[str] = mapped_column(String, default="text/plain")
     file_metadata: Mapped[JSON_ro] = mapped_column(postgresql.JSONB(), nullable=True)
-    lobj_oid: Mapped[int] = mapped_column(Integer, nullable=False)
+    # Exactly one of these locates the bytes:
+    #   lobj_oid    — Postgres large object (PostgresBackedFileStore)
+    #   object_key  — Blob/object key (AzureBlobFileStore); metadata stays here
+    # Both nullable so the two backends coexist during migration.
+    lobj_oid: Mapped[int | None] = mapped_column(Integer, nullable=True)
+    object_key: Mapped[str | None] = mapped_column(String, nullable=True)
 
 
 """
@@ -1453,7 +1466,7 @@ class AnalyticsDailyRollup(Base):
     retention deletes.
 
     `chat_message` / `chat_session` rows older than RETENTION_DAYS_CHAT
-    (default 30d) are purged by the daily retention sweep. The analytics
+    (default 90d) are purged by the daily retention sweep. The analytics
     endpoints used to read directly from those tables, so any date range
     older than ~30 days returned zeros. This rollup table is computed
     BEFORE the retention sweep each day (Celery beat at 07:30 UTC, sweep
@@ -1502,3 +1515,107 @@ class AnalyticsDailyRollup(Base):
         server_default=func.now(),
         onupdate=func.now(),
     )
+
+
+class AnalyticsUserFirstSeen(Base):
+    """Durable record of the first UTC date each user used chat (asked a
+    question). Powers the adoption curve ("how many distinct users have ever
+    tried chat") on the admin Analytics page.
+
+    Populated incrementally by the analytics rollup (BEFORE the retention
+    sweep), one row per user, ever — ``first_seen_date`` is written once and
+    never rewritten (INSERT ... ON CONFLICT DO NOTHING), so it can move neither
+    forward nor back. This is what makes adoption survive chat retention: once
+    chat_message rows age out of RETENTION_DAYS_CHAT they're deleted, so "first
+    time we saw user X" can no longer be recomputed from raw data — it must be
+    captured here while the data still exists.
+
+    Deliberately NO foreign key to ``user`` (mirrors AnalyticsDailyRollup's
+    no-FK stance): deleting a user must not erase the historical fact that
+    they once adopted chat, and must not cascade into this aggregate.
+    """
+
+    __tablename__ = "analytics_user_first_seen"
+
+    user_id: Mapped[UUID] = mapped_column(GUID(), primary_key=True)
+    first_seen_date: Mapped[datetime.date] = mapped_column(
+        Date, nullable=False, index=True
+    )
+    created_at: Mapped[datetime.datetime] = mapped_column(
+        DateTime(timezone=True), nullable=False, server_default=func.now()
+    )
+
+
+class AnalyticsUserDailyStats(Base):
+    """Durable per-user-per-day chat activity counts. Powers the "top users
+    by activity" leaderboard on the admin Analytics page.
+
+    Same durability contract as AnalyticsDailyRollup / AnalyticsUserFirstSeen:
+    upserted daily by the rollup (one row per active user per UTC day) BEFORE
+    the retention sweep, then kept indefinitely. Reading this aggregate, not
+    raw chat_message, gives full history regardless of RETENTION_DAYS_CHAT.
+
+    Idempotent: the rollup re-upserts the sliding recompute window with
+    ON CONFLICT (user_id, date) DO UPDATE, so late-arriving feedback is
+    reflected. NOTE(review): SQLAlchemy does not apply ``onupdate`` on that
+    path, so the upsert must set ``rolled_up_at`` itself — confirm it does.
+    No FK to `user` (see AnalyticsUserFirstSeen): the email is joined live at
+    query time, so a deleted user drops off without erasing historical counts.
+    """
+
+    __tablename__ = "analytics_user_daily_stats"
+
+    user_id: Mapped[UUID] = mapped_column(GUID(), primary_key=True)
+    date: Mapped[datetime.date] = mapped_column(Date, primary_key=True)
+    message_count: Mapped[int] = mapped_column(
+        Integer, nullable=False, default=0, server_default="0"
+    )
+    like_count: Mapped[int] = mapped_column(
+        Integer, nullable=False, default=0, server_default="0"
+    )
+    dislike_count: Mapped[int] = mapped_column(
+        Integer, nullable=False, default=0, server_default="0"
+    )
+    rolled_up_at: Mapped[datetime.datetime] = mapped_column(
+        DateTime(timezone=True),
+        nullable=False,
+        server_default=func.now(),
+        onupdate=func.now(),
+    )
+
+
+class AnalyticsPersonaDailyStats(Base):
+    """Durable per-assistant-per-day chat activity counts. Powers the
+    "most-used assistants" leaderboard, and (by joining persona__document_set
+    at query time) an approximate "datasets in use" view.
+
+    Same durability contract as the other analytics rollups: upserted daily
+    BEFORE the retention sweep, kept indefinitely (outlives RETENTION_DAYS_CHAT).
+    No FK to `persona`: the name is joined live, so a deleted assistant drops
+    off the leaderboard but keeps its counts. ``session_count`` = distinct chat
+    sessions with an assistant reply that day. NOTE(review): upserts via ON
+    CONFLICT DO UPDATE skip ``onupdate`` — confirm they set ``rolled_up_at``.
+    """
+
+    __tablename__ = "analytics_persona_daily_stats"
+
+    persona_id: Mapped[int] = mapped_column(Integer, primary_key=True)
+    date: Mapped[datetime.date] = mapped_column(Date, primary_key=True)
+    session_count: Mapped[int] = mapped_column(
+        Integer, nullable=False, default=0, server_default="0"
+    )
+    message_count: Mapped[int] = mapped_column(
+        Integer, nullable=False, default=0, server_default="0"
+    )
+    like_count: Mapped[int] = mapped_column(
+        Integer, nullable=False, default=0, server_default="0"
+    )
+    dislike_count: Mapped[int] = mapped_column(
+        Integer, nullable=False, default=0, server_default="0"
+    )
+    rolled_up_at: Mapped[datetime.datetime] = mapped_column(
+        DateTime(timezone=True),
+        nullable=False,
+        server_default=func.now(),
+        onupdate=func.now(),
+    )
diff --git a/backend/danswer/db/persona.py b/backend/danswer/db/persona.py
index 946a3f897e4..e39eb47306b 100644
--- a/backend/danswer/db/persona.py
+++ b/backend/danswer/db/persona.py
@@ -24,6 +24,7 @@
 from danswer.db.models import Tool
 from danswer.db.models import User
 from danswer.db.models import User__UserGroup
+from danswer.db.persona_cache import invalidate_personas_all
 from danswer.search.enums import RecencyBiasSetting
 from danswer.server.features.persona.models import CreatePersonaRequest
 from danswer.server.features.persona.models import PersonaSnapshot
@@ -48,6 +49,7 @@ def make_persona_private(
             db_session.add(Persona__User(persona_id=persona_id, user_id=user_uuid))
 
         db_session.commit()
+        invalidate_personas_all()  # Persona__User membership changed
 
     # May cause error if someone switches down to MIT from EE
     if group_ids:
@@ -218,6 +220,7 @@ def mark_persona_as_deleted(
         )
     persona.deleted = True
     db_session.commit()
+    invalidate_personas_all()
 
 
 def mark_persona_as_not_deleted(
@@ -231,6 +234,7 @@ def mark_persona_as_not_deleted(
     if persona.deleted:
         persona.deleted = False
         db_session.commit()
+        invalidate_personas_all()
     else:
         raise ValueError(f"Persona with ID {persona_id} is not deleted.")
 
@@ -246,6 +250,7 @@ def mark_delete_persona_by_name(
 
     db_session.execute(stmt)
     db_session.commit()
+    invalidate_personas_all()
 
 
 def update_all_personas_display_priority(
@@ -262,6 +267,7 @@ def update_all_personas_display_priority(
         persona.display_priority = display_priority_map[persona.id]
 
     db_session.commit()
+    invalidate_personas_all()
 
 
 def upsert_prompt(
@@ -430,9 +436,14 @@ def upsert_persona(
 
     if commit:
         db_session.commit()
+        invalidate_personas_all()
     else:
         # flush the session so that the persona has an ID
         db_session.flush()
+        # No bust here — caller hasn't committed. They are responsible for
+        # invalidating after their final commit, OR they're being called
+        # by a wrapper like create_update_persona whose subsequent steps
+        # (make_persona_private) will commit and bust.
 
     return persona
 
@@ -460,6 +471,7 @@ def delete_old_default_personas(
 
     db_session.execute(stmt)
     db_session.commit()
+    invalidate_personas_all()
 
 
 def update_persona_visibility(
@@ -470,6 +482,7 @@ def update_persona_visibility(
     persona = get_persona_by_id(persona_id=persona_id, user=None, db_session=db_session)
     persona.is_visible = is_visible
     db_session.commit()
+    invalidate_personas_all()
 
 
 def check_user_can_edit_persona(user: User | None, persona: Persona) -> None:
@@ -636,6 +649,7 @@ def delete_persona_by_name(
     db_session.execute(stmt)
 
     db_session.commit()
+    invalidate_personas_all()
 
 
 def get_persona_with_docset_and_prompts(
diff --git a/backend/danswer/db/persona_cache.py b/backend/danswer/db/persona_cache.py
new file mode 100644
index 00000000000..81905150234
--- /dev/null
+++ b/backend/danswer/db/persona_cache.py
@@ -0,0 +1,299 @@
+"""Per-user persona ("assistant") list cache, Redis-backed.
+
+The "Manage Assistants" tile in the chat UI fires
+``GET /persona`` → ``server/features/persona/api.py::list_personas`` →
+``db/persona.py::get_personas(user_id, …)``. That query is a multi-OR
+permission filter over ``Persona`` joined with ``Persona__User``,
+``Persona__UserGroup`` and ``User__UserGroup`` (the user's groups). It
+runs once per user per page-load; at hundreds of users opening chat
+around the same time, the burst hits the DB connection pool harder
+than it deserves to.
+
+This module shifts the work to Redis with a **global cache + Python
+filter** shape:
+
+  ``personas:all:not_deleted``         — JSON list of every non-deleted
+                                         ``PersonaSnapshot``. Shared
+                                         across users, so 200 concurrent
+                                         first-clicks become ~1 DB
+                                         query rather than 200.
+
+  ``personas:groups:{user_id}``        — JSON list of the user's group
+                                         ids; cheap one-row indexed
+                                         lookup but worth caching since
+                                         it's hit on every persona-list
+                                         call.
+
+Because ``PersonaSnapshot`` already carries the permission inputs
+(``is_public``, ``users``, ``groups``), the filter runs in Python on
+the cached list:
+
+    persona.is_public
+    OR user_id in {u.id for u in persona.users}      # direct grant
+    OR (user_group_ids ∩ set(persona.groups))         # group grant
+
+This mirrors the SQL OR-block in :func:`danswer.db.persona.get_personas`
+exactly — the parity is locked down by tests.
+
+**Invalidation:** explicit, invalidate-on-write. Every mutation that affects
+``Persona`` / ``Persona__User`` / ``Persona__UserGroup`` calls
+:func:`invalidate_personas_all` after commit; every change to
+``User__UserGroup`` calls :func:`invalidate_user_groups(user_id)`. The
+``PERSONA_CACHE_TTL_SECONDS`` (24 h default) is *only* a long-tail
+safety net for missed busts — the primary mechanism is explicit.
+
+**Fail-open**: any Redis error logs and falls through to a direct DB
+read. A Redis outage degrades latency, not availability.
+
+**Default OFF**: ``PERSONA_CACHE_ENABLED=false`` keeps the existing
+direct-DB path. Enable per environment once Redis is reachable.
+"""
+from __future__ import annotations
+
+import json
+from typing import Any
+from uuid import UUID
+
+from sqlalchemy import select
+from sqlalchemy.orm import Session
+
+from danswer.configs.app_configs import PERSONA_CACHE_ENABLED
+from danswer.configs.app_configs import PERSONA_CACHE_TTL_SECONDS
+from danswer.db.models import User__UserGroup
+from danswer.redis.redis_pool import DANSWER_REDIS_KEY_PREFIX
+from danswer.redis.redis_pool import get_redis_client
+from danswer.server.features.persona.models import PersonaSnapshot
+from danswer.utils.logger import setup_logger
+
+
+logger = setup_logger()
+
+
+# Single shared key for the "all non-deleted personas" snapshot list. Any
+# Persona / Persona__User / Persona__UserGroup mutation must invalidate this.
+_PERSONAS_ALL_KEY = DANSWER_REDIS_KEY_PREFIX + "personas:all:not_deleted"
+
+# Per-user namespace for cached group memberships. User__UserGroup mutations
+# must invalidate the affected user(s).
+_USER_GROUPS_KEY_PREFIX = DANSWER_REDIS_KEY_PREFIX + "personas:groups:"
+
+
+# ---------------------------------------------------------------------------
+# Public API — read path
+# ---------------------------------------------------------------------------
+
+
+def get_personas_for_user_cached(
+    user_id: UUID | None,
+    db_session: Session,
+    include_deleted: bool = False,
+) -> list[PersonaSnapshot]:
+    """List the ``PersonaSnapshot``s that ``user_id`` is allowed to see.
+
+    Three routes, checked in this order:
+
+    1. Cache disabled, or ``include_deleted=True`` → bypass Redis and read
+       through :func:`danswer.db.persona.get_personas`. Listing deleted
+       personas is a rare admin-only request, so it is deliberately left
+       uncached; a smaller key set means fewer ways to mis-key the hot
+       path.
+    2. ``user_id is None`` (admin call) → the shared cached list, returned
+       without filtering.
+    3. Authenticated user → the shared list plus the user's group ids
+       (each from cache, or the DB on a miss), narrowed by the Python
+       permission filter.
+    """
+    if PERSONA_CACHE_ENABLED and not include_deleted:
+        shared = _get_all_personas_cached(db_session)
+        if user_id is None:
+            # Admin / no-auth path: the list is returned without filtering.
+            return shared
+        group_ids = _get_user_group_ids_cached(user_id, db_session)
+        return _filter_personas_for_user(shared, user_id, group_ids)
+
+    # Imported lazily: `db.persona` imports this module, so a top-level
+    # import would be circular. Only this uncached fallback needs it.
+    from danswer.db.persona import get_personas
+
+    db_personas = get_personas(
+        user_id=user_id,
+        db_session=db_session,
+        include_deleted=include_deleted,
+    )
+    return [PersonaSnapshot.from_model(row) for row in db_personas]
+
+
+# ---------------------------------------------------------------------------
+# Public API — invalidation
+# ---------------------------------------------------------------------------
+
+
+def invalidate_personas_all() -> None:
+    """Delete the shared "all personas" cache entry.
+
+    Must run *after* ``db_session.commit()`` of any write that touches
+    ``Persona``, ``Persona__User``, or ``Persona__UserGroup``. Busting
+    before the commit opens a stale-fill race: a reader landing between
+    the bust and the commit would repopulate the key with the
+    pre-mutation snapshot.
+
+    While the cache is disabled this returns before any Redis call, so
+    write paths pay nothing for it. Redis errors are logged, not raised.
+    """
+    if PERSONA_CACHE_ENABLED:
+        try:
+            get_redis_client().delete(_PERSONAS_ALL_KEY)
+        except Exception as err:
+            # Fail-open: the TTL safety net clears the entry eventually.
+            # Logged loudly so dashboards surface a persistent Redis
+            # outage.
+            logger.warning("invalidate_personas_all: Redis DEL failed: %s", err)
+
+
+def invalidate_user_groups(user_id: UUID) -> None:
+    """Delete the cached group-id list belonging to ``user_id``.
+
+    Run it whenever the user's ``User__UserGroup`` rows are added or
+    removed, and — like :func:`invalidate_personas_all` — only after the
+    commit. No-op while the cache is disabled; Redis errors are logged.
+    """
+    if PERSONA_CACHE_ENABLED:
+        try:
+            cache_key = _USER_GROUPS_KEY_PREFIX + str(user_id)
+            get_redis_client().delete(cache_key)
+        except Exception as err:
+            logger.warning(
+                "invalidate_user_groups(user_id=%s): Redis DEL failed: %s",
+                user_id,
+                err,
+            )
+
+
+# ---------------------------------------------------------------------------
+# Internals
+# ---------------------------------------------------------------------------
+
+
+def _get_all_personas_cached(db_session: Session) -> list[PersonaSnapshot]:
+    """Return every non-deleted persona as a ``PersonaSnapshot``.
+
+    On a usable cache hit the stored JSON is parsed back into Pydantic
+    models and the DB is never touched. On a miss, a Redis error, or an
+    entry that no longer parses, the list is rebuilt through
+    :func:`get_personas` (``user_id=None``) and a cache write is
+    attempted before returning.
+    """
+    found, entry = _safe_get(_PERSONAS_ALL_KEY)
+    if found and isinstance(entry, list):
+        try:
+            return [PersonaSnapshot.parse_obj(item) for item in entry]
+        except Exception as err:
+            # Most likely schema drift: the entry predates a change to
+            # PersonaSnapshot (say, a new required field). Fall through
+            # so it is rebuilt and overwritten in the current shape.
+            logger.warning(
+                "Cached persona list failed PersonaSnapshot parse, refilling: %s",
+                err,
+            )
+
+    from danswer.db.persona import get_personas
+
+    rows = get_personas(
+        user_id=None,
+        db_session=db_session,
+        include_deleted=False,
+    )
+    fresh = [PersonaSnapshot.from_model(row) for row in rows]
+
+    # Serialize via PersonaSnapshot.json() so nested UUIDs, enums and
+    # datetimes go through the serializer Pydantic uses on the wire.
+    as_json: list[Any] = [json.loads(snap.json()) for snap in fresh]
+    _safe_set(_PERSONAS_ALL_KEY, as_json)
+    return fresh
+
+
+def _get_user_group_ids_cached(user_id: UUID, db_session: Session) -> list[int]:
+    """Return the ``user_group_id``s the user belongs to: cache first, then DB.
+    Hit on every persona-list call, so caching wins in aggregate. A cached
+    list holding non-integer items is treated as a miss (fail-open)."""
+    key = _USER_GROUPS_KEY_PREFIX + str(user_id)
+    hit, cached = _safe_get(key)
+    if hit and isinstance(cached, list):
+        try:
+            return [int(x) for x in cached]
+        except (TypeError, ValueError) as e:
+            logger.warning("persona_cache: corrupt entry at %s, ignoring: %s", key, e)
+
+    rows = db_session.scalars(
+        select(User__UserGroup.user_group_id).where(User__UserGroup.user_id == user_id)
+    ).all()
+    group_ids = [int(r) for r in rows]
+    _safe_set(key, group_ids)
+    return group_ids
+
+
+def _filter_personas_for_user(
+    personas: list[PersonaSnapshot],
+    user_id: UUID,
+    user_group_ids: list[int],
+) -> list[PersonaSnapshot]:
+    """In-Python twin of the permission OR-block ``get_personas`` runs in SQL.
+
+    A persona is kept, in its original position, when ANY of these holds:
+        * ``is_public`` is truthy
+          (SQL: Persona.is_public)
+        * some entry of ``users`` has ``id == user_id``
+          (SQL: Persona.id IN Persona__User where user_id = U)
+        * ``groups`` shares at least one id with ``user_group_ids``
+          (SQL: Persona.id IN Persona__UserGroup
+                where user_group_id IN <U's groups>)
+
+    Parity with the SQL is covered by tests using representative permission
+    shapes; change both sides together.
+    """
+    memberships = set(user_group_ids)
+
+    def _can_see(snapshot: PersonaSnapshot) -> bool:
+        if snapshot.is_public:
+            return True
+        if any(grantee.id == user_id for grantee in snapshot.users):
+            return True
+        return bool(memberships.intersection(snapshot.groups))
+
+    return [snapshot for snapshot in personas if _can_see(snapshot)]
+
+
+# ---- Fail-open Redis helpers (mirror the P1 cache module's posture) ----
+
+
+def _safe_get(key: str) -> tuple[bool, Any]:
+    """Fail-open cache read. Yields ``(True, decoded_json)`` on a hit and
+    ``(False, None)`` for a missing key, a Redis error, or a corrupt payload."""
+    miss: tuple[bool, Any] = (False, None)
+    try:
+        stored = get_redis_client().get(key)
+    except Exception as err:
+        logger.warning("persona_cache: Redis GET failed for %s: %s", key, err)
+        return miss
+    if stored is None:
+        return miss
+    try:
+        return (True, json.loads(stored))
+    except (TypeError, ValueError) as err:
+        logger.warning("persona_cache: corrupt entry at %s, ignoring: %s", key, err)
+        return miss
+
+
+def _safe_set(key: str, val: Any) -> None:
+    """Best-effort write of ``val`` as JSON under ``key`` with the cache TTL."""
+    try:
+        encoded = json.dumps(val)
+    except (TypeError, ValueError) as err:
+        # Not expected: both callers only pass JSON-clean data. When it does
+        # happen the write is skipped; the caller already has its result.
+        logger.warning("persona_cache: skipping non-JSON value at %s: %s", key, err)
+    else:
+        try:
+            get_redis_client().set(key, encoded, ex=PERSONA_CACHE_TTL_SECONDS)
+        except Exception as err:
+            logger.warning("persona_cache: Redis SET failed for %s: %s", key, err)
diff --git a/backend/danswer/db/pg_file_store.py b/backend/danswer/db/pg_file_store.py
index 1333dcd6cee..685c3469ee9 100644
--- a/backend/danswer/db/pg_file_store.py
+++ b/backend/danswer/db/pg_file_store.py
@@ -102,8 +102,10 @@ def delete_lobj_by_name(
         logger.info(f"no file with name {lobj_name} found")
         return
 
-    pg_conn = get_pg_conn_from_session(db_session)
-    pg_conn.lobject(pgfilestore.lobj_oid).unlink()
+    # Only unlink a Postgres large object; object-store rows have no lobj.
+    if pgfilestore.lobj_oid is not None:
+        pg_conn = get_pg_conn_from_session(db_session)
+        pg_conn.lobject(pgfilestore.lobj_oid).unlink()
 
     delete_pgfilestore_by_file_name(lobj_name, db_session)
     db_session.commit()
@@ -114,25 +116,35 @@ def upsert_pgfilestore(
     display_name: str | None,
     file_origin: FileOrigin,
     file_type: str,
-    lobj_oid: int,
     db_session: Session,
+    lobj_oid: int | None = None,
+    object_key: str | None = None,
     commit: bool = False,
     file_metadata: dict | None = None,
 ) -> PGFileStore:
+    """Upsert a file_store metadata row. The bytes live in EITHER a Postgres
+    large object (``lobj_oid``) or an object-storage blob (``object_key``);
+    pass exactly one. Both backends share this metadata row."""
     pgfilestore = db_session.query(PGFileStore).filter_by(file_name=file_name).first()
 
     if pgfilestore:
-        try:
-            # This should not happen in normal execution
-            delete_lobj_by_id(lobj_oid=pgfilestore.lobj_oid, db_session=db_session)
-        except Exception:
-            # If the delete fails as well, the large object doesn't exist anyway and even if it
-            # fails to delete, it's not too terrible as most files sizes are insignificant
-            logger.error(
-                f"Failed to delete large object with oid {pgfilestore.lobj_oid}"
-            )
+        # Clean up the previous backing bytes only if it was a Postgres lobj
+        # (object-store blobs are managed by the AzureBlobFileStore itself).
+        if pgfilestore.lobj_oid is not None:
+            try:
+                delete_lobj_by_id(lobj_oid=pgfilestore.lobj_oid, db_session=db_session)
+            except Exception:
+                # Best-effort — if the lobj is already gone that's fine.
+                logger.error(
+                    f"Failed to delete large object with oid {pgfilestore.lobj_oid}"
+                )
 
         pgfilestore.lobj_oid = lobj_oid
+        pgfilestore.object_key = object_key
+        pgfilestore.display_name = display_name or file_name
+        pgfilestore.file_origin = file_origin
+        pgfilestore.file_type = file_type
+        pgfilestore.file_metadata = file_metadata
     else:
         pgfilestore = PGFileStore(
             file_name=file_name,
@@ -141,6 +153,7 @@ def upsert_pgfilestore(
             file_type=file_type,
             file_metadata=file_metadata,
             lobj_oid=lobj_oid,
+            object_key=object_key,
         )
         db_session.add(pgfilestore)
 
diff --git a/backend/danswer/db/retention.py b/backend/danswer/db/retention.py
index e5cf43699d4..4b210a97c37 100644
--- a/backend/danswer/db/retention.py
+++ b/backend/danswer/db/retention.py
@@ -87,7 +87,7 @@ def _env_int(name: str, default: int, minimum: int = 0) -> int:
 # based + keep-last-N pruning by setting RETENTION_DAYS_INDEX_ATTEMPT to a
 # positive integer; the executor short-circuits when days <= 0.
 RETENTION_DAYS_INDEX_ATTEMPT = _env_int("RETENTION_DAYS_INDEX_ATTEMPT", 0)
-RETENTION_DAYS_CHAT = _env_int("RETENTION_DAYS_CHAT", 30)
+RETENTION_DAYS_CHAT = _env_int("RETENTION_DAYS_CHAT", 90)
 RETENTION_DAYS_USAGE_REPORTS = _env_int("RETENTION_DAYS_USAGE_REPORTS", 90)
 RETENTION_DAYS_PERMISSION_SYNC = _env_int("RETENTION_DAYS_PERMISSION_SYNC", 30)
 RETENTION_KEEP_LAST_N_INDEX_ATTEMPTS = _env_int(
diff --git a/backend/danswer/db/tag.py b/backend/danswer/db/tag.py
index 66418b948e7..a539f2471a5 100644
--- a/backend/danswer/db/tag.py
+++ b/backend/danswer/db/tag.py
@@ -110,6 +110,7 @@ def get_tags_by_value_prefix_for_source_types(
     tag_value_prefix: str | None,
     sources: list[DocumentSource] | None,
     db_session: Session,
+    limit: int | None = None,
 ) -> list[Tag]:
     query = select(Tag)
 
@@ -119,6 +120,12 @@ def get_tags_by_value_prefix_for_source_types(
     if sources:
         query = query.where(Tag.source.in_(sources))
 
+    # Optional bound (default None = unbounded, unchanged). When no prefix is
+    # given this would otherwise load every Tag row for the source(s); the knob
+    # lets callers cap it without changing existing behavior.
+    if limit is not None:
+        query = query.limit(limit)
+
     result = db_session.execute(query)
 
     tags = result.scalars().all()
diff --git a/backend/danswer/document_index/vespa/index.py b/backend/danswer/document_index/vespa/index.py
index 24156d11aae..8605a9bdd86 100644
--- a/backend/danswer/document_index/vespa/index.py
+++ b/backend/danswer/document_index/vespa/index.py
@@ -248,16 +248,33 @@ def _delete_vespa_doc_chunks(
     doc_chunk_ids = _get_vespa_chunk_ids_by_document_id(
         document_id=document_id, index_name=index_name
     )
+    if not doc_chunk_ids:
+        return
 
-    for chunk_id in doc_chunk_ids:
-        try:
-            res = http_client.delete(
-                f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{chunk_id}"
-            )
-            res.raise_for_status()
-        except httpx.HTTPStatusError as e:
-            logger.error(f"Failed to delete chunk, details: {e.response.text}")
-            raise
+    def _delete_chunk(chunk_id: str) -> None:
+        res = http_client.delete(
+            f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{chunk_id}"
+        )
+        res.raise_for_status()
+
+    # Delete a document's chunks concurrently rather than one blocking HTTP
+    # round-trip at a time — sequential per-chunk DELETEs were a large part of
+    # the per-document re-index cost for multi-chunk docs. Bounded local pool
+    # (capped low so that, combined with the per-document executor in
+    # _delete_vespa_docs, total in-flight requests stay reasonable); a fresh
+    # ThreadPoolExecutor here can't deadlock against that outer pool since its
+    # threads are independent. httpx.Client is safe for concurrent use. The
+    # @retry on this function still covers transient failures.
+    with concurrent.futures.ThreadPoolExecutor(
+        max_workers=min(len(doc_chunk_ids), 8)
+    ) as executor:
+        futures = [executor.submit(_delete_chunk, cid) for cid in doc_chunk_ids]
+        for future in concurrent.futures.as_completed(futures):
+            try:
+                future.result()
+            except httpx.HTTPStatusError as e:
+                logger.error(f"Failed to delete chunk, details: {e.response.text}")
+                raise
 
 
 def _delete_vespa_docs(
diff --git a/backend/danswer/dynamic_configs/factory.py b/backend/danswer/dynamic_configs/factory.py
index 44b6e096b6d..60e46950df0 100644
--- a/backend/danswer/dynamic_configs/factory.py
+++ b/backend/danswer/dynamic_configs/factory.py
@@ -1,15 +1,33 @@
 from danswer.configs.app_configs import DYNAMIC_CONFIG_STORE
+from danswer.configs.app_configs import REDIS_KV_CACHE_ENABLED
+from danswer.configs.app_configs import REDIS_KV_CACHE_TTL_SECONDS
 from danswer.dynamic_configs.interface import DynamicConfigStore
 from danswer.dynamic_configs.store import FileSystemBackedDynamicConfigStore
 from danswer.dynamic_configs.store import PostgresBackedDynamicConfigStore
+from danswer.dynamic_configs.store import RedisCachedDynamicConfigStore
 
 
 def get_dynamic_config_store() -> DynamicConfigStore:
+    """Build the KV store selected by ``DYNAMIC_CONFIG_STORE``.
+
+    Postgres remains the source of truth. Setting
+    ``REDIS_KV_CACHE_ENABLED`` layers a read-through / write-through Redis
+    cache over it without touching call sites. The cache is a wrapper
+    around the chosen backend, not another ``DYNAMIC_CONFIG_STORE`` value,
+    so backend choice and caching are toggled independently; an
+    unrecognised store type raises.
+    """
     dynamic_config_store_type = DYNAMIC_CONFIG_STORE
     if dynamic_config_store_type == FileSystemBackedDynamicConfigStore.__name__:
         raise NotImplementedError("File based config store no longer supported")
     if dynamic_config_store_type == PostgresBackedDynamicConfigStore.__name__:
-        return PostgresBackedDynamicConfigStore()
+        pg_store: DynamicConfigStore = PostgresBackedDynamicConfigStore()
+        if not REDIS_KV_CACHE_ENABLED:
+            return pg_store
+        # Opt-in cache: wrap the Postgres store, TTL taken from config.
+        return RedisCachedDynamicConfigStore(
+            inner=pg_store, ttl_seconds=REDIS_KV_CACHE_TTL_SECONDS
+        )
 
     # TODO: change exception type
     raise Exception("Unknown dynamic config store type")
diff --git a/backend/danswer/dynamic_configs/store.py b/backend/danswer/dynamic_configs/store.py
index ee4ac3d09ae..59df2c807dd 100644
--- a/backend/danswer/dynamic_configs/store.py
+++ b/backend/danswer/dynamic_configs/store.py
@@ -1,11 +1,14 @@
 import json
 import os
+from collections.abc import Callable
 from collections.abc import Iterator
 from contextlib import contextmanager
 from pathlib import Path
 from typing import cast
 
 from filelock import FileLock
+from redis import Redis
+from redis import RedisError
 from sqlalchemy.orm import Session
 
 from danswer.db.engine import SessionFactory
@@ -13,6 +16,12 @@
 from danswer.dynamic_configs.interface import ConfigNotFoundError
 from danswer.dynamic_configs.interface import DynamicConfigStore
 from danswer.dynamic_configs.interface import JSON_ro
+from danswer.redis.redis_pool import DANSWER_REDIS_KEY_PREFIX
+from danswer.redis.redis_pool import get_redis_client
+from danswer.utils.logger import setup_logger
+
+
+logger = setup_logger()
 
 
 FILE_LOCK_TIMEOUT = 10
@@ -99,3 +108,119 @@ def delete(self, key: str) -> None:
             if result == 0:
                 raise ConfigNotFoundError
             session.commit()
+
+
+class RedisCachedDynamicConfigStore(DynamicConfigStore):
+    """Read-through / write-through Redis cache over an inner ``DynamicConfigStore``.
+
+    Mirrors the shape of upstream Onyx's ``PgRedisKVStore`` but composed
+    via wrapping rather than inheritance so the inner store stays
+    single-purpose and the cache layer can wrap *any* future backend.
+
+    Semantics:
+      * ``load``: probe Redis; on hit, return; on miss, read inner and
+        repopulate Redis (with TTL). NOTE(review): the repopulation is not
+        gated on encryption — presumably decrypted values get cached; confirm.
+      * ``store``: write the inner store first (source of truth), then
+        refresh Redis (or invalidate if ``encrypt=True``). The
+        inner-first order means a Redis success after an inner failure
+        cannot leave Redis holding a value the source of truth lacks.
+      * ``delete``: delete inner first, then Redis. Same ordering reason.
+
+    Fail-open: every Redis operation is wrapped — a Redis outage degrades
+    latency, not availability. Wrap-then-log-then-fall-through is the
+    rule throughout.
+
+    Single-tenant: keys are namespaced by :data:`_KEY_PREFIX` only (no
+    tenant id), reflecting this fork's divergence from upstream.
+    """
+
+    _KEY_PREFIX = DANSWER_REDIS_KEY_PREFIX + "kv:"
+
+    def __init__(
+        self,
+        inner: DynamicConfigStore,
+        ttl_seconds: int,
+        client_factory: Callable[[], Redis] | None = None,
+    ) -> None:
+        self._inner = inner
+        self._ttl = ttl_seconds
+        # Indirection lets tests inject a fake client without monkey-
+        # patching the global pool. Production wiring uses the default.
+        self._client_factory = client_factory or get_redis_client
+
+    # ---- DynamicConfigStore surface ----
+
+    def store(self, key: str, val: JSON_ro, encrypt: bool = False) -> None:
+        self._inner.store(key, val, encrypt=encrypt)
+        if encrypt:
+            # Never hold plaintext of an encrypted value in Redis — that
+            # would silently defeat the encryption-at-rest guarantee.
+            # Also invalidates any stale plaintext entry from before
+            # the value was switched to encrypted.
+            self._safe_redis_delete(key)
+            return
+        self._safe_redis_set(key, val)
+
+    def load(self, key: str) -> JSON_ro:
+        hit, cached = self._safe_redis_get(key)
+        if hit:
+            return cached
+        # May raise ConfigNotFoundError — propagate without caching the miss.
+        # (Negative caching has its own correctness traps; skip for now.)
+        val = self._inner.load(key)
+        self._safe_redis_set(key, val)
+        return val
+
+    def delete(self, key: str) -> None:
+        self._inner.delete(key)
+        self._safe_redis_delete(key)
+
+    # ---- private Redis helpers (all fail-open) ----
+
+    def _redis_key(self, key: str) -> str:
+        return self._KEY_PREFIX + key
+
+    def _safe_redis_get(self, key: str) -> tuple[bool, JSON_ro]:
+        """Return ``(hit, value)``. ``hit=False`` means cache miss OR
+        Redis error — caller treats both the same (read inner).
+        """
+        try:
+            raw = self._client_factory().get(self._redis_key(key))
+        except RedisError as e:
+            logger.warning("Redis GET failed for kv key=%s: %s", key, e)
+            return (False, None)
+        if raw is None:
+            return (False, None)
+        try:
+            return (True, cast(JSON_ro, json.loads(raw)))
+        except (TypeError, ValueError) as e:
+            # Corrupt or legacy-format entry — treat as a miss so the
+            # next read repopulates from the inner store.
+            logger.warning("Corrupt Redis kv entry for key=%s, ignoring: %s", key, e)
+            return (False, None)
+
+    def _safe_redis_set(self, key: str, val: JSON_ro) -> None:
+        try:
+            payload = json.dumps(val)
+        except (TypeError, ValueError) as e:
+            # Caller stored a value the inner store accepts but JSON
+            # doesn't — log and skip the cache (inner still holds truth).
+            logger.warning(
+                "Skipping Redis cache for non-JSON-serialisable key=%s: %s", key, e
+            )
+            return
+        try:
+            self._client_factory().set(
+                self._redis_key(key),
+                payload,
+                ex=self._ttl,
+            )
+        except RedisError as e:
+            logger.warning("Redis SET failed for kv key=%s: %s", key, e)
+
+    def _safe_redis_delete(self, key: str) -> None:
+        try:
+            self._client_factory().delete(self._redis_key(key))
+        except RedisError as e:
+            logger.warning("Redis DEL failed for kv key=%s: %s", key, e)
diff --git a/backend/danswer/file_store/file_store.py b/backend/danswer/file_store/file_store.py
index 9bc4c41d361..3c1c71c6d99 100644
--- a/backend/danswer/file_store/file_store.py
+++ b/backend/danswer/file_store/file_store.py
@@ -1,9 +1,16 @@
+import threading
 from abc import ABC
 from abc import abstractmethod
+from io import BytesIO
+from tempfile import SpooledTemporaryFile
+from typing import Any
 from typing import IO
 
 from sqlalchemy.orm import Session
 
+from danswer.configs.app_configs import AZURE_BLOB_CONNECTION_STRING
+from danswer.configs.app_configs import AZURE_BLOB_CONTAINER
+from danswer.configs.app_configs import FILE_STORE_TYPE
 from danswer.configs.constants import FileOrigin
 from danswer.db.models import PGFileStore
 from danswer.db.pg_file_store import create_populate_lobj
@@ -12,6 +19,10 @@
 from danswer.db.pg_file_store import get_pgfilestore_by_file_name
 from danswer.db.pg_file_store import read_lobj
 from danswer.db.pg_file_store import upsert_pgfilestore
+from danswer.file_store.constants import MAX_IN_MEMORY_SIZE
+from danswer.utils.logger import setup_logger
+
+logger = setup_logger()
 
 
 class FileStore(ABC):
@@ -106,6 +117,14 @@ def read_file(
         file_record = get_pgfilestore_by_file_name(
             file_name=file_name, db_session=self.db_session
         )
+        if file_record.lobj_oid is None:
+            # Row was written by an object-store backend — can't read it as a
+            # large object. Indicates FILE_STORE_TYPE was changed without
+            # migrating, or the wrong backend is active.
+            raise RuntimeError(
+                f"File '{file_name}' has no Postgres large object "
+                f"(object_key={file_record.object_key!r}); is FILE_STORE_TYPE correct?"
+            )
         return read_lobj(
             lobj_oid=file_record.lobj_oid,
             db_session=self.db_session,
@@ -125,7 +144,156 @@ def delete_file(self, file_name: str) -> None:
             file_record = get_pgfilestore_by_file_name(
                 file_name=file_name, db_session=self.db_session
             )
-            delete_lobj_by_id(file_record.lobj_oid, db_session=self.db_session)
+            if file_record.lobj_oid is not None:
+                delete_lobj_by_id(file_record.lobj_oid, db_session=self.db_session)
+            delete_pgfilestore_by_file_name(
+                file_name=file_name, db_session=self.db_session
+            )
+            self.db_session.commit()
+        except Exception:
+            self.db_session.rollback()
+            raise
+
+
+# --- Azure Blob backend -----------------------------------------------------
+# The azure SDK is an OPTIONAL dependency: file_store.py is imported app-wide,
+# so we must NOT import azure at module load. It's lazily imported only when
+# the Azure backend is actually constructed (and the package is present in the
+# image). The container client is a process-wide lazy singleton.
+_az_container_client: Any = None
+_az_lock = threading.Lock()
+
+
+def _parse_azure_conn_str(conn_str: str) -> dict[str, str]:
+    """Split a ';'-delimited Azure Storage connection string into key/value
+    pairs, cutting each segment at its FIRST '=' and dropping segments without one."""
+    pairs = (seg.partition("=") for seg in conn_str.split(";"))
+    return {name: value for name, sep, value in pairs if sep}
+
+
+def _get_azure_container_client() -> Any:
+    """Return the process-wide container client, building it on first use.
+    Imports azure lazily; RuntimeError if it or the connection string is missing."""
+    global _az_container_client
+    if _az_container_client is not None:
+        return _az_container_client
+    with _az_lock:
+        if _az_container_client is None:
+            if not AZURE_BLOB_CONNECTION_STRING:
+                raise RuntimeError(
+                    "FILE_STORE_TYPE=AzureBlobFileStore but "
+                    "AZURE_BLOB_CONNECTION_STRING is unset."
+                )
+            try:
+                from azure.storage.blob import BlobServiceClient  # type: ignore
+            except ImportError as e:
+                raise RuntimeError(
+                    "FILE_STORE_TYPE=AzureBlobFileStore requires the "
+                    "azure-storage-blob package (it's in requirements; "
+                    "rebuild the image or `pip install azure-storage-blob`)."
+                ) from e
+            svc = BlobServiceClient.from_connection_string(AZURE_BLOB_CONNECTION_STRING)
+            cc = svc.get_container_client(AZURE_BLOB_CONTAINER)
+            try:
+                cc.create_container()
+            except Exception as e:
+                # Best-effort: an existing container is expected; log the rest.
+                if type(e).__name__ != "ResourceExistsError":
+                    logger.warning(f"Could not create blob container: {e}")
+            _az_container_client = cc
+    return _az_container_client
+
+
+class AzureBlobFileStore(FileStore):
+    """File store that keeps the BYTES in Azure Blob Storage and the METADATA
+    row in Postgres (``file_store`` table, ``object_key`` column).
+
+    Why hybrid: the metadata is small and queryable, but the blob bytes are
+    what bloat Postgres and (via read_lobj) pin a DB connection for the whole
+    read. Moving only the bytes off-DB fixes both — reads stream straight from
+    Blob and never hold a Postgres connection.
+
+    Reads fall back to the Postgres large object when a row hasn't been
+    migrated yet (``object_key is None`` but ``lobj_oid`` set), so the cutover
+    is graceful: flip FILE_STORE_TYPE, new files go to Blob, old files keep
+    working until the migration script moves them.
+    """
+
+    def __init__(self, db_session: Session):
+        self.db_session = db_session
+
+    def save_file(
+        self,
+        file_name: str,
+        content: IO,
+        display_name: str | None,
+        file_origin: FileOrigin,
+        file_type: str,
+        file_metadata: dict | None = None,
+    ) -> None:
+        """Upload `content` to Blob under `file_name`, then upsert and commit
+        the metadata row. Any failure rolls the session back and re-raises."""
+        try:
+            container = _get_azure_container_client()
+            # The blob key is the file name itself (already the unique id).
+            container.upload_blob(name=file_name, data=content, overwrite=True)
+            upsert_pgfilestore(
+                file_name=file_name,
+                display_name=display_name or file_name,
+                file_origin=file_origin,
+                file_type=file_type,
+                object_key=file_name,
+                lobj_oid=None,
+                db_session=self.db_session,
+                file_metadata=file_metadata,
+            )
+            self.db_session.commit()
+        except Exception:
+            self.db_session.rollback()
+            raise
+
+    def read_file(
+        self, file_name: str, mode: str | None = None, use_tempfile: bool = False
+    ) -> IO:
+        """Stream the file from Blob, or from the legacy Postgres large object
+        when the row has no `object_key`. `mode` is only forwarded to read_lobj."""
+        db = self.db_session
+        row = get_pgfilestore_by_file_name(file_name=file_name, db_session=db)
+        if row.object_key is None:
+            # No object_key: fall back to the Postgres large object.
+            if row.lobj_oid is None:
+                raise RuntimeError(
+                    f"File '{file_name}' has neither object_key nor lobj_oid."
+                )
+            return read_lobj(
+                lobj_oid=row.lobj_oid,
+                db_session=db,
+                mode=mode,
+                use_tempfile=use_tempfile,
+            )
+        stream = _get_azure_container_client().download_blob(row.object_key)
+        if not use_tempfile:
+            return BytesIO(stream.readall())
+        spooled: IO = SpooledTemporaryFile(max_size=MAX_IN_MEMORY_SIZE)
+        stream.readinto(spooled)
+        spooled.seek(0)
+        return spooled
+
+    def delete_file(self, file_name: str) -> None:
+        try:
+            record = get_pgfilestore_by_file_name(
+                file_name=file_name, db_session=self.db_session
+            )
+            if record.object_key is not None:
+                try:
+                    _get_azure_container_client().delete_blob(record.object_key)
+                except Exception:
+                    logger.error(
+                        f"Failed to delete blob {record.object_key}; "
+                        "removing the metadata row anyway."
+                    )
+            elif record.lobj_oid is not None:
+                delete_lobj_by_id(record.lobj_oid, db_session=self.db_session)
             delete_pgfilestore_by_file_name(
                 file_name=file_name, db_session=self.db_session
             )
@@ -134,7 +302,99 @@ def delete_file(self, file_name: str) -> None:
             self.db_session.rollback()
             raise
 
+    def generate_upload_sas_url(self, file_name: str, expiry_minutes: int = 30) -> str:
+        """Mint a short-lived, write/create-scoped SAS URL so a client can PUT
+        bytes DIRECTLY to Blob (bypassing the server). Used by the chat
+        direct-upload flow. The blob is `file_name`; record the metadata row
+        afterward with :meth:`register_object`. The token expires
+        `expiry_minutes` from now (UTC). Raises RuntimeError when the
+        connection string is malformed or carries no account key."""
+        import datetime
+        from urllib.parse import quote
+
+        from azure.storage.blob import BlobSasPermissions  # type: ignore
+        from azure.storage.blob import BlobServiceClient  # type: ignore
+        from azure.storage.blob import generate_blob_sas  # type: ignore
+
+        conn_str = AZURE_BLOB_CONNECTION_STRING or ""  # tolerate an unset value
+        # Let the SDK parse the connection string — it's authoritative about
+        # the blob endpoint (Azurite, custom endpoints, key casing/ordering).
+        try:
+            svc = BlobServiceClient.from_connection_string(conn_str)
+        except Exception as e:
+            # SAFE diagnostic: key NAMES + length only, never the secret value.
+            # Usual cause: the shell split the value on a ';'.
+            keys = sorted(_parse_azure_conn_str(conn_str).keys())
+            raise RuntimeError(
+                f"AZURE_BLOB_CONNECTION_STRING is malformed: the process received "
+                f"a value of length {len(conn_str)} with keys "
+                f"{keys}. If that's just ['DefaultEndpointsProtocol'] it was "
+                f"truncated at the first ';' — single-quote the value where it's "
+                f"set AND restart the api-server in that shell (it reads the env "
+                f"once at startup)."
+            ) from e
+        blob_endpoint = svc.url.rstrip("/")
+
+        # Mint a per-blob SAS (write+create only) so the browser never gets a
+        # broad account/service token. Prefer the SDK-parsed credential (also
+        # handles `UseDevelopmentStorage=true`), else a case-insensitive parse.
+        account_key = getattr(getattr(svc, "credential", None), "account_key", None)
+        if not account_key:
+            cfg = {k.lower(): v for k, v in _parse_azure_conn_str(conn_str).items()}
+            account_key = cfg.get("accountkey")
+        if not account_key:
+            raise RuntimeError(
+                "AZURE_BLOB_CONNECTION_STRING has no AccountKey. Direct chat "
+                "uploads require the account-key connection string (Storage "
+                "account → Access keys → Connection string) so the server can "
+                "mint a scoped, short-lived per-blob upload SAS. SAS-token / "
+                "managed-identity connection strings aren't supported here."
+            )
+
+        sas = generate_blob_sas(
+            account_name=svc.account_name,
+            container_name=AZURE_BLOB_CONTAINER,
+            blob_name=file_name,
+            account_key=account_key,
+            permission=BlobSasPermissions(write=True, create=True),
+            # Timezone-aware UTC; datetime.utcnow() is deprecated since 3.12.
+            expiry=datetime.datetime.now(datetime.timezone.utc)
+            + datetime.timedelta(minutes=expiry_minutes),
+        )
+        # The SAS signs the raw name; the URL path must be percent-encoded.
+        blob_path = quote(file_name, safe="/")
+        return f"{blob_endpoint}/{AZURE_BLOB_CONTAINER}/{blob_path}?{sas}"
+
+    def register_object(
+        self,
+        file_name: str,
+        display_name: str | None,
+        file_origin: FileOrigin,
+        file_type: str,
+        file_metadata: dict | None = None,
+    ) -> None:
+        """Record the metadata row for a blob uploaded out-of-band (e.g. a
+        client direct-to-Blob SAS upload). Does NOT touch the bytes, and does
+        not check that a blob named `file_name` actually exists in the container."""
+        upsert_pgfilestore(
+            file_name=file_name,
+            display_name=display_name or file_name,
+            file_origin=file_origin,
+            file_type=file_type,
+            object_key=file_name,
+            lobj_oid=None,
+            db_session=self.db_session,
+            file_metadata=file_metadata,
+            commit=True,
+        )
+
 
 def get_default_file_store(db_session: Session) -> FileStore:
-    # The only supported file store now is the Postgres File Store
+    """Resolve the configured file-store backend (FILE_STORE_TYPE).
+
+    Only the exact value "AzureBlobFileStore" selects the Blob backend (bytes
+    in Blob, metadata row in Postgres); any other value, including a typo,
+    falls back to Postgres large objects."""
+    if FILE_STORE_TYPE == AzureBlobFileStore.__name__:
+        return AzureBlobFileStore(db_session=db_session)
     return PostgresBackedFileStore(db_session=db_session)
diff --git a/backend/danswer/indexing/indexing_pipeline.py b/backend/danswer/indexing/indexing_pipeline.py
index 2506b4715ac..98b4f70e85e 100644
--- a/backend/danswer/indexing/indexing_pipeline.py
+++ b/backend/danswer/indexing/indexing_pipeline.py
@@ -91,20 +91,39 @@ def upsert_documents_in_db(
 def get_doc_ids_to_update(
     documents: list[Document], db_docs: list[DBDocument]
 ) -> list[Document]:
-    """Figures out which documents actually need to be updated. If a document is already present
-    and the `updated_at` hasn't changed, we shouldn't need to do anything with it."""
-    id_update_time_map = {
-        doc.id: doc.doc_updated_at for doc in db_docs if doc.doc_updated_at
-    }
+    """Figures out which documents actually need to be (re)indexed.
+
+    Two skip conditions, checked per already-present document:
+
+    1. Content-hash match: if the stored `indexed_content_hash` equals the
+       document's current content hash, the indexed representation is identical
+       and we skip — even if `doc_updated_at` advanced. This is the important
+       one for sources that bump their modified-timestamp without changing
+       content (e.g. Salesforce LastModifiedDate churn re-pulling the whole
+       corpus every poll). Benefits ALL connectors, not just Salesforce.
+    2. Timestamp fallback: whenever check 1 does not skip (no stored hash
+       yet, or the hash differs), keep the original behavior — skip if
+       `doc_updated_at` isn't newer than what's stored.
+    """
+    id_to_db_doc = {doc.id: doc for doc in db_docs}
 
     updatable_docs: list[Document] = []
     for doc in documents:
-        if (
-            doc.id in id_update_time_map
-            and doc.doc_updated_at
-            and doc.doc_updated_at <= id_update_time_map[doc.id]
-        ):
-            continue
+        db_doc = id_to_db_doc.get(doc.id)
+        if db_doc is not None:
+            # (1) content unchanged — skip regardless of timestamp
+            if (
+                db_doc.indexed_content_hash is not None
+                and db_doc.indexed_content_hash == doc.get_content_hash()
+            ):
+                continue
+            # (2) fallback: no newer content per the source timestamp
+            if (
+                doc.doc_updated_at is not None
+                and db_doc.doc_updated_at is not None
+                and doc.doc_updated_at <= db_doc.doc_updated_at
+            ):
+                continue
         updatable_docs.append(doc)
 
     return updatable_docs
@@ -140,6 +159,19 @@ def index_doc_batch(
     )
     updatable_ids = [doc.id for doc in updatable_docs]
 
+    # Visibility into the content-hash / timestamp skip: how many docs in this
+    # batch were unchanged and therefore skip the expensive embed + Vespa
+    # clear-and-rewrite. Aggregated across an attempt's batches this confirms,
+    # in prod logs, that a churny source (e.g. Salesforce LastModifiedDate)
+    # is no longer re-indexing unchanged records. Only logged when >0 to keep
+    # steady-state logs quiet.
+    num_skipped = len(documents) - len(updatable_docs)
+    if num_skipped:
+        logger.info(
+            f"Skipping {num_skipped}/{len(documents)} documents in batch "
+            "(unchanged since last successful index — no re-embed / re-index)."
+        )
+
     # Create records in the source of truth about these documents,
     # does not include doc_updated_at which is also used to indicate a successful update
     upsert_documents_in_db(
@@ -203,15 +235,25 @@ def index_doc_batch(
             doc for doc in updatable_docs if doc.id in successful_doc_ids
         ]
 
-        # Update the time of latest version of the doc successfully indexed
+        # Record post-success state: the latest updated-at (skip docs that
+        # don't carry one) AND the content hash (for every successful doc, so a
+        # later run can skip re-indexing it if its content is unchanged). The
+        # hash is stored only here — after a confirmed Vespa write — so it
+        # always reflects what's actually in the index.
         ids_to_new_updated_at = {}
         for doc in successful_docs:
             if doc.doc_updated_at is None:
                 continue
             ids_to_new_updated_at[doc.id] = doc.doc_updated_at
 
+        ids_to_new_content_hash = {
+            doc.id: doc.get_content_hash() for doc in successful_docs
+        }
+
         update_docs_updated_at(
-            ids_to_new_updated_at=ids_to_new_updated_at, db_session=db_session
+            ids_to_new_updated_at=ids_to_new_updated_at,
+            ids_to_new_content_hash=ids_to_new_content_hash,
+            db_session=db_session,
         )
 
     return len([r for r in insertion_records if r.already_existed is False]), len(
diff --git a/backend/danswer/redis/__init__.py b/backend/danswer/redis/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/backend/danswer/redis/redis_pool.py b/backend/danswer/redis/redis_pool.py
new file mode 100644
index 00000000000..567bb0a5160
--- /dev/null
+++ b/backend/danswer/redis/redis_pool.py
@@ -0,0 +1,110 @@
+"""Process-local Redis client and connection pool.
+
+This fork is single-tenant, so there is no per-tenant key prefixing
+(upstream Onyx's ``TenantRedisClient`` is intentionally not ported).
+Instead, all keys written by this codebase must namespace themselves
+under :data:`DANSWER_REDIS_KEY_PREFIX` so a shared Redis (or a
+later multi-app deployment) does not collide.
+
+The pool is lazily built on first use and reused for the life of the
+process. The :func:`get_redis_client` helper hands out a thin ``Redis``
+wrapper around the shared pool — cheap to call repeatedly, no need to
+cache the result at call sites.
+
+Errors are NOT swallowed here. Callers that want to fail open (e.g. the
+KV cache layer, the rate limiter) wrap their own try/except — that
+choice belongs to the caller, not the connection helper.
+"""
+from __future__ import annotations
+
+import threading
+from typing import Any
+
+import redis
+from redis import ConnectionPool
+from redis import Redis
+
+from danswer.configs.app_configs import REDIS_DB_NUMBER
+from danswer.configs.app_configs import REDIS_HEALTH_CHECK_INTERVAL
+from danswer.configs.app_configs import REDIS_HOST
+from danswer.configs.app_configs import REDIS_PASSWORD
+from danswer.configs.app_configs import REDIS_POOL_MAX_CONNECTIONS
+from danswer.configs.app_configs import REDIS_PORT
+from danswer.configs.app_configs import REDIS_SOCKET_TIMEOUT_SECONDS
+from danswer.configs.app_configs import REDIS_SSL
+from danswer.utils.logger import setup_logger
+
+
+logger = setup_logger()
+
+# Every key written by this codebase MUST start with this prefix. Sub-modules
+# append their own namespace (e.g. ``DANSWER_REDIS_KEY_PREFIX + "kv:"`` in the
+# KV cache). Keeping the namespace centralised here avoids the
+# "two callers picked the same key by accident" footgun.
+DANSWER_REDIS_KEY_PREFIX = "danswer:"
+
+
+_pool: ConnectionPool | None = None
+_pool_lock = threading.Lock()
+
+
+def _build_pool() -> ConnectionPool:
+    """Construct the connection pool from the current env-driven config.
+
+    Kept private so callers can't accidentally instantiate parallel pools.
+    """
+    kwargs: dict[str, Any] = {
+        "host": REDIS_HOST,
+        "port": REDIS_PORT,
+        "db": REDIS_DB_NUMBER,
+        "max_connections": REDIS_POOL_MAX_CONNECTIONS,
+        "health_check_interval": REDIS_HEALTH_CHECK_INTERVAL,
+        "socket_timeout": REDIS_SOCKET_TIMEOUT_SECONDS,
+        "socket_connect_timeout": REDIS_SOCKET_TIMEOUT_SECONDS,
+        "socket_keepalive": True,
+        "retry_on_timeout": True,
+        # We store JSON / counters as bytes; consumers decode as needed.
+        # decode_responses=False keeps us out of accidental str/bytes mixups.
+        "decode_responses": False,
+    }
+    if REDIS_PASSWORD:
+        kwargs["password"] = REDIS_PASSWORD
+    if REDIS_SSL:
+        # NOTE(review): no ssl_ca_certs / ssl_cert_reqs kwargs are passed, so
+        # redis-py's defaults apply (presumably no env lookup) — confirm.
+        kwargs["connection_class"] = redis.SSLConnection
+
+    logger.info(
+        "Building Redis ConnectionPool host=%s port=%s db=%s ssl=%s max=%s",
+        REDIS_HOST,
+        REDIS_PORT,
+        REDIS_DB_NUMBER,
+        REDIS_SSL,
+        REDIS_POOL_MAX_CONNECTIONS,
+    )
+    return ConnectionPool(**kwargs)
+
+
+def get_redis_client() -> Redis:
+    """Hand out a lightweight ``Redis`` client bound to the shared pool.
+
+    The pool is created lazily on first use; the lock plus the re-check
+    inside it stop concurrent first callers from building it twice.
+    """
+    global _pool
+    if _pool is not None:
+        return Redis(connection_pool=_pool)
+    with _pool_lock:
+        _pool = _pool if _pool is not None else _build_pool()
+        return Redis(connection_pool=_pool)
+
+
+def reset_pool_for_tests() -> None:
+    """Drop the cached pool so the next ``get_redis_client`` rebuilds it.
+
+    Tests only — never call this in production code. NOTE(review): REDIS_*
+    are imported by value, so patch them on this module, not just the env.
+    """
+    global _pool
+    with _pool_lock:
+        _pool = None
diff --git a/backend/danswer/server/analytics/api.py b/backend/danswer/server/analytics/api.py
index dbd773a5a25..31a7fd8086e 100644
--- a/backend/danswer/server/analytics/api.py
+++ b/backend/danswer/server/analytics/api.py
@@ -21,8 +21,12 @@
 import danswer.db.models as db_models
 from danswer.auth.users import current_admin_user
 from danswer.db.analytics import fetch_docs_per_source
+from danswer.db.analytics import fetch_document_set_usage
+from danswer.db.analytics import fetch_per_user_chat_stats
+from danswer.db.analytics import fetch_persona_usage
 from danswer.db.analytics import fetch_slack_bot_channel_stats
 from danswer.db.analytics import fetch_total_docs_indexed
+from danswer.db.analytics import fetch_user_adoption
 from danswer.db.analytics_rollup import fetch_danswerbot_analytics_from_rollup
 from danswer.db.analytics_rollup import fetch_query_analytics_from_rollup
 from danswer.db.analytics_rollup import fetch_user_analytics_from_rollup
@@ -106,6 +110,152 @@ def get_user_analytics(
     ]
 
 
+class UserAdoptionResponse(BaseModel):
+    # Count of users whose first chat usage falls on `date`.
+    new_users: int
+    # Distinct users who had used chat at least once up to and including `date`.
+    cumulative_users: int
+    date: datetime.date
+
+
+@router.get("/admin/user-adoption")
+def get_user_adoption_analytics(
+    start: datetime.datetime | None = None,
+    end: datetime.datetime | None = None,
+    _: db_models.User | None = Depends(current_admin_user),
+    db_session: Session = Depends(get_session),
+) -> list[UserAdoptionResponse]:
+    """Per-day new and cumulative distinct chat users (adoption curve), read
+    from the durable `analytics_user_first_seen` table, which outlives chat
+    retention. Defaults to the trailing 90 days when bounds are omitted."""
+    # Resolve the optional bounds first; the range defaults to "last 90 days".
+    range_start = start or (datetime.datetime.utcnow() - datetime.timedelta(days=90))
+    range_end = end or datetime.datetime.utcnow()
+    rows = fetch_user_adoption(start=range_start, end=range_end, db_session=db_session)
+
+    adoption: list[UserAdoptionResponse] = []
+    for day, new, total in rows:
+        adoption.append(
+            UserAdoptionResponse(new_users=new, cumulative_users=total, date=day)
+        )
+    return adoption
+
+
+class PerUserChatStatsResponse(BaseModel):
+    user_id: str
+    email: str
+    total_messages: int
+    total_likes: int
+    total_dislikes: int
+    last_active: datetime.date
+
+
+@router.get("/admin/per-user")
+def get_per_user_analytics(
+    start: datetime.datetime | None = None,
+    end: datetime.datetime | None = None,
+    limit: int = 100,
+    _: db_models.User | None = Depends(current_admin_user),
+    db_session: Session = Depends(get_session),
+) -> list[PerUserChatStatsResponse]:
+    """Top users by message volume over the range (default: trailing 90 days),
+    from the durable analytics_user_daily_stats aggregate, which survives chat
+    retention. A negative `limit` is clamped to 0 instead of reaching the DB."""
+    rows = fetch_per_user_chat_stats(
+        start=start or (datetime.datetime.utcnow() - datetime.timedelta(days=90)),
+        end=end or datetime.datetime.utcnow(),
+        db_session=db_session,
+        limit=max(limit, 0),  # clamp: never forward a negative limit
+    )
+    return [
+        PerUserChatStatsResponse(
+            user_id=str(user_id),
+            email=email,
+            total_messages=int(total_messages),
+            total_likes=int(total_likes),
+            total_dislikes=int(total_dislikes),
+            last_active=last_active,
+        )
+        for user_id, email, total_messages, total_likes, total_dislikes, last_active in rows
+    ]
+
+
+class PersonaUsageResponse(BaseModel):
+    persona_id: int
+    name: str
+    sessions: int
+    messages: int
+    likes: int
+    dislikes: int
+    last_active: datetime.date
+
+
+@router.get("/admin/persona-usage")
+def get_persona_usage_analytics(
+    start: datetime.datetime | None = None,
+    end: datetime.datetime | None = None,
+    limit: int = 100,
+    _: db_models.User | None = Depends(current_admin_user),
+    db_session: Session = Depends(get_session),
+) -> list[PersonaUsageResponse]:
+    """Most-used assistants over the range (default: trailing 90 days), from the
+    durable analytics_persona_daily_stats aggregate; negative `limit` becomes 0."""
+    rows = fetch_persona_usage(
+        start=start or (datetime.datetime.utcnow() - datetime.timedelta(days=90)),
+        end=end or datetime.datetime.utcnow(),
+        db_session=db_session,
+        limit=max(limit, 0),  # clamp: never forward a negative limit
+    )
+    return [
+        PersonaUsageResponse(
+            persona_id=persona_id,
+            name=name,
+            sessions=int(sessions),
+            messages=int(messages),
+            likes=int(likes),
+            dislikes=int(dislikes),
+            last_active=last_active,
+        )
+        for persona_id, name, sessions, messages, likes, dislikes, last_active in rows
+    ]
+
+
+class DocumentSetUsageResponse(BaseModel):
+    document_set_id: int
+    name: str
+    # APPROXIMATE: assistant message volume attributed to every document set
+    # attached to the assistant (see fetch_document_set_usage). Not a
+    # per-query retrieval count.
+    attributed_messages: int
+
+
+@router.get("/admin/document-set-usage")
+def get_document_set_usage_analytics(
+    start: datetime.datetime | None = None,
+    end: datetime.datetime | None = None,
+    limit: int = 100,
+    _: db_models.User | None = Depends(current_admin_user),
+    db_session: Session = Depends(get_session),
+) -> list[DocumentSetUsageResponse]:
+    """Approximate datasets-in-use over the range (default: trailing 90 days),
+    derived from assistant usage × current persona→document-set attachments
+    (see the db function's caveats). A negative `limit` is clamped to 0."""
+    rows = fetch_document_set_usage(
+        start=start or (datetime.datetime.utcnow() - datetime.timedelta(days=90)),
+        end=end or datetime.datetime.utcnow(),
+        db_session=db_session,
+        limit=max(limit, 0),  # clamp: never forward a negative limit
+    )
+    return [
+        DocumentSetUsageResponse(
+            document_set_id=document_set_id,
+            name=name,
+            attributed_messages=int(attributed_messages),
+        )
+        for document_set_id, name, attributed_messages in rows
+    ]
+
+
 class DanswerbotAnalyticsResponse(BaseModel):
     total_queries: int
     auto_resolved: int
diff --git a/backend/danswer/server/documents/cc_pair.py b/backend/danswer/server/documents/cc_pair.py
index 2a67efe2b56..735d51fb85e 100644
--- a/backend/danswer/server/documents/cc_pair.py
+++ b/backend/danswer/server/documents/cc_pair.py
@@ -14,11 +14,14 @@
 from danswer.db.connector_credential_pair import remove_credential_from_connector
 from danswer.db.document import get_document_cnts_for_cc_pairs
 from danswer.db.engine import get_session
+from danswer.db.index_attempt import count_index_attempts_for_cc_pair
 from danswer.db.index_attempt import get_index_attempts_for_cc_pair
+from danswer.db.index_attempt import get_paginated_index_attempts_for_cc_pair
 from danswer.db.models import User
 from danswer.server.documents.models import CCPairFullInfo
 from danswer.server.documents.models import ConnectorCredentialPairIdentifier
 from danswer.server.documents.models import ConnectorCredentialPairMetadata
+from danswer.server.documents.models import PaginatedIndexAttempts
 from danswer.server.models import StatusResponse
 
 router = APIRouter(prefix="/manage", dependencies=[Depends(validate_api_key)])
@@ -45,7 +48,15 @@ def get_cc_pair_full_info(
         credential_id=cc_pair.credential_id,
     )
 
-    index_attempts = get_index_attempts_for_cc_pair(
+    # Only the latest attempt + a count are needed for the detail page; the
+    # full history is served (paginated) by the endpoint below.
+    latest_index_attempts = get_index_attempts_for_cc_pair(
+        db_session=db_session,
+        cc_pair_identifier=cc_pair_identifier,
+        limit=1,
+    )
+    latest_index_attempt = latest_index_attempts[0] if latest_index_attempts else None
+    num_index_attempts = count_index_attempts_for_cc_pair(
         db_session=db_session,
         cc_pair_identifier=cc_pair_identifier,
     )
@@ -68,12 +79,59 @@ def get_cc_pair_full_info(
 
     return CCPairFullInfo.from_models(
         cc_pair_model=cc_pair,
-        index_attempt_models=list(index_attempts),
+        latest_index_attempt=latest_index_attempt,
+        num_index_attempts=num_index_attempts,
         latest_deletion_attempt=latest_deletion_attempt,
         num_docs_indexed=documents_indexed,
     )
 
 
+@router.get("/admin/cc-pair/{cc_pair_id}/index-attempts")
+def get_cc_pair_index_attempts(
+    cc_pair_id: int,
+    page: int = 0,
+    page_size: int = 10,
+    _: User | None = Depends(current_admin_user),
+    db_session: Session = Depends(get_session),
+) -> PaginatedIndexAttempts:
+    """Return one page of a cc-pair's index attempts plus paging totals.
+
+    `page` is floored at 0 and `page_size` is clamped to 1..100. Responds
+    with HTTP 400 when no cc-pair with `cc_pair_id` exists.
+    """
+    pair = get_connector_credential_pair_from_id(
+        cc_pair_id=cc_pair_id, db_session=db_session
+    )
+    if pair is None:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Connector with ID {cc_pair_id} not found. Has it been deleted?",
+        )
+
+    identifier = ConnectorCredentialPairIdentifier(
+        connector_id=pair.connector_id, credential_id=pair.credential_id
+    )
+    page_index = page if page > 0 else 0
+    per_page = max(1, min(page_size, 100))  # clamp to a sane range
+
+    total_count = count_index_attempts_for_cc_pair(
+        db_session=db_session, cc_pair_identifier=identifier
+    )
+    attempts = get_paginated_index_attempts_for_cc_pair(
+        db_session=db_session,
+        cc_pair_identifier=identifier,
+        page=page_index,
+        page_size=per_page,
+    )
+    page_count = max(-(-total_count // per_page), 1)  # ceil division, min 1
+    return PaginatedIndexAttempts.from_models(
+        index_attempt_models=list(attempts),
+        page=page_index,
+        total_pages=page_count,
+        total_count=total_count,
+    )
+
+
 class CCPairRenameRequest(BaseModel):
     name: str
 
diff --git a/backend/danswer/server/documents/connector.py b/backend/danswer/server/documents/connector.py
index 799083eaaa9..03e35cdb604 100644
--- a/backend/danswer/server/documents/connector.py
+++ b/backend/danswer/server/documents/connector.py
@@ -1,3 +1,4 @@
+import json
 import os
 import uuid
 from typing import cast
@@ -15,6 +16,8 @@
 from danswer.auth.users import current_admin_user
 from danswer.auth.users import current_user
 from danswer.background.task_utils import name_cc_cleanup_task
+from danswer.configs.app_configs import CC_PAIR_INFO_CACHE_ENABLED
+from danswer.configs.app_configs import CC_PAIR_INFO_CACHE_TTL_SECONDS
 from danswer.configs.app_configs import ENABLED_CONNECTOR_TYPES
 from danswer.configs.constants import DocumentSource
 from danswer.configs.constants import FileOrigin
@@ -71,6 +74,8 @@
 from danswer.db.tasks import get_latest_tasks_by_names
 from danswer.dynamic_configs.interface import ConfigNotFoundError
 from danswer.file_store.file_store import get_default_file_store
+from danswer.redis.redis_pool import DANSWER_REDIS_KEY_PREFIX
+from danswer.redis.redis_pool import get_redis_client
 from danswer.server.documents.models import AuthStatus
 from danswer.server.documents.models import AuthUrl
 from danswer.server.documents.models import ConnectorBase
@@ -90,11 +95,14 @@
 from danswer.server.documents.models import RunConnectorRequest
 from danswer.server.documents.models import UpdateIndexAttemptPriorityRequest
 from danswer.server.models import StatusResponse
+from danswer.utils.logger import setup_logger
 
 _GMAIL_CREDENTIAL_ID_COOKIE_NAME = "gmail_credential_id"
 _GOOGLE_DRIVE_CREDENTIAL_ID_COOKIE_NAME = "google_drive_credential_id"
 
 
+logger = setup_logger()
+
 router = APIRouter(prefix="/manage", dependencies=[Depends(validate_api_key)])
 
 
@@ -670,6 +678,9 @@ def connector_run_once(
             ),
             only_current=True,
             disinclude_finished=True,
+            # Used only for truthiness ("any unfinished attempt?"); one row is
+            # enough — don't materialize the full set just to test existence.
+            limit=1,
             db_session=db_session,
         )
     ]
@@ -869,12 +880,16 @@ class BasicCCPairInfo(BaseModel):
     source: DocumentSource
 
 
-@router.get("/indexing-status")
-def get_basic_connector_indexing_status(
-    _: User = Depends(current_user),
-    db_session: Session = Depends(get_session),
-) -> list[BasicCCPairInfo]:
-    cc_pairs = get_connector_credential_pairs(db_session)
+_CC_PAIR_INFO_CACHE_KEY = DANSWER_REDIS_KEY_PREFIX + "cc_pair_basic_info"
+
+
+def _build_basic_cc_pair_info(db_session: Session) -> list[BasicCCPairInfo]:
+    # eager_load_connector: the return comprehension reads
+    # `cc_pair.connector.source` for every cc-pair. Without eager loading
+    # that's an N+1 (one query per cc-pair) — at ~hundreds of cc-pairs
+    # against a remote Postgres that was seconds; eager loading collapses
+    # it to a couple of queries.
+    cc_pairs = get_connector_credential_pairs(db_session, eager_load_connector=True)
     cc_pair_identifiers = [
         ConnectorCredentialPairIdentifier(
             connector_id=cc_pair.connector_id, credential_id=cc_pair.credential_id
@@ -901,3 +916,43 @@ def get_basic_connector_indexing_status(
         for cc_pair in cc_pairs
         if cc_pair.connector.source != DocumentSource.INGESTION_API
     ]
+
+
+@router.get("/indexing-status")
+def get_basic_connector_indexing_status(
+    _: User = Depends(current_user),
+    db_session: Session = Depends(get_session),
+) -> list[BasicCCPairInfo]:
+    """Basic info for every cc-pair, optionally served from a Redis cache.
+
+    With CC_PAIR_INFO_CACHE_ENABLED off this is a plain DB build. With it
+    on, one global (not per-user) entry is read first and rewritten with a
+    CC_PAIR_INFO_CACHE_TTL_SECONDS expiry after a miss. Redis or decode
+    failures are logged and fall through to the DB build (fail-open).
+    """
+    cache_hit: bytes | None = None
+    if CC_PAIR_INFO_CACHE_ENABLED:
+        try:
+            # Pool uses decode_responses=False → bytes | None; the cast only
+            # collapses redis-py's sync/async overload union for mypy.
+            cache_hit = cast(
+                "bytes | None", get_redis_client().get(_CC_PAIR_INFO_CACHE_KEY)
+            )
+        except Exception as e:
+            logger.warning("cc-pair-info cache GET failed, using DB: %s", e)
+    if cache_hit is not None:
+        try:
+            return [BasicCCPairInfo(**d) for d in json.loads(cache_hit)]
+        except Exception as e:
+            logger.warning("cc-pair-info cache entry corrupt, rebuilding: %s", e)
+    fresh = _build_basic_cc_pair_info(db_session)
+    if not CC_PAIR_INFO_CACHE_ENABLED:
+        return fresh
+    try:
+        serialized = json.dumps([json.loads(entry.json()) for entry in fresh])
+        get_redis_client().set(
+            _CC_PAIR_INFO_CACHE_KEY, serialized, ex=CC_PAIR_INFO_CACHE_TTL_SECONDS
+        )
+    except Exception as e:
+        logger.warning("cc-pair-info cache SET failed (DB result still served): %s", e)
+    return fresh
diff --git a/backend/danswer/server/documents/models.py b/backend/danswer/server/documents/models.py
index 9726958b350..a5337100043 100644
--- a/backend/danswer/server/documents/models.py
+++ b/backend/danswer/server/documents/models.py
@@ -131,14 +131,20 @@ class CCPairFullInfo(BaseModel):
     num_docs_indexed: int
     connector: ConnectorSnapshot
     credential: CredentialSnapshot
-    index_attempts: list[IndexAttemptSnapshot]
+    # The full index-attempt history is paginated via a dedicated endpoint
+    # (GET /admin/cc-pair/{id}/index-attempts) — embedding it here loaded a
+    # busy cc-pair's entire history (thousands of rows w/ full tracebacks) on
+    # every page view. The detail page only needs the latest attempt + a count.
+    latest_index_attempt: IndexAttemptSnapshot | None
+    num_index_attempts: int
     latest_deletion_attempt: DeletionAttemptSnapshot | None
 
     @classmethod
     def from_models(
         cls,
         cc_pair_model: ConnectorCredentialPair,
-        index_attempt_models: list[IndexAttempt],
+        latest_index_attempt: IndexAttempt | None,
+        num_index_attempts: int,
         latest_deletion_attempt: DeletionAttemptSnapshot | None,
         num_docs_indexed: int,  # not ideal, but this must be computed separately
     ) -> "CCPairFullInfo":
@@ -152,11 +158,38 @@ def from_models(
             credential=CredentialSnapshot.from_credential_db_model(
                 cc_pair_model.credential
             ),
+            latest_index_attempt=(
+                IndexAttemptSnapshot.from_index_attempt_db_model(latest_index_attempt)
+                if latest_index_attempt is not None
+                else None
+            ),
+            num_index_attempts=num_index_attempts,
+            latest_deletion_attempt=latest_deletion_attempt,
+        )
+
+
+class PaginatedIndexAttempts(BaseModel):
+    index_attempts: list[IndexAttemptSnapshot]
+    page: int
+    total_pages: int
+    total_count: int
+
+    @classmethod
+    def from_models(
+        cls,
+        index_attempt_models: list[IndexAttempt],
+        page: int,
+        total_pages: int,
+        total_count: int,
+    ) -> "PaginatedIndexAttempts":
+        return cls(
             index_attempts=[
-                IndexAttemptSnapshot.from_index_attempt_db_model(index_attempt_model)
-                for index_attempt_model in index_attempt_models
+                IndexAttemptSnapshot.from_index_attempt_db_model(m)
+                for m in index_attempt_models
             ],
-            latest_deletion_attempt=latest_deletion_attempt,
+            page=page,
+            total_pages=total_pages,
+            total_count=total_count,
         )
 
 
diff --git a/backend/danswer/server/features/document_set/api.py b/backend/danswer/server/features/document_set/api.py
index 3cdaf7b9c21..de49346f969 100644
--- a/backend/danswer/server/features/document_set/api.py
+++ b/backend/danswer/server/features/document_set/api.py
@@ -8,15 +8,12 @@
 from danswer.auth.users import current_user
 from danswer.db.document_set import check_document_sets_are_public
 from danswer.db.document_set import fetch_all_document_sets
-from danswer.db.document_set import fetch_user_document_sets
 from danswer.db.document_set import insert_document_set
 from danswer.db.document_set import mark_document_set_as_to_be_deleted
 from danswer.db.document_set import update_document_set
+from danswer.db.document_set_cache import get_document_sets_for_user_cached
 from danswer.db.engine import get_session
 from danswer.db.models import User
-from danswer.server.documents.models import ConnectorCredentialPairDescriptor
-from danswer.server.documents.models import ConnectorSnapshot
-from danswer.server.documents.models import CredentialSnapshot
 from danswer.server.features.document_set.models import CheckDocSetPublicRequest
 from danswer.server.features.document_set.models import CheckDocSetPublicResponse
 from danswer.server.features.document_set.models import DocumentSet
@@ -92,35 +89,13 @@ def list_document_sets(
     user: User | None = Depends(current_user),
     db_session: Session = Depends(get_session),
 ) -> list[DocumentSet]:
-    document_set_info = fetch_user_document_sets(
+    # Read-through Redis cache (per user, fail-open, default OFF). On the
+    # chat-page bundle this fires on every load; the cache collapses a
+    # user's repeat loads to one DB build per TTL. The build logic lives in
+    # the cache module so cached/uncached paths stay identical.
+    return get_document_sets_for_user_cached(
         user_id=user.id if user else None, db_session=db_session
     )
-    return [
-        DocumentSet(
-            id=document_set_db_model.id,
-            name=document_set_db_model.name,
-            description=document_set_db_model.description,
-            contains_non_public=any([not cc_pair.is_public for cc_pair in cc_pairs]),
-            cc_pair_descriptors=[
-                ConnectorCredentialPairDescriptor(
-                    id=cc_pair.id,
-                    name=cc_pair.name,
-                    connector=ConnectorSnapshot.from_connector_db_model(
-                        cc_pair.connector
-                    ),
-                    credential=CredentialSnapshot.from_credential_db_model(
-                        cc_pair.credential
-                    ),
-                )
-                for cc_pair in cc_pairs
-            ],
-            is_up_to_date=document_set_db_model.is_up_to_date,
-            is_public=document_set_db_model.is_public,
-            users=[user.id for user in document_set_db_model.users],
-            groups=[group.id for group in document_set_db_model.groups],
-        )
-        for document_set_db_model, cc_pairs in document_set_info
-    ]
 
 
 @router.get("/document-set-public")
diff --git a/backend/danswer/server/features/persona/api.py b/backend/danswer/server/features/persona/api.py
index cd7deb5321a..73f0cd6e3e8 100644
--- a/backend/danswer/server/features/persona/api.py
+++ b/backend/danswer/server/features/persona/api.py
@@ -18,6 +18,7 @@
 from danswer.db.persona import update_all_personas_display_priority
 from danswer.db.persona import update_persona_shared_users
 from danswer.db.persona import update_persona_visibility
+from danswer.db.persona_cache import get_personas_for_user_cached
 from danswer.llm.answering.prompts.utils import build_dummy_prompt
 from danswer.server.features.persona.models import CreatePersonaRequest
 from danswer.server.features.persona.models import PersonaSnapshot
@@ -163,13 +164,15 @@ def list_personas(
     db_session: Session = Depends(get_session),
     include_deleted: bool = False,
 ) -> list[PersonaSnapshot]:
+    # Routes through the Redis-backed cache when PERSONA_CACHE_ENABLED;
+    # otherwise behaves exactly as before (direct DB read + serialize).
+    # The cache handles the include_deleted=True case by falling through.
     user_id = user.id if user is not None else None
-    return [
-        PersonaSnapshot.from_model(persona)
-        for persona in get_personas(
-            user_id=user_id, include_deleted=include_deleted, db_session=db_session
-        )
-    ]
+    return get_personas_for_user_cached(
+        user_id=user_id,
+        db_session=db_session,
+        include_deleted=include_deleted,
+    )
 
 
 @basic_router.get("/{persona_id}")
diff --git a/backend/danswer/server/middleware/__init__.py b/backend/danswer/server/middleware/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/backend/danswer/server/middleware/request_rate_limit.py b/backend/danswer/server/middleware/request_rate_limit.py
new file mode 100644
index 00000000000..a4a07a75465
--- /dev/null
+++ b/backend/danswer/server/middleware/request_rate_limit.py
@@ -0,0 +1,179 @@
+"""Per-user request-rate limiter — Redis-backed, multi-window, fail-open.
+
+Why this exists, in one sentence: the existing
+``danswer.server.query_and_chat.token_limit.check_token_rate_limits`` is a
+**token-budget** limiter (sum of tokens over a window, DB-backed). It
+caps cost, not request volume, and its in-process ``@lru_cache`` short-
+circuit (``any_rate_limit_exists``) is per-pod, so two replicas can
+disagree on whether limits are configured at all. This module is the
+**request-rate** complement: a per-user (or per-IP for anonymous) cap on
+the number of /send-message calls per minute / per hour, with Redis as
+the shared counter so the cap holds across replicas.
+
+Design notes:
+
+* **Fixed-window buckets.** ``bucket = floor(time() / window)``. Simpler
+  and cheaper than sliding-window log; the trade-off is that a user
+  can burst up to ``2 * limit`` across a window boundary. Acceptable
+  for the protection target (abuse / runaway cost), not for strict SLA
+  enforcement.
+* **``INCR`` + ``EXPIRE`` in one pipeline.** The bucket number is part
+  of the key, so every window starts on a fresh counter no matter when
+  the previous key expires; the expiry only garbage-collects stale
+  buckets. (Note that ``EXPIRE ... NX`` is only available on
+  Redis >= 7.0.)
+* **Fail-open.** Any Redis error allows the request through with a log.
+  Refusing the chat path because the *rate limiter* is down is a worse
+  outcome than serving a few extra requests during a Redis blip.
+* **Default OFF.** Even when Redis is up, the limiter does nothing
+  until ``REQUEST_RATE_LIMIT_ENABLED=true`` AND at least one window
+  limit (per-minute or per-hour) is > 0. This is a protection feature,
+  not an always-on guard.
+* **Anonymous callers are keyed by IP** (X-Forwarded-For first hop,
+  falling back to the socket peer). If the IP can't be determined we
+  silently skip — no key, nothing to limit.
+"""
+from __future__ import annotations
+
+import time
+
+from fastapi import Depends
+from fastapi import HTTPException
+from fastapi import Request
+
+from danswer.auth.users import current_user
+from danswer.configs.app_configs import REQUEST_RATE_LIMIT_ENABLED
+from danswer.configs.app_configs import REQUEST_RATE_LIMIT_PER_HOUR
+from danswer.configs.app_configs import REQUEST_RATE_LIMIT_PER_MINUTE
+from danswer.db.models import User
+from danswer.redis.redis_pool import DANSWER_REDIS_KEY_PREFIX
+from danswer.redis.redis_pool import get_redis_client
+from danswer.utils.logger import setup_logger
+
+
+logger = setup_logger()
+
+
+# All counters live under this prefix, so wiping them during incident
+# response is a SCAN/DEL over one key pattern. Sub-key shape:
+#   <prefix>{actor}:{label}:{bucket}
+# where actor is "u:<uuid>" or "ip:<addr>", label is "min" or "hour",
+# and bucket is floor(unix_seconds / window).
+_KEY_PREFIX = DANSWER_REDIS_KEY_PREFIX + "ratelimit:msg:"
+
+_MIN_WINDOW_SECONDS = 60
+_HOUR_WINDOW_SECONDS = 3600
+
+
+def check_message_request_rate_limit(
+    request: Request,
+    user: User | None = Depends(current_user),
+) -> None:
+    """FastAPI dependency: raise 429 once the caller exceeds a window cap.
+
+    Returns without touching Redis unless REQUEST_RATE_LIMIT_ENABLED is set
+    and at least one of the per-minute / per-hour limits is positive.
+    Requests for which no actor key can be derived are not limited.
+    The 429 itself is raised by ``_enforce_window``.
+    """
+    if not REQUEST_RATE_LIMIT_ENABLED:
+        return
+
+    # (label, window length in seconds, configured cap), tightest first.
+    windows = (
+        ("min", _MIN_WINDOW_SECONDS, REQUEST_RATE_LIMIT_PER_MINUTE),
+        ("hour", _HOUR_WINDOW_SECONDS, REQUEST_RATE_LIMIT_PER_HOUR),
+    )
+    active = [window for window in windows if window[2] > 0]
+    if not active:
+        # Nothing to enforce — saves the Redis round-trip when an
+        # operator enabled the flag but hasn't picked window values yet.
+        return
+
+    actor = _actor_key(user, request)
+    if actor is None:
+        return  # no key material; nothing we can fairly attribute
+
+    # Order matters: _enforce_window raises HTTPException(429) on a trip,
+    # so a request rejected by the minute window never reaches — and never
+    # increments — the hour counter.
+    for label, window_seconds, limit in active:
+        _enforce_window(
+            actor=actor,
+            label=label,
+            window_seconds=window_seconds,
+            limit=limit,
+        )
+
+
+def _actor_key(user: User | None, request: Request) -> str | None:
+    """Build the identity string a request's counters are keyed on.
+
+    Signed-in users map to ``u:<user.id>``. Anonymous requests map to
+    ``ip:<addr>``, taking the first X-Forwarded-For entry or, when that
+    header is missing or empty, the socket peer. Returns None when no
+    address is available, in which case the caller skips limiting.
+    NOTE(review): the first XFF entry is client-supplied unless the
+    ingress overwrites the header — confirm before relying on it.
+    """
+    if user is not None:
+        return f"u:{user.id}"
+
+    forwarded = request.headers.get("x-forwarded-for", "")
+    if forwarded:
+        # Left-most entry of the comma-separated hop list.
+        address = forwarded.split(",", 1)[0].strip()
+    elif request.client is None:
+        address = ""
+    else:
+        address = request.client.host
+
+    return f"ip:{address}" if address else None
+
+
+def _enforce_window(*, actor: str, label: str, window_seconds: int, limit: int) -> None:
+    """Increment-and-check one fixed window for one actor.
+
+    Raises ``HTTPException(429)`` carrying a ``Retry-After`` header when
+    the post-increment count exceeds ``limit``. Any Redis error is logged
+    and the request is let through (fail-open). The bucket and the
+    Retry-After value come from one clock reading so they always agree.
+    """
+    now = int(time.time())
+    bucket = now // window_seconds
+    key = f"{_KEY_PREFIX}{actor}:{label}:{bucket}"
+
+    try:
+        pipe = get_redis_client().pipeline()
+        pipe.incr(key, 1)
+        # The TTL only garbage-collects finished buckets: the bucket number
+        # is in the key, so each window starts on a fresh counter anyway.
+        # Plain EXPIRE is used because the NX flag needs Redis >= 7.0; on
+        # older servers it errors and the fail-open path disables limiting.
+        pipe.expire(key, window_seconds)
+        result = pipe.execute()
+        count = int(result[0])
+    except Exception as e:
+        # Fail-open: better to let a request through than to wedge the
+        # chat path because Redis is unhappy. Loud log so it's obvious
+        # in the dashboard, but no exception propagation.
+        logger.warning(
+            "Rate-limit check skipped due to Redis error (actor=%s window=%s): %s",
+            actor,
+            label,
+            e,
+        )
+        return
+
+    if count > limit:
+        # Seconds remaining in the bucket, taken from the same `now` that
+        # picked the bucket (a second clock read could land in the next one).
+        retry_after = window_seconds - (now % window_seconds)
+        raise HTTPException(
+            status_code=429,
+            detail=(
+                f"Request rate limit exceeded "
+                f"({limit} per {label}). Retry in {retry_after}s."
+            ),
+            headers={"Retry-After": str(retry_after)},
+        )
diff --git a/backend/danswer/server/query_and_chat/chat_backend.py b/backend/danswer/server/query_and_chat/chat_backend.py
index 4e5a1bb2138..c8eff35bfba 100644
--- a/backend/danswer/server/query_and_chat/chat_backend.py
+++ b/backend/danswer/server/query_and_chat/chat_backend.py
@@ -1,5 +1,6 @@
 import io
 import uuid
+from typing import cast
 
 from fastapi import APIRouter
 from fastapi import Depends
@@ -15,6 +16,9 @@
 from danswer.auth.users import current_user
 from danswer.chat.chat_utils import create_chat_chain
 from danswer.chat.process_message import stream_chat_message
+from danswer.configs.app_configs import CHAT_FILE_MAX_SIZE_MB
+from danswer.configs.app_configs import CHAT_FILE_MAX_TOKEN_FRACTION
+from danswer.configs.app_configs import FILE_STORE_TYPE
 from danswer.configs.app_configs import WEB_DOMAIN
 from danswer.configs.constants import FileOrigin
 from danswer.configs.constants import MessageType
@@ -37,6 +41,7 @@
 from danswer.document_index.document_index_utils import get_both_index_names
 from danswer.document_index.factory import get_default_document_index
 from danswer.file_processing.extract_file_text import extract_file_text
+from danswer.file_store.file_store import AzureBlobFileStore
 from danswer.file_store.file_store import get_default_file_store
 from danswer.file_store.models import ChatFileType
 from danswer.file_store.models import FileDescriptor
@@ -47,9 +52,13 @@
 from danswer.llm.factory import get_default_llms
 from danswer.llm.headers import get_litellm_additional_request_headers
 from danswer.llm.utils import get_default_llm_tokenizer
+from danswer.llm.utils import get_max_input_tokens
 from danswer.secondary_llm_flows.chat_session_naming import (
     get_renamed_conversation_name,
 )
+from danswer.server.middleware.request_rate_limit import (
+    check_message_request_rate_limit,
+)
 from danswer.server.query_and_chat.models import ChatFeedbackRequest
 from danswer.server.query_and_chat.models import ChatMessageIdentifier
 from danswer.server.query_and_chat.models import ChatRenameRequest
@@ -74,6 +83,52 @@
 # api_router = APIRouter(prefix="/chat", dependencies=[Depends(validate_api_key)])
 
 
+# --- Chat file-upload limits ----------------------------------------------
+# A chat-attached doc is stuffed WHOLE into the LLM prompt (no retrieval), so
+# the real ceiling is the model context window. _reject_if_text_too_long is
+# the meaningful guard; the byte cap is a cheap pre-filter.
+def _max_chat_file_tokens() -> int:
+    """Token budget for a chat attachment: CHAT_FILE_MAX_TOKEN_FRACTION of the
+    default LLM's max input tokens. If resolving the model raises for any
+    reason, a 128k-token window is assumed so uploads are never blocked."""
+    try:
+        primary_llm, _ = get_default_llms()
+        llm_config = primary_llm.config
+        context_window = get_max_input_tokens(
+            model_name=llm_config.model_name, model_provider=llm_config.model_provider
+        )
+    except Exception:
+        context_window = 128_000
+    return int(context_window * CHAT_FILE_MAX_TOKEN_FRACTION)
+
+
+def _reject_if_file_too_large(size: int | None, filename: str | None) -> None:
+    """Raise HTTP 400 when a known, non-zero ``size`` exceeds the byte cap."""
+    mb = 1024 * 1024
+    # An unknown (None) or zero size is let through without a check.
+    if not size or size <= CHAT_FILE_MAX_SIZE_MB * mb:
+        return
+    limit_msg = f"upload limit is {CHAT_FILE_MAX_SIZE_MB}MB."
+    detail = f"File '{filename or ''}' is {size // mb}MB; the {limit_msg}"
+    raise HTTPException(status_code=400, detail=detail)
+
+
+def _reject_if_text_too_long(text: str, filename: str | None) -> None:
+    """Raise HTTP 400 when ``text`` exceeds the chat-file token budget."""
+    token_count = len(get_default_llm_tokenizer().encode(text))
+    token_limit = _max_chat_file_tokens()
+    if token_count <= token_limit:
+        return
+    message = (
+        f"Document '{filename or ''}' is too large to chat with "
+        f"(~{token_count:,} tokens; limit {token_limit:,}). The whole document "
+        "is sent to the model, so it must fit the context window — "
+        "upload a smaller excerpt, or add it as a connector to search "
+        "over it instead."
+    )
+    raise HTTPException(status_code=400, detail=message)
+
+
 @router.get("/get-user-chat-sessions")
 def get_user_chat_sessions(
     user: User | None = Depends(current_user),
@@ -278,6 +333,10 @@ def handle_new_chat_message(
     chat_message_req: CreateChatMessageRequest,
     request: Request,
     user: User | None = Depends(current_user),
+    # Request-rate cap (Redis-backed, default off) runs BEFORE the
+    # token-budget check — cheap fast-path means a 429'd caller never
+    # touches the DB-backed token-usage query.
+    _rate_limit: None = Depends(check_message_request_rate_limit),
     _: None = Depends(check_token_rate_limits),
 ) -> StreamingResponse:
     """This endpoint is both used for all the following purposes:
@@ -515,6 +574,10 @@ def upload_files_for_chat(
                 )
             raise HTTPException(status_code=400, detail=error_detail)
 
+        # Byte cap, all types (cheap pre-filter; the token gate below is the
+        # real protection for text/docs).
+        _reject_if_file_too_large(file.size, file.filename)
+
         if (
             file.content_type in image_content_types
             and file.size
@@ -550,6 +613,13 @@ def upload_files_for_chat(
         # to re-extract it every time we send a message
         if file_type == ChatFileType.DOC:
             extracted_text = extract_file_text(file_name=file.filename, file=file.file)
+            # Token gate: the extracted text gets stuffed whole into the prompt.
+            # Reject (and drop the just-stored raw file) if it can't fit.
+            try:
+                _reject_if_text_too_long(extracted_text, file.filename)
+            except HTTPException:
+                file_store.delete_file(file_id)
+                raise
             text_file_id = str(uuid.uuid4())
             file_store.save_file(
                 file_name=text_file_id,
@@ -563,6 +633,17 @@ def upload_files_for_chat(
             # message
             file_info.append((text_file_id, file.filename, ChatFileType.PLAIN_TEXT))
         else:
+            if file_type == ChatFileType.PLAIN_TEXT:
+                # Plain text is stuffed as-is — token-gate it too (read the
+                # just-stored copy back so we don't depend on stream position).
+                raw = file_store.read_file(file_id, mode="b", use_tempfile=True)
+                try:
+                    _reject_if_text_too_long(
+                        raw.read().decode("utf-8", errors="ignore"), file.filename
+                    )
+                except HTTPException:
+                    file_store.delete_file(file_id)
+                    raise
             file_info.append((file_id, file.filename, file_type))
 
     return {
@@ -573,6 +654,147 @@ def upload_files_for_chat(
     }
 
 
+# --- Direct-to-Blob upload (SAS) -------------------------------------------
+# Lets the browser PUT files straight to Azure Blob, bypassing the server
+# (faster + offloads api-server bandwidth). Two steps: mint a SAS URL, then
+# confirm so the server records metadata (+ extracts text for docs). Only
+# active when the Azure file store is configured; otherwise the client falls
+# back to the two-hop POST /chat/file above.
+
+_IMAGE_CONTENT_TYPES = {"image/jpeg", "image/png", "image/webp"}
+_DOCUMENT_CONTENT_TYPES = {
+    "application/pdf",
+    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+    "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+    "message/rfc822",
+    "application/epub+zip",
+}
+
+
+def _chat_file_type_for(content_type: str | None) -> ChatFileType:
+    # MIME type -> chat file kind; unknown or missing types count as plain text.
+    if content_type in _IMAGE_CONTENT_TYPES:
+        return ChatFileType.IMAGE
+    is_document = content_type in _DOCUMENT_CONTENT_TYPES
+    return ChatFileType.DOC if is_document else ChatFileType.PLAIN_TEXT
+
+
+class ChatFileUploadUrlItem(BaseModel):
+    name: str  # client-side filename; only used in the size-rejection message
+    content_type: str | None = None  # echoed back unchanged in the response
+    # Client-reported size in bytes, checked against the byte cap when minting
+    # the URL; a missing (None) size skips that check. Token gate: /file/confirm.
+    size: int | None = None
+
+
+class ChatFileUploadUrlRequest(BaseModel):
+    files: list[ChatFileUploadUrlItem]  # one entry per file to be uploaded
+
+
+class ChatFileUploadUrlResponseItem(BaseModel):
+    file_id: str  # server-generated UUID4 string the SAS URL was minted for
+    upload_url: str  # value returned by the store's generate_upload_sas_url
+    content_type: str | None = None  # copied from the matching request item
+
+
+class ChatFileUploadUrlResponse(BaseModel):
+    # False → the active file store can't do direct uploads (e.g. Postgres);
+    # the client should fall back to the two-hop POST /chat/file.
+    direct_upload: bool
+    files: list[ChatFileUploadUrlResponseItem] = []  # empty when direct_upload=False
+
+
+@router.post("/file/upload-url")
+def get_chat_file_upload_urls(
+    req: ChatFileUploadUrlRequest,
+    db_session: Session = Depends(get_session),
+    _: User | None = Depends(current_user),
+) -> ChatFileUploadUrlResponse:
+    """Issue one upload SAS URL per requested file (direct browser → Blob PUT);
+    answers ``direct_upload=False`` when the Azure Blob store is not active."""
+    if FILE_STORE_TYPE != AzureBlobFileStore.__name__:
+        return ChatFileUploadUrlResponse(direct_upload=False)
+    blob_store = cast(AzureBlobFileStore, get_default_file_store(db_session))
+    minted: list[ChatFileUploadUrlResponseItem] = []
+    for requested in req.files:
+        _reject_if_file_too_large(requested.size, requested.name)
+        new_id = str(uuid.uuid4())
+        sas_url = blob_store.generate_upload_sas_url(new_id)
+        minted.append(
+            ChatFileUploadUrlResponseItem(
+                file_id=new_id, upload_url=sas_url, content_type=requested.content_type
+            )
+        )
+    return ChatFileUploadUrlResponse(direct_upload=True, files=minted)
+
+
+class ChatFileConfirmItem(BaseModel):
+    file_id: str  # blob name to register; NOTE(review): not checked vs. minted ids
+    name: str | None = None  # display name; also handed to text extraction
+    content_type: str | None = None  # selects the ChatFileType (None → plain text)
+
+
+class ChatFileConfirmRequest(BaseModel):
+    files: list[ChatFileConfirmItem]  # handled in list order by /file/confirm
+
+
+@router.post("/file/confirm")
+def confirm_chat_file_uploads(
+    req: ChatFileConfirmRequest,
+    db_session: Session = Depends(get_session),
+    _: User | None = Depends(current_user),
+) -> dict[str, list[FileDescriptor]]:
+    """Register files the client already PUT to Blob and return the
+    FileDescriptors to attach to a message. DOC files are text-extracted and
+    stored as plain text; DOC and plain-text content is token-gated (the blob
+    is deleted on reject). HTTP 400 if the Azure Blob store is not active."""
+    if FILE_STORE_TYPE != AzureBlobFileStore.__name__:
+        # Same gate as /file/upload-url: the cast below only holds when the
+        # Azure Blob store is the configured backend.
+        raise HTTPException(status_code=400, detail="Direct upload is not enabled.")
+    # NOTE(review): file_id is client-supplied and not tied to the minting user.
+    store = cast(AzureBlobFileStore, get_default_file_store(db_session))
+    file_info: list[tuple[str, str | None, ChatFileType]] = []
+    for f in req.files:
+        file_type = _chat_file_type_for(f.content_type)
+        store.register_object(
+            file_name=f.file_id,
+            display_name=f.name,
+            file_origin=FileOrigin.CHAT_UPLOAD,
+            file_type=f.content_type or file_type.value,
+        )
+        if file_type not in (ChatFileType.DOC, ChatFileType.PLAIN_TEXT):
+            file_info.append((f.file_id, f.name, file_type))
+            continue
+        raw = store.read_file(f.file_id, mode="b", use_tempfile=True)
+        try:
+            if file_type == ChatFileType.DOC:
+                text = extract_file_text(file_name=f.name, file=raw)
+            else:
+                text = raw.read().decode("utf-8", errors="ignore")
+            _reject_if_text_too_long(text, f.name)
+        except HTTPException:
+            # Over budget: drop the orphan blob the client already uploaded.
+            store.delete_file(f.file_id)
+            raise
+        finally:
+            raw.close()  # release the temp file; the handle used to be leaked
+        if file_type == ChatFileType.DOC:
+            text_file_id = str(uuid.uuid4())
+            store.save_file(
+                file_name=text_file_id,
+                content=io.BytesIO(text.encode()),
+                display_name=f.name,
+                file_origin=FileOrigin.CHAT_UPLOAD,
+                file_type="text/plain",
+            )
+            file_info.append((text_file_id, f.name, ChatFileType.PLAIN_TEXT))
+        else:
+            file_info.append((f.file_id, f.name, file_type))
+    return {"files": [{"id": i, "type": t, "name": n} for i, n, t in file_info]}
+
+
 @router.get("/file/{file_id:path}")
 def fetch_chat_file(
     file_id: str,
diff --git a/backend/danswer/server/query_and_chat/query_backend.py b/backend/danswer/server/query_and_chat/query_backend.py
index ff632e0613a..57f49e143b6 100644
--- a/backend/danswer/server/query_and_chat/query_backend.py
+++ b/backend/danswer/server/query_and_chat/query_backend.py
@@ -1,6 +1,7 @@
 from fastapi import APIRouter
 from fastapi import Depends
 from fastapi import HTTPException
+from fastapi import Request
 from fastapi.responses import StreamingResponse
 from sqlalchemy.orm import Session
 
@@ -23,6 +24,9 @@
 from danswer.search.utils import chunks_or_sections_to_search_docs
 from danswer.secondary_llm_flows.query_validation import get_query_answerability
 from danswer.secondary_llm_flows.query_validation import stream_query_answerability
+from danswer.server.middleware.request_rate_limit import (
+    check_message_request_rate_limit,
+)
 from danswer.server.query_and_chat.models import AdminSearchRequest
 from danswer.server.query_and_chat.models import AdminSearchResponse
 from danswer.server.query_and_chat.models import HelperResponse
@@ -89,6 +93,9 @@ def get_tags(
     # If this is empty or None, then tags for all sources are considered
     sources: list[DocumentSource] | None = None,
     allow_prefix: bool = True,  # This is currently the only option
+    # Optional cap on tags returned. Default None preserves the existing
+    # unbounded behavior; a client can pass a limit to bound the response.
+    limit: int | None = None,
     _: User = Depends(current_user),
     db_session: Session = Depends(get_session),
 ) -> TagResponse:
@@ -98,6 +105,7 @@ def get_tags(
     db_tags = get_tags_by_value_prefix_for_source_types(
         tag_value_prefix=match_pattern,
         sources=sources,
+        limit=limit,
         db_session=db_session,
     )
     server_tags = [
@@ -150,7 +158,11 @@ def stream_query_validation(
 @basic_router.post("/stream-answer-with-quote")
 def get_answer_with_quote(
     query_request: DirectQARequest,
+    request: Request,
     user: User = Depends(current_user),
+    # Mirrors /chat/send-message: request-rate cap first (cheap when
+    # off), token-budget check second.
+    _rate_limit: None = Depends(check_message_request_rate_limit),
     _: None = Depends(check_token_rate_limits),
 ) -> StreamingResponse:
     query = query_request.messages[0].message
diff --git a/backend/danswer/server/settings/models.py b/backend/danswer/server/settings/models.py
index 3f00eb85794..15e3d86b4ac 100644
--- a/backend/danswer/server/settings/models.py
+++ b/backend/danswer/server/settings/models.py
@@ -18,6 +18,10 @@ class Settings(BaseModel):
     # stored value wins, so flip it in Admin → Settings on existing deployments.
     default_page: PageType = PageType.CHAT
     maximum_chat_retention_days: int | None = None
+    # Env-driven (CHAT_FILE_MAX_SIZE_MB), injected in load_settings — surfaced
+    # here so the chat UI pre-checks against the SAME value the backend enforces
+    # instead of a hardcoded duplicate.
+    chat_file_max_size_mb: int = 25
 
     def check_validity(self) -> None:
         chat_page_enabled = self.chat_page_enabled
diff --git a/backend/danswer/server/settings/store.py b/backend/danswer/server/settings/store.py
index ead1e3652a9..29293afaab8 100644
--- a/backend/danswer/server/settings/store.py
+++ b/backend/danswer/server/settings/store.py
@@ -1,5 +1,6 @@
 from typing import cast
 
+from danswer.configs.app_configs import CHAT_FILE_MAX_SIZE_MB
 from danswer.dynamic_configs.factory import get_dynamic_config_store
 from danswer.dynamic_configs.interface import ConfigNotFoundError
 from danswer.server.settings.models import Settings
@@ -16,6 +17,10 @@ def load_settings() -> Settings:
         settings = Settings()
         dynamic_config_store.store(_SETTINGS_KEY, settings.dict())
 
+    # Env-controlled, not admin-stored — always reflect the current env so the
+    # chat UI pre-check matches the backend's CHAT_FILE_MAX_SIZE_MB.
+    settings.chat_file_max_size_mb = CHAT_FILE_MAX_SIZE_MB
+
     return settings
 
 
diff --git a/backend/ee/danswer/db/user_group.py b/backend/ee/danswer/db/user_group.py
index 0451db9b633..74c79ddf20c 100644
--- a/backend/ee/danswer/db/user_group.py
+++ b/backend/ee/danswer/db/user_group.py
@@ -14,6 +14,7 @@
 from danswer.db.models import User__UserGroup
 from danswer.db.models import UserGroup
 from danswer.db.models import UserGroup__ConnectorCredentialPair
+from danswer.db.persona_cache import invalidate_user_groups
 from danswer.server.documents.models import ConnectorCredentialPairIdentifier
 from ee.danswer.server.user_group.models import UserGroupCreate
 from ee.danswer.server.user_group.models import UserGroupUpdate
@@ -180,6 +181,10 @@ def insert_user_group(db_session: Session, user_group: UserGroupCreate) -> UserG
     )
 
     db_session.commit()
+    # New User__UserGroup rows for these users — bust their cached group lists
+    # so the next persona-list call sees the new group's persona grants.
+    for affected_user_id in user_group.user_ids:
+        invalidate_user_groups(affected_user_id)
     return db_user_group
 
 
@@ -221,9 +226,10 @@ def update_user_group(
     cc_pairs_updated = set([cc_pair.id for cc_pair in existing_cc_pairs]) != set(
         user_group.cc_pair_ids
     )
-    users_updated = set([user.id for user in db_user_group.users]) != set(
-        user_group.user_ids
-    )
+    # Snapshot existing members BEFORE the cleanup mutation, so we know
+    # which users to invalidate. The new member set is on the request.
+    existing_user_ids = {user.id for user in db_user_group.users}
+    users_updated = existing_user_ids != set(user_group.user_ids)
 
     if users_updated:
         _cleanup_user__user_group_relationships__no_commit(
@@ -249,6 +255,12 @@ def update_user_group(
         db_user_group.is_up_to_date = False
 
     db_session.commit()
+    if users_updated:
+        # Bust both removed (existing - new) and added (new - existing) users.
+        # Symmetric difference would be enough, but unioning both sides is
+        # cheap and avoids missing edge cases when membership reshuffles.
+        for affected_user_id in existing_user_ids | set(user_group.user_ids):
+            invalidate_user_groups(affected_user_id)
     return db_user_group
 
 
@@ -275,6 +287,11 @@ def prepare_user_group_for_deletion(db_session: Session, user_group_id: int) ->
 
     _check_user_group_is_modifiable(db_user_group)
 
+    # Snapshot current members before cleanup so we can bust their caches
+    # after commit. The cleanup helper deletes the User__UserGroup rows,
+    # so reading after cleanup would give an empty set.
+    affected_user_ids = [user.id for user in db_user_group.users]
+
     _cleanup_user__user_group_relationships__no_commit(
         db_session=db_session, user_group_id=user_group_id
     )
@@ -288,6 +305,8 @@ def prepare_user_group_for_deletion(db_session: Session, user_group_id: int) ->
     db_user_group.is_up_to_date = False
     db_user_group.is_up_for_deletion = True
     db_session.commit()
+    for affected_user_id in affected_user_ids:
+        invalidate_user_groups(affected_user_id)
 
 
 def _cleanup_user_group__cc_pair_relationships__no_commit(
diff --git a/backend/requirements/default.txt b/backend/requirements/default.txt
index 8391736906f..cc13447c8fa 100644
--- a/backend/requirements/default.txt
+++ b/backend/requirements/default.txt
@@ -2,8 +2,15 @@ aiohttp==3.9.4
 alembic==1.10.4
 asyncpg==0.27.0
 atlassian-python-api==3.37.0
+# Optional file-store backend (FILE_STORE_TYPE=AzureBlobFileStore). Imported
+# lazily in file_store.py, so only needed when that backend is selected.
+azure-storage-blob==12.19.1
 bcrypt==4.0.1  # pin: passlib 1.7.4 reads bcrypt.__about__, removed in bcrypt 4.1+
 beautifulsoup4==4.12.2
+# bokeh powers the Dask scheduler dashboard at :8787. Required by
+# Dask 2023.8.1 when the scheduler is started without --no-dashboard.
+# Pin is narrower than Dask's `dashboard` extra (bokeh!=3.0.*,>=2.4.2): no 3.x.
+bokeh>=2.4.2,<3.0
 boto3==1.34.84
 celery==5.3.4
 chardet==5.2.0
@@ -61,6 +68,7 @@ retry==0.9.2  # This pulls in py which is in CVE-2022-42969, must remove py from
 rfc3986==1.5.0
 rt==3.1.2
 simple-salesforce==1.12.6
+redis==5.0.8
 slack-sdk==3.20.2
 SQLAlchemy[mypy]==2.0.15
 starlette==0.36.3
diff --git a/backend/scripts/backfill_analytics_rollup.py b/backend/scripts/backfill_analytics_rollup.py
index 61571674436..217a4f268d7 100644
--- a/backend/scripts/backfill_analytics_rollup.py
+++ b/backend/scripts/backfill_analytics_rollup.py
@@ -1,8 +1,11 @@
-"""Populate `analytics_daily_rollup` from existing chat data.
+"""Populate `analytics_daily_rollup` AND `analytics_user_first_seen` from
+existing chat data.
 
 Run this ONCE after deploying the rollup feature, before the next chat
 retention sweep deletes any old data. After this completes, the daily
-Celery beat task (`run_analytics_rollup_task`) keeps the table fresh.
+Celery beat task (`run_analytics_rollup_task`) keeps both tables fresh.
+Walking history ascending means each user's first_seen_date is their true
+first-ever active day (within the data that still exists).
 
 Usage:
 
diff --git a/backend/scripts/dev_run_dask_distributed.py b/backend/scripts/dev_run_dask_distributed.py
new file mode 100644
index 00000000000..f63d0af1dd5
--- /dev/null
+++ b/backend/scripts/dev_run_dask_distributed.py
@@ -0,0 +1,298 @@
+"""Dev helper that spawns the full Dask-Distributed background stack
+as plain subprocesses.
+
+Mirrors `dev_run_background_jobs.py` (one parent Python process, child
+processes for each background role, Ctrl-C tears down the tree) but
+with the prod-shape topology:
+
+  dask-scheduler        TCP RPC + dashboard
+       │
+       ├── dask-worker × N        actual indexing executors
+       ├── indexer-scheduler      runs update.py polling loop, submits
+       │                          to dask-scheduler instead of an
+       │                          in-process LocalCluster
+       ├── celery-worker          unchanged
+       └── celery-beat            unchanged
+
+Use this when you want to reproduce production indexing behavior
+locally without K8s or Docker. For day-to-day connector-code work,
+keep using `dev_run_background_jobs.py` — it's faster to start and
+the LocalCluster mode is sufficient for most testing.
+
+Usage:
+    cd backend
+    PYTHONPATH=$(pwd) python scripts/dev_run_dask_distributed.py
+    # ...or with custom worker count:
+    PYTHONPATH=$(pwd) python scripts/dev_run_dask_distributed.py \\
+        --num-workers 4
+"""
+from __future__ import annotations
+
+import argparse
+import os
+import signal
+import socket
+import subprocess
+import sys
+import threading
+import time
+
+
+SCHEDULER_HOST = "127.0.0.1"
+
+
+def monitor_process(process_name: str, process: subprocess.Popen) -> None:
+    """Relay a child's merged stdout/stderr to our stdout, prefixed by name.
+
+    Reads until EOF, so lines still buffered when the child exits (usually
+    the crash traceback) are printed instead of dropped.
+    """
+    assert process.stdout is not None
+    for line in process.stdout:
+        print(f"{process_name}: {line.strip()}", flush=True)
+
+
+def wait_for_port(host: str, port: int, timeout: float = 30.0) -> bool:
+    """Return True once ``host:port`` accepts a TCP connection, or False if
+    none succeeds within ``timeout`` seconds (each attempt waits up to 1s,
+    then 0.5s before retrying). Holds back dask-worker spawn until the
+    scheduler is reachable, sparing workers a ``ConnectionRefusedError``."""
+    give_up_at = time.monotonic() + timeout
+    while time.monotonic() < give_up_at:
+        try:
+            probe = socket.create_connection((host, port), timeout=1.0)
+        except OSError:
+            time.sleep(0.5)
+            continue
+        probe.close()
+        return True
+    return False
+
+
+def spawn(
+    name: str,
+    cmd: list[str],
+    env: dict[str, str] | None = None,
+) -> tuple[subprocess.Popen, threading.Thread]:
+    """Launch ``cmd`` as a child process and tail it from a daemon thread.
+
+    stderr is folded into stdout in text mode; the thread runs
+    ``monitor_process`` labelled with ``name``. Returns (process, thread).
+    """
+    child = subprocess.Popen(
+        cmd, env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
+    )
+    tailer = threading.Thread(target=monitor_process, args=(name, child), daemon=True)
+    tailer.start()
+    return child, tailer
+
+
+def run(
+    num_workers: int,
+    scheduler_port: int,
+    dashboard_port: int,
+    no_celery: bool,
+    no_indexer: bool,
+) -> int:
+    """Start the scheduler, workers, indexer and Celery children, then watch
+    them. Returns non-zero when the scheduler never binds or any child exits;
+    on SIGINT/SIGTERM the tree is torn down and the process exits with 0."""
+    # Children inherit our env (Postgres / Vespa / GenAI / model-server
+    # creds etc.) plus a guaranteed PYTHONPATH=. so that subprocess'd
+    # `dask worker` can import `danswer.*` when deserializing the
+    # run_indexing_entrypoint callable.
+    base_env = os.environ.copy()
+    base_env["PYTHONPATH"] = "."
+
+    scheduler_addr = f"tcp://{SCHEDULER_HOST}:{scheduler_port}"
+    children: list[subprocess.Popen] = []
+
+    def shutdown(*_args: object) -> None:
+        print("\n[dev_run_dask_distributed] Caught signal; shutting down…")
+        for proc in children:
+            if proc.poll() is None:
+                try:
+                    proc.terminate()
+                except Exception:
+                    pass
+        # Give them a moment to terminate cleanly before SIGKILL.
+        deadline = time.monotonic() + 5.0
+        for proc in children:
+            timeout = max(0.1, deadline - time.monotonic())
+            try:
+                proc.wait(timeout=timeout)
+            except subprocess.TimeoutExpired:
+                try:
+                    proc.kill()
+                except Exception:
+                    pass
+        if _args:  # (signum, frame) → signal handler; direct calls return instead
+            sys.exit(0)
+
+    signal.signal(signal.SIGINT, shutdown)
+    signal.signal(signal.SIGTERM, shutdown)
+
+    # 1. Dask scheduler — must come up first so workers don't have to
+    # back off + retry. RPC and dashboard both bind 127.0.0.1, not
+    # 0.0.0.0, because this is a dev-only helper; nothing should reach
+    # it from outside the host.
+    print(f"[dev_run_dask_distributed] starting dask-scheduler on {scheduler_addr}")
+    sched_proc, _ = spawn(
+        "DASK-SCHED",
+        [
+            "dask",
+            "scheduler",
+            "--host",
+            SCHEDULER_HOST,
+            "--port",
+            str(scheduler_port),
+            "--dashboard-address",
+            f"{SCHEDULER_HOST}:{dashboard_port}",
+        ],
+        env=base_env,
+    )
+    children.append(sched_proc)
+
+    if not wait_for_port(SCHEDULER_HOST, scheduler_port, timeout=30.0):
+        print(
+            f"[dev_run_dask_distributed] scheduler did not bind {scheduler_addr} "
+            "within 30s; aborting."
+        )
+        shutdown()
+        return 1
+    print(
+        "[dev_run_dask_distributed] scheduler is up. "
+        f"Dashboard: http://{SCHEDULER_HOST}:{dashboard_port}"
+    )
+
+    # 2. Dask workers — N processes, each one thread / one worker, so
+    # each gets its own RSS envelope. Same pattern as the K8s
+    # `dask-worker-deployment.yaml`.
+    worker_env = base_env.copy()
+    worker_env["CURRENT_PROCESS_IS_AN_INDEXING_JOB"] = "true"
+    for i in range(num_workers):
+        proc, _ = spawn(
+            f"DASK-WORKER-{i}",
+            [
+                "dask",
+                "worker",
+                scheduler_addr,
+                "--nworkers=1",
+                "--nthreads=1",
+                "--memory-limit=4GB",
+            ],
+            env=worker_env,
+        )
+        children.append(proc)
+    print(f"[dev_run_dask_distributed] started {num_workers} dask-worker(s)")
+
+    # 3. Indexer-scheduler — runs the update.py polling loop and
+    # submits work to the scheduler we just started.
+    if not no_indexer:
+        indexer_env = base_env.copy()
+        indexer_env["DASK_SCHEDULER_ADDRESS"] = scheduler_addr
+        indexer_env["CURRENT_PROCESS_IS_AN_INDEXING_JOB"] = "true"
+        proc, _ = spawn(
+            "INDEXER",
+            [sys.executable, "danswer/background/update.py"],
+            env=indexer_env,
+        )
+        children.append(proc)
+        print("[dev_run_dask_distributed] started indexer-scheduler")
+
+    # 4. Celery worker + beat — unchanged from dev_run_background_jobs.py.
+    # Indexing isn't routed through Celery in this fork, so these
+    # exist solely to handle prune / sync / retention / cleanup / etc.
+    if not no_celery:
+        worker_proc, _ = spawn(
+            "CELERY-WORKER",
+            [
+                "celery",
+                "-A",
+                "ee.danswer.background.celery.celery_app",
+                "worker",
+                "--pool=threads",
+                "--concurrency=10",
+                "--loglevel=INFO",
+            ],
+            env=base_env,
+        )
+        children.append(worker_proc)
+
+        beat_proc, _ = spawn(
+            "CELERY-BEAT",
+            [
+                "celery",
+                "-A",
+                "ee.danswer.background.celery.celery_app",
+                "beat",
+                "--loglevel=INFO",
+            ],
+            env=base_env,
+        )
+        children.append(beat_proc)
+        print("[dev_run_dask_distributed] started celery worker + beat")
+
+    print(
+        "[dev_run_dask_distributed] all processes launched. "
+        "Ctrl-C to tear down the whole tree."
+    )
+
+    # Watch the children until one dies, then tear down and report failure —
+    # this is a dev helper, not a supervisor, so no restarts. Ctrl-C never
+    # raises KeyboardInterrupt here: the SIGINT handler above exits instead.
+    while True:
+        for proc in children:
+            if proc.poll() is not None:
+                print(
+                    f"[dev_run_dask_distributed] child process exited "
+                    f"with code {proc.returncode}; tearing down."
+                )
+                shutdown()
+                return proc.returncode or 1
+        time.sleep(1.0)
+
+
+def main() -> int:
+    """Parse command-line options and launch the stack.
+
+    Flags: ``--num-workers``, ``--scheduler-port``, ``--dashboard-port``
+    (integers) plus the ``--no-celery`` / ``--no-indexer`` switches. The
+    module docstring doubles as the ``--help`` description.
+
+    Returns:
+        The exit code produced by ``run``.
+    """
+    cli = argparse.ArgumentParser(description=__doc__)
+    # Integer options, registered in the order --help should list them.
+    int_options = (
+        ("--num-workers", 2, "Number of dask-worker subprocesses (default: 2)"),
+        ("--scheduler-port", 8786, "Dask scheduler RPC port (default: 8786)"),
+        ("--dashboard-port", 8787, "Dask scheduler dashboard port (default: 8787)"),
+    )
+    for flag, default, help_text in int_options:
+        cli.add_argument(flag, type=int, default=default, help=help_text)
+
+    # Opt-out switches; both default to False.
+    cli.add_argument(
+        "--no-celery",
+        action="store_true",
+        help="Skip Celery worker + beat (useful when only testing indexing)",
+    )
+    cli.add_argument(
+        "--no-indexer",
+        action="store_true",
+        help="Skip the indexer-scheduler (useful when bringing your own "
+        "by running update.py manually with DASK_SCHEDULER_ADDRESS set)",
+    )
+    opts = cli.parse_args()
+    return run(
+        num_workers=opts.num_workers,
+        scheduler_port=opts.scheduler_port,
+        dashboard_port=opts.dashboard_port,
+        no_celery=opts.no_celery,
+        no_indexer=opts.no_indexer,
+    )
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/backend/scripts/migrate_file_store_to_azure_blob.py b/backend/scripts/migrate_file_store_to_azure_blob.py
new file mode 100644
index 00000000000..3232731b60c
--- /dev/null
+++ b/backend/scripts/migrate_file_store_to_azure_blob.py
@@ -0,0 +1,99 @@
+"""Migrate file bytes from Postgres large objects → Azure Blob Storage.
+
+For every `file_store` row that still has its bytes in a Postgres large
+object (`lobj_oid` set, `object_key` NULL), this streams the lobj up to the
+Azure Blob container, points the row at the blob (`object_key`), clears
+`lobj_oid`, and frees the large object.
+
+Idempotent — already-migrated rows (object_key set) are skipped, so it's
+safe to re-run / resume. Reads use a spooled temp file, so a huge file
+won't OOM the migrator.
+
+Requires the Azure backend to be configured in the environment:
+    FILE_STORE_TYPE=AzureBlobFileStore   (not strictly required, but matches prod)
+    AZURE_BLOB_CONNECTION_STRING=...      (the storage account connection string)
+    AZURE_BLOB_CONTAINER=danswer-files
+
+Usage:
+    cd backend
+    PYTHONPATH=$(pwd) python scripts/migrate_file_store_to_azure_blob.py
+    PYTHONPATH=$(pwd) python scripts/migrate_file_store_to_azure_blob.py --dry-run
+
+Cutover: deploy the image (with azure-storage-blob) + the migration that
+adds object_key, set the secret, flip FILE_STORE_TYPE=AzureBlobFileStore,
+then run this once. Reads of un-migrated rows fall back to the lobj in the
+meantime, so there's no hard ordering requirement — but run it promptly so
+the lobjs (and the DB bloat) actually go away.
+"""
+from __future__ import annotations
+
+import argparse
+import sys
+
+from sqlalchemy import select
+from sqlalchemy.orm import Session
+
+from danswer.db.engine import get_sqlalchemy_engine
+from danswer.db.models import PGFileStore
+from danswer.db.pg_file_store import delete_lobj_by_id
+from danswer.db.pg_file_store import read_lobj
+from danswer.file_store.file_store import _get_azure_container_client
+
+
+def main() -> int:
+    """Move every large-object-backed `file_store` row to Azure Blob Storage.
+
+    Selects rows with `lobj_oid` set and `object_key` NULL. With `--dry-run`
+    the candidates are only listed. Otherwise each row's bytes are uploaded
+    under the row's `file_name`, the row is repointed at the blob and
+    committed, and the large object is then freed on a best-effort basis.
+
+    Returns:
+        0 on completion. Exceptions from reading or uploading propagate and
+        abort the run; rows committed before that point stay migrated.
+    """
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="List what would migrate; upload/modify nothing.",
+    )
+    args = parser.parse_args()
+
+    engine = get_sqlalchemy_engine()
+    with Session(engine) as db_session:
+        rows = db_session.scalars(
+            select(PGFileStore)
+            .where(PGFileStore.lobj_oid.isnot(None))
+            .where(PGFileStore.object_key.is_(None))
+        ).all()
+        print(f"{len(rows)} file(s) to migrate (lobj → blob).")
+        if args.dry_run:
+            for r in rows:
+                print(f"  would migrate: {r.file_name} (lobj_oid={r.lobj_oid})")
+            return 0
+
+        container = _get_azure_container_client()
+        migrated = 0
+        for r in rows:
+            # Snapshot the values we need up front. commit() expires the
+            # row's attributes (Session default), and re-loading them from
+            # inside the error handler below would hit a transaction that
+            # is still in a failed state.
+            file_name = r.file_name
+            old_lobj = r.lobj_oid
+            # Spooled temp file → bounded memory even for large blobs.
+            stream = read_lobj(
+                lobj_oid=old_lobj, db_session=db_session, use_tempfile=True
+            )
+            try:
+                container.upload_blob(name=file_name, data=stream, overwrite=True)
+            finally:
+                # Release the temp file right away instead of waiting for GC.
+                stream.close()
+
+            # Point the row at the blob, then free the lobj. Commit the row
+            # first so a crash leaves it readable from the blob (the lobj
+            # delete is best-effort cleanup).
+            r.object_key = file_name
+            r.lobj_oid = None
+            db_session.commit()
+            try:
+                delete_lobj_by_id(old_lobj, db_session=db_session)
+                db_session.commit()
+            except Exception as e:
+                # Roll back first so the session is usable again before
+                # anything else touches it.
+                db_session.rollback()
+                print(
+                    f"  WARN: uploaded {file_name} but failed to free lobj {old_lobj}: {e}"
+                )
+
+            migrated += 1
+            if migrated % 50 == 0:
+                print(f"  migrated {migrated}/{len(rows)}…")
+
+        print(f"Done. Migrated {migrated} file(s) to Azure Blob.")
+    return 0
+
+
+# Script entry point: main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/backend/scripts/seed_assistants.py b/backend/scripts/seed_assistants.py
new file mode 100644
index 00000000000..8f900dbe142
--- /dev/null
+++ b/backend/scripts/seed_assistants.py
@@ -0,0 +1,393 @@
+"""Seed N varied personas/assistants into the local DB for UX testing.
+
+WARNING — local dev tool only. Runs against whatever database the
+`DATABASE_URL` / `POSTGRES_*` env vars point at. NEVER point this at a prod
+Postgres. If `POSTGRES_HOST` contains a substring that smells like prod (see
+`_PROD_HOST_FINGERPRINTS` below), the script aborts.
+
+Produces a realistic mix for exercising the redesigned gallery page:
+
+  ~30%  "Yours"           — owned by the target user (private)
+  ~20%  "Shared with you" — owned by another user, target user in users[]
+  ~50%  "Featured"        — public (is_public=True, no specific owner)
+
+Each row gets a random subset of available tools / document sets so the
+{n} tools / {n} sources chips render with variety. Half of "Yours" land
+in the user's chosen_assistants picker, half do not — so the "Already
+added" / "Available to add" filter chips have content on both sides.
+
+Usage (from repo root):
+
+    cd backend
+    source ../.venv/bin/activate
+    python -m scripts.seed_assistants --email you@example.com --count 50
+
+    # Wipe just the seeded rows (by name prefix) and re-seed:
+    python -m scripts.seed_assistants --clear
+    python -m scripts.seed_assistants --email you@example.com --count 50
+
+Notes:
+  * Re-running without --clear stacks more rows. Use --prefix to namespace.
+  * If --email isn't supplied, picks the first admin user in the DB.
+  * If only one user exists, the "Shared with you" tier is folded into
+    "Featured" since there's no one else to own them.
+"""
+from __future__ import annotations
+
+import argparse
+import os
+import random
+import sys
+from collections.abc import Sequence
+
+from sqlalchemy import select
+from sqlalchemy.orm import Session
+
+from danswer.auth.schemas import UserRole
+from danswer.db.engine import SessionFactory
+from danswer.db.models import DocumentSet
+from danswer.db.models import Persona
+from danswer.db.models import Persona__User
+from danswer.db.models import Tool
+from danswer.db.models import User
+from danswer.search.enums import RecencyBiasSetting
+
+
+# --- safety: don't blast prod by accident ---------------------------------
+
+# If POSTGRES_HOST contains any of these substrings, bail. Extend as
+# needed. The whole point is: this script generates fake data; you only
+# want it on your own laptop's Postgres.
+# The host is lower-cased before the substring test, so keep every marker
+# lower-case.
+_PROD_HOST_FINGERPRINTS = (
+    "azure.com",  # Azure managed Postgres (darwin uses one)
+    "amazonaws.com",
+    "rds.",
+    "gcp.",
+    ".cloud.",
+    "prod",
+    "production",  # already matched by "prod"; listed for readability
+)
+
+
+def _abort_if_pointed_at_prod() -> None:
+    """Exit with status 2 when POSTGRES_HOST looks like a production host.
+
+    The lower-cased host is tested by substring against every entry of
+    `_PROD_HOST_FINGERPRINTS`. Only POSTGRES_HOST is inspected.
+    """
+    pg_host = (os.environ.get("POSTGRES_HOST") or "").lower()
+    looks_like_prod = any(
+        fingerprint in pg_host for fingerprint in _PROD_HOST_FINGERPRINTS
+    )
+    if not looks_like_prod:
+        return
+    print(
+        f"REFUSING TO RUN: POSTGRES_HOST={pg_host!r} looks like a prod DB.\n"
+        f"Point POSTGRES_HOST at localhost / your dev container first.",
+        file=sys.stderr,
+    )
+    sys.exit(2)
+
+
+# --- content pools --------------------------------------------------------
+
+# 60 distinct names so we can cover the requested ~50 without dup.
+_NAMES: list[str] = [
+    "Research Pal",
+    "Code Reviewer",
+    "SQL Helper",
+    "Email Drafter",
+    "Bug Triage",
+    "API Documenter",
+    "Test Writer",
+    "Meeting Summarizer",
+    "Slack Digest",
+    "Stand-up Buddy",
+    "Customer Insights",
+    "Onboarding Guide",
+    "Roadmap Reviewer",
+    "Incident Reporter",
+    "Refactor Assistant",
+    "Release Notes",
+    "Spec Reader",
+    "RFC Writer",
+    "PR Summarizer",
+    "Postmortem Helper",
+    "Design Critic",
+    "Architecture Sketch",
+    "Security Reviewer",
+    "Threat Modeler",
+    "Compliance Auditor",
+    "Pricing Analyst",
+    "Sales Enabler",
+    "Renewal Scout",
+    "Churn Predictor",
+    "Marketing Riff",
+    "Blog Draftsman",
+    "Tweet Polisher",
+    "Tagline Brewer",
+    "FAQ Generator",
+    "Support Tier-1",
+    "Escalation Helper",
+    "Runbook Walker",
+    "Migration Planner",
+    "Schema Diff Reader",
+    "Index Tuner",
+    "Query Explainer",
+    "Log Whisperer",
+    "Metric Hunter",
+    "Alert Wrangler",
+    "Dashboard Builder",
+    "Hire Brief",
+    "Interview Scribe",
+    "Skill Mapper",
+    "Doc Search",
+    "Wiki Pal",
+    "Note Taker",
+    "Action-Items Finder",
+    "Standup Cliff-Notes",
+    "Investor FAQ",
+    "Roadblock Spotter",
+    "OKR Reviewer",
+    "Quarterly Recap",
+    "Pitch Sharpener",
+    "Customer-Reply Drafter",
+    "Demo Outline",
+]
+
+# 30 description templates — varied tones / scopes so the cards don't all
+# read the same.
+_DESCRIPTIONS: list[str] = [
+    "Answers questions about our codebase using semantic search across the indexed repos.",
+    "Drafts polished customer-facing emails in the company's voice.",
+    "Summarizes long Slack threads and surfaces decisions and action items.",
+    "Reads design docs and points out the assumptions and the risky bits.",
+    "Generates SQL against the analytics warehouse from a plain-English question.",
+    "Triages new bug reports — classifies severity, finds duplicates, and assigns.",
+    "Writes release notes from a list of merged PR titles.",
+    "Cross-references Jira tickets and surfaces blocked dependencies.",
+    "Helps onboard new engineers by answering 'where does X live?' questions.",
+    "Reviews pull requests for naming, structure, and style consistency.",
+    "Drafts incident postmortems from log excerpts and timeline notes.",
+    "Translates marketing copy into different audience voices.",
+    "Walks runbooks step by step, asking before each destructive action.",
+    "Reads the customer-success knowledge base and answers tier-1 tickets.",
+    "Explains an unfamiliar SQL query — joins, CTEs, window functions.",
+    "Reviews quarterly OKR drafts for measurability and ambition.",
+    "Builds the outline of a sales demo from a list of pain points.",
+    "Tightens taglines — shorter, sharper, fewer adjectives.",
+    "Sketches an architecture diagram outline from a design doc.",
+    "Surfaces churn-risk signals from a list of recent customer emails.",
+    "Answers HR / benefits FAQ from the employee handbook.",
+    "Reads RFCs and writes the executive summary at the top.",
+    "Indexes API documentation and answers 'how do I do X' questions.",
+    "Drafts response templates for support tickets matching common patterns.",
+    "Generates test cases for a function or endpoint from its signature.",
+    "Reviews threat models against OWASP top-10 categories.",
+    "Plans data migrations — pre-checks, batch sizing, rollback steps.",
+    "Reads incident-channel logs and produces a concise five-line summary.",
+    "Brainstorms blog post angles given a working title.",
+    "Helps interviewers stay structured — drafts notes, scores, follow-ups.",
+]
+
+
+# --- helpers --------------------------------------------------------------
+
+
+def _pick(rng: random.Random, items: Sequence, k_min: int, k_max: int) -> list:
+    """Return between k_min and k_max random items (without replacement).
+
+    Tolerates `items` being shorter than k_max *or* k_min — both bounds are
+    capped at the available length, so a short pool yields fewer items
+    instead of raising. An empty pool, or a drawn size of zero or less,
+    yields an empty list.
+
+    For inputs where k_min <= min(k_max, len(items)) the sequence of RNG
+    calls is unchanged, so seeded runs stay reproducible.
+    """
+    if not items:
+        return []
+    upper = min(k_max, len(items))
+    # randint() raises ValueError when low > high; cap the lower bound too.
+    lower = min(k_min, upper)
+    k = rng.randint(lower, upper)
+    if k <= 0:
+        return []
+    return rng.sample(list(items), k)
+
+
+def _resolve_target_user(session: Session, email: str | None) -> User | None:
+    """Find the user the seeded rows are built around.
+
+    With an email, returns the matching user, or None after printing a
+    message to stderr. Without one, returns an admin user if there is one,
+    otherwise any user, otherwise None.
+    """
+    if not email:
+        # No email given — prefer an admin user, fall back to any user.
+        admin_query = select(User).where(User.role == UserRole.ADMIN).limit(1)
+        first_admin = session.scalar(admin_query)
+        if first_admin is None:
+            return session.scalar(select(User).limit(1))
+        return first_admin
+
+    match = session.scalar(select(User).where(User.email == email))
+    if match is None:
+        print(f"No user with email {email!r} found.", file=sys.stderr)
+    return match
+
+
+def _pick_other_user(session: Session, target_user_id) -> User | None:
+    """Return one user whose id differs from `target_user_id`, or None if
+    there is no such user. Used as the owner of the "shared with you" rows."""
+    someone_else = select(User).where(User.id != target_user_id).limit(1)
+    return session.scalar(someone_else)
+
+
+def _clear(session: Session, prefix: str) -> int:
+    """Hard-delete every persona whose name starts with `prefix`.
+
+    Deleting by name prefix is risky if a real persona shares the prefix,
+    so a prefix shorter than 3 characters is refused (exit status 2).
+    The prefix is matched literally: LIKE wildcards (`%`, `_`) inside it
+    are escaped so they cannot widen the set of rows deleted.
+
+    Returns:
+        The number of personas deleted.
+    """
+    if not prefix or len(prefix) < 3:
+        print(
+            f"Refusing to clear with suspiciously short prefix {prefix!r}.",
+            file=sys.stderr,
+        )
+        sys.exit(2)
+    personas = session.scalars(
+        select(Persona).where(Persona.name.startswith(prefix, autoescape=True))
+    ).all()
+    for p in personas:
+        # Hard delete — these are synthetic seed rows, not user data.
+        # Junction rows are expected to be removed by cascades on the model
+        # (not verified here).
+        session.delete(p)
+    session.commit()
+    return len(personas)
+
+
+# --- main -----------------------------------------------------------------
+
+
+def main() -> None:
+    """Seed `--count` personas split into Yours / Shared / Featured tiers.
+
+    With `--clear`, deletes the personas whose name starts with `--prefix`
+    and returns. Otherwise creates the personas, appends the auto-added
+    ones to the target user's `chosen_assistants`, commits once at the end
+    and prints a summary.
+
+    Exits with status 1 when no user can be resolved and with status 2 for
+    a negative `--count`.
+    """
+    _abort_if_pointed_at_prod()
+
+    ap = argparse.ArgumentParser(description=__doc__.splitlines()[0])
+    ap.add_argument("--count", type=int, default=50, help="How many to create.")
+    ap.add_argument(
+        "--email",
+        help="Target user email — the 'me' for testing. Default: first admin user.",
+    )
+    ap.add_argument(
+        "--prefix",
+        default="[seed] ",
+        help="Name prefix so seeded rows are easy to spot / clear. (default: '[seed] ')",
+    )
+    ap.add_argument(
+        "--clear",
+        action="store_true",
+        help="Delete previously seeded personas (by --prefix) and exit.",
+    )
+    ap.add_argument(
+        "--seed",
+        type=int,
+        default=42,
+        help="RNG seed — same seed = same data each run. Default: 42.",
+    )
+    args = ap.parse_args()
+
+    with SessionFactory() as session:
+        if args.clear:
+            n = _clear(session, args.prefix)
+            print(f"Cleared {n} seeded personas (prefix={args.prefix!r}).")
+            return
+
+        # A negative count would create nothing yet print nonsensical tier
+        # sizes; reject it up front.
+        if args.count < 0:
+            print(f"--count must be >= 0 (got {args.count}).", file=sys.stderr)
+            sys.exit(2)
+
+        target_user = _resolve_target_user(session, args.email)
+        if target_user is None:
+            print(
+                "No users in DB. Sign in to the app first so a user row "
+                "exists, then re-run.",
+                file=sys.stderr,
+            )
+            sys.exit(1)
+
+        other_user = _pick_other_user(session, target_user.id)
+        tools = list(session.scalars(select(Tool)).all())
+        doc_sets = list(session.scalars(select(DocumentSet)).all())
+
+        rng = random.Random(args.seed)
+
+        if args.count > len(_NAMES):
+            print(
+                f"--count={args.count} exceeds {len(_NAMES)} unique names; "
+                f"will cycle with numeric suffixes.",
+                file=sys.stderr,
+            )
+
+        # Yours: ~30%, Shared: ~20% (only if other_user exists), rest Featured.
+        # "Yours" gets at least one row, but never more than --count, so the
+        # tier sizes always add up to the number of rows actually created.
+        yours_n = min(args.count, max(1, args.count * 30 // 100))
+        shared_n = args.count * 20 // 100 if other_user is not None else 0
+        featured_n = args.count - yours_n - shared_n
+
+        # Track which Yours rows land in the user's picker (half do).
+        # We'll mutate chosen_assistants at the end of the run.
+        new_chosen_ids: list[int] = []
+
+        created = 0
+        for i in range(args.count):
+            base_name = _NAMES[i % len(_NAMES)]
+            suffix = "" if i < len(_NAMES) else f" #{i // len(_NAMES) + 1}"
+            name = f"{args.prefix}{base_name}{suffix}"
+            desc = rng.choice(_DESCRIPTIONS)
+            persona_tools = _pick(rng, tools, 0, 3)
+            persona_docs = _pick(rng, doc_sets, 0, 2)
+
+            if i < yours_n:
+                owner_id = target_user.id
+                is_public = False
+                shared_target = None
+            elif i < yours_n + shared_n:
+                # Owned by someone else, granted to target user via Persona__User.
+                owner_id = other_user.id if other_user else None
+                is_public = False
+                shared_target = target_user.id
+            else:
+                # Public / featured — no specific owner.
+                owner_id = None
+                is_public = True
+                shared_target = None
+
+            persona = Persona(
+                name=name,
+                description=desc,
+                user_id=owner_id,
+                is_public=is_public,
+                # Required scalars on Persona — pick sensible defaults so
+                # the row is queryable by get_personas without errors.
+                llm_relevance_filter=False,
+                llm_filter_extraction=False,
+                recency_bias=RecencyBiasSetting.AUTO,
+                default_persona=False,
+                is_visible=True,
+                deleted=False,
+                num_chunks=None,
+                llm_model_provider_override=None,
+                llm_model_version_override=None,
+                starter_messages=None,
+                tools=persona_tools,
+                document_sets=persona_docs,
+            )
+            session.add(persona)
+            session.flush()  # populate persona.id
+
+            if shared_target is not None:
+                session.add(Persona__User(persona_id=persona.id, user_id=shared_target))
+
+            # Half of "Yours" auto-land in the picker; the other half are
+            # available-to-add. Featured rows never auto-add (the user can
+            # add them from the gallery). Shared rows auto-add so the user
+            # sees their permitted assistants in chat immediately.
+            if i < yours_n and i % 2 == 0:
+                new_chosen_ids.append(persona.id)
+            elif yours_n <= i < yours_n + shared_n:
+                new_chosen_ids.append(persona.id)
+
+            created += 1
+
+        # Merge with the target user's existing chosen_assistants (if any).
+        # We APPEND so we don't disturb whatever order they already have.
+        if new_chosen_ids:
+            existing = list(target_user.chosen_assistants or [])
+            target_user.chosen_assistants = existing + new_chosen_ids
+
+        session.commit()
+
+        print(f"Created {created} personas under prefix {args.prefix!r}.")
+        print(f"  Target user        : {target_user.email}")
+        if other_user is not None:
+            print(f"  Shared-from user   : {other_user.email}")
+        print(f"  Yours              : {yours_n}")
+        print(f"  Shared with you    : {shared_n}")
+        print(f"  Featured / public  : {featured_n}")
+        print(f"  Auto-added to picker: {len(new_chosen_ids)}")
+        print()
+        print("Open /assistants/gallery to see them. Run with --clear to wipe.")
+
+
+# Script entry point when the module is executed directly or via `python -m`.
+if __name__ == "__main__":
+    main()
diff --git a/backend/scripts/test_dask_distributed_e2e.py b/backend/scripts/test_dask_distributed_e2e.py
new file mode 100644
index 00000000000..4ce389e4854
--- /dev/null
+++ b/backend/scripts/test_dask_distributed_e2e.py
@@ -0,0 +1,517 @@
+"""End-to-end test for the Dask-Distributed background topology.
+
+Spawns a real `dask scheduler` + N `dask worker` subprocesses on the
+local machine, exercises the topology with synthetic tasks via the
+`distributed.Client` API, and asserts the behaviors that matter for
+the indexing-scaling design:
+
+  POSITIVE
+    P1  All N workers register with the scheduler within a bounded
+        time window.
+    P2  M concurrent tasks run in parallel across workers — wall
+        time is bounded by ceil(M/N) × per_task_seconds, not M ×
+        per_task_seconds. (This is THE assertion proving "multiple
+        workers pick work in parallel".)
+    P3  Tasks fan out across at least 2 distinct workers when
+        M > 1. (Catches a degenerate scheduler that pins everything
+        to one worker.)
+
+  NEGATIVE
+    N1  Worker death mid-task — surviving workers continue accepting
+        new submissions; cluster doesn't deadlock.
+    N2  Connecting to a non-existent scheduler fails fast with a
+        clear error rather than hanging indefinitely.
+    N3  Scheduler death — Client.submit() against a dead scheduler
+        raises within a bounded time, doesn't hang.
+
+The test is self-contained (no Postgres/Vespa/model-server needed)
+and uses random ports per run so concurrent invocations don't
+collide. Pass --runs N to repeat the whole suite N times — useful
+for catching flakes.
+
+Usage:
+    cd backend
+    PYTHONPATH=$(pwd) python scripts/test_dask_distributed_e2e.py [--runs N] [--workers M]
+
+Exits 0 if every run passes, non-zero otherwise.
+"""
+from __future__ import annotations
+
+import argparse
+import os
+import socket
+import subprocess
+import sys
+import time
+from collections.abc import Iterator
+from contextlib import closing
+from contextlib import contextmanager
+from pathlib import Path
+
+from dask.distributed import Client
+
+
+# Path to the `dask` CLI that ships with the same venv we're running
+# under. `sys.executable` always points at the active python, even
+# when the venv was invoked directly without `source activate` (which
+# leaves `.venv/bin` off PATH). Falling back to the bare name lets the
+# test still work if `dask` is on PATH for some other reason.
+_VENV_BIN = Path(sys.executable).parent
+_DASK_CLI = str(_VENV_BIN / "dask") if (_VENV_BIN / "dask").exists() else "dask"
+
+
+def _subprocess_env() -> dict[str, str]:
+    """Build the environment handed to dask child processes.
+
+    It is a copy of the current environment with `_VENV_BIN` placed at the
+    front of PATH, so the `dask` CLI (and anything it shells out to) is
+    resolvable.
+    """
+    inherited_path = os.environ.get("PATH", "")
+    child_env = dict(os.environ)
+    child_env["PATH"] = f"{_VENV_BIN}{os.pathsep}{inherited_path}"
+    return child_env
+
+
+_PASS = "\033[32mPASS\033[0m"
+_FAIL = "\033[31mFAIL\033[0m"
+_INFO = "\033[33mINFO\033[0m"
+
+
+# ---------------------------------------------------------------------------
+# Output helpers
+# ---------------------------------------------------------------------------
+
+
+def section(title: str) -> None:
+    """Print a blank line followed by a `=== title ===` heading."""
+    heading = f"=== {title} ==="
+    print(f"\n{heading}")
+
+
+def ok(msg: str) -> None:
+    """Print `msg` as an indented line tagged with the PASS marker."""
+    line = f"  [{_PASS}] {msg}"
+    print(line)
+
+
+def fail(msg: str) -> None:
+    """Print `msg` as an indented line tagged with the FAIL marker."""
+    line = f"  [{_FAIL}] {msg}"
+    print(line)
+
+
+def info(msg: str) -> None:
+    """Print `msg` as an indented line tagged with the INFO marker."""
+    line = f"  [{_INFO}] {msg}"
+    print(line)
+
+
+# ---------------------------------------------------------------------------
+# Subprocess plumbing
+# ---------------------------------------------------------------------------
+
+
+def find_free_port() -> int:
+    """Ask the OS for an unused TCP port on 127.0.0.1 and return its number.
+
+    Avoids collisions on 8786 when several suites run concurrently or next
+    to a real dev stack. The probing socket is closed before returning, so
+    the port is free again for the caller to bind.
+    """
+    probe = socket.socket()
+    try:
+        probe.bind(("127.0.0.1", 0))  # port 0 → the OS picks one
+        _host, port = probe.getsockname()
+        return port
+    finally:
+        probe.close()
+
+
+def wait_for_port(host: str, port: int, timeout: float) -> bool:
+    """Return True as soon as a TCP connect to host:port succeeds, or False
+    once `timeout` seconds have passed without one.
+
+    Each attempt uses a 1-second connect timeout and failed attempts are
+    spaced 0.2 seconds apart.
+    """
+    give_up_at = time.monotonic() + timeout
+    while True:
+        if time.monotonic() >= give_up_at:
+            return False
+        try:
+            conn = socket.create_connection((host, port), timeout=1.0)
+            conn.close()
+        except OSError:
+            time.sleep(0.2)
+            continue
+        return True
+
+
+def start_scheduler(port: int) -> subprocess.Popen:
+    """Start a dask scheduler bound to 127.0.0.1:`port` and wait for it to
+    accept connections.
+
+    The dashboard is given `:0` so it takes an ephemeral port and doesn't
+    fight with anything. Child output is discarded.
+
+    Raises:
+        RuntimeError: if nothing accepts on the port within 20 seconds. The
+            child is killed and reaped before raising.
+    """
+    proc = subprocess.Popen(
+        [
+            _DASK_CLI,
+            "scheduler",
+            "--host",
+            "127.0.0.1",
+            "--port",
+            str(port),
+            "--dashboard-address",
+            ":0",
+        ],
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.DEVNULL,
+        env=_subprocess_env(),
+    )
+    if not wait_for_port("127.0.0.1", port, timeout=20.0):
+        proc.kill()
+        # Reap the child; without wait() it lingers as a zombie for the
+        # rest of the test run.
+        proc.wait()
+        raise RuntimeError(f"scheduler did not bind 127.0.0.1:{port} within 20s")
+    return proc
+
+
+def start_worker(scheduler_addr: str) -> subprocess.Popen:
+    """Launch one `dask worker` subprocess pointed at `scheduler_addr`.
+
+    The worker runs a single process with a single thread and a 1GB memory
+    limit; its output is discarded. Returns without waiting for the worker
+    to register.
+    """
+    argv = [_DASK_CLI, "worker", scheduler_addr]
+    argv += ["--nworkers=1", "--nthreads=1", "--memory-limit=1GB"]
+    return subprocess.Popen(
+        argv,
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.DEVNULL,
+        env=_subprocess_env(),
+    )
+
+
+def kill(proc: subprocess.Popen, grace_seconds: float = 3.0) -> None:
+    """Stop `proc` and reap it.
+
+    Does nothing if the process has already exited. Otherwise calls
+    terminate(), waits up to `grace_seconds`, and escalates to kill() if
+    the process is still running. In both paths the child is waited on, so
+    `proc.returncode` is set when this returns and no zombie is left.
+    """
+    if proc.poll() is not None:
+        return
+    proc.terminate()
+    try:
+        proc.wait(timeout=grace_seconds)
+    except subprocess.TimeoutExpired:
+        proc.kill()
+        # kill() only sends the signal; wait() collects the exit status.
+        proc.wait()
+
+
+@contextmanager
+def cluster(num_workers: int) -> Iterator[tuple[str, list[subprocess.Popen]]]:
+    """Context manager that runs a scheduler plus `num_workers` workers.
+
+    Yields `(scheduler_address, worker_processes)`. On exit — including
+    when the body or a worker launch raises — every worker started so far
+    is stopped first, then the scheduler.
+    """
+    port = find_free_port()
+    scheduler = start_scheduler(port)
+    address = f"tcp://127.0.0.1:{port}"
+    spawned: list[subprocess.Popen] = []
+    try:
+        for _ in range(num_workers):
+            # Append one at a time so a failed launch still leaves the
+            # earlier workers in `spawned` for cleanup.
+            spawned.append(start_worker(address))
+        yield address, spawned
+    finally:
+        for worker_proc in spawned:
+            kill(worker_proc)
+        kill(scheduler)
+
+
+def wait_for_workers(client: Client, expected: int, timeout: float) -> int:
+    """Poll `client.scheduler_info()` every 0.5s until at least `expected`
+    workers are listed or `timeout` seconds elapse.
+
+    Returns the worker count from the last poll, or 0 if the timeout
+    expired before the first poll.
+    """
+    cutoff = time.monotonic() + timeout
+    registered = 0
+    while time.monotonic() < cutoff:
+        registered = len(client.scheduler_info()["workers"])
+        if registered < expected:
+            time.sleep(0.5)
+            continue
+        break
+    return registered
+
+
+# ---------------------------------------------------------------------------
+# Synthetic tasks (run inside dask-worker subprocesses)
+# ---------------------------------------------------------------------------
+
+
+def _sleep_task(duration: float) -> str:
+    """Block for `duration` seconds, then return the hostname of the
+    machine the task ran on.
+
+    Lives at module level so Dask can pickle it; its imports stay inside
+    the body so the function does not rely on module globals.
+    """
+    from socket import gethostname
+    from time import sleep
+
+    sleep(duration)
+    return gethostname()
+
+
+def _quick_task(x: int) -> int:
+    """Return twice `x` — a no-sleep task for checking that submit/result
+    plumbing works."""
+    doubled = x * 2
+    return doubled
+
+
+# ---------------------------------------------------------------------------
+# Test phases
+# ---------------------------------------------------------------------------
+
+
+def phase_setup(num_workers: int, scheduler_addr: str) -> tuple[Client, bool]:
+    """P1: connect to the scheduler and check that all `num_workers`
+    workers register within 20 seconds.
+
+    Returns `(client, passed)`. When the connection itself fails the client
+    slot is None, so callers must check the flag before using it.
+    """
+    section("Phase 1 — workers register with scheduler")
+    try:
+        client = Client(scheduler_addr, timeout=10)
+    except Exception as e:
+        fail(f"could not connect to scheduler: {e}")
+        return None, False  # type: ignore[return-value]
+
+    seen = wait_for_workers(client, expected=num_workers, timeout=20.0)
+    all_registered = seen >= num_workers
+    if all_registered:
+        ok(f"scheduler reports {seen} worker(s) registered")
+    else:
+        fail(
+            f"only {seen}/{num_workers} workers registered after 20s; "
+            "did the workers crash on startup?"
+        )
+    return client, all_registered
+
+
+def phase_parallelism(client: Client, num_workers: int) -> bool:
+    """P2: a batch of sleeping tasks finishes in parallel time, not serial.
+
+    Submits 2 × num_workers tasks that each sleep 3s. Run fully in
+    parallel that is two waves (about 6s); run one after another it would
+    be 6 × num_workers seconds. The phase passes when the measured wall
+    time is at most the parallel time plus 5s of slack.
+
+    Returns True on pass, False if gather() raises or the bound is missed.
+    """
+    section("Phase 2 — concurrent tasks run in parallel")
+    per_task = 3.0
+    num_tasks = num_workers * 2
+    waves = num_tasks / num_workers
+    # The 5s slack absorbs scheduler dispatch, Python startup, GC and CI
+    # noise.
+    upper_bound = waves * per_task + 5.0
+
+    started_at = time.monotonic()
+    futures = []
+    for _ in range(num_tasks):
+        futures.append(client.submit(_sleep_task, per_task, pure=False))
+    # gather() blocks until every future is done and raises if one failed.
+    try:
+        results = client.gather(futures)
+    except Exception as e:
+        fail(f"gather() raised: {e}")
+        return False
+    elapsed = time.monotonic() - started_at
+
+    if elapsed > upper_bound:
+        fail(
+            f"{num_tasks} tasks took {elapsed:.1f}s, expected <{upper_bound:.1f}s. "
+            "Tasks may be running sequentially — check that --nthreads=1 "
+            "isn't pinning everything to one worker."
+        )
+        return False
+
+    ok(
+        f"{num_tasks} tasks × {per_task}s each finished in "
+        f"{elapsed:.1f}s (bound {upper_bound:.1f}s)"
+    )
+    info(
+        f"sequential lower bound would be {num_tasks * per_task:.1f}s; "
+        f"parallelism is real."
+    )
+    info(f"task return values (worker hostnames): {sorted(set(results))[:5]!r}")
+    return True
+
+
+def phase_distribution(client: Client) -> bool:
+    """P3: tasks land on at least 2 distinct workers.
+
+    Submits 20 short tasks and asks the scheduler, per future, which
+    workers hold the result. `_sleep_task` returns a hostname, which is the
+    same for every worker on a single host, so the hostname count is only
+    a fallback for when no holder could be determined at all.
+
+    Returns True when at least 2 workers were used, or when fewer than 2
+    workers are registered (degenerate cluster, nothing to spread across).
+    Returns False, reported as a failure, otherwise.
+    """
+    section("Phase 3 — tasks distribute across workers")
+    futures = [client.submit(_sleep_task, 0.2, pure=False) for _ in range(20)]
+    results = client.gather(futures)
+    registered = client.scheduler_info()["workers"]
+
+    workers_used: set = set()
+    lookup_errors = 0
+    for fut in futures:
+        try:
+            workers_used.update(client.who_has(fut).get(fut.key, ()))
+        except Exception:
+            # Keep going, but surface the problem below instead of hiding it.
+            lookup_errors += 1
+    if lookup_errors:
+        info(f"who_has lookup failed for {lookup_errors}/{len(futures)} futures")
+
+    used_count = len(workers_used) if workers_used else len(set(results))
+    if used_count >= 2:
+        ok(f"work spread across {used_count} workers (out of {len(registered)})")
+        return True
+
+    message = (
+        f"only {used_count} worker(s) used — possibly all tasks finished too "
+        "fast for the scheduler to spread, or the cluster is single-worker."
+    )
+    if len(registered) < 2:
+        # Don't hard-fail with N=1 worker (degenerate); spread is impossible.
+        info(message)
+        return True
+    fail(message)
+    return False
+
+
+def phase_worker_death(
+    client: Client, workers: list[subprocess.Popen], scheduler_addr: str
+) -> bool:
+    """N1: stop one worker while tasks are in flight and check that the
+    cluster still completes a fresh submission.
+
+    Skipped (returns True) with fewer than two workers. `scheduler_addr`
+    is accepted but not used by this phase.
+    """
+    section("Phase 4 — worker death does not deadlock the cluster")
+    if len(workers) < 2:
+        info("skipping — need ≥2 workers for this test")
+        return True
+
+    # One 4-second task per worker process, so the victim is likely to be
+    # holding work when it goes down.
+    in_flight = [
+        client.submit(_sleep_task, 4.0, pure=False) for _ in range(len(workers))
+    ]
+    time.sleep(0.5)  # give the scheduler time to dispatch them
+
+    # The victim is the first worker process that is still running.
+    victim = next((w for w in workers if w.poll() is None), None)
+    if victim is None:
+        fail("no live workers to kill")
+        return False
+    info(f"killing worker pid={victim.pid} mid-task")
+    kill(victim)
+
+    # Drain the in-flight futures one by one, ignoring errors and
+    # timeouts: which of them fail is irrelevant, only that the cluster
+    # STAYS USABLE afterwards.
+    for pending in in_flight:
+        try:
+            pending.result(timeout=10.0)
+        except Exception:
+            pass
+
+    # Usability check: a trivial task must complete within 10 seconds.
+    try:
+        doubled = client.submit(_quick_task, 21, pure=False).result(timeout=10.0)
+    except Exception as e:
+        fail(f"cluster unusable after worker death: {e}")
+        return False
+    if doubled != 42:
+        fail(f"unexpected result {doubled} from quick task")
+        return False
+    ok("cluster still serves new submissions after a worker died")
+    return True
+
+
+def phase_unreachable_scheduler() -> bool:
+    """N2: connecting to a non-existent scheduler must raise in under 8s."""
+    section("Phase 5 — connecting to a dead scheduler fails fast")
+    bogus_addr = f"tcp://127.0.0.1:{find_free_port()}"  # nothing listening here
+    start = time.monotonic()
+    try:
+        # Short timeout — we'd rather see "couldn't connect" than hang.
+        stray = Client(bogus_addr, timeout=3)
+    except Exception as e:
+        elapsed = time.monotonic() - start
+        if elapsed < 8.0:
+            ok(
+                f"Client({bogus_addr}) raised {type(e).__name__} in "
+                f"{elapsed:.1f}s (bounded as expected)"
+            )
+            return True
+        fail(f"Client raised but took {elapsed:.1f}s — too slow for a fail-fast")
+        return False
+    stray.close()  # unexpected connection: don't leak it into later phases
+    fail("Client connected to a non-existent scheduler — expected an exception")
+    return False
+
+
+def phase_scheduler_death(client: Client, sched_killer) -> bool:
+    """N3: a client must notice that its scheduler died. Once
+    sched_killer() has run, submitting and awaiting a task has to raise
+    (the wait on the result is capped at 10s) instead of blocking."""
+    section("Phase 6 — scheduler death surfaces to client without hanging")
+    sched_killer()
+    # Grace period so the client can observe the dropped connection.
+    time.sleep(2.0)
+    start = time.monotonic()
+    try:
+        client.submit(_quick_task, 1, pure=False).result(timeout=10.0)
+    except Exception as e:
+        took = time.monotonic() - start
+        ok(
+            f"submit/result against dead scheduler raised {type(e).__name__} "
+            f"in {took:.1f}s (bounded)"
+        )
+        return True
+    # No exception means a result came back, which is not expected here.
+    fail("submit/result succeeded against a dead scheduler — unexpected")
+    return False
+
+
+# ---------------------------------------------------------------------------
+# Driver
+# ---------------------------------------------------------------------------
+
+
+def run_once(num_workers: int) -> bool:
+    """One complete pass over all phases against a fresh local cluster.
+
+    Starts one scheduler and ``num_workers`` workers as subprocesses, runs
+    the phases in order, and always tears the subprocesses down again.
+    Returns True iff every phase passed (a failed setup returns False
+    immediately without running the remaining phases).
+    """
+    print(f"\n{'#' * 60}\n# Run start — {num_workers} workers\n{'#' * 60}")
+    sched_port = find_free_port()
+    scheduler_addr = f"tcp://127.0.0.1:{sched_port}"
+    sched_proc = start_scheduler(sched_port)
+    worker_procs: list[subprocess.Popen] = []
+    client: Client | None = None
+    try:
+        for _ in range(num_workers):
+            worker_procs.append(start_worker(scheduler_addr))
+
+        client, ok_setup = phase_setup(num_workers, scheduler_addr)
+        if not ok_setup:
+            return False
+
+        # A list literal is evaluated left to right, so the phases run
+        # in exactly this order, and a failing phase does not prevent
+        # the later ones from running.
+        verdicts = [
+            phase_parallelism(client, num_workers),
+            phase_distribution(client),
+            phase_worker_death(client, worker_procs, scheduler_addr),
+            phase_unreachable_scheduler(),
+        ]
+
+        # Scheduler-death must run last — it kills the scheduler we
+        # were using and we'd have to restart it for any subsequent
+        # phase.
+        verdicts.append(phase_scheduler_death(client, lambda: kill(sched_proc)))
+        return all(verdicts)
+    finally:
+        # Best-effort teardown: close the client, then stop every
+        # worker and finally the scheduler, even if a phase raised.
+        if client is not None:
+            try:
+                client.close()
+            except Exception:
+                pass
+        for w in worker_procs:
+            kill(w)
+        kill(sched_proc)
+
+
+def main() -> int:
+    """CLI entry point: exit code 0 if every run passed, 1 otherwise."""
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--runs",
+        type=int,
+        default=1,
+        help="How many times to repeat the full suite (default: 1)",
+    )
+    parser.add_argument(
+        "--workers",
+        type=int,
+        default=3,
+        help="Number of dask-worker subprocesses per run (default: 3)",
+    )
+    args = parser.parse_args()
+    if args.runs < 1:  # zero runs would vacuously report "all passed"
+        parser.error("--runs must be at least 1")
+
+    failures: list[int] = []
+    for i in range(1, args.runs + 1):
+        print(f"\n{'=' * 60}\n=== Run {i}/{args.runs}\n{'=' * 60}")
+        try:
+            if not run_once(args.workers):
+                failures.append(i)
+        except Exception as e:
+            fail(f"run {i} crashed: {type(e).__name__}: {e}")
+            failures.append(i)
+    print()
+    if not failures:
+        print(f"[{_PASS}] dask-distributed e2e: {args.runs} run(s), all passed")
+        return 0
+    summary = f"{len(failures)}/{args.runs} run(s) failed: {failures}"
+    print(f"[{_FAIL}] dask-distributed e2e: {summary}")
+    return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())  # main() returns 0 when all runs passed, 1 otherwise
diff --git a/backend/tests/integration/__init__.py b/backend/tests/integration/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/backend/tests/integration/danswer/__init__.py b/backend/tests/integration/danswer/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/backend/tests/integration/danswer/file_store/__init__.py b/backend/tests/integration/danswer/file_store/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/backend/tests/integration/danswer/file_store/test_azure_blob_file_store.py b/backend/tests/integration/danswer/file_store/test_azure_blob_file_store.py
new file mode 100644
index 00000000000..aef57b42e38
--- /dev/null
+++ b/backend/tests/integration/danswer/file_store/test_azure_blob_file_store.py
@@ -0,0 +1,99 @@
+"""Integration test for the Azure Blob file store — big-file round-trip.
+
+Verifies the real AzureBlobFileStore end-to-end against a live Blob endpoint
+(Azurite emulator locally, or a real storage account): a LARGE file is
+streamed up, streamed back down, and its bytes must match — exercising the
+streaming/spool paths that the OOM crash exposed. Metadata lives in the
+Postgres `file_store` table, so a reachable + migrated DB is also required.
+
+This test is SKIPPED unless AZURE_BLOB_CONNECTION_STRING is set, so it never
+runs (or breaks) in environments without Blob configured.
+
+Run it (locally, against Azurite):
+
+    # 1. Start Azurite (Azure Storage emulator):
+    docker run -d -p 10000:10000 mcr.microsoft.com/azure-storage/azurite \
+        azurite-blob --blobHost 0.0.0.0
+    # 2. Install the optional dep into your venv:
+    pip install azure-storage-blob==12.19.1
+    # 3. Point the test at Azurite (well-known dev connection string) + your
+    #    local Postgres (must have run `alembic upgrade head` for object_key):
+    export AZURE_BLOB_CONNECTION_STRING="DefaultEndpointsProtocol=http;\
+AccountName=devstoreaccount1;\
+AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;\
+BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;"
+    export AZURE_BLOB_CONTAINER=danswer-files-test
+    # 4. Run:
+    PYTHONPATH=$(pwd) pytest tests/integration/danswer/file_store/test_azure_blob_file_store.py -v
+"""
+import hashlib
+import os
+import uuid
+from io import BytesIO
+
+import pytest
+from sqlalchemy.orm import Session
+
+from danswer.configs.constants import FileOrigin
+from danswer.db.engine import get_sqlalchemy_engine
+
+pytestmark = pytest.mark.skipif(
+    not os.environ.get("AZURE_BLOB_CONNECTION_STRING"),  # unset OR empty → skip
+    reason="AZURE_BLOB_CONNECTION_STRING unset — Azure Blob integration test skipped.",
+)
+
+# 40 MB — comfortably above MAX_IN_MEMORY_SIZE (30 MB), so the read path must
+# spill to the spooled temp file rather than holding it all in memory.
+BIG_SIZE = 40 * 1024 * 1024
+
+
+def _sha256(stream) -> str:
+    digest = hashlib.sha256()
+    while (block := stream.read(1 << 20)) != b"":  # 1 MiB reads until read() gives b""
+        digest.update(block)
+    return digest.hexdigest()
+
+
+def test_azure_blob_big_file_round_trip() -> None:
+    """Save a BIG_SIZE blob, read it back both ways, check its row, delete it."""
+    from danswer.db.pg_file_store import get_pgfilestore_by_file_name
+    from danswer.file_store.file_store import AzureBlobFileStore
+
+    file_name = f"integration-test/big-{uuid.uuid4()}.bin"
+    content = os.urandom(BIG_SIZE)
+    expected = hashlib.sha256(content).hexdigest()
+
+    with Session(get_sqlalchemy_engine()) as db_session:
+        store = AzureBlobFileStore(db_session=db_session)
+        try:
+            # --- streaming upload ---
+            store.save_file(
+                file_name=file_name,
+                content=BytesIO(content),
+                display_name="big upload integration test",
+                file_origin=FileOrigin.OTHER,
+                file_type="application/octet-stream",
+            )
+
+            # --- streaming download (use_tempfile=True → spools to disk) ---
+            got = store.read_file(file_name, mode="b", use_tempfile=True)
+            assert _sha256(got) == expected, "round-tripped bytes differ (streamed)"
+
+            # --- in-memory path too (digest compare: no 40 MB diff on failure) ---
+            got2 = store.read_file(file_name, mode="b")
+            assert _sha256(got2) == expected, "round-tripped bytes differ (in-memory)"
+
+            # --- metadata row points at Blob, not a lobj ---
+            record = get_pgfilestore_by_file_name(file_name, db_session)
+            assert record.object_key is not None
+            assert record.lobj_oid is None
+        finally:
+            # Always clean up the blob + metadata row.
+            try:
+                store.delete_file(file_name)
+            except Exception:
+                pass
+
+        # --- deletion removed it ---
+        with pytest.raises(Exception):
+            store.read_file(file_name, mode="b")
diff --git a/backend/tests/unit/danswer/db/test_persona_cache.py b/backend/tests/unit/danswer/db/test_persona_cache.py
new file mode 100644
index 00000000000..a2160da450a
--- /dev/null
+++ b/backend/tests/unit/danswer/db/test_persona_cache.py
@@ -0,0 +1,490 @@
+"""Unit tests for ``danswer.db.persona_cache``.
+
+What we lock down here:
+
+1. **Filter parity vs SQL.** The Python filter in
+   ``_filter_personas_for_user`` must match the OR-block in
+   :func:`danswer.db.persona.get_personas` for every representative
+   permission shape — public, direct-user grant, group grant, and the
+   negative (none of the above). If either filter drifts, users see the
+   wrong assistants. Each case here corresponds 1:1 to an SQL branch.
+
+2. **Read path** with cache enabled:
+   - Miss → DB call → Redis SET (with TTL)
+   - Hit  → no DB call (the perf promise)
+   - Per-user-groups miss/hit independently of the global personas miss/hit
+
+3. **Read path** with cache disabled:
+   - Always reads the DB; never touches Redis.
+   - ``include_deleted=True`` always falls through to DB even when the
+     cache is otherwise enabled — we deliberately don't cache that
+     less-common shape.
+
+4. **Invalidation:**
+   - ``invalidate_personas_all`` deletes the right Redis key.
+   - ``invalidate_user_groups(uid)`` deletes the per-user key.
+   - Both short-circuit (no Redis call) when the cache is disabled —
+     mutation paths shouldn't pay ambient cost in the off state.
+
+5. **Fail-open** on Redis errors:
+   - GET error → treated as miss → DB read → no crash.
+   - SET / DELETE errors swallowed with a log; calling code sees nothing.
+
+Redis is stubbed with a tiny in-memory fake; the inner DB function and
+``PersonaSnapshot.from_model`` are patched. No real Postgres or Redis.
+"""
+from __future__ import annotations
+
+import unittest
+import uuid
+from typing import Any
+from unittest.mock import MagicMock
+from unittest.mock import patch
+
+from danswer.db import persona_cache as pc
+
+
+# ---------- shared fakes ----------
+
+
+class _FakeRedis:
+    """Minimal dict-backed stand-in for a Redis client (get/set/delete).
+
+    Values are kept as bytes and every call is recorded, so tests can
+    assert on both the stored payload and the calls that were made.
+    The ``ex`` TTL passed to ``set`` is recorded but never enforced.
+    """
+
+    def __init__(self) -> None:
+        self.store: dict[str, bytes] = {}
+        self.get_calls: list[str] = []
+        self.set_calls: list[tuple[str, Any, int | None]] = []
+        self.delete_calls: list[str] = []
+
+    def get(self, key: str) -> bytes | None:
+        """Record the lookup and return the stored bytes, or None on a miss."""
+        self.get_calls.append(key)
+        return self.store[key] if key in self.store else None
+
+    def set(self, key: str, value: Any, ex: int | None = None) -> bool:
+        self.set_calls.append((key, value, ex))
+        # bytes are stored untouched; anything else is stringified first
+        # (a no-op for str) and then UTF-8 encoded.
+        if not isinstance(value, bytes):
+            value = str(value).encode("utf-8")
+        self.store[key] = value
+        return True
+
+    def delete(self, *keys: str) -> int:
+        """Record and drop each key; return how many actually existed."""
+        self.delete_calls.extend(keys)
+        # pop() with a private sentinel tells us whether the key was present.
+        missing = object()
+        hits = [k for k in keys if self.store.pop(k, missing) is not missing]
+        return len(hits)
+
+
+class _FakePersonaSnapshot:
+    """Duck-typed substitute for ``PersonaSnapshot`` used by the filter tests.
+
+    Only the attributes set in ``__init__`` exist: ``id``, ``is_public``,
+    ``users`` and ``groups``. That keeps the test cases about permission
+    semantics instead of the real model's many required fields.
+    """
+
+    def __init__(
+        self,
+        *,
+        persona_id: int,
+        is_public: bool,
+        user_ids_with_access: list[uuid.UUID],
+        group_ids_with_access: list[int],
+    ) -> None:
+        self.id, self.is_public = persona_id, is_public
+        # ``users`` mimics list[MinimalUserSnapshot]: objects that expose ``.id``.
+        self.users = list(map(lambda uid: MagicMock(id=uid), user_ids_with_access))
+        # ``groups`` mimics list[int]; the caller's list object is stored
+        # as-is (it is not copied).
+        self.groups = group_ids_with_access
+
+
+# ---------- filter parity ----------
+
+
+class TestFilterParityVsSqlOrBlock(unittest.TestCase):
+    """One test per SQL branch in get_personas's OR-filter.
+
+    SQL (paraphrased):
+        Persona.is_public
+        OR Persona.id IN (Persona__User where user_id = U)
+        OR Persona.id IN (Persona__UserGroup where group_id IN <U's groups>)
+
+    Each case below isolates one branch; the last asserts the negative.
+    """
+
+    def setUp(self) -> None:
+        self.user_id = uuid.uuid4()
+        self.other_user_id = uuid.uuid4()
+        self.user_group_ids = [10, 20]  # groups the user under test belongs to
+
+    def test_public_persona_always_visible(self) -> None:
+        """Branch 1: ``is_public`` → visible regardless of grants. This
+        is the most-traveled path and must stay correct even when the
+        user has no direct or group grant."""
+        p = _FakePersonaSnapshot(
+            persona_id=1,
+            is_public=True,
+            user_ids_with_access=[],
+            group_ids_with_access=[],
+        )
+        result = pc._filter_personas_for_user([p], self.user_id, self.user_group_ids)
+        self.assertEqual([x.id for x in result], [1])
+
+    def test_direct_user_grant_visible(self) -> None:
+        """Branch 2: not public, but the user is in the persona's
+        ``users`` list. Mirrors a row in Persona__User."""
+        p = _FakePersonaSnapshot(
+            persona_id=2,
+            is_public=False,
+            user_ids_with_access=[self.user_id],
+            group_ids_with_access=[],
+        )
+        result = pc._filter_personas_for_user([p], self.user_id, self.user_group_ids)
+        self.assertEqual([x.id for x in result], [2])
+
+    def test_group_grant_visible_if_user_in_one_of_those_groups(self) -> None:
+        """Branch 3: not public, no direct grant, but a group the user
+        belongs to has access. Mirrors a row in Persona__UserGroup
+        joined with User__UserGroup."""
+        p = _FakePersonaSnapshot(
+            persona_id=3,
+            is_public=False,
+            user_ids_with_access=[],
+            group_ids_with_access=[20, 999],  # 20 is one of the user's groups
+        )
+        result = pc._filter_personas_for_user([p], self.user_id, self.user_group_ids)
+        self.assertEqual([x.id for x in result], [3])
+
+    def test_no_access_hidden(self) -> None:
+        """Negative: not public, not in users, no overlapping group →
+        must be filtered out. If any branch leaks into this case we have
+        a permission bug."""
+        p = _FakePersonaSnapshot(
+            persona_id=4,
+            is_public=False,
+            user_ids_with_access=[self.other_user_id],  # different user
+            group_ids_with_access=[999, 888],  # no overlap with [10, 20]
+        )
+        result = pc._filter_personas_for_user([p], self.user_id, self.user_group_ids)
+        self.assertEqual(result, [])
+
+    def test_mixed_list_returns_only_visible(self) -> None:
+        """A realistic mix: 4 personas, only the first 3 should pass
+        the filter (one per branch + one denied). Verifies that the
+        denial path doesn't accidentally short-circuit later visible
+        items in the list."""
+        personas = [
+            _FakePersonaSnapshot(
+                persona_id=1,
+                is_public=True,
+                user_ids_with_access=[],
+                group_ids_with_access=[],
+            ),
+            _FakePersonaSnapshot(
+                persona_id=2,
+                is_public=False,
+                user_ids_with_access=[self.user_id],
+                group_ids_with_access=[],
+            ),
+            _FakePersonaSnapshot(
+                persona_id=3,
+                is_public=False,
+                user_ids_with_access=[],
+                group_ids_with_access=[10],
+            ),
+            _FakePersonaSnapshot(
+                persona_id=4,
+                is_public=False,
+                user_ids_with_access=[self.other_user_id],
+                group_ids_with_access=[888],
+            ),
+        ]
+        result = pc._filter_personas_for_user(
+            personas, self.user_id, self.user_group_ids
+        )
+        self.assertEqual(sorted(x.id for x in result), [1, 2, 3])  # order-insensitive
+
+    def test_user_with_no_groups_still_sees_public_and_direct_grants(self) -> None:
+        """Edge case: user belongs to zero groups. The group branch
+        contributes nothing, but public + direct grants must still
+        work — otherwise zero-group users get a broken assistant list."""
+        personas = [
+            _FakePersonaSnapshot(
+                persona_id=1,
+                is_public=True,
+                user_ids_with_access=[],
+                group_ids_with_access=[],
+            ),
+            _FakePersonaSnapshot(
+                persona_id=2,
+                is_public=False,
+                user_ids_with_access=[self.user_id],
+                group_ids_with_access=[],
+            ),
+            _FakePersonaSnapshot(
+                persona_id=3,
+                is_public=False,
+                user_ids_with_access=[],
+                group_ids_with_access=[10],  # group-only grant: expected to be hidden
+            ),
+        ]
+        result = pc._filter_personas_for_user(personas, self.user_id, [])  # [] = no groups
+        self.assertEqual(sorted(x.id for x in result), [1, 2])
+
+
+# ---------- read path ----------
+
+
+class TestUserGroupCache(unittest.TestCase):
+    """The per-user group-ids cache: cheap query, big aggregate win."""
+
+    def test_miss_then_hit_only_one_db_read(self) -> None:
+        """First call hits the DB, subsequent calls within TTL serve
+        from Redis. Locks in the central performance promise of the
+        per-user side of the cache."""
+        fake = _FakeRedis()
+        db_session = MagicMock()
+        rows = MagicMock()
+        rows.all.return_value = [10, 20, 30]  # what db_session.scalars(...).all() yields
+        db_session.scalars.return_value = rows
+        user_id = uuid.uuid4()
+
+        with patch.object(pc, "PERSONA_CACHE_ENABLED", True), patch.object(
+            pc, "PERSONA_CACHE_TTL_SECONDS", 60
+        ), patch.object(pc, "get_redis_client", return_value=fake):
+            first = pc._get_user_group_ids_cached(user_id, db_session)
+            second = pc._get_user_group_ids_cached(user_id, db_session)
+
+        self.assertEqual(first, [10, 20, 30])
+        self.assertEqual(second, [10, 20, 30])
+        self.assertEqual(
+            db_session.scalars.call_count,
+            1,
+            "second lookup must come from Redis, not the DB",
+        )
+
+    def test_set_uses_configured_ttl(self) -> None:
+        """The TTL is the safety net for missed busts — if it isn't
+        applied, a stale entry could live forever after a missed
+        invalidation. Lock down that ``ex=`` is the configured value.
+        """
+        fake = _FakeRedis()
+        db_session = MagicMock()
+        rows = MagicMock()
+        rows.all.return_value = []
+        db_session.scalars.return_value = rows
+
+        with patch.object(pc, "PERSONA_CACHE_ENABLED", True), patch.object(
+            pc, "PERSONA_CACHE_TTL_SECONDS", 1234
+        ), patch.object(pc, "get_redis_client", return_value=fake):
+            pc._get_user_group_ids_cached(uuid.uuid4(), db_session)
+
+        self.assertEqual(len(fake.set_calls), 1)
+        _key, _val, ex = fake.set_calls[0]  # (key, value, ex) as recorded by _FakeRedis.set
+        self.assertEqual(ex, 1234)
+
+
+# ---------- routing / disabled mode ----------
+
+
+class TestGetPersonasForUserCached(unittest.TestCase):
+    def test_disabled_falls_through_to_get_personas(self) -> None:
+        """With the flag off, the wrapper must NOT call Redis at all —
+        it must behave exactly like the previous direct-DB code path.
+        Important so enabling/disabling the feature is a clean toggle.
+        """
+        db_session = MagicMock()
+        snap = MagicMock()  # stand-in returned by the patched PersonaSnapshot.from_model
+
+        with patch.object(pc, "PERSONA_CACHE_ENABLED", False), patch(
+            "danswer.db.persona.get_personas", return_value=[MagicMock()]
+        ) as mock_get_personas, patch(
+            "danswer.db.persona_cache.PersonaSnapshot.from_model", return_value=snap
+        ), patch.object(
+            pc, "get_redis_client"
+        ) as mock_client:
+            result = pc.get_personas_for_user_cached(
+                user_id=uuid.uuid4(), db_session=db_session
+            )
+
+        self.assertEqual(result, [snap])
+        mock_get_personas.assert_called_once()
+        mock_client.assert_not_called()
+
+    def test_include_deleted_true_bypasses_cache_even_when_enabled(self) -> None:
+        """We deliberately don't cache the ``include_deleted=True`` shape —
+        keeps the cache key set small and avoids accidental mis-keying.
+        Locks down that this path skips Redis entirely.
+        """
+        db_session = MagicMock()
+        with patch.object(pc, "PERSONA_CACHE_ENABLED", True), patch(
+            "danswer.db.persona.get_personas", return_value=[]
+        ) as mock_get_personas, patch(
+            "danswer.db.persona_cache.PersonaSnapshot.from_model"
+        ), patch.object(
+            pc, "get_redis_client"
+        ) as mock_client:
+            pc.get_personas_for_user_cached(
+                user_id=uuid.uuid4(),
+                db_session=db_session,
+                include_deleted=True,
+            )
+
+        mock_get_personas.assert_called_once()
+        # include_deleted=True was passed through to the DB read
+        self.assertTrue(mock_get_personas.call_args.kwargs["include_deleted"])
+        mock_client.assert_not_called()
+
+    def test_admin_call_returns_unfiltered_global_cache(self) -> None:
+        """``user_id=None`` is the admin / no-auth case. The cache
+        already holds the full list with no permission filter, so we
+        skip the Python filter step. Locks down the fast path for
+        admin endpoints that share the same cache.
+        """
+        all_snaps = [
+            _FakePersonaSnapshot(
+                persona_id=i,
+                is_public=False,  # no grants at all: a permission filter would drop it
+                user_ids_with_access=[],
+                group_ids_with_access=[],
+            )
+            for i in [1, 2, 3]
+        ]
+        with patch.object(pc, "PERSONA_CACHE_ENABLED", True), patch.object(
+            pc, "_get_all_personas_cached", return_value=all_snaps
+        ) as mock_get_all, patch.object(
+            pc, "_get_user_group_ids_cached"
+        ) as mock_get_groups:
+            result = pc.get_personas_for_user_cached(
+                user_id=None, db_session=MagicMock()
+            )
+
+        self.assertEqual([x.id for x in result], [1, 2, 3])
+        mock_get_all.assert_called_once()
+        # Critically, we did NOT look up groups for the admin path.
+        mock_get_groups.assert_not_called()
+
+
+# ---------- invalidation ----------
+
+
+class TestInvalidation(unittest.TestCase):
+    def test_invalidate_personas_all_deletes_right_key(self) -> None:
+        """The bust call must target ``personas:all:not_deleted``. Any
+        drift between this key and the SET key in the read path would
+        produce a stuck cache."""
+        fake = _FakeRedis()
+        with patch.object(pc, "PERSONA_CACHE_ENABLED", True), patch.object(
+            pc, "get_redis_client", return_value=fake
+        ):
+            pc.invalidate_personas_all()
+        self.assertIn("danswer:personas:all:not_deleted", fake.delete_calls)
+
+    def test_invalidate_user_groups_deletes_per_user_key(self) -> None:
+        """Each user gets their own key, and the bust must embed the
+        user_id in its string form (``str(uuid)``) inside that key."""
+        fake = _FakeRedis()
+        uid = uuid.uuid4()
+        with patch.object(pc, "PERSONA_CACHE_ENABLED", True), patch.object(
+            pc, "get_redis_client", return_value=fake
+        ):
+            pc.invalidate_user_groups(uid)
+        self.assertIn(f"danswer:personas:groups:{uid}", fake.delete_calls)
+
+    def test_invalidate_when_disabled_short_circuits(self) -> None:
+        """When the flag is off, mutation paths must not pay a Redis
+        round-trip cost. Without this, every assistant edit would touch
+        Redis even on a deployment that's opted out."""
+        with patch.object(pc, "PERSONA_CACHE_ENABLED", False), patch.object(
+            pc, "get_redis_client"
+        ) as mock_client:
+            pc.invalidate_personas_all()
+            pc.invalidate_user_groups(uuid.uuid4())
+        mock_client.assert_not_called()
+
+    def test_redis_error_during_bust_is_swallowed(self) -> None:
+        """If the bust call fails (Redis down, network blip), we don't
+        want to roll back the user's mutation — the DB write already
+        committed. Loud log, no exception."""
+        bad = MagicMock()
+        bad.delete.side_effect = RuntimeError("redis exploded")
+        with patch.object(pc, "PERSONA_CACHE_ENABLED", True), patch.object(
+            pc, "get_redis_client", return_value=bad
+        ):
+            # Both must complete without raising.
+            pc.invalidate_personas_all()
+            pc.invalidate_user_groups(uuid.uuid4())
+        self.assertGreaterEqual(bad.delete.call_count, 2)  # error path really ran
+
+
+# ---------- fail-open on read ----------
+
+
+class TestFailOpenOnRedisRead(unittest.TestCase):
+    def test_redis_get_error_treated_as_miss(self) -> None:
+        """Redis GET exploding (timeout, conn refused) must NOT
+        propagate — the wrapper falls through to a direct DB read so
+        a Redis outage degrades latency, not availability.
+
+        We stub _safe_set out: the set path's round-trip serialization
+        (s.json() → json.loads) requires a real PersonaSnapshot. Here
+        we're verifying the GET-error fallback, not the SET path.
+        """
+        bad = MagicMock()
+        bad.get.side_effect = RuntimeError("connection refused")  # every GET raises
+        db_session = MagicMock()
+        snap = MagicMock(is_public=True, users=[], groups=[])
+        # The cache module round-trips via json.loads(s.json()) before
+        # the SET — needs a real JSON string here.
+        snap.json.return_value = '{"id":1,"is_public":true}'
+
+        with patch.object(pc, "PERSONA_CACHE_ENABLED", True), patch.object(
+            pc, "get_redis_client", return_value=bad
+        ), patch("danswer.db.persona.get_personas", return_value=[MagicMock()]), patch(
+            "danswer.db.persona_cache.PersonaSnapshot.from_model", return_value=snap
+        ), patch.object(
+            pc, "_safe_set"
+        ):
+            # Must not raise; must return the DB result.
+            result = pc._get_all_personas_cached(db_session)
+
+        self.assertEqual(result, [snap])
+
+    def test_corrupt_cache_entry_treated_as_miss(self) -> None:
+        """Non-JSON bytes under our key (legacy format, manual SET,
+        schema migration race) must not crash. Fall through to DB and
+        overwrite the corrupt entry on the next SET. (Same _safe_set
+        stub rationale as above.)
+        """
+        fake = _FakeRedis()
+        fake.store["danswer:personas:all:not_deleted"] = b"not-json-at-all"  # invalid JSON
+        db_session = MagicMock()
+        snap = MagicMock(is_public=True, users=[], groups=[])
+        # The cache module round-trips via json.loads(s.json()) before
+        # the SET — needs a real JSON string here.
+        snap.json.return_value = '{"id":1,"is_public":true}'
+
+        with patch.object(pc, "PERSONA_CACHE_ENABLED", True), patch.object(
+            pc, "get_redis_client", return_value=fake
+        ), patch("danswer.db.persona.get_personas", return_value=[MagicMock()]), patch(
+            "danswer.db.persona_cache.PersonaSnapshot.from_model", return_value=snap
+        ), patch.object(
+            pc, "_safe_set"
+        ):
+            result = pc._get_all_personas_cached(db_session)
+
+        self.assertEqual(result, [snap])
+
+
+if __name__ == "__main__":
+    unittest.main()  # lets this file be run directly as a script
diff --git a/backend/tests/unit/danswer/dynamic_configs/__init__.py b/backend/tests/unit/danswer/dynamic_configs/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/backend/tests/unit/danswer/dynamic_configs/test_redis_cached_store.py b/backend/tests/unit/danswer/dynamic_configs/test_redis_cached_store.py
new file mode 100644
index 00000000000..0da4077086f
--- /dev/null
+++ b/backend/tests/unit/danswer/dynamic_configs/test_redis_cached_store.py
@@ -0,0 +1,353 @@
+"""Unit tests for ``RedisCachedDynamicConfigStore`` — the read-through /
+write-through Redis cache wrapper around any ``DynamicConfigStore``.
+
+The behaviour we lock down here is the contract the rest of the app
+relies on:
+
+  1. **Read-through:** first ``load`` reads the inner store and
+     repopulates Redis; subsequent ``load``s hit Redis only and never
+     touch the inner store.
+  2. **Write-through:** ``store`` writes the inner store first, then
+     refreshes Redis with the new value + TTL so other replicas see
+     the change without waiting for the TTL to expire.
+  3. **Delete invalidates:** ``delete`` removes the inner row and clears
+     Redis. Inner store is removed first — if Redis were cleared first, a
+     concurrent ``load`` could re-cache the row before the inner delete
+     lands, leaving Redis caching a value the source of truth no longer has.
+  4. **Fail-open:** any ``RedisError`` is logged and silently swallowed.
+     ``GET`` failures become misses; ``SET``/``DEL`` failures don't
+     bubble up. The point is that a Redis outage degrades latency, not
+     availability.
+  5. **Encrypted values are never cached plaintext.** ``store(..., encrypt=True)``
+     invalidates the Redis entry rather than writing plaintext into it,
+     so the encryption-at-rest guarantee isn't silently bypassed.
+  6. **Cache miss vs None:** the wrapper distinguishes "Redis returned
+     ``nil``" (miss) from "Redis returned the JSON literal ``null``"
+     (cached None value). Both look like Python ``None`` if you're
+     careless; we test that a cached ``None`` is served from Redis
+     without re-hitting the inner store.
+
+The inner store and the Redis client are mocks — no real Postgres or
+Redis required.
+"""
+from __future__ import annotations
+
+import json
+import unittest
+from typing import Any
+from unittest.mock import MagicMock
+
+from redis import RedisError
+
+from danswer.dynamic_configs.interface import ConfigNotFoundError
+from danswer.dynamic_configs.store import RedisCachedDynamicConfigStore
+
+
+# Importing the real prefix would mean importing redis_pool (which
+# imports app_configs, and that would be fine) — but spelling it out
+# here documents and pins the Redis key shape we expect.
+_EXPECTED_PREFIX = "danswer:kv:"
+
+
+def _make_inner() -> MagicMock:
+    """Inner DynamicConfigStore mock with the methods we exercise."""
+    inner = MagicMock()
+    inner.store = MagicMock()
+    inner.load = MagicMock()
+    inner.delete = MagicMock()
+    return inner
+
+
+def _make_redis() -> MagicMock:
+    """In-memory fake Redis covering get/set/delete only.
+
+    Stores raw bytes the same way redis-py does, so JSON encode/decode
+    in the wrapper actually runs.
+    """
+    storage: dict[str, bytes] = {}
+
+    fake = MagicMock()
+
+    def fake_get(key: str) -> bytes | None:
+        return storage.get(key)
+
+    def fake_set(key: str, value: Any, ex: int | None = None) -> bool:
+        if isinstance(value, str):
+            storage[key] = value.encode("utf-8")
+        elif isinstance(value, bytes):
+            storage[key] = value
+        else:
+            storage[key] = str(value).encode("utf-8")
+        # ``ex`` is observed via the mock for the TTL assertion below.
+        return True
+
+    def fake_delete(*keys: str) -> int:
+        removed = 0
+        for k in keys:
+            if k in storage:
+                del storage[k]
+                removed += 1
+        return removed
+
+    fake.get.side_effect = fake_get
+    fake.set.side_effect = fake_set
+    fake.delete.side_effect = fake_delete
+    fake._storage = storage  # expose for assertions
+    return fake
+
+
+class TestRedisCachedDynamicConfigStore(unittest.TestCase):
+    # ------------- read-through -------------
+
+    def test_load_miss_then_hit_only_one_inner_read(self) -> None:
+        """First ``load`` is a Redis miss → falls through to the inner
+        store and repopulates Redis. The second ``load`` must serve from
+        Redis alone — the inner store must NOT be touched again. This
+        is the central performance promise of the cache.
+        """
+        inner = _make_inner()
+        inner.load.return_value = {"feature_flag": True}
+        redis = _make_redis()
+        store = RedisCachedDynamicConfigStore(
+            inner=inner, ttl_seconds=60, client_factory=lambda: redis
+        )
+
+        first = store.load("settings")
+        second = store.load("settings")
+
+        self.assertEqual(first, {"feature_flag": True})
+        self.assertEqual(second, {"feature_flag": True})
+        self.assertEqual(
+            inner.load.call_count,
+            1,
+            "second load must come from Redis, not the inner store",
+        )
+
+    def test_load_populates_redis_with_ttl(self) -> None:
+        """On a miss, the wrapper must SET into Redis with an expiry —
+        otherwise the cache would never evict and a value written by
+        another pod would be served stale forever.
+        """
+        inner = _make_inner()
+        inner.load.return_value = {"a": 1}
+        redis = _make_redis()
+        store = RedisCachedDynamicConfigStore(
+            inner=inner, ttl_seconds=120, client_factory=lambda: redis
+        )
+
+        store.load("k1")
+
+        redis.set.assert_called_once()
+        args, kwargs = redis.set.call_args
+        self.assertEqual(args[0], _EXPECTED_PREFIX + "k1")
+        # JSON-serialised payload, with the TTL kwarg matching the ctor.
+        self.assertEqual(json.loads(args[1]), {"a": 1})
+        self.assertEqual(kwargs.get("ex"), 120)
+
+    def test_load_propagates_not_found_without_caching_miss(self) -> None:
+        """If the inner store has nothing, the wrapper must raise
+        ``ConfigNotFoundError`` and NOT cache the absence — negative
+        caching has its own correctness gotchas (a later ``store`` would
+        race with the stale "missing" entry), and the plan deliberately
+        defers it.
+        """
+        inner = _make_inner()
+        inner.load.side_effect = ConfigNotFoundError
+        redis = _make_redis()
+        store = RedisCachedDynamicConfigStore(
+            inner=inner, ttl_seconds=60, client_factory=lambda: redis
+        )
+
+        with self.assertRaises(ConfigNotFoundError):
+            store.load("absent")
+        # No SET — we didn't cache the miss.
+        redis.set.assert_not_called()
+
+    def test_cached_none_is_distinguished_from_miss(self) -> None:
+        """``None`` is a legal stored value (the KV store can hold a
+        JSON ``null``). We must serve a cached ``null`` without falling
+        through to the inner store — otherwise every read of a None
+        value is effectively uncached.
+        """
+        inner = _make_inner()
+        inner.load.return_value = None
+        redis = _make_redis()
+        store = RedisCachedDynamicConfigStore(
+            inner=inner, ttl_seconds=60, client_factory=lambda: redis
+        )
+
+        first = store.load("nullable")  # miss → inner → cache
+        second = store.load("nullable")  # hit
+
+        self.assertIsNone(first)
+        self.assertIsNone(second)
+        self.assertEqual(
+            inner.load.call_count,
+            1,
+            "second load of a cached None must hit Redis, not the inner store",
+        )
+
+    # ------------- write-through / invalidation -------------
+
+    def test_store_writes_inner_then_refreshes_redis(self) -> None:
+        """``store`` must write the inner store first (source of truth),
+        then refresh Redis. Order matters: writing Redis first, then
+        failing the inner write, would leave Redis ahead of the truth.
+        """
+        inner = _make_inner()
+        redis = _make_redis()
+        call_order: list[str] = []
+        inner.store.side_effect = lambda *a, **kw: call_order.append("inner")
+        # Wrap the existing side_effect to record set ordering.
+        original_set = redis.set.side_effect
+
+        def recording_set(*a: Any, **kw: Any) -> Any:
+            call_order.append("redis")
+            return original_set(*a, **kw)
+
+        redis.set.side_effect = recording_set
+
+        store = RedisCachedDynamicConfigStore(
+            inner=inner, ttl_seconds=30, client_factory=lambda: redis
+        )
+        store.store("settings", {"v": 7})
+
+        inner.store.assert_called_once_with("settings", {"v": 7}, encrypt=False)
+        self.assertEqual(
+            call_order, ["inner", "redis"], "inner store must be written first"
+        )
+        # Subsequent load returns the new value from Redis only.
+        inner.load.reset_mock()
+        result = store.load("settings")
+        self.assertEqual(result, {"v": 7})
+        inner.load.assert_not_called()
+
+    def test_encrypted_store_invalidates_redis(self) -> None:
+        """``encrypt=True`` means "Postgres holds this encrypted." We
+        must NOT mirror plaintext into Redis (which has no encryption
+        guarantee), and we must invalidate any prior plaintext entry
+        in case the value was just switched to encrypted.
+        """
+        inner = _make_inner()
+        redis = _make_redis()
+        # Pre-seed a stale plaintext entry to confirm it gets cleared.
+        redis._storage[_EXPECTED_PREFIX + "secret"] = b'"old"'
+
+        store = RedisCachedDynamicConfigStore(
+            inner=inner, ttl_seconds=60, client_factory=lambda: redis
+        )
+        store.store("secret", "new-value", encrypt=True)
+
+        inner.store.assert_called_once_with("secret", "new-value", encrypt=True)
+        redis.set.assert_not_called()  # no plaintext mirror
+        redis.delete.assert_called_once_with(_EXPECTED_PREFIX + "secret")
+        self.assertNotIn(_EXPECTED_PREFIX + "secret", redis._storage)
+
+    def test_delete_clears_inner_and_redis(self) -> None:
+        """``delete`` clears both layers. Inner first — if Redis were
+        cleared first, a concurrent ``load`` could re-cache the row
+        before the inner delete lands and keep serving it afterwards.
+        """
+        inner = _make_inner()
+        redis = _make_redis()
+        redis._storage[_EXPECTED_PREFIX + "k"] = b'{"x":1}'
+        call_order: list[str] = []
+        inner.delete.side_effect = lambda *a, **kw: call_order.append("inner")
+        original_delete = redis.delete.side_effect
+
+        def recording_delete(*a: Any, **kw: Any) -> Any:
+            call_order.append("redis")
+            return original_delete(*a, **kw)
+
+        redis.delete.side_effect = recording_delete
+
+        store = RedisCachedDynamicConfigStore(
+            inner=inner, ttl_seconds=60, client_factory=lambda: redis
+        )
+        store.delete("k")
+
+        inner.delete.assert_called_once_with("k")
+        self.assertEqual(call_order, ["inner", "redis"])
+        self.assertNotIn(_EXPECTED_PREFIX + "k", redis._storage)
+
+    # ------------- fail-open behaviour -------------
+
+    def test_redis_get_error_falls_through_to_inner(self) -> None:
+        """Redis ``GET`` exploding (timeout, conn refused, network
+        partition) must NOT propagate. The wrapper degrades to a plain
+        read against the inner store so a Redis outage costs latency,
+        not availability.
+        """
+        inner = _make_inner()
+        inner.load.return_value = "from-postgres"
+        redis = MagicMock()
+        redis.get.side_effect = RedisError("connection refused")
+
+        store = RedisCachedDynamicConfigStore(
+            inner=inner, ttl_seconds=60, client_factory=lambda: redis
+        )
+        result = store.load("k")
+
+        self.assertEqual(result, "from-postgres")
+        inner.load.assert_called_once_with("k")
+
+    def test_redis_set_error_does_not_break_store(self) -> None:
+        """SET failing must not bubble out of ``store`` — the inner
+        write already succeeded, returning an error to the caller would
+        lie about the durability of the write.
+        """
+        inner = _make_inner()
+        redis = MagicMock()
+        redis.set.side_effect = RedisError("OOM")
+
+        store = RedisCachedDynamicConfigStore(
+            inner=inner, ttl_seconds=60, client_factory=lambda: redis
+        )
+        # Must not raise.
+        store.store("k", {"v": 1})
+        inner.store.assert_called_once()
+
+    def test_corrupt_cache_entry_treated_as_miss(self) -> None:
+        """If something else wrote non-JSON bytes under our key (legacy
+        format, manual ``SET``, race during a schema change), the next
+        read must not crash — it must fall through to the inner store
+        and overwrite the corrupt entry on the next SET.
+        """
+        inner = _make_inner()
+        inner.load.return_value = "ok"
+        redis = _make_redis()
+        redis._storage[_EXPECTED_PREFIX + "k"] = b"not-json-at-all"
+
+        store = RedisCachedDynamicConfigStore(
+            inner=inner, ttl_seconds=60, client_factory=lambda: redis
+        )
+        result = store.load("k")
+
+        self.assertEqual(result, "ok")
+        inner.load.assert_called_once_with("k")
+        # Wrapper repopulated Redis with the good value.
+        self.assertEqual(json.loads(redis._storage[_EXPECTED_PREFIX + "k"]), "ok")
+
+    def test_non_json_serialisable_value_skips_cache_but_inner_still_written(
+        self,
+    ) -> None:
+        """If a caller hands us a Python object json can't serialise
+        (sets, complex numbers, etc.), the inner store still gets it —
+        Redis just silently skips the cache write. The inner is the
+        source of truth; the cache is best-effort.
+        """
+        inner = _make_inner()
+        redis = _make_redis()
+        store = RedisCachedDynamicConfigStore(
+            inner=inner, ttl_seconds=60, client_factory=lambda: redis
+        )
+
+        # set() is not JSON-serialisable.
+        store.store("k", {1, 2, 3})  # type: ignore[arg-type]
+
+        inner.store.assert_called_once()
+        redis.set.assert_not_called()
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/backend/tests/unit/danswer/indexing/__init__.py b/backend/tests/unit/danswer/indexing/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/backend/tests/unit/danswer/indexing/test_get_doc_ids_to_update.py b/backend/tests/unit/danswer/indexing/test_get_doc_ids_to_update.py
new file mode 100644
index 00000000000..5556285018f
--- /dev/null
+++ b/backend/tests/unit/danswer/indexing/test_get_doc_ids_to_update.py
@@ -0,0 +1,73 @@
+"""Unit tests for get_doc_ids_to_update — the content-hash + timestamp skip
+logic that decides which documents actually need (re)indexing.
+
+Covers the new content-hash skip (so timestamp churn like Salesforce's
+LastModifiedDate doesn't force a full re-index) AND the backward-compatible
+fallback to the original doc_updated_at behavior for rows with no stored hash.
+"""
+from datetime import datetime
+from datetime import timezone
+from types import SimpleNamespace
+
+from danswer.configs.constants import DocumentSource
+from danswer.connectors.models import Document
+from danswer.connectors.models import Section
+from danswer.indexing.indexing_pipeline import get_doc_ids_to_update
+
+
+OLD = datetime(2024, 1, 1, tzinfo=timezone.utc)
+NEW = datetime(2024, 6, 1, tzinfo=timezone.utc)
+
+
+def _doc(doc_id: str, text: str, updated_at: datetime | None = NEW) -> Document:
+    return Document(
+        id=doc_id,
+        sections=[Section(text=text, link=None)],
+        source=DocumentSource.SALESFORCE,
+        semantic_identifier=doc_id,
+        metadata={},
+        doc_updated_at=updated_at,
+    )
+
+
+def _db_doc(doc_id: str, content_hash: str | None, updated_at: datetime | None):
+    # get_doc_ids_to_update only reads .id, .indexed_content_hash, .doc_updated_at
+    return SimpleNamespace(
+        id=doc_id, indexed_content_hash=content_hash, doc_updated_at=updated_at
+    )
+
+
+def _ids(docs: list[Document]) -> set[str]:
+    return {d.id for d in docs}
+
+
+def test_new_document_is_updatable() -> None:
+    doc = _doc("a", "hello")
+    assert _ids(get_doc_ids_to_update([doc], db_docs=[])) == {"a"}
+
+
+def test_unchanged_content_is_skipped_even_when_timestamp_advances() -> None:
+    # The Salesforce case: LastModifiedDate moved forward but content is identical.
+    doc = _doc("a", "hello", updated_at=NEW)
+    db = _db_doc("a", content_hash=doc.get_content_hash(), updated_at=OLD)
+    assert get_doc_ids_to_update([doc], db_docs=[db]) == []
+
+
+def test_changed_content_is_updatable() -> None:
+    doc = _doc("a", "new text", updated_at=NEW)
+    stale_hash = _doc("a", "old text").get_content_hash()
+    db = _db_doc("a", content_hash=stale_hash, updated_at=OLD)
+    assert _ids(get_doc_ids_to_update([doc], db_docs=[db])) == {"a"}
+
+
+def test_backcompat_null_hash_skips_when_not_newer() -> None:
+    # Pre-existing row (no stored hash): original updated_at behavior applies.
+    doc = _doc("a", "hello", updated_at=OLD)
+    db = _db_doc("a", content_hash=None, updated_at=NEW)
+    assert get_doc_ids_to_update([doc], db_docs=[db]) == []
+
+
+def test_backcompat_null_hash_updates_when_newer() -> None:
+    doc = _doc("a", "hello", updated_at=NEW)
+    db = _db_doc("a", content_hash=None, updated_at=OLD)
+    assert _ids(get_doc_ids_to_update([doc], db_docs=[db])) == {"a"}
diff --git a/backend/tests/unit/danswer/redis_layer/__init__.py b/backend/tests/unit/danswer/redis_layer/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/backend/tests/unit/danswer/redis_layer/test_redis_pool.py b/backend/tests/unit/danswer/redis_layer/test_redis_pool.py
new file mode 100644
index 00000000000..a50e8d8bd7b
--- /dev/null
+++ b/backend/tests/unit/danswer/redis_layer/test_redis_pool.py
@@ -0,0 +1,87 @@
+"""Unit tests for ``danswer.redis.redis_pool``.
+
+We exercise only what can be verified without a real Redis server:
+
+  1. The pool is a process-wide singleton — calling ``get_redis_client``
+     repeatedly does not build a new ``ConnectionPool`` each time.
+  2. ``reset_pool_for_tests`` forces the next ``get_redis_client`` to
+     rebuild — important so other tests can swap env vars and observe
+     the change.
+  3. The global key prefix is the documented value. If this ever
+     changes silently it would orphan every cached entry in production
+     on the next deploy; lock it down with a string equality assertion.
+
+Live socket-level behaviour (pool sizing, TCP timeouts, SSL handshake)
+is intentionally out of scope here — those need an integration test
+against a real Redis.
+"""
+from __future__ import annotations
+
+import unittest
+from unittest.mock import patch
+
+from danswer.redis import redis_pool
+
+
+class TestRedisPool(unittest.TestCase):
+    def setUp(self) -> None:
+        # Each test starts with a fresh, unbuilt pool so the singleton
+        # state from earlier tests can't bleed in.
+        redis_pool.reset_pool_for_tests()
+
+    def tearDown(self) -> None:
+        redis_pool.reset_pool_for_tests()
+
+    def test_prefix_is_stable(self) -> None:
+        """The on-the-wire key prefix is part of the persistence
+        contract — every cached entry in production starts with it.
+        Renaming it requires intentional migration, not a drive-by edit.
+        """
+        self.assertEqual(redis_pool.DANSWER_REDIS_KEY_PREFIX, "danswer:")
+
+    def test_pool_built_lazily_and_reused(self) -> None:
+        """``get_redis_client`` must build the pool on first use and
+        reuse it after. We assert this by counting calls to the pool
+        constructor under a patch.
+        """
+        with patch.object(
+            redis_pool, "ConnectionPool", wraps=redis_pool.ConnectionPool
+        ) as mock_pool:
+            client_a = redis_pool.get_redis_client()
+            client_b = redis_pool.get_redis_client()
+            client_c = redis_pool.get_redis_client()
+
+        self.assertEqual(
+            mock_pool.call_count,
+            1,
+            "ConnectionPool should be constructed exactly once across "
+            "repeated get_redis_client() calls",
+        )
+        # Different Redis() instances are fine — they share the pool.
+        self.assertIs(
+            client_a.connection_pool,
+            client_b.connection_pool,
+            "all clients must share the singleton pool",
+        )
+        self.assertIs(client_b.connection_pool, client_c.connection_pool)
+
+    def test_reset_for_tests_drops_singleton(self) -> None:
+        """After ``reset_pool_for_tests`` the next ``get_redis_client``
+        must rebuild — otherwise tests can't observe config changes.
+        """
+        with patch.object(
+            redis_pool, "ConnectionPool", wraps=redis_pool.ConnectionPool
+        ) as mock_pool:
+            redis_pool.get_redis_client()
+            redis_pool.reset_pool_for_tests()
+            redis_pool.get_redis_client()
+
+        self.assertEqual(
+            mock_pool.call_count,
+            2,
+            "reset_pool_for_tests should force the next call to rebuild",
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/backend/tests/unit/danswer/server/__init__.py b/backend/tests/unit/danswer/server/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/backend/tests/unit/danswer/server/middleware/__init__.py b/backend/tests/unit/danswer/server/middleware/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/backend/tests/unit/danswer/server/middleware/test_request_rate_limit.py b/backend/tests/unit/danswer/server/middleware/test_request_rate_limit.py
new file mode 100644
index 00000000000..95c06699b78
--- /dev/null
+++ b/backend/tests/unit/danswer/server/middleware/test_request_rate_limit.py
@@ -0,0 +1,344 @@
+"""Unit tests for the Redis-backed per-user request rate limiter.
+
+What we lock down here is the contract a chat endpoint relies on when it
+attaches ``Depends(check_message_request_rate_limit)``:
+
+  1. **Default off:** with the feature flag down OR both window limits
+     at 0, the dependency must short-circuit before touching Redis.
+     This matters because the dependency is mounted on the hot path of
+     every chat message — any cost in the off case is paid on every
+     request forever.
+  2. **Per-window enforcement:** with a cap of N, the (N+1)th request
+     through the same bucket exceeds the cap and 429s; the same caller
+     in the next bucket gets a fresh window.
+  3. **Per-user isolation:** two distinct users must not share counters
+     even if their requests interleave in the same bucket.
+  4. **Anonymous keying by IP:** unauth'd callers are bucketed by
+     X-Forwarded-For first hop (matching the ingress shape), falling
+     back to the socket peer; otherwise the dependency skips.
+  5. **EXPIRE NX semantics:** the first ``INCR`` of a bucket sets the
+     TTL; subsequent ``INCR`` calls must NOT extend it (a sliding TTL
+     would make the bucket never reset and effectively cap *forever*
+     after the first burst).
+  6. **Fail-open:** any Redis error allows the request through. The
+     limiter is protection, not authorization — a Redis blip is not a
+     reason to wedge the chat path.
+  7. **Retry-After header:** a 429 carries seconds-until-bucket-rollover
+     so well-behaved clients can back off precisely.
+
+Redis is mocked at the ``get_redis_client`` boundary; the FastAPI
+``Request`` and ``User`` are dummy objects. No HTTP layer, no real
+Redis — pure dependency-function tests.
+"""
+from __future__ import annotations
+
+import unittest
+import uuid
+from typing import Any
+from unittest.mock import MagicMock
+from unittest.mock import patch
+
+from fastapi import HTTPException
+
+from danswer.server.middleware import request_rate_limit as rrl
+
+
+# ---------- shared fakes ----------
+
+
+class _FakePipeline:
+    """Minimal stand-in for redis.client.Pipeline.
+
+    We only need .incr, .expire, .execute — that's the full surface
+    used in _enforce_window. We also remember every .expire(..., nx=)
+    call so the NX-semantics test can inspect it.
+    """
+
+    def __init__(self, storage: dict[str, int], expiry: dict[str, bool]) -> None:
+        self._storage = storage
+        self._expiry = expiry
+        self._ops: list[tuple[str, Any, Any]] = []
+
+    def incr(self, key: str, amount: int = 1) -> "_FakePipeline":
+        self._ops.append(("incr", key, amount))
+        return self
+
+    def expire(self, key: str, seconds: int, nx: bool = False) -> "_FakePipeline":
+        self._ops.append(("expire", key, (seconds, nx)))
+        return self
+
+    def execute(self) -> list[Any]:
+        results: list[Any] = []
+        for op, key, arg in self._ops:
+            if op == "incr":
+                self._storage[key] = self._storage.get(key, 0) + int(arg)
+                results.append(self._storage[key])
+            elif op == "expire":
+                seconds, nx = arg
+                if nx and self._expiry.get(key):
+                    results.append(False)  # already has TTL — refused
+                else:
+                    self._expiry[key] = True
+                    results.append(True)
+        self._ops.clear()
+        return results
+
+
+class _FakeRedis:
+    """Fake Redis client exposing only the methods the limiter uses."""
+
+    def __init__(self) -> None:
+        self._counters: dict[str, int] = {}
+        self._has_expiry: dict[str, bool] = {}
+        self.expire_calls: list[tuple[str, int, bool]] = []
+
+    def pipeline(self) -> _FakePipeline:
+        pipe = _FakePipeline(self._counters, self._has_expiry)
+        # Wrap pipe.expire to record every call for inspection.
+        original_expire = pipe.expire
+
+        def recording_expire(key: str, seconds: int, nx: bool = False) -> Any:
+            self.expire_calls.append((key, seconds, nx))
+            return original_expire(key, seconds, nx=nx)
+
+        pipe.expire = recording_expire  # type: ignore[method-assign]
+        return pipe
+
+
+def _make_request(
+    headers: dict[str, str] | None = None, peer_host: str | None = None
+) -> MagicMock:
+    """Minimal Starlette Request stand-in."""
+    req = MagicMock()
+    req.headers = headers or {}
+    req.client = MagicMock(host=peer_host) if peer_host is not None else None
+    return req
+
+
+def _make_user(uid: uuid.UUID | None = None) -> MagicMock:
+    user = MagicMock()
+    user.id = uid or uuid.uuid4()
+    return user
+
+
+# ---------- tests ----------
+
+
+class TestRequestRateLimitDisabled(unittest.TestCase):
+    """When disabled, the dependency must do nothing — not even
+    construct a Redis client. The hot path can't afford ambient cost
+    that callers thought they'd avoided by turning the flag off.
+    """
+
+    def test_flag_off_short_circuits_before_redis(self) -> None:
+        request = _make_request()
+        user = _make_user()
+        with patch.object(rrl, "REQUEST_RATE_LIMIT_ENABLED", False), patch.object(
+            rrl, "get_redis_client"
+        ) as mock_client:
+            rrl.check_message_request_rate_limit(request=request, user=user)
+            mock_client.assert_not_called()
+
+    def test_both_windows_zero_short_circuits_before_redis(self) -> None:
+        """Flag on but no limits configured = nothing to enforce. The
+        operator probably enabled the flag and hasn't picked numbers
+        yet; we must not pay the Redis round-trip in that interim
+        state.
+        """
+        request = _make_request()
+        user = _make_user()
+        with patch.object(rrl, "REQUEST_RATE_LIMIT_ENABLED", True), patch.object(
+            rrl, "REQUEST_RATE_LIMIT_PER_MINUTE", 0
+        ), patch.object(rrl, "REQUEST_RATE_LIMIT_PER_HOUR", 0), patch.object(
+            rrl, "get_redis_client"
+        ) as mock_client:
+            rrl.check_message_request_rate_limit(request=request, user=user)
+            mock_client.assert_not_called()
+
+
+class TestRequestRateLimitEnforcement(unittest.TestCase):
+    def _patch_enabled(self, per_min: int = 0, per_hour: int = 0) -> Any:
+        """Helper: turn the limiter on with the given window caps."""
+        return _MultiPatch(
+            (rrl, "REQUEST_RATE_LIMIT_ENABLED", True),
+            (rrl, "REQUEST_RATE_LIMIT_PER_MINUTE", per_min),
+            (rrl, "REQUEST_RATE_LIMIT_PER_HOUR", per_hour),
+        )
+
+    def test_within_limit_allows_request(self) -> None:
+        """Under the cap = no 429. Sanity, but also makes sure the
+        ``count > limit`` boundary is strict (the Nth allowed request
+        is the *limit*-th, not limit-minus-one).
+        """
+        fake = _FakeRedis()
+        request = _make_request()
+        user = _make_user()
+        with self._patch_enabled(per_min=3), patch.object(
+            rrl, "get_redis_client", return_value=fake
+        ):
+            for _ in range(3):
+                rrl.check_message_request_rate_limit(request=request, user=user)
+            # No exception raised — all three under the cap of 3.
+
+    def test_request_above_cap_raises_429_with_retry_after(self) -> None:
+        """The (limit+1)-th call in a bucket must 429, and the response
+        must carry Retry-After. Clients without Retry-After back off
+        with guesswork; we should hand them the exact answer.
+        """
+        fake = _FakeRedis()
+        request = _make_request()
+        user = _make_user()
+        with self._patch_enabled(per_min=2), patch.object(
+            rrl, "get_redis_client", return_value=fake
+        ):
+            rrl.check_message_request_rate_limit(request=request, user=user)
+            rrl.check_message_request_rate_limit(request=request, user=user)
+            with self.assertRaises(HTTPException) as ctx:
+                rrl.check_message_request_rate_limit(request=request, user=user)
+        self.assertEqual(ctx.exception.status_code, 429)
+        retry_after = ctx.exception.headers and ctx.exception.headers.get("Retry-After")
+        self.assertIsNotNone(retry_after)
+        self.assertTrue(retry_after.isdigit())  # type: ignore[union-attr]
+        # 0 <= retry_after <= window. (Equal to window iff time landed
+        # exactly on the boundary — possible but rare, allow it.)
+        self.assertGreaterEqual(int(retry_after), 0)  # type: ignore[arg-type]
+        self.assertLessEqual(int(retry_after), 60)  # type: ignore[arg-type]
+
+    def test_two_users_have_independent_counters(self) -> None:
+        """Distinct user UUIDs must NOT share a bucket. If they did, a
+        loud user could 429 a quiet one.
+        """
+        fake = _FakeRedis()
+        request = _make_request()
+        alice = _make_user()
+        bob = _make_user()
+        with self._patch_enabled(per_min=1), patch.object(
+            rrl, "get_redis_client", return_value=fake
+        ):
+            rrl.check_message_request_rate_limit(request=request, user=alice)
+            # Bob's first request must succeed even though Alice already
+            # used her one allowed call in this bucket.
+            rrl.check_message_request_rate_limit(request=request, user=bob)
+            # Alice's second request hits her cap — should 429.
+            with self.assertRaises(HTTPException) as ctx:
+                rrl.check_message_request_rate_limit(request=request, user=alice)
+        self.assertEqual(ctx.exception.status_code, 429)
+
+    def test_next_bucket_resets_count(self) -> None:
+        """The cap applies per time window, not forever: once the clock
+        moves into a later window the same caller is allowed again.
+        """
+        redis_stub = _FakeRedis()
+        req = _make_request()
+        caller = _make_user()
+        window_start = 1_000_000.0
+        with self._patch_enabled(per_min=1):
+            with patch.object(rrl, "get_redis_client", return_value=redis_stub):
+                with patch.object(rrl.time, "time", return_value=window_start):
+                    rrl.check_message_request_rate_limit(request=req, user=caller)
+                    # Second call at the same instant exceeds the cap of 1.
+                    with self.assertRaises(HTTPException):
+                        rrl.check_message_request_rate_limit(
+                            request=req, user=caller
+                        )
+                # 90 seconds on: a later window than the first two calls.
+                with patch.object(rrl.time, "time", return_value=window_start + 90):
+                    rrl.check_message_request_rate_limit(request=req, user=caller)
+
+    def test_expire_uses_nx_so_ttl_is_set_only_once(self) -> None:
+        """Pin ``nx=True`` on every ``EXPIRE`` recorded while the
+        limiter handles requests.
+
+        Without NX each request would push the key's expiry further
+        out, so a bucket under steady traffic would never roll over.
+        """
+        redis_stub = _FakeRedis()
+        req = _make_request()
+        caller = _make_user()
+        nx_required = "EXPIRE must use NX so TTL isn't extended on every INCR"
+        with self._patch_enabled(per_min=10):
+            with patch.object(rrl, "get_redis_client", return_value=redis_stub):
+                # Three calls against a cap of 10: none should be rejected.
+                for _attempt in range(3):
+                    rrl.check_message_request_rate_limit(request=req, user=caller)
+        # At least one EXPIRE must have been recorded by the fake ...
+        self.assertGreater(len(redis_stub.expire_calls), 0)
+        # ... and every recorded call must have been made with nx=True.
+        for _key, _ttl, nx_flag in redis_stub.expire_calls:
+            self.assertTrue(nx_flag, nx_required)
+
+    def test_anonymous_user_keyed_by_xff_first_hop(self) -> None:
+        """With no user, callers are told apart by the first
+        ``X-Forwarded-For`` entry: a different first hop gets its own
+        bucket, a repeated first hop shares one.
+        """
+        redis_stub = _FakeRedis()
+        xff_one = {"x-forwarded-for": "10.1.1.1, 10.0.0.1"}
+        xff_two = {"x-forwarded-for": "10.1.1.2, 10.0.0.1"}
+        from_one = _make_request(headers=xff_one)
+        from_two = _make_request(headers=xff_two)
+        with self._patch_enabled(per_min=1):
+            with patch.object(rrl, "get_redis_client", return_value=redis_stub):
+                rrl.check_message_request_rate_limit(request=from_one, user=None)
+                # New first hop, so a fresh bucket: must be allowed.
+                rrl.check_message_request_rate_limit(request=from_two, user=None)
+                # First hop already seen once: over the cap of 1.
+                with self.assertRaises(HTTPException):
+                    rrl.check_message_request_rate_limit(request=from_one, user=None)
+
+    def test_anonymous_with_no_ip_skips_silently(self) -> None:
+        """No user, no ``X-Forwarded-For`` and no peer address leaves
+        nothing to key a bucket on, so the limiter must let the
+        request through without asking for a Redis client at all.
+        """
+        redis_stub = _FakeRedis()
+        anon_req = _make_request(headers={}, peer_host=None)
+        with self._patch_enabled(per_min=1):
+            with patch.object(
+                rrl, "get_redis_client", return_value=redis_stub
+            ) as client_factory:
+                # With a cap of 1 the second call would be rejected if
+                # the limiter were counting; both must pass.
+                for _attempt in range(2):
+                    rrl.check_message_request_rate_limit(request=anon_req, user=None)
+                client_factory.assert_not_called()
+
+    def test_redis_error_fails_open(self) -> None:
+        """If talking to Redis blows up, the limiter must swallow the
+        error and let the request through rather than raise out of
+        the dependency: an unenforced limit is preferable to taking
+        chat down.
+        """
+        broken_redis = MagicMock()
+        # Any attempt to open a pipeline on this client raises.
+        broken_redis.pipeline.side_effect = RuntimeError("redis exploded")
+        req = _make_request()
+        caller = _make_user()
+        with self._patch_enabled(per_min=1):
+            with patch.object(rrl, "get_redis_client", return_value=broken_redis):
+                # Neither call may raise; there is nothing else to
+                # assert, the test passes by running to completion.
+                for _attempt in range(2):
+                    rrl.check_message_request_rate_limit(request=req, user=caller)
+
+
+class _MultiPatch:
+    """Context manager that applies several ``patch.object`` patches at
+    once. Used to make per-test "turn on the limiter with these
+    windows" blocks readable.
+
+    Each positional argument is an ``(obj, attr, val)`` triple that is
+    handed to ``patch.object``. Patches are started in the order given
+    and stopped in reverse. Entering is all-or-nothing: if one patch
+    fails to start, the patches already started are stopped before the
+    error propagates, so a failed setup cannot leak into later tests.
+    """
+
+    def __init__(self, *patches: tuple[Any, str, Any]) -> None:
+        self._patches = [patch.object(obj, attr, val) for obj, attr, val in patches]
+        # Patchers that are currently active, in start order.
+        self._started: list[Any] = []
+
+    def __enter__(self) -> None:
+        try:
+            for p in self._patches:
+                p.start()
+                self._started.append(p)
+        except BaseException:
+            # ``with`` never calls ``__exit__`` when ``__enter__``
+            # raises, so the partial setup has to be undone here.
+            self._stop_started()
+            raise
+
+    def __exit__(self, *exc: Any) -> None:
+        # Returns None, so an exception from the ``with`` body is
+        # never suppressed.
+        self._stop_started()
+
+    def _stop_started(self) -> None:
+        """Stop every active patcher, most recently started first."""
+        while self._started:
+            self._started.pop().stop()
+
+
+if __name__ == "__main__":
+    unittest.main()  # run this module's tests when executed as a script
diff --git a/darwin-kubernetes/api_server-service-deployment.yaml b/darwin-kubernetes/api_server-service-deployment.yaml
deleted file mode 100644
index 2959e1409a2..00000000000
--- a/darwin-kubernetes/api_server-service-deployment.yaml
+++ /dev/null
@@ -1,86 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: api-server-service
-spec:
-  selector:
-    app: api-server
-  ports:
-    - name: api-server-port
-      protocol: TCP
-      port: 80
-      targetPort: 8080
-  type: ClusterIP
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: api-server-deployment
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: api-server
-  template:
-    metadata:
-      labels:
-        app: api-server
-    spec:
-      containers:
-      - name: api-server
-        image: sfbrdevhelmweacr.azurecr.io/danswer/danswer-backend:vha-5
-        imagePullPolicy: IfNotPresent
-        command:
-          - "/bin/sh"
-          - "-c"
-          - |
-            #sleep 1000000 &&
-            alembic upgrade heads &&
-            echo "Starting Danswer Api Server" &&
-            uvicorn danswer.main:app --host 0.0.0.0 --port 8080
-        ports:
-        - containerPort: 8080
-        # There are some extra values since this is shared between services
-        # There are no conflicts though, extra env variables are simply ignored
-        env:
-        - name: POSTGRES_USER
-          valueFrom:
-            secretKeyRef:
-              name: danswer-secrets
-              key: postgres_user
-        - name: POSTGRES_PASSWORD
-          valueFrom:
-            secretKeyRef:
-              name: danswer-secrets
-              key: postgres_password
-        # --- Microsoft / Entra ID OIDC ---
-        - name: OAUTH_CLIENT_ID
-          valueFrom:
-            secretKeyRef:
-              name: danswer-secrets
-              key: oauth_client_id
-        - name: OAUTH_CLIENT_SECRET
-          valueFrom:
-            secretKeyRef:
-              name: danswer-secrets
-              key: oauth_client_secret
-        - name: USER_AUTH_SECRET
-          valueFrom:
-            secretKeyRef:
-              name: danswer-secrets
-              key: user_auth_secret
-        envFrom:
-        - configMapRef:
-            name: env-configmap
-        volumeMounts:
-        - name: dynamic-storage
-          mountPath: /home/storage
-        - name: file-connector-storage
-          mountPath: /home/file_connector_storage
-      volumes:
-      - name: dynamic-storage
-        persistentVolumeClaim:
-          claimName: dynamic-pvc
-      - name: file-connector-storage
-        persistentVolumeClaim:
-          claimName: file-connector-pvc
diff --git a/darwin-kubernetes/background-deployment.yaml b/darwin-kubernetes/background-deployment.yaml
deleted file mode 100644
index 538cf007bd4..00000000000
--- a/darwin-kubernetes/background-deployment.yaml
+++ /dev/null
@@ -1,63 +0,0 @@
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: background-deployment
-spec:
-  replicas: 1
-  strategy:
-    type: Recreate
-  selector:
-    matchLabels:
-      app: background
-  template:
-    metadata:
-      labels:
-        app: background
-    spec:
-      affinity:
-        nodeAffinity:
-          requiredDuringSchedulingIgnoredDuringExecution:
-            nodeSelectorTerms:
-            - matchExpressions:
-              - key: agentpool
-                operator: In
-                values:
-                - indexcpu
-      containers:
-      - name: background
-        image: sfbrdevhelmweacr.azurecr.io/danswer/danswer-backend:vha-5
-        imagePullPolicy: IfNotPresent
-        command: ["/usr/bin/supervisord"]
-        # There are some extra values since this is shared between services
-        # There are no conflicts though, extra env variables are simply ignored
-        env:
-        - name: POSTGRES_USER
-          valueFrom:
-            secretKeyRef:
-              key: postgres_user
-              name: danswer-secrets
-        - name: POSTGRES_PASSWORD
-          valueFrom:
-            secretKeyRef:
-              key: postgres_password
-              name: danswer-secrets
-        envFrom:
-        - configMapRef:
-            name: env-configmap
-        volumeMounts:
-        - name: dynamic-storage
-          mountPath: /home/storage
-        - name: file-connector-storage
-          mountPath: /home/file_connector_storage
-      tolerations:
-      - effect: NoSchedule
-        key: darwin
-        operator: Equal
-        value: indexing
-      volumes:
-      - name: dynamic-storage
-        persistentVolumeClaim:
-          claimName: dynamic-pvc
-      - name: file-connector-storage
-        persistentVolumeClaim:
-          claimName: file-connector-pvc
diff --git a/darwin-kubernetes/backup/persistent-volumes.yaml.bkp b/darwin-kubernetes/backup/persistent-volumes.yaml.bkp
deleted file mode 100644
index 700a6ebffd6..00000000000
--- a/darwin-kubernetes/backup/persistent-volumes.yaml.bkp
+++ /dev/null
@@ -1,21 +0,0 @@
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: dynamic-pvc
-spec:
-  accessModes:
-    - ReadWriteOnce
-  resources:
-    requests:
-      storage: 5Gi
----
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: file-connector-pvc
-spec:
-  accessModes:
-    - ReadWriteOnce
-  resources:
-    requests:
-      storage: 5Gi
diff --git a/darwin-kubernetes/backup/postgres-service-deployment.yaml.bkp b/darwin-kubernetes/backup/postgres-service-deployment.yaml.bkp
deleted file mode 100644
index f33efa2bafd..00000000000
--- a/darwin-kubernetes/backup/postgres-service-deployment.yaml.bkp
+++ /dev/null
@@ -1,57 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: relational-db-service
-spec:
-  selector:
-    app: relational-db
-  ports:
-    - protocol: TCP
-      port: 5432
-      targetPort: 5432
-  clusterIP: None
----
-apiVersion: apps/v1
-kind: StatefulSet
-metadata:
-  name: relational-db-statefulset
-spec:
-  serviceName: relational-db-service
-  replicas: 1
-  selector:
-    matchLabels:
-      app: relational-db
-  template:
-    metadata:
-      labels:
-        app: relational-db
-    spec:
-      containers:
-      - name: relational-db
-        image: postgres:15.2-alpine
-        env:
-        - name: POSTGRES_USER
-          valueFrom:
-            secretKeyRef:
-              name: danswer-secrets
-              key: postgres_user
-        - name: POSTGRES_PASSWORD
-          valueFrom:
-            secretKeyRef:
-              name: danswer-secrets
-              key: postgres_password
-        ports:
-        - containerPort: 5432
-        volumeMounts:
-        - mountPath: /var/lib/postgresql/data
-          name: db-storage
-          subPath: postgres
-  volumeClaimTemplates:
-  - metadata:
-      name: db-storage
-    spec:
-      accessModes: ["ReadWriteOnce"]
-      resources:
-        requests:
-          # Adjust the storage request size as needed.
-          storage: 1Gi
diff --git a/darwin-kubernetes/backup/psql.yaml.bkp b/darwin-kubernetes/backup/psql.yaml.bkp
deleted file mode 100644
index f0c85154156..00000000000
--- a/darwin-kubernetes/backup/psql.yaml.bkp
+++ /dev/null
@@ -1,22 +0,0 @@
-apiVersion: v1
-kind: Pod
-metadata:
-  name: postgresql-client
-  labels:
-    app: postgresql-client
-  annotations:
-    cluster-autoscaler.kubernetes.io/safe-to-evict: "true"    
-spec:
-  securityContext:
-    runAsNonRoot: true
-    supplementalGroups: [ 10001] 
-    fsGroup: 10001    
-  containers:
-    - name: postgresql-client
-      image: andreswebs/postgresql-client
-      imagePullPolicy: Always
-      securityContext:
-        runAsUser: 1000      
-      stdin: true
-      tty: true
-      command: ["/bin/sh"]
diff --git a/darwin-kubernetes/backup/vespa-service-deployment.yaml b/darwin-kubernetes/backup/vespa-service-deployment.yaml
deleted file mode 100644
index 2841ea27545..00000000000
--- a/darwin-kubernetes/backup/vespa-service-deployment.yaml
+++ /dev/null
@@ -1,63 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: document-index-service
-spec:
-  selector:
-    app: vespa
-  ports:
-    - name: vespa-tenant-port
-      protocol: TCP
-      port: 19071
-      targetPort: 19071
-    - name: vespa-port
-      protocol: TCP
-      port: 8081
-      targetPort: 8081
-  type: LoadBalancer
----
-apiVersion: apps/v1
-kind: StatefulSet
-metadata:
-  name: vespa
-  labels:
-    app: vespa
-spec:
-  replicas: 1
-  serviceName: vespa
-  selector:
-    matchLabels:
-      app: vespa
-  template:
-    metadata:
-      labels:
-        app: vespa
-    spec:
-      containers:
-      - name: vespa
-        image: vespaengine/vespa:8.277.17
-        imagePullPolicy: IfNotPresent
-        securityContext:
-          privileged: true
-          runAsUser: 0
-        ports:
-        - containerPort: 19071
-        - containerPort: 8081
-        readinessProbe:
-          httpGet:
-            path: /state/v1/health
-            port: 19071
-            scheme: HTTP
-        volumeMounts:
-        - name: vespa-storage
-          mountPath: /opt/vespa/var/
-  volumeClaimTemplates:
-  - metadata:
-      name: vespa-storage
-    spec:
-      accessModes:
-      - ReadWriteOnce
-      resources:
-        requests:
-          # Adjust the storage request size as needed.
-          storage: 200Gi
diff --git a/darwin-kubernetes/env-configmap.yaml b/darwin-kubernetes/env-configmap.yaml
deleted file mode 100644
index eb0cfb25312..00000000000
--- a/darwin-kubernetes/env-configmap.yaml
+++ /dev/null
@@ -1,96 +0,0 @@
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: env-configmap
-data:
-  # Auth Setting, also check the secrets file
-  AUTH_TYPE: "oidc"  # Microsoft / Entra ID OIDC (oauth_client_id/secret + user_auth_secret in danswer-secrets)
-  # Entra OIDC discovery doc; tenant id is the path segment.
-  OPENID_CONFIG_URL: "https://login.microsoftonline.com/d8353d2a-b153-4d17-8827-902c51f72357/v2.0/.well-known/openid-configuration"
-  # Comma-separated emails granted ADMIN on first sign-in (replaces the old Istio admin allowlist).
-  DEFAULT_ADMIN_EMAILS: "user1@uipath.com,user2@uipath.com"
-  ENCRYPTION_KEY_SECRET: ""  # This should not be specified directly in the yaml, this is just for reference
-  SESSION_EXPIRE_TIME_SECONDS: "86400"  # 1 Day Default
-  VALID_EMAIL_DOMAINS: ""  # Can be something like danswer.ai, as an extra double-check
-  SMTP_SERVER: "" # For sending verification emails, if unspecified then defaults to 'smtp.gmail.com'
-  SMTP_PORT: "" # For sending verification emails, if unspecified then defaults to '587'
-  SMTP_USER: ""  # 'your-email@company.com'
-  SMTP_PASS: ""  # 'your-gmail-password'
-  EMAIL_FROM: ""  # 'your-email@company.com' SMTP_USER missing used instead
-  # Gen AI Settings
-  GEN_AI_MODEL_PROVIDER: "custom"
-  GEN_AI_API_ENDPOINT: "https://alpha.uipath.com/llmgateway_/openai/deployments/gpt-4o-mini-2024-07-18/chat/completions?api-version=2024-06-01"
-  GEN_AI_IDENTITY_ENDPOINT: "https://alpha.uipath.com/identity_/connect/token"
-  GEN_AI_CLIENT_ID: "XXX"
-  GEN_AI_CLIENT_SECRET: "XXX"
-  GEN_AI_MODEL_VERSION: ""
-  FAST_GEN_AI_MODEL_VERSION: ""
-  GEN_AI_API_KEY: ""
-  GEN_AI_API_VERSION: ""
-  GEN_AI_LLM_PROVIDER_TYPE: ""
-  GEN_AI_MAX_TOKENS: ""
-  QA_TIMEOUT: "60"
-  MAX_CHUNKS_FED_TO_CHAT: ""
-  DISABLE_LLM_FILTER_EXTRACTION: "true"
-  DISABLE_LLM_CHUNK_FILTER: "true"
-  DISABLE_LLM_CHOOSE_SEARCH: "true"
-  DISABLE_LLM_QUERY_REPHRASE: ""
-  # Query Options
-  DOC_TIME_DECAY: ""
-  HYBRID_ALPHA: ""
-  EDIT_KEYWORD_QUERY: ""
-  MULTILINGUAL_QUERY_EXPANSION: ""
-  QA_PROMPT_OVERRIDE: ""
-  # Other Services
-  POSTGRES_HOST: "darwin-postgres.postgres.database.azure.com"
-  VESPA_CONFIG_SERVER_HOST: "vespa-configserver"
-  VESPA_HOST: "vespa-query"
-  VESPA_PORT: "8080"
-  VESPA_FEED_HOST: "vespa-feed"
-  VESPA_FEED_PORT: "8080"
-  # Don't change the NLP models unless you know what you're doing
-  DOCUMENT_ENCODER_MODEL: ""
-  NORMALIZE_EMBEDDINGS: ""
-  ASYM_QUERY_PREFIX: ""
-  ASYM_PASSAGE_PREFIX: ""
-  ENABLE_RERANKING_REAL_TIME_FLOW: ""
-  ENABLE_RERANKING_ASYNC_FLOW: ""
-  MODEL_SERVER_HOST: "inference-model-server-service"
-  MODEL_SERVER_PORT: ""
-  INDEXING_MODEL_SERVER_HOST: "indexing-model-server-service"
-  MIN_THREADS_ML_MODELS: ""
-  # Indexing Configs
-  NUM_INDEXING_WORKERS: "4"
-  ENABLED_CONNECTOR_TYPES: ""
-  DISABLE_INDEX_UPDATE_ON_SWAP: ""
-  DASK_JOB_CLIENT_ENABLED: "true"
-  CONTINUE_ON_CONNECTOR_FAILURE: ""
-  EXPERIMENTAL_CHECKPOINTING_ENABLED: ""
-  CONFLUENCE_CONNECTOR_LABELS_TO_SKIP: ""
-  JIRA_API_VERSION: ""
-  WEB_CONNECTOR_VALIDATE_URLS: ""
-  GONG_CONNECTOR_START_TIME: ""
-  NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP: ""
-  # DanswerBot SlackBot Configs
-  DANSWER_BOT_SLACK_APP_TOKEN: ""
-  DANSWER_BOT_SLACK_BOT_TOKEN: ""
-  DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER: ""
-  DANSWER_BOT_DISPLAY_ERROR_MSGS: ""
-  DANSWER_BOT_RESPOND_EVERY_CHANNEL: ""
-  DANSWER_BOT_DISABLE_COT: ""  # Currently unused
-  OPSGENIE_API_KEY: ""
-  NOTIFY_SLACKBOT_NO_ANSWER: ""
-  # Logging
-  # Optional Telemetry, please keep it on (nothing sensitive is collected)? <3
-  # https://docs.danswer.dev/more/telemetry
-  DISABLE_TELEMETRY: "true"
-  LOG_LEVEL: "info"
-  LOG_ALL_MODEL_INTERACTIONS: ""
-  LOG_VESPA_TIMING_INFORMATION: ""
-  # Shared or Non-backend Related
-  INTERNAL_URL: "http://api-server-service:80"  # for web server
-  # MUST be the externally-reachable https origin — builds the OIDC redirect_uri
-  # and makes the session cookie Secure. Mismatch => AADSTS50011 redirect error.
-  WEB_DOMAIN: "https://darwin.westeurope.cloudapp.azure.com"  # for web server and api server
-  DOMAIN: "darwin.westeurope.cloudapp.azure.com"  # for nginx
-  APPLY_MIGRATIONS: "true"
diff --git a/darwin-kubernetes/index_model_server-statefulset.yaml b/darwin-kubernetes/index_model_server-statefulset.yaml
deleted file mode 100644
index 2fc1154f29e..00000000000
--- a/darwin-kubernetes/index_model_server-statefulset.yaml
+++ /dev/null
@@ -1,71 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: indexing-model-server-service
-spec:
-  selector:
-    app: indexing-model-server
-  ports:
-    - name: indexing-model-server-port
-      protocol: TCP
-      port: 9000
-      targetPort: 9000
-  type: ClusterIP
----
-apiVersion: apps/v1
-kind: StatefulSet
-metadata:
-  name: indexing-model-server-statefulset
-spec:
-  replicas: 2
-  selector:
-    matchLabels:
-      app: indexing-model-server
-      name: indexing-model-server
-  serviceName: indexing-model-server-service
-  template:
-    metadata:
-      labels:
-        app: indexing-model-server
-        name: indexing-model-server
-    spec:
-      affinity:
-        nodeAffinity:
-          requiredDuringSchedulingIgnoredDuringExecution:
-            nodeSelectorTerms:
-            - matchExpressions:
-              - key: agentpool
-                operator: In
-                values:
-                - indexcpu
-      containers:
-      - name: indexing-model-server
-        image: danswer/danswer-model-server:v0.3.94
-        imagePullPolicy: IfNotPresent
-        command: [ "uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000" ]
-        ports:
-        - containerPort: 9000
-        envFrom:
-        - configMapRef:
-            name: env-configmap
-        env:
-          - name: INDEXING_ONLY
-            value: "True"
-        volumeMounts:
-        - name: indexing-model-storage
-          mountPath: /root/.cache
-      tolerations:
-      - effect: NoSchedule
-        key: darwin
-        operator: Equal
-        value: indexing
-  volumeClaimTemplates:
-  - metadata:
-      name: indexing-model-storage
-    spec:
-      accessModes: [ "ReadWriteOnce" ]
-      resources:
-        requests:
-          storage: 10Gi
----
-
diff --git a/darwin-kubernetes/indexing_model_server-service-deployment.yaml b/darwin-kubernetes/indexing_model_server-service-deployment.yaml
deleted file mode 100644
index 8cb8e0fe1dd..00000000000
--- a/darwin-kubernetes/indexing_model_server-service-deployment.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: indexing-model-server-service
-spec:
-  selector:
-    app: indexing-model-server
-  ports:
-    - name: indexing-model-server-port
-      protocol: TCP
-      port: 9000
-      targetPort: 9000
-  type: ClusterIP
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: indexing-model-server-deployment
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: indexing-model-server
-  template:
-    metadata:
-      labels:
-        app: indexing-model-server
-    spec:
-      containers:
-      - name: indexing-model-server
-        image: danswer/danswer-model-server:v0.3.94
-        imagePullPolicy: IfNotPresent
-        command: [ "uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000" ]
-        ports:
-        - containerPort: 9000
-        envFrom:
-        - configMapRef:
-            name: env-configmap
-        env:
-          - name: INDEXING_ONLY
-            value: "True"
-        volumeMounts:
-        - name: indexing-model-storage
-          mountPath: /root/.cache
-      volumes:
-      - name: indexing-model-storage
-        persistentVolumeClaim:
-          claimName: indexing-model-pvc
----
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: indexing-model-pvc
-spec:
-  accessModes:
-    - ReadWriteOnce
-  resources:
-    requests:
-      storage: 10Gi
diff --git a/darwin-kubernetes/indexing_model_server-service-deployment_gpu.yaml b/darwin-kubernetes/indexing_model_server-service-deployment_gpu.yaml
deleted file mode 100644
index 96bd9f04f69..00000000000
--- a/darwin-kubernetes/indexing_model_server-service-deployment_gpu.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: indexing-model-server-service
-spec:
-  selector:
-    app: indexing-model-server
-  ports:
-    - name: indexing-model-server-port
-      protocol: TCP
-      port: 9000
-      targetPort: 9000
-  type: ClusterIP
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: indexing-model-server-deployment
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: indexing-model-server
-  template:
-    metadata:
-      labels:
-        app: indexing-model-server
-    spec:
-      containers:
-      - name: indexing-model-server
-        image: danswer/danswer-model-server:v0.3.94
-        imagePullPolicy: IfNotPresent
-        command: [ "uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000" ]
-        ports:
-        - containerPort: 9000
-        envFrom:
-        - configMapRef:
-            name: env-configmap
-        env:
-          - name: INDEXING_ONLY
-            value: "True"
-        volumeMounts:
-        - name: indexing-model-storage
-          mountPath: /root/.cache
-        resources:
-          requests:
-            nvidia.com/gpu: 1
-          limits:
-            nvidia.com/gpu: 1
-      volumes:
-      - name: indexing-model-storage
-        persistentVolumeClaim:
-          claimName: indexing-model-pvc
-      tolerations:
-      - effect: NoSchedule
-        key: sku
-        operator: Equal
-        value: gpu
----
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: indexing-model-pvc
-spec:
-  accessModes:
-    - ReadWriteOnce
-  resources:
-    requests:
-      storage: 10Gi
diff --git a/darwin-kubernetes/indexing_model_statefulset.yaml b/darwin-kubernetes/indexing_model_statefulset.yaml
deleted file mode 100644
index 78f9984d04f..00000000000
--- a/darwin-kubernetes/indexing_model_statefulset.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-apiVersion: apps/v1
-kind: StatefulSet
-metadata:
-  name: indexing-model-server-statefulset
-spec:
-  replicas: 2
-  selector:
-    matchLabels:
-      app: indexing-model-server
-      name: indexing-model-server
-  serviceName: indexing-model-server-service
-  template:
-    metadata:
-      labels:
-        app: indexing-model-server
-        name: indexing-model-server
-    spec:
-      affinity:
-        nodeAffinity:
-          requiredDuringSchedulingIgnoredDuringExecution:
-            nodeSelectorTerms:
-            - matchExpressions:
-              - key: agentpool
-                operator: In
-                values:
-                - indexcpu
-      containers:
-      - name: indexing-model-server
-        image: danswer/danswer-model-server:v0.3.94
-        imagePullPolicy: IfNotPresent
-        command: [ "uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000" ]
-        ports:
-        - containerPort: 9000
-        envFrom:
-        - configMapRef:
-            name: env-configmap
-        env:
-          - name: INDEXING_ONLY
-            value: "True"
-        volumeMounts:
-        - name: indexing-model-storage
-          mountPath: /root/.cache
-      tolerations:
-      - effect: NoSchedule
-        key: darwin
-        operator: Equal
-        value: indexing
-  volumeClaimTemplates:
-  - metadata:
-      name: indexing-model-storage
-    spec:
-      accessModes: [ "ReadWriteOnce" ]
-      resources:
-        requests:
-          storage: 10Gi
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: indexing-model-server-service
-spec:
-  selector:
-    app: indexing-model-server
-  ports:
-    - name: indexing-model-server-port
-      protocol: TCP
-      port: 9000
-      targetPort: 9000
-  type: ClusterIP
diff --git a/darwin-kubernetes/inference_model_server-service-deployment.yaml b/darwin-kubernetes/inference_model_server-service-deployment.yaml
deleted file mode 100644
index db7788efca3..00000000000
--- a/darwin-kubernetes/inference_model_server-service-deployment.yaml
+++ /dev/null
@@ -1,70 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: inference-model-server-service
-spec:
-  selector:
-    app: inference-model-server
-  ports:
-    - name: inference-model-server-port
-      protocol: TCP
-      port: 9000
-      targetPort: 9000
-  type: ClusterIP
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: inference-model-server-deployment
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: inference-model-server
-  template:
-    metadata:
-      labels:
-        app: inference-model-server
-    spec:
-      affinity:
-        nodeAffinity:
-          requiredDuringSchedulingIgnoredDuringExecution:
-            nodeSelectorTerms:
-            - matchExpressions:
-              - key: agentpool
-                operator: In
-                values:
-                - indexcpu
-      containers:
-      - name: inference-model-server
-        image: danswer/danswer-model-server:v0.3.94
-        imagePullPolicy: IfNotPresent
-        command: [ "uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000" ]
-        ports:
-        - containerPort: 9000
-        envFrom:
-        - configMapRef:
-            name: env-configmap
-        volumeMounts:
-        - name: inference-model-storage
-          mountPath: /root/.cache
-      volumes:
-      - name: inference-model-storage
-        persistentVolumeClaim:
-          claimName: inference-model-pvc
-      tolerations:
-      - effect: NoSchedule
-        key: darwin
-        operator: Equal
-        value: indexing
----
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: inference-model-pvc
-spec:
-  accessModes:
-    - ReadWriteOnce
-  resources:
-    requests:
-      storage: 3Gi
diff --git a/darwin-kubernetes/inference_model_server-statefulset.yaml b/darwin-kubernetes/inference_model_server-statefulset.yaml
deleted file mode 100644
index 24b0ce72cf7..00000000000
--- a/darwin-kubernetes/inference_model_server-statefulset.yaml
+++ /dev/null
@@ -1,66 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: inference-model-server-service
-spec:
-  selector:
-    app: inference-model-server
-  ports:
-    - name: inference-model-server-port
-      protocol: TCP
-      port: 9000
-      targetPort: 9000
-  type: ClusterIP
----
-apiVersion: apps/v1
-kind: StatefulSet
-metadata:
-  name: inference-model-server-deployment
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: inference-model-server
-      name: inference-model-server
-  serviceName: inference-model-server-service
-  template:
-    metadata:
-      labels:
-        app: inference-model-server
-        name: inference-model-server
-    spec:
-      affinity:
-        nodeAffinity:
-          requiredDuringSchedulingIgnoredDuringExecution:
-            nodeSelectorTerms:
-            - matchExpressions:
-              - key: agentpool
-                operator: In
-                values:
-                - indexcpu
-      containers:
-      - name: inference-model-server
-        image: danswer/danswer-model-server:v0.3.94
-        imagePullPolicy: IfNotPresent
-        command: [ "uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000" ]
-        ports:
-        - containerPort: 9000
-        envFrom:
-        - configMapRef:
-            name: env-configmap
-        volumeMounts:
-        - name: inference-model-storage
-          mountPath: /root/.cache
-      tolerations:
-      - effect: NoSchedule
-        key: darwin
-        operator: Equal
-        value: indexing
-  volumeClaimTemplates:
-  - metadata:
-      name: inference-model-storage
-    spec:
-      accessModes: [ "ReadWriteOnce" ]
-      resources:
-        requests:
-          storage: 3Gi
diff --git a/darwin-kubernetes/nginx-configmap.yaml b/darwin-kubernetes/nginx-configmap.yaml
deleted file mode 100644
index 08b945d599c..00000000000
--- a/darwin-kubernetes/nginx-configmap.yaml
+++ /dev/null
@@ -1,44 +0,0 @@
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: nginx-configmap
-data:
-  nginx.conf: |
-    upstream api_server {
-        server api-server-service:80 fail_timeout=0;
-    }
-
-    upstream web_server {
-        server web-server-service:80 fail_timeout=0;
-    }
-
-    server {
-        listen 80;
-        server_name $$DOMAIN;
-
-        client_max_body_size 5G;    # Maximum upload size
-
-        location ~ ^/api(.*)$ {
-            rewrite ^/api(/.*)$ $1 break;
-            proxy_set_header X-Real-IP $remote_addr;
-            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-            proxy_set_header X-Forwarded-Proto $scheme;
-            proxy_set_header X-Forwarded-Host $host;
-            proxy_set_header Host $host;
-            proxy_http_version 1.1;
-            proxy_buffering off;
-            proxy_redirect off;
-            proxy_pass http://api_server;
-        }
-
-        location / {
-            proxy_set_header X-Real-IP $remote_addr;
-            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-            proxy_set_header X-Forwarded-Proto $scheme;
-            proxy_set_header X-Forwarded-Host $host;
-            proxy_set_header Host $host;
-            proxy_http_version 1.1;
-            proxy_redirect off;
-            proxy_pass http://web_server;
-        }
-    }
diff --git a/darwin-kubernetes/nginx-service-deployment.yaml b/darwin-kubernetes/nginx-service-deployment.yaml
deleted file mode 100644
index 27b14794ee3..00000000000
--- a/darwin-kubernetes/nginx-service-deployment.yaml
+++ /dev/null
@@ -1,55 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: nginx-service
-spec:
-  selector:
-    app: nginx
-  ports:
-    - name: http
-      protocol: TCP
-      port: 80
-      targetPort: 80
-    - name: danswer
-      protocol: TCP
-      port: 3000
-      targetPort: 80
-  type: LoadBalancer
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: nginx-deployment
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: nginx
-  template:
-    metadata:
-      labels:
-        app: nginx
-    spec:
-      containers:
-      - name: nginx
-        image: nginx:1.23.4-alpine
-        ports:
-        - containerPort: 80
-        env:
-        - name: DOMAIN
-          valueFrom:
-            configMapKeyRef:
-              name: env-configmap
-              key: DOMAIN
-        volumeMounts:
-        - name: nginx-conf
-          mountPath: /etc/nginx/conf.d
-        command:
-        - /bin/sh
-        - -c
-        - |
-          while :; do sleep 6h & wait $$!; nginx -s reload; done & nginx -g "daemon off;"
-      volumes:
-      - name: nginx-conf
-        configMap:
-          name: nginx-configmap
diff --git a/darwin-kubernetes/secrets.yaml b/darwin-kubernetes/secrets.yaml
deleted file mode 100644
index 352ffd16d6a..00000000000
--- a/darwin-kubernetes/secrets.yaml
+++ /dev/null
@@ -1,22 +0,0 @@
-# Real secret values must NOT be committed. Fill the placeholders below and
-# apply out-of-band (or move to a sealed-secret / external secret manager).
-apiVersion: v1
-kind: Secret
-metadata:
-  name: danswer-secrets
-type: Opaque
-stringData:
-  # --- Postgres ---
-  postgres_user: "postgres"
-  postgres_password: "<your-postgres-password>"
-
-  # --- Microsoft / Entra ID OIDC ---
-  # Application (client) ID — an identifier, not a secret.
-  oauth_client_id: "xxxx"
-  # Entra client secret — ROTATE the one shared in chat and paste the new one.
-  oauth_client_secret: "<entra-client-secret>"
-  # Signs the fastapi-users session + OAuth state JWT. Generate once with:
-  #   openssl rand -hex 32
-  # MUST be identical across all replicas and stable across restarts/rollouts,
-  # or in-flight logins fail and existing sessions are invalidated.
-  user_auth_secret: "<openssl rand -hex 32 output>"
diff --git a/deployment/docker_compose/docker-compose.dask-distributed.yml b/deployment/docker_compose/docker-compose.dask-distributed.yml
new file mode 100644
index 00000000000..505edea9521
--- /dev/null
+++ b/deployment/docker_compose/docker-compose.dask-distributed.yml
@@ -0,0 +1,96 @@
+# Opt-in compose overlay that simulates the production
+# Dask-Distributed indexing topology locally.
+#
+# Why use this:
+#   - Reproduce a production-mode bug without going to K8s.
+#   - Watch the Dask scheduler dashboard at http://localhost:8787
+#     while triggering re-indexes from the admin UI.
+#   - Verify that scaling `dask-worker` replicas actually parallelizes
+#     indexing across worker containers (the local stand-in for K8s pods).
+#
+# Why NOT use this for daily dev work:
+#   - The default docker-compose.dev.yml already runs `update.py` in
+#     a single container's LocalCluster — fast, simple, all you need
+#     for connector-code work.
+#
+# Usage:
+#   docker compose \
+#     -f deployment/docker_compose/docker-compose.dev.yml \
+#     -f deployment/docker_compose/docker-compose.dask-distributed.yml \
+#     up
+#
+#   # scale workers up/down at any time:
+#   docker compose ... up -d --scale dask-worker=5
+
+services:
+  dask-scheduler:
+    image: danswer/danswer-backend:latest
+    build:
+      context: ../../backend
+      dockerfile: Dockerfile
+    command:
+      - dask
+      - scheduler
+      - --host=0.0.0.0
+      - --port=8786
+      - --dashboard-address=:8787
+    ports:
+      - "8786:8786"   # scheduler RPC — Client connects here
+      - "8787:8787"   # web dashboard — visit http://localhost:8787
+    restart: always
+
+  dask-worker:
+    image: danswer/danswer-backend:latest
+    build:
+      context: ../../backend
+      dockerfile: Dockerfile
+    # `--nworkers=1 --nthreads=1` per container, matching the K8s
+    # manifest. Scale this service's `deploy.replicas` (or use `--scale`)
+    # to add concurrent indexing capacity.
+    command:
+      - dask
+      - worker
+      - tcp://dask-scheduler:8786
+      - --nworkers=1
+      - --nthreads=1
+      - --memory-limit=4GB
+    depends_on:
+      - dask-scheduler
+      - relational_db
+      - index
+      - indexing_model_server
+    deploy:
+      replicas: 2
+    restart: always
+    environment:
+      # PYTHONPATH so the worker can import danswer.* when
+      # deserializing the run_indexing_entrypoint callable.
+      - PYTHONPATH=/app
+      - CURRENT_PROCESS_IS_AN_INDEXING_JOB=true
+      # Mirror what the existing `background` service uses (compose does not
+      # inherit env between services — only the vars listed here are set) so
+      # connector credentials, model-server endpoints, Vespa hosts, GenAI keys, etc. match.
+      - POSTGRES_HOST=relational_db
+      - VESPA_HOST=index
+      - VESPA_PORT=8081
+      - INDEXING_MODEL_SERVER_HOST=indexing_model_server
+      - INDEXING_MODEL_SERVER_PORT=9000
+      - MODEL_SERVER_HOST=inference_model_server
+      - MODEL_SERVER_PORT=9000
+      - GEN_AI_MODEL_PROVIDER=${GEN_AI_MODEL_PROVIDER:-}
+      - GEN_AI_API_KEY=${GEN_AI_API_KEY:-}
+      - GEN_AI_API_ENDPOINT=${GEN_AI_API_ENDPOINT:-}
+      - GEN_AI_IDENTITY_ENDPOINT=${GEN_AI_IDENTITY_ENDPOINT:-}
+      - GEN_AI_CLIENT_ID=${GEN_AI_CLIENT_ID:-}
+      - GEN_AI_CLIENT_SECRET=${GEN_AI_CLIENT_SECRET:-}
+
+  # Override the existing `background` service to point its
+  # update.py loop at the remote scheduler instead of an in-process
+  # LocalCluster. Everything else about the service stays the same
+  # (supervisord, celery beat, celery worker still run in this
+  # container — only the indexing dispatch path is rerouted).
+  background:
+    environment:
+      - DASK_SCHEDULER_ADDRESS=tcp://dask-scheduler:8786
+    depends_on:
+      - dask-scheduler
diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml
index 294b22deff1..491cbe4cefe 100644
--- a/deployment/docker_compose/docker-compose.dev.yml
+++ b/deployment/docker_compose/docker-compose.dev.yml
@@ -314,6 +314,31 @@ services:
       - db_volume:/var/lib/postgresql/data
 
 
+  # Cache + per-user request rate limiting. Cache-only — no persistence; an LRU
+  # eviction policy bounds memory so a runaway producer can't OOM the node.
+  # Not used as a Celery broker here (Postgres is the default broker in this fork).
+  redis:
+    image: redis:7.2-alpine
+    restart: always
+    command:
+      - redis-server
+      - --appendonly
+      - "no"
+      - --save
+      - ""
+      - --maxmemory
+      - "256mb"
+      - --maxmemory-policy
+      - allkeys-lru
+    ports:
+      - "6379:6379"
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 10s
+      timeout: 3s
+      retries: 5
+
+
   # This container name cannot have an underscore in it due to Vespa expectations of the URL
   index:
     image: vespaengine/vespa:8.277.17
diff --git a/deployment/helm/.gitignore b/deployment/helm/.gitignore
deleted file mode 100644
index b442275d6b5..00000000000
--- a/deployment/helm/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-### Helm ###
-# Chart dependencies
-**/charts/*.tgz
diff --git a/deployment/helm/.helmignore b/deployment/helm/.helmignore
deleted file mode 100644
index 0e8a0eb36f4..00000000000
--- a/deployment/helm/.helmignore
+++ /dev/null
@@ -1,23 +0,0 @@
-# Patterns to ignore when building packages.
-# This supports shell glob matching, relative path matching, and
-# negation (prefixed with !). Only one pattern per line.
-.DS_Store
-# Common VCS dirs
-.git/
-.gitignore
-.bzr/
-.bzrignore
-.hg/
-.hgignore
-.svn/
-# Common backup files
-*.swp
-*.bak
-*.tmp
-*.orig
-*~
-# Various IDEs
-.project
-.idea/
-*.tmproj
-.vscode/
diff --git a/deployment/helm/Chart.lock b/deployment/helm/Chart.lock
deleted file mode 100644
index 918b44f6ebf..00000000000
--- a/deployment/helm/Chart.lock
+++ /dev/null
@@ -1,12 +0,0 @@
-dependencies:
-- name: postgresql
-  repository: https://charts.bitnami.com/bitnami
-  version: 14.3.1
-- name: vespa
-  repository: https://unoplat.github.io/vespa-helm-charts
-  version: 0.2.3
-- name: nginx
-  repository: oci://registry-1.docker.io/bitnamicharts
-  version: 15.14.0
-digest: sha256:ab17b5d2c3883055cb4a26bf530043521be5220c24f804e954bb428273d16ba8
-generated: "2024-05-24T16:55:30.598279-07:00"
diff --git a/deployment/helm/Chart.yaml b/deployment/helm/Chart.yaml
deleted file mode 100644
index 7763f33bec5..00000000000
--- a/deployment/helm/Chart.yaml
+++ /dev/null
@@ -1,35 +0,0 @@
-apiVersion: v2
-name: danswer-stack
-description: A Helm chart for Kubernetes
-home: https://www.danswer.ai/
-sources:
-  - "https://github.com/danswer-ai/danswer"
-type: application
-version: 0.2.0
-appVersion: "latest"
-annotations:
-  category: Productivity
-  licenses: MIT
-  images: |
-    - name: webserver
-      image: docker.io/danswer/danswer-web-server:latest
-    - name: background
-      image: docker.io/danswer/danswer-backend:latest
-    - name: vespa
-      image: vespaengine/vespa:8.277.17
-dependencies:
-  - name: postgresql
-    version: 14.3.1
-    repository: https://charts.bitnami.com/bitnami
-    condition: postgresql.enabled
-  - name: vespa 
-    version: 0.2.3
-    repository: https://unoplat.github.io/vespa-helm-charts
-    condition: vespa.enabled
-  - name: nginx
-    version: 15.14.0
-    repository: oci://registry-1.docker.io/bitnamicharts
-    condition: nginx.enabled  
-    
-
-    
\ No newline at end of file
diff --git a/deployment/helm/templates/_helpers.tpl b/deployment/helm/templates/_helpers.tpl
deleted file mode 100644
index 483a5b5e5af..00000000000
--- a/deployment/helm/templates/_helpers.tpl
+++ /dev/null
@@ -1,83 +0,0 @@
-{{/*
-Expand the name of the chart.
-*/}}
-{{- define "danswer-stack.name" -}}
-{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
-{{- end }}
-
-{{/*
-Create a default fully qualified app name.
-We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
-If release name contains chart name it will be used as a full name.
-*/}}
-{{- define "danswer-stack.fullname" -}}
-{{- if .Values.fullnameOverride }}
-{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
-{{- else }}
-{{- $name := default .Chart.Name .Values.nameOverride }}
-{{- if contains $name .Release.Name }}
-{{- .Release.Name | trunc 63 | trimSuffix "-" }}
-{{- else }}
-{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
-{{- end }}
-{{- end }}
-{{- end }}
-
-{{/*
-Create chart name and version as used by the chart label.
-*/}}
-{{- define "danswer-stack.chart" -}}
-{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
-{{- end }}
-
-{{/*
-Common labels
-*/}}
-{{- define "danswer-stack.labels" -}}
-helm.sh/chart: {{ include "danswer-stack.chart" . }}
-{{ include "danswer-stack.selectorLabels" . }}
-{{- if .Chart.AppVersion }}
-app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
-{{- end }}
-app.kubernetes.io/managed-by: {{ .Release.Service }}
-{{- end }}
-
-{{/*
-Selector labels
-*/}}
-{{- define "danswer-stack.selectorLabels" -}}
-app.kubernetes.io/name: {{ include "danswer-stack.name" . }}
-app.kubernetes.io/instance: {{ .Release.Name }}
-{{- end }}
-
-{{/*
-Create the name of the service account to use
-*/}}
-{{- define "danswer-stack.serviceAccountName" -}}
-{{- if .Values.serviceAccount.create }}
-{{- default (include "danswer-stack.fullname" .) .Values.serviceAccount.name }}
-{{- else }}
-{{- default "default" .Values.serviceAccount.name }}
-{{- end }}
-{{- end }}
-
-{{/*
-Set secret name
-*/}}
-{{- define "danswer-stack.secretName" -}}
-{{- default (default "danswer-secrets" .Values.auth.secretName) .Values.auth.existingSecret }}
-{{- end }}
-
-{{/*
-Create env vars from secrets
-*/}}
-{{- define "danswer-stack.envSecrets" -}}
-    {{- range $name, $key := .Values.auth.secretKeys }}
-- name: {{ $name | upper | replace "-" "_" | quote }}
-  valueFrom:
-    secretKeyRef:
-      name: {{ include "danswer-stack.secretName" $ }}
-      key: {{ default $name $key }}
-    {{- end }}
-{{- end }}
-
diff --git a/deployment/helm/templates/api-deployment.yaml b/deployment/helm/templates/api-deployment.yaml
deleted file mode 100644
index 7f10bffafd0..00000000000
--- a/deployment/helm/templates/api-deployment.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: {{ include "danswer-stack.fullname" . }}-api-deployment
-  labels:
-    {{- include "danswer-stack.labels" . | nindent 4 }}
-spec:
-  {{- if not .Values.api.autoscaling.enabled }}
-  replicas: {{ .Values.api.replicaCount }}
-  {{- end }}
-  selector:
-    matchLabels:
-      {{- include "danswer-stack.selectorLabels" . | nindent 6 }}
-      {{- if .Values.api.deploymentLabels }}
-      {{- toYaml .Values.api.deploymentLabels | nindent 6 }}
-      {{- end }}
-  template:
-    metadata:
-      {{- with .Values.api.podAnnotations }}
-      annotations:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      labels:
-        {{- include "danswer-stack.labels" . | nindent 8 }}
-        {{- with .Values.api.podLabels }}
-        {{- toYaml . | nindent 8 }}
-        {{- end }}
-    spec:
-      {{- with .Values.imagePullSecrets }}
-      imagePullSecrets:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      serviceAccountName: {{ include "danswer-stack.serviceAccountName" . }}
-      securityContext:
-        {{- toYaml .Values.api.podSecurityContext | nindent 8 }}
-      containers:
-        - name: api-server
-          securityContext:
-            {{- toYaml .Values.api.securityContext | nindent 12 }}
-          image: "{{ .Values.api.image.repository }}:{{ .Values.api.image.tag | default .Chart.AppVersion }}"
-          imagePullPolicy: {{ .Values.api.image.pullPolicy }}
-          command:
-            - "/bin/sh"
-            - "-c"
-            - |
-              alembic upgrade head &&
-              echo "Starting Danswer Api Server" &&
-              uvicorn danswer.main:app --host 0.0.0.0 --port 8080
-          ports:
-            - name: api-server-port
-              containerPort: {{ .Values.api.service.port }}
-              protocol: TCP
-          resources:
-            {{- toYaml .Values.api.resources | nindent 12 }}
-          envFrom:
-            - configMapRef:
-                name: {{ .Values.config.envConfigMapName }}
-          env:
-            {{- include "danswer-stack.envSecrets" . | nindent 12}}
diff --git a/deployment/helm/templates/api-hpa.yaml b/deployment/helm/templates/api-hpa.yaml
deleted file mode 100644
index 378c39715ad..00000000000
--- a/deployment/helm/templates/api-hpa.yaml
+++ /dev/null
@@ -1,32 +0,0 @@
-{{- if .Values.api.autoscaling.enabled }}
-apiVersion: autoscaling/v2
-kind: HorizontalPodAutoscaler
-metadata:
-  name: {{ include "danswer-stack.fullname" . }}-api
-  labels:
-    {{- include "danswer-stack.labels" . | nindent 4 }}
-spec:
-  scaleTargetRef:
-    apiVersion: apps/v1
-    kind: Deployment
-    name: {{ include "danswer-stack.fullname" . }}
-  minReplicas: {{ .Values.api.autoscaling.minReplicas }}
-  maxReplicas: {{ .Values.api.autoscaling.maxReplicas }}
-  metrics:
-    {{- if .Values.api.autoscaling.targetCPUUtilizationPercentage }}
-    - type: Resource
-      resource:
-        name: cpu
-        target:
-          type: Utilization
-          averageUtilization: {{ .Values.api.autoscaling.targetCPUUtilizationPercentage }}
-    {{- end }}
-    {{- if .Values.api.autoscaling.targetMemoryUtilizationPercentage }}
-    - type: Resource
-      resource:
-        name: memory
-        target:
-          type: Utilization
-          averageUtilization: {{ .Values.api.autoscaling.targetMemoryUtilizationPercentage }}
-    {{- end }}
-{{- end }}
diff --git a/deployment/helm/templates/api-service.yaml b/deployment/helm/templates/api-service.yaml
deleted file mode 100644
index 1fd74d4ddf5..00000000000
--- a/deployment/helm/templates/api-service.yaml
+++ /dev/null
@@ -1,22 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  # INTERNAL_URL env variable depends on this, don't change without changing INTERNAL_URL
-  name: {{ include "danswer-stack.fullname" . }}-api-service
-  labels:
-    {{- include "danswer-stack.labels" . | nindent 4 }}
-    {{- if .Values.api.deploymentLabels }}
-    {{- toYaml .Values.api.deploymentLabels | nindent 4 }}
-    {{- end }}
-spec:
-  type: {{ .Values.api.service.type }}
-  ports:
-    - port: {{ .Values.api.service.port }}
-      targetPort: api-server-port
-      protocol: TCP
-      name: api-server-port
-  selector:
-    {{- include "danswer-stack.selectorLabels" . | nindent 4 }}
-    {{- if .Values.api.deploymentLabels }}
-    {{- toYaml .Values.api.deploymentLabels | nindent 4 }}
-    {{- end }}
diff --git a/deployment/helm/templates/background-deployment.yaml b/deployment/helm/templates/background-deployment.yaml
deleted file mode 100644
index 3cd65a99af4..00000000000
--- a/deployment/helm/templates/background-deployment.yaml
+++ /dev/null
@@ -1,51 +0,0 @@
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: {{ include "danswer-stack.fullname" . }}-background
-  labels:
-    {{- include "danswer-stack.labels" . | nindent 4 }}
-spec:
-  {{- if not .Values.background.autoscaling.enabled }}
-  replicas: {{ .Values.background.replicaCount }}
-  {{- end }}
-  selector:
-    matchLabels:
-      {{- include "danswer-stack.selectorLabels" . | nindent 6 }}
-      {{- if .Values.background.deploymentLabels }}
-      {{- toYaml .Values.background.deploymentLabels | nindent 6 }}
-      {{- end }}
-  template:
-    metadata:
-      {{- with .Values.background.podAnnotations }}
-      annotations:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      labels:
-        {{- include "danswer-stack.labels" . | nindent 8 }}
-        {{- with .Values.background.podLabels }}
-        {{- toYaml . | nindent 8 }}
-        {{- end }}
-    spec:
-      {{- with .Values.imagePullSecrets }}
-      imagePullSecrets:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      serviceAccountName: {{ include "danswer-stack.serviceAccountName" . }}
-      securityContext:
-        {{- toYaml .Values.background.podSecurityContext | nindent 8 }}
-      containers:
-        - name: background
-          securityContext:
-            {{- toYaml .Values.background.securityContext | nindent 12 }}
-          image: "{{ .Values.background.image.repository }}:{{ .Values.background.image.tag | default .Chart.AppVersion }}"
-          imagePullPolicy: {{ .Values.background.image.pullPolicy }}
-          command: ["/usr/bin/supervisord"]
-          resources:
-            {{- toYaml .Values.background.resources | nindent 12 }}
-          envFrom:
-            - configMapRef:
-                name: {{ .Values.config.envConfigMapName }}
-          env:
-            - name: ENABLE_MINI_CHUNK
-              value: "{{ .Values.background.enableMiniChunk }}"
-            {{- include "danswer-stack.envSecrets" . | nindent 12}}
diff --git a/deployment/helm/templates/background-hpa.yaml b/deployment/helm/templates/background-hpa.yaml
deleted file mode 100644
index 009daf10f05..00000000000
--- a/deployment/helm/templates/background-hpa.yaml
+++ /dev/null
@@ -1,32 +0,0 @@
-{{- if .Values.background.autoscaling.enabled }}
-apiVersion: autoscaling/v2
-kind: HorizontalPodAutoscaler
-metadata:
-  name: {{ include "danswer-stack.fullname" . }}-background
-  labels:
-    {{- include "danswer-stack.labels" . | nindent 4 }}
-spec:
-  scaleTargetRef:
-    apiVersion: apps/v1
-    kind: Deployment
-    name: {{ include "danswer-stack.fullname" . }}
-  minReplicas: {{ .Values.background.autoscaling.minReplicas }}
-  maxReplicas: {{ .Values.background.autoscaling.maxReplicas }}
-  metrics:
-    {{- if .Values.background.autoscaling.targetCPUUtilizationPercentage }}
-    - type: Resource
-      resource:
-        name: cpu
-        target:
-          type: Utilization
-          averageUtilization: {{ .Values.background.autoscaling.targetCPUUtilizationPercentage }}
-    {{- end }}
-    {{- if .Values.background.autoscaling.targetMemoryUtilizationPercentage }}
-    - type: Resource
-      resource:
-        name: memory
-        target:
-          type: Utilization
-          averageUtilization: {{ .Values.background.autoscaling.targetMemoryUtilizationPercentage }}
-    {{- end }}
-{{- end }}
diff --git a/deployment/helm/templates/configmap.yaml b/deployment/helm/templates/configmap.yaml
deleted file mode 100755
index 8119ae0459c..00000000000
--- a/deployment/helm/templates/configmap.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: {{ .Values.config.envConfigMapName }}
-  labels:
-    {{- include "danswer-stack.labels" . | nindent 4 }}
-data:
-  INTERNAL_URL: "http://{{ include "danswer-stack.fullname" . }}-api-service:{{ .Values.api.service.port | default 8080 }}"
-  POSTGRES_HOST: {{ .Release.Name }}-postgresql
-  VESPA_HOST: "document-index-service"
-  MODEL_SERVER_HOST: "{{ include "danswer-stack.fullname" . }}-inference-model-service"
-  INDEXING_MODEL_SERVER_HOST: "{{ include "danswer-stack.fullname" . }}-indexing-model-service"
-{{- range $key, $value := .Values.configMap }}
-  {{ $key }}: "{{ $value }}"
-{{- end }}
\ No newline at end of file
diff --git a/deployment/helm/templates/danswer-secret.yaml b/deployment/helm/templates/danswer-secret.yaml
deleted file mode 100644
index 6b2aa317204..00000000000
--- a/deployment/helm/templates/danswer-secret.yaml
+++ /dev/null
@@ -1,11 +0,0 @@
-{{- if not .Values.auth.existingSecret -}}
-apiVersion: v1
-kind: Secret
-metadata:
-  name: {{ include "danswer-stack.secretName" . }}
-type: Opaque
-stringData:
-  {{- range $name, $value := .Values.auth.secrets }}
-  {{ $name }}: {{ $value | quote }}
-  {{- end }}
-{{- end }}
\ No newline at end of file
diff --git a/deployment/helm/templates/indexing-model-deployment.yaml b/deployment/helm/templates/indexing-model-deployment.yaml
deleted file mode 100644
index cc88aefb79a..00000000000
--- a/deployment/helm/templates/indexing-model-deployment.yaml
+++ /dev/null
@@ -1,51 +0,0 @@
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: {{ include "danswer-stack.fullname" . }}-indexing-model
-  labels:
-    {{- include "danswer-stack.labels" . | nindent 4 }}
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      {{- include "danswer-stack.selectorLabels" . | nindent 6 }}
-      {{- if .Values.indexCapability.deploymentLabels }}
-      {{- toYaml .Values.indexCapability.deploymentLabels | nindent 6 }}
-      {{- end }}
-  template:
-    metadata:
-      {{- with .Values.indexCapability.podAnnotations }}
-      annotations:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      labels:
-        {{- include "danswer-stack.labels" . | nindent 8 }}
-        {{- with .Values.indexCapability.podLabels }}
-        {{- toYaml . | nindent 8 }}
-        {{- end }}
-    spec:
-      containers:
-      - name: indexing-model-server
-        image: danswer/danswer-model-server:latest
-        imagePullPolicy: IfNotPresent
-        command: [ "uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000", "--limit-concurrency", "10" ]
-        ports:
-        - containerPort: 9000
-        envFrom:
-          - configMapRef:
-              name: {{ .Values.config.envConfigMapName }}
-        env:
-          - name: INDEXING_ONLY
-            value: "{{ default "True" .Values.indexCapability.indexingOnly }}"
-          {{- include "danswer-stack.envSecrets" . | nindent 10}}
-        volumeMounts:
-        {{- range .Values.indexCapability.volumeMounts }}
-        - name: {{ .name }}
-          mountPath: {{ .mountPath }}
-        {{- end }}
-      volumes:
-      {{- range .Values.indexCapability.volumes }}
-      - name: {{ .name }}
-        persistentVolumeClaim:
-          claimName: {{ .persistentVolumeClaim.claimName }}
-      {{- end }}
diff --git a/deployment/helm/templates/indexing-model-pvc.yaml b/deployment/helm/templates/indexing-model-pvc.yaml
deleted file mode 100644
index e5825557d5b..00000000000
--- a/deployment/helm/templates/indexing-model-pvc.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: {{ .Values.indexCapability.indexingModelPVC.name }}
-spec:
-  accessModes:
-    - {{ .Values.indexCapability.indexingModelPVC.accessMode | quote }}
-  resources:
-    requests:
-      storage: {{ .Values.indexCapability.indexingModelPVC.storage | quote }}
\ No newline at end of file
diff --git a/deployment/helm/templates/indexing-model-service.yaml b/deployment/helm/templates/indexing-model-service.yaml
deleted file mode 100644
index fbbeb6bee86..00000000000
--- a/deployment/helm/templates/indexing-model-service.yaml
+++ /dev/null
@@ -1,18 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: {{ include "danswer-stack.fullname" . }}-indexing-model-service
-  labels:
-    {{- include "danswer-stack.labels" . | nindent 4 }}
-spec:
-  selector:
-    {{- include "danswer-stack.selectorLabels" . | nindent 4 }}
-    {{- if .Values.indexCapability.deploymentLabels }}
-    {{- toYaml .Values.indexCapability.deploymentLabels | nindent 4 }}
-    {{- end }}
-  ports:
-    - name: {{ .Values.indexCapability.service.name }}
-      protocol: TCP
-      port: {{ .Values.indexCapability.service.port }}
-      targetPort: {{ .Values.indexCapability.service.port }}
-  type: {{ .Values.indexCapability.service.type }}
\ No newline at end of file
diff --git a/deployment/helm/templates/inference-model-deployment.yaml b/deployment/helm/templates/inference-model-deployment.yaml
deleted file mode 100644
index 43caddd29c3..00000000000
--- a/deployment/helm/templates/inference-model-deployment.yaml
+++ /dev/null
@@ -1,45 +0,0 @@
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: {{ include "danswer-stack.fullname" . }}-inference-model
-  labels:
-    {{- range .Values.inferenceCapability.deployment.labels }}
-    {{ .key }}: {{ .value }}
-    {{- end }}
-spec:
-  replicas: {{ .Values.inferenceCapability.deployment.replicas }}
-  selector:
-    matchLabels:
-      {{- range .Values.inferenceCapability.deployment.labels }}
-      {{ .key }}: {{ .value }}
-      {{- end }}
-  template:
-    metadata:
-      labels:
-        {{- range .Values.inferenceCapability.podLabels }}
-        {{ .key }}: {{ .value }}
-        {{- end }}
-    spec:
-      containers:
-      - name: {{ .Values.inferenceCapability.service.name }}
-        image: {{ .Values.inferenceCapability.deployment.image.repository }}:{{ .Values.inferenceCapability.deployment.image.tag }}
-        imagePullPolicy: {{ .Values.inferenceCapability.deployment.image.pullPolicy }}
-        command: {{ toYaml .Values.inferenceCapability.deployment.command | nindent 14 }}
-        ports:
-        - containerPort: {{ .Values.inferenceCapability.service.port }}
-        envFrom:
-        - configMapRef:
-            name: {{ .Values.config.envConfigMapName }}
-        env:
-          {{- include "danswer-stack.envSecrets" . | nindent 12}}
-        volumeMounts:
-        {{- range .Values.inferenceCapability.deployment.volumeMounts }}
-        - name: {{ .name }}
-          mountPath: {{ .mountPath }}
-        {{- end }}
-      volumes:
-      {{- range .Values.inferenceCapability.deployment.volumes }}
-      - name: {{ .name }}
-        persistentVolumeClaim:
-          claimName: {{ .persistentVolumeClaim.claimName }}
-      {{- end }}
diff --git a/deployment/helm/templates/inference-model-pvc.yaml b/deployment/helm/templates/inference-model-pvc.yaml
deleted file mode 100644
index fe47fa879a0..00000000000
--- a/deployment/helm/templates/inference-model-pvc.yaml
+++ /dev/null
@@ -1,10 +0,0 @@
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: {{ .Values.inferenceCapability.pvc.name }}
-spec:
-  accessModes: 
-    {{- toYaml .Values.inferenceCapability.pvc.accessModes | nindent 4 }}
-  resources:
-    requests:
-      storage: {{ .Values.inferenceCapability.pvc.storage }}
diff --git a/deployment/helm/templates/inference-model-service.yaml b/deployment/helm/templates/inference-model-service.yaml
deleted file mode 100644
index 74433ac11da..00000000000
--- a/deployment/helm/templates/inference-model-service.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: {{ include "danswer-stack.fullname" . }}-inference-model-service
-spec:
-  type: {{ .Values.inferenceCapability.service.type }}
-  ports:
-    - port: {{ .Values.inferenceCapability.service.port }}
-      targetPort: {{ .Values.inferenceCapability.service.port }}
-      protocol: TCP
-      name: {{ .Values.inferenceCapability.service.name }}
-  selector:
-    {{- range .Values.inferenceCapability.deployment.labels }}
-    {{ .key }}: {{ .value }}
-    {{- end }}
diff --git a/deployment/helm/templates/nginx-conf.yaml b/deployment/helm/templates/nginx-conf.yaml
deleted file mode 100644
index 81ecbaaa2f6..00000000000
--- a/deployment/helm/templates/nginx-conf.yaml
+++ /dev/null
@@ -1,44 +0,0 @@
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: danswer-nginx-conf
-data:
-  nginx.conf: |
-    upstream api_server {
-        server {{ include "danswer-stack.fullname" . }}-api-service:{{ .Values.api.service.port }} fail_timeout=0;
-    }
-
-    upstream web_server {
-        server {{ include "danswer-stack.fullname" . }}-webserver:{{ .Values.webserver.service.port }} fail_timeout=0;
-    }
-
-    server {
-        listen 1024;
-        server_name $$DOMAIN;
-
-        client_max_body_size 5G;    # Maximum upload size
-
-        location ~ ^/api(.*)$ {
-            rewrite ^/api(/.*)$ $1 break;
-            proxy_set_header X-Real-IP $remote_addr;
-            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-            proxy_set_header X-Forwarded-Proto $scheme;
-            proxy_set_header X-Forwarded-Host $host;
-            proxy_set_header Host $host;
-            proxy_http_version 1.1;
-            proxy_buffering off;
-            proxy_redirect off;
-            proxy_pass http://api_server;
-        }
-
-        location / {
-            proxy_set_header X-Real-IP $remote_addr;
-            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-            proxy_set_header X-Forwarded-Proto $scheme;
-            proxy_set_header X-Forwarded-Host $host;
-            proxy_set_header Host $host;
-            proxy_http_version 1.1;
-            proxy_redirect off;
-            proxy_pass http://web_server;
-        }
-    }
diff --git a/deployment/helm/templates/serviceaccount.yaml b/deployment/helm/templates/serviceaccount.yaml
deleted file mode 100644
index afd351217ba..00000000000
--- a/deployment/helm/templates/serviceaccount.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
-{{- if .Values.serviceAccount.create -}}
-apiVersion: v1
-kind: ServiceAccount
-metadata:
-  name: {{ include "danswer-stack.serviceAccountName" . }}
-  labels:
-    {{- include "danswer-stack.labels" . | nindent 4 }}
-  {{- with .Values.serviceAccount.annotations }}
-  annotations:
-    {{- toYaml . | nindent 4 }}
-  {{- end }}
-automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
-{{- end }}
diff --git a/deployment/helm/templates/tests/test-connection.yaml b/deployment/helm/templates/tests/test-connection.yaml
deleted file mode 100644
index 60fbd1054c1..00000000000
--- a/deployment/helm/templates/tests/test-connection.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-apiVersion: v1
-kind: Pod
-metadata:
-  name: "{{ include "danswer-stack.fullname" . }}-test-connection"
-  labels:
-    {{- include "danswer-stack.labels" . | nindent 4 }}
-  annotations:
-    "helm.sh/hook": test
-spec:
-  containers:
-    - name: wget
-      image: busybox
-      command: ['wget']
-      args: ['{{ include "danswer-stack.fullname" . }}:{{ .Values.webserver.service.port }}']
-  restartPolicy: Never
diff --git a/deployment/helm/templates/webserver-deployment.yaml b/deployment/helm/templates/webserver-deployment.yaml
deleted file mode 100644
index c3505248fc6..00000000000
--- a/deployment/helm/templates/webserver-deployment.yaml
+++ /dev/null
@@ -1,60 +0,0 @@
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: {{ include "danswer-stack.fullname" . }}-webserver
-  labels:
-    {{- include "danswer-stack.labels" . | nindent 4 }}
-spec:
-  {{- if not .Values.webserver.autoscaling.enabled }}
-  replicas: {{ .Values.webserver.replicaCount }}
-  {{- end }}
-  selector:
-    matchLabels:
-      {{- include "danswer-stack.selectorLabels" . | nindent 6 }}
-      {{- if .Values.webserver.deploymentLabels }}
-      {{- toYaml .Values.webserver.deploymentLabels | nindent 6 }}
-      {{- end }}
-  template:
-    metadata:
-      {{- with .Values.webserver.podAnnotations }}
-      annotations:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      labels:
-        {{- include "danswer-stack.labels" . | nindent 8 }}
-        {{- with .Values.webserver.podLabels }}
-        {{- toYaml . | nindent 8 }}
-        {{- end }}
-    spec:
-      {{- with .Values.imagePullSecrets }}
-      imagePullSecrets:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
-      serviceAccountName: {{ include "danswer-stack.serviceAccountName" . }}
-      securityContext:
-        {{- toYaml .Values.webserver.podSecurityContext | nindent 8 }}
-      containers:
-        - name: web-server
-          securityContext:
-            {{- toYaml .Values.webserver.securityContext | nindent 12 }}
-          image: "{{ .Values.webserver.image.repository }}:{{ .Values.webserver.image.tag | default .Chart.AppVersion }}"
-          imagePullPolicy: {{ .Values.webserver.image.pullPolicy }}
-          ports:
-            - name: http
-              containerPort: {{ .Values.webserver.service.port }}
-              protocol: TCP
-          resources:
-            {{- toYaml .Values.webserver.resources | nindent 12 }}
-          envFrom:
-            - configMapRef:
-                name: {{ .Values.config.envConfigMapName }}
-          env:
-            {{- include "danswer-stack.envSecrets" . | nindent 12}}
-          {{- with .Values.webserver.volumeMounts }}
-          volumeMounts:
-            {{- toYaml . | nindent 12 }}
-          {{- end }}
-      {{- with .Values.webserver.volumes }}
-      volumes:
-        {{- toYaml . | nindent 8 }}
-      {{- end }}
diff --git a/deployment/helm/templates/webserver-hpa.yaml b/deployment/helm/templates/webserver-hpa.yaml
deleted file mode 100644
index b46820a7fac..00000000000
--- a/deployment/helm/templates/webserver-hpa.yaml
+++ /dev/null
@@ -1,32 +0,0 @@
-{{- if .Values.webserver.autoscaling.enabled }}
-apiVersion: autoscaling/v2
-kind: HorizontalPodAutoscaler
-metadata:
-  name: {{ include "danswer-stack.fullname" . }}-webserver
-  labels:
-    {{- include "danswer-stack.labels" . | nindent 4 }}
-spec:
-  scaleTargetRef:
-    apiVersion: apps/v1
-    kind: Deployment
-    name: {{ include "danswer-stack.fullname" . }}
-  minReplicas: {{ .Values.webserver.autoscaling.minReplicas }}
-  maxReplicas: {{ .Values.webserver.autoscaling.maxReplicas }}
-  metrics:
-    {{- if .Values.webserver.autoscaling.targetCPUUtilizationPercentage }}
-    - type: Resource
-      resource:
-        name: cpu
-        target:
-          type: Utilization
-          averageUtilization: {{ .Values.webserver.autoscaling.targetCPUUtilizationPercentage }}
-    {{- end }}
-    {{- if .Values.webserver.autoscaling.targetMemoryUtilizationPercentage }}
-    - type: Resource
-      resource:
-        name: memory
-        target:
-          type: Utilization
-          averageUtilization: {{ .Values.webserver.autoscaling.targetMemoryUtilizationPercentage }}
-    {{- end }}
-{{- end }}
diff --git a/deployment/helm/templates/webserver-service.yaml b/deployment/helm/templates/webserver-service.yaml
deleted file mode 100644
index 3e33566fce1..00000000000
--- a/deployment/helm/templates/webserver-service.yaml
+++ /dev/null
@@ -1,21 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: {{ include "danswer-stack.fullname" . }}-webserver
-  labels:
-    {{- include "danswer-stack.labels" . | nindent 4 }}
-    {{- if .Values.webserver.deploymentLabels }}
-    {{- toYaml .Values.webserver.deploymentLabels | nindent 4 }}
-    {{- end }}
-spec:
-  type: {{ .Values.webserver.service.type }}
-  ports:
-    - port: {{ .Values.webserver.service.port }}
-      targetPort: http
-      protocol: TCP
-      name: http
-  selector:
-    {{- include "danswer-stack.selectorLabels" . | nindent 4 }}
-    {{- if .Values.webserver.deploymentLabels }}
-    {{- toYaml .Values.webserver.deploymentLabels | nindent 4 }}
-    {{- end }}
diff --git a/deployment/helm/values.yaml b/deployment/helm/values.yaml
deleted file mode 100644
index 53b82ddc6b9..00000000000
--- a/deployment/helm/values.yaml
+++ /dev/null
@@ -1,457 +0,0 @@
-# Default values for danswer-stack.
-# This is a YAML-formatted file.
-# Declare variables to be passed into your templates.
-
-imagePullSecrets: []
-nameOverride: ""
-fullnameOverride: ""
-
-inferenceCapability:
-  service:
-    name: inference-model-server-service
-    type: ClusterIP
-    port: 9000
-  pvc:
-    name: inference-model-pvc
-    accessModes:
-      - ReadWriteOnce
-    storage: 3Gi
-  deployment:
-    name: inference-model-server-deployment
-    replicas: 1
-    labels:
-      - key: app
-        value: inference-model-server
-    image:
-      repository: danswer/danswer-model-server
-      tag: latest
-      pullPolicy: IfNotPresent
-    command: ["uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000"]
-    port: 9000
-    volumeMounts:
-      - name: inference-model-storage
-        mountPath: /root/.cache
-    volumes:
-      - name: inference-model-storage
-        persistentVolumeClaim:
-          claimName: inference-model-pvc
-  podLabels:
-    - key: app
-      value: inference-model-server
-
-indexCapability:
-  service:
-    type: ClusterIP
-    port: 9000
-    name: indexing-model-server-port
-  deploymentLabels:
-    app: indexing-model-server
-  podLabels:
-    app: indexing-model-server
-  indexingOnly: "True"
-  podAnnotations: {}
-  volumeMounts:
-    - name: indexing-model-storage
-      mountPath: /root/.cache
-  volumes:
-    - name: indexing-model-storage
-      persistentVolumeClaim:
-        claimName: indexing-model-storage
-  indexingModelPVC:
-    name: indexing-model-storage
-    accessMode: "ReadWriteOnce"
-    storage: "3Gi"
-
-config:
-  envConfigMapName: env-configmap
-
-serviceAccount:
-  # Specifies whether a service account should be created
-  create: false
-  # Automatically mount a ServiceAccount's API credentials?
-  automount: true
-  # Annotations to add to the service account
-  annotations: {}
-  # The name of the service account to use.
-  # If not set and create is true, a name is generated using the fullname template
-  name: ""
-
-postgresql:
-  primary:
-    persistence:
-      size: 5Gi
-  enabled: true
-  auth:
-    existingSecret: danswer-secrets
-    secretKeys:
-      adminPasswordKey: postgres_password #overwriting as postgres typically expects 'postgres-password'
-
-nginx:
-  containerPorts:
-    http: 1024
-  extraEnvVars:
-    - name: DOMAIN
-      value: localhost
-  service:
-    ports:
-      http: 80
-      danswer: 3000
-    targetPort:
-      http: http
-      danswer: http
-
-  existingServerBlockConfigmap: danswer-nginx-conf
-
-webserver:
-  replicaCount: 1
-  image:
-    repository: danswer/danswer-web-server
-    pullPolicy: IfNotPresent
-    # Overrides the image tag whose default is the chart appVersion.
-    tag: ""
-  deploymentLabels:
-    app: web-server
-  podAnnotations: {}
-  podLabels:
-    app: web-server
-  podSecurityContext: {}
-    # fsGroup: 2000
-
-  securityContext: {}
-    # capabilities:
-    #   drop:
-    #   - ALL
-    # readOnlyRootFilesystem: true
-    # runAsNonRoot: true
-    # runAsUser: 1000
-
-  service:
-    type: ClusterIP
-    port: 3000
-
-  resources: {}
-  # We usually recommend not to specify default resources and to leave this as a conscious
-  # choice for the user. This also increases chances charts run on environments with little
-  # resources, such as Minikube. If you do want to specify resources, uncomment the following
-  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
-  # limits:
-  #   cpu: 100m
-  #   memory: 128Mi
-  # requests:
-  #   cpu: 100m
-  #   memory: 128Mi
-
-  autoscaling:
-    enabled: false
-    minReplicas: 1
-    maxReplicas: 100
-    targetCPUUtilizationPercentage: 80
-    # targetMemoryUtilizationPercentage: 80
-
-  # Additional volumes on the output Deployment definition.
-  volumes: []
-  # - name: foo
-  #   secret:
-  #     secretName: mysecret
-  #     optional: false
-
-  # Additional volumeMounts on the output Deployment definition.
-  volumeMounts: []
-  # - name: foo
-  #   mountPath: "/etc/foo"
-  #   readOnly: true
-
-  nodeSelector: {}
-  tolerations: []
-  affinity: {}
-
-api:
-  replicaCount: 1
-  image:
-    repository: danswer/danswer-backend
-    pullPolicy: IfNotPresent
-    # Overrides the image tag whose default is the chart appVersion.
-    tag: ""
-  deploymentLabels:
-    app: api-server
-  podAnnotations: {}
-  podLabels:
-    scope: danswer-backend
-    app: api-server
-
-  podSecurityContext: {}
-    # fsGroup: 2000
-
-  securityContext: {}
-    # capabilities:
-    #   drop:
-    #   - ALL
-    # readOnlyRootFilesystem: true
-    # runAsNonRoot: true
-    # runAsUser: 1000
-
-  service:
-    type: ClusterIP
-    port: 8080
-
-  resources: {}
-  # We usually recommend not to specify default resources and to leave this as a conscious
-  # choice for the user. This also increases chances charts run on environments with little
-  # resources, such as Minikube. If you do want to specify resources, uncomment the following
-  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
-  #  requests:
-  #    cpu: 1000m  # Requests 1 CPU core
-  #    memory: 1Gi  # Requests 1 GiB of memory
-  #  limits:
-  #    cpu: 2000m  # Limits to 2 CPU cores
-  #    memory: 2Gi  # Limits to 2 GiB of memory
-
-  autoscaling:
-    enabled: false
-    minReplicas: 1
-    maxReplicas: 100
-    targetCPUUtilizationPercentage: 80
-    # targetMemoryUtilizationPercentage: 80
-
-  # Additional volumes on the output Deployment definition.
-  volumes: []
-  # - name: foo
-  #   secret:
-  #     secretName: mysecret
-  #     optional: false
-
-  # Additional volumeMounts on the output Deployment definition.
-  volumeMounts: []
-  # - name: foo
-  #   mountPath: "/etc/foo"
-  #   readOnly: true
-
-  nodeSelector: {}
-  tolerations: []
-
-
-background:
-  replicaCount: 1
-  image:
-    repository: danswer/danswer-backend
-    pullPolicy: IfNotPresent
-    # Overrides the image tag whose default is the chart appVersion.
-    tag: latest
-  podAnnotations: {}
-  podLabels:
-    scope: danswer-backend
-    app: background
-  deploymentLabels:
-    app: background
-  podSecurityContext: {}
-    # fsGroup: 2000
-
-  securityContext: {}
-    # capabilities:
-    #   drop:
-    #   - ALL
-    # readOnlyRootFilesystem: true
-    # runAsNonRoot: true
-    # runAsUser: 1000
-  enableMiniChunk: "true"
-  resources: {}
-  # We usually recommend not to specify default resources and to leave this as a conscious
-  # choice for the user. This also increases chances charts run on environments with little
-  # resources, such as Minikube. If you do want to specify resources, uncomment the following
-  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
-  #  requests:
-  #    cpu: 1000m  # Requests 1 CPU core
-  #    memory: 1Gi  # Requests 1 GiB of memory
-  #  limits:
-  #    cpu: 2000m  # Limits to 2 CPU cores
-  #    memory: 2Gi  # Limits to 2 GiB of memory
-
-  autoscaling:
-    enabled: false
-    minReplicas: 1
-    maxReplicas: 100
-    targetCPUUtilizationPercentage: 80
-    # targetMemoryUtilizationPercentage: 80
-
-  # Additional volumes on the output Deployment definition.
-  volumes: []
-  # - name: foo
-  #   secret:
-  #     secretName: mysecret
-  #     optional: false
-
-  # Additional volumeMounts on the output Deployment definition.
-  volumeMounts: []
-  # - name: foo
-  #   mountPath: "/etc/foo"
-  #   readOnly: true
-
-  nodeSelector: {}
-  tolerations: []
-
-vespa:
-  replicaCount: 1
-  image:
-    repository: vespa
-    pullPolicy: IfNotPresent
-    tag: "8.277.17"
-  podAnnotations: {}
-  podLabels:
-    app: vespa
-    app.kubernetes.io/instance: danswer
-    app.kubernetes.io/name: vespa
-  enabled: true
-
-  podSecurityContext: {}
-    # fsGroup: 2000
-
-  securityContext:
-    privileged: true
-    runAsUser: 0
-    # capabilities:
-    #   drop:
-    #   - ALL
-    # readOnlyRootFilesystem: true
-    # runAsNonRoot: true
-    # runAsUser: 1000
-
-  resources:
-  # The Vespa Helm chart specifies default resources, which are quite modest. We override
-  # them here to increase chances of the chart running successfully.
-    requests:
-      cpu: 1500m
-      memory: 4000Mi
-    limits:
-      cpu: 1500m
-      memory: 4000Mi
-
-  nodeSelector: {}
-  tolerations: []
-  affinity: {}
-
-
-#ingress:
-#  enabled: false
-#  className: ""
-#  annotations: {}
-#    # kubernetes.io/ingress.class: nginx
-#    # kubernetes.io/tls-acme: "true"
-#  hosts:
-#    - host: chart-example.local
-#      paths:
-#        - path: /
-#          pathType: ImplementationSpecific
-#  tls: []
-#  #  - secretName: chart-example-tls
-#  #    hosts:
-#  #      - chart-example.local
-
-persistence:
-  vespa:
-    enabled: true
-    existingClaim: ""
-    storageClassName: ""
-    accessModes:
-      - ReadWriteOnce
-    size: 5Gi
-
-auth:
-  # for storing smtp, oauth, slack, and other secrets
-  # keys are lowercased version of env vars (e.g. SMTP_USER -> smtp_user)
-  existingSecret: "" # danswer-secrets
-  # optionally override the secret keys to reference in the secret
-  secretKeys:
-    postgres_password: "postgres_password"
-    smtp_pass: ""
-    oauth_client_id: ""
-    oauth_client_secret: ""
-    oauth_cookie_secret: ""
-    gen_ai_api_key: ""
-    danswer_bot_slack_app_token: ""
-    danswer_bot_slack_bot_token: ""
-    opsgenie_api_key: ""
-  # will be overridden by the existingSecret if set
-  secretName: "danswer-secrets"
-  # set values as strings, they will be base64 encoded
-  secrets:
-    postgres_password: "postgres"
-    smtp_pass: ""
-    oauth_client_id: ""
-    oauth_client_secret: ""
-    oauth_cookie_secret: ""
-    gen_ai_api_key: ""
-    danswer_bot_slack_app_token: ""
-    danswer_bot_slack_bot_token: ""
-    opsgenie_api_key: ""
-
-configMap:
-  AUTH_TYPE: "disabled"  # Change this for production uses unless Danswer is only accessible behind VPN
-  SESSION_EXPIRE_TIME_SECONDS: "86400"  # 1 Day Default
-  VALID_EMAIL_DOMAINS: ""  # Can be something like danswer.ai, as an extra double-check
-  SMTP_SERVER: "" # For sending verification emails, if unspecified then defaults to 'smtp.gmail.com'
-  SMTP_PORT: "" # For sending verification emails, if unspecified then defaults to '587'
-  SMTP_USER: ""  # 'your-email@company.com'
-  # SMTP_PASS: ""  # 'your-gmail-password'
-  EMAIL_FROM: ""  # 'your-email@company.com' SMTP_USER missing used instead
-  # Gen AI Settings
-  GEN_AI_MODEL_PROVIDER: ""
-  GEN_AI_MODEL_VERSION: ""
-  FAST_GEN_AI_MODEL_VERSION: ""
-  # GEN_AI_API_KEY: ""
-  GEN_AI_API_ENDPOINT: ""
-  GEN_AI_API_VERSION: ""
-  GEN_AI_LLM_PROVIDER_TYPE: ""
-  GEN_AI_MAX_TOKENS: ""
-  QA_TIMEOUT: "60"
-  MAX_CHUNKS_FED_TO_CHAT: ""
-  DISABLE_LLM_FILTER_EXTRACTION: ""
-  DISABLE_LLM_CHUNK_FILTER: ""
-  DISABLE_LLM_CHOOSE_SEARCH: ""
-  DISABLE_LLM_QUERY_REPHRASE: ""
-  # Query Options
-  DOC_TIME_DECAY: ""
-  HYBRID_ALPHA: ""
-  EDIT_KEYWORD_QUERY: ""
-  MULTILINGUAL_QUERY_EXPANSION: ""
-  LANGUAGE_HINT: ""
-  LANGUAGE_CHAT_NAMING_HINT: ""
-  QA_PROMPT_OVERRIDE: ""
-  # Internet Search Tool
-  BING_API_KEY: ""
-  # Don't change the NLP models unless you know what you're doing
-  DOCUMENT_ENCODER_MODEL: ""
-  NORMALIZE_EMBEDDINGS: ""
-  ASYM_QUERY_PREFIX: ""
-  ASYM_PASSAGE_PREFIX: ""
-  ENABLE_RERANKING_REAL_TIME_FLOW: ""
-  ENABLE_RERANKING_ASYNC_FLOW: ""
-  MODEL_SERVER_PORT: ""
-  MIN_THREADS_ML_MODELS: ""
-  # Indexing Configs
-  NUM_INDEXING_WORKERS: ""
-  DISABLE_INDEX_UPDATE_ON_SWAP: ""
-  DASK_JOB_CLIENT_ENABLED: ""
-  CONTINUE_ON_CONNECTOR_FAILURE: ""
-  EXPERIMENTAL_CHECKPOINTING_ENABLED: ""
-  CONFLUENCE_CONNECTOR_LABELS_TO_SKIP: ""
-  JIRA_API_VERSION: ""
-  GONG_CONNECTOR_START_TIME: ""
-  NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP: ""
-  # DanswerBot SlackBot Configs
-  # DANSWER_BOT_SLACK_APP_TOKEN: ""
-  # DANSWER_BOT_SLACK_BOT_TOKEN: ""
-  DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER: ""
-  DANSWER_BOT_DISPLAY_ERROR_MSGS: ""
-  DANSWER_BOT_RESPOND_EVERY_CHANNEL: ""
-  DANSWER_BOT_DISABLE_COT: ""  # Currently unused
-  NOTIFY_SLACKBOT_NO_ANSWER: ""
-  # Logging
-  # Optional Telemetry, please keep it on (nothing sensitive is collected)? <3
-  # https://docs.danswer.dev/more/telemetry
-  DISABLE_TELEMETRY: ""
-  LOG_LEVEL: ""
-  LOG_ALL_MODEL_INTERACTIONS: ""
-  LOG_VESPA_TIMING_INFORMATION: ""
-  # Shared or Non-backend Related
-  WEB_DOMAIN: "http://localhost:3000"  # for web server and api server
-  DOMAIN: "localhost"  # for nginx
diff --git a/deployment/kubernetes/analytics-bootstrap-job.yaml b/deployment/kubernetes/analytics-bootstrap-job.yaml
deleted file mode 100644
index e3ff12d2d2e..00000000000
--- a/deployment/kubernetes/analytics-bootstrap-job.yaml
+++ /dev/null
@@ -1,142 +0,0 @@
-# One-time Kubernetes Job for the Darwin analytics rollup bootstrap.
-#
-# What it does (in order):
-#   1. `alembic upgrade heads` — applies any pending migrations,
-#      including this PR's `c8a4e2f9d1b3_analytics_daily_rollup`. Idempotent:
-#      already-applied revisions are skipped. Safe to re-run.
-#   2. `scripts/backfill_analytics_rollup.py` — walks every historical
-#      date that still has chat data, computes the daily metrics, and
-#      writes them into `analytics_daily_rollup` (also seeds the
-#      `analytics_rollup_state` checkpoint in `key_value_store`).
-#      Idempotent via INSERT…ON CONFLICT(date) DO UPDATE.
-#
-# Why a Job and not a Deployment / Pod:
-#   - Deployment auto-restarts on container exit — wrong for one-time
-#     work; the migration would loop.
-#   - Bare Pod doesn't track success / failure cleanly.
-#   - Job has run-to-completion semantics + retry-on-failure +
-#     TTL-after-finish for auto-cleanup. Standard K8s pattern.
-#
-# When to apply:
-#   - ONCE, after the new backend image (with this PR's code) is rolled
-#     out to the api-server and background-deployment, and BEFORE the
-#     next 08:00 UTC retention sweep on a fresh DB. If retention runs
-#     first, it deletes chat data older than 30 days and the backfill
-#     will then write zero counts for those days.
-#   - Re-applying is safe (both steps are idempotent), but normally
-#     unnecessary — the daily Celery beat task takes over.
-#
-# How to apply:
-#   1. Update IMAGE_TAG to the tag containing the merged PR code
-#      (currently the api-server runs vha-119; replace as needed).
-#   2. kubectl apply -f deployment/kubernetes/analytics-bootstrap-job.yaml
-#   3. Watch logs:
-#        kubectl logs -n darwin -f job/darwin-analytics-bootstrap
-#   4. Verify completion:
-#        kubectl get -n darwin job/darwin-analytics-bootstrap
-#        # COMPLETIONS should read 1/1
-#   5. Verify the rollup table:
-#        kubectl exec -n darwin <postgres-pod> -- psql ... \
-#          -c "SELECT count(*), max(rolled_up_at) FROM analytics_daily_rollup;"
-#
-# How to clean up: nothing required — `ttlSecondsAfterFinished: 3600`
-# auto-deletes the Job and its Pod 1 hour after success. Manual delete:
-#   kubectl delete -n darwin job/darwin-analytics-bootstrap
-#
-# Behaviour on failure: `backoffLimit: 3` retries up to 3 times
-# (with exponential backoff). After that the Job is marked Failed and
-# you can inspect logs of the failed Pod via `kubectl logs ...`.
-apiVersion: batch/v1
-kind: Job
-metadata:
-  name: darwin-analytics-bootstrap
-  namespace: darwin
-  labels:
-    app: darwin-analytics-bootstrap
-    purpose: one-time-migration
-spec:
-  # Auto-cleanup the Job + its completed Pod 1 hour after success.
-  # Tune higher if you want more time to inspect logs.
-  ttlSecondsAfterFinished: 3600
-  # Retry the whole pipeline up to 3 times on failure (each step is
-  # idempotent so retries are safe). 4xx/5xx pods are inspectable until
-  # ttlSecondsAfterFinished kicks in.
-  backoffLimit: 3
-  # Hard kill if the Job runs longer than 30 minutes — backfill on a
-  # large chat history can take a few minutes; 30m is a generous ceiling.
-  activeDeadlineSeconds: 1800
-  template:
-    metadata:
-      labels:
-        app: darwin-analytics-bootstrap
-    spec:
-      # OnFailure → if the container exits non-zero, kubelet restarts
-      # it within the same Pod (faster than scheduling a new Pod).
-      # Combined with backoffLimit above for the cross-Pod retry.
-      restartPolicy: OnFailure
-      containers:
-        - name: bootstrap
-          # IMPORTANT: bump this to the image tag that includes this PR's
-          # backend code (the analytics_rollup module + new migration).
-          # The api-server deployment is currently on vha-119; you'll
-          # likely roll a vha-120 (or similar) once the PR merges.
-          image: sfbrdevhelmweacr.azurecr.io/danswer/danswer-backend:vha-121
-          imagePullPolicy: IfNotPresent
-          command:
-            - /bin/sh
-            - -c
-            - |
-              # `pipefail` isn't supported by the image's /bin/sh
-              # (BusyBox ash / dash). The script has no pipes anyway,
-              # so plain `-eu` is sufficient: any non-zero exit aborts.
-              set -eu
-              echo "=== Step 1/2: alembic upgrade heads ==="
-              alembic upgrade heads
-              echo
-              echo "=== Step 2/2: backfill analytics_daily_rollup ==="
-              # PYTHONPATH=. is needed because the script imports
-              # `danswer.*` and the image's WORKDIR is the backend/ dir.
-              PYTHONPATH=. python scripts/backfill_analytics_rollup.py
-              echo
-              echo "=== Bootstrap complete ==="
-          # Same Postgres creds as the api-server / background pods.
-          env:
-            - name: POSTGRES_USER
-              valueFrom:
-                secretKeyRef:
-                  key: postgres_user
-                  name: danswer-secrets
-            - name: POSTGRES_PASSWORD
-              valueFrom:
-                secretKeyRef:
-                  key: postgres_password
-                  name: danswer-secrets
-          # Same shared config as the api-server. The backfill reads
-          # POSTGRES_HOST, encryption keys, etc. from here.
-          envFrom:
-            - configMapRef:
-                name: env-configmap
-          # PVCs match the api-server. Strictly speaking the backfill
-          # doesn't write to either, but mirroring the api-server config
-          # avoids surprises if anything in the import chain reads from
-          # /home/storage or /home/file_connector_storage.
-          volumeMounts:
-            - mountPath: /home/storage
-              name: dynamic-storage
-            - mountPath: /home/file_connector_storage
-              name: file-connector-storage
-          # Modest resource ask — backfill is mostly DB I/O.
-          resources:
-            requests:
-              cpu: "100m"
-              memory: "256Mi"
-            limits:
-              cpu: "1"
-              memory: "1Gi"
-      volumes:
-        - name: dynamic-storage
-          persistentVolumeClaim:
-            claimName: dynamic-pvc
-        - name: file-connector-storage
-          persistentVolumeClaim:
-            claimName: file-connector-pvc
diff --git a/deployment/kubernetes/api_server-service-deployment.yaml b/deployment/kubernetes/api_server-service-deployment.yaml
deleted file mode 100644
index eeac5fecc96..00000000000
--- a/deployment/kubernetes/api_server-service-deployment.yaml
+++ /dev/null
@@ -1,57 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: api-server-service
-spec:
-  selector:
-    app: api-server
-  ports:
-    - name: api-server-port
-      protocol: TCP
-      port: 80
-      targetPort: 8080
-  type: ClusterIP
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: api-server-deployment
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: api-server
-  template:
-    metadata:
-      labels:
-        app: api-server
-    spec:
-      containers:
-      - name: api-server
-        image: danswer/danswer-backend:latest
-        imagePullPolicy: IfNotPresent
-        command:
-          - "/bin/sh"
-          - "-c"
-          - |
-            alembic upgrade head &&
-            echo "Starting Danswer Api Server" &&
-            uvicorn danswer.main:app --host 0.0.0.0 --port 8080
-        ports:
-        - containerPort: 8080
-        # There are some extra values since this is shared between services
-        # There are no conflicts though, extra env variables are simply ignored
-        env:
-        - name: OAUTH_CLIENT_ID
-          valueFrom:
-            secretKeyRef:
-              name: danswer-secrets
-              key: google_oauth_client_id
-        - name: OAUTH_CLIENT_SECRET
-          valueFrom:
-            secretKeyRef:
-              name: danswer-secrets
-              key: google_oauth_client_secret
-        envFrom:
-        - configMapRef:
-            name: env-configmap
diff --git a/deployment/kubernetes/background-deployment.yaml b/deployment/kubernetes/background-deployment.yaml
deleted file mode 100644
index 18521b0f5ad..00000000000
--- a/deployment/kubernetes/background-deployment.yaml
+++ /dev/null
@@ -1,24 +0,0 @@
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: background-deployment
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: background
-  template:
-    metadata:
-      labels:
-        app: background
-    spec:
-      containers:
-      - name: background
-        image: danswer/danswer-backend:latest
-        imagePullPolicy: IfNotPresent
-        command: ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
-        # There are some extra values since this is shared between services
-        # There are no conflicts though, extra env variables are simply ignored
-        envFrom:
-        - configMapRef:
-            name: env-configmap
diff --git a/deployment/kubernetes/env-configmap.yaml b/deployment/kubernetes/env-configmap.yaml
deleted file mode 100644
index ebfcc9deb81..00000000000
--- a/deployment/kubernetes/env-configmap.yaml
+++ /dev/null
@@ -1,107 +0,0 @@
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: env-configmap
-data:
-  # Auth Setting, also check the secrets file
-  AUTH_TYPE: "disabled"  # Change this for production uses unless Danswer is only accessible behind VPN
-  ENCRYPTION_KEY_SECRET: ""  # This should not be specified directly in the yaml, this is just for reference
-  SESSION_EXPIRE_TIME_SECONDS: "86400"  # 1 Day Default
-  VALID_EMAIL_DOMAINS: ""  # Can be something like danswer.ai, as an extra double-check
-  SMTP_SERVER: "" # For sending verification emails, if unspecified then defaults to 'smtp.gmail.com'
-  SMTP_PORT: "" # For sending verification emails, if unspecified then defaults to '587'
-  SMTP_USER: ""  # 'your-email@company.com'
-  SMTP_PASS: ""  # 'your-gmail-password'
-  EMAIL_FROM: ""  # 'your-email@company.com' SMTP_USER missing used instead
-  # Gen AI Settings
-  GEN_AI_MODEL_PROVIDER: ""
-  GEN_AI_MODEL_VERSION: ""
-  FAST_GEN_AI_MODEL_VERSION: ""
-  GEN_AI_API_KEY: ""
-  GEN_AI_API_ENDPOINT: ""
-  GEN_AI_API_VERSION: ""
-  GEN_AI_LLM_PROVIDER_TYPE: ""
-  GEN_AI_MAX_TOKENS: ""
-  QA_TIMEOUT: "60"
-  MAX_CHUNKS_FED_TO_CHAT: ""
-  DISABLE_LLM_FILTER_EXTRACTION: ""
-  DISABLE_LLM_CHUNK_FILTER: ""
-  DISABLE_LLM_CHOOSE_SEARCH: ""
-  DISABLE_LLM_QUERY_REPHRASE: ""
-  # Query Options
-  DOC_TIME_DECAY: ""
-  HYBRID_ALPHA: ""
-  EDIT_KEYWORD_QUERY: ""
-  MULTILINGUAL_QUERY_EXPANSION: ""
-  LANGUAGE_HINT: ""
-  LANGUAGE_CHAT_NAMING_HINT: ""
-  QA_PROMPT_OVERRIDE: ""
-  # Other Services
-  POSTGRES_HOST: "relational-db-service"
-  VESPA_HOST: "document-index-service"
-  # Internet Search Tool
-  BING_API_KEY: ""
-  # Don't change the NLP models unless you know what you're doing
-  DOCUMENT_ENCODER_MODEL: ""
-  NORMALIZE_EMBEDDINGS: ""
-  ASYM_QUERY_PREFIX: ""
-  ASYM_PASSAGE_PREFIX: ""
-  ENABLE_RERANKING_REAL_TIME_FLOW: ""
-  ENABLE_RERANKING_ASYNC_FLOW: ""
-  MODEL_SERVER_HOST: "inference-model-server-service"
-  MODEL_SERVER_PORT: ""
-  INDEXING_MODEL_SERVER_HOST: "indexing-model-server-service"
-  MIN_THREADS_ML_MODELS: ""
-  # Indexing Configs
-  NUM_INDEXING_WORKERS: ""
-  # Per-DocumentSource concurrency cap when NUM_INDEXING_WORKERS > 1.
-  # Default 1 = at most one indexing attempt per source type at a time
-  # (prevents a single PAT/credential from getting rate-limited).
-  # 0 = uncapped. Enforced scheduler-side in update.py.
-  INDEXING_PER_SOURCE_CAP: ""
-  ENABLED_CONNECTOR_TYPES: ""
-  DISABLE_INDEX_UPDATE_ON_SWAP: ""
-  DASK_JOB_CLIENT_ENABLED: ""
-  CONTINUE_ON_CONNECTOR_FAILURE: ""
-  EXPERIMENTAL_CHECKPOINTING_ENABLED: ""
-  CONFLUENCE_CONNECTOR_LABELS_TO_SKIP: ""
-  JIRA_API_VERSION: ""
-  WEB_CONNECTOR_VALIDATE_URLS: ""
-  GONG_CONNECTOR_START_TIME: ""
-  NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP: ""
-  # DB Retention (daily Celery beat at 08:00 UTC; backend/danswer/db/retention.py).
-  # All defaults are sensible — override only when you need a tighter window.
-  RETENTION_DAYS_KOMBU: ""             # default 7  (Celery broker queue)
-  RETENTION_DAYS_TASK_QUEUE: ""        # default 30 (terminal task_queue_jobs only)
-  RETENTION_DAYS_INDEX_ATTEMPT: ""     # default 0 = disabled (opt-in to keep history)
-  RETENTION_KEEP_LAST_N_INDEX_ATTEMPTS: ""  # default 20 per (cc-pair, embedding model)
-  RETENTION_DAYS_CHAT: ""              # default 30 (chat_session + chat_message + LO blobs)
-  RETENTION_DAYS_PERMISSION_SYNC: ""   # default 30 (terminal permission_sync_run only)
-  RETENTION_DAYS_USAGE_REPORTS: ""     # default 90 (usage_reports + file_store + LO blobs)
-  RETENTION_BATCH_SIZE: ""             # default 5000 rows per DELETE
-  RETENTION_MAX_BATCHES: ""            # default 200 batches per policy per run
-  # Analytics rollup (daily Celery beat at 07:30 UTC, 30 min before retention;
-  # backend/danswer/db/analytics_rollup.py). The lookback is the late-feedback
-  # grace period — MUST be < RETENTION_DAYS_CHAT to avoid recomputing days
-  # whose source rows have already been deleted.
-  ANALYTICS_LATE_FEEDBACK_BUFFER_DAYS: ""  # default 2
-  # DanswerBot SlackBot Configs
-  DANSWER_BOT_SLACK_APP_TOKEN: ""
-  DANSWER_BOT_SLACK_BOT_TOKEN: ""
-  DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER: ""
-  DANSWER_BOT_DISPLAY_ERROR_MSGS: ""
-  DANSWER_BOT_RESPOND_EVERY_CHANNEL: ""
-  DANSWER_BOT_DISABLE_COT: ""  # Currently unused
-  OPSGENIE_API_KEY: ""
-  NOTIFY_SLACKBOT_NO_ANSWER: ""
-  # Logging
-  # Optional Telemetry, please keep it on (nothing sensitive is collected)? <3
-  # https://docs.danswer.dev/more/telemetry
-  DISABLE_TELEMETRY: ""
-  LOG_LEVEL: ""
-  LOG_ALL_MODEL_INTERACTIONS: ""
-  LOG_VESPA_TIMING_INFORMATION: ""
-  # Shared or Non-backend Related
-  INTERNAL_URL: "http://api-server-service:80"  # for web server
-  WEB_DOMAIN: "http://localhost:3000"  # for web server and api server
-  DOMAIN: "localhost"  # for nginx
diff --git a/deployment/kubernetes/indexing_model_server-service-deployment.yaml b/deployment/kubernetes/indexing_model_server-service-deployment.yaml
deleted file mode 100644
index d44b52e9289..00000000000
--- a/deployment/kubernetes/indexing_model_server-service-deployment.yaml
+++ /dev/null
@@ -1,59 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: indexing-model-server-service
-spec:
-  selector:
-    app: indexing-model-server
-  ports:
-    - name: indexing-model-server-port
-      protocol: TCP
-      port: 9000
-      targetPort: 9000
-  type: ClusterIP
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: indexing-model-server-deployment
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: indexing-model-server
-  template:
-    metadata:
-      labels:
-        app: indexing-model-server
-    spec:
-      containers:
-      - name: indexing-model-server
-        image: danswer/danswer-model-server:latest
-        imagePullPolicy: IfNotPresent
-        command: [ "uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000" ]
-        ports:
-        - containerPort: 9000
-        envFrom:
-        - configMapRef:
-            name: env-configmap
-        env:
-          - name: INDEXING_ONLY
-            value: "True"
-        volumeMounts:
-        - name: indexing-model-storage
-          mountPath: /root/.cache
-      volumes:
-      - name: indexing-model-storage
-        persistentVolumeClaim:
-          claimName: indexing-model-pvc
----
-apiVersion: v1
-kind: PersistentVolumeClaim
-metadata:
-  name: indexing-model-pvc
-spec:
-  accessModes:
-    - ReadWriteOnce
-  resources:
-    requests:
-      storage: 3Gi
diff --git a/deployment/kubernetes/nginx-configmap.yaml b/deployment/kubernetes/nginx-configmap.yaml
deleted file mode 100644
index 08b945d599c..00000000000
--- a/deployment/kubernetes/nginx-configmap.yaml
+++ /dev/null
@@ -1,44 +0,0 @@
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: nginx-configmap
-data:
-  nginx.conf: |
-    upstream api_server {
-        server api-server-service:80 fail_timeout=0;
-    }
-
-    upstream web_server {
-        server web-server-service:80 fail_timeout=0;
-    }
-
-    server {
-        listen 80;
-        server_name $$DOMAIN;
-
-        client_max_body_size 5G;    # Maximum upload size
-
-        location ~ ^/api(.*)$ {
-            rewrite ^/api(/.*)$ $1 break;
-            proxy_set_header X-Real-IP $remote_addr;
-            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-            proxy_set_header X-Forwarded-Proto $scheme;
-            proxy_set_header X-Forwarded-Host $host;
-            proxy_set_header Host $host;
-            proxy_http_version 1.1;
-            proxy_buffering off;
-            proxy_redirect off;
-            proxy_pass http://api_server;
-        }
-
-        location / {
-            proxy_set_header X-Real-IP $remote_addr;
-            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-            proxy_set_header X-Forwarded-Proto $scheme;
-            proxy_set_header X-Forwarded-Host $host;
-            proxy_set_header Host $host;
-            proxy_http_version 1.1;
-            proxy_redirect off;
-            proxy_pass http://web_server;
-        }
-    }
diff --git a/deployment/kubernetes/nginx-service-deployment.yaml b/deployment/kubernetes/nginx-service-deployment.yaml
deleted file mode 100644
index 27b14794ee3..00000000000
--- a/deployment/kubernetes/nginx-service-deployment.yaml
+++ /dev/null
@@ -1,55 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: nginx-service
-spec:
-  selector:
-    app: nginx
-  ports:
-    - name: http
-      protocol: TCP
-      port: 80
-      targetPort: 80
-    - name: danswer
-      protocol: TCP
-      port: 3000
-      targetPort: 80
-  type: LoadBalancer
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: nginx-deployment
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: nginx
-  template:
-    metadata:
-      labels:
-        app: nginx
-    spec:
-      containers:
-      - name: nginx
-        image: nginx:1.23.4-alpine
-        ports:
-        - containerPort: 80
-        env:
-        - name: DOMAIN
-          valueFrom:
-            configMapKeyRef:
-              name: env-configmap
-              key: DOMAIN
-        volumeMounts:
-        - name: nginx-conf
-          mountPath: /etc/nginx/conf.d
-        command:
-        - /bin/sh
-        - -c
-        - |
-          while :; do sleep 6h & wait $$!; nginx -s reload; done & nginx -g "daemon off;"
-      volumes:
-      - name: nginx-conf
-        configMap:
-          name: nginx-configmap
diff --git a/deployment/kubernetes/postgres-service-deployment.yaml b/deployment/kubernetes/postgres-service-deployment.yaml
deleted file mode 100644
index 17330204c1e..00000000000
--- a/deployment/kubernetes/postgres-service-deployment.yaml
+++ /dev/null
@@ -1,57 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: relational-db-service
-spec:
-  selector:
-    app: relational-db
-  ports:
-    - protocol: TCP
-      port: 5432
-      targetPort: 5432
-  clusterIP: None
----
-apiVersion: apps/v1
-kind: StatefulSet
-metadata:
-  name: relational-db-statefulset
-spec:
-  serviceName: relational-db-service
-  replicas: 1
-  selector:
-    matchLabels:
-      app: relational-db
-  template:
-    metadata:
-      labels:
-        app: relational-db
-    spec:
-      containers:
-      - name: relational-db
-        image: postgres:15.2-alpine
-        env:
-        - name: POSTGRES_USER
-          valueFrom:
-            secretKeyRef:
-              name: danswer-secrets
-              key: postgres_user
-        - name: POSTGRES_PASSWORD
-          valueFrom:
-            secretKeyRef:
-              name: danswer-secrets
-              key: postgres_password
-        ports:
-        - containerPort: 5432
-        volumeMounts:
-        - mountPath: /var/lib/postgresql/data
-          name: db-storage
-          subPath: postgres
-  volumeClaimTemplates:
-  - metadata:
-      name: db-storage
-    spec:
-      accessModes: ["ReadWriteOnce"]
-      resources:
-        requests:
-          # Adjust the storage request size as needed.
-          storage: 5Gi
diff --git a/deployment/kubernetes/secrets.yaml b/deployment/kubernetes/secrets.yaml
deleted file mode 100644
index c135a29f676..00000000000
--- a/deployment/kubernetes/secrets.yaml
+++ /dev/null
@@ -1,11 +0,0 @@
-# The values in this file should be changed
-apiVersion: v1
-kind: Secret
-metadata:
-  name: danswer-secrets
-type: Opaque
-data:
-  postgres_user: cG9zdGdyZXM= # "postgres" base64 encoded
-  postgres_password: cGFzc3dvcmQ= # "password" base64 encoded
-  google_oauth_client_id: ZXhhbXBsZS1jbGllbnQtaWQ= # "example-client-id" base64 encoded. You will need to provide this, use echo -n "your-client-id" | base64
-  google_oauth_client_secret: example_google_oauth_secret # "example-client-secret" base64 encoded. You will need to provide this, use echo -n "your-client-id" | base64
diff --git a/deployment/kubernetes/vespa-service-deployment.yaml b/deployment/kubernetes/vespa-service-deployment.yaml
deleted file mode 100644
index 5016258b757..00000000000
--- a/deployment/kubernetes/vespa-service-deployment.yaml
+++ /dev/null
@@ -1,63 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: document-index-service
-spec:
-  selector:
-    app: vespa
-  ports:
-    - name: vespa-tenant-port
-      protocol: TCP
-      port: 19071
-      targetPort: 19071
-    - name: vespa-port
-      protocol: TCP
-      port: 8081
-      targetPort: 8081
-  type: LoadBalancer
----
-apiVersion: apps/v1
-kind: StatefulSet
-metadata:
-  name: vespa
-  labels:
-    app: vespa
-spec:
-  replicas: 1
-  serviceName: vespa
-  selector:
-    matchLabels:
-      app: vespa
-  template:
-    metadata:
-      labels:
-        app: vespa
-    spec:
-      containers:
-      - name: vespa
-        image: vespaengine/vespa:8.277.17
-        imagePullPolicy: IfNotPresent
-        securityContext:
-          privileged: true
-          runAsUser: 0
-        ports:
-        - containerPort: 19071
-        - containerPort: 8081
-        readinessProbe:
-          httpGet:
-            path: /state/v1/health
-            port: 19071
-            scheme: HTTP
-        volumeMounts:
-        - name: vespa-storage
-          mountPath: /opt/vespa/var/
-  volumeClaimTemplates:
-  - metadata:
-      name: vespa-storage
-    spec:
-      accessModes:
-      - ReadWriteOnce
-      resources:
-        requests:
-          # Adjust the storage request size as needed.
-          storage: 5Gi
diff --git a/deployment/kubernetes/web_server-service-deployment.yaml b/deployment/kubernetes/web_server-service-deployment.yaml
deleted file mode 100644
index b19b8e37986..00000000000
--- a/deployment/kubernetes/web_server-service-deployment.yaml
+++ /dev/null
@@ -1,38 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: web-server-service
-spec:
-  selector:
-    app: web-server
-  ports:
-    - protocol: TCP
-      port: 80
-      targetPort: 3000
-  type: ClusterIP
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: web-server-deployment
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: web-server
-  template:
-    metadata:
-      labels:
-        app: web-server
-    spec:
-      containers:
-      - name: web-server
-        image: danswer/danswer-web-server:latest
-        imagePullPolicy: IfNotPresent
-        ports:
-        - containerPort: 3000
-        # There are some extra values since this is shared between services
-        # There are no conflicts though, extra env variables are simply ignored
-        envFrom:
-        - configMapRef:
-            name: env-configmap
diff --git a/k8s/README.md b/k8s/README.md
new file mode 100644
index 00000000000..3af163867d4
--- /dev/null
+++ b/k8s/README.md
@@ -0,0 +1,706 @@
+# `k8s/` — Kustomize-based Kubernetes manifests
+
+```
+k8s/
+├── base/              Environment-neutral manifests
+├── overlays/
+│   ├── prod/          Darwin AKS production
+│   └── local/         Local dev (Rancher Desktop, Docker Desktop, kind)
+└── optional/          Opt-in add-ons (split-background + Dask, KEDA autoscaling)
+```
+
+## Quick start
+
+```bash
+# Preview what would be applied:
+kubectl kustomize k8s/overlays/prod
+
+# Apply via the guarded wrapper (verify context first!). It diffs vs live
+# and REFUSES a Vespa version jump >30 minor releases (which would crash
+# the cluster — see "Vespa version guard" below and AGENTS.md §10):
+kubectl config current-context        # → 'darwin' for prod
+k8s/scripts/guarded-apply.sh prod
+
+# Local:
+kubectl config use-context rancher-desktop   # or docker-desktop / kind / etc.
+k8s/scripts/guarded-apply.sh local
+```
+
+> Raw `kubectl apply -k k8s/overlays/prod` still works, but prefer
+> `guarded-apply.sh` — it runs the Vespa version check + `kubectl diff`
+> before applying. Raw apply has no guard; that's how the Vespa outage
+> happened.
+>
+> **Vespa is no longer part of the app overlays.** `apply -k overlays/prod`
+> deploys the app only and never touches Vespa. Vespa has its own overlays
+> (`overlays/prod-vespa`, `overlays/local-vespa`) and is applied deliberately:
+> `k8s/scripts/guarded-apply.sh prod-vespa` (or `kubectl apply -k
+> k8s/overlays/prod-vespa`). Version upgrades still go through
+> `k8s/scripts/vespa-upgrade.sh`. See "Upgrade Vespa" below.
+
+## What lives where
+
+### `base/`
+
+One file per logical service. Each file contains the resources that *belong
+together* (e.g. `nginx.yaml` has the Deployment + Service + ConfigMap for
+nginx). Image references use *logical names* (`danswer-backend`, not
+`sfbrdevhelmweacr.azurecr.io/danswer/danswer-backend:vha-138`) — the overlay's
+`images:` block rewrites them at render time.
+
+`base/kustomization.yaml` lists every resource. To add a new service, drop
+its YAML in here and add it to that list.
+
+The **Vespa** cluster lives in its own subfolder, `base/vespa/`, with its own
+`kustomization.yaml`. It is **deliberately NOT referenced from
+`base/kustomization.yaml`** — so the app overlays (`prod`/`local`) don't deploy
+it. It's a stateful subsystem with a distinct lifecycle (pinned version,
+per-role image names, ordered upgrade procedure — see "Upgrade Vespa" below),
+and re-applying it on every routine app rollout risks rolling the StatefulSets
+on any manifest drift. Instead it has dedicated overlays — `overlays/prod-vespa`
+and `overlays/local-vespa` — that point at `base/vespa/` and carry the pinned
+Vespa images + namespace. Apply Vespa via those, never via the app overlay.
+
+**Base does NOT contain:**
+- `env-configmap` — generated by each overlay from `env.properties`
+- `danswer-secrets` — generated by each overlay from `secrets.env` (gitignored)
+
+### `overlays/prod/`
+
+Single source of truth for the Darwin production cluster:
+
+| File | What it controls |
+|---|---|
+| `kustomization.yaml` | image tags, replica counts, generator wiring, namespace (`darwin`) |
+| `env.properties` | every non-secret env var (committed) |
+| `secrets.env.example` | template for `secrets.env` (committed, placeholders) |
+| `secrets.env` | real prod secrets (**gitignored — never commit**) |
+
+To change a deployed image tag: edit `kustomization.yaml`'s `images:` block.
+To change a config value: edit `env.properties`. To rotate a secret: edit
+`secrets.env` and reapply. **One file per concern, in one place.**
+
+### `overlays/local/`
+
+Same shape as `prod/` but with local-dev defaults: AUTH_TYPE=disabled,
+DOMAIN=localhost, smaller replica counts, latest image tags, hosts pointed at
+`host.docker.internal` so external Postgres / Vespa running on the host
+machine (e.g. via docker-compose) are reachable.
+
+### `optional/`
+
+Opt-in add-ons, one directory each with its own `kustomization.yaml`.
+Most are kustomize **components** (`kind: Component`): image refs inside
+use the same logical names as base (`danswer-backend`), so when an overlay
+opts in, the overlay's `images:` block parameterizes them — identical
+to base. (Components are NOT meant for standalone `kubectl apply -f`; the
+logical image name only resolves through an overlay.)
+
+Two kinds of thing live under `optional/`:
+
+- **Components** (`kind: Component`) — opt-in *into a danswer overlay* via
+  its `components:` block. Image refs use the same logical names as base,
+  so the overlay's `images:` block parameterizes them. NOT for standalone
+  `kubectl apply -f`.
+- **Standalone installs** (`kind: Kustomization`) — cluster-scoped
+  infrastructure applied on their own with `kubectl apply -k`, not pulled
+  into an overlay.
+
+| Path | Kind | What it ships | When to use |
+|---|---|---|---|
+| `background-scaling/` | Component | `background-lite` (beat + celery + slack-listener co-located, 1 pod) + `background-indexer-scheduler` + remote Dask (`dask-scheduler`, `dask-worker`), replacing the combined `background` deployment in base | Horizontal scaling of background/indexing tasks |
+| `keda-indexing-autoscale/` | Component | A KEDA `ScaledObject` + `TriggerAuthentication` that autoscales `dask-worker` on indexing backlog | Bursty indexing; scale workers to 0 when idle (needs KEDA + background-scaling) |
+| `keda/` | Standalone | The KEDA operator itself (CRDs + operator), pinned, into the `keda` namespace | Prereq for `keda-indexing-autoscale` — install once per cluster |
+
+**The "flag" for opting a *component* in** is a single line in the
+overlay's `kustomization.yaml` `components:` block (see "Apply an optional
+component" below). To add another: create a new directory under
+`optional/` with a `kind: Component` `kustomization.yaml`, use logical
+image names in its manifests, and reference it from the overlay's
+`components:`.
+
+## First-time setup
+
+```bash
+# Bootstrap your prod secrets file from the template:
+cp k8s/overlays/prod/secrets.env.example k8s/overlays/prod/secrets.env
+$EDITOR k8s/overlays/prod/secrets.env       # fill in real values
+
+# (Same for local if you'll use the local overlay.)
+cp k8s/overlays/local/secrets.env.example k8s/overlays/local/secrets.env
+$EDITOR k8s/overlays/local/secrets.env
+
+# Verify the render before applying:
+kubectl kustomize k8s/overlays/prod | less
+
+# Apply the app (does NOT include Vespa):
+k8s/scripts/guarded-apply.sh prod
+
+# On a FRESH cluster, also apply Vespa once (separate overlay — guarded):
+k8s/scripts/guarded-apply.sh prod-vespa
+```
+
+## Common workflows
+
+### Bump a backend image tag in prod
+
+1. Edit `k8s/overlays/prod/kustomization.yaml`, change `images: - name: danswer-backend` → `newTag: <new-tag>`.
+2. Commit + push.
+3. On your machine with the `darwin` kube context active:
+   ```bash
+   k8s/scripts/guarded-apply.sh prod          # or: kubectl apply -k k8s/overlays/prod
+   ```
+
+### Upgrade Vespa (DANGER — use the script, never a bare apply)
+
+Vespa is version-stateful: the config server refuses an auto-upgrade
+spanning **>30 minor releases**, and forcing it past that risks an
+incompatible on-disk index format. A careless bump = cluster-wide outage (this
+happened — AGENTS.md §10). The five Vespa StatefulSets also must roll in
+a specific order, one at a time, health-checked between steps — ordering
+that **cannot** be expressed in kustomize (it's declarative; a `kubectl
+apply` of a bumped tag rolls every role at once, unordered).
+
+So Vespa upgrades go through a dedicated script, **not** `guarded-apply.sh`
+and **not** a hand-edit of the overlay tags. The manifests support this in
+two ways: each StatefulSet uses a **per-role logical image name**
+(`vespa-configserver`, `vespa-admin`, `vespa-content`, `vespa-feed`,
+`vespa-query` — see `base/vespa/`), so versions can move independently;
+and `content`/`admin` have readiness probes (with
+`publishNotReadyAddresses: true` on `vespa-internal` so peer discovery is
+never gated) so the rollout waits for each pod to be healthy.
+
+```bash
+# 1. Dry-run first — reads the live version, prints the ordered plan, changes nothing:
+DRY_RUN=1 k8s/scripts/vespa-upgrade.sh 8.620.43 darwin
+
+# 2. Run it. Upgrades in order: configserver → admin → content (one
+#    ordinal at a time, via partition stepping) → feed → query, waiting
+#    for /state/v1/health 200 between each. Refuses >30-minor / major /
+#    downgrade jumps (FORCE=1 to override). Single hop per run — for a
+#    bigger jump, run repeatedly with intermediate versions
+#    (8.600 → 8.630 → 8.660 → …), each image must exist + be format-compatible.
+k8s/scripts/vespa-upgrade.sh 8.620.43 darwin
+
+# 3. After success, sync the per-role vespa newTag values in
+#    k8s/overlays/{prod,local}-vespa/kustomization.yaml to the new version,
+#    so git ≈ live (the script reminds you).
+```
+
+The script uses `kubectl exec → localhost` for health checks (the cluster
+runs Istio; external probes hit mTLS), and stops immediately if any pod
+fails to come back healthy, leaving the cluster mid-upgrade for you to
+investigate rather than barrelling into the next role.
+
+### Vespa version guard (`k8s/scripts/guarded-apply.sh`)
+
+The wrapper reads the **live** running Vespa version and the version your
+overlay would deploy, and:
+- **refuses** an upgrade that jumps >30 minor releases (Vespa's limit),
+- **refuses** a major-version change (needs a dedicated migration),
+- **refuses** a floating/unparseable tag (`:latest`),
+- **warns + requires `FORCE=1`** on a large downgrade (legit only when
+  recovering to the on-disk version),
+- otherwise runs `kubectl diff` then `kubectl apply -k`.
+
+It checks against **live**, not the repo's previous pin, on purpose —
+config can drift out of git, and live is the only truth that matters at
+apply time.
+
+`guarded-apply.sh` is a **safety net for everyday applies** (config/image
+changes), not the Vespa upgrade tool — it would refuse a dangerous Vespa
+jump, but it still rolls all roles at once with no ordering. To actually
+upgrade Vespa, use `vespa-upgrade.sh` (see "Upgrade Vespa" above).
+
+### Add a new env var (non-secret)
+
+1. Append a `KEY=value` line in `k8s/overlays/prod/env.properties`.
+2. (Same for `overlays/local/env.properties` with the local value.)
+3. `kubectl apply -k k8s/overlays/prod` — kustomize regenerates the
+   `env-configmap` with the new value.
+4. **Restart the consumers — the apply does NOT do this for you.** We set
+   `disableNameSuffixHash: true`, so the ConfigMap keeps a stable name
+   and its content change does NOT trigger a pod rollout. `envFrom` reads
+   env only at pod start, so running pods keep the old values until
+   restarted. See "Which workloads to restart after a config change" below.
+
+### Add a new secret
+
+1. Add the key to `k8s/overlays/prod/secrets.env.example` with a placeholder.
+2. Add the same key to your local `k8s/overlays/prod/secrets.env` with the real value (gitignored).
+3. If the secret needs to be exposed to the app, either:
+   - Add an `env` entry with `secretKeyRef` to the relevant deployment in `base/`, or
+   - Rely on the `envFrom: secretRef: danswer-secrets` wiring already in `api-server.yaml` and `background.yaml` — every key in `secrets.env` is automatically exposed as an env var matching the key name.
+
+### Enable the Redis cache + per-user rate limiter
+
+Redis ships in `base/` (deployed in every environment). To turn the
+features on, flip the flags in `k8s/overlays/prod/env.properties`:
+
+```
+REDIS_KV_CACHE_ENABLED=true        # read-through cache on settings/tokens/invited-users
+REQUEST_RATE_LIMIT_ENABLED=true    # per-USER request cap (20/min, 300/hr below)
+REQUEST_RATE_LIMIT_PER_MINUTE=20   # per authenticated user (per-IP for anon), not global
+REQUEST_RATE_LIMIT_PER_HOUR=300
+PERSONA_CACHE_ENABLED=true         # global persona-list cache + per-user group cache
+CC_PAIR_INFO_CACHE_ENABLED=true    # chat-page connector indexing-status (~300ms read), 60s global TTL
+DOCUMENT_SET_CACHE_ENABLED=true    # per-user document-set list (chat bundle), write-through busted, 300s backstop
+```
+
+Then apply **and restart the consumers** (the apply alone won't — see the
+note below on `disableNameSuffixHash`):
+
+```bash
+k8s/scripts/guarded-apply.sh prod          # or: kubectl apply -k k8s/overlays/prod
+kubectl rollout restart deploy/api-server-deployment deploy/background-deployment -n darwin
+kubectl rollout status  deploy/api-server-deployment -n darwin
+```
+
+Redis (`base/redis.yaml`) is already running, so the pods connect on
+restart. If Redis is down, the cache and rate-limiter code fails open — it
+degrades to direct Postgres reads / permissive limits, never an outage.
+
+### Verify Redis caching is actually working
+
+After enabling + restarting, confirm the cache is being populated and hit.
+All commands run against the in-cluster Redis (`redis-0`, no auth). The
+cache uses these key namespaces:
+
+| Key | Feature |
+|---|---|
+| `danswer:kv:<name>` | KV cache (settings, tokens, invited users) — `REDIS_KV_CACHE_ENABLED` |
+| `danswer:personas:all:not_deleted` | Assistants list (global) — `PERSONA_CACHE_ENABLED` |
+| `danswer:personas:groups:<user_id>` | Per-user group cache — `PERSONA_CACHE_ENABLED` |
+| `danswer:ratelimit:msg:<actor>:<min\|hour>:<bucket>` | Per-user request counters — `REQUEST_RATE_LIMIT_ENABLED` |
+| `danswer:cc_pair_basic_info` | Chat-page connector indexing-status — `CC_PAIR_INFO_CACHE_ENABLED` |
+| `danswer:document_sets:all` | Global document-set list (chat bundle, MIT) — `DOCUMENT_SET_CACHE_ENABLED` |
+
+**1. Are the cache keys present?** (fastest "is it on" check — use `--scan`, never `KEYS`, on a live Redis)
+```bash
+kubectl exec -n darwin redis-0 -c redis -- redis-cli --scan --pattern 'danswer:*'
+```
+Seeing `danswer:personas:all:not_deleted` means the assistants API
+(`GET /persona`) has cached. No `danswer:*` keys at all = the flags
+didn't take effect (pods not restarted? flag not `true`?).
+
+**2. Is it being hit?** (hit/miss ratio — cluster-wide, but proves reads hit cache)
+```bash
+kubectl exec -n darwin redis-0 -c redis -- redis-cli INFO stats | grep keyspace
+# keyspace_hits should climb far faster than keyspace_misses
+```
+
+**3. Inspect a specific entry** (TTL counting down + real payload):
+```bash
+kubectl exec -n darwin redis-0 -c redis -- redis-cli TTL    danswer:personas:all:not_deleted   # ~86400, decreasing
+kubectl exec -n darwin redis-0 -c redis -- redis-cli STRLEN danswer:personas:all:not_deleted   # bytes of cached JSON
+```
+
+**4. Watch a live request hit the cache** (definitive — run, then load Manage Assistants in the UI):
+```bash
+kubectl exec -it -n darwin redis-0 -c redis -- redis-cli MONITOR
+#  hit:  "GET" "danswer:personas:all:not_deleted"   (with no "SET" following it)
+#  miss: the same "GET", then "SET" "danswer:personas:all:not_deleted" "[...]" "EX" "86400"   (MONITOR shows commands only, never replies)
+```
+⚠️ Stop `MONITOR` quickly (Ctrl-C) — it echoes every command and is heavy on a busy Redis.
+
+**5. Force a miss→refill** (proves the read-through path; safe — just one extra Postgres read):
+```bash
+kubectl exec -n darwin redis-0 -c redis -- redis-cli DEL danswer:personas:all:not_deleted
+# load the assistants page once, then:
+kubectl exec -n darwin redis-0 -c redis -- redis-cli EXISTS danswer:personas:all:not_deleted   # 1 = repopulated
+```
+
+**6. Verify write-through invalidation** (no stale assistant lists):
+```bash
+kubectl exec -n darwin redis-0 -c redis -- redis-cli TTL danswer:personas:all:not_deleted   # note it exists
+# rename an assistant in the admin UI, then re-check:
+kubectl exec -n darwin redis-0 -c redis -- redis-cli TTL danswer:personas:all:not_deleted   # -2 = busted by the mutation
+# next page load refills it with the new name.
+```
+
+**Note:** the cache code is **silent on success** and only logs on Redis
+errors (fail-open). So there's nothing in the api-server logs confirming
+a hit — Redis-side inspection above is the only way to observe it. A
+`Redis GET/SET/DEL failed` warning in api-server/background logs means
+Redis is unreachable and the app is silently falling back to Postgres.
+
+### Which workloads to restart after a config change
+
+Because of `disableNameSuffixHash: true`, an `env-configmap` change does
+**not** auto-roll pods (see Footguns). After any `env.properties` edit,
+restart the pods that actually consume the changed vars:
+
+| Changed vars | Restart | Why |
+|---|---|---|
+| Redis flags (`REDIS_*`, `REQUEST_RATE_LIMIT_*`, `PERSONA_CACHE_*`, `CC_PAIR_INFO_CACHE_*`, `DOCUMENT_SET_CACHE_*`) | `api-server` + `background` | api-server runs the rate limiter + reads the caches; background reads config (Slack/OAuth tokens) and busts the persona + document-set caches on mutations (incl. the doc-set sync task) |
+| Celery broker (`CELERY_BROKER_REDIS_ENABLED`, `CELERY_REDIS_DB_NUMBER`) | `background` | the Celery worker + beat (in the `background` pod) read the broker URL at startup. **Restart worker AND beat together** so they don't split across two brokers mid-flight |
+| DB pool (`POSTGRES_POOL_SIZE`, `POSTGRES_POOL_OVERFLOW`) | `api-server` + `background` | engine pool is built once per process at first DB use; both pods build their own |
+| LLM / search / connector vars (`GEN_AI_*`, `QA_TIMEOUT`, `MULTILINGUAL_*`, etc.) | `api-server` + `background` | both run the chat/search/index paths |
+| Model-server vars (`DOCUMENT_ENCODER_MODEL`, `NORMALIZE_EMBEDDINGS`, …) | `inference-model-server` + `indexing-model-server` | only the model servers read these |
+| `WEB_DOMAIN` / `INTERNAL_URL` / frontend | `web-server` (+ `api-server` for `WEB_DOMAIN` / OIDC redirect) | |
+
+```bash
+# The common case (Redis features, or any backend env change):
+kubectl rollout restart deploy/api-server-deployment deploy/background-deployment -n darwin
+```
+
+> **Split-background topology** (the `background-scaling` component): there
+> is no `background-deployment` — restart the split pods that run danswer
+> backend code instead:
+> `kubectl rollout restart deploy/background-lite-deployment deploy/background-indexer-scheduler-deployment deploy/dask-worker-deployment -n darwin`.
+> The model servers, `nginx`, and `web-server` do **not** use the Redis
+> features — no need to restart them for a Redis flag flip.
+
+### Celery on Redis + Postgres pool sizing
+
+Two knobs that reduce/contain Postgres connection pressure (the real ceiling
+for chat at scale — DB sessions are held through the LLM stream):
+
+- **`CELERY_BROKER_REDIS_ENABLED=true`** (prod on, local off) moves Celery's
+  broker + result backend from Postgres to Redis (logical DB
+  `CELERY_REDIS_DB_NUMBER`, default `1` — separate from the cache/rate-limit
+  DB `0`). This stops the Celery worker + beat from polling/writing Postgres
+  for their queue. Task **status** is unaffected: this fork tracks it in its
+  own `task_queue_jobs` table, not the Celery backend. Indexing is still Dask.
+  Note: unlike the **cache** (fail-open), the **broker** is a hard dependency
+  — if Redis is down, Celery maintenance tasks (prune / doc-set sync /
+  user-group sync / deletion / analytics / retention) won't run until it's
+  back. Chat and indexing are unaffected (they don't use Celery).
+- **`POSTGRES_POOL_SIZE` / `POSTGRES_POOL_OVERFLOW`** (default 40 / 10) size
+  the SQLAlchemy pool **per process**. A pod's max connections is
+  `(size + overflow)` per engine, and api-server uses both a sync and an
+  async engine — so up to `2 × (size + overflow)` per api-server pod.
+  Cluster total = that × replicas of every DB-touching pod, and must stay
+  under Azure Postgres `max_connections` with headroom for boot migrations.
+  **Lower these as you add api-server replicas.**
+
+Verify the Celery queue is on Redis (not Postgres) after enabling:
+```bash
+# Celery keys live in DB 1; you should see kombu/celery keys appear here:
+kubectl exec -n darwin redis-0 -c redis -- redis-cli -n 1 --scan --pattern '*' | head
+# And the old Postgres broker tables should stop growing (kombu_message).
+```
+
+### File store: offload bytes to Azure Blob
+
+By default uploaded files / chat attachments / connector blobs are stored
+as **Postgres large objects**. At scale that bloats the DB/WAL/backups, and
+every read pins a Postgres connection for the whole file stream (competes
+with the chat connection pool). The `AzureBlobFileStore` backend keeps the
+small **metadata row** in Postgres but moves the **bytes** to Azure Blob.
+
+Cutover (graceful — un-migrated files keep reading from their lobj):
+
+```bash
+# 1. The image must include azure-storage-blob (it's in requirements now) —
+#    rebuild/redeploy the backend image.
+# 2. alembic upgrade head   (adds file_store.object_key, makes lobj_oid nullable)
+# 3. Put the ACCOUNT-KEY connection string in secrets.env (Storage account →
+#    Access keys → Connection string — the one with AccountName + AccountKey):
+#    AZURE_BLOB_CONNECTION_STRING=...      (see secrets.env.example)
+#    Direct upload needs the account KEY so the server can mint scoped
+#    per-blob SAS (see below); a SAS-token connection string is rejected.
+# 4. Flip the backend in env.properties:
+#    FILE_STORE_TYPE=AzureBlobFileStore
+#    AZURE_BLOB_CONTAINER=danswer-files
+# 5. Apply + restart api-server & background (configmap won't auto-roll).
+# 6. Migrate existing lobjs → Blob (idempotent; safe to resume):
+cd backend && PYTHONPATH=$(pwd) python scripts/migrate_file_store_to_azure_blob.py
+#    (--dry-run first to see the count)
+```
+
+Notes:
+- Steps 4–5 can precede 6: new uploads go to Blob immediately, and reads of
+  not-yet-migrated files transparently fall back to the lobj. Run the
+  migration promptly so the lobjs (and the bloat) actually go away.
+- `azure-storage-blob` is **lazy-imported** — the app runs fine without it
+  unless `FILE_STORE_TYPE=AzureBlobFileStore` is set, so the dep/flag are
+  decoupled.
+- Default (`PostgresBackedFileStore`) is unchanged; this is fully opt-in.
+- **Setting the connection string in a shell?** Single-quote it:
+  `export AZURE_BLOB_CONNECTION_STRING='...AccountKey=...==;...'`. The `;`
+  separators are shell command separators — unquoted, the value is silently
+  truncated at the first `;` (you'll see `KeyError: 'AccountName'` /
+  "Connection string missing required connection details"). Verify with
+  `python -c "import os;print(repr(os.environ.get('AZURE_BLOB_CONNECTION_STRING')))"`.
+- The account key is the storage account's **master credential** — keep it
+  in `secrets.env` / `danswer-secrets` only (never `env.properties` or git),
+  and **rotate it** if it's ever exposed (Access keys → Rotate).
+
+#### Direct-to-Blob chat uploads (requires Storage CORS)
+
+When the Azure backend is active, chat file uploads go **straight from the
+browser to Blob** via a short-lived SAS URL (`POST /chat/file/upload-url` →
+browser `PUT` → `POST /chat/file/confirm`), bypassing the api-server — much
+faster and it shows a real progress bar. On the Postgres backend the client
+auto-falls-back to the two-hop server upload.
+
+The api-server mints the SAS from the **account key** in the connection
+string: a per-blob, write+create-only, 30-minute token. The master key never
+leaves the server; the browser only ever sees a token scoped to one blob.
+This is why step 3 above requires the account-key connection string — a
+SAS-token connection string has no key to sign with and is rejected with a
+clear error.
+
+For the browser `PUT` to succeed, the storage account needs **CORS rules**
+allowing each web origin (one-time, per account). Add a rule for **every**
+origin that will upload — prod *and* local dev if you test against this
+account:
+
+```bash
+az storage cors add --services b \
+  --methods PUT OPTIONS GET \
+  --origins https://darwin.westeurope.cloudapp.azure.com http://localhost:3000 \
+  --allowed-headers '*' --exposed-headers '*' --max-age 3600 \
+  --account-name <account> --account-key <key>
+```
+
+Troubleshooting direct uploads:
+- **"network error during upload"** in the browser = missing/incorrect CORS
+  rule (the preflight `OPTIONS` fails). First thing to check. The two-hop
+  server path doesn't need CORS, so this only bites the Blob backend.
+- The target container is **auto-created** on first use (the account key has
+  create permission), so you don't have to pre-create it.
+
+#### Chat upload limits
+
+Chat-attached files are stuffed **whole** into the LLM prompt (the search
+tool is disabled when files are attached), so they're bounded by two env
+gates — enforced on the backend and pre-checked in the browser:
+
+- `CHAT_FILE_MAX_SIZE_MB` (default `25`) — hard byte cap; oversize uploads
+  are rejected with a message. Surfaced to the web client as
+  `Settings.chat_file_max_size_mb` so it can reject before uploading.
+- `CHAT_FILE_MAX_TOKEN_FRACTION` (default `0.5`) — after text extraction, a
+  file whose token count exceeds this fraction of the model's input window
+  is rejected (it would crowd out the actual conversation).
+
+### Apply an optional component (split-background + Dask)
+
+Optional features are kustomize components, opted into from the overlay
+so they inherit its image tags / namespace / generated config. The
+component carries its own replica counts (in
+`optional/background-scaling/kustomization.yaml`) and env-neutral
+manifests; the overlay adds image tags and any env-specific scheduling.
+
+**To enable it, two edits to the overlay's `kustomization.yaml` are
+REQUIRED (steps 1–2); step 3 is prod-only and optional.**
+
+```yaml
+# 1. REQUIRED — pull the component in:
+components:
+  - ../../optional/background-scaling
+
+# 2. REQUIRED — scale the base combined `background` deployment to 0, or
+#    you run two Celery beat schedulers on one broker (every periodic task
+#    fires twice). This is the ONLY entry you must add to the overlay's
+#    replicas: block; the split deployments' counts come from the component
+#    (see "Replica counts" below).
+replicas:
+  - name: background-deployment
+    count: 0
+
+# 3. OPTIONAL (prod only) — the component manifests are env-neutral — no node
+#    affinity. To pin the indexing-side pods to the Darwin indexcpu pool,
+#    add a patch. Skip this on local (no such node pool):
+patches:
+  - target:
+      kind: Deployment
+      labelSelector: "app in (background-lite,background-indexer-scheduler,dask-scheduler,dask-worker)"
+    patch: |-
+      - op: add
+        path: /spec/template/spec/affinity
+        value:
+          nodeAffinity:
+            requiredDuringSchedulingIgnoredDuringExecution:
+              nodeSelectorTerms:
+                - matchExpressions:
+                    - {key: agentpool, operator: In, values: [indexcpu]}
+      - op: add
+        path: /spec/template/spec/tolerations
+        value:
+          - {effect: NoSchedule, key: darwin, operator: Equal, value: indexing}
+```
+
+Then preview, verify, and apply — these are the commands to deploy the
+background-scaling topology (there is no standalone `kubectl apply -f`
+for `optional/background-scaling/`: its manifests use the logical
+`danswer-backend` image name, which only resolves through the overlay's
+`images:` block):
+
+```bash
+# Preview the rendered split-background + Dask pods:
+kubectl kustomize k8s/overlays/prod | grep -E "name: (background|dask)"
+
+# Diff against the live cluster before committing to it:
+kubectl diff -k k8s/overlays/prod
+
+# Apply (verify context first!):
+kubectl config current-context        # → 'darwin' for prod
+kubectl apply -k k8s/overlays/prod
+
+# Watch the new pods come up:
+kubectl rollout status deploy/dask-scheduler-deployment
+kubectl rollout status deploy/background-lite-deployment
+```
+
+kustomize applies everything together; the new pods reference the same
+overlay-generated `env-configmap` / `danswer-secrets`, and their
+`danswer-backend` image is rewritten to the overlay's pinned tag.
+
+#### Replica counts
+
+The four split deployments get their counts from the **`replicas:` block in
+`optional/background-scaling/kustomization.yaml`** — that's the single
+source of truth:
+
+```yaml
+replicas:
+  - name: background-lite-deployment              # singleton — beat + slack; never >1
+    count: 1
+  - name: background-indexer-scheduler-deployment # singleton — the update.py loop
+    count: 1
+  - name: dask-scheduler-deployment               # singleton
+    count: 1
+  - name: dask-worker-deployment                  # ← THE indexing-throughput knob
+    count: 2
+```
+
+The `replicas: N` you see inside each deployment YAML is just a manifest
+**default** — kustomize's `replicas:` transformer overrides it at render
+time, so editing the YAML directly has no effect through kustomize. To
+scale indexing, change `dask-worker-deployment`'s count here. (You can also
+override any of these from the *overlay's* own `replicas:` block — the
+overlay is applied last and wins — handy if you want a different
+dask-worker count per environment without editing the shared component.)
+
+`background-lite`, `background-indexer-scheduler`, and `dask-scheduler` are
+**hard singletons** (`count: 1`); raising them double-runs beat / the Slack
+websocket / the scheduler loop. Only `dask-worker` scales.
+
+#### Volumes / PVCs — the split deployments mount NONE
+
+None of the four split deployments mount `dynamic-pvc` or
+`file-connector-pvc`. In this fork the file store (File-connector uploads)
+is **Postgres-backed** (`PGFileStore` large objects) and the dynamic config
+store is Postgres-backed too — nothing reads `/home/storage` or
+`/home/file_connector_storage` (grep the code: zero references). The mounts
+were upstream carryover. The base `api-server` / `background` deployments
+also no longer mount them. The `dynamic-pvc` / `file-connector-pvc` claims
+are still **defined** in `persistent-volumes.yaml` (so the live volumes
+aren't deleted) — they're simply unmounted everywhere now.
+
+(Aside: `dynamic-pvc` is declared `ReadWriteOnce` yet was mounted by
+api-server + background on different nodes in prod — that "worked" only
+because `azurefile-csi` is an SMB share, not a block device, so it ignores
+the single-attach restriction. The RWO label was misleading-for-usage, not
+an active outage risk. Moot now that nothing mounts it.)
+
+Rollback: remove the `components:` line (and the patch), set `background-deployment`
+back to `count: 1`, re-apply. (The split pods are pruned only if you apply with
+`kubectl apply -k <overlay> --prune -l <selector>` — `--prune` requires `-l` or
+`--all` — otherwise delete them by label.) Do NOT run the combined `background`
+deployment and `background-lite` at non-zero replicas simultaneously — both run a
+celery beat, and two beats on one broker fire every periodic task twice.
+
+### KEDA indexing autoscale (autoscale dask-worker on backlog)
+
+`optional/keda-indexing-autoscale/` autoscales `dask-worker-deployment`
+based on real indexing demand read from Postgres, instead of a fixed
+replica count. Use it when indexing load is bursty and you'd rather not
+pay for idle workers.
+
+**How the metric works.** A KEDA PostgreSQL scaler runs this every 30s:
+
+```sql
+SELECT COALESCE(SUM(LEAST(1, cnt)), 0) FROM (
+  SELECT con.source, COUNT(*) cnt
+  FROM index_attempt ia JOIN connector con ON ia.connector_id = con.id
+  WHERE ia.status IN ('NOT_STARTED','IN_PROGRESS')
+  GROUP BY con.source) s
+```
+
+It returns the number of attempts that can run **concurrently** right now,
+assuming `INDEXING_PER_SOURCE_CAP=1` (the default, one per source): the
+`LEAST(1, cnt)` hard-codes that cap, so update the query if you raise it. It is
+deliberately **not** a raw pending count: 10 queued attempts of the same source still only run
+one at a time, so spinning up 10 workers would waste 9. `targetQueryValue: 1` → desired replicas = the metric.
+
+**Why it's safe to scale down:** the metric counts `IN_PROGRESS` too, so the
+replica count never drops below the number of running jobs. Kubernetes still
+picks which pod to remove, though, so a busy worker can be the one terminated
+(see the companion change below). Scale-to-0 happens only when nothing is queued
+or running. (`status` is stored UPPERCASE — `native_enum=False` — verified against the live DB.)
+
+**Prerequisites + how to enable:**
+1. Install the KEDA operator cluster-wide (CRDs + operator, into its own
+   `keda` namespace — pinned, no Helm):
+   ```bash
+   kubectl apply --server-side -k k8s/optional/keda
+   kubectl get pods -n keda          # operator + metrics-apiserver Running
+   ```
+   (Installed once per cluster, independent of the danswer overlays.
+   `--server-side` is required — KEDA's CRDs are too large for client-side
+   apply. To bump KEDA, edit the version in `k8s/optional/keda/kustomization.yaml`.)
+2. Opt in **after** background-scaling, and **remove** `dask-worker-deployment`
+   from the background-scaling `replicas:` block (KEDA owns that count now —
+   leaving a static replicas entry fights the autoscaler):
+   ```yaml
+   # k8s/overlays/prod/kustomization.yaml
+   components:
+     - ../../optional/background-scaling
+     - ../../optional/keda-indexing-autoscale
+   ```
+3. The scaler's `host`/`userName`/`dbName` in `scaledobject.yaml` are the
+   Darwin prod Postgres coords — make sure they match the overlay's
+   `POSTGRES_*`. The password comes from `danswer-secrets` via a
+   `TriggerAuthentication` (no duplication).
+4. `k8s/scripts/guarded-apply.sh prod`, then watch:
+   ```bash
+   kubectl get scaledobject,hpa -n darwin
+   kubectl get pods -n darwin -l app=dask-worker -w
+   ```
+
+**Tuning:** `maxReplicaCount` (default 4) should be ≈ your number of
+distinct active source types (more is wasted under `PER_SOURCE_CAP=1`).
+`minReplicaCount: 0` saves idle cost but adds ~30s-2min cold start on the
+first index after idle — set to 1 to keep a worker warm.
+
+**Recommended companion change** (in `background-scaling/dask-worker.yaml`):
+give the worker a `terminationGracePeriodSeconds` and a preStop that
+retires the Dask worker, so a scale-down lets the current index attempt
+finish instead of being killed mid-run. Even without it, a killed attempt
+is retried by the indexing pipeline (no data loss, just rework).
+
+## Conventions
+
+- **`k8s/overlays/*/secrets.env` is gitignored.** Never commit it — it
+  holds real secret values. Commit `secrets.env.example` instead.
+- **The `deployment/kubernetes/*` tree is upstream Onyx reference only.**
+  Not applied to Darwin. See AGENTS.md "Critical fact §9".
+
+## Footguns
+
+- **`disableNameSuffixHash: true`** is set in both overlays. Without it,
+  kustomize appends a content hash to generated ConfigMap/Secret names
+  (`env-configmap-abc123`), which would break deployments referencing
+  `env-configmap`/`danswer-secrets` by their plain names. Don't remove.
+  **Consequence:** because the name is stable, a ConfigMap/Secret content
+  change does NOT trigger an automatic pod rollout (the hash-suffix
+  behavior is exactly what would). After any `env.properties` /
+  `secrets.env` change you must **manually `kubectl rollout restart`** the
+  consuming workloads — see "Which workloads to restart after a config
+  change". (The trade-off is deliberate: stable names so the optional
+  components + secretKeyRefs resolve, at the cost of manual restarts.)
+- **`behavior: create`** on the configMapGenerator means "create new", not
+  "merge with an existing one in base". Base intentionally ships no
+  ConfigMap with this name; the overlay owns it. If you ever add one to
+  base, switch to `behavior: merge` or `replace` to avoid name clash.
+- **Image refs in base must match `images:` entries by *name*.** If you
+  add a new image to base/ (e.g. `redis-exporter`), you must also add it
+  to the `images:` block in each overlay — otherwise the literal string
+  ships unchanged.
+- **`secrets.env` writes a Secret of type `Opaque`** with the key=value
+  pairs as base64-encoded data fields. If you need a different secret
+  type (e.g. `kubernetes.io/dockerconfigjson` for pull secrets), generate
+  it separately or override in the overlay.
diff --git a/k8s/base/api-server.yaml b/k8s/base/api-server.yaml
new file mode 100644
index 00000000000..b8175131e03
--- /dev/null
+++ b/k8s/base/api-server.yaml
@@ -0,0 +1,112 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: api-server-deployment
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: api-server
+  strategy:
+    rollingUpdate:
+      maxSurge: 25%
+      maxUnavailable: 25%
+    type: RollingUpdate
+  template:
+    metadata:
+      labels:
+        app: api-server
+    spec:
+      containers:
+      - command:
+        - /bin/sh
+        - -c
+        - 'alembic upgrade heads &&
+
+          echo "Starting Danswer Api Server" &&
+
+          uvicorn danswer.main:app --host 0.0.0.0 --port 8080
+
+          '
+        env:
+        - name: POSTGRES_USER
+          valueFrom:
+            secretKeyRef:
+              key: postgres_user
+              name: danswer-secrets
+        - name: POSTGRES_PASSWORD
+          valueFrom:
+            secretKeyRef:
+              key: postgres_password
+              name: danswer-secrets
+        - name: OAUTH_CLIENT_ID
+          valueFrom:
+            secretKeyRef:
+              key: oauth_client_id
+              name: danswer-secrets
+        - name: OAUTH_CLIENT_SECRET
+          valueFrom:
+            secretKeyRef:
+              key: oauth_client_secret
+              name: danswer-secrets
+        - name: USER_AUTH_SECRET
+          valueFrom:
+            secretKeyRef:
+              key: user_auth_secret
+              name: danswer-secrets
+        envFrom:
+        - configMapRef:
+            name: env-configmap
+        - secretRef:
+            name: danswer-secrets
+        image: danswer-backend
+        imagePullPolicy: IfNotPresent
+        name: api-server
+        ports:
+        - containerPort: 8080
+          protocol: TCP
+        # startupProbe: the container runs `alembic upgrade heads` BEFORE
+        # uvicorn, so /health isn't up until migrations finish. This gives
+        # migrations + boot ~5 min (30 × 10s) before readiness starts counting;
+        # past that budget the kubelet restarts the container. It also gates on
+        # Postgres: no migrations → no HTTP → never passes startup → pod isn't Ready.
+        startupProbe:
+          httpGet:
+            path: /health
+            port: 8080
+          periodSeconds: 10
+          failureThreshold: 30
+          timeoutSeconds: 5
+        # readinessProbe: gates the api-server Service so it never routes
+        # to a pod still booting. Checks the app's OWN /health — NOT Vespa
+        # or Redis (those are partial/optional deps; coupling the API's
+        # availability to them would amplify outages — see the Vespa
+        # incident). No liveness probe by design: an aggressive liveness on
+        # a slow-starting api-server could kill it mid-migration.
+        readinessProbe:
+          httpGet:
+            path: /health
+            port: 8080
+          periodSeconds: 10
+          timeoutSeconds: 5
+          failureThreshold: 3
+        resources: {}
+        # No volumes: the file store (uploads) and dynamic config are
+        # Postgres-backed (or Azure Blob for files) — nothing reads
+        # /home/storage or /home/file_connector_storage (grep the code: zero
+        # references). The dynamic-pvc / file-connector-pvc claims still exist
+        # in persistent-volumes.yaml; they're just no longer mounted here.
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: api-server-service
+spec:
+  ports:
+  - name: api-server-port
+    port: 80
+    protocol: TCP
+    targetPort: 8080
+  selector:
+    app: api-server
+  type: ClusterIP
diff --git a/k8s/base/background.yaml b/k8s/base/background.yaml
new file mode 100644
index 00000000000..964dc6d3230
--- /dev/null
+++ b/k8s/base/background.yaml
@@ -0,0 +1,46 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: background-deployment
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: background
+  strategy:
+    type: Recreate
+  template:
+    metadata:
+      labels:
+        app: background
+    spec:
+      containers:
+      - command:
+        - /usr/bin/supervisord
+        env:
+        - name: POSTGRES_USER
+          valueFrom:
+            secretKeyRef:
+              key: postgres_user
+              name: danswer-secrets
+        - name: POSTGRES_PASSWORD
+          valueFrom:
+            secretKeyRef:
+              key: postgres_password
+              name: danswer-secrets
+        envFrom:
+        - configMapRef:
+            name: env-configmap
+        - secretRef:
+            name: danswer-secrets
+        image: danswer-backend
+        imagePullPolicy: IfNotPresent
+        name: background
+        resources:
+          requests:
+            cpu: '2'
+            memory: 10Gi
+        # No volumes — the old PVC mounts were vestigial (as in the split
+        # deployments): the file store and dynamic config are Postgres-backed
+        # (files optionally Azure Blob), so nothing reads /home/storage or
+        # /home/file_connector_storage. The PVCs remain defined in persistent-volumes.yaml, just unmounted here.
diff --git a/k8s/base/indexing-model-server.yaml b/k8s/base/indexing-model-server.yaml
new file mode 100644
index 00000000000..363eac06f61
--- /dev/null
+++ b/k8s/base/indexing-model-server.yaml
@@ -0,0 +1,78 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: indexing-model-server-statefulset
+spec:
+  persistentVolumeClaimRetentionPolicy:
+    whenDeleted: Retain
+    whenScaled: Retain
+  podManagementPolicy: OrderedReady
+  replicas: 2
+  selector:
+    matchLabels:
+      app: indexing-model-server
+      name: indexing-model-server
+  serviceName: indexing-model-server-service
+  template:
+    metadata:
+      labels:
+        app: indexing-model-server
+        name: indexing-model-server
+    spec:
+      containers:
+      - command:
+        - uvicorn
+        - model_server.main:app
+        - --host
+        - 0.0.0.0
+        - --port
+        - '9000'
+        env:
+        - name: INDEXING_ONLY
+          value: 'True'
+        envFrom:
+        - configMapRef:
+            name: env-configmap
+        image: danswer-model-server
+        imagePullPolicy: IfNotPresent
+        name: indexing-model-server
+        ports:
+        - containerPort: 9000
+          protocol: TCP
+        resources: {}
+        volumeMounts:
+        - mountPath: /root/.cache
+          name: indexing-model-storage
+  updateStrategy:
+    rollingUpdate:
+      partition: 0
+    type: RollingUpdate
+  volumeClaimTemplates:
+  - apiVersion: v1
+    kind: PersistentVolumeClaim
+    metadata:
+      creationTimestamp: null
+      name: indexing-model-storage
+    spec:
+      accessModes:
+      - ReadWriteOnce
+      resources:
+        requests:
+          storage: 10Gi
+      volumeMode: Filesystem
+    status:
+      phase: Pending
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: indexing-model-server-service
+spec:
+  ports:
+  - name: indexing-model-server-port
+    port: 9000
+    protocol: TCP
+    targetPort: 9000
+  selector:
+    app: indexing-model-server
+  type: ClusterIP
diff --git a/deployment/kubernetes/inference_model_server-service-deployment.yaml b/k8s/base/inference-model-server.yaml
similarity index 58%
rename from deployment/kubernetes/inference_model_server-service-deployment.yaml
rename to k8s/base/inference-model-server.yaml
index 790dc633db8..68b9cd66edc 100644
--- a/deployment/kubernetes/inference_model_server-service-deployment.yaml
+++ b/k8s/base/inference-model-server.yaml
@@ -1,17 +1,3 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: inference-model-server-service
-spec:
-  selector:
-    app: inference-model-server
-  ports:
-    - name: inference-model-server-port
-      protocol: TCP
-      port: 9000
-      targetPort: 9000
-  type: ClusterIP
----
 apiVersion: apps/v1
 kind: Deployment
 metadata:
@@ -21,36 +7,52 @@ spec:
   selector:
     matchLabels:
       app: inference-model-server
+  strategy:
+    rollingUpdate:
+      maxSurge: 25%
+      maxUnavailable: 25%
+    type: RollingUpdate
   template:
     metadata:
       labels:
         app: inference-model-server
     spec:
       containers:
-      - name: inference-model-server
-        image: danswer/danswer-model-server:latest
-        imagePullPolicy: IfNotPresent
-        command: [ "uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000" ]
-        ports:
-        - containerPort: 9000
+      - command:
+        - uvicorn
+        - model_server.main:app
+        - --host
+        - 0.0.0.0
+        - --port
+        - '9000'
         envFrom:
         - configMapRef:
             name: env-configmap
+        image: danswer-model-server
+        imagePullPolicy: IfNotPresent
+        name: inference-model-server
+        ports:
+        - containerPort: 9000
+          protocol: TCP
+        resources: {}
         volumeMounts:
-        - name: inference-model-storage
-          mountPath: /root/.cache
+        - mountPath: /root/.cache
+          name: inference-model-storage
       volumes:
       - name: inference-model-storage
         persistentVolumeClaim:
           claimName: inference-model-pvc
 ---
 apiVersion: v1
-kind: PersistentVolumeClaim
+kind: Service
 metadata:
-  name: inference-model-pvc
+  name: inference-model-server-service
 spec:
-  accessModes:
-    - ReadWriteOnce
-  resources:
-    requests:
-      storage: 3Gi
+  ports:
+  - name: inference-model-server-port
+    port: 9000
+    protocol: TCP
+    targetPort: 9000
+  selector:
+    app: inference-model-server
+  type: ClusterIP
diff --git a/k8s/base/kustomization.yaml b/k8s/base/kustomization.yaml
new file mode 100644
index 00000000000..77e204e9ed0
--- /dev/null
+++ b/k8s/base/kustomization.yaml
@@ -0,0 +1,32 @@
+# Base manifests — environment-neutral. Overlays under k8s/overlays/{prod,local}
+# layer on:
+#   - Image registry + tag (via `images:` in the overlay kustomization)
+#   - Replica counts (via `replicas:` in the overlay kustomization)
+#   - The env-configmap ConfigMap (via configMapGenerator from env.properties)
+#   - The danswer-secrets Secret (via secretGenerator from secrets.env — gitignored)
+#
+# Image refs in base use logical names (e.g. `danswer-backend` not
+# `sfbrdevhelmweacr.azurecr.io/danswer/danswer-backend:vha-138`). Kustomize
+# rewrites them via the overlay's `images:` block.
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  # Core danswer app
+  - api-server.yaml
+  - background.yaml
+  - web-server.yaml
+  - nginx.yaml
+  - inference-model-server.yaml
+  - indexing-model-server.yaml
+  - persistent-volumes.yaml
+  - redis.yaml  # cache + rate limiting; both prod and local deploy it
+
+  # NOTE: Vespa is intentionally NOT part of base. It is a stateful subsystem
+  # with a PINNED version and an ordered, health-gated upgrade procedure, so
+  # re-applying it on every routine app rollout (`kubectl apply -k
+  # overlays/{prod,local}`) is dangerous — a drifted manifest could roll the
+  # StatefulSets. Vespa manifests live in base/vespa/ and are applied
+  # DELIBERATELY via their own overlay: `kubectl apply -k
+  # k8s/overlays/{prod,local}-vespa`. Version upgrades go through
+  # k8s/scripts/vespa-upgrade.sh. See k8s/README.md.
diff --git a/k8s/base/nginx.yaml b/k8s/base/nginx.yaml
new file mode 100644
index 00000000000..6e56438f2fd
--- /dev/null
+++ b/k8s/base/nginx.yaml
@@ -0,0 +1,86 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: nginx-deployment
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: nginx
+  strategy:
+    rollingUpdate:
+      maxSurge: 25%
+      maxUnavailable: 25%
+    type: RollingUpdate
+  template:
+    metadata:
+      labels:
+        app: nginx
+    spec:
+      containers:
+      - command:
+        - /bin/sh
+        - -c
+        - 'while :; do sleep 6h & wait $$!; nginx -s reload; done & nginx -g "daemon
+          off;"
+
+          '
+        env:
+        - name: DOMAIN
+          valueFrom:
+            configMapKeyRef:
+              key: DOMAIN
+              name: env-configmap
+        image: nginx
+        imagePullPolicy: IfNotPresent
+        name: nginx
+        ports:
+        - containerPort: 80
+          protocol: TCP
+        resources: {}
+        volumeMounts:
+        - mountPath: /etc/nginx/conf.d
+          name: nginx-conf
+      volumes:
+      - configMap:
+          defaultMode: 420
+          name: nginx-configmap
+        name: nginx-conf
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: nginx-service
+spec:
+  ports:
+  - name: http
+    port: 80
+    protocol: TCP
+    targetPort: 80
+  - name: danswer
+    port: 3000
+    protocol: TCP
+    targetPort: 80
+  selector:
+    app: nginx
+  type: ClusterIP
+---
+apiVersion: v1
+data:
+  nginx.conf: "upstream api_server {\n    server api-server-service:80 fail_timeout=0;\n\
+    }\n\nupstream web_server {\n    server web-server-service:80 fail_timeout=0;\n\
+    }\n\nserver {\n    listen 80;\n    server_name $$DOMAIN;\n\n    client_max_body_size\
+    \ 5G;    # Maximum upload size\n\n    location ~ ^/api(.*)$ {\n        rewrite\
+    \ ^/api(/.*)$ $1 break;\n        proxy_set_header X-Real-IP $remote_addr;\n  \
+    \      proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n        proxy_set_header\
+    \ X-Forwarded-Proto $scheme;\n        proxy_set_header X-Forwarded-Host $host;\n\
+    \        proxy_set_header Host $host;\n        proxy_http_version 1.1;\n     \
+    \   proxy_buffering off;\n        proxy_redirect off;\n        proxy_pass http://api_server;\n\
+    \    }\n\n    location / {\n        proxy_set_header X-Real-IP $remote_addr;\n\
+    \        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n      \
+    \  proxy_set_header X-Forwarded-Proto $scheme;\n        proxy_set_header X-Forwarded-Host\
+    \ $host;\n        proxy_set_header Host $host;\n        proxy_http_version 1.1;\n\
+    \        proxy_redirect off;\n        proxy_pass http://web_server;\n    }\n}\n"
+kind: ConfigMap
+metadata:
+  name: nginx-configmap
diff --git a/darwin-kubernetes/persistent-volumes.yaml b/k8s/base/persistent-volumes.yaml
similarity index 90%
rename from darwin-kubernetes/persistent-volumes.yaml
rename to k8s/base/persistent-volumes.yaml
index ad83d5a3640..f6fc5fbcb32 100644
--- a/darwin-kubernetes/persistent-volumes.yaml
+++ b/k8s/base/persistent-volumes.yaml
@@ -5,7 +5,7 @@ metadata:
 spec:
   storageClassName: azurefile-csi-premium
   accessModes:
-    - ReadWriteOnce
+  - ReadWriteOnce
   resources:
     requests:
       storage: 5Gi
@@ -17,7 +17,7 @@ metadata:
 spec:
   storageClassName: azurefile-csi-premium
   accessModes:
-    - ReadWriteMany
+  - ReadWriteMany
   resources:
     requests:
       storage: 5Gi
diff --git a/k8s/base/redis.yaml b/k8s/base/redis.yaml
new file mode 100644
index 00000000000..14b69e17612
--- /dev/null
+++ b/k8s/base/redis.yaml
@@ -0,0 +1,66 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: redis
+  labels:
+    app: redis
+spec:
+  selector:
+    app: redis
+  ports:
+  - name: redis
+    port: 6379
+    targetPort: 6379
+  type: ClusterIP
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: redis
+spec:
+  serviceName: redis
+  replicas: 1
+  selector:
+    matchLabels:
+      app: redis
+  template:
+    metadata:
+      labels:
+        app: redis
+    spec:
+      containers:
+      - name: redis
+        image: redis:7.2-alpine
+        ports:
+        - containerPort: 6379
+          name: redis
+        args:
+        - --appendonly
+        - 'no'
+        - --save
+        - ''
+        - --maxmemory
+        - 512mb
+        - --maxmemory-policy
+        - allkeys-lru
+        # Container limit sits comfortably above --maxmemory because Redis
+        # uses extra RSS beyond the data limit for client output buffers and
+        # fragmentation (~1.3-1.5x is typical; persistence is disabled above, so
+        # no BGSAVE COW). We set ~2x for safety on a single-replica setup where OOM = downtime.
+        resources:
+          requests:
+            memory: 256Mi
+            cpu: 50m
+          limits:
+            memory: 1Gi
+            cpu: 500m
+        readinessProbe:
+          tcpSocket:
+            port: 6379
+          initialDelaySeconds: 5
+          periodSeconds: 10
+        livenessProbe:
+          tcpSocket:
+            port: 6379
+          initialDelaySeconds: 15
+          periodSeconds: 20
diff --git a/k8s/base/vespa/kustomization.yaml b/k8s/base/vespa/kustomization.yaml
new file mode 100644
index 00000000000..21c1926871d
--- /dev/null
+++ b/k8s/base/vespa/kustomization.yaml
@@ -0,0 +1,24 @@
+# Vespa search-index cluster — grouped into its own kustomization because it
+# is a distinct stateful subsystem with a lifecycle unlike the rest of base:
+#   - the image version is PINNED and must never float (:latest took prod
+#     down once — see AGENTS.md "Critical facts §10");
+#   - upgrades must step minor versions within Vespa's allowed hop AND roll
+#     the StatefulSets in a specific order. That ordering can't live in
+#     declarative manifests — it's in k8s/scripts/vespa-upgrade.sh.
+#
+# Each StatefulSet uses a PER-ROLE logical image name (vespa-configserver,
+# vespa-admin, vespa-content, vespa-feed, vespa-query) rather than one shared
+# `vespa` name. They all resolve to vespaengine/vespa at the same pinned tag
+# via the overlay `images:` block, but the per-role split is what lets the
+# upgrade script move one role's version at a time.
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  - vespa-config.yaml
+  - vespa-internal-service.yaml
+  - vespa-configserver.yaml
+  - vespa-admin.yaml
+  - vespa-content.yaml
+  - vespa-feed.yaml
+  - vespa-query.yaml
diff --git a/k8s/base/vespa/vespa-admin.yaml b/k8s/base/vespa/vespa-admin.yaml
new file mode 100644
index 00000000000..6687fc05324
--- /dev/null
+++ b/k8s/base/vespa/vespa-admin.yaml
@@ -0,0 +1,53 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: vespa-admin
+spec:
+  persistentVolumeClaimRetentionPolicy:
+    whenDeleted: Retain
+    whenScaled: Retain
+  podManagementPolicy: OrderedReady
+  replicas: 1
+  selector:
+    matchLabels:
+      app: vespa-admin
+      name: vespa-internal
+  serviceName: vespa-internal
+  template:
+    metadata:
+      labels:
+        app: vespa-admin
+        name: vespa-internal
+    spec:
+      containers:
+      - args:
+        - services
+        envFrom:
+        - configMapRef:
+            name: vespa-config
+        # Per-role logical image name — see vespa-configserver.yaml.
+        image: vespa-admin
+        imagePullPolicy: Always
+        name: vespa-admin
+        # Readiness on the metrics proxy's /state/v1/health (port 19092 —
+        # node-agnostic, same as content). Gates rollout; readiness-only (no
+        # liveness). See vespa-content.yaml re: publishNotReadyAddresses.
+        readinessProbe:
+          httpGet:
+            path: /state/v1/health
+            port: 19092
+          initialDelaySeconds: 30
+          periodSeconds: 10
+          timeoutSeconds: 5
+          failureThreshold: 6
+        resources:
+          limits:
+            memory: 2G
+          requests:
+            memory: 1G
+        securityContext:
+          runAsUser: 1000
+  updateStrategy:
+    rollingUpdate:
+      partition: 0
+    type: RollingUpdate
diff --git a/k8s/base/vespa/vespa-config.yaml b/k8s/base/vespa/vespa-config.yaml
new file mode 100644
index 00000000000..ffd2ec26527
--- /dev/null
+++ b/k8s/base/vespa/vespa-config.yaml
@@ -0,0 +1,6 @@
+apiVersion: v1
+data:
+  VESPA_CONFIGSERVERS: vespa-configserver-0.vespa-internal.darwin.svc.cluster.local,vespa-configserver-1.vespa-internal.darwin.svc.cluster.local,vespa-configserver-2.vespa-internal.darwin.svc.cluster.local
+kind: ConfigMap
+metadata:
+  name: vespa-config
diff --git a/k8s/base/vespa/vespa-configserver.yaml b/k8s/base/vespa/vespa-configserver.yaml
new file mode 100644
index 00000000000..06457987cea
--- /dev/null
+++ b/k8s/base/vespa/vespa-configserver.yaml
@@ -0,0 +1,152 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: vespa-configserver
+spec:
+  persistentVolumeClaimRetentionPolicy:
+    whenDeleted: Retain
+    whenScaled: Retain
+  podManagementPolicy: OrderedReady
+  replicas: 3
+  selector:
+    matchLabels:
+      app: vespa-configserver
+      name: vespa-internal
+  serviceName: vespa-internal
+  template:
+    metadata:
+      labels:
+        app: vespa-configserver
+        name: vespa-internal
+    spec:
+      containers:
+      - args:
+        - configserver,services
+        envFrom:
+        - configMapRef:
+            name: vespa-config
+        # Per-role logical image name (overlay images: resolves it to
+        # vespaengine/vespa:<tag>). Split per role so the upgrade script can
+        # move one StatefulSet's version at a time — see vespa-upgrade.sh.
+        image: vespa-configserver
+        imagePullPolicy: Always
+        name: vespa-configserver
+        # Gates the vespa-configserver Service so its consumers (api-server's
+        # ensure_indices_exist) only route here once the config server
+        # actually serves — not the instant the container starts. Vespa peers
+        # are not gated: VESPA_CONFIGSERVERS resolves via vespa-internal,
+        # which publishes not-ready addresses. Readiness-only: a slow config
+        # server leaves rotation but is never restarted (a liveness probe
+        # could kill a healthy-but-slow node into a restart loop). The initial
+        # delay is generous: bootstrap (ZK + app load) takes 1-2 min.
+        readinessProbe:
+          httpGet:
+            path: /state/v1/health
+            port: 19071
+          initialDelaySeconds: 45
+          periodSeconds: 10
+          timeoutSeconds: 5
+          failureThreshold: 6
+        resources:
+          limits:
+            memory: 4096M
+          requests:
+            memory: 4096M
+        securityContext:
+          runAsUser: 1000
+        volumeMounts:
+        - mountPath: /opt/vespa/var
+          name: vespa-var1
+        - mountPath: /opt/vespa/logs
+          name: vespa-logs1
+        - mountPath: /workspace
+          name: vespa-workspace1
+      initContainers:
+      - command:
+        - sh
+        - -c
+        - chown -R 1000 /opt/vespa/var
+        image: busybox
+        imagePullPolicy: Always
+        name: chown-var
+        resources: {}
+        securityContext:
+          runAsUser: 0
+        volumeMounts:
+        - mountPath: /opt/vespa/var
+          name: vespa-var1
+      - command:
+        - sh
+        - -c
+        - chown -R 1000 /opt/vespa/logs
+        image: busybox
+        imagePullPolicy: Always
+        name: chown-logs
+        resources: {}
+        securityContext:
+          runAsUser: 0
+        volumeMounts:
+        - mountPath: /opt/vespa/logs
+          name: vespa-logs1
+  updateStrategy:
+    rollingUpdate:
+      partition: 0
+    type: RollingUpdate
+  volumeClaimTemplates:
+  - apiVersion: v1
+    kind: PersistentVolumeClaim
+    metadata:
+      creationTimestamp: null
+      name: vespa-var1
+    spec:
+      accessModes:
+      - ReadWriteOnce
+      resources:
+        requests:
+          storage: 5Gi
+      volumeMode: Filesystem
+    status:
+      phase: Pending
+  - apiVersion: v1
+    kind: PersistentVolumeClaim
+    metadata:
+      creationTimestamp: null
+      name: vespa-logs1
+    spec:
+      accessModes:
+      - ReadWriteOnce
+      resources:
+        requests:
+          storage: 5Gi
+      volumeMode: Filesystem
+    status:
+      phase: Pending
+  - apiVersion: v1
+    kind: PersistentVolumeClaim
+    metadata:
+      creationTimestamp: null
+      name: vespa-workspace1
+    spec:
+      accessModes:
+      - ReadWriteOnce
+      resources:
+        requests:
+          storage: 1Gi
+      volumeMode: Filesystem
+    status:
+      phase: Pending
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: vespa-configserver
+spec:
+  ports:
+  - name: vespa-tenant-port
+    nodePort: 31439
+    port: 19071
+    protocol: TCP
+    targetPort: 19071
+  selector:
+    app: vespa-configserver
+  type: NodePort
diff --git a/k8s/base/vespa/vespa-content.yaml b/k8s/base/vespa/vespa-content.yaml
new file mode 100644
index 00000000000..f4486b5a228
--- /dev/null
+++ b/k8s/base/vespa/vespa-content.yaml
@@ -0,0 +1,92 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: vespa-content
+spec:
+  persistentVolumeClaimRetentionPolicy:
+    whenDeleted: Retain
+    whenScaled: Retain
+  podManagementPolicy: OrderedReady
+  replicas: 3
+  selector:
+    matchLabels:
+      app: vespa-content
+      name: vespa-internal
+  serviceName: vespa-internal
+  template:
+    metadata:
+      labels:
+        app: vespa-content
+        name: vespa-internal
+    spec:
+      containers:
+      - args:
+        - services
+        envFrom:
+        - configMapRef:
+            name: vespa-config
+        # Per-role logical image name — see vespa-configserver.yaml.
+        image: vespa-content
+        imagePullPolicy: Always
+        name: vespa-content
+        # Readiness on the metrics proxy's /state/v1/health (port 19092 —
+        # verified serving 200 on the live content nodes; this port exists on
+        # every Vespa node type, unlike the containers' 8080). Gates rollout
+        # so an upgrade won't move to the next content pod until this one is
+        # back up. Readiness-only — NO liveness (a slow content node loading
+        # its index must not be killed). Crucially, vespa-internal sets
+        # publishNotReadyAddresses: true, so a not-ready content pod is still
+        # resolvable for peer discovery — this probe gates rollout WITHOUT
+        # dropping the node from the cluster. initialDelay is generous because
+        # content nodes load buckets on boot.
+        readinessProbe:
+          httpGet:
+            path: /state/v1/health
+            port: 19092
+          initialDelaySeconds: 45
+          periodSeconds: 10
+          timeoutSeconds: 5
+          failureThreshold: 6
+        resources:
+          limits:
+            memory: 12G
+          requests:
+            memory: 6G
+        securityContext:
+          runAsUser: 1000
+        volumeMounts:
+        - mountPath: /opt/vespa/var
+          name: vespa-var
+      initContainers:
+      - command:
+        - sh
+        - -c
+        - chown -R 1000 /opt/vespa/var
+        image: busybox
+        imagePullPolicy: Always
+        name: chown-var
+        resources: {}
+        securityContext:
+          runAsUser: 0
+        volumeMounts:
+        - mountPath: /opt/vespa/var
+          name: vespa-var
+  updateStrategy:
+    rollingUpdate:
+      partition: 0
+    type: RollingUpdate
+  volumeClaimTemplates:
+  - apiVersion: v1
+    kind: PersistentVolumeClaim
+    metadata:
+      creationTimestamp: null
+      name: vespa-var
+    spec:
+      accessModes:
+      - ReadWriteOnce
+      resources:
+        requests:
+          storage: 100Gi
+      volumeMode: Filesystem
+    status:
+      phase: Pending
diff --git a/k8s/base/vespa/vespa-feed.yaml b/k8s/base/vespa/vespa-feed.yaml
new file mode 100644
index 00000000000..dc9f4b27ba3
--- /dev/null
+++ b/k8s/base/vespa/vespa-feed.yaml
@@ -0,0 +1,69 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: vespa-feed-container
+spec:
+  persistentVolumeClaimRetentionPolicy:
+    whenDeleted: Retain
+    whenScaled: Retain
+  podManagementPolicy: OrderedReady
+  replicas: 2
+  selector:
+    matchLabels:
+      app: vespa-feed-container
+      name: vespa-internal
+  serviceName: vespa-internal
+  template:
+    metadata:
+      labels:
+        app: vespa-feed-container
+        name: vespa-internal
+    spec:
+      containers:
+      - args:
+        - services
+        envFrom:
+        - configMapRef:
+            name: vespa-config
+        # Per-role logical image name — see vespa-configserver.yaml.
+        image: vespa-feed
+        imagePullPolicy: Always
+        name: vespa-feed-container
+        # Gates the vespa-feed Service so indexing/feed only routes here
+        # once the feed container serves. Readiness-only (no liveness — see
+        # the probe note in vespa-configserver.yaml).
+        readinessProbe:
+          httpGet:
+            path: /state/v1/health
+            port: 8080
+          initialDelaySeconds: 30
+          periodSeconds: 10
+          timeoutSeconds: 5
+          failureThreshold: 6
+        resources:
+          limits:
+            memory: 4G
+          requests:
+            memory: 1500M
+        securityContext:
+          runAsUser: 1000
+  updateStrategy:
+    rollingUpdate:
+      partition: 0
+    type: RollingUpdate
+---
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    app: vespa
+  name: vespa-feed
+spec:
+  ports:
+  - name: api
+    port: 8080
+    protocol: TCP
+    targetPort: 8080
+  selector:
+    app: vespa-feed-container
+  type: ClusterIP
diff --git a/k8s/base/vespa/vespa-internal-service.yaml b/k8s/base/vespa/vespa-internal-service.yaml
new file mode 100644
index 00000000000..76086f9caa5
--- /dev/null
+++ b/k8s/base/vespa/vespa-internal-service.yaml
@@ -0,0 +1,20 @@
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    name: vespa-internal
+  name: vespa-internal
+spec:
+  clusterIP: None
+  clusterIPs:
+  - None
+  # Peer discovery (VESPA_CONFIGSERVERS, stable pod DNS like
+  # vespa-content-0.vespa-internal...) MUST NOT be gated by pod readiness:
+  # nodes need to resolve each other to *become* ready, and the content/admin
+  # readiness probes added for rollout-gating would otherwise drop a slow or
+  # booting node from this headless service's DNS and disrupt the cluster.
+  # Publishing not-ready addresses decouples discovery from readiness.
+  publishNotReadyAddresses: true
+  selector:
+    name: vespa-internal
+  type: ClusterIP
diff --git a/k8s/base/vespa/vespa-query.yaml b/k8s/base/vespa/vespa-query.yaml
new file mode 100644
index 00000000000..224c8212ba5
--- /dev/null
+++ b/k8s/base/vespa/vespa-query.yaml
@@ -0,0 +1,71 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: vespa-query-container
+spec:
+  persistentVolumeClaimRetentionPolicy:
+    whenDeleted: Retain
+    whenScaled: Retain
+  podManagementPolicy: OrderedReady
+  replicas: 2
+  selector:
+    matchLabels:
+      app: vespa-query-container
+      name: vespa-internal
+  serviceName: vespa-internal
+  template:
+    metadata:
+      labels:
+        app: vespa-query-container
+        name: vespa-internal
+    spec:
+      containers:
+      - args:
+        - services
+        envFrom:
+        - configMapRef:
+            name: vespa-config
+        # Per-role logical image name — see vespa-configserver.yaml.
+        image: vespa-query
+        imagePullPolicy: Always
+        name: vespa-query-container
+        # Gates the vespa-query Service — the path the app hits for search.
+        # This is the probe that directly prevents the "503 / connection
+        # refused" window: the Service won't route a query here until
+        # /state/v1/health is up. Readiness-only (no liveness — see the
+        # probe note in vespa-configserver.yaml).
+        readinessProbe:
+          httpGet:
+            path: /state/v1/health
+            port: 8080
+          initialDelaySeconds: 30
+          periodSeconds: 10
+          timeoutSeconds: 5
+          failureThreshold: 6
+        resources:
+          limits:
+            memory: 4G
+          requests:
+            memory: 1500M
+        securityContext:
+          runAsUser: 1000
+  updateStrategy:
+    rollingUpdate:
+      partition: 0
+    type: RollingUpdate
+---
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    app: vespa
+  name: vespa-query
+spec:
+  ports:
+  - name: api
+    port: 8080
+    protocol: TCP
+    targetPort: 8080
+  selector:
+    app: vespa-query-container
+  type: ClusterIP
diff --git a/darwin-kubernetes/web_server-service-deployment.yaml b/k8s/base/web-server.yaml
similarity index 71%
rename from darwin-kubernetes/web_server-service-deployment.yaml
rename to k8s/base/web-server.yaml
index faa2e230e04..42f6ea6ab44 100644
--- a/darwin-kubernetes/web_server-service-deployment.yaml
+++ b/k8s/base/web-server.yaml
@@ -1,16 +1,3 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: web-server-service
-spec:
-  selector:
-    app: web-server
-  ports:
-    - protocol: TCP
-      port: 80
-      targetPort: 3000
-  type: ClusterIP
----
 apiVersion: apps/v1
 kind: Deployment
 metadata:
@@ -20,30 +7,51 @@ spec:
   selector:
     matchLabels:
       app: web-server
+  strategy:
+    rollingUpdate:
+      maxSurge: 25%
+      maxUnavailable: 25%
+    type: RollingUpdate
   template:
     metadata:
       labels:
         app: web-server
     spec:
       containers:
-      - name: web-server
-        image: sfbrdevhelmweacr.azurecr.io/danswer/danswer-web-server:latest
-        imagePullPolicy: IfNotPresent
-        ports:
-        - containerPort: 3000
-        # There are some extra values since this is shared between services
-        # There are no conflicts though, extra env variables are simply ignored
-        env:
+      - env:
         - name: POSTGRES_USER
           valueFrom:
             secretKeyRef:
-              name: danswer-secrets
               key: postgres_user
+              name: danswer-secrets
         - name: POSTGRES_PASSWORD
           valueFrom:
             secretKeyRef:
-              name: danswer-secrets
               key: postgres_password
+              name: danswer-secrets
         envFrom:
         - configMapRef:
             name: env-configmap
+        image: danswer-web-server
+        imagePullPolicy: IfNotPresent
+        name: web-server
+        ports:
+        - containerPort: 3000
+          protocol: TCP
+        resources:
+          requests:
+            cpu: 500m
+            memory: 4Gi
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: web-server-service
+spec:
+  ports:
+  - port: 80
+    protocol: TCP
+    targetPort: 3000
+  selector:
+    app: web-server
+  type: ClusterIP
diff --git a/k8s/optional/background-scaling/background-indexer-scheduler.yaml b/k8s/optional/background-scaling/background-indexer-scheduler.yaml
new file mode 100644
index 00000000000..6be92c2b148
--- /dev/null
+++ b/k8s/optional/background-scaling/background-indexer-scheduler.yaml
@@ -0,0 +1,59 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: background-indexer-scheduler-deployment
+spec:
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app: background-indexer-scheduler
+  template:
+    metadata:
+      labels:
+        app: background-indexer-scheduler
+    spec:
+      containers:
+      - name: indexer-scheduler
+        image: danswer-backend
+        imagePullPolicy: IfNotPresent
+        command:
+        - python
+        - danswer/background/update.py
+        env:
+        - name: DASK_SCHEDULER_ADDRESS
+          value: tcp://dask-scheduler-service:8786
+        - name: CURRENT_PROCESS_IS_AN_INDEXING_JOB
+          value: 'true'
+        - name: POSTGRES_USER
+          valueFrom:
+            secretKeyRef:
+              key: postgres_user
+              name: danswer-secrets
+        - name: POSTGRES_PASSWORD
+          valueFrom:
+            secretKeyRef:
+              key: postgres_password
+              name: danswer-secrets
+        envFrom:
+        - configMapRef:
+            name: env-configmap
+        - secretRef:
+            name: danswer-secrets
+        # No PVCs — the scheduler only polls Postgres + submits Dask futures;
+        # the file store / dynamic config are Postgres-backed (see README).
+        #
+        # Sized for the FIXED scheduler. The old ~7.5Gi/cycle spikes were a
+        # query bug, not real work: get_last_attempt() ran an unbounded
+        # ORDER BY (no LIMIT) once per cc-pair, materializing each cc-pair's
+        # FULL index_attempt history just to take the newest. Fixed with
+        # LIMIT 1 in db/index_attempt.py. Verified on the fixed image
+        # (vha-140): RSS sits FLAT at ~430Mi across update cycles, no spike.
+        # 512Mi request / 2Gi limit = comfortable headroom over that.
+        resources:
+          requests:
+            cpu: 200m
+            memory: 512Mi
+          limits:
+            cpu: 500m
+            memory: 2Gi
diff --git a/k8s/optional/background-scaling/background-lite.yaml b/k8s/optional/background-scaling/background-lite.yaml
new file mode 100644
index 00000000000..4af4927cca5
--- /dev/null
+++ b/k8s/optional/background-scaling/background-lite.yaml
@@ -0,0 +1,159 @@
+# background-lite — the low-traffic singletons that don't scale with
+# indexing load, co-located in one pod as three containers:
+#   - celery-worker    maintenance tasks (prune/sync/deletion/analytics);
+#                       fixed 10-thread pool (--concurrency=10, no autoscale)
+#   - celery-beat      periodic-task scheduler (singleton)
+#   - slack-listener   Slack Socket Mode websocket (singleton)
+#
+# This replaces three separate deployments (background-celery,
+# background-beat, slack-listener) — none of them benefits from being its
+# own Deployment for the indexing-scaling goal, and collapsing them trims
+# the pod count and per-deployment resource reservations.
+#
+# Indexing is deliberately NOT here — that's the Dask path
+# (background-indexer-scheduler → dask-scheduler → dask-worker), which is
+# the part that actually scales.
+#
+# SINGLETON: replicas managed in kustomization.yaml at count: 1. The pod
+# contains celery-beat (dup beats double-fire periodic tasks) and the
+# Slack websocket listener (dup double-processes events), so this pod
+# must never run more than one replica. Each container restarts
+# independently if it crashes; the pod is rescheduled as a unit.
+#
+# Env-neutral like the rest of the component (logical danswer-backend
+# image; config/secrets via env-configmap + danswer-secrets). The Slack
+# tokens reach the listener through the envFrom secretRef.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: background-lite-deployment
+spec:
+  # MUST stay 1 — see the singleton note above. Set in the component
+  # kustomization replicas: block too; kept here as the manifest default.
+  replicas: 1
+  strategy:
+    # Recreate (not RollingUpdate): celery-beat must never overlap with a
+    # second beat during a rollout, or periodic tasks double-fire.
+    type: Recreate
+  selector:
+    matchLabels:
+      app: background-lite
+  template:
+    metadata:
+      labels:
+        app: background-lite
+    spec:
+      containers:
+      # --- celery worker (maintenance tasks) ---
+      - name: celery-worker
+        image: danswer-backend
+        imagePullPolicy: IfNotPresent
+        command:
+        - celery
+        - -A
+        - danswer.background.celery.celery_run:celery_app
+        - worker
+        - --pool=threads
+        # Fixed thread count, NOT --autoscale. Autoscale calls pool.grow()/shrink()
+        # which the threads TaskPool doesn't implement -> on the first task burst
+        # the worker dies with "AttributeError: 'TaskPool' object has no attribute
+        # 'grow'" and CrashLoopBackOffs. Autoscale is prefork-only. These
+        # maintenance tasks are I/O-bound, so a fixed pool of threads is fine.
+        - --concurrency=10
+        - --loglevel=INFO
+        env:
+        - name: POSTGRES_USER
+          valueFrom:
+            secretKeyRef:
+              key: postgres_user
+              name: danswer-secrets
+        - name: POSTGRES_PASSWORD
+          valueFrom:
+            secretKeyRef:
+              key: postgres_password
+              name: danswer-secrets
+        envFrom:
+        - configMapRef:
+            name: env-configmap
+        - secretRef:
+            name: danswer-secrets
+        # No PVCs: the file store (File connector uploads) and dynamic config
+        # are Postgres-backed in this fork (PGFileStore lobjs +
+        # PostgresBackedDynamicConfigStore). Nothing here reads /home/storage
+        # or /home/file_connector_storage. See README "Apply an optional
+        # component" for why the split deployments mount no volumes.
+        resources:
+          requests:
+            cpu: 200m
+            memory: 512Mi
+          limits:
+            cpu: '1'
+            memory: 2Gi
+      # --- celery beat (periodic scheduler) ---
+      - name: celery-beat
+        image: danswer-backend
+        imagePullPolicy: IfNotPresent
+        command:
+        - celery
+        - -A
+        - danswer.background.celery.celery_run:celery_app
+        - beat
+        - --loglevel=INFO
+        env:
+        - name: POSTGRES_USER
+          valueFrom:
+            secretKeyRef:
+              key: postgres_user
+              name: danswer-secrets
+        - name: POSTGRES_PASSWORD
+          valueFrom:
+            secretKeyRef:
+              key: postgres_password
+              name: danswer-secrets
+        envFrom:
+        - configMapRef:
+            name: env-configmap
+        - secretRef:
+            name: danswer-secrets
+        # beat is light at runtime, but `celery -A ... beat` still imports the
+        # whole danswer app (langchain/llama-index/tokenizers), which alone
+        # exceeds 256Mi → OOMKilled on startup. Stable at a 1Gi limit; request
+        # doubled (256Mi→512Mi) for a guaranteed floor and limit lifted to 2Gi
+        # for headroom.
+        resources:
+          requests:
+            cpu: 50m
+            memory: 512Mi
+          limits:
+            cpu: 200m
+            memory: 2Gi
+      # --- slack bot listener ---
+      - name: slack-listener
+        image: danswer-backend
+        imagePullPolicy: IfNotPresent
+        command:
+        - python
+        - danswer/danswerbot/slack/listener.py
+        env:
+        - name: POSTGRES_USER
+          valueFrom:
+            secretKeyRef:
+              key: postgres_user
+              name: danswer-secrets
+        - name: POSTGRES_PASSWORD
+          valueFrom:
+            secretKeyRef:
+              key: postgres_password
+              name: danswer-secrets
+        envFrom:
+        - configMapRef:
+            name: env-configmap
+        - secretRef:
+            name: danswer-secrets
+        resources:
+          requests:
+            cpu: 100m
+            memory: 512Mi
+          limits:
+            cpu: 500m
+            memory: 1Gi
diff --git a/k8s/optional/background-scaling/dask-scheduler.yaml b/k8s/optional/background-scaling/dask-scheduler.yaml
new file mode 100644
index 00000000000..7358402359d
--- /dev/null
+++ b/k8s/optional/background-scaling/dask-scheduler.yaml
@@ -0,0 +1,70 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: dask-scheduler-service
+spec:
+  selector:
+    app: dask-scheduler
+  ports:
+  - name: rpc
+    protocol: TCP
+    port: 8786
+    targetPort: 8786
+  - name: dashboard
+    protocol: TCP
+    port: 8787
+    targetPort: 8787
+  type: ClusterIP
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: dask-scheduler-deployment
+spec:
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app: dask-scheduler
+  template:
+    metadata:
+      labels:
+        app: dask-scheduler
+    spec:
+      containers:
+      - name: scheduler
+        image: danswer-backend
+        imagePullPolicy: IfNotPresent
+        command:
+        - dask
+        - scheduler
+        - --host=0.0.0.0
+        - --port=8786
+        - --dashboard-address=:8787
+        ports:
+        - name: rpc
+          containerPort: 8786
+        - name: dashboard
+          containerPort: 8787
+        envFrom:
+        - configMapRef:
+            name: env-configmap
+        - secretRef:
+            name: danswer-secrets
+        resources:
+          requests:
+            cpu: 100m
+            memory: 256Mi
+          limits:
+            cpu: 500m
+            memory: 512Mi
+        readinessProbe:
+          tcpSocket:
+            port: 8786
+          initialDelaySeconds: 5
+          periodSeconds: 10
+        livenessProbe:
+          tcpSocket:
+            port: 8786
+          initialDelaySeconds: 15
+          periodSeconds: 20
diff --git a/k8s/optional/background-scaling/dask-worker.yaml b/k8s/optional/background-scaling/dask-worker.yaml
new file mode 100644
index 00000000000..27ed4553533
--- /dev/null
+++ b/k8s/optional/background-scaling/dask-worker.yaml
@@ -0,0 +1,73 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: dask-worker-deployment
+spec:
+  selector:
+    matchLabels:
+      app: dask-worker
+  template:
+    metadata:
+      labels:
+        app: dask-worker
+    spec:
+      containers:
+      - name: worker
+        image: danswer-backend
+        imagePullPolicy: IfNotPresent
+        # Wait for the Dask scheduler to accept connections before starting the
+        # worker, then exec it. A worker that boots before the scheduler is
+        # ready fails to register its Nanny and exits 1 -> CrashLoopBackOff
+        # until pod ordering happens to work out.
+        #
+        # This retry loop is environment-agnostic — plain TCP connect, no
+        # dependency on any mesh or platform feature; it behaves the same with
+        # or without istio. It lives in the MAIN container (not an
+        # initContainer) for exactly that portability: under istio,
+        # initContainers run before the sidecar proxy, so their traffic to a
+        # mesh Service is blackholed until envoy is up — an init-based wait
+        # would hang there. The main-container loop also naturally waits out
+        # envoy when istio IS present. `exec` replaces the shell with the
+        # worker (same PID), so signals/termination reach it directly.
+        command:
+        - /bin/sh
+        - -c
+        - |
+          until python -c "import socket; socket.create_connection(('dask-scheduler-service', 8786), timeout=3)" 2>/dev/null; do
+            echo "waiting for dask-scheduler-service:8786..."
+            sleep 2
+          done
+          echo "dask-scheduler reachable; starting worker"
+          exec dask worker tcp://dask-scheduler-service:8786 --nworkers=1 --nthreads=1 --memory-limit=4GB
+        env:
+        - name: PYTHONPATH
+          value: /app
+        - name: CURRENT_PROCESS_IS_AN_INDEXING_JOB
+          value: 'true'
+        - name: POSTGRES_USER
+          valueFrom:
+            secretKeyRef:
+              key: postgres_user
+              name: danswer-secrets
+        - name: POSTGRES_PASSWORD
+          valueFrom:
+            secretKeyRef:
+              key: postgres_password
+              name: danswer-secrets
+        envFrom:
+        - configMapRef:
+            name: env-configmap
+        - secretRef:
+            name: danswer-secrets
+        # No PVCs — indexing reads File-connector uploads from PGFileStore
+        # (Postgres large objects), not /home/file_connector_storage; the
+        # dynamic config store is Postgres-backed too. Connector temp files
+        # (if any) go to the container's /tmp. This also keeps each worker
+        # off the RWO dynamic-pvc, which can't multi-attach. See README.
+        resources:
+          requests:
+            cpu: "1"
+            memory: 4Gi
+          limits:
+            cpu: '2'
+            memory: 8Gi
diff --git a/k8s/optional/background-scaling/kustomization.yaml b/k8s/optional/background-scaling/kustomization.yaml
new file mode 100644
index 00000000000..a09f2e26d27
--- /dev/null
+++ b/k8s/optional/background-scaling/kustomization.yaml
@@ -0,0 +1,50 @@
+# Kustomize Component — split-background + remote-Dask indexing topology.
+#
+# Replaces the single combined `background` deployment (in base) with FOUR
+# deployments:
+#   - background-lite             beat + celery + slack-listener, co-located
+#                                 (3 containers, 1 pod) — the low-traffic
+#                                 singletons that don't scale with indexing
+#   - background-indexer-scheduler  the update.py polling loop, in remote-Dask mode
+#   - dask-scheduler              Dask scheduler Service + Deployment
+#   - dask-worker                 Dask worker pods (scale these for indexing throughput)
+#
+# Parameterization mirrors base:
+#   - Image refs use the logical name `danswer-backend`; the including
+#     overlay's `images:` block rewrites them (no images: needed here).
+#   - Secrets/config load identically to base: explicit POSTGRES_USER /
+#     POSTGRES_PASSWORD via secretKeyRef, plus
+#     `envFrom: [configMapRef env-configmap, secretRef danswer-secrets]`.
+#   - Replica counts live in the `replicas:` block below (one place).
+#     dask-worker is the knob you turn for more indexing throughput.
+#   - Manifests are environment-neutral (NO node affinity / tolerations).
+#     Darwin's indexcpu-pool scheduling is added by the prod overlay when
+#     it opts in (see k8s/README.md → "Apply an optional component").
+#
+# Only meaningful when pulled into an overlay via its `components:` field:
+#
+#   # k8s/overlays/prod/kustomization.yaml
+#   components:
+#     - ../../optional/background-scaling
+#
+# When you opt in, also set base `background-deployment` to count: 0 in the
+# overlay's replicas: block so you don't run two Celery beat schedulers on
+# the same broker.
+apiVersion: kustomize.config.k8s.io/v1alpha1
+kind: Component
+
+resources:
+  - background-lite.yaml
+  - background-indexer-scheduler.yaml
+  - dask-scheduler.yaml
+  - dask-worker.yaml
+
+replicas:
+  - name: background-lite-deployment
+    count: 1                          # singleton — beat + slack websocket; never >1
+  - name: background-indexer-scheduler-deployment
+    count: 1                          # singleton — the update.py polling loop
+  - name: dask-scheduler-deployment
+    count: 1                          # singleton bookkeeping process
+  - name: dask-worker-deployment
+    count: 2                          # max concurrent indexing jobs (1 connector per pod); raise if attempts backlog
diff --git a/k8s/optional/keda-indexing-autoscale/kustomization.yaml b/k8s/optional/keda-indexing-autoscale/kustomization.yaml
new file mode 100644
index 00000000000..9efad85be4a
--- /dev/null
+++ b/k8s/optional/keda-indexing-autoscale/kustomization.yaml
@@ -0,0 +1,19 @@
+# Kustomize Component — KEDA autoscaling for the Dask indexing workers.
+#
+# Opt in from an overlay (after the background-scaling component, since it
+# scales that component's dask-worker-deployment):
+#
+#   # k8s/overlays/prod/kustomization.yaml
+#   components:
+#     - ../../optional/background-scaling
+#     - ../../optional/keda-indexing-autoscale
+#
+# AND remove dask-worker-deployment from every static replicas: block (the
+# background-scaling component's and the overlay's) — KEDA owns that count.
+#
+# Requires the KEDA operator installed cluster-wide (the keda.sh CRDs).
+apiVersion: kustomize.config.k8s.io/v1alpha1
+kind: Component
+
+resources:
+  - scaledobject.yaml
diff --git a/k8s/optional/keda-indexing-autoscale/scaledobject.yaml b/k8s/optional/keda-indexing-autoscale/scaledobject.yaml
new file mode 100644
index 00000000000..04cf0ef37c5
--- /dev/null
+++ b/k8s/optional/keda-indexing-autoscale/scaledobject.yaml
@@ -0,0 +1,86 @@
+# KEDA autoscaler for the Dask indexing workers.
+#
+# Scales dask-worker-deployment 0..N based on how much indexing work is
+# actually runnable right now, read straight from Postgres.
+#
+# PREREQUISITES (see README "KEDA indexing autoscale"):
+#   1. KEDA operator installed cluster-wide.
+#   2. The background-scaling component is opted in (dask-worker exists).
+#   3. dask-worker is NOT also pinned by a static replicas: entry — KEDA
+#      owns its replica count (remove it from the background-scaling AND
+#      the overlay kustomization replicas: blocks, or they fight).
+#
+# Password comes from danswer-secrets via TriggerAuthentication; the
+# host/user/db below are the Darwin prod Postgres coordinates and must
+# match the overlay's POSTGRES_* (this is a prod-scaling feature).
+---
+apiVersion: keda.sh/v1alpha1
+kind: TriggerAuthentication
+metadata:
+  name: keda-indexing-pg-auth
+spec:
+  secretTargetRef:
+    - parameter: password
+      name: danswer-secrets
+      key: postgres_password
+---
+apiVersion: keda.sh/v1alpha1
+kind: ScaledObject
+metadata:
+  name: dask-worker-indexing
+spec:
+  scaleTargetRef:
+    name: dask-worker-deployment
+  # Scale to zero when nothing is indexing — saves the idle 4Gi/worker.
+  # Cold-start cost on the first index after idle is ~30s-2min (pod start
+  # + Dask worker connect); fine for background indexing. Set to 1 if you
+  # want a worker always warm.
+  minReplicaCount: 0
+  # Ceiling on burst. Useful concurrency is bounded by
+  # (distinct active source types × INDEXING_PER_SOURCE_CAP), so there's
+  # no point setting this far above your number of source types.
+  maxReplicaCount: 4
+  pollingInterval: 60      # how often the query runs (seconds)
+  cooldownPeriod: 300      # wait 5m after metric hits 0 before scaling to 0
+  advanced:
+    horizontalPodAutoscalerConfig:
+      behavior:
+        scaleDown:
+          # Damp scale-down so finishing jobs don't cause thrash.
+          stabilizationWindowSeconds: 300
+  triggers:
+    - type: postgresql
+      authenticationRef:
+        name: keda-indexing-pg-auth
+      metadata:
+        host: darwin-postgres.postgres.database.azure.com
+        port: "5432"
+        userName: postgres
+        dbName: postgres
+        sslmode: require
+        # Desired replicas = ceil(query_result / targetQueryValue).
+        targetQueryValue: "1"
+        # The metric = number of indexing attempts that can run CONCURRENTLY
+        # right now, respecting INDEXING_PER_SOURCE_CAP (default 1 → one per source).
+        # NOT a raw pending count: 10 queued attempts of the same source
+        # still only run 1 at a time, so we must not spin up 10 workers.
+        #
+        # Includes IN_PROGRESS on purpose: that keeps replicas >= running
+        # jobs, so KEDA never scales a busy worker away — scale-to-0 only
+        # happens when there is genuinely no work.
+        #
+        # Status is stored UPPERCASE (Enum(..., native_enum=False) stores
+        # the member NAME, not its value) — verified against the live DB.
+        # IndexAttempt links directly to connector (this fork has
+        # connector_id on index_attempt, not connector_credential_pair_id).
+        #
+        # If you raise INDEXING_PER_SOURCE_CAP above 1, change LEAST(1,...)
+        # to LEAST(<cap>,...) to match.
+        query: >-
+          SELECT COALESCE(SUM(LEAST(1, cnt)), 0) FROM (
+            SELECT con.source, COUNT(*) AS cnt
+            FROM index_attempt ia
+            JOIN connector con ON ia.connector_id = con.id
+            WHERE ia.status IN ('NOT_STARTED', 'IN_PROGRESS')
+            GROUP BY con.source
+          ) s
diff --git a/k8s/optional/keda/kustomization.yaml b/k8s/optional/keda/kustomization.yaml
new file mode 100644
index 00000000000..d939623dfbd
--- /dev/null
+++ b/k8s/optional/keda/kustomization.yaml
@@ -0,0 +1,30 @@
+# KEDA operator install — cluster-scoped infrastructure (CRDs + RBAC +
+# the operator/metrics-apiserver Deployments). Installed ONCE per cluster,
+# independent of the danswer overlays — this is NOT a kustomize Component
+# pulled into prod/local; apply it on its own.
+#
+# No Helm (per repo preference). We just reference KEDA's official release
+# bundle, PINNED to an exact version. A versioned release-asset URL is stable
+# in practice (not a cryptographic pin) — never use a moving ref
+# (same lesson as the Vespa :latest incident; see AGENTS.md §10).
+#
+# KEDA's bundle creates and installs into its OWN namespace `keda`
+# (the resources carry `namespace: keda` internally) — do NOT add a
+# `namespace:` here, that would try to re-namespace the cluster-scoped
+# CRDs and break the install.
+#
+# Install / upgrade:
+#   kubectl apply --server-side -k k8s/optional/keda
+#   # --server-side: KEDA's CRDs are large and exceed the client-side
+#   # last-applied-configuration annotation size limit.
+#
+# Verify:
+#   kubectl get pods -n keda
+#   kubectl get crd | grep keda.sh    # scaledobjects, triggerauthentications, ...
+#
+# To bump KEDA: change the version in the URL below, re-apply --server-side.
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  - https://github.com/kedacore/keda/releases/download/v2.14.0/keda-2.14.0.yaml
diff --git a/k8s/overlays/local-vespa/kustomization.yaml b/k8s/overlays/local-vespa/kustomization.yaml
new file mode 100644
index 00000000000..f75ee49031a
--- /dev/null
+++ b/k8s/overlays/local-vespa/kustomization.yaml
@@ -0,0 +1,36 @@
+# Vespa apply target for LOCAL dev — deliberately SEPARATE from the app
+# overlay (Vespa was removed from base/, so `kubectl apply -k
+# k8s/overlays/local` no longer touches it). Apply Vespa with:
+#
+#     kubectl apply -k k8s/overlays/local-vespa     # context: local cluster
+#
+# Vespa is resource-heavy; skip this on machines where you point at a remote
+# Vespa or don't need search locally.
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  - ../../base/vespa
+
+namespace: default
+
+# Pinned, same as prod — never :latest for Vespa (see prod-vespa overlay note).
+images:
+  - name: vespa-configserver
+    newName: vespaengine/vespa
+    newTag: "8.600.35"
+  - name: vespa-admin
+    newName: vespaengine/vespa
+    newTag: "8.600.35"
+  - name: vespa-content
+    newName: vespaengine/vespa
+    newTag: "8.600.35"
+  - name: vespa-feed
+    newName: vespaengine/vespa
+    newTag: "8.600.35"
+  - name: vespa-query
+    newName: vespaengine/vespa
+    newTag: "8.600.35"
+  - name: busybox
+    newName: busybox
+    newTag: "1.36.1"
diff --git a/k8s/overlays/local/env.properties b/k8s/overlays/local/env.properties
new file mode 100644
index 00000000000..b75f1d918ed
--- /dev/null
+++ b/k8s/overlays/local/env.properties
@@ -0,0 +1,142 @@
+# Local-dev env-configmap values.
+#
+# Assumes Postgres + Vespa are reachable at `host.docker.internal` (so you
+# can run them via docker-compose alongside this k8s cluster on Rancher
+# Desktop / Docker Desktop). If you run them in-cluster instead, swap the
+# *_HOST values.
+
+# --- Auth: disabled for local dev ---
+AUTH_TYPE=disabled
+SESSION_EXPIRE_TIME_SECONDS=86400
+OPENID_CONFIG_URL=
+DEFAULT_ADMIN_EMAILS=
+VALID_EMAIL_DOMAINS=
+APPLY_MIGRATIONS=true
+
+# --- Domain: localhost ---
+DOMAIN=localhost
+WEB_DOMAIN=http://localhost:3000
+INTERNAL_URL=http://api-server-service:80
+
+# --- External services reachable on the host ---
+POSTGRES_HOST=host.docker.internal
+VESPA_HOST=host.docker.internal
+VESPA_CONFIG_SERVER_HOST=host.docker.internal
+VESPA_FEED_HOST=host.docker.internal
+VESPA_PORT=8080
+VESPA_FEED_PORT=8080
+
+# --- Model servers (in-cluster) ---
+MODEL_SERVER_HOST=inference-model-server-service
+INDEXING_MODEL_SERVER_HOST=indexing-model-server-service
+MODEL_SERVER_PORT=
+MIN_THREADS_ML_MODELS=
+DOCUMENT_ENCODER_MODEL=
+NORMALIZE_EMBEDDINGS=
+ASYM_QUERY_PREFIX=
+ASYM_PASSAGE_PREFIX=
+ENABLE_RERANKING_REAL_TIME_FLOW=
+ENABLE_RERANKING_ASYNC_FLOW=
+
+# --- LLM ---
+GEN_AI_MODEL_PROVIDER=custom
+GEN_AI_VENDOR=openai
+GEN_AI_MODEL_NAME=gpt-4o-2024-11-20
+GEN_AI_MODEL_VERSION=
+FAST_GEN_AI_MODEL_VERSION=
+GEN_AI_API_VERSION=
+GEN_AI_LLM_PROVIDER_TYPE=
+GEN_AI_MAX_TOKENS=
+GEN_AI_ACCOUNT_ID=
+GEN_AI_TENANT_ID=
+GEN_AI_API_ENDPOINT=
+GEN_AI_IDENTITY_ENDPOINT=
+
+# --- Query options ---
+QA_TIMEOUT=60
+MAX_CHUNKS_FED_TO_CHAT=
+DISABLE_LLM_FILTER_EXTRACTION=true
+DISABLE_LLM_CHUNK_FILTER=true
+DISABLE_LLM_CHOOSE_SEARCH=true
+DISABLE_LLM_QUERY_REPHRASE=false
+DOC_TIME_DECAY=
+HYBRID_ALPHA=0.8
+EDIT_KEYWORD_QUERY=
+MULTILINGUAL_QUERY_EXPANSION=
+QA_PROMPT_OVERRIDE=
+LANGUAGE_HINT=
+DISABLE_GENERATIVE_AI=
+
+# --- Indexing (smaller for local) ---
+NUM_INDEXING_WORKERS=1
+ENABLED_CONNECTOR_TYPES=
+DISABLE_INDEX_UPDATE_ON_SWAP=
+DASK_JOB_CLIENT_ENABLED=false
+CONTINUE_ON_CONNECTOR_FAILURE=true
+EXPERIMENTAL_CHECKPOINTING_ENABLED=
+CONFLUENCE_CONNECTOR_LABELS_TO_SKIP=
+JIRA_API_VERSION=
+JIRA_SERVER_URL=
+WEB_CONNECTOR_VALIDATE_URLS=
+GONG_CONNECTOR_START_TIME=
+NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP=
+
+# --- Slack bot disabled locally ---
+DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER=
+DANSWER_BOT_DISPLAY_ERROR_MSGS=
+DANSWER_BOT_RESPOND_EVERY_CHANNEL=
+DANSWER_BOT_DISABLE_COT=
+NOTIFY_SLACKBOT_NO_ANSWER=
+
+# --- SMTP unused locally ---
+SMTP_SERVER=
+SMTP_PORT=
+SMTP_USER=
+EMAIL_FROM=
+
+# --- Telemetry / logging (more verbose for local debugging) ---
+DISABLE_TELEMETRY=true
+LOG_LEVEL=debug
+LOG_ALL_MODEL_INTERACTIONS=true
+LOG_VESPA_TIMING_INFORMATION=true
+
+# --- Redis ---
+# Points at the in-cluster Redis from ../../optional/redis.yaml. NOTE: this
+# overlay's kustomization.yaml lists only ../../base — verify it is applied.
+REDIS_HOST=redis
+REDIS_PORT=6379
+REDIS_DB_NUMBER=0
+REDIS_SSL=
+REDIS_KV_CACHE_ENABLED=
+REDIS_KV_CACHE_TTL_SECONDS=86400
+REQUEST_RATE_LIMIT_ENABLED=
+REQUEST_RATE_LIMIT_PER_MINUTE=
+REQUEST_RATE_LIMIT_PER_HOUR=
+PERSONA_CACHE_ENABLED=
+PERSONA_CACHE_TTL_SECONDS=86400
+# Chat-page connector indexing-status cache (see prod env.properties for the
+# rationale). Empty = off; set to true to enable the short global TTL cache.
+CC_PAIR_INFO_CACHE_ENABLED=
+CC_PAIR_INFO_CACHE_TTL_SECONDS=60
+# Per-user document-set list cache (see prod env.properties). Empty = off.
+DOCUMENT_SET_CACHE_ENABLED=
+DOCUMENT_SET_CACHE_TTL_SECONDS=300
+# Celery broker on Redis (logical DB 1). Empty = off → falls back to the
+# Postgres broker, which is fine for local. Set to true to mirror prod.
+CELERY_BROKER_REDIS_ENABLED=
+CELERY_REDIS_DB_NUMBER=1
+
+# --- Postgres connection pool (per process; empty = code defaults 40+10) ---
+POSTGRES_POOL_SIZE=
+POSTGRES_POOL_OVERFLOW=
+
+# Prune indexing-run history: terminal attempts older than 30d, always keeping
+# the last 20 per cc-pair. Empty = code default (0 = off). Mirrors prod.
+RETENTION_DAYS_INDEX_ATTEMPT=30
+
+# --- File store (default Postgres; see prod env.properties to use Azure Blob) ---
+FILE_STORE_TYPE=PostgresBackedFileStore
+AZURE_BLOB_CONTAINER=danswer-files
+# Chat upload limits (see prod env.properties for rationale).
+CHAT_FILE_MAX_SIZE_MB=25
+CHAT_FILE_MAX_TOKEN_FRACTION=0.5
diff --git a/k8s/overlays/local/kustomization.yaml b/k8s/overlays/local/kustomization.yaml
new file mode 100644
index 00000000000..770c2c341e6
--- /dev/null
+++ b/k8s/overlays/local/kustomization.yaml
@@ -0,0 +1,60 @@
+# Local-dev overlay → Rancher Desktop / Docker Desktop / kind / any local cluster.
+#
+# Apply:    kubectl apply -k k8s/overlays/local
+# Preview:  kubectl kustomize k8s/overlays/local
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  - ../../base
+
+namespace: default
+
+# Local-dev image refs. `latest` is fine here — for prod we pin (see prod
+# overlay).
+images:
+  - name: danswer-backend
+    newName: danswer/danswer-backend
+    newTag: latest
+  - name: danswer-web-server
+    newName: danswer/danswer-web-server
+    newTag: latest
+  - name: danswer-model-server
+    newName: danswer/danswer-model-server
+    newTag: v0.3.94
+  - name: nginx
+    newName: nginx
+    newTag: 1.23.4-alpine
+  # Vespa images live in the separate Vespa overlay (k8s/overlays/local-vespa)
+  # now that Vespa is applied independently of the app.
+
+replicas:
+  - name: api-server-deployment
+    count: 1
+  - name: background-deployment
+    count: 1
+  - name: web-server-deployment
+    count: 1
+  - name: nginx-deployment
+    count: 1
+  - name: inference-model-server-deployment
+    count: 1
+
+configMapGenerator:
+  - name: env-configmap
+    behavior: create
+    envs:
+      - env.properties
+
+secretGenerator:
+  - name: danswer-secrets
+    envs:
+      - secrets.env
+
+generatorOptions:
+  disableNameSuffixHash: true
+
+# No affinity/toleration patches needed — the live-cluster dump that
+# seeded base/ doesn't use indexcpu-pool affinity. If you re-introduce
+# Darwin-specific scheduling in base/ later, add JSON patches here to
+# strip them for local.
diff --git a/k8s/overlays/local/secrets.env.example b/k8s/overlays/local/secrets.env.example
new file mode 100644
index 00000000000..feebcca690c
--- /dev/null
+++ b/k8s/overlays/local/secrets.env.example
@@ -0,0 +1,44 @@
+# Template for the local secrets.env file. Copy to secrets.env and fill in
+# whatever you actually need for local dev. secrets.env is gitignored.
+#
+# Most local-dev runs can leave everything blank — AUTH_TYPE=disabled means
+# nothing checks the OAuth secrets, and external integrations (Slack, Jira,
+# Opsgenie) just no-op when their tokens are empty.
+
+# --- Database ---
+postgres_user=postgres
+postgres_password=password
+
+# --- OIDC / Entra (unused when AUTH_TYPE=disabled) ---
+oauth_client_id=
+oauth_client_secret=
+user_auth_secret=local-dev-secret-not-for-production
+
+# --- Google OAuth (legacy / unused locally) ---
+google_oauth_client_id=
+google_oauth_client_secret=
+
+# --- Redis (unauth'd locally) ---
+redis_password=
+
+# --- Encryption key (generate one if you'll test encrypted connector creds) ---
+ENCRYPTION_KEY_SECRET=
+
+# --- Slack bot (leave empty unless testing the bot) ---
+DANSWER_BOT_SLACK_APP_TOKEN=
+DANSWER_BOT_SLACK_BOT_TOKEN=
+
+# --- LLM credentials ---
+GEN_AI_API_KEY=
+GEN_AI_CLIENT_ID=
+GEN_AI_CLIENT_SECRET=
+
+# --- Jira (leave empty unless testing the connector) ---
+JIRA_API_TOKEN=
+JIRA_EMAIL=
+
+# --- Opsgenie ---
+OPSGENIE_API_KEY=
+
+# --- SMTP ---
+SMTP_PASS=
diff --git a/k8s/overlays/prod-vespa/kustomization.yaml b/k8s/overlays/prod-vespa/kustomization.yaml
new file mode 100644
index 00000000000..39602a483f7
--- /dev/null
+++ b/k8s/overlays/prod-vespa/kustomization.yaml
@@ -0,0 +1,48 @@
+# Vespa apply target for PROD — deliberately SEPARATE from the app overlay.
+#
+# `kubectl apply -k k8s/overlays/prod` no longer touches Vespa (Vespa was
+# removed from base/). Apply Vespa only when you actually intend to, with:
+#
+#     kubectl apply -k k8s/overlays/prod-vespa     # context: darwin
+#
+# Version upgrades do NOT go through a blanket apply — use the ordered,
+# health-gated k8s/scripts/vespa-upgrade.sh, then sync the tags below so git
+# ≈ cluster.
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  - ../../base/vespa
+
+namespace: darwin
+
+# All roles pinned to the SAME version the cluster actually runs.
+# NEVER use :latest — it pulled 8.696.20, a >30-release jump from 8.600.35,
+# which Vespa's config server refuses (incompatible-upgrade guard) →
+# cluster-wide outage. 8.600.35 is the deployed on-disk version (the content
+# nodes' index is in this format).
+#
+# Do NOT hand-edit these tags to a higher version and `kubectl apply` — that
+# bumps all roles at once with no ordering or version-stepping. Upgrades go
+# through k8s/scripts/vespa-upgrade.sh (ordered, health-gated, ≤30 releases
+# per hop); afterwards, sync these tags to match live.
+images:
+  - name: vespa-configserver
+    newName: vespaengine/vespa
+    newTag: "8.600.35"
+  - name: vespa-admin
+    newName: vespaengine/vespa
+    newTag: "8.600.35"
+  - name: vespa-content
+    newName: vespaengine/vespa
+    newTag: "8.600.35"
+  - name: vespa-feed
+    newName: vespaengine/vespa
+    newTag: "8.600.35"
+  - name: vespa-query
+    newName: vespaengine/vespa
+    newTag: "8.600.35"
+  - name: busybox
+    # Pinned (was :latest). Only an init chown helper, but same drift hygiene.
+    newName: busybox
+    newTag: "1.36.1"
diff --git a/k8s/overlays/prod/env.properties b/k8s/overlays/prod/env.properties
new file mode 100644
index 00000000000..331889a5472
--- /dev/null
+++ b/k8s/overlays/prod/env.properties
@@ -0,0 +1,179 @@
+# Non-secret env-configmap values for the Darwin production cluster.
+# Real secrets (Slack tokens, API tokens, OAuth client secret, etc.) live in
+# secrets.env (gitignored — see secrets.env.example for the template).
+
+# --- Auth ---
+AUTH_TYPE=oidc
+SESSION_EXPIRE_TIME_SECONDS=86400
+OPENID_CONFIG_URL=https://login.microsoftonline.com/d8353d2a-b153-4d17-8827-902c51f72357/v2.0/.well-known/openid-configuration
+DEFAULT_ADMIN_EMAILS=user1@uipath.com,user2@uipath.com
+VALID_EMAIL_DOMAINS=
+APPLY_MIGRATIONS=true
+
+# --- Domain ---
+DOMAIN=darwin.westeurope.cloudapp.azure.com
+WEB_DOMAIN=https://darwin.westeurope.cloudapp.azure.com
+INTERNAL_URL=http://api-server-service:80
+
+# --- Database ---
+POSTGRES_HOST=darwin-postgres.postgres.database.azure.com
+# SQLAlchemy pool sizing PER PROCESS. Max DB connections one pod can hold =
+# (size + overflow) per engine; api-server uses BOTH a sync and an async
+# engine, so a single api-server pod can hold up to 2×(size+overflow).
+# Cluster total = that × replicas of every DB-touching pod (api-server +
+# background), and must stay under Azure Postgres max_connections (SKU-
+# dependent) with headroom for migrations on boot. These match the previous
+# hardcoded 40+10 — LOWER them as you add api-server replicas (e.g. 20+10 at
+# a few replicas) so replicas × pool stays under the cap.
+POSTGRES_POOL_SIZE=40
+POSTGRES_POOL_OVERFLOW=10
+# Chat data retention: delete chat_session/chat_message older than this many
+# days (daily retention sweep, 08:00 UTC). Analytics survive the purge —
+# they're pre-aggregated into analytics_daily_rollup + analytics_user_first_seen
+# at 07:30 UTC, before the sweep. The rollup recompute window auto-caps just
+# under this value so it never re-reads deleted days.
+RETENTION_DAYS_CHAT=90
+
+# Indexing-run history. Default is OFF (kept forever). The index_attempt table
+# had grown to ~518k rows; the scheduler reads the latest attempt per cc-pair
+# every loop, so unbounded history is real cost. This prunes TERMINAL attempts
+# older than 30d while always keeping the last 20 per (connector, credential,
+# embedding model) — so every connector retains recent debug history. Runs in
+# the same 08:00 UTC sweep. Tune keep-N via RETENTION_KEEP_LAST_N_INDEX_ATTEMPTS
+# (default 20).
+RETENTION_DAYS_INDEX_ATTEMPT=30
+
+# File-store backend. Default keeps bytes in Postgres (large objects). To
+# offload bytes to Azure Blob (keeps the DB lean; stops file reads holding a
+# PG connection): set FILE_STORE_TYPE=AzureBlobFileStore, put
+# AZURE_BLOB_CONNECTION_STRING in secrets.env, then run
+# scripts/migrate_file_store_to_azure_blob.py once. Until then this is inert.
+#FILE_STORE_TYPE=PostgresBackedFileStore
+FILE_STORE_TYPE=AzureBlobFileStore
+AZURE_BLOB_CONTAINER=danswer-files
+# Chat upload limits: a chat-attached doc is stuffed whole into the prompt, so
+# it must fit the model context window. Byte cap (all types) + a token cap =
+# fraction of the model's max input tokens (rejects docs that would overflow).
+CHAT_FILE_MAX_SIZE_MB=25
+CHAT_FILE_MAX_TOKEN_FRACTION=0.5
+
+# --- Vespa search index ---
+VESPA_HOST=vespa-query
+VESPA_CONFIG_SERVER_HOST=vespa-configserver
+VESPA_FEED_HOST=vespa-feed
+VESPA_PORT=8080
+VESPA_FEED_PORT=8080
+
+# --- Model servers ---
+MODEL_SERVER_HOST=inference-model-server-service
+INDEXING_MODEL_SERVER_HOST=indexing-model-server-service
+MODEL_SERVER_PORT=
+MIN_THREADS_ML_MODELS=
+DOCUMENT_ENCODER_MODEL=
+NORMALIZE_EMBEDDINGS=
+ASYM_QUERY_PREFIX=
+ASYM_PASSAGE_PREFIX=
+ENABLE_RERANKING_REAL_TIME_FLOW=
+ENABLE_RERANKING_ASYNC_FLOW=
+
+# --- LLM ---
+GEN_AI_MODEL_PROVIDER=custom
+GEN_AI_VENDOR=openai
+GEN_AI_MODEL_NAME=gpt-4o-2024-11-20
+GEN_AI_MODEL_VERSION=
+FAST_GEN_AI_MODEL_VERSION=
+GEN_AI_API_VERSION=
+GEN_AI_LLM_PROVIDER_TYPE=
+GEN_AI_MAX_TOKENS=
+GEN_AI_ACCOUNT_ID=bc2ddac5-57bc-40e6-93fe-3b319b60ce36
+GEN_AI_TENANT_ID=e367ca54-053b-4b86-89a2-6b9e89e85e7a
+GEN_AI_API_ENDPOINT=https://alpha.uipath.com/bc2ddac5-57bc-40e6-93fe-3b319b60ce36/e367ca54-053b-4b86-89a2-6b9e89e85e7a/llmgateway_/api/raw/vendor/openai/model/gpt-4.1-mini-2025-04-14/completions
+GEN_AI_IDENTITY_ENDPOINT=https://alpha.uipath.com/identity_/connect/token
+
+# --- Query options ---
+QA_TIMEOUT=60
+MAX_CHUNKS_FED_TO_CHAT=
+DISABLE_LLM_FILTER_EXTRACTION=true
+DISABLE_LLM_CHUNK_FILTER=true
+DISABLE_LLM_CHOOSE_SEARCH=true
+DISABLE_LLM_QUERY_REPHRASE=false
+DOC_TIME_DECAY=
+HYBRID_ALPHA=0.8
+EDIT_KEYWORD_QUERY=
+# Disabled: the expansion path uses a hardcoded 5s LLM timeout
+# (query_expansion.py) which gpt-4o behind the gateway routinely exceeds,
+# causing ReadTimeouts. Re-enable only with a genuinely fast model wired
+# to FAST_GEN_AI_MODEL_VERSION.
+MULTILINGUAL_QUERY_EXPANSION=
+QA_PROMPT_OVERRIDE=
+LANGUAGE_HINT=IMPORTANT: Always respond in English regardless of the language of the query or documents.
+DISABLE_GENERATIVE_AI=
+
+# --- Indexing ---
+NUM_INDEXING_WORKERS=2
+ENABLED_CONNECTOR_TYPES=
+DISABLE_INDEX_UPDATE_ON_SWAP=
+DASK_JOB_CLIENT_ENABLED=true
+CONTINUE_ON_CONNECTOR_FAILURE=true
+EXPERIMENTAL_CHECKPOINTING_ENABLED=
+CONFLUENCE_CONNECTOR_LABELS_TO_SKIP=
+JIRA_API_VERSION=
+JIRA_SERVER_URL=https://uipath.atlassian.net
+WEB_CONNECTOR_VALIDATE_URLS=
+GONG_CONNECTOR_START_TIME=
+NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP=
+
+# --- Slack bot (flags only — tokens are in secrets.env) ---
+DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER=
+DANSWER_BOT_DISPLAY_ERROR_MSGS=
+DANSWER_BOT_RESPOND_EVERY_CHANNEL=
+DANSWER_BOT_DISABLE_COT=
+NOTIFY_SLACKBOT_NO_ANSWER=
+
+# --- SMTP (non-secret bits) ---
+SMTP_SERVER=
+SMTP_PORT=
+SMTP_USER=
+EMAIL_FROM=
+
+# --- Telemetry / logging ---
+DISABLE_TELEMETRY=true
+LOG_LEVEL=info
+LOG_ALL_MODEL_INTERACTIONS=true
+LOG_VESPA_TIMING_INFORMATION=
+
+# --- Redis (cache + rate limiting; from this branch's Redis work) ---
+# Prod uses the in-cluster Redis StatefulSet (../../optional/redis);
+# REDIS_HOST is its Service name. NOTE: kustomization.yaml's components:
+# block does not currently list it — verify where Redis gets applied.
+# redis_password in secrets.env is empty by default because the in-cluster
+# Redis runs without requirepass; populate the secret + add
+# `--requirepass $(REDIS_PASSWORD)` to the StatefulSet args if you want auth.
+REDIS_HOST=redis
+REDIS_PORT=6379
+REDIS_DB_NUMBER=0
+REDIS_SSL=
+# Celery brokers + stores results on Redis instead of Postgres — removes
+# Celery's queue polling/writes from the DB. Uses logical DB 1 (cache/rate
+# limit use REDIS_DB_NUMBER=0), so the two never collide.
+CELERY_BROKER_REDIS_ENABLED=true
+CELERY_REDIS_DB_NUMBER=1
+REDIS_KV_CACHE_ENABLED=true
+REQUEST_RATE_LIMIT_ENABLED=true
+REQUEST_RATE_LIMIT_PER_MINUTE=20
+REQUEST_RATE_LIMIT_PER_HOUR=300
+REDIS_KV_CACHE_TTL_SECONDS=86400
+PERSONA_CACHE_ENABLED=true
+PERSONA_CACHE_TTL_SECONDS=86400
+# Chat-page connector indexing-status cache. /manage/indexing-status runs a
+# per-cc-pair doc-count aggregation (~300ms on the live DB) on every chat
+# page load; the result is identical for all users and slow-changing, so a
+# short global TTL cache fronts it. Pure TTL (no explicit invalidation) —
+# staleness is at most CC_PAIR_INFO_CACHE_TTL_SECONDS and harmless.
+CC_PAIR_INFO_CACHE_ENABLED=true
+CC_PAIR_INFO_CACHE_TTL_SECONDS=60
+# Per-user document-set list cache (the /document-set read on the chat-page
+# bundle). Write-through busted on every doc-set mutation; TTL is a short
+# backstop. Staleness is cosmetic (documents stay permission-enforced).
+DOCUMENT_SET_CACHE_ENABLED=true
+DOCUMENT_SET_CACHE_TTL_SECONDS=300
diff --git a/k8s/overlays/prod/kustomization.yaml b/k8s/overlays/prod/kustomization.yaml
new file mode 100644
index 00000000000..7e492172b1c
--- /dev/null
+++ b/k8s/overlays/prod/kustomization.yaml
@@ -0,0 +1,82 @@
+# Production overlay → Darwin AKS cluster (kubectl context: `darwin`).
+#
+# Single source of truth for the prod environment:
+#   - env.properties         non-secret config (committed)
+#   - secrets.env            real secrets (gitignored; copy from .example)
+#   - images: below          which image tags this env runs
+#   - replicas: below        deployment scaling
+#
+# Apply:    kubectl apply -k k8s/overlays/prod
+# Preview:  kubectl kustomize k8s/overlays/prod
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  - ../../base
+
+components:
+  - ../../optional/background-scaling
+
+namespace: darwin
+
+# Image refs are logical names in base/. Kustomize rewrites them to the
+# concrete registry+tag below. Bump a `newTag` here to roll out a new
+# image; no manifest edits required.
+images:
+  - name: danswer-backend
+    newName: sfbrdevhelmweacr.azurecr.io/danswer/danswer-backend
+    newTag: vha-147
+  - name: danswer-web-server
+    newName: sfbrdevhelmweacr.azurecr.io/danswer/danswer-web-server
+    newTag: vha-77
+  - name: danswer-model-server
+    newName: danswer/danswer-model-server
+    newTag: v0.3.94
+  - name: nginx
+    newName: nginx
+    newTag: 1.23.4-alpine
+  # Vespa images live in the separate Vespa overlay (k8s/overlays/prod-vespa)
+  # now that Vespa is applied independently of the app — see that file and
+  # k8s/README.md.
+
+replicas:
+  - name: api-server-deployment
+    count: 2
+  - name: background-deployment
+    count: 0
+  - name: web-server-deployment
+    count: 1
+  - name: nginx-deployment
+    count: 1
+  - name: inference-model-server-deployment
+    count: 1
+  - name: background-lite-deployment              # singleton — beat + slack; never >1
+    count: 1
+  - name: background-indexer-scheduler-deployment # singleton — the update.py loop
+    count: 1
+  - name: dask-scheduler-deployment               # singleton
+    count: 1
+  - name: dask-worker-deployment                  # ← THE indexing-throughput knob
+    count: 2
+
+# env-configmap: regenerated from env.properties. `behavior: create` because
+# base doesn't ship a ConfigMap with this name — the overlay owns it.
+configMapGenerator:
+  - name: env-configmap
+    behavior: create
+    envs:
+      - env.properties
+
+# danswer-secrets: regenerated from secrets.env (gitignored). Same name as the
+# existing live Secret — deployments referencing `danswer-secrets` work
+# unchanged.
+secretGenerator:
+  - name: danswer-secrets
+    envs:
+      - secrets.env
+
+# Without this kustomize appends a content-hash suffix to generated names
+# (e.g. danswer-secrets-abc123). Disabled so deployments' explicit
+# secretKeyRef name=danswer-secrets continues to resolve.
+generatorOptions:
+  disableNameSuffixHash: true
diff --git a/k8s/overlays/prod/secrets.env.example b/k8s/overlays/prod/secrets.env.example
new file mode 100644
index 00000000000..8f3837df1d7
--- /dev/null
+++ b/k8s/overlays/prod/secrets.env.example
@@ -0,0 +1,60 @@
+# Template for the production secrets.env file. Copy to secrets.env and fill in
+# real values. secrets.env is gitignored — never commit real values.
+#
+# IMPORTANT — values marked LEAKED were previously stored as plaintext
+# in the live env-configmap (visible to anyone with cluster read access).
+# Rotate every LEAKED value before flipping to the new layout.
+
+# --- Database (already in the existing danswer-secrets) ---
+postgres_user=postgres
+postgres_password=REPLACE_ME
+
+# --- OIDC / Entra (already in the existing danswer-secrets) ---
+oauth_client_id=REPLACE_ME
+oauth_client_secret=REPLACE_ME
+# Signs the fastapi-users session + OAuth state JWT. Generate once:
+#   openssl rand -hex 32
+# MUST be identical across all replicas and stable across restarts.
+user_auth_secret=REPLACE_ME
+
+# --- Google OAuth (legacy; already in the existing danswer-secrets) ---
+google_oauth_client_id=
+google_oauth_client_secret=
+
+# --- Redis (added by this branch; optional) ---
+# Leave empty for the unauth'd in-cluster Redis StatefulSet.
+redis_password=
+
+# --- Encryption key (was leaked as empty in live configmap — generate one) ---
+# Used to encrypt connector credentials at rest.
+# Generate once: openssl rand -hex 32
+ENCRYPTION_KEY_SECRET=REPLACE_ME
+
+# --- Slack bot (LEAKED in live configmap as plaintext — ROTATE these) ---
+DANSWER_BOT_SLACK_APP_TOKEN=REPLACE_ME
+DANSWER_BOT_SLACK_BOT_TOKEN=REPLACE_ME
+
+# --- LLM gateway credentials (LEAKED — ROTATE) ---
+GEN_AI_API_KEY=
+GEN_AI_CLIENT_ID=REPLACE_ME
+GEN_AI_CLIENT_SECRET=REPLACE_ME
+
+# --- Jira (LEAKED — ROTATE) ---
+JIRA_API_TOKEN=REPLACE_ME
+JIRA_EMAIL=REPLACE_ME
+
+# --- Opsgenie (LEAKED — ROTATE) ---
+OPSGENIE_API_KEY=REPLACE_ME
+
+# --- SMTP ---
+SMTP_PASS=
+
+# --- Azure Blob file store (only when FILE_STORE_TYPE=AzureBlobFileStore) ---
+# Storage-account connection string. Injected as the AZURE_BLOB_CONNECTION_STRING
+# env var via envFrom. MUST be the ACCOUNT-KEY string (AccountName + AccountKey)
+# — the server signs per-blob upload SAS with that key; a SAS-token string is
+# rejected. Get it with:
+#   az storage account show-connection-string -n <account> -g <rg> -o tsv
+# Setting it in a shell instead? Single-quote it — the ';' separators are shell
+# command separators and silently truncate the value otherwise.
+AZURE_BLOB_CONNECTION_STRING=
diff --git a/k8s/scripts/build-deploy.sh b/k8s/scripts/build-deploy.sh
new file mode 100755
index 00000000000..fdb0a515bf7
--- /dev/null
+++ b/k8s/scripts/build-deploy.sh
@@ -0,0 +1,295 @@
+#!/usr/bin/env bash
+#
+# build-deploy.sh <stage> [component ...]
+#
+# One command for the backend/web image lifecycle against the Darwin prod
+# overlay. Stages are CUMULATIVE — each does everything the lighter stage
+# does, then one more thing:
+#
+#   build    bump tag(s) from kustomization.yaml, docker build (linux/amd64)
+#   push     build + docker tag + docker push to the ACR
+#   deploy   push  + rewrite kustomization.yaml newTag(s) + kubectl apply -k
+#   verify   (standalone) compare LIVE cluster image tags vs the manifest,
+#            and report pod health (running / restarts / crashloops)
+#
+# Components default to BOTH (backend web). Restrict with positional args:
+#   build-deploy.sh push backend         # only the backend image
+#   build-deploy.sh deploy web           # only the web image
+#   build-deploy.sh build                # both
+#
+# The next tag for each component is computed from the CURRENT newTag in
+# k8s/overlays/prod/kustomization.yaml (vha-N -> vha-N+1). The manifest is the
+# source of truth and is only EDITED at the `deploy` stage — `build`/`push`
+# produce/push the next-tag image without touching the committed manifest, so
+# you can build/push first and deploy later (or on another machine).
+#
+# This is your manual flow, automated:
+#   docker build -f ./backend/Dockerfile ./backend -t danswer/danswer-backend:latest --platform linux/amd64
+#   docker build -f ./web/Dockerfile     ./web     -t danswer/danswer-web-server:latest --platform=linux/amd64 --load
+#   docker tag  danswer/danswer-backend:latest    $REGISTRY/danswer-backend:vha-N
+#   docker tag  danswer/danswer-web-server:latest $REGISTRY/danswer-web-server:vha-M
+#   docker push $REGISTRY/danswer-backend:vha-N
+#   docker push $REGISTRY/danswer-web-server:vha-M
+#
+# Safety:
+#   - `deploy` refuses unless the kubectl context is the prod cluster
+#     ($PROD_CONTEXT) — the prod overlay targets it. Override with FORCE=1.
+#   - The manifest tag bump is NOT git-committed; the script reminds you.
+#   - DRY_RUN=1 prints every docker/kubectl command instead of running it.
+#
+# Registry auth (push/deploy stages):
+#   Credentials are read from the environment — export them in ~/.zshrc:
+#       export ACR_USERNAME=<registry username>
+#       export ACR_PASSWORD=<registry password / token>
+#   The script does `docker login` with them (via --password-stdin, never
+#   echoed). If either is unset it tries the two exports in ~/.zshrc, then EXITS.
+#
+# Disk pre-req (build stage):
+#   Before building, if the Docker disk is >= DISK_THRESHOLD% (default 80) full,
+#   it reclaims space (build cache -> dangling images -> unused images >7d)
+#   instead of letting the build fail with "no space left on device".
+#   Tune with DISK_THRESHOLD=90; bypass with SKIP_DISK_CHECK=1.
+#
+set -euo pipefail
+
+# ---- config ---------------------------------------------------------------
+REGISTRY="sfbrdevhelmweacr.azurecr.io/danswer"
+REGISTRY_HOST="${REGISTRY%%/*}"   # sfbrdevhelmweacr.azurecr.io (login target)
+PROD_CONTEXT="darwin"
+NAMESPACE="darwin"
+REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+KUSTOMIZATION="$REPO_ROOT/k8s/overlays/prod/kustomization.yaml"
+OVERLAY_DIR="$REPO_ROOT/k8s/overlays/prod"
+
+# Per-component config as functions, not associative arrays: `declare -A` is
+# bash 4+, stock macOS ships 3.2. img_logical prints nothing for unknown names.
+#   backend: ./backend/Dockerfile  ctx ./backend   local tag danswer/danswer-backend
+#   web:     ./web/Dockerfile      ctx ./web       local tag danswer/danswer-web-server
+img_logical()      { case "$1" in backend) echo danswer-backend;;     web) echo danswer-web-server;; esac; }
+img_local()        { case "$1" in backend) echo danswer/danswer-backend;; web) echo danswer/danswer-web-server;; esac; }
+img_dockerfile()   { case "$1" in backend) echo "$REPO_ROOT/backend/Dockerfile";; web) echo "$REPO_ROOT/web/Dockerfile";; esac; }
+img_context()      { case "$1" in backend) echo "$REPO_ROOT/backend";; web) echo "$REPO_ROOT/web";; esac; }
+# web build adds --load (matches your manual command); backend does not.
+img_build_extra()  { case "$1" in web) echo "--load";; *) echo "";; esac; }
+# which live deployment to read the running tag from, for `verify`
+img_verify_deploy(){ case "$1" in backend) echo api-server-deployment;; web) echo web-server-deployment;; esac; }
+
+# ---- logging --------------------------------------------------------------
+log()  { printf '\033[1;34m==>\033[0m %s\n' "$*"; }   # progress line on stdout
+ok()   { printf '\033[1;32m ok\033[0m %s\n' "$*"; }   # success line on stdout
+warn() { printf '\033[1;33m  !\033[0m %s\n' "$*" >&2; }   # warning on stderr; does not stop the script
+die()  { printf '\033[1;31mERR\033[0m %s\n' "$*" >&2; exit 1; }   # error on stderr, then exit 1
+run()  { if [ "${DRY_RUN:-0}" = "1" ]; then printf '\033[2m  $ %s\033[0m\n' "$*"; else "$@"; fi; }   # DRY_RUN=1: print the command instead of running it
+
+# ---- registry login -------------------------------------------------------
+# Credentials come from the environment — export them in ~/.zshrc:
+#     export ACR_USERNAME=<registry username>
+#     export ACR_PASSWORD=<registry password / token>
+# When you run this script from your zsh shell they're already inherited. As a
+# fallback (e.g. invoked from a non-zsh context) we pull just those two exports
+# out of ~/.zshrc rather than sourcing the whole file (zsh syntax can break
+# under bash). Never echoed; piped via --password-stdin.
+registry_login() {
+  if [ -z "${ACR_USERNAME:-}" ] || [ -z "${ACR_PASSWORD:-}" ]; then
+    if [ -f "$HOME/.zshrc" ]; then
+      eval "$(grep -E '^[[:space:]]*export[[:space:]]+(ACR_USERNAME|ACR_PASSWORD)=' "$HOME/.zshrc" 2>/dev/null || true)"
+    fi
+  fi
+  if [ -z "${ACR_USERNAME:-}" ] || [ -z "${ACR_PASSWORD:-}" ]; then
+    die "ACR_USERNAME/ACR_PASSWORD not set — add them to ~/.zshrc (export ACR_USERNAME=..., export ACR_PASSWORD=...) and retry."
+  fi
+  log "docker login $REGISTRY_HOST as $ACR_USERNAME"
+  if [ "${DRY_RUN:-0}" = "1" ]; then
+    printf '\033[2m  $ docker login %s -u %s --password-stdin <<< $ACR_PASSWORD\033[0m\n' "$REGISTRY_HOST" "$ACR_USERNAME"
+    return 0
+  fi
+  printf '%s' "$ACR_PASSWORD" | docker login "$REGISTRY_HOST" -u "$ACR_USERNAME" --password-stdin \
+    || die "docker login to $REGISTRY_HOST failed — check ACR_USERNAME/ACR_PASSWORD in ~/.zshrc"
+  ok "logged in to $REGISTRY_HOST"
+}
+
+# ---- disk pre-req ---------------------------------------------------------
+# Before building, make sure there's room — a full Docker disk fails the build
+# with "no space left on device" partway through. If usage >= DISK_THRESHOLD%,
+# reclaim space with a graduated prune (cheapest/safest first) rather than
+# letting the build die. On Docker Desktop (mac) the build runs in a Linux VM;
+# DockerRootDir isn't a host path, so we fall back to df of the host root as a
+# proxy — pruning the build cache / unused images still frees the VM's disk,
+# which is what actually fills up.
+disk_used_pct() { df -P "$1" 2>/dev/null | awk 'NR==2{gsub(/%/,"",$5); print $5+0}'; }   # 5th df -P column of path $1, '%' stripped
+ensure_disk_space() {
+  [ "${SKIP_DISK_CHECK:-0}" = "1" ] && { warn "SKIP_DISK_CHECK=1 — skipping disk pre-req"; return 0; }
+  command -v docker >/dev/null 2>&1 || { warn "docker not found — skipping disk check"; return 0; }
+  local threshold="${DISK_THRESHOLD:-80}" root target used
+  root="$(docker info -f '{{.DockerRootDir}}' 2>/dev/null || true)"
+  target="/"; [ -n "$root" ] && [ -d "$root" ] && target="$root"   # measure / unless DockerRootDir is a real host directory
+  used="$(disk_used_pct "$target")"; used="${used:-0}"   # no df output counts as 0% (no cleanup)
+  log "disk pre-req: $target at ${used}% used (threshold ${threshold}%)"
+  docker system df 2>/dev/null || true   # informational only; failure is ignored
+  [ "$used" -lt "$threshold" ] && { ok "disk ok — no cleanup needed"; return 0; }
+
+  warn "disk >= ${threshold}% — reclaiming Docker space before build"
+  run docker builder prune -f || true            # build cache — usually the biggest, fully safe
+  run docker image prune -f   || true            # dangling (untagged) images — safe
+  used="$(disk_used_pct "$target")"; used="${used:-0}"
+  if [ "$used" -ge "$threshold" ]; then
+    warn "still ${used}% — pruning unused images older than 7d"
+    run docker image prune -af --filter "until=168h" || true   # unused tagged images >7d old
+    used="$(disk_used_pct "$target")"; used="${used:-0}"
+  fi
+  if [ "$used" -ge "$threshold" ]; then   # still full: warn only, the build is attempted anyway
+    warn "still ${used}% after cleanup — build may hit 'no space left on device'."
+    warn "free space manually, or re-run with a higher DISK_THRESHOLD / SKIP_DISK_CHECK=1."
+  else
+    ok "reclaimed space — now ${used}% used"
+  fi
+}
+
+# ---- arg parsing ----------------------------------------------------------
+STAGE="${1:-}"; shift || true
+case "$STAGE" in build|push|deploy|verify) ;; *)
+  die "usage: build-deploy.sh <build|push|deploy|verify> [backend|web ...]"; esac
+
+COMPONENTS=("$@")
+[ "${#COMPONENTS[@]}" -eq 0 ] && COMPONENTS=(backend web)
+for c in "${COMPONENTS[@]}"; do
+  [ -n "$(img_logical "$c")" ] || die "unknown component '$c' (expected: backend web)"
+done
+
+# ---- kustomization tag helpers --------------------------------------------
+# read the newTag for a logical image name out of kustomization.yaml
+read_tag() {
+  local logical="$1"
+  awk -v img="$logical" '
+    $1=="-" && $2=="name:" && $3==img {inblock=1; next}
+    inblock && $1=="newTag:" {print $2; exit}
+    inblock && $1=="-" {inblock=0}
+  ' "$KUSTOMIZATION"
+}
+# vha-146 -> vha-147 ; refuses anything not matching vha-<int>
+next_tag() {
+  local cur="$1"
+  [[ "$cur" =~ ^vha-([0-9]+)$ ]] || die "tag '$cur' is not vha-<int> — refusing to auto-increment; bump it manually."
+  echo "vha-$(( ${BASH_REMATCH[1]} + 1 ))"
+}
+# rewrite the newTag line that follows `- name: <logical>` in place
+set_tag() {
+  local logical="$1" tag="$2" tmp
+  if [ "${DRY_RUN:-0}" = "1" ]; then
+    printf '\033[2m  $ set newTag %s -> %s in kustomization.yaml\033[0m\n' "$logical" "$tag"
+    return 0
+  fi
+  tmp="$(mktemp)"
+  awk -v img="$logical" -v newtag="$tag" '
+    $1=="-" && $2=="name:" && $3==img {inblock=1}
+    inblock && $1=="newTag:" { sub(/newTag:.*/, "newTag: " newtag); inblock=0 }
+    {print}
+  ' "$KUSTOMIZATION" > "$tmp" && mv "$tmp" "$KUSTOMIZATION"
+}
+
+# ---- verify (standalone) --------------------------------------------------
+if [ "$STAGE" = "verify" ]; then
+  ctx="$(kubectl config current-context 2>/dev/null || true)"
+  log "kubectl context: ${ctx:-<none>}  (expected prod: $PROD_CONTEXT)"
+  [ "$ctx" = "$PROD_CONTEXT" ] || warn "not on prod context — live values below are from '$ctx'."
+  rc=0   # set to 1 on tag drift or an unreadable deployment; pod health never changes it
+  for c in "${COMPONENTS[@]}"; do
+    logical="$(img_logical "$c")"; deploy="$(img_verify_deploy "$c")"
+    manifest_tag="$(read_tag "$logical")"
+    live_img="$(kubectl get deploy "$deploy" -n "$NAMESPACE" \
+      -o jsonpath='{.spec.template.spec.containers[0].image}' 2>/dev/null || true)"
+    live_tag="${live_img##*:}"   # text after the last ':' of the first container's image
+    if [ -z "$live_img" ]; then
+      warn "$c: could not read live image from deploy/$deploy"
+      rc=1
+    elif [ "$live_tag" = "$manifest_tag" ]; then
+      ok "$c: live=$live_tag == manifest=$manifest_tag"
+    else
+      warn "$c: live=$live_tag != manifest=$manifest_tag (cluster does not match the manifest)"
+      rc=1
+    fi
+  done
+  log "pod health in ns/$NAMESPACE (non-Running / restarts):"
+  kubectl get pods -n "$NAMESPACE" --no-headers 2>/dev/null | awk '
+    { ready=$2; status=$3; restarts=$4; name=$1
+      if (status!="Running" && status!="Completed") { print "  ! " name "  " status "  ready=" ready; bad++ }
+      else if (restarts+0 > 0) { print "  ~ " name "  restarts=" restarts }
+    }
+    END { if (bad>0) exit 0 }' || true   # report-only: awk exits 0 either way and failures are ignored
+  [ "$rc" -eq 0 ] && ok "verify: cluster matches manifest" || warn "verify: drift or unreadable — see above"
+  exit "$rc"
+fi
+
+# next tag for a component, computed fresh from the manifest each call (no
+# associative-array state — keeps this bash-3.2 safe).
+component_next_tag() {
+  local cur; cur="$(read_tag "$(img_logical "$1")")"
+  [ -n "$cur" ] || die "could not read current tag for '$1' in kustomization"
+  next_tag "$cur"
+}
+
+# ---- preview next tags for the requested components -----------------------
+log "computing next tags from $(basename "$KUSTOMIZATION"):"
+for c in "${COMPONENTS[@]}"; do
+  cur="$(read_tag "$(img_logical "$c")")"
+  printf '    %-8s %s -> %s\n' "$c" "$cur" "$(next_tag "$cur")"
+done
+
+# ---- build ----------------------------------------------------------------
+ensure_disk_space
+log "BUILD (linux/amd64)"
+cd "$REPO_ROOT"
+for c in "${COMPONENTS[@]}"; do
+  local_tag="$(img_local "$c"):latest"
+  log "build $c -> $local_tag"
+  # shellcheck disable=SC2046,SC2086
+  run docker build -f "$(img_dockerfile "$c")" "$(img_context "$c")" \
+      -t "$local_tag" --platform linux/amd64 $(img_build_extra "$c")
+  ok "built $local_tag"
+done
+[ "$STAGE" = "build" ] && { ok "build complete (no push/deploy)"; exit 0; }
+
+# ---- push -----------------------------------------------------------------
+log "PUSH -> $REGISTRY"
+registry_login   # docker login using $ACR_USERNAME/$ACR_PASSWORD (see helper)
+for c in "${COMPONENTS[@]}"; do
+  local_tag="$(img_local "$c"):latest"
+  remote_tag="$REGISTRY/$(img_logical "$c"):$(component_next_tag "$c")"
+  run docker tag "$local_tag" "$remote_tag"
+  log "push $remote_tag"
+  if ! run docker push "$remote_tag"; then
+    warn "push failed — check ACR_USERNAME/ACR_PASSWORD in ~/.zshrc, or run: az acr login --name ${REGISTRY_HOST%%.*}"
+    die "aborting at push for $c"
+  fi
+  ok "pushed $remote_tag"
+done
+[ "$STAGE" = "push" ] && { ok "push complete (manifest NOT modified; run 'deploy' to roll out)"; exit 0; }
+
+# ---- deploy ---------------------------------------------------------------
+log "DEPLOY"
+ctx="$(kubectl config current-context 2>/dev/null || true)"
+if [ "$ctx" != "$PROD_CONTEXT" ]; then
+  [ "${FORCE:-0}" = "1" ] || die "kubectl context is '$ctx', expected prod '$PROD_CONTEXT'. Switch context or set FORCE=1."
+  warn "context '$ctx' != '$PROD_CONTEXT' but FORCE=1 set — proceeding."
+fi
+# Capture the tags BEFORE editing — set_tag mutates the manifest, so
+# component_next_tag would read the already-bumped value on a second call.
+APPLIED=()
+for c in "${COMPONENTS[@]}"; do
+  nxt="$(component_next_tag "$c")"
+  set_tag "$(img_logical "$c")" "$nxt"
+  APPLIED+=("$c=$nxt")
+  ok "kustomization newTag $(img_logical "$c") -> $nxt"
+done
+log "kubectl apply -k $OVERLAY_DIR  (ns=$NAMESPACE, context=$ctx)"
+run kubectl apply -k "$OVERLAY_DIR"
+ok "applied. Rollout status:"
+for c in "${COMPONENTS[@]}"; do
+  d="$(img_verify_deploy "$c")"
+  run kubectl rollout status "deploy/$d" -n "$NAMESPACE" --timeout=180s || \
+    warn "rollout for $d did not complete in time — check manually"
+done
+warn "manifest tag bump is NOT committed. Commit it:"   # hint only; both git commands are pinned to the repo via -C
+printf '      git -C %s add %s && git -C %s commit -m "k8s(prod): bump %s"\n' \
+  "$REPO_ROOT" "k8s/overlays/prod/kustomization.yaml" "$REPO_ROOT" "${APPLIED[*]}"
+ok "deploy complete"
diff --git a/k8s/scripts/guarded-apply.sh b/k8s/scripts/guarded-apply.sh
new file mode 100755
index 00000000000..1b006459ac2
--- /dev/null
+++ b/k8s/scripts/guarded-apply.sh
@@ -0,0 +1,90 @@
+#!/usr/bin/env bash
+#
+# guarded-apply.sh <overlay> [namespace]
+#
+# Wrapper around `kubectl apply -k k8s/overlays/<overlay>` that refuses to
+# apply a Vespa version pin which would jump too far from the version the
+# cluster is ACTUALLY running.
+#
+# Vespa is applied via its OWN overlays (prod-vespa / local-vespa); the app
+# overlays (prod / local) no longer contain Vespa. So this guard only kicks
+# in for an overlay that actually renders Vespa — for an app overlay it just
+# diffs + applies. Run `guarded-apply.sh prod-vespa` to apply Vespa safely.
+#
+# Why this exists: Vespa's config server refuses an auto-upgrade spanning
+# more than MAX_UPGRADE_HOP minor releases (see AGENTS.md "Critical facts
+# §10"). A bare tag bump that crosses that gap crash-loops the config
+# server and takes the whole cluster down. This guard catches it BEFORE
+# the apply reaches the cluster.
+#
+# It checks against the LIVE running version (not the repo's previous pin)
+# on purpose — config can drift out of git, so live is the only truth that
+# matters at apply time.
+#
+# Usage:
+#   k8s/scripts/guarded-apply.sh prod
+#   k8s/scripts/guarded-apply.sh local default
+#   FORCE=1 k8s/scripts/guarded-apply.sh prod   # override the guard (you accept the risk)
+#
+set -euo pipefail
+
+OVERLAY="${1:?usage: guarded-apply.sh <prod|local> [namespace]}"
+DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)/overlays/${OVERLAY}"
+# Default namespace per overlay (prod / prod-vespa → darwin, else → default).
+if [ "${2:-}" != "" ]; then NS="$2"; elif [[ "$OVERLAY" == prod* ]]; then NS="darwin"; else NS="default"; fi
+MAX_UPGRADE_HOP=30   # Vespa's documented limit (minor releases per upgrade)
+
+[ -d "$DIR" ] || { echo "ERROR: overlay dir not found: $DIR"; exit 2; }
+
+minor() { echo "$1" | cut -d. -f2; }
+major() { echo "$1" | cut -d. -f1; }
+
+# Render once, up front. Abort if rendering fails: an empty render would look
+# like "no Vespa in this overlay" below and the version guard would be skipped.
+RENDERED=$(kubectl kustomize "$DIR") || { echo "ERROR: kubectl kustomize failed for $DIR" >&2; exit 2; }
+
+# Does this overlay deploy Vespa at all? App overlays (prod/local) don't — Vespa
+# is applied via the separate *-vespa overlays. `has_vespa` is any
+# vespaengine/vespa image (any tag, even :latest). `|| true`: under `set -euo
+# pipefail` a no-match grep exits 1 and would abort the script before applying.
+has_vespa=$(echo "$RENDERED" | grep -oE 'image: *vespaengine/vespa:' | head -1 || true)
+
+# New Vespa version this overlay would deploy (pinned X.Y.Z only).
+new_ver=$(echo "$RENDERED" \
+  | grep -oE 'image: *vespaengine/vespa:[0-9]+\.[0-9]+\.[0-9]+' | head -1 | sed -E 's/.*://' || true)
+
+abort() { echo "REFUSING TO APPLY. $1"; echo "Override with FORCE=1 if you understand the risk."; [ "${FORCE:-0}" = "1" ] && { echo "FORCE=1 set — proceeding anyway."; return 0; }; exit 1; }
+
+if [ -z "$has_vespa" ]; then
+  echo "No Vespa in overlay '$OVERLAY' — skipping the Vespa version guard (Vespa is applied via the *-vespa overlays)."
+else
+  # Current running version: the image tag on a live Vespa StatefulSet.
+  cur_ver=$(kubectl get statefulset vespa-content -n "$NS" \
+    -o jsonpath='{.spec.template.spec.containers[0].image}' 2>/dev/null \
+    | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' || true)
+
+  echo "Vespa version guard: current(live)=${cur_ver:-<unknown>}  new(overlay)=${new_ver:-<unparseable>}  ns=$NS"
+
+  if [ -z "$new_ver" ]; then
+    abort "Overlay deploys Vespa but on an unparseable/floating tag (e.g. :latest) — pin an explicit vespaengine/vespa:X.Y.Z."
+  elif [ -z "$cur_ver" ]; then
+    echo "WARNING: could not read the live Vespa version (no cluster access, or vespa-content not deployed yet). Skipping the gap check — verify manually."
+  elif [ "$(major "$cur_ver")" != "$(major "$new_ver")" ]; then
+    abort "Vespa MAJOR version change $cur_ver -> $new_ver. Major upgrades need a dedicated migration, not this guard. "
+  else
+    gap=$(( $(minor "$new_ver") - $(minor "$cur_ver") ))
+    if [ "$gap" -gt "$MAX_UPGRADE_HOP" ]; then
+      abort "Vespa UPGRADE $cur_ver -> $new_ver is $gap minor releases (> $MAX_UPGRADE_HOP). Vespa will refuse this and crash-loop the config server. Do a STEPWISE upgrade (<=$MAX_UPGRADE_HOP per hop). "
+    elif [ "$gap" -lt "-$MAX_UPGRADE_HOP" ]; then
+      echo "WARNING: large DOWNGRADE $cur_ver -> $new_ver (${gap} minors). This is OK only if $new_ver matches the on-disk index format (e.g. recovering after an accidental upgrade). If unsure, STOP."
+      [ "${FORCE:-0}" = "1" ] || { echo "Re-run with FORCE=1 to confirm the downgrade."; exit 1; }
+    else
+      echo "OK: Vespa $cur_ver -> $new_ver is within the $MAX_UPGRADE_HOP-release limit."
+    fi
+  fi
+fi
+
+echo "--- kubectl diff (review before apply) ---"
+kubectl diff -k "$DIR" || true   # diff exits non-zero when there ARE differences; that's expected
+echo "--- applying ---"
+kubectl apply -k "$DIR"
diff --git a/k8s/scripts/vespa-upgrade.sh b/k8s/scripts/vespa-upgrade.sh
new file mode 100755
index 00000000000..0f22097af5c
--- /dev/null
+++ b/k8s/scripts/vespa-upgrade.sh
@@ -0,0 +1,166 @@
+#!/usr/bin/env bash
+#
+# vespa-upgrade.sh <target-version> [namespace]
+#
+# Performs an ORDERED, HEALTH-GATED, single-hop Vespa upgrade across the five
+# StatefulSets. This logic deliberately lives in a script, NOT the manifests:
+# kustomize is declarative and cannot sequence a multi-StatefulSet, version-
+# stepped, health-gated rollout. A plain `kubectl apply` of a bumped tag rolls
+# every role at once with no ordering — exactly what took prod down once.
+#
+# What it does, in order (Vespa's recommended sequence):
+#   1. guard  — refuse major-version / >MAX_HOP / downgrade jumps (as
+#               guarded-apply.sh does), checked against the LIVE version.
+#   2. config servers   (vespa-configserver)      rollout, gated on readiness
+#   3. admin/controller (vespa-admin)             rollout, gated on readiness
+#   4. content nodes    (vespa-content)           ONE ORDINAL AT A TIME via
+#               updateStrategy.partition stepping, with an explicit health
+#               check between each — this is the data tier, so we never let
+#               two content nodes be mid-upgrade at once.
+#   5. feed containers  (vespa-feed-container)    rollout, gated on readiness
+#   6. query containers (vespa-query-container)   rollout, gated on readiness
+#
+# Health is checked from INSIDE each pod (kubectl exec → localhost), not via
+# port-forward, because the cluster runs Istio and external probes hit mTLS.
+#
+# Single hop only: this refuses jumps Vespa itself refuses (> MAX_HOP minor
+# releases). For a larger upgrade, run it repeatedly with intermediate
+# versions (e.g. 8.600.35 -> 8.630.x -> 8.660.x -> ...). Each intermediate
+# image must exist on the registry and be on-disk-format compatible.
+#
+# Usage:
+#   k8s/scripts/vespa-upgrade.sh 8.620.43            # prod (ns darwin)
+#   k8s/scripts/vespa-upgrade.sh 8.620.43 darwin
+#   DRY_RUN=1 k8s/scripts/vespa-upgrade.sh 8.620.43  # print actions, change nothing
+#   YES=1     k8s/scripts/vespa-upgrade.sh 8.620.43  # skip the confirm prompt
+#   FORCE=1   k8s/scripts/vespa-upgrade.sh 8.620.43  # override the version guard
+#
+# After a successful run, update the per-role vespa newTag values in the
+# overlay kustomization so git ≈ live.
+set -euo pipefail
+
+TARGET="${1:?usage: vespa-upgrade.sh <target-version X.Y.Z> [namespace]}"
+NS="${2:-darwin}"
+REGISTRY_IMAGE="vespaengine/vespa"
+MAX_HOP=30   # Vespa's documented per-upgrade minor-release limit
+
+DRY_RUN="${DRY_RUN:-0}"
+YES="${YES:-0}"
+FORCE="${FORCE:-0}"
+
+# Per-role lookups via case (portable to bash 3.2, which macOS still ships —
+# no associative arrays). StatefulSet name, container name, and health port.
+ROLES_ORDER="configserver admin content feed query"
+ss_of() {        case "$1" in
+  configserver) echo vespa-configserver ;;  admin) echo vespa-admin ;;
+  content)      echo vespa-content ;;        feed)  echo vespa-feed-container ;;
+  query)        echo vespa-query-container ;; *) die "unknown role $1" ;; esac; }
+container_of() { ss_of "$1"; }   # container name == StatefulSet name for every role
+port_of() {      case "$1" in
+  configserver) echo 19071 ;;  admin) echo 19092 ;;  content) echo 19092 ;;
+  feed)         echo 8080 ;;   query) echo 8080 ;;   *) die "unknown role $1" ;; esac; }
+
+run() { echo "+ $*"; [ "$DRY_RUN" = "1" ] || "$@"; }
+die() { echo "ERROR: $*" >&2; exit 1; }
+minor() { echo "$1" | cut -d. -f2; }
+major() { echo "$1" | cut -d. -f1; }
+
+[[ "$TARGET" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]] || die "target must be X.Y.Z, got '$TARGET'"
+
+echo "Context: $(kubectl config current-context)   namespace: $NS"
+
+# --- version guard (against the LIVE content-node image, the source of truth) ---
+CUR=$(kubectl get statefulset "$(ss_of content)" -n "$NS" \
+  -o jsonpath='{.spec.template.spec.containers[0].image}' 2>/dev/null \
+  | grep -oE '[0-9]+\.[0-9]+\.[0-9]+' || true)
+[ -n "$CUR" ] || die "could not read live Vespa version from $(ss_of content) in ns $NS (cluster access? deployed?)"
+
+echo "Current(live)=$CUR  Target=$TARGET"
+if [ "$CUR" = "$TARGET" ]; then echo "Already at $TARGET — nothing to do."; exit 0; fi
+[ "$(major "$CUR")" = "$(major "$TARGET")" ] || \
+  die "major-version change $CUR -> $TARGET needs a dedicated migration, not this script."
+HOP=$(( $(minor "$TARGET") - $(minor "$CUR") ))
+if [ "$HOP" -lt 0 ]; then
+  [ "$FORCE" = "1" ] || die "DOWNGRADE $CUR -> $TARGET. Only valid for outage recovery to the on-disk format. Re-run with FORCE=1 if you mean it."
+  echo "WARNING: downgrade $CUR -> $TARGET (FORCE=1)."
+elif [ "$HOP" -gt "$MAX_HOP" ]; then
+  [ "$FORCE" = "1" ] || die "$CUR -> $TARGET is $HOP minors (> $MAX_HOP). Vespa will refuse it and crash-loop the config server. Upgrade STEPWISE via intermediate versions. (FORCE=1 to override — not advised.)"
+  echo "WARNING: $HOP-minor hop exceeds $MAX_HOP (FORCE=1)."
+else
+  echo "OK: $HOP-minor hop is within the $MAX_HOP limit."
+fi
+
+echo
+echo "Plan (ns=$NS): set ${REGISTRY_IMAGE}:${TARGET} on, in order:"
+for r in $ROLES_ORDER; do echo "   - $r  ($(ss_of "$r"))"; done
+echo
+if [ "$DRY_RUN" != "1" ] && [ "$YES" != "1" ]; then
+  read -r -p "Proceed against context '$(kubectl config current-context)' / ns '$NS'? [y/N] " ans
+  [ "$ans" = "y" ] || [ "$ans" = "Y" ] || die "aborted by user."
+fi
+
+# Poll /state/v1/health on a specific pod from inside the vespa container.
+health_ok() {
+  local pod="$1" container="$2" port="$3"
+  local code
+  code=$(kubectl exec -n "$NS" "$pod" -c "$container" -- \
+    sh -c "curl -s -m 5 -o /dev/null -w '%{http_code}' http://localhost:${port}/state/v1/health" 2>/dev/null || echo "000")
+  [ "$code" = "200" ]
+}
+
+wait_pod_healthy() {
+  local pod="$1" container="$2" port="$3" tries=60
+  echo "  waiting for $pod to be Ready + health 200 on :$port ..."
+  [ "$DRY_RUN" = "1" ] && { echo "  (dry-run: skip wait)"; return 0; }
+  kubectl wait --for=condition=ready "pod/$pod" -n "$NS" --timeout=600s   # a non-zero exit aborts the script (set -e)
+  for ((i=0;i<tries;i++)); do   # up to 60 health polls, 10s apart
+    if health_ok "$pod" "$container" "$port"; then echo "  $pod healthy."; return 0; fi
+    sleep 10
+  done
+  die "$pod did not report health 200 on :$port in time — STOPPING. Cluster left mid-upgrade; investigate before continuing."
+}
+
+upgrade_simple() {  # roles whose StatefulSet rolling update (readiness-gated) is safe as-is
+  local r="$1" ss c port
+  ss="$(ss_of "$r")"; c="$(container_of "$r")"; port="$(port_of "$r")"
+  echo ">>> [$r] $ss -> ${REGISTRY_IMAGE}:${TARGET}"
+  run kubectl set image "statefulset/$ss" "$c=${REGISTRY_IMAGE}:${TARGET}" -n "$NS"
+  echo "  rolling out (one pod at a time, gated by readiness probe)..."
+  [ "$DRY_RUN" = "1" ] || kubectl rollout status "statefulset/$ss" -n "$NS" --timeout=900s
+}
+
+upgrade_content() {  # data tier: one ordinal at a time via partition stepping
+  local ss c port n
+  ss="$(ss_of content)"; c="$(container_of content)"; port="$(port_of content)"
+  n=$(kubectl get statefulset "$ss" -n "$NS" -o jsonpath='{.spec.replicas}')
+  echo ">>> [content] $ss ($n replicas) -> ${REGISTRY_IMAGE}:${TARGET}, ONE ordinal at a time"
+  # Freeze updates (partition above all ordinals), set image, then release
+  # ordinals from highest to lowest, verifying health between each.
+  run kubectl patch "statefulset/$ss" -n "$NS" --type merge -p "{\"spec\":{\"updateStrategy\":{\"rollingUpdate\":{\"partition\":$n}}}}"
+  run kubectl set image "statefulset/$ss" "$c=${REGISTRY_IMAGE}:${TARGET}" -n "$NS"
+  for ((ord=n-1; ord>=0; ord--)); do
+    echo "  -- releasing content ordinal $ord"
+    run kubectl patch "statefulset/$ss" -n "$NS" --type merge -p "{\"spec\":{\"updateStrategy\":{\"rollingUpdate\":{\"partition\":$ord}}}}"
+    # Right after the patch the OLD pod is still Ready + healthy, so first wait until it has been replaced.
+    [ "$DRY_RUN" = "1" ] || kubectl rollout status "statefulset/$ss" -n "$NS" --timeout=900s
+    wait_pod_healthy "${ss}-${ord}" "$c" "$port"
+  done
+  echo "  content tier fully upgraded."
+}
+
+for r in $ROLES_ORDER; do
+  if [ "$r" = "content" ]; then upgrade_content; else upgrade_simple "$r"; fi
+  # Verify every pod of this role before advancing to the next role.
+  if [ "$DRY_RUN" != "1" ]; then
+    for pod in $(kubectl get pods -n "$NS" -l "app=$(ss_of "$r")" -o name | sed 's#pod/##'); do
+      health_ok "$pod" "$(container_of "$r")" "$(port_of "$r")" \
+        || die "$pod unhealthy after upgrade — STOPPING before next role."
+    done
+  fi
+  echo "<<< [$r] done."
+  echo
+done
+
+echo "Vespa upgrade $CUR -> $TARGET complete across all roles."
+echo "NOW: update the per-role vespa newTag values to \"$TARGET\" in"
+echo "     k8s/overlays/{prod,local}-vespa/kustomization.yaml so git matches live."
diff --git a/web/src/app/admin/analytics/page.tsx b/web/src/app/admin/analytics/page.tsx
index 03adc9f21c9..adf36fb99c1 100644
--- a/web/src/app/admin/analytics/page.tsx
+++ b/web/src/app/admin/analytics/page.tsx
@@ -10,6 +10,17 @@ import {
   DateRangePickerValue,
   Grid,
   Metric,
+  Tab,
+  TabGroup,
+  TabList,
+  TabPanel,
+  TabPanels,
+  Table,
+  TableBody,
+  TableCell,
+  TableHead,
+  TableHeaderCell,
+  TableRow,
   Text,
   Title,
 } from "@tremor/react";
@@ -62,6 +73,37 @@ interface SlackChannelsResponse {
   enabled_channels: number;
 }
 
+interface UserAdoptionRow {
+  new_users: number; // summed over the rows for the "new users in range" figure
+  cumulative_users: number; // running total; the last row is read as users-ever-tried
+  date: string; // passed through as the chart row's `date`
+}
+
+interface PerUserChatStatsRow {
+  user_id: string;
+  email: string;
+  total_messages: number;
+  total_likes: number;
+  total_dislikes: number;
+  last_active: string;
+}
+
+interface PersonaUsageRow {
+  persona_id: number;
+  name: string;
+  sessions: number;
+  messages: number;
+  likes: number;
+  dislikes: number;
+  last_active: string;
+}
+
+interface DocumentSetUsageRow {
+  document_set_id: number;
+  name: string; // becomes the bar's `name`
+  attributed_messages: number; // becomes the bar's `value`; rows with 0 are filtered out
+}
+
 type Granularity = "day" | "month";
 
 const DEFAULT_LOOKBACK_DAYS = 30;
@@ -161,6 +203,38 @@ export default function AnalyticsPage() {
     swrOpts
   );
 
+  const {
+    data: adoptionData,
+    isLoading: adoptionLoading,
+    error: adoptionErr,
+  } = useSWR<UserAdoptionRow[]>(
+    buildURL("/analytics/admin/user-adoption", range),
+    errorHandlingFetcher,
+    swrOpts
+  );
+
+  const { data: perUserData, error: perUserErr } = useSWR<
+    PerUserChatStatsRow[]
+  >(
+    buildURL("/analytics/admin/per-user", range),
+    errorHandlingFetcher,
+    swrOpts
+  );
+
+  const { data: personaData, error: personaErr } = useSWR<PersonaUsageRow[]>(
+    buildURL("/analytics/admin/persona-usage", range),
+    errorHandlingFetcher,
+    swrOpts
+  );
+
+  const { data: docSetUsageData, error: docSetUsageErr } = useSWR<
+    DocumentSetUsageRow[]
+  >(
+    buildURL("/analytics/admin/document-set-usage", range),
+    errorHandlingFetcher,
+    swrOpts
+  );
+
   // Snapshot endpoints — independent of date range, refresh on mount only.
   const { data: totalDocs, error: totalDocsErr } = useSWR<TotalDocsResponse>(
     buildURL("/analytics/admin/total-docs"),
@@ -184,11 +258,16 @@ export default function AnalyticsPage() {
   const isInitialLoading =
     (queryLoading && !queryData) ||
     (userLoading && !userData) ||
-    (botLoading && !botData);
+    (botLoading && !botData) ||
+    (adoptionLoading && !adoptionData);
   const hasError =
     queryErr ||
     userErr ||
     botErr ||
+    adoptionErr ||
+    perUserErr ||
+    personaErr ||
+    docSetUsageErr ||
     totalDocsErr ||
     docsBySourceErr ||
     slackChannelsErr;
@@ -238,13 +317,44 @@ export default function AnalyticsPage() {
         ? Math.round((totalAutoResolved / totalBotQueries) * 100)
         : null;
 
+    // Cumulative is monotonic, so the latest row in the range carries the
+    // running total of distinct users who have ever tried chat.
+    const adoptionRows = adoptionData ?? [];
+    const totalUsersEverTried =
+      adoptionRows.length > 0
+        ? adoptionRows[adoptionRows.length - 1].cumulative_users
+        : 0;
+    const newUsersInRange = adoptionRows.reduce((s, r) => s + r.new_users, 0);
+
     return {
       totalQueries,
       peakActiveUsers,
       autoResolvePct,
       positivity,
+      totalUsersEverTried,
+      newUsersInRange,
     };
-  }, [queryData, userData, botData]);
+  }, [queryData, userData, botData, adoptionData]);
+
+  // Adoption series: new users per period + the cumulative curve.
+  const adoptionDaily = useMemo(
+    () =>
+      (adoptionData ?? []).map((r) => ({
+        date: r.date,
+        "New Users": r.new_users,
+        "Cumulative Users": r.cumulative_users,
+      })),
+    [adoptionData]
+  );
+  // Monthly: New Users sum within the month; Cumulative takes the peak
+  // (= end-of-month value) since summing a running total is meaningless.
+  const adoptionChartData = useMemo(
+    () =>
+      granularity === "day"
+        ? adoptionDaily
+        : bucketToMonth(adoptionDaily, new Set(["Cumulative Users"])),
+    [adoptionDaily, granularity]
+  );
 
   // Combined query-performance series: queries (from /query) overlaid
   // with active users (from /user). Date join is on ISO date string.
@@ -293,6 +403,14 @@ export default function AnalyticsPage() {
     [docsBySource]
   );
 
+  const docSetUsageBars = useMemo(
+    () =>
+      (docSetUsageData ?? [])
+        .filter((r) => r.attributed_messages > 0)
+        .map((r) => ({ name: r.name, value: r.attributed_messages })),
+    [docSetUsageData]
+  );
+
   return (
     <div className="mx-auto container">
       <AdminPageTitle icon={<FiBarChart2 size={32} />} title="Analytics" />
@@ -330,133 +448,306 @@ export default function AnalyticsPage() {
       {isInitialLoading ? (
         <LoadingAnimation text="Loading analytics" />
       ) : (
-        <>
-          {/* Top row: range-scoped KPIs */}
-          <Grid
-            numItems={1}
-            numItemsSm={2}
-            numItemsLg={3}
-            className="gap-4 mb-4"
-          >
-            <Card>
-              <Text>Total Queries (range)</Text>
-              <Metric>{kpis.totalQueries.toLocaleString()}</Metric>
-            </Card>
-            <Card>
-              <Text>Peak Daily Active Users</Text>
-              <Metric>{kpis.peakActiveUsers.toLocaleString()}</Metric>
-            </Card>
-            <Card>
-              <Text>Auto-Resolution Rate (Slack)</Text>
-              <Metric>
-                {kpis.autoResolvePct !== null ? `${kpis.autoResolvePct}%` : "—"}
-              </Metric>
-            </Card>
-          </Grid>
-
-          {/* Snapshot KPIs — current state, independent of date range */}
-          <Grid
-            numItems={1}
-            numItemsSm={2}
-            numItemsLg={4}
-            className="gap-4 mb-6"
-          >
-            <Card>
-              <Text>Total Docs Indexed</Text>
-              <Metric>
-                {totalDocs
-                  ? totalDocs.total_docs_indexed.toLocaleString()
-                  : "—"}
-              </Metric>
-              <Text className="mt-1 text-xs">
-                {totalDocs
-                  ? `${totalDocs.unique_docs.toLocaleString()} unique`
-                  : ""}
-              </Text>
-            </Card>
-            <Card>
-              <Text>Slack Channels Enabled</Text>
-              <Metric>
-                {slackChannels
-                  ? slackChannels.enabled_channels.toLocaleString()
-                  : "—"}
-              </Metric>
-              <Text className="mt-1 text-xs">
-                {slackChannels
-                  ? `across ${slackChannels.total_configs} config(s)`
-                  : ""}
-              </Text>
-            </Card>
-            <Card>
-              <Text>Positive Feedback %</Text>
-              <Metric>
-                {kpis.positivity !== null ? `${kpis.positivity}%` : "—"}
-              </Metric>
-              <Text className="mt-1 text-xs">over selected date range</Text>
-            </Card>
-            <Card>
-              <Text>Sources Active</Text>
-              <Metric>
-                {docsBySource ? docsBySourceBars.length.toLocaleString() : "—"}
-              </Metric>
-              <Text className="mt-1 text-xs">
-                {docsBySource ? `of ${docsBySource.length} configured` : ""}
-              </Text>
-            </Card>
-          </Grid>
-
-          <Grid numItems={1} numItemsLg={2} className="gap-4 mb-6">
-            <Card>
-              <Title>Users and Query Trend</Title>
-              <Text>
-                {granularity === "day"
-                  ? "Daily"
-                  : "Monthly (Active Users = peak day)"}{" "}
-                assistant replies overlaid with active users
-              </Text>
-              <AreaChart
-                className="mt-4 h-72"
-                data={queryPerformanceData}
-                index="date"
-                categories={["Queries", "Active Users"]}
-                colors={["blue", "green"]}
-                showLegend={true}
-                noDataText="No data in this date range"
-              />
-            </Card>
-
-            <Card>
-              <Title>Feedback Trend</Title>
-              <Text>
-                {granularity === "day" ? "Daily" : "Monthly"} thumbs up vs
-                thumbs down
-              </Text>
-              <AreaChart
-                className="mt-4 h-72"
-                data={feedbackData}
-                index="date"
-                categories={["Likes", "Dislikes"]}
-                colors={["emerald", "rose"]}
-                showLegend={true}
-                noDataText="No feedback in this date range"
-              />
-            </Card>
-          </Grid>
-
-          <Card className="mb-6">
-            <Title>Docs Indexed by Source</Title>
-            <Text>Snapshot — sum across all cc-pairs per source type</Text>
-            {docsBySourceBars.length > 0 ? (
-              <BarList
-                className="mt-4"
-                data={docsBySourceBars}
-                valueFormatter={(n: number) => n.toLocaleString()}
-              />
-            ) : (
-              <Text className="mt-4">No documents indexed yet.</Text>
-            )}
-          </Card>
-        </>
+        <TabGroup>
+          <TabList className="mb-6">
+            <Tab>Overview</Tab>
+            <Tab>User Activity</Tab>
+          </TabList>
+          <TabPanels>
+            <TabPanel>
+              {/* Top row: range-scoped KPIs */}
+              <Grid
+                numItems={1}
+                numItemsSm={2}
+                numItemsLg={3}
+                className="gap-4 mb-4"
+              >
+                <Card>
+                  <Text>Total Queries (range)</Text>
+                  <Metric>{kpis.totalQueries.toLocaleString()}</Metric>
+                </Card>
+                <Card>
+                  <Text>Peak Daily Active Users</Text>
+                  <Metric>{kpis.peakActiveUsers.toLocaleString()}</Metric>
+                </Card>
+                <Card>
+                  <Text>Auto-Resolution Rate (Slack)</Text>
+                  <Metric>
+                    {kpis.autoResolvePct !== null
+                      ? `${kpis.autoResolvePct}%`
+                      : "—"}
+                  </Metric>
+                </Card>
+              </Grid>
+
+              {/* Snapshot KPIs — current state, independent of date range */}
+              <Grid
+                numItems={1}
+                numItemsSm={2}
+                numItemsLg={4}
+                className="gap-4 mb-6"
+              >
+                <Card>
+                  <Text>Total Docs Indexed</Text>
+                  <Metric>
+                    {totalDocs
+                      ? totalDocs.total_docs_indexed.toLocaleString()
+                      : "—"}
+                  </Metric>
+                  <Text className="mt-1 text-xs">
+                    {totalDocs
+                      ? `${totalDocs.unique_docs.toLocaleString()} unique`
+                      : ""}
+                  </Text>
+                </Card>
+                <Card>
+                  <Text>Slack Channels Enabled</Text>
+                  <Metric>
+                    {slackChannels
+                      ? slackChannels.enabled_channels.toLocaleString()
+                      : "—"}
+                  </Metric>
+                  <Text className="mt-1 text-xs">
+                    {slackChannels
+                      ? `across ${slackChannels.total_configs} config(s)`
+                      : ""}
+                  </Text>
+                </Card>
+                <Card>
+                  <Text>Positive Feedback %</Text>
+                  <Metric>
+                    {kpis.positivity !== null ? `${kpis.positivity}%` : "—"}
+                  </Metric>
+                  <Text className="mt-1 text-xs">over selected date range</Text>
+                </Card>
+                <Card>
+                  <Text>Sources Active</Text>
+                  <Metric>
+                    {docsBySource
+                      ? docsBySourceBars.length.toLocaleString()
+                      : "—"}
+                  </Metric>
+                  <Text className="mt-1 text-xs">
+                    {docsBySource ? `of ${docsBySource.length} configured` : ""}
+                  </Text>
+                </Card>
+              </Grid>
+
+              <Grid numItems={1} numItemsLg={2} className="gap-4 mb-6">
+                <Card>
+                  <Title>Users and Query Trend</Title>
+                  <Text>
+                    {granularity === "day"
+                      ? "Daily"
+                      : "Monthly (Active Users = peak day)"}{" "}
+                    assistant replies overlaid with active users
+                  </Text>
+                  <AreaChart
+                    className="mt-4 h-72"
+                    data={queryPerformanceData}
+                    index="date"
+                    categories={["Queries", "Active Users"]}
+                    colors={["blue", "green"]}
+                    showLegend={true}
+                    noDataText="No data in this date range"
+                  />
+                </Card>
+
+                <Card>
+                  <Title>Feedback Trend</Title>
+                  <Text>
+                    {granularity === "day" ? "Daily" : "Monthly"} thumbs up vs
+                    thumbs down
+                  </Text>
+                  <AreaChart
+                    className="mt-4 h-72"
+                    data={feedbackData}
+                    index="date"
+                    categories={["Likes", "Dislikes"]}
+                    colors={["emerald", "rose"]}
+                    showLegend={true}
+                    noDataText="No feedback in this date range"
+                  />
+                </Card>
+              </Grid>
+
+              <Card className="mb-6">
+                <Title>Docs Indexed by Source</Title>
+                <Text>Snapshot — sum across all cc-pairs per source type</Text>
+                {docsBySourceBars.length > 0 ? (
+                  <BarList
+                    className="mt-4"
+                    data={docsBySourceBars}
+                    valueFormatter={(n: number) => n.toLocaleString()}
+                  />
+                ) : (
+                  <Text className="mt-4">No documents indexed yet.</Text>
+                )}
+              </Card>
+            </TabPanel>
+
+            <TabPanel>
+              <Card className="mb-6">
+                <Title>Chat Adoption</Title>
+                <Text>
+                  {granularity === "day" ? "Daily" : "Monthly"} new users
+                  overlaid with the cumulative number who have ever tried chat
+                </Text>
+                <div className="flex flex-wrap gap-8 mt-3">
+                  <div>
+                    <Text className="text-xs">Users who ever tried chat</Text>
+                    <Metric>{kpis.totalUsersEverTried.toLocaleString()}</Metric>
+                  </div>
+                  <div>
+                    <Text className="text-xs">New users (range)</Text>
+                    <Metric>{kpis.newUsersInRange.toLocaleString()}</Metric>
+                  </div>
+                </div>
+                <AreaChart
+                  className="mt-4 h-72"
+                  data={adoptionChartData}
+                  index="date"
+                  categories={["New Users", "Cumulative Users"]}
+                  colors={["cyan", "indigo"]}
+                  showLegend={true}
+                  noDataText="No adoption data yet"
+                />
+              </Card>
+
+              <Card className="mb-6">
+                <Title>Top Users by Activity</Title>
+                <Text>
+                  Most active users over the selected range, by assistant
+                  replies. From the durable daily aggregate, so it spans full
+                  history even after old chats are purged by retention.
+                </Text>
+                {perUserData && perUserData.length > 0 ? (
+                  <div className="mt-4 max-h-96 overflow-y-auto">
+                    <Table>
+                      <TableHead>
+                        <TableRow>
+                          <TableHeaderCell>User</TableHeaderCell>
+                          <TableHeaderCell className="text-right">
+                            Messages
+                          </TableHeaderCell>
+                          <TableHeaderCell className="text-right">
+                            Likes
+                          </TableHeaderCell>
+                          <TableHeaderCell className="text-right">
+                            Dislikes
+                          </TableHeaderCell>
+                          <TableHeaderCell className="text-right">
+                            Last Active
+                          </TableHeaderCell>
+                        </TableRow>
+                      </TableHead>
+                      <TableBody>
+                        {perUserData.map((u) => (
+                          <TableRow key={u.user_id}>
+                            <TableCell>{u.email}</TableCell>
+                            <TableCell className="text-right">
+                              {u.total_messages.toLocaleString()}
+                            </TableCell>
+                            <TableCell className="text-right">
+                              {u.total_likes.toLocaleString()}
+                            </TableCell>
+                            <TableCell className="text-right">
+                              {u.total_dislikes.toLocaleString()}
+                            </TableCell>
+                            <TableCell className="text-right">
+                              {u.last_active}
+                            </TableCell>
+                          </TableRow>
+                        ))}
+                      </TableBody>
+                    </Table>
+                  </div>
+                ) : (
+                  <Text className="mt-4">
+                    No chat activity in this date range.
+                  </Text>
+                )}
+              </Card>
+
+              <Grid numItems={1} numItemsLg={2} className="gap-4 mb-6">
+                <Card>
+                  <Title>Most-Used Assistants</Title>
+                  <Text>
+                    By assistant replies over the selected range. Durable
+                    aggregate — spans full history.
+                  </Text>
+                  {personaData && personaData.length > 0 ? (
+                    <div className="mt-4 max-h-96 overflow-y-auto">
+                      <Table>
+                        <TableHead>
+                          <TableRow>
+                            <TableHeaderCell>Assistant</TableHeaderCell>
+                            <TableHeaderCell className="text-right">
+                              Messages
+                            </TableHeaderCell>
+                            <TableHeaderCell className="text-right">
+                              Sessions
+                            </TableHeaderCell>
+                            <TableHeaderCell className="text-right">
+                              Likes
+                            </TableHeaderCell>
+                            <TableHeaderCell className="text-right">
+                              Dislikes
+                            </TableHeaderCell>
+                          </TableRow>
+                        </TableHead>
+                        <TableBody>
+                          {personaData.map((p) => (
+                            <TableRow key={p.persona_id}>
+                              <TableCell>{p.name}</TableCell>
+                              <TableCell className="text-right">
+                                {p.messages.toLocaleString()}
+                              </TableCell>
+                              <TableCell className="text-right">
+                                {p.sessions.toLocaleString()}
+                              </TableCell>
+                              <TableCell className="text-right">
+                                {p.likes.toLocaleString()}
+                              </TableCell>
+                              <TableCell className="text-right">
+                                {p.dislikes.toLocaleString()}
+                              </TableCell>
+                            </TableRow>
+                          ))}
+                        </TableBody>
+                      </Table>
+                    </div>
+                  ) : (
+                    <Text className="mt-4">
+                      No assistant activity in this date range.
+                    </Text>
+                  )}
+                </Card>
+
+                <Card>
+                  <Title>Datasets in Use (approximate)</Title>
+                  <Text>
+                    Assistant usage attributed to each document set attached to
+                    the assistant. Approximate — counts an assistant&apos;s
+                    messages toward all its datasets and uses current
+                    attachments, not per-query retrieval.
+                  </Text>
+                  {docSetUsageBars.length > 0 ? (
+                    <BarList
+                      className="mt-4"
+                      data={docSetUsageBars}
+                      valueFormatter={(n: number) => n.toLocaleString()}
+                    />
+                  ) : (
+                    <Text className="mt-4">
+                      No dataset usage in this date range.
+                    </Text>
+                  )}
+                </Card>
+              </Grid>
+            </TabPanel>
+          </TabPanels>
+        </TabGroup>
       )}
     </div>
   );
diff --git a/web/src/app/admin/connector/[ccPairId]/IndexingAttemptsTable.tsx b/web/src/app/admin/connector/[ccPairId]/IndexingAttemptsTable.tsx
index 7481a6ee342..07744c47e19 100644
--- a/web/src/app/admin/connector/[ccPairId]/IndexingAttemptsTable.tsx
+++ b/web/src/app/admin/connector/[ccPairId]/IndexingAttemptsTable.tsx
@@ -12,7 +12,7 @@ import {
   Divider,
 } from "@tremor/react";
 import { IndexAttemptStatus } from "@/components/Status";
-import { CCPairFullInfo } from "./types";
+import { CCPairFullInfo, PaginatedIndexAttempts } from "./types";
 import { useState } from "react";
 import { PageSelector } from "@/components/PageSelector";
 import { localizeAndPrettify } from "@/lib/time";
@@ -20,18 +20,39 @@ import { getDocsProcessedPerMinute } from "@/lib/indexAttempt";
 import { Modal } from "@/components/Modal";
 import { CheckmarkIcon, CopyIcon } from "@/components/icons/icons";
 import { updateIndexAttemptPriority } from "@/lib/connector";
-import { mutate } from "swr";
-import { buildCCPairInfoUrl } from "./lib";
+import useSWR, { mutate } from "swr";
+import { errorHandlingFetcher } from "@/lib/fetcher";
+import { buildCCPairInfoUrl, buildIndexAttemptsUrl } from "./lib";
 import { usePopup } from "@/components/admin/connectors/Popup";
+import { ThreeDotsLoader } from "@/components/Loading";
+import { ErrorCallout } from "@/components/ErrorCallout";
 
 const NUM_IN_PAGE = 8;
 
 export function IndexingAttemptsTable({ ccPair }: { ccPair: CCPairFullInfo }) {
   const [page, setPage] = useState(1);
+  // Server-side pagination: fetch one page at a time (page is 0-based on the
+  // API). Changing `page` re-keys the SWR fetch. Avoids loading a busy
+  // cc-pair's entire attempt history (thousands of rows w/ full tracebacks).
+  const indexAttemptsUrl = buildIndexAttemptsUrl(
+    ccPair.id,
+    page - 1,
+    NUM_IN_PAGE
+  );
+  const {
+    data: indexAttemptsData,
+    isLoading,
+    error,
+    mutate: mutateIndexAttempts,
+  } = useSWR<PaginatedIndexAttempts>(indexAttemptsUrl, errorHandlingFetcher);
+
+  const indexAttempts = indexAttemptsData?.index_attempts ?? [];
+  const totalPages = indexAttemptsData?.total_pages ?? 1;
+
   const [indexAttemptTracePopupId, setIndexAttemptTracePopupId] = useState<
     number | null
   >(null);
-  const indexAttemptToDisplayTraceFor = ccPair.index_attempts.find(
+  const indexAttemptToDisplayTraceFor = indexAttempts.find(
     (indexAttempt) => indexAttempt.id === indexAttemptTracePopupId
   );
   const [copyClicked, setCopyClicked] = useState(false);
@@ -56,9 +77,24 @@ export function IndexingAttemptsTable({ ccPair }: { ccPair: CCPairFullInfo }) {
       });
     }
     setTimeout(() => setPopup(null), 3000);
+    // Refresh the current page of attempts + the detail (latest attempt).
+    mutateIndexAttempts();
     mutate(buildCCPairInfoUrl(ccPair.id));
   }
 
+  if (error) {
+    return (
+      <ErrorCallout
+        errorTitle="Failed to fetch indexing attempts"
+        errorMsg={error?.info?.detail || error.toString()}
+      />
+    );
+  }
+
+  if (!indexAttemptsData && isLoading) {
+    return <ThreeDotsLoader />;
+  }
+
   return (
     <>
       {indexAttemptToDisplayTraceFor &&
@@ -114,114 +150,112 @@ export function IndexingAttemptsTable({ ccPair }: { ccPair: CCPairFullInfo }) {
           </TableRow>
         </TableHead>
         <TableBody>
-          {ccPair.index_attempts
-            .slice(NUM_IN_PAGE * (page - 1), NUM_IN_PAGE * page)
-            .map((indexAttempt) => {
-              const docsPerMinute =
-                getDocsProcessedPerMinute(indexAttempt)?.toFixed(2);
-              const priority = indexAttempt.indexing_priority ?? 0;
-              const isNotStarted = indexAttempt.status === "not_started";
-              const isUpdating = updatingPriorityId === indexAttempt.id;
-              return (
-                <TableRow key={indexAttempt.id}>
-                  <TableCell>
-                    {indexAttempt.time_started
-                      ? localizeAndPrettify(indexAttempt.time_started)
-                      : "-"}
-                  </TableCell>
-                  <TableCell>
-                    <IndexAttemptStatus
-                      status={indexAttempt.status || "not_started"}
-                      size="xs"
-                    />
-                    {docsPerMinute && (
-                      <div className="text-xs mt-1">
-                        {docsPerMinute} docs / min
-                      </div>
-                    )}
-                  </TableCell>
-                  <TableCell>
-                    {isNotStarted ? (
-                      <div className="flex items-center gap-1">
-                        <button
-                          className="px-1.5 py-0.5 border rounded text-xs hover:bg-hover-light disabled:opacity-50"
-                          disabled={isUpdating || priority <= 0}
-                          onClick={() =>
-                            bumpPriority(indexAttempt.id, priority - 10)
-                          }
-                          title="Decrease priority by 10"
-                        >
-                          −10
-                        </button>
-                        <span
-                          className={
-                            priority > 0
-                              ? "text-xs font-semibold px-2 py-0.5 rounded bg-emerald-100 text-emerald-800"
-                              : "text-xs px-2 py-0.5 text-subtle"
-                          }
-                        >
-                          {priority}
-                        </span>
-                        <button
-                          className="px-1.5 py-0.5 border rounded text-xs hover:bg-hover-light disabled:opacity-50"
-                          disabled={isUpdating || priority >= 100}
-                          onClick={() =>
-                            bumpPriority(indexAttempt.id, priority + 10)
-                          }
-                          title="Increase priority by 10"
-                        >
-                          +10
-                        </button>
-                      </div>
-                    ) : priority > 0 ? (
-                      <span className="text-xs font-semibold px-2 py-0.5 rounded bg-emerald-100 text-emerald-800">
+          {indexAttempts.map((indexAttempt) => {
+            const docsPerMinute =
+              getDocsProcessedPerMinute(indexAttempt)?.toFixed(2);
+            const priority = indexAttempt.indexing_priority ?? 0;
+            const isNotStarted = indexAttempt.status === "not_started";
+            const isUpdating = updatingPriorityId === indexAttempt.id;
+            return (
+              <TableRow key={indexAttempt.id}>
+                <TableCell>
+                  {indexAttempt.time_started
+                    ? localizeAndPrettify(indexAttempt.time_started)
+                    : "-"}
+                </TableCell>
+                <TableCell>
+                  <IndexAttemptStatus
+                    status={indexAttempt.status || "not_started"}
+                    size="xs"
+                  />
+                  {docsPerMinute && (
+                    <div className="text-xs mt-1">
+                      {docsPerMinute} docs / min
+                    </div>
+                  )}
+                </TableCell>
+                <TableCell>
+                  {isNotStarted ? (
+                    <div className="flex items-center gap-1">
+                      <button
+                        className="px-1.5 py-0.5 border rounded text-xs hover:bg-hover-light disabled:opacity-50"
+                        disabled={isUpdating || priority <= 0}
+                        onClick={() =>
+                          bumpPriority(indexAttempt.id, priority - 10)
+                        }
+                        title="Decrease priority by 10"
+                      >
+                        −10
+                      </button>
+                      <span
+                        className={
+                          priority > 0
+                            ? "text-xs font-semibold px-2 py-0.5 rounded bg-emerald-100 text-emerald-800"
+                            : "text-xs px-2 py-0.5 text-subtle"
+                        }
+                      >
                         {priority}
                       </span>
-                    ) : (
-                      <span className="text-xs text-subtle">-</span>
-                    )}
-                  </TableCell>
-                  <TableCell>
-                    <div className="flex">
-                      <div className="text-right">
-                        <div>{indexAttempt.new_docs_indexed}</div>
-                        {indexAttempt.docs_removed_from_index > 0 && (
-                          <div className="text-xs w-52 text-wrap flex italic overflow-hidden whitespace-normal px-1">
-                            (also removed {indexAttempt.docs_removed_from_index}{" "}
-                            docs that were detected as deleted in the source)
-                          </div>
-                        )}
-                      </div>
+                      <button
+                        className="px-1.5 py-0.5 border rounded text-xs hover:bg-hover-light disabled:opacity-50"
+                        disabled={isUpdating || priority >= 100}
+                        onClick={() =>
+                          bumpPriority(indexAttempt.id, priority + 10)
+                        }
+                        title="Increase priority by 10"
+                      >
+                        +10
+                      </button>
                     </div>
-                  </TableCell>
-                  <TableCell>{indexAttempt.total_docs_indexed}</TableCell>
-                  <TableCell>
-                    <div>
-                      <Text className="flex flex-wrap whitespace-normal">
-                        {indexAttempt.error_msg || "-"}
-                      </Text>
-                      {indexAttempt.full_exception_trace && (
-                        <div
-                          onClick={() => {
-                            setIndexAttemptTracePopupId(indexAttempt.id);
-                          }}
-                          className="mt-2 text-link cursor-pointer select-none"
-                        >
-                          View Full Trace
+                  ) : priority > 0 ? (
+                    <span className="text-xs font-semibold px-2 py-0.5 rounded bg-emerald-100 text-emerald-800">
+                      {priority}
+                    </span>
+                  ) : (
+                    <span className="text-xs text-subtle">-</span>
+                  )}
+                </TableCell>
+                <TableCell>
+                  <div className="flex">
+                    <div className="text-right">
+                      <div>{indexAttempt.new_docs_indexed}</div>
+                      {indexAttempt.docs_removed_from_index > 0 && (
+                        <div className="text-xs w-52 text-wrap flex italic overflow-hidden whitespace-normal px-1">
+                          (also removed {indexAttempt.docs_removed_from_index}{" "}
+                          docs that were detected as deleted in the source)
                         </div>
                       )}
                     </div>
-                  </TableCell>
-                </TableRow>
-              );
-            })}
+                  </div>
+                </TableCell>
+                <TableCell>{indexAttempt.total_docs_indexed}</TableCell>
+                <TableCell>
+                  <div>
+                    <Text className="flex flex-wrap whitespace-normal">
+                      {indexAttempt.error_msg || "-"}
+                    </Text>
+                    {indexAttempt.full_exception_trace && (
+                      <div
+                        onClick={() => {
+                          setIndexAttemptTracePopupId(indexAttempt.id);
+                        }}
+                        className="mt-2 text-link cursor-pointer select-none"
+                      >
+                        View Full Trace
+                      </div>
+                    )}
+                  </div>
+                </TableCell>
+              </TableRow>
+            );
+          })}
         </TableBody>
       </Table>
-      {ccPair.index_attempts.length > NUM_IN_PAGE && (
+      {totalPages > 1 && (
         <div className="mt-3 flex">
           <div className="mx-auto">
             <PageSelector
-              totalPages={Math.ceil(ccPair.index_attempts.length / NUM_IN_PAGE)}
+              totalPages={totalPages}
               currentPage={page}
               onPageChange={(newPage) => {
                 setPage(newPage);
diff --git a/web/src/app/admin/connector/[ccPairId]/lib.ts b/web/src/app/admin/connector/[ccPairId]/lib.ts
index e83f3d406d0..d5492ea0400 100644
--- a/web/src/app/admin/connector/[ccPairId]/lib.ts
+++ b/web/src/app/admin/connector/[ccPairId]/lib.ts
@@ -1,3 +1,11 @@
 export function buildCCPairInfoUrl(ccPairId: string | number) {
   return `/api/manage/admin/cc-pair/${ccPairId}`;
 }
+
+export function buildIndexAttemptsUrl(
+  ccPairId: string | number,
+  page: number,
+  pageSize: number
+) {
+  return `${buildCCPairInfoUrl(ccPairId)}/index-attempts?page=${page}&page_size=${pageSize}`;
+}
diff --git a/web/src/app/admin/connector/[ccPairId]/page.tsx b/web/src/app/admin/connector/[ccPairId]/page.tsx
index 2d3f9458c38..a1f9c194246 100644
--- a/web/src/app/admin/connector/[ccPairId]/page.tsx
+++ b/web/src/app/admin/connector/[ccPairId]/page.tsx
@@ -52,7 +52,7 @@ function Main({ ccPairId }: { ccPairId: number }) {
     );
   }
 
-  const lastIndexAttempt = ccPair.index_attempts[0];
+  const lastIndexAttempt = ccPair.latest_index_attempt;
   const isDeleting = isCurrentlyDeleting(ccPair.latest_deletion_attempt);
 
   // figure out if we need to artificially deflate the number of docs indexed.
@@ -61,7 +61,7 @@ function Main({ ccPairId }: { ccPairId: number }) {
   // there is a mismatch between these two numbers which may confuse users.
   const totalDocsIndexed =
     lastIndexAttempt?.status === "in_progress" &&
-    ccPair.index_attempts.length === 1
+    ccPair.num_index_attempts === 1
       ? lastIndexAttempt.total_docs_indexed
       : ccPair.num_docs_indexed;
 
diff --git a/web/src/app/admin/connector/[ccPairId]/types.ts b/web/src/app/admin/connector/[ccPairId]/types.ts
index ab4921180cf..6f9ff8d3048 100644
--- a/web/src/app/admin/connector/[ccPairId]/types.ts
+++ b/web/src/app/admin/connector/[ccPairId]/types.ts
@@ -11,6 +11,16 @@ export interface CCPairFullInfo {
   num_docs_indexed: number;
   connector: Connector<any>;
   credential: Credential<any>;
-  index_attempts: IndexAttemptSnapshot[];
+  // Full attempt history is fetched (paginated) separately; the detail page
+  // only needs the most-recent attempt + a total count.
+  latest_index_attempt: IndexAttemptSnapshot | null;
+  num_index_attempts: number;
   latest_deletion_attempt: DeletionAttemptSnapshot | null;
 }
+
+export interface PaginatedIndexAttempts {
+  index_attempts: IndexAttemptSnapshot[];
+  page: number;
+  total_pages: number;
+  total_count: number;
+}
diff --git a/web/src/app/admin/settings/interfaces.ts b/web/src/app/admin/settings/interfaces.ts
index 02372ce2c74..c1f4da5b2cc 100644
--- a/web/src/app/admin/settings/interfaces.ts
+++ b/web/src/app/admin/settings/interfaces.ts
@@ -3,6 +3,8 @@ export interface Settings {
   search_page_enabled: boolean;
   default_page: "search" | "chat";
   maximum_chat_retention_days: number | null;
+  // Size cap (MB) for chat file uploads (mirrors backend CHAT_FILE_MAX_SIZE_MB).
+  chat_file_max_size_mb?: number;
 }
 
 export interface EnterpriseSettings {
diff --git a/web/src/app/assistants/gallery/AssistantsGallery.tsx b/web/src/app/assistants/gallery/AssistantsGallery.tsx
index 64c96af2cff..cfae8b122b0 100644
--- a/web/src/app/assistants/gallery/AssistantsGallery.tsx
+++ b/web/src/app/assistants/gallery/AssistantsGallery.tsx
@@ -1,204 +1,790 @@
 "use client";
 
+/**
+ * Assistant Gallery — redesigned UX.
+ *
+ * Why this rewrite: with 50+ assistants and growing, the old flat 2-column
+ * grid had no hierarchy, no status signal (added vs not), and no real
+ * filtering — every card looked identical regardless of whether it was
+ * yours, shared, public, or already in your picker. This page is now
+ * structured around three questions: "is this mine?" (sections), "have
+ * I added this?" (availability filter + muted card style), and "what does
+ * it do?" (denser layout + description + document-set name chips).
+ *
+ * Changes packed in (numbers map to the design proposal):
+ *
+ *   1. Muted card style + "Remove" button for already-added assistants
+ *      (the "In your picker" chip was dropped). Un-added ones stand out.
+ *   2. Three implicit sections: Yours / Shared with you / Featured.
+ *      Empty sections hide; section headers carry counts.
+ *   3. Filter chips above the grid: availability (All / Available to add /
+ *      Already added), each with a live count. The per-tool chips from
+ *      the proposal were removed along with the other tool-related UI.
+ *   4. Owner display: name-from-email fallback (split on '@'), with a
+ *      "Built-in" badge for default_persona assistants — kills the
+ *      fork-specific "Author: Darwin" magic string.
+ *   6. Responsive grid: 1 col on mobile, 2 / 3 / 4 by breakpoint.
+ *   7. Header matches the Manage page: title + subtitle on the left,
+ *      "Back to my assistants" as a text link, "Create new" button
+ *      top-right. The giant centered button + paragraph are gone.
+ *   8. Sort dropdown: Featured (API order) / A → Z / Newly added.
+ *   9. Search matches name, description AND document-set names (not
+ *      tools). Empty results get a real empty state with a Clear button.
+ *  10. Document-set name chips (first 3, then "+N more" with the rest
+ *      on hover). Flat Add/Remove buttons replace the heavy Tremor
+ *      color="green/red" ones to match the design system.
+ *  11. Design tokens fixed — search input uses border-border /
+ *      focus-ring-accent like the rest of the app.
+ *
+ * What is intentionally NOT here:
+ *   - #5 (detail drawer / modal) — deferred per the proposal; revisit
+ *     after seeing how users use the new gallery.
+ *   - Bulk select — adding 5 assistants at once isn't a real use case.
+ *
+ * All mutations are optimistic + undoable, mirroring the Manage page.
+ */
+
+import { useEffect, useMemo, useState } from "react";
 import { Persona } from "@/app/admin/assistants/interfaces";
-import { AssistantIcon } from "@/components/assistants/AssistantIcon";
 import { User } from "@/lib/types";
-import { Button } from "@tremor/react";
-import Link from "next/link";
-import { useState } from "react";
-import { FiMinus, FiPlus, FiX } from "react-icons/fi";
-import { NavigationButton } from "../NavigationButton";
-import { AssistantsPageTitle } from "../AssistantsPageTitle";
+import { AssistantIcon } from "@/components/assistants/AssistantIcon";
+import { Bubble } from "@/components/Bubble";
+import { usePopup } from "@/components/admin/connectors/Popup";
 import {
   addAssistantToList,
+  reorderAssistantList,
   removeAssistantFromList,
 } from "@/lib/assistants/updateAssistantPreferences";
-import { usePopup } from "@/components/admin/connectors/Popup";
+import { checkUserOwnsAssistant } from "@/lib/assistants/checkOwnership";
+import { AssistantsPageTitle } from "../AssistantsPageTitle";
+import Link from "next/link";
 import { useRouter } from "next/navigation";
-import { ToolsDisplay } from "../ToolsDisplay";
+import { FiBookmark, FiPlus, FiSearch, FiX } from "react-icons/fi";
+
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+
+type Availability = "all" | "available" | "added";
+type SortMode = "featured" | "name-asc" | "recent";
+
+interface SectionDef {
+  key: string;
+  label: string;
+  assistants: Persona[];
+}
+
+// ---------------------------------------------------------------------------
+// Column-count parameterisation
+// ---------------------------------------------------------------------------
+//
+// Each row in this table is a complete, static Tailwind class string so the
+// purge step actually emits the classes. You CAN'T compute these at runtime
+// (`md:grid-cols-${n}` won't survive purge). Add a row here to support a new
+// column count. Every row is 1-col below `md` (the in-page column picker is
+// hidden there too) and then gains one breakpoint per added column up to N,
+// so cards stay roomy on medium screens.
+const GRID_CLASSES: Record<number, string> = {
+  1: "grid-cols-1",
+  2: "grid-cols-1 md:grid-cols-2",
+  3: "grid-cols-1 md:grid-cols-2 2xl:grid-cols-3",
+  4: "grid-cols-1 md:grid-cols-2 lg:grid-cols-3 2xl:grid-cols-4",
+  5: "grid-cols-1 md:grid-cols-2 lg:grid-cols-3 xl:grid-cols-4 2xl:grid-cols-5",
+};
+
+const DEFAULT_COLUMNS = 3;
+
+// Values exposed in the in-page column picker. The control lets users
+// override the prop-derived default at runtime; the persisted choice
+// lives in localStorage so it survives reloads.
+//
+// Below the smallest md breakpoint everything is 1-col regardless of
+// this value (see GRID_CLASSES), so we don't bother exposing 1.
+const COLUMN_PICKER_OPTIONS = [2, 3, 4];
+const COLUMNS_STORAGE_KEY = "danswer:assistants-gallery:columns";
+
+// How many doc-set name chips to render before collapsing the rest into
+// a "+N more" pill. Three keeps each card's scope visible without
+// blowing the card width at narrower column counts.
+const MAX_VISIBLE_DOC_SETS = 3;
+
+// ---------------------------------------------------------------------------
+// Small helpers
+// ---------------------------------------------------------------------------
+
+/** Human-friendly author label derived from the owner's email, since
+ * MinimalUserSnapshot carries no name. "foo.bar@example.com" becomes
+ * "foo bar"; returns null when there is nothing to show. */
+function ownerDisplayName(persona: Persona): string | null {
+  // Built-in personas get a badge instead of an author line, and a
+  // persona without an owner email has nothing to derive a name from.
+  const ownerEmail = persona.owner?.email;
+  if (persona.default_persona || !ownerEmail) return null;
+  const [localPart] = ownerEmail.split("@");
+  // Dots/underscores → spaces usually reads closer to "First Last".
+  return localPart.replace(/[._]/g, " ").trim() || ownerEmail;
+}
+
+// ---------------------------------------------------------------------------
+// Single card
+// ---------------------------------------------------------------------------
+
+interface CardProps {
+  assistant: Persona;
+  user: User | null;
+  isAdded: boolean;
+  onAdd: (a: Persona) => void;
+  onRemove: (a: Persona) => void;
+}
+
+function GalleryCard({ assistant, user, isAdded, onAdd, onRemove }: CardProps) {
+  // Tool-related UI was intentionally removed from this page (filter
+  // chips + per-card counts) — the gallery is for browsing assistants,
+  // and tool execution isn't reliable enough to advertise.
+  const author = ownerDisplayName(assistant);
+  const isBuiltIn = assistant.default_persona;
+
+  return (
+    <div
+      className={`
+        bg-background-emphasis rounded-lg p-5
+        border transition
+        flex flex-col
+        ${
+          isAdded
+            ? "border-border opacity-75 hover:opacity-100"
+            : "border-transparent shadow-sm hover:shadow-md"
+        }
+      `}
+    >
+      {/* Header: icon + name (+ built-in badge if applicable). The
+          prior absolute top-right "In your picker" badge was dropped —
+          the muted card style + the Remove button in the footer
+          already signal "added"; the badge ate horizontal space and
+          crowded the title at narrower widths. */}
+      <div className="flex items-start gap-3 mb-2">
+        <AssistantIcon assistant={assistant} />
+        <div className="min-w-0 flex-1">
+          <div className="flex items-center gap-2 flex-wrap">
+            <h2 className="text-base font-semibold text-strong truncate">
+              {assistant.name}
+            </h2>
+            {isBuiltIn && (
+              <span
+                className="
+                  text-[10px] uppercase tracking-wide
+                  px-1.5 py-0.5 rounded
+                  bg-border text-default font-medium
+                "
+                title="Bundled with the app."
+              >
+                Built-in
+              </span>
+            )}
+          </div>
+        </div>
+      </div>
+
+      {/* Description — primary signal of "should I pick this?". */}
+      {assistant.description && (
+        <p className="text-sm text-default leading-relaxed mb-3 line-clamp-3">
+          {assistant.description}
+        </p>
+      )}
+
+      {/* Knowledge-scope chips — name the document sets the
+          assistant points at (counts alone don't help a chooser
+          decide). Cap at MAX_VISIBLE_DOC_SETS with a "+N more"
+          tooltip so a long list doesn't blow the card width. Tools
+          were removed entirely (see card-level comment). */}
+      {assistant.document_sets && assistant.document_sets.length > 0 && (
+        <div className="flex flex-wrap gap-1.5 mb-3 text-xs">
+          {assistant.document_sets.slice(0, MAX_VISIBLE_DOC_SETS).map((ds) => (
+            <Bubble key={ds.id} isSelected={false} notSelectable>
+              <div className="flex items-center gap-1 max-w-[180px]">
+                <FiBookmark size={12} className="flex-shrink-0" />
+                <span className="truncate" title={ds.name}>
+                  {ds.name}
+                </span>
+              </div>
+            </Bubble>
+          ))}
+          {assistant.document_sets.length > MAX_VISIBLE_DOC_SETS && (
+            <Bubble isSelected={false} notSelectable>
+              <span
+                title={assistant.document_sets
+                  .slice(MAX_VISIBLE_DOC_SETS)
+                  .map((d) => d.name)
+                  .join(", ")}
+              >
+                +{assistant.document_sets.length - MAX_VISIBLE_DOC_SETS} more
+              </span>
+            </Bubble>
+          )}
+        </div>
+      )}
+
+      {/* Footer row: author (or built-in subtle text) + Add/Remove */}
+      <div className="mt-auto flex items-center justify-between gap-2 pt-2 border-t border-border/40">
+        <div className="text-xs text-subtle truncate min-w-0">
+          {isBuiltIn ? (
+            <span>Bundled assistant</span>
+          ) : author ? (
+            <span title={assistant.owner?.email ?? ""}>by {author}</span>
+          ) : (
+            // Public assistant with no owner record — rare; surface
+            // gracefully without the old "Author: Darwin" magic string.
+            <span>Public</span>
+          )}
+        </div>
+
+        {/* Add/Remove — flat, matches design system. Tremor's color="green"
+            for "Add to my list" was visually shoutier than the action. */}
+        {user &&
+          (isAdded ? (
+            <button
+              type="button"
+              onClick={() => onRemove(assistant)}
+              className="
+                text-xs px-3 py-1.5 rounded
+                border border-border
+                text-default hover:bg-hover
+                flex items-center gap-1
+                focus:outline-none focus:ring-2 focus:ring-accent
+              "
+              title="Remove from your picker"
+            >
+              <FiX size={12} /> Remove
+            </button>
+          ) : (
+            <button
+              type="button"
+              onClick={() => onAdd(assistant)}
+              className="
+                text-xs px-3 py-1.5 rounded
+                bg-accent text-inverted font-medium
+                hover:opacity-90
+                flex items-center gap-1
+                focus:outline-none focus:ring-2 focus:ring-accent
+              "
+              title="Add to your picker"
+            >
+              <FiPlus size={12} /> Add
+            </button>
+          ))}
+      </div>
+    </div>
+  );
+}
+
+// ---------------------------------------------------------------------------
+// Filter chip — small reusable toggle pill
+// ---------------------------------------------------------------------------
+
+function FilterChip({
+  active,
+  onClick,
+  children,
+  badge,
+}: {
+  active: boolean;
+  onClick: () => void;
+  children: React.ReactNode;
+  badge?: number;
+}) {
+  // Active chips are filled with the accent colour; idle ones are outlined.
+  const pillTone = active
+    ? "bg-accent text-inverted border-accent"
+    : "bg-background border-border text-default hover:bg-hover";
+  return (
+    <button
+      type="button"
+      onClick={onClick}
+      className={`
+        text-xs px-3 py-1.5 rounded-full
+        border transition-colors
+        flex items-center gap-1.5
+        focus:outline-none focus:ring-2 focus:ring-accent
+        ${pillTone}
+      `}
+    >
+      {children}
+      {badge !== undefined && (
+        <span
+          className={`
+            text-[10px] px-1.5 rounded-full
+            ${active ? "bg-white/20" : "bg-border"}
+          `}
+        >
+          {badge}
+        </span>
+      )}
+    </button>
+  );
+}
+
+// ---------------------------------------------------------------------------
+// Main
+// ---------------------------------------------------------------------------
 
 export function AssistantsGallery({
   assistants,
   user,
+  columns: initialColumns = DEFAULT_COLUMNS,
 }: {
   assistants: Persona[];
   user: User | null;
+  /**
+   * Initial max columns at the widest breakpoint. Acts as the default
+   * if the user has no stored preference yet; once the user picks via
+   * the in-page column control that choice (in localStorage) wins.
+   * Responsive scaling below the widest breakpoint is fixed
+   * (see GRID_CLASSES). Supported values: 1–5; out-of-range silently
+   * falls back to DEFAULT_COLUMNS so a bad prop can't break the page.
+   */
+  columns?: number;
 }) {
-  function filterAssistants(assistants: Persona[], query: string): Persona[] {
-    return assistants.filter(
-      (assistant) =>
-        assistant.name.toLowerCase().includes(query.toLowerCase()) ||
-        assistant.description.toLowerCase().includes(query.toLowerCase())
-    );
-  }
-
   const router = useRouter();
 
-  const [searchQuery, setSearchQuery] = useState("");
+  // User-chosen column count. `null` until the localStorage read in
+  // the effect below; SSR + first paint use the prop value so we
+  // don't get a hydration mismatch. After mount, the stored choice
+  // (if any) overrides the prop.
+  const [userColumns, setUserColumns] = useState<number | null>(null);
+  useEffect(() => {
+    try {
+      const raw = window.localStorage.getItem(COLUMNS_STORAGE_KEY);
+      if (raw == null) return;
+      const n = Number.parseInt(raw, 10);
+      if (Number.isFinite(n) && n in GRID_CLASSES) {
+        setUserColumns(n);
+      }
+    } catch {
+      // localStorage can throw in some sandboxed contexts (Safari
+      // private mode in the past, certain iframe configs). Fall
+      // through to the prop default — the picker still works for
+      // the session, just doesn't persist.
+    }
+  }, []);
+
+  const effectiveColumns = userColumns ?? initialColumns;
+  const gridClass =
+    GRID_CLASSES[effectiveColumns] ?? GRID_CLASSES[DEFAULT_COLUMNS];
+
+  const changeColumns = (n: number) => {
+    setUserColumns(n);
+    try {
+      window.localStorage.setItem(COLUMNS_STORAGE_KEY, String(n));
+    } catch {
+      // See above — silently OK to skip persistence.
+    }
+  };
   const { popup, setPopup } = usePopup();
 
-  const allAssistantIds = assistants.map((assistant) => assistant.id);
-  const filteredAssistants = filterAssistants(assistants, searchQuery);
+  // Mirrors the Manage page: no preference = every accessible assistant
+  // is "in the picker" by default.
+  const initialChosen: number[] =
+    user?.preferences?.chosen_assistants ?? assistants.map((a) => a.id);
+  const [chosenAssistants, setChosenAssistants] =
+    useState<number[]>(initialChosen);
+  const chosenSet = useMemo(
+    () => new Set(chosenAssistants),
+    [chosenAssistants]
+  );
+
+  // ---- filter / sort state -------------------------------------------------
+
+  const [search, setSearch] = useState("");
+  const [availability, setAvailability] = useState<Availability>("all");
+  const [sortMode, setSortMode] = useState<SortMode>("featured");
+
+  // ---- derived: filtered + sorted list -------------------------------------
+  // Tool-related filtering was removed from this page — the gallery is
+  // about assistants, not their internals. Search now only matches
+  // name + description + document-set names.
+
+  const filtered: Persona[] = useMemo(() => {
+    const q = search.trim().toLowerCase();
+    const out = assistants.filter((a) => {
+      if (q) {
+        const hay = [
+          a.name,
+          a.description ?? "",
+          ...(a.document_sets ?? []).map((d) => d.name),
+        ]
+          .join(" ")
+          .toLowerCase();
+        if (!hay.includes(q)) return false;
+      }
+      if (availability === "added" && !chosenSet.has(a.id)) return false;
+      if (availability === "available" && chosenSet.has(a.id)) return false;
+      return true;
+    });
+
+    if (sortMode === "name-asc") {
+      out.sort((a, b) => a.name.localeCompare(b.name));
+    } else if (sortMode === "recent") {
+      // No created_at on Persona; id desc is a fair proxy ("newer ids
+      // were created later").
+      out.sort((a, b) => b.id - a.id);
+    }
+    // "featured" = preserve API order (admins curate via display_priority).
+    return out;
+  }, [assistants, search, availability, sortMode, chosenSet]);
+
+  // ---- derived: sections ---------------------------------------------------
+
+  const sections: SectionDef[] = useMemo(() => {
+    const yours: Persona[] = [];
+    const shared: Persona[] = [];
+    const featured: Persona[] = [];
+
+    for (const a of filtered) {
+      const ownedByUser = checkUserOwnsAssistant(user, a);
+      const sharedWithUser =
+        user != null &&
+        !ownedByUser &&
+        !a.is_public &&
+        (a.users ?? []).some((u) => u.id === user.id);
+
+      if (ownedByUser && !a.default_persona) {
+        yours.push(a);
+      } else if (sharedWithUser) {
+        shared.push(a);
+      } else {
+        // Public OR built-in OR (accessible via group permission). All
+        // surface here as "Featured & Built-in" — visually equivalent
+        // from a chooser's POV.
+        featured.push(a);
+      }
+    }
+
+    const out: SectionDef[] = [];
+    if (yours.length > 0)
+      out.push({ key: "yours", label: "Yours", assistants: yours });
+    if (shared.length > 0)
+      out.push({ key: "shared", label: "Shared with you", assistants: shared });
+    if (featured.length > 0)
+      out.push({
+        key: "featured",
+        label: "Featured & Built-in",
+        assistants: featured,
+      });
+    return out;
+  }, [filtered, user]);
+
+  // ---- counts for filter chips ---------------------------------------------
+
+  const counts = useMemo(() => {
+    const q = search.trim().toLowerCase();
+    let all = 0;
+    let added = 0;
+    let available = 0;
+    for (const a of assistants) {
+      if (q) {
+        const hay = [
+          a.name,
+          a.description ?? "",
+          ...(a.document_sets ?? []).map((d) => d.name),
+        ]
+          .join(" ")
+          .toLowerCase();
+        if (!hay.includes(q)) continue;
+      }
+      all++;
+      if (chosenSet.has(a.id)) added++;
+      else available++;
+    }
+    return { all, added, available };
+  }, [assistants, search, chosenSet]);
+
+  // ---- optimistic add/remove (mirrors Manage page persistOrder) -----------
+
+  const persistChosen = async (
+    next: number[],
+    {
+      successMsg,
+      undoToOrder,
+    }: { successMsg?: string; undoToOrder?: number[] } = {}
+  ): Promise<boolean> => {
+    const prev = chosenAssistants;
+    setChosenAssistants(next);
+    const ok = await reorderAssistantList(next);
+    if (!ok) {
+      setChosenAssistants(prev);
+      setPopup({
+        message: "Couldn't update your assistant list — please try again.",
+        type: "error",
+      });
+      return false;
+    }
+    if (successMsg) {
+      setPopup({
+        message: successMsg,
+        type: "success",
+        undo:
+          undoToOrder !== undefined
+            ? {
+                onClick: async () => {
+                  await persistChosen(undoToOrder);
+                },
+              }
+            : undefined,
+      });
+    }
+    router.refresh();
+    return true;
+  };
+
+  const handleAdd = async (a: Persona) => {
+    if (!user) return;
+    if (chosenSet.has(a.id)) return; // already added — no-op
+    const prev = chosenAssistants;
+    const next = [...prev, a.id];
+    // Use addAssistantToList specifically (idempotent) rather than the
+    // generic reorder helper — both PATCH the same endpoint, but this
+    // signals intent at the call-site.
+    setChosenAssistants(next);
+    const ok = await addAssistantToList(a.id, prev);
+    if (!ok) {
+      setChosenAssistants(prev);
+      setPopup({
+        message: `Couldn't add "${a.name}". Try again?`,
+        type: "error",
+      });
+      return;
+    }
+    setPopup({
+      message: `"${a.name}" added to your picker.`,
+      type: "success",
+      undo: {
+        onClick: async () => {
+          await persistChosen(prev);
+        },
+      },
+    });
+    router.refresh();
+  };
+
+  const handleRemove = async (a: Persona) => {
+    if (!user) return;
+    if (chosenAssistants.length === 1 && chosenAssistants[0] === a.id) {
+      setPopup({
+        message:
+          "You need at least one visible assistant — can't remove the last one.",
+        type: "error",
+      });
+      return;
+    }
+    const prev = chosenAssistants;
+    const next = prev.filter((id) => id !== a.id);
+    setChosenAssistants(next);
+    const ok = await removeAssistantFromList(a.id, prev);
+    if (!ok) {
+      setChosenAssistants(prev);
+      setPopup({
+        message: `Couldn't remove "${a.name}". Try again?`,
+        type: "error",
+      });
+      return;
+    }
+    setPopup({
+      message: `"${a.name}" removed from your picker.`,
+      type: "success",
+      undo: {
+        onClick: async () => {
+          await persistChosen(prev);
+        },
+      },
+    });
+    router.refresh();
+  };
+
+  // ---- handlers for filter UI ----------------------------------------------
+
+  const clearAllFilters = () => {
+    setSearch("");
+    setAvailability("all");
+  };
+
+  const hasAnyFilter = search.trim() !== "" || availability !== "all";
+
+  // ---- render -------------------------------------------------------------
 
   return (
     <>
       {popup}
-      <div className="mx-auto w-searchbar-xs 2xl:w-searchbar-sm 3xl:w-searchbar">
-        <AssistantsPageTitle>Assistant Gallery</AssistantsPageTitle>
-        <div className="flex justify-center mb-6">
-          <Link href="/assistants/mine">
-            <NavigationButton>View Your Assistants</NavigationButton>
+
+      <div className="mx-auto w-searchbar-xs 2xl:w-searchbar-sm 3xl:w-searchbar pb-12">
+        {/* Header — matches the Manage page rebuild */}
+        <div className="flex items-start justify-between gap-4 mb-3">
+          <div className="min-w-0">
+            <AssistantsPageTitle>Assistant Gallery</AssistantsPageTitle>
+            <p className="text-subtle">
+              Browse every assistant available to you. Add the ones you want to
+              your chat picker.
+            </p>
+          </div>
+          <Link
+            href="/assistants/new"
+            className="
+              flex items-center gap-1.5 flex-shrink-0
+              px-4 py-2 rounded-md
+              bg-accent text-inverted font-medium
+              hover:opacity-90 focus:outline-none focus:ring-2 focus:ring-accent
+            "
+          >
+            <FiPlus size={16} /> Create new
           </Link>
         </div>
 
-        <p className="text-center mb-6">
-          Discover and create custom assistants that combine instructions, extra
-          knowledge, and any combination of tools.
-        </p>
+        <div className="mb-4">
+          <Link
+            href="/assistants/mine"
+            className="text-sm text-link hover:underline inline-flex items-center gap-1"
+          >
+            ← Back to my assistants
+          </Link>
+        </div>
 
-        <div className="mb-6">
+        {/* Search */}
+        <div className="relative mb-3">
+          <FiSearch
+            className="absolute left-3 top-1/2 -translate-y-1/2 text-subtle"
+            size={16}
+          />
           <input
-            type="text"
-            placeholder="Search assistants..."
-            value={searchQuery}
-            onChange={(e) => setSearchQuery(e.target.value)}
+            type="search"
+            placeholder="Search by name, description, tool, or source…"
+            value={search}
+            onChange={(e) => setSearch(e.target.value)}
             className="
-            w-full
-            p-2
-            border
-            border-gray-300
-            rounded
-            focus:outline-none
-            focus:ring-2
-            focus:ring-blue-500
-          "
+              w-full pl-10 pr-3 py-2
+              rounded-md border border-border bg-background
+              focus:outline-none focus:ring-2 focus:ring-accent
+            "
           />
         </div>
-        <div
-          className="
-          w-full
-          grid
-          grid-cols-2
-          gap-4
-          py-2
-        "
-        >
-          {filteredAssistants.map((assistant) => (
-            <div
-              key={assistant.id}
+
+        {/* Filter chip rows */}
+        <div className="flex flex-wrap items-center gap-2 mb-2">
+          <FilterChip
+            active={availability === "all"}
+            onClick={() => setAvailability("all")}
+            badge={counts.all}
+          >
+            All
+          </FilterChip>
+          <FilterChip
+            active={availability === "available"}
+            onClick={() => setAvailability("available")}
+            badge={counts.available}
+          >
+            Available to add
+          </FilterChip>
+          <FilterChip
+            active={availability === "added"}
+            onClick={() => setAvailability("added")}
+            badge={counts.added}
+          >
+            Already added
+          </FilterChip>
+
+          {/* View controls — columns + sort — live at the right end of
+              the filter row so "narrow the list" (left) and "shape
+              the view" (right) are visually separated. */}
+          <div className="ml-auto flex items-center gap-3 text-xs text-subtle">
+            {/* Column picker. Hidden below md since the layout falls
+                back to a single column there regardless. Pure
+                client-side state + localStorage — no fetch, no
+                router.refresh(), no DB hit. */}
+            <div className="hidden md:flex items-center gap-2">
+              <label htmlFor="columns">Columns</label>
+              <select
+                id="columns"
+                value={effectiveColumns}
+                onChange={(e) => changeColumns(Number(e.target.value))}
+                className="
+                  text-xs px-2 py-1.5 rounded
+                  border border-border bg-background
+                  focus:outline-none focus:ring-2 focus:ring-accent
+                "
+              >
+                {COLUMN_PICKER_OPTIONS.map((n) => (
+                  <option key={n} value={n}>
+                    {n}
+                  </option>
+                ))}
+              </select>
+            </div>
+            <label htmlFor="sort">Sort</label>
+            <select
+              id="sort"
+              value={sortMode}
+              onChange={(e) => setSortMode(e.target.value as SortMode)}
               className="
-              bg-background-emphasis
-              rounded-lg
-              shadow-md
-              p-4
-            "
+                text-xs px-2 py-1.5 rounded
+                border border-border bg-background
+                focus:outline-none focus:ring-2 focus:ring-accent
+              "
             >
-              <div className="flex items-center">
-                <AssistantIcon assistant={assistant} />
-                <h2
-                  className="
-                  text-xl
-                  font-semibold
-                  mb-2
-                  my-auto
-                  ml-2
-                  text-strong
+              <option value="featured">Featured</option>
+              <option value="name-asc">A → Z</option>
+              <option value="recent">Newly added</option>
+            </select>
+          </div>
+        </div>
+
+        {/* Empty state when filters exclude everything */}
+        {sections.length === 0 && (
+          <div className="text-center py-12 text-subtle">
+            <p className="font-medium text-default">No assistants match.</p>
+            <p className="text-sm mt-1">
+              {hasAnyFilter
+                ? "Try a different filter, or clear all to see everything."
+                : "There are no assistants available to you yet."}
+            </p>
+            {hasAnyFilter && (
+              <button
+                type="button"
+                onClick={clearAllFilters}
+                className="
+                  mt-3 text-sm px-3 py-1.5 rounded
+                  border border-border hover:bg-hover
+                  focus:outline-none focus:ring-2 focus:ring-accent
                 "
-                >
-                  {assistant.name}
-                </h2>
-                {user && (
-                  <div className="ml-auto">
-                    {!user.preferences?.chosen_assistants ||
-                    user.preferences?.chosen_assistants?.includes(
-                      assistant.id
-                    ) ? (
-                      <Button
-                        className="
-                          mr-2
-                          my-auto
-                        "
-                        icon={FiMinus}
-                        onClick={async () => {
-                          if (
-                            user.preferences?.chosen_assistants &&
-                            user.preferences?.chosen_assistants.length === 1
-                          ) {
-                            setPopup({
-                              message: `Cannot remove "${assistant.name}" - you must have at least one assistant.`,
-                              type: "error",
-                            });
-                            return;
-                          }
-
-                          const success = await removeAssistantFromList(
-                            assistant.id,
-                            user.preferences?.chosen_assistants ||
-                              allAssistantIds
-                          );
-                          if (success) {
-                            setPopup({
-                              message: `"${assistant.name}" has been removed from your list.`,
-                              type: "success",
-                            });
-                            router.refresh();
-                          } else {
-                            setPopup({
-                              message: `"${assistant.name}" could not be removed from your list.`,
-                              type: "error",
-                            });
-                          }
-                        }}
-                        size="xs"
-                        color="red"
-                      >
-                        Remove
-                      </Button>
-                    ) : (
-                      <Button
-                        className="
-                      mr-2
-                      my-auto
-                    "
-                        icon={FiPlus}
-                        onClick={async () => {
-                          const success = await addAssistantToList(
-                            assistant.id,
-                            user.preferences?.chosen_assistants ||
-                              allAssistantIds
-                          );
-                          if (success) {
-                            setPopup({
-                              message: `"${assistant.name}" has been added to your list.`,
-                              type: "success",
-                            });
-                            router.refresh();
-                          } else {
-                            setPopup({
-                              message: `"${assistant.name}" could not be added to your list.`,
-                              type: "error",
-                            });
-                          }
-                        }}
-                        size="xs"
-                        color="green"
-                      >
-                        Add
-                      </Button>
-                    )}
-                  </div>
-                )}
-              </div>
-              {assistant.tools.length > 0 && (
-                <ToolsDisplay tools={assistant.tools} />
-              )}
-              <p className="text-sm mt-2">{assistant.description}</p>
-              <p className="text-subtle text-sm mt-2">
-                Author: {assistant.owner?.email || "Darwin"}
-              </p>
+              >
+                Clear all filters
+              </button>
+            )}
+          </div>
+        )}
+
+        {/* Sections */}
+        {sections.map((section) => (
+          <section key={section.key} className="mb-8">
+            <div className="flex items-center gap-2 mb-3">
+              <h3 className="text-sm font-semibold uppercase tracking-wide text-subtle">
+                {section.label}
+              </h3>
+              <span className="text-xs text-subtle">
+                ({section.assistants.length})
+              </span>
             </div>
-          ))}
-        </div>
+            <div className={`grid gap-3 ${gridClass}`}>
+              {section.assistants.map((assistant) => (
+                <GalleryCard
+                  key={assistant.id}
+                  assistant={assistant}
+                  user={user}
+                  isAdded={chosenSet.has(assistant.id)}
+                  onAdd={handleAdd}
+                  onRemove={handleRemove}
+                />
+              ))}
+            </div>
+          </section>
+        ))}
       </div>
     </>
   );
diff --git a/web/src/app/assistants/mine/AssistantsList.tsx b/web/src/app/assistants/mine/AssistantsList.tsx
index 576a4c74add..33027fa1ed1 100644
--- a/web/src/app/assistants/mine/AssistantsList.tsx
+++ b/web/src/app/assistants/mine/AssistantsList.tsx
@@ -1,367 +1,964 @@
 "use client";
 
-import { useState } from "react";
+/**
+ * Manage Assistants — redesigned UX.
+ *
+ * What changed vs the prior "move up / move down inside a 3-dot popover":
+ *
+ *   1. Drag-and-drop reorder via @dnd-kit (already in package.json), with
+ *      a grab handle on each visible row. Up/down arrows removed.
+ *   2. Explicit "set as default" star icon on each visible row. Filled =
+ *      current default; the default row also gets an accent border.
+ *      Ordering and default are now orthogonal.
+ *   3. Visibility is a row-level toggle, not a popover item. The page
+ *      shows ONE list with a divider; hidden rows render under the
+ *      "Hidden" divider at reduced opacity.
+ *   4. Client-side search filters by name + description + tool name.
+ *   5. Description restyled and tools chip removed (see the chip JSX in
+ *      RowContent) so the hierarchy answers "should I pick this?".
+ *      Doc-set name chips (first few, then "+N more") surface knowledge
+ *      scope, which used to be hidden in expanded mode only.
+ *   6. Bulk select column + action bar (Show / Hide / Remove) appears
+ *      only when something is selected.
+ *   7. Header: single title + 1-line subtitle + Create button top-right,
+ *      "Browse all available" as a text link. Cut the giant tile pair.
+ *   8. Undo toast on reorder / default-change / visibility-toggle.
+ *      Reuses the extended Popup component (`undo` field on PopupSpec).
+ *
+ * Everything is optimistic: local `chosenOrder` state mutates first, the
+ * PATCH runs after, and a failure rolls back + shows an error toast.
+ *
+ * NOTE: this file replaces the old up/down arrow flow entirely; the
+ * `moveAssistantUp` / `moveAssistantDown` helpers in
+ * `lib/assistants/updateAssistantPreferences.ts` are kept for any other
+ * callers but no longer used here.
+ */
+
+import { useMemo, useRef, useState } from "react";
 import { MinimalUserSnapshot, User } from "@/lib/types";
 import { Persona } from "@/app/admin/assistants/interfaces";
-import { Divider, Text } from "@tremor/react";
+import { Text } from "@tremor/react";
 import {
-  FiArrowDown,
-  FiArrowUp,
+  FiBookmark,
   FiEdit2,
-  FiMoreHorizontal,
+  FiEye,
+  FiEyeOff,
   FiPlus,
   FiSearch,
-  FiX,
   FiShare2,
+  FiStar,
+  FiTrash2,
 } from "react-icons/fi";
-import Link from "next/link";
-import { orderAssistantsForUser } from "@/lib/assistants/orderAssistants";
+import { MdDragIndicator } from "react-icons/md";
 import {
-  addAssistantToList,
-  moveAssistantDown,
-  moveAssistantUp,
-  removeAssistantFromList,
-} from "@/lib/assistants/updateAssistantPreferences";
+  DndContext,
+  DragEndEvent,
+  PointerSensor,
+  closestCenter,
+  useSensor,
+  useSensors,
+} from "@dnd-kit/core";
+import { restrictToVerticalAxis } from "@dnd-kit/modifiers";
+import {
+  SortableContext,
+  arrayMove,
+  useSortable,
+  verticalListSortingStrategy,
+} from "@dnd-kit/sortable";
+import { CSS } from "@dnd-kit/utilities";
+import Link from "next/link";
+import { useRouter } from "next/navigation";
+import useSWR from "swr";
+import { errorHandlingFetcher } from "@/lib/fetcher";
 import { AssistantIcon } from "@/components/assistants/AssistantIcon";
-import { DefaultPopover } from "@/components/popover/DefaultPopover";
+import { Bubble } from "@/components/Bubble";
 import { PopupSpec, usePopup } from "@/components/admin/connectors/Popup";
-import { useRouter } from "next/navigation";
-import { NavigationButton } from "../NavigationButton";
-import { AssistantsPageTitle } from "../AssistantsPageTitle";
 import { checkUserOwnsAssistant } from "@/lib/assistants/checkOwnership";
+import {
+  bulkAddToList,
+  bulkRemoveFromList,
+  reorderAssistantList,
+  setDefaultAssistant,
+} from "@/lib/assistants/updateAssistantPreferences";
 import { AssistantSharingModal } from "./AssistantSharingModal";
 import { AssistantSharedStatusDisplay } from "../AssistantSharedStatus";
-import useSWR from "swr";
-import { errorHandlingFetcher } from "@/lib/fetcher";
-import { ToolsDisplay } from "../ToolsDisplay";
+import { AssistantsPageTitle } from "../AssistantsPageTitle";
 
-function AssistantListItem({
-  assistant,
-  user,
-  allAssistantIds,
-  allUsers,
-  isFirst,
-  isLast,
-  isVisible,
-  setPopup,
+// How many doc-set name chips to render before collapsing the rest
+// into a "+N more" pill. Three keeps the row scannable on most widths
+// without losing the most-relevant scope at a glance.
+const MAX_VISIBLE_DOC_SETS = 3;
+
+// ---------------------------------------------------------------------------
+// Small inline switch — avoids pulling in a new component library for one
+// toggle. role="switch" gives screen readers the right semantics.
+// ---------------------------------------------------------------------------
+
+function Toggle({
+  checked,
+  onChange,
+  ariaLabel,
+  highlight = false,
 }: {
+  checked: boolean;
+  onChange: (next: boolean) => void;
+  ariaLabel: string;
+  // When true, draw a transient ring around the switch to direct the
+  // eye. Used by hidden rows so clicking the (faded) row body points
+  // the user at the action that brings the assistant back.
+  highlight?: boolean;
+}) {
+  return (
+    <button
+      type="button"
+      role="switch"
+      aria-checked={checked}
+      aria-label={ariaLabel}
+      onClick={(e) => {
+        e.stopPropagation();
+        onChange(!checked);
+      }}
+      className={`
+        relative inline-flex h-5 w-9 items-center rounded-full
+        transition-all flex-shrink-0
+        focus:outline-none focus:ring-2 focus:ring-accent focus:ring-offset-1
+        ${checked ? "bg-accent" : "bg-border"}
+        ${
+          highlight
+            ? "ring-2 ring-accent ring-offset-2 ring-offset-background-emphasis scale-110"
+            : ""
+        }
+      `}
+    >
+      <span
+        className={`
+          inline-block h-3.5 w-3.5 transform rounded-full bg-white transition-transform
+          ${checked ? "translate-x-[18px]" : "translate-x-[3px]"}
+        `}
+      />
+    </button>
+  );
+}
+
+// ---------------------------------------------------------------------------
+// Single row — used both inside the sortable visible section AND in the
+// hidden section. `isSortable` toggles drag affordances; the rest of the
+// row is identical so the visual stays consistent across the divider.
+// ---------------------------------------------------------------------------
+
+interface RowProps {
   assistant: Persona;
   user: User | null;
-  allUsers: MinimalUserSnapshot[];
-  allAssistantIds: number[];
-  isFirst: boolean;
-  isLast: boolean;
+  isDefault: boolean;
   isVisible: boolean;
-  setPopup: (popupSpec: PopupSpec | null) => void;
-}) {
-  const router = useRouter();
-  const [showSharingModal, setShowSharingModal] = useState(false);
+  isSelected: boolean;
+  onToggleSelect: (id: number) => void;
+  onSetDefault: (id: number) => void;
+  onToggleVisibility: (id: number, makeVisible: boolean) => void;
+  onShareClick: (id: number) => void;
+}
 
-  const currentChosenAssistants = user?.preferences?.chosen_assistants;
+function RowContent({
+  assistant,
+  user,
+  isDefault,
+  isVisible,
+  isSelected,
+  onToggleSelect,
+  onSetDefault,
+  onToggleVisibility,
+  onShareClick,
+  // From useSortable when in sortable context; null otherwise.
+  dragHandleProps,
+}: RowProps & {
+  dragHandleProps:
+    | (React.HTMLAttributes<HTMLButtonElement> & { ref?: any })
+    | null;
+}) {
   const isOwnedByUser = checkUserOwnsAssistant(user, assistant);
+  const canEdit = isOwnedByUser;
+  const canShare = isOwnedByUser && !assistant.is_public;
+
+  // Doc-set names are surfaced as small chips; tools count was removed
+  // intentionally — see the chip JSX below for the why.
+
+  // Click-on-hidden-row affordance: a click on the row body draws a
+  // transient ring around the visibility toggle. Right-side actions stop
+  // propagation; NOTE(review): the bulk-select checkbox does not, so it
+  // also flashes. Doesn't auto-enable — surprising a reader into enabling
+  // it would be worse than the discoverability gap we're fixing.
+  const [highlightToggle, setHighlightToggle] = useState(false);
+  const highlightTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(
+    null
+  );
+  const flashToggle = () => {
+    if (highlightTimeoutRef.current) {
+      clearTimeout(highlightTimeoutRef.current);
+    }
+    setHighlightToggle(true);
+    highlightTimeoutRef.current = setTimeout(
+      () => setHighlightToggle(false),
+      1200
+    );
+  };
 
   return (
-    <>
-      <AssistantSharingModal
-        assistant={assistant}
-        user={user}
-        allUsers={allUsers}
-        onClose={() => {
-          setShowSharingModal(false);
-          router.refresh();
-        }}
-        show={showSharingModal}
-      />
-      <div
+    <div
+      onClick={isVisible ? undefined : flashToggle}
+      className={`
+        group bg-background-emphasis rounded-lg p-4 mb-3
+        flex items-center gap-3
+        border transition
+        ${isDefault ? "border-accent shadow-md" : "border-transparent shadow-sm"}
+        ${isSelected ? "ring-2 ring-accent" : ""}
+        ${!isVisible ? "cursor-pointer" : ""}
+      `}
+    >
+      {/* Bulk-select checkbox. Dimmed (30% opacity) until the row is
+          hovered, the box is focused, or it is checked (the parent shows
+          the action bar once anything is selected). */}
+      <input
+        type="checkbox"
+        aria-label={`Select ${assistant.name}`}
+        checked={isSelected}
+        onChange={() => onToggleSelect(assistant.id)}
         className="
-          bg-background-emphasis
-          rounded-lg
-          shadow-md
-          p-4
-          mb-4
-          flex
-          justify-between
-          items-center
+          h-4 w-4 cursor-pointer
+          opacity-30 group-hover:opacity-100 focus:opacity-100
+          checked:opacity-100
+          transition-opacity
         "
+      />
+
+      {/* Drag handle — only meaningful for visible rows. Hidden rows
+          have no position to drag to. */}
+      {dragHandleProps ? (
+        <button
+          type="button"
+          aria-label={`Drag to reorder ${assistant.name}`}
+          className="
+            cursor-grab active:cursor-grabbing
+            text-subtle hover:text-default
+            opacity-40 group-hover:opacity-100 transition-opacity
+            focus:outline-none focus:ring-2 focus:ring-accent rounded
+            touch-none
+          "
+          {...dragHandleProps}
+        >
+          <MdDragIndicator size={22} />
+        </button>
+      ) : (
+        // Reserve the slot so visible/hidden rows line up vertically.
+        <div className="w-[22px] flex-shrink-0" />
+      )}
+
+      {/* CONTENT ZONE — fades on hidden rows. Action controls below
+          stay at full opacity so they remain the bright, clickable
+          targets on a dimmed row. */}
+      <div
+        className={`flex flex-1 items-center gap-3 min-w-0 ${
+          isVisible ? "" : "opacity-50"
+        }`}
       >
-        <div className="w-3/4">
-          <div className="flex items-center">
-            <AssistantIcon assistant={assistant} />
-            <h2 className="text-xl font-semibold mb-2 my-auto ml-2">
+        <AssistantIcon assistant={assistant} />
+
+        <div className="flex-1 min-w-0">
+          <div className="flex items-center gap-2 mb-1">
+            <h2 className="text-base font-semibold truncate">
               {assistant.name}
             </h2>
+            {isDefault && (
+              <span
+                className="
+                text-xs px-2 py-0.5 rounded-full
+                bg-accent/15 text-accent font-medium
+              "
+              >
+                Default
+              </span>
+            )}
           </div>
-          {assistant.tools.length > 0 && (
-            <ToolsDisplay tools={assistant.tools} />
+
+          {/* Description — still text-sm, now text-default with snug
+            leading; it's the primary signal of what the assistant is for. */}
+          {assistant.description && (
+            <div className="text-sm text-default leading-snug">
+              {assistant.description}
+            </div>
           )}
-          <div className="text-sm mt-2">{assistant.description}</div>
-          <div className="mt-2">
+
+          {/* Sharing status, e.g. "Shared with 3 people". */}
+          <div className="mt-1">
             <AssistantSharedStatusDisplay assistant={assistant} user={user} />
           </div>
+
+          {/* Knowledge-scope chips — show which document sets the
+            assistant points at, not just the count. With many sets,
+            show the first few and a "+N more" with the rest in a
+            tooltip so the row stays scannable. Tools chip was
+            intentionally removed: tool execution isn't reliable yet
+            and surfacing tool counts misleads users into picking an
+            assistant for a capability that may not work in practice. */}
+          {assistant.document_sets && assistant.document_sets.length > 0 && (
+            <div className="flex flex-wrap gap-2 mt-2 text-xs text-subtle">
+              {assistant.document_sets
+                .slice(0, MAX_VISIBLE_DOC_SETS)
+                .map((ds) => (
+                  <Bubble key={ds.id} isSelected={false}>
+                    <div className="flex items-center gap-1 max-w-[220px]">
+                      <FiBookmark size={12} className="flex-shrink-0" />
+                      <span className="truncate" title={ds.name}>
+                        {ds.name}
+                      </span>
+                    </div>
+                  </Bubble>
+                ))}
+              {assistant.document_sets.length > MAX_VISIBLE_DOC_SETS && (
+                <Bubble isSelected={false}>
+                  <span
+                    title={assistant.document_sets
+                      .slice(MAX_VISIBLE_DOC_SETS)
+                      .map((d) => d.name)
+                      .join(", ")}
+                  >
+                    +{assistant.document_sets.length - MAX_VISIBLE_DOC_SETS}{" "}
+                    more
+                  </span>
+                </Bubble>
+              )}
+            </div>
+          )}
         </div>
-        {isOwnedByUser && (
-          <div className="ml-auto flex items-center">
-            {!assistant.is_public && (
-              <div
-                className="mr-4 rounded p-2 cursor-pointer hover:bg-hover"
-                onClick={() => setShowSharingModal(true)}
-              >
-                <FiShare2 size={16} />
-              </div>
-            )}
-            <Link
-              href={`/assistants/edit/${assistant.id}`}
-              className="mr-4 rounded p-2 cursor-pointer hover:bg-hover"
-            >
-              <FiEdit2 size={16} />
-            </Link>
-          </div>
+      </div>
+      {/* End CONTENT ZONE. Actions below sit OUTSIDE the opacity
+          wrapper so they remain at full opacity on hidden rows — the
+          toggle must be the bright, clickable focus when the rest of
+          the row is dimmed. */}
+
+      {/* Right-side actions. Order matters for scannability: default
+          pin first (most-used), visibility toggle, then ownership
+          actions (edit/share). stopPropagation prevents the row-body
+          flash-toggle handler from firing when the user clicks an
+          action directly. */}
+      <div
+        className="flex items-center gap-2 flex-shrink-0"
+        onClick={(e) => e.stopPropagation()}
+      >
+        {/* Pin / default. Only meaningful for visible rows — pinning a
+            hidden one would have to unhide it too; we surface that via
+            the visibility toggle instead. */}
+        {isVisible && (
+          <button
+            type="button"
+            aria-label={
+              isDefault
+                ? `${assistant.name} is your default`
+                : `Set ${assistant.name} as default`
+            }
+            disabled={isDefault}
+            onClick={() => onSetDefault(assistant.id)}
+            className={`
+              p-2 rounded
+              ${
+                isDefault
+                  ? "text-accent cursor-default"
+                  : "text-subtle hover:text-default hover:bg-hover cursor-pointer"
+              }
+              focus:outline-none focus:ring-2 focus:ring-accent
+            `}
+            title={isDefault ? "Default assistant" : "Set as default"}
+          >
+            <FiStar size={16} className={isDefault ? "fill-current" : ""} />
+          </button>
         )}
-        <DefaultPopover
-          content={
-            <div className="hover:bg-hover rounded p-2 cursor-pointer">
-              <FiMoreHorizontal size={16} />
-            </div>
+
+        {/* Visibility — switch instead of a buried popover item.
+            `highlight` is set by the row-body click handler on hidden
+            rows so a click anywhere on the (faded) row body draws the
+            eye to the action that brings the assistant back. */}
+        <Toggle
+          checked={isVisible}
+          onChange={(next) => onToggleVisibility(assistant.id, next)}
+          highlight={highlightToggle}
+          ariaLabel={
+            isVisible
+              ? `Hide ${assistant.name} from the picker`
+              : `Show ${assistant.name} in the picker`
           }
-          side="bottom"
-          align="start"
-          sideOffset={5}
-        >
-          {[
-            ...(!isFirst
-              ? [
-                  <div
-                    key="move-up"
-                    className="flex items-center gap-x-2"
-                    onClick={async () => {
-                      const success = await moveAssistantUp(
-                        assistant.id,
-                        currentChosenAssistants || allAssistantIds
-                      );
-                      if (success) {
-                        setPopup({
-                          message: `"${assistant.name}" has been moved up.`,
-                          type: "success",
-                        });
-                        router.refresh();
-                      } else {
-                        setPopup({
-                          message: `"${assistant.name}" could not be moved up.`,
-                          type: "error",
-                        });
-                      }
-                    }}
-                  >
-                    <FiArrowUp /> Move Up
-                  </div>,
-                ]
-              : []),
-            ...(!isLast
-              ? [
-                  <div
-                    key="move-down"
-                    className="flex items-center gap-x-2"
-                    onClick={async () => {
-                      const success = await moveAssistantDown(
-                        assistant.id,
-                        currentChosenAssistants || allAssistantIds
-                      );
-                      if (success) {
-                        setPopup({
-                          message: `"${assistant.name}" has been moved down.`,
-                          type: "success",
-                        });
-                        router.refresh();
-                      } else {
-                        setPopup({
-                          message: `"${assistant.name}" could not be moved down.`,
-                          type: "error",
-                        });
-                      }
-                    }}
-                  >
-                    <FiArrowDown /> Move Down
-                  </div>,
-                ]
-              : []),
-            isVisible ? (
-              <div
-                key="remove"
-                className="flex items-center gap-x-2"
-                onClick={async () => {
-                  if (
-                    currentChosenAssistants &&
-                    currentChosenAssistants.length === 1
-                  ) {
-                    setPopup({
-                      message: `Cannot remove "${assistant.name}" - you must have at least one assistant.`,
-                      type: "error",
-                    });
-                    return;
-                  }
-
-                  const success = await removeAssistantFromList(
-                    assistant.id,
-                    currentChosenAssistants || allAssistantIds
-                  );
-                  if (success) {
-                    setPopup({
-                      message: `"${assistant.name}" has been removed from your list.`,
-                      type: "success",
-                    });
-                    router.refresh();
-                  } else {
-                    setPopup({
-                      message: `"${assistant.name}" could not be removed from your list.`,
-                      type: "error",
-                    });
-                  }
-                }}
-              >
-                <FiX /> {isOwnedByUser ? "Hide" : "Remove"}
-              </div>
-            ) : (
-              <div
-                key="add"
-                className="flex items-center gap-x-2"
-                onClick={async () => {
-                  const success = await addAssistantToList(
-                    assistant.id,
-                    currentChosenAssistants || allAssistantIds
-                  );
-                  if (success) {
-                    setPopup({
-                      message: `"${assistant.name}" has been added to your list.`,
-                      type: "success",
-                    });
-                    router.refresh();
-                  } else {
-                    setPopup({
-                      message: `"${assistant.name}" could not be added to your list.`,
-                      type: "error",
-                    });
-                  }
-                }}
-              >
-                <FiPlus /> Add
-              </div>
-            ),
-          ]}
-        </DefaultPopover>
+        />
+
+        {canShare && (
+          <button
+            type="button"
+            aria-label="Share assistant"
+            onClick={() => onShareClick(assistant.id)}
+            className="p-2 rounded hover:bg-hover text-subtle hover:text-default"
+            title="Share"
+          >
+            <FiShare2 size={16} />
+          </button>
+        )}
+        {canEdit && (
+          <Link
+            href={`/assistants/edit/${assistant.id}`}
+            aria-label="Edit assistant"
+            className="p-2 rounded hover:bg-hover text-subtle hover:text-default"
+            title="Edit"
+          >
+            <FiEdit2 size={16} />
+          </Link>
+        )}
       </div>
-    </>
+    </div>
+  );
+}
+
+// Sortable row — wraps RowContent and wires up @dnd-kit's transform/listeners.
+function SortableAssistantRow(props: RowProps) {
+  const {
+    attributes,
+    listeners,
+    setNodeRef,
+    setActivatorNodeRef,
+    transform,
+    transition,
+    isDragging,
+  } = useSortable({ id: props.assistant.id });
+
+  const style: React.CSSProperties = {
+    transform: CSS.Transform.toString(transform),
+    transition,
+    opacity: isDragging ? 0.5 : 1,
+  };
+
+  return (
+    <div ref={setNodeRef} style={style}>
+      <RowContent
+        {...props}
+        dragHandleProps={{
+          ref: setActivatorNodeRef,
+          ...attributes,
+          ...listeners,
+        }}
+      />
+    </div>
   );
 }
 
+// Static row — used for the hidden section (no DnD).
+function StaticAssistantRow(props: RowProps) {
+  return <RowContent {...props} dragHandleProps={null} />;
+}
+
+// ---------------------------------------------------------------------------
+// Bulk action bar — rendered by the parent only when something is selected.
+// Purely presentational: every button just calls its callback prop. Show/Hide
+// mirror the per-row visibility toggle; "Remove" keeps the prior "Hide /
+// Remove" semantic (removes from chosen_assistants regardless of ownership).
+// ---------------------------------------------------------------------------
+
+function BulkActionsBar({
+  selectedCount,
+  onClearSelection,
+  onShow,
+  onHide,
+  onRemove,
+}: {
+  selectedCount: number; // rendered as "<n> selected"
+  onClearSelection: () => void; // "Clear" button
+  onShow: () => void; // "Show" button
+  onHide: () => void; // "Hide" button
+  onRemove: () => void; // "Remove" button (styled with text-error)
+}) {
+  return (
+    <div
+      className="
+        sticky top-2 z-10 mb-4
+        bg-background-emphasis border border-accent/30 shadow-md rounded-lg
+        flex items-center gap-3 p-3
+      "
+    >
+      <span className="text-sm font-medium">{selectedCount} selected</span>
+      <button
+        type="button"
+        onClick={onShow}
+        className="
+          text-sm px-3 py-1.5 rounded
+          hover:bg-hover flex items-center gap-1.5
+        "
+      >
+        <FiEye size={14} /> Show
+      </button>
+      <button
+        type="button"
+        onClick={onHide}
+        className="
+          text-sm px-3 py-1.5 rounded
+          hover:bg-hover flex items-center gap-1.5
+        "
+      >
+        <FiEyeOff size={14} /> Hide
+      </button>
+      <button
+        type="button"
+        onClick={onRemove}
+        className="
+          text-sm px-3 py-1.5 rounded
+          hover:bg-hover text-error flex items-center gap-1.5
+        "
+      >
+        <FiTrash2 size={14} /> Remove
+      </button>
+      <button
+        type="button"
+        onClick={onClearSelection}
+        className="ml-auto text-sm text-subtle hover:text-default"
+      >
+        Clear
+      </button>
+    </div>
+  );
+}
+
+// ---------------------------------------------------------------------------
+// Main list. State model:
+//   - `chosenOrder`: the user's chosen_assistants array (ordered, visible)
+//   - hidden = every assistant the user has access to that's NOT in chosenOrder
+//   - selected: bulk-action set; orthogonal to visible/hidden
+//   - search: pure client-side filter applied to both groups before render
+//
+// Single-row mutations are optimistic (set state, PATCH, roll back + toast on
+// error); bulk actions set state only after the request succeeds. On success
+// router.refresh() lets the rest of the app (chat picker etc.) see the order.
+// ---------------------------------------------------------------------------
+
 interface AssistantsListProps {
   user: User | null;
   assistants: Persona[];
 }
 
 export function AssistantsList({ user, assistants }: AssistantsListProps) {
-  const filteredAssistants = orderAssistantsForUser(assistants, user);
-  const ownedButHiddenAssistants = assistants.filter(
-    (assistant) =>
-      checkUserOwnsAssistant(user, assistant) &&
-      user?.preferences?.chosen_assistants &&
-      !user?.preferences?.chosen_assistants?.includes(assistant.id)
-  );
-  const allAssistantIds = assistants.map((assistant) => assistant.id);
-
+  const router = useRouter();
   const { popup, setPopup } = usePopup();
 
-  const { data: users } = useSWR<MinimalUserSnapshot[]>(
+  // When the user has no preference yet, treat every accessible
+  // assistant as "visible by default" — matches the previous behavior.
+  const initialChosen: number[] =
+    user?.preferences?.chosen_assistants ?? assistants.map((a) => a.id);
+
+  const [chosenOrder, setChosenOrder] = useState<number[]>(initialChosen);
+  const [search, setSearch] = useState("");
+  const [selected, setSelected] = useState<Set<number>>(new Set());
+  const [sharingAssistantId, setSharingAssistantId] = useState<number | null>(
+    null
+  );
+
+  // Pulled from /api/users; used by the share modal. Same pattern as the
+  // pre-rewrite component.
+  const { data: allUsers } = useSWR<MinimalUserSnapshot[]>(
     "/api/users",
     errorHandlingFetcher
   );
 
+  // Derived: id-keyed lookup, visible/hidden splits, search-filtered.
+  const assistantsById = useMemo(
+    () => new Map(assistants.map((a) => [a.id, a])),
+    [assistants]
+  );
+  const chosenSet = useMemo(() => new Set(chosenOrder), [chosenOrder]);
+
+  const visibleAssistants: Persona[] = useMemo(() => {
+    const out: Persona[] = [];
+    for (const id of chosenOrder) {
+      const a = assistantsById.get(id);
+      if (a) out.push(a);
+    }
+    return out;
+  }, [chosenOrder, assistantsById]);
+
+  const hiddenAssistants: Persona[] = useMemo(
+    () => assistants.filter((a) => !chosenSet.has(a.id)),
+    [assistants, chosenSet]
+  );
+
+  const matchesSearch = (a: Persona) => {
+    if (!search.trim()) return true;
+    const q = search.trim().toLowerCase();
+    if (a.name.toLowerCase().includes(q)) return true;
+    if (a.description?.toLowerCase().includes(q)) return true;
+    return (a.tools ?? []).some((t) => t.name.toLowerCase().includes(q));
+  };
+
+  const filteredVisible = visibleAssistants.filter(matchesSearch);
+  const filteredHidden = hiddenAssistants.filter(matchesSearch);
+
+  // The default is just position 0 of chosen_assistants. If the user has
+  // no saved preference, there is no "default" yet — leave it unset (null)
+  // so no row shows the accent until the user picks one.
+  const defaultId =
+    user?.preferences?.chosen_assistants && chosenOrder.length > 0
+      ? chosenOrder[0]
+      : null;
+
+  // ---- persistence with optimistic + undo --------------------------------
+  // Optimistically apply `nextOrder`, PATCH it, and roll back on failure.
+  // `rollbackTo` overrides the rollback target (Undo's closure is stale).
+  const persistOrder = async (
+    nextOrder: number[],
+    opts: {
+      successMsg?: string;
+      undoToOrder?: number[];
+      rollbackTo?: number[];
+    } = {}
+  ): Promise<boolean> => {
+    const { successMsg, undoToOrder, rollbackTo } = opts;
+    const prev = rollbackTo ?? chosenOrder;
+    setChosenOrder(nextOrder);
+    const ok = await reorderAssistantList(nextOrder);
+    if (!ok) {
+      setChosenOrder(prev);
+      setPopup({
+        message: "Couldn't update your assistant list — please try again.",
+        type: "error",
+      });
+      return false;
+    }
+    if (successMsg) {
+      const undo =
+        undoToOrder !== undefined
+          ? {
+              // If the undo itself fails, restore what is on screen now.
+              onClick: async () => {
+                await persistOrder(undoToOrder, { rollbackTo: nextOrder });
+              },
+            }
+          : undefined;
+      setPopup({ message: successMsg, type: "success", undo });
+    }
+    // Refresh SSR data so the chat picker, sidebar, etc. see the new order.
+    router.refresh();
+    return true;
+  };
+
+  // ---- handlers ----------------------------------------------------------
+
+  const handleDragEnd = (event: DragEndEvent) => {
+    const { active, over } = event;
+    if (!over || active.id === over.id) return; // no drop target, or dropped onto itself
+    const oldIndex = chosenOrder.indexOf(Number(active.id)); // sortable ids are assistant ids
+    const newIndex = chosenOrder.indexOf(Number(over.id)); // indices in the full order, not the filtered one
+    if (oldIndex < 0 || newIndex < 0) return; // id is not in the visible order; ignore
+    const next = arrayMove(chosenOrder, oldIndex, newIndex);
+    void persistOrder(next, {
+      successMsg: "Order updated.",
+      undoToOrder: chosenOrder,
+    });
+  };
+
+  // Make `id` the default assistant by moving it to the front of the
+  // chosen order (position 0 is the default). No-op when it is already
+  // first; on success the toast offers Undo back to the previous order.
+  const handleSetDefault = async (id: number) => {
+    const before = chosenOrder;
+    if (before[0] === id) return;
+    // Dedupe while prepending so an already-visible id is moved, not copied.
+    const reordered = [id, ...before.filter((other) => other !== id)];
+    const saved = await persistOrder(reordered, {
+      successMsg: `Default assistant updated.`,
+      undoToOrder: before,
+    });
+    // On failure persistOrder has already shown the error toast.
+    if (!saved) return;
+    // setDefaultAssistant also handles the case where id wasn't in
+    // chosen_assistants; persistOrder above already prepended it.
+    void setDefaultAssistant(id, before); // best-effort idempotent confirmation
+  };
+
+  // Show (`makeVisible`) or hide one assistant in the chat picker. Showing
+  // appends to the end of the order; hiding the last visible one is refused.
+  const handleToggleVisibility = async (id: number, makeVisible: boolean) => {
+    const prev = chosenOrder;
+    const assistant = assistantsById.get(id);
+    // Already in the requested state (e.g. double click): avoid duplicate ids.
+    if (chosenSet.has(id) === makeVisible) return;
+    if (makeVisible) {
+      await persistOrder([...prev, id], {
+        successMsg: assistant
+          ? `"${assistant.name}" added to your picker.`
+          : "Added to your picker.",
+        undoToOrder: prev,
+      });
+      return;
+    }
+    if (prev.length === 1 && prev[0] === id) {
+      setPopup({
+        message:
+          "You need at least one visible assistant — can't hide the last one.",
+        type: "error",
+      });
+      return;
+    }
+    await persistOrder(prev.filter((x) => x !== id), {
+      successMsg: assistant
+        ? `"${assistant.name}" hidden from your picker.`
+        : "Hidden from your picker.",
+      undoToOrder: prev,
+    });
+  };
+
+  const handleToggleSelect = (id: number) => {
+    setSelected((current) => {
+      const updated = new Set(current);
+      // Set.delete returns false when the id was absent, i.e. select it.
+      if (!updated.delete(id)) updated.add(id);
+      return updated;
+    });
+  };
+
+  const clearSelection = () => setSelected(new Set());
+
+  // Bulk "Show": append the selected assistants that are not visible yet.
+  // Not optimistic: local state changes only after the helper call succeeds.
+  const handleBulkShow = async () => {
+    const ids = Array.from(selected);
+    const prev = chosenOrder;
+    const ok = await bulkAddToList(ids, prev);
+    if (!ok) {
+      setPopup({ message: "Couldn't show selected.", type: "error" });
+      return;
+    }
+    // Selected rows may already be visible; append and count only new ones.
+    const toAppend = ids.filter((id) => !chosenSet.has(id));
+    setChosenOrder([...prev, ...toAppend]);
+    setPopup({
+      message: `${toAppend.length} assistant${toAppend.length === 1 ? "" : "s"} shown.`,
+      type: "success",
+      undo: {
+        onClick: async () => {
+          await persistOrder(prev);
+        },
+      },
+    });
+    clearSelection();
+    router.refresh();
+  };
+
+  // Bulk "Hide": drop every selected id from the visible order, refusing
+  // to empty the list. State changes only after the helper call succeeds.
+  const handleBulkHide = async () => {
+    const prev = chosenOrder;
+    const ids = Array.from(selected);
+    // `selected` is already a Set, so test membership on it directly.
+    const remaining = prev.filter((id) => !selected.has(id));
+    // Selected rows may already be hidden; count only real removals.
+    const hiddenCount = prev.length - remaining.length;
+    if (remaining.length === 0 && prev.length > 0) {
+      setPopup({
+        message: "Can't hide every visible assistant — keep at least one.",
+        type: "error",
+      });
+      return;
+    }
+    const ok = await bulkRemoveFromList(ids, prev);
+    if (!ok) {
+      setPopup({ message: "Couldn't hide selected.", type: "error" });
+      return;
+    }
+    setChosenOrder(remaining);
+    setPopup({
+      message: `${hiddenCount} assistant${hiddenCount === 1 ? "" : "s"} hidden.`,
+      type: "success",
+      undo: { onClick: async () => { await persistOrder(prev); } },
+    });
+    clearSelection();
+    router.refresh();
+  };
+
+  // "Remove" is the same backend op as Hide today — both just remove the
+  // ids from chosen_assistants. The label distinction is a UX hint: Hide
+  // is reversible by toggling the switch back on (or Undo); Remove
+  // implies "I don't want to see this any more." Functionally identical
+  // until we have a true "remove access" path.
+  const handleBulkRemove = handleBulkHide;
+
+  // ---- DnD plumbing -------------------------------------------------------
+
+  // 6px activation distance: a click on the handle shouldn't immediately
+  // start a drag. Helps especially for the click-and-then-undo flow.
+  const sensors = useSensors(
+    useSensor(PointerSensor, { activationConstraint: { distance: 6 } })
+  );
+
+  // ---- render -------------------------------------------------------------
+
+  const sharingAssistant =
+    sharingAssistantId != null
+      ? (assistantsById.get(sharingAssistantId) ?? null)
+      : null;
+
   return (
     <>
       {popup}
-      <div className="mx-auto w-searchbar-xs 2xl:w-searchbar-sm 3xl:w-searchbar">
-        <AssistantsPageTitle>My Assistants</AssistantsPageTitle>
-
-        <div className="grid grid-cols-2 gap-4 mt-3">
-          <Link href="/assistants/new">
-            <NavigationButton>
-              <div className="flex justify-center">
-                <FiPlus className="mr-2 my-auto" size={20} />
-                Create New Assistant
-              </div>
-            </NavigationButton>
-          </Link>
 
-          <Link href="/assistants/gallery">
-            <NavigationButton>
-              <div className="flex justify-center">
-                <FiSearch className="mr-2 my-auto" size={20} />
-                View Available Assistants
-              </div>
-            </NavigationButton>
+      {sharingAssistant && (
+        <AssistantSharingModal
+          assistant={sharingAssistant}
+          user={user}
+          allUsers={allUsers ?? []}
+          onClose={() => {
+            setSharingAssistantId(null);
+            router.refresh();
+          }}
+          show
+        />
+      )}
+
+      <div className="mx-auto w-searchbar-xs 2xl:w-searchbar-sm 3xl:w-searchbar pb-12">
+        {/* Header: title + 1-line subtitle + create button + browse link.
+            Cut the two-tile nav block and the explanatory paragraph. */}
+        <div className="flex items-start justify-between gap-4 mb-3">
+          <div className="min-w-0">
+            <AssistantsPageTitle>My Assistants</AssistantsPageTitle>
+            <Text className="text-subtle">
+              Choose which assistants appear in the chat picker, set your
+              default, and reorder by dragging.
+            </Text>
+          </div>
+          <Link
+            href="/assistants/new"
+            className="
+              flex items-center gap-1.5 flex-shrink-0
+              px-4 py-2 rounded-md
+              bg-accent text-inverted font-medium
+              hover:opacity-90 focus:outline-none focus:ring-2 focus:ring-accent
+            "
+          >
+            <FiPlus size={16} /> Create
           </Link>
         </div>
 
-        <p className="mt-6 text-center text-base">
-          Assistants allow you to customize your experience for a specific
-          purpose. Specifically, they combine instructions, extra knowledge, and
-          any combination of tools.
-        </p>
-
-        <Divider />
-
-        <h3 className="text-xl font-bold mb-4">Active Assistants</h3>
-
-        <Text>
-          The order the assistants appear below will be the order they appear in
-          the Assistants dropdown. The first assistant listed will be your
-          default assistant when you start a new chat.
-        </Text>
-
-        <div className="w-full p-4 mt-3">
-          {filteredAssistants.map((assistant, index) => (
-            <AssistantListItem
-              key={assistant.id}
-              assistant={assistant}
-              user={user}
-              allAssistantIds={allAssistantIds}
-              allUsers={users || []}
-              isFirst={index === 0}
-              isLast={index === filteredAssistants.length - 1}
-              isVisible
-              setPopup={setPopup}
-            />
-          ))}
+        <div className="mb-4">
+          <Link
+            href="/assistants/gallery"
+            className="text-sm text-link hover:underline inline-flex items-center gap-1"
+          >
+            <FiSearch size={14} /> Browse all available assistants
+          </Link>
         </div>
 
-        {ownedButHiddenAssistants.length > 0 && (
-          <>
-            <Divider />
-
-            <h3 className="text-xl font-bold mb-4">Your Hidden Assistants</h3>
+        {/* Search */}
+        <div className="relative mb-4">
+          <FiSearch
+            className="absolute left-3 top-1/2 -translate-y-1/2 text-subtle"
+            size={16}
+          />
+          <input
+            type="search"
+            placeholder="Filter by name, description, or tool…"
+            value={search}
+            onChange={(e) => setSearch(e.target.value)}
+            className="
+              w-full pl-10 pr-3 py-2
+              rounded-md border border-border bg-background
+              focus:outline-none focus:ring-2 focus:ring-accent
+            "
+          />
+        </div>
 
-            <Text>
-              Assistants you&apos;ve created that aren&apos;t currently visible
-              in the Assistants selector.
-            </Text>
+        {/* Bulk actions — only when something selected */}
+        {selected.size > 0 && (
+          <BulkActionsBar
+            selectedCount={selected.size}
+            onClearSelection={clearSelection}
+            onShow={handleBulkShow}
+            onHide={handleBulkHide}
+            onRemove={handleBulkRemove}
+          />
+        )}
 
-            <div className="w-full p-4">
-              {ownedButHiddenAssistants.map((assistant, index) => (
-                <AssistantListItem
+        {/* Visible section — draggable */}
+        {filteredVisible.length > 0 ? (
+          <DndContext
+            sensors={sensors}
+            collisionDetection={closestCenter}
+            onDragEnd={handleDragEnd}
+            modifiers={[restrictToVerticalAxis]}
+          >
+            <SortableContext
+              items={filteredVisible.map((a) => a.id)}
+              strategy={verticalListSortingStrategy}
+            >
+              {filteredVisible.map((assistant) => (
+                <SortableAssistantRow
                   key={assistant.id}
                   assistant={assistant}
                   user={user}
-                  allAssistantIds={allAssistantIds}
-                  allUsers={users || []}
-                  isFirst={index === 0}
-                  isLast={index === filteredAssistants.length - 1}
-                  isVisible={false}
-                  setPopup={setPopup}
+                  isDefault={defaultId === assistant.id}
+                  isVisible
+                  isSelected={selected.has(assistant.id)}
+                  onToggleSelect={handleToggleSelect}
+                  onSetDefault={handleSetDefault}
+                  onToggleVisibility={handleToggleVisibility}
+                  onShareClick={setSharingAssistantId}
                 />
               ))}
+            </SortableContext>
+          </DndContext>
+        ) : filteredHidden.length > 0 ? (
+          <EmptyState
+            title="No visible assistants"
+            body={
+              search
+                ? `Nothing matches "${search}" in your visible list.`
+                : "Toggle one on below to show it in the chat picker."
+            }
+          />
+        ) : null}
+
+        {/* Hidden section — only show divider/header if there's anything */}
+        {filteredHidden.length > 0 && (
+          <>
+            <div className="flex items-center gap-3 my-6">
+              <div className="flex-1 h-px bg-border" />
+              <span className="text-xs uppercase tracking-wide text-subtle font-medium">
+                Hidden ({filteredHidden.length})
+              </span>
+              <div className="flex-1 h-px bg-border" />
             </div>
+            {filteredHidden.map((assistant) => (
+              <StaticAssistantRow
+                key={assistant.id}
+                assistant={assistant}
+                user={user}
+                isDefault={false}
+                isVisible={false}
+                isSelected={selected.has(assistant.id)}
+                onToggleSelect={handleToggleSelect}
+                onSetDefault={handleSetDefault}
+                onToggleVisibility={handleToggleVisibility}
+                onShareClick={setSharingAssistantId}
+              />
+            ))}
           </>
         )}
+
+        {/* Nothing in either section: a filter with no hits, or no assistants */}
+        {filteredVisible.length === 0 && filteredHidden.length === 0 && (
+          <EmptyState
+            title={search.trim() ? "No assistants match" : "No assistants yet"}
+            body={search.trim() ? `Try a different filter — "${search}" matched nothing.` : "Create one, or browse the gallery to add some."}
+          />
+        )}
       </div>
     </>
   );
 }
+
+function EmptyState({ title, body }: { title: string; body: string }) {
+  return (
+    <div className="text-center py-8 text-subtle">
+      <p className="font-medium text-default">{title}</p>
+      <p className="text-sm mt-1">{body}</p>
+    </div>
+  );
+}
diff --git a/web/src/app/chat/ChatPage.tsx b/web/src/app/chat/ChatPage.tsx
index c9deff70cbb..dd0d34cb341 100644
--- a/web/src/app/chat/ChatPage.tsx
+++ b/web/src/app/chat/ChatPage.tsx
@@ -1018,19 +1018,35 @@ export function ChatPage({
       return;
     }
 
+    // Client-side byte pre-check, reading the SAME limit the backend enforces
+    // (CHAT_FILE_MAX_SIZE_MB, surfaced via settings; falls back to 25 if
+    // absent). The backend is still authoritative — a doc can additionally be
+    // rejected on the token gate after extraction.
+    const MAX_FILE_SIZE_MB = settings?.settings?.chat_file_max_size_mb ?? 25;
+    const tooLarge = acceptedFiles.find(
+      (file) => file.size > MAX_FILE_SIZE_MB * 1024 * 1024
+    );
+    if (tooLarge) {
+      setPopup({
+        type: "error",
+        message: `"${tooLarge.name}" is too large (max ${MAX_FILE_SIZE_MB}MB).`,
+      });
+      return;
+    }
+
     const tempFileDescriptors = acceptedFiles.map((file) => ({
       id: uuidv4(),
       type: file.type.startsWith("image/")
         ? ChatFileType.IMAGE
         : ChatFileType.DOCUMENT,
       isUploading: true,
+      progress: 0,
     }));
 
-    // only show loading spinner for reasonably large files
-    const totalSize = acceptedFiles.reduce((sum, file) => sum + file.size, 0);
-    if (totalSize > 50 * 1024) {
-      setCurrentMessageFiles((prev) => [...prev, ...tempFileDescriptors]);
-    }
+    // Always show the previews (with a progress bar) so the user can see the
+    // upload is actually happening — and so the send button stays gated until
+    // it finishes (see ChatInputBar's anyFilesUploading).
+    setCurrentMessageFiles((prev) => [...prev, ...tempFileDescriptors]);
 
     const removeTempFiles = (prev: FileDescriptor[]) => {
       return prev.filter(
@@ -1038,7 +1054,18 @@ export function ChatPage({
       );
     };
 
-    uploadFilesForChat(acceptedFiles).then(([files, error]) => {
+    // Per-file upload progress → update the matching temp descriptor.
+    const updateProgress = (index: number, percent: number) => {
+      const tempId = tempFileDescriptors[index]?.id;
+      if (!tempId) return;
+      setCurrentMessageFiles((prev) =>
+        prev.map((file) =>
+          file.id === tempId ? { ...file, progress: percent } : file
+        )
+      );
+    };
+
+    uploadFilesForChat(acceptedFiles, updateProgress).then(([files, error]) => {
       if (error) {
         setCurrentMessageFiles((prev) => removeTempFiles(prev));
         setPopup({
diff --git a/web/src/app/chat/files/InputBarPreview.tsx b/web/src/app/chat/files/InputBarPreview.tsx
index 8eee7bbf9cd..57512c4f612 100644
--- a/web/src/app/chat/files/InputBarPreview.tsx
+++ b/web/src/app/chat/files/InputBarPreview.tsx
@@ -56,6 +56,8 @@ export function InputBarPreview({
             absolute
             inset-0
             flex
+            flex-col
+            gap-1
             items-center
             justify-center
             bg-black
@@ -65,6 +67,11 @@ export function InputBarPreview({
           "
         >
           <FiLoader className="animate-spin text-white" />
+          {file.progress != null && (
+            <span className="text-white text-xs font-medium">
+              {file.progress}%
+            </span>
+          )}
         </div>
       )}
       {renderContent()}
diff --git a/web/src/app/chat/folders/FolderManagement.tsx b/web/src/app/chat/folders/FolderManagement.tsx
index 1dd87ccd99f..6a147c30311 100644
--- a/web/src/app/chat/folders/FolderManagement.tsx
+++ b/web/src/app/chat/folders/FolderManagement.tsx
@@ -12,8 +12,12 @@ export async function createFolder(folderName: string): Promise<number> {
   if (!response.ok) {
     throw new Error("Failed to create folder");
   }
-  const data = await response.json();
-  return data.folder_id;
+  // The backend endpoint (POST /folder) returns the new folder id as a
+  // bare integer, not an object — so parse it directly. (`data.folder_id`
+  // was always undefined; harmless until the create handler started using
+  // the returned id for optimistic insertion.)
+  const folderId = await response.json();
+  return folderId;
 }
 
 // Function to add a chat session to a folder
diff --git a/web/src/app/chat/input/ChatInputBar.tsx b/web/src/app/chat/input/ChatInputBar.tsx
index 3d5ff9dd968..3e5763bcbec 100644
--- a/web/src/app/chat/input/ChatInputBar.tsx
+++ b/web/src/app/chat/input/ChatInputBar.tsx
@@ -79,6 +79,13 @@ export function ChatInputBar({
     }
   }, [message]);
 
+  // Block sending while any attached file is still uploading — otherwise the
+  // message references a file_id whose file_store row doesn't exist yet, and
+  // the backend errors ("File by name ... does not exist"). Send re-enables
+  // automatically once the upload(s) finish (the per-file spinner clears).
+  const anyFilesUploading = files.some((file) => file.isUploading);
+  const canSubmit = !!message && !isStreaming && !anyFilesUploading;
+
   const handlePaste = (event: React.ClipboardEvent) => {
     const items = event.clipboardData?.items;
     if (items) {
@@ -217,7 +224,9 @@ export function ChatInputBar({
                 {filteredPersonas.map((currentPersona, index) => (
                   <button
                     key={index}
-                    className={`px-2 ${assistantIconIndex == index && "bg-hover"} rounded content-start flex gap-x-1 py-1.5 w-full  hover:bg-hover cursor-pointer`}
+                    className={`px-2 ${
+                      assistantIconIndex == index && "bg-hover"
+                    } rounded content-start flex gap-x-1 py-1.5 w-full  hover:bg-hover cursor-pointer`}
                     onClick={() => {
                       updateCurrentPersona(currentPersona);
                     }}
@@ -233,7 +242,9 @@ export function ChatInputBar({
                 <a
                   key={filteredPersonas.length}
                   target="_blank"
-                  className={`${assistantIconIndex == filteredPersonas.length && "bg-hover"} px-3 flex gap-x-1 py-2 w-full  items-center  hover:bg-hover-light cursor-pointer"`}
+                  className={`${
+                    assistantIconIndex == filteredPersonas.length ? "bg-hover" : ""
+                  } px-3 flex gap-x-1 py-2 w-full  items-center  hover:bg-hover-light cursor-pointer`}
                   href="/assistants/new"
                 >
                   <FiPlus size={17} />
@@ -354,13 +365,16 @@ export function ChatInputBar({
               placeholder="Send a message..."
               value={message}
               onKeyDown={(event) => {
-                if (
+                if (event.key === "Enter" && !event.shiftKey && canSubmit) {
+                  onSubmit();
+                  event.preventDefault();
+                } else if (
                   event.key === "Enter" &&
                   !event.shiftKey &&
-                  message &&
-                  !isStreaming
+                  anyFilesUploading
                 ) {
-                  onSubmit();
+                  // Swallow the Enter so a half-typed message isn't sent
+                  // against a not-yet-uploaded file.
                   event.preventDefault();
                 }
               }}
@@ -420,10 +434,19 @@ export function ChatInputBar({
             </div>
             <div className="absolute bottom-2.5 right-10">
               <div
-                className="cursor-pointer"
+                className={
+                  anyFilesUploading && !isStreaming
+                    ? "cursor-not-allowed"
+                    : "cursor-pointer"
+                }
+                title={
+                  anyFilesUploading
+                    ? "Waiting for file upload to finish…"
+                    : undefined
+                }
                 onClick={() => {
                   if (!isStreaming) {
-                    if (message) {
+                    if (canSubmit) {
                       onSubmit();
                     }
                   } else {
@@ -434,8 +457,8 @@ export function ChatInputBar({
                 <FiSend
                   size={18}
                   className={`text-emphasis w-9 h-9 p-2 rounded-lg ${
-                    message ? "bg-blue-200" : ""
-                  }`}
+                    anyFilesUploading && !isStreaming ? "opacity-40 " : ""
+                  }${message ? "bg-blue-200" : ""}`}
                 />
               </div>
             </div>
diff --git a/web/src/app/chat/interfaces.ts b/web/src/app/chat/interfaces.ts
index 902f5b86553..ae3a7dd3e37 100644
--- a/web/src/app/chat/interfaces.ts
+++ b/web/src/app/chat/interfaces.ts
@@ -32,6 +32,8 @@ export interface FileDescriptor {
   name?: string | null;
   // FE only
   isUploading?: boolean;
+  // FE only — upload progress 0-100 while isUploading
+  progress?: number;
 }
 
 export interface ToolCallMetadata {
diff --git a/web/src/app/chat/lib.tsx b/web/src/app/chat/lib.tsx
index 70d1fc59b84..606b206f37b 100644
--- a/web/src/app/chat/lib.tsx
+++ b/web/src/app/chat/lib.tsx
@@ -569,24 +569,180 @@ export function buildChatUrl(
   return "/chat";
 }
 
-export async function uploadFilesForChat(
-  files: File[]
+// Best-effort extraction of the server's `detail` message from a failed
+// response. Falls back to the HTTP status when the body is not JSON (e.g. a
+// proxy error page) or carries no string `detail`, so building an error
+// message can never throw.
+async function readErrorDetail(resp: Response): Promise<string> {
+  try {
+    const detail = (await resp.json())?.detail;
+    if (typeof detail === "string" && detail) return detail;
+  } catch {
+    // Body was not valid JSON; fall through to the status code.
+  }
+  return `HTTP ${resp.status}`;
+}
+
+// PUT one file straight to Azure Blob via a SAS URL, reporting progress.
+// Resolves on a 2xx status; rejects on any other status, a network error,
+// or an aborted request. `onProgress` receives an integer percentage.
+function putToBlobWithProgress(
+  url: string,
+  file: File,
+  onProgress: (percent: number) => void
+): Promise<void> {
+  return new Promise((resolve, reject) => {
+    const xhr = new XMLHttpRequest();
+    xhr.open("PUT", url);
+    xhr.setRequestHeader("x-ms-blob-type", "BlockBlob");
+    if (file.type) xhr.setRequestHeader("Content-Type", file.type);
+    xhr.upload.onprogress = (e) => {
+      // total can be 0 for an empty file; skip to avoid reporting NaN.
+      if (e.lengthComputable && e.total > 0)
+        onProgress(Math.round((e.loaded / e.total) * 100));
+    };
+    xhr.onload = () =>
+      xhr.status >= 200 && xhr.status < 300
+        ? resolve()
+        : reject(new Error(`Blob upload failed (${xhr.status})`));
+    xhr.onerror = () => reject(new Error("network error during upload"));
+    // Without this an aborted request would leave the promise pending.
+    xhr.onabort = () => reject(new Error("upload aborted"));
+    xhr.send(file);
+  });
+}
+
+// Fallback: two-hop upload through the api-server (Postgres file store, or
+// when direct upload is unavailable). XHR so we still get a progress bar.
+// Never rejects: failures resolve as `[[], message]`.
+function uploadViaServer(
+  files: File[],
+  onProgress?: (index: number, percent: number) => void
 ): Promise<[FileDescriptor[], string | null]> {
-  const formData = new FormData();
-  files.forEach((file) => {
-    formData.append("files", file);
+  return new Promise((resolve) => {
+    const formData = new FormData();
+    files.forEach((file) => formData.append("files", file));
+    const xhr = new XMLHttpRequest();
+    xhr.open("POST", "/api/chat/file");
+    xhr.upload.onprogress = (e) => {
+      if (e.lengthComputable && e.total > 0) {
+        // One request carries every file, so they all share its progress.
+        const percent = Math.round((e.loaded / e.total) * 100);
+        files.forEach((_, i) => onProgress?.(i, percent));
+      }
+    };
+    xhr.onload = () => {
+      // Parse defensively: an exception thrown in this handler would never
+      // settle the promise, leaving the caller waiting forever.
+      let body: { files?: unknown; detail?: unknown } | null = null;
+      try {
+        body = JSON.parse(xhr.responseText);
+      } catch {
+        body = null;
+      }
+      const uploaded = body?.files;
+      const detail = body?.detail;
+      if (xhr.status >= 200 && xhr.status < 300) {
+        resolve(
+          Array.isArray(uploaded)
+            ? [uploaded as FileDescriptor[], null]
+            : [[], "Failed to upload files - unexpected server response"]
+        );
+      } else if (typeof detail === "string" && detail) {
+        // Surface the server's stated reason instead of a bare status code.
+        resolve([[], `Failed to upload files - ${detail}`]);
+      } else {
+        resolve([[], `Failed to upload files (${xhr.status})`]);
+      }
+    };
+    xhr.onerror = () => resolve([[], "network error during upload"]);
+    // Without this an aborted request would leave the promise pending.
+    xhr.onabort = () => resolve([[], "upload aborted"]);
+    xhr.send(formData);
   });
+}
 
-  const response = await fetch("/api/chat/file", {
+// Upload chat attachments and return `[descriptors, error]`. Failures are
+// reported in-band (never thrown) so the caller can always clear its
+// "uploading" placeholders. `onProgress(index, percent)` is called per file.
+export async function uploadFilesForChat(
+  files: File[],
+  onProgress?: (index: number, percent: number) => void
+): Promise<[FileDescriptor[], string | null]> {
+  try {
+    return await uploadDirectOrViaServer(files, onProgress);
+  } catch (e) {
+    // fetch() rejected (network down) or a success body was not valid JSON.
+    return [[], `Failed to upload files - ${e}`];
+  }
+}
+
+// Direct-to-Blob upload when the server offers it, otherwise the two-hop
+// server upload. May throw on network errors; the exported wrapper above
+// converts those into an error tuple.
+async function uploadDirectOrViaServer(
+  files: File[],
+  onProgress?: (index: number, percent: number) => void
+): Promise<[FileDescriptor[], string | null]> {
+  // 1. Ask the server for direct-to-Blob upload URLs (Azure backend only).
+  const urlResp = await fetch("/api/chat/file/upload-url", {
     method: "POST",
-    body: formData,
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      files: files.map((f) => ({
+        name: f.name,
+        content_type: f.type || null,
+        size: f.size,
+      })),
+    }),
   });
-  if (!response.ok) {
-    return [[], `Failed to upload files - ${(await response.json()).detail}`];
+  if (!urlResp.ok) {
+    return [[], `Failed to start upload - ${await readErrorDetail(urlResp)}`];
+  }
+  const urlJson = await urlResp.json();
+
+  // 2a. Not Azure → fall back to the two-hop server upload.
+  if (!urlJson.direct_upload) {
+    return uploadViaServer(files, onProgress);
+  }
+
+  // 2b. Azure → PUT each file directly to Blob (bypasses the server).
+  const items: { file_id: string; upload_url: string }[] = urlJson.files;
+  // Items are matched to `files` by index, so the counts must agree.
+  if (!Array.isArray(items) || items.length !== files.length) {
+    return [[], "Failed to start upload - unexpected server response"];
+  }
+  try {
+    await Promise.all(
+      items.map((item, i) =>
+        putToBlobWithProgress(item.upload_url, files[i], (p) =>
+          onProgress?.(i, p)
+        )
+      )
+    );
+  } catch (e) {
+    return [[], `Failed to upload files - ${e}`];
   }
-  const responseJson = await response.json();
 
-  return [responseJson.files as FileDescriptor[], null];
+  // 3. Confirm so the server records metadata (+ extracts doc text).
+  const confirmResp = await fetch("/api/chat/file/confirm", {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      files: items.map((item, i) => ({
+        file_id: item.file_id,
+        name: files[i].name,
+        content_type: files[i].type || null,
+      })),
+    }),
+  });
+  if (!confirmResp.ok) {
+    return [
+      [],
+      `Failed to finalize upload - ${await readErrorDetail(confirmResp)}`,
+    ];
+  }
+  return [(await confirmResp.json()).files as FileDescriptor[], null];
 }
 
 export function useScrollonStream({
diff --git a/web/src/app/chat/sessionSidebar/ChatSessionDisplay.tsx b/web/src/app/chat/sessionSidebar/ChatSessionDisplay.tsx
index 782b35a8c66..20c76145fa7 100644
--- a/web/src/app/chat/sessionSidebar/ChatSessionDisplay.tsx
+++ b/web/src/app/chat/sessionSidebar/ChatSessionDisplay.tsx
@@ -95,6 +95,14 @@ export function ChatSessionDisplay({
         scroll={false}
         draggable="true"
         onDragStart={(event) => {
+          // This row is a <Link> (an <a href>), so the browser treats the
+          // drag as a *link* drag and auto-attaches the URL (text/uri-list).
+          // That's what makes some browsers (Arc/Edge/Safari) offer "open in
+          // split view" when you drag toward the edge. Clear that default
+          // link payload and mark this as a move so only our folder DnD
+          // applies.
+          event.dataTransfer.clearData();
+          event.dataTransfer.effectAllowed = "move";
           event.dataTransfer.setData(
             CHAT_SESSION_ID_KEY,
             chatSession.id.toString()
@@ -103,6 +111,36 @@ export function ChatSessionDisplay({
             FOLDER_ID_KEY,
             chatSession.folder_id?.toString() || ""
           );
+
+          // Replace the browser's default drag image (a translucent clone
+          // of this full-width row, which trails awkwardly across the
+          // sidebar) with a compact chip showing the chat name. Built
+          // off-screen, snapshotted by setDragImage, then removed.
+          const chip = document.createElement("div");
+          chip.textContent = chatName || `Chat ${chatSession.id}`;
+          Object.assign(chip.style, {
+            position: "fixed",
+            top: "-1000px",
+            left: "-1000px",
+            maxWidth: "200px",
+            overflow: "hidden",
+            whiteSpace: "nowrap",
+            textOverflow: "ellipsis",
+            padding: "4px 10px",
+            borderRadius: "6px",
+            fontSize: "12px",
+            fontWeight: "500",
+            color: "#fff",
+            background: "rgba(30, 30, 30, 0.92)",
+            boxShadow: "0 2px 6px rgba(0, 0, 0, 0.25)",
+            pointerEvents: "none",
+          });
+          document.body.appendChild(chip);
+          event.dataTransfer.setDragImage(chip, 12, 12);
+          // Remove once the browser has snapshotted it for the drag.
+          setTimeout(() => {
+            if (chip.parentNode) chip.parentNode.removeChild(chip);
+          }, 0);
         }}
       >
         <BasicSelectable fullWidth selected={isSelected}>
diff --git a/web/src/app/chat/sessionSidebar/ChatSidebar.tsx b/web/src/app/chat/sessionSidebar/ChatSidebar.tsx
index c145f7b4b8a..b6e094f6393 100644
--- a/web/src/app/chat/sessionSidebar/ChatSidebar.tsx
+++ b/web/src/app/chat/sessionSidebar/ChatSidebar.tsx
@@ -1,7 +1,13 @@
 "use client";
 
-import { FiBook, FiEdit, FiFolderPlus, FiPlusSquare } from "react-icons/fi";
-import { useContext, useEffect, useRef, useState } from "react";
+import {
+  FiBook,
+  FiEdit,
+  FiFolderPlus,
+  FiLoader,
+  FiPlusSquare,
+} from "react-icons/fi";
+import { useContext, useEffect, useRef, useState, useTransition } from "react";
 import Link from "next/link";
 import Image from "next/image";
 import { useRouter } from "next/navigation";
@@ -38,6 +44,13 @@ export const ChatSidebar = ({
   const router = useRouter();
   const { popup, setPopup } = usePopup();
 
+  // Navigating to "Manage Assistants" awaits the heavy fetchChatData
+  // bundle server-side. useTransition keeps the *current* page (with this
+  // sidebar) mounted and visible throughout — so it reads as an in-app
+  // transition, not a blank reload — while isPending drives an inline
+  // spinner on the button so the click clearly registers.
+  const [isNavigatingAssistants, startAssistantsNav] = useTransition();
+
   const currentChatId = currentChatSession?.id;
 
   // prevent the NextJS Router cache from causing the chat sidebar to not
@@ -46,6 +59,15 @@ export const ChatSidebar = ({
     router.refresh();
   }, [currentChatId]);
 
+  // Local mirror of the server-provided folders so we can show a newly
+  // created folder instantly, without a full `router.refresh()` (which
+  // re-runs the entire heavy fetchChatData bundle just to add one empty
+  // folder). Re-synced whenever the server prop changes.
+  const [localFolders, setLocalFolders] = useState<Folder[]>(folders);
+  useEffect(() => {
+    setLocalFolders(folders);
+  }, [folders]);
+
   const combinedSettings = useContext(SettingsContext);
   if (!combinedSettings) {
     return null;
@@ -118,8 +140,22 @@ export const ChatSidebar = ({
               onClick={() =>
                 createFolder("New Folder")
                   .then((folderId) => {
-                    console.log(`Folder created with ID: ${folderId}`);
-                    router.refresh();
+                    // Append the new (empty) folder to local state instead
+                    // of router.refresh() — instant, no full refetch. The
+                    // create POST itself is a single fast INSERT.
+                    setLocalFolders((prev) => [
+                      ...prev,
+                      {
+                        folder_id: folderId,
+                        folder_name: "New Folder",
+                        display_priority:
+                          prev.reduce(
+                            (max, f) => Math.max(max, f.display_priority),
+                            -1
+                          ) + 1,
+                        chat_sessions: [],
+                      },
+                    ]);
                   })
                   .catch((error) => {
                     console.error("Failed to create folder:", error);
@@ -137,20 +173,30 @@ export const ChatSidebar = ({
           </div>
         </div>
 
-        <Link href="/assistants/mine" className="mt-3 mb-1 mx-3">
-          <BasicClickable fullWidth>
+        <div className="mt-3 mb-1 mx-3">
+          <BasicClickable
+            fullWidth
+            onClick={() =>
+              startAssistantsNav(() => router.push("/assistants/mine"))
+            }
+          >
             <div className="flex items-center text-default font-medium">
-              <FaBrain className="ml-1 mr-2" /> Manage Assistants
+              {isNavigatingAssistants ? (
+                <FiLoader className="ml-1 mr-2 animate-spin" />
+              ) : (
+                <FaBrain className="ml-1 mr-2" />
+              )}
+              {isNavigatingAssistants ? "Loading…" : "Manage Assistants"}
             </div>
           </BasicClickable>
-        </Link>
+        </div>
 
         <div className="border-b border-border pb-4 mx-3" />
 
         <ChatTab
           existingChats={existingChats}
           currentChatId={currentChatId}
-          folders={folders}
+          folders={localFolders}
           openedFolders={openedFolders}
         />
       </div>
diff --git a/web/src/components/admin/connectors/Popup.tsx b/web/src/components/admin/connectors/Popup.tsx
index adfc0665c25..2bcc73b7d5b 100644
--- a/web/src/components/admin/connectors/Popup.tsx
+++ b/web/src/components/admin/connectors/Popup.tsx
@@ -3,15 +3,59 @@ import { useRef, useState } from "react";
 export interface PopupSpec {
   message: string;
   type: "success" | "error";
+  // Optional undo affordance. When present, the popup renders a button
+  // (label, default "Undo") next to the message; clicking it awaits
+  // onClick — errors it throws are not propagated — then dismisses the
+  // popup. Undoable popups auto-dismiss after 6s instead of the usual 4s.
+  undo?: {
+    label?: string; // defaults to "Undo"
+    onClick: () => Promise<void> | void;
+  };
 }
 
-export const Popup: React.FC<PopupSpec> = ({ message, type }) => (
+/** Toast banner; adds an Undo button when `undo` is set and a × when `onDismiss` is. */
+export const Popup: React.FC<
+  PopupSpec & { onUndo?: () => void; onDismiss?: () => void }
+> = ({ message, type, undo, onUndo, onDismiss }) => (
   <div
-    className={`fixed bottom-4 left-4 p-4 rounded-md shadow-lg text-white z-[100] ${
+    className={`fixed bottom-4 left-4 p-4 rounded-md shadow-lg text-white z-[100] flex items-center gap-3 ${
       type === "success" ? "bg-green-500" : "bg-error"
     }`}
   >
-    {message}
+    <span>{message}</span>
+    {undo && (
+      <button
+        type="button"
+        className="
+          ml-2 px-3 py-1 rounded
+          bg-white/20 hover:bg-white/30
+          text-white text-sm font-medium
+          focus:outline-none focus:ring-2 focus:ring-white/50
+        "
+        onClick={async () => {
+          // Best-effort undo: a failure is logged rather than rethrown — the
+          // popup dismisses either way; the page re-renders with what persisted.
+          try {
+            await undo.onClick();
+          } catch (error) {
+            console.error("Undo action failed:", error);
+          }
+          onUndo?.();
+        }}
+      >
+        {undo.label ?? "Undo"}
+      </button>
+    )}
+    {onDismiss && (
+      <button
+        type="button"
+        aria-label="Dismiss"
+        className="ml-1 text-white/80 hover:text-white text-lg leading-none"
+        onClick={onDismiss}
+      >
+        ×
+      </button>
+    )}
   </div>
 );
 
@@ -27,13 +71,24 @@ export const usePopup = () => {
     }
 
     setPopup(popupSpec);
-    timeoutRef.current = setTimeout(() => {
-      setPopup(null);
-    }, 4000);
+    if (popupSpec) {
+      // Undoable popups stay on screen a bit longer — users need time to
+      // notice the affordance and click it. 6s vs 4s for plain toasts.
+      const ms = popupSpec.undo ? 6000 : 4000;
+      timeoutRef.current = setTimeout(() => {
+        setPopup(null);
+      }, ms);
+    }
   };
 
   return {
-    popup: popup && <Popup {...popup} />,
+    popup: popup && (
+      <Popup
+        {...popup}
+        onUndo={() => setPopupWithExpiration(null)}
+        onDismiss={() => setPopupWithExpiration(null)}
+      />
+    ),
     setPopup: setPopupWithExpiration,
   };
 };
diff --git a/web/src/lib/assistants/updateAssistantPreferences.ts b/web/src/lib/assistants/updateAssistantPreferences.ts
index f902e561cf4..05b0b3e5302 100644
--- a/web/src/lib/assistants/updateAssistantPreferences.ts
+++ b/web/src/lib/assistants/updateAssistantPreferences.ts
@@ -1,3 +1,8 @@
+// PATCH the user's full `chosen_assistants` array. This single endpoint
+// drives every preference mutation below — visibility, ordering, default
+// — because the backend treats the array as both "which assistants are
+// visible in the picker" (membership) AND "in what order" (positions),
+// with position 0 = default.
 async function updateUserAssistantList(
   chosenAssistants: number[]
 ): Promise<boolean> {
@@ -60,3 +65,51 @@ export async function moveAssistantDown(
   }
   return false;
 }
+
+// ---------------------------------------------------------------------------
+// Used by the new Manage Assistants UX
+// ---------------------------------------------------------------------------
+
+/** Persist `newOrder` as the user's complete chosen_assistants list (drag-reorder, bulk ops). */
+export async function reorderAssistantList(
+  newOrder: number[]
+): Promise<boolean> {
+  return updateUserAssistantList(newOrder);
+}
+
+/**
+ * Promote `assistantId` to the front of the list (index 0 = user's default).
+ * An id that was not chosen yet is simply prepended, which also unhides it.
+ */
+export async function setDefaultAssistant(
+  assistantId: number,
+  chosenAssistants: number[]
+): Promise<boolean> {
+  const rest = chosenAssistants.filter((id) => id !== assistantId);
+  return updateUserAssistantList([assistantId].concat(rest));
+}
+
+/** Bulk hide: drop every id in `assistantIds` from chosen_assistants. */
+export async function bulkRemoveFromList(
+  assistantIds: number[],
+  chosenAssistants: number[]
+): Promise<boolean> {
+  const hidden = new Set(assistantIds);
+  const remaining = chosenAssistants.filter((id) => !hidden.has(id));
+
+  return updateUserAssistantList(remaining);
+}
+
+/** Bulk: append the ids in `assistantIds` that are not yet in `chosenAssistants`. */
+export async function bulkAddToList(
+  assistantIds: number[],
+  chosenAssistants: number[]
+): Promise<boolean> {
+  const existing = new Set(chosenAssistants);
+  // De-duplicate the request so a repeated id is never appended twice.
+  const requested = Array.from(new Set(assistantIds));
+  const toAppend = requested.filter((id) => !existing.has(id));
+  // Nothing to add: still report success so the UI can clear its selection.
+  if (toAppend.length === 0) return true;
+  return updateUserAssistantList([...chosenAssistants, ...toAppend]);
+}