From 6c8b77a7e7f41a2c4a05a075423102722f5e7995 Mon Sep 17 00:00:00 2001 From: Elliott de Launay Date: Fri, 15 May 2026 12:17:57 -0400 Subject: [PATCH 01/17] Merge upstream/main (Roo Code sunset) into Zoo Code --- .github/ISSUE_TEMPLATE/feature_request.yml | 2 +- .github/workflows/evals.yml | 74 - .github/workflows/update-contributors.yml | 67 - CHANGELOG.md | 10 - ...session-id-resume-loads-correct-session.ts | 4 +- .../scripts/integration/lib/stream-harness.ts | 2 +- .../__tests__/run-provider-resolution.test.ts | 50 - apps/cli/src/commands/cli/list.ts | 9 +- apps/cli/src/commands/cli/run.ts | 66 +- apps/cli/src/commands/index.ts | 2 +- apps/cli/src/index.ts | 4 +- .../lib/storage/__tests__/settings.test.ts | 28 +- apps/cli/src/lib/storage/index.ts | 2 +- apps/cli/src/lib/storage/settings.ts | 53 +- apps/cli/src/types/constants.ts | 6 - apps/cli/src/types/types.ts | 10 +- apps/web-evals/.env | 1 - apps/web-evals/.gitignore | 8 - apps/web-evals/CHANGELOG.md | 3 - apps/web-evals/components.json | 21 - apps/web-evals/eslint.config.mjs | 17 - apps/web-evals/next-env.d.ts | 6 - apps/web-evals/next.config.ts | 7 - apps/web-evals/package.json | 63 - apps/web-evals/postcss.config.mjs | 5 - apps/web-evals/public/.gitkeep | 0 apps/web-evals/scripts/check-services.sh | 20 - .../src/actions/__tests__/killRun.spec.ts | 207 --- apps/web-evals/src/actions/exercises.ts | 22 - apps/web-evals/src/actions/heartbeat.ts | 8 - apps/web-evals/src/actions/runners.ts | 8 - apps/web-evals/src/actions/runs.ts | 377 ---- apps/web-evals/src/actions/tasks.ts | 11 - .../app/api/runs/[id]/logs/[taskId]/route.ts | 74 - .../app/api/runs/[id]/logs/failed/route.ts | 147 -- .../src/app/api/runs/[id]/stream/route.ts | 71 - apps/web-evals/src/app/favicon.ico | Bin 25931 -> 0 bytes apps/web-evals/src/app/globals.css | 141 -- apps/web-evals/src/app/layout.tsx | 35 - apps/web-evals/src/app/page.tsx | 10 - apps/web-evals/src/app/runs/[id]/page.tsx | 14 - .../src/app/runs/[id]/run-status.tsx | 79 - apps/web-evals/src/app/runs/[id]/run.tsx | 1058 ----------- .../src/app/runs/[id]/task-status.tsx | 20 - apps/web-evals/src/app/runs/new/new-run.tsx | 992 ----------- apps/web-evals/src/app/runs/new/page.tsx | 9 - .../src/app/runs/new/settings-diff.tsx | 58 - apps/web-evals/src/components/home/run.tsx | 433 ----- apps/web-evals/src/components/home/runs.tsx | 1024 ----------- .../src/components/layout/header.tsx | 7 - apps/web-evals/src/components/layout/logo.tsx | 54 - .../src/components/providers/index.ts | 2 - .../providers/react-query-provider.tsx | 8 - .../components/providers/theme-provider.tsx | 13 - .../src/components/ui/alert-dialog.tsx | 113 -- apps/web-evals/src/components/ui/badge.tsx | 36 - apps/web-evals/src/components/ui/button.tsx | 51 - apps/web-evals/src/components/ui/checkbox.tsx | 27 - apps/web-evals/src/components/ui/command.tsx | 134 -- apps/web-evals/src/components/ui/dialog.tsx | 110 -- apps/web-evals/src/components/ui/drawer.tsx | 98 -- .../src/components/ui/dropdown-menu.tsx | 171 -- apps/web-evals/src/components/ui/form.tsx | 138 -- apps/web-evals/src/components/ui/index.ts | 22 - apps/web-evals/src/components/ui/input.tsx | 22 - apps/web-evals/src/components/ui/label.tsx | 21 - .../src/components/ui/multi-select.tsx | 300 ---- apps/web-evals/src/components/ui/popover.tsx | 42 - .../src/components/ui/scroll-area.tsx | 51 - apps/web-evals/src/components/ui/select.tsx | 156 -- .../web-evals/src/components/ui/separator.tsx | 28 - apps/web-evals/src/components/ui/slider.tsx | 56 - apps/web-evals/src/components/ui/sonner.tsx | 25 - apps/web-evals/src/components/ui/table.tsx | 75 - apps/web-evals/src/components/ui/tabs.tsx | 122 -- apps/web-evals/src/components/ui/textarea.tsx | 19 - apps/web-evals/src/components/ui/tooltip.tsx | 47 - apps/web-evals/src/hooks/use-copy-run.ts | 28 - apps/web-evals/src/hooks/use-event-source.ts | 101 -- .../src/hooks/use-fuzzy-model-search.ts | 37 - .../src/hooks/use-open-router-models.ts | 38 - apps/web-evals/src/hooks/use-run-status.ts | 110 -- .../src/lib/__tests__/formatters.spec.ts | 30 - .../__tests__/normalize-create-run.spec.ts | 65 - apps/web-evals/src/lib/actions.ts | 19 - apps/web-evals/src/lib/formatters.ts | 59 - .../web-evals/src/lib/normalize-create-run.ts | 20 - apps/web-evals/src/lib/schemas.ts | 46 - .../lib/server/__tests__/sse-stream.spec.ts | 111 -- apps/web-evals/src/lib/server/redis.ts | 13 - apps/web-evals/src/lib/server/sse-stream.ts | 59 - apps/web-evals/src/lib/utils.ts | 6 - apps/web-evals/tsconfig.json | 10 - apps/web-evals/turbo.json | 10 - apps/web-evals/vitest.config.ts | 8 - apps/web-roo-code/.env.example | 11 - apps/web-roo-code/next-sitemap.config.cjs | 11 - apps/web-roo-code/next.config.ts | 57 +- apps/web-roo-code/package.json | 4 +- apps/web-roo-code/src/actions/evals.ts | 29 - .../web-roo-code/src/app/blog/[slug]/page.tsx | 3 - apps/web-roo-code/src/app/blog/page.tsx | 3 - .../src/app/blog/page/[page]/page.tsx | 3 - apps/web-roo-code/src/app/cloud/page.tsx | 296 ---- apps/web-roo-code/src/app/cloud/team/page.tsx | 297 ---- apps/web-roo-code/src/app/enterprise/page.tsx | 559 ------ apps/web-roo-code/src/app/evals/evals.tsx | 149 -- apps/web-roo-code/src/app/evals/page.tsx | 52 - apps/web-roo-code/src/app/evals/plot.tsx | 336 ---- apps/web-roo-code/src/app/evals/types.ts | 9 - apps/web-roo-code/src/app/extension/page.tsx | 84 - .../src/app/legal/cookies/page.tsx | 19 +- .../src/app/legal/subprocessors/page.tsx | 2 +- apps/web-roo-code/src/app/linear/page.tsx | 413 ----- apps/web-roo-code/src/app/page.tsx | 104 +- .../src/app/pr-fixer/content-a.tsx | 95 - apps/web-roo-code/src/app/pr-fixer/page.tsx | 67 - apps/web-roo-code/src/app/pricing/page.tsx | 395 ----- apps/web-roo-code/src/app/privacy/page.tsx | 8 +- apps/web-roo-code/src/app/provider/page.tsx | 263 --- .../pricing/components/model-card.tsx | 190 -- .../src/app/reviewer/content-b.ts | 93 - apps/web-roo-code/src/app/reviewer/content.ts | 93 - apps/web-roo-code/src/app/reviewer/page.tsx | 70 - .../src/app/shared/AgentLandingContent.tsx | 235 --- .../src/app/shared/agent-page-content.ts | 75 - .../src/app/shared/getContentVariant.ts | 36 - apps/web-roo-code/src/app/slack/page.tsx | 401 ----- .../components/RoomoteAnnouncementBanner.tsx | 4 +- .../src/components/animated-text.tsx | 24 - .../src/components/blog/BlogAnalytics.tsx | 50 - .../src/components/blog/BlogContent.tsx | 286 +-- .../src/components/blog/YouTubeModal.test.ts | 140 -- .../src/components/blog/YouTubeModal.tsx | 171 -- .../src/components/chromes/footer.tsx | 185 -- .../src/components/chromes/nav-bar.tsx | 228 +-- .../components/enterprise/contact-form.tsx | 291 ---- .../src/components/homepage/cta-section.tsx | 15 +- .../src/components/homepage/faq-section.tsx | 73 +- .../src/components/homepage/features.tsx | 5 +- .../homepage/option-overview-section.tsx | 8 +- .../components/homepage/pillars-section.tsx | 5 +- .../src/components/homepage/testimonials.tsx | 4 +- .../components/linear/linear-issue-demo.tsx | 442 ----- .../components/providers/posthog-provider.tsx | 91 - .../src/components/providers/providers.tsx | 9 +- .../components/slack/slack-thread-demo.tsx | 548 ------ ...ternal-influencers-not-topdown-mandates.md | 6 +- ...e-ai-spend-by-measuring-return-not-cost.md | 6 +- ...d-waiting-for-engineers-to-unblock-them.md | 6 +- ...gles-production-code-is-now-aigenerated.md | 2 +- ...prds-are-becoming-artifacts-of-the-past.md | 6 +- ...e-agents-like-employees-not-like-models.md | 2 +- ...ing-roo-code-extension-cloud-and-router.md | 2 +- .../src/lib/analytics/consent-manager.ts | 78 +- apps/web-roo-code/src/lib/blog/analytics.ts | 40 - apps/web-roo-code/src/lib/blog/index.ts | 1 - apps/web-roo-code/src/lib/constants.ts | 8 - apps/web-roo-code/src/lib/format-currency.ts | 14 - apps/web-roo-code/src/lib/format-duration.ts | 26 - apps/web-roo-code/src/lib/format-score.ts | 1 - apps/web-roo-code/src/lib/format-tokens.ts | 19 - apps/web-roo-code/src/lib/formatters.ts | 22 - apps/web-roo-code/src/lib/hooks/index.ts | 2 - .../src/lib/hooks/use-open-router-models.ts | 77 - apps/web-roo-code/src/lib/index.ts | 4 - apps/web-roo-code/src/lib/structured-data.ts | 10 +- apps/web-roo-code/src/lib/types/models.ts | 31 - knip.json | 6 +- package.json | 1 - packages/evals/.docker/entrypoints/runner.sh | 10 - packages/evals/.docker/entrypoints/web.sh | 48 - .../scripts/postgres/create-databases.sh | 11 - packages/evals/.env.development | 3 - packages/evals/.env.test | 3 - packages/evals/.gitignore | 8 - packages/evals/ADDING-EVALS.md | 305 ---- packages/evals/ARCHITECTURE.md | 282 --- packages/evals/CHANGELOG.md | 3 - packages/evals/Dockerfile.runner | 175 -- packages/evals/Dockerfile.web | 64 - packages/evals/README.md | 148 -- packages/evals/docker-compose.override.yml | 45 - packages/evals/docker-compose.yml | 87 - packages/evals/drizzle.config.ts | 10 - packages/evals/eslint.config.mjs | 4 - packages/evals/package.json | 55 - packages/evals/scripts/setup.sh | 348 ---- .../cli/__tests__/messageLogDeduper.test.ts | 35 - packages/evals/src/cli/index.ts | 55 - packages/evals/src/cli/messageLogDeduper.ts | 50 - packages/evals/src/cli/processTask.ts | 150 -- packages/evals/src/cli/redis.ts | 63 - packages/evals/src/cli/runCi.ts | 30 - packages/evals/src/cli/runEvals.ts | 88 - packages/evals/src/cli/runTaskInCli.ts | 310 ---- packages/evals/src/cli/runTaskInVscode.ts | 327 ---- packages/evals/src/cli/runUnitTest.ts | 91 - packages/evals/src/cli/types.ts | 19 - packages/evals/src/cli/utils.ts | 251 --- packages/evals/src/db/db.ts | 45 - packages/evals/src/db/index.ts | 9 - .../src/db/migrations/0000_young_trauma.sql | 54 - .../migrations/0001_add_timeout_to_runs.sql | 1 - .../migrations/0001_lowly_captain_flint.sql | 1 - .../migrations/0002_bouncy_blazing_skull.sql | 6 - .../db/migrations/0003_simple_retro_girl.sql | 1 - .../migrations/0004_sloppy_black_knight.sql | 3 - .../src/db/migrations/0005_strong_skrulls.sql | 12 - .../db/migrations/0006_worried_spectrum.sql | 1 - .../src/db/migrations/meta/0000_snapshot.json | 410 ----- .../src/db/migrations/meta/0001_snapshot.json | 417 ----- .../src/db/migrations/meta/0002_snapshot.json | 453 ----- .../src/db/migrations/meta/0003_snapshot.json | 459 ----- .../src/db/migrations/meta/0004_snapshot.json | 472 ----- .../src/db/migrations/meta/0005_snapshot.json | 472 ----- .../src/db/migrations/meta/0006_snapshot.json | 479 ----- .../src/db/migrations/meta/_journal.json | 55 - .../src/db/queries/__tests__/copyRun.spec.ts | 288 --- .../src/db/queries/__tests__/runs.test.ts | 87 - packages/evals/src/db/queries/copyRun.ts | 183 -- packages/evals/src/db/queries/errors.ts | 3 - packages/evals/src/db/queries/runs.ts | 200 --- packages/evals/src/db/queries/taskMetrics.ts | 45 - packages/evals/src/db/queries/tasks.ts | 88 - packages/evals/src/db/queries/toolErrors.ts | 22 - packages/evals/src/db/schema.ts | 143 -- packages/evals/src/exercises/index.ts | 25 - packages/evals/src/index.ts | 2 - packages/evals/tsconfig.json | 11 - packages/evals/vitest-global-setup.ts | 40 - packages/evals/vitest.config.ts | 10 - packages/ipc/src/ipc-client.ts | 2 +- packages/types/src/events.ts | 16 - packages/types/src/global-settings.ts | 72 +- packages/types/src/image-generation.ts | 19 +- packages/types/src/index.ts | 2 +- packages/types/src/mode.ts | 2 +- packages/types/src/provider-settings.ts | 19 +- packages/types/src/providers/index.ts | 4 - packages/types/src/providers/roo.ts | 60 - packages/types/src/task.ts | 5 - packages/types/src/vscode-extension-host.ts | 39 +- pnpm-lock.yaml | 1551 +---------------- src/api/providers/__tests__/roo.spec.ts | 1033 ----------- .../fetchers/__tests__/modelCache.spec.ts | 4 +- .../providers/fetchers/__tests__/roo.spec.ts | 1020 ----------- src/api/providers/fetchers/modelCache.ts | 7 - src/api/providers/fetchers/roo.ts | 192 -- src/api/providers/index.ts | 1 - src/api/providers/roo.ts | 433 ----- .../config/__tests__/ContextProxy.spec.ts | 14 - src/core/tools/GenerateImageTool.ts | 18 +- .../webview/__tests__/ClineProvider.spec.ts | 3 - .../webviewMessageHandler.rooBalance.spec.ts | 37 - ...webviewMessageHandler.routerModels.spec.ts | 24 - .../__tests__/webviewMessageHandler.spec.ts | 3 - src/core/webview/webviewMessageHandler.ts | 1 - src/shared/api.ts | 1 - webview-ui/package.json | 2 +- webview-ui/src/App.tsx | 2 +- webview-ui/src/__tests__/App.spec.tsx | 51 - .../src/__tests__/ErrorBoundary.spec.tsx | 7 - webview-ui/src/components/ErrorBoundary.tsx | 9 - .../src/components/chat/Announcement.tsx | 5 +- webview-ui/src/components/chat/ChatView.tsx | 2 +- .../src/components/chat/ModeSelector.tsx | 21 +- .../chat/__tests__/Announcement.spec.tsx | 6 +- .../ChatRow.rate-limit-wait.spec.tsx | 24 + .../__tests__/ChatView.keyboard-fix.spec.tsx | 36 +- .../ChatView.notification-sound.spec.tsx | 8 - .../ChatView.preserve-images.spec.tsx | 44 - .../ChatView.scroll-debug-repro.spec.tsx | 2 - .../chat/__tests__/ChatView.spec.tsx | 10 - .../chat/__tests__/ModeSelector.spec.tsx | 24 +- .../components/common/DismissibleUpsell.tsx | 14 - .../__tests__/DismissibleUpsell.spec.tsx | 42 +- webview-ui/src/components/mcp/McpView.tsx | 19 - webview-ui/src/components/modes/ModesView.tsx | 18 - webview-ui/src/components/settings/About.tsx | 4 +- .../src/components/settings/ApiOptions.tsx | 13 +- .../components/settings/SkillsSettings.tsx | 26 +- .../src/components/settings/UISettings.tsx | 11 - .../settings/__tests__/About.spec.tsx | 10 +- .../ApiOptions.provider-filtering.spec.tsx | 28 - .../settings/__tests__/ApiOptions.spec.tsx | 85 - .../ImageGenerationSettings.spec.tsx | 10 - .../SettingsView.change-detection.spec.tsx | 1 - .../SettingsView.unsaved-changes.spec.tsx | 1 - .../settings/utils/providerModelConfig.ts | 1 - .../hooks/__tests__/useSelectedModel.spec.ts | 22 +- .../components/ui/hooks/useRouterModels.ts | 2 +- .../components/ui/hooks/useSelectedModel.ts | 9 +- .../welcome/WelcomeViewProvider.tsx | 57 +- .../__tests__/WelcomeViewProvider.spec.tsx | 152 +- .../src/context/ExtensionStateContext.tsx | 29 +- ...tensionStateContext.roo-auth-gate.spec.tsx | 77 - .../__tests__/ExtensionStateContext.spec.tsx | 33 +- webview-ui/src/i18n/locales/ja/mcp.json | 2 +- webview-ui/src/i18n/locales/ko/mcp.json | 2 +- webview-ui/src/i18n/locales/zh-CN/mcp.json | 2 +- .../src/utils/__tests__/validate.spec.ts | 1 - webview-ui/src/utils/docLinks.ts | 4 +- 303 files changed, 586 insertions(+), 27966 deletions(-) delete mode 100644 .github/workflows/evals.yml delete mode 100644 .github/workflows/update-contributors.yml delete mode 100644 apps/cli/src/commands/cli/__tests__/run-provider-resolution.test.ts delete mode 100644 apps/web-evals/.env delete mode 100644 apps/web-evals/.gitignore delete mode 100644 apps/web-evals/CHANGELOG.md delete mode 100644 apps/web-evals/components.json delete mode 100644 apps/web-evals/eslint.config.mjs delete mode 100644 apps/web-evals/next-env.d.ts delete mode 100644 apps/web-evals/next.config.ts delete mode 100644 apps/web-evals/package.json delete mode 100644 apps/web-evals/postcss.config.mjs delete mode 100644 apps/web-evals/public/.gitkeep delete mode 100755 apps/web-evals/scripts/check-services.sh delete mode 100644 apps/web-evals/src/actions/__tests__/killRun.spec.ts delete mode 100644 apps/web-evals/src/actions/exercises.ts delete mode 100644 apps/web-evals/src/actions/heartbeat.ts delete mode 100644 apps/web-evals/src/actions/runners.ts delete mode 100644 apps/web-evals/src/actions/runs.ts delete mode 100644 apps/web-evals/src/actions/tasks.ts delete mode 100644 apps/web-evals/src/app/api/runs/[id]/logs/[taskId]/route.ts delete mode 100644 apps/web-evals/src/app/api/runs/[id]/logs/failed/route.ts delete mode 100644 apps/web-evals/src/app/api/runs/[id]/stream/route.ts delete mode 100644 apps/web-evals/src/app/favicon.ico delete mode 100644 apps/web-evals/src/app/globals.css delete mode 100644 apps/web-evals/src/app/layout.tsx delete mode 100644 apps/web-evals/src/app/page.tsx delete mode 100644 apps/web-evals/src/app/runs/[id]/page.tsx delete mode 100644 apps/web-evals/src/app/runs/[id]/run-status.tsx delete mode 100644 apps/web-evals/src/app/runs/[id]/run.tsx delete mode 100644 apps/web-evals/src/app/runs/[id]/task-status.tsx delete mode 100644 apps/web-evals/src/app/runs/new/new-run.tsx delete mode 100644 apps/web-evals/src/app/runs/new/page.tsx delete mode 100644 apps/web-evals/src/app/runs/new/settings-diff.tsx delete mode 100644 apps/web-evals/src/components/home/run.tsx delete mode 100644 apps/web-evals/src/components/home/runs.tsx delete mode 100644 apps/web-evals/src/components/layout/header.tsx delete mode 100644 apps/web-evals/src/components/layout/logo.tsx delete mode 100644 apps/web-evals/src/components/providers/index.ts delete mode 100644 apps/web-evals/src/components/providers/react-query-provider.tsx delete mode 100644 apps/web-evals/src/components/providers/theme-provider.tsx delete mode 100644 apps/web-evals/src/components/ui/alert-dialog.tsx delete mode 100644 apps/web-evals/src/components/ui/badge.tsx delete mode 100644 apps/web-evals/src/components/ui/button.tsx delete mode 100644 apps/web-evals/src/components/ui/checkbox.tsx delete mode 100644 apps/web-evals/src/components/ui/command.tsx delete mode 100644 apps/web-evals/src/components/ui/dialog.tsx delete mode 100644 apps/web-evals/src/components/ui/drawer.tsx delete mode 100644 apps/web-evals/src/components/ui/dropdown-menu.tsx delete mode 100644 apps/web-evals/src/components/ui/form.tsx delete mode 100644 apps/web-evals/src/components/ui/index.ts delete mode 100644 apps/web-evals/src/components/ui/input.tsx delete mode 100644 apps/web-evals/src/components/ui/label.tsx delete mode 100644 apps/web-evals/src/components/ui/multi-select.tsx delete mode 100644 apps/web-evals/src/components/ui/popover.tsx delete mode 100644 apps/web-evals/src/components/ui/scroll-area.tsx delete mode 100644 apps/web-evals/src/components/ui/select.tsx delete mode 100644 apps/web-evals/src/components/ui/separator.tsx delete mode 100644 apps/web-evals/src/components/ui/slider.tsx delete mode 100644 apps/web-evals/src/components/ui/sonner.tsx delete mode 100644 apps/web-evals/src/components/ui/table.tsx delete mode 100644 apps/web-evals/src/components/ui/tabs.tsx delete mode 100644 apps/web-evals/src/components/ui/textarea.tsx delete mode 100644 apps/web-evals/src/components/ui/tooltip.tsx delete mode 100644 apps/web-evals/src/hooks/use-copy-run.ts delete mode 100644 apps/web-evals/src/hooks/use-event-source.ts delete mode 100644 apps/web-evals/src/hooks/use-fuzzy-model-search.ts delete mode 100644 apps/web-evals/src/hooks/use-open-router-models.ts delete mode 100644 apps/web-evals/src/hooks/use-run-status.ts delete mode 100644 apps/web-evals/src/lib/__tests__/formatters.spec.ts delete mode 100644 apps/web-evals/src/lib/__tests__/normalize-create-run.spec.ts delete mode 100644 apps/web-evals/src/lib/actions.ts delete mode 100644 apps/web-evals/src/lib/formatters.ts delete mode 100644 apps/web-evals/src/lib/normalize-create-run.ts delete mode 100644 apps/web-evals/src/lib/schemas.ts delete mode 100644 apps/web-evals/src/lib/server/__tests__/sse-stream.spec.ts delete mode 100644 apps/web-evals/src/lib/server/redis.ts delete mode 100644 apps/web-evals/src/lib/server/sse-stream.ts delete mode 100644 apps/web-evals/src/lib/utils.ts delete mode 100644 apps/web-evals/tsconfig.json delete mode 100644 apps/web-evals/turbo.json delete mode 100644 apps/web-evals/vitest.config.ts delete mode 100644 apps/web-roo-code/.env.example delete mode 100644 apps/web-roo-code/src/actions/evals.ts delete mode 100644 apps/web-roo-code/src/app/cloud/page.tsx delete mode 100644 apps/web-roo-code/src/app/cloud/team/page.tsx delete mode 100644 apps/web-roo-code/src/app/enterprise/page.tsx delete mode 100644 apps/web-roo-code/src/app/evals/evals.tsx delete mode 100644 apps/web-roo-code/src/app/evals/page.tsx delete mode 100644 apps/web-roo-code/src/app/evals/plot.tsx delete mode 100644 apps/web-roo-code/src/app/evals/types.ts delete mode 100644 apps/web-roo-code/src/app/extension/page.tsx delete mode 100644 apps/web-roo-code/src/app/linear/page.tsx delete mode 100644 apps/web-roo-code/src/app/pr-fixer/content-a.tsx delete mode 100644 apps/web-roo-code/src/app/pr-fixer/page.tsx delete mode 100644 apps/web-roo-code/src/app/pricing/page.tsx delete mode 100644 apps/web-roo-code/src/app/provider/page.tsx delete mode 100644 apps/web-roo-code/src/app/provider/pricing/components/model-card.tsx delete mode 100644 apps/web-roo-code/src/app/reviewer/content-b.ts delete mode 100644 apps/web-roo-code/src/app/reviewer/content.ts delete mode 100644 apps/web-roo-code/src/app/reviewer/page.tsx delete mode 100644 apps/web-roo-code/src/app/shared/AgentLandingContent.tsx delete mode 100644 apps/web-roo-code/src/app/shared/agent-page-content.ts delete mode 100644 apps/web-roo-code/src/app/shared/getContentVariant.ts delete mode 100644 apps/web-roo-code/src/app/slack/page.tsx delete mode 100644 apps/web-roo-code/src/components/animated-text.tsx delete mode 100644 apps/web-roo-code/src/components/blog/BlogAnalytics.tsx delete mode 100644 apps/web-roo-code/src/components/blog/YouTubeModal.test.ts delete mode 100644 apps/web-roo-code/src/components/blog/YouTubeModal.tsx delete mode 100644 apps/web-roo-code/src/components/enterprise/contact-form.tsx delete mode 100644 apps/web-roo-code/src/components/linear/linear-issue-demo.tsx delete mode 100644 apps/web-roo-code/src/components/providers/posthog-provider.tsx delete mode 100644 apps/web-roo-code/src/components/slack/slack-thread-demo.tsx delete mode 100644 apps/web-roo-code/src/lib/blog/analytics.ts delete mode 100644 apps/web-roo-code/src/lib/format-currency.ts delete mode 100644 apps/web-roo-code/src/lib/format-duration.ts delete mode 100644 apps/web-roo-code/src/lib/format-score.ts delete mode 100644 apps/web-roo-code/src/lib/format-tokens.ts delete mode 100644 apps/web-roo-code/src/lib/formatters.ts delete mode 100644 apps/web-roo-code/src/lib/hooks/index.ts delete mode 100644 apps/web-roo-code/src/lib/hooks/use-open-router-models.ts delete mode 100644 apps/web-roo-code/src/lib/index.ts delete mode 100644 apps/web-roo-code/src/lib/types/models.ts delete mode 100644 packages/evals/.docker/entrypoints/runner.sh delete mode 100644 packages/evals/.docker/entrypoints/web.sh delete mode 100755 packages/evals/.docker/scripts/postgres/create-databases.sh delete mode 100644 packages/evals/.env.development delete mode 100644 packages/evals/.env.test delete mode 100644 packages/evals/.gitignore delete mode 100644 packages/evals/ADDING-EVALS.md delete mode 100644 packages/evals/ARCHITECTURE.md delete mode 100644 packages/evals/CHANGELOG.md delete mode 100644 packages/evals/Dockerfile.runner delete mode 100644 packages/evals/Dockerfile.web delete mode 100644 packages/evals/README.md delete mode 100644 packages/evals/docker-compose.override.yml delete mode 100644 packages/evals/docker-compose.yml delete mode 100644 packages/evals/drizzle.config.ts delete mode 100644 packages/evals/eslint.config.mjs delete mode 100644 packages/evals/package.json delete mode 100755 packages/evals/scripts/setup.sh delete mode 100644 packages/evals/src/cli/__tests__/messageLogDeduper.test.ts delete mode 100644 packages/evals/src/cli/index.ts delete mode 100644 packages/evals/src/cli/messageLogDeduper.ts delete mode 100644 packages/evals/src/cli/processTask.ts delete mode 100644 packages/evals/src/cli/redis.ts delete mode 100644 packages/evals/src/cli/runCi.ts delete mode 100644 packages/evals/src/cli/runEvals.ts delete mode 100644 packages/evals/src/cli/runTaskInCli.ts delete mode 100644 packages/evals/src/cli/runTaskInVscode.ts delete mode 100644 packages/evals/src/cli/runUnitTest.ts delete mode 100644 packages/evals/src/cli/types.ts delete mode 100644 packages/evals/src/cli/utils.ts delete mode 100644 packages/evals/src/db/db.ts delete mode 100644 packages/evals/src/db/index.ts delete mode 100644 packages/evals/src/db/migrations/0000_young_trauma.sql delete mode 100644 packages/evals/src/db/migrations/0001_add_timeout_to_runs.sql delete mode 100644 packages/evals/src/db/migrations/0001_lowly_captain_flint.sql delete mode 100644 packages/evals/src/db/migrations/0002_bouncy_blazing_skull.sql delete mode 100644 packages/evals/src/db/migrations/0003_simple_retro_girl.sql delete mode 100644 packages/evals/src/db/migrations/0004_sloppy_black_knight.sql delete mode 100644 packages/evals/src/db/migrations/0005_strong_skrulls.sql delete mode 100644 packages/evals/src/db/migrations/0006_worried_spectrum.sql delete mode 100644 packages/evals/src/db/migrations/meta/0000_snapshot.json delete mode 100644 packages/evals/src/db/migrations/meta/0001_snapshot.json delete mode 100644 packages/evals/src/db/migrations/meta/0002_snapshot.json delete mode 100644 packages/evals/src/db/migrations/meta/0003_snapshot.json delete mode 100644 packages/evals/src/db/migrations/meta/0004_snapshot.json delete mode 100644 packages/evals/src/db/migrations/meta/0005_snapshot.json delete mode 100644 packages/evals/src/db/migrations/meta/0006_snapshot.json delete mode 100644 packages/evals/src/db/migrations/meta/_journal.json delete mode 100644 packages/evals/src/db/queries/__tests__/copyRun.spec.ts delete mode 100644 packages/evals/src/db/queries/__tests__/runs.test.ts delete mode 100644 packages/evals/src/db/queries/copyRun.ts delete mode 100644 packages/evals/src/db/queries/errors.ts delete mode 100644 packages/evals/src/db/queries/runs.ts delete mode 100644 packages/evals/src/db/queries/taskMetrics.ts delete mode 100644 packages/evals/src/db/queries/tasks.ts delete mode 100644 packages/evals/src/db/queries/toolErrors.ts delete mode 100644 packages/evals/src/db/schema.ts delete mode 100644 packages/evals/src/exercises/index.ts delete mode 100644 packages/evals/src/index.ts delete mode 100644 packages/evals/tsconfig.json delete mode 100644 packages/evals/vitest-global-setup.ts delete mode 100644 packages/evals/vitest.config.ts delete mode 100644 packages/types/src/providers/roo.ts delete mode 100644 src/api/providers/__tests__/roo.spec.ts delete mode 100644 src/api/providers/fetchers/__tests__/roo.spec.ts delete mode 100644 src/api/providers/fetchers/roo.ts delete mode 100644 src/api/providers/roo.ts delete mode 100644 src/core/webview/__tests__/webviewMessageHandler.rooBalance.spec.ts delete mode 100644 webview-ui/src/context/__tests__/ExtensionStateContext.roo-auth-gate.spec.tsx diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml index 78d650b43d..a4a1155bdf 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.yml +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -64,7 +64,7 @@ body: attributes: value: | --- - Optional (for contributors): You can stop here if you're just proposing the improvement. + Optional: You can stop here if you're just proposing the improvement. - type: textarea id: acceptance-criteria diff --git a/.github/workflows/evals.yml b/.github/workflows/evals.yml deleted file mode 100644 index b99fd7659e..0000000000 --- a/.github/workflows/evals.yml +++ /dev/null @@ -1,74 +0,0 @@ -name: Evals - -on: - pull_request: - types: [labeled] - workflow_dispatch: - -env: - DOCKER_BUILDKIT: 1 - COMPOSE_DOCKER_CLI_BUILD: 1 - -jobs: - evals: - # Run if triggered manually or if PR has 'evals' label. - if: github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'evals') - runs-on: blacksmith-16vcpu-ubuntu-2404 - timeout-minutes: 45 - - defaults: - run: - working-directory: packages/evals - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Create environment - run: | - cat > .env.local << EOF - OPENROUTER_API_KEY=${{ secrets.OPENROUTER_API_KEY || 'test-key-for-build' }} - EOF - - cat > .env.development << EOF - NODE_ENV=development - DATABASE_URL=postgresql://postgres:password@db:5432/evals_development - REDIS_URL=redis://redis:6379 - HOST_EXECUTION_METHOD=docker - EOF - - - name: Build image - uses: docker/build-push-action@v6 - with: - context: . - file: packages/evals/Dockerfile.runner - tags: evals-runner:latest - cache-from: type=gha - cache-to: type=gha,mode=max - push: false - load: true - - - name: Tag image - run: docker tag evals-runner:latest evals-runner - - - name: Start containers - run: | - docker compose up -d db redis - timeout 60 bash -c 'until docker compose exec -T db pg_isready -U postgres; do sleep 2; done' - timeout 60 bash -c 'until docker compose exec -T redis redis-cli ping | grep -q PONG; do sleep 2; done' - docker compose run --rm runner sh -c 'nc -z db 5432 && echo "✓ Runner -> Database connection successful"' - docker compose run --rm runner sh -c 'nc -z redis 6379 && echo "✓ Runner -> Redis connection successful"' - docker compose run --rm runner docker ps - - - name: Run database migrations - run: docker compose run --rm runner pnpm --filter @roo-code/evals db:migrate - - - name: Run evals - run: docker compose run --rm runner pnpm --filter @roo-code/evals cli --ci - - - name: Cleanup - if: always() - run: docker compose down -v --remove-orphans diff --git a/.github/workflows/update-contributors.yml b/.github/workflows/update-contributors.yml deleted file mode 100644 index 5709bdc10a..0000000000 --- a/.github/workflows/update-contributors.yml +++ /dev/null @@ -1,67 +0,0 @@ -name: Update Contributors # Refresh contrib.rocks image cache - -on: - workflow_dispatch: - -permissions: - contents: write - pull-requests: write - -jobs: - refresh-contrib-cache: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Bump cacheBust in all README files - run: | - set -euo pipefail - TS="$(date +%s)" - # Target only the root README.md and localized READMEs under locales/*/README.md - mapfile -t FILES < <(git ls-files README.md 'locales/*/README.md' || true) - - if [ "${#FILES[@]}" -eq 0 ]; then - echo "No target README files found." >&2 - exit 1 - fi - - UPDATED=0 - for f in "${FILES[@]}"; do - if grep -q 'cacheBust=' "$f"; then - # Use portable sed in GNU environment of ubuntu-latest - sed -i -E "s/cacheBust=[0-9]+/cacheBust=${TS}/g" "$f" - echo "Updated cacheBust in $f" - UPDATED=1 - else - echo "Warning: cacheBust parameter not found in $f" >&2 - fi - done - - if [ "$UPDATED" -eq 0 ]; then - echo "No files were updated. Ensure READMEs embed contrib.rocks with cacheBust param." >&2 - exit 1 - fi - - - name: Detect changes - id: changes - run: | - if git diff --quiet; then - echo "changed=false" >> $GITHUB_OUTPUT - else - echo "changed=true" >> $GITHUB_OUTPUT - fi - - - name: Create Pull Request - if: steps.changes.outputs.changed == 'true' - uses: peter-evans/create-pull-request@v7 - with: - token: ${{ secrets.GITHUB_TOKEN }} - commit-message: "docs: update contributors list [skip ci]" - committer: "github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>" - branch: refresh-contrib-cache - delete-branch: true - title: "Refresh contrib.rocks image cache (all READMEs)" - body: | - Automated refresh of the contrib.rocks image cache by bumping the cacheBust parameter in README.md and locales/*/README.md. - base: main diff --git a/CHANGELOG.md b/CHANGELOG.md index 5234acb2db..6699fb267b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1314,7 +1314,6 @@ The entries below are preserved from the upstream Roo Code project history befor - Reposition Add Image button inside ChatTextArea (thanks @roomote!) - Bring back a way to temporarily and globally pause auto-approve without losing your toggle state (thanks @brunobergher!) - Makes text area buttons appear only when there's text (thanks @brunobergher!) -- CONTRIBUTING.md tweaks and issue template rewrite (thanks @hannesrudolph!) - Bump axios from 1.9.0 to 1.12.0 (thanks @dependabot!) ## [3.28.2] - 2025-09-14 @@ -1774,7 +1773,6 @@ The entries below are preserved from the upstream Roo Code project history befor - Fix Claude model detection by name for API protocol selection (thanks @daniel-lxs!) - Move marketplace icon from overflow menu to top navigation - Optional setting to prevent completion with open todos -- Added YouTube to website footer (thanks @thill2323!) ## [3.23.14] - 2025-07-17 @@ -2168,7 +2166,6 @@ The entries below are preserved from the upstream Roo Code project history befor - Fix bug with context condensing in Amazon Bedrock - Fix UTF-8 encoding in ExecaTerminalProcess (thanks @mr-ryan-james!) - Set sidebar name bugfix (thanks @chrarnoldus!) -- Fix link to CONTRIBUTING.md in feature request template (thanks @cannuri!) - Add task metadata to Unbound and improve caching logic (thanks @pugazhendhi-m!) ## [3.19.0] - 2025-05-29 @@ -3136,7 +3133,6 @@ The entries below are preserved from the upstream Roo Code project history befor - Ask and Architect modes can now edit markdown files - Custom modes can now be restricted to specific file patterns (for example, a technical writer who can only edit markdown files 👋) - Support for configuring the Bedrock provider with AWS Profiles -- New Roo Code community Discord at https://roocode.com/discord! ## [3.2.8] @@ -3176,8 +3172,6 @@ The entries below are preserved from the upstream Roo Code project history befor - Create specialized assistants for any workflow - Just type "Create a new mode for " or visit the Prompts tab in the top menu to get started -Join us at https://www.reddit.com/r/RooCode to share your custom modes and be part of our next chapter! - ## [3.1.7] - DeepSeek-R1 support (thanks @philipnext!) @@ -3225,12 +3219,8 @@ Join us at https://www.reddit.com/r/RooCode to share your custom modes and be pa ## [3.0.1] -- Fix the reddit link and a small visual glitch in the chat input - ## [3.0.0] -- This release adds chat modes! Now you can ask Roo Code questions about system architecture or the codebase without immediately jumping into writing code. You can even assign different API configuration profiles to each mode if you prefer to use different models for thinking vs coding. Would love feedback in the new Roo Code Reddit! https://www.reddit.com/r/RooCode - ## [2.2.46] - Only parse @-mentions in user input (not in files) diff --git a/apps/cli/scripts/integration/cases/create-with-session-id-resume-loads-correct-session.ts b/apps/cli/scripts/integration/cases/create-with-session-id-resume-loads-correct-session.ts index cbefd26525..ded1656e1e 100644 --- a/apps/cli/scripts/integration/cases/create-with-session-id-resume-loads-correct-session.ts +++ b/apps/cli/scripts/integration/cases/create-with-session-id-resume-loads-correct-session.ts @@ -88,7 +88,7 @@ async function createSessionWithCustomId( "dev", "--print", "--provider", - "roo", + "openrouter", "--output-format", "stream-json", "--workspace", @@ -148,7 +148,7 @@ async function resumeSessionAndSendMarker( "--print", "--stdin-prompt-stream", "--provider", - "roo", + "openrouter", "--output-format", "stream-json", "--workspace", diff --git a/apps/cli/scripts/integration/lib/stream-harness.ts b/apps/cli/scripts/integration/lib/stream-harness.ts index 73b756c7c3..2693103309 100644 --- a/apps/cli/scripts/integration/lib/stream-harness.ts +++ b/apps/cli/scripts/integration/lib/stream-harness.ts @@ -69,7 +69,7 @@ export async function runStreamCase(options: RunStreamCaseOptions): Promise { - it("defaults to the login-free provider when nothing is configured", () => { - const result = resolveProviderPreference({}) - - expect(result).toEqual({ - provider: DEFAULT_PROVIDER, - fellBackFromStoredRooPreference: false, - fellBackFromExplicitRooRequest: false, - }) - }) - - it("falls back from saved Roo preferences", () => { - const result = resolveProviderPreference({ - settingsProvider: "roo", - }) - - expect(result).toEqual({ - provider: DEFAULT_PROVIDER, - fellBackFromStoredRooPreference: true, - fellBackFromExplicitRooRequest: false, - }) - }) - - it("falls back from an explicitly requested Roo provider selection", () => { - const result = resolveProviderPreference({ - flagProvider: "roo", - }) - - expect(result).toEqual({ - provider: DEFAULT_PROVIDER, - fellBackFromStoredRooPreference: false, - fellBackFromExplicitRooRequest: true, - }) - }) - - it("preserves supported providers", () => { - const result = resolveProviderPreference({ - settingsProvider: "anthropic", - }) - - expect(result).toEqual({ - provider: "anthropic", - fellBackFromStoredRooPreference: false, - fellBackFromExplicitRooRequest: false, - }) - }) -}) diff --git a/apps/cli/src/commands/cli/list.ts b/apps/cli/src/commands/cli/list.ts index 3a7e99170f..fbd33da2cc 100644 --- a/apps/cli/src/commands/cli/list.ts +++ b/apps/cli/src/commands/cli/list.ts @@ -6,7 +6,7 @@ import pWaitFor from "p-wait-for" import type { TaskSessionEntry } from "@roo-code/core/cli" import type { Command, ModelRecord, WebviewMessage } from "@roo-code/types" -import { getProviderDefaultModelId } from "@roo-code/types" +import { openRouterDefaultModelId } from "@roo-code/types" import { ExtensionHost, type ExtensionHostOptions } from "@/agent/index.js" import { readWorkspaceTaskSessions } from "@/lib/task-history/index.js" @@ -112,7 +112,7 @@ async function createListHost(options: BaseListOptions, hostOptions: ListHostOpt reasoningEffort: undefined, user: null, provider: "openrouter", - model: getProviderDefaultModelId("openrouter"), + model: openRouterDefaultModelId, apiKey, workspacePath, extensionPath, @@ -223,8 +223,9 @@ function requestOpenRouterModels(host: ExtensionHost): Promise { return undefined } - const routerModels = isRecord(message.routerModels) ? message.routerModels : undefined - return isRecord(routerModels?.openrouter) ? (routerModels.openrouter as ModelRecord) : {} + const routerModels = isRecord(message.routerModels) ? message.routerModels : {} + const openRouterModels = routerModels.openrouter + return isRecord(openRouterModels) ? (openRouterModels as ModelRecord) : {} }, ) } diff --git a/apps/cli/src/commands/cli/run.ts b/apps/cli/src/commands/cli/run.ts index ae263c5573..908df9938b 100644 --- a/apps/cli/src/commands/cli/run.ts +++ b/apps/cli/src/commands/cli/run.ts @@ -12,7 +12,6 @@ import { isSupportedProvider, supportedProviders, DEFAULT_FLAGS, - DEFAULT_PROVIDER, REASONING_EFFORTS, OutputFormat, } from "@/types/index.js" @@ -22,10 +21,10 @@ import { JsonEventEmitter } from "@/agent/json-event-emitter.js" import { loadSettings } from "@/lib/storage/index.js" import { readWorkspaceTaskSessions, resolveWorkspaceResumeSessionId } from "@/lib/task-history/index.js" import { getEnvVarName, getApiKeyFromEnv } from "@/lib/utils/provider.js" -import { runOnboarding } from "@/lib/utils/onboarding.js" import { validateTerminalShellPath } from "@/lib/utils/shell.js" import { getDefaultExtensionPath } from "@/lib/utils/extension.js" import { isValidSessionId } from "@/lib/utils/session-id.js" +import { runOnboarding } from "@/lib/utils/onboarding.js" import { VERSION } from "@/lib/utils/version.js" import { ExtensionHost, ExtensionHostOptions } from "@/agent/index.js" @@ -50,40 +49,6 @@ function normalizeError(error: unknown): Error { return error instanceof Error ? error : new Error(String(error)) } -export function resolveProviderPreference({ - flagProvider, - settingsProvider, -}: { - flagProvider?: string - settingsProvider?: string -}): { - provider: string - fellBackFromStoredRooPreference: boolean - fellBackFromExplicitRooRequest: boolean -} { - if (flagProvider === "roo") { - return { - provider: DEFAULT_PROVIDER, - fellBackFromStoredRooPreference: false, - fellBackFromExplicitRooRequest: true, - } - } - - if (settingsProvider === "roo") { - return { - provider: DEFAULT_PROVIDER, - fellBackFromStoredRooPreference: true, - fellBackFromExplicitRooRequest: false, - } - } - - return { - provider: flagProvider ?? settingsProvider ?? DEFAULT_PROVIDER, - fellBackFromStoredRooPreference: false, - fellBackFromExplicitRooRequest: false, - } -} - export async function run(promptArg: string | undefined, flagOptions: FlagOptions) { setLogger({ info: () => {}, @@ -157,14 +122,7 @@ export async function run(promptArg: string | undefined, flagOptions: FlagOption const effectiveModel = flagOptions.model || settings.model || DEFAULT_FLAGS.model const effectiveReasoningEffort = flagOptions.reasoningEffort || settings.reasoningEffort || DEFAULT_FLAGS.reasoningEffort - const { - provider: resolvedProvider, - fellBackFromStoredRooPreference, - fellBackFromExplicitRooRequest, - } = resolveProviderPreference({ - flagProvider: flagOptions.provider, - settingsProvider: settings.provider, - }) + const effectiveProvider = flagOptions.provider ?? settings.provider ?? "openrouter" const effectiveWorkspacePath = flagOptions.workspace ? path.resolve(flagOptions.workspace) : process.cwd() const legacyRequireApprovalFromSettings = settings.requireApproval ?? @@ -175,21 +133,9 @@ export async function run(promptArg: string | undefined, flagOptions: FlagOption flagOptions.consecutiveMistakeLimit ?? settings.consecutiveMistakeLimit ?? DEFAULT_FLAGS.consecutiveMistakeLimit const effectiveConsecutiveMistakeLimit = Number(rawConsecutiveMistakeLimit) - if (fellBackFromStoredRooPreference) { - console.warn( - `[CLI] Saved Roo Code Router preference detected in CLI settings. Continuing with the default provider (${DEFAULT_PROVIDER}).`, - ) - } - - if (fellBackFromExplicitRooRequest) { - console.warn( - `[CLI] Roo Code Router is no longer supported by the CLI. Continuing with the default provider (${DEFAULT_PROVIDER}).`, - ) - } - - if (!isSupportedProvider(resolvedProvider)) { + if (!isSupportedProvider(effectiveProvider)) { console.error( - `[CLI] Error: Invalid provider: ${resolvedProvider}; must be one of: ${supportedProviders.join(", ")}`, + `[CLI] Error: Invalid provider: ${effectiveProvider}; must be one of: ${supportedProviders.join(", ")}`, ) process.exit(1) } @@ -219,7 +165,7 @@ export async function run(promptArg: string | undefined, flagOptions: FlagOption reasoningEffort: effectiveReasoningEffort === "unspecified" ? undefined : effectiveReasoningEffort, consecutiveMistakeLimit: effectiveConsecutiveMistakeLimit, user: null, - provider: resolvedProvider, + provider: effectiveProvider, model: effectiveModel, workspacePath: effectiveWorkspacePath, extensionPath: path.resolve(flagOptions.extension || getDefaultExtensionPath(__dirname)), @@ -245,7 +191,7 @@ export async function run(promptArg: string | undefined, flagOptions: FlagOption if (!extensionHostOptions.apiKey) { console.error(`[CLI] Error: No API key provided. Use --api-key or set the appropriate environment variable.`) - console.error(`For ${extensionHostOptions.provider}, set ${getEnvVarName(extensionHostOptions.provider)}`) + console.error(`[CLI] For ${extensionHostOptions.provider}, set ${getEnvVarName(extensionHostOptions.provider)}`) process.exit(1) } diff --git a/apps/cli/src/commands/index.ts b/apps/cli/src/commands/index.ts index 717a7040ef..702b8e2938 100644 --- a/apps/cli/src/commands/index.ts +++ b/apps/cli/src/commands/index.ts @@ -1,2 +1,2 @@ -export * from "./auth/index.js" export * from "./cli/index.js" +export * from "./auth/index.js" diff --git a/apps/cli/src/index.ts b/apps/cli/src/index.ts index f4d123f218..8c368cc233 100644 --- a/apps/cli/src/index.ts +++ b/apps/cli/src/index.ts @@ -69,7 +69,7 @@ const applyListOptions = (command: Command) => command .option("-w, --workspace ", "Workspace directory path (defaults to current working directory)") .option("-e, --extension ", "Path to the extension bundle directory") - .option("-k, --api-key ", "OpenRouter API key (falls back to OPENROUTER_API_KEY)") + .option("-k, --api-key ", "API key for the LLM provider") .option("--format ", 'Output format: "json" (default) or "text"', "json") .option("-d, --debug", "Enable debug output", false) @@ -107,7 +107,7 @@ applyListOptions(listCommand.command("modes").description("List available modes" }, ) -applyListOptions(listCommand.command("models").description("List available OpenRouter models")).action( +applyListOptions(listCommand.command("models").description("List available models")).action( async (options: Parameters[0]) => { await runListAction(() => listModels(options)) }, diff --git a/apps/cli/src/lib/storage/__tests__/settings.test.ts b/apps/cli/src/lib/storage/__tests__/settings.test.ts index 60488c1cb0..b2a3b18cb4 100644 --- a/apps/cli/src/lib/storage/__tests__/settings.test.ts +++ b/apps/cli/src/lib/storage/__tests__/settings.test.ts @@ -19,7 +19,7 @@ vi.mock("../config-dir.js", () => ({ // Import after mocking import { loadSettings, saveSettings, resetOnboarding, getSettingsPath } from "../settings.js" -import { DEFAULT_PROVIDER, OnboardingProviderChoice } from "@/types/index.js" +import { OnboardingProviderChoice } from "@/types/index.js" // Re-derive the test config dir for use in tests (must match the hoisted one) const actualTestConfigDir = getTestConfigDir() @@ -70,32 +70,6 @@ describe("Settings Storage", () => { expect(loaded).toEqual(settingsData) }) - it("migrates legacy Roo provider settings to the default provider path", async () => { - const legacySettings = { - onboardingProviderChoice: "roo", - provider: "roo", - mode: "architect", - } - - await fs.mkdir(actualTestConfigDir, { recursive: true }) - await fs.writeFile(expectedSettingsFile, JSON.stringify(legacySettings), "utf-8") - - const loaded = await loadSettings() - - expect(loaded).toEqual({ - onboardingProviderChoice: OnboardingProviderChoice.Byok, - provider: DEFAULT_PROVIDER, - mode: "architect", - }) - - const rewritten = JSON.parse(await fs.readFile(expectedSettingsFile, "utf-8")) - expect(rewritten).toEqual({ - onboardingProviderChoice: OnboardingProviderChoice.Byok, - provider: DEFAULT_PROVIDER, - mode: "architect", - }) - }) - it("should load settings with only some fields set", async () => { const settingsData = { mode: "code", diff --git a/apps/cli/src/lib/storage/index.ts b/apps/cli/src/lib/storage/index.ts index 53424472c2..d5856747a1 100644 --- a/apps/cli/src/lib/storage/index.ts +++ b/apps/cli/src/lib/storage/index.ts @@ -1,4 +1,4 @@ export * from "./config-dir.js" -export * from "./settings.js" export * from "./credentials.js" +export * from "./settings.js" export * from "./ephemeral.js" diff --git a/apps/cli/src/lib/storage/settings.ts b/apps/cli/src/lib/storage/settings.ts index 541a2ad9a9..86a2d9243e 100644 --- a/apps/cli/src/lib/storage/settings.ts +++ b/apps/cli/src/lib/storage/settings.ts @@ -2,43 +2,9 @@ import fs from "fs/promises" import path from "path" import type { CliSettings } from "@/types/index.js" -import { - DEFAULT_PROVIDER, - LEGACY_ONBOARDING_PROVIDER_CHOICE_ROO, - LEGACY_PROVIDER_PREFERENCE_ROO, - OnboardingProviderChoice, -} from "@/types/index.js" -import { safeWriteJson } from "../../../../../src/utils/safeWriteJson.js" import { getConfigDir } from "./index.js" -type StoredCliSettings = CliSettings & { - provider?: string - onboardingProviderChoice?: string -} - -async function persistSettings(settingsPath: string, settings: CliSettings): Promise { - await safeWriteJson(settingsPath, settings, { prettyPrint: true }) - await fs.chmod(settingsPath, 0o600) -} - -function migrateLegacySettings(settings: StoredCliSettings): { settings: CliSettings; migrated: boolean } { - let migrated = false - const nextSettings: StoredCliSettings = { ...settings } - - if (nextSettings.provider === LEGACY_PROVIDER_PREFERENCE_ROO) { - nextSettings.provider = DEFAULT_PROVIDER - migrated = true - } - - if (nextSettings.onboardingProviderChoice === LEGACY_ONBOARDING_PROVIDER_CHOICE_ROO) { - nextSettings.onboardingProviderChoice = OnboardingProviderChoice.Byok - migrated = true - } - - return { settings: nextSettings as CliSettings, migrated } -} - export function getSettingsPath(): string { return path.join(getConfigDir(), "cli-settings.json") } @@ -47,17 +13,7 @@ export async function loadSettings(): Promise { try { const settingsPath = getSettingsPath() const data = await fs.readFile(settingsPath, "utf-8") - const parsed = JSON.parse(data) as StoredCliSettings - const { settings, migrated } = migrateLegacySettings(parsed) - - if (migrated) { - console.warn( - `[CLI] Detected legacy Roo Code Router selections in CLI settings. Migrating them to the default provider (${DEFAULT_PROVIDER}).`, - ) - await persistSettings(settingsPath, settings) - } - - return settings + return JSON.parse(data) as CliSettings } catch (error) { if ((error as NodeJS.ErrnoException).code === "ENOENT") { return {} @@ -68,10 +24,15 @@ export async function loadSettings(): Promise { } export async function saveSettings(settings: Partial): Promise { + const configDir = getConfigDir() + await fs.mkdir(configDir, { recursive: true }) + const existing = await loadSettings() const merged = { ...existing, ...settings } - await persistSettings(getSettingsPath(), merged) + await fs.writeFile(getSettingsPath(), JSON.stringify(merged, null, 2), { + mode: 0o600, + }) } export async function resetOnboarding(): Promise { diff --git a/apps/cli/src/types/constants.ts b/apps/cli/src/types/constants.ts index 555d73a037..007cfa0783 100644 --- a/apps/cli/src/types/constants.ts +++ b/apps/cli/src/types/constants.ts @@ -7,8 +7,6 @@ export const DEFAULT_FLAGS = { consecutiveMistakeLimit: 10, } -export const DEFAULT_PROVIDER = "openrouter" as const - export const REASONING_EFFORTS = [...reasoningEffortsExtended, "unspecified", "disabled"] /** @@ -23,7 +21,3 @@ export const ASCII_ROO = ` _,' ___ \\,\\ / \\\\ // \\\\ ,/' \`\\_,` - -export const AUTH_BASE_URL = process.env.ROO_AUTH_BASE_URL ?? "https://app.roocode.com" - -export const SDK_BASE_URL = process.env.ROO_SDK_BASE_URL ?? "https://cloud-api.roocode.com" diff --git a/apps/cli/src/types/types.ts b/apps/cli/src/types/types.ts index f7af0a2835..0a9f3d2259 100644 --- a/apps/cli/src/types/types.ts +++ b/apps/cli/src/types/types.ts @@ -46,12 +46,6 @@ export enum OnboardingProviderChoice { Byok = "byok", } -export const LEGACY_PROVIDER_PREFERENCE_ROO = "roo" as const -export const LEGACY_ONBOARDING_PROVIDER_CHOICE_ROO = "roo" as const - -export type CliProviderPreference = SupportedProvider | typeof LEGACY_PROVIDER_PREFERENCE_ROO -export type CliOnboardingProviderChoice = OnboardingProviderChoice | typeof LEGACY_ONBOARDING_PROVIDER_CHOICE_ROO - export interface OnboardingResult { choice: OnboardingProviderChoice token?: string @@ -59,11 +53,11 @@ export interface OnboardingResult { } export interface CliSettings { - onboardingProviderChoice?: CliOnboardingProviderChoice + onboardingProviderChoice?: OnboardingProviderChoice /** Default mode to use (e.g., "code", "architect", "ask", "debug") */ mode?: string /** Default provider to use */ - provider?: CliProviderPreference + provider?: SupportedProvider /** Default model to use */ model?: string /** Default reasoning effort level */ diff --git a/apps/web-evals/.env b/apps/web-evals/.env deleted file mode 100644 index 1bb6dd6dac..0000000000 --- a/apps/web-evals/.env +++ /dev/null @@ -1 +0,0 @@ -DATABASE_URL=postgres://postgres:password@localhost:5433/evals_development diff --git a/apps/web-evals/.gitignore b/apps/web-evals/.gitignore deleted file mode 100644 index 443f3159ed..0000000000 --- a/apps/web-evals/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -# .env -!.env - -# next.js -.next - -# typescript -tsconfig.tsbuildinfo diff --git a/apps/web-evals/CHANGELOG.md b/apps/web-evals/CHANGELOG.md deleted file mode 100644 index b3531905ac..0000000000 --- a/apps/web-evals/CHANGELOG.md +++ /dev/null @@ -1,3 +0,0 @@ -# @roo-code/web-evals - -## 0.0.1 diff --git a/apps/web-evals/components.json b/apps/web-evals/components.json deleted file mode 100644 index 5bcedb3141..0000000000 --- a/apps/web-evals/components.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "$schema": "https://ui.shadcn.com/schema.json", - "style": "new-york", - "rsc": true, - "tsx": true, - "tailwind": { - "config": "", - "css": "src/app/globals.css", - "baseColor": "neutral", - "cssVariables": true, - "prefix": "" - }, - "aliases": { - "components": "@/components", - "utils": "@/lib/utils", - "ui": "@/components/ui", - "lib": "@/lib", - "hooks": "@/hooks" - }, - "iconLibrary": "lucide" -} diff --git a/apps/web-evals/eslint.config.mjs b/apps/web-evals/eslint.config.mjs deleted file mode 100644 index 024d6157d4..0000000000 --- a/apps/web-evals/eslint.config.mjs +++ /dev/null @@ -1,17 +0,0 @@ -import { nextJsConfig } from "@roo-code/config-eslint/next-js" - -/** @type {import("eslint").Linter.Config} */ -export default [ - ...nextJsConfig, - { - rules: { - "no-unused-vars": "off", - "@typescript-eslint/no-unused-vars": [ - "error", - { - caughtErrorsIgnorePattern: "^_", - }, - ], - }, - }, -] diff --git a/apps/web-evals/next-env.d.ts b/apps/web-evals/next-env.d.ts deleted file mode 100644 index 7506fe6afb..0000000000 --- a/apps/web-evals/next-env.d.ts +++ /dev/null @@ -1,6 +0,0 @@ -/// -/// -import "./.next/dev/types/routes.d.ts" - -// NOTE: This file should not be edited -// see https://nextjs.org/docs/app/api-reference/config/typescript for more information. diff --git a/apps/web-evals/next.config.ts b/apps/web-evals/next.config.ts deleted file mode 100644 index b5f54a87be..0000000000 --- a/apps/web-evals/next.config.ts +++ /dev/null @@ -1,7 +0,0 @@ -import type { NextConfig } from "next" - -const nextConfig: NextConfig = { - turbopack: {}, -} - -export default nextConfig diff --git a/apps/web-evals/package.json b/apps/web-evals/package.json deleted file mode 100644 index 1723f57583..0000000000 --- a/apps/web-evals/package.json +++ /dev/null @@ -1,63 +0,0 @@ -{ - "name": "@roo-code/web-evals", - "version": "0.0.1", - "type": "module", - "scripts": { - "lint": "eslint src --ext=ts,tsx --max-warnings=0", - "check-types": "tsc -b", - "dev": "scripts/check-services.sh && next dev -p 3446", - "format": "prettier --write src", - "build": "next build", - "start": "next start -p 3446", - "clean": "rimraf tsconfig.tsbuildinfo .next .turbo" - }, - "dependencies": { - "@hookform/resolvers": "^5.1.1", - "@radix-ui/react-alert-dialog": "^1.1.7", - "@radix-ui/react-checkbox": "^1.1.5", - "@radix-ui/react-dialog": "^1.1.6", - "@radix-ui/react-dropdown-menu": "^2.1.7", - "@radix-ui/react-label": "^2.1.2", - "@radix-ui/react-popover": "^1.1.6", - "@radix-ui/react-scroll-area": "^1.2.3", - "@radix-ui/react-select": "^2.1.6", - "@radix-ui/react-separator": "^1.1.2", - "@radix-ui/react-slider": "^1.2.4", - "@radix-ui/react-slot": "^1.1.2", - "@radix-ui/react-tabs": "^1.1.3", - "@radix-ui/react-tooltip": "^1.2.8", - "@roo-code/evals": "workspace:^", - "@roo-code/types": "workspace:^", - "@tanstack/react-query": "^5.69.0", - "archiver": "^7.0.1", - "class-variance-authority": "^0.7.1", - "clsx": "^2.1.1", - "cmdk": "^1.1.0", - "fuzzysort": "^3.1.0", - "lucide-react": "^0.518.0", - "next": "^16.1.6", - "next-themes": "^0.4.6", - "p-map": "^7.0.3", - "react": "^18.3.1", - "react-dom": "^18.3.1", - "react-hook-form": "^7.57.0", - "react-use": "^17.6.0", - "redis": "^5.5.5", - "sonner": "^2.0.5", - "tailwind-merge": "^3.3.0", - "tailwindcss-animate": "^1.0.7", - "vaul": "^1.1.2", - "zod": "^3.25.61" - }, - "devDependencies": { - "@roo-code/config-eslint": "workspace:^", - "@roo-code/config-typescript": "workspace:^", - "@tailwindcss/postcss": "^4", - "@types/archiver": "^7.0.0", - "@types/ps-tree": "^1.1.6", - "@types/react": "^18.3.23", - "@types/react-dom": "^18.3.5", - "tailwindcss": "^4", - "vitest": "^3.2.3" - } -} diff --git a/apps/web-evals/postcss.config.mjs b/apps/web-evals/postcss.config.mjs deleted file mode 100644 index 78452aadce..0000000000 --- a/apps/web-evals/postcss.config.mjs +++ /dev/null @@ -1,5 +0,0 @@ -const config = { - plugins: ["@tailwindcss/postcss"], -} - -export default config diff --git a/apps/web-evals/public/.gitkeep b/apps/web-evals/public/.gitkeep deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/apps/web-evals/scripts/check-services.sh b/apps/web-evals/scripts/check-services.sh deleted file mode 100755 index d72ffd54e8..0000000000 --- a/apps/web-evals/scripts/check-services.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -if ! docker info &> /dev/null; then - echo "❌ Docker is not running. Please start Docker Desktop and try again." - exit 1 -fi - -if ! nc -z postgres 5433 2>/dev/null; then - echo "❌ PostgreSQL is not running on port 5432" - echo "💡 Start it with: pnpm --filter @roo-code/evals db:up" - exit 1 -fi - -if ! nc -z redis 6380 2>/dev/null; then - echo "❌ Redis is not running on port 6379" - echo "💡 Start it with: pnpm --filter @roo-code/evals redis:up" - exit 1 -fi - -echo "✅ All required services are running" diff --git a/apps/web-evals/src/actions/__tests__/killRun.spec.ts b/apps/web-evals/src/actions/__tests__/killRun.spec.ts deleted file mode 100644 index 814d70d9fc..0000000000 --- a/apps/web-evals/src/actions/__tests__/killRun.spec.ts +++ /dev/null @@ -1,207 +0,0 @@ -// npx vitest run src/actions/__tests__/killRun.spec.ts - -import { execFileSync } from "child_process" - -// Mock child_process -vi.mock("child_process", () => ({ - execFileSync: vi.fn(), - spawn: vi.fn(), -})) - -// Mock next/cache -vi.mock("next/cache", () => ({ - revalidatePath: vi.fn(), -})) - -// Mock redis client -vi.mock("@/lib/server/redis", () => ({ - redisClient: vi.fn().mockResolvedValue({ - del: vi.fn().mockResolvedValue(1), - }), -})) - -// Mock @roo-code/evals -vi.mock("@roo-code/evals", () => ({ - createRun: vi.fn(), - deleteRun: vi.fn(), - createTask: vi.fn(), - exerciseLanguages: [], - getExercisesForLanguage: vi.fn().mockResolvedValue([]), -})) - -// Mock timers to speed up tests -vi.useFakeTimers() - -// Import after mocks -import { killRun } from "../runs" - -const mockExecFileSync = execFileSync as ReturnType - -describe("killRun", () => { - beforeEach(() => { - vi.clearAllMocks() - }) - - afterEach(() => { - vi.clearAllTimers() - }) - - it("should kill controller first, wait, then kill task containers", async () => { - const runId = 123 - - // execFileSync is used for all docker commands - mockExecFileSync - .mockReturnValueOnce("") // docker kill controller - .mockReturnValueOnce("evals-task-123-456.0\nevals-task-123-789.1\n") // docker ps - .mockReturnValueOnce("") // docker kill evals-task-123-456.0 - .mockReturnValueOnce("") // docker kill evals-task-123-789.1 - - const resultPromise = killRun(runId) - - // Fast-forward past the 10 second sleep - await vi.advanceTimersByTimeAsync(10000) - - const result = await resultPromise - - expect(result.success).toBe(true) - expect(result.killedContainers).toContain("evals-controller-123") - expect(result.killedContainers).toContain("evals-task-123-456.0") - expect(result.killedContainers).toContain("evals-task-123-789.1") - expect(result.errors).toHaveLength(0) - - // Verify execFileSync was called for docker kill - expect(mockExecFileSync).toHaveBeenNthCalledWith( - 1, - "docker", - ["kill", "evals-controller-123"], - expect.any(Object), - ) - // Verify execFileSync was called for docker ps with run-specific filter - expect(mockExecFileSync).toHaveBeenNthCalledWith( - 2, - "docker", - ["ps", "--format", "{{.Names}}", "--filter", "name=evals-task-123-"], - expect.any(Object), - ) - }) - - it("should continue killing runners even if controller is not running", async () => { - const runId = 456 - - mockExecFileSync - .mockImplementationOnce(() => { - throw new Error("No such container") - }) // controller kill fails - .mockReturnValueOnce("evals-task-456-100.0\n") // docker ps - .mockReturnValueOnce("") // docker kill task - - const resultPromise = killRun(runId) - await vi.advanceTimersByTimeAsync(10000) - const result = await resultPromise - - expect(result.success).toBe(true) - expect(result.killedContainers).toContain("evals-task-456-100.0") - // Controller not in list since it failed - expect(result.killedContainers).not.toContain("evals-controller-456") - }) - - it("should clear Redis state after killing containers", async () => { - const runId = 789 - - const mockDel = vi.fn().mockResolvedValue(1) - const { redisClient } = await import("@/lib/server/redis") - vi.mocked(redisClient).mockResolvedValue({ del: mockDel } as never) - - mockExecFileSync - .mockReturnValueOnce("") // controller kill - .mockReturnValueOnce("") // docker ps (no tasks) - - const resultPromise = killRun(runId) - await vi.advanceTimersByTimeAsync(10000) - await resultPromise - - expect(mockDel).toHaveBeenCalledWith("heartbeat:789") - expect(mockDel).toHaveBeenCalledWith("runners:789") - }) - - it("should handle docker ps failure gracefully", async () => { - const runId = 111 - - mockExecFileSync - .mockReturnValueOnce("") // controller kill succeeds - .mockImplementationOnce(() => { - throw new Error("Docker error") - }) // docker ps fails - - const resultPromise = killRun(runId) - await vi.advanceTimersByTimeAsync(10000) - const result = await resultPromise - - // Should still be successful because controller was killed - expect(result.success).toBe(true) - expect(result.killedContainers).toContain("evals-controller-111") - expect(result.errors).toContain("Failed to list Docker task containers") - }) - - it("should handle individual task kill failures", async () => { - const runId = 222 - - mockExecFileSync - .mockReturnValueOnce("") // controller kill - .mockReturnValueOnce("evals-task-222-300.0\nevals-task-222-400.0\n") // docker ps - .mockImplementationOnce(() => { - throw new Error("Kill failed") - }) // first task kill fails - .mockReturnValueOnce("") // second task kill succeeds - - const resultPromise = killRun(runId) - await vi.advanceTimersByTimeAsync(10000) - const result = await resultPromise - - expect(result.success).toBe(true) - expect(result.killedContainers).toContain("evals-controller-222") - expect(result.killedContainers).toContain("evals-task-222-400.0") - expect(result.errors.length).toBe(1) - expect(result.errors[0]).toContain("evals-task-222-300.0") - }) - - it("should return success with no containers when nothing is running", async () => { - const runId = 333 - - mockExecFileSync - .mockImplementationOnce(() => { - throw new Error("No such container") - }) // controller not running - .mockReturnValueOnce("") // no task containers - - const resultPromise = killRun(runId) - await vi.advanceTimersByTimeAsync(10000) - const result = await resultPromise - - expect(result.success).toBe(true) - expect(result.killedContainers).toHaveLength(0) - expect(result.errors).toHaveLength(0) - }) - - it("should only kill containers belonging to the specific run", async () => { - const runId = 555 - - mockExecFileSync - .mockReturnValueOnce("") // controller kill - .mockReturnValueOnce("evals-task-555-100.0\n") // docker ps - .mockReturnValueOnce("") // docker kill task - - const resultPromise = killRun(runId) - await vi.advanceTimersByTimeAsync(10000) - const result = await resultPromise - - expect(result.success).toBe(true) - // Verify execFileSync was called for docker ps with run-specific filter - expect(mockExecFileSync).toHaveBeenNthCalledWith( - 2, - "docker", - ["ps", "--format", "{{.Names}}", "--filter", "name=evals-task-555-"], - expect.any(Object), - ) - }) -}) diff --git a/apps/web-evals/src/actions/exercises.ts b/apps/web-evals/src/actions/exercises.ts deleted file mode 100644 index 17eb1ff085..0000000000 --- a/apps/web-evals/src/actions/exercises.ts +++ /dev/null @@ -1,22 +0,0 @@ -"use server" - -import * as path from "path" -import { fileURLToPath } from "url" - -import { exerciseLanguages, listDirectories } from "@roo-code/evals" - -const __dirname = path.dirname(fileURLToPath(import.meta.url)) // /apps/web-evals/src/actions - -const EVALS_REPO_PATH = path.resolve(__dirname, "../../../../../evals") - -export const getExercises = async () => { - const result = await Promise.all( - exerciseLanguages.map(async (language) => { - const languagePath = path.join(EVALS_REPO_PATH, language) - const exercises = await listDirectories(__dirname, languagePath) - return exercises.map((exercise) => `${language}/${exercise}`) - }), - ) - - return result.flat() -} diff --git a/apps/web-evals/src/actions/heartbeat.ts b/apps/web-evals/src/actions/heartbeat.ts deleted file mode 100644 index a74aa8ee64..0000000000 --- a/apps/web-evals/src/actions/heartbeat.ts +++ /dev/null @@ -1,8 +0,0 @@ -"use server" - -import { redisClient } from "@/lib/server/redis" - -export const getHeartbeat = async (runId: number) => { - const redis = await redisClient() - return redis.get(`heartbeat:${runId}`) -} diff --git a/apps/web-evals/src/actions/runners.ts b/apps/web-evals/src/actions/runners.ts deleted file mode 100644 index 8b7e86b0f3..0000000000 --- a/apps/web-evals/src/actions/runners.ts +++ /dev/null @@ -1,8 +0,0 @@ -"use server" - -import { redisClient } from "@/lib/server/redis" - -export const getRunners = async (runId: number) => { - const redis = await redisClient() - return redis.sMembers(`runners:${runId}`) -} diff --git a/apps/web-evals/src/actions/runs.ts b/apps/web-evals/src/actions/runs.ts deleted file mode 100644 index f0c1578aed..0000000000 --- a/apps/web-evals/src/actions/runs.ts +++ /dev/null @@ -1,377 +0,0 @@ -"use server" - -import * as path from "path" -import fs from "fs" -import { fileURLToPath } from "url" -import { spawn, execFileSync } from "child_process" - -import { revalidatePath } from "next/cache" -import pMap from "p-map" - -import { - type ExerciseLanguage, - exerciseLanguages, - createRun as _createRun, - deleteRun as _deleteRun, - updateRun as _updateRun, - getIncompleteRuns as _getIncompleteRuns, - deleteRunsByIds as _deleteRunsByIds, - createTask, - getExercisesForLanguage, -} from "@roo-code/evals" - -import { CreateRun } from "@/lib/schemas" -import { redisClient } from "@/lib/server/redis" - -// Storage base path for eval logs -const EVALS_STORAGE_PATH = "/tmp/evals/runs" - -const EVALS_REPO_PATH = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../../../../evals") - -export async function createRun({ - suite, - exercises = [], - timeout, - iterations = 1, - executionMethod = "vscode", - ...values -}: CreateRun) { - const run = await _createRun({ - ...values, - timeout, - executionMethod, - socketPath: "", // TODO: Get rid of this. - }) - - if (suite === "partial") { - for (const path of exercises) { - const [language, exercise] = path.split("/") - - if (!language || !exercise) { - throw new Error("Invalid exercise path: " + path) - } - - // Create multiple tasks for each iteration - for (let iteration = 1; iteration <= iterations; iteration++) { - await createTask({ - ...values, - runId: run.id, - language: language as ExerciseLanguage, - exercise, - iteration, - }) - } - } - } else { - for (const language of exerciseLanguages) { - const languageExercises = await getExercisesForLanguage(EVALS_REPO_PATH, language) - - // Create tasks for all iterations of each exercise - const tasksToCreate: Array<{ language: ExerciseLanguage; exercise: string; iteration: number }> = [] - for (const exercise of languageExercises) { - for (let iteration = 1; iteration <= iterations; iteration++) { - tasksToCreate.push({ language, exercise, iteration }) - } - } - - await pMap( - tasksToCreate, - ({ language, exercise, iteration }) => createTask({ runId: run.id, language, exercise, iteration }), - { concurrency: 10 }, - ) - } - } - - revalidatePath("/runs") - - try { - const isRunningInDocker = fs.existsSync("/.dockerenv") - - const dockerArgs = [ - `--name evals-controller-${run.id}`, - "--rm", - "--network evals_default", - "-v /var/run/docker.sock:/var/run/docker.sock", - "-v /tmp/evals:/var/log/evals", - "-e HOST_EXECUTION_METHOD=docker", - ] - - const cliCommand = `pnpm --filter @roo-code/evals cli --runId ${run.id}` - - const command = isRunningInDocker - ? `docker run ${dockerArgs.join(" ")} evals-runner sh -c "${cliCommand}"` - : cliCommand - - console.log("spawn ->", command) - - const childProcess = spawn("sh", ["-c", command], { - detached: true, - stdio: ["ignore", "pipe", "pipe"], - }) - - const logStream = fs.createWriteStream("/tmp/roo-code-evals.log", { flags: "a" }) - - if (childProcess.stdout) { - childProcess.stdout.pipe(logStream) - } - - if (childProcess.stderr) { - childProcess.stderr.pipe(logStream) - } - - childProcess.unref() - } catch (error) { - console.error(error) - } - - return run -} - -export async function deleteRun(runId: number) { - await _deleteRun(runId) - revalidatePath("/runs") -} - -export type KillRunResult = { - success: boolean - killedContainers: string[] - errors: string[] -} - -const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)) - -/** - * Kill all Docker containers associated with a run (controller and task runners). - * Kills the controller first, waits 10 seconds, then kills runners. - * Also clears Redis state for heartbeat and runners. - * - * Container naming conventions: - * - Controller: evals-controller-{runId} - * - Task runners: evals-task-{runId}-{taskId}.{attempt} - */ -export async function killRun(runId: number): Promise { - const killedContainers: string[] = [] - const errors: string[] = [] - const controllerPattern = `evals-controller-${runId}` - const taskPattern = `evals-task-${runId}-` - - try { - // Step 1: Kill the controller first - console.log(`Killing controller: ${controllerPattern}`) - try { - execFileSync("docker", ["kill", controllerPattern], { encoding: "utf-8", timeout: 10000 }) - killedContainers.push(controllerPattern) - console.log(`Killed controller container: ${controllerPattern}`) - } catch (_error) { - // Controller might not be running - that's ok, continue to kill runners - console.log(`Controller ${controllerPattern} not running or already stopped`) - } - - // Step 2: Wait 10 seconds before killing runners - console.log("Waiting 10 seconds before killing runners...") - await sleep(10000) - - // Step 3: Find and kill all task runner containers for THIS run only - let taskContainerNames: string[] = [] - - try { - const output = execFileSync("docker", ["ps", "--format", "{{.Names}}", "--filter", `name=${taskPattern}`], { - encoding: "utf-8", - timeout: 10000, - }) - taskContainerNames = output - .split("\n") - .map((name) => name.trim()) - .filter((name) => name.length > 0 && name.startsWith(taskPattern)) - } catch (error) { - console.error("Failed to list task containers:", error) - errors.push("Failed to list Docker task containers") - } - - // Kill each task runner container - for (const containerName of taskContainerNames) { - try { - execFileSync("docker", ["kill", containerName], { encoding: "utf-8", timeout: 10000 }) - killedContainers.push(containerName) - console.log(`Killed task container: ${containerName}`) - } catch (error) { - // Container might have already stopped - console.error(`Failed to kill container ${containerName}:`, error) - errors.push(`Failed to kill container: ${containerName}`) - } - } - - // Step 4: Clear Redis state - try { - const redis = await redisClient() - const heartbeatKey = `heartbeat:${runId}` - const runnersKey = `runners:${runId}` - - await redis.del(heartbeatKey) - await redis.del(runnersKey) - console.log(`Cleared Redis keys: ${heartbeatKey}, ${runnersKey}`) - } catch (error) { - console.error("Failed to clear Redis state:", error) - errors.push("Failed to clear Redis state") - } - } catch (error) { - console.error("Error in killRun:", error) - errors.push("Unexpected error while killing containers") - } - - revalidatePath(`/runs/${runId}`) - revalidatePath("/runs") - - return { - success: killedContainers.length > 0 || errors.length === 0, - killedContainers, - errors, - } -} - -export type DeleteIncompleteRunsResult = { - success: boolean - deletedCount: number - deletedRunIds: number[] - storageErrors: string[] -} - -/** - * Delete all incomplete runs (runs without a taskMetricsId/final score). - * Removes both database records and storage folders. - */ -export async function deleteIncompleteRuns(): Promise { - const storageErrors: string[] = [] - - // Get all incomplete runs - const incompleteRuns = await _getIncompleteRuns() - const runIds = incompleteRuns.map((run) => run.id) - - if (runIds.length === 0) { - return { - success: true, - deletedCount: 0, - deletedRunIds: [], - storageErrors: [], - } - } - - // Delete storage folders for each run - for (const runId of runIds) { - const storagePath = path.join(EVALS_STORAGE_PATH, String(runId)) - try { - if (fs.existsSync(storagePath)) { - fs.rmSync(storagePath, { recursive: true, force: true }) - console.log(`Deleted storage folder: ${storagePath}`) - } - } catch (error) { - console.error(`Failed to delete storage folder ${storagePath}:`, error) - storageErrors.push(`Failed to delete storage for run ${runId}`) - } - - // Also try to clear Redis state for any potentially running incomplete runs - try { - const redis = await redisClient() - await redis.del(`heartbeat:${runId}`) - await redis.del(`runners:${runId}`) - } catch (error) { - // Non-critical error, just log it - console.error(`Failed to clear Redis state for run ${runId}:`, error) - } - } - - // Delete from database - await _deleteRunsByIds(runIds) - - revalidatePath("/runs") - - return { - success: true, - deletedCount: runIds.length, - deletedRunIds: runIds, - storageErrors, - } -} - -/** - * Get count of incomplete runs (for UI display) - */ -export async function getIncompleteRunsCount(): Promise { - const incompleteRuns = await _getIncompleteRuns() - return incompleteRuns.length -} - -/** - * Delete all runs older than 30 days. - * Removes both database records and storage folders. - */ -export async function deleteOldRuns(): Promise { - const storageErrors: string[] = [] - - // Get all runs older than 30 days - const thirtyDaysAgo = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000) - const { getRuns } = await import("@roo-code/evals") - const allRuns = await getRuns() - const oldRuns = allRuns.filter((run) => run.createdAt < thirtyDaysAgo) - const runIds = oldRuns.map((run) => run.id) - - if (runIds.length === 0) { - return { - success: true, - deletedCount: 0, - deletedRunIds: [], - storageErrors: [], - } - } - - // Delete storage folders for each run - for (const runId of runIds) { - const storagePath = path.join(EVALS_STORAGE_PATH, String(runId)) - try { - if (fs.existsSync(storagePath)) { - fs.rmSync(storagePath, { recursive: true, force: true }) - console.log(`Deleted storage folder: ${storagePath}`) - } - } catch (error) { - console.error(`Failed to delete storage folder ${storagePath}:`, error) - storageErrors.push(`Failed to delete storage for run ${runId}`) - } - - // Also try to clear Redis state - try { - const redis = await redisClient() - await redis.del(`heartbeat:${runId}`) - await redis.del(`runners:${runId}`) - } catch (error) { - // Non-critical error, just log it - console.error(`Failed to clear Redis state for run ${runId}:`, error) - } - } - - // Delete from database - await _deleteRunsByIds(runIds) - - revalidatePath("/runs") - - return { - success: true, - deletedCount: runIds.length, - deletedRunIds: runIds, - storageErrors, - } -} - -/** - * Update the description of a run. - */ -export async function updateRunDescription(runId: number, description: string | null): Promise<{ success: boolean }> { - try { - await _updateRun(runId, { description }) - revalidatePath("/runs") - revalidatePath(`/runs/${runId}`) - return { success: true } - } catch (error) { - console.error("Failed to update run description:", error) - return { success: false } - } -} diff --git a/apps/web-evals/src/actions/tasks.ts b/apps/web-evals/src/actions/tasks.ts deleted file mode 100644 index 18b428b0ca..0000000000 --- a/apps/web-evals/src/actions/tasks.ts +++ /dev/null @@ -1,11 +0,0 @@ -"use server" - -import { revalidatePath } from "next/cache" - -import { getTasks as _getTasks } from "@roo-code/evals" - -export async function getTasks(runId: number) { - const tasks = await _getTasks(runId) - revalidatePath(`/runs/${runId}`) - return tasks -} diff --git a/apps/web-evals/src/app/api/runs/[id]/logs/[taskId]/route.ts b/apps/web-evals/src/app/api/runs/[id]/logs/[taskId]/route.ts deleted file mode 100644 index e5ec8751ab..0000000000 --- a/apps/web-evals/src/app/api/runs/[id]/logs/[taskId]/route.ts +++ /dev/null @@ -1,74 +0,0 @@ -import { NextResponse } from "next/server" -import type { NextRequest } from "next/server" -import * as fs from "node:fs/promises" -import * as path from "node:path" - -import { findTask, findRun } from "@roo-code/evals" - -export const dynamic = "force-dynamic" - -const LOG_BASE_PATH = "/tmp/evals/runs" - -// Sanitize path components to prevent path traversal attacks -function sanitizePathComponent(component: string): string { - // Remove any path separators, null bytes, and other dangerous characters - return component.replace(/[/\\:\0*?"<>|]/g, "_") -} - -export async function GET(request: NextRequest, { params }: { params: Promise<{ id: string; taskId: string }> }) { - const { id, taskId } = await params - - try { - const runId = Number(id) - const taskIdNum = Number(taskId) - - if (isNaN(runId) || isNaN(taskIdNum)) { - return NextResponse.json({ error: "Invalid run ID or task ID" }, { status: 400 }) - } - - // Verify the run exists - await findRun(runId) - - // Get the task to find its language and exercise - const task = await findTask(taskIdNum) - - // Verify the task belongs to this run - if (task.runId !== runId) { - return NextResponse.json({ error: "Task does not belong to this run" }, { status: 404 }) - } - - // Sanitize language and exercise to prevent path traversal - const safeLanguage = sanitizePathComponent(task.language) - const safeExercise = sanitizePathComponent(task.exercise) - - // Construct the log file path - const logFileName = `${safeLanguage}-${safeExercise}.log` - const logFilePath = path.join(LOG_BASE_PATH, String(runId), logFileName) - - // Verify the resolved path is within the expected directory (defense in depth) - const resolvedPath = path.resolve(logFilePath) - const expectedBase = path.resolve(LOG_BASE_PATH) - if (!resolvedPath.startsWith(expectedBase)) { - return NextResponse.json({ error: "Invalid log path" }, { status: 400 }) - } - - // Check if the log file exists and read it (async) - try { - const logContent = await fs.readFile(logFilePath, "utf-8") - return NextResponse.json({ logContent }) - } catch (err) { - if ((err as NodeJS.ErrnoException).code === "ENOENT") { - return NextResponse.json({ error: "Log file not found", logContent: null }, { status: 200 }) - } - throw err - } - } catch (error) { - console.error("Error reading task log:", error) - - if (error instanceof Error && error.name === "RecordNotFoundError") { - return NextResponse.json({ error: "Task or run not found" }, { status: 404 }) - } - - return NextResponse.json({ error: "Failed to read log file" }, { status: 500 }) - } -} diff --git a/apps/web-evals/src/app/api/runs/[id]/logs/failed/route.ts b/apps/web-evals/src/app/api/runs/[id]/logs/failed/route.ts deleted file mode 100644 index 8b2760df98..0000000000 --- a/apps/web-evals/src/app/api/runs/[id]/logs/failed/route.ts +++ /dev/null @@ -1,147 +0,0 @@ -import { NextResponse } from "next/server" -import type { NextRequest } from "next/server" -import * as fs from "node:fs" -import * as path from "node:path" -import archiver from "archiver" - -import { findRun, getTasks } from "@roo-code/evals" - -export const dynamic = "force-dynamic" - -const LOG_BASE_PATH = "/tmp/evals/runs" - -// Sanitize path components to prevent path traversal attacks -function sanitizePathComponent(component: string): string { - // Remove any path separators, null bytes, and other dangerous characters - return component.replace(/[/\\:\0*?"<>|]/g, "_") -} - -export async function GET(request: NextRequest, { params }: { params: Promise<{ id: string }> }) { - const { id } = await params - - try { - const runId = Number(id) - - if (isNaN(runId)) { - return NextResponse.json({ error: "Invalid run ID" }, { status: 400 }) - } - - // Verify the run exists - await findRun(runId) - - // Get all tasks for this run - const tasks = await getTasks(runId) - - // Filter for failed tasks only - const failedTasks = tasks.filter((task) => task.passed === false) - - if (failedTasks.length === 0) { - return NextResponse.json({ error: "No failed tasks to export" }, { status: 400 }) - } - - // Create a zip archive - const archive = archiver("zip", { zlib: { level: 9 } }) - - // Collect chunks to build the response - const chunks: Buffer[] = [] - - archive.on("data", (chunk: Buffer) => { - chunks.push(chunk) - }) - - // Track archive errors - let archiveError: Error | null = null - archive.on("error", (err: Error) => { - archiveError = err - }) - - // Set up the end promise before finalizing (proper event listener ordering) - const archiveEndPromise = new Promise((resolve, reject) => { - archive.on("end", resolve) - archive.on("error", reject) - }) - - // Add each failed task's log file and history files to the archive - const logDir = path.join(LOG_BASE_PATH, String(runId)) - let filesAdded = 0 - - for (const task of failedTasks) { - // Sanitize language and exercise to prevent path traversal - const safeLanguage = sanitizePathComponent(task.language) - const safeExercise = sanitizePathComponent(task.exercise) - const expectedBase = path.resolve(LOG_BASE_PATH) - - // Add the log file - const logFileName = `${safeLanguage}-${safeExercise}.log` - const logFilePath = path.join(logDir, logFileName) - - // Verify the resolved path is within the expected directory (defense in depth) - const resolvedLogPath = path.resolve(logFilePath) - if (resolvedLogPath.startsWith(expectedBase) && fs.existsSync(logFilePath)) { - archive.file(logFilePath, { name: logFileName }) - filesAdded++ - } - - // Add the API conversation history file - // Format: {language}-{exercise}.{iteration}_api_conversation_history.json - const apiHistoryFileName = `${safeLanguage}-${safeExercise}.${task.iteration}_api_conversation_history.json` - const apiHistoryFilePath = path.join(logDir, apiHistoryFileName) - const resolvedApiHistoryPath = path.resolve(apiHistoryFilePath) - if (resolvedApiHistoryPath.startsWith(expectedBase) && fs.existsSync(apiHistoryFilePath)) { - archive.file(apiHistoryFilePath, { name: apiHistoryFileName }) - filesAdded++ - } - - // Add the UI messages file - // Format: {language}-{exercise}.{iteration}_ui_messages.json - const uiMessagesFileName = `${safeLanguage}-${safeExercise}.${task.iteration}_ui_messages.json` - const uiMessagesFilePath = path.join(logDir, uiMessagesFileName) - const resolvedUiMessagesPath = path.resolve(uiMessagesFilePath) - if (resolvedUiMessagesPath.startsWith(expectedBase) && fs.existsSync(uiMessagesFilePath)) { - archive.file(uiMessagesFilePath, { name: uiMessagesFileName }) - filesAdded++ - } - } - - // Check if any files were actually added - if (filesAdded === 0) { - archive.abort() - return NextResponse.json( - { error: "No log files found - they may have been cleared from disk" }, - { status: 404 }, - ) - } - - // Finalize the archive - await archive.finalize() - - // Wait for all data to be collected - await archiveEndPromise - - // Check for archive errors - if (archiveError) { - throw archiveError - } - - // Combine all chunks into a single buffer - const zipBuffer = Buffer.concat(chunks) - - // Return the zip file - return new NextResponse(zipBuffer, { - status: 200, - headers: { - "Content-Type": "application/zip", - "Content-Disposition": `attachment; filename="run-${runId}-failed-logs.zip"`, - "Content-Length": String(zipBuffer.length), - }, - }) - } catch (error) { - console.error("Error exporting failed logs:", error) - - if (error instanceof Error && error.name === "RecordNotFoundError") { - return NextResponse.json({ error: "Run not found" }, { status: 404 }) - } - - return NextResponse.json({ error: "Failed to export logs" }, { status: 500 }) - } -} diff --git a/apps/web-evals/src/app/api/runs/[id]/stream/route.ts b/apps/web-evals/src/app/api/runs/[id]/stream/route.ts deleted file mode 100644 index 3168974ecd..0000000000 --- a/apps/web-evals/src/app/api/runs/[id]/stream/route.ts +++ /dev/null @@ -1,71 +0,0 @@ -import type { NextRequest } from "next/server" - -import { taskEventSchema } from "@roo-code/types" -import { findRun } from "@roo-code/evals" - -import { SSEStream } from "@/lib/server/sse-stream" -import { redisClient } from "@/lib/server/redis" - -export const dynamic = "force-dynamic" - -export async function GET(request: NextRequest, { params }: { params: Promise<{ id: string }> }) { - const { id } = await params - const requestId = crypto.randomUUID() - const stream = new SSEStream() - const run = await findRun(Number(id)) - const redis = await redisClient() - - let isStreamClosed = false - const channelName = `evals:${run.id}` - - const onMessage = async (data: string) => { - if (isStreamClosed || stream.isClosed) { - return - } - - try { - const taskEvent = taskEventSchema.parse(JSON.parse(data)) - // console.log(`[stream#${requestId}] task event -> ${taskEvent.eventName}`) - const writeSuccess = await stream.write(JSON.stringify(taskEvent)) - - if (!writeSuccess) { - await disconnect() - } - } catch (_error) { - console.error(`[stream#${requestId}] invalid task event:`, data) - } - } - - const disconnect = async () => { - if (isStreamClosed) { - return - } - - isStreamClosed = true - - try { - await redis.unsubscribe(channelName) - console.log(`[stream#${requestId}] unsubscribed from ${channelName}`) - } catch (error) { - console.error(`[stream#${requestId}] error unsubscribing:`, error) - } - - try { - await stream.close() - } catch (error) { - console.error(`[stream#${requestId}] error closing stream:`, error) - } - } - - await redis.subscribe(channelName, onMessage) - - request.signal.addEventListener("abort", () => { - console.log(`[stream#${requestId}] abort`) - - disconnect().catch((error) => { - console.error(`[stream#${requestId}] cleanup error:`, error) - }) - }) - - return stream.getResponse() -} diff --git a/apps/web-evals/src/app/favicon.ico b/apps/web-evals/src/app/favicon.ico deleted file mode 100644 index 718d6fea4835ec2d246af9800eddb7ffb276240c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 25931 zcmeHv30#a{`}aL_*G&7qml|y<+KVaDM2m#dVr!KsA!#An?kSQM(q<_dDNCpjEux83 zLb9Z^XxbDl(w>%i@8hT6>)&Gu{h#Oeyszu?xtw#Zb1mO{pgX9699l+Qppw7jXaYf~-84xW z)w4x8?=youko|}Vr~(D$UXIbiXABHh`p1?nn8Po~fxRJv}|0e(BPs|G`(TT%kKVJAdg5*Z|x0leQq0 zkdUBvb#>9F()jo|T~kx@OM8$9wzs~t2l;K=woNssA3l6|sx2r3+kdfVW@e^8e*E}v zA1y5{bRi+3Z`uD3{F7LgFJDdvm;nJilkzDku>BwXH(8ItVCXk*-lSJnR?-2UN%hJ){&rlvg`CDTj z)Bzo!3v7Ou#83zEDEFcKt(f1E0~=rqeEbTnMvWR#{+9pg%7G8y>u1OVRUSoox-ovF z2Ydma(;=YuBY(eI|04{hXzZD6_f(v~H;C~y5=DhAC{MMS>2fm~1H_t2$56pc$NH8( z5bH|<)71dV-_oCHIrzrT`2s-5w_+2CM0$95I6X8p^r!gHp+j_gd;9O<1~CEQQGS8) zS9Qh3#p&JM-G8rHekNmKVewU;pJRcTAog68KYo^dRo}(M>36U4Us zfgYWSiHZL3;lpWT=zNAW>Dh#mB!_@Lg%$ms8N-;aPqMn+C2HqZgz&9~Eu z4|Kp<`$q)Uw1R?y(~S>ePdonHxpV1#eSP1B;Ogo+-Pk}6#0GsZZ5!||ev2MGdh}_m z{DeR7?0-1^zVs&`AV6Vt;r3`I`OI_wgs*w=eO%_#7Kepl{B@xiyCANc(l zzIyd4y|c6PXWq9-|KM8(zIk8LPk(>a)zyFWjhT!$HJ$qX1vo@d25W<fvZQ2zUz5WRc(UnFMKHwe1| zWmlB1qdbiA(C0jmnV<}GfbKtmcu^2*P^O?MBLZKt|As~ge8&AAO~2K@zbXelK|4T<{|y4`raF{=72kC2Kn(L4YyenWgrPiv z@^mr$t{#X5VuIMeL!7Ab6_kG$&#&5p*Z{+?5U|TZ`B!7llpVmp@skYz&n^8QfPJzL z0G6K_OJM9x+Wu2gfN45phANGt{7=C>i34CV{Xqlx(fWpeAoj^N0Biu`w+MVcCUyU* zDZuzO0>4Z6fbu^T_arWW5n!E45vX8N=bxTVeFoep_G#VmNlQzAI_KTIc{6>c+04vr zx@W}zE5JNSU>!THJ{J=cqjz+4{L4A{Ob9$ZJ*S1?Ggg3klFp!+Y1@K+pK1DqI|_gq z5ZDXVpge8-cs!o|;K73#YXZ3AShj50wBvuq3NTOZ`M&qtjj#GOFfgExjg8Gn8>Vq5 z`85n+9|!iLCZF5$HJ$Iu($dm?8~-ofu}tEc+-pyke=3!im#6pk_Wo8IA|fJwD&~~F zc16osQ)EBo58U7XDuMexaPRjU@h8tXe%S{fA0NH3vGJFhuyyO!Uyl2^&EOpX{9As0 zWj+P>{@}jxH)8|r;2HdupP!vie{sJ28b&bo!8`D^x}TE$%zXNb^X1p@0PJ86`dZyj z%ce7*{^oo+6%&~I!8hQy-vQ7E)0t0ybH4l%KltWOo~8cO`T=157JqL(oq_rC%ea&4 z2NcTJe-HgFjNg-gZ$6!Y`SMHrlj}Etf7?r!zQTPPSv}{so2e>Fjs1{gzk~LGeesX%r(Lh6rbhSo_n)@@G-FTQy93;l#E)hgP@d_SGvyCp0~o(Y;Ee8{ zdVUDbHm5`2taPUOY^MAGOw*>=s7=Gst=D+p+2yON!0%Hk` zz5mAhyT4lS*T3LS^WSxUy86q&GnoHxzQ6vm8)VS}_zuqG?+3td68_x;etQAdu@sc6 zQJ&5|4(I?~3d-QOAODHpZ=hlSg(lBZ!JZWCtHHSj`0Wh93-Uk)_S%zsJ~aD>{`A0~ z9{AG(e|q3g5B%wYKRxiL2Y$8(4w6bzchKuloQW#e&S3n+P- z8!ds-%f;TJ1>)v)##>gd{PdS2Oc3VaR`fr=`O8QIO(6(N!A?pr5C#6fc~Ge@N%Vvu zaoAX2&(a6eWy_q&UwOhU)|P3J0Qc%OdhzW=F4D|pt0E4osw;%<%Dn58hAWD^XnZD= z>9~H(3bmLtxpF?a7su6J7M*x1By7YSUbxGi)Ot0P77`}P3{)&5Un{KD?`-e?r21!4vTTnN(4Y6Lin?UkSM z`MXCTC1@4A4~mvz%Rh2&EwY))LeoT=*`tMoqcEXI>TZU9WTP#l?uFv+@Dn~b(>xh2 z;>B?;Tz2SR&KVb>vGiBSB`@U7VIWFSo=LDSb9F{GF^DbmWAfpms8Sx9OX4CnBJca3 zlj9(x!dIjN?OG1X4l*imJNvRCk}F%!?SOfiOq5y^mZW)jFL@a|r-@d#f7 z2gmU8L3IZq0ynIws=}~m^#@&C%J6QFo~Mo4V`>v7MI-_!EBMMtb%_M&kvAaN)@ZVw z+`toz&WG#HkWDjnZE!6nk{e-oFdL^$YnbOCN}JC&{$#$O27@|Tn-skXr)2ml2~O!5 zX+gYoxhoc7qoU?C^3~&!U?kRFtnSEecWuH0B0OvLodgUAi}8p1 zrO6RSXHH}DMc$&|?D004DiOVMHV8kXCP@7NKB zgaZq^^O<7PoKEp72kby@W0Z!Y*Ay{&vfg#C&gG@YVR9g?FEocMUi1gSN$+V+ayF45{a zuDZDTN}mS|;BO%gEf}pjBfN2-gIrU#G5~cucA;dokXW89%>AyXJJI z9X4UlIWA|ZYHgbI z5?oFk@A=Ik7lrEQPDH!H+b`7_Y~aDb_qa=B2^Y&Ow41cU=4WDd40dp5(QS-WMN-=Y z9g;6_-JdNU;|6cPwf$ak*aJIcwL@1n$#l~zi{c{EW?T;DaW*E8DYq?Umtz{nJ&w-M zEMyTDrC&9K$d|kZe2#ws6)L=7K+{ zQw{XnV6UC$6-rW0emqm8wJoeZK)wJIcV?dST}Z;G0Arq{dVDu0&4kd%N!3F1*;*pW zR&qUiFzK=@44#QGw7k1`3t_d8&*kBV->O##t|tonFc2YWrL7_eqg+=+k;!F-`^b8> z#KWCE8%u4k@EprxqiV$VmmtiWxDLgnGu$Vs<8rppV5EajBXL4nyyZM$SWVm!wnCj-B!Wjqj5-5dNXukI2$$|Bu3Lrw}z65Lc=1G z^-#WuQOj$hwNGG?*CM_TO8Bg-1+qc>J7k5c51U8g?ZU5n?HYor;~JIjoWH-G>AoUP ztrWWLbRNqIjW#RT*WqZgPJXU7C)VaW5}MiijYbABmzoru6EmQ*N8cVK7a3|aOB#O& zBl8JY2WKfmj;h#Q!pN%9o@VNLv{OUL?rixHwOZuvX7{IJ{(EdPpuVFoQqIOa7giLVkBOKL@^smUA!tZ1CKRK}#SSM)iQHk)*R~?M!qkCruaS!#oIL1c z?J;U~&FfH#*98^G?i}pA{ z9Jg36t4=%6mhY(quYq*vSxptes9qy|7xSlH?G=S@>u>Ebe;|LVhs~@+06N<4CViBk zUiY$thvX;>Tby6z9Y1edAMQaiH zm^r3v#$Q#2T=X>bsY#D%s!bhs^M9PMAcHbCc0FMHV{u-dwlL;a1eJ63v5U*?Q_8JO zT#50!RD619#j_Uf))0ooADz~*9&lN!bBDRUgE>Vud-i5ck%vT=r^yD*^?Mp@Q^v+V zG#-?gKlr}Eeqifb{|So?HM&g91P8|av8hQoCmQXkd?7wIJwb z_^v8bbg`SAn{I*4bH$u(RZ6*xUhuA~hc=8czK8SHEKTzSxgbwi~9(OqJB&gwb^l4+m`k*Q;_?>Y-APi1{k zAHQ)P)G)f|AyjSgcCFps)Fh6Bca*Xznq36!pV6Az&m{O8$wGFD? zY&O*3*J0;_EqM#jh6^gMQKpXV?#1?>$ml1xvh8nSN>-?H=V;nJIwB07YX$e6vLxH( zqYwQ>qxwR(i4f)DLd)-$P>T-no_c!LsN@)8`e;W@)-Hj0>nJ-}Kla4-ZdPJzI&Mce zv)V_j;(3ERN3_@I$N<^|4Lf`B;8n+bX@bHbcZTopEmDI*Jfl)-pFDvo6svPRoo@(x z);_{lY<;);XzT`dBFpRmGrr}z5u1=pC^S-{ce6iXQlLGcItwJ^mZx{m$&DA_oEZ)B{_bYPq-HA zcH8WGoBG(aBU_j)vEy+_71T34@4dmSg!|M8Vf92Zj6WH7Q7t#OHQqWgFE3ARt+%!T z?oLovLVlnf?2c7pTc)~cc^($_8nyKwsN`RA-23ed3sdj(ys%pjjM+9JrctL;dy8a( z@en&CQmnV(()bu|Y%G1-4a(6x{aLytn$T-;(&{QIJB9vMox11U-1HpD@d(QkaJdEb zG{)+6Dos_L+O3NpWo^=gR?evp|CqEG?L&Ut#D*KLaRFOgOEK(Kq1@!EGcTfo+%A&I z=dLbB+d$u{sh?u)xP{PF8L%;YPPW53+@{>5W=Jt#wQpN;0_HYdw1{ksf_XhO4#2F= zyPx6Lx2<92L-;L5PD`zn6zwIH`Jk($?Qw({erA$^bC;q33hv!d!>%wRhj# zal^hk+WGNg;rJtb-EB(?czvOM=H7dl=vblBwAv>}%1@{}mnpUznfq1cE^sgsL0*4I zJ##!*B?=vI_OEVis5o+_IwMIRrpQyT_Sq~ZU%oY7c5JMIADzpD!Upz9h@iWg_>>~j zOLS;wp^i$-E?4<_cp?RiS%Rd?i;f*mOz=~(&3lo<=@(nR!_Rqiprh@weZlL!t#NCc zO!QTcInq|%#>OVgobj{~ixEUec`E25zJ~*DofsQdzIa@5^nOXj2T;8O`l--(QyU^$t?TGY^7#&FQ+2SS3B#qK*k3`ye?8jUYSajE5iBbJls75CCc(m3dk{t?- zopcER9{Z?TC)mk~gpi^kbbu>b-+a{m#8-y2^p$ka4n60w;Sc2}HMf<8JUvhCL0B&Btk)T`ctE$*qNW8L$`7!r^9T+>=<=2qaq-;ll2{`{Rg zc5a0ZUI$oG&j-qVOuKa=*v4aY#IsoM+1|c4Z)<}lEDvy;5huB@1RJPquU2U*U-;gu z=En2m+qjBzR#DEJDO`WU)hdd{Vj%^0V*KoyZ|5lzV87&g_j~NCjwv0uQVqXOb*QrQ zy|Qn`hxx(58c70$E;L(X0uZZ72M1!6oeg)(cdKO ze0gDaTz+ohR-#d)NbAH4x{I(21yjwvBQfmpLu$)|m{XolbgF!pmsqJ#D}(ylp6uC> z{bqtcI#hT#HW=wl7>p!38sKsJ`r8}lt-q%Keqy%u(xk=yiIJiUw6|5IvkS+#?JTBl z8H5(Q?l#wzazujH!8o>1xtn8#_w+397*_cy8!pQGP%K(Ga3pAjsaTbbXJlQF_+m+-UpUUent@xM zg%jqLUExj~o^vQ3Gl*>wh=_gOr2*|U64_iXb+-111aH}$TjeajM+I20xw(((>fej-@CIz4S1pi$(#}P7`4({6QS2CaQS4NPENDp>sAqD z$bH4KGzXGffkJ7R>V>)>tC)uax{UsN*dbeNC*v}#8Y#OWYwL4t$ePR?VTyIs!wea+ z5Urmc)X|^`MG~*dS6pGSbU+gPJoq*^a=_>$n4|P^w$sMBBy@f*Z^Jg6?n5?oId6f{ z$LW4M|4m502z0t7g<#Bx%X;9<=)smFolV&(V^(7Cv2-sxbxopQ!)*#ZRhTBpx1)Fc zNm1T%bONzv6@#|dz(w02AH8OXe>kQ#1FMCzO}2J_mST)+ExmBr9cva-@?;wnmWMOk z{3_~EX_xadgJGv&H@zK_8{(x84`}+c?oSBX*Ge3VdfTt&F}yCpFP?CpW+BE^cWY0^ zb&uBN!Ja3UzYHK-CTyA5=L zEMW{l3Usky#ly=7px648W31UNV@K)&Ub&zP1c7%)`{);I4b0Q<)B}3;NMG2JH=X$U zfIW4)4n9ZM`-yRj67I)YSLDK)qfUJ_ij}a#aZN~9EXrh8eZY2&=uY%2N0UFF7<~%M zsB8=erOWZ>Ct_#^tHZ|*q`H;A)5;ycw*IcmVxi8_0Xk}aJA^ath+E;xg!x+As(M#0=)3!NJR6H&9+zd#iP(m0PIW8$ z1Y^VX`>jm`W!=WpF*{ioM?C9`yOR>@0q=u7o>BP-eSHqCgMDj!2anwH?s%i2p+Q7D zzszIf5XJpE)IG4;d_(La-xenmF(tgAxK`Y4sQ}BSJEPs6N_U2vI{8=0C_F?@7<(G; zo$~G=8p+076G;`}>{MQ>t>7cm=zGtfbdDXm6||jUU|?X?CaE?(<6bKDYKeHlz}DA8 zXT={X=yp_R;HfJ9h%?eWvQ!dRgz&Su*JfNt!Wu>|XfU&68iRikRrHRW|ZxzRR^`eIGt zIeiDgVS>IeExKVRWW8-=A=yA`}`)ZkWBrZD`hpWIxBGkh&f#ijr449~m`j6{4jiJ*C!oVA8ZC?$1RM#K(_b zL9TW)kN*Y4%^-qPpMP7d4)o?Nk#>aoYHT(*g)qmRUb?**F@pnNiy6Fv9rEiUqD(^O zzyS?nBrX63BTRYduaG(0VVG2yJRe%o&rVrLjbxTaAFTd8s;<<@Qs>u(<193R8>}2_ zuwp{7;H2a*X7_jryzriZXMg?bTuegABb^87@SsKkr2)0Gyiax8KQWstw^v#ix45EVrcEhr>!NMhprl$InQMzjSFH54x5k9qHc`@9uKQzvL4ihcq{^B zPrVR=o_ic%Y>6&rMN)hTZsI7I<3&`#(nl+3y3ys9A~&^=4?PL&nd8)`OfG#n zwAMN$1&>K++c{^|7<4P=2y(B{jJsQ0a#U;HTo4ZmWZYvI{+s;Td{Yzem%0*k#)vjpB zia;J&>}ICate44SFYY3vEelqStQWFihx%^vQ@Do(sOy7yR2@WNv7Y9I^yL=nZr3mb zXKV5t@=?-Sk|b{XMhA7ZGB@2hqsx}4xwCW!in#C zI@}scZlr3-NFJ@NFaJlhyfcw{k^vvtGl`N9xSo**rDW4S}i zM9{fMPWo%4wYDG~BZ18BD+}h|GQKc-g^{++3MY>}W_uq7jGHx{mwE9fZiPCoxN$+7 zrODGGJrOkcPQUB(FD5aoS4g~7#6NR^ma7-!>mHuJfY5kTe6PpNNKC9GGRiu^L31uG z$7v`*JknQHsYB!Tm_W{a32TM099djW%5e+j0Ve_ct}IM>XLF1Ap+YvcrLV=|CKo6S zb+9Nl3_YdKP6%Cxy@6TxZ>;4&nTneadr z_ES90ydCev)LV!dN=#(*f}|ZORFdvkYBni^aLbUk>BajeWIOcmHP#8S)*2U~QKI%S zyrLmtPqb&TphJ;>yAxri#;{uyk`JJqODDw%(Z=2`1uc}br^V%>j!gS)D*q*f_-qf8&D;W1dJgQMlaH5er zN2U<%Smb7==vE}dDI8K7cKz!vs^73o9f>2sgiTzWcwY|BMYHH5%Vn7#kiw&eItCqa zIkR2~Q}>X=Ar8W|^Ms41Fm8o6IB2_j60eOeBB1Br!boW7JnoeX6Gs)?7rW0^5psc- zjS16yb>dFn>KPOF;imD}e!enuIniFzv}n$m2#gCCv4jM#ArwlzZ$7@9&XkFxZ4n!V zj3dyiwW4Ki2QG{@i>yuZXQizw_OkZI^-3otXC{!(lUpJF33gI60ak;Uqitp74|B6I zgg{b=Iz}WkhCGj1M=hu4#Aw173YxIVbISaoc z-nLZC*6Tgivd5V`K%GxhBsp@SUU60-rfc$=wb>zdJzXS&-5(NRRodFk;Kxk!S(O(a0e7oY=E( zAyS;Ow?6Q&XA+cnkCb{28_1N8H#?J!*$MmIwLq^*T_9-z^&UE@A(z9oGYtFy6EZef LrJugUA?W`A8`#=m diff --git a/apps/web-evals/src/app/globals.css b/apps/web-evals/src/app/globals.css deleted file mode 100644 index 8c12f0d1d2..0000000000 --- a/apps/web-evals/src/app/globals.css +++ /dev/null @@ -1,141 +0,0 @@ -@import "tailwindcss"; - -@plugin "tailwindcss-animate"; - -@custom-variant dark (&:is(.dark *)); - -:root { - --radius: 0.625rem; - --background: oklch(1 0 0); - --foreground: oklch(0.145 0 0); - --card: oklch(1 0 0); - --card-foreground: oklch(0.145 0 0); - --popover: oklch(1 0 0); - --popover-foreground: oklch(0.145 0 0); - --primary: oklch(0.205 0 0); - --primary-foreground: oklch(0.985 0 0); - --secondary: oklch(0.97 0 0); - --secondary-foreground: oklch(0.205 0 0); - --muted: oklch(0.97 0 0); - --muted-foreground: oklch(0.556 0 0); - --accent: oklch(0.97 0 0); - --accent-foreground: oklch(0.205 0 0); - --destructive: oklch(0.577 0.245 27.325); - --border: oklch(0.922 0 0); - --input: oklch(0.922 0 0); - --ring: oklch(0.708 0 0); - --chart-1: oklch(0.646 0.222 41.116); - --chart-2: oklch(0.6 0.118 184.704); - --chart-3: oklch(0.398 0.07 227.392); - --chart-4: oklch(0.828 0.189 84.429); - --chart-5: oklch(0.769 0.188 70.08); - --sidebar: oklch(0.985 0 0); - --sidebar-foreground: oklch(0.145 0 0); - --sidebar-primary: oklch(0.205 0 0); - --sidebar-primary-foreground: oklch(0.985 0 0); - --sidebar-accent: oklch(0.97 0 0); - --sidebar-accent-foreground: oklch(0.205 0 0); - --sidebar-border: oklch(0.922 0 0); - --sidebar-ring: oklch(0.708 0 0); -} - -.dark { - --background: oklch(23.66% 0.0198 271.79); - --foreground: oklch(75.15% 0.0477 278.41); - --card: oklch(0.205 0 0); - --card-foreground: oklch(0.985 0 0); - --popover: var(--primary); - --popover-foreground: oklch(0.985 0 0); - --primary: oklch(29.33% 0.0295 276.18); - --primary-foreground: var(--accent); - --secondary: var(--primary); - --secondary-foreground: var(--foreground); - --muted: oklch(28.27% 0.0207 273.06); - --muted-foreground: oklch(75.15% 0.0477 278.41 / 75%); - --accent: oklch(70.21% 0.1813 328.71); - --accent-foreground: oklch(1 0 0 / 75%); - --destructive: oklch(72.14% 0.1616 15.49); - --border: var(--primary); - --input: var(--primary); - --ring: oklch(83.63% 0.1259 176.52); - --chart-1: oklch(0.488 0.243 264.376); - --chart-2: oklch(0.696 0.17 162.48); - --chart-3: oklch(0.769 0.188 70.08); - --chart-4: oklch(0.627 0.265 303.9); - --chart-5: oklch(0.645 0.246 16.439); - --sidebar: oklch(0.205 0 0); - --sidebar-foreground: oklch(0.985 0 0); - --sidebar-primary: oklch(0.488 0.243 264.376); - --sidebar-primary-foreground: oklch(0.985 0 0); - --sidebar-accent: oklch(0.269 0 0); - --sidebar-accent-foreground: oklch(0.985 0 0); - --sidebar-border: oklch(1 0 0 / 10%); - --sidebar-ring: oklch(0.556 0 0); -} - -@theme inline { - --color-background: var(--background); - --color-foreground: var(--foreground); - --color-sidebar-ring: var(--sidebar-ring); - --color-sidebar-border: var(--sidebar-border); - --color-sidebar-accent-foreground: var(--sidebar-accent-foreground); - --color-sidebar-accent: var(--sidebar-accent); - --color-sidebar-primary-foreground: var(--sidebar-primary-foreground); - --color-sidebar-primary: var(--sidebar-primary); - --color-sidebar-foreground: var(--sidebar-foreground); - --color-sidebar: var(--sidebar); - --color-chart-5: var(--chart-5); - --color-chart-4: var(--chart-4); - --color-chart-3: var(--chart-3); - --color-chart-2: var(--chart-2); - --color-chart-1: var(--chart-1); - --color-ring: var(--ring); - --color-input: var(--input); - --color-border: var(--border); - --color-destructive: var(--destructive); - --color-accent-foreground: var(--accent-foreground); - --color-accent: var(--accent); - --color-muted-foreground: var(--muted-foreground); - --color-muted: var(--muted); - --color-secondary-foreground: var(--secondary-foreground); - --color-secondary: var(--secondary); - --color-primary-foreground: var(--primary-foreground); - --color-primary: var(--primary); - --color-popover-foreground: var(--popover-foreground); - --color-popover: var(--popover); - --color-card-foreground: var(--card-foreground); - --color-card: var(--card); - --radius-sm: calc(var(--radius) - 4px); - --radius-md: calc(var(--radius) - 2px); - --radius-lg: var(--radius); - --radius-xl: calc(var(--radius) + 4px); - - --animate-hop: hop 0.8s ease-in-out infinite; - - @keyframes hop { - 0%, - 100% { - transform: none; - animation-timing-function: cubic-bezier(0.8, 0, 1, 1); - } - 50% { - transform: translateY(-8px); - animation-timing-function: cubic-bezier(0, 0, 0.2, 1); - } - } -} - -@layer base { - * { - @apply border-border outline-ring/50; - } - html, - body { - height: 100%; - } - body { - @apply bg-background text-foreground; - scrollbar-color: rgba(0, 0, 0, 0.2) transparent; /* Firefox */ - scrollbar-width: thin; - } -} diff --git a/apps/web-evals/src/app/layout.tsx b/apps/web-evals/src/app/layout.tsx deleted file mode 100644 index 3bb34f7dfb..0000000000 --- a/apps/web-evals/src/app/layout.tsx +++ /dev/null @@ -1,35 +0,0 @@ -import type { Metadata } from "next" -import { Geist, Geist_Mono } from "next/font/google" - -import { ThemeProvider, ReactQueryProvider } from "@/components/providers" -import { Toaster } from "@/components/ui" -import { Header } from "@/components/layout/header" - -import "./globals.css" - -const fontSans = Geist({ variable: "--font-sans", subsets: ["latin"] }) -const fontMono = Geist_Mono({ variable: "--font-mono", subsets: ["latin"] }) - -export const metadata: Metadata = { - title: "Roo Code Evals", -} - -export default function RootLayout({ - children, -}: Readonly<{ - children: React.ReactNode -}>) { - return ( - - - - -
- {children} - - - - - - ) -} diff --git a/apps/web-evals/src/app/page.tsx b/apps/web-evals/src/app/page.tsx deleted file mode 100644 index 3dcb26aebf..0000000000 --- a/apps/web-evals/src/app/page.tsx +++ /dev/null @@ -1,10 +0,0 @@ -import { getRuns } from "@roo-code/evals" - -import { Runs } from "@/components/home/runs" - -export const dynamic = "force-dynamic" - -export default async function Page() { - const runs = await getRuns() - return -} diff --git a/apps/web-evals/src/app/runs/[id]/page.tsx b/apps/web-evals/src/app/runs/[id]/page.tsx deleted file mode 100644 index 8b993eec8a..0000000000 --- a/apps/web-evals/src/app/runs/[id]/page.tsx +++ /dev/null @@ -1,14 +0,0 @@ -import { findRun } from "@roo-code/evals" - -import { Run } from "./run" - -export default async function Page({ params }: { params: Promise<{ id: string }> }) { - const { id } = await params - const run = await findRun(Number(id)) - - return ( -
- -
- ) -} diff --git a/apps/web-evals/src/app/runs/[id]/run-status.tsx b/apps/web-evals/src/app/runs/[id]/run-status.tsx deleted file mode 100644 index e05b1b51eb..0000000000 --- a/apps/web-evals/src/app/runs/[id]/run-status.tsx +++ /dev/null @@ -1,79 +0,0 @@ -"use client" - -import { Link2, Link2Off, CheckCircle2 } from "lucide-react" -import type { RunStatus as _RunStatus } from "@/hooks/use-run-status" -import { cn } from "@/lib/utils" -import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui" - -function StreamIcon({ status }: { status: "connected" | "waiting" | "error" }) { - if (status === "connected") { - return - } - return -} - -export const RunStatus = ({ - runStatus: { sseStatus, heartbeat, runners = [] }, - isComplete = false, -}: { - runStatus: _RunStatus - isComplete?: boolean -}) => { - // For completed runs, show a simple "Complete" badge - if (isComplete) { - return ( - - -
- -
-
- - Run complete - -
- ) - } - - return ( - - -
- {/* Task Stream status icon */} - - - {/* Task Controller ID */} - {heartbeat ?? "-"} - - {/* Task Runners count */} - 0 ? "text-green-500" : "text-rose-500"}> - {runners.length > 0 ? `${runners.length}r` : "0r"} - -
-
- -
-
- - Task Stream: {sseStatus} -
-
- - Task Controller: {heartbeat ?? "dead"} -
-
- 0 ? "text-green-500" : "text-rose-500"}>● - Task Runners: {runners.length > 0 ? runners.length : "none"} -
- {runners.length > 0 && ( -
- {runners.map((runner) => ( -
{runner}
- ))} -
- )} -
-
-
- ) -} diff --git a/apps/web-evals/src/app/runs/[id]/run.tsx b/apps/web-evals/src/app/runs/[id]/run.tsx deleted file mode 100644 index badd77741e..0000000000 --- a/apps/web-evals/src/app/runs/[id]/run.tsx +++ /dev/null @@ -1,1058 +0,0 @@ -"use client" - -import { useMemo, useState, useCallback, useEffect, Fragment } from "react" -import { toast } from "sonner" -import { LoaderCircle, FileText, Copy, Check, StopCircle, List, Layers } from "lucide-react" - -import type { Run, TaskMetrics as _TaskMetrics, Task } from "@roo-code/evals" -import type { ToolName } from "@roo-code/types" - -import { formatCurrency, formatDuration, formatTokens, formatToolUsageSuccessRate } from "@/lib/formatters" -import { useRunStatus } from "@/hooks/use-run-status" -import { killRun } from "@/actions/runs" -import { - Table, - TableBody, - TableCell, - TableHead, - TableHeader, - TableRow, - Tooltip, - TooltipContent, - TooltipTrigger, - Dialog, - DialogContent, - DialogHeader, - DialogTitle, - ScrollArea, - Button, - AlertDialog, - AlertDialogAction, - AlertDialogCancel, - AlertDialogContent, - AlertDialogDescription, - AlertDialogFooter, - AlertDialogHeader, - AlertDialogTitle, -} from "@/components/ui" - -import { TaskStatus } from "./task-status" -import { RunStatus } from "./run-status" - -type TaskMetrics = Pick<_TaskMetrics, "tokensIn" | "tokensOut" | "tokensContext" | "duration" | "cost"> - -// Extended Task type with taskMetrics from useRunStatus -type TaskWithMetrics = Task & { taskMetrics: _TaskMetrics | null } - -type ToolUsageEntry = { attempts: number; failures: number } -type ToolUsage = Record - -// Generate abbreviation from tool name (e.g., "read_file" -> "RF", "list_code_definition_names" -> "LCDN") -function getToolAbbreviation(toolName: string): string { - return toolName - .split("_") - .map((word) => word[0]?.toUpperCase() ?? "") - .join("") -} - -// Pattern definitions for syntax highlighting -type HighlightPattern = { - pattern: RegExp - className: string - // If true, wraps the entire match; if a number, wraps that capture group - wrapGroup?: number -} - -const HIGHLIGHT_PATTERNS: HighlightPattern[] = [ - // Log levels - styled as badges - { pattern: /\|\s*(INFO)\s*\|/g, className: "text-green-400", wrapGroup: 1 }, - { pattern: /\|\s*(WARN|WARNING)\s*\|/g, className: "text-yellow-400", wrapGroup: 1 }, - { pattern: /\|\s*(ERROR)\s*\|/g, className: "text-red-400 font-semibold", wrapGroup: 1 }, - { pattern: /\|\s*(DEBUG)\s*\|/g, className: "text-gray-400", wrapGroup: 1 }, - // Task identifiers - important events - { - pattern: /(taskCreated|taskFocused|taskStarted|taskCompleted|taskAborted|taskResumable)/g, - className: "text-purple-400 font-medium", - }, - // Tool failures - highlight in red - { pattern: /(taskToolFailed)/g, className: "text-red-400 font-bold" }, - { pattern: /(Tool execution failed|tool.*failed|failed.*tool)/gi, className: "text-red-400" }, - { pattern: /(EvalPass)/g, className: "text-green-400 font-bold" }, - { pattern: /(EvalFail)/g, className: "text-red-400 font-bold" }, - // Message arrows - { pattern: /→/g, className: "text-cyan-400" }, - // Tool names in quotes - { pattern: /"(tool)":\s*"([^"]+)"/g, className: "text-orange-400" }, - // JSON keys - { pattern: /"([^"]+)":/g, className: "text-sky-300" }, - // Boolean values - { pattern: /:\s*(true|false)/g, className: "text-amber-400", wrapGroup: 1 }, - // Numbers - { pattern: /:\s*(-?\d+\.?\d*)/g, className: "text-emerald-400", wrapGroup: 1 }, -] - -// Extract timestamp from a log line and return elapsed time from baseline -function formatElapsedTime(timestamp: string, baselineMs: number): string { - const currentMs = new Date(timestamp).getTime() - const elapsedMs = currentMs - baselineMs - const totalSeconds = Math.floor(elapsedMs / 1000) - const minutes = Math.floor(totalSeconds / 60) - const seconds = totalSeconds % 60 - return `${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}` -} - -// Extract the first timestamp from the log to use as baseline -function extractFirstTimestamp(log: string): number | null { - // Match timestamp at start of line: [2025-11-28T09:35:23.187Z | ... or [2025-11-28T09:35:23.187Z] - const match = log.match(/\[(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z)[\s|\]]/) - const isoString = match?.[1] - if (!isoString) return null - return new Date(isoString).getTime() -} - -// Simplify log line by removing redundant metadata -function simplifyLogLine(line: string, baselineMs: number | null): { timestamp: string; simplified: string } { - // Extract timestamp - matches [2025-11-28T09:35:23.187Z | ... format - const timestampMatch = line.match(/\[(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z)[\s|\]]/) - const isoTimestamp = timestampMatch?.[1] - if (!isoTimestamp) { - return { timestamp: "", simplified: line } - } - - const timestamp = baselineMs !== null ? formatElapsedTime(isoTimestamp, baselineMs) : isoTimestamp.slice(11, 19) - - // Remove the timestamp from the line (handles both [timestamp] and [timestamp | formats) - let simplified = line.replace(/\[\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z\s*\|?\s*/, "") - - // Remove redundant metadata: pid, run, task IDs (they're same for entire log) - simplified = simplified.replace(/\|\s*pid:\d+\s*/g, "") - simplified = simplified.replace(/\|\s*run:\d+\s*/g, "") - simplified = simplified.replace(/\|\s*task:\d+\s*/g, "") - simplified = simplified.replace(/runTask\s*\|\s*/g, "") - - // Clean up extra pipes, spaces, and trailing brackets - simplified = simplified.replace(/\|\s*\|/g, "|") - simplified = simplified.replace(/^\s*\|\s*/, "") - simplified = simplified.replace(/\]\s*$/, "") // Remove trailing bracket if present - - return { timestamp, simplified } -} - -// Format a single line with syntax highlighting using React elements (XSS-safe) -function formatLine(line: string): React.ReactNode[] { - // Find all matches with their positions - type Match = { start: number; end: number; text: string; className: string } - const matches: Match[] = [] - - for (const { pattern, className, wrapGroup } of HIGHLIGHT_PATTERNS) { - // Reset regex state - pattern.lastIndex = 0 - let regexMatch - while ((regexMatch = pattern.exec(line)) !== null) { - const capturedText = wrapGroup !== undefined ? regexMatch[wrapGroup] : regexMatch[0] - // Skip if capture group didn't match - if (!capturedText) continue - const start = - wrapGroup !== undefined ? regexMatch.index + regexMatch[0].indexOf(capturedText) : regexMatch.index - matches.push({ - start, - end: start + capturedText.length, - text: capturedText, - className, - }) - } - } - - // Sort matches by position and filter overlapping ones - matches.sort((a, b) => a.start - b.start) - const filteredMatches: Match[] = [] - for (const m of matches) { - const lastMatch = filteredMatches[filteredMatches.length - 1] - if (!lastMatch || m.start >= lastMatch.end) { - filteredMatches.push(m) - } - } - - // Build result with highlighted spans - const result: React.ReactNode[] = [] - let currentPos = 0 - - for (const [i, m] of filteredMatches.entries()) { - // Add text before this match - if (m.start > currentPos) { - result.push(line.slice(currentPos, m.start)) - } - // Add highlighted match - result.push( - - {m.text} - , - ) - currentPos = m.end - } - - // Add remaining text - if (currentPos < line.length) { - result.push(line.slice(currentPos)) - } - - return result.length > 0 ? result : [line] -} - -// Determine the visual style for a log line based on its content -function getLineStyle(line: string): string { - if (line.includes("ERROR")) return "bg-red-950/30 border-l-2 border-red-500" - if (line.includes("WARN") || line.includes("WARNING")) return "bg-yellow-950/20 border-l-2 border-yellow-500" - if (line.includes("taskToolFailed")) return "bg-red-950/30 border-l-2 border-red-500" - if (line.includes("taskStarted") || line.includes("taskCreated")) return "bg-purple-950/20" - if (line.includes("EvalPass")) return "bg-green-950/30 border-l-2 border-green-500" - if (line.includes("EvalFail")) return "bg-red-950/30 border-l-2 border-red-500" - if (line.includes("taskCompleted") || line.includes("taskAborted")) return "bg-blue-950/20" - return "" -} - -// Format log content with basic highlighting (XSS-safe - no dangerouslySetInnerHTML) -function formatLogContent(log: string): React.ReactNode[] { - const lines = log.split("\n") - const baselineMs = extractFirstTimestamp(log) - - return lines.map((line, index) => { - if (!line.trim()) { - return ( -
- {" "} -
- ) - } - - const parsed = simplifyLogLine(line, baselineMs) - const lineStyle = getLineStyle(line) - - return ( -
- {/* Elapsed time */} - - {parsed.timestamp} - - {/* Log content - pl-12 ensures wrapped lines are indented under the timestamp */} - - {formatLine(parsed.simplified)} - -
- ) - }) -} - -export function Run({ run }: { run: Run }) { - const runStatus = useRunStatus(run) - const { tasks, tokenUsage, toolUsage, usageUpdatedAt, heartbeat, runners } = runStatus - - const [selectedTask, setSelectedTask] = useState(null) - const [taskLog, setTaskLog] = useState(null) - const [isLoadingLog, setIsLoadingLog] = useState(false) - const [copied, setCopied] = useState(false) - const [showKillDialog, setShowKillDialog] = useState(false) - const [isKilling, setIsKilling] = useState(false) - const [groupByStatus, setGroupByStatus] = useState(() => { - // Initialize from localStorage if available (client-side only) - if (typeof window !== "undefined") { - const stored = localStorage.getItem("evals-group-by-status") - return stored === "true" - } - return false - }) - - // Persist groupByStatus to localStorage - useEffect(() => { - localStorage.setItem("evals-group-by-status", String(groupByStatus)) - }, [groupByStatus]) - - // Determine if run is still active (has heartbeat or runners) - const isRunActive = !run.taskMetricsId && (!!heartbeat || (runners && runners.length > 0)) - - const onKillRun = useCallback(async () => { - setIsKilling(true) - try { - const result = await killRun(run.id) - if (result.killedContainers.length > 0) { - toast.success(`Killed ${result.killedContainers.length} container(s)`) - } else if (result.errors.length === 0) { - toast.info("No running containers found") - } else { - toast.error(result.errors.join(", ")) - } - } catch (error) { - console.error("Failed to kill run:", error) - toast.error("Failed to kill run") - } finally { - setIsKilling(false) - setShowKillDialog(false) - } - }, [run.id]) - - const onCopyLog = useCallback(async () => { - if (!taskLog) return - - try { - await navigator.clipboard.writeText(taskLog) - setCopied(true) - toast.success("Log copied to clipboard") - setTimeout(() => setCopied(false), 2000) - } catch (error) { - console.error("Failed to copy log:", error) - toast.error("Failed to copy log") - } - }, [taskLog]) - - // Handle ESC key to close the dialog - useEffect(() => { - const handleKeyDown = (e: KeyboardEvent) => { - if (e.key === "Escape" && selectedTask) { - setSelectedTask(null) - } - } - - document.addEventListener("keydown", handleKeyDown) - return () => document.removeEventListener("keydown", handleKeyDown) - }, [selectedTask]) - - const taskMetrics: Record = useMemo(() => { - // Reference usageUpdatedAt to trigger recomputation when Map contents change - void usageUpdatedAt - const metrics: Record = {} - - // Helper to calculate duration from database timestamps when streaming duration - // is unavailable (e.g., page was loaded after TaskStarted event was published) - const calculateDurationFromTimestamps = (task: TaskWithMetrics): number => { - if (!task.startedAt) return 0 - const startTime = new Date(task.startedAt).getTime() - const endTime = task.finishedAt ? new Date(task.finishedAt).getTime() : Date.now() - return endTime - startTime - } - - tasks?.forEach((task) => { - const streamingUsage = tokenUsage.get(task.id) - const dbMetrics = task.taskMetrics - - // For finished tasks, prefer DB values but fall back to streaming values - // This handles race conditions during timeout where DB might not have latest data - if (task.finishedAt) { - // Check if DB metrics have meaningful values (not just default/empty) - const dbHasData = dbMetrics && (dbMetrics.tokensIn > 0 || dbMetrics.tokensOut > 0 || dbMetrics.cost > 0) - if (dbHasData) { - // If DB duration is 0 but we have timestamps, calculate from timestamps - const duration = dbMetrics.duration || calculateDurationFromTimestamps(task) - metrics[task.id] = { ...dbMetrics, duration } - } else if (streamingUsage) { - // Fall back to streaming values if DB is empty/stale - // Use streaming duration, or calculate from timestamps if not available - const duration = streamingUsage.duration || calculateDurationFromTimestamps(task) - metrics[task.id] = { - tokensIn: streamingUsage.totalTokensIn, - tokensOut: streamingUsage.totalTokensOut, - tokensContext: streamingUsage.contextTokens, - duration, - cost: streamingUsage.totalCost, - } - } else { - // Task finished but no DB metrics and no streaming data - // (e.g., page loaded after task completed, metrics not persisted) - // Still provide duration calculated from timestamps - metrics[task.id] = { - tokensIn: 0, - tokensOut: 0, - tokensContext: 0, - duration: calculateDurationFromTimestamps(task), - cost: 0, - } - } - } else if (streamingUsage) { - // For running tasks, use streaming values - // Use streaming duration, or calculate from task.startedAt if not available - // (happens when page loads after TaskStarted event was already published) - const duration = streamingUsage.duration || calculateDurationFromTimestamps(task) - metrics[task.id] = { - tokensIn: streamingUsage.totalTokensIn, - tokensOut: streamingUsage.totalTokensOut, - tokensContext: streamingUsage.contextTokens, - duration, - cost: streamingUsage.totalCost, - } - } else if (task.startedAt) { - // Task has started (has startedAt in DB) but no streaming data yet - // This can happen when page loads after TaskStarted but before TokenUsageUpdated - metrics[task.id] = { - tokensIn: 0, - tokensOut: 0, - tokensContext: 0, - duration: calculateDurationFromTimestamps(task), - cost: 0, - } - } - }) - - return metrics - }, [tasks, tokenUsage, usageUpdatedAt]) - - const onViewTaskLog = useCallback( - async (task: Task) => { - // Only allow viewing logs for tasks that have started. - // Note: we treat presence of derived metrics as evidence of a started task, - // since this page may be rendered without streaming `tokenUsage` populated. - const hasStarted = !!task.startedAt || !!tokenUsage.get(task.id) || !!taskMetrics[task.id] - if (!hasStarted) { - toast.error("Task has not started yet") - return - } - - setSelectedTask(task) - setIsLoadingLog(true) - setTaskLog(null) - - try { - const response = await fetch(`/api/runs/${run.id}/logs/${task.id}`) - - if (!response.ok) { - const error = await response.json() - toast.error(error.error || "Failed to load log") - setSelectedTask(null) - return - } - - const data = await response.json() - setTaskLog(data.logContent) - } catch (error) { - console.error("Error loading task log:", error) - toast.error("Failed to load log") - setSelectedTask(null) - } finally { - setIsLoadingLog(false) - } - }, - [run.id, tokenUsage, taskMetrics], - ) - - // Collect all unique tool names from all tasks and sort by total attempts - const toolColumns = useMemo(() => { - // Reference usageUpdatedAt to trigger recomputation when Map contents change - void usageUpdatedAt - if (!tasks) return [] - - const toolTotals = new Map() - - for (const task of tasks) { - // Get both DB and streaming values - const dbToolUsage = task.taskMetrics?.toolUsage - const streamingToolUsage = toolUsage.get(task.id) - - // For finished tasks, prefer DB values but fall back to streaming values - // For running tasks, use streaming values - // This handles race conditions during timeout where DB might not have latest data - const taskToolUsage = task.finishedAt - ? dbToolUsage && Object.keys(dbToolUsage).length > 0 - ? dbToolUsage - : streamingToolUsage - : streamingToolUsage - - if (taskToolUsage) { - for (const [toolName, usage] of Object.entries(taskToolUsage)) { - const tool = toolName as ToolName - const current = toolTotals.get(tool) ?? 0 - toolTotals.set(tool, current + usage.attempts) - } - } - } - - // Sort by total attempts descending - return Array.from(toolTotals.entries()) - .sort((a, b) => b[1] - a[1]) - .map(([name]): ToolName => name) - // toolUsage ref is stable; usageUpdatedAt triggers recomputation when Map contents change - }, [tasks, toolUsage, usageUpdatedAt]) - - // Compute aggregate stats - const stats = useMemo(() => { - // Reference usageUpdatedAt to trigger recomputation when Map contents change - void usageUpdatedAt - if (!tasks) return null - - const passed = tasks.filter((t) => t.passed === true).length - const failed = tasks.filter((t) => t.passed === false).length - const completed = passed + failed - - let totalTokensIn = 0 - let totalTokensOut = 0 - let totalCost = 0 - let totalDuration = 0 - - // Aggregate tool usage from all tasks (both finished and running) - const toolUsageAggregate: ToolUsage = {} - - for (const task of tasks) { - const metrics = taskMetrics[task.id] - if (metrics) { - totalTokensIn += metrics.tokensIn - totalTokensOut += metrics.tokensOut - totalCost += metrics.cost - totalDuration += metrics.duration - } - - // Aggregate tool usage: prefer DB values for finished tasks, fall back to streaming values - // This handles race conditions during timeout where DB might not have latest data - const dbToolUsage = task.taskMetrics?.toolUsage - const streamingToolUsage = toolUsage.get(task.id) - const taskToolUsage = task.finishedAt - ? dbToolUsage && Object.keys(dbToolUsage).length > 0 - ? dbToolUsage - : streamingToolUsage - : streamingToolUsage - - if (taskToolUsage) { - for (const [key, usage] of Object.entries(taskToolUsage)) { - const tool = key as keyof ToolUsage - if (!toolUsageAggregate[tool]) { - toolUsageAggregate[tool] = { attempts: 0, failures: 0 } - } - toolUsageAggregate[tool].attempts += usage.attempts - toolUsageAggregate[tool].failures += usage.failures - } - } - } - - const remaining = tasks.length - completed - - return { - passed, - failed, - completed, - remaining, - passRate: completed > 0 ? ((passed / completed) * 100).toFixed(1) : null, - totalTokensIn, - totalTokensOut, - totalCost, - totalDuration, - toolUsage: toolUsageAggregate, - } - // Map refs are stable; usageUpdatedAt triggers recomputation when Map contents change - }, [tasks, taskMetrics, toolUsage, usageUpdatedAt]) - - // Calculate elapsed time (wall-clock time from run creation to completion or now) - const elapsedTime = useMemo(() => { - // Reference usageUpdatedAt to trigger recomputation for live elapsed time updates - void usageUpdatedAt - if (!tasks || tasks.length === 0) return null - - const startTime = new Date(run.createdAt).getTime() - - // If run is complete, find the latest finishedAt from tasks - if (run.taskMetricsId) { - const latestFinish = tasks.reduce((latest, task) => { - if (task.finishedAt) { - const finishTime = new Date(task.finishedAt).getTime() - return finishTime > latest ? finishTime : latest - } - return latest - }, startTime) - return latestFinish - startTime - } - - // If still running, use current time - return Date.now() - startTime - }, [tasks, run.createdAt, run.taskMetricsId, usageUpdatedAt]) - - // Task status categories - type TaskStatusCategory = "failed" | "in_progress" | "passed" | "not_started" - - const getTaskStatusCategory = useCallback( - (task: TaskWithMetrics): TaskStatusCategory => { - if (task.passed === false) return "failed" - if (task.passed === true) return "passed" - // Check streaming data, DB metrics, or startedAt timestamp - const hasStarted = !!task.startedAt || !!tokenUsage.get(task.id) || !!taskMetrics[task.id] - if (hasStarted) return "in_progress" - return "not_started" - }, - [tokenUsage, taskMetrics], - ) - - // Group tasks by status while preserving original index - const groupedTasks = useMemo(() => { - if (!tasks || !groupByStatus) return null - - const groups: Record> = { - failed: [], - in_progress: [], - passed: [], - not_started: [], - } - - tasks.forEach((task, index) => { - const status = getTaskStatusCategory(task) - groups[status].push({ task, originalIndex: index }) - }) - - return groups - }, [tasks, groupByStatus, getTaskStatusCategory]) - - const statusLabels = useMemo( - (): Record => ({ - failed: { label: "Failed", className: "text-red-500", count: groupedTasks?.failed.length ?? 0 }, - in_progress: { - label: "In Progress", - className: "text-yellow-500", - count: groupedTasks?.in_progress.length ?? 0, - }, - passed: { label: "Passed", className: "text-green-500", count: groupedTasks?.passed.length ?? 0 }, - not_started: { - label: "Not Started", - className: "text-muted-foreground", - count: groupedTasks?.not_started.length ?? 0, - }, - }), - [groupedTasks], - ) - - const statusOrder: TaskStatusCategory[] = ["failed", "in_progress", "passed", "not_started"] - - // Helper to render a task row - const renderTaskRow = (task: TaskWithMetrics, originalIndex: number) => { - const hasStarted = !!task.startedAt || !!tokenUsage.get(task.id) || !!taskMetrics[task.id] - return ( - hasStarted && onViewTaskLog(task)}> - - {originalIndex + 1} - - -
- -
- - {task.language}/{task.exercise} - {task.iteration > 1 && ( - (#{task.iteration}) - )} - - {hasStarted && ( - - - - - Click to view log - - )} -
-
-
- {taskMetrics[task.id] ? ( - <> - -
-
{formatTokens(taskMetrics[task.id]!.tokensIn)}
/ -
{formatTokens(taskMetrics[task.id]!.tokensOut)}
-
-
- - {formatTokens(taskMetrics[task.id]!.tokensContext)} - - {toolColumns.map((toolName) => { - const dbUsage = task.taskMetrics?.toolUsage?.[toolName] - const streamingUsage = toolUsage.get(task.id)?.[toolName] - const usage = task.finishedAt ? (dbUsage ?? streamingUsage) : streamingUsage - - const successRate = - usage && usage.attempts > 0 - ? ((usage.attempts - usage.failures) / usage.attempts) * 100 - : 100 - const rateColor = - successRate === 100 - ? "text-muted-foreground" - : successRate >= 80 - ? "text-yellow-500" - : "text-red-500" - return ( - - {usage ? ( -
- {usage.attempts} - {formatToolUsageSuccessRate(usage)} -
- ) : ( - - - )} -
- ) - })} - - {taskMetrics[task.id]!.duration ? formatDuration(taskMetrics[task.id]!.duration) : "-"} - - - {formatCurrency(taskMetrics[task.id]!.cost)} - - - ) : ( - - )} -
- ) - } - - return ( - <> -
- {!tasks ? ( - - ) : ( - <> - {/* View Toggle */} -
- - - - - - {groupByStatus ? "Show tasks in run order" : "Group tasks by status"} - - -
- - - {stats && ( - - - {/* Provider, Model title and status */} -
- {run.settings?.apiProvider && ( - - {run.settings.apiProvider} - - )} -
{run.model}
- - {run.description && ( - - - {run.description} - - )} - {isRunActive && ( - - - - - - Stop all containers for this run - - - )} -
- {/* Main Stats Row */} -
- {/* Pass Rate / Fail Rate / Remaining % */} -
-
- - {stats.completed > 0 - ? `${((stats.passed / stats.completed) * 100).toFixed(1)}%` - : "-"} - - / - - {stats.completed > 0 - ? `${((stats.failed / stats.completed) * 100).toFixed(1)}%` - : "-"} - - / - - {tasks.length > 0 - ? `${((stats.remaining / tasks.length) * 100).toFixed(1)}%` - : "-"} - -
-
- {stats.passed} - {" / "} - {stats.failed} - {" / "} - {stats.remaining} - {" of "} - {tasks.length} -
-
- - {/* Tokens */} -
-
- {formatTokens(stats.totalTokensIn)} - / - {formatTokens(stats.totalTokensOut)} -
-
Tokens In / Out
-
- - {/* Cost */} -
-
- {formatCurrency(stats.totalCost)} -
-
Cost
-
- - {/* Duration */} -
-
- {stats.totalDuration > 0 - ? formatDuration(stats.totalDuration) - : "-"} -
-
Duration
-
- - {/* Elapsed Time */} -
-
- {elapsedTime !== null ? formatDuration(elapsedTime) : "-"} -
-
Elapsed
-
- - {/* Estimated Time Remaining - only show if run is active and we have data */} - {!run.taskMetricsId && - elapsedTime !== null && - stats.completed > 0 && - stats.remaining > 0 && ( -
-
- ~ - {formatDuration( - (elapsedTime / stats.completed) * stats.remaining, - )} -
-
- Est. Remaining -
-
- )} -
- - {/* Tool Usage Row */} - {Object.keys(stats.toolUsage).length > 0 && ( -
- {Object.entries(stats.toolUsage) - .sort(([, a], [, b]) => b.attempts - a.attempts) - .map(([toolName, usage]) => { - const abbr = getToolAbbreviation(toolName) - const successRate = - usage.attempts > 0 - ? ((usage.attempts - usage.failures) / - usage.attempts) * - 100 - : 100 - const rateColor = - successRate === 100 - ? "text-green-500" - : successRate >= 80 - ? "text-yellow-500" - : "text-red-500" - return ( - - -
- - {abbr} - - - {usage.attempts} - - - {formatToolUsageSuccessRate(usage)} - -
-
- - {toolName} - -
- ) - })} -
- )} -
-
- )} - - # - Exercise - Tokens In / Out - Context - {toolColumns.map((toolName) => ( - - - {getToolAbbreviation(toolName)} - {toolName} - - - ))} - Duration - Cost - -
- - {groupByStatus && groupedTasks - ? // Grouped view - statusOrder.map((status) => { - const group = groupedTasks[status] - if (group.length === 0) return null - const { label, className } = statusLabels[status] - return ( - - - - - {label} ({group.length}) - - - - {group.map(({ task, originalIndex }) => - renderTaskRow(task, originalIndex), - )} - - ) - }) - : // Default order view - tasks.map((task, index) => renderTaskRow(task, index))} - -
- - )} -
- - {/* Task Log Dialog - Full Screen */} - setSelectedTask(null)}> - - -
- - - {selectedTask?.language}/{selectedTask?.exercise} - {selectedTask?.iteration && selectedTask.iteration > 1 && ( - (#{selectedTask.iteration}) - )} - - ( - {selectedTask?.passed === true - ? "Passed" - : selectedTask?.passed === false - ? "Failed" - : "Running"} - ) - - - {taskLog && ( - - )} -
-
-
- {isLoadingLog ? ( -
- -
- ) : taskLog ? ( - -
- {formatLogContent(taskLog)} -
-
- ) : ( -
- Log file not available (may have been cleared) -
- )} -
-
-
- - {/* Kill Run Confirmation Dialog */} - - - - Kill Run? - - This will stop the controller and all task runner containers for this run. Any running tasks - will be terminated immediately. This action cannot be undone. - - - - Cancel - - {isKilling ? ( - <> - - Killing... - - ) : ( - "Kill Run" - )} - - - - - - ) -} diff --git a/apps/web-evals/src/app/runs/[id]/task-status.tsx b/apps/web-evals/src/app/runs/[id]/task-status.tsx deleted file mode 100644 index bae785131a..0000000000 --- a/apps/web-evals/src/app/runs/[id]/task-status.tsx +++ /dev/null @@ -1,20 +0,0 @@ -import { CircleCheck, CircleDashed, CircleSlash, LoaderCircle } from "lucide-react" - -import type { Task } from "@roo-code/evals" - -type TaskStatusProps = { - task: Task - running: boolean -} - -export const TaskStatus = ({ task, running }: TaskStatusProps) => { - return task.passed === false ? ( - - ) : task.passed === true ? ( - - ) : running ? ( - - ) : ( - - ) -} diff --git a/apps/web-evals/src/app/runs/new/new-run.tsx b/apps/web-evals/src/app/runs/new/new-run.tsx deleted file mode 100644 index 47eabe7fe0..0000000000 --- a/apps/web-evals/src/app/runs/new/new-run.tsx +++ /dev/null @@ -1,992 +0,0 @@ -"use client" - -import { useCallback, useEffect, useMemo, useRef, useState } from "react" -import { useRouter } from "next/navigation" -import { z } from "zod" -import { useQuery } from "@tanstack/react-query" -import { useForm, FormProvider } from "react-hook-form" -import { zodResolver } from "@hookform/resolvers/zod" -import { toast } from "sonner" -import { - X, - Rocket, - Check, - ChevronsUpDown, - SlidersHorizontal, - Info, - Plus, - Minus, - Terminal, - MonitorPlay, -} from "lucide-react" - -import { - type ProviderSettings, - type GlobalSettings, - globalSettingsSchema, - providerSettingsSchema, - getModelId, - EVALS_SETTINGS, -} from "@roo-code/types" - -import { createRun } from "@/actions/runs" -import { getExercises } from "@/actions/exercises" - -import { - type CreateRun, - type ExecutionMethod, - createRunSchema, - CONCURRENCY_MIN, - CONCURRENCY_MAX, - CONCURRENCY_DEFAULT, - TIMEOUT_MIN, - TIMEOUT_MAX, - TIMEOUT_DEFAULT, - ITERATIONS_MIN, - ITERATIONS_MAX, - ITERATIONS_DEFAULT, -} from "@/lib/schemas" -import { cn } from "@/lib/utils" - -import { normalizeCreateRunForSubmit } from "@/lib/normalize-create-run" - -import { useOpenRouterModels } from "@/hooks/use-open-router-models" - -import { - Button, - FormControl, - FormField, - FormItem, - FormLabel, - FormMessage, - Textarea, - Tabs, - TabsList, - TabsTrigger, - MultiSelect, - Command, - CommandEmpty, - CommandGroup, - CommandInput, - CommandItem, - CommandList, - Popover, - PopoverContent, - PopoverTrigger, - Slider, - Label, - Tooltip, - TooltipContent, - TooltipTrigger, -} from "@/components/ui" - -import { SettingsDiff } from "./settings-diff" - -type ImportedSettings = { - apiConfigs: Record - globalSettings: GlobalSettings - currentApiConfigName: string -} - -type ModelSelection = { - id: string - model: string - popoverOpen: boolean -} - -type ConfigSelection = { - id: string - configName: string - popoverOpen: boolean -} - -export function NewRun() { - const router = useRouter() - const modelSelectionsByProviderRef = useRef>({}) - const modelValueByProviderRef = useRef>({}) - - const [provider, setModelSource] = useState<"openrouter" | "other">("openrouter") - const [executionMethod, setExecutionMethod] = useState("vscode") - const [commandExecutionTimeout, setCommandExecutionTimeout] = useState(20) - const [terminalShellIntegrationTimeout, setTerminalShellIntegrationTimeout] = useState(30) // seconds - - const [modelSelections, setModelSelections] = useState([ - { id: crypto.randomUUID(), model: "", popoverOpen: false }, - ]) - - const [importedSettings, setImportedSettings] = useState(null) - const [configSelections, setConfigSelections] = useState([ - { id: crypto.randomUUID(), configName: "", popoverOpen: false }, - ]) - - const openRouter = useOpenRouterModels() - const models = provider === "openrouter" ? openRouter.data : [] - const searchValue = provider === "openrouter" ? openRouter.searchValue : "" - const setSearchValue = provider === "openrouter" ? openRouter.setSearchValue : () => {} - const onFilter = provider === "openrouter" ? openRouter.onFilter : undefined - - const exercises = useQuery({ queryKey: ["getExercises"], queryFn: () => getExercises() }) - - const [selectedExercises, setSelectedExercises] = useState([]) - - const form = useForm({ - resolver: zodResolver(createRunSchema), - defaultValues: { - model: "", - description: "", - suite: "full", - exercises: [], - settings: undefined, - concurrency: CONCURRENCY_DEFAULT, - timeout: TIMEOUT_DEFAULT, - iterations: ITERATIONS_DEFAULT, - jobToken: "", - executionMethod: "vscode", - }, - }) - - const { - register, - setValue, - clearErrors, - watch, - getValues, - formState: { isSubmitting }, - } = form - - const [suite, settings] = watch(["suite", "settings", "concurrency"]) - - // Ensure the `exercises` field is registered so RHF always includes it in submit values. - useEffect(() => { - register("exercises") - }, [register]) - - // Load settings from localStorage on mount - useEffect(() => { - const savedConcurrency = localStorage.getItem("evals-concurrency") - - if (savedConcurrency) { - const parsed = parseInt(savedConcurrency, 10) - - if (!isNaN(parsed) && parsed >= CONCURRENCY_MIN && parsed <= CONCURRENCY_MAX) { - setValue("concurrency", parsed) - } - } - - const savedTimeout = localStorage.getItem("evals-timeout") - - if (savedTimeout) { - const parsed = parseInt(savedTimeout, 10) - - if (!isNaN(parsed) && parsed >= TIMEOUT_MIN && parsed <= TIMEOUT_MAX) { - setValue("timeout", parsed) - } - } - - const savedCommandTimeout = localStorage.getItem("evals-command-execution-timeout") - - if (savedCommandTimeout) { - const parsed = parseInt(savedCommandTimeout, 10) - - if (!isNaN(parsed) && parsed >= 20 && parsed <= 60) { - setCommandExecutionTimeout(parsed) - } - } - - const savedShellTimeout = localStorage.getItem("evals-shell-integration-timeout") - - if (savedShellTimeout) { - const parsed = parseInt(savedShellTimeout, 10) - - if (!isNaN(parsed) && parsed >= 30 && parsed <= 60) { - setTerminalShellIntegrationTimeout(parsed) - } - } - - const savedSuite = localStorage.getItem("evals-suite") - - if (savedSuite === "partial") { - setValue("suite", "partial") - const savedExercises = localStorage.getItem("evals-exercises") - if (savedExercises) { - try { - const parsed = JSON.parse(savedExercises) as string[] - if (Array.isArray(parsed)) { - setSelectedExercises(parsed) - setValue("exercises", parsed) - } - } catch { - // Invalid JSON, ignore. - } - } - } - }, [setValue]) - - // Track previous provider to detect switches - const [prevProvider, setPrevProvider] = useState(provider) - - // Preserve selections per provider; avoids cross-contamination while keeping UX stable. - useEffect(() => { - if (provider === prevProvider) return - - modelSelectionsByProviderRef.current[prevProvider] = modelSelections - modelValueByProviderRef.current[prevProvider] = getValues("model") - - const nextModelSelections = - modelSelectionsByProviderRef.current[provider] ?? - ([{ id: crypto.randomUUID(), model: "", popoverOpen: false }] satisfies ModelSelection[]) - - setModelSelections(nextModelSelections) - - const nextModelValue = - modelValueByProviderRef.current[provider] ?? - nextModelSelections.find((s) => s.model.trim().length > 0)?.model ?? - (provider === "other" && importedSettings && configSelections[0]?.configName - ? (getModelId(importedSettings.apiConfigs[configSelections[0].configName] ?? {}) ?? "") - : "") - - setValue("model", nextModelValue) - setPrevProvider(provider) - }, [provider, prevProvider, modelSelections, setValue, getValues, importedSettings, configSelections]) - - // Extract unique languages from exercises - const languages = useMemo(() => { - if (!exercises.data) { - return [] - } - - const langs = new Set() - - for (const path of exercises.data) { - const lang = path.split("/")[0] - - if (lang) { - langs.add(lang) - } - } - - return Array.from(langs).sort() - }, [exercises.data]) - - const getExercisesForLanguage = useCallback( - (lang: string) => { - if (!exercises.data) { - return [] - } - - return exercises.data.filter((path) => path.startsWith(`${lang}/`)) - }, - [exercises.data], - ) - - const toggleLanguage = useCallback( - (lang: string) => { - const langExercises = getExercisesForLanguage(lang) - const allSelected = langExercises.every((ex) => selectedExercises.includes(ex)) - - let newSelected: string[] - - if (allSelected) { - newSelected = selectedExercises.filter((ex) => !ex.startsWith(`${lang}/`)) - } else { - const existing = new Set(selectedExercises) - - for (const ex of langExercises) { - existing.add(ex) - } - - newSelected = Array.from(existing) - } - - setSelectedExercises(newSelected) - setValue("exercises", newSelected) - localStorage.setItem("evals-exercises", JSON.stringify(newSelected)) - }, - [getExercisesForLanguage, selectedExercises, setValue], - ) - - const isLanguageSelected = useCallback( - (lang: string) => { - const langExercises = getExercisesForLanguage(lang) - return langExercises.length > 0 && langExercises.every((ex) => selectedExercises.includes(ex)) - }, - [getExercisesForLanguage, selectedExercises], - ) - - const isLanguagePartiallySelected = useCallback( - (lang: string) => { - const langExercises = getExercisesForLanguage(lang) - const selectedCount = langExercises.filter((ex) => selectedExercises.includes(ex)).length - return selectedCount > 0 && selectedCount < langExercises.length - }, - [getExercisesForLanguage, selectedExercises], - ) - - const addModelSelection = useCallback(() => { - setModelSelections((prev) => [...prev, { id: crypto.randomUUID(), model: "", popoverOpen: false }]) - }, []) - - const removeModelSelection = useCallback((id: string) => { - setModelSelections((prev) => prev.filter((s) => s.id !== id)) - }, []) - - const updateModelSelection = useCallback( - (id: string, model: string) => { - setModelSelections((prev) => prev.map((s) => (s.id === id ? { ...s, model, popoverOpen: false } : s))) - // Also set the form model field for validation (use first non-empty model). - setValue("model", model) - }, - [setValue], - ) - - const toggleModelPopover = useCallback((id: string, open: boolean) => { - setModelSelections((prev) => prev.map((s) => (s.id === id ? { ...s, popoverOpen: open } : s))) - }, []) - - const addConfigSelection = useCallback(() => { - setConfigSelections((prev) => [...prev, { id: crypto.randomUUID(), configName: "", popoverOpen: false }]) - }, []) - - const removeConfigSelection = useCallback((id: string) => { - setConfigSelections((prev) => prev.filter((s) => s.id !== id)) - }, []) - - const updateConfigSelection = useCallback( - (id: string, configName: string) => { - setConfigSelections((prev) => prev.map((s) => (s.id === id ? { ...s, configName, popoverOpen: false } : s))) - - // Also update the form settings for the first config (for validation). - if (importedSettings) { - const providerSettings = importedSettings.apiConfigs[configName] ?? {} - setValue("model", getModelId(providerSettings) ?? "") - setValue("settings", { ...EVALS_SETTINGS, ...providerSettings, ...importedSettings.globalSettings }) - } - }, - [importedSettings, setValue], - ) - - const toggleConfigPopover = useCallback((id: string, open: boolean) => { - setConfigSelections((prev) => prev.map((s) => (s.id === id ? { ...s, popoverOpen: open } : s))) - }, []) - - const onSubmit = useCallback( - async (values: CreateRun) => { - try { - const baseValues = normalizeCreateRunForSubmit(values, selectedExercises, suite) - - const selectionsToLaunch: { model: string; configName?: string }[] = [] - - if (provider === "other") { - for (const config of configSelections) { - if (config.configName) { - selectionsToLaunch.push({ model: "", configName: config.configName }) - } - } - } else { - for (const selection of modelSelections) { - if (selection.model) { - selectionsToLaunch.push({ model: selection.model }) - } - } - } - - if (selectionsToLaunch.length === 0) { - toast.error("Please select at least one model or config") - return - } - - const totalRuns = selectionsToLaunch.length - toast.info(totalRuns > 1 ? `Launching ${totalRuns} runs (every 20 seconds)...` : "Launching run...") - - for (let i = 0; i < selectionsToLaunch.length; i++) { - const selection = selectionsToLaunch[i]! - - // Wait 20 seconds between runs (except for the first one). - if (i > 0) { - await new Promise((resolve) => setTimeout(resolve, 20_000)) - } - - const runValues = { ...baseValues } - - if (provider === "openrouter") { - runValues.model = selection.model - runValues.settings = { - ...(runValues.settings || {}), - apiProvider: "openrouter", - openRouterModelId: selection.model, - commandExecutionTimeout, - terminalShellIntegrationTimeout: terminalShellIntegrationTimeout * 1000, - } - } else if (provider === "other" && selection.configName && importedSettings) { - const providerSettings = importedSettings.apiConfigs[selection.configName] ?? {} - runValues.model = getModelId(providerSettings) ?? "" - runValues.settings = { - ...EVALS_SETTINGS, - ...providerSettings, - ...importedSettings.globalSettings, - commandExecutionTimeout, - terminalShellIntegrationTimeout: terminalShellIntegrationTimeout * 1000, - } - } - - try { - await createRun(runValues) - toast.success(`Run ${i + 1}/${totalRuns} launched`) - } catch (e) { - toast.error(`Run ${i + 1} failed: ${e instanceof Error ? e.message : "Unknown error"}`) - } - } - - router.push("/") - } catch (e) { - toast.error(e instanceof Error ? e.message : "An unknown error occurred.") - } - }, - [ - suite, - selectedExercises, - provider, - modelSelections, - configSelections, - importedSettings, - router, - commandExecutionTimeout, - terminalShellIntegrationTimeout, - ], - ) - - const onImportSettings = useCallback( - async (event: React.ChangeEvent) => { - const file = event.target.files?.[0] - - if (!file) { - return - } - - clearErrors("settings") - - try { - const { providerProfiles, globalSettings } = z - .object({ - providerProfiles: z.object({ - currentApiConfigName: z.string(), - apiConfigs: z.record(z.string(), providerSettingsSchema), - }), - globalSettings: globalSettingsSchema, - }) - .parse(JSON.parse(await file.text())) - - setImportedSettings({ - apiConfigs: providerProfiles.apiConfigs, - globalSettings, - currentApiConfigName: providerProfiles.currentApiConfigName, - }) - - const defaultConfigName = providerProfiles.currentApiConfigName - setConfigSelections([{ id: crypto.randomUUID(), configName: defaultConfigName, popoverOpen: false }]) - - const providerSettings = providerProfiles.apiConfigs[defaultConfigName] ?? {} - setValue("model", getModelId(providerSettings) ?? "") - setValue("settings", { ...EVALS_SETTINGS, ...providerSettings, ...globalSettings }) - - event.target.value = "" - } catch (e) { - console.error(e) - toast.error(e instanceof Error ? e.message : "An unknown error occurred.") - } - }, - [clearErrors, setValue], - ) - - return ( - <> - -
- ( - - setModelSource(value as "openrouter" | "other")}> - - Import - OpenRouter - - - - {provider === "other" ? ( -
- - - - {importedSettings && Object.keys(importedSettings.apiConfigs).length > 0 && ( -
- - {configSelections.map((selection, index) => ( -
- - toggleConfigPopover(selection.id, open) - }> - - - - - - - - No config found. - - {Object.keys( - importedSettings.apiConfigs, - ).map((configName) => ( - - updateConfigSelection( - selection.id, - configName, - ) - }> - {configName} - {configName === - importedSettings.currentApiConfigName && ( - - (default) - - )} - - - ))} - - - - - - {index === configSelections.length - 1 ? ( - - ) : ( - - )} -
- ))} -
- )} - - {settings && ( - - )} -
- ) : ( - <> -
- {modelSelections.map((selection, index) => ( -
- toggleModelPopover(selection.id, open)}> - - - - - - - - No model found. - - {models?.map(({ id, name }) => ( - - updateModelSelection( - selection.id, - id, - ) - }> - {name} - - - ))} - - - - - - {index === modelSelections.length - 1 ? ( - - ) : ( - - )} -
- ))} -
- - )} - - -
- )} - /> - - ( - - Exercises -
- { - setValue("suite", value as "full" | "partial") - localStorage.setItem("evals-suite", value) - if (value === "full") { - setSelectedExercises([]) - setValue("exercises", []) - localStorage.removeItem("evals-exercises") - } - }}> - - All - Some - - - {suite === "partial" && languages.length > 0 && ( -
- {languages.map((lang) => ( - - ))} -
- )} -
- {suite === "partial" && ( - ({ value: path, label: path })) || []} - value={selectedExercises} - onValueChange={(value) => { - setSelectedExercises(value) - setValue("exercises", value) - localStorage.setItem("evals-exercises", JSON.stringify(value)) - }} - placeholder="Select" - variant="inverted" - maxCount={4} - /> - )} - -
- )} - /> - - {/* Concurrency, Timeout, and Iterations in a 3-column row */} -
- ( - - Concurrency - -
- { - field.onChange(value[0]) - localStorage.setItem("evals-concurrency", String(value[0])) - }} - /> -
{field.value}
-
-
- -
- )} - /> - - ( - - Timeout (Minutes) - -
- { - field.onChange(value[0]) - localStorage.setItem("evals-timeout", String(value[0])) - }} - /> -
{field.value}
-
-
- -
- )} - /> - - ( - - Iterations - -
- { - field.onChange(value[0]) - }} - /> -
{field.value}
-
-
- -
- )} - /> -
- - {/* Terminal timeouts in a 2-column row */} -
- -
- - - - - - -

- Maximum time in seconds to wait for terminal command execution to complete - before timing out. This applies to commands run via the execute_command - tool. -

-
-
-
-
- { - if (value !== undefined) { - setCommandExecutionTimeout(value) - localStorage.setItem("evals-command-execution-timeout", String(value)) - } - }} - /> -
{commandExecutionTimeout}
-
-
- - -
- - - - - - -

- Maximum time in seconds to wait for shell integration to initialize when - opening a new terminal. -

-
-
-
-
- { - if (value !== undefined) { - setTerminalShellIntegrationTimeout(value) - localStorage.setItem("evals-shell-integration-timeout", String(value)) - } - }} - /> -
{terminalShellIntegrationTimeout}
-
-
-
- - {/* Execution Method */} - ( - - Execution Method - { - const newExecutionMethod = value as ExecutionMethod - setExecutionMethod(newExecutionMethod) - setValue("executionMethod", newExecutionMethod) - }}> - - - - VSCode - - - - CLI - - - - - - )} - /> - - ( - - Description / Notes - -