From 07f1a67a729cc6665bd2450d52dc461c0cf347d0 Mon Sep 17 00:00:00 2001
From: Aleksei Sviridkin <f@lex.la>
Date: Tue, 26 May 2026 17:43:27 +0300
Subject: [PATCH 01/20] feat(k8s-client): allow injecting a custom K8sClient
 into K8sProvider

K8sProvider now accepts an optional 'client' prop. When supplied, it
replaces the K8sClient that would otherwise be constructed from 'config';
when omitted, the existing 'new K8sClient(config)' path is preserved
so production callers stay unchanged.

This is the lever tests need to substitute a mock without touching
network or globals. Without it, the only way to isolate components from
real fetch behaviour is to monkey-patch K8sClient.prototype, which
couples every test file to the implementation and leaks across files
when restoreMocks is forgotten.

Assisted-By: Claude <noreply@anthropic.com>
Signed-off-by: Aleksei Sviridkin <f@lex.la>
---
 .../__tests__/k8s-client/provider.test.tsx    | 53 +++++++++++++++++++
 packages/k8s-client/src/provider.tsx          | 13 +++--
 2 files changed, 63 insertions(+), 3 deletions(-)
 create mode 100644 apps/console/src/__tests__/k8s-client/provider.test.tsx
diff --git a/apps/console/src/__tests__/k8s-client/provider.test.tsx b/apps/console/src/__tests__/k8s-client/provider.test.tsx
new file mode 100644
index 0000000..97c5691
--- /dev/null
+++ b/apps/console/src/__tests__/k8s-client/provider.test.tsx
@@ -0,0 +1,53 @@
+import { describe, it, expect } from "vitest"
+import { render } from "@testing-library/react"
+import { K8sClient, K8sProvider, useK8sClient } from "@cozystack/k8s-client"
+
+function ClientCapture({ onClient }: { onClient: (c: K8sClient) => void }) {
+  const c = useK8sClient()
+  onClient(c)
+  return null
+}
+
+describe("K8sProvider", () => {
+  it("passes the injected client through to useK8sClient", () => {
+    const injected = new K8sClient({ baseUrl: "/injected" })
+    let captured: K8sClient | null = null
+    render(
+      <K8sProvider client={injected}>
+        <ClientCapture onClient={(c) => (captured = c)} />
+      </K8sProvider>,
+    )
+    expect(captured).toBe(injected)
+  })
+
+  it("constructs its own client when none is injected", () => {
+    let captured: K8sClient | null = null
+    render(
+      <K8sProvider>
+        <ClientCapture onClient={(c) => (captured = c)} />
+      </K8sProvider>,
+    )
+    expect(captured).toBeInstanceOf(K8sClient)
+  })
+
+  it("constructs a client from the provided config when no client is injected", () => {
+    let captured: K8sClient | null = null
+    render(
+      <K8sProvider config={{ baseUrl: "/from-config" }}>
+        <ClientCapture onClient={(c) => (captured = c)} />
+      </K8sProvider>,
+    )
+    expect(captured).toBeInstanceOf(K8sClient)
+  })
+
+  it("prefers the injected client over the config when both are supplied", () => {
+    const injected = new K8sClient({ baseUrl: "/injected" })
+    let captured: K8sClient | null = null
+    render(
+      <K8sProvider client={injected} config={{ baseUrl: "/ignored" }}>
+        <ClientCapture onClient={(c) => (captured = c)} />
+      </K8sProvider>,
+    )
+    expect(captured).toBe(injected)
+  })
+})
diff --git a/packages/k8s-client/src/provider.tsx b/packages/k8s-client/src/provider.tsx
index 7fe53c4..04bce76 100644
--- a/packages/k8s-client/src/provider.tsx
+++ b/packages/k8s-client/src/provider.tsx
@@ -28,17 +28,24 @@ const defaultQueryClient = new QueryClient({
 
 interface K8sProviderProps {
   config?: K8sClientConfig
+  /**
+   * Pre-built K8sClient instance. When supplied, replaces the client that
+   * would otherwise be constructed from `config`. Primary use case: tests
+   * that need to inject a mock; production code should pass `config` and
+   * let the provider build the real client.
+   */
+  client?: K8sClient
   queryClient?: QueryClient
   children: ReactNode
 }
 
-export function K8sProvider({ config, queryClient, children }: K8sProviderProps) {
-  const client = useMemo(() => new K8sClient(config), [config])
+export function K8sProvider({ config, client, queryClient, children }: K8sProviderProps) {
+  const resolved = useMemo(() => client ?? new K8sClient(config), [client, config])
   const qc = queryClient ?? defaultQueryClient
 
   return (
     <QueryClientProvider client={qc}>
-      <K8sClientContext.Provider value={client}>{children}</K8sClientContext.Provider>
+      <K8sClientContext.Provider value={resolved}>{children}</K8sClientContext.Provider>
     </QueryClientProvider>
   )
 }

From 3a232253b7e2ce8a8f3511e2693ae41311a952ca Mon Sep 17 00:00:00 2001
From: Aleksei Sviridkin <f@lex.la>
Date: Tue, 26 May 2026 17:46:14 +0300
Subject: [PATCH 02/20] chore(console): scaffold K8s-driven test harness
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds the minimum testing surface needed to exercise components that
consume K8s hooks: renderWithK8sProvider wraps the tree in a QueryClient
with retries off, the K8sProvider with an injected mock, and a
MemoryRouter; createMockK8sClient builds a K8sClient instance whose
list/get/watch methods are replaced with spies that serve in-memory
overrides keyed by api group, version, plural, and namespace. A typed
assignment at the foot of the mock file is intended to pin the
production K8sClient interface in place so future additions to that
class force a corresponding mock stub.

Fixture data covers the three node shapes the cluster-usage page must
render — control-plane without extended resources, plain worker, GPU
worker with one nvidia.com/gpu — plus a matching pod set including one
unscheduled pod so the aggregator can be verified to skip it.

The smoke test pins the contract: a child component that calls
useK8sList resolves through the injected mock without touching network,
the mock receives the expected list call, and the queryClient is
exposed to callers that need to clear or inspect the cache.

Assisted-By: Claude <noreply@anthropic.com>
Signed-off-by: Aleksei Sviridkin <f@lex.la>
---
 .../src/test-utils/fixtures/node-metrics.ts   | 54 ++++++++++
 apps/console/src/test-utils/fixtures/nodes.ts | 86 ++++++++++++++++
 apps/console/src/test-utils/fixtures/pods.ts  | 98 +++++++++++++++++++
 .../console/src/test-utils/mock-k8s-client.ts | 93 ++++++++++++++++++
 apps/console/src/test-utils/render.test.tsx   | 72 ++++++++++++++
 apps/console/src/test-utils/render.tsx        | 31 ++++++
 6 files changed, 434 insertions(+)
 create mode 100644 apps/console/src/test-utils/fixtures/node-metrics.ts
 create mode 100644 apps/console/src/test-utils/fixtures/nodes.ts
 create mode 100644 apps/console/src/test-utils/fixtures/pods.ts
 create mode 100644 apps/console/src/test-utils/mock-k8s-client.ts
 create mode 100644 apps/console/src/test-utils/render.test.tsx
 create mode 100644 apps/console/src/test-utils/render.tsx

diff --git a/apps/console/src/test-utils/fixtures/node-metrics.ts b/apps/console/src/test-utils/fixtures/node-metrics.ts
new file mode 100644
index 0000000..6813ae7
--- /dev/null
+++ b/apps/console/src/test-utils/fixtures/node-metrics.ts
@@ -0,0 +1,54 @@
+import type { K8sList, K8sResource } from "@cozystack/k8s-client"
+
+interface NodeMetricsUsage {
+  cpu: string
+  memory: string
+}
+
+interface NodeMetricsFields {
+  usage?: NodeMetricsUsage
+  timestamp?: string
+  window?: string
+}
+
+export type NodeMetricsFixture = K8sResource<unknown, unknown> & NodeMetricsFields
+
+/**
+ * NodeMetrics entries matching the node fixtures. metrics.k8s.io reports
+ * actual usage as Kubernetes quantities just like capacity, so the same
+ * parseQuantity path renders both numbers.
+ */
+
+export const metricsControlPlane: NodeMetricsFixture = {
+  apiVersion: "metrics.k8s.io/v1beta1",
+  kind: "NodeMetrics",
+  metadata: { name: "cp-1" },
+  usage: { cpu: "150m", memory: "1500Mi" },
+  timestamp: "2026-05-26T17:00:00Z",
+  window: "30s",
+}
+
+export const metricsWorker: NodeMetricsFixture = {
+  apiVersion: "metrics.k8s.io/v1beta1",
+  kind: "NodeMetrics",
+  metadata: { name: "worker-1" },
+  usage: { cpu: "400m", memory: "8Gi" },
+  timestamp: "2026-05-26T17:00:00Z",
+  window: "30s",
+}
+
+export const metricsGpuWorker: NodeMetricsFixture = {
+  apiVersion: "metrics.k8s.io/v1beta1",
+  kind: "NodeMetrics",
+  metadata: { name: "worker-gpu-1" },
+  usage: { cpu: "2", memory: "20Gi" },
+  timestamp: "2026-05-26T17:00:00Z",
+  window: "30s",
+}
+
+export const nodeMetricsListFixture: K8sList<NodeMetricsFixture> = {
+  apiVersion: "metrics.k8s.io/v1beta1",
+  kind: "NodeMetricsList",
+  metadata: { resourceVersion: "300" },
+  items: [metricsControlPlane, metricsWorker, metricsGpuWorker],
+}
diff --git a/apps/console/src/test-utils/fixtures/nodes.ts b/apps/console/src/test-utils/fixtures/nodes.ts
new file mode 100644
index 0000000..8a7c734
--- /dev/null
+++ b/apps/console/src/test-utils/fixtures/nodes.ts
@@ -0,0 +1,86 @@
+import type { K8sList, K8sResource } from "@cozystack/k8s-client"
+
+/**
+ * Three node fixtures cover the three shapes the cluster-usage page must
+ * render: control-plane (no extended resources), plain worker, worker
+ * with a single NVIDIA GPU. Numbers are small but realistic and chosen
+ * so hand-computed aggregates over the set stay obvious.
+ *
+ * creationTimestamp is fixed; tests that assert on Age must stub the
+ * clock via vi.setSystemTime.
+ */
+
+interface NodeStatus {
+  capacity?: Record<string, string>
+  allocatable?: Record<string, string>
+  conditions?: Array<{
+    type: string
+    status: "True" | "False" | "Unknown"
+    reason?: string
+    message?: string
+  }>
+}
+
+export type NodeFixture = K8sResource<unknown, NodeStatus>
+
+export const nodeControlPlane: NodeFixture = {
+  apiVersion: "v1",
+  kind: "Node",
+  metadata: {
+    name: "cp-1",
+    creationTimestamp: "2026-01-01T00:00:00Z",
+    labels: { "node-role.kubernetes.io/control-plane": "" },
+  },
+  status: {
+    capacity: { cpu: "4", memory: "8Gi", "ephemeral-storage": "100Gi", pods: "110" },
+    allocatable: { cpu: "4", memory: "8Gi", "ephemeral-storage": "100Gi", pods: "110" },
+    conditions: [{ type: "Ready", status: "True" }],
+  },
+}
+
+export const nodeWorker: NodeFixture = {
+  apiVersion: "v1",
+  kind: "Node",
+  metadata: {
+    name: "worker-1",
+    creationTimestamp: "2026-02-01T00:00:00Z",
+  },
+  status: {
+    capacity: { cpu: "8", memory: "32Gi", "ephemeral-storage": "500Gi", pods: "220" },
+    allocatable: { cpu: "8", memory: "32Gi", "ephemeral-storage": "500Gi", pods: "220" },
+    conditions: [{ type: "Ready", status: "True" }],
+  },
+}
+
+export const nodeGpuWorker: NodeFixture = {
+  apiVersion: "v1",
+  kind: "Node",
+  metadata: {
+    name: "worker-gpu-1",
+    creationTimestamp: "2026-03-01T00:00:00Z",
+  },
+  status: {
+    capacity: {
+      cpu: "16",
+      memory: "64Gi",
+      "ephemeral-storage": "1Ti",
+      pods: "220",
+      "nvidia.com/gpu": "1",
+    },
+    allocatable: {
+      cpu: "16",
+      memory: "64Gi",
+      "ephemeral-storage": "1Ti",
+      pods: "220",
+      "nvidia.com/gpu": "1",
+    },
+    conditions: [{ type: "Ready", status: "True" }],
+  },
+}
+
+export const nodesListFixture: K8sList<NodeFixture> = {
+  apiVersion: "v1",
+  kind: "NodeList",
+  metadata: { resourceVersion: "100" },
+  items: [nodeControlPlane, nodeWorker, nodeGpuWorker],
+}
diff --git a/apps/console/src/test-utils/fixtures/pods.ts b/apps/console/src/test-utils/fixtures/pods.ts
new file mode 100644
index 0000000..6346b1c
--- /dev/null
+++ b/apps/console/src/test-utils/fixtures/pods.ts
@@ -0,0 +1,98 @@
+import type { K8sList, K8sResource } from "@cozystack/k8s-client"
+
+interface PodSpec {
+  nodeName?: string
+  containers: Array<{
+    name: string
+    resources?: {
+      requests?: Record<string, string>
+      limits?: Record<string, string>
+    }
+  }>
+}
+
+interface PodStatus {
+  phase?: string
+}
+
+export type PodFixture = K8sResource<PodSpec, PodStatus>
+
+/**
+ * Pod fixtures aligned with the node fixtures: one pod bound to each node
+ * (the one on the GPU worker requests one nvidia.com/gpu), plus one
+ * unscheduled pod (no spec.nodeName) so the per-node aggregator can be
+ * verified to skip it correctly. Requests are deliberately small so the
+ * sums are obvious by inspection.
+ */
+
+export const podOnControlPlane: PodFixture = {
+  apiVersion: "v1",
+  kind: "Pod",
+  metadata: { name: "system-pod", namespace: "kube-system" },
+  spec: {
+    nodeName: "cp-1",
+    containers: [
+      {
+        name: "main",
+        resources: { requests: { cpu: "200m", memory: "256Mi" } },
+      },
+    ],
+  },
+  status: { phase: "Running" },
+}
+
+export const podOnWorker: PodFixture = {
+  apiVersion: "v1",
+  kind: "Pod",
+  metadata: { name: "tenant-pod", namespace: "tenant-root" },
+  spec: {
+    nodeName: "worker-1",
+    containers: [
+      {
+        name: "app",
+        resources: { requests: { cpu: "500m", memory: "1Gi" } },
+      },
+    ],
+  },
+  status: { phase: "Running" },
+}
+
+export const podOnGpuWorker: PodFixture = {
+  apiVersion: "v1",
+  kind: "Pod",
+  metadata: { name: "gpu-pod", namespace: "tenant-ml" },
+  spec: {
+    nodeName: "worker-gpu-1",
+    containers: [
+      {
+        name: "model",
+        resources: {
+          requests: { cpu: "100m", memory: "128Mi", "nvidia.com/gpu": "1" },
+        },
+      },
+    ],
+  },
+  status: { phase: "Running" },
+}
+
+export const podUnscheduled: PodFixture = {
+  apiVersion: "v1",
+  kind: "Pod",
+  metadata: { name: "pending-pod", namespace: "default" },
+  spec: {
+    containers: [
+      {
+        name: "main",
+        resources: { requests: { cpu: "1", memory: "2Gi" } },
+      },
+    ],
+  },
+  status: { phase: "Pending" },
+}
+
+export const podsListFixture: K8sList<PodFixture> = {
+  apiVersion: "v1",
+  kind: "PodList",
+  metadata: { resourceVersion: "200" },
+  items: [podOnControlPlane, podOnWorker, podOnGpuWorker, podUnscheduled],
+}
diff --git a/apps/console/src/test-utils/mock-k8s-client.ts b/apps/console/src/test-utils/mock-k8s-client.ts
new file mode 100644
index 0000000..d2b40ff
--- /dev/null
+++ b/apps/console/src/test-utils/mock-k8s-client.ts
@@ -0,0 +1,93 @@
+import { vi } from "vitest"
+import { K8sClient, K8sApiError, type K8sList, type WatchEvent } from "@cozystack/k8s-client"
+
+interface ListOverride {
+  apiGroup: string
+  apiVersion: string
+  plural: string
+  namespace?: string
+  result: K8sList<unknown> | (() => K8sList<unknown> | Promise<K8sList<unknown>>) | K8sApiError
+}
+
+interface GetOverride {
+  apiGroup: string
+  apiVersion: string
+  plural: string
+  name: string
+  namespace?: string
+  result: unknown | (() => unknown | Promise<unknown>) | K8sApiError
+}
+
+export interface MockK8sClientOverrides {
+  lists?: ListOverride[]
+  gets?: GetOverride[]
+}
+
+/**
+ * Build a K8sClient instance whose network-facing methods (list/get/watch)
+ * are replaced with vitest spies that resolve from in-memory overrides
+ * instead of fetch. A list call with no matching override resolves to an
+ * empty list; a get call with no matching override rejects with a 404
+ * K8sApiError. Any other K8sClient method is left unmocked.
+ *
+ * Watch is stubbed to a noop returning a cleanup function; tests that need
+ * watch event behaviour should override it via vi.spyOn on the returned
+ * instance.
+ */
+export function createMockK8sClient(overrides: MockK8sClientOverrides = {}): K8sClient {
+  const client = new K8sClient({ baseUrl: "/mock" })
+
+  const listSpy = vi.spyOn(client, "list").mockImplementation(
+    async (apiGroup, apiVersion, plural, namespace) => {
+      const match = overrides.lists?.find(
+        (o) =>
+          o.apiGroup === apiGroup &&
+          o.apiVersion === apiVersion &&
+          o.plural === plural &&
+          (o.namespace ?? undefined) === (namespace ?? undefined),
+      )
+      if (!match) {
+        return { apiVersion, kind: `${plural}List`, metadata: {}, items: [] } as K8sList<unknown>
+      }
+      if (match.result instanceof K8sApiError) throw match.result
+      const value = typeof match.result === "function" ? await match.result() : match.result
+      return value as K8sList<unknown>
+    },
+  )
+
+  const getSpy = vi.spyOn(client, "get").mockImplementation(
+    async (apiGroup, apiVersion, plural, name, namespace) => {
+      const match = overrides.gets?.find(
+        (o) =>
+          o.apiGroup === apiGroup &&
+          o.apiVersion === apiVersion &&
+          o.plural === plural &&
+          o.name === name &&
+          (o.namespace ?? undefined) === (namespace ?? undefined),
+      )
+      if (!match) {
+        throw new K8sApiError(404, { message: `mock: no get override for ${plural}/${name}` })
+      }
+      if (match.result instanceof K8sApiError) throw match.result
+      return typeof match.result === "function" ? await match.result() : match.result
+    },
+  )
+
+  vi.spyOn(client, "watch").mockImplementation(
+    (_apiGroup, _apiVersion, _plural, _ns, _rv, _onEvent: (e: WatchEvent<unknown>) => void) => {
+      return () => {}
+    },
+  )
+
+  void listSpy
+  void getSpy
+
+  return client
+}
+
+// Type check: the value returned by createMockK8sClient must be
+// assignable to K8sClient. NOTE(review): the factory is already declared
+// to return K8sClient, so this cannot detect methods added to the class
+// later; it also runs the factory once at module load.
+const _typeDriftCheck: K8sClient = createMockK8sClient()
+void _typeDriftCheck
diff --git a/apps/console/src/test-utils/render.test.tsx b/apps/console/src/test-utils/render.test.tsx
new file mode 100644
index 0000000..be842e2
--- /dev/null
+++ b/apps/console/src/test-utils/render.test.tsx
@@ -0,0 +1,72 @@
+import { describe, it, expect } from "vitest"
+import { waitFor } from "@testing-library/react"
+import { useK8sList } from "@cozystack/k8s-client"
+import { createMockK8sClient } from "./mock-k8s-client.ts"
+import { renderWithK8sProvider } from "./render.tsx"
+import { nodesListFixture, type NodeFixture } from "./fixtures/nodes.ts"
+
+function NodeNameList() {
+  const { data, isLoading } = useK8sList<NodeFixture>(
+    { apiGroup: "", apiVersion: "v1", plural: "nodes" },
+    { watch: false },
+  )
+  if (isLoading) return <p>loading</p>
+  return (
+    <ul>
+      {data?.items.map((n) => <li key={n.metadata.name}>{n.metadata.name}</li>)}
+    </ul>
+  )
+}
+
+describe("renderWithK8sProvider", () => {
+  it("renders a component that consumes useK8sList against an injected mock", async () => {
+    const client = createMockK8sClient({
+      lists: [
+        {
+          apiGroup: "",
+          apiVersion: "v1",
+          plural: "nodes",
+          result: nodesListFixture,
+        },
+      ],
+    })
+
+    const { findByText } = renderWithK8sProvider(<NodeNameList />, { client })
+
+    expect(await findByText("cp-1")).toBeInTheDocument()
+    expect(await findByText("worker-1")).toBeInTheDocument()
+    expect(await findByText("worker-gpu-1")).toBeInTheDocument()
+  })
+
+  it("routes the list call through the mock with the requested resource", async () => {
+    const client = createMockK8sClient({
+      lists: [
+        {
+          apiGroup: "",
+          apiVersion: "v1",
+          plural: "nodes",
+          result: nodesListFixture,
+        },
+      ],
+    })
+
+    renderWithK8sProvider(<NodeNameList />, { client })
+
+    await waitFor(() => {
+      expect(client.list).toHaveBeenCalledWith(
+        "",
+        "v1",
+        "nodes",
+        undefined,
+        expect.any(Object),
+      )
+    })
+  })
+
+  it("returns the queryClient so tests can clear or inspect the cache", () => {
+    const client = createMockK8sClient()
+    const { queryClient } = renderWithK8sProvider(<p>hello</p>, { client })
+    expect(queryClient).toBeDefined()
+    expect(queryClient.getQueryCache).toBeInstanceOf(Function)
+  })
+})
diff --git a/apps/console/src/test-utils/render.tsx b/apps/console/src/test-utils/render.tsx
new file mode 100644
index 0000000..04adc5d
--- /dev/null
+++ b/apps/console/src/test-utils/render.tsx
@@ -0,0 +1,31 @@
+import { render, type RenderResult } from "@testing-library/react"
+import { QueryClient } from "@tanstack/react-query"
+import { K8sProvider, type K8sClient } from "@cozystack/k8s-client"
+import { MemoryRouter } from "react-router"
+import type { ReactElement } from "react"
+
+export interface RenderWithK8sOptions {
+  client: K8sClient
+  initialRoute?: string
+}
+
+/**
+ * Wraps a React tree in the minimum context needed to exercise K8s hooks
+ * in isolation: a fresh QueryClient with retries off and gcTime 0 (cache
+ * entries are discarded as soon as they are unused), the K8sProvider with
+ * the injected client, and a MemoryRouter for react-router consumers.
+ */
+export function renderWithK8sProvider(
+  ui: ReactElement,
+  options: RenderWithK8sOptions,
+): RenderResult & { queryClient: QueryClient } {
+  const queryClient = new QueryClient({
+    defaultOptions: { queries: { retry: false, gcTime: 0 } },
+  })
+  const result = render(
+    <K8sProvider client={options.client} queryClient={queryClient}>
+      <MemoryRouter initialEntries={[options.initialRoute ?? "/"]}>{ui}</MemoryRouter>
+    </K8sProvider>,
+  )
+  return Object.assign(result, { queryClient })
+}

From 7181b00bbbd9070ff3ee0e09b02fa5baaf9a04dd Mon Sep 17 00:00:00 2001
From: Aleksei Sviridkin <f@lex.la>
Date: Tue, 26 May 2026 17:48:13 +0300
Subject: [PATCH 03/20] refactor(console): extract K8s quantity helpers from
 QuotaDisplay
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

parseQuantity, humanizeBytes and humanizeCpu used to live as private
helpers in QuotaDisplay.tsx. The cluster-usage admin page needs the
same functions to render node capacity, requested and used numbers,
so they move to a dedicated module under src/lib/ and pick up
table-driven test coverage along the way.

Behaviour is preserved verbatim — the tests pin the pre-existing
corner case where a bare 'm' suffix returns NaN, so future cleanups
have an explicit place to file an issue before changing it.

Assisted-By: Claude <noreply@anthropic.com>
Signed-off-by: Aleksei Sviridkin <f@lex.la>
---
 apps/console/src/components/QuotaDisplay.tsx |  30 +----
 apps/console/src/lib/k8s-quantity.test.ts    | 120 +++++++++++++++++++
 apps/console/src/lib/k8s-quantity.ts         |  34 ++++++
 3 files changed, 155 insertions(+), 29 deletions(-)
 create mode 100644 apps/console/src/lib/k8s-quantity.test.ts
 create mode 100644 apps/console/src/lib/k8s-quantity.ts

diff --git a/apps/console/src/components/QuotaDisplay.tsx b/apps/console/src/components/QuotaDisplay.tsx
index a225416..4ac90df 100644
--- a/apps/console/src/components/QuotaDisplay.tsx
+++ b/apps/console/src/components/QuotaDisplay.tsx
@@ -1,6 +1,7 @@
 import { useEffect, useState } from "react"
 import { useK8sList } from "@cozystack/k8s-client"
 import type { K8sResource } from "@cozystack/k8s-client"
+import { parseQuantity, humanizeBytes, humanizeCpu } from "../lib/k8s-quantity.ts"
 
 interface ResourceQuotaSpec {
   hard?: Record<string, string>
@@ -15,35 +16,6 @@ export interface ResourceQuota extends K8sResource<ResourceQuotaSpec, ResourceQu
   kind: "ResourceQuota"
 }
 
-function parseQuantity(s: string): number {
-  if (!s) return 0
-  if (s.endsWith("m")) return parseFloat(s) / 1000
-  // Binary SI suffixes (powers of 1024)
-  if (s.endsWith("Ki")) return parseFloat(s) * 1024
-  if (s.endsWith("Mi")) return parseFloat(s) * 1024 ** 2
-  if (s.endsWith("Gi")) return parseFloat(s) * 1024 ** 3
-  if (s.endsWith("Ti")) return parseFloat(s) * 1024 ** 4
-  if (s.endsWith("Pi")) return parseFloat(s) * 1024 ** 5
-  if (s.endsWith("Ei")) return parseFloat(s) * 1024 ** 6
-  // Decimal SI suffixes (powers of 1000) — Kubernetes uses lowercase k
-  if (s.endsWith("k")) return parseFloat(s) * 1000
-  if (s.endsWith("M")) return parseFloat(s) * 1000 ** 2
-  if (s.endsWith("G")) return parseFloat(s) * 1000 ** 3
-  return parseFloat(s) || 0
-}
-
-function humanizeBytes(bytes: number): string {
-  if (bytes >= 1024 ** 4) return `${(bytes / 1024 ** 4).toFixed(1)}Ti`
-  if (bytes >= 1024 ** 3) return `${(bytes / 1024 ** 3).toFixed(1)}Gi`
-  if (bytes >= 1024 ** 2) return `${(bytes / 1024 ** 2).toFixed(0)}Mi`
-  return `${bytes}B`
-}
-
-function humanizeCpu(val: number): string {
-  if (val < 1) return `${Math.round(val * 1000)}m`
-  return `${val % 1 === 0 ? val : val.toFixed(2)}`
-}
-
 interface QuotaEntry {
   label: string
   usedRaw: string
diff --git a/apps/console/src/lib/k8s-quantity.test.ts b/apps/console/src/lib/k8s-quantity.test.ts
new file mode 100644
index 0000000..ac8eda3
--- /dev/null
+++ b/apps/console/src/lib/k8s-quantity.test.ts
@@ -0,0 +1,120 @@
+import { describe, it, expect } from "vitest"
+import { parseQuantity, humanizeBytes, humanizeCpu } from "./k8s-quantity.ts"
+
+describe("parseQuantity", () => {
+  it("returns 0 for the empty string", () => {
+    expect(parseQuantity("")).toBe(0)
+  })
+
+  it("parses milli suffix to a fractional value", () => {
+    expect(parseQuantity("500m")).toBe(0.5)
+  })
+
+  it("parses milli values greater than one core", () => {
+    expect(parseQuantity("1500m")).toBe(1.5)
+  })
+
+  it("parses Ki as 1024", () => {
+    expect(parseQuantity("1Ki")).toBe(1024)
+  })
+
+  it("parses Mi as 1024 squared", () => {
+    expect(parseQuantity("1Mi")).toBe(1024 ** 2)
+  })
+
+  it("parses Gi as 1024 cubed", () => {
+    expect(parseQuantity("1Gi")).toBe(1024 ** 3)
+  })
+
+  it("parses Ti as 1024 to the fourth", () => {
+    expect(parseQuantity("1Ti")).toBe(1024 ** 4)
+  })
+
+  it("parses Pi as 1024 to the fifth", () => {
+    expect(parseQuantity("1Pi")).toBe(1024 ** 5)
+  })
+
+  it("parses Ei as 1024 to the sixth", () => {
+    expect(parseQuantity("1Ei")).toBe(1024 ** 6)
+  })
+
+  it("parses decimal k suffix as 1000", () => {
+    expect(parseQuantity("1k")).toBe(1000)
+  })
+
+  it("parses decimal M suffix as 1000 squared", () => {
+    expect(parseQuantity("1M")).toBe(1_000_000)
+  })
+
+  it("parses decimal G suffix as 1000 cubed", () => {
+    expect(parseQuantity("1G")).toBe(1_000_000_000)
+  })
+
+  it("parses a bare integer", () => {
+    expect(parseQuantity("42")).toBe(42)
+  })
+
+  it("parses a bare decimal", () => {
+    expect(parseQuantity("1.5")).toBe(1.5)
+  })
+
+  it("parses a fractional Gi value", () => {
+    expect(parseQuantity("1.5Gi")).toBe(1.5 * 1024 ** 3)
+  })
+
+  it("falls back to 0 for unparseable input", () => {
+    expect(parseQuantity("abc")).toBe(0)
+  })
+
+  it("returns NaN when only a suffix is supplied (pinned corner case)", () => {
+    // The current implementation parses "m" as parseFloat("m") / 1000 = NaN / 1000.
+    // Pinned to document the behaviour; callers should pass valid quantities.
+    expect(parseQuantity("m")).toBeNaN()
+  })
+
+  it("parses zero", () => {
+    expect(parseQuantity("0")).toBe(0)
+  })
+
+  it("parses zero with a suffix", () => {
+    expect(parseQuantity("0Gi")).toBe(0)
+  })
+})
+
+describe("humanizeBytes", () => {
+  it("formats sub-kilobyte values with a B suffix", () => {
+    expect(humanizeBytes(0)).toBe("0B")
+    expect(humanizeBytes(1023)).toBe("1023B")
+  })
+
+  it("formats megabytes as Mi without decimals", () => {
+    expect(humanizeBytes(1024 ** 2)).toBe("1Mi")
+  })
+
+  it("formats gigabytes as Gi with one decimal", () => {
+    expect(humanizeBytes(1.5 * 1024 ** 3)).toBe("1.5Gi")
+  })
+
+  it("formats terabytes as Ti with one decimal", () => {
+    expect(humanizeBytes(1024 ** 4)).toBe("1.0Ti")
+  })
+})
+
+describe("humanizeCpu", () => {
+  it("formats zero as 0m", () => {
+    expect(humanizeCpu(0)).toBe("0m")
+  })
+
+  it("formats half a core as 500m", () => {
+    expect(humanizeCpu(0.5)).toBe("500m")
+  })
+
+  it("formats an integer core count without decimals", () => {
+    expect(humanizeCpu(1)).toBe("1")
+    expect(humanizeCpu(2)).toBe("2")
+  })
+
+  it("formats a non-integer core count with two decimals", () => {
+    expect(humanizeCpu(1.5)).toBe("1.50")
+  })
+})
diff --git a/apps/console/src/lib/k8s-quantity.ts b/apps/console/src/lib/k8s-quantity.ts
new file mode 100644
index 0000000..9339ff5
--- /dev/null
+++ b/apps/console/src/lib/k8s-quantity.ts
@@ -0,0 +1,34 @@
+/**
+ * Parse a Kubernetes resource.Quantity string into a number (cores for
+ * CPU, bytes for memory). Only m, Ki..Ei and k/M/G are scaled; any other
+ * suffix (n, u, T, P, E) falls through to parseFloat and is read unscaled.
+ * Behaviour is verbatim from the QuotaDisplay helpers; see the tests.
+ */
+export function parseQuantity(s: string): number {
+  if (!s) return 0
+  if (s.endsWith("m")) return parseFloat(s) / 1000
+  // Binary SI suffixes (powers of 1024)
+  if (s.endsWith("Ki")) return parseFloat(s) * 1024
+  if (s.endsWith("Mi")) return parseFloat(s) * 1024 ** 2
+  if (s.endsWith("Gi")) return parseFloat(s) * 1024 ** 3
+  if (s.endsWith("Ti")) return parseFloat(s) * 1024 ** 4
+  if (s.endsWith("Pi")) return parseFloat(s) * 1024 ** 5
+  if (s.endsWith("Ei")) return parseFloat(s) * 1024 ** 6
+  // Decimal SI suffixes (powers of 1000) — Kubernetes uses lowercase k
+  if (s.endsWith("k")) return parseFloat(s) * 1000
+  if (s.endsWith("M")) return parseFloat(s) * 1000 ** 2
+  if (s.endsWith("G")) return parseFloat(s) * 1000 ** 3
+  return parseFloat(s) || 0
+}
+
+export function humanizeBytes(bytes: number): string {
+  if (bytes >= 1024 ** 4) return `${(bytes / 1024 ** 4).toFixed(1)}Ti`
+  if (bytes >= 1024 ** 3) return `${(bytes / 1024 ** 3).toFixed(1)}Gi`
+  if (bytes >= 1024 ** 2) return `${(bytes / 1024 ** 2).toFixed(0)}Mi`
+  return `${bytes}B`
+}
+
+export function humanizeCpu(val: number): string {
+  if (val < 1) return `${Math.round(val * 1000)}m`
+  return `${val % 1 === 0 ? val : val.toFixed(2)}`
+}

From 1c810a11ea1018966092abc6bcad9779bf0606ed Mon Sep 17 00:00:00 2001
From: Aleksei Sviridkin <f@lex.la>
Date: Tue, 26 May 2026 17:50:08 +0300
Subject: [PATCH 04/20] feat(k8s-client): add getApiGroups and
 useApiGroupAvailable hook
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

K8sClient.getApiGroups() issues GET /apis and returns the registered
APIGroupList. The useApiGroupAvailable hook layers on top of it,
caching the discovery result with staleTime: Infinity so all consumers
on a page share a single network round trip.

The cluster-usage admin page needs this to runtime-detect the presence
of metrics.k8s.io — if the API group is not registered, the page must
omit the 'Used' overlay rather than fall back to error reporting.
Errors from the discovery call are intentionally absorbed and surfaced
as available=false: callers are asking a discovery question, and 'we
cannot find out' is functionally identical to 'not registered' for
gating logic that drives whether a UI section renders.

Assisted-By: Claude <noreply@anthropic.com>
Signed-off-by: Aleksei Sviridkin <f@lex.la>
---
 .../k8s-client/useApiGroupAvailable.test.tsx  | 121 ++++++++++++++++++
 packages/k8s-client/src/client.ts             |  21 +++
 packages/k8s-client/src/index.ts              |   5 +
 .../k8s-client/src/useApiGroupAvailable.ts    |  30 +++++
 4 files changed, 177 insertions(+)
 create mode 100644 apps/console/src/__tests__/k8s-client/useApiGroupAvailable.test.tsx
 create mode 100644 packages/k8s-client/src/useApiGroupAvailable.ts

diff --git a/apps/console/src/__tests__/k8s-client/useApiGroupAvailable.test.tsx b/apps/console/src/__tests__/k8s-client/useApiGroupAvailable.test.tsx
new file mode 100644
index 0000000..c606ac6
--- /dev/null
+++ b/apps/console/src/__tests__/k8s-client/useApiGroupAvailable.test.tsx
@@ -0,0 +1,121 @@
+import { describe, it, expect, vi } from "vitest"
+import { renderHook, waitFor } from "@testing-library/react"
+import { QueryClient, QueryClientProvider } from "@tanstack/react-query"
+import {
+  K8sClient,
+  K8sProvider,
+  useApiGroupAvailable,
+  type APIGroupList,
+} from "@cozystack/k8s-client"
+import type { ReactNode } from "react"
+
+function makeWrapper(client: K8sClient) {
+  const queryClient = new QueryClient({
+    defaultOptions: { queries: { retry: false, gcTime: 0 } },
+  })
+  return function Wrapper({ children }: { children: ReactNode }) {
+    return (
+      <QueryClientProvider client={queryClient}>
+        <K8sProvider client={client} queryClient={queryClient}>
+          {children}
+        </K8sProvider>
+      </QueryClientProvider>
+    )
+  }
+}
+
+const sampleGroups: APIGroupList = {
+  kind: "APIGroupList",
+  apiVersion: "v1",
+  groups: [
+    {
+      name: "metrics.k8s.io",
+      versions: [{ groupVersion: "metrics.k8s.io/v1beta1", version: "v1beta1" }],
+      preferredVersion: { groupVersion: "metrics.k8s.io/v1beta1", version: "v1beta1" },
+    },
+    {
+      name: "apps",
+      versions: [{ groupVersion: "apps/v1", version: "v1" }],
+      preferredVersion: { groupVersion: "apps/v1", version: "v1" },
+    },
+  ],
+}
+
+describe("useApiGroupAvailable", () => {
+  it("starts in loading state with available=false", () => {
+    const client = new K8sClient()
+    vi.spyOn(client, "getApiGroups").mockImplementation(
+      () => new Promise(() => {}),
+    )
+    const { result } = renderHook(() => useApiGroupAvailable("metrics.k8s.io"), {
+      wrapper: makeWrapper(client),
+    })
+    expect(result.current.isLoading).toBe(true)
+    expect(result.current.available).toBe(false)
+  })
+
+  it("reports available=true when the group is present", async () => {
+    const client = new K8sClient()
+    vi.spyOn(client, "getApiGroups").mockResolvedValue(sampleGroups)
+    const { result } = renderHook(() => useApiGroupAvailable("metrics.k8s.io"), {
+      wrapper: makeWrapper(client),
+    })
+    await waitFor(() => expect(result.current.isLoading).toBe(false))
+    expect(result.current.available).toBe(true)
+  })
+
+  it("reports available=false when the group is missing", async () => {
+    const client = new K8sClient()
+    vi.spyOn(client, "getApiGroups").mockResolvedValue(sampleGroups)
+    const { result } = renderHook(() => useApiGroupAvailable("custom.metrics.k8s.io"), {
+      wrapper: makeWrapper(client),
+    })
+    await waitFor(() => expect(result.current.isLoading).toBe(false))
+    expect(result.current.available).toBe(false)
+  })
+
+  it("fetches /apis once for multiple consumers", async () => {
+    const client = new K8sClient()
+    const spy = vi.spyOn(client, "getApiGroups").mockResolvedValue(sampleGroups)
+
+    // makeWrapper builds one QueryClient per call, so reusing a single
+    // Wrapper is what puts both hooks on the same cache. Each renderHook
+    // call mounts its own React tree; only the cache is shared.
+    const Wrapper = makeWrapper(client)
+
+    // Two consumers asking about two different groups, mounted one after
+    // the other.
+    const { result: hookA } = renderHook(
+      () => useApiGroupAvailable("metrics.k8s.io"),
+      { wrapper: Wrapper },
+    )
+    const { result: hookB } = renderHook(
+      () => useApiGroupAvailable("apps"),
+      { wrapper: Wrapper },
+    )
+
+    // Let both settle before inspecting results and call counts.
+    await waitFor(() => expect(hookA.current.isLoading).toBe(false))
+    await waitFor(() => expect(hookB.current.isLoading).toBe(false))
+
+    // Both groups are listed in sampleGroups, so each consumer must find
+    // its own group in the shared discovery result.
+    expect(hookA.current.available).toBe(true)
+    expect(hookB.current.available).toBe(true)
+
+    // The group name is not part of the query key, so the second
+    // consumer reuses the cached /apis response instead of issuing its
+    // own request.
+    expect(spy).toHaveBeenCalledTimes(1)
+  })
+
+  it("absorbs a failed discovery call and reports available=false", async () => {
+    const client = new K8sClient()
+    vi.spyOn(client, "getApiGroups").mockRejectedValue(new Error("no /apis"))
+    // The hook returns no error field: a rejected call can only read as "not available".
+    const wrapper = makeWrapper(client)
+    const { result } = renderHook(() => useApiGroupAvailable("metrics.k8s.io"), { wrapper })
+    await waitFor(() => expect(result.current.isLoading).toBe(false))
+    expect(result.current.available).toBe(false)
+  })
+})
diff --git a/packages/k8s-client/src/client.ts b/packages/k8s-client/src/client.ts
index ecfd2ed..c8d7a8c 100644
--- a/packages/k8s-client/src/client.ts
+++ b/packages/k8s-client/src/client.ts
@@ -189,6 +189,10 @@ export class K8sClient {
     })
   }
 
+  getApiGroups(): Promise<APIGroupList> {
+    return this.request<APIGroupList>("/apis") // the API group discovery endpoint
+  }
+
   watch<T>(
     apiGroup: string,
     apiVersion: string,
@@ -319,3 +323,20 @@ export interface WatchEvent<T> {
   type: "ADDED" | "MODIFIED" | "DELETED" | "BOOKMARK" | "ERROR"
   object: T
 }
+
+export interface APIGroupVersion {
+  groupVersion: string // "<group>/<version>", e.g. "metrics.k8s.io/v1beta1"
+  version: string // the version part on its own, e.g. "v1beta1"
+}
+
+export interface APIGroup {
+  name: string // group name, e.g. "apps"; this is what useApiGroupAvailable matches on
+  versions: APIGroupVersion[]
+  preferredVersion?: APIGroupVersion
+}
+
+export interface APIGroupList {
+  kind: string // "APIGroupList" in a discovery response
+  apiVersion: string
+  groups: APIGroup[] // one entry per registered API group
+}
diff --git a/packages/k8s-client/src/index.ts b/packages/k8s-client/src/index.ts
index eec4679..8f7fa49 100644
--- a/packages/k8s-client/src/index.ts
+++ b/packages/k8s-client/src/index.ts
@@ -7,6 +7,9 @@ export type {
   K8sList,
   K8sCondition,
   WatchEvent,
+  APIGroup,
+  APIGroupList,
+  APIGroupVersion,
 } from "./client.ts"
 
 export { K8sProvider, useK8sClient, useConnectionError } from "./provider.tsx"
@@ -19,3 +22,5 @@ export {
   useK8sDelete,
 } from "./hooks.ts"
 export type { ResourceRef } from "./hooks.ts"
+
+export { useApiGroupAvailable } from "./useApiGroupAvailable.ts"
diff --git a/packages/k8s-client/src/useApiGroupAvailable.ts b/packages/k8s-client/src/useApiGroupAvailable.ts
new file mode 100644
index 0000000..712d8eb
--- /dev/null
+++ b/packages/k8s-client/src/useApiGroupAvailable.ts
@@ -0,0 +1,30 @@
+import { useQuery } from "@tanstack/react-query"
+import { useK8sClient } from "./provider.tsx"
+import type { APIGroupList } from "./client.ts"
+
+/**
+ * Reports whether the API group `groupName` is registered on the cluster.
+ *
+ * All consumers share one `/apis` discovery query under the
+ * "k8s-api-groups" key. With staleTime: Infinity a cached list is never
+ * treated as stale; invalidate that key to force a fresh lookup.
+ *
+ * Failures are not surfaced: a rejected request, or a response that has
+ * no `groups` list, yields available=false, because "cannot find out"
+ * and "not registered" mean the same to gating logic.
+ *
+ * @returns `available`, and `isLoading` as reported by the query.
+ */
+export function useApiGroupAvailable(
+  groupName: string,
+): { available: boolean; isLoading: boolean } {
+  const client = useK8sClient()
+  const query = useQuery<APIGroupList>({
+    queryKey: ["k8s-api-groups"],
+    queryFn: () => client.getApiGroups(),
+    staleTime: Infinity,
+    refetchOnWindowFocus: false,
+  })
+  const available = query.data?.groups?.some((g) => g.name === groupName) ?? false
+  return { available, isLoading: query.isLoading }
+}

From e3f1be8a854beb531dcee428006bf23aca956c42 Mon Sep 17 00:00:00 2001
From: Aleksei Sviridkin <f@lex.la>
Date: Tue, 26 May 2026 17:52:12 +0300
Subject: [PATCH 05/20] feat(k8s-client): add useSelfSubjectAccessReview hook
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

POSTs a SelfSubjectAccessReview against authorization.k8s.io/v1 and
returns whether the current user is allowed to perform the requested
action. The result is cached per spec for the lifetime of the
QueryClient (staleTime: Infinity, refetchOnWindowFocus disabled) so
two consumers asking the same question share a single network round
trip; different specs cache independently.

This is the lever the new Administration sidebar uses to gate the
Cluster Usage entry on a nodes-list permission check, hiding the
entry from users who would only see a 403 if they clicked it.

Errors surface as allowed=false (the error itself is still returned
alongside): a caller asking 'can I see this UI section' does not care
about 'denied' versus 'cannot determine'; either way it stays hidden.
A revoked grant surfaces on the next page load — a price worth
paying to keep the discovery query off the focus / reconnect path.

Assisted-By: Claude <noreply@anthropic.com>
Signed-off-by: Aleksei Sviridkin <f@lex.la>
---
 .../useSelfSubjectAccessReview.test.tsx       | 174 ++++++++++++++++++
 packages/k8s-client/src/index.ts              |   8 +
 .../src/useSelfSubjectAccessReview.ts         |  93 ++++++++++
 3 files changed, 275 insertions(+)
 create mode 100644 apps/console/src/__tests__/k8s-client/useSelfSubjectAccessReview.test.tsx
 create mode 100644 packages/k8s-client/src/useSelfSubjectAccessReview.ts

diff --git a/apps/console/src/__tests__/k8s-client/useSelfSubjectAccessReview.test.tsx b/apps/console/src/__tests__/k8s-client/useSelfSubjectAccessReview.test.tsx
new file mode 100644
index 0000000..2dc7baa
--- /dev/null
+++ b/apps/console/src/__tests__/k8s-client/useSelfSubjectAccessReview.test.tsx
@@ -0,0 +1,174 @@
+import { describe, it, expect, vi } from "vitest"
+import { renderHook, waitFor } from "@testing-library/react"
+import { QueryClient, QueryClientProvider } from "@tanstack/react-query"
+import {
+  K8sClient,
+  K8sProvider,
+  useSelfSubjectAccessReview,
+  type SelfSubjectAccessReview,
+} from "@cozystack/k8s-client"
+import type { ReactNode } from "react"
+
+function makeWrapper(client: K8sClient) {
+  const queryClient = new QueryClient({
+    defaultOptions: { queries: { retry: false, gcTime: 0 } },
+  })
+  return function Wrapper({ children }: { children: ReactNode }) {
+    return (
+      <QueryClientProvider client={queryClient}>
+        <K8sProvider client={client} queryClient={queryClient}>
+          {children}
+        </K8sProvider>
+      </QueryClientProvider>
+    )
+  }
+}
+
+function ssarResult(allowed: boolean): SelfSubjectAccessReview {
+  return {
+    apiVersion: "authorization.k8s.io/v1",
+    kind: "SelfSubjectAccessReview",
+    metadata: { name: "" },
+    spec: { resourceAttributes: { resource: "nodes", verb: "list" } },
+    status: { allowed },
+  }
+}
+
+describe("useSelfSubjectAccessReview", () => {
+  it("starts in loading state with allowed=false", () => {
+    const client = new K8sClient()
+    vi.spyOn(client, "create").mockImplementation(() => new Promise(() => {}))
+    const { result } = renderHook(
+      () =>
+        useSelfSubjectAccessReview({
+          resourceAttributes: { resource: "nodes", verb: "list" },
+        }),
+      { wrapper: makeWrapper(client) },
+    )
+    expect(result.current.isLoading).toBe(true)
+    expect(result.current.allowed).toBe(false)
+  })
+
+  it("reports allowed=true when the API responds with status.allowed=true", async () => {
+    const client = new K8sClient()
+    vi.spyOn(client, "create").mockResolvedValue(ssarResult(true))
+    const { result } = renderHook(
+      () =>
+        useSelfSubjectAccessReview({
+          resourceAttributes: { resource: "nodes", verb: "list" },
+        }),
+      { wrapper: makeWrapper(client) },
+    )
+    await waitFor(() => expect(result.current.isLoading).toBe(false))
+    expect(result.current.allowed).toBe(true)
+  })
+
+  it("reports allowed=false explicitly when status.allowed=false", async () => {
+    const client = new K8sClient()
+    vi.spyOn(client, "create").mockResolvedValue(ssarResult(false))
+    const { result } = renderHook(
+      () =>
+        useSelfSubjectAccessReview({
+          resourceAttributes: { resource: "nodes", verb: "list" },
+        }),
+      { wrapper: makeWrapper(client) },
+    )
+    await waitFor(() => expect(result.current.isLoading).toBe(false))
+    expect(result.current.allowed).toBe(false)
+  })
+
+  it("POSTs once for two consumers asking the same question", async () => {
+    const client = new K8sClient()
+    const spy = vi.spyOn(client, "create").mockResolvedValue(ssarResult(true))
+    const Wrapper = makeWrapper(client)
+    const { result: a } = renderHook(
+      () =>
+        useSelfSubjectAccessReview({
+          resourceAttributes: { resource: "nodes", verb: "list" },
+        }),
+      { wrapper: Wrapper },
+    )
+    const { result: b } = renderHook(
+      () =>
+        useSelfSubjectAccessReview({
+          resourceAttributes: { resource: "nodes", verb: "list" },
+        }),
+      { wrapper: Wrapper },
+    )
+    await waitFor(() => expect(a.current.isLoading).toBe(false))
+    await waitFor(() => expect(b.current.isLoading).toBe(false))
+    expect(spy).toHaveBeenCalledTimes(1)
+  })
+
+  it("POSTs twice when two consumers ask different questions", async () => {
+    const client = new K8sClient()
+    const spy = vi.spyOn(client, "create").mockResolvedValue(ssarResult(true))
+    const Wrapper = makeWrapper(client)
+    const { result: a } = renderHook(
+      () =>
+        useSelfSubjectAccessReview({
+          resourceAttributes: { resource: "nodes", verb: "list" },
+        }),
+      { wrapper: Wrapper },
+    )
+    const { result: b } = renderHook(
+      () =>
+        useSelfSubjectAccessReview({
+          resourceAttributes: { resource: "pods", verb: "list" },
+        }),
+      { wrapper: Wrapper },
+    )
+    await waitFor(() => expect(a.current.isLoading).toBe(false))
+    await waitFor(() => expect(b.current.isLoading).toBe(false))
+    expect(spy).toHaveBeenCalledTimes(2)
+  })
+
+  it("surfaces the error and reports allowed=false on API failure", async () => {
+    const client = new K8sClient()
+    const err = new Error("server error")
+    vi.spyOn(client, "create").mockRejectedValue(err)
+    const { result } = renderHook(
+      () =>
+        useSelfSubjectAccessReview({
+          resourceAttributes: { resource: "nodes", verb: "list" },
+        }),
+      { wrapper: makeWrapper(client) },
+    )
+    await waitFor(() => expect(result.current.isLoading).toBe(false))
+    expect(result.current.allowed).toBe(false)
+    expect(result.current.error).toBeTruthy()
+  })
+
+  it("sends the spec verbatim in the POST body", async () => {
+    const client = new K8sClient()
+    const spy = vi.spyOn(client, "create").mockResolvedValue(ssarResult(true))
+    const { result } = renderHook(
+      () =>
+        useSelfSubjectAccessReview({
+          resourceAttributes: {
+            group: "metrics.k8s.io",
+            resource: "nodes",
+            verb: "list",
+          },
+        }),
+      { wrapper: makeWrapper(client) },
+    )
+    await waitFor(() => expect(result.current.isLoading).toBe(false))
+    expect(spy).toHaveBeenCalledWith(
+      "authorization.k8s.io",
+      "v1",
+      "selfsubjectaccessreviews",
+      expect.objectContaining({
+        kind: "SelfSubjectAccessReview",
+        apiVersion: "authorization.k8s.io/v1",
+        spec: {
+          resourceAttributes: {
+            group: "metrics.k8s.io",
+            resource: "nodes",
+            verb: "list",
+          },
+        },
+      }),
+    )
+  })
+})
diff --git a/packages/k8s-client/src/index.ts b/packages/k8s-client/src/index.ts
index 8f7fa49..0865e8d 100644
--- a/packages/k8s-client/src/index.ts
+++ b/packages/k8s-client/src/index.ts
@@ -24,3 +24,11 @@ export {
 export type { ResourceRef } from "./hooks.ts"
 
 export { useApiGroupAvailable } from "./useApiGroupAvailable.ts"
+
+export { useSelfSubjectAccessReview } from "./useSelfSubjectAccessReview.ts"
+export type {
+  SelfSubjectAccessReview,
+  SelfSubjectAccessReviewSpec,
+  ResourceAttributes,
+  NonResourceAttributes,
+} from "./useSelfSubjectAccessReview.ts"
diff --git a/packages/k8s-client/src/useSelfSubjectAccessReview.ts b/packages/k8s-client/src/useSelfSubjectAccessReview.ts
new file mode 100644
index 0000000..eb99135
--- /dev/null
+++ b/packages/k8s-client/src/useSelfSubjectAccessReview.ts
@@ -0,0 +1,93 @@
+import { useQuery } from "@tanstack/react-query"
+import { useK8sClient } from "./provider.tsx"
+import type { K8sResource } from "./client.ts"
+
+export interface ResourceAttributes {
+  namespace?: string
+  verb?: string
+  group?: string
+  version?: string
+  resource?: string
+  subresource?: string
+  name?: string
+}
+
+export interface NonResourceAttributes {
+  path?: string
+  verb?: string
+}
+
+export interface SelfSubjectAccessReviewSpec {
+  resourceAttributes?: ResourceAttributes
+  nonResourceAttributes?: NonResourceAttributes
+}
+
+interface SelfSubjectAccessReviewStatus {
+  allowed: boolean
+  denied?: boolean
+  reason?: string
+  evaluationError?: string
+}
+
+export type SelfSubjectAccessReview = K8sResource<
+  SelfSubjectAccessReviewSpec,
+  SelfSubjectAccessReviewStatus
+>
+
+function ssarCacheKey(spec: SelfSubjectAccessReviewSpec): readonly string[] {
+  // Key layout: the "ssar" tag, then one fixed slot per attribute, with
+  // resource attributes first and non-resource attributes last. A missing
+  // attribute is stored as "", so specs with equal fields share a key.
+  const { namespace, group, version, resource, subresource, verb, name } =
+    spec.resourceAttributes ?? {}
+  const { path, verb: nonResourceVerb } = spec.nonResourceAttributes ?? {}
+
+  const resourceSlots = [namespace, group, version, resource, subresource, verb, name]
+  const nonResourceSlots = [path, nonResourceVerb]
+  const key = ["ssar"]
+  for (const slot of [...resourceSlots, ...nonResourceSlots]) {
+    key.push(slot ?? "")
+  }
+  return key
+}
+
+/**
+ * Asks the cluster, through a SelfSubjectAccessReview, whether the
+ * current user may perform the action described by `spec`.
+ *
+ * Consumers passing equal specs share one cached query. The query uses
+ * staleTime: Infinity and skips refetch-on-focus, so a cached answer is
+ * never treated as stale.
+ *
+ * `allowed` is true only when the response carries status.allowed=true;
+ * before a response arrives and after a failure it is false. A failure
+ * is additionally exposed as `error`, which is null otherwise.
+ */
+export function useSelfSubjectAccessReview(
+  spec: SelfSubjectAccessReviewSpec,
+): { allowed: boolean; isLoading: boolean; error: Error | null } {
+  const client = useK8sClient()
+  // The key is derived from the spec's fields, so consumers asking the
+  // same question resolve to the same cached query.
+  const { data, isLoading, error } = useQuery<SelfSubjectAccessReview>({
+    queryKey: ssarCacheKey(spec),
+    staleTime: Infinity,
+    refetchOnWindowFocus: false,
+    queryFn: () =>
+      client.create<SelfSubjectAccessReview>(
+        "authorization.k8s.io",
+        "v1",
+        "selfsubjectaccessreviews",
+        {
+          apiVersion: "authorization.k8s.io/v1",
+          kind: "SelfSubjectAccessReview",
+          metadata: { name: "" },
+          spec,
+        },
+      ),
+  })
+  // Missing data (still loading, or the request failed) and a response
+  // without a status both fall back to "not allowed".
+  const allowed = data?.status?.allowed ?? false
+  return { allowed, isLoading, error: (error as Error | null) ?? null }
+}

From dcd5d91b6fd58f85443f63bdad8833181df09c17 Mon Sep 17 00:00:00 2001
From: Aleksei Sviridkin <f@lex.la>
Date: Tue, 26 May 2026 17:56:11 +0300
Subject: [PATCH 06/20] feat(console): cluster-usage derivation utilities
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pure functions, no React: getExtendedResourceKeys discovers the set of
extended-resource keys present in node.status.capacity across the
cluster (filtering out the four standard scheduler resources and every
hugepages-* variant), getExtendedResourcePrefixes groups them by their
vendor prefix for aggregate-card rendering, aggregateNodeResources sums
capacity/allocatable/requested cluster-wide for every standard and
discovered extended resource (plus 'used' for cpu and memory when
metrics are available), and derivePerNodeRows builds the same totals
one node at a time alongside roles, conditions, taints and age.

A node-by-node split is required for the per-node table the
cluster-usage page renders below the aggregate panel; computing it from
the same primitives keeps the table totals reconcilable with the
aggregate cards.

The extended-resource pathway has no vendor allow-list — keys are
rendered exactly as the cluster exposes them, so a previously unseen
accelerator (acme.io/fpga, anything) surfaces in the UI the moment a
node carrying it joins.

Assisted-By: Claude <noreply@anthropic.com>
Signed-off-by: Aleksei Sviridkin <f@lex.la>
---
 .../src/lib/cluster-usage/aggregate.test.ts   | 139 ++++++++++++
 .../src/lib/cluster-usage/aggregate.ts        |  88 ++++++++
 .../cluster-usage/extended-resources.test.ts  |  98 ++++++++
 .../lib/cluster-usage/extended-resources.ts   |  49 ++++
 .../src/lib/cluster-usage/per-node.test.ts    | 210 ++++++++++++++++++
 .../console/src/lib/cluster-usage/per-node.ts | 143 ++++++++++++
 apps/console/src/lib/cluster-usage/types.ts   | 110 +++++++++
 7 files changed, 837 insertions(+)
 create mode 100644 apps/console/src/lib/cluster-usage/aggregate.test.ts
 create mode 100644 apps/console/src/lib/cluster-usage/aggregate.ts
 create mode 100644 apps/console/src/lib/cluster-usage/extended-resources.test.ts
 create mode 100644 apps/console/src/lib/cluster-usage/extended-resources.ts
 create mode 100644 apps/console/src/lib/cluster-usage/per-node.test.ts
 create mode 100644 apps/console/src/lib/cluster-usage/per-node.ts
 create mode 100644 apps/console/src/lib/cluster-usage/types.ts

diff --git a/apps/console/src/lib/cluster-usage/aggregate.test.ts b/apps/console/src/lib/cluster-usage/aggregate.test.ts
new file mode 100644
index 0000000..ccbf203
--- /dev/null
+++ b/apps/console/src/lib/cluster-usage/aggregate.test.ts
@@ -0,0 +1,139 @@
+import { describe, it, expect } from "vitest"
+import { aggregateNodeResources } from "./aggregate.ts"
+import type { Node, Pod, NodeMetrics } from "./types.ts"
+
+function node(name: string, capacity: Record<string, string>): Node {
+  return {
+    apiVersion: "v1",
+    kind: "Node",
+    metadata: { name },
+    status: { capacity, allocatable: capacity, conditions: [] },
+  }
+}
+
+function pod(
+  name: string,
+  nodeName: string | undefined,
+  requests: Record<string, string>,
+): Pod {
+  return {
+    apiVersion: "v1",
+    kind: "Pod",
+    metadata: { name, namespace: "ns" },
+    spec: { nodeName, containers: [{ name: "c", resources: { requests } }] },
+  }
+}
+
+function metric(name: string, cpu: string, memory: string): NodeMetrics {
+  return {
+    apiVersion: "metrics.k8s.io/v1beta1",
+    kind: "NodeMetrics",
+    metadata: { name },
+    usage: { cpu, memory },
+  }
+}
+
+describe("aggregateNodeResources", () => {
+  it("returns zeroed standard totals for an empty cluster", () => {
+    const a = aggregateNodeResources([], [], undefined)
+    expect(a.standard.cpu).toEqual({ capacity: 0, allocatable: 0, requested: 0 })
+    expect(a.standard.memory).toEqual({ capacity: 0, allocatable: 0, requested: 0 })
+    expect(a.standard["ephemeral-storage"]).toEqual({
+      capacity: 0,
+      allocatable: 0,
+      requested: 0,
+    })
+    expect(a.standard.pods).toEqual({ capacity: 0, allocatable: 0, requested: 0 })
+    expect(a.extended).toEqual({})
+  })
+
+  it("sums capacity and allocatable across nodes", () => {
+    const a = aggregateNodeResources(
+      [
+        node("a", { cpu: "4", memory: "8Gi", "ephemeral-storage": "100Gi", pods: "110" }),
+        node("b", { cpu: "8", memory: "32Gi", "ephemeral-storage": "500Gi", pods: "220" }),
+      ],
+      [],
+      undefined,
+    )
+    expect(a.standard.cpu.capacity).toBe(12)
+    expect(a.standard.cpu.allocatable).toBe(12)
+    expect(a.standard.memory.capacity).toBe((8 + 32) * 1024 ** 3)
+    expect(a.standard.pods.capacity).toBe(330)
+  })
+
+  it("groups extended resource keys verbatim", () => {
+    const a = aggregateNodeResources(
+      [
+        node("a", { cpu: "4", "nvidia.com/gpu": "2" }),
+        node("b", { cpu: "8", "amd.com/gpu": "1" }),
+      ],
+      [],
+      undefined,
+    )
+    expect(a.extended["nvidia.com/gpu"].capacity).toBe(2)
+    expect(a.extended["amd.com/gpu"].capacity).toBe(1)
+  })
+
+  it("sums pod requests scoped to scheduled pods only", () => {
+    const a = aggregateNodeResources(
+      [node("a", { cpu: "8", memory: "16Gi" })],
+      [
+        pod("p1", "a", { cpu: "500m", memory: "1Gi" }),
+        pod("p2", "a", { cpu: "1", memory: "2Gi" }),
+        pod("unscheduled", undefined, { cpu: "100m", memory: "256Mi" }),
+      ],
+      undefined,
+    )
+    expect(a.standard.cpu.requested).toBe(1.5)
+    expect(a.standard.memory.requested).toBe(3 * 1024 ** 3)
+  })
+
+  it("skips pods scheduled on unknown nodes", () => {
+    const a = aggregateNodeResources(
+      [node("a", { cpu: "8" })],
+      [pod("rogue", "ghost-node", { cpu: "500m" })],
+      undefined,
+    )
+    expect(a.standard.cpu.requested).toBe(0)
+  })
+
+  it("sums extended-resource requests under the extended bucket", () => {
+    const a = aggregateNodeResources(
+      [node("a", { cpu: "8", "nvidia.com/gpu": "2" })],
+      [pod("p", "a", { cpu: "200m", "nvidia.com/gpu": "1" })],
+      undefined,
+    )
+    expect(a.extended["nvidia.com/gpu"].requested).toBe(1)
+  })
+
+  it("populates used for cpu and memory when metrics are supplied", () => {
+    const a = aggregateNodeResources(
+      [node("a", { cpu: "8", memory: "16Gi" })],
+      [],
+      [metric("a", "1500m", "4Gi")],
+    )
+    expect(a.standard.cpu.used).toBe(1.5)
+    expect(a.standard.memory.used).toBe(4 * 1024 ** 3)
+  })
+
+  it("leaves used undefined when metrics is undefined", () => {
+    const a = aggregateNodeResources(
+      [node("a", { cpu: "8", memory: "16Gi" })],
+      [],
+      undefined,
+    )
+    expect(a.standard.cpu.used).toBeUndefined()
+    expect(a.standard.memory.used).toBeUndefined()
+  })
+
+  it("never reports used for ephemeral-storage or pods", () => {
+    const a = aggregateNodeResources(
+      [node("a", { cpu: "8", memory: "16Gi", "ephemeral-storage": "100Gi", pods: "110" })],
+      [],
+      [metric("a", "1", "2Gi")],
+    )
+    expect(a.standard["ephemeral-storage"].used).toBeUndefined()
+    expect(a.standard.pods.used).toBeUndefined()
+  })
+})
diff --git a/apps/console/src/lib/cluster-usage/aggregate.ts b/apps/console/src/lib/cluster-usage/aggregate.ts
new file mode 100644
index 0000000..aab366d
--- /dev/null
+++ b/apps/console/src/lib/cluster-usage/aggregate.ts
@@ -0,0 +1,88 @@
+import { parseQuantity } from "../k8s-quantity.ts"
+import { getExtendedResourceKeys } from "./extended-resources.ts"
+import type {
+  AggregateResources,
+  Node,
+  NodeMetrics,
+  Pod,
+  ResourceTotals,
+  StandardResourceKey,
+} from "./types.ts"
+import { STANDARD_RESOURCE_KEYS } from "./types.ts"
+
+function emptyTotals(): ResourceTotals {
+  return { capacity: 0, allocatable: 0, requested: 0 } // `used` is left unset
+}
+
+/**
+ * Builds cluster-wide totals for every standard and extended resource.
+ *
+ * - capacity / allocatable: summed over each node's status.capacity and
+ *   status.allocatable; a key a node does not report counts as 0 for
+ *   that node.
+ * - requested: summed over the container requests of pods whose
+ *   spec.nodeName names a node in `nodes`. Unscheduled pods, pods on
+ *   unknown nodes, and keys that are neither standard nor a discovered
+ *   extended key are skipped.
+ * - used: set for cpu and memory only, and only when `metrics` is
+ *   given; metrics for nodes outside `nodes` are skipped.
+ */
+export function aggregateNodeResources(
+  nodes: Node[],
+  pods: Pod[],
+  metrics: NodeMetrics[] | undefined,
+): AggregateResources {
+  const nodeNames = new Set(nodes.map((n) => n.metadata.name))
+  const extendedKeys = getExtendedResourceKeys(nodes)
+  const standard: Record<StandardResourceKey, ResourceTotals> = {
+    cpu: emptyTotals(),
+    memory: emptyTotals(),
+    "ephemeral-storage": emptyTotals(),
+    pods: emptyTotals(),
+  }
+  // Every discovered extended key gets a bucket up front.
+  const extended: Record<string, ResourceTotals> = {}
+  for (const key of extendedKeys) extended[key] = emptyTotals()
+
+  // Node pass: capacity and allocatable for every tracked key.
+  nodes.forEach(({ status }) => {
+    const capacity = status?.capacity ?? {}
+    const allocatable = status?.allocatable ?? {}
+    const addNode = (totals: ResourceTotals, key: string): void => {
+      totals.capacity += parseQuantity(capacity[key] ?? "0")
+      totals.allocatable += parseQuantity(allocatable[key] ?? "0")
+    }
+    STANDARD_RESOURCE_KEYS.forEach((key) => addNode(standard[key], key))
+    extendedKeys.forEach((key) => addNode(extended[key], key))
+  })
+
+  // Pod pass: requests of pods placed on a node from `nodes`.
+  const isStandardKey = (key: string): key is StandardResourceKey =>
+    (STANDARD_RESOURCE_KEYS as readonly string[]).includes(key)
+  for (const { spec } of pods) {
+    const nodeName = spec?.nodeName
+    if (!(nodeName && nodeNames.has(nodeName))) continue
+    for (const { resources } of spec?.containers ?? []) {
+      for (const [key, value] of Object.entries(resources?.requests ?? {})) {
+        // Keys that are neither standard nor discovered have no bucket.
+        const totals = isStandardKey(key) ? standard[key] : extended[key]
+        if (totals) totals.requested += parseQuantity(value)
+      }
+    }
+  }
+
+  // Metrics pass: `used` is only written when metrics were provided.
+  if (metrics) {
+    const usage = { cpu: 0, memory: 0 }
+    metrics
+      .filter((m) => nodeNames.has(m.metadata.name))
+      .forEach((m) => {
+        usage.cpu += parseQuantity(m.usage?.cpu ?? "0")
+        usage.memory += parseQuantity(m.usage?.memory ?? "0")
+      })
+    standard.cpu.used = usage.cpu
+    standard.memory.used = usage.memory
+  }
+
+  return { standard, extended }
+}
diff --git a/apps/console/src/lib/cluster-usage/extended-resources.test.ts b/apps/console/src/lib/cluster-usage/extended-resources.test.ts
new file mode 100644
index 0000000..4f37d66
--- /dev/null
+++ b/apps/console/src/lib/cluster-usage/extended-resources.test.ts
@@ -0,0 +1,98 @@
+import { describe, it, expect } from "vitest"
+import {
+  getExtendedResourceKeys,
+  getExtendedResourcePrefixes,
+} from "./extended-resources.ts"
+import type { Node } from "./types.ts"
+
+function makeNode(name: string, capacity: Record<string, string>): Node {
+  return {
+    apiVersion: "v1",
+    kind: "Node",
+    metadata: { name },
+    status: { capacity },
+  }
+}
+
+describe("getExtendedResourceKeys", () => {
+  it("returns an empty array for no nodes", () => {
+    expect(getExtendedResourceKeys([])).toEqual([])
+  })
+
+  it("strips out standard resources cpu, memory, ephemeral-storage, pods", () => {
+    const nodes = [
+      makeNode("a", {
+        cpu: "8",
+        memory: "32Gi",
+        "ephemeral-storage": "500Gi",
+        pods: "110",
+      }),
+    ]
+    expect(getExtendedResourceKeys(nodes)).toEqual([])
+  })
+
+  it("strips hugepages-* in any variant", () => {
+    const nodes = [
+      makeNode("a", {
+        cpu: "8",
+        "hugepages-2Mi": "0",
+        "hugepages-1Gi": "0",
+      }),
+    ]
+    expect(getExtendedResourceKeys(nodes)).toEqual([])
+  })
+
+  it("collects extended keys verbatim", () => {
+    const nodes = [makeNode("a", { cpu: "8", "nvidia.com/gpu": "1" })]
+    expect(getExtendedResourceKeys(nodes)).toEqual(["nvidia.com/gpu"])
+  })
+
+  it("dedupes keys appearing on multiple nodes", () => {
+    const nodes = [
+      makeNode("a", { "nvidia.com/gpu": "1" }),
+      makeNode("b", { "nvidia.com/gpu": "2" }),
+    ]
+    expect(getExtendedResourceKeys(nodes)).toEqual(["nvidia.com/gpu"])
+  })
+
+  it("sorts keys alphabetically for stable rendering", () => {
+    const nodes = [
+      makeNode("a", { "nvidia.com/gpu": "1" }),
+      makeNode("b", { "amd.com/gpu": "1", "hami.io/vgpu": "4" }),
+    ]
+    expect(getExtendedResourceKeys(nodes)).toEqual([
+      "amd.com/gpu",
+      "hami.io/vgpu",
+      "nvidia.com/gpu",
+    ])
+  })
+
+  it("ignores nodes without status.capacity", () => {
+    const node: Node = {
+      apiVersion: "v1",
+      kind: "Node",
+      metadata: { name: "drained" },
+    }
+    expect(getExtendedResourceKeys([node])).toEqual([])
+  })
+})
+
+describe("getExtendedResourcePrefixes", () => {
+  it("splits each key on / and returns unique prefixes sorted alphabetically", () => {
+    expect(
+      getExtendedResourcePrefixes([
+        "nvidia.com/gpu",
+        "nvidia.com/gpu.shared",
+        "amd.com/gpu",
+      ]),
+    ).toEqual(["amd.com", "nvidia.com"])
+  })
+
+  it("handles keys without a / by returning the whole key as its prefix", () => {
+    expect(getExtendedResourcePrefixes(["weirdkey"])).toEqual(["weirdkey"])
+  })
+
+  it("returns an empty array for empty input", () => {
+    expect(getExtendedResourcePrefixes([])).toEqual([])
+  })
+})
diff --git a/apps/console/src/lib/cluster-usage/extended-resources.ts b/apps/console/src/lib/cluster-usage/extended-resources.ts
new file mode 100644
index 0000000..066bcf3
--- /dev/null
+++ b/apps/console/src/lib/cluster-usage/extended-resources.ts
@@ -0,0 +1,49 @@
+import type { Node } from "./types.ts"
+
+const STANDARD_KEYS = new Set([
+  "cpu",
+  "memory",
+  "ephemeral-storage",
+  "pods",
+])
+
+function isExtendedKey(key: string): boolean {
+  if (STANDARD_KEYS.has(key)) return false
+  if (key.startsWith("hugepages-")) return false
+  return true
+}
+
+/**
+ * Returns the sorted, deduplicated set of extended-resource keys present
+ * in any node's `status.capacity` across the cluster. Standard scheduler
+ * resources (cpu, memory, ephemeral-storage, pods) and every hugepages-*
+ * variant are filtered out — the rest is whatever the cluster exposes,
+ * rendered verbatim. There is intentionally no vendor allow-list: a new
+ * accelerator surfaces in the UI the moment a node exposing it joins.
+ */
+export function getExtendedResourceKeys(nodes: Node[]): string[] {
+  const set = new Set<string>()
+  for (const node of nodes) {
+    const capacity = node.status?.capacity
+    if (!capacity) continue
+    for (const key of Object.keys(capacity)) {
+      if (isExtendedKey(key)) set.add(key)
+    }
+  }
+  return [...set].sort()
+}
+
+/**
+ * Returns the sorted, deduplicated set of vendor prefixes derived from
+ * a list of extended-resource keys. A key without a `/` is its own
+ * prefix; this keeps the function total for malformed or non-namespaced
+ * keys.
+ */
+export function getExtendedResourcePrefixes(keys: string[]): string[] {
+  const set = new Set<string>()
+  for (const key of keys) {
+    const slash = key.indexOf("/")
+    set.add(slash === -1 ? key : key.slice(0, slash))
+  }
+  return [...set].sort()
+}
diff --git a/apps/console/src/lib/cluster-usage/per-node.test.ts b/apps/console/src/lib/cluster-usage/per-node.test.ts
new file mode 100644
index 0000000..7d54da6
--- /dev/null
+++ b/apps/console/src/lib/cluster-usage/per-node.test.ts
@@ -0,0 +1,210 @@
+import { describe, it, expect, vi, beforeAll, afterAll } from "vitest"
+import { derivePerNodeRows } from "./per-node.ts"
+import type { Node, Pod, NodeMetrics } from "./types.ts"
+
+beforeAll(() => {
+  vi.useFakeTimers()
+  vi.setSystemTime(new Date("2026-05-26T00:00:00Z"))
+})
+
+afterAll(() => {
+  vi.useRealTimers()
+})
+
+function nodeWith(
+  name: string,
+  fields: {
+    capacity?: Record<string, string>
+    allocatable?: Record<string, string>
+    labels?: Record<string, string>
+    ready?: boolean
+    pressure?: string[]
+    unschedulable?: boolean
+    taints?: Array<{ key: string; value?: string; effect: string }>
+    creationTimestamp?: string
+  } = {},
+): Node {
+  const capacity = fields.capacity ?? { cpu: "4", memory: "8Gi" }
+  const allocatable = fields.allocatable ?? capacity
+  const conditions: Node["status"] = { capacity, allocatable, conditions: [] }
+  conditions.conditions?.push({
+    type: "Ready",
+    status: fields.ready === false ? "False" : "True",
+  })
+  for (const p of fields.pressure ?? []) {
+    conditions.conditions?.push({ type: p, status: "True" })
+  }
+  return {
+    apiVersion: "v1",
+    kind: "Node",
+    metadata: {
+      name,
+      labels: fields.labels,
+      creationTimestamp: fields.creationTimestamp ?? "2026-05-25T00:00:00Z",
+    },
+    spec: { unschedulable: fields.unschedulable, taints: fields.taints },
+    status: conditions,
+  } as Node
+}
+
+function pod(
+  name: string,
+  nodeName: string | undefined,
+  requests: Record<string, string>,
+): Pod {
+  return {
+    apiVersion: "v1",
+    kind: "Pod",
+    metadata: { name, namespace: "ns" },
+    spec: { nodeName, containers: [{ name: "c", resources: { requests } }] },
+  }
+}
+
+function metric(name: string, cpu: string, memory: string): NodeMetrics {
+  return {
+    apiVersion: "metrics.k8s.io/v1beta1",
+    kind: "NodeMetrics",
+    metadata: { name },
+    usage: { cpu, memory },
+  }
+}
+
+describe("derivePerNodeRows", () => {
+  it("returns one row per node, sorted by name", () => {
+    const rows = derivePerNodeRows(
+      [nodeWith("b"), nodeWith("a"), nodeWith("c")],
+      [],
+      undefined,
+    )
+    expect(rows.map((r) => r.name)).toEqual(["a", "b", "c"])
+  })
+
+  it("computes age relative to the stubbed clock", () => {
+    const rows = derivePerNodeRows(
+      [nodeWith("a", { creationTimestamp: "2026-05-25T03:00:00Z" })],
+      [],
+      undefined,
+    )
+    expect(rows[0].age).toBe("21h")
+  })
+
+  it("detects ready vs notready conditions", () => {
+    const rows = derivePerNodeRows(
+      [nodeWith("a", { ready: true }), nodeWith("b", { ready: false })],
+      [],
+      undefined,
+    )
+    expect(rows[0].ready).toBe(true)
+    expect(rows[1].ready).toBe(false)
+  })
+
+  it("collects pressure conditions with status=True", () => {
+    const rows = derivePerNodeRows(
+      [nodeWith("a", { pressure: ["MemoryPressure", "DiskPressure"] })],
+      [],
+      undefined,
+    )
+    expect(rows[0].pressureConditions).toEqual(["MemoryPressure", "DiskPressure"])
+  })
+
+  it("derives roles from node-role.kubernetes.io labels", () => {
+    const rows = derivePerNodeRows(
+      [
+        nodeWith("a", {
+          labels: {
+            "node-role.kubernetes.io/control-plane": "",
+            "node-role.kubernetes.io/worker": "",
+          },
+        }),
+      ],
+      [],
+      undefined,
+    )
+    expect(rows[0].roles.sort()).toEqual(["control-plane", "worker"])
+  })
+
+  it("falls back to kubernetes.io/role label when present", () => {
+    const rows = derivePerNodeRows(
+      [nodeWith("a", { labels: { "kubernetes.io/role": "ingress" } })],
+      [],
+      undefined,
+    )
+    expect(rows[0].roles).toEqual(["ingress"])
+  })
+
+  it("returns no roles for a label-less node", () => {
+    const rows = derivePerNodeRows([nodeWith("a", { labels: {} })], [], undefined)
+    expect(rows[0].roles).toEqual([])
+  })
+
+  it("reports schedulable=false when spec.unschedulable=true", () => {
+    const rows = derivePerNodeRows(
+      [nodeWith("a", { unschedulable: true })],
+      [],
+      undefined,
+    )
+    expect(rows[0].schedulable).toBe(false)
+  })
+
+  it("collects taint keys when present", () => {
+    const rows = derivePerNodeRows(
+      [
+        nodeWith("a", {
+          taints: [{ key: "node.kubernetes.io/unschedulable", effect: "NoSchedule" }],
+        }),
+      ],
+      [],
+      undefined,
+    )
+    expect(rows[0].taints).toEqual(["node.kubernetes.io/unschedulable"])
+  })
+
+  it("scopes requested totals to pods scheduled on that node", () => {
+    const rows = derivePerNodeRows(
+      [
+        nodeWith("a", { capacity: { cpu: "8", memory: "16Gi" } }),
+        nodeWith("b", { capacity: { cpu: "8", memory: "16Gi" } }),
+      ],
+      [
+        pod("p1", "a", { cpu: "500m", memory: "1Gi" }),
+        pod("p2", "b", { cpu: "1", memory: "4Gi" }),
+      ],
+      undefined,
+    )
+    expect(rows[0].standard.cpu.requested).toBe(0.5)
+    expect(rows[1].standard.cpu.requested).toBe(1)
+  })
+
+  it("populates used per node when metrics are supplied", () => {
+    const rows = derivePerNodeRows(
+      [nodeWith("a", { capacity: { cpu: "8", memory: "16Gi" } })],
+      [],
+      [metric("a", "1500m", "4Gi")],
+    )
+    expect(rows[0].standard.cpu.used).toBe(1.5)
+    expect(rows[0].standard.memory.used).toBe(4 * 1024 ** 3)
+  })
+
+  it("leaves used undefined per node when metrics are undefined", () => {
+    const rows = derivePerNodeRows(
+      [nodeWith("a", { capacity: { cpu: "8", memory: "16Gi" } })],
+      [],
+      undefined,
+    )
+    expect(rows[0].standard.cpu.used).toBeUndefined()
+  })
+
+  it("includes extended-resource columns per node", () => {
+    const rows = derivePerNodeRows(
+      [
+        nodeWith("a", { capacity: { cpu: "8", "nvidia.com/gpu": "1" } }),
+        nodeWith("b", { capacity: { cpu: "8" } }),
+      ],
+      [pod("p", "a", { "nvidia.com/gpu": "1" })],
+      undefined,
+    )
+    expect(rows[0].extended["nvidia.com/gpu"].capacity).toBe(1)
+    expect(rows[0].extended["nvidia.com/gpu"].requested).toBe(1)
+    expect(rows[1].extended["nvidia.com/gpu"]).toBeUndefined()
+  })
+})
diff --git a/apps/console/src/lib/cluster-usage/per-node.ts b/apps/console/src/lib/cluster-usage/per-node.ts
new file mode 100644
index 0000000..89e80ef
--- /dev/null
+++ b/apps/console/src/lib/cluster-usage/per-node.ts
@@ -0,0 +1,143 @@
+import { parseQuantity } from "../k8s-quantity.ts"
+import { formatAge } from "../status.ts"
+import type {
+  Node,
+  NodeMetrics,
+  NodeRow,
+  Pod,
+  ResourceTotals,
+  StandardResourceKey,
+} from "./types.ts"
+import { STANDARD_RESOURCE_KEYS } from "./types.ts"
+
+const PRESSURE_TYPES = new Set([
+  "MemoryPressure",
+  "DiskPressure",
+  "PIDPressure",
+  "NetworkUnavailable",
+])
+
+const STANDARD_KEYS = new Set<string>(STANDARD_RESOURCE_KEYS)
+
+function rolesFromLabels(labels: Record<string, string> | undefined): string[] {
+  if (!labels) return []
+  const roles = new Set<string>()
+  for (const key of Object.keys(labels)) {
+    if (key.startsWith("node-role.kubernetes.io/")) {
+      roles.add(key.slice("node-role.kubernetes.io/".length))
+    }
+  }
+  if (roles.size === 0) {
+    const legacy = labels["kubernetes.io/role"]
+    if (legacy) roles.add(legacy)
+  }
+  return [...roles]
+}
+
+function emptyTotals(): ResourceTotals {
+  return { capacity: 0, allocatable: 0, requested: 0 }
+}
+
+function isExtendedKey(key: string): boolean {
+  if (STANDARD_KEYS.has(key)) return false
+  if (key.startsWith("hugepages-")) return false
+  return true
+}
+
+/**
+ * Builds one NodeRow per cluster node, sorted by name. Each row carries
+ * the totals for that node only — capacity and allocatable from
+ * node.status, requested summed from pods bound to that node, and used
+ * from the matching NodeMetrics entry when metrics are supplied.
+ *
+ * Pods without a spec.nodeName or scheduled on an unknown node are
+ * skipped so per-node requested totals stay consistent with the
+ * cluster-wide aggregate computed by aggregateNodeResources.
+ */
+export function derivePerNodeRows(
+  nodes: Node[],
+  pods: Pod[],
+  metrics: NodeMetrics[] | undefined,
+): NodeRow[] {
+  const metricsByName = new Map<string, NodeMetrics>()
+  for (const m of metrics ?? []) {
+    metricsByName.set(m.metadata.name, m)
+  }
+
+  const podsByNode = new Map<string, Pod[]>()
+  for (const pod of pods) {
+    const nodeName = pod.spec?.nodeName
+    if (!nodeName) continue
+    const bucket = podsByNode.get(nodeName) ?? []
+    bucket.push(pod)
+    podsByNode.set(nodeName, bucket)
+  }
+
+  const rows: NodeRow[] = []
+  for (const node of nodes) {
+    const capacity = node.status?.capacity ?? {}
+    const allocatable = node.status?.allocatable ?? {}
+    const standard: Record<StandardResourceKey, ResourceTotals> = {
+      cpu: emptyTotals(),
+      memory: emptyTotals(),
+      "ephemeral-storage": emptyTotals(),
+      pods: emptyTotals(),
+    }
+    const extended: Record<string, ResourceTotals> = {}
+
+    for (const key of STANDARD_RESOURCE_KEYS) {
+      standard[key].capacity = parseQuantity(capacity[key] ?? "0")
+      standard[key].allocatable = parseQuantity(allocatable[key] ?? "0")
+    }
+    for (const key of Object.keys(capacity)) {
+      if (!isExtendedKey(key)) continue
+      extended[key] = {
+        capacity: parseQuantity(capacity[key] ?? "0"),
+        allocatable: parseQuantity(allocatable[key] ?? "0"),
+        requested: 0,
+      }
+    }
+
+    for (const pod of podsByNode.get(node.metadata.name) ?? []) {
+      for (const container of pod.spec?.containers ?? []) {
+        const requests = container.resources?.requests
+        if (!requests) continue
+        for (const [key, value] of Object.entries(requests)) {
+          if (STANDARD_KEYS.has(key)) {
+            standard[key as StandardResourceKey].requested += parseQuantity(value)
+          } else if (extended[key]) {
+            extended[key].requested += parseQuantity(value)
+          }
+        }
+      }
+    }
+
+    const metric = metricsByName.get(node.metadata.name)
+    if (metric) {
+      standard.cpu.used = parseQuantity(metric.usage?.cpu ?? "0")
+      standard.memory.used = parseQuantity(metric.usage?.memory ?? "0")
+    }
+
+    const conditions = node.status?.conditions ?? []
+    const readyCondition = conditions.find((c) => c.type === "Ready")
+    const pressureConditions = conditions
+      .filter((c) => PRESSURE_TYPES.has(c.type) && c.status === "True")
+      .map((c) => c.type)
+
+    rows.push({
+      name: node.metadata.name,
+      ready: readyCondition?.status === "True",
+      schedulable: !node.spec?.unschedulable,
+      pressureConditions,
+      roles: rolesFromLabels(node.metadata.labels),
+      taints: (node.spec?.taints ?? []).map((t) => t.key),
+      age: formatAge(node.metadata.creationTimestamp),
+      creationTimestamp: node.metadata.creationTimestamp,
+      standard,
+      extended,
+    })
+  }
+
+  rows.sort((a, b) => a.name.localeCompare(b.name))
+  return rows
+}
diff --git a/apps/console/src/lib/cluster-usage/types.ts b/apps/console/src/lib/cluster-usage/types.ts
new file mode 100644
index 0000000..2983037
--- /dev/null
+++ b/apps/console/src/lib/cluster-usage/types.ts
@@ -0,0 +1,110 @@
+import type { K8sResource } from "@cozystack/k8s-client"
+
+/**
+ * Minimal Kubernetes Node shape needed by the cluster-usage page. Only
+ * the fields the page actually reads are declared; the rest of the K8s
+ * Node object is ignored. Status fields are optional to match the
+ * realistic case where a NotReady node may not have populated all of
+ * its capacity / allocatable map yet.
+ */
+
+export interface NodeCondition {
+  type: string
+  status: "True" | "False" | "Unknown"
+  reason?: string
+  message?: string
+  lastTransitionTime?: string
+}
+
+export interface NodeStatus {
+  capacity?: Record<string, string>
+  allocatable?: Record<string, string>
+  conditions?: NodeCondition[]
+}
+
+export interface NodeTaint {
+  key: string
+  value?: string
+  effect: string
+  timeAdded?: string
+}
+
+export interface NodeSpec {
+  unschedulable?: boolean
+  taints?: NodeTaint[]
+  providerID?: string
+}
+
+export type Node = K8sResource<NodeSpec, NodeStatus>
+
+export interface PodContainer {
+  name: string
+  resources?: {
+    requests?: Record<string, string>
+    limits?: Record<string, string>
+  }
+}
+
+export interface PodSpec {
+  nodeName?: string
+  containers: PodContainer[]
+}
+
+export interface PodStatus {
+  phase?: string
+}
+
+export type Pod = K8sResource<PodSpec, PodStatus>
+
+export interface NodeMetricsUsage {
+  cpu: string
+  memory: string
+}
+
+export type NodeMetrics = K8sResource<unknown, unknown> & {
+  usage?: NodeMetricsUsage
+  timestamp?: string
+  window?: string
+}
+
+/**
+ * Standard, well-known resource keys present in node.status.capacity.
+ * Everything else is treated as an extended resource and rendered
+ * verbatim by the cluster-usage page.
+ */
+export const STANDARD_RESOURCE_KEYS = ["cpu", "memory", "ephemeral-storage", "pods"] as const
+
+export type StandardResourceKey = (typeof STANDARD_RESOURCE_KEYS)[number]
+
+/** A resource snapshot in canonical units — cores for CPU, bytes for memory and storage, plain counts for pods and extended resources. */
+export interface ResourceTotals {
+  capacity: number
+  allocatable: number
+  requested: number
+  /** Present only when metrics.k8s.io reported a usage figure for this resource. */
+  used?: number
+}
+
+export interface AggregateResources {
+  /** Standard resources keyed by their canonical name. */
+  standard: Record<StandardResourceKey, ResourceTotals>
+  /** Extended resources keyed by their full Kubernetes key (e.g. `nvidia.com/gpu`). */
+  extended: Record<string, ResourceTotals>
+}
+
+export interface NodeRow {
+  name: string
+  ready: boolean
+  schedulable: boolean
+  /** Pressure-type conditions (MemoryPressure, DiskPressure, PIDPressure, NetworkUnavailable) found with status=True. */
+  pressureConditions: string[]
+  /** Roles from `node-role.kubernetes.io/*` labels; `kubernetes.io/role` is used only when none of those are present. */
+  roles: string[]
+  taints: string[]
+  age: string
+  creationTimestamp?: string
+  /** Standard resource totals on this single node. */
+  standard: Record<StandardResourceKey, ResourceTotals>
+  /** Extended resource totals on this single node, keyed by full key. */
+  extended: Record<string, ResourceTotals>
+}

From f2e65fe9204afd8350b3f529a45e64d42603a3db Mon Sep 17 00:00:00 2001
From: Aleksei Sviridkin <f@lex.la>
Date: Tue, 26 May 2026 17:57:34 +0300
Subject: [PATCH 07/20] feat(console): add useClusterUsageData composite hook
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wires together nodes (watch), cluster-wide pods (watch), API group
discovery for metrics.k8s.io and — only when that group is registered —
a 30-second poll of NodeMetrics. metrics.k8s.io is not watchable, so a
refetch interval is the only option; the rest of the page works fine
without it.

Any error from the metrics endpoint — a 403 being the expected case —
is absorbed as 'no usage data' rather than promoted to a page-level
error: the Used overlay simply disappears, the rest of the panel still
renders. Nodes-list or pods-list errors are surfaced as the hook's
error so the page can render an explicit failure state.

The 30-second cadence is exported as a named constant so tests can
assert on it without duplicating the magic number.

Assisted-By: Claude <noreply@anthropic.com>
Signed-off-by: Aleksei Sviridkin <f@lex.la>
---
 .../src/hooks/useClusterUsageData.test.tsx    | 156 ++++++++++++++++++
 .../console/src/hooks/useClusterUsageData.tsx | 111 +++++++++++++
 2 files changed, 267 insertions(+)
 create mode 100644 apps/console/src/hooks/useClusterUsageData.test.tsx
 create mode 100644 apps/console/src/hooks/useClusterUsageData.tsx

diff --git a/apps/console/src/hooks/useClusterUsageData.test.tsx b/apps/console/src/hooks/useClusterUsageData.test.tsx
new file mode 100644
index 0000000..68bc250
--- /dev/null
+++ b/apps/console/src/hooks/useClusterUsageData.test.tsx
@@ -0,0 +1,156 @@
+import { describe, it, expect, vi } from "vitest"
+import { renderHook, waitFor } from "@testing-library/react"
+import { QueryClient, QueryClientProvider } from "@tanstack/react-query"
+import {
+  K8sClient,
+  K8sProvider,
+  K8sApiError,
+  type APIGroupList,
+  type K8sList,
+} from "@cozystack/k8s-client"
+import type { ReactNode } from "react"
+import { useClusterUsageData } from "./useClusterUsageData.tsx"
+import { nodesListFixture } from "../test-utils/fixtures/nodes.ts"
+import { podsListFixture } from "../test-utils/fixtures/pods.ts"
+import { nodeMetricsListFixture } from "../test-utils/fixtures/node-metrics.ts"
+
+function makeWrapper(client: K8sClient) {
+  const queryClient = new QueryClient({
+    defaultOptions: { queries: { retry: false, gcTime: 0 } },
+  })
+  return function Wrapper({ children }: { children: ReactNode }) {
+    return (
+      <QueryClientProvider client={queryClient}>
+        <K8sProvider client={client} queryClient={queryClient}>
+          {children}
+        </K8sProvider>
+      </QueryClientProvider>
+    )
+  }
+}
+
+const groupsWithMetrics: APIGroupList = {
+  kind: "APIGroupList",
+  apiVersion: "v1",
+  groups: [
+    {
+      name: "metrics.k8s.io",
+      versions: [{ groupVersion: "metrics.k8s.io/v1beta1", version: "v1beta1" }],
+      preferredVersion: { groupVersion: "metrics.k8s.io/v1beta1", version: "v1beta1" },
+    },
+  ],
+}
+
+const groupsWithoutMetrics: APIGroupList = {
+  kind: "APIGroupList",
+  apiVersion: "v1",
+  groups: [
+    {
+      name: "apps",
+      versions: [{ groupVersion: "apps/v1", version: "v1" }],
+      preferredVersion: { groupVersion: "apps/v1", version: "v1" },
+    },
+  ],
+}
+
+function stubList(
+  client: K8sClient,
+  responses: Partial<Record<string, K8sList<unknown> | K8sApiError>>,
+) {
+  vi.spyOn(client, "list").mockImplementation(async (group, _v, plural) => {
+    // metrics.k8s.io reuses the plural "nodes", so its stub is keyed as "<plural>_metrics".
+    const r = responses[group === "metrics.k8s.io" ? `${plural}_metrics` : plural]
+    if (r instanceof K8sApiError) throw r
+    const fallback = { apiVersion: "v1", kind: `${plural}List`, metadata: {}, items: [] }
+    return (r ?? fallback) as K8sList<unknown>
+  })
+}
+
+describe("useClusterUsageData", () => {
+  it("reports isLoading=true on first render", () => {
+    const client = new K8sClient()
+    stubList(client, {})
+    vi.spyOn(client, "getApiGroups").mockImplementation(() => new Promise(() => {}))
+    const { result } = renderHook(() => useClusterUsageData(), {
+      wrapper: makeWrapper(client),
+    })
+    expect(result.current.isLoading).toBe(true)
+  })
+
+  it("returns aggregates and per-node rows derived from nodes + pods + metrics", async () => {
+    const client = new K8sClient()
+    stubList(client, {
+      nodes: nodesListFixture,
+      pods: podsListFixture,
+      nodes_metrics: nodeMetricsListFixture,
+    })
+    vi.spyOn(client, "getApiGroups").mockResolvedValue(groupsWithMetrics)
+    const { result } = renderHook(() => useClusterUsageData(), {
+      wrapper: makeWrapper(client),
+    })
+    await waitFor(() => expect(result.current.isLoading).toBe(false))
+    expect(result.current.metricsAvailable).toBe(true)
+    expect(result.current.perNode.map((r) => r.name)).toEqual([
+      "cp-1",
+      "worker-1",
+      "worker-gpu-1",
+    ])
+    expect(result.current.aggregates.extended["nvidia.com/gpu"].capacity).toBe(1)
+  })
+
+  it("never lists NodeMetrics when metrics.k8s.io is not registered", async () => {
+    const client = new K8sClient()
+    const listSpy = vi.spyOn(client, "list").mockImplementation(
+      async (_g, _v, plural) => {
+        if (plural === "nodes")
+          return nodesListFixture as unknown as K8sList<unknown>
+        if (plural === "pods")
+          return podsListFixture as unknown as K8sList<unknown>
+        return { apiVersion: "v1", kind: `${plural}List`, metadata: {}, items: [] }
+      },
+    )
+    vi.spyOn(client, "getApiGroups").mockResolvedValue(groupsWithoutMetrics)
+    const { result } = renderHook(() => useClusterUsageData(), {
+      wrapper: makeWrapper(client),
+    })
+    await waitFor(() => expect(result.current.isLoading).toBe(false))
+    expect(result.current.metricsAvailable).toBe(false)
+    const metricsCalls = listSpy.mock.calls.filter(
+      (call) => call[0] === "metrics.k8s.io",
+    )
+    expect(metricsCalls).toHaveLength(0)
+    expect(result.current.aggregates.standard.cpu.used).toBeUndefined()
+  })
+
+  it("treats a metrics-API 403 as 'no usage data' without crashing", async () => {
+    const client = new K8sClient()
+    vi.spyOn(client, "list").mockImplementation(async (g, _v, plural) => {
+      if (g === "metrics.k8s.io") throw new K8sApiError(403, "forbidden")
+      if (plural === "nodes") return nodesListFixture as unknown as K8sList<unknown>
+      if (plural === "pods") return podsListFixture as unknown as K8sList<unknown>
+      return { apiVersion: "v1", kind: `${plural}List`, metadata: {}, items: [] }
+    })
+    vi.spyOn(client, "getApiGroups").mockResolvedValue(groupsWithMetrics)
+    const { result } = renderHook(() => useClusterUsageData(), {
+      wrapper: makeWrapper(client),
+    })
+    await waitFor(() => expect(result.current.isLoading).toBe(false))
+    expect(result.current.metricsAvailable).toBe(true)
+    expect(result.current.aggregates.standard.cpu.used).toBeUndefined()
+    expect(result.current.error).toBeNull()
+  })
+
+  it("surfaces a nodes-list error as the hook error", async () => {
+    const client = new K8sClient()
+    vi.spyOn(client, "list").mockImplementation(async (_g, _v, plural) => {
+      if (plural === "nodes") throw new K8sApiError(500, "boom")
+      return { apiVersion: "v1", kind: `${plural}List`, metadata: {}, items: [] }
+    })
+    vi.spyOn(client, "getApiGroups").mockResolvedValue(groupsWithoutMetrics)
+    const { result } = renderHook(() => useClusterUsageData(), {
+      wrapper: makeWrapper(client),
+    })
+    await waitFor(() => expect(result.current.error).toBeTruthy())
+    expect(result.current.error?.message).toContain("boom")
+  })
+})
diff --git a/apps/console/src/hooks/useClusterUsageData.tsx b/apps/console/src/hooks/useClusterUsageData.tsx
new file mode 100644
index 0000000..51d9da0
--- /dev/null
+++ b/apps/console/src/hooks/useClusterUsageData.tsx
@@ -0,0 +1,111 @@
+import { useMemo } from "react"
+import {
+  useK8sList,
+  useApiGroupAvailable,
+  type K8sList,
+} from "@cozystack/k8s-client"
+import { aggregateNodeResources } from "../lib/cluster-usage/aggregate.ts"
+import { derivePerNodeRows } from "../lib/cluster-usage/per-node.ts"
+import type {
+  AggregateResources,
+  Node,
+  NodeMetrics,
+  NodeRow,
+  Pod,
+} from "../lib/cluster-usage/types.ts"
+
+/**
+ * Polling interval for NodeMetrics: twice the 15s --metric-resolution
+ * that metrics-server is commonly deployed with, so each poll sees a
+ * fresh sample; a faster cadence mostly repeats values and wastes requests.
+ */
+export const CLUSTER_USAGE_METRICS_REFETCH_MS = 30_000
+
+interface ClusterUsageData {
+  nodes: Node[]
+  pods: Pod[]
+  metrics: NodeMetrics[] | undefined
+  aggregates: AggregateResources
+  perNode: NodeRow[]
+  isLoading: boolean
+  error: Error | null
+  metricsAvailable: boolean
+}
+
+/**
+ * Composite hook that powers the Cluster Usage admin page. Subscribes
+ * to nodes and pods via K8s watches (low cost, push-based updates), and
+ * — only when metrics.k8s.io is discovered on the cluster — polls
+ * NodeMetrics every CLUSTER_USAGE_METRICS_REFETCH_MS. metrics.k8s.io is
+ * not watchable, so a refetch interval is the only option; the rest of
+ * the page works fine without it.
+ *
+ * Any failure of the metrics fetch (typically a 403) is treated as 'no
+ * usage data, but no page-level error' — the Used overlay disappears,
+ * the rest of the panel still renders. A nodes-list error, or failing
+ * that a pods-list error, becomes the hook's `error` for the page.
+ */
+export function useClusterUsageData(): ClusterUsageData {
+  const nodesQuery = useK8sList<Node>({
+    apiGroup: "",
+    apiVersion: "v1",
+    plural: "nodes",
+  })
+
+  const podsQuery = useK8sList<Pod>({
+    apiGroup: "",
+    apiVersion: "v1",
+    plural: "pods",
+  })
+
+  const { available: metricsAvailable, isLoading: metricsDiscoveryLoading } =
+    useApiGroupAvailable("metrics.k8s.io")
+
+  const metricsQuery = useK8sList<NodeMetrics>(
+    {
+      apiGroup: "metrics.k8s.io",
+      apiVersion: "v1beta1",
+      plural: "nodes",
+    },
+    {
+      enabled: metricsAvailable,
+      watch: false,
+      refetchInterval: CLUSTER_USAGE_METRICS_REFETCH_MS,
+    },
+  )
+
+  // Memoised: a bare `?? []` would mint a new array on every render while data is absent.
+  const nodes = useMemo(() => nodesQuery.data?.items ?? [], [nodesQuery.data])
+  const pods = useMemo(() => podsQuery.data?.items ?? [], [podsQuery.data])
+  const metricsItems = metricsQueryItems(metricsQuery.data, metricsQuery.error)
+
+  const aggregates = useMemo(
+    () => aggregateNodeResources(nodes, pods, metricsItems),
+    [nodes, pods, metricsItems],
+  )
+  const perNode = useMemo(
+    () => derivePerNodeRows(nodes, pods, metricsItems),
+    [nodes, pods, metricsItems],
+  )
+
+  return {
+    nodes,
+    pods,
+    metrics: metricsItems,
+    aggregates,
+    perNode,
+    isLoading: nodesQuery.isLoading || podsQuery.isLoading || metricsDiscoveryLoading,
+    // Metrics errors do not become page errors — usage simply disappears.
+    error: (nodesQuery.error as Error | null) ?? (podsQuery.error as Error | null) ?? null,
+    metricsAvailable,
+  }
+}
+
+function metricsQueryItems(
+  list: K8sList<NodeMetrics> | undefined,
+  error: unknown,
+): NodeMetrics[] | undefined {
+  if (error) return undefined
+  if (!list) return undefined
+  return list.items
+}

From 9aa41b20a523418452e2137ae8c5349775222ca5 Mon Sep 17 00:00:00 2001
From: Aleksei Sviridkin <f@lex.la>
Date: Tue, 26 May 2026 17:59:14 +0300
Subject: [PATCH 08/20] feat(console): ClusterUsageAggregates panel and
 ResourceCard
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Top panel of the Cluster Usage admin page. ResourceCard renders one
aggregate-resource summary with Capacity, Allocatable, an always-on
Requested progress bar, and an optional Used progress bar that appears
only when totals.used is defined — which mirrors the case where
metrics.k8s.io is discovered for cpu and memory.

ClusterUsageAggregates lays out four fixed cards (CPU, Memory, Storage,
Pods) followed by one card per extended-resource key found in node
capacity, sorted alphabetically with the full key as the title. The
extended section disappears entirely when no extended resources are
present — no empty 'No GPUs found' placeholder, mirroring the
discovery-first contract for the whole page.

Progress bar colour rises with allocation: ≤70% neutral, 70-90%
amber, >90% red. Allocatable=0 (very early node lifecycle) renders em
dashes for every number and omits the bar rather than dividing by zero.

Assisted-By: Claude <noreply@anthropic.com>
Signed-off-by: Aleksei Sviridkin <f@lex.la>
---
 .../ClusterUsageAggregates.test.tsx           |  65 +++++++++
 .../cluster-usage/ClusterUsageAggregates.tsx  |  49 +++++++
 .../cluster-usage/ResourceCard.test.tsx       |  76 +++++++++++
 .../components/cluster-usage/ResourceCard.tsx | 127 ++++++++++++++++++
 4 files changed, 317 insertions(+)
 create mode 100644 apps/console/src/components/cluster-usage/ClusterUsageAggregates.test.tsx
 create mode 100644 apps/console/src/components/cluster-usage/ClusterUsageAggregates.tsx
 create mode 100644 apps/console/src/components/cluster-usage/ResourceCard.test.tsx
 create mode 100644 apps/console/src/components/cluster-usage/ResourceCard.tsx

diff --git a/apps/console/src/components/cluster-usage/ClusterUsageAggregates.test.tsx b/apps/console/src/components/cluster-usage/ClusterUsageAggregates.test.tsx
new file mode 100644
index 0000000..6546002
--- /dev/null
+++ b/apps/console/src/components/cluster-usage/ClusterUsageAggregates.test.tsx
@@ -0,0 +1,65 @@
+import { describe, it, expect } from "vitest"
+import { render, screen } from "@testing-library/react"
+import { ClusterUsageAggregates } from "./ClusterUsageAggregates.tsx"
+import type { AggregateResources } from "../../lib/cluster-usage/types.ts"
+
+function empty(): AggregateResources {
+  return {
+    standard: {
+      cpu: { capacity: 0, allocatable: 0, requested: 0 },
+      memory: { capacity: 0, allocatable: 0, requested: 0 },
+      "ephemeral-storage": { capacity: 0, allocatable: 0, requested: 0 },
+      pods: { capacity: 0, allocatable: 0, requested: 0 },
+    },
+    extended: {},
+  }
+}
+
+describe("ClusterUsageAggregates", () => {
+  it("renders the four standard cards in order CPU, Memory, Storage, Pods", () => {
+    render(<ClusterUsageAggregates aggregates={empty()} />)
+    const expected = ["CPU", "Memory", "Storage", "Pods"]
+    const headings = screen.getAllByText(/CPU|Memory|Storage|Pods/i)
+    // Matches come back in document order; keep only the exact card titles.
+    const labels = headings.map((h) => h.textContent ?? "")
+    expect(labels.filter((t) => expected.includes(t))).toEqual(expected)
+  })
+
+  it("does not render the extended-resources section when none are present", () => {
+    render(<ClusterUsageAggregates aggregates={empty()} />)
+    expect(screen.queryByText(/extended resources/i)).toBeNull()
+  })
+
+  it("renders one card per extended-resource key with the full key as the title", () => {
+    const agg = empty()
+    agg.extended["nvidia.com/gpu"] = { capacity: 4, allocatable: 4, requested: 1 }
+    agg.extended["amd.com/gpu"] = { capacity: 2, allocatable: 2, requested: 0 }
+    render(<ClusterUsageAggregates aggregates={agg} />)
+    expect(screen.getByText("nvidia.com/gpu")).toBeInTheDocument()
+    expect(screen.getByText("amd.com/gpu")).toBeInTheDocument()
+  })
+
+  it("sorts extended-resource cards alphabetically by key", () => {
+    const agg = empty()
+    agg.extended["nvidia.com/gpu"] = { capacity: 4, allocatable: 4, requested: 1 }
+    agg.extended["amd.com/gpu"] = { capacity: 2, allocatable: 2, requested: 0 }
+    const { container } = render(<ClusterUsageAggregates aggregates={agg} />)
+    const titles = Array.from(container.querySelectorAll('[data-extended-card]')).map(
+      (el) => el.getAttribute("data-extended-card"),
+    )
+    expect(titles).toEqual(["amd.com/gpu", "nvidia.com/gpu"])
+  })
+
+  it("does not render a 'Used' line on any card when no card has used data", () => {
+    render(<ClusterUsageAggregates aggregates={empty()} />)
+    expect(screen.queryByText(/used/i)).toBeNull()
+  })
+
+  it("renders the 'Used' line on standard cards when usage data is present", () => {
+    const agg = empty()
+    agg.standard.cpu = { capacity: 8, allocatable: 8, requested: 2, used: 1 }
+    agg.standard.memory = { capacity: 16 * 1024 ** 3, allocatable: 16 * 1024 ** 3, requested: 0, used: 4 * 1024 ** 3 }
+    render(<ClusterUsageAggregates aggregates={agg} />)
+    expect(screen.getAllByText(/used/i).length).toBeGreaterThan(0)
+  })
+})
diff --git a/apps/console/src/components/cluster-usage/ClusterUsageAggregates.tsx b/apps/console/src/components/cluster-usage/ClusterUsageAggregates.tsx
new file mode 100644
index 0000000..d9e390c
--- /dev/null
+++ b/apps/console/src/components/cluster-usage/ClusterUsageAggregates.tsx
@@ -0,0 +1,49 @@
+import { ResourceCard } from "./ResourceCard.tsx"
+import type { AggregateResources } from "../../lib/cluster-usage/types.ts"
+
+interface ClusterUsageAggregatesProps {
+  aggregates: AggregateResources
+}
+
+/**
+ * Top panel of the Cluster Usage admin page: four fixed cards for the
+ * standard scheduler resources, followed by one card per extended
+ * resource discovered in node.status.capacity (alphabetical, full key
+ * verbatim). The extended section disappears entirely when no extended
+ * resources are present — no empty 'No GPUs found' state.
+ */
+export function ClusterUsageAggregates({ aggregates }: ClusterUsageAggregatesProps) {
+  const extendedKeys = Object.keys(aggregates.extended).sort()
+  return (
+    <div className="space-y-6">
+      <div className="grid grid-cols-1 gap-4 sm:grid-cols-2 lg:grid-cols-4">
+        <ResourceCard title="CPU" format="cpu" totals={aggregates.standard.cpu} />
+        <ResourceCard title="Memory" format="bytes" totals={aggregates.standard.memory} />
+        <ResourceCard
+          title="Storage"
+          format="bytes"
+          totals={aggregates.standard["ephemeral-storage"]}
+        />
+        <ResourceCard title="Pods" format="count" totals={aggregates.standard.pods} />
+      </div>
+      {extendedKeys.length > 0 ? (
+        <div>
+          <h3 className="mb-3 text-sm font-medium text-slate-700">
+            Extended resources (discovered)
+          </h3>
+          <div className="grid grid-cols-1 gap-4 sm:grid-cols-2 lg:grid-cols-4">
+            {extendedKeys.map((key) => (
+              <div key={key} data-extended-card={key}>
+                <ResourceCard
+                  title={key}
+                  format="count"
+                  totals={aggregates.extended[key]}
+                />
+              </div>
+            ))}
+          </div>
+        </div>
+      ) : null}
+    </div>
+  )
+}
diff --git a/apps/console/src/components/cluster-usage/ResourceCard.test.tsx b/apps/console/src/components/cluster-usage/ResourceCard.test.tsx
new file mode 100644
index 0000000..86f030b
--- /dev/null
+++ b/apps/console/src/components/cluster-usage/ResourceCard.test.tsx
@@ -0,0 +1,76 @@
+import { describe, it, expect } from "vitest"
+import { render, screen } from "@testing-library/react"
+import { ResourceCard } from "./ResourceCard.tsx"
+
+describe("ResourceCard", () => {
+  it("renders the title verbatim", () => {
+    render(
+      <ResourceCard
+        title="nvidia.com/gpu"
+        format="count"
+        totals={{ capacity: 4, allocatable: 4, requested: 2 }}
+      />,
+    )
+    expect(screen.getByText("nvidia.com/gpu")).toBeInTheDocument()
+  })
+
+  it("renders capacity and allocatable for any resource", () => {
+    render(
+      <ResourceCard
+        title="CPU"
+        format="cpu"
+        totals={{ capacity: 8, allocatable: 8, requested: 4 }}
+      />,
+    )
+    expect(screen.getByText(/capacity/i)).toBeInTheDocument()
+    expect(screen.getByText(/allocatable/i)).toBeInTheDocument()
+  })
+
+  it("omits the Used line when used is undefined", () => {
+    render(
+      <ResourceCard
+        title="CPU"
+        format="cpu"
+        totals={{ capacity: 8, allocatable: 8, requested: 4 }}
+      />,
+    )
+    expect(screen.queryByText(/used/i)).toBeNull()
+  })
+
+  it("renders the Used line when used is defined", () => {
+    render(
+      <ResourceCard
+        title="CPU"
+        format="cpu"
+        totals={{ capacity: 8, allocatable: 8, requested: 4, used: 2 }}
+      />,
+    )
+    expect(screen.getByText(/used/i)).toBeInTheDocument()
+  })
+
+  it("renders an em dash for divide-by-zero (allocatable=0)", () => {
+    render(
+      <ResourceCard
+        title="CPU"
+        format="cpu"
+        totals={{ capacity: 0, allocatable: 0, requested: 0 }}
+      />,
+    )
+    expect(screen.getAllByText("—").length).toBeGreaterThan(0)
+  })
+
+  it("clamps percentage display at 100% for over-committed resources", () => {
+    render(
+      <ResourceCard
+        title="CPU"
+        format="cpu"
+        totals={{ capacity: 8, allocatable: 8, requested: 12 }}
+      />,
+    )
+    const bars = document.querySelectorAll('[role="progressbar"]')
+    const requestedBar = Array.from(bars).find(
+      (b) => b.getAttribute("data-resource-bar") === "requested",
+    )
+    expect(requestedBar?.getAttribute("aria-valuenow")).toBe("100")
+  })
+})
diff --git a/apps/console/src/components/cluster-usage/ResourceCard.tsx b/apps/console/src/components/cluster-usage/ResourceCard.tsx
new file mode 100644
index 0000000..5c65059
--- /dev/null
+++ b/apps/console/src/components/cluster-usage/ResourceCard.tsx
@@ -0,0 +1,127 @@
+import { humanizeBytes, humanizeCpu } from "../../lib/k8s-quantity.ts"
+import type { ResourceTotals } from "../../lib/cluster-usage/types.ts"
+
+export type ResourceFormat = "cpu" | "bytes" | "count"
+
+interface ResourceCardProps {
+  title: string
+  format: ResourceFormat
+  totals: ResourceTotals
+}
+
+function formatValue(value: number, format: ResourceFormat): string {
+  // cpu and bytes defer to the shared quantity humanizers; everything
+  // else is a plain count: whole numbers verbatim, fractions to 2 places.
+  if (format === "cpu") {
+    return humanizeCpu(value)
+  }
+  if (format === "bytes") {
+    return humanizeBytes(value)
+  }
+  return value % 1 === 0 ? `${value}` : value.toFixed(2)
+}
+
+function percent(value: number, allocatable: number): number | null {
+  // Over-commit is capped at 100; a non-positive denominator yields null.
+  return allocatable <= 0 ? null : Math.min(100, Math.round((value / allocatable) * 100))
+}
+
+function barColorClass(pct: number | null): string {
+  // Unknown → neutral; red above 90, amber above 70, blue otherwise.
+  if (pct === null) return "bg-slate-300"
+  if (pct > 90) return "bg-red-500"
+  return pct > 70 ? "bg-amber-500" : "bg-blue-500"
+}
+
+interface ProgressBarProps {
+  pct: number | null
+  resourceBar: "requested" | "used"
+  ariaLabel: string
+}
+
+function ProgressBar({ pct, resourceBar, ariaLabel }: ProgressBarProps) {
+  return (
+    <div
+      role="progressbar"
+      data-resource-bar={resourceBar}
+      aria-label={ariaLabel}
+      aria-valuemin={0}
+      aria-valuemax={100}
+      aria-valuenow={pct ?? 0}
+      className="h-1.5 w-full overflow-hidden rounded-full bg-slate-100"
+    >
+      <div
+        className={`h-full transition-all duration-200 ${barColorClass(pct)}`}
+        style={{ width: `${pct ?? 0}%` }}
+      />
+    </div>
+  )
+}
+
+/**
+ * A single aggregate-resource card showing capacity, allocatable, and
+ * up to two progress bars: requested (always rendered when allocatable
+ * is non-zero) and used (rendered only when totals.used is defined,
+ * which happens for cpu/memory when metrics.k8s.io is discovered).
+ *
+ * A zero-allocatable resource renders em dashes for every number and
+ * no progress bar — that combination is rare but represents nodes that
+ * have not yet reported their capacity, and crashing the panel is much
+ * worse than rendering placeholders.
+ */
+export function ResourceCard({ title, format, totals }: ResourceCardProps) {
+  const allocatableZero = totals.allocatable <= 0
+  const requestedPct = percent(totals.requested, totals.allocatable)
+  const usedDefined = totals.used !== undefined
+  const usedPct = usedDefined ? percent(totals.used ?? 0, totals.allocatable) : null
+
+  return (
+    <div className="rounded-lg border border-slate-200 bg-white p-4 shadow-sm">
+      <div className="mb-3 text-xs font-medium uppercase tracking-wider text-slate-500">
+        {title}
+      </div>
+      <div className="space-y-2.5">
+        <div className="flex items-baseline justify-between text-xs text-slate-500">
+          <span>Capacity</span>
+          <span className="tabular-nums text-slate-700">
+            {allocatableZero ? "—" : formatValue(totals.capacity, format)}
+          </span>
+        </div>
+        <div className="flex items-baseline justify-between text-xs text-slate-500">
+          <span>Allocatable</span>
+          <span className="tabular-nums text-slate-700">
+            {allocatableZero ? "—" : formatValue(totals.allocatable, format)}
+          </span>
+        </div>
+        {usedDefined ? (
+          <div>
+            <div className="mb-1 flex items-baseline justify-between text-xs">
+              <span className="text-slate-600">Used</span>
+              <span className="tabular-nums text-slate-700">
+                {allocatableZero ? "—" : formatValue(totals.used ?? 0, format)}
+              </span>
+            </div>
+            {!allocatableZero ? (
+              <ProgressBar pct={usedPct} resourceBar="used" ariaLabel={`${title} used`} />
+            ) : null}
+          </div>
+        ) : null}
+        <div>
+          <div className="mb-1 flex items-baseline justify-between text-xs">
+            <span className="text-slate-600">Requested</span>
+            <span className="tabular-nums text-slate-700">
+              {allocatableZero ? "—" : formatValue(totals.requested, format)}
+            </span>
+          </div>
+          {!allocatableZero ? (
+            <ProgressBar
+              pct={requestedPct}
+              resourceBar="requested"
+              ariaLabel={`${title} requested`}
+            />
+          ) : null}
+        </div>
+      </div>
+    </div>
+  )
+}

From 851c98e7071a79727ff658f25a3bfe8093f901ed Mon Sep 17 00:00:00 2001
From: Aleksei Sviridkin <f@lex.la>
Date: Tue, 26 May 2026 18:00:39 +0300
Subject: [PATCH 09/20] feat(console): ClusterUsageTable per-node table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bottom panel of the Cluster Usage admin page. Fixed columns for Name,
Status, Roles, CPU, and Memory, followed by one column per full
extended-resource key found in the cluster, then Age. Headers for
extended-resource columns use monospace and the resource key verbatim
so vendor-specific names like 'nvidia.com/gpu' read as identifiers
rather than English words.

CPU and Memory cells render two lines: used/allocatable (when
metrics-server is discovered) and requested/allocatable (always when
allocatable is non-zero). NotReady nodes render em dashes for these
cells because the capacity numbers stop being authoritative; the rest
of the row stays visible so the row remains a useful pointer for the
operator.

Status, Roles and taints each carry their own visual treatment so the
table doubles as a quick triage view — pressure conditions render as
amber chips, taints surface a 'tainted N' label, and SchedulingDisabled
nodes call themselves out distinctly from NotReady.

Assisted-By: Claude <noreply@anthropic.com>
Signed-off-by: Aleksei Sviridkin <f@lex.la>
---
 .../cluster-usage/ClusterUsageTable.test.tsx  | 144 ++++++++++++++++
 .../cluster-usage/ClusterUsageTable.tsx       | 156 ++++++++++++++++++
 2 files changed, 300 insertions(+)
 create mode 100644 apps/console/src/components/cluster-usage/ClusterUsageTable.test.tsx
 create mode 100644 apps/console/src/components/cluster-usage/ClusterUsageTable.tsx

diff --git a/apps/console/src/components/cluster-usage/ClusterUsageTable.test.tsx b/apps/console/src/components/cluster-usage/ClusterUsageTable.test.tsx
new file mode 100644
index 0000000..176a5cb
--- /dev/null
+++ b/apps/console/src/components/cluster-usage/ClusterUsageTable.test.tsx
@@ -0,0 +1,144 @@
+import { describe, it, expect, vi, beforeAll, afterAll } from "vitest"
+import { render, screen, within } from "@testing-library/react"
+import { ClusterUsageTable } from "./ClusterUsageTable.tsx"
+import type { NodeRow } from "../../lib/cluster-usage/types.ts"
+
+beforeAll(() => {
+  vi.useFakeTimers()
+  vi.setSystemTime(new Date("2026-05-26T00:00:00Z"))
+})
+
+afterAll(() => {
+  vi.useRealTimers()
+})
+
+function row(name: string, overrides: Partial<NodeRow> = {}): NodeRow {
+  return {
+    name,
+    ready: true,
+    schedulable: true,
+    pressureConditions: [],
+    roles: [],
+    taints: [],
+    age: "1d",
+    creationTimestamp: "2026-05-25T00:00:00Z",
+    standard: {
+      cpu: { capacity: 8, allocatable: 8, requested: 1 },
+      memory: { capacity: 16 * 1024 ** 3, allocatable: 16 * 1024 ** 3, requested: 0 },
+      "ephemeral-storage": { capacity: 0, allocatable: 0, requested: 0 },
+      pods: { capacity: 110, allocatable: 110, requested: 0 },
+    },
+    extended: {},
+    ...overrides,
+  }
+}
+
+describe("ClusterUsageTable", () => {
+  it("renders one tr per node, preserving the order of the rows prop", () => {
+    render(
+      <ClusterUsageTable rows={[row("worker-b"), row("worker-a")]} extendedKeys={[]} />,
+    )
+    const rows = screen.getAllByRole("row")
+    // First row is the header
+    expect(rows).toHaveLength(3)
+    expect(within(rows[1]).getByText("worker-b")).toBeInTheDocument()
+    expect(within(rows[2]).getByText("worker-a")).toBeInTheDocument()
+  })
+
+  it("shows Ready / NotReady status text", () => {
+    render(
+      <ClusterUsageTable
+        rows={[row("ok", { ready: true }), row("bad", { ready: false })]}
+        extendedKeys={[]}
+      />,
+    )
+    expect(screen.getByText("Ready")).toBeInTheDocument()
+    expect(screen.getByText("NotReady")).toBeInTheDocument()
+  })
+
+  it("shows SchedulingDisabled when schedulable=false", () => {
+    render(
+      <ClusterUsageTable
+        rows={[row("cordoned", { schedulable: false })]}
+        extendedKeys={[]}
+      />,
+    )
+    expect(screen.getByText(/scheduling.?disabled/i)).toBeInTheDocument()
+  })
+
+  it("flags pressure conditions with a chip", () => {
+    render(
+      <ClusterUsageTable
+        rows={[row("pressured", { pressureConditions: ["MemoryPressure"] })]}
+        extendedKeys={[]}
+      />,
+    )
+    expect(screen.getByText("MemoryPressure")).toBeInTheDocument()
+  })
+
+  it("renders roles inline, em dash for nodes without roles", () => {
+    render(
+      <ClusterUsageTable
+        rows={[
+          row("cp", { roles: ["control-plane"] }),
+          row("worker", { roles: [] }),
+        ]}
+        extendedKeys={[]}
+      />,
+    )
+    expect(screen.getByText("control-plane")).toBeInTheDocument()
+    const workerRow = screen.getByText("worker").closest("tr")!
+    expect(within(workerRow).getAllByText("—").length).toBeGreaterThan(0)
+  })
+
+  it("adds one column per extended key, in extendedKeys order", () => {
+    render(
+      <ClusterUsageTable
+        rows={[
+          row("gpu-1", {
+            extended: { "nvidia.com/gpu": { capacity: 2, allocatable: 2, requested: 1 } },
+          }),
+        ]}
+        extendedKeys={["nvidia.com/gpu", "amd.com/gpu"]}
+      />,
+    )
+    const headers = screen.getAllByRole("columnheader").map((h) => h.textContent)
+    // Extended columns sit between the five fixed columns and trailing Age.
+    expect(headers.slice(5, -1)).toEqual(["nvidia.com/gpu", "amd.com/gpu"])
+  })
+
+  it("renders em dash in extended-resource cell when the node does not expose it", () => {
+    render(
+      <ClusterUsageTable
+        rows={[row("plain", { roles: ["worker"], extended: {} })]}
+        extendedKeys={["nvidia.com/gpu"]}
+      />,
+    )
+    const tr = screen.getByText("plain").closest("tr")!
+    expect(within(tr).getAllByText("—")).toHaveLength(1)
+  })
+
+  it("renders the age column verbatim from row.age", () => {
+    render(
+      <ClusterUsageTable
+        rows={[row("with-age", { age: "21h" })]}
+        extendedKeys={[]}
+      />,
+    )
+    expect(screen.getByText("21h")).toBeInTheDocument()
+  })
+
+  it("renders em dashes in cpu/memory cells when the node is NotReady", () => {
+    render(
+      <ClusterUsageTable
+        rows={[row("dead", { ready: false, roles: ["worker"] })]}
+        extendedKeys={[]}
+      />,
+    )
+    const tr = screen.getByText("dead").closest("tr")!
+    // The node carries a role so the Roles cell contributes no em dash;
+    // the only placeholders left are the CPU and Memory cells, which
+    // render a single '—' each for a NotReady node.
+    expect(within(tr).getAllByText("—")).toHaveLength(2)
+  })
+})
diff --git a/apps/console/src/components/cluster-usage/ClusterUsageTable.tsx b/apps/console/src/components/cluster-usage/ClusterUsageTable.tsx
new file mode 100644
index 0000000..660e9eb
--- /dev/null
+++ b/apps/console/src/components/cluster-usage/ClusterUsageTable.tsx
@@ -0,0 +1,156 @@
+import { humanizeBytes, humanizeCpu } from "../../lib/k8s-quantity.ts"
+import type { NodeRow, ResourceTotals } from "../../lib/cluster-usage/types.ts"
+
+interface ClusterUsageTableProps {
+  rows: NodeRow[]
+  extendedKeys: string[]
+}
+
+function statusLabel(row: NodeRow): string {
+  // NotReady outranks cordoning; only a ready node can be SchedulingDisabled.
+  const whenReady = row.schedulable ? "Ready" : "SchedulingDisabled"
+  return row.ready ? whenReady : "NotReady"
+}
+
+/**
+ * Shared renderer for the CPU and Memory cells. A NotReady node or a
+ * non-positive allocatable collapses to a single em dash; otherwise the
+ * cell shows "used / allocatable used" (only when totals.used is
+ * defined) above "requested / allocatable req".
+ *
+ * @param humanize - formatter applied to every quantity in the cell
+ *   (humanizeCpu for CPU, humanizeBytes for Memory).
+ */
+function usageCell(
+  totals: ResourceTotals,
+  ready: boolean,
+  humanize: (value: number) => string,
+) {
+  if (!ready || totals.allocatable <= 0) {
+    return (
+      <div className="space-y-0.5 text-xs">
+        <div className="text-slate-400">—</div>
+      </div>
+    )
+  }
+  const allocatable = humanize(totals.allocatable)
+  return (
+    <div className="space-y-0.5 text-xs">
+      {totals.used !== undefined ? (
+        <div className="tabular-nums text-slate-700">
+          {humanize(totals.used ?? 0)} / {allocatable} used
+        </div>
+      ) : null}
+      <div className="tabular-nums text-slate-500">
+        {humanize(totals.requested)} / {allocatable} req
+      </div>
+    </div>
+  )
+}
+
+/** CPU cell: usage lines formatted with humanizeCpu. */
+function cpuCell(totals: ResourceTotals, ready: boolean) {
+  return usageCell(totals, ready, humanizeCpu)
+}
+
+/** Memory cell: usage lines formatted with humanizeBytes. */
+function memoryCell(totals: ResourceTotals, ready: boolean) {
+  return usageCell(totals, ready, humanizeBytes)
+}
+
+function extendedCell(totals: ResourceTotals | undefined) {
+  // Nodes that do not expose the resource get a muted placeholder.
+  if (!totals) return <span className="text-slate-400">—</span>
+  const { requested, allocatable, capacity } = totals
+  return (
+    <div className="space-y-0.5 text-xs tabular-nums text-slate-700">
+      <div>{requested} / {allocatable}</div>
+      <div className="text-slate-400">capacity {capacity}</div>
+    </div>
+  )
+}
+
+/**
+ * Per-node table rendered below the aggregate panel. The first columns
+ * are fixed (Name, Status, Roles, CPU, Memory); then one column per
+ * entry in extendedKeys, in the order given, headed by the resource key
+ * verbatim. Trailing column is Age.
+ *
+ * The Status cell stacks the status label, one chip per pressure
+ * condition, and a "tainted N" count when the node carries taints.
+ * NotReady nodes show an em dash in the CPU / Memory cells while the
+ * rest of the row stays visible, so the troubled node is identifiable.
+ */
+export function ClusterUsageTable({ rows, extendedKeys }: ClusterUsageTableProps) {
+  return (
+    <div className="overflow-x-auto rounded-lg border border-slate-200 bg-white shadow-sm">
+      <table className="w-full text-sm">
+        <thead>
+          <tr className="border-b border-slate-200 bg-slate-50 text-left text-xs font-medium uppercase tracking-wider text-slate-500">
+            <th className="px-4 py-3">Name</th>
+            <th className="px-4 py-3">Status</th>
+            <th className="px-4 py-3">Roles</th>
+            <th className="px-4 py-3">CPU</th>
+            <th className="px-4 py-3">Memory</th>
+            {extendedKeys.map((k) => (
+              <th key={k} className="px-4 py-3 font-mono normal-case tracking-normal text-slate-600">
+                {k}
+              </th>
+            ))}
+            <th className="px-4 py-3">Age</th>
+          </tr>
+        </thead>
+        <tbody className="divide-y divide-slate-100">
+          {rows.map((r) => (
+            <tr key={r.name} className="hover:bg-slate-50">
+              <td className="px-4 py-3 text-sm font-medium text-slate-900">{r.name}</td>
+              <td className="px-4 py-3 align-top">
+                <div className="space-y-1">
+                  <div className="text-xs text-slate-700">{statusLabel(r)}</div>
+                  {r.pressureConditions.length > 0 ? (
+                    <div className="flex flex-wrap gap-1">
+                      {r.pressureConditions.map((p) => (
+                        <span
+                          key={p}
+                          className="rounded-full bg-amber-50 px-2 py-0.5 text-[11px] text-amber-800"
+                        >
+                          {p}
+                        </span>
+                      ))}
+                    </div>
+                  ) : null}
+                  {r.taints.length > 0 ? (
+                    <div className="text-[11px] text-slate-500">
+                      tainted {r.taints.length}
+                    </div>
+                  ) : null}
+                </div>
+              </td>
+              <td className="px-4 py-3 align-top text-xs text-slate-700">
+                {r.roles.length > 0 ? (
+                  <div className="flex flex-wrap gap-1">
+                    {r.roles.map((role) => (
+                      <span key={role} className="rounded-full bg-slate-100 px-2 py-0.5">
+                        {role}
+                      </span>
+                    ))}
+                  </div>
+                ) : (
+                  <span className="text-slate-400">—</span>
+                )}
+              </td>
+              <td className="px-4 py-3 align-top">{cpuCell(r.standard.cpu, r.ready)}</td>
+              <td className="px-4 py-3 align-top">{memoryCell(r.standard.memory, r.ready)}</td>
+              {extendedKeys.map((k) => (
+                <td key={k} className="px-4 py-3 align-top">
+                  {extendedCell(r.extended[k])}
+                </td>
+              ))}
+              <td className="px-4 py-3 tabular-nums text-xs text-slate-500">{r.age}</td>
+            </tr>
+          ))}
+        </tbody>
+      </table>
+    </div>
+  )
+}

From 98f041d7c118162733fea84be675b6b4f882cbe5 Mon Sep 17 00:00:00 2001
From: Aleksei Sviridkin <f@lex.la>
Date: Tue, 26 May 2026 18:02:56 +0300
Subject: [PATCH 10/20] feat(console): compose ClusterUsagePage from aggregates
 and per-node table

Single cluster-scoped page reads from useClusterUsageData and renders
the aggregate panel on top, the per-node table below. The locked
loading / empty / error matrix is honoured: nodes-loading shows a
page-level spinner, nodes-error shows an explicit failure block,
empty cluster shows 'No nodes found', anything else renders both
panels.

The page is reachable by URL even when the sidebar gate hides the
entry; in that case the underlying useK8sList call returns 403 and
the error block carries the message. A fancier page-level permission
gate is explicitly out of scope for the first iteration.

Assisted-By: Claude <noreply@anthropic.com>
Signed-off-by: Aleksei Sviridkin <f@lex.la>
---
 .../src/routes/ClusterUsagePage.test.tsx      | 139 ++++++++++++++++++
 apps/console/src/routes/ClusterUsagePage.tsx  |  56 +++++++
 2 files changed, 195 insertions(+)
 create mode 100644 apps/console/src/routes/ClusterUsagePage.test.tsx
 create mode 100644 apps/console/src/routes/ClusterUsagePage.tsx

diff --git a/apps/console/src/routes/ClusterUsagePage.test.tsx b/apps/console/src/routes/ClusterUsagePage.test.tsx
new file mode 100644
index 0000000..b840789
--- /dev/null
+++ b/apps/console/src/routes/ClusterUsagePage.test.tsx
@@ -0,0 +1,139 @@
+import { describe, it, expect, vi } from "vitest"
+import { screen, waitFor } from "@testing-library/react"
+import {
+  K8sClient,
+  K8sApiError,
+  type APIGroupList,
+  type K8sList,
+} from "@cozystack/k8s-client"
+import { ClusterUsagePage } from "./ClusterUsagePage.tsx"
+import { renderWithK8sProvider } from "../test-utils/render.tsx"
+import { nodesListFixture } from "../test-utils/fixtures/nodes.ts"
+import { podsListFixture } from "../test-utils/fixtures/pods.ts"
+import { nodeMetricsListFixture } from "../test-utils/fixtures/node-metrics.ts"
+
+const groupsWithMetrics: APIGroupList = {
+  kind: "APIGroupList",
+  apiVersion: "v1",
+  groups: [
+    {
+      name: "metrics.k8s.io",
+      versions: [{ groupVersion: "metrics.k8s.io/v1beta1", version: "v1beta1" }],
+      preferredVersion: { groupVersion: "metrics.k8s.io/v1beta1", version: "v1beta1" },
+    },
+  ],
+}
+
+const groupsWithoutMetrics: APIGroupList = {
+  kind: "APIGroupList",
+  apiVersion: "v1",
+  groups: [],
+}
+
+function makeClient(
+  config: {
+    nodes?: K8sList<unknown> | K8sApiError | "pending"
+    pods?: K8sList<unknown> | K8sApiError
+    metrics?: K8sList<unknown> | K8sApiError
+    groups?: APIGroupList
+  } = {},
+): K8sClient {
+  const client = new K8sClient()
+  vi.spyOn(client, "list").mockImplementation(async (g, _v, plural) => {
+    if (g === "metrics.k8s.io") {
+      if (config.metrics instanceof K8sApiError) throw config.metrics
+      return (config.metrics ?? {
+        apiVersion: "metrics.k8s.io/v1beta1",
+        kind: "NodeMetricsList",
+        metadata: {},
+        items: [],
+      }) as K8sList<unknown>
+    }
+    if (plural === "nodes") {
+      if (config.nodes === "pending") return new Promise(() => ({})) as never
+      if (config.nodes instanceof K8sApiError) throw config.nodes
+      return (config.nodes ?? {
+        apiVersion: "v1",
+        kind: "NodeList",
+        metadata: {},
+        items: [],
+      }) as K8sList<unknown>
+    }
+    if (plural === "pods") {
+      if (config.pods instanceof K8sApiError) throw config.pods
+      return (config.pods ?? {
+        apiVersion: "v1",
+        kind: "PodList",
+        metadata: {},
+        items: [],
+      }) as K8sList<unknown>
+    }
+    return { apiVersion: "v1", kind: `${plural}List`, metadata: {}, items: [] }
+  })
+  vi.spyOn(client, "getApiGroups").mockResolvedValue(
+    config.groups ?? groupsWithoutMetrics,
+  )
+  return client
+}
+
+describe("ClusterUsagePage", () => {
+  it("renders a spinner while nodes are loading", () => {
+    const client = makeClient({ nodes: "pending" })
+    renderWithK8sProvider(<ClusterUsagePage />, { client })
+    expect(screen.getByText(/loading/i)).toBeInTheDocument()
+  })
+
+  it("renders both panels on a healthy cluster with metrics", async () => {
+    const client = makeClient({
+      nodes: nodesListFixture,
+      pods: podsListFixture,
+      metrics: nodeMetricsListFixture,
+      groups: groupsWithMetrics,
+    })
+    renderWithK8sProvider(<ClusterUsagePage />, { client })
+    expect(await screen.findByText("Cluster Usage")).toBeInTheDocument()
+    // "CPU" appears in both the aggregate card and the table column header,
+    // so assert via the aggregate-specific "Allocatable" label instead.
+    expect(await screen.findAllByText(/allocatable/i)).not.toHaveLength(0)
+    expect(await screen.findByText("worker-gpu-1")).toBeInTheDocument()
+  })
+
+  it("renders the empty state when no nodes exist", async () => {
+    const client = makeClient({
+      nodes: {
+        apiVersion: "v1",
+        kind: "NodeList",
+        metadata: {},
+        items: [],
+      } as K8sList<unknown>,
+      pods: {
+        apiVersion: "v1",
+        kind: "PodList",
+        metadata: {},
+        items: [],
+      } as K8sList<unknown>,
+    })
+    renderWithK8sProvider(<ClusterUsagePage />, { client })
+    expect(await screen.findByText(/no nodes found/i)).toBeInTheDocument()
+  })
+
+  it("renders an error block when the nodes-list call fails", async () => {
+    const client = makeClient({ nodes: new K8sApiError(500, "server error") })
+    renderWithK8sProvider(<ClusterUsagePage />, { client })
+    await waitFor(() => {
+      expect(screen.getByText(/failed to load cluster nodes/i)).toBeInTheDocument()
+    })
+  })
+
+  it("omits the Used line everywhere when metrics-server is not registered", async () => {
+    const client = makeClient({
+      nodes: nodesListFixture,
+      pods: podsListFixture,
+      groups: groupsWithoutMetrics,
+    })
+    renderWithK8sProvider(<ClusterUsagePage />, { client })
+    // Wait for the page to settle by waiting on an aggregate-card label.
+    await screen.findAllByText(/allocatable/i)
+    expect(screen.queryByText(/used/i)).toBeNull()
+  })
+})
diff --git a/apps/console/src/routes/ClusterUsagePage.tsx b/apps/console/src/routes/ClusterUsagePage.tsx
new file mode 100644
index 0000000..042de1f
--- /dev/null
+++ b/apps/console/src/routes/ClusterUsagePage.tsx
@@ -0,0 +1,56 @@
+import { Section, Spinner } from "@cozystack/ui"
+import { useClusterUsageData } from "../hooks/useClusterUsageData.tsx"
+import { ClusterUsageAggregates } from "../components/cluster-usage/ClusterUsageAggregates.tsx"
+import { ClusterUsageTable } from "../components/cluster-usage/ClusterUsageTable.tsx"
+
+/**
+ * Administration → Cluster Usage. Single cluster-scoped page that
+ * renders aggregate utilisation on top and a per-node table below.
+ * Both panels read from the same useClusterUsageData composite hook,
+ * so they always agree on totals.
+ *
+ * Tenant-scoped users are not offered a link to this page: the sidebar
+ * entry is gated by a SelfSubjectAccessReview on `nodes list`. On direct
+ * URL navigation the page still mounts and renders its own error block
+ * when the hook reports an error; a page-level permission gate is
+ * explicitly out of scope for the first iteration.
+ */
+export function ClusterUsagePage() {
+  const { nodes, perNode, aggregates, isLoading, error } = useClusterUsageData()
+  const extendedKeys = Object.keys(aggregates.extended).sort()
+
+  return (
+    <div className="space-y-6 p-6">
+      <div>
+        <h1 className="text-xl font-semibold text-slate-900">Cluster Usage</h1>
+        <p className="mt-0.5 text-sm text-slate-500">
+          Cluster-scoped capacity, allocation and usage across all nodes,
+          including any discovered extended resources.
+        </p>
+      </div>
+      {isLoading ? (
+        <div className="flex items-center gap-2 text-sm text-slate-500">
+          <Spinner /> Loading…
+        </div>
+      ) : error ? (
+        <Section>
+          <div className="px-2 py-4 text-sm text-red-700">
+            Failed to load cluster nodes: {error.message}
+          </div>
+        </Section>
+      ) : nodes.length === 0 ? (
+        <Section>
+          <p className="py-6 text-center text-sm text-slate-500">No nodes found.</p>
+        </Section>
+      ) : (
+        <>
+          <ClusterUsageAggregates aggregates={aggregates} />
+          <div>
+            <h2 className="mb-3 text-sm font-medium text-slate-700">Nodes</h2>
+            <ClusterUsageTable rows={perNode} extendedKeys={extendedKeys} />
+          </div>
+        </>
+      )}
+    </div>
+  )
+}

From 3b05957fd3db661f56db4125e3a9fe692b9ecbcd Mon Sep 17 00:00:00 2001
From: Aleksei Sviridkin <f@lex.la>
Date: Tue, 26 May 2026 18:05:21 +0300
Subject: [PATCH 11/20] feat(console): permission-gated Cluster Usage sidebar
 entry and route
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The new entry sits at the top of Administration. It only appears when
a SelfSubjectAccessReview confirms the current user can list nodes —
tenant users never see the menu item, so they never click it and hit
a 403. Loading and error states resolve as 'not allowed' so the entry
never flickers in then out for users who can't see it.

The companion route in ConsolePage mounts ClusterUsagePage at
/cluster-usage. Users who bookmark the URL directly land on an error
block rendered by the page itself when the underlying API call is
denied — a fancier page-level gate is intentionally deferred.

A consequence worth calling out: this is the first sidebar entry in
the console gated by a permission check. The pattern (single SSAR call,
isLoading-false-and-allowed-true predicate) can be reused for other
cluster-scoped entries later — Tenants, External IPs, Modules all
currently render for everyone and 403 on click.

Assisted-By: Claude <noreply@anthropic.com>
Signed-off-by: Aleksei Sviridkin <f@lex.la>
---
 .../src/routes/ConsolePage.routing.test.tsx   |  53 ++++++++
 apps/console/src/routes/ConsolePage.tsx       |   2 +
 .../src/routes/sidebar-sections.test.tsx      | 114 ++++++++++++++++++
 apps/console/src/routes/sidebar-sections.tsx  |  18 ++-
 4 files changed, 186 insertions(+), 1 deletion(-)
 create mode 100644 apps/console/src/routes/ConsolePage.routing.test.tsx
 create mode 100644 apps/console/src/routes/sidebar-sections.test.tsx

diff --git a/apps/console/src/routes/ConsolePage.routing.test.tsx b/apps/console/src/routes/ConsolePage.routing.test.tsx
new file mode 100644
index 0000000..7a4264c
--- /dev/null
+++ b/apps/console/src/routes/ConsolePage.routing.test.tsx
@@ -0,0 +1,53 @@
+import { describe, it, expect, vi } from "vitest"
+import { screen } from "@testing-library/react"
+import {
+  K8sClient,
+  type K8sList,
+  type APIGroupList,
+} from "@cozystack/k8s-client"
+import { ConsolePage } from "./ConsolePage.tsx"
+import { renderWithK8sProvider } from "../test-utils/render.tsx"
+
+function makeClient(): K8sClient {
+  const client = new K8sClient()
+  vi.spyOn(client, "list").mockImplementation(async (_g, _v, plural) => {
+    if (plural === "tenantnamespaces") {
+      return {
+        apiVersion: "core.cozystack.io/v1alpha1",
+        kind: "TenantNamespaceList",
+        metadata: {},
+        items: [],
+      } as K8sList<unknown>
+    }
+    return {
+      apiVersion: "v1",
+      kind: `${plural}List`,
+      metadata: {},
+      items: [],
+    } as K8sList<unknown>
+  })
+  vi.spyOn(client, "getApiGroups").mockResolvedValue({
+    kind: "APIGroupList",
+    apiVersion: "v1",
+    groups: [],
+  } as APIGroupList)
+  vi.spyOn(client, "create").mockResolvedValue({
+    apiVersion: "authorization.k8s.io/v1",
+    kind: "SelfSubjectAccessReview",
+    metadata: { name: "" },
+    spec: {},
+    status: { allowed: false },
+  } as unknown)
+  return client
+}
+
+describe("ConsolePage routing", () => {
+  it("renders ClusterUsagePage at /cluster-usage", async () => {
+    const client = makeClient()
+    renderWithK8sProvider(<ConsolePage />, {
+      client,
+      initialRoute: "/cluster-usage",
+    })
+    expect(await screen.findByText("Cluster Usage")).toBeInTheDocument()
+  })
+})
diff --git a/apps/console/src/routes/ConsolePage.tsx b/apps/console/src/routes/ConsolePage.tsx
index dfb4781..5e0a249 100644
--- a/apps/console/src/routes/ConsolePage.tsx
+++ b/apps/console/src/routes/ConsolePage.tsx
@@ -4,6 +4,7 @@ import { TenantsPage } from "./TenantsPage.tsx"
 import { ModulesPage } from "./ModulesPage.tsx"
 import { ExternalIpsPage } from "./ExternalIpsPage.tsx"
 import { InfoRedirect } from "./InfoRedirect.tsx"
+import { ClusterUsagePage } from "./ClusterUsagePage.tsx"
 import { ApplicationListPage } from "./ApplicationListPage.tsx"
 import { ApplicationDetailPage } from "./detail/ApplicationDetailPage.tsx"
 import { ApplicationEditRoute } from "./detail/ApplicationEditRoute.tsx"
@@ -23,6 +24,7 @@ export function ConsolePage() {
       <Route path="modules" element={<ModulesPage />} />
       <Route path="external-ips" element={<ExternalIpsPage />} />
       <Route path="tenants" element={<TenantsPage />} />
+      <Route path="cluster-usage" element={<ClusterUsagePage />} />
       <Route
         path="backups/plans"
         element={<BackupResourceListPage resourceType="plans" title="Plans" />}
diff --git a/apps/console/src/routes/sidebar-sections.test.tsx b/apps/console/src/routes/sidebar-sections.test.tsx
new file mode 100644
index 0000000..1d718f9
--- /dev/null
+++ b/apps/console/src/routes/sidebar-sections.test.tsx
@@ -0,0 +1,114 @@
+import { describe, it, expect, vi } from "vitest"
+import { renderHook, waitFor } from "@testing-library/react"
+import { QueryClient, QueryClientProvider } from "@tanstack/react-query"
+import {
+  K8sClient,
+  K8sProvider,
+  K8sApiError,
+  type K8sList,
+  type SelfSubjectAccessReview,
+} from "@cozystack/k8s-client"
+import type { ReactNode } from "react"
+import { useConsoleSidebarSections } from "./sidebar-sections.tsx"
+
+const emptyAppDefList: K8sList<unknown> = {
+  apiVersion: "cozystack.io/v1alpha1",
+  kind: "ApplicationDefinitionList",
+  metadata: {},
+  items: [],
+}
+
+function ssarResponse(allowed: boolean): SelfSubjectAccessReview {
+  return {
+    apiVersion: "authorization.k8s.io/v1",
+    kind: "SelfSubjectAccessReview",
+    metadata: { name: "" },
+    spec: { resourceAttributes: { resource: "nodes", verb: "list" } },
+    status: { allowed },
+  }
+}
+
+interface ClientConfig {
+  ssar?: SelfSubjectAccessReview | "pending" | K8sApiError
+}
+
+function makeClient(config: ClientConfig = {}): K8sClient {
+  const client = new K8sClient()
+  vi.spyOn(client, "list").mockResolvedValue(emptyAppDefList as K8sList<unknown>)
+  vi.spyOn(client, "create").mockImplementation(async () => {
+    if (config.ssar === "pending") return new Promise(() => ({})) as never
+    if (config.ssar instanceof K8sApiError) throw config.ssar
+    return (config.ssar ?? ssarResponse(false)) as unknown
+  })
+  return client
+}
+
+function makeWrapper(client: K8sClient) {
+  const queryClient = new QueryClient({
+    defaultOptions: { queries: { retry: false, gcTime: 0 } },
+  })
+  return function Wrapper({ children }: { children: ReactNode }) {
+    return (
+      <QueryClientProvider client={queryClient}>
+        <K8sProvider client={client} queryClient={queryClient}>
+          {children}
+        </K8sProvider>
+      </QueryClientProvider>
+    )
+  }
+}
+
+function findItem(sections: ReturnType<typeof useConsoleSidebarSections>, label: string) {
+  for (const section of sections) {
+    const found = section.items.find((i) => i.label === label)
+    if (found) return found
+  }
+  return undefined
+}
+
+describe("useConsoleSidebarSections — Cluster Usage gate", () => {
+  it("renders the Cluster Usage entry when SSAR allows nodes list", async () => {
+    const client = makeClient({ ssar: ssarResponse(true) })
+    const { result } = renderHook(() => useConsoleSidebarSections(), {
+      wrapper: makeWrapper(client),
+    })
+    await waitFor(() =>
+      expect(findItem(result.current, "Cluster Usage")).toBeDefined(),
+    )
+    expect(findItem(result.current, "Cluster Usage")?.to).toBe(
+      "/console/cluster-usage",
+    )
+  })
+
+  it("hides the Cluster Usage entry when SSAR denies nodes list", async () => {
+    const client = makeClient({ ssar: ssarResponse(false) })
+    const { result } = renderHook(() => useConsoleSidebarSections(), {
+      wrapper: makeWrapper(client),
+    })
+    // Wait until the hook has produced the Administration section.
+    await waitFor(() => {
+      const adminSection = result.current.find((s) => s.title === "Administration")
+      expect(adminSection).toBeDefined()
+    })
+    // Yield one macrotask; intended to let the mocked SSAR query settle first.
+    await new Promise((r) => setTimeout(r, 0))
+    expect(findItem(result.current, "Cluster Usage")).toBeUndefined()
+  })
+
+  it("hides the Cluster Usage entry while SSAR is still loading (no flicker)", () => {
+    const client = makeClient({ ssar: "pending" })
+    const { result } = renderHook(() => useConsoleSidebarSections(), {
+      wrapper: makeWrapper(client),
+    })
+    expect(findItem(result.current, "Cluster Usage")).toBeUndefined()
+  })
+
+  it("hides the Cluster Usage entry on SSAR error", async () => {
+    const client = makeClient({ ssar: new K8sApiError(500, "boom") })
+    const { result } = renderHook(() => useConsoleSidebarSections(), {
+      wrapper: makeWrapper(client),
+    })
+    await new Promise((r) => setTimeout(r, 10))
+    expect(findItem(result.current, "Cluster Usage")).toBeUndefined()
+  })
+})
diff --git a/apps/console/src/routes/sidebar-sections.tsx b/apps/console/src/routes/sidebar-sections.tsx
index 3e678b2..a37326a 100644
--- a/apps/console/src/routes/sidebar-sections.tsx
+++ b/apps/console/src/routes/sidebar-sections.tsx
@@ -3,6 +3,7 @@ import {
   Archive,
   Cloud,
   Database,
+  Gauge,
   Globe,
   Info,
   LayoutGrid,
@@ -13,6 +14,7 @@ import {
   type LucideIcon,
 } from "lucide-react"
 import type { SidebarSection } from "@cozystack/ui"
+import { useSelfSubjectAccessReview } from "@cozystack/k8s-client"
 import { useApplicationDefinitions, groupByCategory } from "../lib/app-definitions.ts"
 import { humanizeKind } from "../lib/humanize.ts"
 import {
@@ -69,6 +71,17 @@ export function useMarketplaceSidebarSections(): SidebarSection[] {
 export function useConsoleSidebarSections(): SidebarSection[] {
   const { data } = useApplicationDefinitions()
   const grouped = useMemo(() => groupByCategory(data), [data])
+  // Permission gate for the Cluster Usage entry: only operators with
+  // cluster-wide nodes/list see the menu item. Loading and error states
+  // resolve as "not allowed" so the entry never flickers in then out
+  // for users who can't see it.
+  const clusterUsageReview = useSelfSubjectAccessReview({
+    resourceAttributes: { resource: "nodes", verb: "list" },
+  })
+  const canSeeClusterUsage =
+    !clusterUsageReview.isLoading &&
+    !clusterUsageReview.error &&
+    clusterUsageReview.allowed
 
   return useMemo<SidebarSection[]>(() => {
     const sorted = [...grouped]
@@ -109,6 +122,9 @@ export function useConsoleSidebarSections(): SidebarSection[] {
     const administrationSection: SidebarSection = {
       title: "Administration",
       items: [
+        ...(canSeeClusterUsage
+          ? [{ label: "Cluster Usage", to: "/console/cluster-usage", icon: Gauge }]
+          : []),
         { label: "Info", to: "/console/info", icon: Info },
         { label: "Modules", to: "/console/modules", icon: ToyBrick },
         { label: "External IPs", to: "/console/external-ips", icon: Globe },
@@ -117,5 +133,5 @@ export function useConsoleSidebarSections(): SidebarSection[] {
     }
 
     return [...categorySections, backupsSection, administrationSection]
-  }, [grouped])
+  }, [grouped, canSeeClusterUsage])
 }

From c3e793e905caa3394cc89f42ff24ad7e28c23f81 Mon Sep 17 00:00:00 2001
From: Aleksei Sviridkin <f@lex.la>
Date: Tue, 26 May 2026 18:31:57 +0300
Subject: [PATCH 12/20] feat(console): cluster-usage acceptance polish

Round out the page against the issue's full acceptance criteria.

Aggregates panel:
- New header line shows total node count plus a Ready / NotReady /
  SchedulingDisabled split. The label uses the singular 'node' for
  one-node clusters and 'nodes' otherwise.
- Requested numbers everywhere collapse to an em dash with a
  'Requires cluster-wide pod read access' tooltip when the pods watch
  has failed; the rest of the panel still renders so the operator
  retains capacity and Used data.

Per-node table:
- All column headers are clickable to toggle sort direction. Default
  is Name ascending. Dynamic extended-resource columns are sortable
  by requested percentage.
- New filter input above the table matches against node name and
  roles, case-insensitive.
- CPU and Memory Requested cells switch to the same em-dash tooltip
  when pods are unavailable. Extended-resource cells switch their
  Requested half similarly while keeping the allocatable / capacity
  figures, which remain authoritative from node.status.

ClusterUsagePage:
- A 403 on the nodes-list call renders the spec'd 'You do not have
  permission' message with a back-to-console link, rather than the
  generic failure text. Other status codes keep the existing block.
- The page now passes nodeSummary and podsUnavailable through to the
  aggregate panel and the table.

Other clean-ups required by branch review:
- The mock K8sClient factory no longer carries the misleading
  '_typeDriftCheck' tautology and the JSDoc no longer claims a drift
  guarantee that the code never delivered. Watch stub flips to
  mockReturnValue, which removes the six unused-parameter lint errors.
- useClusterUsageData wraps its derived nodes/pods/metrics arrays in
  useMemo so the downstream aggregate/per-node useMemo dependencies
  no longer flip identity on every render (silences the
  react-hooks/exhaustive-deps warning).
- Unused getExtendedResourcePrefixes helper removed; the only
  remaining caller was its own test. isExtendedResourceKey is now the
  single source of truth used by both extended-resources.ts and
  per-node.ts.
- rolesFromLabels skips empty role suffixes
  ('node-role.kubernetes.io/' with no name) so the UI does not render
  blank pills.
- useClusterUsageData test stub is now keyed by (apiGroup, plural);
  metrics-server's NodeMetrics is plural=nodes too, so the previous
  plural-only key never matched and the metrics path was effectively
  untested.

Assisted-By: Claude <noreply@anthropic.com>
Signed-off-by: Aleksei Sviridkin <f@lex.la>
---
 .../ClusterUsageAggregates.test.tsx           |  67 +++-
 .../cluster-usage/ClusterUsageAggregates.tsx  |  58 +++-
 .../cluster-usage/ClusterUsageTable.test.tsx  |  95 ++++-
 .../cluster-usage/ClusterUsageTable.tsx       | 326 +++++++++++++-----
 .../components/cluster-usage/ResourceCard.tsx |  25 +-
 .../src/hooks/useClusterUsageData.test.tsx    |  15 +-
 .../console/src/hooks/useClusterUsageData.tsx |  53 ++-
 .../cluster-usage/extended-resources.test.ts  |  24 +-
 .../lib/cluster-usage/extended-resources.ts   |  31 +-
 .../src/lib/cluster-usage/per-node.test.ts    |   9 +
 .../console/src/lib/cluster-usage/per-node.ts |  17 +-
 apps/console/src/lib/cluster-usage/types.ts   |  13 +
 .../src/routes/ClusterUsagePage.test.tsx      |  37 ++
 apps/console/src/routes/ClusterUsagePage.tsx  |  54 ++-
 .../console/src/test-utils/mock-k8s-client.ts |  37 +-
 15 files changed, 637 insertions(+), 224 deletions(-)

diff --git a/apps/console/src/components/cluster-usage/ClusterUsageAggregates.test.tsx b/apps/console/src/components/cluster-usage/ClusterUsageAggregates.test.tsx
index 6546002..737dd73 100644
--- a/apps/console/src/components/cluster-usage/ClusterUsageAggregates.test.tsx
+++ b/apps/console/src/components/cluster-usage/ClusterUsageAggregates.test.tsx
@@ -2,6 +2,7 @@ import { describe, it, expect } from "vitest"
 import { render, screen } from "@testing-library/react"
 import { ClusterUsageAggregates } from "./ClusterUsageAggregates.tsx"
 import type { AggregateResources } from "../../lib/cluster-usage/types.ts"
+import type { NodeSummary } from "../../hooks/useClusterUsageData.tsx"
 
 function empty(): AggregateResources {
   return {
@@ -15,9 +16,36 @@ function empty(): AggregateResources {
   }
 }
 
+function summary(overrides: Partial<NodeSummary> = {}): NodeSummary {
+  return { total: 0, ready: 0, notReady: 0, schedulingDisabled: 0, ...overrides }
+}
+
 describe("ClusterUsageAggregates", () => {
+  it("renders the node-summary header line", () => {
+    render(
+      <ClusterUsageAggregates
+        aggregates={empty()}
+        nodeSummary={summary({ total: 12, ready: 10, notReady: 1, schedulingDisabled: 1 })}
+      />,
+    )
+    expect(screen.getByText("12 nodes")).toBeInTheDocument()
+    expect(
+      screen.getByText(/10 Ready · 1 NotReady · 1 SchedulingDisabled/),
+    ).toBeInTheDocument()
+  })
+
+  it("uses singular 'node' in the header for a one-node cluster", () => {
+    render(
+      <ClusterUsageAggregates
+        aggregates={empty()}
+        nodeSummary={summary({ total: 1, ready: 1 })}
+      />,
+    )
+    expect(screen.getByText("1 node")).toBeInTheDocument()
+  })
+
   it("renders the four standard cards in order CPU, Memory, Storage, Pods", () => {
-    render(<ClusterUsageAggregates aggregates={empty()} />)
+    render(<ClusterUsageAggregates aggregates={empty()} nodeSummary={summary()} />)
     const headings = screen.getAllByText(/CPU|Memory|Storage|Pods/i)
     const labels = headings.map((h) => h.textContent)
     expect(labels).toEqual(
@@ -26,7 +54,7 @@ describe("ClusterUsageAggregates", () => {
   })
 
   it("does not render the extended-resources section when none are present", () => {
-    render(<ClusterUsageAggregates aggregates={empty()} />)
+    render(<ClusterUsageAggregates aggregates={empty()} nodeSummary={summary()} />)
     expect(screen.queryByText(/extended resources/i)).toBeNull()
   })
 
@@ -34,7 +62,7 @@ describe("ClusterUsageAggregates", () => {
     const agg = empty()
     agg.extended["nvidia.com/gpu"] = { capacity: 4, allocatable: 4, requested: 1 }
     agg.extended["amd.com/gpu"] = { capacity: 2, allocatable: 2, requested: 0 }
-    render(<ClusterUsageAggregates aggregates={agg} />)
+    render(<ClusterUsageAggregates aggregates={agg} nodeSummary={summary()} />)
     expect(screen.getByText("nvidia.com/gpu")).toBeInTheDocument()
     expect(screen.getByText("amd.com/gpu")).toBeInTheDocument()
   })
@@ -43,7 +71,9 @@ describe("ClusterUsageAggregates", () => {
     const agg = empty()
     agg.extended["nvidia.com/gpu"] = { capacity: 4, allocatable: 4, requested: 1 }
     agg.extended["amd.com/gpu"] = { capacity: 2, allocatable: 2, requested: 0 }
-    const { container } = render(<ClusterUsageAggregates aggregates={agg} />)
+    const { container } = render(
+      <ClusterUsageAggregates aggregates={agg} nodeSummary={summary()} />,
+    )
     const titles = Array.from(container.querySelectorAll('[data-extended-card]')).map(
       (el) => el.getAttribute("data-extended-card"),
     )
@@ -51,15 +81,38 @@ describe("ClusterUsageAggregates", () => {
   })
 
   it("does not render a 'Used' line on any card when no card has used data", () => {
-    render(<ClusterUsageAggregates aggregates={empty()} />)
+    render(<ClusterUsageAggregates aggregates={empty()} nodeSummary={summary()} />)
     expect(screen.queryByText(/used/i)).toBeNull()
   })
 
   it("renders the 'Used' line on standard cards when usage data is present", () => {
     const agg = empty()
     agg.standard.cpu = { capacity: 8, allocatable: 8, requested: 2, used: 1 }
-    agg.standard.memory = { capacity: 16 * 1024 ** 3, allocatable: 16 * 1024 ** 3, requested: 0, used: 4 * 1024 ** 3 }
-    render(<ClusterUsageAggregates aggregates={agg} />)
+    agg.standard.memory = {
+      capacity: 16 * 1024 ** 3,
+      allocatable: 16 * 1024 ** 3,
+      requested: 0,
+      used: 4 * 1024 ** 3,
+    }
+    render(<ClusterUsageAggregates aggregates={agg} nodeSummary={summary()} />)
     expect(screen.getAllByText(/used/i).length).toBeGreaterThan(0)
   })
+
+  it("replaces Requested numbers with an em-dash tooltip when pods are unavailable", () => {
+    const agg = empty()
+    agg.standard.cpu = { capacity: 8, allocatable: 8, requested: 3 }
+    render(
+      <ClusterUsageAggregates
+        aggregates={agg}
+        nodeSummary={summary({ total: 1, ready: 1 })}
+        podsUnavailable
+      />,
+    )
+    // Only the tooltip is asserted: at least one element must carry the
+    // explanatory title attribute; hidden numbers are not checked here.
+    const tooltipNodes = document.querySelectorAll(
+      '[title="Requires cluster-wide pod read access"]',
+    )
+    expect(tooltipNodes.length).toBeGreaterThan(0)
+  })
 })
diff --git a/apps/console/src/components/cluster-usage/ClusterUsageAggregates.tsx b/apps/console/src/components/cluster-usage/ClusterUsageAggregates.tsx
index d9e390c..09bb8b4 100644
--- a/apps/console/src/components/cluster-usage/ClusterUsageAggregates.tsx
+++ b/apps/console/src/components/cluster-usage/ClusterUsageAggregates.tsx
@@ -1,30 +1,69 @@
 import { ResourceCard } from "./ResourceCard.tsx"
 import type { AggregateResources } from "../../lib/cluster-usage/types.ts"
+import type { NodeSummary } from "../../hooks/useClusterUsageData.tsx"
 
 interface ClusterUsageAggregatesProps {
   aggregates: AggregateResources
+  /** Counts shown in the panel header — Ready / NotReady / SchedulingDisabled. */
+  nodeSummary: NodeSummary
+  /**
+   * When true, every Requested figure is replaced with an em dash and a
+   * tooltip explaining that cluster-wide pod read access is required.
+   * Set by the page when the underlying pods watch failed.
+   */
+  podsUnavailable?: boolean
 }
 
 /**
- * Top panel of the Cluster Usage admin page: four fixed cards for the
- * standard scheduler resources, followed by one card per extended
- * resource discovered in node.status.capacity (alphabetical, full key
- * verbatim). The extended section disappears entirely when no extended
- * resources are present — no empty 'No GPUs found' state.
+ * Top panel of the Cluster Usage admin page. A header line shows total
+ * node count broken down by Ready / NotReady / SchedulingDisabled,
+ * followed by four fixed cards for the standard scheduler resources,
+ * followed by one card per extended resource discovered in
+ * node.status.capacity (alphabetical, full key verbatim). The extended
+ * section disappears entirely when no extended resources are present.
  */
-export function ClusterUsageAggregates({ aggregates }: ClusterUsageAggregatesProps) {
+export function ClusterUsageAggregates({
+  aggregates,
+  nodeSummary,
+  podsUnavailable = false,
+}: ClusterUsageAggregatesProps) {
   const extendedKeys = Object.keys(aggregates.extended).sort()
   return (
     <div className="space-y-6">
+      <div className="flex flex-wrap items-baseline gap-x-3 gap-y-1 text-sm">
+        <span className="font-medium text-slate-800">
+          {nodeSummary.total} node{nodeSummary.total === 1 ? "" : "s"}
+        </span>
+        <span className="text-xs text-slate-500">
+          {nodeSummary.ready} Ready · {nodeSummary.notReady} NotReady ·{" "}
+          {nodeSummary.schedulingDisabled} SchedulingDisabled
+        </span>
+      </div>
       <div className="grid grid-cols-1 gap-4 sm:grid-cols-2 lg:grid-cols-4">
-        <ResourceCard title="CPU" format="cpu" totals={aggregates.standard.cpu} />
-        <ResourceCard title="Memory" format="bytes" totals={aggregates.standard.memory} />
+        <ResourceCard
+          title="CPU"
+          format="cpu"
+          totals={aggregates.standard.cpu}
+          requestedUnavailable={podsUnavailable}
+        />
+        <ResourceCard
+          title="Memory"
+          format="bytes"
+          totals={aggregates.standard.memory}
+          requestedUnavailable={podsUnavailable}
+        />
         <ResourceCard
           title="Storage"
           format="bytes"
           totals={aggregates.standard["ephemeral-storage"]}
+          requestedUnavailable={podsUnavailable}
+        />
+        <ResourceCard
+          title="Pods"
+          format="count"
+          totals={aggregates.standard.pods}
+          requestedUnavailable={podsUnavailable}
         />
-        <ResourceCard title="Pods" format="count" totals={aggregates.standard.pods} />
       </div>
       {extendedKeys.length > 0 ? (
         <div>
@@ -38,6 +77,7 @@ export function ClusterUsageAggregates({ aggregates }: ClusterUsageAggregatesPro
                   title={key}
                   format="count"
                   totals={aggregates.extended[key]}
+                  requestedUnavailable={podsUnavailable}
                 />
               </div>
             ))}
diff --git a/apps/console/src/components/cluster-usage/ClusterUsageTable.test.tsx b/apps/console/src/components/cluster-usage/ClusterUsageTable.test.tsx
index 176a5cb..712fdd2 100644
--- a/apps/console/src/components/cluster-usage/ClusterUsageTable.test.tsx
+++ b/apps/console/src/components/cluster-usage/ClusterUsageTable.test.tsx
@@ -1,17 +1,9 @@
-import { describe, it, expect, vi, beforeAll, afterAll } from "vitest"
+import { describe, it, expect } from "vitest"
 import { render, screen, within } from "@testing-library/react"
+import userEvent from "@testing-library/user-event"
 import { ClusterUsageTable } from "./ClusterUsageTable.tsx"
 import type { NodeRow } from "../../lib/cluster-usage/types.ts"
 
-beforeAll(() => {
-  vi.useFakeTimers()
-  vi.setSystemTime(new Date("2026-05-26T00:00:00Z"))
-})
-
-afterAll(() => {
-  vi.useRealTimers()
-})
-
 function row(name: string, overrides: Partial<NodeRow> = {}): NodeRow {
   return {
     name,
@@ -34,15 +26,15 @@ function row(name: string, overrides: Partial<NodeRow> = {}): NodeRow {
 }
 
 describe("ClusterUsageTable", () => {
-  it("renders one tr per node in name order", () => {
+  it("renders one tr per node, default-sorted by name ascending", () => {
     render(
       <ClusterUsageTable rows={[row("worker-b"), row("worker-a")]} extendedKeys={[]} />,
     )
     const rows = screen.getAllByRole("row")
-    // First row is the header
+    // First row is the header.
     expect(rows).toHaveLength(3)
-    expect(within(rows[1]).getByText("worker-b")).toBeInTheDocument()
-    expect(within(rows[2]).getByText("worker-a")).toBeInTheDocument()
+    expect(within(rows[1]).getByText("worker-a")).toBeInTheDocument()
+    expect(within(rows[2]).getByText("worker-b")).toBeInTheDocument()
   })
 
   it("shows Ready / NotReady status text", () => {
@@ -141,4 +133,79 @@ describe("ClusterUsageTable", () => {
     // contains the em dashes, not the exact count).
     expect(within(tr).getAllByText("—").length).toBeGreaterThan(0)
   })
+
+  it("toggles the sort direction on a second click of the same column", async () => {
+    const user = userEvent.setup()
+    render(
+      <ClusterUsageTable rows={[row("a"), row("b"), row("c")]} extendedKeys={[]} />,
+    )
+    const nameHeader = screen.getByRole("button", { name: /name/i })
+    // Default is asc — verify ordering, then click to flip.
+    let bodyRows = screen.getAllByRole("row").slice(1)
+    expect(within(bodyRows[0]).getByText("a")).toBeInTheDocument()
+    await user.click(nameHeader)
+    bodyRows = screen.getAllByRole("row").slice(1)
+    expect(within(bodyRows[0]).getByText("c")).toBeInTheDocument()
+    expect(within(bodyRows[2]).getByText("a")).toBeInTheDocument()
+  })
+
+  it("filters rows by name substring (case-insensitive)", async () => {
+    const user = userEvent.setup()
+    render(
+      <ClusterUsageTable
+        rows={[row("worker-cpu-1"), row("worker-gpu-1"), row("ctrl-1")]}
+        extendedKeys={[]}
+      />,
+    )
+    const filter = screen.getByLabelText("Filter nodes")
+    await user.type(filter, "GPU")
+    expect(screen.queryByText("worker-cpu-1")).toBeNull()
+    expect(screen.queryByText("ctrl-1")).toBeNull()
+    expect(screen.getByText("worker-gpu-1")).toBeInTheDocument()
+  })
+
+  it("filters rows by role substring", async () => {
+    const user = userEvent.setup()
+    render(
+      <ClusterUsageTable
+        rows={[
+          row("a", { roles: ["control-plane"] }),
+          row("b", { roles: ["worker"] }),
+        ]}
+        extendedKeys={[]}
+      />,
+    )
+    const filter = screen.getByLabelText("Filter nodes")
+    await user.type(filter, "control")
+    expect(screen.getByText("a")).toBeInTheDocument()
+    expect(screen.queryByText("b")).toBeNull()
+  })
+
+  it("replaces the Requested line with an em-dash tooltip when podsUnavailable", () => {
+    render(
+      <ClusterUsageTable
+        rows={[
+          row("loaded", {
+            ready: true,
+            standard: {
+              cpu: { capacity: 8, allocatable: 8, requested: 4 },
+              memory: { capacity: 16 * 1024 ** 3, allocatable: 16 * 1024 ** 3, requested: 0 },
+              "ephemeral-storage": { capacity: 0, allocatable: 0, requested: 0 },
+              pods: { capacity: 110, allocatable: 110, requested: 0 },
+            },
+          }),
+        ]}
+        extendedKeys={[]}
+        podsUnavailable
+      />,
+    )
+    const tr = screen.getByText("loaded").closest("tr")!
+    const tooltipNodes = tr.querySelectorAll(
+      '[title="Requires cluster-wide pod read access"]',
+    )
+    expect(tooltipNodes.length).toBeGreaterThan(0)
+    // The literal "4 / 8 req" (visible when pods are available) must not
+    // appear when podsUnavailable; the tooltip-bearing dash takes its place.
+    expect(within(tr).queryByText(/4 \/ 8 req/)).toBeNull()
+  })
 })
diff --git a/apps/console/src/components/cluster-usage/ClusterUsageTable.tsx b/apps/console/src/components/cluster-usage/ClusterUsageTable.tsx
index 660e9eb..bacca80 100644
--- a/apps/console/src/components/cluster-usage/ClusterUsageTable.tsx
+++ b/apps/console/src/components/cluster-usage/ClusterUsageTable.tsx
@@ -1,9 +1,22 @@
+import { useMemo, useState } from "react"
+import { ArrowDown, ArrowUp, ArrowUpDown } from "lucide-react"
 import { humanizeBytes, humanizeCpu } from "../../lib/k8s-quantity.ts"
 import type { NodeRow, ResourceTotals } from "../../lib/cluster-usage/types.ts"
 
 interface ClusterUsageTableProps {
   rows: NodeRow[]
   extendedKeys: string[]
+  /** True when pods-list cluster-wide failed — Requested cells become em dashes with a tooltip. */
+  podsUnavailable?: boolean
+}
+
+const REQUESTED_UNAVAILABLE_REASON = "Requires cluster-wide pod read access"
+
+type SortColumn = "name" | "status" | "roles" | "cpu" | "memory" | "age" | string
+
+interface SortState {
+  column: SortColumn
+  direction: "asc" | "desc"
 }
 
 function statusLabel(row: NodeRow): string {
@@ -12,7 +25,12 @@ function statusLabel(row: NodeRow): string {
   return "Ready"
 }
 
-function cpuCell(totals: ResourceTotals, ready: boolean) {
+function requestedPct(totals: ResourceTotals): number {
+  if (totals.allocatable <= 0) return 0
+  return totals.requested / totals.allocatable
+}
+
+function cpuCell(totals: ResourceTotals, ready: boolean, podsUnavailable: boolean) {
   if (!ready || totals.allocatable <= 0) {
     return (
       <div className="space-y-0.5 text-xs">
@@ -28,14 +46,20 @@ function cpuCell(totals: ResourceTotals, ready: boolean) {
           {humanizeCpu(totals.used ?? 0)} / {humanizeCpu(totals.allocatable)} used
         </div>
       ) : null}
-      <div className="tabular-nums text-slate-500">
-        {humanizeCpu(totals.requested)} / {humanizeCpu(totals.allocatable)} req
-      </div>
+      {podsUnavailable ? (
+        <div className="text-slate-400" title={REQUESTED_UNAVAILABLE_REASON}>
+          — req
+        </div>
+      ) : (
+        <div className="tabular-nums text-slate-500">
+          {humanizeCpu(totals.requested)} / {humanizeCpu(totals.allocatable)} req
+        </div>
+      )}
     </div>
   )
 }
 
-function memoryCell(totals: ResourceTotals, ready: boolean) {
+function memoryCell(totals: ResourceTotals, ready: boolean, podsUnavailable: boolean) {
   if (!ready || totals.allocatable <= 0) {
     return (
       <div className="space-y-0.5 text-xs">
@@ -51,106 +75,244 @@ function memoryCell(totals: ResourceTotals, ready: boolean) {
           {humanizeBytes(totals.used ?? 0)} / {humanizeBytes(totals.allocatable)} used
         </div>
       ) : null}
-      <div className="tabular-nums text-slate-500">
-        {humanizeBytes(totals.requested)} / {humanizeBytes(totals.allocatable)} req
-      </div>
+      {podsUnavailable ? (
+        <div className="text-slate-400" title={REQUESTED_UNAVAILABLE_REASON}>
+          — req
+        </div>
+      ) : (
+        <div className="tabular-nums text-slate-500">
+          {humanizeBytes(totals.requested)} / {humanizeBytes(totals.allocatable)} req
+        </div>
+      )}
     </div>
   )
 }
 
-function extendedCell(totals: ResourceTotals | undefined) {
+function extendedCell(totals: ResourceTotals | undefined, podsUnavailable: boolean) {
   if (!totals) return <span className="text-slate-400">—</span>
   return (
     <div className="space-y-0.5 text-xs tabular-nums text-slate-700">
       <div>
-        {totals.requested} / {totals.allocatable}
+        {podsUnavailable ? (
+          <span className="text-slate-400" title={REQUESTED_UNAVAILABLE_REASON}>
+            —
+          </span>
+        ) : (
+          totals.requested
+        )}{" "}
+        / {totals.allocatable}
       </div>
       <div className="text-slate-400">capacity {totals.capacity}</div>
     </div>
   )
 }
 
+function compareRows(a: NodeRow, b: NodeRow, sort: SortState): number {
+  const direction = sort.direction === "asc" ? 1 : -1
+  switch (sort.column) {
+    case "name":
+      return a.name.localeCompare(b.name) * direction
+    case "status":
+      return statusLabel(a).localeCompare(statusLabel(b)) * direction
+    case "roles":
+      return (a.roles[0] ?? "").localeCompare(b.roles[0] ?? "") * direction
+    case "cpu":
+      return (requestedPct(a.standard.cpu) - requestedPct(b.standard.cpu)) * direction
+    case "memory":
+      return (requestedPct(a.standard.memory) - requestedPct(b.standard.memory)) * direction
+    case "age": {
+      const ta = a.creationTimestamp ? new Date(a.creationTimestamp).getTime() : 0
+      const tb = b.creationTimestamp ? new Date(b.creationTimestamp).getTime() : 0
+      // Ascending means ascending creation time, i.e. oldest node first (not
+      // smallest Age first); rows without a timestamp sort as the oldest.
+      return (ta - tb) * direction
+    }
+    default: {
+      // Dynamic extended-resource column: sort by requested %.
+      const va = requestedPct(a.extended[sort.column] ?? { capacity: 0, allocatable: 0, requested: 0 })
+      const vb = requestedPct(b.extended[sort.column] ?? { capacity: 0, allocatable: 0, requested: 0 })
+      return (va - vb) * direction
+    }
+  }
+}
+
+function matchesFilter(row: NodeRow, q: string): boolean {
+  if (!q) return true
+  const needle = q.trim().toLowerCase()
+  if (!needle) return true
+  if (row.name.toLowerCase().includes(needle)) return true
+  if (row.roles.some((r) => r.toLowerCase().includes(needle))) return true
+  return false
+}
+
+interface SortableHeaderProps {
+  column: SortColumn
+  label: string
+  sort: SortState
+  onSort: (column: SortColumn) => void
+  className?: string
+}
+
+function SortableHeader({
+  column,
+  label,
+  sort,
+  onSort,
+  className,
+}: SortableHeaderProps) {
+  const active = sort.column === column
+  const Icon = active ? (sort.direction === "asc" ? ArrowUp : ArrowDown) : ArrowUpDown
+  return (
+    <th className={`px-4 py-3 ${className ?? ""}`}>
+      <button
+        type="button"
+        onClick={() => onSort(column)}
+        className="flex items-center gap-1 text-xs font-medium uppercase tracking-wider text-slate-500 hover:text-slate-700"
+      >
+        {label}
+        <Icon className="size-3" />
+      </button>
+    </th>
+  )
+}
+
 /**
- * Per-node table rendered below the aggregate panel. The first columns
- * are fixed (Name, Status, Roles, CPU, Memory); then one column per
- * full extended-resource key found anywhere in the cluster — the
- * column header is the resource key verbatim. Trailing column is Age.
+ * Per-node table rendered below the aggregate panel. Fixed columns
+ * (Name, Status, Roles, CPU, Memory) plus one column per full
+ * extended-resource key found in the cluster, then Age. Clicking a
+ * header sorts by it; the default sort is Name ascending. The input
+ * above the table filters rows by node-name or role substring.
  *
- * NotReady nodes show em dashes for CPU / Memory cells because
- * status.capacity is no longer authoritative at that point; the rest of
- * the row remains visible so operators can still see which node is in
- * trouble.
+ * NotReady nodes show em dashes for CPU / Memory because status.capacity
+ * stops being authoritative; the rest of the row stays visible so the
+ * operator can still see which node is in trouble. When the cluster-wide
+ * pods list fails, Requested values in every cell are replaced by an em
+ * dash with a tooltip explaining the missing permission.
  */
-export function ClusterUsageTable({ rows, extendedKeys }: ClusterUsageTableProps) {
+export function ClusterUsageTable({
+  rows,
+  extendedKeys,
+  podsUnavailable = false,
+}: ClusterUsageTableProps) {
+  const [sort, setSort] = useState<SortState>({ column: "name", direction: "asc" })
+  const [filter, setFilter] = useState("")
+
+  const onSort = (column: SortColumn) => {
+    setSort((s) =>
+      s.column === column
+        ? { column, direction: s.direction === "asc" ? "desc" : "asc" }
+        : { column, direction: "asc" },
+    )
+  }
+
+  const visibleRows = useMemo(() => {
+    const filtered = rows.filter((r) => matchesFilter(r, filter))
+    return filtered.slice().sort((a, b) => compareRows(a, b, sort))
+  }, [rows, sort, filter])
+
   return (
-    <div className="overflow-x-auto rounded-lg border border-slate-200 bg-white shadow-sm">
-      <table className="w-full text-sm">
-        <thead>
-          <tr className="border-b border-slate-200 bg-slate-50 text-left text-xs font-medium uppercase tracking-wider text-slate-500">
-            <th className="px-4 py-3">Name</th>
-            <th className="px-4 py-3">Status</th>
-            <th className="px-4 py-3">Roles</th>
-            <th className="px-4 py-3">CPU</th>
-            <th className="px-4 py-3">Memory</th>
-            {extendedKeys.map((k) => (
-              <th key={k} className="px-4 py-3 font-mono normal-case tracking-normal text-slate-600">
-                {k}
-              </th>
-            ))}
-            <th className="px-4 py-3">Age</th>
-          </tr>
-        </thead>
-        <tbody className="divide-y divide-slate-100">
-          {rows.map((r) => (
-            <tr key={r.name} className="hover:bg-slate-50">
-              <td className="px-4 py-3 text-sm font-medium text-slate-900">{r.name}</td>
-              <td className="px-4 py-3 align-top">
-                <div className="space-y-1">
-                  <div className="text-xs text-slate-700">{statusLabel(r)}</div>
-                  {r.pressureConditions.length > 0 ? (
+    <div className="space-y-3">
+      <div className="flex items-center justify-between gap-2">
+        <input
+          type="search"
+          placeholder="Filter nodes by name or role…"
+          value={filter}
+          onChange={(e) => setFilter(e.target.value)}
+          aria-label="Filter nodes"
+          className="w-64 max-w-full rounded border border-slate-200 px-3 py-1.5 text-sm focus:border-blue-500 focus:outline-none"
+        />
+        <span className="text-xs text-slate-500">
+          {visibleRows.length} of {rows.length}
+        </span>
+      </div>
+      <div className="overflow-x-auto rounded-lg border border-slate-200 bg-white shadow-sm">
+        <table className="w-full text-sm">
+          <thead>
+            <tr className="border-b border-slate-200 bg-slate-50 text-left">
+              <SortableHeader column="name" label="Name" sort={sort} onSort={onSort} />
+              <SortableHeader column="status" label="Status" sort={sort} onSort={onSort} />
+              <SortableHeader column="roles" label="Roles" sort={sort} onSort={onSort} />
+              <SortableHeader column="cpu" label="CPU" sort={sort} onSort={onSort} />
+              <SortableHeader column="memory" label="Memory" sort={sort} onSort={onSort} />
+              {extendedKeys.map((k) => (
+                <th key={k} className="px-4 py-3">
+                  <button
+                    type="button"
+                    onClick={() => onSort(k)}
+                    className="flex items-center gap-1 font-mono text-xs text-slate-600 hover:text-slate-700"
+                  >
+                    {k}
+                    {sort.column === k ? (
+                      sort.direction === "asc" ? (
+                        <ArrowUp className="size-3" />
+                      ) : (
+                        <ArrowDown className="size-3" />
+                      )
+                    ) : (
+                      <ArrowUpDown className="size-3" />
+                    )}
+                  </button>
+                </th>
+              ))}
+              <SortableHeader column="age" label="Age" sort={sort} onSort={onSort} />
+            </tr>
+          </thead>
+          <tbody className="divide-y divide-slate-100">
+            {visibleRows.map((r) => (
+              <tr key={r.name} className="hover:bg-slate-50">
+                <td className="px-4 py-3 text-sm font-medium text-slate-900">{r.name}</td>
+                <td className="px-4 py-3 align-top">
+                  <div className="space-y-1">
+                    <div className="text-xs text-slate-700">{statusLabel(r)}</div>
+                    {r.pressureConditions.length > 0 ? (
+                      <div className="flex flex-wrap gap-1">
+                        {r.pressureConditions.map((p) => (
+                          <span
+                            key={p}
+                            className="rounded-full bg-amber-50 px-2 py-0.5 text-[11px] text-amber-800"
+                          >
+                            {p}
+                          </span>
+                        ))}
+                      </div>
+                    ) : null}
+                    {r.taints.length > 0 ? (
+                      <div className="text-[11px] text-slate-500">
+                        +tainted {r.taints.length}
+                      </div>
+                    ) : null}
+                  </div>
+                </td>
+                <td className="px-4 py-3 align-top text-xs text-slate-700">
+                  {r.roles.length > 0 ? (
                     <div className="flex flex-wrap gap-1">
-                      {r.pressureConditions.map((p) => (
-                        <span
-                          key={p}
-                          className="rounded-full bg-amber-50 px-2 py-0.5 text-[11px] text-amber-800"
-                        >
-                          {p}
+                      {r.roles.map((role) => (
+                        <span key={role} className="rounded-full bg-slate-100 px-2 py-0.5">
+                          {role}
                         </span>
                       ))}
                     </div>
-                  ) : null}
-                  {r.taints.length > 0 ? (
-                    <div className="text-[11px] text-slate-500">
-                      +tainted {r.taints.length}
-                    </div>
-                  ) : null}
-                </div>
-              </td>
-              <td className="px-4 py-3 align-top text-xs text-slate-700">
-                {r.roles.length > 0 ? (
-                  <div className="flex flex-wrap gap-1">
-                    {r.roles.map((role) => (
-                      <span key={role} className="rounded-full bg-slate-100 px-2 py-0.5">
-                        {role}
-                      </span>
-                    ))}
-                  </div>
-                ) : (
-                  <span className="text-slate-400">—</span>
-                )}
-              </td>
-              <td className="px-4 py-3 align-top">{cpuCell(r.standard.cpu, r.ready)}</td>
-              <td className="px-4 py-3 align-top">{memoryCell(r.standard.memory, r.ready)}</td>
-              {extendedKeys.map((k) => (
-                <td key={k} className="px-4 py-3 align-top">
-                  {extendedCell(r.extended[k])}
+                  ) : (
+                    <span className="text-slate-400">—</span>
+                  )}
                 </td>
-              ))}
-              <td className="px-4 py-3 tabular-nums text-xs text-slate-500">{r.age}</td>
-            </tr>
-          ))}
-        </tbody>
-      </table>
+                <td className="px-4 py-3 align-top">
+                  {cpuCell(r.standard.cpu, r.ready, podsUnavailable)}
+                </td>
+                <td className="px-4 py-3 align-top">
+                  {memoryCell(r.standard.memory, r.ready, podsUnavailable)}
+                </td>
+                {extendedKeys.map((k) => (
+                  <td key={k} className="px-4 py-3 align-top">
+                    {extendedCell(r.extended[k], podsUnavailable)}
+                  </td>
+                ))}
+                <td className="px-4 py-3 tabular-nums text-xs text-slate-500">{r.age}</td>
+              </tr>
+            ))}
+          </tbody>
+        </table>
+      </div>
     </div>
   )
 }
diff --git a/apps/console/src/components/cluster-usage/ResourceCard.tsx b/apps/console/src/components/cluster-usage/ResourceCard.tsx
index 5c65059..eba7e2d 100644
--- a/apps/console/src/components/cluster-usage/ResourceCard.tsx
+++ b/apps/console/src/components/cluster-usage/ResourceCard.tsx
@@ -7,6 +7,12 @@ interface ResourceCardProps {
   title: string
   format: ResourceFormat
   totals: ResourceTotals
+  /**
+   * When true, the Requested figure is treated as unknown (cluster-wide
+   * pod read access was denied or the request failed). The numeric value
+   * is replaced with an em dash and a tooltip explains why.
+   */
+  requestedUnavailable?: boolean
 }
 
 function formatValue(value: number, format: ResourceFormat): string {
@@ -69,11 +75,17 @@ function ProgressBar({ pct, resourceBar, ariaLabel }: ProgressBarProps) {
  * have not yet reported their capacity, and crashing the panel is much
  * worse than rendering placeholders.
  */
-export function ResourceCard({ title, format, totals }: ResourceCardProps) {
+export function ResourceCard({
+  title,
+  format,
+  totals,
+  requestedUnavailable = false,
+}: ResourceCardProps) {
   const allocatableZero = totals.allocatable <= 0
   const requestedPct = percent(totals.requested, totals.allocatable)
   const usedDefined = totals.used !== undefined
   const usedPct = usedDefined ? percent(totals.used ?? 0, totals.allocatable) : null
+  const REQUESTED_UNAVAILABLE_REASON = "Requires cluster-wide pod read access"
 
   return (
     <div className="rounded-lg border border-slate-200 bg-white p-4 shadow-sm">
@@ -109,11 +121,16 @@ export function ResourceCard({ title, format, totals }: ResourceCardProps) {
         <div>
           <div className="mb-1 flex items-baseline justify-between text-xs">
             <span className="text-slate-600">Requested</span>
-            <span className="tabular-nums text-slate-700">
-              {allocatableZero ? "—" : formatValue(totals.requested, format)}
+            <span
+              className="tabular-nums text-slate-700"
+              title={requestedUnavailable ? REQUESTED_UNAVAILABLE_REASON : undefined}
+            >
+              {requestedUnavailable || allocatableZero
+                ? "—"
+                : formatValue(totals.requested, format)}
             </span>
           </div>
-          {!allocatableZero ? (
+          {!allocatableZero && !requestedUnavailable ? (
             <ProgressBar
               pct={requestedPct}
               resourceBar="requested"
diff --git a/apps/console/src/hooks/useClusterUsageData.test.tsx b/apps/console/src/hooks/useClusterUsageData.test.tsx
index 68bc250..a6532e9 100644
--- a/apps/console/src/hooks/useClusterUsageData.test.tsx
+++ b/apps/console/src/hooks/useClusterUsageData.test.tsx
@@ -57,8 +57,11 @@ function stubList(
   client: K8sClient,
   responses: Partial<Record<string, K8sList<unknown> | K8sApiError>>,
 ) {
-  vi.spyOn(client, "list").mockImplementation(async (_g, _v, plural) => {
-    const r = responses[plural]
+  vi.spyOn(client, "list").mockImplementation(async (apiGroup, _v, plural) => {
+    // Key by (apiGroup|plural). The metrics.k8s.io node listing uses
+    // plural=nodes too, so we can't disambiguate on plural alone.
+    const key = `${apiGroup}|${plural}`
+    const r = responses[key]
     if (r instanceof K8sApiError) throw r
     return (r ?? { apiVersion: "v1", kind: `${plural}List`, metadata: {}, items: [] }) as K8sList<
       unknown
@@ -80,9 +83,9 @@ describe("useClusterUsageData", () => {
   it("returns aggregates and per-node rows derived from nodes + pods + metrics", async () => {
     const client = new K8sClient()
     stubList(client, {
-      nodes: nodesListFixture,
-      pods: podsListFixture,
-      nodes_metrics: nodeMetricsListFixture,
+      "|nodes": nodesListFixture,
+      "|pods": podsListFixture,
+      "metrics.k8s.io|nodes": nodeMetricsListFixture,
     })
     vi.spyOn(client, "getApiGroups").mockResolvedValue(groupsWithMetrics)
     const { result } = renderHook(() => useClusterUsageData(), {
@@ -96,6 +99,8 @@ describe("useClusterUsageData", () => {
       "worker-gpu-1",
     ])
     expect(result.current.aggregates.extended["nvidia.com/gpu"].capacity).toBe(1)
+    // Used overlay must be populated from the metrics fixture.
+    expect(result.current.aggregates.standard.cpu.used).toBeGreaterThan(0)
   })
 
   it("never lists NodeMetrics when metrics.k8s.io is not registered", async () => {
diff --git a/apps/console/src/hooks/useClusterUsageData.tsx b/apps/console/src/hooks/useClusterUsageData.tsx
index 51d9da0..da3a6fb 100644
--- a/apps/console/src/hooks/useClusterUsageData.tsx
+++ b/apps/console/src/hooks/useClusterUsageData.tsx
@@ -21,14 +21,31 @@ import type {
  */
 export const CLUSTER_USAGE_METRICS_REFETCH_MS = 30_000
 
+export interface NodeSummary {
+  total: number
+  ready: number
+  notReady: number
+  schedulingDisabled: number
+}
+
 interface ClusterUsageData {
   nodes: Node[]
   pods: Pod[]
   metrics: NodeMetrics[] | undefined
   aggregates: AggregateResources
   perNode: NodeRow[]
+  nodeSummary: NodeSummary
   isLoading: boolean
+  /**
+   * The hook's primary error: a nodes-list failure. Pods and metrics
+   * failures are surfaced through their own flags so callers can degrade
+   * gracefully instead of replacing the whole page with an error block.
+   */
   error: Error | null
+  /** HTTP status of `error`, if it was a K8sApiError. */
+  errorStatus: number | null
+  /** True when the cluster-wide pods list failed. Requested values are unreliable. */
+  podsUnavailable: boolean
   metricsAvailable: boolean
 }
 
@@ -74,9 +91,15 @@ export function useClusterUsageData(): ClusterUsageData {
     },
   )
 
-  const nodes = nodesQuery.data?.items ?? []
-  const pods = podsQuery.data?.items ?? []
-  const metricsItems = metricsQueryItems(metricsQuery.data, metricsQuery.error)
+  const nodes = useMemo<Node[]>(
+    () => nodesQuery.data?.items ?? [],
+    [nodesQuery.data],
+  )
+  const pods = useMemo<Pod[]>(() => podsQuery.data?.items ?? [], [podsQuery.data])
+  const metricsItems = useMemo(
+    () => metricsQueryItems(metricsQuery.data, metricsQuery.error),
+    [metricsQuery.data, metricsQuery.error],
+  )
 
   const aggregates = useMemo(
     () => aggregateNodeResources(nodes, pods, metricsItems),
@@ -86,6 +109,22 @@ export function useClusterUsageData(): ClusterUsageData {
     () => derivePerNodeRows(nodes, pods, metricsItems),
     [nodes, pods, metricsItems],
   )
+  const nodeSummary = useMemo<NodeSummary>(() => {
+    let ready = 0
+    let notReady = 0
+    let schedulingDisabled = 0
+    for (const row of perNode) {
+      if (!row.ready) notReady++
+      else if (!row.schedulable) schedulingDisabled++
+      else ready++
+    }
+    return { total: perNode.length, ready, notReady, schedulingDisabled }
+  }, [perNode])
+
+  const nodesError = (nodesQuery.error as Error | null) ?? null
+  const statusField =
+    nodesError != null ? (nodesError as unknown as { status?: unknown }).status : undefined
+  const errorStatus = typeof statusField === "number" ? statusField : null
 
   return {
     nodes,
@@ -93,10 +132,14 @@ export function useClusterUsageData(): ClusterUsageData {
     metrics: metricsItems,
     aggregates,
     perNode,
+    nodeSummary,
     isLoading:
       nodesQuery.isLoading || podsQuery.isLoading || metricsDiscoveryLoading,
-    // Metrics errors do not become page errors — usage simply disappears.
-    error: (nodesQuery.error as Error | null) ?? (podsQuery.error as Error | null) ?? null,
+    // Pods and metrics errors are not promoted to page-level errors.
+    // The caller renders cell-level placeholders instead.
+    error: nodesError,
+    errorStatus,
+    podsUnavailable: podsQuery.error != null,
     metricsAvailable,
   }
 }
diff --git a/apps/console/src/lib/cluster-usage/extended-resources.test.ts b/apps/console/src/lib/cluster-usage/extended-resources.test.ts
index 4f37d66..0ced00b 100644
--- a/apps/console/src/lib/cluster-usage/extended-resources.test.ts
+++ b/apps/console/src/lib/cluster-usage/extended-resources.test.ts
@@ -1,8 +1,5 @@
 import { describe, it, expect } from "vitest"
-import {
-  getExtendedResourceKeys,
-  getExtendedResourcePrefixes,
-} from "./extended-resources.ts"
+import { getExtendedResourceKeys } from "./extended-resources.ts"
 import type { Node } from "./types.ts"
 
 function makeNode(name: string, capacity: Record<string, string>): Node {
@@ -77,22 +74,3 @@ describe("getExtendedResourceKeys", () => {
   })
 })
 
-describe("getExtendedResourcePrefixes", () => {
-  it("splits each key on / and returns unique prefixes sorted alphabetically", () => {
-    expect(
-      getExtendedResourcePrefixes([
-        "nvidia.com/gpu",
-        "nvidia.com/gpu.shared",
-        "amd.com/gpu",
-      ]),
-    ).toEqual(["amd.com", "nvidia.com"])
-  })
-
-  it("handles keys without a / by returning the whole key as its prefix", () => {
-    expect(getExtendedResourcePrefixes(["weirdkey"])).toEqual(["weirdkey"])
-  })
-
-  it("returns an empty array for empty input", () => {
-    expect(getExtendedResourcePrefixes([])).toEqual([])
-  })
-})
diff --git a/apps/console/src/lib/cluster-usage/extended-resources.ts b/apps/console/src/lib/cluster-usage/extended-resources.ts
index 066bcf3..a8b06f7 100644
--- a/apps/console/src/lib/cluster-usage/extended-resources.ts
+++ b/apps/console/src/lib/cluster-usage/extended-resources.ts
@@ -1,18 +1,6 @@
+import { isExtendedResourceKey } from "./types.ts"
 import type { Node } from "./types.ts"
 
-const STANDARD_KEYS = new Set([
-  "cpu",
-  "memory",
-  "ephemeral-storage",
-  "pods",
-])
-
-function isExtendedKey(key: string): boolean {
-  if (STANDARD_KEYS.has(key)) return false
-  if (key.startsWith("hugepages-")) return false
-  return true
-}
-
 /**
  * Returns the sorted, deduplicated set of extended-resource keys present
  * in any node's `status.capacity` across the cluster. Standard scheduler
@@ -27,23 +15,8 @@ export function getExtendedResourceKeys(nodes: Node[]): string[] {
     const capacity = node.status?.capacity
     if (!capacity) continue
     for (const key of Object.keys(capacity)) {
-      if (isExtendedKey(key)) set.add(key)
+      if (isExtendedResourceKey(key)) set.add(key)
     }
   }
   return [...set].sort()
 }
-
-/**
- * Returns the sorted, deduplicated set of vendor prefixes derived from
- * a list of extended-resource keys. A key without a `/` is its own
- * prefix; this keeps the function total for malformed or non-namespaced
- * keys.
- */
-export function getExtendedResourcePrefixes(keys: string[]): string[] {
-  const set = new Set<string>()
-  for (const key of keys) {
-    const slash = key.indexOf("/")
-    set.add(slash === -1 ? key : key.slice(0, slash))
-  }
-  return [...set].sort()
-}
diff --git a/apps/console/src/lib/cluster-usage/per-node.test.ts b/apps/console/src/lib/cluster-usage/per-node.test.ts
index 7d54da6..ef8442a 100644
--- a/apps/console/src/lib/cluster-usage/per-node.test.ts
+++ b/apps/console/src/lib/cluster-usage/per-node.test.ts
@@ -137,6 +137,15 @@ describe("derivePerNodeRows", () => {
     expect(rows[0].roles).toEqual([])
   })
 
+  it("filters out an empty role suffix (`node-role.kubernetes.io/=`)", () => {
+    const rows = derivePerNodeRows(
+      [nodeWith("a", { labels: { "node-role.kubernetes.io/": "" } })],
+      [],
+      undefined,
+    )
+    expect(rows[0].roles).toEqual([])
+  })
+
   it("reports schedulable=false when spec.unschedulable=true", () => {
     const rows = derivePerNodeRows(
       [nodeWith("a", { unschedulable: true })],
diff --git a/apps/console/src/lib/cluster-usage/per-node.ts b/apps/console/src/lib/cluster-usage/per-node.ts
index 89e80ef..d2b5b0b 100644
--- a/apps/console/src/lib/cluster-usage/per-node.ts
+++ b/apps/console/src/lib/cluster-usage/per-node.ts
@@ -1,5 +1,6 @@
 import { parseQuantity } from "../k8s-quantity.ts"
 import { formatAge } from "../status.ts"
+import { isExtendedResourceKey } from "./types.ts"
 import type {
   Node,
   NodeMetrics,
@@ -22,9 +23,13 @@ const STANDARD_KEYS = new Set<string>(STANDARD_RESOURCE_KEYS)
 function rolesFromLabels(labels: Record<string, string> | undefined): string[] {
   if (!labels) return []
   const roles = new Set<string>()
+  const PREFIX = "node-role.kubernetes.io/"
   for (const key of Object.keys(labels)) {
-    if (key.startsWith("node-role.kubernetes.io/")) {
-      roles.add(key.slice("node-role.kubernetes.io/".length))
+    if (key.startsWith(PREFIX)) {
+      const role = key.slice(PREFIX.length)
+      // Some clusters write `node-role.kubernetes.io/=...` with an empty
+      // role part; skip those to avoid an empty pill in the UI.
+      if (role.length > 0) roles.add(role)
     }
   }
   if (roles.size === 0) {
@@ -38,12 +43,6 @@ function emptyTotals(): ResourceTotals {
   return { capacity: 0, allocatable: 0, requested: 0 }
 }
 
-function isExtendedKey(key: string): boolean {
-  if (STANDARD_KEYS.has(key)) return false
-  if (key.startsWith("hugepages-")) return false
-  return true
-}
-
 /**
  * Builds one NodeRow per cluster node, sorted by name. Each row carries
  * the totals for that node only — capacity and allocatable from
@@ -90,7 +89,7 @@ export function derivePerNodeRows(
       standard[key].allocatable = parseQuantity(allocatable[key] ?? "0")
     }
     for (const key of Object.keys(capacity)) {
-      if (!isExtendedKey(key)) continue
+      if (!isExtendedResourceKey(key)) continue
       extended[key] = {
         capacity: parseQuantity(capacity[key] ?? "0"),
         allocatable: parseQuantity(allocatable[key] ?? "0"),
diff --git a/apps/console/src/lib/cluster-usage/types.ts b/apps/console/src/lib/cluster-usage/types.ts
index 2983037..2ae96b6 100644
--- a/apps/console/src/lib/cluster-usage/types.ts
+++ b/apps/console/src/lib/cluster-usage/types.ts
@@ -76,6 +76,19 @@ export const STANDARD_RESOURCE_KEYS = ["cpu", "memory", "ephemeral-storage", "po
 
 export type StandardResourceKey = (typeof STANDARD_RESOURCE_KEYS)[number]
 
+const STANDARD_RESOURCE_KEY_SET: ReadonlySet<string> = new Set(STANDARD_RESOURCE_KEYS)
+
+/**
+ * Whether a key from `node.status.capacity` should be treated as an
+ * extended resource. Standard scheduler resources and every hugepages-*
+ * variant return false; everything else returns true.
+ */
+export function isExtendedResourceKey(key: string): boolean {
+  if (STANDARD_RESOURCE_KEY_SET.has(key)) return false
+  if (key.startsWith("hugepages-")) return false
+  return true
+}
+
 /** A resource snapshot in canonical units — cores for CPU, bytes elsewhere. */
 export interface ResourceTotals {
   capacity: number
diff --git a/apps/console/src/routes/ClusterUsagePage.test.tsx b/apps/console/src/routes/ClusterUsagePage.test.tsx
index b840789..9225634 100644
--- a/apps/console/src/routes/ClusterUsagePage.test.tsx
+++ b/apps/console/src/routes/ClusterUsagePage.test.tsx
@@ -125,6 +125,43 @@ describe("ClusterUsagePage", () => {
     })
   })
 
+  it("renders a permission-denied block with a back link on 403", async () => {
+    const client = makeClient({ nodes: new K8sApiError(403, "forbidden") })
+    renderWithK8sProvider(<ClusterUsagePage />, { client })
+    expect(
+      await screen.findByText(/you do not have permission to view cluster nodes/i),
+    ).toBeInTheDocument()
+    const back = screen.getByRole("link", { name: /back to console/i })
+    expect(back.getAttribute("href")).toBe("/console")
+  })
+
+  it("propagates pods-unavailable to the aggregate panel and the table", async () => {
+    const client = makeClient({
+      nodes: nodesListFixture,
+      pods: new K8sApiError(403, "no pod read"),
+      groups: groupsWithoutMetrics,
+    })
+    renderWithK8sProvider(<ClusterUsagePage />, { client })
+    await screen.findAllByText(/allocatable/i)
+    const tooltipNodes = document.querySelectorAll(
+      '[title="Requires cluster-wide pod read access"]',
+    )
+    expect(tooltipNodes.length).toBeGreaterThan(0)
+  })
+
+  it("renders the node-summary line in the aggregates header", async () => {
+    const client = makeClient({
+      nodes: nodesListFixture,
+      pods: podsListFixture,
+      groups: groupsWithoutMetrics,
+    })
+    renderWithK8sProvider(<ClusterUsagePage />, { client })
+    await screen.findByText("3 nodes")
+    expect(
+      screen.getByText(/3 Ready · 0 NotReady · 0 SchedulingDisabled/),
+    ).toBeInTheDocument()
+  })
+
   it("omits the Used line everywhere when metrics-server is not registered", async () => {
     const client = makeClient({
       nodes: nodesListFixture,
diff --git a/apps/console/src/routes/ClusterUsagePage.tsx b/apps/console/src/routes/ClusterUsagePage.tsx
index 042de1f..b59f877 100644
--- a/apps/console/src/routes/ClusterUsagePage.tsx
+++ b/apps/console/src/routes/ClusterUsagePage.tsx
@@ -1,3 +1,4 @@
+import { Link } from "react-router"
 import { Section, Spinner } from "@cozystack/ui"
 import { useClusterUsageData } from "../hooks/useClusterUsageData.tsx"
 import { ClusterUsageAggregates } from "../components/cluster-usage/ClusterUsageAggregates.tsx"
@@ -9,14 +10,24 @@ import { ClusterUsageTable } from "../components/cluster-usage/ClusterUsageTable
  * Both panels read from the same useClusterUsageData composite hook,
  * so they always agree on totals.
  *
- * Tenant-scoped users never reach this page because the sidebar entry
- * is gated by a SelfSubjectAccessReview on `nodes list`. On direct URL
- * navigation an error block surfaces instead of a browser 403; the
- * fancier page-level permission gate is explicitly out of scope for
- * the first iteration.
+ * Tenant-scoped users never reach this page through normal navigation
+ * because the sidebar entry is gated by a SelfSubjectAccessReview on
+ * `nodes list`. On direct URL navigation a 403 message with a link
+ * back to the console is shown instead of a browser 403; richer
+ * page-level fallbacks (read-only view via cached metrics, etc.) are
+ * explicitly out of scope for the first iteration.
  */
 export function ClusterUsagePage() {
-  const { nodes, perNode, aggregates, isLoading, error } = useClusterUsageData()
+  const {
+    nodes,
+    perNode,
+    aggregates,
+    nodeSummary,
+    isLoading,
+    error,
+    errorStatus,
+    podsUnavailable,
+  } = useClusterUsageData()
   const extendedKeys = Object.keys(aggregates.extended).sort()
 
   return (
@@ -34,9 +45,22 @@ export function ClusterUsagePage() {
         </div>
       ) : error ? (
         <Section>
-          <div className="px-2 py-4 text-sm text-red-700">
-            Failed to load cluster nodes: {error.message}
-          </div>
+          {errorStatus === 403 ? (
+            <div className="px-2 py-4 text-sm text-slate-700">
+              You do not have permission to view cluster nodes.{" "}
+              <Link
+                to="/console"
+                className="text-blue-700 underline hover:text-blue-800"
+              >
+                Back to console
+              </Link>
+              .
+            </div>
+          ) : (
+            <div className="px-2 py-4 text-sm text-red-700">
+              Failed to load cluster nodes: {error.message}
+            </div>
+          )}
         </Section>
       ) : nodes.length === 0 ? (
         <Section>
@@ -44,10 +68,18 @@ export function ClusterUsagePage() {
         </Section>
       ) : (
         <>
-          <ClusterUsageAggregates aggregates={aggregates} />
+          <ClusterUsageAggregates
+            aggregates={aggregates}
+            nodeSummary={nodeSummary}
+            podsUnavailable={podsUnavailable}
+          />
           <div>
             <h2 className="mb-3 text-sm font-medium text-slate-700">Nodes</h2>
-            <ClusterUsageTable rows={perNode} extendedKeys={extendedKeys} />
+            <ClusterUsageTable
+              rows={perNode}
+              extendedKeys={extendedKeys}
+              podsUnavailable={podsUnavailable}
+            />
           </div>
         </>
       )}
diff --git a/apps/console/src/test-utils/mock-k8s-client.ts b/apps/console/src/test-utils/mock-k8s-client.ts
index d2b40ff..16a207f 100644
--- a/apps/console/src/test-utils/mock-k8s-client.ts
+++ b/apps/console/src/test-utils/mock-k8s-client.ts
@@ -1,5 +1,5 @@
 import { vi } from "vitest"
-import { K8sClient, K8sApiError, type K8sList, type WatchEvent } from "@cozystack/k8s-client"
+import { K8sClient, K8sApiError, type K8sList } from "@cozystack/k8s-client"
 
 interface ListOverride {
   apiGroup: string
@@ -24,20 +24,19 @@ export interface MockK8sClientOverrides {
 }
 
 /**
- * Build a K8sClient subclass whose network-facing methods (list/get/watch)
- * resolve from in-memory overrides instead of fetch. The resulting object
- * still satisfies the K8sClient interface — the compile-time check at the
- * bottom of this file ensures the production interface and the mock stay
- * in lockstep when the real K8sClient gains new methods.
+ * Build a K8sClient instance whose network-facing methods (list/get/watch)
+ * resolve from in-memory overrides instead of fetch. The underlying object
+ * is a real K8sClient so any method this factory does not stub — including
+ * ones added to the production class after this file was written — falls
+ * through to the real implementation; tests that touch new methods are
+ * expected to spy on them explicitly via vi.spyOn on the returned instance.
  *
- * Watch is stubbed to a noop returning a cleanup function; tests that need
- * watch event behaviour should override it via vi.spyOn on the returned
- * instance.
+ * Watch is stubbed to return a noop cleanup function.
  */
 export function createMockK8sClient(overrides: MockK8sClientOverrides = {}): K8sClient {
   const client = new K8sClient({ baseUrl: "/mock" })
 
-  const listSpy = vi.spyOn(client, "list").mockImplementation(
+  vi.spyOn(client, "list").mockImplementation(
     async (apiGroup, apiVersion, plural, namespace) => {
       const match = overrides.lists?.find(
         (o) =>
@@ -55,7 +54,7 @@ export function createMockK8sClient(overrides: MockK8sClientOverrides = {}): K8s
     },
   )
 
-  const getSpy = vi.spyOn(client, "get").mockImplementation(
+  vi.spyOn(client, "get").mockImplementation(
     async (apiGroup, apiVersion, plural, name, namespace) => {
       const match = overrides.gets?.find(
         (o) =>
@@ -73,21 +72,7 @@ export function createMockK8sClient(overrides: MockK8sClientOverrides = {}): K8s
     },
   )
 
-  vi.spyOn(client, "watch").mockImplementation(
-    (_apiGroup, _apiVersion, _plural, _ns, _rv, _onEvent: (e: WatchEvent<unknown>) => void) => {
-      return () => {}
-    },
-  )
-
-  void listSpy
-  void getSpy
+  vi.spyOn(client, "watch").mockReturnValue(() => {})
 
   return client
 }
-
-// Compile-time check: the production K8sClient class must remain
-// assignable to the type our mock factory promises. If K8sClient ever
-// adds a new public method, this line fails to typecheck and the mock
-// has to grow a corresponding stub.
-const _typeDriftCheck: K8sClient = createMockK8sClient()
-void _typeDriftCheck

From e229064c702a1f6207b24bc5c5714f2871f3f77f Mon Sep 17 00:00:00 2001
From: Aleksei Sviridkin <f@lex.la>
Date: Tue, 26 May 2026 18:37:30 +0300
Subject: [PATCH 13/20] chore(console): post-review hardening pass

Collapse the per-file STANDARD_KEYS Set into a single export from
types.ts (STANDARD_RESOURCE_KEY_SET) and have aggregate.ts and
per-node.ts use it directly. The previous local copies and the ad-hoc
'(STANDARD_RESOURCE_KEYS as readonly string[]).includes(...)' cast in
aggregate.ts both went away.

Spell out the scaling trade-off of the cluster-wide pods watch in the
JSDoc on useClusterUsageData so a future reader does not second-guess
the design choice or treat the cost as accidental: every pod stays hot
in memory because Requested totals need every pod regardless of
namespace; the field-selector projection / server-side aggregation
follow-up is named explicitly.

Assisted-By: Claude <noreply@anthropic.com>
Signed-off-by: Aleksei Sviridkin <f@lex.la>
---
 .../console/src/hooks/useClusterUsageData.tsx | 21 ++++++++++++++-----
 .../src/lib/cluster-usage/aggregate.ts        |  4 ++--
 .../console/src/lib/cluster-usage/per-node.ts | 11 +++++-----
 apps/console/src/lib/cluster-usage/types.ts   |  2 +-
 4 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/apps/console/src/hooks/useClusterUsageData.tsx b/apps/console/src/hooks/useClusterUsageData.tsx
index da3a6fb..f5c1c00 100644
--- a/apps/console/src/hooks/useClusterUsageData.tsx
+++ b/apps/console/src/hooks/useClusterUsageData.tsx
@@ -51,16 +51,27 @@ interface ClusterUsageData {
 
 /**
  * Composite hook that powers the Cluster Usage admin page. Subscribes
- * to nodes and pods via K8s watches (low cost, push-based updates), and
- * — only when metrics.k8s.io is discovered on the cluster — polls
+ * to nodes and pods via K8s watches (push-based updates, no polling),
+ * and — only when metrics.k8s.io is discovered on the cluster — polls
  * NodeMetrics on a 30-second cadence. metrics.k8s.io is not watchable,
  * so a refetch interval is the only option; the rest of the page works
  * fine without it.
  *
- * A 403 on the metrics fetch is treated as 'no usage data, but no
+ * The pods watch is cluster-wide and unfiltered. On a multi-thousand-
+ * pod cluster that is a few megabytes of JSON kept hot in memory plus
+ * continuous patch events. The trade-off is accepted for now because
+ * (a) Requested totals need every pod regardless of namespace, and
+ * (b) the watch already exists for the rest of the console. If the
+ * cost ever becomes painful, the natural follow-up is to retain only the
+ * fields the totals need (spec.nodeName + containers[*].resources.requests)
+ * instead of whole pod objects, or a server-side aggregation endpoint.
+ *
+ * A 403 on the metrics fetch is treated as 'no usage data, no
  * page-level error' — the Used overlay disappears, the rest of the
- * panel still renders. Nodes-list or pods-list errors are surfaced as
- * the hook's error so the page can render an explicit failure state.
+ * panel still renders. A pods-list error is surfaced through the
+ * `podsUnavailable` flag so the page can degrade gracefully. A
+ * nodes-list error is the only kind that takes over the page; everything
+ * downstream of it is undefined.
  */
 export function useClusterUsageData(): ClusterUsageData {
   const nodesQuery = useK8sList<Node>({
diff --git a/apps/console/src/lib/cluster-usage/aggregate.ts b/apps/console/src/lib/cluster-usage/aggregate.ts
index aab366d..531c353 100644
--- a/apps/console/src/lib/cluster-usage/aggregate.ts
+++ b/apps/console/src/lib/cluster-usage/aggregate.ts
@@ -8,7 +8,7 @@ import type {
   ResourceTotals,
   StandardResourceKey,
 } from "./types.ts"
-import { STANDARD_RESOURCE_KEYS } from "./types.ts"
+import { STANDARD_RESOURCE_KEYS, STANDARD_RESOURCE_KEY_SET } from "./types.ts"
 
 function emptyTotals(): ResourceTotals {
   return { capacity: 0, allocatable: 0, requested: 0 }
@@ -63,7 +63,7 @@ export function aggregateNodeResources(
       const requests = container.resources?.requests
       if (!requests) continue
       for (const [key, value] of Object.entries(requests)) {
-        if ((STANDARD_RESOURCE_KEYS as readonly string[]).includes(key)) {
+        if (STANDARD_RESOURCE_KEY_SET.has(key)) {
           standard[key as StandardResourceKey].requested += parseQuantity(value)
         } else if (extended[key]) {
           extended[key].requested += parseQuantity(value)
diff --git a/apps/console/src/lib/cluster-usage/per-node.ts b/apps/console/src/lib/cluster-usage/per-node.ts
index d2b5b0b..921a7b5 100644
--- a/apps/console/src/lib/cluster-usage/per-node.ts
+++ b/apps/console/src/lib/cluster-usage/per-node.ts
@@ -1,6 +1,10 @@
 import { parseQuantity } from "../k8s-quantity.ts"
 import { formatAge } from "../status.ts"
-import { isExtendedResourceKey } from "./types.ts"
+import {
+  STANDARD_RESOURCE_KEYS,
+  STANDARD_RESOURCE_KEY_SET,
+  isExtendedResourceKey,
+} from "./types.ts"
 import type {
   Node,
   NodeMetrics,
@@ -9,7 +13,6 @@ import type {
   ResourceTotals,
   StandardResourceKey,
 } from "./types.ts"
-import { STANDARD_RESOURCE_KEYS } from "./types.ts"
 
 const PRESSURE_TYPES = new Set([
   "MemoryPressure",
@@ -18,8 +21,6 @@ const PRESSURE_TYPES = new Set([
   "NetworkUnavailable",
 ])
 
-const STANDARD_KEYS = new Set<string>(STANDARD_RESOURCE_KEYS)
-
 function rolesFromLabels(labels: Record<string, string> | undefined): string[] {
   if (!labels) return []
   const roles = new Set<string>()
@@ -102,7 +103,7 @@ export function derivePerNodeRows(
         const requests = container.resources?.requests
         if (!requests) continue
         for (const [key, value] of Object.entries(requests)) {
-          if (STANDARD_KEYS.has(key)) {
+          if (STANDARD_RESOURCE_KEY_SET.has(key)) {
             standard[key as StandardResourceKey].requested += parseQuantity(value)
           } else if (extended[key]) {
             extended[key].requested += parseQuantity(value)
diff --git a/apps/console/src/lib/cluster-usage/types.ts b/apps/console/src/lib/cluster-usage/types.ts
index 2ae96b6..5dd049c 100644
--- a/apps/console/src/lib/cluster-usage/types.ts
+++ b/apps/console/src/lib/cluster-usage/types.ts
@@ -76,7 +76,7 @@ export const STANDARD_RESOURCE_KEYS = ["cpu", "memory", "ephemeral-storage", "po
 
 export type StandardResourceKey = (typeof STANDARD_RESOURCE_KEYS)[number]
 
-const STANDARD_RESOURCE_KEY_SET: ReadonlySet<string> = new Set(STANDARD_RESOURCE_KEYS)
+export const STANDARD_RESOURCE_KEY_SET: ReadonlySet<string> = new Set(STANDARD_RESOURCE_KEYS)
 
 /**
  * Whether a key from `node.status.capacity` should be treated as an

From da6961ce76393e7f1315594d2e063dc7404593a8 Mon Sep 17 00:00:00 2001
From: Aleksei Sviridkin <f@lex.la>
Date: Wed, 27 May 2026 19:00:09 +0300
Subject: [PATCH 14/20] chore(console): tighten error narrowing and drop
 redundant slice
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use 'instanceof K8sApiError' to read status from the nodes-list error
instead of the previous 'as unknown as { status?: unknown }' bridge —
the runtime check is type-safe and the cast is gone. K8sApiError is
the only error shape the client throws on HTTP failures, and the
exported class is already part of the @cozystack/k8s-client public
surface.

Drop the .slice() before .sort() in the per-node table: Array.filter()
already returns a fresh array, so .slice() was a no-op safeguard. The
chained form (.filter().sort()) is what the rest of the codebase
uses when the input is known to be filter-produced.

Assisted-By: Claude <noreply@anthropic.com>
Signed-off-by: Aleksei Sviridkin <f@lex.la>
---
 .../components/cluster-usage/ClusterUsageTable.tsx    | 11 +++++++----
 apps/console/src/hooks/useClusterUsageData.tsx        |  5 ++---
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/apps/console/src/components/cluster-usage/ClusterUsageTable.tsx b/apps/console/src/components/cluster-usage/ClusterUsageTable.tsx
index bacca80..8416154 100644
--- a/apps/console/src/components/cluster-usage/ClusterUsageTable.tsx
+++ b/apps/console/src/components/cluster-usage/ClusterUsageTable.tsx
@@ -205,10 +205,13 @@ export function ClusterUsageTable({
     )
   }
 
-  const visibleRows = useMemo(() => {
-    const filtered = rows.filter((r) => matchesFilter(r, filter))
-    return filtered.slice().sort((a, b) => compareRows(a, b, sort))
-  }, [rows, sort, filter])
+  const visibleRows = useMemo(
+    () =>
+      rows
+        .filter((r) => matchesFilter(r, filter))
+        .sort((a, b) => compareRows(a, b, sort)),
+    [rows, sort, filter],
+  )
 
   return (
     <div className="space-y-3">
diff --git a/apps/console/src/hooks/useClusterUsageData.tsx b/apps/console/src/hooks/useClusterUsageData.tsx
index f5c1c00..921da5e 100644
--- a/apps/console/src/hooks/useClusterUsageData.tsx
+++ b/apps/console/src/hooks/useClusterUsageData.tsx
@@ -2,6 +2,7 @@ import { useMemo } from "react"
 import {
   useK8sList,
   useApiGroupAvailable,
+  K8sApiError,
   type K8sList,
 } from "@cozystack/k8s-client"
 import { aggregateNodeResources } from "../lib/cluster-usage/aggregate.ts"
@@ -133,9 +134,7 @@ export function useClusterUsageData(): ClusterUsageData {
   }, [perNode])
 
   const nodesError = (nodesQuery.error as Error | null) ?? null
-  const statusField =
-    nodesError != null ? (nodesError as unknown as { status?: unknown }).status : undefined
-  const errorStatus = typeof statusField === "number" ? statusField : null
+  const errorStatus = nodesError instanceof K8sApiError ? nodesError.status : null
 
   return {
     nodes,

From ccbda27b5a10df5c8d490860a14fa5b2c993dd72 Mon Sep 17 00:00:00 2001
From: Aleksei Sviridkin <f@lex.la>
Date: Wed, 27 May 2026 21:23:25 +0300
Subject: [PATCH 15/20] fix(console): guard parseQuantity against NaN from
 malformed quantities

A bare suffix such as "m" parsed to parseFloat("m")/1000 = NaN, which
then propagated into aggregated cluster totals and the percentages
derived from them. Parse the numeric part once and fall back to 0 when
it is not finite, for every suffix branch rather than just milli.

Address review feedback on apps/console/src/lib/k8s-quantity.ts.

Signed-off-by: Aleksei Sviridkin <f@lex.la>
---
 apps/console/src/lib/k8s-quantity.test.ts | 10 +++++----
 apps/console/src/lib/k8s-quantity.ts      | 26 +++++++++++++----------
 2 files changed, 21 insertions(+), 15 deletions(-)

diff --git a/apps/console/src/lib/k8s-quantity.test.ts b/apps/console/src/lib/k8s-quantity.test.ts
index ac8eda3..f598818 100644
--- a/apps/console/src/lib/k8s-quantity.test.ts
+++ b/apps/console/src/lib/k8s-quantity.test.ts
@@ -66,10 +66,12 @@ describe("parseQuantity", () => {
     expect(parseQuantity("abc")).toBe(0)
   })
 
-  it("returns NaN when only a suffix is supplied (pinned corner case)", () => {
-    // The current implementation parses "m" as parseFloat("m") / 1000 = NaN / 1000.
-    // Pinned to document the behaviour; callers should pass valid quantities.
-    expect(parseQuantity("m")).toBeNaN()
+  it("returns 0 for a bare suffix instead of poisoning totals with NaN", () => {
+    // A malformed quantity (just a suffix, no number) must not propagate NaN
+    // into the aggregated totals and UI percentages.
+    expect(parseQuantity("m")).toBe(0)
+    expect(parseQuantity("Gi")).toBe(0)
+    expect(parseQuantity("Ki")).toBe(0)
   })
 
   it("parses zero", () => {
diff --git a/apps/console/src/lib/k8s-quantity.ts b/apps/console/src/lib/k8s-quantity.ts
index 9339ff5..7a2132a 100644
--- a/apps/console/src/lib/k8s-quantity.ts
+++ b/apps/console/src/lib/k8s-quantity.ts
@@ -6,19 +6,23 @@
  */
 export function parseQuantity(s: string): number {
   if (!s) return 0
-  if (s.endsWith("m")) return parseFloat(s) / 1000
+  // A malformed quantity (e.g. a bare suffix like "m") parses to NaN, which
+  // would poison every total and percentage it feeds into. Treat it as 0.
+  const n = parseFloat(s)
+  if (!Number.isFinite(n)) return 0
+  if (s.endsWith("m")) return n / 1000
   // Binary SI suffixes (powers of 1024)
-  if (s.endsWith("Ki")) return parseFloat(s) * 1024
-  if (s.endsWith("Mi")) return parseFloat(s) * 1024 ** 2
-  if (s.endsWith("Gi")) return parseFloat(s) * 1024 ** 3
-  if (s.endsWith("Ti")) return parseFloat(s) * 1024 ** 4
-  if (s.endsWith("Pi")) return parseFloat(s) * 1024 ** 5
-  if (s.endsWith("Ei")) return parseFloat(s) * 1024 ** 6
+  if (s.endsWith("Ki")) return n * 1024
+  if (s.endsWith("Mi")) return n * 1024 ** 2
+  if (s.endsWith("Gi")) return n * 1024 ** 3
+  if (s.endsWith("Ti")) return n * 1024 ** 4
+  if (s.endsWith("Pi")) return n * 1024 ** 5
+  if (s.endsWith("Ei")) return n * 1024 ** 6
   // Decimal SI suffixes (powers of 1000) — Kubernetes uses lowercase k
-  if (s.endsWith("k")) return parseFloat(s) * 1000
-  if (s.endsWith("M")) return parseFloat(s) * 1000 ** 2
-  if (s.endsWith("G")) return parseFloat(s) * 1000 ** 3
-  return parseFloat(s) || 0
+  if (s.endsWith("k")) return n * 1000
+  if (s.endsWith("M")) return n * 1000 ** 2
+  if (s.endsWith("G")) return n * 1000 ** 3
+  return n
 }
 
 export function humanizeBytes(bytes: number): string {

From 6a810527fe4a4e48ebcc79a8a9496b8c0c75cdc1 Mon Sep 17 00:00:00 2001
From: Aleksei Sviridkin <f@lex.la>
Date: Wed, 27 May 2026 21:24:25 +0300
Subject: [PATCH 16/20] fix(console): exclude terminal pods from requested
 aggregation

Succeeded/Failed pods linger in API list responses but no longer hold
schedulable requests. Counting their container requests inflated the
cluster requested totals; skip pods whose status.phase is Succeeded or
Failed.

Address review feedback on apps/console/src/lib/cluster-usage/aggregate.ts.

Signed-off-by: Aleksei Sviridkin <f@lex.la>
---
 .../src/lib/cluster-usage/aggregate.test.ts    | 18 ++++++++++++++++++
 .../console/src/lib/cluster-usage/aggregate.ts |  4 ++++
 2 files changed, 22 insertions(+)

diff --git a/apps/console/src/lib/cluster-usage/aggregate.test.ts b/apps/console/src/lib/cluster-usage/aggregate.test.ts
index ccbf203..713616c 100644
--- a/apps/console/src/lib/cluster-usage/aggregate.test.ts
+++ b/apps/console/src/lib/cluster-usage/aggregate.test.ts
@@ -98,6 +98,24 @@ describe("aggregateNodeResources", () => {
     expect(a.standard.cpu.requested).toBe(0)
   })
 
+  it("excludes terminal (Succeeded/Failed) pods from requested totals", () => {
+    const terminal = (name: string, phase: string): Pod => ({
+      ...pod(name, "a", { cpu: "1", memory: "2Gi" }),
+      status: { phase },
+    })
+    const a = aggregateNodeResources(
+      [node("a", { cpu: "8", memory: "16Gi" })],
+      [
+        pod("running", "a", { cpu: "500m", memory: "1Gi" }),
+        terminal("completed", "Succeeded"),
+        terminal("crashed", "Failed"),
+      ],
+      undefined,
+    )
+    expect(a.standard.cpu.requested).toBe(0.5)
+    expect(a.standard.memory.requested).toBe(1024 ** 3)
+  })
+
   it("sums extended-resource requests under the extended bucket", () => {
     const a = aggregateNodeResources(
       [node("a", { cpu: "8", "nvidia.com/gpu": "2" })],
diff --git a/apps/console/src/lib/cluster-usage/aggregate.ts b/apps/console/src/lib/cluster-usage/aggregate.ts
index 531c353..fa5c4b5 100644
--- a/apps/console/src/lib/cluster-usage/aggregate.ts
+++ b/apps/console/src/lib/cluster-usage/aggregate.ts
@@ -59,6 +59,10 @@ export function aggregateNodeResources(
   for (const pod of pods) {
     const nodeName = pod.spec?.nodeName
     if (!nodeName || !knownNodes.has(nodeName)) continue
+    // Terminal pods still appear in API lists but no longer hold schedulable
+    // requests; counting them would inflate the requested totals.
+    const phase = pod.status?.phase
+    if (phase === "Succeeded" || phase === "Failed") continue
     for (const container of pod.spec?.containers ?? []) {
       const requests = container.resources?.requests
       if (!requests) continue

From 4c1e11b2eb5e60bc7d181c81e2027f2d93e3e130 Mon Sep 17 00:00:00 2001
From: Aleksei Sviridkin <f@lex.la>
Date: Wed, 27 May 2026 21:25:19 +0300
Subject: [PATCH 17/20] fix(console): collapse extended-resource cells for
 NotReady nodes

CPU and memory cells already render an em dash when a node is NotReady,
but the extended-resource cells only checked podsUnavailable and kept
showing capacity-derived numbers. Pass row.ready into extendedCell so
NotReady nodes render an em dash for extended resources too.

Address review feedback on apps/console/src/components/cluster-usage/ClusterUsageTable.tsx.

Signed-off-by: Aleksei Sviridkin <f@lex.la>
---
 .../cluster-usage/ClusterUsageTable.test.tsx  | 19 +++++++++++++++++++
 .../cluster-usage/ClusterUsageTable.tsx       | 10 +++++++---
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/apps/console/src/components/cluster-usage/ClusterUsageTable.test.tsx b/apps/console/src/components/cluster-usage/ClusterUsageTable.test.tsx
index 712fdd2..0a467fe 100644
--- a/apps/console/src/components/cluster-usage/ClusterUsageTable.test.tsx
+++ b/apps/console/src/components/cluster-usage/ClusterUsageTable.test.tsx
@@ -110,6 +110,25 @@ describe("ClusterUsageTable", () => {
     expect(within(tr).getAllByText("—").length).toBeGreaterThan(0)
   })
 
+  it("collapses extended-resource cells to em dash for a NotReady node", () => {
+    const gpu = { "nvidia.com/gpu": { capacity: 2, allocatable: 2, requested: 1 } }
+    render(
+      <ClusterUsageTable
+        rows={[
+          row("ready-gpu", { ready: true, extended: gpu }),
+          row("down-gpu", { ready: false, extended: gpu }),
+        ]}
+        extendedKeys={["nvidia.com/gpu"]}
+      />,
+    )
+    const readyRow = screen.getByText("ready-gpu").closest("tr")!
+    const downRow = screen.getByText("down-gpu").closest("tr")!
+    // The Ready node surfaces its capacity-derived numbers...
+    expect(within(readyRow).getByText("capacity 2")).toBeInTheDocument()
+    // ...while the NotReady node must not render capacity for the extended cell.
+    expect(within(downRow).queryByText("capacity 2")).not.toBeInTheDocument()
+  })
+
   it("renders the age column verbatim from row.age", () => {
     render(
       <ClusterUsageTable
diff --git a/apps/console/src/components/cluster-usage/ClusterUsageTable.tsx b/apps/console/src/components/cluster-usage/ClusterUsageTable.tsx
index 8416154..71a8370 100644
--- a/apps/console/src/components/cluster-usage/ClusterUsageTable.tsx
+++ b/apps/console/src/components/cluster-usage/ClusterUsageTable.tsx
@@ -88,8 +88,12 @@ function memoryCell(totals: ResourceTotals, ready: boolean, podsUnavailable: boo
   )
 }
 
-function extendedCell(totals: ResourceTotals | undefined, podsUnavailable: boolean) {
-  if (!totals) return <span className="text-slate-400">—</span>
+function extendedCell(
+  totals: ResourceTotals | undefined,
+  ready: boolean,
+  podsUnavailable: boolean,
+) {
+  if (!ready || !totals) return <span className="text-slate-400">—</span>
   return (
     <div className="space-y-0.5 text-xs tabular-nums text-slate-700">
       <div>
@@ -307,7 +311,7 @@ export function ClusterUsageTable({
                 </td>
                 {extendedKeys.map((k) => (
                   <td key={k} className="px-4 py-3 align-top">
-                    {extendedCell(r.extended[k], podsUnavailable)}
+                    {extendedCell(r.extended[k], r.ready, podsUnavailable)}
                   </td>
                 ))}
                 <td className="px-4 py-3 tabular-nums text-xs text-slate-500">{r.age}</td>

From f90314b5cc7b75f96063de50e32cc46c291b7046 Mon Sep 17 00:00:00 2001
From: Aleksei Sviridkin <f@lex.la>
Date: Wed, 27 May 2026 21:26:33 +0300
Subject: [PATCH 18/20] test(console): assert actual ordering in cluster-usage
 card/column tests

The standard-card test used arrayContaining and the extended-column
test used toContain, so neither actually verified the rendered order.
Pin the exact card order (CPU, Memory, Storage, Pods) and assert the
extended columns follow extendedKeys order via their header indices.

Address review feedback on the cluster-usage component tests.

Signed-off-by: Aleksei Sviridkin <f@lex.la>
---
 .../cluster-usage/ClusterUsageAggregates.test.tsx         | 7 +++----
 .../components/cluster-usage/ClusterUsageTable.test.tsx   | 8 ++++++--
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/apps/console/src/components/cluster-usage/ClusterUsageAggregates.test.tsx b/apps/console/src/components/cluster-usage/ClusterUsageAggregates.test.tsx
index 737dd73..b4d4448 100644
--- a/apps/console/src/components/cluster-usage/ClusterUsageAggregates.test.tsx
+++ b/apps/console/src/components/cluster-usage/ClusterUsageAggregates.test.tsx
@@ -46,11 +46,10 @@ describe("ClusterUsageAggregates", () => {
 
   it("renders the four standard cards in order CPU, Memory, Storage, Pods", () => {
     render(<ClusterUsageAggregates aggregates={empty()} nodeSummary={summary()} />)
-    const headings = screen.getAllByText(/CPU|Memory|Storage|Pods/i)
+    const headings = screen.getAllByText(/^(CPU|Memory|Storage|Pods)$/)
     const labels = headings.map((h) => h.textContent)
-    expect(labels).toEqual(
-      expect.arrayContaining(["CPU", "Memory", "Storage", "Pods"]),
-    )
+    // Exact array (not arrayContaining) so the card order is actually pinned.
+    expect(labels).toEqual(["CPU", "Memory", "Storage", "Pods"])
   })
 
   it("does not render the extended-resources section when none are present", () => {
diff --git a/apps/console/src/components/cluster-usage/ClusterUsageTable.test.tsx b/apps/console/src/components/cluster-usage/ClusterUsageTable.test.tsx
index 0a467fe..51fc0be 100644
--- a/apps/console/src/components/cluster-usage/ClusterUsageTable.test.tsx
+++ b/apps/console/src/components/cluster-usage/ClusterUsageTable.test.tsx
@@ -95,8 +95,12 @@ describe("ClusterUsageTable", () => {
       />,
     )
     const headers = screen.getAllByRole("columnheader").map((h) => h.textContent)
-    expect(headers).toContain("nvidia.com/gpu")
-    expect(headers).toContain("amd.com/gpu")
+    const nvidiaAt = headers.indexOf("nvidia.com/gpu")
+    const amdAt = headers.indexOf("amd.com/gpu")
+    expect(nvidiaAt).toBeGreaterThanOrEqual(0)
+    expect(amdAt).toBeGreaterThanOrEqual(0)
+    // Columns must follow extendedKeys order: nvidia before amd.
+    expect(nvidiaAt).toBeLessThan(amdAt)
   })
 
   it("renders em dash in extended-resource cell when the node does not expose it", () => {

From b06c208f69c4e8c7e2d26bedcf21f139aa432207 Mon Sep 17 00:00:00 2001
From: Aleksei Sviridkin <f@lex.la>
Date: Wed, 27 May 2026 21:28:12 +0300
Subject: [PATCH 19/20] test(console): replace setTimeout flushes with waitFor
 in sidebar gate tests

The deny and error cases flushed with an arbitrary setTimeout before
asserting the gated entry was absent, which is timing-dependent and can
pass while the SSAR query is still in flight. Use waitFor to wait until
the SSAR request has been issued and the entry is absent, so the
assertion no longer depends on an arbitrary delay.

Address review feedback on apps/console/src/routes/sidebar-sections.test.tsx.

Signed-off-by: Aleksei Sviridkin <f@lex.la>
---
 .../src/routes/sidebar-sections.test.tsx      | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/apps/console/src/routes/sidebar-sections.test.tsx b/apps/console/src/routes/sidebar-sections.test.tsx
index 1d718f9..783d3e2 100644
--- a/apps/console/src/routes/sidebar-sections.test.tsx
+++ b/apps/console/src/routes/sidebar-sections.test.tsx
@@ -85,14 +85,13 @@ describe("useConsoleSidebarSections — Cluster Usage gate", () => {
     const { result } = renderHook(() => useConsoleSidebarSections(), {
       wrapper: makeWrapper(client),
     })
-    // Wait for the SSAR query to settle so the absence is real.
+    // Poll until the SSAR request has been issued and the gated entry is
+    // absent. This replaces the old fixed delay; it shows the request was
+    // sent, not that the deny response has already been processed.
     await waitFor(() => {
-      const adminSection = result.current.find((s) => s.title === "Administration")
-      expect(adminSection).toBeDefined()
+      expect(client.create).toHaveBeenCalled()
+      expect(findItem(result.current, "Cluster Usage")).toBeUndefined()
     })
-    // Need an explicit settle window for the SSAR query.
-    await new Promise((r) => setTimeout(r, 0))
-    expect(findItem(result.current, "Cluster Usage")).toBeUndefined()
   })
 
   it("hides the Cluster Usage entry while SSAR is still loading (no flicker)", () => {
@@ -108,7 +107,11 @@ describe("useConsoleSidebarSections — Cluster Usage gate", () => {
     const { result } = renderHook(() => useConsoleSidebarSections(), {
       wrapper: makeWrapper(client),
     })
-    await new Promise((r) => setTimeout(r, 10))
-    expect(findItem(result.current, "Cluster Usage")).toBeUndefined()
+    // Poll until the failing SSAR request has been issued and the gated entry
+    // is absent, rather than relying on an arbitrary delay.
+    await waitFor(() => {
+      expect(client.create).toHaveBeenCalled()
+      expect(findItem(result.current, "Cluster Usage")).toBeUndefined()
+    })
   })
 })

From a4dc3327b15c0f80ea3873345fd3124af9dfa2f2 Mon Sep 17 00:00:00 2001
From: Aleksei Sviridkin <f@lex.la>
Date: Wed, 27 May 2026 21:32:34 +0300
Subject: [PATCH 20/20] test(console): use getAllByTitle instead of direct DOM
 queries

Replace document.querySelectorAll lookups for the pods-unavailable
tooltip with screen.getAllByTitle so the assertions go through Testing
Library's accessible queries and are less brittle to DOM structure.

Address review feedback on the cluster-usage page and aggregates tests.

Signed-off-by: Aleksei Sviridkin <f@lex.la>
---
 .../cluster-usage/ClusterUsageAggregates.test.tsx          | 7 +++----
 apps/console/src/routes/ClusterUsagePage.test.tsx          | 7 +++----
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/apps/console/src/components/cluster-usage/ClusterUsageAggregates.test.tsx b/apps/console/src/components/cluster-usage/ClusterUsageAggregates.test.tsx
index b4d4448..17159ed 100644
--- a/apps/console/src/components/cluster-usage/ClusterUsageAggregates.test.tsx
+++ b/apps/console/src/components/cluster-usage/ClusterUsageAggregates.test.tsx
@@ -109,9 +109,8 @@ describe("ClusterUsageAggregates", () => {
     )
     // The numeric Requested value should not be visible; em dashes appear
     // and at least one element has the explanatory tooltip on title.
-    const tooltipNodes = document.querySelectorAll(
-      '[title="Requires cluster-wide pod read access"]',
-    )
-    expect(tooltipNodes.length).toBeGreaterThan(0)
+    expect(
+      screen.getAllByTitle("Requires cluster-wide pod read access").length,
+    ).toBeGreaterThan(0)
   })
 })
diff --git a/apps/console/src/routes/ClusterUsagePage.test.tsx b/apps/console/src/routes/ClusterUsagePage.test.tsx
index 9225634..3275004 100644
--- a/apps/console/src/routes/ClusterUsagePage.test.tsx
+++ b/apps/console/src/routes/ClusterUsagePage.test.tsx
@@ -143,10 +143,9 @@ describe("ClusterUsagePage", () => {
     })
     renderWithK8sProvider(<ClusterUsagePage />, { client })
     await screen.findAllByText(/allocatable/i)
-    const tooltipNodes = document.querySelectorAll(
-      '[title="Requires cluster-wide pod read access"]',
-    )
-    expect(tooltipNodes.length).toBeGreaterThan(0)
+    expect(
+      screen.getAllByTitle("Requires cluster-wide pod read access").length,
+    ).toBeGreaterThan(0)
   })
 
   it("renders the node-summary line in the aggregates header", async () => {