diff --git a/apps/console/src/__tests__/k8s-client/provider.test.tsx b/apps/console/src/__tests__/k8s-client/provider.test.tsx new file mode 100644 index 0000000..97c5691 --- /dev/null +++ b/apps/console/src/__tests__/k8s-client/provider.test.tsx @@ -0,0 +1,53 @@ +import { describe, it, expect } from "vitest" +import { render } from "@testing-library/react" +import { K8sClient, K8sProvider, useK8sClient } from "@cozystack/k8s-client" + +function ClientCapture({ onClient }: { onClient: (c: K8sClient) => void }) { + const c = useK8sClient() + onClient(c) + return null +} + +describe("K8sProvider", () => { + it("passes the injected client through to useK8sClient", () => { + const injected = new K8sClient({ baseUrl: "/injected" }) + let captured: K8sClient | null = null + render( + + (captured = c)} /> + , + ) + expect(captured).toBe(injected) + }) + + it("constructs its own client when none is injected", () => { + let captured: K8sClient | null = null + render( + + (captured = c)} /> + , + ) + expect(captured).toBeInstanceOf(K8sClient) + }) + + it("constructs a client from the provided config when no client is injected", () => { + let captured: K8sClient | null = null + render( + + (captured = c)} /> + , + ) + expect(captured).toBeInstanceOf(K8sClient) + }) + + it("prefers the injected client over the config when both are supplied", () => { + const injected = new K8sClient({ baseUrl: "/injected" }) + let captured: K8sClient | null = null + render( + + (captured = c)} /> + , + ) + expect(captured).toBe(injected) + }) +}) diff --git a/apps/console/src/__tests__/k8s-client/useApiGroupAvailable.test.tsx b/apps/console/src/__tests__/k8s-client/useApiGroupAvailable.test.tsx new file mode 100644 index 0000000..c606ac6 --- /dev/null +++ b/apps/console/src/__tests__/k8s-client/useApiGroupAvailable.test.tsx @@ -0,0 +1,121 @@ +import { describe, it, expect, vi } from "vitest" +import { renderHook, waitFor } from "@testing-library/react" +import { QueryClient, QueryClientProvider 
} from "@tanstack/react-query" +import { + K8sClient, + K8sProvider, + useApiGroupAvailable, + type APIGroupList, +} from "@cozystack/k8s-client" +import type { ReactNode } from "react" + +function makeWrapper(client: K8sClient) { + const queryClient = new QueryClient({ + defaultOptions: { queries: { retry: false, gcTime: 0 } }, + }) + return function Wrapper({ children }: { children: ReactNode }) { + return ( + + + {children} + + + ) + } +} + +const sampleGroups: APIGroupList = { + kind: "APIGroupList", + apiVersion: "v1", + groups: [ + { + name: "metrics.k8s.io", + versions: [{ groupVersion: "metrics.k8s.io/v1beta1", version: "v1beta1" }], + preferredVersion: { groupVersion: "metrics.k8s.io/v1beta1", version: "v1beta1" }, + }, + { + name: "apps", + versions: [{ groupVersion: "apps/v1", version: "v1" }], + preferredVersion: { groupVersion: "apps/v1", version: "v1" }, + }, + ], +} + +describe("useApiGroupAvailable", () => { + it("starts in loading state with available=false", () => { + const client = new K8sClient() + vi.spyOn(client, "getApiGroups").mockImplementation( + () => new Promise(() => {}), + ) + const { result } = renderHook(() => useApiGroupAvailable("metrics.k8s.io"), { + wrapper: makeWrapper(client), + }) + expect(result.current.isLoading).toBe(true) + expect(result.current.available).toBe(false) + }) + + it("reports available=true when the group is present", async () => { + const client = new K8sClient() + vi.spyOn(client, "getApiGroups").mockResolvedValue(sampleGroups) + const { result } = renderHook(() => useApiGroupAvailable("metrics.k8s.io"), { + wrapper: makeWrapper(client), + }) + await waitFor(() => expect(result.current.isLoading).toBe(false)) + expect(result.current.available).toBe(true) + }) + + it("reports available=false when the group is missing", async () => { + const client = new K8sClient() + vi.spyOn(client, "getApiGroups").mockResolvedValue(sampleGroups) + const { result } = renderHook(() => 
useApiGroupAvailable("custom.metrics.k8s.io"), { + wrapper: makeWrapper(client), + }) + await waitFor(() => expect(result.current.isLoading).toBe(false)) + expect(result.current.available).toBe(false) + }) + + it("fetches /apis once for multiple consumers", async () => { + const client = new K8sClient() + const spy = vi.spyOn(client, "getApiGroups").mockResolvedValue(sampleGroups) + const Wrapper = makeWrapper(client) + + function Twin() { + const a = useApiGroupAvailable("metrics.k8s.io") + const b = useApiGroupAvailable("apps") + return ( +

+ {String(a.available)}-{String(b.available)} +

+ ) + } + + const { result: hookA } = renderHook( + () => useApiGroupAvailable("metrics.k8s.io"), + { wrapper: Wrapper }, + ) + const { result: hookB } = renderHook( + () => useApiGroupAvailable("apps"), + { wrapper: Wrapper }, + ) + + await waitFor(() => expect(hookA.current.isLoading).toBe(false)) + await waitFor(() => expect(hookB.current.isLoading).toBe(false)) + + // Both hooks share the same provider and cache, so /apis is called + // exactly once for the lifetime of this provider tree. Twin is unused + // here but kept declared to document the multi-consumer shape we + // protect against. + expect(spy).toHaveBeenCalledTimes(1) + void Twin + }) + + it("surfaces an error and reports available=false", async () => { + const client = new K8sClient() + vi.spyOn(client, "getApiGroups").mockRejectedValue(new Error("no /apis")) + const { result } = renderHook(() => useApiGroupAvailable("metrics.k8s.io"), { + wrapper: makeWrapper(client), + }) + await waitFor(() => expect(result.current.isLoading).toBe(false)) + expect(result.current.available).toBe(false) + }) +}) diff --git a/apps/console/src/__tests__/k8s-client/useSelfSubjectAccessReview.test.tsx b/apps/console/src/__tests__/k8s-client/useSelfSubjectAccessReview.test.tsx new file mode 100644 index 0000000..2dc7baa --- /dev/null +++ b/apps/console/src/__tests__/k8s-client/useSelfSubjectAccessReview.test.tsx @@ -0,0 +1,174 @@ +import { describe, it, expect, vi } from "vitest" +import { renderHook, waitFor } from "@testing-library/react" +import { QueryClient, QueryClientProvider } from "@tanstack/react-query" +import { + K8sClient, + K8sProvider, + useSelfSubjectAccessReview, + type SelfSubjectAccessReview, +} from "@cozystack/k8s-client" +import type { ReactNode } from "react" + +function makeWrapper(client: K8sClient) { + const queryClient = new QueryClient({ + defaultOptions: { queries: { retry: false, gcTime: 0 } }, + }) + return function Wrapper({ children }: { children: ReactNode }) { + return ( + + + 
{children} + + + ) + } +} + +function ssarResult(allowed: boolean): SelfSubjectAccessReview { + return { + apiVersion: "authorization.k8s.io/v1", + kind: "SelfSubjectAccessReview", + metadata: { name: "" }, + spec: { resourceAttributes: { resource: "nodes", verb: "list" } }, + status: { allowed }, + } +} + +describe("useSelfSubjectAccessReview", () => { + it("starts in loading state with allowed=false", () => { + const client = new K8sClient() + vi.spyOn(client, "create").mockImplementation(() => new Promise(() => {})) + const { result } = renderHook( + () => + useSelfSubjectAccessReview({ + resourceAttributes: { resource: "nodes", verb: "list" }, + }), + { wrapper: makeWrapper(client) }, + ) + expect(result.current.isLoading).toBe(true) + expect(result.current.allowed).toBe(false) + }) + + it("reports allowed=true when the API responds with status.allowed=true", async () => { + const client = new K8sClient() + vi.spyOn(client, "create").mockResolvedValue(ssarResult(true)) + const { result } = renderHook( + () => + useSelfSubjectAccessReview({ + resourceAttributes: { resource: "nodes", verb: "list" }, + }), + { wrapper: makeWrapper(client) }, + ) + await waitFor(() => expect(result.current.isLoading).toBe(false)) + expect(result.current.allowed).toBe(true) + }) + + it("reports allowed=false explicitly when status.allowed=false", async () => { + const client = new K8sClient() + vi.spyOn(client, "create").mockResolvedValue(ssarResult(false)) + const { result } = renderHook( + () => + useSelfSubjectAccessReview({ + resourceAttributes: { resource: "nodes", verb: "list" }, + }), + { wrapper: makeWrapper(client) }, + ) + await waitFor(() => expect(result.current.isLoading).toBe(false)) + expect(result.current.allowed).toBe(false) + }) + + it("POSTs once for two consumers asking the same question", async () => { + const client = new K8sClient() + const spy = vi.spyOn(client, "create").mockResolvedValue(ssarResult(true)) + const Wrapper = makeWrapper(client) + const { 
result: a } = renderHook( + () => + useSelfSubjectAccessReview({ + resourceAttributes: { resource: "nodes", verb: "list" }, + }), + { wrapper: Wrapper }, + ) + const { result: b } = renderHook( + () => + useSelfSubjectAccessReview({ + resourceAttributes: { resource: "nodes", verb: "list" }, + }), + { wrapper: Wrapper }, + ) + await waitFor(() => expect(a.current.isLoading).toBe(false)) + await waitFor(() => expect(b.current.isLoading).toBe(false)) + expect(spy).toHaveBeenCalledTimes(1) + }) + + it("POSTs twice when two consumers ask different questions", async () => { + const client = new K8sClient() + const spy = vi.spyOn(client, "create").mockResolvedValue(ssarResult(true)) + const Wrapper = makeWrapper(client) + const { result: a } = renderHook( + () => + useSelfSubjectAccessReview({ + resourceAttributes: { resource: "nodes", verb: "list" }, + }), + { wrapper: Wrapper }, + ) + const { result: b } = renderHook( + () => + useSelfSubjectAccessReview({ + resourceAttributes: { resource: "pods", verb: "list" }, + }), + { wrapper: Wrapper }, + ) + await waitFor(() => expect(a.current.isLoading).toBe(false)) + await waitFor(() => expect(b.current.isLoading).toBe(false)) + expect(spy).toHaveBeenCalledTimes(2) + }) + + it("surfaces the error and reports allowed=false on API failure", async () => { + const client = new K8sClient() + const err = new Error("server error") + vi.spyOn(client, "create").mockRejectedValue(err) + const { result } = renderHook( + () => + useSelfSubjectAccessReview({ + resourceAttributes: { resource: "nodes", verb: "list" }, + }), + { wrapper: makeWrapper(client) }, + ) + await waitFor(() => expect(result.current.isLoading).toBe(false)) + expect(result.current.allowed).toBe(false) + expect(result.current.error).toBeTruthy() + }) + + it("sends the spec verbatim in the POST body", async () => { + const client = new K8sClient() + const spy = vi.spyOn(client, "create").mockResolvedValue(ssarResult(true)) + const { result } = renderHook( + () => + 
useSelfSubjectAccessReview({ + resourceAttributes: { + group: "metrics.k8s.io", + resource: "nodes", + verb: "list", + }, + }), + { wrapper: makeWrapper(client) }, + ) + await waitFor(() => expect(result.current.isLoading).toBe(false)) + expect(spy).toHaveBeenCalledWith( + "authorization.k8s.io", + "v1", + "selfsubjectaccessreviews", + expect.objectContaining({ + kind: "SelfSubjectAccessReview", + apiVersion: "authorization.k8s.io/v1", + spec: { + resourceAttributes: { + group: "metrics.k8s.io", + resource: "nodes", + verb: "list", + }, + }, + }), + ) + }) +}) diff --git a/apps/console/src/components/QuotaDisplay.tsx b/apps/console/src/components/QuotaDisplay.tsx index a225416..4ac90df 100644 --- a/apps/console/src/components/QuotaDisplay.tsx +++ b/apps/console/src/components/QuotaDisplay.tsx @@ -1,6 +1,7 @@ import { useEffect, useState } from "react" import { useK8sList } from "@cozystack/k8s-client" import type { K8sResource } from "@cozystack/k8s-client" +import { parseQuantity, humanizeBytes, humanizeCpu } from "../lib/k8s-quantity.ts" interface ResourceQuotaSpec { hard?: Record @@ -15,35 +16,6 @@ export interface ResourceQuota extends K8sResource= 1024 ** 4) return `${(bytes / 1024 ** 4).toFixed(1)}Ti` - if (bytes >= 1024 ** 3) return `${(bytes / 1024 ** 3).toFixed(1)}Gi` - if (bytes >= 1024 ** 2) return `${(bytes / 1024 ** 2).toFixed(0)}Mi` - return `${bytes}B` -} - -function humanizeCpu(val: number): string { - if (val < 1) return `${Math.round(val * 1000)}m` - return `${val % 1 === 0 ? 
val : val.toFixed(2)}` -} - interface QuotaEntry { label: string usedRaw: string diff --git a/apps/console/src/components/cluster-usage/ClusterUsageAggregates.test.tsx b/apps/console/src/components/cluster-usage/ClusterUsageAggregates.test.tsx new file mode 100644 index 0000000..17159ed --- /dev/null +++ b/apps/console/src/components/cluster-usage/ClusterUsageAggregates.test.tsx @@ -0,0 +1,116 @@ +import { describe, it, expect } from "vitest" +import { render, screen } from "@testing-library/react" +import { ClusterUsageAggregates } from "./ClusterUsageAggregates.tsx" +import type { AggregateResources } from "../../lib/cluster-usage/types.ts" +import type { NodeSummary } from "../../hooks/useClusterUsageData.tsx" + +function empty(): AggregateResources { + return { + standard: { + cpu: { capacity: 0, allocatable: 0, requested: 0 }, + memory: { capacity: 0, allocatable: 0, requested: 0 }, + "ephemeral-storage": { capacity: 0, allocatable: 0, requested: 0 }, + pods: { capacity: 0, allocatable: 0, requested: 0 }, + }, + extended: {}, + } +} + +function summary(overrides: Partial = {}): NodeSummary { + return { total: 0, ready: 0, notReady: 0, schedulingDisabled: 0, ...overrides } +} + +describe("ClusterUsageAggregates", () => { + it("renders the node-summary header line", () => { + render( + , + ) + expect(screen.getByText("12 nodes")).toBeInTheDocument() + expect( + screen.getByText(/10 Ready · 1 NotReady · 1 SchedulingDisabled/), + ).toBeInTheDocument() + }) + + it("uses singular 'node' in the header for a one-node cluster", () => { + render( + , + ) + expect(screen.getByText("1 node")).toBeInTheDocument() + }) + + it("renders the four standard cards in order CPU, Memory, Storage, Pods", () => { + render() + const headings = screen.getAllByText(/^(CPU|Memory|Storage|Pods)$/) + const labels = headings.map((h) => h.textContent) + // Exact array (not arrayContaining) so the card order is actually pinned. 
+ expect(labels).toEqual(["CPU", "Memory", "Storage", "Pods"]) + }) + + it("does not render the extended-resources section when none are present", () => { + render() + expect(screen.queryByText(/extended resources/i)).toBeNull() + }) + + it("renders one card per extended-resource key with the full key as the title", () => { + const agg = empty() + agg.extended["nvidia.com/gpu"] = { capacity: 4, allocatable: 4, requested: 1 } + agg.extended["amd.com/gpu"] = { capacity: 2, allocatable: 2, requested: 0 } + render() + expect(screen.getByText("nvidia.com/gpu")).toBeInTheDocument() + expect(screen.getByText("amd.com/gpu")).toBeInTheDocument() + }) + + it("sorts extended-resource cards alphabetically by key", () => { + const agg = empty() + agg.extended["nvidia.com/gpu"] = { capacity: 4, allocatable: 4, requested: 1 } + agg.extended["amd.com/gpu"] = { capacity: 2, allocatable: 2, requested: 0 } + const { container } = render( + , + ) + const titles = Array.from(container.querySelectorAll('[data-extended-card]')).map( + (el) => el.getAttribute("data-extended-card"), + ) + expect(titles).toEqual(["amd.com/gpu", "nvidia.com/gpu"]) + }) + + it("does not render a 'Used' line on any card when no card has used data", () => { + render() + expect(screen.queryByText(/used/i)).toBeNull() + }) + + it("renders the 'Used' line on standard cards when usage data is present", () => { + const agg = empty() + agg.standard.cpu = { capacity: 8, allocatable: 8, requested: 2, used: 1 } + agg.standard.memory = { + capacity: 16 * 1024 ** 3, + allocatable: 16 * 1024 ** 3, + requested: 0, + used: 4 * 1024 ** 3, + } + render() + expect(screen.getAllByText(/used/i).length).toBeGreaterThan(0) + }) + + it("replaces Requested numbers with an em-dash tooltip when pods are unavailable", () => { + const agg = empty() + agg.standard.cpu = { capacity: 8, allocatable: 8, requested: 3 } + render( + , + ) + // The numeric Requested value should not be visible; em dashes appear + // and at least one element has 
the explanatory tooltip on title. + expect( + screen.getAllByTitle("Requires cluster-wide pod read access").length, + ).toBeGreaterThan(0) + }) +}) diff --git a/apps/console/src/components/cluster-usage/ClusterUsageAggregates.tsx b/apps/console/src/components/cluster-usage/ClusterUsageAggregates.tsx new file mode 100644 index 0000000..09bb8b4 --- /dev/null +++ b/apps/console/src/components/cluster-usage/ClusterUsageAggregates.tsx @@ -0,0 +1,89 @@ +import { ResourceCard } from "./ResourceCard.tsx" +import type { AggregateResources } from "../../lib/cluster-usage/types.ts" +import type { NodeSummary } from "../../hooks/useClusterUsageData.tsx" + +interface ClusterUsageAggregatesProps { + aggregates: AggregateResources + /** Counts shown in the panel header — Ready / NotReady / SchedulingDisabled. */ + nodeSummary: NodeSummary + /** + * When true, every Requested figure is replaced with an em dash and a + * tooltip explaining that cluster-wide pod read access is required. + * Set by the page when the underlying pods watch failed. + */ + podsUnavailable?: boolean +} + +/** + * Top panel of the Cluster Usage admin page. A header line shows total + * node count broken down by Ready / NotReady / SchedulingDisabled, + * followed by four fixed cards for the standard scheduler resources, + * followed by one card per extended resource discovered in + * node.status.capacity (alphabetical, full key verbatim). The extended + * section disappears entirely when no extended resources are present. + */ +export function ClusterUsageAggregates({ + aggregates, + nodeSummary, + podsUnavailable = false, +}: ClusterUsageAggregatesProps) { + const extendedKeys = Object.keys(aggregates.extended).sort() + return ( +
+
+ + {nodeSummary.total} node{nodeSummary.total === 1 ? "" : "s"} + + + {nodeSummary.ready} Ready · {nodeSummary.notReady} NotReady ·{" "} + {nodeSummary.schedulingDisabled} SchedulingDisabled + +
+
+ + + + +
+ {extendedKeys.length > 0 ? ( +
+

+ Extended resources (discovered) +

+
+ {extendedKeys.map((key) => ( +
+ +
+ ))} +
+
+ ) : null} +
+ ) +} diff --git a/apps/console/src/components/cluster-usage/ClusterUsageTable.test.tsx b/apps/console/src/components/cluster-usage/ClusterUsageTable.test.tsx new file mode 100644 index 0000000..51fc0be --- /dev/null +++ b/apps/console/src/components/cluster-usage/ClusterUsageTable.test.tsx @@ -0,0 +1,234 @@ +import { describe, it, expect } from "vitest" +import { render, screen, within } from "@testing-library/react" +import userEvent from "@testing-library/user-event" +import { ClusterUsageTable } from "./ClusterUsageTable.tsx" +import type { NodeRow } from "../../lib/cluster-usage/types.ts" + +function row(name: string, overrides: Partial = {}): NodeRow { + return { + name, + ready: true, + schedulable: true, + pressureConditions: [], + roles: [], + taints: [], + age: "1d", + creationTimestamp: "2026-05-25T00:00:00Z", + standard: { + cpu: { capacity: 8, allocatable: 8, requested: 1 }, + memory: { capacity: 16 * 1024 ** 3, allocatable: 16 * 1024 ** 3, requested: 0 }, + "ephemeral-storage": { capacity: 0, allocatable: 0, requested: 0 }, + pods: { capacity: 110, allocatable: 110, requested: 0 }, + }, + extended: {}, + ...overrides, + } +} + +describe("ClusterUsageTable", () => { + it("renders one tr per node, default-sorted by name ascending", () => { + render( + , + ) + const rows = screen.getAllByRole("row") + // First row is the header. 
+ expect(rows).toHaveLength(3) + expect(within(rows[1]).getByText("worker-a")).toBeInTheDocument() + expect(within(rows[2]).getByText("worker-b")).toBeInTheDocument() + }) + + it("shows Ready / NotReady status text", () => { + render( + , + ) + expect(screen.getByText("Ready")).toBeInTheDocument() + expect(screen.getByText("NotReady")).toBeInTheDocument() + }) + + it("shows SchedulingDisabled when schedulable=false", () => { + render( + , + ) + expect(screen.getByText(/scheduling.?disabled/i)).toBeInTheDocument() + }) + + it("flags pressure conditions with a chip", () => { + render( + , + ) + expect(screen.getByText("MemoryPressure")).toBeInTheDocument() + }) + + it("renders roles inline, em dash for nodes without roles", () => { + render( + , + ) + expect(screen.getByText("control-plane")).toBeInTheDocument() + const workerRow = screen.getByText("worker").closest("tr")! + expect(within(workerRow).getAllByText("—").length).toBeGreaterThan(0) + }) + + it("adds one column per extended key, in extendedKeys order", () => { + render( + , + ) + const headers = screen.getAllByRole("columnheader").map((h) => h.textContent) + const nvidiaAt = headers.indexOf("nvidia.com/gpu") + const amdAt = headers.indexOf("amd.com/gpu") + expect(nvidiaAt).toBeGreaterThanOrEqual(0) + expect(amdAt).toBeGreaterThanOrEqual(0) + // Columns must follow extendedKeys order: nvidia before amd. + expect(nvidiaAt).toBeLessThan(amdAt) + }) + + it("renders em dash in extended-resource cell when the node does not expose it", () => { + render( + , + ) + const tr = screen.getByText("plain").closest("tr")! + expect(within(tr).getAllByText("—").length).toBeGreaterThan(0) + }) + + it("collapses extended-resource cells to em dash for a NotReady node", () => { + const gpu = { "nvidia.com/gpu": { capacity: 2, allocatable: 2, requested: 1 } } + render( + , + ) + const readyRow = screen.getByText("ready-gpu").closest("tr")! + const downRow = screen.getByText("down-gpu").closest("tr")! 
+ // The Ready node surfaces its capacity-derived numbers... + expect(within(readyRow).getByText("capacity 2")).toBeInTheDocument() + // ...while the NotReady node must not render capacity for the extended cell. + expect(within(downRow).queryByText("capacity 2")).not.toBeInTheDocument() + }) + + it("renders the age column verbatim from row.age", () => { + render( + , + ) + expect(screen.getByText("21h")).toBeInTheDocument() + }) + + it("renders em dashes in cpu/memory cells when the node is NotReady", () => { + render( + , + ) + const tr = screen.getByText("dead").closest("tr")! + // CPU + Memory both render '—' when NotReady (4 dashes total for the + // two columns' two halves each — the assert just requires the row + // contains the em dashes, not the exact count). + expect(within(tr).getAllByText("—").length).toBeGreaterThan(0) + }) + + it("toggles the sort direction on a second click of the same column", async () => { + const user = userEvent.setup() + render( + , + ) + const nameHeader = screen.getByRole("button", { name: /name/i }) + // Default is asc — verify ordering, then click to flip. 
+ let bodyRows = screen.getAllByRole("row").slice(1) + expect(within(bodyRows[0]).getByText("a")).toBeInTheDocument() + await user.click(nameHeader) + bodyRows = screen.getAllByRole("row").slice(1) + expect(within(bodyRows[0]).getByText("c")).toBeInTheDocument() + expect(within(bodyRows[2]).getByText("a")).toBeInTheDocument() + }) + + it("filters rows by name substring (case-insensitive)", async () => { + const user = userEvent.setup() + render( + , + ) + const filter = screen.getByLabelText("Filter nodes") + await user.type(filter, "GPU") + expect(screen.queryByText("worker-cpu-1")).toBeNull() + expect(screen.queryByText("ctrl-1")).toBeNull() + expect(screen.getByText("worker-gpu-1")).toBeInTheDocument() + }) + + it("filters rows by role substring", async () => { + const user = userEvent.setup() + render( + , + ) + const filter = screen.getByLabelText("Filter nodes") + await user.type(filter, "control") + expect(screen.getByText("a")).toBeInTheDocument() + expect(screen.queryByText("b")).toBeNull() + }) + + it("replaces the Requested line with an em-dash tooltip when podsUnavailable", () => { + render( + , + ) + const tr = screen.getByText("loaded").closest("tr")! + const tooltipNodes = tr.querySelectorAll( + '[title="Requires cluster-wide pod read access"]', + ) + expect(tooltipNodes.length).toBeGreaterThan(0) + // The literal "4 / 8 req" (visible when pods are available) must not + // appear when podsUnavailable; the tooltip-bearing dash takes its place. 
+ expect(within(tr).queryByText(/4 \/ 8 req/)).toBeNull() + }) +}) diff --git a/apps/console/src/components/cluster-usage/ClusterUsageTable.tsx b/apps/console/src/components/cluster-usage/ClusterUsageTable.tsx new file mode 100644 index 0000000..71a8370 --- /dev/null +++ b/apps/console/src/components/cluster-usage/ClusterUsageTable.tsx @@ -0,0 +1,325 @@ +import { useMemo, useState } from "react" +import { ArrowDown, ArrowUp, ArrowUpDown } from "lucide-react" +import { humanizeBytes, humanizeCpu } from "../../lib/k8s-quantity.ts" +import type { NodeRow, ResourceTotals } from "../../lib/cluster-usage/types.ts" + +interface ClusterUsageTableProps { + rows: NodeRow[] + extendedKeys: string[] + /** True when pods-list cluster-wide failed — Requested cells become em dashes with a tooltip. */ + podsUnavailable?: boolean +} + +const REQUESTED_UNAVAILABLE_REASON = "Requires cluster-wide pod read access" + +type SortColumn = "name" | "status" | "roles" | "cpu" | "memory" | "age" | string + +interface SortState { + column: SortColumn + direction: "asc" | "desc" +} + +function statusLabel(row: NodeRow): string { + if (!row.ready) return "NotReady" + if (!row.schedulable) return "SchedulingDisabled" + return "Ready" +} + +function requestedPct(totals: ResourceTotals): number { + if (totals.allocatable <= 0) return 0 + return totals.requested / totals.allocatable +} + +function cpuCell(totals: ResourceTotals, ready: boolean, podsUnavailable: boolean) { + if (!ready || totals.allocatable <= 0) { + return ( +
+
+
+ ) + } + const hasUsed = totals.used !== undefined + return ( +
+ {hasUsed ? ( +
+ {humanizeCpu(totals.used ?? 0)} / {humanizeCpu(totals.allocatable)} used +
+ ) : null} + {podsUnavailable ? ( +
+ — req +
+ ) : ( +
+ {humanizeCpu(totals.requested)} / {humanizeCpu(totals.allocatable)} req +
+ )} +
+ ) +} + +function memoryCell(totals: ResourceTotals, ready: boolean, podsUnavailable: boolean) { + if (!ready || totals.allocatable <= 0) { + return ( +
+
+
+ ) + } + const hasUsed = totals.used !== undefined + return ( +
+ {hasUsed ? ( +
+ {humanizeBytes(totals.used ?? 0)} / {humanizeBytes(totals.allocatable)} used +
+ ) : null} + {podsUnavailable ? ( +
+ — req +
+ ) : ( +
+ {humanizeBytes(totals.requested)} / {humanizeBytes(totals.allocatable)} req +
+ )} +
+ ) +} + +function extendedCell( + totals: ResourceTotals | undefined, + ready: boolean, + podsUnavailable: boolean, +) { + if (!ready || !totals) return + return ( +
+
+ {podsUnavailable ? ( + + — + + ) : ( + totals.requested + )}{" "} + / {totals.allocatable} +
+
capacity {totals.capacity}
+
+ ) +} + +function compareRows(a: NodeRow, b: NodeRow, sort: SortState): number { + const direction = sort.direction === "asc" ? 1 : -1 + switch (sort.column) { + case "name": + return a.name.localeCompare(b.name) * direction + case "status": + return statusLabel(a).localeCompare(statusLabel(b)) * direction + case "roles": + return (a.roles[0] ?? "").localeCompare(b.roles[0] ?? "") * direction + case "cpu": + return (requestedPct(a.standard.cpu) - requestedPct(b.standard.cpu)) * direction + case "memory": + return (requestedPct(a.standard.memory) - requestedPct(b.standard.memory)) * direction + case "age": { + const ta = a.creationTimestamp ? new Date(a.creationTimestamp).getTime() : 0 + const tb = b.creationTimestamp ? new Date(b.creationTimestamp).getTime() : 0 + // Older nodes have smaller timestamps; sorting asc by timestamp shows + // oldest first, which matches the typical operator instinct for "Age asc". + return (ta - tb) * direction + } + default: { + // Dynamic extended-resource column: sort by requested %. + const va = requestedPct(a.extended[sort.column] ?? { capacity: 0, allocatable: 0, requested: 0 }) + const vb = requestedPct(b.extended[sort.column] ?? { capacity: 0, allocatable: 0, requested: 0 }) + return (va - vb) * direction + } + } +} + +function matchesFilter(row: NodeRow, q: string): boolean { + if (!q) return true + const needle = q.trim().toLowerCase() + if (!needle) return true + if (row.name.toLowerCase().includes(needle)) return true + if (row.roles.some((r) => r.toLowerCase().includes(needle))) return true + return false +} + +interface SortableHeaderProps { + column: SortColumn + label: string + sort: SortState + onSort: (column: SortColumn) => void + className?: string +} + +function SortableHeader({ + column, + label, + sort, + onSort, + className, +}: SortableHeaderProps) { + const active = sort.column === column + const Icon = active ? (sort.direction === "asc" ? 
ArrowUp : ArrowDown) : ArrowUpDown + return ( + + + + ) +} + +/** + * Per-node table rendered below the aggregate panel. Fixed columns + * (Name, Status, Roles, CPU, Memory) plus one column per full + * extended-resource key found in the cluster, then Age. Click a header + * to sort (click again to reverse); default sort is Name ascending. A + * filter input above the table filters by name and roles substring. + * + * NotReady nodes show em dashes for CPU, Memory and extended-resource + * cells because status.capacity stops being authoritative; the rest of + * the row stays visible as a useful pointer for the operator. When the + * cluster-wide pods list failed, Requested values in every cell are + * replaced by an em dash with a tooltip explaining the missing permission. + */ +export function ClusterUsageTable({ + rows, + extendedKeys, + podsUnavailable = false, +}: ClusterUsageTableProps) { + const [sort, setSort] = useState({ column: "name", direction: "asc" }) + const [filter, setFilter] = useState("") + + const onSort = (column: SortColumn) => { + setSort((s) => + s.column === column + ? { column, direction: s.direction === "asc" ? "desc" : "asc" } + : { column, direction: "asc" }, + ) + } + + const visibleRows = useMemo( + () => + rows + .filter((r) => matchesFilter(r, filter)) + .sort((a, b) => compareRows(a, b, sort)), + [rows, sort, filter], + ) + + return ( +
+
+ setFilter(e.target.value)} + aria-label="Filter nodes" + className="w-64 max-w-full rounded border border-slate-200 px-3 py-1.5 text-sm focus:border-blue-500 focus:outline-none" + /> + + {visibleRows.length} of {rows.length} + +
+
+ + + + + + + + + {extendedKeys.map((k) => ( + + ))} + + + + + {visibleRows.map((r) => ( + + + + + + + {extendedKeys.map((k) => ( + + ))} + + + ))} + +
+ +
{r.name} +
+
{statusLabel(r)}
+ {r.pressureConditions.length > 0 ? ( +
+ {r.pressureConditions.map((p) => ( + + {p} + + ))} +
+ ) : null} + {r.taints.length > 0 ? ( +
+ +tainted {r.taints.length} +
+ ) : null} +
+
+ {r.roles.length > 0 ? ( +
+ {r.roles.map((role) => ( + + {role} + + ))} +
+ ) : ( + + )} +
+ {cpuCell(r.standard.cpu, r.ready, podsUnavailable)} + + {memoryCell(r.standard.memory, r.ready, podsUnavailable)} + + {extendedCell(r.extended[k], r.ready, podsUnavailable)} + {r.age}
+
+
+ ) +} diff --git a/apps/console/src/components/cluster-usage/ResourceCard.test.tsx b/apps/console/src/components/cluster-usage/ResourceCard.test.tsx new file mode 100644 index 0000000..86f030b --- /dev/null +++ b/apps/console/src/components/cluster-usage/ResourceCard.test.tsx @@ -0,0 +1,76 @@ +import { describe, it, expect } from "vitest" +import { render, screen } from "@testing-library/react" +import { ResourceCard } from "./ResourceCard.tsx" + +describe("ResourceCard", () => { + it("renders the title verbatim", () => { + render( + , + ) + expect(screen.getByText("nvidia.com/gpu")).toBeInTheDocument() + }) + + it("renders capacity and allocatable for any resource", () => { + render( + , + ) + expect(screen.getByText(/capacity/i)).toBeInTheDocument() + expect(screen.getByText(/allocatable/i)).toBeInTheDocument() + }) + + it("omits the Used line when used is undefined", () => { + render( + , + ) + expect(screen.queryByText(/used/i)).toBeNull() + }) + + it("renders the Used line when used is defined", () => { + render( + , + ) + expect(screen.getByText(/used/i)).toBeInTheDocument() + }) + + it("renders an em dash for divide-by-zero (allocatable=0)", () => { + render( + , + ) + expect(screen.getAllByText("—").length).toBeGreaterThan(0) + }) + + it("clamps percentage display at 100% for over-committed resources", () => { + render( + , + ) + const bars = document.querySelectorAll('[role="progressbar"]') + const requestedBar = Array.from(bars).find( + (b) => b.getAttribute("data-resource-bar") === "requested", + ) + expect(requestedBar?.getAttribute("aria-valuenow")).toBe("100") + }) +}) diff --git a/apps/console/src/components/cluster-usage/ResourceCard.tsx b/apps/console/src/components/cluster-usage/ResourceCard.tsx new file mode 100644 index 0000000..eba7e2d --- /dev/null +++ b/apps/console/src/components/cluster-usage/ResourceCard.tsx @@ -0,0 +1,144 @@ +import { humanizeBytes, humanizeCpu } from "../../lib/k8s-quantity.ts" +import type { ResourceTotals } from 
"../../lib/cluster-usage/types.ts" + +export type ResourceFormat = "cpu" | "bytes" | "count" + +interface ResourceCardProps { + title: string + format: ResourceFormat + totals: ResourceTotals + /** + * When true, the Requested figure is treated as unknown (cluster-wide + * pod read access was denied or the request failed). The numeric value + * is replaced with an em dash and a tooltip explains why. + */ + requestedUnavailable?: boolean +} + +function formatValue(value: number, format: ResourceFormat): string { + switch (format) { + case "cpu": + return humanizeCpu(value) + case "bytes": + return humanizeBytes(value) + case "count": + default: + return value % 1 === 0 ? `${value}` : value.toFixed(2) + } +} + +function percent(value: number, allocatable: number): number | null { + if (allocatable <= 0) return null + return Math.min(100, Math.round((value / allocatable) * 100)) +} + +function barColorClass(pct: number | null): string { + if (pct === null) return "bg-slate-300" + if (pct > 90) return "bg-red-500" + if (pct > 70) return "bg-amber-500" + return "bg-blue-500" +} + +interface ProgressBarProps { + pct: number | null + resourceBar: "requested" | "used" + ariaLabel: string +} + +function ProgressBar({ pct, resourceBar, ariaLabel }: ProgressBarProps) { + return ( +
+
+
+ ) +} + +/** + * A single aggregate-resource card showing capacity, allocatable, and + * up to two progress bars: requested (always rendered when allocatable + * is non-zero) and used (rendered only when totals.used is defined, + * which happens for cpu/memory when metrics.k8s.io is discovered). + * + * A zero-allocatable resource renders em dashes for every number and + * no progress bar — that combination is rare but represents nodes that + * have not yet reported their capacity, and crashing the panel is much + * worse than rendering placeholders. + */ +export function ResourceCard({ + title, + format, + totals, + requestedUnavailable = false, +}: ResourceCardProps) { + const allocatableZero = totals.allocatable <= 0 + const requestedPct = percent(totals.requested, totals.allocatable) + const usedDefined = totals.used !== undefined + const usedPct = usedDefined ? percent(totals.used ?? 0, totals.allocatable) : null + const REQUESTED_UNAVAILABLE_REASON = "Requires cluster-wide pod read access" + + return ( +
+
+ {title} +
+
+
+ Capacity + + {allocatableZero ? "—" : formatValue(totals.capacity, format)} + +
+
+ Allocatable + + {allocatableZero ? "—" : formatValue(totals.allocatable, format)} + +
+ {usedDefined ? ( +
+
+ Used + + {allocatableZero ? "—" : formatValue(totals.used ?? 0, format)} + +
+ {!allocatableZero ? ( + + ) : null} +
+ ) : null} +
+
+ Requested + + {requestedUnavailable || allocatableZero + ? "—" + : formatValue(totals.requested, format)} + +
+ {!allocatableZero && !requestedUnavailable ? ( + + ) : null} +
+
+
+ ) +} diff --git a/apps/console/src/hooks/useClusterUsageData.test.tsx b/apps/console/src/hooks/useClusterUsageData.test.tsx new file mode 100644 index 0000000..a6532e9 --- /dev/null +++ b/apps/console/src/hooks/useClusterUsageData.test.tsx @@ -0,0 +1,161 @@ +import { describe, it, expect, vi } from "vitest" +import { renderHook, waitFor } from "@testing-library/react" +import { QueryClient, QueryClientProvider } from "@tanstack/react-query" +import { + K8sClient, + K8sProvider, + K8sApiError, + type APIGroupList, + type K8sList, +} from "@cozystack/k8s-client" +import type { ReactNode } from "react" +import { useClusterUsageData } from "./useClusterUsageData.tsx" +import { nodesListFixture } from "../test-utils/fixtures/nodes.ts" +import { podsListFixture } from "../test-utils/fixtures/pods.ts" +import { nodeMetricsListFixture } from "../test-utils/fixtures/node-metrics.ts" + +function makeWrapper(client: K8sClient) { + const queryClient = new QueryClient({ + defaultOptions: { queries: { retry: false, gcTime: 0 } }, + }) + return function Wrapper({ children }: { children: ReactNode }) { + return ( + + + {children} + + + ) + } +} + +const groupsWithMetrics: APIGroupList = { + kind: "APIGroupList", + apiVersion: "v1", + groups: [ + { + name: "metrics.k8s.io", + versions: [{ groupVersion: "metrics.k8s.io/v1beta1", version: "v1beta1" }], + preferredVersion: { groupVersion: "metrics.k8s.io/v1beta1", version: "v1beta1" }, + }, + ], +} + +const groupsWithoutMetrics: APIGroupList = { + kind: "APIGroupList", + apiVersion: "v1", + groups: [ + { + name: "apps", + versions: [{ groupVersion: "apps/v1", version: "v1" }], + preferredVersion: { groupVersion: "apps/v1", version: "v1" }, + }, + ], +} + +function stubList( + client: K8sClient, + responses: Partial | K8sApiError>>, +) { + vi.spyOn(client, "list").mockImplementation(async (apiGroup, _v, plural) => { + // Key by (apiGroup|plural). 
The metrics.k8s.io node listing uses + // plural=nodes too, so we can't disambiguate on plural alone. + const key = `${apiGroup}|${plural}` + const r = responses[key] + if (r instanceof K8sApiError) throw r + return (r ?? { apiVersion: "v1", kind: `${plural}List`, metadata: {}, items: [] }) as K8sList< + unknown + > + }) +} + +describe("useClusterUsageData", () => { + it("reports isLoading=true on first render", () => { + const client = new K8sClient() + stubList(client, {}) + vi.spyOn(client, "getApiGroups").mockImplementation(() => new Promise(() => {})) + const { result } = renderHook(() => useClusterUsageData(), { + wrapper: makeWrapper(client), + }) + expect(result.current.isLoading).toBe(true) + }) + + it("returns aggregates and per-node rows derived from nodes + pods + metrics", async () => { + const client = new K8sClient() + stubList(client, { + "|nodes": nodesListFixture, + "|pods": podsListFixture, + "metrics.k8s.io|nodes": nodeMetricsListFixture, + }) + vi.spyOn(client, "getApiGroups").mockResolvedValue(groupsWithMetrics) + const { result } = renderHook(() => useClusterUsageData(), { + wrapper: makeWrapper(client), + }) + await waitFor(() => expect(result.current.isLoading).toBe(false)) + expect(result.current.metricsAvailable).toBe(true) + expect(result.current.perNode.map((r) => r.name)).toEqual([ + "cp-1", + "worker-1", + "worker-gpu-1", + ]) + expect(result.current.aggregates.extended["nvidia.com/gpu"].capacity).toBe(1) + // Used overlay must be populated from the metrics fixture. 
+ expect(result.current.aggregates.standard.cpu.used).toBeGreaterThan(0) + }) + + it("never lists NodeMetrics when metrics.k8s.io is not registered", async () => { + const client = new K8sClient() + const listSpy = vi.spyOn(client, "list").mockImplementation( + async (_g, _v, plural) => { + if (plural === "nodes") + return nodesListFixture as unknown as K8sList + if (plural === "pods") + return podsListFixture as unknown as K8sList + return { apiVersion: "v1", kind: `${plural}List`, metadata: {}, items: [] } + }, + ) + vi.spyOn(client, "getApiGroups").mockResolvedValue(groupsWithoutMetrics) + const { result } = renderHook(() => useClusterUsageData(), { + wrapper: makeWrapper(client), + }) + await waitFor(() => expect(result.current.isLoading).toBe(false)) + expect(result.current.metricsAvailable).toBe(false) + const metricsCalls = listSpy.mock.calls.filter( + (call) => call[0] === "metrics.k8s.io", + ) + expect(metricsCalls).toHaveLength(0) + expect(result.current.aggregates.standard.cpu.used).toBeUndefined() + }) + + it("treats a metrics-API 403 as 'no usage data' without crashing", async () => { + const client = new K8sClient() + vi.spyOn(client, "list").mockImplementation(async (g, _v, plural) => { + if (g === "metrics.k8s.io") throw new K8sApiError(403, "forbidden") + if (plural === "nodes") return nodesListFixture as unknown as K8sList + if (plural === "pods") return podsListFixture as unknown as K8sList + return { apiVersion: "v1", kind: `${plural}List`, metadata: {}, items: [] } + }) + vi.spyOn(client, "getApiGroups").mockResolvedValue(groupsWithMetrics) + const { result } = renderHook(() => useClusterUsageData(), { + wrapper: makeWrapper(client), + }) + await waitFor(() => expect(result.current.isLoading).toBe(false)) + expect(result.current.metricsAvailable).toBe(true) + expect(result.current.aggregates.standard.cpu.used).toBeUndefined() + expect(result.current.error).toBeNull() + }) + + it("surfaces a nodes-list error as the hook error", async () => { + 
const client = new K8sClient() + vi.spyOn(client, "list").mockImplementation(async (_g, _v, plural) => { + if (plural === "nodes") throw new K8sApiError(500, "boom") + return { apiVersion: "v1", kind: `${plural}List`, metadata: {}, items: [] } + }) + vi.spyOn(client, "getApiGroups").mockResolvedValue(groupsWithoutMetrics) + const { result } = renderHook(() => useClusterUsageData(), { + wrapper: makeWrapper(client), + }) + await waitFor(() => expect(result.current.error).toBeTruthy()) + expect(result.current.error?.message).toContain("boom") + }) +}) diff --git a/apps/console/src/hooks/useClusterUsageData.tsx b/apps/console/src/hooks/useClusterUsageData.tsx new file mode 100644 index 0000000..921da5e --- /dev/null +++ b/apps/console/src/hooks/useClusterUsageData.tsx @@ -0,0 +1,164 @@ +import { useMemo } from "react" +import { + useK8sList, + useApiGroupAvailable, + K8sApiError, + type K8sList, +} from "@cozystack/k8s-client" +import { aggregateNodeResources } from "../lib/cluster-usage/aggregate.ts" +import { derivePerNodeRows } from "../lib/cluster-usage/per-node.ts" +import type { + AggregateResources, + Node, + NodeMetrics, + NodeRow, + Pod, +} from "../lib/cluster-usage/types.ts" + +/** + * Polling interval for NodeMetrics. Matches the default + * --metric-resolution of metrics-server (15s) plus a small buffer; a + * faster cadence returns identical values and wastes requests. + */ +export const CLUSTER_USAGE_METRICS_REFETCH_MS = 30_000 + +export interface NodeSummary { + total: number + ready: number + notReady: number + schedulingDisabled: number +} + +interface ClusterUsageData { + nodes: Node[] + pods: Pod[] + metrics: NodeMetrics[] | undefined + aggregates: AggregateResources + perNode: NodeRow[] + nodeSummary: NodeSummary + isLoading: boolean + /** + * The hook's primary error: a nodes-list failure. 
Pods and metrics + * failures are surfaced through their own flags so callers can degrade + * gracefully instead of replacing the whole page with an error block. + */ + error: Error | null + /** HTTP status of `error`, if it was a K8sApiError. */ + errorStatus: number | null + /** True when the cluster-wide pods list failed. Requested values are unreliable. */ + podsUnavailable: boolean + metricsAvailable: boolean +} + +/** + * Composite hook that powers the Cluster Usage admin page. Subscribes + * to nodes and pods via K8s watches (push-based updates, no polling), + * and — only when metrics.k8s.io is discovered on the cluster — polls + * NodeMetrics on a 30-second cadence. metrics.k8s.io is not watchable, + * so a refetch interval is the only option; the rest of the page works + * fine without it. + * + * The pods watch is cluster-wide and unfiltered. On a multi-thousand- + * pod cluster that is a few megabytes of JSON kept hot in memory plus + * continuous patch events. The trade-off is accepted for now because + * (a) Requested totals need every pod regardless of namespace, and + * (b) the watch already exists for the rest of the console. If the + * cost ever becomes painful, the natural follow-up is a field-selector + * projection on spec.nodeName + containers[*].resources.requests, or + * a server-side aggregation endpoint. + * + * A 403 on the metrics fetch is treated as 'no usage data, no + * page-level error' — the Used overlay disappears, the rest of the + * panel still renders. A pods-list error is surfaced through the + * `podsUnavailable` flag so the page can degrade gracefully. A + * nodes-list error is the only kind that takes over the page; everything + * downstream of it is undefined. 
+ */ +export function useClusterUsageData(): ClusterUsageData { + const nodesQuery = useK8sList({ + apiGroup: "", + apiVersion: "v1", + plural: "nodes", + }) + + const podsQuery = useK8sList({ + apiGroup: "", + apiVersion: "v1", + plural: "pods", + }) + + const { available: metricsAvailable, isLoading: metricsDiscoveryLoading } = + useApiGroupAvailable("metrics.k8s.io") + + const metricsQuery = useK8sList( + { + apiGroup: "metrics.k8s.io", + apiVersion: "v1beta1", + plural: "nodes", + }, + { + enabled: metricsAvailable, + watch: false, + refetchInterval: CLUSTER_USAGE_METRICS_REFETCH_MS, + }, + ) + + const nodes = useMemo( + () => nodesQuery.data?.items ?? [], + [nodesQuery.data], + ) + const pods = useMemo(() => podsQuery.data?.items ?? [], [podsQuery.data]) + const metricsItems = useMemo( + () => metricsQueryItems(metricsQuery.data, metricsQuery.error), + [metricsQuery.data, metricsQuery.error], + ) + + const aggregates = useMemo( + () => aggregateNodeResources(nodes, pods, metricsItems), + [nodes, pods, metricsItems], + ) + const perNode = useMemo( + () => derivePerNodeRows(nodes, pods, metricsItems), + [nodes, pods, metricsItems], + ) + const nodeSummary = useMemo(() => { + let ready = 0 + let notReady = 0 + let schedulingDisabled = 0 + for (const row of perNode) { + if (!row.ready) notReady++ + else if (!row.schedulable) schedulingDisabled++ + else ready++ + } + return { total: perNode.length, ready, notReady, schedulingDisabled } + }, [perNode]) + + const nodesError = (nodesQuery.error as Error | null) ?? null + const errorStatus = nodesError instanceof K8sApiError ? nodesError.status : null + + return { + nodes, + pods, + metrics: metricsItems, + aggregates, + perNode, + nodeSummary, + isLoading: + nodesQuery.isLoading || podsQuery.isLoading || metricsDiscoveryLoading, + // Pods and metrics errors are not promoted to page-level errors. + // The caller renders cell-level placeholders instead. 
+ error: nodesError, + errorStatus, + podsUnavailable: podsQuery.error != null, + metricsAvailable, + } +} + +function metricsQueryItems( + list: K8sList | undefined, + error: unknown, +): NodeMetrics[] | undefined { + if (error) return undefined + if (!list) return undefined + return list.items +} diff --git a/apps/console/src/lib/cluster-usage/aggregate.test.ts b/apps/console/src/lib/cluster-usage/aggregate.test.ts new file mode 100644 index 0000000..713616c --- /dev/null +++ b/apps/console/src/lib/cluster-usage/aggregate.test.ts @@ -0,0 +1,157 @@ +import { describe, it, expect } from "vitest" +import { aggregateNodeResources } from "./aggregate.ts" +import type { Node, Pod, NodeMetrics } from "./types.ts" + +function node(name: string, capacity: Record): Node { + return { + apiVersion: "v1", + kind: "Node", + metadata: { name }, + status: { capacity, allocatable: capacity, conditions: [] }, + } +} + +function pod( + name: string, + nodeName: string | undefined, + requests: Record, +): Pod { + return { + apiVersion: "v1", + kind: "Pod", + metadata: { name, namespace: "ns" }, + spec: { nodeName, containers: [{ name: "c", resources: { requests } }] }, + } +} + +function metric(name: string, cpu: string, memory: string): NodeMetrics { + return { + apiVersion: "metrics.k8s.io/v1beta1", + kind: "NodeMetrics", + metadata: { name }, + usage: { cpu, memory }, + } +} + +describe("aggregateNodeResources", () => { + it("returns zeroed standard totals for an empty cluster", () => { + const a = aggregateNodeResources([], [], undefined) + expect(a.standard.cpu).toEqual({ capacity: 0, allocatable: 0, requested: 0 }) + expect(a.standard.memory).toEqual({ capacity: 0, allocatable: 0, requested: 0 }) + expect(a.standard["ephemeral-storage"]).toEqual({ + capacity: 0, + allocatable: 0, + requested: 0, + }) + expect(a.standard.pods).toEqual({ capacity: 0, allocatable: 0, requested: 0 }) + expect(a.extended).toEqual({}) + }) + + it("sums capacity and allocatable across nodes", () => { 
+ const a = aggregateNodeResources( + [ + node("a", { cpu: "4", memory: "8Gi", "ephemeral-storage": "100Gi", pods: "110" }), + node("b", { cpu: "8", memory: "32Gi", "ephemeral-storage": "500Gi", pods: "220" }), + ], + [], + undefined, + ) + expect(a.standard.cpu.capacity).toBe(12) + expect(a.standard.cpu.allocatable).toBe(12) + expect(a.standard.memory.capacity).toBe((8 + 32) * 1024 ** 3) + expect(a.standard.pods.capacity).toBe(330) + }) + + it("groups extended resource keys verbatim", () => { + const a = aggregateNodeResources( + [ + node("a", { cpu: "4", "nvidia.com/gpu": "2" }), + node("b", { cpu: "8", "amd.com/gpu": "1" }), + ], + [], + undefined, + ) + expect(a.extended["nvidia.com/gpu"].capacity).toBe(2) + expect(a.extended["amd.com/gpu"].capacity).toBe(1) + }) + + it("sums pod requests scoped to scheduled pods only", () => { + const a = aggregateNodeResources( + [node("a", { cpu: "8", memory: "16Gi" })], + [ + pod("p1", "a", { cpu: "500m", memory: "1Gi" }), + pod("p2", "a", { cpu: "1", memory: "2Gi" }), + pod("unscheduled", undefined, { cpu: "100m", memory: "256Mi" }), + ], + undefined, + ) + expect(a.standard.cpu.requested).toBe(1.5) + expect(a.standard.memory.requested).toBe(3 * 1024 ** 3) + }) + + it("skips pods scheduled on unknown nodes", () => { + const a = aggregateNodeResources( + [node("a", { cpu: "8" })], + [pod("rogue", "ghost-node", { cpu: "500m" })], + undefined, + ) + expect(a.standard.cpu.requested).toBe(0) + }) + + it("excludes terminal (Succeeded/Failed) pods from requested totals", () => { + const terminal = (name: string, phase: string): Pod => ({ + ...pod(name, "a", { cpu: "1", memory: "2Gi" }), + status: { phase }, + }) + const a = aggregateNodeResources( + [node("a", { cpu: "8", memory: "16Gi" })], + [ + pod("running", "a", { cpu: "500m", memory: "1Gi" }), + terminal("completed", "Succeeded"), + terminal("crashed", "Failed"), + ], + undefined, + ) + expect(a.standard.cpu.requested).toBe(0.5) + 
expect(a.standard.memory.requested).toBe(1024 ** 3) + }) + + it("sums extended-resource requests under the extended bucket", () => { + const a = aggregateNodeResources( + [node("a", { cpu: "8", "nvidia.com/gpu": "2" })], + [pod("p", "a", { cpu: "200m", "nvidia.com/gpu": "1" })], + undefined, + ) + expect(a.extended["nvidia.com/gpu"].requested).toBe(1) + }) + + it("populates used for cpu and memory when metrics are supplied", () => { + const a = aggregateNodeResources( + [node("a", { cpu: "8", memory: "16Gi" })], + [], + [metric("a", "1500m", "4Gi")], + ) + expect(a.standard.cpu.used).toBe(1.5) + expect(a.standard.memory.used).toBe(4 * 1024 ** 3) + }) + + it("leaves used undefined when metrics is undefined", () => { + const a = aggregateNodeResources( + [node("a", { cpu: "8", memory: "16Gi" })], + [], + undefined, + ) + expect(a.standard.cpu.used).toBeUndefined() + expect(a.standard.memory.used).toBeUndefined() + }) + + it("never reports used for ephemeral-storage or pods", () => { + const a = aggregateNodeResources( + [node("a", { cpu: "8", memory: "16Gi", "ephemeral-storage": "100Gi", pods: "110" })], + [], + [metric("a", "1", "2Gi")], + ) + expect(a.standard["ephemeral-storage"].used).toBeUndefined() + expect(a.standard.pods.used).toBeUndefined() + }) +}) diff --git a/apps/console/src/lib/cluster-usage/aggregate.ts b/apps/console/src/lib/cluster-usage/aggregate.ts new file mode 100644 index 0000000..fa5c4b5 --- /dev/null +++ b/apps/console/src/lib/cluster-usage/aggregate.ts @@ -0,0 +1,92 @@ +import { parseQuantity } from "../k8s-quantity.ts" +import { getExtendedResourceKeys } from "./extended-resources.ts" +import type { + AggregateResources, + Node, + NodeMetrics, + Pod, + ResourceTotals, + StandardResourceKey, +} from "./types.ts" +import { STANDARD_RESOURCE_KEYS, STANDARD_RESOURCE_KEY_SET } from "./types.ts" + +function emptyTotals(): ResourceTotals { + return { capacity: 0, allocatable: 0, requested: 0 } +} + +/** + * Computes cluster-wide totals for every 
standard and extended resource. + * + * Capacity and allocatable are summed from each node's status maps. + * Requested is summed only from pods that are scheduled (have a + * spec.nodeName) and whose nodeName actually appears in the node list; + * unscheduled or orphaned pods are skipped so the per-node and aggregate + * numbers stay reconcilable. + * + * Used is only populated for cpu and memory, mirroring what + * metrics.k8s.io reports; ephemeral-storage and pods never get a 'used' + * value because the API simply does not expose one. + */ +export function aggregateNodeResources( + nodes: Node[], + pods: Pod[], + metrics: NodeMetrics[] | undefined, +): AggregateResources { + const standard: Record = { + cpu: emptyTotals(), + memory: emptyTotals(), + "ephemeral-storage": emptyTotals(), + pods: emptyTotals(), + } + const extended: Record = {} + const knownNodes = new Set(nodes.map((n) => n.metadata.name)) + const extendedKeys = getExtendedResourceKeys(nodes) + for (const key of extendedKeys) extended[key] = emptyTotals() + + for (const node of nodes) { + const capacity = node.status?.capacity ?? {} + const allocatable = node.status?.allocatable ?? {} + for (const key of STANDARD_RESOURCE_KEYS) { + standard[key].capacity += parseQuantity(capacity[key] ?? "0") + standard[key].allocatable += parseQuantity(allocatable[key] ?? "0") + } + for (const key of extendedKeys) { + extended[key].capacity += parseQuantity(capacity[key] ?? "0") + extended[key].allocatable += parseQuantity(allocatable[key] ?? "0") + } + } + + for (const pod of pods) { + const nodeName = pod.spec?.nodeName + if (!nodeName || !knownNodes.has(nodeName)) continue + // Terminal pods still appear in API lists but no longer hold schedulable + // requests; counting them would inflate the requested totals. + const phase = pod.status?.phase + if (phase === "Succeeded" || phase === "Failed") continue + for (const container of pod.spec?.containers ?? 
[]) { + const requests = container.resources?.requests + if (!requests) continue + for (const [key, value] of Object.entries(requests)) { + if (STANDARD_RESOURCE_KEY_SET.has(key)) { + standard[key as StandardResourceKey].requested += parseQuantity(value) + } else if (extended[key]) { + extended[key].requested += parseQuantity(value) + } + } + } + } + + if (metrics) { + let cpuUsed = 0 + let memoryUsed = 0 + for (const m of metrics) { + if (!knownNodes.has(m.metadata.name)) continue + cpuUsed += parseQuantity(m.usage?.cpu ?? "0") + memoryUsed += parseQuantity(m.usage?.memory ?? "0") + } + standard.cpu.used = cpuUsed + standard.memory.used = memoryUsed + } + + return { standard, extended } +} diff --git a/apps/console/src/lib/cluster-usage/extended-resources.test.ts b/apps/console/src/lib/cluster-usage/extended-resources.test.ts new file mode 100644 index 0000000..0ced00b --- /dev/null +++ b/apps/console/src/lib/cluster-usage/extended-resources.test.ts @@ -0,0 +1,76 @@ +import { describe, it, expect } from "vitest" +import { getExtendedResourceKeys } from "./extended-resources.ts" +import type { Node } from "./types.ts" + +function makeNode(name: string, capacity: Record): Node { + return { + apiVersion: "v1", + kind: "Node", + metadata: { name }, + status: { capacity }, + } +} + +describe("getExtendedResourceKeys", () => { + it("returns an empty array for no nodes", () => { + expect(getExtendedResourceKeys([])).toEqual([]) + }) + + it("strips out standard resources cpu, memory, ephemeral-storage, pods", () => { + const nodes = [ + makeNode("a", { + cpu: "8", + memory: "32Gi", + "ephemeral-storage": "500Gi", + pods: "110", + }), + ] + expect(getExtendedResourceKeys(nodes)).toEqual([]) + }) + + it("strips hugepages-* in any variant", () => { + const nodes = [ + makeNode("a", { + cpu: "8", + "hugepages-2Mi": "0", + "hugepages-1Gi": "0", + }), + ] + expect(getExtendedResourceKeys(nodes)).toEqual([]) + }) + + it("collects extended keys verbatim", () => { + const nodes = 
[makeNode("a", { cpu: "8", "nvidia.com/gpu": "1" })] + expect(getExtendedResourceKeys(nodes)).toEqual(["nvidia.com/gpu"]) + }) + + it("dedupes keys appearing on multiple nodes", () => { + const nodes = [ + makeNode("a", { "nvidia.com/gpu": "1" }), + makeNode("b", { "nvidia.com/gpu": "2" }), + ] + expect(getExtendedResourceKeys(nodes)).toEqual(["nvidia.com/gpu"]) + }) + + it("sorts keys alphabetically for stable rendering", () => { + const nodes = [ + makeNode("a", { "nvidia.com/gpu": "1" }), + makeNode("b", { "amd.com/gpu": "1", "hami.io/vgpu": "4" }), + ] + expect(getExtendedResourceKeys(nodes)).toEqual([ + "amd.com/gpu", + "hami.io/vgpu", + "nvidia.com/gpu", + ]) + }) + + it("ignores nodes without status.capacity", () => { + const node: Node = { + apiVersion: "v1", + kind: "Node", + metadata: { name: "drained" }, + } + expect(getExtendedResourceKeys([node])).toEqual([]) + }) +}) + diff --git a/apps/console/src/lib/cluster-usage/extended-resources.ts b/apps/console/src/lib/cluster-usage/extended-resources.ts new file mode 100644 index 0000000..a8b06f7 --- /dev/null +++ b/apps/console/src/lib/cluster-usage/extended-resources.ts @@ -0,0 +1,22 @@ +import { isExtendedResourceKey } from "./types.ts" +import type { Node } from "./types.ts" + +/** + * Returns the sorted, deduplicated set of extended-resource keys present + * in any node's `status.capacity` across the cluster. Standard scheduler + * resources (cpu, memory, ephemeral-storage, pods) and every hugepages-* + * variant are filtered out — the rest is whatever the cluster exposes, + * rendered verbatim. There is intentionally no vendor allow-list: a new + * accelerator surfaces in the UI the moment a node exposing it joins. 
+ */ +export function getExtendedResourceKeys(nodes: Node[]): string[] { + const set = new Set() + for (const node of nodes) { + const capacity = node.status?.capacity + if (!capacity) continue + for (const key of Object.keys(capacity)) { + if (isExtendedResourceKey(key)) set.add(key) + } + } + return [...set].sort() +} diff --git a/apps/console/src/lib/cluster-usage/per-node.test.ts b/apps/console/src/lib/cluster-usage/per-node.test.ts new file mode 100644 index 0000000..ef8442a --- /dev/null +++ b/apps/console/src/lib/cluster-usage/per-node.test.ts @@ -0,0 +1,219 @@ +import { describe, it, expect, vi, beforeAll, afterAll } from "vitest" +import { derivePerNodeRows } from "./per-node.ts" +import type { Node, Pod, NodeMetrics } from "./types.ts" + +beforeAll(() => { + vi.useFakeTimers() + vi.setSystemTime(new Date("2026-05-26T00:00:00Z")) +}) + +afterAll(() => { + vi.useRealTimers() +}) + +function nodeWith( + name: string, + fields: { + capacity?: Record + allocatable?: Record + labels?: Record + ready?: boolean + pressure?: string[] + unschedulable?: boolean + taints?: Array<{ key: string; value?: string; effect: string }> + creationTimestamp?: string + } = {}, +): Node { + const capacity = fields.capacity ?? { cpu: "4", memory: "8Gi" } + const allocatable = fields.allocatable ?? capacity + const conditions: Node["status"] = { capacity, allocatable, conditions: [] } + conditions.conditions?.push({ + type: "Ready", + status: fields.ready === false ? "False" : "True", + }) + for (const p of fields.pressure ?? []) { + conditions.conditions?.push({ type: p, status: "True" }) + } + return { + apiVersion: "v1", + kind: "Node", + metadata: { + name, + labels: fields.labels, + creationTimestamp: fields.creationTimestamp ?? 
"2026-05-25T00:00:00Z", + }, + spec: { unschedulable: fields.unschedulable, taints: fields.taints }, + status: conditions, + } as Node +} + +function pod( + name: string, + nodeName: string | undefined, + requests: Record, +): Pod { + return { + apiVersion: "v1", + kind: "Pod", + metadata: { name, namespace: "ns" }, + spec: { nodeName, containers: [{ name: "c", resources: { requests } }] }, + } +} + +function metric(name: string, cpu: string, memory: string): NodeMetrics { + return { + apiVersion: "metrics.k8s.io/v1beta1", + kind: "NodeMetrics", + metadata: { name }, + usage: { cpu, memory }, + } +} + +describe("derivePerNodeRows", () => { + it("returns one row per node, sorted by name", () => { + const rows = derivePerNodeRows( + [nodeWith("b"), nodeWith("a"), nodeWith("c")], + [], + undefined, + ) + expect(rows.map((r) => r.name)).toEqual(["a", "b", "c"]) + }) + + it("computes age relative to the stubbed clock", () => { + const rows = derivePerNodeRows( + [nodeWith("a", { creationTimestamp: "2026-05-25T03:00:00Z" })], + [], + undefined, + ) + expect(rows[0].age).toBe("21h") + }) + + it("detects ready vs notready conditions", () => { + const rows = derivePerNodeRows( + [nodeWith("a", { ready: true }), nodeWith("b", { ready: false })], + [], + undefined, + ) + expect(rows[0].ready).toBe(true) + expect(rows[1].ready).toBe(false) + }) + + it("collects pressure conditions with status=True", () => { + const rows = derivePerNodeRows( + [nodeWith("a", { pressure: ["MemoryPressure", "DiskPressure"] })], + [], + undefined, + ) + expect(rows[0].pressureConditions).toEqual(["MemoryPressure", "DiskPressure"]) + }) + + it("derives roles from node-role.kubernetes.io labels", () => { + const rows = derivePerNodeRows( + [ + nodeWith("a", { + labels: { + "node-role.kubernetes.io/control-plane": "", + "node-role.kubernetes.io/worker": "", + }, + }), + ], + [], + undefined, + ) + expect(rows[0].roles.sort()).toEqual(["control-plane", "worker"]) + }) + + it("falls back to 
kubernetes.io/role label when present", () => { + const rows = derivePerNodeRows( + [nodeWith("a", { labels: { "kubernetes.io/role": "ingress" } })], + [], + undefined, + ) + expect(rows[0].roles).toEqual(["ingress"]) + }) + + it("returns no roles for a label-less node", () => { + const rows = derivePerNodeRows([nodeWith("a", { labels: {} })], [], undefined) + expect(rows[0].roles).toEqual([]) + }) + + it("filters out an empty role suffix (`node-role.kubernetes.io/=`)", () => { + const rows = derivePerNodeRows( + [nodeWith("a", { labels: { "node-role.kubernetes.io/": "" } })], + [], + undefined, + ) + expect(rows[0].roles).toEqual([]) + }) + + it("reports schedulable=false when spec.unschedulable=true", () => { + const rows = derivePerNodeRows( + [nodeWith("a", { unschedulable: true })], + [], + undefined, + ) + expect(rows[0].schedulable).toBe(false) + }) + + it("collects taint keys when present", () => { + const rows = derivePerNodeRows( + [ + nodeWith("a", { + taints: [{ key: "node.kubernetes.io/unschedulable", effect: "NoSchedule" }], + }), + ], + [], + undefined, + ) + expect(rows[0].taints).toEqual(["node.kubernetes.io/unschedulable"]) + }) + + it("scopes requested totals to pods scheduled on that node", () => { + const rows = derivePerNodeRows( + [ + nodeWith("a", { capacity: { cpu: "8", memory: "16Gi" } }), + nodeWith("b", { capacity: { cpu: "8", memory: "16Gi" } }), + ], + [ + pod("p1", "a", { cpu: "500m", memory: "1Gi" }), + pod("p2", "b", { cpu: "1", memory: "4Gi" }), + ], + undefined, + ) + expect(rows[0].standard.cpu.requested).toBe(0.5) + expect(rows[1].standard.cpu.requested).toBe(1) + }) + + it("populates used per node when metrics are supplied", () => { + const rows = derivePerNodeRows( + [nodeWith("a", { capacity: { cpu: "8", memory: "16Gi" } })], + [], + [metric("a", "1500m", "4Gi")], + ) + expect(rows[0].standard.cpu.used).toBe(1.5) + expect(rows[0].standard.memory.used).toBe(4 * 1024 ** 3) + }) + + it("leaves used undefined per node when 
// Node condition types that are surfaced as "pressure" when their status is "True".
const PRESSURE_TYPES = new Set([
  "MemoryPressure",
  "DiskPressure",
  "PIDPressure",
  "NetworkUnavailable",
])

/**
 * Collects the role names advertised by a node's labels.
 *
 * Every `node-role.kubernetes.io/<role>` label contributes `<role>`. Only
 * when none of those yields a role is the legacy `kubernetes.io/role`
 * label value used instead.
 *
 * @param labels - The node's label map, or undefined when it has none.
 * @returns De-duplicated role names in label iteration order.
 */
function rolesFromLabels(labels: Record<string, string> | undefined): string[] {
  if (!labels) return []
  const rolePrefix = "node-role.kubernetes.io/"
  const suffixes = Object.keys(labels)
    .filter((labelKey) => labelKey.startsWith(rolePrefix))
    .map((labelKey) => labelKey.slice(rolePrefix.length))
    // A label written as `node-role.kubernetes.io/=...` has an empty role
    // part; dropping it avoids an empty pill in the UI.
    .filter((suffix) => suffix.length > 0)
  const uniqueRoles = new Set(suffixes)
  if (uniqueRoles.size > 0) return [...uniqueRoles]
  const legacyRole = labels["kubernetes.io/role"]
  return legacyRole ? [legacyRole] : []
}

/** Returns a fresh totals record with every counter at zero and no `used` figure. */
function emptyTotals(): ResourceTotals {
  const zeroed: ResourceTotals = { capacity: 0, allocatable: 0, requested: 0 }
  return zeroed
}

/**
 * Produces one NodeRow per node, ordered by node name.
 *
 * For each node: capacity and allocatable are parsed from node.status,
 * `requested` is the sum of container requests of the pods whose
 * spec.nodeName equals the node's name, and cpu/memory `used` is filled
 * in only when a NodeMetrics entry with the same name exists.
 *
 * Pods with no spec.nodeName, or bound to a name that matches no node in
 * `nodes`, contribute to no row.
 *
 * NOTE(review): pod status.phase is not inspected, so pods that already
 * finished still count towards `requested`, and only `spec.containers`
 * is summed. Confirm this matches the cluster-wide aggregate.
 *
 * @param nodes - Nodes to build rows for.
 * @param pods - Pods whose requests are attributed to nodes.
 * @param metrics - NodeMetrics entries, or undefined when unavailable.
 * @returns Rows sorted with `localeCompare` on the node name.
 */
export function derivePerNodeRows(
  nodes: Node[],
  pods: Pod[],
  metrics: NodeMetrics[] | undefined,
): NodeRow[] {
  // Later entries with the same name win, exactly like repeated Map.set calls.
  const usageByNode = new Map<string, NodeMetrics>(
    (metrics ?? []).map((entry): [string, NodeMetrics] => [entry.metadata.name, entry]),
  )

  const boundPods = new Map<string, Pod[]>()
  for (const candidate of pods) {
    const target = candidate.spec?.nodeName
    if (target) {
      const existing = boundPods.get(target)
      if (existing) {
        existing.push(candidate)
      } else {
        boundPods.set(target, [candidate])
      }
    }
  }

  const rows = nodes.map((node): NodeRow => {
    const nodeName = node.metadata.name
    const capacity = node.status?.capacity ?? {}
    const allocatable = node.status?.allocatable ?? {}

    const standard: Record<StandardResourceKey, ResourceTotals> = {
      cpu: emptyTotals(),
      memory: emptyTotals(),
      "ephemeral-storage": emptyTotals(),
      pods: emptyTotals(),
    }
    for (const resource of STANDARD_RESOURCE_KEYS) {
      const totals = standard[resource]
      totals.capacity = parseQuantity(capacity[resource] ?? "0")
      totals.allocatable = parseQuantity(allocatable[resource] ?? "0")
    }

    // Extended columns are driven by what the node advertises in capacity.
    const extended: Record<string, ResourceTotals> = {}
    for (const resource of Object.keys(capacity)) {
      if (isExtendedResourceKey(resource)) {
        extended[resource] = {
          capacity: parseQuantity(capacity[resource] ?? "0"),
          allocatable: parseQuantity(allocatable[resource] ?? "0"),
          requested: 0,
        }
      }
    }

    // Flatten pod -> container -> request entries, keeping the original order.
    const requestEntries = (boundPods.get(nodeName) ?? [])
      .flatMap((boundPod) => boundPod.spec?.containers ?? [])
      .flatMap((container) => Object.entries(container.resources?.requests ?? {}))
    for (const [resource, quantity] of requestEntries) {
      if (STANDARD_RESOURCE_KEY_SET.has(resource)) {
        standard[resource as StandardResourceKey].requested += parseQuantity(quantity)
      } else if (extended[resource]) {
        // Requests for resources the node does not advertise are ignored.
        extended[resource].requested += parseQuantity(quantity)
      }
    }

    const nodeUsage = usageByNode.get(nodeName)
    if (nodeUsage) {
      standard.cpu.used = parseQuantity(nodeUsage.usage?.cpu ?? "0")
      standard.memory.used = parseQuantity(nodeUsage.usage?.memory ?? "0")
    }

    const conditions = node.status?.conditions ?? []
    const readyCondition = conditions.find((condition) => condition.type === "Ready")
    const pressureConditions: string[] = []
    for (const condition of conditions) {
      if (condition.status === "True" && PRESSURE_TYPES.has(condition.type)) {
        pressureConditions.push(condition.type)
      }
    }

    return {
      name: nodeName,
      ready: readyCondition?.status === "True",
      schedulable: !node.spec?.unschedulable,
      pressureConditions,
      roles: rolesFromLabels(node.metadata.labels),
      taints: (node.spec?.taints ?? []).map((taint) => taint.key),
      age: formatAge(node.metadata.creationTimestamp),
      creationTimestamp: node.metadata.creationTimestamp,
      standard,
      extended,
    }
  })

  return rows.sort((left, right) => left.name.localeCompare(right.name))
}
Status fields are optional to match the + * realistic case where a NotReady node may not have populated all of + * its capacity / allocatable map yet. + */ + +export interface NodeCondition { + type: string + status: "True" | "False" | "Unknown" + reason?: string + message?: string + lastTransitionTime?: string +} + +export interface NodeStatus { + capacity?: Record + allocatable?: Record + conditions?: NodeCondition[] +} + +export interface NodeTaint { + key: string + value?: string + effect: string + timeAdded?: string +} + +export interface NodeSpec { + unschedulable?: boolean + taints?: NodeTaint[] + providerID?: string +} + +export type Node = K8sResource + +export interface PodContainer { + name: string + resources?: { + requests?: Record + limits?: Record + } +} + +export interface PodSpec { + nodeName?: string + containers: PodContainer[] +} + +export interface PodStatus { + phase?: string +} + +export type Pod = K8sResource + +export interface NodeMetricsUsage { + cpu: string + memory: string +} + +export type NodeMetrics = K8sResource & { + usage?: NodeMetricsUsage + timestamp?: string + window?: string +} + +/** + * Standard, well-known resource keys present in node.status.capacity. + * Everything else is treated as an extended resource and rendered + * verbatim by the cluster-usage page. + */ +export const STANDARD_RESOURCE_KEYS = ["cpu", "memory", "ephemeral-storage", "pods"] as const + +export type StandardResourceKey = (typeof STANDARD_RESOURCE_KEYS)[number] + +export const STANDARD_RESOURCE_KEY_SET: ReadonlySet = new Set(STANDARD_RESOURCE_KEYS) + +/** + * Whether a key from `node.status.capacity` should be treated as an + * extended resource. Standard scheduler resources and every hugepages-* + * variant return false; everything else returns true. 
/**
 * Decides whether a `node.status.capacity` key is an extended resource.
 *
 * @param key - A resource key as it appears in the capacity map.
 * @returns false for members of STANDARD_RESOURCE_KEY_SET and for any key
 *   starting with `hugepages-`; true for everything else.
 */
export function isExtendedResourceKey(key: string): boolean {
  const isStandard = STANDARD_RESOURCE_KEY_SET.has(key)
  const isHugepages = key.startsWith("hugepages-")
  return !(isStandard || isHugepages)
}
*/ + extended: Record +} diff --git a/apps/console/src/lib/k8s-quantity.test.ts b/apps/console/src/lib/k8s-quantity.test.ts new file mode 100644 index 0000000..f598818 --- /dev/null +++ b/apps/console/src/lib/k8s-quantity.test.ts @@ -0,0 +1,122 @@ +import { describe, it, expect } from "vitest" +import { parseQuantity, humanizeBytes, humanizeCpu } from "./k8s-quantity.ts" + +describe("parseQuantity", () => { + it("returns 0 for the empty string", () => { + expect(parseQuantity("")).toBe(0) + }) + + it("parses milli suffix to a fractional value", () => { + expect(parseQuantity("500m")).toBe(0.5) + }) + + it("parses milli values greater than one core", () => { + expect(parseQuantity("1500m")).toBe(1.5) + }) + + it("parses Ki as 1024", () => { + expect(parseQuantity("1Ki")).toBe(1024) + }) + + it("parses Mi as 1024 squared", () => { + expect(parseQuantity("1Mi")).toBe(1024 ** 2) + }) + + it("parses Gi as 1024 cubed", () => { + expect(parseQuantity("1Gi")).toBe(1024 ** 3) + }) + + it("parses Ti as 1024 to the fourth", () => { + expect(parseQuantity("1Ti")).toBe(1024 ** 4) + }) + + it("parses Pi as 1024 to the fifth", () => { + expect(parseQuantity("1Pi")).toBe(1024 ** 5) + }) + + it("parses Ei as 1024 to the sixth", () => { + expect(parseQuantity("1Ei")).toBe(1024 ** 6) + }) + + it("parses decimal k suffix as 1000", () => { + expect(parseQuantity("1k")).toBe(1000) + }) + + it("parses decimal M suffix as 1000 squared", () => { + expect(parseQuantity("1M")).toBe(1_000_000) + }) + + it("parses decimal G suffix as 1000 cubed", () => { + expect(parseQuantity("1G")).toBe(1_000_000_000) + }) + + it("parses a bare integer", () => { + expect(parseQuantity("42")).toBe(42) + }) + + it("parses a bare decimal", () => { + expect(parseQuantity("1.5")).toBe(1.5) + }) + + it("parses a fractional Gi value", () => { + expect(parseQuantity("1.5Gi")).toBe(1.5 * 1024 ** 3) + }) + + it("falls back to 0 for unparseable input", () => { + expect(parseQuantity("abc")).toBe(0) + }) + + it("returns 
0 for a bare suffix instead of poisoning totals with NaN", () => { + // A malformed quantity (just a suffix, no number) must not propagate NaN + // into the aggregated totals and UI percentages. + expect(parseQuantity("m")).toBe(0) + expect(parseQuantity("Gi")).toBe(0) + expect(parseQuantity("Ki")).toBe(0) + }) + + it("parses zero", () => { + expect(parseQuantity("0")).toBe(0) + }) + + it("parses zero with a suffix", () => { + expect(parseQuantity("0Gi")).toBe(0) + }) +}) + +describe("humanizeBytes", () => { + it("formats sub-kilobyte values with a B suffix", () => { + expect(humanizeBytes(0)).toBe("0B") + expect(humanizeBytes(1023)).toBe("1023B") + }) + + it("formats megabytes as Mi without decimals", () => { + expect(humanizeBytes(1024 ** 2)).toBe("1Mi") + }) + + it("formats gigabytes as Gi with one decimal", () => { + expect(humanizeBytes(1.5 * 1024 ** 3)).toBe("1.5Gi") + }) + + it("formats terabytes as Ti with one decimal", () => { + expect(humanizeBytes(1024 ** 4)).toBe("1.0Ti") + }) +}) + +describe("humanizeCpu", () => { + it("formats zero as 0m", () => { + expect(humanizeCpu(0)).toBe("0m") + }) + + it("formats half a core as 500m", () => { + expect(humanizeCpu(0.5)).toBe("500m") + }) + + it("formats an integer core count without decimals", () => { + expect(humanizeCpu(1)).toBe("1") + expect(humanizeCpu(2)).toBe("2") + }) + + it("formats a non-integer core count with two decimals", () => { + expect(humanizeCpu(1.5)).toBe("1.50") + }) +}) diff --git a/apps/console/src/lib/k8s-quantity.ts b/apps/console/src/lib/k8s-quantity.ts new file mode 100644 index 0000000..7a2132a --- /dev/null +++ b/apps/console/src/lib/k8s-quantity.ts @@ -0,0 +1,38 @@ +/** + * Parse a Kubernetes resource.Quantity string into a numeric value in + * the canonical units (cores for CPU, bytes for memory). Behaviour is + * preserved verbatim from the QuotaDisplay helpers this module was + * extracted from; see the test file for the pinned edge cases. 
/**
 * Parse a Kubernetes resource.Quantity string into a plain number in
 * canonical units (cores for CPU, bytes for memory).
 *
 * Supported suffixes:
 * - binary: `Ki`, `Mi`, `Gi`, `Ti`, `Pi`, `Ei` (powers of 1024)
 * - decimal: `n`, `u`, `m`, `k`, `M`, `G`, `T`, `P`, `E` (powers of 1000)
 *
 * The sub-unit suffixes matter for metrics.k8s.io, which reports CPU usage
 * in nanocores (e.g. `259468227n`); without them such a value would be
 * read as hundreds of millions of cores.
 *
 * Parsing is deliberately lenient: the numeric part is read with
 * `parseFloat` (so scientific notation such as `1E3` works) and the suffix
 * is detected with `endsWith`.
 *
 * @param s - The quantity string; may be empty.
 * @returns The numeric value, or 0 for empty or unparseable input.
 */
export function parseQuantity(s: string): number {
  if (!s) return 0
  // A malformed quantity (e.g. a bare suffix like "m") parses to NaN, which
  // would poison every total and percentage it feeds into. Treat it as 0.
  const n = parseFloat(s)
  if (!Number.isFinite(n)) return 0
  // Binary SI suffixes (powers of 1024). Checked before the one-letter
  // decimal suffixes; none of them ends in a decimal suffix letter, so the
  // order is for readability only.
  if (s.endsWith("Ki")) return n * 1024
  if (s.endsWith("Mi")) return n * 1024 ** 2
  if (s.endsWith("Gi")) return n * 1024 ** 3
  if (s.endsWith("Ti")) return n * 1024 ** 4
  if (s.endsWith("Pi")) return n * 1024 ** 5
  if (s.endsWith("Ei")) return n * 1024 ** 6
  // Sub-unit decimal suffixes. Division (not multiplication by 1e-9 etc.)
  // keeps results such as 250000000n === 0.25 exact.
  if (s.endsWith("n")) return n / 1e9
  if (s.endsWith("u")) return n / 1e6
  if (s.endsWith("m")) return n / 1000
  // Decimal SI suffixes (powers of 1000) — Kubernetes uses lowercase k.
  if (s.endsWith("k")) return n * 1000
  if (s.endsWith("M")) return n * 1000 ** 2
  if (s.endsWith("G")) return n * 1000 ** 3
  if (s.endsWith("T")) return n * 1000 ** 4
  if (s.endsWith("P")) return n * 1000 ** 5
  // A trailing "E" is the exa suffix; "1E3" ends in a digit and was already
  // handled as an exponent by parseFloat.
  if (s.endsWith("E")) return n * 1000 ** 6
  return n
}

/**
 * Format a byte count with a binary suffix.
 *
 * Ti and Gi use one decimal place, Mi uses none, and anything below 1Mi is
 * printed as the raw number followed by `B`.
 *
 * @param bytes - The size in bytes.
 * @returns The formatted string, e.g. `1.5Gi`, `1Mi`, `1023B`.
 */
export function humanizeBytes(bytes: number): string {
  if (bytes >= 1024 ** 4) return `${(bytes / 1024 ** 4).toFixed(1)}Ti`
  if (bytes >= 1024 ** 3) return `${(bytes / 1024 ** 3).toFixed(1)}Gi`
  if (bytes >= 1024 ** 2) return `${(bytes / 1024 ** 2).toFixed(0)}Mi`
  return `${bytes}B`
}

/**
 * Format a CPU amount given in cores.
 *
 * Values below one core are shown as rounded millicores (`500m`); whole
 * cores are shown without decimals (`2`); other values use two decimals
 * (`1.50`).
 *
 * @param val - The CPU amount in cores.
 * @returns The formatted string.
 */
export function humanizeCpu(val: number): string {
  if (val < 1) return `${Math.round(val * 1000)}m`
  return `${val % 1 === 0 ? val : val.toFixed(2)}`
}
{ + apiVersion: "v1", + kind: "NodeList", + metadata: {}, + items: [], + }) as K8sList + } + if (plural === "pods") { + if (config.pods instanceof K8sApiError) throw config.pods + return (config.pods ?? { + apiVersion: "v1", + kind: "PodList", + metadata: {}, + items: [], + }) as K8sList + } + return { apiVersion: "v1", kind: `${plural}List`, metadata: {}, items: [] } + }) + vi.spyOn(client, "getApiGroups").mockResolvedValue( + config.groups ?? groupsWithoutMetrics, + ) + return client +} + +describe("ClusterUsagePage", () => { + it("renders a spinner while nodes are loading", () => { + const client = makeClient({ nodes: "pending" }) + renderWithK8sProvider(, { client }) + expect(screen.getByText(/loading/i)).toBeInTheDocument() + }) + + it("renders both panels on a healthy cluster with metrics", async () => { + const client = makeClient({ + nodes: nodesListFixture, + pods: podsListFixture, + metrics: nodeMetricsListFixture, + groups: groupsWithMetrics, + }) + renderWithK8sProvider(, { client }) + expect(await screen.findByText("Cluster Usage")).toBeInTheDocument() + // "CPU" appears in both the aggregate card and the table column header, + // so assert via the aggregate-specific "Allocatable" label instead. 
+ expect(await screen.findAllByText(/allocatable/i)).not.toHaveLength(0) + expect(await screen.findByText("worker-gpu-1")).toBeInTheDocument() + }) + + it("renders the empty state when no nodes exist", async () => { + const client = makeClient({ + nodes: { + apiVersion: "v1", + kind: "NodeList", + metadata: {}, + items: [], + } as K8sList, + pods: { + apiVersion: "v1", + kind: "PodList", + metadata: {}, + items: [], + } as K8sList, + }) + renderWithK8sProvider(, { client }) + expect(await screen.findByText(/no nodes found/i)).toBeInTheDocument() + }) + + it("renders an error block when the nodes-list call fails", async () => { + const client = makeClient({ nodes: new K8sApiError(500, "server error") }) + renderWithK8sProvider(, { client }) + await waitFor(() => { + expect(screen.getByText(/failed to load cluster nodes/i)).toBeInTheDocument() + }) + }) + + it("renders a permission-denied block with a back link on 403", async () => { + const client = makeClient({ nodes: new K8sApiError(403, "forbidden") }) + renderWithK8sProvider(, { client }) + expect( + await screen.findByText(/you do not have permission to view cluster nodes/i), + ).toBeInTheDocument() + const back = screen.getByRole("link", { name: /back to console/i }) + expect(back.getAttribute("href")).toBe("/console") + }) + + it("propagates pods-unavailable to the aggregate panel and the table", async () => { + const client = makeClient({ + nodes: nodesListFixture, + pods: new K8sApiError(403, "no pod read"), + groups: groupsWithoutMetrics, + }) + renderWithK8sProvider(, { client }) + await screen.findAllByText(/allocatable/i) + expect( + screen.getAllByTitle("Requires cluster-wide pod read access").length, + ).toBeGreaterThan(0) + }) + + it("renders the node-summary line in the aggregates header", async () => { + const client = makeClient({ + nodes: nodesListFixture, + pods: podsListFixture, + groups: groupsWithoutMetrics, + }) + renderWithK8sProvider(, { client }) + await screen.findByText("3 nodes") + 
expect( + screen.getByText(/3 Ready · 0 NotReady · 0 SchedulingDisabled/), + ).toBeInTheDocument() + }) + + it("omits the Used line everywhere when metrics-server is not registered", async () => { + const client = makeClient({ + nodes: nodesListFixture, + pods: podsListFixture, + groups: groupsWithoutMetrics, + }) + renderWithK8sProvider(, { client }) + // Wait for the page to settle by waiting on an aggregate-card label. + await screen.findAllByText(/allocatable/i) + expect(screen.queryByText(/used/i)).toBeNull() + }) +}) diff --git a/apps/console/src/routes/ClusterUsagePage.tsx b/apps/console/src/routes/ClusterUsagePage.tsx new file mode 100644 index 0000000..b59f877 --- /dev/null +++ b/apps/console/src/routes/ClusterUsagePage.tsx @@ -0,0 +1,88 @@ +import { Link } from "react-router" +import { Section, Spinner } from "@cozystack/ui" +import { useClusterUsageData } from "../hooks/useClusterUsageData.tsx" +import { ClusterUsageAggregates } from "../components/cluster-usage/ClusterUsageAggregates.tsx" +import { ClusterUsageTable } from "../components/cluster-usage/ClusterUsageTable.tsx" + +/** + * Administration → Cluster Usage. Single cluster-scoped page that + * renders aggregate utilisation on top and a per-node table below. + * Both panels read from the same useClusterUsageData composite hook, + * so they always agree on totals. + * + * Tenant-scoped users never reach this page through normal navigation + * because the sidebar entry is gated by a SelfSubjectAccessReview on + * `nodes list`. On direct URL navigation a 403 message with a link + * back to the console is shown instead of a browser 403; richer + * page-level fallbacks (read-only view via cached metrics, etc.) are + * explicitly out of scope for the first iteration. 
+ */ +export function ClusterUsagePage() { + const { + nodes, + perNode, + aggregates, + nodeSummary, + isLoading, + error, + errorStatus, + podsUnavailable, + } = useClusterUsageData() + const extendedKeys = Object.keys(aggregates.extended).sort() + + return ( +
+
+

Cluster Usage

+

+ Cluster-scoped capacity, allocation and usage across all nodes, + including any discovered extended resources. +

+
+ {isLoading ? ( +
+ Loading… +
+ ) : error ? ( +
+ {errorStatus === 403 ? ( +
+ You do not have permission to view cluster nodes.{" "} + + Back to console + + . +
+ ) : ( +
+ Failed to load cluster nodes: {error.message} +
+ )} +
+ ) : nodes.length === 0 ? ( +
+

No nodes found.

+
+ ) : ( + <> + +
+

Nodes

+ +
+ + )} +
+ ) +} diff --git a/apps/console/src/routes/ConsolePage.routing.test.tsx b/apps/console/src/routes/ConsolePage.routing.test.tsx new file mode 100644 index 0000000..7a4264c --- /dev/null +++ b/apps/console/src/routes/ConsolePage.routing.test.tsx @@ -0,0 +1,53 @@ +import { describe, it, expect, vi } from "vitest" +import { screen } from "@testing-library/react" +import { + K8sClient, + type K8sList, + type APIGroupList, +} from "@cozystack/k8s-client" +import { ConsolePage } from "./ConsolePage.tsx" +import { renderWithK8sProvider } from "../test-utils/render.tsx" + +function makeClient(): K8sClient { + const client = new K8sClient() + vi.spyOn(client, "list").mockImplementation(async (_g, _v, plural) => { + if (plural === "tenantnamespaces") { + return { + apiVersion: "core.cozystack.io/v1alpha1", + kind: "TenantNamespaceList", + metadata: {}, + items: [], + } as K8sList + } + return { + apiVersion: "v1", + kind: `${plural}List`, + metadata: {}, + items: [], + } as K8sList + }) + vi.spyOn(client, "getApiGroups").mockResolvedValue({ + kind: "APIGroupList", + apiVersion: "v1", + groups: [], + } as APIGroupList) + vi.spyOn(client, "create").mockResolvedValue({ + apiVersion: "authorization.k8s.io/v1", + kind: "SelfSubjectAccessReview", + metadata: { name: "" }, + spec: {}, + status: { allowed: false }, + } as unknown) + return client +} + +describe("ConsolePage routing", () => { + it("renders ClusterUsagePage at /cluster-usage", async () => { + const client = makeClient() + renderWithK8sProvider(, { + client, + initialRoute: "/cluster-usage", + }) + expect(await screen.findByText("Cluster Usage")).toBeInTheDocument() + }) +}) diff --git a/apps/console/src/routes/ConsolePage.tsx b/apps/console/src/routes/ConsolePage.tsx index dfb4781..5e0a249 100644 --- a/apps/console/src/routes/ConsolePage.tsx +++ b/apps/console/src/routes/ConsolePage.tsx @@ -4,6 +4,7 @@ import { TenantsPage } from "./TenantsPage.tsx" import { ModulesPage } from "./ModulesPage.tsx" import { 
ExternalIpsPage } from "./ExternalIpsPage.tsx" import { InfoRedirect } from "./InfoRedirect.tsx" +import { ClusterUsagePage } from "./ClusterUsagePage.tsx" import { ApplicationListPage } from "./ApplicationListPage.tsx" import { ApplicationDetailPage } from "./detail/ApplicationDetailPage.tsx" import { ApplicationEditRoute } from "./detail/ApplicationEditRoute.tsx" @@ -23,6 +24,7 @@ export function ConsolePage() { } /> } /> } /> + } /> } diff --git a/apps/console/src/routes/sidebar-sections.test.tsx b/apps/console/src/routes/sidebar-sections.test.tsx new file mode 100644 index 0000000..783d3e2 --- /dev/null +++ b/apps/console/src/routes/sidebar-sections.test.tsx @@ -0,0 +1,117 @@ +import { describe, it, expect, vi } from "vitest" +import { renderHook, waitFor } from "@testing-library/react" +import { QueryClient, QueryClientProvider } from "@tanstack/react-query" +import { + K8sClient, + K8sProvider, + K8sApiError, + type K8sList, + type SelfSubjectAccessReview, +} from "@cozystack/k8s-client" +import type { ReactNode } from "react" +import { useConsoleSidebarSections } from "./sidebar-sections.tsx" + +const emptyAppDefList: K8sList = { + apiVersion: "cozystack.io/v1alpha1", + kind: "ApplicationDefinitionList", + metadata: {}, + items: [], +} + +function ssarResponse(allowed: boolean): SelfSubjectAccessReview { + return { + apiVersion: "authorization.k8s.io/v1", + kind: "SelfSubjectAccessReview", + metadata: { name: "" }, + spec: { resourceAttributes: { resource: "nodes", verb: "list" } }, + status: { allowed }, + } +} + +interface ClientConfig { + ssar?: SelfSubjectAccessReview | "pending" | K8sApiError +} + +function makeClient(config: ClientConfig = {}): K8sClient { + const client = new K8sClient() + vi.spyOn(client, "list").mockResolvedValue(emptyAppDefList as K8sList) + vi.spyOn(client, "create").mockImplementation(async () => { + if (config.ssar === "pending") return new Promise(() => ({})) as never + if (config.ssar instanceof K8sApiError) throw config.ssar 
+ return (config.ssar ?? ssarResponse(false)) as unknown + }) + return client +} + +function makeWrapper(client: K8sClient) { + const queryClient = new QueryClient({ + defaultOptions: { queries: { retry: false, gcTime: 0 } }, + }) + return function Wrapper({ children }: { children: ReactNode }) { + return ( + + + {children} + + + ) + } +} + +function findItem(sections: ReturnType, label: string) { + for (const section of sections) { + const found = section.items.find((i) => i.label === label) + if (found) return found + } + return undefined +} + +describe("useConsoleSidebarSections — Cluster Usage gate", () => { + it("renders the Cluster Usage entry when SSAR allows nodes list", async () => { + const client = makeClient({ ssar: ssarResponse(true) }) + const { result } = renderHook(() => useConsoleSidebarSections(), { + wrapper: makeWrapper(client), + }) + await waitFor(() => + expect(findItem(result.current, "Cluster Usage")).toBeDefined(), + ) + expect(findItem(result.current, "Cluster Usage")?.to).toBe( + "/console/cluster-usage", + ) + }) + + it("hides the Cluster Usage entry when SSAR denies nodes list", async () => { + const client = makeClient({ ssar: ssarResponse(false) }) + const { result } = renderHook(() => useConsoleSidebarSections(), { + wrapper: makeWrapper(client), + }) + // Wait until the SSAR request has actually fired (so the absence is the + // result of a deny, not of the query still being in flight) and the + // gated entry is not present. 
+ await waitFor(() => { + expect(client.create).toHaveBeenCalled() + expect(findItem(result.current, "Cluster Usage")).toBeUndefined() + }) + }) + + it("hides the Cluster Usage entry while SSAR is still loading (no flicker)", () => { + const client = makeClient({ ssar: "pending" }) + const { result } = renderHook(() => useConsoleSidebarSections(), { + wrapper: makeWrapper(client), + }) + expect(findItem(result.current, "Cluster Usage")).toBeUndefined() + }) + + it("hides the Cluster Usage entry on SSAR error", async () => { + const client = makeClient({ ssar: new K8sApiError(500, "boom") }) + const { result } = renderHook(() => useConsoleSidebarSections(), { + wrapper: makeWrapper(client), + }) + // Wait until the failing SSAR request has fired and settled; the gated + // entry must stay absent rather than relying on an arbitrary delay. + await waitFor(() => { + expect(client.create).toHaveBeenCalled() + expect(findItem(result.current, "Cluster Usage")).toBeUndefined() + }) + }) +}) diff --git a/apps/console/src/routes/sidebar-sections.tsx b/apps/console/src/routes/sidebar-sections.tsx index 3e678b2..a37326a 100644 --- a/apps/console/src/routes/sidebar-sections.tsx +++ b/apps/console/src/routes/sidebar-sections.tsx @@ -3,6 +3,7 @@ import { Archive, Cloud, Database, + Gauge, Globe, Info, LayoutGrid, @@ -13,6 +14,7 @@ import { type LucideIcon, } from "lucide-react" import type { SidebarSection } from "@cozystack/ui" +import { useSelfSubjectAccessReview } from "@cozystack/k8s-client" import { useApplicationDefinitions, groupByCategory } from "../lib/app-definitions.ts" import { humanizeKind } from "../lib/humanize.ts" import { @@ -69,6 +71,17 @@ export function useMarketplaceSidebarSections(): SidebarSection[] { export function useConsoleSidebarSections(): SidebarSection[] { const { data } = useApplicationDefinitions() const grouped = useMemo(() => groupByCategory(data), [data]) + // Permission gate for the Cluster Usage entry: only operators with + // cluster-wide 
nodes/list see the menu item. Loading and error states + // resolve as "not allowed" so the entry never flickers in then out + // for users who can't see it. + const clusterUsageReview = useSelfSubjectAccessReview({ + resourceAttributes: { resource: "nodes", verb: "list" }, + }) + const canSeeClusterUsage = + !clusterUsageReview.isLoading && + !clusterUsageReview.error && + clusterUsageReview.allowed return useMemo(() => { const sorted = [...grouped] @@ -109,6 +122,9 @@ export function useConsoleSidebarSections(): SidebarSection[] { const administrationSection: SidebarSection = { title: "Administration", items: [ + ...(canSeeClusterUsage + ? [{ label: "Cluster Usage", to: "/console/cluster-usage", icon: Gauge }] + : []), { label: "Info", to: "/console/info", icon: Info }, { label: "Modules", to: "/console/modules", icon: ToyBrick }, { label: "External IPs", to: "/console/external-ips", icon: Globe }, @@ -117,5 +133,5 @@ export function useConsoleSidebarSections(): SidebarSection[] { } return [...categorySections, backupsSection, administrationSection] - }, [grouped]) + }, [grouped, canSeeClusterUsage]) } diff --git a/apps/console/src/test-utils/fixtures/node-metrics.ts b/apps/console/src/test-utils/fixtures/node-metrics.ts new file mode 100644 index 0000000..6813ae7 --- /dev/null +++ b/apps/console/src/test-utils/fixtures/node-metrics.ts @@ -0,0 +1,54 @@ +import type { K8sList, K8sResource } from "@cozystack/k8s-client" + +interface NodeMetricsUsage { + cpu: string + memory: string +} + +interface NodeMetricsFields { + usage?: NodeMetricsUsage + timestamp?: string + window?: string +} + +export type NodeMetricsFixture = K8sResource & NodeMetricsFields + +/** + * NodeMetrics entries matching the node fixtures. metrics.k8s.io reports + * actual usage as Kubernetes quantities just like capacity, so the same + * parseQuantity path renders both numbers. 
+ */ + +export const metricsControlPlane: NodeMetricsFixture = { + apiVersion: "metrics.k8s.io/v1beta1", + kind: "NodeMetrics", + metadata: { name: "cp-1" }, + usage: { cpu: "150m", memory: "1500Mi" }, + timestamp: "2026-05-26T17:00:00Z", + window: "30s", +} + +export const metricsWorker: NodeMetricsFixture = { + apiVersion: "metrics.k8s.io/v1beta1", + kind: "NodeMetrics", + metadata: { name: "worker-1" }, + usage: { cpu: "400m", memory: "8Gi" }, + timestamp: "2026-05-26T17:00:00Z", + window: "30s", +} + +export const metricsGpuWorker: NodeMetricsFixture = { + apiVersion: "metrics.k8s.io/v1beta1", + kind: "NodeMetrics", + metadata: { name: "worker-gpu-1" }, + usage: { cpu: "2", memory: "20Gi" }, + timestamp: "2026-05-26T17:00:00Z", + window: "30s", +} + +export const nodeMetricsListFixture: K8sList = { + apiVersion: "metrics.k8s.io/v1beta1", + kind: "NodeMetricsList", + metadata: { resourceVersion: "300" }, + items: [metricsControlPlane, metricsWorker, metricsGpuWorker], +} diff --git a/apps/console/src/test-utils/fixtures/nodes.ts b/apps/console/src/test-utils/fixtures/nodes.ts new file mode 100644 index 0000000..8a7c734 --- /dev/null +++ b/apps/console/src/test-utils/fixtures/nodes.ts @@ -0,0 +1,86 @@ +import type { K8sList, K8sResource } from "@cozystack/k8s-client" + +/** + * Three node fixtures cover the three shapes the cluster-usage page must + * render: control-plane (no extended resources), plain worker, worker + * with a single NVIDIA GPU. Numbers are small but realistic and chosen + * so hand-computed aggregates over the set stay obvious. + * + * creationTimestamp is fixed; tests that assert on Age must stub the + * clock via vi.setSystemTime. 
+ */ + +interface NodeStatus { + capacity?: Record + allocatable?: Record + conditions?: Array<{ + type: string + status: "True" | "False" | "Unknown" + reason?: string + message?: string + }> +} + +export type NodeFixture = K8sResource + +export const nodeControlPlane: NodeFixture = { + apiVersion: "v1", + kind: "Node", + metadata: { + name: "cp-1", + creationTimestamp: "2026-01-01T00:00:00Z", + labels: { "node-role.kubernetes.io/control-plane": "" }, + }, + status: { + capacity: { cpu: "4", memory: "8Gi", "ephemeral-storage": "100Gi", pods: "110" }, + allocatable: { cpu: "4", memory: "8Gi", "ephemeral-storage": "100Gi", pods: "110" }, + conditions: [{ type: "Ready", status: "True" }], + }, +} + +export const nodeWorker: NodeFixture = { + apiVersion: "v1", + kind: "Node", + metadata: { + name: "worker-1", + creationTimestamp: "2026-02-01T00:00:00Z", + }, + status: { + capacity: { cpu: "8", memory: "32Gi", "ephemeral-storage": "500Gi", pods: "220" }, + allocatable: { cpu: "8", memory: "32Gi", "ephemeral-storage": "500Gi", pods: "220" }, + conditions: [{ type: "Ready", status: "True" }], + }, +} + +export const nodeGpuWorker: NodeFixture = { + apiVersion: "v1", + kind: "Node", + metadata: { + name: "worker-gpu-1", + creationTimestamp: "2026-03-01T00:00:00Z", + }, + status: { + capacity: { + cpu: "16", + memory: "64Gi", + "ephemeral-storage": "1Ti", + pods: "220", + "nvidia.com/gpu": "1", + }, + allocatable: { + cpu: "16", + memory: "64Gi", + "ephemeral-storage": "1Ti", + pods: "220", + "nvidia.com/gpu": "1", + }, + conditions: [{ type: "Ready", status: "True" }], + }, +} + +export const nodesListFixture: K8sList = { + apiVersion: "v1", + kind: "NodeList", + metadata: { resourceVersion: "100" }, + items: [nodeControlPlane, nodeWorker, nodeGpuWorker], +} diff --git a/apps/console/src/test-utils/fixtures/pods.ts b/apps/console/src/test-utils/fixtures/pods.ts new file mode 100644 index 0000000..6346b1c --- /dev/null +++ b/apps/console/src/test-utils/fixtures/pods.ts @@ 
-0,0 +1,98 @@ +import type { K8sList, K8sResource } from "@cozystack/k8s-client" + +interface PodSpec { + nodeName?: string + containers: Array<{ + name: string + resources?: { + requests?: Record + limits?: Record + } + }> +} + +interface PodStatus { + phase?: string +} + +export type PodFixture = K8sResource + +/** + * Pod fixtures aligned with the node fixtures: one pod bound to each of + * the three nodes (the one on the GPU worker requests one nvidia.com/gpu) + * plus one unscheduled pod (no spec.nodeName) so the per-node aggregator + * can be verified to skip it correctly. Requests are deliberately small + * so the sums are obvious by inspection. + */ + +export const podOnControlPlane: PodFixture = { + apiVersion: "v1", + kind: "Pod", + metadata: { name: "system-pod", namespace: "kube-system" }, + spec: { + nodeName: "cp-1", + containers: [ + { + name: "main", + resources: { requests: { cpu: "200m", memory: "256Mi" } }, + }, + ], + }, + status: { phase: "Running" }, +} + +export const podOnWorker: PodFixture = { + apiVersion: "v1", + kind: "Pod", + metadata: { name: "tenant-pod", namespace: "tenant-root" }, + spec: { + nodeName: "worker-1", + containers: [ + { + name: "app", + resources: { requests: { cpu: "500m", memory: "1Gi" } }, + }, + ], + }, + status: { phase: "Running" }, +} + +export const podOnGpuWorker: PodFixture = { + apiVersion: "v1", + kind: "Pod", + metadata: { name: "gpu-pod", namespace: "tenant-ml" }, + spec: { + nodeName: "worker-gpu-1", + containers: [ + { + name: "model", + resources: { + requests: { cpu: "100m", memory: "128Mi", "nvidia.com/gpu": "1" }, + }, + }, + ], + }, + status: { phase: "Running" }, +} + +export const podUnscheduled: PodFixture = { + apiVersion: "v1", + kind: "Pod", + metadata: { name: "pending-pod", namespace: "default" }, + spec: { + containers: [ + { + name: "main", + resources: { requests: { cpu: "1", memory: "2Gi" } }, + }, + ], + }, + status: { phase: "Pending" }, +} + +export const podsListFixture: K8sList = { + apiVersion: 
"v1", + kind: "PodList", + metadata: { resourceVersion: "200" }, + items: [podOnControlPlane, podOnWorker, podOnGpuWorker, podUnscheduled], +} diff --git a/apps/console/src/test-utils/mock-k8s-client.ts b/apps/console/src/test-utils/mock-k8s-client.ts new file mode 100644 index 0000000..16a207f --- /dev/null +++ b/apps/console/src/test-utils/mock-k8s-client.ts @@ -0,0 +1,78 @@ +import { vi } from "vitest" +import { K8sClient, K8sApiError, type K8sList } from "@cozystack/k8s-client" + +interface ListOverride { + apiGroup: string + apiVersion: string + plural: string + namespace?: string + result: K8sList | (() => K8sList | Promise>) | K8sApiError +} + +interface GetOverride { + apiGroup: string + apiVersion: string + plural: string + name: string + namespace?: string + result: unknown | (() => unknown | Promise) | K8sApiError +} + +export interface MockK8sClientOverrides { + lists?: ListOverride[] + gets?: GetOverride[] +} + +/** + * Build a K8sClient instance whose network-facing methods (list/get/watch) + * resolve from in-memory overrides instead of fetch. The underlying object + * is a real K8sClient so any method this factory does not stub — including + * ones added to the production class after this file was written — falls + * through to the real implementation; tests that touch new methods are + * expected to spy on them explicitly via vi.spyOn on the returned instance. + * + * Watch is stubbed to return a noop cleanup function. + */ +export function createMockK8sClient(overrides: MockK8sClientOverrides = {}): K8sClient { + const client = new K8sClient({ baseUrl: "/mock" }) + + vi.spyOn(client, "list").mockImplementation( + async (apiGroup, apiVersion, plural, namespace) => { + const match = overrides.lists?.find( + (o) => + o.apiGroup === apiGroup && + o.apiVersion === apiVersion && + o.plural === plural && + (o.namespace ?? undefined) === (namespace ?? 
undefined), + ) + if (!match) { + return { apiVersion, kind: `${plural}List`, metadata: {}, items: [] } as K8sList + } + if (match.result instanceof K8sApiError) throw match.result + const value = typeof match.result === "function" ? await match.result() : match.result + return value as K8sList + }, + ) + + vi.spyOn(client, "get").mockImplementation( + async (apiGroup, apiVersion, plural, name, namespace) => { + const match = overrides.gets?.find( + (o) => + o.apiGroup === apiGroup && + o.apiVersion === apiVersion && + o.plural === plural && + o.name === name && + (o.namespace ?? undefined) === (namespace ?? undefined), + ) + if (!match) { + throw new K8sApiError(404, { message: `mock: no get override for ${plural}/${name}` }) + } + if (match.result instanceof K8sApiError) throw match.result + return typeof match.result === "function" ? await match.result() : match.result + }, + ) + + vi.spyOn(client, "watch").mockReturnValue(() => {}) + + return client +} diff --git a/apps/console/src/test-utils/render.test.tsx b/apps/console/src/test-utils/render.test.tsx new file mode 100644 index 0000000..be842e2 --- /dev/null +++ b/apps/console/src/test-utils/render.test.tsx @@ -0,0 +1,72 @@ +import { describe, it, expect } from "vitest" +import { waitFor } from "@testing-library/react" +import { useK8sList } from "@cozystack/k8s-client" +import { createMockK8sClient } from "./mock-k8s-client.ts" +import { renderWithK8sProvider } from "./render.tsx" +import { nodesListFixture, type NodeFixture } from "./fixtures/nodes.ts" + +function NodeNameList() { + const { data, isLoading } = useK8sList( + { apiGroup: "", apiVersion: "v1", plural: "nodes" }, + { watch: false }, + ) + if (isLoading) return

loading

+ return ( +
    + {data?.items.map((n) =>
  • {n.metadata.name}
  • )} +
+ ) +} + +describe("renderWithK8sProvider", () => { + it("renders a component that consumes useK8sList against an injected mock", async () => { + const client = createMockK8sClient({ + lists: [ + { + apiGroup: "", + apiVersion: "v1", + plural: "nodes", + result: nodesListFixture, + }, + ], + }) + + const { findByText } = renderWithK8sProvider(, { client }) + + expect(await findByText("cp-1")).toBeInTheDocument() + expect(await findByText("worker-1")).toBeInTheDocument() + expect(await findByText("worker-gpu-1")).toBeInTheDocument() + }) + + it("routes the list call through the mock with the requested resource", async () => { + const client = createMockK8sClient({ + lists: [ + { + apiGroup: "", + apiVersion: "v1", + plural: "nodes", + result: nodesListFixture, + }, + ], + }) + + renderWithK8sProvider(, { client }) + + await waitFor(() => { + expect(client.list).toHaveBeenCalledWith( + "", + "v1", + "nodes", + undefined, + expect.any(Object), + ) + }) + }) + + it("returns the queryClient so tests can clear or inspect the cache", () => { + const client = createMockK8sClient() + const { queryClient } = renderWithK8sProvider(

hello

, { client }) + expect(queryClient).toBeDefined() + expect(queryClient.getQueryCache).toBeInstanceOf(Function) + }) +}) diff --git a/apps/console/src/test-utils/render.tsx b/apps/console/src/test-utils/render.tsx new file mode 100644 index 0000000..04adc5d --- /dev/null +++ b/apps/console/src/test-utils/render.tsx @@ -0,0 +1,31 @@ +import { render, type RenderResult } from "@testing-library/react" +import { QueryClient } from "@tanstack/react-query" +import { K8sProvider, type K8sClient } from "@cozystack/k8s-client" +import { MemoryRouter } from "react-router" +import type { ReactElement } from "react" + +export interface RenderWithK8sOptions { + client: K8sClient + initialRoute?: string +} + +/** + * Wraps a React tree in the minimum context needed to exercise K8s hooks + * in isolation: a fresh QueryClient with retries off and gcTime 0 (unused + * query data is garbage-collected right away), the K8sProvider with the + * injected client, and a MemoryRouter so react-router consumers do not blow up. + */ +export function renderWithK8sProvider( + ui: ReactElement, + options: RenderWithK8sOptions, +): RenderResult & { queryClient: QueryClient } { + const queryClient = new QueryClient({ + defaultOptions: { queries: { retry: false, gcTime: 0 } }, + }) + const result = render( + + {ui} + , + ) + return Object.assign(result, { queryClient }) +} diff --git a/packages/k8s-client/src/client.ts b/packages/k8s-client/src/client.ts index ecfd2ed..c8d7a8c 100644 --- a/packages/k8s-client/src/client.ts +++ b/packages/k8s-client/src/client.ts @@ -189,6 +189,10 @@ export class K8sClient { }) } + getApiGroups(): Promise { + return this.request("/apis") + } + watch( apiGroup: string, apiVersion: string, @@ -319,3 +323,20 @@ export interface WatchEvent { type: "ADDED" | "MODIFIED" | "DELETED" | "BOOKMARK" | "ERROR" object: T } + +export interface APIGroupVersion { + groupVersion: string + version: string +} + +export interface APIGroup { + name: string + versions: APIGroupVersion[] + preferredVersion?: 
APIGroupVersion +} + +export interface APIGroupList { + kind: string + apiVersion: string + groups: APIGroup[] +} diff --git a/packages/k8s-client/src/index.ts b/packages/k8s-client/src/index.ts index eec4679..0865e8d 100644 --- a/packages/k8s-client/src/index.ts +++ b/packages/k8s-client/src/index.ts @@ -7,6 +7,9 @@ export type { K8sList, K8sCondition, WatchEvent, + APIGroup, + APIGroupList, + APIGroupVersion, } from "./client.ts" export { K8sProvider, useK8sClient, useConnectionError } from "./provider.tsx" @@ -19,3 +22,13 @@ export { useK8sDelete, } from "./hooks.ts" export type { ResourceRef } from "./hooks.ts" + +export { useApiGroupAvailable } from "./useApiGroupAvailable.ts" + +export { useSelfSubjectAccessReview } from "./useSelfSubjectAccessReview.ts" +export type { + SelfSubjectAccessReview, + SelfSubjectAccessReviewSpec, + ResourceAttributes, + NonResourceAttributes, +} from "./useSelfSubjectAccessReview.ts" diff --git a/packages/k8s-client/src/provider.tsx b/packages/k8s-client/src/provider.tsx index 7fe53c4..04bce76 100644 --- a/packages/k8s-client/src/provider.tsx +++ b/packages/k8s-client/src/provider.tsx @@ -28,17 +28,24 @@ const defaultQueryClient = new QueryClient({ interface K8sProviderProps { config?: K8sClientConfig + /** + * Pre-built K8sClient instance. When supplied, replaces the client that + * would otherwise be constructed from `config`. Primary use case: tests + * that need to inject a mock; production code should pass `config` and + * let the provider build the real client. + */ + client?: K8sClient queryClient?: QueryClient children: ReactNode } -export function K8sProvider({ config, queryClient, children }: K8sProviderProps) { - const client = useMemo(() => new K8sClient(config), [config]) +export function K8sProvider({ config, client, queryClient, children }: K8sProviderProps) { + const resolved = useMemo(() => client ?? new K8sClient(config), [client, config]) const qc = queryClient ?? 
defaultQueryClient return ( - {children} + {children} ) } diff --git a/packages/k8s-client/src/useApiGroupAvailable.ts b/packages/k8s-client/src/useApiGroupAvailable.ts new file mode 100644 index 0000000..712d8eb --- /dev/null +++ b/packages/k8s-client/src/useApiGroupAvailable.ts @@ -0,0 +1,30 @@ +import { useQuery } from "@tanstack/react-query" +import { useK8sClient } from "./provider.tsx" +import type { APIGroupList } from "./client.ts" + +/** + * Returns whether a specific Kubernetes API group is registered on the + * cluster, derived from a single `/apis` discovery call shared across + * all consumers. Result is cached for the lifetime of the QueryClient + * (staleTime: Infinity) — discovery is rarely meaningful to re-poll + * within a session, and callers can invalidate the "k8s-api-groups" + * query key explicitly if the use case ever arises. + * + * Errors are absorbed silently and reported as available=false: the + * caller is asking a discovery question, and "we cannot find out" is + * functionally identical to "not registered" for the cluster-usage + * page's gating logic. + */ +export function useApiGroupAvailable( + groupName: string, +): { available: boolean; isLoading: boolean } { + const client = useK8sClient() + const query = useQuery({ + queryKey: ["k8s-api-groups"], + queryFn: () => client.getApiGroups(), + staleTime: Infinity, + refetchOnWindowFocus: false, + }) + const available = query.data?.groups.some((g) => g.name === groupName) ?? 
false + return { available, isLoading: query.isLoading } +} diff --git a/packages/k8s-client/src/useSelfSubjectAccessReview.ts b/packages/k8s-client/src/useSelfSubjectAccessReview.ts new file mode 100644 index 0000000..eb99135 --- /dev/null +++ b/packages/k8s-client/src/useSelfSubjectAccessReview.ts @@ -0,0 +1,93 @@ +import { useQuery } from "@tanstack/react-query" +import { useK8sClient } from "./provider.tsx" +import type { K8sResource } from "./client.ts" + +export interface ResourceAttributes { + namespace?: string + verb?: string + group?: string + version?: string + resource?: string + subresource?: string + name?: string +} + +export interface NonResourceAttributes { + path?: string + verb?: string +} + +export interface SelfSubjectAccessReviewSpec { + resourceAttributes?: ResourceAttributes + nonResourceAttributes?: NonResourceAttributes +} + +interface SelfSubjectAccessReviewStatus { + allowed: boolean + denied?: boolean + reason?: string + evaluationError?: string +} + +export type SelfSubjectAccessReview = K8sResource< + SelfSubjectAccessReviewSpec, + SelfSubjectAccessReviewStatus +> + +function ssarCacheKey(spec: SelfSubjectAccessReviewSpec): readonly string[] { + const r = spec.resourceAttributes ?? {} + const n = spec.nonResourceAttributes ?? {} + return [ + "ssar", + r.namespace ?? "", + r.group ?? "", + r.version ?? "", + r.resource ?? "", + r.subresource ?? "", + r.verb ?? "", + r.name ?? "", + n.path ?? "", + n.verb ?? "", + ] +} + +/** + * Issues a SelfSubjectAccessReview against the cluster and returns + * whether the current user is allowed to perform the requested action. + * The result is cached per spec for the lifetime of the QueryClient + * (staleTime: Infinity, refetchOnWindowFocus disabled) — discovery-style + * permission checks rarely change mid-session, and a revoked grant + * surfaces on the next page load. + * + * Errors are absorbed and surfaced as allowed=false. 
Callers asking + * 'can I see this UI section' are functionally indifferent to 'denied' + * versus 'cannot determine'; in both cases the section stays hidden. + */ +export function useSelfSubjectAccessReview( + spec: SelfSubjectAccessReviewSpec, +): { allowed: boolean; isLoading: boolean; error: Error | null } { + const client = useK8sClient() + const queryKey = ssarCacheKey(spec) + const query = useQuery({ + queryKey, + queryFn: () => + client.create( + "authorization.k8s.io", + "v1", + "selfsubjectaccessreviews", + { + apiVersion: "authorization.k8s.io/v1", + kind: "SelfSubjectAccessReview", + metadata: { name: "" }, + spec, + }, + ), + staleTime: Infinity, + refetchOnWindowFocus: false, + }) + return { + allowed: query.data?.status?.allowed ?? false, + isLoading: query.isLoading, + error: (query.error as Error | null) ?? null, + } +}