diff --git a/.claude/agents/docs-expert.md b/.claude/agents/docs-expert.md index dbd348c2f..a72646e70 100644 --- a/.claude/agents/docs-expert.md +++ b/.claude/agents/docs-expert.md @@ -1,11 +1,11 @@ --- allowed-tools: Read, Bash(*), WebSearch, WebFetch -description: Subagent that maintains, grows, and evolves the project documentation — finding stale content, gaps for new features, and structural improvements, then reporting findings back to the orchestrator. +description: Subagent that keeps project documentation accurate, sharp, and well-scoped — fixing stale content, adding high-level orientation where missing, trimming low-value prose, and reporting findings back to the orchestrator. --- # Docs Expert -You are a documentation expert. You receive a digest of recent code changes and your mission is threefold: keep the documentation accurate, grow it by writing new content for features and algorithms, and slowly evolve the structure of `docs/` so it stays navigable as the project grows. You report your findings back to the orchestrator — you do NOT open pull requests yourself. +You are a documentation expert. You receive a digest of recent code changes and your mission is to keep the documentation accurate, grow it where it adds value per the philosophy below, and shrink it where it doesn't — sharpening the content over time. You report your findings back to the orchestrator — you do NOT open pull requests yourself. --- @@ -17,6 +17,29 @@ Before doing any investigation, read the `AGENTS.md` file in the repository root You will be given a change digest that includes commit SHAs, file lists, and descriptions of what changed and why. Use this as your starting point. +## Documentation Philosophy + +Good documentation is **not** a prose retelling of the code. A reader can already read the code. What they cannot get from the code alone: + +- **High-level orientation** — what is this subsystem, what problem does it solve, what are its entry points? 
+- **Cross-component connections** — how do pieces relate? E.g. the CR API writes CRDs, the syncer also writes CRDs, both feed the same controller — that relationship is invisible when reading files in isolation. +- **Lifecycle and flow** — a mermaid diagram showing the happy path is worth more than three pages of prose. Use diagrams. +- **Non-obvious constraints** — design decisions that would surprise a reader, things that bit someone at 2am, invariants that aren't enforced by the type system. +- **Code pointers** — "looking for X? → `internal/scheduling/reservations/commitments/`" helps navigation. + +What to avoid: +- Step-by-step descriptions of what a function or controller does — that's just reading the code out loud. +- Field-by-field descriptions of CRDs or structs — those belong as godoc on the type. +- Algorithm walkthroughs that mirror the implementation sequentially. + +**Writing style**: Be concise and precise. Short sentences, no filler words, no restating the obvious. One example where it clarifies; none where the point stands without it. Avoid generic statements that could apply to any project — every sentence should be specific to this subsystem. + +**Example of good scope**: A doc on CR reservations shows the entry points (CR API, syncer), the two CRD types, and a mermaid lifecycle diagram. It does not describe what each reconcile step does. + +**Example of good scope**: A doc on pipeline options lists the available options and their intended use cases, notes any corner cases or gotchas, and points to where they are configured. It does not describe the scheduling algorithm internals. + +--- + ## Documentation Scope Everything under `docs/` is in scope. You may read any files there to build your understanding. @@ -25,7 +48,7 @@ Everything under `docs/` is in scope. You may read any files there to build your ## Phase 1: Investigate -1. **Read all documentation files.** Load each doc file listed above. 
Build a mental model of what the docs currently cover and where they are thin or silent. +1. **Read all documentation files.** Build a mental model of what the docs currently cover and where they are thin, silent, or too verbose. 2. **Cross-reference against changes.** For each notable change in the digest, classify it: @@ -34,28 +57,31 @@ Everything under `docs/` is in scope. You may read any files there to build your | Docs say something that's now **wrong** | Fix it (highest priority) | | Docs reference something that was **removed or deprecated** | Remove or update the section | | A **new feature** was added but the docs don't mention it | Write new documentation for it | - | An **interesting algorithm or technique** was implemented | Document how it works and why it was chosen | - | A setup step, config option, or API changed | Update the relevant doc | - | An existing doc section is **clearly outdated** beyond this week's changes | Note it, but don't fix everything — pick the best one | - | The **docs structure** itself is becoming unwieldy (e.g. one file covers too many topics, related docs are scattered, a folder would group things better) | Note it as a structural improvement candidate | + | An **interesting algorithm or technique** was implemented | Document *why* it was chosen and what constraints drove it — not a step-by-step walkthrough | + | A setup step, config option, or API changed | Update the relevant doc — classify as **Conflict** if it makes existing docs wrong, otherwise **Minor gap** | + | An existing doc section is **clearly outdated** beyond this week's changes | Note it as a **Dead content** or **Conflict** finding; don't fix everything — pick the best one | + | An existing doc section is **too verbose or low-level** | Trim it — but only if the content is easily found by reading one or two source files. Keep it if it saves the reader from cross-checking many files, or if it captures something not obvious from the code alone. 
| + | The **docs structure** itself is becoming unwieldy (e.g. one file covers too many topics, related docs are scattered, a folder would group things better) | Note it as a **Structural** finding | -3. **Read the actual code.** Don't just rely on the digest. For new features and algorithms, read the implementation to understand the design, the tradeoffs, and the behavior well enough to explain it clearly. +3. **Read the actual code.** Don't just rely on the digest. For new features and algorithms, read the implementation to understand the design well enough to explain entry points, cross-component relationships, and the constraints that shaped the approach — not to transcribe what the code does. 4. **Assess the docs structure.** Step back and consider the `docs/` tree as a whole: - Is a single file doing too much and should be split into focused pages? - Are there multiple small files covering related topics that would read better as one? - - Would a new subdirectory help group related docs (e.g. `docs/algorithms/`, `docs/features/`)? + - Would a new subdirectory help group related docs (e.g. `docs/features/`, `docs/guides/`)? - Are there orphan files that nothing links to, or dead files that cover removed functionality? Structural changes are valuable but should be made **slowly and deliberately** — at most one structural change per run, and only when the improvement is clear. Don't reorganize for the sake of reorganizing. 5. **Prioritize what to do.** You will likely find more work than you can do in one pass. That's expected — your job is to make incremental progress each week. Use this priority order: - 1. **Conflicts** — docs that are actively wrong + 1. **Conflict** — docs that are actively wrong 2. **Dead content** — sections referencing removed or deprecated functionality - 3. **New features** — undocumented capabilities that users or developers need to know about - 4. 
**Algorithms and design** — interesting technical approaches worth explaining for future contributors - 5. **Minor gaps** — small omissions in existing docs - 6. **Structural improvements** — reorganizing files, splitting, merging, adding folders + 3. **Verbose content** — prose that duplicates what one or two source files already say clearly + 4. **New feature** — undocumented subsystems or entry points that readers have no orientation for + 5. **Cross-component gap** — relationships between components that are invisible when reading files in isolation + 6. **Algorithm** — why an approach was chosen and what constraints drove it (not how it works) + 7. **Minor gap** — small omissions in existing docs + 8. **Structural** — reorganizing files, splitting, merging, adding folders ## Phase 2: Reason over importance @@ -76,15 +102,18 @@ Return a structured report of what you found. Do NOT open any pull requests or c ## Docs Expert Results ### Documentation Health -- Conflicts found: N (docs that are wrong) -- Dead content found: N (references to removed things) -- Undocumented features: N -- Undocumented algorithms/design: N -- Structural issues: N (files to split, merge, or reorganize) +- Conflicts: N (docs that are wrong) +- Dead content: N (references to removed things) +- Verbose content: N (candidates to trim) +- New features: N +- Cross-component gaps: N +- Algorithm gaps: N +- Minor gaps: N +- Structural: N (files to split, merge, or reorganize) ### Findings For each issue found: -- **Priority**: [Conflict/Dead content/New feature/Algorithm/Minor gap/Structural] +- **Priority**: [Conflict/Dead content/Verbose content/New feature/Cross-component gap/Algorithm/Minor gap/Structural] - **Title**: - **File(s)**: - **Description**: diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d41a0d92..2ddb49d00 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,45 @@ # Changelog +## 2026-07-01 — [#1001](https://github.com/cobaltcore-dev/cortex/pull/1001) + +### cortex v0.2.1 
(sha-44b8aab7) + +Non-breaking changes: +- Clarify MaxSlots guard scope with PAYG pre-allocation — `MaxSlots` caps only the new blind-scheduler slots created per apply call; + PAYG remapping slots and already-existing slots are excluded from the limit, so the guard does not bound the total slot count of a commitment ([#987](https://github.com/cobaltcore-dev/cortex/pull/987)) +- Use public Limes endpoint in knowledge datasource — fixes HTTP 400 errors after Limes enforced domain name matching on `Host` headers ([#999](https://github.com/cobaltcore-dev/cortex/pull/999)) + +### cortex-shim v0.1.4 (sha-44b8aab7) + +Includes updated image sha-44b8aab7. + +### cortex-nova v0.0.78 + +Includes updated charts cortex v0.2.1, cortex-postgres v0.6.7. + +### cortex-cinder v0.0.78 + +Includes updated charts cortex v0.2.1, cortex-postgres v0.6.7. + +### cortex-manila v0.0.78 + +Includes updated charts cortex v0.2.1, cortex-postgres v0.6.7. + +### cortex-crds v0.0.78 + +Includes updated chart cortex v0.2.1. + +### cortex-ironcore v0.0.78 + +Includes updated chart cortex v0.2.1. + +### cortex-pods v0.0.78 + +Includes updated chart cortex v0.2.1. + +### cortex-placement-shim v0.1.4 + +Includes updated chart cortex-shim v0.1.4. + ## 2026-06-29 — [#990](https://github.com/cobaltcore-dev/cortex/pull/990) ### cortex v0.2.0 (sha-124ec226) diff --git a/helm/bundles/cortex-cinder/Chart.yaml b/helm/bundles/cortex-cinder/Chart.yaml index dd05d0ba3..dfaa6cdce 100644 --- a/helm/bundles/cortex-cinder/Chart.yaml +++ b/helm/bundles/cortex-cinder/Chart.yaml @@ -5,23 +5,23 @@ apiVersion: v2 name: cortex-cinder description: A Helm chart deploying Cortex for Cinder. 
type: application -version: 0.0.77 +version: 0.0.78 appVersion: 0.1.0 dependencies: # from: file://../../library/cortex-postgres - name: cortex-postgres repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.6.6 + version: 0.6.7 # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.2.0 + version: 0.2.1 alias: cortex-knowledge-controllers # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.2.0 + version: 0.2.1 alias: cortex-scheduling-controllers # Owner info adds a configmap to the kubernetes cluster with information on diff --git a/helm/bundles/cortex-crds/Chart.yaml b/helm/bundles/cortex-crds/Chart.yaml index 1579f2a97..8f6b4cda1 100644 --- a/helm/bundles/cortex-crds/Chart.yaml +++ b/helm/bundles/cortex-crds/Chart.yaml @@ -5,13 +5,13 @@ apiVersion: v2 name: cortex-crds description: A Helm chart deploying Cortex CRDs. type: application -version: 0.0.77 +version: 0.0.78 appVersion: 0.1.0 dependencies: # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.2.0 + version: 0.2.1 # Owner info adds a configmap to the kubernetes cluster with information on # the service owner. This makes it easier to find out who to contact in case diff --git a/helm/bundles/cortex-ironcore/Chart.yaml b/helm/bundles/cortex-ironcore/Chart.yaml index 5ba2539bc..762ee557c 100644 --- a/helm/bundles/cortex-ironcore/Chart.yaml +++ b/helm/bundles/cortex-ironcore/Chart.yaml @@ -5,13 +5,13 @@ apiVersion: v2 name: cortex-ironcore description: A Helm chart deploying Cortex for IronCore. 
type: application -version: 0.0.77 +version: 0.0.78 appVersion: 0.1.0 dependencies: # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.2.0 + version: 0.2.1 # Owner info adds a configmap to the kubernetes cluster with information on # the service owner. This makes it easier to find out who to contact in case diff --git a/helm/bundles/cortex-manila/Chart.yaml b/helm/bundles/cortex-manila/Chart.yaml index 487f6789f..76be70c05 100644 --- a/helm/bundles/cortex-manila/Chart.yaml +++ b/helm/bundles/cortex-manila/Chart.yaml @@ -5,23 +5,23 @@ apiVersion: v2 name: cortex-manila description: A Helm chart deploying Cortex for Manila. type: application -version: 0.0.77 +version: 0.0.78 appVersion: 0.1.0 dependencies: # from: file://../../library/cortex-postgres - name: cortex-postgres repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.6.6 + version: 0.6.7 # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.2.0 + version: 0.2.1 alias: cortex-knowledge-controllers # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.2.0 + version: 0.2.1 alias: cortex-scheduling-controllers # Owner info adds a configmap to the kubernetes cluster with information on diff --git a/helm/bundles/cortex-nova/Chart.yaml b/helm/bundles/cortex-nova/Chart.yaml index 666d4a672..b803122b4 100644 --- a/helm/bundles/cortex-nova/Chart.yaml +++ b/helm/bundles/cortex-nova/Chart.yaml @@ -5,23 +5,23 @@ apiVersion: v2 name: cortex-nova description: A Helm chart deploying Cortex for Nova. 
type: application -version: 0.0.77 +version: 0.0.78 appVersion: 0.1.0 dependencies: # from: file://../../library/cortex-postgres - name: cortex-postgres repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.6.6 + version: 0.6.7 # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.2.0 + version: 0.2.1 alias: cortex-knowledge-controllers # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.2.0 + version: 0.2.1 alias: cortex-scheduling-controllers # Owner info adds a configmap to the kubernetes cluster with information on diff --git a/helm/bundles/cortex-placement-shim/Chart.yaml b/helm/bundles/cortex-placement-shim/Chart.yaml index 1179a123d..95ecb8cec 100644 --- a/helm/bundles/cortex-placement-shim/Chart.yaml +++ b/helm/bundles/cortex-placement-shim/Chart.yaml @@ -5,13 +5,13 @@ apiVersion: v2 name: cortex-placement-shim description: A Helm chart deploying the Cortex placement shim. type: application -version: 0.1.3 +version: 0.1.4 appVersion: 0.1.0 dependencies: # from: file://../../library/cortex-shim - name: cortex-shim repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.1.3 + version: 0.1.4 # Owner info adds a configmap to the kubernetes cluster with information on # the service owner. This makes it easier to find out who to contact in case # of issues. See: https://github.com/sapcc/helm-charts/pkgs/container/helm-charts%2Fowner-info diff --git a/helm/bundles/cortex-pods/Chart.yaml b/helm/bundles/cortex-pods/Chart.yaml index 427ce5437..6a773d921 100644 --- a/helm/bundles/cortex-pods/Chart.yaml +++ b/helm/bundles/cortex-pods/Chart.yaml @@ -5,13 +5,13 @@ apiVersion: v2 name: cortex-pods description: A Helm chart deploying Cortex for Pods. 
type: application -version: 0.0.77 +version: 0.0.78 appVersion: 0.1.0 dependencies: # from: file://../../library/cortex - name: cortex repository: oci://ghcr.io/cobaltcore-dev/cortex/charts - version: 0.2.0 + version: 0.2.1 # Owner info adds a configmap to the kubernetes cluster with information on # the service owner. This makes it easier to find out who to contact in case diff --git a/helm/library/cortex-postgres/Chart.yaml b/helm/library/cortex-postgres/Chart.yaml index 194b0e609..207c80572 100644 --- a/helm/library/cortex-postgres/Chart.yaml +++ b/helm/library/cortex-postgres/Chart.yaml @@ -5,5 +5,5 @@ apiVersion: v2 name: cortex-postgres description: Postgres setup for Cortex. type: application -version: 0.6.6 +version: 0.6.7 appVersion: "sha-af707446" diff --git a/helm/library/cortex-shim/Chart.yaml b/helm/library/cortex-shim/Chart.yaml index 81d4a4f15..f221a68b6 100644 --- a/helm/library/cortex-shim/Chart.yaml +++ b/helm/library/cortex-shim/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: cortex-shim description: A Helm chart to distribute cortex shims. type: application -version: 0.1.3 -appVersion: "sha-124ec226" +version: 0.1.4 +appVersion: "sha-44b8aab7" icon: "https://example.com/icon.png" dependencies: [] diff --git a/helm/library/cortex/Chart.yaml b/helm/library/cortex/Chart.yaml index 19c33d1e6..5d1b9bdef 100644 --- a/helm/library/cortex/Chart.yaml +++ b/helm/library/cortex/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: cortex description: A Helm chart to distribute cortex. 
type: application -version: 0.2.0 -appVersion: "sha-124ec226" +version: 0.2.1 +appVersion: "sha-44b8aab7" icon: "https://example.com/icon.png" dependencies: [] diff --git a/internal/knowledge/datasources/plugins/openstack/limes/limes_api.go b/internal/knowledge/datasources/plugins/openstack/limes/limes_api.go index c5e8a7b79..ce77e9ac0 100644 --- a/internal/knowledge/datasources/plugins/openstack/limes/limes_api.go +++ b/internal/knowledge/datasources/plugins/openstack/limes/limes_api.go @@ -55,8 +55,9 @@ func (api *limesAPI) Init(ctx context.Context) error { // See: https://github.com/sapcc/limes/blob/5ea068b/docs/users/api-example.md?plain=1#L23 provider := api.keystoneClient.Client() serviceType := "resources" - sameAsKeystone := api.keystoneClient.Availability() - url, err := api.keystoneClient.FindEndpoint(sameAsKeystone, serviceType) + // Always use the public endpoint: Limes enforces that requests arrive on its configured public + // hostname (LIMES_API_DOMAIN_NAME_V1) and rejects internal-URL requests with 400. + url, err := api.keystoneClient.FindEndpoint("public", serviceType) if err != nil { return err } diff --git a/internal/knowledge/datasources/plugins/openstack/limes/limes_api_test.go b/internal/knowledge/datasources/plugins/openstack/limes/limes_api_test.go index ec5eacc44..781307780 100644 --- a/internal/knowledge/datasources/plugins/openstack/limes/limes_api_test.go +++ b/internal/knowledge/datasources/plugins/openstack/limes/limes_api_test.go @@ -20,6 +20,29 @@ func setupLimesMockServer(handler http.HandlerFunc) (*httptest.Server, keystone. 
return server, &testlibKeystone.MockKeystoneClient{Url: server.URL + "/"} } +func TestLimesAPI_Init_UsesPublicEndpoint(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {})) + defer server.Close() + + var gotAvailability string + k := &testlibKeystone.MockKeystoneClient{ + Url: server.URL + "/", + FindEndpointOverride: func(availability, serviceType string) { + if serviceType == "resources" { + gotAvailability = availability + } + }, + } + + api := NewLimesAPI(datasources.Monitor{}, k, v1alpha1.LimesDatasource{}).(*limesAPI) + if err := api.Init(t.Context()); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if gotAvailability != "public" { + t.Errorf("expected public availability for Limes endpoint, got %q", gotAvailability) + } +} + func TestNewLimesAPI(t *testing.T) { mon := datasources.Monitor{} k := &testlibKeystone.MockKeystoneClient{} diff --git a/internal/scheduling/reservations/commitments/client.go b/internal/scheduling/reservations/commitments/client.go index 0a08d92a0..d57d2bb1e 100644 --- a/internal/scheduling/reservations/commitments/client.go +++ b/internal/scheduling/reservations/commitments/client.go @@ -89,7 +89,8 @@ func (c *commitmentsClient) Init(ctx context.Context, client client.Client, conf Microversion: "2.61", } - // Get the limes endpoint. + // Get the limes endpoint — always use public: Limes enforces that requests arrive on its + // configured public hostname (LIMES_API_DOMAIN_NAME_V1) and rejects internal-URL requests with 400. 
url = must.Return(c.provider.EndpointLocator(gophercloud.EndpointOpts{ Type: "resources", Availability: "public", diff --git a/internal/scheduling/reservations/commitments/reservation_manager.go b/internal/scheduling/reservations/commitments/reservation_manager.go index ddd509b19..ee518254f 100644 --- a/internal/scheduling/reservations/commitments/reservation_manager.go +++ b/internal/scheduling/reservations/commitments/reservation_manager.go @@ -53,9 +53,9 @@ type ReservationManagerConfig struct { // SlotCreationDelay adds a pause between consecutive Reservation CRD creates to spread // scheduler load across time rather than bursting all creates at once. SlotCreationDelay time.Duration - // MaxSlots caps the total number of Reservation CRDs for a single commitment. - // When non-zero, ApplyCommitmentState returns an error if the desired slot count would - // exceed this limit. Only set by the caller on the AllowRejection=true (API) path. + // MaxSlots caps the number of new blind-scheduler slots created per apply call; zero disables the cap. + // When exceeded, ApplyCommitmentState returns a SlotLimitExceededError. PAYG remapping slots and + // already-existing slots are excluded. Only set by the caller on the AllowRejection=true (API) path. MaxSlots int EnablePaygPreAllocation bool VMSource reservations.VMSource @@ -272,6 +272,7 @@ func (m *ReservationManager) ApplyCommitmentState( } if deltaMemoryBytes > 0 { + // MaxSlots caps only blind-scheduler slots (PAYG remapping slots and existing slots are excluded). 
newSlots := countNewSlots(deltaMemoryBytes, flavorGroup) if m.cfg.MaxSlots > 0 && newSlots > m.cfg.MaxSlots { return nil, &SlotLimitExceededError{NewSlots: newSlots, Limit: m.cfg.MaxSlots} diff --git a/internal/scheduling/reservations/commitments/reservation_manager_test.go b/internal/scheduling/reservations/commitments/reservation_manager_test.go index 2346a16f2..4cdd6c0c9 100644 --- a/internal/scheduling/reservations/commitments/reservation_manager_test.go +++ b/internal/scheduling/reservations/commitments/reservation_manager_test.go @@ -5,6 +5,7 @@ package commitments import ( "context" + "errors" "testing" "github.com/cobaltcore-dev/cortex/api/v1alpha1" @@ -339,7 +340,7 @@ func TestApplyCommitmentState(t *testing.T) { }, }, { - name: "max slots: only new slots counted, existing do not contribute", + name: "max slots: only new blind-scheduler slots counted, existing slots excluded", existingSlots: []v1alpha1.Reservation{ newTestCRSlot("commitment-abc123-0", 8, "", "test-group", nil), newTestCRSlot("commitment-abc123-1", 8, "", "test-group", nil), @@ -738,6 +739,147 @@ func TestApplyCommitmentState_PAYG(t *testing.T) { } } +// ============================================================================ +// Tests: MaxSlots only limits blind-scheduler slots (PAYG remapping excluded) +// ============================================================================ + +func TestApplyCommitmentState_MaxSlotsExcludesPAYG(t *testing.T) { + const ( + az = "test-az" + projectID = "project-1" + hvName = "host-1" + ) + fg := testFlavorGroup() // small=8GiB, medium=16GiB, large=32GiB + + hvWithAZ := func(name string, instanceIDs ...string) *hv1.Hypervisor { + instances := make([]hv1.Instance, len(instanceIDs)) + for i, id := range instanceIDs { + instances[i] = hv1.Instance{ID: id, Name: id, Active: true} + } + return &hv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Labels: map[string]string{"topology.kubernetes.io/zone": az}, + }, + Status: 
hv1.HypervisorStatus{Instances: instances}, + } + } + + paygVM := func(uuid, flavorName string) reservations.VM { + return reservations.VM{ + UUID: uuid, + FlavorName: flavorName, + CurrentHypervisor: hvName, + } + } + + tests := []struct { + name string + hypervisors []*hv1.Hypervisor + paygVMs []reservations.VM + existingSlots []v1alpha1.Reservation + desiredMemoryGiB int64 + maxSlots int + wantError bool + }{ + { + // PAYG creates 4 slots, remaining delta needs 3 blind-scheduler slots. + // MaxSlots=3 allows it because only blind-scheduler slots (3) are counted. + name: "PAYG slots excluded from MaxSlots — passes when blind slots within limit", + hypervisors: []*hv1.Hypervisor{ + hvWithAZ(hvName, "vm-1", "vm-2", "vm-3", "vm-4"), + }, + paygVMs: []reservations.VM{ + paygVM("vm-1", "small"), // 8 GiB + paygVM("vm-2", "small"), // 8 GiB + paygVM("vm-3", "small"), // 8 GiB + paygVM("vm-4", "small"), // 8 GiB → PAYG consumes 32 GiB, creates 4 slots + }, + desiredMemoryGiB: 56, // delta=56GiB; PAYG=32GiB → remaining=24GiB → 3 blind slots (NOTE(review): the next case splits 24GiB as 16GiB+8GiB → 2 slots; confirm which count applies here) + maxSlots: 3, + wantError: false, + }, + { + // PAYG creates 1 slot, remaining delta needs 2 blind-scheduler slots. + // MaxSlots=1 rejects because blind-scheduler slots (2) > limit (1). + name: "blind-scheduler slots exceed MaxSlots — rejects regardless of PAYG", + hypervisors: []*hv1.Hypervisor{ + hvWithAZ(hvName, "vm-1"), + }, + paygVMs: []reservations.VM{ + paygVM("vm-1", "small"), // 8 GiB → PAYG=8GiB, 1 slot + }, + desiredMemoryGiB: 32, // delta=32GiB; PAYG=8GiB → remaining=24GiB → 16GiB+8GiB → 2 blind slots + maxSlots: 1, + wantError: true, + }, + { + // Existing slots + PAYG slots present, but MaxSlots only limits blind-scheduler slots. 
+ name: "existing + PAYG do not count — only blind slots checked against limit", + hypervisors: []*hv1.Hypervisor{ + hvWithAZ(hvName, "vm-1", "vm-2"), + }, + paygVMs: []reservations.VM{ + paygVM("vm-1", "small"), // 8 GiB + paygVM("vm-2", "small"), // 8 GiB → PAYG=16GiB, 2 slots + }, + existingSlots: []v1alpha1.Reservation{ + withAZ(newTestCRSlot("commitment-abc123-0", 8, hvName, "test-group", nil), az), + }, + desiredMemoryGiB: 32, // existing=8GiB, delta=24GiB; PAYG=16GiB → remaining=8GiB → 1 blind slot + maxSlots: 1, // 1 blind slot ≤ 1: passes + wantError: false, + }, + } + + scheme := newCRTestScheme(t) + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + objects := make([]client.Object, 0, len(tt.hypervisors)+len(tt.existingSlots)) + for _, hv := range tt.hypervisors { + objects = append(objects, hv) + } + for i := range tt.existingSlots { + objects = append(objects, &tt.existingSlots[i]) + } + k8sClient := newCRTestClient(scheme, objects...) + + mgr := NewReservationManager(k8sClient, ReservationManagerConfig{ + EnablePaygPreAllocation: true, + VMSource: &fakeVMSource{vms: tt.paygVMs}, + MaxSlots: tt.maxSlots, + }) + + desiredState := &CommitmentState{ + CommitmentUUID: "abc123", + ProjectID: projectID, + FlavorGroupName: "test-group", + TotalMemoryBytes: tt.desiredMemoryGiB * 1024 * 1024 * 1024, + AvailabilityZone: az, + } + + _, err := mgr.ApplyCommitmentState( + context.Background(), logr.Discard(), desiredState, map[string]compute.FlavorGroupFeature{"test-group": fg}, "test", + ) + + if tt.wantError { + if err == nil { + t.Fatal("expected error, got nil") + } + var slotErr *SlotLimitExceededError + if !errors.As(err, &slotErr) { + t.Errorf("expected SlotLimitExceededError, got %T: %v", err, err) + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + }) + } +} + // ============================================================================ // Tests: newReservation flavor selection // 
============================================================================ diff --git a/pkg/keystone/testing/mock.go b/pkg/keystone/testing/mock.go index c5fbcc4fd..a346bda61 100644 --- a/pkg/keystone/testing/mock.go +++ b/pkg/keystone/testing/mock.go @@ -10,8 +10,9 @@ import ( ) type MockKeystoneClient struct { - Url string - EndpointLocator gophercloud.EndpointLocator + Url string + EndpointLocator gophercloud.EndpointLocator + FindEndpointOverride func(availability, serviceType string) } func (m *MockKeystoneClient) Authenticate(ctx context.Context) error { @@ -25,6 +26,9 @@ func (m *MockKeystoneClient) Client() *gophercloud.ProviderClient { } func (m *MockKeystoneClient) FindEndpoint(availability, serviceType string) (string, error) { + if m.FindEndpointOverride != nil { + m.FindEndpointOverride(availability, serviceType) + } return m.Url, nil }