Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 22 additions & 13 deletions .github/workflows/e2e-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ concurrency:

env:
KIND_VERSION: v0.32.0
IMAGE: ghcr.io/converged-computing/fluence:latest
IMAGE: vanessa/fluence:test

jobs:
e2e:
Expand All @@ -19,19 +19,20 @@ jobs:
- name: Checkout
uses: actions/checkout@v4

#- name: Set up Docker Buildx
# uses: docker/setup-buildx-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

# - name: Build fluence image
# uses: docker/build-push-action@v6
# with:
# context: .
# file: ./Dockerfile
# push: false
# load: true
# tags: ${{ env.IMAGE }}
# cache-from: type=gha
# cache-to: type=gha,mode=max
- name: Build fluence image
uses: docker/build-push-action@v6
with:
context: .
file: ./Dockerfile
push: false
load: true
tags: ${{ env.IMAGE }}
cache-from: type=gha
cache-to: type=gha,mode=max

- name: Create k8s Kind Cluster
uses: helm/kind-action@v1.10.0
with:
Expand All @@ -55,13 +56,20 @@ jobs:
echo "=== Disk space after cleanup ==="
df -h

- name: Load docker images
run: |
kind get clusters
cluster=$(kind get clusters)
kind load --name $cluster docker-image vanessa/fluence:test

- name: Deploy fluence (base)
run: |
kubectl apply -f deploy/fluence-test.yaml
kubectl rollout status -n kube-system deployment/fluence --timeout=180s
POD=$(kubectl -n kube-system get pods -l app=fluence -o name | head -1)
kubectl -n kube-system exec "${POD#pod/}" -- ls /tmp/
kubectl -n kube-system logs "${POD#pod/}"
sleep 2
kubectl -n kube-system exec "${POD#pod/}" -- /bin/bash -c "cat /tmp/fluence-graph-*.json"
kubectl get nodes -o jsonpath='{range .items[*]}{.metadata.name}{": cpu="}{.status.allocatable.cpu}{" mem="}{.status.allocatable.memory}{"\n"}{end}'

Expand All @@ -80,6 +88,7 @@ jobs:
sleep 1
done
POD=$(kubectl -n kube-system get pods -l app=fluence -o name | head -1)
sleep 2
kubectl -n kube-system exec "${POD#pod/}" -- /bin/bash -c "cat /tmp/fluence-graph-*.json"

- name: Wait for webhook
Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

FLUX_SCHED_ROOT ?= /opt/flux-sched
IMG ?= ghcr.io/converged-computing/fluence:latest
TEST_IMG ?= ghcr.io/converged-computing/fluence:test
TEST_IMG ?= vanessa/fluence:test

# cgo flags for the scheduler binary: flux-sched only.
CGO_CFLAGS = -I$(FLUX_SCHED_ROOT)
Expand Down Expand Up @@ -41,7 +41,7 @@ image: ## Build the scheduler container image

.PHONY: test-image
test-image: ## Build the scheduler container image
docker build -t $(TEST_IMG)-test .
docker build -t $(TEST_IMG) .
docker push $(TEST_IMG)

.PHONY: test-image-deploy
Expand Down
417 changes: 263 additions & 154 deletions README.md

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions cmd/deviceplugin/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,11 @@ func main() {

var names []string
if data, err := os.ReadFile(cfgPath); err == nil {
qc, perr := cluster.LoadQuantumConfig(data)
rc, perr := cluster.LoadResourcesConfig(data)
if perr != nil {
log.Fatalf("parse resources config %s: %v", cfgPath, perr)
}
names = cluster.FluxionResourceNames(qc.Backends)
names = cluster.FluxionResourceNames(rc.Resources)
log.Printf("derived %d resource(s) from %s: %v", len(names), cfgPath, names)
} else {
log.Printf("no resources config at %s (%v); advertising nothing", cfgPath, err)
Expand Down
22 changes: 21 additions & 1 deletion cmd/webhook/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"os"
"time"

"github.com/converged-computing/fluence/pkg/cluster"
"github.com/converged-computing/fluence/pkg/webhook"

"k8s.io/client-go/kubernetes"
Expand Down Expand Up @@ -66,8 +67,27 @@ func main() {
cancel()
log.Printf("patched caBundle on MutatingWebhookConfiguration %q", cfgName)

// The env contract is the union of attribute keys across the configured
// backends (plus FLUXION_BACKEND), so the set of injected env vars tracks the
// config automatically. Loaded from the same FLUENCE_RESOURCES the scheduler
// and device plugin use; absent/unset means just FLUXION_BACKEND.
var attrKeys []string
if path := os.Getenv("FLUENCE_RESOURCES"); path != "" {
if data, rerr := os.ReadFile(path); rerr == nil {
rc, perr := cluster.LoadResourcesConfig(data)
if perr != nil {
log.Fatalf("parse resources config %s: %v", path, perr)
}
attrKeys = cluster.AttributeKeys(rc.Resources)
} else {
log.Printf("no resources config at %s (%v); injecting FLUXION_BACKEND only", path, rerr)
}
}
mutator := &webhook.Mutator{AttributeKeys: attrKeys}
log.Printf("[fluence-webhook] env contract injected into fluxion pods: %v", mutator.EnvVarNames())

mux := http.NewServeMux()
mux.HandleFunc("/mutate", webhook.Handler)
mux.HandleFunc("/mutate", mutator.Handler)
mux.HandleFunc("/healthz", func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) })

srv := &http.Server{
Expand Down
35 changes: 26 additions & 9 deletions deploy/fluence-resources-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,30 @@ metadata:
namespace: kube-system
data:
resources.yaml: |
backends:
- name: ibm_fez
num_qubits: 156
vendor: ibm
qrmi_type: qiskit-runtime-service
- name: ibm_marrakesh
num_qubits: 156
# New generic schema: each backend is a resource tree attached under the
# cluster. Modeled as a qdevice carrying a requestable qpu child; attributes
# become filterable graph properties AND injected env (FLUXION_<KEY>).
resources:
- type: qdevice
name: ibm_fez
attributes: ibm
with:
- type: qpu
count: 1
with:
- type: qubit
count: 156
- type: qdevice
name: ibm_marrakesh
attributes: ibm
with:
- type: qpu
count: 1
with:
- type: qubit
count: 156
attributes:
ibm:
vendor: ibm
qrmi_type: qiskit-runtime-service
---
Expand All @@ -53,7 +70,7 @@ spec:
- operator: Exists # run on every node, including tainted/control-plane
containers:
- name: deviceplugin
image: ghcr.io/converged-computing/fluence:test
image: vanessa/fluence:test
command: ["/bin/fluence-deviceplugin"]
env:
- name: FLUENCE_RESOURCES
Expand All @@ -73,4 +90,4 @@ spec:
path: /var/lib/kubelet/device-plugins
- name: resources
configMap:
name: fluence-resources
name: fluence-resources
31 changes: 24 additions & 7 deletions deploy/fluence-resources.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,29 @@ metadata:
namespace: kube-system
data:
resources.yaml: |
backends:
- name: ibm_fez
num_qubits: 156
vendor: ibm
qrmi_type: qiskit-runtime-service
- name: ibm_marrakesh
num_qubits: 156
# New generic schema: each backend is a resource tree attached under the
# cluster. Modeled as a qdevice carrying a requestable qpu child; attributes
# become filterable graph properties AND injected env (FLUXION_<KEY>).
resources:
- type: qdevice
name: ibm_fez
attributes: ibm
with:
- type: qpu
count: 1
with:
- type: qubit
count: 156
- type: qdevice
name: ibm_marrakesh
attributes: ibm
with:
- type: qpu
count: 1
with:
- type: qubit
count: 156
attributes:
ibm:
vendor: ibm
qrmi_type: qiskit-runtime-service
8 changes: 4 additions & 4 deletions deploy/fluence-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,9 @@ spec:
serviceAccountName: fluence
containers:
- name: fluence
image: ghcr.io/converged-computing/fluence:test
image: vanessa/fluence:test
# Allows for kind load
# imagePullPolicy: Never
imagePullPolicy: Never
command:
- /bin/fluence
- --config=/etc/fluence/scheduler-config.yaml
Expand Down Expand Up @@ -173,9 +173,9 @@ spec:
serviceAccountName: fluence
containers:
- name: webhook
image: ghcr.io/converged-computing/fluence:test
image: vanessa/fluence:test
# Allows for kind load
# imagePullPolicy: Never
imagePullPolicy: Never
command: ["/bin/fluence-webhook"]
ports:
- containerPort: 8443
Expand Down
2 changes: 1 addition & 1 deletion examples/test/e2e/quantum-pod-mock-2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ spec:
containers:
- name: sampler
image: busybox
command: ["sh", "-c", "echo BACKEND=$QRMI_BACKEND; sleep 3600"]
command: ["sh", "-c", "echo BACKEND=$FLUXION_BACKEND; sleep 3600"]
resources:
requests:
fluxion.flux-framework.org/qpu: "1"
Expand Down
2 changes: 1 addition & 1 deletion examples/test/e2e/quantum-pod-mock.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ spec:
- name: sampler
image: busybox
# Print the injected backend, then idle so we can assert on it.
command: ["sh", "-c", "echo BACKEND=$QRMI_BACKEND; sleep 3600"]
command: ["sh", "-c", "echo BACKEND=$FLUXION_BACKEND; sleep 3600"]
resources:
requests:
fluxion.flux-framework.org/qpu: "1"
Expand Down
Loading
Loading