Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -904,10 +904,6 @@ spec:
memory: 200Mi
securityContext:
allowPrivilegeEscalation: false
volumeMounts:
- mountPath: /host-etc/os-release
name: host-os-release
readOnly: true
env:
- name: OPERATOR_NAMESPACE
valueFrom:
Expand Down Expand Up @@ -945,10 +941,6 @@ spec:
- name: "GDRCOPY_IMAGE"
value: "nvcr.io/nvidia/cloud-native/gdrdrv@sha256:5c4e61f7ba83d7a64ff2523d447c209ce5bde1ddc79acaf1f32f19620b4912d6"
terminationGracePeriodSeconds: 10
volumes:
- hostPath:
path: /etc/os-release
name: host-os-release
serviceAccountName: gpu-operator
strategy: deployment
installModes:
Expand Down
8 changes: 0 additions & 8 deletions config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,6 @@ spec:
memory: 50Mi
securityContext:
allowPrivilegeEscalation: false
volumeMounts:
- mountPath: /host-etc/os-release
name: host-os-release
readOnly: true
env:
- name: OPERATOR_NAMESPACE
valueFrom:
Expand All @@ -72,7 +68,3 @@ spec:
- name: metrics
containerPort: 8080
terminationGracePeriodSeconds: 10
volumes:
- hostPath:
path: /etc/os-release
name: host-os-release
74 changes: 17 additions & 57 deletions controllers/object_controls.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,9 @@
package controllers

import (
"bufio"
"context"
"errors"
"fmt"
"os"
"path"
"path/filepath"
"regexp"
Expand Down Expand Up @@ -1003,36 +1001,6 @@ func setNRIPluginAnnotation(o *metav1.ObjectMeta, cdiConfig *gpuv1.CDIConfigSpec
o.Annotations = annotations
}

// parseOSRelease can be overridden in tests for mocking filesystem access.
// In production, it reads and parses /host-etc/os-release.
var parseOSRelease = parseOSReleaseFromFile

// osReleaseFilePath is the path to the os-release file, configurable for testing.
var osReleaseFilePath = "/host-etc/os-release"

// osReleaseLineRegex matches a single KEY=VALUE assignment of an os-release file.
// It is compiled once at package scope instead of on every parse. Lines that do
// not match (blank lines, comments, assignments with an empty value) are ignored.
var osReleaseLineRegex = regexp.MustCompile(`^(?P<key>\w+)=(?P<value>.+)`)

// parseOSReleaseFromFile reads the file at osReleaseFilePath and returns its
// KEY=VALUE assignments as a map. Surrounding double or single quotes are
// stripped from each value.
//
// An error is returned when the file cannot be opened (the error from os.Open
// is returned unwrapped, so os.IsNotExist keeps working for callers) or when
// reading fails part-way through; in both cases the returned map is nil.
func parseOSReleaseFromFile() (map[string]string, error) {
	f, err := os.Open(osReleaseFilePath)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	release := map[string]string{}

	// Read line-by-line
	s := bufio.NewScanner(f)
	for s.Scan() {
		if m := osReleaseLineRegex.FindStringSubmatch(s.Text()); m != nil {
			// Values may be enclosed in either double or single quotes.
			release[m[1]] = strings.Trim(m[2], `"'`)
		}
	}
	// Scan returns false both at EOF and on failure (I/O error, over-long
	// line); without this check a truncated result would look like success.
	if err := s.Err(); err != nil {
		return nil, fmt.Errorf("reading %s: %w", osReleaseFilePath, err)
	}
	return release, nil
}

func TransformDCGMExporterService(obj *corev1.Service, config *gpuv1.ClusterPolicySpec) error {
serviceConfig := config.DCGMExporter.ServiceSpec
if serviceConfig != nil {
Expand Down Expand Up @@ -3299,9 +3267,9 @@ func resolveDriverTag(n ClusterPolicyController, driverSpec interface{}) (string
return image, nil
}

// gpuNodeOSID returns the base OS identifier (e.g. "rhel", "ubuntu", "rocky") for GPU
// getGPUNodeOSID returns the base OS identifier (e.g. "rhel", "ubuntu", "rocky") for GPU
// worker nodes by extracting the version suffix from the osTag obtained via NFD labels.
func (n ClusterPolicyController) gpuNodeOSID() (string, string, error) {
func (n ClusterPolicyController) getGPUNodeOSID() (string, string, error) {
_, osTag, _ := kernelFullVersion(n)
if osTag == "" {
return "", "", fmt.Errorf("unable to determine GPU node OS from NFD labels, is NFD installed?")
Expand All @@ -3314,7 +3282,7 @@ func (n ClusterPolicyController) gpuNodeOSID() (string, string, error) {

// getRepoConfigPath returns the standard OS specific path for repository configuration files.
func (n ClusterPolicyController) getRepoConfigPath() (string, error) {
osID, osTag, err := n.gpuNodeOSID()
osID, osTag, err := n.getGPUNodeOSID()
if err != nil {
return "", err
}
Expand All @@ -3326,7 +3294,7 @@ func (n ClusterPolicyController) getRepoConfigPath() (string, error) {

// getCertConfigPath returns the standard OS specific path for ssl keys/certificates.
func (n ClusterPolicyController) getCertConfigPath() (string, error) {
osID, osTag, err := n.gpuNodeOSID()
osID, osTag, err := n.getGPUNodeOSID()
if err != nil {
return "", err
}
Expand All @@ -3338,17 +3306,15 @@ func (n ClusterPolicyController) getCertConfigPath() (string, error) {

// getSubscriptionPathsToVolumeSources returns the MountPathToVolumeSource map containing all
// OS-specific subscription/entitlement paths that need to be mounted in the container.
func getSubscriptionPathsToVolumeSources() (MountPathToVolumeSource, error) {
release, err := parseOSRelease()
func (n ClusterPolicyController) getSubscriptionPathsToVolumeSources() (MountPathToVolumeSource, error) {
osID, osTag, err := n.getGPUNodeOSID()
if err != nil {
return nil, err
}

os := release["ID"]
if pathToVolumeSource, ok := SubscriptionPathMap[os]; ok {
if pathToVolumeSource, ok := SubscriptionPathMap[osID]; ok {
return pathToVolumeSource, nil
}
return nil, fmt.Errorf("distribution not supported")
return nil, fmt.Errorf("subscription paths not found for distribution %s", osTag)
}

// createConfigMapVolumeMounts creates a VolumeMount for each key
Expand Down Expand Up @@ -3612,15 +3578,14 @@ func transformDriverContainer(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicy
}
}

release, err := parseOSRelease()
osID, _, err := n.getGPUNodeOSID()
if err != nil {
return fmt.Errorf("ERROR: failed to get os-release: %s", err)
return fmt.Errorf("ERROR: failed to retrieve OS name of GPU Node: %w", err)
}

// set up subscription entitlements for RHEL(using K8s with a non-CRIO runtime) and SLES
if (release["ID"] == "rhel" && n.openshift == "" && n.runtime != gpuv1.CRIO) || release["ID"] == "sles" || release["ID"] == "sl-micro" {
n.logger.Info("Mounting subscriptions into the driver container", "OS", release["ID"])
pathToVolumeSource, err := getSubscriptionPathsToVolumeSources()
if (osID == "rhel" && n.openshift == "" && n.runtime != gpuv1.CRIO) || osID == "sles" || osID == "sl-micro" {
n.logger.Info("Mounting subscriptions into the driver container", "OS", osID)
pathToVolumeSource, err := n.getSubscriptionPathsToVolumeSources()
if err != nil {
return fmt.Errorf("ERROR: failed to get path items for subscription entitlements: %v", err)
}
Expand Down Expand Up @@ -3648,8 +3613,8 @@ func transformDriverContainer(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicy
}

// apply proxy and env settings if this is an OpenShift cluster
if _, ok := release["OPENSHIFT_VERSION"]; ok {
setContainerEnv(driverContainer, "OPENSHIFT_VERSION", release["OPENSHIFT_VERSION"])
if len(n.openshift) > 0 {
setContainerEnv(driverContainer, "OPENSHIFT_VERSION", n.openshift)

// Automatically apply proxy settings for OCP and inject custom CA if configured by user
// https://docs.openshift.com/container-platform/4.6/networking/configuring-a-custom-pki.html
Expand Down Expand Up @@ -3720,14 +3685,9 @@ func transformVGPUManagerContainer(obj *appsv1.DaemonSet, config *gpuv1.ClusterP
container.Args = config.VGPUManager.Args
}

release, err := parseOSRelease()
if err != nil {
return fmt.Errorf("ERROR: failed to get os-release: %s", err)
}

// add env for OCP
if _, ok := release["OPENSHIFT_VERSION"]; ok {
setContainerEnv(container, "OPENSHIFT_VERSION", release["OPENSHIFT_VERSION"])
if len(n.openshift) > 0 {
setContainerEnv(container, "OPENSHIFT_VERSION", n.openshift)
}

if len(config.VGPUManager.Env) > 0 {
Expand Down
71 changes: 0 additions & 71 deletions controllers/object_controls_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,18 +149,6 @@ func getModuleRoot(dir string) (string, error) {
return dir, nil
}

// mockOSRelease builds a stand-in for parseOSRelease so tests can pretend to run
// on a given operating system without reading any file. Every invocation of the
// returned function yields a fresh map whose "ID" and "NAME" entries are osID and
// whose "VERSION_ID" entry is version; the error is always nil.
func mockOSRelease(osID, version string) func() (map[string]string, error) {
	stub := func() (map[string]string, error) {
		fields := make(map[string]string, 3)
		fields["ID"] = osID
		fields["VERSION_ID"] = version
		fields["NAME"] = osID
		return fields, nil
	}
	return stub
}

// setup creates a mock kubernetes cluster and client. Nodes are labeled with the minimum
// required NFD labels to be detected as GPU nodes by the GPU Operator. A sample
// ClusterPolicy resource is applied to the cluster. The ClusterPolicyController
Expand All @@ -173,9 +161,6 @@ func setup() error {
boolTrue = new(bool)
*boolTrue = true

// Mock parseOSRelease to avoid filesystem dependency in tests
parseOSRelease = mockOSRelease("ubuntu", "20.04")

s := scheme.Scheme
if err := gpuv1.AddToScheme(s); err != nil {
return fmt.Errorf("unable to add ClusterPolicy v1 schema: %v", err)
Expand Down Expand Up @@ -1452,62 +1437,6 @@ func TestService(t *testing.T) {
}
}

// TestParseOSReleaseFromFile writes small os-release samples to a temporary
// directory, points osReleaseFilePath at each one and checks the parsed map.
// It also verifies that a missing file is reported as a not-exist error.
func TestParseOSReleaseFromFile(t *testing.T) {
	type testCase struct {
		description string
		content     string
		expected    map[string]string
	}

	cases := []testCase{
		{
			description: "quoted values",
			content:     `NAME="Ubuntu"` + "\n" + `VERSION_ID="20.04"`,
			expected:    map[string]string{"NAME": "Ubuntu", "VERSION_ID": "20.04"},
		},
		{
			description: "unquoted values",
			content:     `NAME=Ubuntu` + "\n" + `ID=ubuntu`,
			expected:    map[string]string{"NAME": "Ubuntu", "ID": "ubuntu"},
		},
		{
			description: "mixed quoted and unquoted",
			content:     `ID="rhel"` + "\n" + `VERSION_ID=8.5`,
			expected:    map[string]string{"ID": "rhel", "VERSION_ID": "8.5"},
		},
		{
			description: "empty lines and comments",
			content:     `NAME="Ubuntu"` + "\n\n# comment\n" + `ID=ubuntu`,
			expected:    map[string]string{"NAME": "Ubuntu", "ID": "ubuntu"},
		},
	}

	// Remember the configured path so later tests see the original value again.
	savedPath := osReleaseFilePath
	defer func() { osReleaseFilePath = savedPath }()

	scratchDir := t.TempDir()

	for idx := range cases {
		tc := cases[idx]
		t.Run(tc.description, func(t *testing.T) {
			sample := filepath.Join(scratchDir, fmt.Sprintf("os-release-%d", idx))
			require.NoError(t, os.WriteFile(sample, []byte(tc.content), 0600))

			// Redirect the parser to the sample written for this case.
			osReleaseFilePath = sample
			parsed, parseErr := parseOSReleaseFromFile()
			require.NoError(t, parseErr)
			require.Equal(t, tc.expected, parsed)
		})
	}

	t.Run("file not found", func(t *testing.T) {
		osReleaseFilePath = "/nonexistent/path"
		_, openErr := parseOSReleaseFromFile()
		require.Error(t, openErr)
		require.True(t, os.IsNotExist(openErr))
	})
}

func TestCertConfigPathMap(t *testing.T) {
expectedPaths := map[string]string{
"centos": "/etc/pki/ca-trust/extracted/pem",
Expand Down
Loading