From 5fe6f7a9765174920802536c4cf787cbd1e2c5d1 Mon Sep 17 00:00:00 2001 From: Niclas Schad Date: Thu, 7 May 2026 14:04:59 +0200 Subject: [PATCH 1/2] Add support for MutableCSINodeAllocatableCount The CSI driver lists all PCIe devices that are not of type VIRTIO_BLOCK_DEVICE and subtracts them from the theoretical maximum, so Kubernetes can report a correct dynamic max volume count that can be attached for each node. Signed-off-by: Niclas Schad --- pkg/csi/blockstorage/controllerserver.go | 4 ++ pkg/csi/blockstorage/nodeserver.go | 12 +++- pkg/csi/blockstorage/utils.go | 2 +- pkg/csi/blockstorage/utils_test.go | 10 +-- pkg/csi/util/mount/mount_darwin.go | 5 ++ pkg/csi/util/mount/mount_linux.go | 80 ++++++++++++++++++++++++ pkg/stackit/stackiterrors/errors.go | 14 ++++- 7 files changed, 118 insertions(+), 9 deletions(-) diff --git a/pkg/csi/blockstorage/controllerserver.go b/pkg/csi/blockstorage/controllerserver.go index 8de6237e..ef962edd 100644 --- a/pkg/csi/blockstorage/controllerserver.go +++ b/pkg/csi/blockstorage/controllerserver.go @@ -370,6 +370,10 @@ func (cs *controllerServer) ControllerPublishVolume(ctx context.Context, req *cs _, err = cloud.AttachVolume(ctx, instanceID, volumeID) if err != nil { + // Triggers an immediate `NodeGetInfo` RPC call when MutableCSINodeAllocatableCount is enabled + if stackiterrors.IsTooManyDevicesError(err) { + return nil, status.Errorf(codes.ResourceExhausted, "[ControllerPublishVolume] Node can't accept any more volumes %v.
All PCIe lanes are exhausted!", err) + } klog.Errorf("Failed to AttachVolume: %v", err) return nil, status.Errorf(codes.Internal, "[ControllerPublishVolume] Attach Volume failed with error %v", err) } diff --git a/pkg/csi/blockstorage/nodeserver.go b/pkg/csi/blockstorage/nodeserver.go index 648e5df3..0b390d2b 100644 --- a/pkg/csi/blockstorage/nodeserver.go +++ b/pkg/csi/blockstorage/nodeserver.go @@ -308,8 +308,16 @@ func (ns *nodeServer) NodeGetInfo(ctx context.Context, _ *csi.NodeGetInfoRequest } maxVolumesPerNode := DetermineMaxVolumesByFlavor(flavor) - // Subtract 1 for root disk and another for configDrive/spare - maxVolumesPerNode -= 2 + + // Subtract PCIe root ports occupied by non-virtio-block devices + emptyPCIeRootPorts, err := mount.CountNonVirtioBlockDevices() + if err != nil { + klog.Errorf("[NodeGetInfo] unable to retrieve PCIe root ports %v", err) + emptyPCIeRootPorts = 0 + } + + maxVolumesPerNode -= emptyPCIeRootPorts + klog.V(4).Infof("Determined %d PCIe ports occupied by non virtio block devices", emptyPCIeRootPorts) klog.V(4).Infof("Determined node to support %d volumes", maxVolumesPerNode) nodeInfo := &csi.NodeGetInfoResponse{ diff --git a/pkg/csi/blockstorage/utils.go b/pkg/csi/blockstorage/utils.go index aaafc864..eacb77f7 100644 --- a/pkg/csi/blockstorage/utils.go +++ b/pkg/csi/blockstorage/utils.go @@ -97,7 +97,7 @@ func DetermineMaxVolumesByFlavor(flavor string) int64 { return 159 default: // All other flavors can mount 28 volumes - return 25 + return 28 } } diff --git a/pkg/csi/blockstorage/utils_test.go b/pkg/csi/blockstorage/utils_test.go index f9261de4..9d505950 100644 --- a/pkg/csi/blockstorage/utils_test.go +++ b/pkg/csi/blockstorage/utils_test.go @@ -12,14 +12,14 @@ var _ = Describe("Util Test", func() { maxVolumes := DetermineMaxVolumesByFlavor(flavor) Expect(maxVolumes).To(Equal(int64(expectedMaxVolumes))) }, - Entry("Intel 3rd Gen", "c3i.2", 25), - Entry("Intel 2rd Gen", "c2i.2", 25), - Entry("Intel 1st Gen", "c1.2", 25), - Entry("AMD 1st Gen without
overprovisioning", "s1a.8d", 25), + Entry("Intel 3rd Gen", "c3i.2", 28), + Entry("Intel 2rd Gen", "c2i.2", 28), + Entry("Intel 1st Gen", "c1.2", 28), + Entry("AMD 1st Gen without overprovisioning", "s1a.8d", 28), Entry("AMD 2nd Gen without overprovisioning", "s2a.8d", 159), Entry("Nvidia GPU", "n2.14d.g1", 10), Entry("Nvidia GPU", "n2.56d.g4", 10), - Entry("ARM Gen1Link without CPU-overprovisioning ARM Gen1", "g1r.4d", 25), + Entry("ARM Gen1Link without CPU-overprovisioning ARM Gen1", "g1r.4d", 28), ) }) }) diff --git a/pkg/csi/util/mount/mount_darwin.go b/pkg/csi/util/mount/mount_darwin.go index 122f4c1c..389fd6cb 100644 --- a/pkg/csi/util/mount/mount_darwin.go +++ b/pkg/csi/util/mount/mount_darwin.go @@ -17,3 +17,8 @@ func newDeviceStats(statfs *unix.Statfs_t) *DeviceStats { UsedInodes: int64(statfs.Files) - int64(statfs.Ffree), } } + +func CountNonVirtioBlockDevices() (int64, error) { + // not implemented + return 0, nil +} diff --git a/pkg/csi/util/mount/mount_linux.go b/pkg/csi/util/mount/mount_linux.go index b525b753..f8925708 100644 --- a/pkg/csi/util/mount/mount_linux.go +++ b/pkg/csi/util/mount/mount_linux.go @@ -4,6 +4,15 @@ package mount import "golang.org/x/sys/unix" +var ( + pciAddressRegex = regexp.MustCompile(`^[0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F]$`) +) + +const ( + RedhatVendor = "0x1af4" + VirtioBlockDevice = "0x1042" +) + func newDeviceStats(statfs *unix.Statfs_t) *DeviceStats { return &DeviceStats{ Block: false, @@ -17,3 +26,74 @@ func newDeviceStats(statfs *unix.Statfs_t) *DeviceStats { UsedInodes: int64(statfs.Files) - int64(statfs.Ffree), } } + +// CountNonVirtioBlockDevices returns the number of PCIe root ports that +// are currently occupied by anything other than a VIRTIO 1.0 block device. +// It returns zero when something went wrong. +func CountNonVirtioBlockDevices() (int64, error) { + const pciPath = "/sys/bus/pci/devices" + + // Get all PCI devices + devices, err := os.ReadDir(pciPath) + if err != nil { + return 0,
fmt.Errorf("failed to read PCI bus: %w", err) + } + + pcieSlotsOccupiedByNonBlockDevice := 0 + + for _, dev := range devices { + devPath := filepath.Join(pciPath, dev.Name()) + + // 1. Identify if it's a Root Port / Bridge + // We check the 'class' file. PCI Bridge class code starts with 0x0604 + classBuf, err := os.ReadFile(filepath.Join(devPath, "class")) + if err != nil { + klog.Errorf("failed to read PCI device class %s : %v", devPath, err) + continue + } + class := strings.TrimSpace(string(classBuf)) + + // Class 0x060400 is a PCI-to-PCI bridge (standard for Root Ports) + if strings.HasPrefix(class, "0x0604") { + // 2. Check if the port has downstream devices + // If the bridge has children, they appear as subdirectories + // matching the PCI address format (e.g., 0000:01:00.0) + files, err2 := os.ReadDir(devPath) + if err2 != nil { + klog.Errorf("failed to read dir %s : %v", devPath, err2) + } + for _, file := range files { + // Ignore PCI bus directories such as pci001 pci002 and pci010 + // Devices must follow format + if pciAddressRegex.MatchString(file.Name()) { + isNonBlockDevice := IsNonBlockDevice(devPath, file) + if isNonBlockDevice { + pcieSlotsOccupiedByNonBlockDevice++ + } + break + } + } + } else { + klog.V(4).Infof("skipping class %s: path: %s", class, devPath) + } + } + + return int64(pcieSlotsOccupiedByNonBlockDevice), nil +} + +func IsNonBlockDevice(devPath string, file os.DirEntry) bool { + var isNonBlockDevice bool + pciDevicePath := filepath.Join(devPath, file.Name()) + vendorBuf, err := os.ReadFile(filepath.Join(pciDevicePath, "vendor")) + if err != nil { + klog.Errorf("failed to read PCI device vendor %s : %v", pciDevicePath, err) + } + deviceBuf, err := os.ReadFile(filepath.Join(pciDevicePath, "device")) + if err != nil { + klog.Errorf("failed to read PCI device file %s : %v", pciDevicePath, err) + } + if strings.TrimSpace(string(vendorBuf)) == RedhatVendor && strings.TrimSpace(string(deviceBuf)) != VirtioBlockDevice { + isNonBlockDevice 
= true + } + return isNonBlockDevice +} diff --git a/pkg/stackit/stackiterrors/errors.go b/pkg/stackit/stackiterrors/errors.go index ae19b7d7..0e37be49 100644 --- a/pkg/stackit/stackiterrors/errors.go +++ b/pkg/stackit/stackiterrors/errors.go @@ -4,9 +4,10 @@ import ( "errors" "fmt" "net/http" + "strings" oapiError "github.com/stackitcloud/stackit-sdk-go/core/oapierror" - wait "github.com/stackitcloud/stackit-sdk-go/services/iaas/v2api/wait" + "github.com/stackitcloud/stackit-sdk-go/services/iaas/v2api/wait" ) var ErrNotFound = errors.New("failed to find object") @@ -20,6 +21,17 @@ func IsNotFound(err error) bool { return oAPIError.StatusCode == http.StatusNotFound } +func IsTooManyDevicesError(err error) bool { + var oAPIError *oapiError.GenericOpenAPIError + if ok := errors.As(err, &oAPIError); !ok { + return false + } + + // TODO: Improve this if possible + return oAPIError.StatusCode == http.StatusForbidden && + strings.Contains(oAPIError.ErrorMessage, "maximum allowed number of disk devices") +} + func IgnoreNotFound(err error) error { if IsNotFound(err) { return nil From 9072ccda0ca474966fb3b1926517b0982bfe4aef Mon Sep 17 00:00:00 2001 From: Niclas Schad Date: Fri, 15 May 2026 14:39:28 +0200 Subject: [PATCH 2/2] fix imports for linux Signed-off-by: Niclas Schad --- pkg/csi/util/mount/mount_linux.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pkg/csi/util/mount/mount_linux.go b/pkg/csi/util/mount/mount_linux.go index f8925708..c6259b47 100644 --- a/pkg/csi/util/mount/mount_linux.go +++ b/pkg/csi/util/mount/mount_linux.go @@ -2,7 +2,16 @@ package mount -import "golang.org/x/sys/unix" +import ( + "fmt" + "os" + "path/filepath" + "regexp" + "strings" + + "golang.org/x/sys/unix" + "k8s.io/klog/v2" +) var ( pciAddressRegex = regexp.MustCompile(`^[0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F]$`)