From fca315df35f9cf3440174252c52a2a85ac283521 Mon Sep 17 00:00:00 2001 From: jaspals Date: Fri, 19 Dec 2025 20:46:45 +0000 Subject: [PATCH 01/29] new network plugin enhancements --- .../inband/network/network_collector.py | 127 +++++++++++++++++- test/unit/plugin/test_network_collector.py | 25 ++-- 2 files changed, 142 insertions(+), 10 deletions(-) diff --git a/nodescraper/plugins/inband/network/network_collector.py b/nodescraper/plugins/inband/network/network_collector.py index 0f96e7c8..6a676598 100644 --- a/nodescraper/plugins/inband/network/network_collector.py +++ b/nodescraper/plugins/inband/network/network_collector.py @@ -49,7 +49,28 @@ class NetworkCollector(InBandDataCollector[NetworkDataModel, None]): CMD_ROUTE = "ip route show" CMD_RULE = "ip rule show" CMD_NEIGHBOR = "ip neighbor show" - CMD_ETHTOOL_TEMPLATE = "sudo ethtool {interface}" + CMD_ETHTOOL_TEMPLATE = "ethtool {interface}" + + # LLDP commands + CMD_LLDPCLI_NEIGHBOR = "lldpcli show neighbor" + CMD_LLDPCTL = "lldpctl" + + # Broadcom NIC commands + CMD_NICCLI_LISTDEV = "niccli --listdev" + CMD_NICCLI_GETQOS_TEMPLATE = "niccli -i {device_num} getqos" + + # Pensando NIC commands + CMD_NICCTL_COMMANDS = [ + "nicctl show card", + "nicctl show dcqcn", + "nicctl show environment", + "nicctl show pcie ats", + "nicctl show port", + "nicctl show qos", + "nicctl show rdma statistics", + "nicctl show version host-software", + "nicctl show version firmware", + ] def _parse_ip_addr(self, output: str) -> List[NetworkInterface]: """Parse 'ip addr show' output into NetworkInterface objects. 
@@ -444,7 +465,7 @@ def _collect_ethtool_info(self, interfaces: List[NetworkInterface]) -> Dict[str, for iface in interfaces: cmd = self.CMD_ETHTOOL_TEMPLATE.format(interface=iface.name) - res_ethtool = self._run_sut_cmd(cmd) + res_ethtool = self._run_sut_cmd(cmd, sudo=True) if res_ethtool.exit_code == 0: ethtool_info = self._parse_ethtool(iface.name, res_ethtool.stdout) @@ -464,6 +485,99 @@ def _collect_ethtool_info(self, interfaces: List[NetworkInterface]) -> Dict[str, return ethtool_data + def _collect_lldp_info(self) -> None: + """Collect LLDP information using lldpcli and lldpctl commands.""" + # Run lldpcli show neighbor + res_lldpcli = self._run_sut_cmd(self.CMD_LLDPCLI_NEIGHBOR, sudo=True) + if res_lldpcli.exit_code == 0: + self._log_event( + category=EventCategory.NETWORK, + description="Collected LLDP neighbor information (lldpcli)", + priority=EventPriority.INFO, + ) + else: + self._log_event( + category=EventCategory.NETWORK, + description="LLDP neighbor collection failed or lldpcli not available", + data={"command": res_lldpcli.command, "exit_code": res_lldpcli.exit_code}, + priority=EventPriority.INFO, + ) + + # Run lldpctl + res_lldpctl = self._run_sut_cmd(self.CMD_LLDPCTL, sudo=True) + if res_lldpctl.exit_code == 0: + self._log_event( + category=EventCategory.NETWORK, + description="Collected LLDP information (lldpctl)", + priority=EventPriority.INFO, + ) + else: + self._log_event( + category=EventCategory.NETWORK, + description="LLDP collection failed or lldpctl not available", + data={"command": res_lldpctl.command, "exit_code": res_lldpctl.exit_code}, + priority=EventPriority.INFO, + ) + + def _collect_broadcom_nic_info(self) -> None: + """Collect Broadcom NIC information using niccli commands.""" + # First, list devices + res_listdev = self._run_sut_cmd(self.CMD_NICCLI_LISTDEV, sudo=True) + if res_listdev.exit_code == 0: + self._log_event( + category=EventCategory.NETWORK, + description="Collected Broadcom NIC device list", + 
priority=EventPriority.INFO, + ) + + # Parse device numbers and collect QoS info for each + device_count = 0 + for line in res_listdev.stdout.splitlines(): + # Look for device numbers in output (format may vary) + # Common formats: "Device 0:", "dev 0", etc. + match = re.search(r"(?:Device|dev)\s+(\d+)", line, re.IGNORECASE) + if match: + device_num = match.group(1) + cmd = self.CMD_NICCLI_GETQOS_TEMPLATE.format(device_num=device_num) + res_qos = self._run_sut_cmd(cmd, sudo=True) + if res_qos.exit_code == 0: + device_count += 1 + + if device_count > 0: + self._log_event( + category=EventCategory.NETWORK, + description=f"Collected Broadcom NIC QoS info for {device_count} devices", + priority=EventPriority.INFO, + ) + else: + self._log_event( + category=EventCategory.NETWORK, + description="Broadcom NIC collection failed or niccli not available", + data={"command": res_listdev.command, "exit_code": res_listdev.exit_code}, + priority=EventPriority.INFO, + ) + + def _collect_pensando_nic_info(self) -> None: + """Collect Pensando NIC information using nicctl commands.""" + collected_count = 0 + for cmd in self.CMD_NICCTL_COMMANDS: + res = self._run_sut_cmd(cmd, sudo=True) + if res.exit_code == 0: + collected_count += 1 + + if collected_count > 0: + self._log_event( + category=EventCategory.NETWORK, + description=f"Collected Pensando NIC information ({collected_count} commands)", + priority=EventPriority.INFO, + ) + else: + self._log_event( + category=EventCategory.NETWORK, + description="Pensando NIC collection failed or nicctl not available", + priority=EventPriority.INFO, + ) + def collect_data( self, args=None, @@ -558,6 +672,15 @@ def collect_data( priority=EventPriority.WARNING, ) + # Collect LLDP information + self._collect_lldp_info() + + # Collect Broadcom NIC information + self._collect_broadcom_nic_info() + + # Collect Pensando NIC information + self._collect_pensando_nic_info() + if interfaces or routes or rules or neighbors: network_data = NetworkDataModel( 
interfaces=interfaces, diff --git a/test/unit/plugin/test_network_collector.py b/test/unit/plugin/test_network_collector.py index 9d7e7546..5be6deb0 100644 --- a/test/unit/plugin/test_network_collector.py +++ b/test/unit/plugin/test_network_collector.py @@ -293,7 +293,7 @@ def test_collect_data_success(collector, conn_mock): collector.system_info.os_family = OSFamily.LINUX # Mock successful command execution - def run_sut_cmd_side_effect(cmd): + def run_sut_cmd_side_effect(cmd, **kwargs): if "addr show" in cmd: return MagicMock(exit_code=0, stdout=IP_ADDR_OUTPUT, command=cmd) elif "route show" in cmd: @@ -330,9 +330,9 @@ def test_collect_data_addr_failure(collector, conn_mock): collector.system_info.os_family = OSFamily.LINUX # Mock failed addr command but successful others - def run_sut_cmd_side_effect(cmd): + def run_sut_cmd_side_effect(cmd, **kwargs): if "addr show" in cmd: - return MagicMock(exit_code=1, stdout="", command=cmd) + return MagicMock(exit_code=1, command=cmd) elif "route show" in cmd: return MagicMock(exit_code=0, stdout=IP_ROUTE_OUTPUT, command=cmd) elif "rule show" in cmd: @@ -340,8 +340,17 @@ def run_sut_cmd_side_effect(cmd): elif "neighbor show" in cmd: return MagicMock(exit_code=0, stdout=IP_NEIGHBOR_OUTPUT, command=cmd) elif "ethtool" in cmd: - return MagicMock(exit_code=1, stdout="", command=cmd) - return MagicMock(exit_code=1, stdout="", command=cmd) + return MagicMock(exit_code=1, command=cmd) + elif "lldpcli" in cmd or "lldpctl" in cmd: + # LLDP commands fail (not available) + return MagicMock(exit_code=1, command=cmd) + elif "niccli" in cmd: + # Broadcom NIC commands fail (not available) + return MagicMock(exit_code=1, command=cmd) + elif "nicctl" in cmd: + # Pensando NIC commands fail (not available) + return MagicMock(exit_code=1, command=cmd) + return MagicMock(exit_code=1, command=cmd) collector._run_sut_cmd = MagicMock(side_effect=run_sut_cmd_side_effect) @@ -362,9 +371,9 @@ def test_collect_data_all_failures(collector, conn_mock): 
"""Test collection when all commands fail""" collector.system_info.os_family = OSFamily.LINUX - # Mock all commands failing (including ethtool) - def run_sut_cmd_side_effect(cmd): - return MagicMock(exit_code=1, stdout="", command=cmd) + # Mock all commands failing (including ethtool, LLDP, Broadcom, Pensando) + def run_sut_cmd_side_effect(cmd, **kwargs): + return MagicMock(exit_code=1, command=cmd) collector._run_sut_cmd = MagicMock(side_effect=run_sut_cmd_side_effect) From d4070e9dc3473a8175880112fce7514e93383bd0 Mon Sep 17 00:00:00 2001 From: jaspals Date: Mon, 5 Jan 2026 22:11:26 +0000 Subject: [PATCH 02/29] broadcom enhancement --- .../inband/network/network_collector.py | 265 +++++++++++- .../plugins/inband/network/networkdata.py | 48 +++ test/unit/plugin/test_network_collector.py | 393 ++++++++++++++++++ 3 files changed, 683 insertions(+), 23 deletions(-) diff --git a/nodescraper/plugins/inband/network/network_collector.py b/nodescraper/plugins/inband/network/network_collector.py index 6a676598..3980e645 100644 --- a/nodescraper/plugins/inband/network/network_collector.py +++ b/nodescraper/plugins/inband/network/network_collector.py @@ -31,6 +31,9 @@ from nodescraper.models import TaskResult from .networkdata import ( + BroadcomNicDevice, + BroadcomNicQos, + BroadcomNicQosAppEntry, EthtoolInfo, IpAddress, Neighbor, @@ -56,8 +59,8 @@ class NetworkCollector(InBandDataCollector[NetworkDataModel, None]): CMD_LLDPCTL = "lldpctl" # Broadcom NIC commands - CMD_NICCLI_LISTDEV = "niccli --listdev" - CMD_NICCLI_GETQOS_TEMPLATE = "niccli -i {device_num} getqos" + CMD_NICCLI_LISTDEV = "niccli --list_devices" + CMD_NICCLI_GETQOS_TEMPLATE = "niccli --dev {device_num} qos --ets --show" # Pensando NIC commands CMD_NICCTL_COMMANDS = [ @@ -452,6 +455,198 @@ def _parse_ethtool(self, interface: str, output: str) -> EthtoolInfo: return ethtool_info + def _parse_niccli_listdev(self, output: str) -> List[BroadcomNicDevice]: + """Parse 'niccli --list_devices' output into 
BroadcomNicDevice objects. + + Args: + output: Raw output from 'niccli --list_devices' command + + Returns: + List of BroadcomNicDevice objects + """ + devices = [] + current_device = None + + for line in output.splitlines(): + line_stripped = line.strip() + if not line_stripped: + continue + + # Check if this is a device header line + # Format: "1 ) Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC (Adp#1 Port#1)" + match = re.match(r"^(\d+)\s*\)\s*(.+?)(?:\s+\((.+?)\))?$", line_stripped) + if match: + device_num_str = match.group(1) + model = match.group(2).strip() if match.group(2) else None + adapter_port = match.group(3).strip() if match.group(3) else None + + try: + device_num = int(device_num_str) + except ValueError: + continue + + current_device = BroadcomNicDevice( + device_num=device_num, + model=model, + adapter_port=adapter_port, + ) + devices.append(current_device) + + # Check for Device Interface Name line + elif "Device Interface Name" in line and current_device: + parts = line_stripped.split(":") + if len(parts) >= 2: + current_device.interface_name = parts[1].strip() + + # Check for MAC Address line + elif "MAC Address" in line and current_device: + parts = line_stripped.split(":") + if len(parts) >= 2: + # MAC address has colons, so rejoin the parts after first split + mac = ":".join(parts[1:]).strip() + current_device.mac_address = mac + + # Check for PCI Address line + elif "PCI Address" in line and current_device: + parts = line_stripped.split(":") + if len(parts) >= 2: + # PCI address also has colons, rejoin + pci = ":".join(parts[1:]).strip() + current_device.pci_address = pci + + return devices + + def _parse_niccli_qos(self, device_num: int, output: str) -> BroadcomNicQos: + """Parse 'niccli --dev X qos --ets --show' output into BroadcomNicQos object. 
+ + Args: + device_num: Device number + output: Raw output from 'niccli --dev X qos --ets --show' command + + Returns: + BroadcomNicQos object with parsed data + """ + qos_info = BroadcomNicQos(device_num=device_num, raw_output=output) + + current_app_entry = None + + for line in output.splitlines(): + line_stripped = line.strip() + if not line_stripped: + continue + + # Parse PRIO_MAP: "PRIO_MAP: 0:0 1:0 2:0 3:1 4:0 5:0 6:0 7:2" + if "PRIO_MAP:" in line: + parts = line.split("PRIO_MAP:") + if len(parts) >= 2: + prio_entries = parts[1].strip().split() + for entry in prio_entries: + if ":" in entry: + prio, tc = entry.split(":") + try: + qos_info.prio_map[int(prio)] = int(tc) + except ValueError: + pass + + # Parse TC Bandwidth: "TC Bandwidth: 50% 50% 0%" + elif "TC Bandwidth:" in line: + parts = line.split("TC Bandwidth:") + if len(parts) >= 2: + bandwidth_entries = parts[1].strip().split() + for bw in bandwidth_entries: + bw_clean = bw.rstrip("%") + try: + qos_info.tc_bandwidth.append(int(bw_clean)) + except ValueError: + pass + + # Parse TSA_MAP: "TSA_MAP: 0:ets 1:ets 2:strict" + elif "TSA_MAP:" in line: + parts = line.split("TSA_MAP:") + if len(parts) >= 2: + tsa_entries = parts[1].strip().split() + for entry in tsa_entries: + if ":" in entry: + tc, tsa = entry.split(":", 1) + try: + qos_info.tsa_map[int(tc)] = tsa + except ValueError: + pass + + # Parse PFC enabled: "PFC enabled: 3" + elif "PFC enabled:" in line: + parts = line.split("PFC enabled:") + if len(parts) >= 2: + try: + qos_info.pfc_enabled = int(parts[1].strip()) + except ValueError: + pass + + # Parse APP entries - detect start of new APP entry + elif line_stripped.startswith("APP#"): + # Save previous entry if exists + if current_app_entry: + qos_info.app_entries.append(current_app_entry) + current_app_entry = BroadcomNicQosAppEntry() + + # Parse Priority within APP entry + elif "Priority:" in line and current_app_entry is not None: + parts = line.split("Priority:") + if len(parts) >= 2: + try: + 
current_app_entry.priority = int(parts[1].strip()) + except ValueError: + pass + + # Parse Sel within APP entry + elif "Sel:" in line and current_app_entry is not None: + parts = line.split("Sel:") + if len(parts) >= 2: + try: + current_app_entry.sel = int(parts[1].strip()) + except ValueError: + pass + + # Parse DSCP within APP entry + elif "DSCP:" in line and current_app_entry is not None: + parts = line.split("DSCP:") + if len(parts) >= 2: + try: + current_app_entry.dscp = int(parts[1].strip()) + except ValueError: + pass + + # Parse protocol and port (e.g., "UDP or DCCP: 4791") + elif ( + "UDP" in line or "TCP" in line or "DCCP" in line + ) and current_app_entry is not None: + if ":" in line: + parts = line.split(":") + if len(parts) >= 2: + current_app_entry.protocol = parts[0].strip() + try: + current_app_entry.port = int(parts[1].strip()) + except ValueError: + pass + + # Parse TC Rate Limit: "TC Rate Limit: 100% 100% 100% 0% 0% 0% 0% 0%" + elif "TC Rate Limit:" in line: + parts = line.split("TC Rate Limit:") + if len(parts) >= 2: + rate_entries = parts[1].strip().split() + for rate in rate_entries: + rate_clean = rate.rstrip("%") + try: + qos_info.tc_rate_limit.append(int(rate_clean)) + except ValueError: + pass + + # Add the last APP entry if exists + if current_app_entry: + qos_info.app_entries.append(current_app_entry) + + return qos_info + def _collect_ethtool_info(self, interfaces: List[NetworkInterface]) -> Dict[str, EthtoolInfo]: """Collect ethtool information for all network interfaces. @@ -519,34 +714,52 @@ def _collect_lldp_info(self) -> None: priority=EventPriority.INFO, ) - def _collect_broadcom_nic_info(self) -> None: - """Collect Broadcom NIC information using niccli commands.""" + def _collect_broadcom_nic_info( + self, + ) -> Tuple[List[BroadcomNicDevice], Dict[int, BroadcomNicQos]]: + """Collect Broadcom NIC information using niccli commands. 
+ + Returns: + Tuple of (list of BroadcomNicDevice, dict mapping device number to BroadcomNicQos) + """ + devices = [] + qos_data = {} + # First, list devices res_listdev = self._run_sut_cmd(self.CMD_NICCLI_LISTDEV, sudo=True) if res_listdev.exit_code == 0: + # Parse device list + devices = self._parse_niccli_listdev(res_listdev.stdout) self._log_event( category=EventCategory.NETWORK, - description="Collected Broadcom NIC device list", + description=f"Collected Broadcom NIC device list: {len(devices)} devices", priority=EventPriority.INFO, ) - # Parse device numbers and collect QoS info for each - device_count = 0 - for line in res_listdev.stdout.splitlines(): - # Look for device numbers in output (format may vary) - # Common formats: "Device 0:", "dev 0", etc. - match = re.search(r"(?:Device|dev)\s+(\d+)", line, re.IGNORECASE) - if match: - device_num = match.group(1) - cmd = self.CMD_NICCLI_GETQOS_TEMPLATE.format(device_num=device_num) - res_qos = self._run_sut_cmd(cmd, sudo=True) - if res_qos.exit_code == 0: - device_count += 1 - - if device_count > 0: + # Collect QoS info for each device + for device in devices: + cmd = self.CMD_NICCLI_GETQOS_TEMPLATE.format(device_num=device.device_num) + res_qos = self._run_sut_cmd(cmd, sudo=True) + if res_qos.exit_code == 0: + qos_info = self._parse_niccli_qos(device.device_num, res_qos.stdout) + qos_data[device.device_num] = qos_info + self._log_event( + category=EventCategory.NETWORK, + description=f"Collected Broadcom NIC QoS info for device {device.device_num}", + priority=EventPriority.INFO, + ) + else: + self._log_event( + category=EventCategory.NETWORK, + description=f"Failed to collect QoS info for device {device.device_num}", + data={"command": res_qos.command, "exit_code": res_qos.exit_code}, + priority=EventPriority.WARNING, + ) + + if qos_data: self._log_event( category=EventCategory.NETWORK, - description=f"Collected Broadcom NIC QoS info for {device_count} devices", + description=f"Collected Broadcom NIC QoS 
info for {len(qos_data)} devices", priority=EventPriority.INFO, ) else: @@ -557,6 +770,8 @@ def _collect_broadcom_nic_info(self) -> None: priority=EventPriority.INFO, ) + return devices, qos_data + def _collect_pensando_nic_info(self) -> None: """Collect Pensando NIC information using nicctl commands.""" collected_count = 0 @@ -593,6 +808,8 @@ def collect_data( rules = [] neighbors = [] ethtool_data = {} + broadcom_devices: List[BroadcomNicDevice] = [] + broadcom_qos_data: Dict[int, BroadcomNicQos] = {} # Collect interface/address information res_addr = self._run_sut_cmd(self.CMD_ADDR) @@ -676,23 +893,25 @@ def collect_data( self._collect_lldp_info() # Collect Broadcom NIC information - self._collect_broadcom_nic_info() + broadcom_devices, broadcom_qos_data = self._collect_broadcom_nic_info() # Collect Pensando NIC information self._collect_pensando_nic_info() - if interfaces or routes or rules or neighbors: + if interfaces or routes or rules or neighbors or broadcom_devices: network_data = NetworkDataModel( interfaces=interfaces, routes=routes, rules=rules, neighbors=neighbors, ethtool_info=ethtool_data, + broadcom_nic_devices=broadcom_devices, + broadcom_nic_qos=broadcom_qos_data, ) self.result.message = ( f"Collected network data: {len(interfaces)} interfaces, " f"{len(routes)} routes, {len(rules)} rules, {len(neighbors)} neighbors, " - f"{len(ethtool_data)} ethtool entries" + f"{len(ethtool_data)} ethtool entries, {len(broadcom_devices)} Broadcom NICs" ) self.result.status = ExecutionStatus.OK return self.result, network_data diff --git a/nodescraper/plugins/inband/network/networkdata.py b/nodescraper/plugins/inband/network/networkdata.py index 5e94efc2..3f9430c8 100644 --- a/nodescraper/plugins/inband/network/networkdata.py +++ b/nodescraper/plugins/inband/network/networkdata.py @@ -105,6 +105,50 @@ class EthtoolInfo(BaseModel): link_detected: Optional[str] = None # Link detection status (e.g., "yes", "no") +class BroadcomNicDevice(BaseModel): + """Broadcom 
NIC device information from niccli --list_devices""" + + device_num: int # Device number (1, 2, 3, etc.) + model: Optional[str] = None # e.g., "Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC" + adapter_port: Optional[str] = None # e.g., "Adp#1 Port#1" + interface_name: Optional[str] = None # e.g., "benic1p1" + mac_address: Optional[str] = None # e.g., "8C:84:74:37:C3:70" + pci_address: Optional[str] = None # e.g., "0000:06:00.0" + + +class BroadcomNicQosAppEntry(BaseModel): + """APP TLV entry in Broadcom NIC QoS configuration""" + + priority: Optional[int] = None + sel: Optional[int] = None + dscp: Optional[int] = None + protocol: Optional[str] = None # "UDP or DCCP", etc. + port: Optional[int] = None + + +class BroadcomNicQos(BaseModel): + """Broadcom NIC QoS information from niccli --dev X qos --ets --show""" + + device_num: int # Device number this QoS info belongs to + raw_output: str # Raw command output + # ETS Configuration + prio_map: Dict[int, int] = Field( + default_factory=dict + ) # Priority to TC mapping {0: 0, 1: 0, ...} + tc_bandwidth: List[int] = Field( + default_factory=list + ) # TC bandwidth percentages [50, 50, 0, ...] + tsa_map: Dict[int, str] = Field( + default_factory=dict + ) # TC to TSA mapping {0: "ets", 1: "ets", ...} + # PFC Configuration + pfc_enabled: Optional[int] = None # Bitmap of PFC enabled priorities + # APP TLV entries + app_entries: List[BroadcomNicQosAppEntry] = Field(default_factory=list) + # TC Rate Limit + tc_rate_limit: List[int] = Field(default_factory=list) # TC rate limits [100, 100, 100, ...] 
+ + class NetworkDataModel(DataModel): """Complete network configuration data""" @@ -115,3 +159,7 @@ class NetworkDataModel(DataModel): ethtool_info: Dict[str, EthtoolInfo] = Field( default_factory=dict ) # Interface name -> EthtoolInfo mapping + broadcom_nic_devices: List[BroadcomNicDevice] = Field(default_factory=list) + broadcom_nic_qos: Dict[int, BroadcomNicQos] = Field( + default_factory=dict + ) # Device number -> QoS info mapping diff --git a/test/unit/plugin/test_network_collector.py b/test/unit/plugin/test_network_collector.py index 5be6deb0..4da8a1ce 100644 --- a/test/unit/plugin/test_network_collector.py +++ b/test/unit/plugin/test_network_collector.py @@ -32,6 +32,8 @@ from nodescraper.models.systeminfo import OSFamily from nodescraper.plugins.inband.network.network_collector import NetworkCollector from nodescraper.plugins.inband.network.networkdata import ( + BroadcomNicDevice, + BroadcomNicQos, EthtoolInfo, IpAddress, Neighbor, @@ -551,3 +553,394 @@ def test_network_data_model_creation(collector): assert len(data.ethtool_info) == 1 assert data.interfaces[0].name == "ethmock123" assert data.ethtool_info["ethmock123"].speed == "1000mockMb/s" + + +# Sample Broadcom NIC command outputs for testing +NICCLI_LISTDEV_OUTPUT = """root@smci355-ccs-aus-n13-25:/# niccli --list_devices + +1 ) Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC (Adp#1 Port#1) + Device Interface Name : benic1p1 + MAC Address : 8C:84:74:37:C3:70 + PCI Address : 0000:06:00.0 + +2 ) Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC (Adp#2 Port#1) + Device Interface Name : benic2p1 + MAC Address : 8C:84:74:37:DB:D0 + PCI Address : 0000:16:00.0 + +3 ) Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC (Adp#3 Port#1) + Device Interface Name : benic4p1 + MAC Address : 8C:84:74:37:6C:10 + PCI Address : 0000:66:00.0 + +4 ) Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC (Adp#4 Port#1) + Device Interface Name : benic3p1 + MAC Address : 8C:84:74:37:BB:F0 + PCI Address : 0000:76:00.0 + +5 ) 
Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC (Adp#5 Port#1) + Device Interface Name : benic5p1 + MAC Address : 8C:84:74:37:8E:A0 + PCI Address : 0000:86:00.0 + +6 ) Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC (Adp#6 Port#1) + Device Interface Name : benic6p1 + MAC Address : 6C:92:CF:9A:15:10 + PCI Address : 0000:96:00.0 + +7 ) Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC (Adp#7 Port#1) + Device Interface Name : benic8p1 + MAC Address : 8C:84:74:37:69:90 + PCI Address : 0000:E6:00.0 + +8 ) Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC (Adp#8 Port#1) + Device Interface Name : benic7p1 + MAC Address : 8C:84:74:37:C1:40 + PCI Address : 0000:F6:00.0 +""" + +NICCLI_QOS_OUTPUT = """root@smci355-ccs-aus-n13-25:/# niccli --dev 1 qos --ets --show + +IEEE 8021QAZ ETS Configuration TLV: + PRIO_MAP: 0:0 1:0 2:0 3:1 4:0 5:0 6:0 7:2 + TC Bandwidth: 50% 50% 0% + TSA_MAP: 0:ets 1:ets 2:strict +IEEE 8021QAZ PFC TLV: + PFC enabled: 3 +IEEE 8021QAZ APP TLV: + APP#0: + Priority: 7 + Sel: 5 + DSCP: 48 + + APP#1: + Priority: 3 + Sel: 5 + DSCP: 26 + + APP#2: + Priority: 3 + Sel: 3 + UDP or DCCP: 4791 + +TC Rate Limit: 100% 100% 100% 0% 0% 0% 0% 0% +""" + +NICCLI_LISTDEV_SINGLE_DEVICE = """1 ) Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC (Adp#1 Port#1) + Device Interface Name : benic1p1 + MAC Address : 8C:84:74:37:C3:70 + PCI Address : 0000:06:00.0 +""" + +NICCLI_QOS_MINIMAL_OUTPUT = """IEEE 8021QAZ ETS Configuration TLV: + PRIO_MAP: 0:0 1:1 + TC Bandwidth: 50% 50% + TSA_MAP: 0:ets 1:strict +IEEE 8021QAZ PFC TLV: + PFC enabled: 1 +TC Rate Limit: 100% 100% +""" + + +def test_parse_niccli_listdev_multiple_devices(collector): + """Test parsing multiple Broadcom NIC devices from niccli --list_devices output""" + devices = collector._parse_niccli_listdev(NICCLI_LISTDEV_OUTPUT) + + assert len(devices) == 8 + + # Check first device + device1 = devices[0] + assert device1.device_num == 1 + assert device1.model == "Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC" + 
assert device1.adapter_port == "Adp#1 Port#1" + assert device1.interface_name == "benic1p1" + assert device1.mac_address == "8C:84:74:37:C3:70" + assert device1.pci_address == "0000:06:00.0" + + # Check another device (device 3) + device3 = devices[2] + assert device3.device_num == 3 + assert device3.interface_name == "benic4p1" + assert device3.mac_address == "8C:84:74:37:6C:10" + assert device3.pci_address == "0000:66:00.0" + + # Check last device + device8 = devices[7] + assert device8.device_num == 8 + assert device8.interface_name == "benic7p1" + assert device8.mac_address == "8C:84:74:37:C1:40" + assert device8.pci_address == "0000:F6:00.0" + + +def test_parse_niccli_listdev_single_device(collector): + """Test parsing single Broadcom NIC device""" + devices = collector._parse_niccli_listdev(NICCLI_LISTDEV_SINGLE_DEVICE) + + assert len(devices) == 1 + device = devices[0] + assert device.device_num == 1 + assert device.model == "Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC" + assert device.adapter_port == "Adp#1 Port#1" + assert device.interface_name == "benic1p1" + assert device.mac_address == "8C:84:74:37:C3:70" + assert device.pci_address == "0000:06:00.0" + + +def test_parse_niccli_listdev_empty_output(collector): + """Test parsing empty niccli --list_devices output""" + devices = collector._parse_niccli_listdev("") + + assert len(devices) == 0 + + +def test_parse_niccli_listdev_malformed_output(collector): + """Test parsing malformed niccli --list_devices output gracefully""" + malformed = """some random text +not a valid device line +123 invalid format +""" + + devices = collector._parse_niccli_listdev(malformed) + + # Should handle gracefully, return empty list or skip invalid lines + assert isinstance(devices, list) + + +def test_parse_niccli_qos_complete(collector): + """Test parsing complete Broadcom NIC QoS output with all fields""" + qos = collector._parse_niccli_qos(1, NICCLI_QOS_OUTPUT) + + assert qos.device_num == 1 + assert qos.raw_output 
== NICCLI_QOS_OUTPUT + + # Check PRIO_MAP + assert len(qos.prio_map) == 8 + assert qos.prio_map[0] == 0 + assert qos.prio_map[1] == 0 + assert qos.prio_map[3] == 1 + assert qos.prio_map[7] == 2 + + # Check TC Bandwidth + assert len(qos.tc_bandwidth) == 3 + assert qos.tc_bandwidth[0] == 50 + assert qos.tc_bandwidth[1] == 50 + assert qos.tc_bandwidth[2] == 0 + + # Check TSA_MAP + assert len(qos.tsa_map) == 3 + assert qos.tsa_map[0] == "ets" + assert qos.tsa_map[1] == "ets" + assert qos.tsa_map[2] == "strict" + + # Check PFC enabled + assert qos.pfc_enabled == 3 + + # Check APP entries + assert len(qos.app_entries) == 3 + + # Check APP#0 + app0 = qos.app_entries[0] + assert app0.priority == 7 + assert app0.sel == 5 + assert app0.dscp == 48 + assert app0.protocol is None + assert app0.port is None + + # Check APP#1 + app1 = qos.app_entries[1] + assert app1.priority == 3 + assert app1.sel == 5 + assert app1.dscp == 26 + + # Check APP#2 (with protocol and port) + app2 = qos.app_entries[2] + assert app2.priority == 3 + assert app2.sel == 3 + assert app2.dscp is None + assert app2.protocol == "UDP or DCCP" + assert app2.port == 4791 + + # Check TC Rate Limit + assert len(qos.tc_rate_limit) == 8 + assert qos.tc_rate_limit[0] == 100 + assert qos.tc_rate_limit[1] == 100 + assert qos.tc_rate_limit[2] == 100 + assert qos.tc_rate_limit[3] == 0 + assert qos.tc_rate_limit[7] == 0 + + +def test_parse_niccli_qos_minimal(collector): + """Test parsing minimal Broadcom NIC QoS output""" + qos = collector._parse_niccli_qos(2, NICCLI_QOS_MINIMAL_OUTPUT) + + assert qos.device_num == 2 + assert qos.raw_output == NICCLI_QOS_MINIMAL_OUTPUT + + # Check PRIO_MAP + assert len(qos.prio_map) == 2 + assert qos.prio_map[0] == 0 + assert qos.prio_map[1] == 1 + + # Check TC Bandwidth + assert len(qos.tc_bandwidth) == 2 + assert qos.tc_bandwidth[0] == 50 + assert qos.tc_bandwidth[1] == 50 + + # Check TSA_MAP + assert len(qos.tsa_map) == 2 + assert qos.tsa_map[0] == "ets" + assert qos.tsa_map[1] == 
"strict" + + # Check PFC enabled + assert qos.pfc_enabled == 1 + + # Check APP entries (should be empty) + assert len(qos.app_entries) == 0 + + # Check TC Rate Limit + assert len(qos.tc_rate_limit) == 2 + assert qos.tc_rate_limit[0] == 100 + assert qos.tc_rate_limit[1] == 100 + + +def test_parse_niccli_qos_empty_output(collector): + """Test parsing empty QoS output""" + qos = collector._parse_niccli_qos(1, "") + + assert qos.device_num == 1 + assert qos.raw_output == "" + assert len(qos.prio_map) == 0 + assert len(qos.tc_bandwidth) == 0 + assert len(qos.tsa_map) == 0 + assert qos.pfc_enabled is None + assert len(qos.app_entries) == 0 + assert len(qos.tc_rate_limit) == 0 + + +def test_parse_niccli_qos_no_app_entries(collector): + """Test parsing QoS output without APP entries""" + qos_no_app = """IEEE 8021QAZ ETS Configuration TLV: + PRIO_MAP: 0:0 1:1 2:2 + TC Bandwidth: 33% 33% 34% + TSA_MAP: 0:ets 1:ets 2:ets +IEEE 8021QAZ PFC TLV: + PFC enabled: 7 +TC Rate Limit: 100% 100% 100% +""" + + qos = collector._parse_niccli_qos(5, qos_no_app) + + assert qos.device_num == 5 + assert len(qos.prio_map) == 3 + assert len(qos.tc_bandwidth) == 3 + assert len(qos.tsa_map) == 3 + assert qos.pfc_enabled == 7 + assert len(qos.app_entries) == 0 + assert len(qos.tc_rate_limit) == 3 + + +def test_parse_niccli_qos_multiple_app_protocols(collector): + """Test parsing QoS with APP entries having different protocols""" + qos_multi_protocol = """IEEE 8021QAZ ETS Configuration TLV: + PRIO_MAP: 0:0 + TC Bandwidth: 100% + TSA_MAP: 0:ets +IEEE 8021QAZ PFC TLV: + PFC enabled: 0 +IEEE 8021QAZ APP TLV: + APP#0: + Priority: 5 + Sel: 3 + TCP: 8080 + + APP#1: + Priority: 6 + Sel: 3 + UDP: 9000 + +TC Rate Limit: 100% +""" + + qos = collector._parse_niccli_qos(3, qos_multi_protocol) + + assert len(qos.app_entries) == 2 + + # Check TCP entry + app0 = qos.app_entries[0] + assert app0.priority == 5 + assert app0.sel == 3 + assert app0.protocol == "TCP" + assert app0.port == 8080 + + # Check UDP entry + 
app1 = qos.app_entries[1] + assert app1.priority == 6 + assert app1.sel == 3 + assert app1.protocol == "UDP" + assert app1.port == 9000 + + +def test_parse_niccli_qos_malformed_values(collector): + """Test parsing QoS output with malformed values gracefully""" + malformed = """IEEE 8021QAZ ETS Configuration TLV: + PRIO_MAP: 0:invalid 1:1 bad:data + TC Bandwidth: 50% invalid 50% + TSA_MAP: 0:ets bad:value 1:strict +IEEE 8021QAZ PFC TLV: + PFC enabled: not_a_number +TC Rate Limit: 100% bad% 100% +""" + + qos = collector._parse_niccli_qos(1, malformed) + + # Should skip invalid entries but parse valid ones + assert qos.device_num == 1 + # Should have parsed valid prio_map entry (1:1) + assert 1 in qos.prio_map + assert qos.prio_map[1] == 1 + # Should have parsed valid bandwidth entries + assert 50 in qos.tc_bandwidth + # Should have parsed valid tsa_map entries + assert qos.tsa_map.get(0) == "ets" + assert qos.tsa_map.get(1) == "strict" + # PFC should be None due to invalid number + assert qos.pfc_enabled is None + + +def test_network_data_model_with_broadcom_nic(collector): + """Test creating NetworkDataModel with Broadcom NIC data""" + device = BroadcomNicDevice( + device_num=1, + model="Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC", + adapter_port="Adp#1 Port#1", + interface_name="benic1p1", + mac_address="8C:84:74:37:C3:70", + pci_address="0000:06:00.0", + ) + + qos = BroadcomNicQos( + device_num=1, + raw_output="test output", + prio_map={0: 0, 1: 1}, + tc_bandwidth=[50, 50], + tsa_map={0: "ets", 1: "strict"}, + pfc_enabled=3, + tc_rate_limit=[100, 100], + ) + + data = NetworkDataModel( + interfaces=[], + routes=[], + rules=[], + neighbors=[], + ethtool_info={}, + broadcom_nic_devices=[device], + broadcom_nic_qos={1: qos}, + ) + + assert len(data.broadcom_nic_devices) == 1 + assert len(data.broadcom_nic_qos) == 1 + assert data.broadcom_nic_devices[0].device_num == 1 + assert data.broadcom_nic_devices[0].interface_name == "benic1p1" + assert 
data.broadcom_nic_qos[1].device_num == 1 + assert data.broadcom_nic_qos[1].pfc_enabled == 3 From 744a73e7b6cc1faac44d61428df183d39b0f95e0 Mon Sep 17 00:00:00 2001 From: jaspals3123 Date: Tue, 6 Jan 2026 15:56:35 -0600 Subject: [PATCH 03/29] pensando changes --- .../inband/network/network_collector.py | 944 ++++++++++++- .../plugins/inband/network/networkdata.py | 154 +++ test/unit/plugin/test_network_collector.py | 1186 +++++++++++++++-- 3 files changed, 2139 insertions(+), 145 deletions(-) diff --git a/nodescraper/plugins/inband/network/network_collector.py b/nodescraper/plugins/inband/network/network_collector.py index 3980e645..4289d601 100644 --- a/nodescraper/plugins/inband/network/network_collector.py +++ b/nodescraper/plugins/inband/network/network_collector.py @@ -39,6 +39,17 @@ Neighbor, NetworkDataModel, NetworkInterface, + PensandoNicCard, + PensandoNicDcqcn, + PensandoNicEnvironment, + PensandoNicPcieAts, + PensandoNicPort, + PensandoNicQos, + PensandoNicQosScheduling, + PensandoNicRdmaStatistic, + PensandoNicRdmaStatistics, + PensandoNicVersionFirmware, + PensandoNicVersionHostSoftware, Route, RoutingRule, ) @@ -473,7 +484,6 @@ def _parse_niccli_listdev(self, output: str) -> List[BroadcomNicDevice]: continue # Check if this is a device header line - # Format: "1 ) Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC (Adp#1 Port#1)" match = re.match(r"^(\d+)\s*\)\s*(.+?)(?:\s+\((.+?)\))?$", line_stripped) if match: device_num_str = match.group(1) @@ -516,6 +526,729 @@ def _parse_niccli_listdev(self, output: str) -> List[BroadcomNicDevice]: return devices + def _parse_nicctl_card(self, output: str) -> List[PensandoNicCard]: + """Parse 'nicctl show card' output into PensandoNicCard objects. 
+ + Args: + output: Raw output from 'nicctl show card' command + + Returns: + List of PensandoNicCard objects + """ + cards = [] + + # Skip header lines and separator lines + in_data_section = False + + for line in output.splitlines(): + line_stripped = line.strip() + if not line_stripped: + continue + + # Skip header line (starts with "Id") + if line_stripped.startswith("Id"): + in_data_section = True + continue + + # Skip separator lines (mostly dashes) + if re.match(r"^-+$", line_stripped): + continue + + # Parse data lines after header + if in_data_section: + # Split by whitespace + parts = line_stripped.split() + + # Expected format: Id PCIe_BDF ASIC F/W_partition Serial_number + if len(parts) >= 2: + card = PensandoNicCard( + id=parts[0], + pcie_bdf=parts[1], + asic=parts[2] if len(parts) > 2 else None, + fw_partition=parts[3] if len(parts) > 3 else None, + serial_number=parts[4] if len(parts) > 4 else None, + ) + cards.append(card) + + return cards + + def _parse_nicctl_dcqcn(self, output: str) -> List[PensandoNicDcqcn]: + """Parse 'nicctl show dcqcn' output into PensandoNicDcqcn objects. 
+ + Args: + output: Raw output from 'nicctl show dcqcn' command + + Returns: + List of PensandoNicDcqcn objects + """ + dcqcn_entries = [] + current_entry = None + + for line in output.splitlines(): + line_stripped = line.strip() + if not line_stripped: + continue + + # Check for NIC line + if line_stripped.startswith("NIC :"): + # Save previous entry if exists + if current_entry: + dcqcn_entries.append(current_entry) + + # Parse NIC ID and PCIe BDF + # Format: "NIC : ()" + match = re.match( + r"NIC\s*:\s*([a-f0-9\-]+)\s*\(([0-9a-f:\.]+)\)", line_stripped, re.IGNORECASE + ) + if match: + nic_id = match.group(1) + pcie_bdf = match.group(2) + current_entry = PensandoNicDcqcn( + nic_id=nic_id, + pcie_bdf=pcie_bdf, + ) + continue + + # Skip separator lines (dashes or asterisks) + if re.match(r"^[-*]+$", line_stripped): + continue + + # Parse fields within current entry + if current_entry and ":" in line_stripped: + parts = line_stripped.split(":", 1) + if len(parts) == 2: + key = parts[0].strip() + value = parts[1].strip() + + if key == "Lif id": + current_entry.lif_id = value + elif key == "ROCE device": + current_entry.roce_device = value + elif key == "DCQCN profile id": + current_entry.dcqcn_profile_id = value + elif key == "Status": + current_entry.status = value + + # Add the last entry if exists + if current_entry: + dcqcn_entries.append(current_entry) + + return dcqcn_entries + + def _parse_nicctl_environment(self, output: str) -> List[PensandoNicEnvironment]: + """Parse 'nicctl show environment' output into PensandoNicEnvironment objects. 
+ + Args: + output: Raw output from 'nicctl show environment' command + + Returns: + List of PensandoNicEnvironment objects + """ + environment_entries = [] + current_entry = None + + for line in output.splitlines(): + line_stripped = line.strip() + if not line_stripped: + continue + + # Check for NIC line + if line_stripped.startswith("NIC :"): + # Save previous entry if exists + if current_entry: + environment_entries.append(current_entry) + + # Parse NIC ID and PCIe BDF + # Format: "NIC : ()" + match = re.match( + r"NIC\s*:\s*([a-f0-9\-]+)\s*\(([0-9a-f:\.]+)\)", line_stripped, re.IGNORECASE + ) + if match: + nic_id = match.group(1) + pcie_bdf = match.group(2) + current_entry = PensandoNicEnvironment( + nic_id=nic_id, + pcie_bdf=pcie_bdf, + ) + continue + + # Skip separator lines (dashes) + if re.match(r"^-+$", line_stripped): + continue + + # Skip section headers (Power(W):, Temperature(C):, etc.) + if line_stripped.endswith("):"): + continue + + # Parse fields within current entry + if current_entry and ":" in line_stripped: + parts = line_stripped.split(":", 1) + if len(parts) == 2: + key = parts[0].strip() + value_str = parts[1].strip() + + # Try to parse the value as float + try: + value = float(value_str) + except ValueError: + continue + + # Map keys to fields + if key == "Total power drawn (pin)" or key == "Total power drawn": + current_entry.total_power_drawn = value + elif key == "Core power (pout1)" or key == "Core power": + current_entry.core_power = value + elif key == "ARM power (pout2)" or key == "ARM power": + current_entry.arm_power = value + elif key == "Local board temperature": + current_entry.local_board_temperature = value + elif key == "Die temperature": + current_entry.die_temperature = value + elif key == "Input voltage": + current_entry.input_voltage = value + elif key == "Core voltage": + current_entry.core_voltage = value + elif key == "Core frequency": + current_entry.core_frequency = value + elif key == "CPU frequency": + 
current_entry.cpu_frequency = value + elif key == "P4 stage frequency": + current_entry.p4_stage_frequency = value + + # Add the last entry if exists + if current_entry: + environment_entries.append(current_entry) + + return environment_entries + + def _parse_nicctl_pcie_ats(self, output: str) -> List[PensandoNicPcieAts]: + """Parse 'nicctl show pcie ats' output into PensandoNicPcieAts objects. + + Args: + output: Raw output from 'nicctl show pcie ats' command + + Returns: + List of PensandoNicPcieAts objects + """ + pcie_ats_entries = [] + + for line in output.splitlines(): + line_stripped = line.strip() + if not line_stripped: + continue + + # Parse line format: "NIC : () : " + if line_stripped.startswith("NIC :"): + match = re.match( + r"NIC\s*:\s*([a-f0-9\-]+)\s*\(([0-9a-f:\.]+)\)\s*:\s*(\w+)", + line_stripped, + re.IGNORECASE, + ) + if match: + nic_id = match.group(1) + pcie_bdf = match.group(2) + status = match.group(3) + entry = PensandoNicPcieAts( + nic_id=nic_id, + pcie_bdf=pcie_bdf, + status=status, + ) + pcie_ats_entries.append(entry) + + return pcie_ats_entries + + def _parse_nicctl_port(self, output: str) -> List[PensandoNicPort]: + """Parse 'nicctl show port' output into PensandoNicPort objects. 
+ + Args: + output: Raw output from 'nicctl show port' command + + Returns: + List of PensandoNicPort objects + """ + port_entries = [] + current_entry = None + current_section = None # 'spec' or 'status' + current_nic_id = None + current_pcie_bdf = None + + for line in output.splitlines(): + line_stripped = line.strip() + if not line_stripped: + continue + + # Check for NIC line + if line_stripped.startswith("NIC") and ":" in line_stripped: + # Save previous entry if exists + if current_entry: + port_entries.append(current_entry) + current_entry = None + + # Parse NIC ID and PCIe BDF + match = re.match( + r"NIC\s*:\s*([a-f0-9\-]+)\s*\(([0-9a-f:\.]+)\)", line_stripped, re.IGNORECASE + ) + if match: + current_nic_id = match.group(1) + current_pcie_bdf = match.group(2) + continue + + # Check for Port line + if ( + line_stripped.startswith("Port") + and ":" in line_stripped + and current_nic_id + and current_pcie_bdf + ): + # Save previous entry if exists + if current_entry: + port_entries.append(current_entry) + + # Parse Port ID and Port name + match = re.match( + r"Port\s*:\s*([a-f0-9\-]+)\s*\(([^\)]+)\)", line_stripped, re.IGNORECASE + ) + if match: + port_id = match.group(1) + port_name = match.group(2) + current_entry = PensandoNicPort( + nic_id=current_nic_id, + pcie_bdf=current_pcie_bdf, + port_id=port_id, + port_name=port_name, + ) + continue + + # Skip separator lines (dashes) + if re.match(r"^-+$", line_stripped): + continue + + # Check for section headers + if line_stripped.endswith(":"): + if line_stripped == "Spec:": + current_section = "spec" + elif line_stripped == "Status:": + current_section = "status" + continue + + # Parse fields within current entry and section + if current_entry and current_section and ":" in line_stripped: + parts = line_stripped.split(":", 1) + if len(parts) == 2: + key = parts[0].strip() + value = parts[1].strip() + + if current_section == "spec": + if key == "Ifindex": + current_entry.spec_ifindex = value + elif key == 
"Type": + current_entry.spec_type = value + elif key == "speed": + current_entry.spec_speed = value + elif key == "Admin state": + current_entry.spec_admin_state = value + elif key == "FEC type": + current_entry.spec_fec_type = value + elif key == "Pause type": + current_entry.spec_pause_type = value + elif key == "Number of lanes": + try: + current_entry.spec_num_lanes = int(value) + except ValueError: + pass + elif key == "MTU": + try: + current_entry.spec_mtu = int(value) + except ValueError: + pass + elif key == "TX pause": + current_entry.spec_tx_pause = value + elif key == "RX pause": + current_entry.spec_rx_pause = value + elif key == "Auto negotiation": + current_entry.spec_auto_negotiation = value + elif current_section == "status": + if key == "Physical port": + try: + current_entry.status_physical_port = int(value) + except ValueError: + pass + elif key == "Operational status": + current_entry.status_operational_status = value + elif key == "Link FSM state": + current_entry.status_link_fsm_state = value + elif key == "FEC type": + current_entry.status_fec_type = value + elif key == "Cable type": + current_entry.status_cable_type = value + elif key == "Number of lanes": + try: + current_entry.status_num_lanes = int(value) + except ValueError: + pass + elif key == "speed": + current_entry.status_speed = value + elif key == "Auto negotiation": + current_entry.status_auto_negotiation = value + elif key == "MAC ID": + try: + current_entry.status_mac_id = int(value) + except ValueError: + pass + elif key == "MAC channel": + try: + current_entry.status_mac_channel = int(value) + except ValueError: + pass + elif key == "MAC address": + current_entry.status_mac_address = value + elif key == "Transceiver type": + current_entry.status_transceiver_type = value + elif key == "Transceiver state": + current_entry.status_transceiver_state = value + elif key == "Transceiver PID": + current_entry.status_transceiver_pid = value + + # Add the last entry if exists + if 
current_entry: + port_entries.append(current_entry) + + return port_entries + + def _parse_nicctl_qos(self, output: str) -> List[PensandoNicQos]: + """Parse 'nicctl show qos' output into PensandoNicQos objects. + + Args: + output: Raw output from 'nicctl show qos' command + + Returns: + List of PensandoNicQos objects + """ + qos_entries = [] + current_entry = None + current_nic_id = None + current_pcie_bdf = None + in_scheduling_table = False + + for line in output.splitlines(): + line_stripped = line.strip() + if not line_stripped: + continue + + # Check for NIC line: "NIC : 42424650-4c32-3533-3330-323934000000 (0000:06:00.0)" + if line_stripped.startswith("NIC") and ":" in line_stripped: + # Save previous entry if exists + if current_entry: + qos_entries.append(current_entry) + current_entry = None + + # Parse NIC ID and PCIe BDF + match = re.match( + r"NIC\s*:\s*([a-f0-9\-]+)\s*\(([0-9a-f:\.]+)\)", line_stripped, re.IGNORECASE + ) + if match: + current_nic_id = match.group(1) + current_pcie_bdf = match.group(2) + in_scheduling_table = False + continue + + # Check for Port line: "Port : 0490814a-6c40-4242-4242-000011010000" + if ( + line_stripped.startswith("Port") + and ":" in line_stripped + and current_nic_id + and current_pcie_bdf + ): + # Save previous entry if exists + if current_entry: + qos_entries.append(current_entry) + + # Parse Port ID + parts = line_stripped.split(":") + if len(parts) >= 2: + port_id = parts[1].strip() + current_entry = PensandoNicQos( + nic_id=current_nic_id, + pcie_bdf=current_pcie_bdf, + port_id=port_id, + ) + in_scheduling_table = False + continue + + # Skip separator lines (dashes) but don't reset scheduling table flag + if re.match(r"^-+$", line_stripped): + continue + + # Check for section headers + if current_entry: + # Classification type + if "Classification type" in line: + parts = line_stripped.split(":") + if len(parts) >= 2: + current_entry.classification_type = parts[1].strip() + + # DSCP bitmap + elif "DSCP bitmap" in 
line and "==>" in line: + parts = line_stripped.split("==>") + if len(parts) >= 2: + bitmap_part = parts[0].split(":") + if len(bitmap_part) >= 2: + current_entry.dscp_bitmap = bitmap_part[1].strip() + priority_part = parts[1].split(":") + if len(priority_part) >= 2: + try: + current_entry.dscp_priority = int(priority_part[1].strip()) + except ValueError: + pass + + # DSCP range + elif line_stripped.startswith("DSCP") and "==>" in line and "bitmap" not in line: + parts = line_stripped.split("==>") + if len(parts) >= 2: + dscp_part = parts[0].split(":") + if len(dscp_part) >= 2: + current_entry.dscp_range = dscp_part[1].strip() + priority_part = parts[1].split(":") + if len(priority_part) >= 2: + try: + current_entry.dscp_priority = int(priority_part[1].strip()) + except ValueError: + pass + + # PFC priority bitmap + elif "PFC priority bitmap" in line: + parts = line_stripped.split(":") + if len(parts) >= 2: + current_entry.pfc_priority_bitmap = parts[1].strip() + + # PFC no-drop priorities + elif "PFC no-drop priorities" in line: + parts = line_stripped.split(":") + if len(parts) >= 2: + current_entry.pfc_no_drop_priorities = parts[1].strip() + + # Scheduling table header + elif "Priority" in line and "Scheduling" in line: + in_scheduling_table = True + continue + + # Parse scheduling table entries + elif in_scheduling_table and not line_stripped.startswith("---"): + # Try to parse scheduling entry + # Format: "0 DWRR 0 N/A" + parts = line_stripped.split() + if len(parts) >= 2: + try: + priority = int(parts[0]) + scheduling_type = parts[1] if len(parts) > 1 else None + bandwidth = None + rate_limit = None + if len(parts) > 2: + try: + bandwidth = int(parts[2]) + except ValueError: + pass + if len(parts) > 3: + rate_limit = parts[3] + + sched_entry = PensandoNicQosScheduling( + priority=priority, + scheduling_type=scheduling_type, + bandwidth=bandwidth, + rate_limit=rate_limit, + ) + current_entry.scheduling.append(sched_entry) + except (ValueError, IndexError): + 
pass + + # Add the last entry if exists + if current_entry: + qos_entries.append(current_entry) + + return qos_entries + + def _parse_nicctl_rdma_statistics(self, output: str) -> List[PensandoNicRdmaStatistics]: + """Parse 'nicctl show rdma statistics' output into PensandoNicRdmaStatistics objects. + + Args: + output: Raw output from 'nicctl show rdma statistics' command + + Returns: + List of PensandoNicRdmaStatistics objects + """ + rdma_stats_entries = [] + current_entry = None + in_statistics_table = False + + for line in output.splitlines(): + line_stripped = line.strip() + if not line_stripped: + continue + + # Check for NIC line: "NIC : 42424650-4c32-3533-3330-323934000000 (0000:06:00.0)" + if line_stripped.startswith("NIC") and ":" in line_stripped: + # Save previous entry if exists + if current_entry: + rdma_stats_entries.append(current_entry) + + # Parse NIC ID and PCIe BDF + match = re.match( + r"NIC\s*:\s*([a-f0-9\-]+)\s*\(([0-9a-f:\.]+)\)", line_stripped, re.IGNORECASE + ) + if match: + nic_id = match.group(1) + pcie_bdf = match.group(2) + current_entry = PensandoNicRdmaStatistics( + nic_id=nic_id, + pcie_bdf=pcie_bdf, + ) + in_statistics_table = False + continue + + # Skip separator lines (dashes) + if re.match(r"^-+$", line_stripped): + continue + + # Check for table header + if "Name" in line and "Count" in line: + in_statistics_table = True + continue + + # Parse statistics entries + if current_entry and in_statistics_table: + # The format is: "Queue pair create 1" + # We need to split from the right to get the count + parts = line_stripped.rsplit(None, 1) # Split from right, max 1 split + if len(parts) == 2: + name = parts[0].strip() + count_str = parts[1].strip() + try: + count = int(count_str) + stat_entry = PensandoNicRdmaStatistic( + name=name, + count=count, + ) + current_entry.statistics.append(stat_entry) + except ValueError: + pass + + # Add the last entry if exists + if current_entry: + rdma_stats_entries.append(current_entry) + + return 
rdma_stats_entries + + def _parse_nicctl_version_host_software( + self, output: str + ) -> Optional[PensandoNicVersionHostSoftware]: + """Parse 'nicctl show version host-software' output into PensandoNicVersionHostSoftware object. + + Args: + output: Raw output from 'nicctl show version host-software' command + + Returns: + PensandoNicVersionHostSoftware object or None if no data found + """ + version_info = PensandoNicVersionHostSoftware() + found_data = False + + for line in output.splitlines(): + line_stripped = line.strip() + if not line_stripped or ":" not in line_stripped: + continue + + # Split on the first colon to get key and value + parts = line_stripped.split(":", 1) + if len(parts) != 2: + continue + + key = parts[0].strip().lower() + value = parts[1].strip() + + if "nicctl" in key: + version_info.nicctl = value + found_data = True + elif "ipc driver" in key or "ipc_driver" in key: + version_info.ipc_driver = value + found_data = True + elif "ionic driver" in key or "ionic_driver" in key: + version_info.ionic_driver = value + found_data = True + + return version_info if found_data else None + + def _parse_nicctl_version_firmware(self, output: str) -> List[PensandoNicVersionFirmware]: + """Parse 'nicctl show version firmware' output into PensandoNicVersionFirmware objects. 
+ + Args: + output: Raw output from 'nicctl show version firmware' command + + Returns: + List of PensandoNicVersionFirmware objects + """ + firmware_entries = [] + current_entry = None + + for line in output.splitlines(): + line_stripped = line.strip() + if not line_stripped: + continue + + # Skip separator lines (dashes) + if re.match(r"^-+$", line_stripped): + # Save previous entry when we hit a separator + if current_entry: + firmware_entries.append(current_entry) + current_entry = None + continue + + # Check for NIC line + if line_stripped.startswith("NIC") and ":" in line_stripped: + # Save previous entry if exists + if current_entry: + firmware_entries.append(current_entry) + + # Parse NIC ID and PCIe BDF + match = re.match( + r"NIC\s*:\s*([a-f0-9\-]+)\s*\(([0-9a-f:\.]+)\)", line_stripped, re.IGNORECASE + ) + if match: + nic_id = match.group(1) + pcie_bdf = match.group(2) + current_entry = PensandoNicVersionFirmware( + nic_id=nic_id, + pcie_bdf=pcie_bdf, + ) + continue + + # Parse version fields + if current_entry and ":" in line_stripped: + parts = line_stripped.split(":", 1) + if len(parts) == 2: + key = parts[0].strip().lower() + value = parts[1].strip() + + if "cpld" in key: + current_entry.cpld = value + elif "boot0" in key: + current_entry.boot0 = value + elif "uboot-a" in key or "uboot_a" in key: + current_entry.uboot_a = value + elif "firmware-a" in key or "firmware_a" in key: + current_entry.firmware_a = value + elif ( + "device config-a" in key + or "device_config_a" in key + or "device config" in key + ): + current_entry.device_config_a = value + + # Add the last entry if exists + if current_entry: + firmware_entries.append(current_entry) + + return firmware_entries + def _parse_niccli_qos(self, device_num: int, output: str) -> BroadcomNicQos: """Parse 'niccli --dev X qos --ets --show' output into BroadcomNicQos object. 
@@ -772,10 +1505,159 @@ def _collect_broadcom_nic_info( return devices, qos_data - def _collect_pensando_nic_info(self) -> None: - """Collect Pensando NIC information using nicctl commands.""" + def _collect_pensando_nic_info( + self, + ) -> Tuple[ + List[PensandoNicCard], + List[PensandoNicDcqcn], + List[PensandoNicEnvironment], + List[PensandoNicPcieAts], + List[PensandoNicPort], + List[PensandoNicQos], + List[PensandoNicRdmaStatistics], + Optional[PensandoNicVersionHostSoftware], + List[PensandoNicVersionFirmware], + ]: + """Collect Pensando NIC information using nicctl commands. + + Returns: + Tuple of (list of PensandoNicCard, list of PensandoNicDcqcn, + list of PensandoNicEnvironment, list of PensandoNicPcieAts, + list of PensandoNicPort, list of PensandoNicQos, + list of PensandoNicRdmaStatistics, + PensandoNicVersionHostSoftware object, + list of PensandoNicVersionFirmware) + """ + cards = [] + dcqcn_entries = [] + environment_entries = [] + pcie_ats_entries = [] + port_entries = [] + qos_entries = [] + rdma_statistics_entries = [] + version_host_software = None + version_firmware_entries = [] collected_count = 0 + + # Parse nicctl show card output + res_card = self._run_sut_cmd("nicctl show card", sudo=True) + if res_card.exit_code == 0: + cards = self._parse_nicctl_card(res_card.stdout) + self._log_event( + category=EventCategory.NETWORK, + description=f"Collected Pensando NIC card list: {len(cards)} cards", + priority=EventPriority.INFO, + ) + collected_count += 1 + + # Parse nicctl show dcqcn output + res_dcqcn = self._run_sut_cmd("nicctl show dcqcn", sudo=True) + if res_dcqcn.exit_code == 0: + dcqcn_entries = self._parse_nicctl_dcqcn(res_dcqcn.stdout) + self._log_event( + category=EventCategory.NETWORK, + description=f"Collected Pensando NIC DCQCN info: {len(dcqcn_entries)} entries", + priority=EventPriority.INFO, + ) + collected_count += 1 + + # Parse nicctl show environment output + res_environment = self._run_sut_cmd("nicctl show environment", 
sudo=True) + if res_environment.exit_code == 0: + environment_entries = self._parse_nicctl_environment(res_environment.stdout) + self._log_event( + category=EventCategory.NETWORK, + description=f"Collected Pensando NIC environment info: {len(environment_entries)} entries", + priority=EventPriority.INFO, + ) + collected_count += 1 + + # Parse nicctl show pcie ats output + res_pcie_ats = self._run_sut_cmd("nicctl show pcie ats", sudo=True) + if res_pcie_ats.exit_code == 0: + pcie_ats_entries = self._parse_nicctl_pcie_ats(res_pcie_ats.stdout) + self._log_event( + category=EventCategory.NETWORK, + description=f"Collected Pensando NIC PCIe ATS info: {len(pcie_ats_entries)} entries", + priority=EventPriority.INFO, + ) + collected_count += 1 + + # Parse nicctl show port output + res_port = self._run_sut_cmd("nicctl show port", sudo=True) + if res_port.exit_code == 0: + port_entries = self._parse_nicctl_port(res_port.stdout) + self._log_event( + category=EventCategory.NETWORK, + description=f"Collected Pensando NIC port info: {len(port_entries)} ports", + priority=EventPriority.INFO, + ) + collected_count += 1 + + # Parse nicctl show qos output + res_qos = self._run_sut_cmd("nicctl show qos", sudo=True) + if res_qos.exit_code == 0: + qos_entries = self._parse_nicctl_qos(res_qos.stdout) + self._log_event( + category=EventCategory.NETWORK, + description=f"Collected Pensando NIC QoS info: {len(qos_entries)} entries", + priority=EventPriority.INFO, + ) + collected_count += 1 + + # Parse nicctl show rdma statistics output + res_rdma_stats = self._run_sut_cmd("nicctl show rdma statistics", sudo=True) + if res_rdma_stats.exit_code == 0: + rdma_statistics_entries = self._parse_nicctl_rdma_statistics(res_rdma_stats.stdout) + self._log_event( + category=EventCategory.NETWORK, + description=f"Collected Pensando NIC RDMA statistics: {len(rdma_statistics_entries)} entries", + priority=EventPriority.INFO, + ) + collected_count += 1 + + # Parse nicctl show version host-software output + 
res_version_host = self._run_sut_cmd("nicctl show version host-software", sudo=True) + if res_version_host.exit_code == 0: + version_host_software = self._parse_nicctl_version_host_software( + res_version_host.stdout + ) + if version_host_software: + self._log_event( + category=EventCategory.NETWORK, + description="Collected Pensando NIC host software version", + priority=EventPriority.INFO, + ) + collected_count += 1 + + # Parse nicctl show version firmware output + res_version_firmware = self._run_sut_cmd("nicctl show version firmware", sudo=True) + if res_version_firmware.exit_code == 0: + version_firmware_entries = self._parse_nicctl_version_firmware( + res_version_firmware.stdout + ) + self._log_event( + category=EventCategory.NETWORK, + description=f"Collected Pensando NIC firmware versions: {len(version_firmware_entries)} entries", + priority=EventPriority.INFO, + ) + collected_count += 1 + + # Collect other nicctl information (raw data) for cmd in self.CMD_NICCTL_COMMANDS: + if cmd in [ + "nicctl show card", + "nicctl show dcqcn", + "nicctl show environment", + "nicctl show pcie ats", + "nicctl show port", + "nicctl show qos", + "nicctl show rdma statistics", + "nicctl show version host-software", + "nicctl show version firmware", + ]: + # Already collected and parsed above + continue res = self._run_sut_cmd(cmd, sudo=True) if res.exit_code == 0: collected_count += 1 @@ -793,6 +1675,18 @@ def _collect_pensando_nic_info(self) -> None: priority=EventPriority.INFO, ) + return ( + cards, + dcqcn_entries, + environment_entries, + pcie_ats_entries, + port_entries, + qos_entries, + rdma_statistics_entries, + version_host_software, + version_firmware_entries, + ) + def collect_data( self, args=None, @@ -810,6 +1704,15 @@ def collect_data( ethtool_data = {} broadcom_devices: List[BroadcomNicDevice] = [] broadcom_qos_data: Dict[int, BroadcomNicQos] = {} + pensando_cards: List[PensandoNicCard] = [] + pensando_dcqcn: List[PensandoNicDcqcn] = [] + pensando_environment: 
List[PensandoNicEnvironment] = [] + pensando_pcie_ats: List[PensandoNicPcieAts] = [] + pensando_ports: List[PensandoNicPort] = [] + pensando_qos: List[PensandoNicQos] = [] + pensando_rdma_statistics: List[PensandoNicRdmaStatistics] = [] + pensando_version_host_software: Optional[PensandoNicVersionHostSoftware] = None + pensando_version_firmware: List[PensandoNicVersionFirmware] = [] # Collect interface/address information res_addr = self._run_sut_cmd(self.CMD_ADDR) @@ -896,9 +1799,19 @@ def collect_data( broadcom_devices, broadcom_qos_data = self._collect_broadcom_nic_info() # Collect Pensando NIC information - self._collect_pensando_nic_info() - - if interfaces or routes or rules or neighbors or broadcom_devices: + ( + pensando_cards, + pensando_dcqcn, + pensando_environment, + pensando_pcie_ats, + pensando_ports, + pensando_qos, + pensando_rdma_statistics, + pensando_version_host_software, + pensando_version_firmware, + ) = self._collect_pensando_nic_info() + + if interfaces or routes or rules or neighbors or broadcom_devices or pensando_cards: network_data = NetworkDataModel( interfaces=interfaces, routes=routes, @@ -907,11 +1820,28 @@ def collect_data( ethtool_info=ethtool_data, broadcom_nic_devices=broadcom_devices, broadcom_nic_qos=broadcom_qos_data, + pensando_nic_cards=pensando_cards, + pensando_nic_dcqcn=pensando_dcqcn, + pensando_nic_environment=pensando_environment, + pensando_nic_pcie_ats=pensando_pcie_ats, + pensando_nic_ports=pensando_ports, + pensando_nic_qos=pensando_qos, + pensando_nic_rdma_statistics=pensando_rdma_statistics, + pensando_nic_version_host_software=pensando_version_host_software, + pensando_nic_version_firmware=pensando_version_firmware, ) self.result.message = ( f"Collected network data: {len(interfaces)} interfaces, " f"{len(routes)} routes, {len(rules)} rules, {len(neighbors)} neighbors, " - f"{len(ethtool_data)} ethtool entries, {len(broadcom_devices)} Broadcom NICs" + f"{len(ethtool_data)} ethtool entries, 
{len(broadcom_devices)} Broadcom NICs, " + f"{len(pensando_cards)} Pensando NICs, {len(pensando_dcqcn)} Pensando DCQCN entries, " + f"{len(pensando_environment)} Pensando environment entries, " + f"{len(pensando_pcie_ats)} Pensando PCIe ATS entries, " + f"{len(pensando_ports)} Pensando ports, " + f"{len(pensando_qos)} Pensando QoS entries, " + f"{len(pensando_rdma_statistics)} Pensando RDMA statistics, " + f"Pensando host software version: {'Yes' if pensando_version_host_software else 'No'}, " + f"{len(pensando_version_firmware)} Pensando firmware versions" ) self.result.status = ExecutionStatus.OK return self.result, network_data diff --git a/nodescraper/plugins/inband/network/networkdata.py b/nodescraper/plugins/inband/network/networkdata.py index 3f9430c8..34d1f63e 100644 --- a/nodescraper/plugins/inband/network/networkdata.py +++ b/nodescraper/plugins/inband/network/networkdata.py @@ -149,6 +149,151 @@ class BroadcomNicQos(BaseModel): tc_rate_limit: List[int] = Field(default_factory=list) # TC rate limits [100, 100, 100, ...] 
+class PensandoNicCard(BaseModel): + """Pensando NIC card information from nicctl show card""" + + id: str # Card ID (UUID format) + pcie_bdf: str # PCIe Bus:Device.Function (e.g., "0000:06:00.0") + asic: Optional[str] = None # ASIC type (e.g., "salina") + fw_partition: Optional[str] = None # Firmware partition (e.g., "A") + serial_number: Optional[str] = None # Serial number (e.g., "FPL25330294") + + +class PensandoNicDcqcn(BaseModel): + """Pensando NIC DCQCN information from nicctl show dcqcn""" + + nic_id: str # NIC ID (UUID format) + pcie_bdf: str # PCIe Bus:Device.Function (e.g., "0000:06:00.0") + lif_id: Optional[str] = None # Lif ID (UUID format) + roce_device: Optional[str] = None # ROCE device name (e.g., "rocep9s0") + dcqcn_profile_id: Optional[str] = None # DCQCN profile id (e.g., "1") + status: Optional[str] = None # Status (e.g., "Disabled") + + +class PensandoNicEnvironment(BaseModel): + """Pensando NIC environment information from nicctl show environment""" + + nic_id: str # NIC ID (UUID format) + pcie_bdf: str # PCIe Bus:Device.Function (e.g., "0000:06:00.0") + # Power measurements in Watts + total_power_drawn: Optional[float] = None # Total power drawn (pin) + core_power: Optional[float] = None # Core power (pout1) + arm_power: Optional[float] = None # ARM power (pout2) + # Temperature measurements in Celsius + local_board_temperature: Optional[float] = None # Local board temperature + die_temperature: Optional[float] = None # Die temperature + # Voltage measurements in millivolts + input_voltage: Optional[float] = None # Input voltage + core_voltage: Optional[float] = None # Core voltage + # Frequency measurements in MHz + core_frequency: Optional[float] = None # Core frequency + cpu_frequency: Optional[float] = None # CPU frequency + p4_stage_frequency: Optional[float] = None # P4 stage frequency + + +class PensandoNicPcieAts(BaseModel): + """Pensando NIC PCIe ATS information from nicctl show pcie ats""" + + nic_id: str # NIC ID (UUID format) + 
pcie_bdf: str # PCIe Bus:Device.Function (e.g., "0000:06:00.0") + status: str # Status (e.g., "Disabled", "Enabled") + + +class PensandoNicPort(BaseModel): + """Pensando NIC port information from nicctl show port""" + + nic_id: str # NIC ID (UUID format) + pcie_bdf: str # PCIe Bus:Device.Function (e.g., "0000:06:00.0") + port_id: str # Port ID (UUID format) + port_name: str # Port name (e.g., "eth1/1") + # Spec fields + spec_ifindex: Optional[str] = None + spec_type: Optional[str] = None + spec_speed: Optional[str] = None + spec_admin_state: Optional[str] = None + spec_fec_type: Optional[str] = None + spec_pause_type: Optional[str] = None + spec_num_lanes: Optional[int] = None + spec_mtu: Optional[int] = None + spec_tx_pause: Optional[str] = None + spec_rx_pause: Optional[str] = None + spec_auto_negotiation: Optional[str] = None + # Status fields + status_physical_port: Optional[int] = None + status_operational_status: Optional[str] = None + status_link_fsm_state: Optional[str] = None + status_fec_type: Optional[str] = None + status_cable_type: Optional[str] = None + status_num_lanes: Optional[int] = None + status_speed: Optional[str] = None + status_auto_negotiation: Optional[str] = None + status_mac_id: Optional[int] = None + status_mac_channel: Optional[int] = None + status_mac_address: Optional[str] = None + status_transceiver_type: Optional[str] = None + status_transceiver_state: Optional[str] = None + status_transceiver_pid: Optional[str] = None + + +class PensandoNicQosScheduling(BaseModel): + """QoS Scheduling entry""" + + priority: int + scheduling_type: Optional[str] = None # e.g., "DWRR" + bandwidth: Optional[int] = None # Bandwidth in percentage + rate_limit: Optional[str] = None # Rate limit (e.g., "N/A" or value in Gbps) + + +class PensandoNicQos(BaseModel): + """Pensando NIC QoS information from nicctl show qos""" + + nic_id: str # NIC ID (UUID format) + pcie_bdf: str # PCIe Bus:Device.Function (e.g., "0000:06:00.0") + port_id: str # Port ID (UUID 
format) + classification_type: Optional[str] = None # e.g., "DSCP" + dscp_bitmap: Optional[str] = None # DSCP bitmap + dscp_range: Optional[str] = None # DSCP range (e.g., "0-63") + dscp_priority: Optional[int] = None # Priority mapped from DSCP + pfc_priority_bitmap: Optional[str] = None # PFC priority bitmap + pfc_no_drop_priorities: Optional[str] = None # PFC no-drop priorities + scheduling: List[PensandoNicQosScheduling] = Field(default_factory=list) # Scheduling entries + + +class PensandoNicRdmaStatistic(BaseModel): + """RDMA statistic entry""" + + name: str # Statistic name + count: int # Count value + + +class PensandoNicRdmaStatistics(BaseModel): + """Pensando NIC RDMA statistics from nicctl show rdma statistics""" + + nic_id: str # NIC ID (UUID format) + pcie_bdf: str # PCIe Bus:Device.Function (e.g., "0000:06:00.0") + statistics: List[PensandoNicRdmaStatistic] = Field(default_factory=list) # Statistics entries + + +class PensandoNicVersionHostSoftware(BaseModel): + """Pensando NIC host software version from nicctl show version host-software""" + + nicctl: Optional[str] = None # nicctl version + ipc_driver: Optional[str] = None # IPC driver version + ionic_driver: Optional[str] = None # ionic driver version + + +class PensandoNicVersionFirmware(BaseModel): + """Pensando NIC firmware version from nicctl show version firmware""" + + nic_id: str # NIC ID (UUID format) + pcie_bdf: str # PCIe Bus:Device.Function (e.g., "0000:06:00.0") + cpld: Optional[str] = None # CPLD version + boot0: Optional[str] = None # Boot0 version + uboot_a: Optional[str] = None # Uboot-A version + firmware_a: Optional[str] = None # Firmware-A version + device_config_a: Optional[str] = None # Device config-A version + + class NetworkDataModel(DataModel): """Complete network configuration data""" @@ -163,3 +308,12 @@ class NetworkDataModel(DataModel): broadcom_nic_qos: Dict[int, BroadcomNicQos] = Field( default_factory=dict ) # Device number -> QoS info mapping + pensando_nic_cards: 
List[PensandoNicCard] = Field(default_factory=list) + pensando_nic_dcqcn: List[PensandoNicDcqcn] = Field(default_factory=list) + pensando_nic_environment: List[PensandoNicEnvironment] = Field(default_factory=list) + pensando_nic_pcie_ats: List[PensandoNicPcieAts] = Field(default_factory=list) + pensando_nic_ports: List[PensandoNicPort] = Field(default_factory=list) + pensando_nic_qos: List[PensandoNicQos] = Field(default_factory=list) + pensando_nic_rdma_statistics: List[PensandoNicRdmaStatistics] = Field(default_factory=list) + pensando_nic_version_host_software: Optional[PensandoNicVersionHostSoftware] = None + pensando_nic_version_firmware: List[PensandoNicVersionFirmware] = Field(default_factory=list) diff --git a/test/unit/plugin/test_network_collector.py b/test/unit/plugin/test_network_collector.py index 4da8a1ce..068b8868 100644 --- a/test/unit/plugin/test_network_collector.py +++ b/test/unit/plugin/test_network_collector.py @@ -39,6 +39,13 @@ Neighbor, NetworkDataModel, NetworkInterface, + PensandoNicCard, + PensandoNicDcqcn, + PensandoNicEnvironment, + PensandoNicPcieAts, + PensandoNicPort, + PensandoNicQos, + PensandoNicQosScheduling, Route, RoutingRule, ) @@ -307,6 +314,15 @@ def run_sut_cmd_side_effect(cmd, **kwargs): elif "ethtool" in cmd: # Fail ethtool commands (simulating no sudo or not supported) return MagicMock(exit_code=1, stdout="", command=cmd) + elif "lldpcli" in cmd or "lldpctl" in cmd: + # LLDP commands fail (not available) + return MagicMock(exit_code=1, stdout="", command=cmd) + elif "niccli" in cmd: + # Broadcom NIC commands fail (not available) + return MagicMock(exit_code=1, stdout="", command=cmd) + elif "nicctl" in cmd: + # Pensando NIC commands fail (not available) + return MagicMock(exit_code=1, stdout="", command=cmd) return MagicMock(exit_code=1, stdout="", command=cmd) collector._run_sut_cmd = MagicMock(side_effect=run_sut_cmd_side_effect) @@ -556,51 +572,14 @@ def test_network_data_model_creation(collector): # Sample Broadcom 
NIC command outputs for testing -NICCLI_LISTDEV_OUTPUT = """root@smci355-ccs-aus-n13-25:/# niccli --list_devices - +NICCLI_LISTDEV_OUTPUT = """ 1 ) Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC (Adp#1 Port#1) - Device Interface Name : benic1p1 - MAC Address : 8C:84:74:37:C3:70 - PCI Address : 0000:06:00.0 - -2 ) Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC (Adp#2 Port#1) - Device Interface Name : benic2p1 - MAC Address : 8C:84:74:37:DB:D0 - PCI Address : 0000:16:00.0 - -3 ) Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC (Adp#3 Port#1) - Device Interface Name : benic4p1 - MAC Address : 8C:84:74:37:6C:10 - PCI Address : 0000:66:00.0 - -4 ) Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC (Adp#4 Port#1) - Device Interface Name : benic3p1 - MAC Address : 8C:84:74:37:BB:F0 - PCI Address : 0000:76:00.0 - -5 ) Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC (Adp#5 Port#1) - Device Interface Name : benic5p1 - MAC Address : 8C:84:74:37:8E:A0 - PCI Address : 0000:86:00.0 - -6 ) Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC (Adp#6 Port#1) - Device Interface Name : benic6p1 - MAC Address : 6C:92:CF:9A:15:10 - PCI Address : 0000:96:00.0 - -7 ) Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC (Adp#7 Port#1) - Device Interface Name : benic8p1 - MAC Address : 8C:84:74:37:69:90 - PCI Address : 0000:E6:00.0 - -8 ) Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC (Adp#8 Port#1) - Device Interface Name : benic7p1 - MAC Address : 8C:84:74:37:C1:40 - PCI Address : 0000:F6:00.0 + Device Interface Name : abcd1p1 + MAC Address : 81:82:83:84:85:88 + PCI Address : 0000:22:00.0 """ -NICCLI_QOS_OUTPUT = """root@smci355-ccs-aus-n13-25:/# niccli --dev 1 qos --ets --show - +NICCLI_QOS_OUTPUT = """ IEEE 8021QAZ ETS Configuration TLV: PRIO_MAP: 0:0 1:0 2:0 3:1 4:0 5:0 6:0 7:2 TC Bandwidth: 50% 50% 0% @@ -626,12 +605,6 @@ def test_network_data_model_creation(collector): TC Rate Limit: 100% 100% 100% 0% 0% 0% 0% 0% """ -NICCLI_LISTDEV_SINGLE_DEVICE = """1 ) Broadcom 
BCM57608 1x400G QSFP-DD PCIe Ethernet NIC (Adp#1 Port#1) - Device Interface Name : benic1p1 - MAC Address : 8C:84:74:37:C3:70 - PCI Address : 0000:06:00.0 -""" - NICCLI_QOS_MINIMAL_OUTPUT = """IEEE 8021QAZ ETS Configuration TLV: PRIO_MAP: 0:0 1:1 TC Bandwidth: 50% 50% @@ -641,49 +614,123 @@ def test_network_data_model_creation(collector): TC Rate Limit: 100% 100% """ +# Sample Pensando NIC command outputs for testing +NICCTL_SHOW_CARD_OUTPUT = """ +--------------------------------------------------------------------------------------------- +Id PCIe BDF ASIC F/W partition Serial number +--------------------------------------------------------------------------------------------- +1111111-4c32-3533-3330-12345000000 0000:06:00.0 test1 A ABC1234 +2222222-4c32-3533-3731-78901500000 0000:16:00.0 test2 A DEF5678 +""" -def test_parse_niccli_listdev_multiple_devices(collector): - """Test parsing multiple Broadcom NIC devices from niccli --list_devices output""" - devices = collector._parse_niccli_listdev(NICCLI_LISTDEV_OUTPUT) +NICCTL_SHOW_DCQCN_OUTPUT = """ +NIC : 1111111-4c32-3533-3330-12345000000 (0000:06:00.0) +------------------------------------------------------------------------------------------ - assert len(devices) == 8 +Lif id : 1111111-4c32-3533-3330-12345000000 +ROCE device : sample + DCQCN profile id : 1 + Status : Disabled +****************************************************************************************** +""" - # Check first device - device1 = devices[0] - assert device1.device_num == 1 - assert device1.model == "Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC" - assert device1.adapter_port == "Adp#1 Port#1" - assert device1.interface_name == "benic1p1" - assert device1.mac_address == "8C:84:74:37:C3:70" - assert device1.pci_address == "0000:06:00.0" +NICCTL_SHOW_ENVIRONMENT_OUTPUT = """ +NIC : 1111111-4c32-3533-3330-12345000000 (0000:06:00.0) + + Power(W): + Total power drawn (pin) : 29.437 + Core power (pout1) : 12.375 + ARM power (pout2) : 
0.788 + Temperature(C): + Local board temperature : 44.12 + Die temperature : 45.59 + Voltage(mV): + Input voltage : 12078 + Core voltage : 725 + Frequency(MHz): + Core frequency : 1100 + CPU frequency : 1500 + P4 stage frequency : 1500 +------------------------------------------------------------------------------------- +""" + +NICCTL_SHOW_PCIE_ATS_OUTPUT = """ +NIC : 1111111-4c32-3533-3330-12345000000 (0000:06:00.0) : Disabled +""" + +NICCTL_SHOW_PORT_OUTPUT = """ +NIC : 1111111-4c32-3533-3330-12345000000 (0000:06:00.0) + +Port : 555555a-6c40-4242-4242-000011010000 (eth1/1) + Spec: + Ifindex : 0x11010000 + Type : ETH + speed : 400G + Admin state : UP + FEC type : RS + Pause type : PFC + Number of lanes : 4 + MTU : 9216 + TX pause : enabled + RX pause : enabled + Auto negotiation : disabled + Status: + Physical port : 1 + Operational status : DOWN + Link FSM state : SIGNAL_DETECT + FEC type : RS + Cable type : Copper + Number of lanes : 4 + speed : 400G + Auto negotiation : disabled + MAC ID : 0 + MAC channel : 0 + MAC address : 04:90:81:4a:6c:40 + Transceiver type : QSFP_CMIS + Transceiver state : SPROM-READ + Transceiver PID : QSFP-400G-CR4 +------------------------------------------------------------------------------------- +""" - # Check another device (device 3) - device3 = devices[2] - assert device3.device_num == 3 - assert device3.interface_name == "benic4p1" - assert device3.mac_address == "8C:84:74:37:6C:10" - assert device3.pci_address == "0000:66:00.0" +NICCTL_SHOW_QOS_OUTPUT = """ +NIC : 1111111-4c32-3533-3330-12345000000 (0000:06:00.0) - # Check last device - device8 = devices[7] - assert device8.device_num == 8 - assert device8.interface_name == "benic7p1" - assert device8.mac_address == "8C:84:74:37:C1:40" - assert device8.pci_address == "0000:F6:00.0" +Port : 0490814a-6c40-4242-4242-000011010000 + Classification type : DSCP + + DSCP-to-priority : + DSCP bitmap : 0xffffffffffffffff ==> priority : 0 + DSCP : 0-63 ==> priority : 0 + + + PFC : + PFC 
priority bitmap : 0x0 + PFC no-drop priorities : + + Scheduling : + -------------------------------------------- + Priority Scheduling Bandwidth Rate-limit + Type (in %age) (in Gbps) + -------------------------------------------- + 0 DWRR 0 N/A +""" -def test_parse_niccli_listdev_single_device(collector): - """Test parsing single Broadcom NIC device""" - devices = collector._parse_niccli_listdev(NICCLI_LISTDEV_SINGLE_DEVICE) + +def test_parse_niccli_listdev_device(collector): + """Test parsing Broadcom NIC device from niccli --list_devices output""" + devices = collector._parse_niccli_listdev(NICCLI_LISTDEV_OUTPUT) assert len(devices) == 1 - device = devices[0] - assert device.device_num == 1 - assert device.model == "Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC" - assert device.adapter_port == "Adp#1 Port#1" - assert device.interface_name == "benic1p1" - assert device.mac_address == "8C:84:74:37:C3:70" - assert device.pci_address == "0000:06:00.0" + + # Check device + device1 = devices[0] + assert device1.device_num == 1 + assert device1.model == "Broadcom BCM57608 1x400G QSFP-DD PCIe Ethernet NIC" + assert device1.adapter_port == "Adp#1 Port#1" + assert device1.interface_name == "abcd1p1" + assert device1.mac_address == "81:82:83:84:85:88" + assert device1.pci_address == "0000:22:00.0" def test_parse_niccli_listdev_empty_output(collector): @@ -769,40 +816,6 @@ def test_parse_niccli_qos_complete(collector): assert qos.tc_rate_limit[7] == 0 -def test_parse_niccli_qos_minimal(collector): - """Test parsing minimal Broadcom NIC QoS output""" - qos = collector._parse_niccli_qos(2, NICCLI_QOS_MINIMAL_OUTPUT) - - assert qos.device_num == 2 - assert qos.raw_output == NICCLI_QOS_MINIMAL_OUTPUT - - # Check PRIO_MAP - assert len(qos.prio_map) == 2 - assert qos.prio_map[0] == 0 - assert qos.prio_map[1] == 1 - - # Check TC Bandwidth - assert len(qos.tc_bandwidth) == 2 - assert qos.tc_bandwidth[0] == 50 - assert qos.tc_bandwidth[1] == 50 - - # Check TSA_MAP - assert 
len(qos.tsa_map) == 2 - assert qos.tsa_map[0] == "ets" - assert qos.tsa_map[1] == "strict" - - # Check PFC enabled - assert qos.pfc_enabled == 1 - - # Check APP entries (should be empty) - assert len(qos.app_entries) == 0 - - # Check TC Rate Limit - assert len(qos.tc_rate_limit) == 2 - assert qos.tc_rate_limit[0] == 100 - assert qos.tc_rate_limit[1] == 100 - - def test_parse_niccli_qos_empty_output(collector): """Test parsing empty QoS output""" qos = collector._parse_niccli_qos(1, "") @@ -817,28 +830,6 @@ def test_parse_niccli_qos_empty_output(collector): assert len(qos.tc_rate_limit) == 0 -def test_parse_niccli_qos_no_app_entries(collector): - """Test parsing QoS output without APP entries""" - qos_no_app = """IEEE 8021QAZ ETS Configuration TLV: - PRIO_MAP: 0:0 1:1 2:2 - TC Bandwidth: 33% 33% 34% - TSA_MAP: 0:ets 1:ets 2:ets -IEEE 8021QAZ PFC TLV: - PFC enabled: 7 -TC Rate Limit: 100% 100% 100% -""" - - qos = collector._parse_niccli_qos(5, qos_no_app) - - assert qos.device_num == 5 - assert len(qos.prio_map) == 3 - assert len(qos.tc_bandwidth) == 3 - assert len(qos.tsa_map) == 3 - assert qos.pfc_enabled == 7 - assert len(qos.app_entries) == 0 - assert len(qos.tc_rate_limit) == 3 - - def test_parse_niccli_qos_multiple_app_protocols(collector): """Test parsing QoS with APP entries having different protocols""" qos_multi_protocol = """IEEE 8021QAZ ETS Configuration TLV: @@ -944,3 +935,922 @@ def test_network_data_model_with_broadcom_nic(collector): assert data.broadcom_nic_devices[0].interface_name == "benic1p1" assert data.broadcom_nic_qos[1].device_num == 1 assert data.broadcom_nic_qos[1].pfc_enabled == 3 + + +def test_parse_nicctl_show_card_multiple_cards(collector): + """Test parsing multiple Pensando NIC cards from nicctl show card output""" + cards = collector._parse_nicctl_card(NICCTL_SHOW_CARD_OUTPUT) + + assert len(cards) == 2 + + # Check first card + card1 = cards[0] + assert card1.id == "1111111-4c32-3533-3330-12345000000" + assert card1.pcie_bdf == 
"0000:06:00.0" + assert card1.asic == "test1" + assert card1.fw_partition == "A" + assert card1.serial_number == "ABC1234" + + # Check second card + card2 = cards[1] + assert card2.id == "2222222-4c32-3533-3731-78901500000" + assert card2.pcie_bdf == "0000:16:00.0" + assert card2.asic == "test2" + assert card2.fw_partition == "A" + assert card2.serial_number == "DEF5678" + + +def test_parse_nicctl_show_card_empty_output(collector): + """Test parsing empty nicctl show card output""" + cards = collector._parse_nicctl_card("") + + assert len(cards) == 0 + + +def test_parse_nicctl_show_card_partial_fields(collector): + """Test parsing nicctl show card output with partial fields""" + partial_output = """ +--------------------------------------------------------------------------------------------- +Id PCIe BDF ASIC F/W partition Serial number +--------------------------------------------------------------------------------------------- +42424650-4c32-3533-3330-323934000000 0000:06:00.0 +42424650-4c32-3533-3731-304535000000 0000:16:00.0 salina +""" + + cards = collector._parse_nicctl_card(partial_output) + + assert len(cards) == 2 + + # First card with only ID and PCIe BDF + card1 = cards[0] + assert card1.id == "42424650-4c32-3533-3330-323934000000" + assert card1.pcie_bdf == "0000:06:00.0" + assert card1.asic is None + assert card1.fw_partition is None + assert card1.serial_number is None + + # Second card with ID, PCIe BDF, and ASIC + card2 = cards[1] + assert card2.id == "42424650-4c32-3533-3731-304535000000" + assert card2.pcie_bdf == "0000:16:00.0" + assert card2.asic == "salina" + assert card2.fw_partition is None + assert card2.serial_number is None + + +def test_parse_nicctl_show_card_malformed_output(collector): + """Test parsing malformed nicctl show card output gracefully""" + malformed = """some random text +not a valid card line +123 invalid format +""" + + cards = collector._parse_nicctl_card(malformed) + + # Should handle gracefully, return empty list or 
skip invalid lines + assert isinstance(cards, list) + # May parse some invalid entries, but should not crash + + +def test_network_data_model_with_pensando_nic(collector): + """Test creating NetworkDataModel with Pensando NIC data""" + card1 = PensandoNicCard( + id="42424650-4c32-3533-3330-323934000000", + pcie_bdf="0000:06:00.0", + asic="salina", + fw_partition="A", + serial_number="FPL25330294", + ) + + card2 = PensandoNicCard( + id="42424650-4c32-3533-3731-304535000000", + pcie_bdf="0000:16:00.0", + asic="salina", + fw_partition="A", + serial_number="FPL253710E5", + ) + + data = NetworkDataModel( + interfaces=[], + routes=[], + rules=[], + neighbors=[], + ethtool_info={}, + pensando_nic_cards=[card1, card2], + ) + + assert len(data.pensando_nic_cards) == 2 + assert data.pensando_nic_cards[0].id == "42424650-4c32-3533-3330-323934000000" + assert data.pensando_nic_cards[0].pcie_bdf == "0000:06:00.0" + assert data.pensando_nic_cards[0].asic == "salina" + assert data.pensando_nic_cards[1].serial_number == "FPL253710E5" + + +def test_collect_pensando_nic_success(collector, conn_mock): + """Test successful collection of Pensando NIC data""" + collector.system_info.os_family = OSFamily.LINUX + + # Mock successful nicctl command execution + def run_sut_cmd_side_effect(cmd, **kwargs): + if "nicctl show card" in cmd: + return MagicMock(exit_code=0, stdout=NICCTL_SHOW_CARD_OUTPUT, command=cmd) + elif "nicctl show dcqcn" in cmd: + return MagicMock(exit_code=0, stdout=NICCTL_SHOW_DCQCN_OUTPUT, command=cmd) + elif "nicctl show environment" in cmd: + return MagicMock(exit_code=0, stdout=NICCTL_SHOW_ENVIRONMENT_OUTPUT, command=cmd) + elif "nicctl show pcie ats" in cmd: + return MagicMock(exit_code=0, stdout=NICCTL_SHOW_PCIE_ATS_OUTPUT, command=cmd) + elif "nicctl show port" in cmd: + return MagicMock(exit_code=0, stdout=NICCTL_SHOW_PORT_OUTPUT, command=cmd) + elif "nicctl show qos" in cmd: + return MagicMock(exit_code=0, stdout=NICCTL_SHOW_QOS_OUTPUT, command=cmd) + elif 
"nicctl show rdma statistics" in cmd: + return MagicMock(exit_code=0, stdout=NICCTL_SHOW_RDMA_STATISTICS_OUTPUT, command=cmd) + elif "nicctl show version host-software" in cmd: + return MagicMock( + exit_code=0, stdout=NICCTL_SHOW_VERSION_HOST_SOFTWARE_OUTPUT, command=cmd + ) + elif "nicctl show version firmware" in cmd: + return MagicMock(exit_code=0, stdout=NICCTL_SHOW_VERSION_FIRMWARE_OUTPUT, command=cmd) + elif "nicctl" in cmd: + # Other nicctl commands succeed but return empty + return MagicMock(exit_code=0, stdout="", command=cmd) + return MagicMock(exit_code=1, stdout="", command=cmd) + + collector._run_sut_cmd = MagicMock(side_effect=run_sut_cmd_side_effect) + + ( + cards, + dcqcn_entries, + environment_entries, + pcie_ats_entries, + port_entries, + qos_entries, + rdma_statistics_entries, + version_host_software, + version_firmware_entries, + ) = collector._collect_pensando_nic_info() + + assert len(cards) == 2 + assert cards[0].id == "1111111-4c32-3533-3330-12345000000" + assert cards[0].pcie_bdf == "0000:06:00.0" + assert cards[0].asic == "test1" + assert cards[0].serial_number == "ABC1234" + + assert len(dcqcn_entries) == 1 + assert dcqcn_entries[0].nic_id == "1111111-4c32-3533-3330-12345000000" + assert dcqcn_entries[0].pcie_bdf == "0000:06:00.0" + + assert len(environment_entries) == 1 + assert environment_entries[0].nic_id == "1111111-4c32-3533-3330-12345000000" + assert environment_entries[0].pcie_bdf == "0000:06:00.0" + + assert len(pcie_ats_entries) == 1 + assert pcie_ats_entries[0].nic_id == "1111111-4c32-3533-3330-12345000000" + assert pcie_ats_entries[0].pcie_bdf == "0000:06:00.0" + assert pcie_ats_entries[0].status == "Disabled" + + assert len(port_entries) == 1 + assert port_entries[0].nic_id == "1111111-4c32-3533-3330-12345000000" + assert port_entries[0].pcie_bdf == "0000:06:00.0" + assert port_entries[0].port_name == "eth1/1" + + assert len(qos_entries) == 1 + assert qos_entries[0].nic_id == "1111111-4c32-3533-3330-12345000000" + assert 
qos_entries[0].pcie_bdf == "0000:06:00.0" + assert qos_entries[0].port_id == "0490814a-6c40-4242-4242-000011010000" + + assert len(rdma_statistics_entries) == 2 + assert rdma_statistics_entries[0].nic_id == "42424650-4c32-3533-3330-323934000000" + assert rdma_statistics_entries[0].pcie_bdf == "0000:06:00.0" + assert len(rdma_statistics_entries[0].statistics) == 2 + + assert version_host_software is not None + assert version_host_software.nicctl == "1.117.1-a-63" + assert version_host_software.ipc_driver == "1.117.1.a.63" + assert version_host_software.ionic_driver == "25.08.4.004" + + assert len(version_firmware_entries) == 2 + assert version_firmware_entries[0].nic_id == "42424650-4c32-3533-3330-323934000000" + assert version_firmware_entries[0].pcie_bdf == "0000:06:00.0" + assert version_firmware_entries[0].cpld == "3.16 (primary)" + + +def test_parse_nicctl_show_dcqcn_multiple_entries(collector): + """Test parsing Pensando NIC DCQCN entry from nicctl show dcqcn output""" + dcqcn_entries = collector._parse_nicctl_dcqcn(NICCTL_SHOW_DCQCN_OUTPUT) + + assert len(dcqcn_entries) == 1 + + # Check entry + entry1 = dcqcn_entries[0] + assert entry1.nic_id == "1111111-4c32-3533-3330-12345000000" + assert entry1.pcie_bdf == "0000:06:00.0" + assert entry1.lif_id == "1111111-4c32-3533-3330-12345000000" + assert entry1.roce_device == "sample" + assert entry1.dcqcn_profile_id == "1" + assert entry1.status == "Disabled" + + +def test_parse_nicctl_show_dcqcn_empty_output(collector): + """Test parsing empty nicctl show dcqcn output""" + dcqcn_entries = collector._parse_nicctl_dcqcn("") + + assert len(dcqcn_entries) == 0 + + +def test_parse_nicctl_show_dcqcn_partial_fields(collector): + """Test parsing nicctl show dcqcn output with partial fields""" + partial_output = """ +NIC : 42424650-4c32-3533-3330-323934000000 (0000:06:00.0) +------------------------------------------------------------------------------------------ + +Lif id : 43000070-0100-0000-4242-0490814a6c40 
+****************************************************************************************** +""" + + dcqcn_entries = collector._parse_nicctl_dcqcn(partial_output) + + assert len(dcqcn_entries) == 1 + + # Entry with only NIC ID, PCIe BDF, and Lif ID + entry1 = dcqcn_entries[0] + assert entry1.nic_id == "42424650-4c32-3533-3330-323934000000" + assert entry1.pcie_bdf == "0000:06:00.0" + assert entry1.lif_id == "43000070-0100-0000-4242-0490814a6c40" + assert entry1.roce_device is None + assert entry1.dcqcn_profile_id is None + assert entry1.status is None + + +def test_parse_nicctl_show_dcqcn_malformed_output(collector): + """Test parsing malformed nicctl show dcqcn output gracefully""" + malformed = """some random text +not a valid dcqcn line +123 invalid format +""" + + dcqcn_entries = collector._parse_nicctl_dcqcn(malformed) + + # Should handle gracefully, return empty list + assert isinstance(dcqcn_entries, list) + assert len(dcqcn_entries) == 0 + + +def test_network_data_model_with_pensando_nic_dcqcn(collector): + """Test creating NetworkDataModel with Pensando NIC DCQCN data""" + dcqcn1 = PensandoNicDcqcn( + nic_id="42424650-4c32-3533-3330-323934000000", + pcie_bdf="0000:06:00.0", + lif_id="43000070-0100-0000-4242-0490814a6c40", + roce_device="rocep9s0", + dcqcn_profile_id="1", + status="Disabled", + ) + + dcqcn2 = PensandoNicDcqcn( + nic_id="42424650-4c32-3533-3731-304535000000", + pcie_bdf="0000:16:00.0", + lif_id="43000070-0100-0000-4242-0490815cce50", + roce_device="rocep25s0", + dcqcn_profile_id="1", + status="Disabled", + ) + + data = NetworkDataModel( + interfaces=[], + routes=[], + rules=[], + neighbors=[], + ethtool_info={}, + pensando_nic_dcqcn=[dcqcn1, dcqcn2], + ) + + assert len(data.pensando_nic_dcqcn) == 2 + assert data.pensando_nic_dcqcn[0].nic_id == "42424650-4c32-3533-3330-323934000000" + assert data.pensando_nic_dcqcn[0].pcie_bdf == "0000:06:00.0" + assert data.pensando_nic_dcqcn[0].roce_device == "rocep9s0" + assert 
data.pensando_nic_dcqcn[1].lif_id == "43000070-0100-0000-4242-0490815cce50" + + +def test_parse_nicctl_show_environment_multiple_entries(collector): + """Test parsing Pensando NIC environment entry from nicctl show environment output""" + environment_entries = collector._parse_nicctl_environment(NICCTL_SHOW_ENVIRONMENT_OUTPUT) + + assert len(environment_entries) == 1 + + # Check entry + entry1 = environment_entries[0] + assert entry1.nic_id == "1111111-4c32-3533-3330-12345000000" + assert entry1.pcie_bdf == "0000:06:00.0" + assert entry1.total_power_drawn == 29.437 + assert entry1.core_power == 12.375 + assert entry1.arm_power == 0.788 + assert entry1.local_board_temperature == 44.12 + assert entry1.die_temperature == 45.59 + assert entry1.input_voltage == 12078 + assert entry1.core_voltage == 725 + assert entry1.core_frequency == 1100 + assert entry1.cpu_frequency == 1500 + assert entry1.p4_stage_frequency == 1500 + + +def test_parse_nicctl_show_environment_empty_output(collector): + """Test parsing empty nicctl show environment output""" + environment_entries = collector._parse_nicctl_environment("") + + assert len(environment_entries) == 0 + + +def test_parse_nicctl_show_environment_partial_fields(collector): + """Test parsing nicctl show environment output with partial fields""" + partial_output = """ +NIC : 42424650-4c32-3533-3330-323934000000 (0000:06:00.0) + + Power(W): + Total power drawn (pin) : 29.437 + Temperature(C): + Local board temperature : 44.12 +------------------------------------------------------------------------------------- +""" + + environment_entries = collector._parse_nicctl_environment(partial_output) + + assert len(environment_entries) == 1 + + # Entry with only some fields + entry1 = environment_entries[0] + assert entry1.nic_id == "42424650-4c32-3533-3330-323934000000" + assert entry1.pcie_bdf == "0000:06:00.0" + assert entry1.total_power_drawn == 29.437 + assert entry1.local_board_temperature == 44.12 + assert entry1.core_power is 
None + assert entry1.die_temperature is None + assert entry1.input_voltage is None + + +def test_parse_nicctl_show_environment_malformed_output(collector): + """Test parsing malformed nicctl show environment output gracefully""" + malformed = """some random text +not a valid environment line +123 invalid format +""" + + environment_entries = collector._parse_nicctl_environment(malformed) + + # Should handle gracefully, return empty list + assert isinstance(environment_entries, list) + assert len(environment_entries) == 0 + + +def test_network_data_model_with_pensando_nic_environment(collector): + """Test creating NetworkDataModel with Pensando NIC environment data""" + env1 = PensandoNicEnvironment( + nic_id="42424650-4c32-3533-3330-323934000000", + pcie_bdf="0000:06:00.0", + total_power_drawn=29.437, + core_power=12.375, + arm_power=0.788, + local_board_temperature=44.12, + die_temperature=45.59, + input_voltage=12078, + core_voltage=725, + core_frequency=1100, + cpu_frequency=1500, + p4_stage_frequency=1500, + ) + + env2 = PensandoNicEnvironment( + nic_id="42424650-4c32-3533-3731-304535000000", + pcie_bdf="0000:16:00.0", + total_power_drawn=28.968, + core_power=12.031, + arm_power=0.292, + local_board_temperature=42.62, + die_temperature=42.28, + input_voltage=12078, + core_voltage=725, + core_frequency=1100, + cpu_frequency=1500, + p4_stage_frequency=1500, + ) + + data = NetworkDataModel( + interfaces=[], + routes=[], + rules=[], + neighbors=[], + ethtool_info={}, + pensando_nic_environment=[env1, env2], + ) + + assert len(data.pensando_nic_environment) == 2 + assert data.pensando_nic_environment[0].nic_id == "42424650-4c32-3533-3330-323934000000" + assert data.pensando_nic_environment[0].pcie_bdf == "0000:06:00.0" + assert data.pensando_nic_environment[0].total_power_drawn == 29.437 + assert data.pensando_nic_environment[0].die_temperature == 45.59 + assert data.pensando_nic_environment[1].core_frequency == 1100 + + +def 
test_parse_nicctl_show_pcie_ats_multiple_entries(collector): + """Test parsing Pensando NIC PCIe ATS entry from nicctl show pcie ats output""" + pcie_ats_entries = collector._parse_nicctl_pcie_ats(NICCTL_SHOW_PCIE_ATS_OUTPUT) + + assert len(pcie_ats_entries) == 1 + + # Check entry + entry1 = pcie_ats_entries[0] + assert entry1.nic_id == "1111111-4c32-3533-3330-12345000000" + assert entry1.pcie_bdf == "0000:06:00.0" + assert entry1.status == "Disabled" + + +def test_parse_nicctl_show_pcie_ats_empty_output(collector): + """Test parsing empty nicctl show pcie ats output""" + pcie_ats_entries = collector._parse_nicctl_pcie_ats("") + + assert len(pcie_ats_entries) == 0 + + +def test_parse_nicctl_show_pcie_ats_enabled(collector): + """Test parsing nicctl show pcie ats output with Enabled status""" + enabled_output = """ +NIC : 42424650-4c32-3533-3330-323934000000 (0000:06:00.0) : Enabled +NIC : 42424650-4c32-3533-3731-304535000000 (0000:16:00.0) : Disabled +""" + + pcie_ats_entries = collector._parse_nicctl_pcie_ats(enabled_output) + + assert len(pcie_ats_entries) == 2 + assert pcie_ats_entries[0].status == "Enabled" + assert pcie_ats_entries[1].status == "Disabled" + + +def test_parse_nicctl_show_pcie_ats_malformed_output(collector): + """Test parsing malformed nicctl show pcie ats output gracefully""" + malformed = """some random text +not a valid pcie ats line +123 invalid format +""" + + pcie_ats_entries = collector._parse_nicctl_pcie_ats(malformed) + + # Should handle gracefully, return empty list + assert isinstance(pcie_ats_entries, list) + assert len(pcie_ats_entries) == 0 + + +def test_network_data_model_with_pensando_nic_pcie_ats(collector): + """Test creating NetworkDataModel with Pensando NIC PCIe ATS data""" + ats1 = PensandoNicPcieAts( + nic_id="42424650-4c32-3533-3330-323934000000", + pcie_bdf="0000:06:00.0", + status="Disabled", + ) + + ats2 = PensandoNicPcieAts( + nic_id="42424650-4c32-3533-3731-304535000000", + pcie_bdf="0000:16:00.0", + 
status="Enabled", + ) + + data = NetworkDataModel( + interfaces=[], + routes=[], + rules=[], + neighbors=[], + ethtool_info={}, + pensando_nic_pcie_ats=[ats1, ats2], + ) + + assert len(data.pensando_nic_pcie_ats) == 2 + assert data.pensando_nic_pcie_ats[0].nic_id == "42424650-4c32-3533-3330-323934000000" + assert data.pensando_nic_pcie_ats[0].pcie_bdf == "0000:06:00.0" + assert data.pensando_nic_pcie_ats[0].status == "Disabled" + assert data.pensando_nic_pcie_ats[1].status == "Enabled" + + +def test_parse_nicctl_show_port_multiple_entries(collector): + """Test parsing Pensando NIC port entry from nicctl show port output""" + port_entries = collector._parse_nicctl_port(NICCTL_SHOW_PORT_OUTPUT) + + assert len(port_entries) == 1 + + # Check entry + entry1 = port_entries[0] + assert entry1.nic_id == "1111111-4c32-3533-3330-12345000000" + assert entry1.pcie_bdf == "0000:06:00.0" + assert entry1.port_id == "555555a-6c40-4242-4242-000011010000" + assert entry1.port_name == "eth1/1" + # Spec fields + assert entry1.spec_ifindex == "0x11010000" + assert entry1.spec_type == "ETH" + assert entry1.spec_speed == "400G" + assert entry1.spec_admin_state == "UP" + assert entry1.spec_fec_type == "RS" + assert entry1.spec_pause_type == "PFC" + assert entry1.spec_num_lanes == 4 + assert entry1.spec_mtu == 9216 + assert entry1.spec_tx_pause == "enabled" + assert entry1.spec_rx_pause == "enabled" + assert entry1.spec_auto_negotiation == "disabled" + # Status fields + assert entry1.status_physical_port == 1 + assert entry1.status_operational_status == "DOWN" + assert entry1.status_link_fsm_state == "SIGNAL_DETECT" + assert entry1.status_fec_type == "RS" + assert entry1.status_cable_type == "Copper" + assert entry1.status_num_lanes == 4 + assert entry1.status_speed == "400G" + assert entry1.status_auto_negotiation == "disabled" + assert entry1.status_mac_id == 0 + assert entry1.status_mac_channel == 0 + assert entry1.status_mac_address == "04:90:81:4a:6c:40" + assert 
entry1.status_transceiver_type == "QSFP_CMIS" + assert entry1.status_transceiver_state == "SPROM-READ" + assert entry1.status_transceiver_pid == "QSFP-400G-CR4" + + +def test_parse_nicctl_show_port_empty_output(collector): + """Test parsing empty nicctl show port output""" + port_entries = collector._parse_nicctl_port("") + + assert len(port_entries) == 0 + + +def test_parse_nicctl_show_port_partial_fields(collector): + """Test parsing nicctl show port output with partial fields""" + partial_output = """ +NIC : 42424650-4c32-3533-3330-323934000000 (0000:06:00.0) + +Port : 0490814a-6c40-4242-4242-000011010000 (eth1/1) + Spec: + speed : 400G + Admin state : UP + Status: + Operational status : DOWN +------------------------------------------------------------------------------------- +""" + + port_entries = collector._parse_nicctl_port(partial_output) + + assert len(port_entries) == 1 + + # Entry with only some fields + entry1 = port_entries[0] + assert entry1.nic_id == "42424650-4c32-3533-3330-323934000000" + assert entry1.pcie_bdf == "0000:06:00.0" + assert entry1.port_name == "eth1/1" + assert entry1.spec_speed == "400G" + assert entry1.spec_admin_state == "UP" + assert entry1.status_operational_status == "DOWN" + assert entry1.spec_mtu is None + assert entry1.status_mac_address is None + + +def test_parse_nicctl_show_port_malformed_output(collector): + """Test parsing malformed nicctl show port output gracefully""" + malformed = """some random text +not a valid port line +123 invalid format +""" + + port_entries = collector._parse_nicctl_port(malformed) + + # Should handle gracefully, return empty list + assert isinstance(port_entries, list) + assert len(port_entries) == 0 + + +def test_network_data_model_with_pensando_nic_port(collector): + """Test creating NetworkDataModel with Pensando NIC port data""" + port1 = PensandoNicPort( + nic_id="42424650-4c32-3533-3330-323934000000", + pcie_bdf="0000:06:00.0", + port_id="0490814a-6c40-4242-4242-000011010000", + 
port_name="eth1/1", + spec_speed="400G", + spec_admin_state="UP", + spec_mtu=9216, + status_operational_status="DOWN", + status_mac_address="04:90:81:4a:6c:40", + ) + + port2 = PensandoNicPort( + nic_id="42424650-4c32-3533-3731-304535000000", + pcie_bdf="0000:16:00.0", + port_id="0490815c-ce50-4242-4242-000011010000", + port_name="eth1/1", + spec_speed="400G", + spec_admin_state="UP", + spec_mtu=9216, + status_operational_status="UP", + status_mac_address="04:90:81:5c:ce:50", + ) + + data = NetworkDataModel( + interfaces=[], + routes=[], + rules=[], + neighbors=[], + ethtool_info={}, + pensando_nic_ports=[port1, port2], + ) + + assert len(data.pensando_nic_ports) == 2 + assert data.pensando_nic_ports[0].nic_id == "42424650-4c32-3533-3330-323934000000" + assert data.pensando_nic_ports[0].port_name == "eth1/1" + assert data.pensando_nic_ports[0].spec_speed == "400G" + assert data.pensando_nic_ports[0].status_mac_address == "04:90:81:4a:6c:40" + assert data.pensando_nic_ports[1].status_operational_status == "UP" + + +def test_parse_nicctl_show_qos_multiple_entries(collector): + """Test parsing Pensando NIC QoS entry from nicctl show qos output""" + qos_entries = collector._parse_nicctl_qos(NICCTL_SHOW_QOS_OUTPUT) + + assert len(qos_entries) == 1 + + # Check entry + entry1 = qos_entries[0] + assert entry1.nic_id == "1111111-4c32-3533-3330-12345000000" + assert entry1.pcie_bdf == "0000:06:00.0" + assert entry1.port_id == "0490814a-6c40-4242-4242-000011010000" + assert entry1.classification_type == "DSCP" + assert entry1.dscp_bitmap == "0xffffffffffffffff" + assert entry1.dscp_range == "0-63" + assert entry1.dscp_priority == 0 + assert entry1.pfc_priority_bitmap == "0x0" + assert entry1.pfc_no_drop_priorities == "" + assert len(entry1.scheduling) == 1 + assert entry1.scheduling[0].priority == 0 + assert entry1.scheduling[0].scheduling_type == "DWRR" + assert entry1.scheduling[0].bandwidth == 0 + assert entry1.scheduling[0].rate_limit == "N/A" + + +def 
test_parse_nicctl_show_qos_empty_output(collector): + """Test parsing empty nicctl show qos output""" + qos_entries = collector._parse_nicctl_qos("") + + assert len(qos_entries) == 0 + + +def test_parse_nicctl_show_qos_malformed_output(collector): + """Test parsing malformed nicctl show qos output gracefully""" + malformed = """some random text +not a valid qos line +123 invalid format +""" + + qos_entries = collector._parse_nicctl_qos(malformed) + + # Should handle gracefully, return empty list + assert isinstance(qos_entries, list) + assert len(qos_entries) == 0 + + +def test_network_data_model_with_pensando_nic_qos(collector): + """Test creating NetworkDataModel with Pensando NIC QoS data""" + sched1 = PensandoNicQosScheduling( + priority=0, + scheduling_type="DWRR", + bandwidth=0, + rate_limit="N/A", + ) + + qos1 = PensandoNicQos( + nic_id="42424650-4c32-3533-3330-323934000000", + pcie_bdf="0000:06:00.0", + port_id="0490814a-6c40-4242-4242-000011010000", + classification_type="DSCP", + dscp_bitmap="0xffffffffffffffff", + dscp_range="0-63", + dscp_priority=0, + pfc_priority_bitmap="0x0", + pfc_no_drop_priorities="", + scheduling=[sched1], + ) + + qos2 = PensandoNicQos( + nic_id="42424650-4c32-3533-3731-304535000000", + pcie_bdf="0000:16:00.0", + port_id="0490815c-ce50-4242-4242-000011010000", + classification_type="DSCP", + ) + + data = NetworkDataModel( + interfaces=[], + routes=[], + rules=[], + neighbors=[], + ethtool_info={}, + pensando_nic_qos=[qos1, qos2], + ) + + assert len(data.pensando_nic_qos) == 2 + assert data.pensando_nic_qos[0].nic_id == "42424650-4c32-3533-3330-323934000000" + assert data.pensando_nic_qos[0].port_id == "0490814a-6c40-4242-4242-000011010000" + assert data.pensando_nic_qos[0].classification_type == "DSCP" + assert len(data.pensando_nic_qos[0].scheduling) == 1 + assert data.pensando_nic_qos[1].nic_id == "42424650-4c32-3533-3731-304535000000" + + +# Mock output for 'nicctl show rdma statistics' +NICCTL_SHOW_RDMA_STATISTICS_OUTPUT = 
"""NIC : 42424650-4c32-3533-3330-323934000000 (0000:06:00.0) + +------------------------------------------------------------ +Name Count +------------------------------------------------------------ +Queue pair create 1 +Completion queue create 2 + +NIC : 42424650-4c32-3533-3731-304535000000 (0000:16:00.0) + +------------------------------------------------------------ +Name Count +------------------------------------------------------------ +Queue pair create 1 +Completion queue create 2 +""" + + +def test_parse_nicctl_show_rdma_statistics_multiple_entries(collector): + """Test parsing multiple NIC RDMA statistics entries.""" + entries = collector._parse_nicctl_rdma_statistics(NICCTL_SHOW_RDMA_STATISTICS_OUTPUT) + + assert len(entries) == 2 + + # Check first entry + assert entries[0].nic_id == "42424650-4c32-3533-3330-323934000000" + assert entries[0].pcie_bdf == "0000:06:00.0" + assert len(entries[0].statistics) == 2 + assert entries[0].statistics[0].name == "Queue pair create" + assert entries[0].statistics[0].count == 1 + assert entries[0].statistics[1].name == "Completion queue create" + assert entries[0].statistics[1].count == 2 + + # Check second entry + assert entries[1].nic_id == "42424650-4c32-3533-3731-304535000000" + assert entries[1].pcie_bdf == "0000:16:00.0" + assert len(entries[1].statistics) == 2 + assert entries[1].statistics[0].name == "Queue pair create" + assert entries[1].statistics[0].count == 1 + assert entries[1].statistics[1].name == "Completion queue create" + assert entries[1].statistics[1].count == 2 + + +def test_parse_nicctl_show_rdma_statistics_empty_output(collector): + """Test parsing empty RDMA statistics output.""" + entries = collector._parse_nicctl_rdma_statistics("") + assert len(entries) == 0 + + +# Mock output for 'nicctl show version host-software' +NICCTL_SHOW_VERSION_HOST_SOFTWARE_OUTPUT = """nicctl : 1.117.1-a-63 +IPC driver : 1.117.1.a.63 +ionic driver : 25.08.4.004 +""" + + +def 
test_parse_nicctl_show_version_host_software(collector): + """Test parsing host software version.""" + version = collector._parse_nicctl_version_host_software( + NICCTL_SHOW_VERSION_HOST_SOFTWARE_OUTPUT + ) + + assert version is not None + assert version.nicctl == "1.117.1-a-63" + assert version.ipc_driver == "1.117.1.a.63" + assert version.ionic_driver == "25.08.4.004" + + +def test_parse_nicctl_show_version_host_software_empty_output(collector): + """Test parsing empty host software version output.""" + version = collector._parse_nicctl_version_host_software("") + assert version is None + + +# Mock output for 'nicctl show version firmware' +NICCTL_SHOW_VERSION_FIRMWARE_OUTPUT = """NIC : 42424650-4c32-3533-3330-323934000000 (0000:06:00.0) + +CPLD : 3.16 (primary) +Boot0 : 21 +Uboot-A : 1.117.1-a-63 +Firmware-A : 1.117.1-a-63 +Device config-A : device_config_rdma_1x400G/1.0.0 +------------------------------------------------------------------------------------- + +NIC : 42424650-4c32-3533-3731-304535000000 (0000:16:00.0) + +CPLD : 3.16 (primary) +Boot0 : 21 +Uboot-A : 1.117.1-a-63 +Firmware-A : 1.117.1-a-63 +Device config-A : device_config_rdma_1x400G/1.0.0 +------------------------------------------------------------------------------------- +""" + + +def test_parse_nicctl_show_version_firmware_multiple_entries(collector): + """Test parsing multiple NIC firmware version entries.""" + entries = collector._parse_nicctl_version_firmware(NICCTL_SHOW_VERSION_FIRMWARE_OUTPUT) + + assert len(entries) == 2 + + # Check first entry + assert entries[0].nic_id == "42424650-4c32-3533-3330-323934000000" + assert entries[0].pcie_bdf == "0000:06:00.0" + assert entries[0].cpld == "3.16 (primary)" + assert entries[0].boot0 == "21" + assert entries[0].uboot_a == "1.117.1-a-63" + assert entries[0].firmware_a == "1.117.1-a-63" + assert entries[0].device_config_a == "device_config_rdma_1x400G/1.0.0" + + # Check second entry + assert entries[1].nic_id == 
"42424650-4c32-3533-3731-304535000000" + assert entries[1].pcie_bdf == "0000:16:00.0" + assert entries[1].cpld == "3.16 (primary)" + assert entries[1].boot0 == "21" + assert entries[1].uboot_a == "1.117.1-a-63" + assert entries[1].firmware_a == "1.117.1-a-63" + assert entries[1].device_config_a == "device_config_rdma_1x400G/1.0.0" + + +def test_parse_nicctl_show_version_firmware_empty_output(collector): + """Test parsing empty firmware version output.""" + entries = collector._parse_nicctl_version_firmware("") + assert len(entries) == 0 + + +def test_network_data_model_with_pensando_nic_rdma_statistics(): + """Test NetworkDataModel with Pensando NIC RDMA statistics.""" + from nodescraper.plugins.inband.network.networkdata import ( + NetworkDataModel, + PensandoNicRdmaStatistic, + PensandoNicRdmaStatistics, + ) + + data = NetworkDataModel( + pensando_nic_rdma_statistics=[ + PensandoNicRdmaStatistics( + nic_id="42424650-4c32-3533-3330-323934000000", + pcie_bdf="0000:06:00.0", + statistics=[ + PensandoNicRdmaStatistic(name="Queue pair create", count=1), + PensandoNicRdmaStatistic(name="Completion queue create", count=2), + ], + ) + ] + ) + + assert len(data.pensando_nic_rdma_statistics) == 1 + assert data.pensando_nic_rdma_statistics[0].nic_id == "42424650-4c32-3533-3330-323934000000" + assert len(data.pensando_nic_rdma_statistics[0].statistics) == 2 + + +def test_network_data_model_with_pensando_nic_version_host_software(): + """Test NetworkDataModel with Pensando NIC host software version.""" + from nodescraper.plugins.inband.network.networkdata import ( + NetworkDataModel, + PensandoNicVersionHostSoftware, + ) + + data = NetworkDataModel( + pensando_nic_version_host_software=PensandoNicVersionHostSoftware( + nicctl="1.117.1-a-63", + ipc_driver="1.117.1.a.63", + ionic_driver="25.08.4.004", + ) + ) + + assert data.pensando_nic_version_host_software is not None + assert data.pensando_nic_version_host_software.nicctl == "1.117.1-a-63" + assert 
data.pensando_nic_version_host_software.ipc_driver == "1.117.1.a.63" + assert data.pensando_nic_version_host_software.ionic_driver == "25.08.4.004" + + +def test_network_data_model_with_pensando_nic_version_firmware(): + """Test NetworkDataModel with Pensando NIC firmware versions.""" + from nodescraper.plugins.inband.network.networkdata import ( + NetworkDataModel, + PensandoNicVersionFirmware, + ) + + data = NetworkDataModel( + pensando_nic_version_firmware=[ + PensandoNicVersionFirmware( + nic_id="42424650-4c32-3533-3330-323934000000", + pcie_bdf="0000:06:00.0", + cpld="3.16 (primary)", + boot0="21", + uboot_a="1.117.1-a-63", + firmware_a="1.117.1-a-63", + device_config_a="device_config_rdma_1x400G/1.0.0", + ) + ] + ) + + assert len(data.pensando_nic_version_firmware) == 1 + assert data.pensando_nic_version_firmware[0].nic_id == "42424650-4c32-3533-3330-323934000000" + assert data.pensando_nic_version_firmware[0].cpld == "3.16 (primary)" From 4f59bc9f5e9bbf01105c101f16fb23ea38d76f5a Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Wed, 7 Jan 2026 17:24:01 -0600 Subject: [PATCH 04/29] --data bug fix + utests added --- nodescraper/interfaces/dataplugin.py | 6 +- test/functional/test_run_plugins.py | 86 ++++++++++++++++++++++++++ test/unit/framework/test_dataplugin.py | 74 ++++++++++++++++++++++ 3 files changed, 163 insertions(+), 3 deletions(-) diff --git a/nodescraper/interfaces/dataplugin.py b/nodescraper/interfaces/dataplugin.py index 1c5d7a8e..da7a1320 100644 --- a/nodescraper/interfaces/dataplugin.py +++ b/nodescraper/interfaces/dataplugin.py @@ -252,6 +252,9 @@ def analyze( ) return self.analysis_result + if data: + self.data = data + if self.data is None: self.analysis_result = TaskResult( task=self.ANALYZER.__name__, @@ -261,9 +264,6 @@ def analyze( ) return self.analysis_result - if data: - self.data = data - analyzer_task = self.ANALYZER( self.system_info, logger=self.logger, diff --git a/test/functional/test_run_plugins.py 
b/test/functional/test_run_plugins.py index 0253784e..4f52d01f 100644 --- a/test/functional/test_run_plugins.py +++ b/test/functional/test_run_plugins.py @@ -25,6 +25,9 @@ ############################################################################### """Functional tests for running individual plugins.""" +import csv +from pathlib import Path + import pytest from nodescraper.pluginregistry import PluginRegistry @@ -114,3 +117,86 @@ def test_run_comma_separated_plugins_with_invalid(run_cli_command): assert "Running plugin AmdSmiPlugin" in output # Verify it didn't crash assert "Data written to csv file" in output + + +def test_run_plugin_with_data_file_no_collection(run_cli_command, tmp_path): + """Test running plugin with --data argument and --collection False.""" + collect_log_path = str(tmp_path / "collect_logs") + result = run_cli_command( + ["--log-path", collect_log_path, "run-plugins", "DmesgPlugin"], check=False + ) + + output = result.stdout + result.stderr + assert result.returncode in [0, 1, 2] + + dmesg_data_file = None + collect_path = Path(collect_log_path) + + for log_dir in collect_path.glob("*"): + dmesg_plugin_dir = log_dir / "dmesg_plugin" / "dmesg_collector" + if dmesg_plugin_dir.exists(): + for dmesg_file in dmesg_plugin_dir.glob("dmesg*.log"): + dmesg_data_file = str(dmesg_file) + break + + if not dmesg_data_file: + sample_dmesg_dir = tmp_path / "sample_data" + sample_dmesg_dir.mkdir(parents=True, exist_ok=True) + dmesg_data_file = str(sample_dmesg_dir / "dmesg.log") + + sample_content = """[ 0.000000] Linux version 5.15.0-generic (buildd@lcy02-amd64-001) +[ 0.001000] Command line: BOOT_IMAGE=/boot/vmlinuz root=UUID=test ro quiet splash +[ 1.234567] pci 0000:00:01.0: BAR 0: failed to assign [mem size 0x01000000] +[ 2.345678] WARNING: CPU: 0 PID: 1 at drivers/test/test.c:123 test_function+0x123/0x456 +[ 3.456789] AMD-Vi: Event logged [IO_PAGE_FAULT device=00:14.0 domain=0x0000] +[ 4.567890] normal system message +[ 5.678901] ACPI Error: Method 
parse/execution failed +[ 10.123456] System is operational +""" + with open(dmesg_data_file, "w", encoding="utf-8") as f: + f.write(sample_content) + + analyze_log_path = str(tmp_path / "analyze_logs") + result = run_cli_command( + [ + "--log-path", + analyze_log_path, + "run-plugins", + "DmesgPlugin", + "--data", + dmesg_data_file, + "--collection", + "False", + ], + check=False, + ) + + output = result.stdout + result.stderr + assert result.returncode in [0, 1, 2], f"Unexpected return code: {result.returncode}" + assert "Data collection not ran" in output or "collection" in output.lower() + assert "Data written to csv file" in output, "CSV file should be created" + + if "Plugin tasks not ran" in output: + pytest.fail( + "Bug regression: Plugin reported 'tasks not ran' with --data file. " + "Analysis should load data from --data parameter before checking if data is None." + ) + + analyze_path = Path(analyze_log_path) + csv_files = list(analyze_path.glob("*/nodescraper.csv")) + assert len(csv_files) > 0, "CSV results file should exist" + + csv_file = csv_files[0] + with open(csv_file, "r", encoding="utf-8") as f: + reader = csv.DictReader(f) + rows = list(reader) + + dmesg_rows = [row for row in rows if "DmesgPlugin" in row.get("Plugin", "")] + assert len(dmesg_rows) > 0, "DmesgPlugin should have results in CSV" + + dmesg_row = dmesg_rows[0] + status = dmesg_row.get("Status", "") + assert status != "NOT_RAN", ( + f"Bug regression: DmesgPlugin status is NOT_RAN with --data file. " + f"Analysis should have run on provided data. 
Status: {status}" + ) diff --git a/test/unit/framework/test_dataplugin.py b/test/unit/framework/test_dataplugin.py index 8a3b63b0..c6e5cb48 100644 --- a/test/unit/framework/test_dataplugin.py +++ b/test/unit/framework/test_dataplugin.py @@ -329,3 +329,77 @@ def test_collect_preserve_connection(self, plugin_with_conn): # Verify disconnect WAS called when preserve_connection=False mock_disconnect.assert_called_once() + + def test_run_with_data_file_no_collection(self, plugin_with_conn, tmp_path): + """Test running plugin with data file and collection=False.""" + data_file = tmp_path / "test_data.json" + data_file.write_text('{"value": "from_file"}') + + with ( + patch.object(CoreDataPlugin, "collect") as mock_collect, + patch.object(StandardAnalyzer, "analyze_data") as mock_analyze, + ): + mock_analyze.return_value = TaskResult(status=ExecutionStatus.OK) + + result = plugin_with_conn.run(collection=False, analysis=True, data=str(data_file)) + + mock_collect.assert_not_called() + mock_analyze.assert_called_once() + + call_args = mock_analyze.call_args + analyzed_data = call_args[0][0] + assert isinstance(analyzed_data, StandardDataModel) + assert analyzed_data.value == "from_file" + assert result.status == ExecutionStatus.OK + assert plugin_with_conn.analysis_result.status == ExecutionStatus.OK + + def test_run_with_data_dict_no_collection(self, plugin_with_conn): + """Test running plugin with data dict and collection=False.""" + data_dict = {"value": "from_dict"} + + with ( + patch.object(CoreDataPlugin, "collect") as mock_collect, + patch.object(StandardAnalyzer, "analyze_data") as mock_analyze, + ): + mock_analyze.return_value = TaskResult(status=ExecutionStatus.OK) + + result = plugin_with_conn.run(collection=False, analysis=True, data=data_dict) + + mock_collect.assert_not_called() + mock_analyze.assert_called_once() + + call_args = mock_analyze.call_args + analyzed_data = call_args[0][0] + assert isinstance(analyzed_data, StandardDataModel) + assert 
analyzed_data.value == "from_dict" + assert result.status == ExecutionStatus.OK + + def test_run_with_data_model_no_collection(self, plugin_with_conn): + """Test running plugin with data model instance and collection=False.""" + data_model = StandardDataModel(value="from_model") + + with ( + patch.object(CoreDataPlugin, "collect") as mock_collect, + patch.object(StandardAnalyzer, "analyze_data") as mock_analyze, + ): + mock_analyze.return_value = TaskResult(status=ExecutionStatus.OK) + + result = plugin_with_conn.run(collection=False, analysis=True, data=data_model) + + mock_collect.assert_not_called() + mock_analyze.assert_called_once() + + call_args = mock_analyze.call_args + analyzed_data = call_args[0][0] + assert analyzed_data is data_model + assert analyzed_data.value == "from_model" + assert result.status == ExecutionStatus.OK + + def test_analyze_no_data_available(self, plugin_with_conn): + """Test analyze returns NOT_RAN when no data is available.""" + plugin_with_conn._data = None + + result = plugin_with_conn.analyze() + + assert result.status == ExecutionStatus.NOT_RAN + assert "No data available" in result.message From 729cd3f96870dbcfe96d0bec8d21ed7b077c0d51 Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Wed, 7 Jan 2026 18:05:33 -0600 Subject: [PATCH 05/29] addressed utest --- test/functional/fixtures/dmesg_sample.log | 43 ++++++++++++++++++++ test/functional/test_run_plugins.py | 48 +++++------------------ 2 files changed, 53 insertions(+), 38 deletions(-) create mode 100644 test/functional/fixtures/dmesg_sample.log diff --git a/test/functional/fixtures/dmesg_sample.log b/test/functional/fixtures/dmesg_sample.log new file mode 100644 index 00000000..181a3ff8 --- /dev/null +++ b/test/functional/fixtures/dmesg_sample.log @@ -0,0 +1,43 @@ +kern :info : 2026-01-07T10:00:00,123456-06:00 Linux version 5.15.0-91-generic (buildd@amd64-builder) (gcc version 11.4.0) #101-Ubuntu SMP +kern :info : 2026-01-07T10:00:00,234567-06:00 Command line: 
BOOT_IMAGE=/boot/vmlinuz-5.15.0-91-generic root=UUID=a1b2c3d4 ro quiet splash vt.handoff=7 +kern :info : 2026-01-07T10:00:01,345678-06:00 KERNEL supported cpus: +kern :info : 2026-01-07T10:00:01,456789-06:00 Intel GenuineIntel +kern :info : 2026-01-07T10:00:01,567890-06:00 AMD AuthenticAMD +kern :info : 2026-01-07T10:00:02,678901-06:00 x86/fpu: Supporting XSAVE feature 0x001: 'x87 floating point registers' +kern :info : 2026-01-07T10:00:02,789012-06:00 x86/fpu: Supporting XSAVE feature 0x002: 'SSE registers' +kern :info : 2026-01-07T10:00:03,890123-06:00 Memory: 32823616K/33554432K available +kern :warn : 2026-01-07T10:00:05,123456-06:00 pci 0000:00:01.0: BAR 0: failed to assign [mem size 0x01000000] +kern :info : 2026-01-07T10:00:06,234567-06:00 PCI: Using ACPI for IRQ routing +kern :info : 2026-01-07T10:00:07,345678-06:00 NetLabel: Initializing +kern :info : 2026-01-07T10:00:08,456789-06:00 DMA: preallocated 4096 KiB GFP_KERNEL pool for atomic allocations +kern :err : 2026-01-07T10:00:10,567890-06:00 WARNING: CPU: 0 PID: 1 at drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:123 amdgpu_device_init+0x456/0x789 +kern :info : 2026-01-07T10:00:11,678901-06:00 Modules linked in: amdgpu drm_ttm_helper ttm drm_kms_helper +kern :info : 2026-01-07T10:00:12,789012-06:00 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.15.0-91-generic #101-Ubuntu +kern :err : 2026-01-07T10:00:15,890123-06:00 AMD-Vi: Event logged [IO_PAGE_FAULT device=00:14.0 domain=0x0000 address=0xfffffffffffffef0 flags=0x0010] +kern :info : 2026-01-07T10:00:16,123456-06:00 SCSI subsystem initialized +kern :info : 2026-01-07T10:00:17,234567-06:00 libata version 3.00 loaded +kern :info : 2026-01-07T10:00:18,345678-06:00 ACPI: Added _OSI(Module Device) +kern :info : 2026-01-07T10:00:19,456789-06:00 ACPI: Added _OSI(Processor Device) +kern :err : 2026-01-07T10:00:20,567890-06:00 ACPI Error: Method parse/execution failed \_SB.PCI0.GPP0.SWUS.SWDS.VGA.LCD._BCM, AE_NOT_FOUND +kern :info : 2026-01-07T10:00:22,678901-06:00 
[drm] amdgpu kernel modesetting enabled +kern :info : 2026-01-07T10:00:23,789012-06:00 [drm] initializing kernel modesetting (NAVI21 0x1002:0x73BF) +kern :info : 2026-01-07T10:00:25,890123-06:00 amdgpu 0000:03:00.0: amdgpu: Fetched VBIOS from VFCT +kern :info : 2026-01-07T10:00:26,123456-06:00 amdgpu 0000:03:00.0: amdgpu: ATOM BIOS: 113-D4120100-O04 +kern :info : 2026-01-07T10:00:28,234567-06:00 [drm] GPU posting now... +kern :warn : 2026-01-07T10:00:30,345678-06:00 [drm] *ERROR* Timeout waiting for DMCUB auto-load +kern :info : 2026-01-07T10:00:32,456789-06:00 [drm] Display Core initialized with v3.2.149! +kern :info : 2026-01-07T10:00:35,567890-06:00 [drm] VCN decode and encode initialized successfully +kern :info : 2026-01-07T10:00:38,678901-06:00 [drm] fb0: amdgpudrmfb frame buffer device +kern :info : 2026-01-07T10:00:40,789012-06:00 amdgpu 0000:03:00.0: amdgpu: ring gfx_0.0.0 uses VM inv eng 0 on hub 0 +kern :info : 2026-01-07T10:00:42,890123-06:00 [drm] Initialized amdgpu 3.42.0 20150101 for 0000:03:00.0 on minor 0 +kern :info : 2026-01-07T10:00:45,123456-06:00 EXT4-fs (nvme0n1p2): mounted filesystem with ordered data mode +kern :info : 2026-01-07T10:00:48,234567-06:00 systemd[1]: systemd 249.11-0ubuntu3.6 running in system mode +kern :info : 2026-01-07T10:00:50,345678-06:00 systemd[1]: Detected architecture x86-64 +kern :info : 2026-01-07T10:00:55,456789-06:00 audit: type=1400 audit(1704636055.456:2): apparmor="STATUS" operation="profile_load" +kern :info : 2026-01-07T10:01:00,567890-06:00 Adding 33554428k swap on /swapfile +kern :info : 2026-01-07T10:01:05,678901-06:00 IPv6: ADDRCONF(NETDEV_CHANGE): enp5s0: link becomes ready +kern :info : 2026-01-07T10:01:10,789012-06:00 NFSD: Using UMH upcall client tracking operations +kern :info : 2026-01-07T10:01:15,890123-06:00 NFSD: starting 90-second grace period (net f0000098) +kern :info : 2026-01-07T10:01:20,123456-06:00 Bluetooth: BNEP (Ethernet Emulation) ver 1.3 +kern :info : 2026-01-07T10:01:25,234567-06:00 
Bluetooth: BNEP filters: protocol multicast +kern :info : 2026-01-07T10:01:30,345678-06:00 System operational - all services started successfully diff --git a/test/functional/test_run_plugins.py b/test/functional/test_run_plugins.py index 4f52d01f..2c2774d0 100644 --- a/test/functional/test_run_plugins.py +++ b/test/functional/test_run_plugins.py @@ -121,40 +121,10 @@ def test_run_comma_separated_plugins_with_invalid(run_cli_command): def test_run_plugin_with_data_file_no_collection(run_cli_command, tmp_path): """Test running plugin with --data argument and --collection False.""" - collect_log_path = str(tmp_path / "collect_logs") - result = run_cli_command( - ["--log-path", collect_log_path, "run-plugins", "DmesgPlugin"], check=False - ) - - output = result.stdout + result.stderr - assert result.returncode in [0, 1, 2] + fixtures_dir = Path(__file__).parent / "fixtures" + dmesg_fixture = fixtures_dir / "dmesg_sample.log" - dmesg_data_file = None - collect_path = Path(collect_log_path) - - for log_dir in collect_path.glob("*"): - dmesg_plugin_dir = log_dir / "dmesg_plugin" / "dmesg_collector" - if dmesg_plugin_dir.exists(): - for dmesg_file in dmesg_plugin_dir.glob("dmesg*.log"): - dmesg_data_file = str(dmesg_file) - break - - if not dmesg_data_file: - sample_dmesg_dir = tmp_path / "sample_data" - sample_dmesg_dir.mkdir(parents=True, exist_ok=True) - dmesg_data_file = str(sample_dmesg_dir / "dmesg.log") - - sample_content = """[ 0.000000] Linux version 5.15.0-generic (buildd@lcy02-amd64-001) -[ 0.001000] Command line: BOOT_IMAGE=/boot/vmlinuz root=UUID=test ro quiet splash -[ 1.234567] pci 0000:00:01.0: BAR 0: failed to assign [mem size 0x01000000] -[ 2.345678] WARNING: CPU: 0 PID: 1 at drivers/test/test.c:123 test_function+0x123/0x456 -[ 3.456789] AMD-Vi: Event logged [IO_PAGE_FAULT device=00:14.0 domain=0x0000] -[ 4.567890] normal system message -[ 5.678901] ACPI Error: Method parse/execution failed -[ 10.123456] System is operational -""" - with 
open(dmesg_data_file, "w", encoding="utf-8") as f: - f.write(sample_content) + assert dmesg_fixture.exists(), f"Fixture file not found: {dmesg_fixture}" analyze_log_path = str(tmp_path / "analyze_logs") result = run_cli_command( @@ -164,7 +134,7 @@ def test_run_plugin_with_data_file_no_collection(run_cli_command, tmp_path): "run-plugins", "DmesgPlugin", "--data", - dmesg_data_file, + str(dmesg_fixture), "--collection", "False", ], @@ -172,8 +142,10 @@ def test_run_plugin_with_data_file_no_collection(run_cli_command, tmp_path): ) output = result.stdout + result.stderr - assert result.returncode in [0, 1, 2], f"Unexpected return code: {result.returncode}" - assert "Data collection not ran" in output or "collection" in output.lower() + assert ( + result.returncode == 1 + ), f"Expected return code 1 (errors found), got: {result.returncode}" + assert "Running data analyzer: DmesgAnalyzer" in output, "Analyzer should have run" assert "Data written to csv file" in output, "CSV file should be created" if "Plugin tasks not ran" in output: @@ -191,11 +163,11 @@ def test_run_plugin_with_data_file_no_collection(run_cli_command, tmp_path): reader = csv.DictReader(f) rows = list(reader) - dmesg_rows = [row for row in rows if "DmesgPlugin" in row.get("Plugin", "")] + dmesg_rows = [row for row in rows if "DmesgPlugin" in row.get("plugin", "")] assert len(dmesg_rows) > 0, "DmesgPlugin should have results in CSV" dmesg_row = dmesg_rows[0] - status = dmesg_row.get("Status", "") + status = dmesg_row.get("status", "") assert status != "NOT_RAN", ( f"Bug regression: DmesgPlugin status is NOT_RAN with --data file. " f"Analysis should have run on provided data. 
Status: {status}" From f5d2b63cf29fad189981b29bfea945d6e58f6e58 Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Thu, 8 Jan 2026 10:43:46 -0600 Subject: [PATCH 06/29] allowing ext_pkg to be defined anywhere --- nodescraper/cli/cli.py | 84 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 80 insertions(+), 4 deletions(-) diff --git a/nodescraper/cli/cli.py b/nodescraper/cli/cli.py index 81c18f19..152de6a2 100644 --- a/nodescraper/cli/cli.py +++ b/nodescraper/cli/cli.py @@ -30,6 +30,7 @@ import os import platform import sys +from importlib import import_module from typing import Optional import nodescraper @@ -54,12 +55,87 @@ from nodescraper.pluginexecutor import PluginExecutor from nodescraper.pluginregistry import PluginRegistry -try: - import ext_nodescraper_plugins as ext_pkg - extra_pkgs = [ext_pkg] -except ImportError: +def discover_external_plugins(): + """Discover ext_nodescraper_plugins from all installed packages. + + This function searches for ext_nodescraper_plugins in: + 1. Top-level ext_nodescraper_plugins package + 2. 
Any installed package that has an ext_nodescraper_plugins submodule + + Returns: + list: List of discovered plugin packages + """ extra_pkgs = [] + seen_paths = set() # Track paths to avoid duplicates + + # Try top-level ext_nodescraper_plugins first (original behavior) + try: + import ext_nodescraper_plugins as ext_pkg + extra_pkgs.append(ext_pkg) + if hasattr(ext_pkg, '__file__') and ext_pkg.__file__: + seen_paths.add(ext_pkg.__file__) + except ImportError: + pass + + # Discover ext_nodescraper_plugins from installed packages + try: + from importlib.metadata import distributions + + for dist in distributions(): + # Get package name and try different variations + pkg_name = dist.metadata.get('Name', '') + if not pkg_name: + continue + + # Try multiple name variations (with hyphens, underscores, and top-level module name) + name_variants = [ + pkg_name.replace('-', '_'), # amd-error-scraper -> amd_error_scraper + pkg_name.replace('_', '-'), # amd_error_scraper -> amd-error-scraper + ] + + # Try to find the actual top-level module name + try: + top_level = dist.read_text('top_level.txt') + if top_level: + name_variants.extend(top_level.strip().split('\n')) + except Exception: + pass + + # Try each variant + for variant in name_variants: + if not variant: + continue + + try: + module_path = f"{variant}.ext_nodescraper_plugins" + ext_pkg = import_module(module_path) + + # Check if we already have this package (by file path) + pkg_path = getattr(ext_pkg, '__file__', None) + if pkg_path and pkg_path in seen_paths: + continue + + # Add the package + extra_pkgs.append(ext_pkg) + if pkg_path: + seen_paths.add(pkg_path) + + # Found it, no need to try other variants + break + + except (ImportError, AttributeError, ModuleNotFoundError): + # This variant doesn't have ext_nodescraper_plugins, try next + continue + + except Exception: + # If discovery fails, just use what we found with top-level import + pass + + return extra_pkgs + + +extra_pkgs = discover_external_plugins() 
def build_parser( From 752365568996c68cd45eee5761e94f589b79dea3 Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Thu, 8 Jan 2026 11:10:26 -0600 Subject: [PATCH 07/29] cleanup --- nodescraper/cli/cli.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/nodescraper/cli/cli.py b/nodescraper/cli/cli.py index 152de6a2..8caa1855 100644 --- a/nodescraper/cli/cli.py +++ b/nodescraper/cli/cli.py @@ -59,17 +59,12 @@ def discover_external_plugins(): """Discover ext_nodescraper_plugins from all installed packages. - This function searches for ext_nodescraper_plugins in: - 1. Top-level ext_nodescraper_plugins package - 2. Any installed package that has an ext_nodescraper_plugins submodule - Returns: list: List of discovered plugin packages """ extra_pkgs = [] seen_paths = set() # Track paths to avoid duplicates - # Try top-level ext_nodescraper_plugins first (original behavior) try: import ext_nodescraper_plugins as ext_pkg extra_pkgs.append(ext_pkg) @@ -83,18 +78,15 @@ def discover_external_plugins(): from importlib.metadata import distributions for dist in distributions(): - # Get package name and try different variations pkg_name = dist.metadata.get('Name', '') if not pkg_name: continue - # Try multiple name variations (with hyphens, underscores, and top-level module name) name_variants = [ - pkg_name.replace('-', '_'), # amd-error-scraper -> amd_error_scraper - pkg_name.replace('_', '-'), # amd_error_scraper -> amd-error-scraper + pkg_name.replace('-', '_'), + pkg_name.replace('_', '-'), ] - # Try to find the actual top-level module name try: top_level = dist.read_text('top_level.txt') if top_level: @@ -102,7 +94,6 @@ def discover_external_plugins(): except Exception: pass - # Try each variant for variant in name_variants: if not variant: continue @@ -121,15 +112,12 @@ def discover_external_plugins(): if pkg_path: seen_paths.add(pkg_path) - # Found it, no need to try other variants break except (ImportError, AttributeError, 
ModuleNotFoundError): - # This variant doesn't have ext_nodescraper_plugins, try next continue except Exception: - # If discovery fails, just use what we found with top-level import pass return extra_pkgs From 8e0b9c344ded1ad988ec602481a00c58fb0d6d5d Mon Sep 17 00:00:00 2001 From: jaspals Date: Thu, 8 Jan 2026 17:23:36 +0000 Subject: [PATCH 08/29] addressed review comments --- .../inband/network/network_collector.py | 165 ++++++++++-------- test/unit/plugin/test_network_collector.py | 8 +- 2 files changed, 102 insertions(+), 71 deletions(-) diff --git a/nodescraper/plugins/inband/network/network_collector.py b/nodescraper/plugins/inband/network/network_collector.py index 4289d601..94a48026 100644 --- a/nodescraper/plugins/inband/network/network_collector.py +++ b/nodescraper/plugins/inband/network/network_collector.py @@ -1539,8 +1539,13 @@ def _collect_pensando_nic_info( version_firmware_entries = [] collected_count = 0 + # Track which commands succeeded and which failed + collected_commands = [] + uncollected_commands = [] + # Parse nicctl show card output - res_card = self._run_sut_cmd("nicctl show card", sudo=True) + cmd = "nicctl show card" + res_card = self._run_sut_cmd(cmd, sudo=True) if res_card.exit_code == 0: cards = self._parse_nicctl_card(res_card.stdout) self._log_event( @@ -1549,9 +1554,13 @@ def _collect_pensando_nic_info( priority=EventPriority.INFO, ) collected_count += 1 + collected_commands.append(cmd) + else: + uncollected_commands.append(cmd) # Parse nicctl show dcqcn output - res_dcqcn = self._run_sut_cmd("nicctl show dcqcn", sudo=True) + cmd = "nicctl show dcqcn" + res_dcqcn = self._run_sut_cmd(cmd, sudo=True) if res_dcqcn.exit_code == 0: dcqcn_entries = self._parse_nicctl_dcqcn(res_dcqcn.stdout) self._log_event( @@ -1560,9 +1569,13 @@ def _collect_pensando_nic_info( priority=EventPriority.INFO, ) collected_count += 1 + collected_commands.append(cmd) + else: + uncollected_commands.append(cmd) # Parse nicctl show environment output - 
res_environment = self._run_sut_cmd("nicctl show environment", sudo=True) + cmd = "nicctl show environment" + res_environment = self._run_sut_cmd(cmd, sudo=True) if res_environment.exit_code == 0: environment_entries = self._parse_nicctl_environment(res_environment.stdout) self._log_event( @@ -1571,9 +1584,13 @@ def _collect_pensando_nic_info( priority=EventPriority.INFO, ) collected_count += 1 + collected_commands.append(cmd) + else: + uncollected_commands.append(cmd) # Parse nicctl show pcie ats output - res_pcie_ats = self._run_sut_cmd("nicctl show pcie ats", sudo=True) + cmd = "nicctl show pcie ats" + res_pcie_ats = self._run_sut_cmd(cmd, sudo=True) if res_pcie_ats.exit_code == 0: pcie_ats_entries = self._parse_nicctl_pcie_ats(res_pcie_ats.stdout) self._log_event( @@ -1582,9 +1599,13 @@ def _collect_pensando_nic_info( priority=EventPriority.INFO, ) collected_count += 1 + collected_commands.append(cmd) + else: + uncollected_commands.append(cmd) # Parse nicctl show port output - res_port = self._run_sut_cmd("nicctl show port", sudo=True) + cmd = "nicctl show port" + res_port = self._run_sut_cmd(cmd, sudo=True) if res_port.exit_code == 0: port_entries = self._parse_nicctl_port(res_port.stdout) self._log_event( @@ -1593,9 +1614,13 @@ def _collect_pensando_nic_info( priority=EventPriority.INFO, ) collected_count += 1 + collected_commands.append(cmd) + else: + uncollected_commands.append(cmd) # Parse nicctl show qos output - res_qos = self._run_sut_cmd("nicctl show qos", sudo=True) + cmd = "nicctl show qos" + res_qos = self._run_sut_cmd(cmd, sudo=True) if res_qos.exit_code == 0: qos_entries = self._parse_nicctl_qos(res_qos.stdout) self._log_event( @@ -1604,9 +1629,13 @@ def _collect_pensando_nic_info( priority=EventPriority.INFO, ) collected_count += 1 + collected_commands.append(cmd) + else: + uncollected_commands.append(cmd) # Parse nicctl show rdma statistics output - res_rdma_stats = self._run_sut_cmd("nicctl show rdma statistics", sudo=True) + cmd = "nicctl show 
rdma statistics" + res_rdma_stats = self._run_sut_cmd(cmd, sudo=True) if res_rdma_stats.exit_code == 0: rdma_statistics_entries = self._parse_nicctl_rdma_statistics(res_rdma_stats.stdout) self._log_event( @@ -1615,9 +1644,13 @@ def _collect_pensando_nic_info( priority=EventPriority.INFO, ) collected_count += 1 + collected_commands.append(cmd) + else: + uncollected_commands.append(cmd) # Parse nicctl show version host-software output - res_version_host = self._run_sut_cmd("nicctl show version host-software", sudo=True) + cmd = "nicctl show version host-software" + res_version_host = self._run_sut_cmd(cmd, sudo=True) if res_version_host.exit_code == 0: version_host_software = self._parse_nicctl_version_host_software( res_version_host.stdout @@ -1629,9 +1662,15 @@ def _collect_pensando_nic_info( priority=EventPriority.INFO, ) collected_count += 1 + collected_commands.append(cmd) + else: + uncollected_commands.append(cmd) + else: + uncollected_commands.append(cmd) # Parse nicctl show version firmware output - res_version_firmware = self._run_sut_cmd("nicctl show version firmware", sudo=True) + cmd = "nicctl show version firmware" + res_version_firmware = self._run_sut_cmd(cmd, sudo=True) if res_version_firmware.exit_code == 0: version_firmware_entries = self._parse_nicctl_version_firmware( res_version_firmware.stdout @@ -1642,33 +1681,26 @@ def _collect_pensando_nic_info( priority=EventPriority.INFO, ) collected_count += 1 + collected_commands.append(cmd) + else: + uncollected_commands.append(cmd) - # Collect other nicctl information (raw data) - for cmd in self.CMD_NICCTL_COMMANDS: - if cmd in [ - "nicctl show card", - "nicctl show dcqcn", - "nicctl show environment", - "nicctl show pcie ats", - "nicctl show port", - "nicctl show qos", - "nicctl show rdma statistics", - "nicctl show version host-software", - "nicctl show version firmware", - ]: - # Already collected and parsed above - continue - res = self._run_sut_cmd(cmd, sudo=True) - if res.exit_code == 0: - 
collected_count += 1 - - if collected_count > 0: + # Log summary of collected and uncollected commands + if collected_commands: self._log_event( category=EventCategory.NETWORK, - description=f"Collected Pensando NIC information ({collected_count} commands)", + description=f"Successfully collected {len(collected_commands)} nicctl commands: {', '.join(collected_commands)}", priority=EventPriority.INFO, ) - else: + + if uncollected_commands: + self._log_event( + category=EventCategory.NETWORK, + description=f"Failed to collect {len(uncollected_commands)} nicctl commands: {', '.join(uncollected_commands)}", + priority=EventPriority.WARNING, + ) + + if not collected_commands and not uncollected_commands: self._log_event( category=EventCategory.NETWORK, description="Pensando NIC collection failed or nicctl not available", @@ -1811,41 +1843,36 @@ def collect_data( pensando_version_firmware, ) = self._collect_pensando_nic_info() - if interfaces or routes or rules or neighbors or broadcom_devices or pensando_cards: - network_data = NetworkDataModel( - interfaces=interfaces, - routes=routes, - rules=rules, - neighbors=neighbors, - ethtool_info=ethtool_data, - broadcom_nic_devices=broadcom_devices, - broadcom_nic_qos=broadcom_qos_data, - pensando_nic_cards=pensando_cards, - pensando_nic_dcqcn=pensando_dcqcn, - pensando_nic_environment=pensando_environment, - pensando_nic_pcie_ats=pensando_pcie_ats, - pensando_nic_ports=pensando_ports, - pensando_nic_qos=pensando_qos, - pensando_nic_rdma_statistics=pensando_rdma_statistics, - pensando_nic_version_host_software=pensando_version_host_software, - pensando_nic_version_firmware=pensando_version_firmware, - ) - self.result.message = ( - f"Collected network data: {len(interfaces)} interfaces, " - f"{len(routes)} routes, {len(rules)} rules, {len(neighbors)} neighbors, " - f"{len(ethtool_data)} ethtool entries, {len(broadcom_devices)} Broadcom NICs, " - f"{len(pensando_cards)} Pensando NICs, {len(pensando_dcqcn)} Pensando DCQCN 
entries, " - f"{len(pensando_environment)} Pensando environment entries, " - f"{len(pensando_pcie_ats)} Pensando PCIe ATS entries, " - f"{len(pensando_ports)} Pensando ports, " - f"{len(pensando_qos)} Pensando QoS entries, " - f"{len(pensando_rdma_statistics)} Pensando RDMA statistics, " - f"Pensando host software version: {'Yes' if pensando_version_host_software else 'No'}, " - f"{len(pensando_version_firmware)} Pensando firmware versions" - ) - self.result.status = ExecutionStatus.OK - return self.result, network_data - else: - self.result.message = "Failed to collect network data" - self.result.status = ExecutionStatus.ERROR - return self.result, None + network_data = NetworkDataModel( + interfaces=interfaces, + routes=routes, + rules=rules, + neighbors=neighbors, + ethtool_info=ethtool_data, + broadcom_nic_devices=broadcom_devices, + broadcom_nic_qos=broadcom_qos_data, + pensando_nic_cards=pensando_cards, + pensando_nic_dcqcn=pensando_dcqcn, + pensando_nic_environment=pensando_environment, + pensando_nic_pcie_ats=pensando_pcie_ats, + pensando_nic_ports=pensando_ports, + pensando_nic_qos=pensando_qos, + pensando_nic_rdma_statistics=pensando_rdma_statistics, + pensando_nic_version_host_software=pensando_version_host_software, + pensando_nic_version_firmware=pensando_version_firmware, + ) + self.result.message = ( + f"Collected network data: {len(interfaces)} interfaces, " + f"{len(routes)} routes, {len(rules)} rules, {len(neighbors)} neighbors, " + f"{len(ethtool_data)} ethtool entries, {len(broadcom_devices)} Broadcom NICs, " + f"{len(pensando_cards)} Pensando NICs, {len(pensando_dcqcn)} Pensando DCQCN entries, " + f"{len(pensando_environment)} Pensando environment entries, " + f"{len(pensando_pcie_ats)} Pensando PCIe ATS entries, " + f"{len(pensando_ports)} Pensando ports, " + f"{len(pensando_qos)} Pensando QoS entries, " + f"{len(pensando_rdma_statistics)} Pensando RDMA statistics, " + f"Pensando host software version: {'Yes' if pensando_version_host_software 
else 'No'}, " + f"{len(pensando_version_firmware)} Pensando firmware versions" + ) + self.result.status = ExecutionStatus.OK + return self.result, network_data diff --git a/test/unit/plugin/test_network_collector.py b/test/unit/plugin/test_network_collector.py index 068b8868..ba5a151d 100644 --- a/test/unit/plugin/test_network_collector.py +++ b/test/unit/plugin/test_network_collector.py @@ -397,8 +397,12 @@ def run_sut_cmd_side_effect(cmd, **kwargs): result, data = collector.collect_data() - assert result.status == ExecutionStatus.ERROR - assert data is None + assert result.status == ExecutionStatus.OK + assert data is not None + assert len(data.interfaces) == 0 + assert len(data.routes) == 0 + assert len(data.rules) == 0 + assert len(data.neighbors) == 0 assert len(result.events) > 0 From 91b16854ad9df2fcb382fa9f95a4836b88418539 Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Thu, 8 Jan 2026 13:59:04 -0600 Subject: [PATCH 09/29] added utest --- test/unit/framework/test_cli.py | 228 +++++++++++++++++++++++++++++++- 1 file changed, 227 insertions(+), 1 deletion(-) diff --git a/test/unit/framework/test_cli.py b/test/unit/framework/test_cli.py index cd266ed9..095a852b 100644 --- a/test/unit/framework/test_cli.py +++ b/test/unit/framework/test_cli.py @@ -25,13 +25,21 @@ ############################################################################### import argparse import os +import sys +import tempfile +import types +from pathlib import Path +from unittest import mock import pytest from pydantic import BaseModel +from nodescraper.base import InBandDataPlugin from nodescraper.cli import cli, inputargtypes -from nodescraper.enums import SystemLocation +from nodescraper.enums import ExecutionStatus, SystemLocation +from nodescraper.interfaces import DataAnalyzer from nodescraper.models import SystemInfo +from nodescraper.pluginregistry import PluginRegistry def test_log_path_arg(): @@ -150,3 +158,221 @@ def test_system_info_builder(): ) def 
test_process_args(raw_arg_input, plugin_names, exp_output): assert cli.process_args(raw_arg_input, plugin_names) == exp_output + + +def test_discover_external_plugins_top_level(): + """Test discovering ext_nodescraper_plugins as a top-level import.""" + mock_ext_pkg = mock.MagicMock() + mock_ext_pkg.__file__ = "/path/to/ext_nodescraper_plugins/__init__.py" + + with mock.patch("nodescraper.cli.cli.import_module"): + with mock.patch.dict("sys.modules", {"ext_nodescraper_plugins": mock_ext_pkg}): + result = cli.discover_external_plugins() + + assert len(result) >= 1 + assert mock_ext_pkg in result + + +def test_discover_external_plugins_no_plugins(): + """Test when no external plugins are installed.""" + with mock.patch("nodescraper.cli.cli.import_module") as mock_import: + mock_import.side_effect = ImportError("No module named 'ext_nodescraper_plugins'") + + with mock.patch("importlib.metadata.distributions", return_value=[]): + result = cli.discover_external_plugins() + + assert result == [] + + +def test_discover_external_plugins_from_installed_package(): + """Test discovering plugins from installed packages (not top-level).""" + mock_dist = mock.MagicMock() + mock_dist.metadata.get.return_value = "amd-custom-package" + mock_dist.read_text.return_value = "custompackage" + + mock_plugin = mock.MagicMock() + mock_plugin.__file__ = "/path/to/custompackage/ext_nodescraper_plugins/__init__.py" + + def mock_import_func(module_path): + if module_path == "custompackage.ext_nodescraper_plugins": + return mock_plugin + raise ImportError(f"No module named '{module_path}'") + + with mock.patch("nodescraper.cli.cli.import_module", side_effect=mock_import_func): + with mock.patch("importlib.metadata.distributions", return_value=[mock_dist]): + result = cli.discover_external_plugins() + + assert mock_plugin in result + + +def test_discover_external_plugins_deduplication(): + """Test that duplicate plugins are not added multiple times.""" + mock_ext_pkg = mock.MagicMock() + 
mock_ext_pkg.__file__ = "/path/to/ext_nodescraper_plugins/__init__.py" + + mock_dist1 = mock.MagicMock() + mock_dist1.metadata.get.return_value = "package-one" + mock_dist1.read_text.return_value = "package_one" + + mock_dist2 = mock.MagicMock() + mock_dist2.metadata.get.return_value = "package-two" + mock_dist2.read_text.return_value = "package_one" + + def mock_import_func(module_path): + if "package_one.ext_nodescraper_plugins" in module_path: + return mock_ext_pkg + raise ImportError(f"No module named '{module_path}'") + + with mock.patch("nodescraper.cli.cli.import_module", side_effect=mock_import_func): + with mock.patch("importlib.metadata.distributions", return_value=[mock_dist1, mock_dist2]): + result = cli.discover_external_plugins() + + file_paths = [pkg.__file__ for pkg in result if hasattr(pkg, "__file__")] + assert file_paths.count(mock_ext_pkg.__file__) == 1 + + +def test_discover_external_plugins_name_variants(): + """Test that different package name variants are tried (hyphens vs underscores).""" + mock_dist = mock.MagicMock() + mock_dist.metadata.get.return_value = "amd-error-scraper" + mock_dist.read_text.side_effect = Exception("No top_level.txt") + + mock_plugin = mock.MagicMock() + mock_plugin.__file__ = "/path/to/amd_error_scraper/ext_nodescraper_plugins/__init__.py" + + call_count = {"count": 0} + + def mock_import_func(module_path): + call_count["count"] += 1 + if module_path == "amd_error_scraper.ext_nodescraper_plugins": + return mock_plugin + raise ImportError(f"No module named '{module_path}'") + + with mock.patch("nodescraper.cli.cli.import_module", side_effect=mock_import_func): + with mock.patch("importlib.metadata.distributions", return_value=[mock_dist]): + result = cli.discover_external_plugins() + + assert mock_plugin in result + assert call_count["count"] >= 1 + + +def test_discover_external_plugins_handles_exceptions(): + """Test that discovery continues even if some packages fail.""" + mock_dist1 = mock.MagicMock() + 
mock_dist1.metadata.get.return_value = "good-package" + mock_dist1.read_text.return_value = "goodpackage" + + mock_dist2 = mock.MagicMock() + mock_dist2.metadata.get.side_effect = Exception("Corrupted metadata") + + mock_plugin = mock.MagicMock() + mock_plugin.__file__ = "/path/to/goodpackage/ext_nodescraper_plugins/__init__.py" + + def mock_import_func(module_path): + if module_path == "goodpackage.ext_nodescraper_plugins": + return mock_plugin + raise ImportError(f"No module named '{module_path}'") + + with mock.patch("nodescraper.cli.cli.import_module", side_effect=mock_import_func): + with mock.patch("importlib.metadata.distributions", return_value=[mock_dist1, mock_dist2]): + result = cli.discover_external_plugins() + + assert mock_plugin in result + + +def test_external_plugins_integration(): + """Integration test: Create a temporary external plugin and verify it's picked up.""" + with tempfile.TemporaryDirectory() as tmpdir: + pkg_dir = Path(tmpdir) / "test_external_pkg" + pkg_dir.mkdir() + (pkg_dir / "__init__.py").write_text("# Test external package\n") + + ext_plugins_dir = pkg_dir / "ext_nodescraper_plugins" + ext_plugins_dir.mkdir() + (ext_plugins_dir / "__init__.py").write_text("# External plugins package\n") + + plugin_module_dir = ext_plugins_dir / "test_plugin" + plugin_module_dir.mkdir() + + plugin_code = ''' +"""Test external plugin module""" +from nodescraper.base import InBandDataPlugin +from nodescraper.enums import ExecutionStatus +from nodescraper.interfaces import DataAnalyzer + +class TestAnalyzer(DataAnalyzer): + DATA_MODEL = dict + + def analyze_data(self, data): + return ExecutionStatus.SUCCESS, None + +class TestExternalPlugin(InBandDataPlugin): + DATA_MODEL = dict + ANALYZER = TestAnalyzer + + def run(self): + return ExecutionStatus.SUCCESS, {"test": "data"} +''' + (plugin_module_dir / "__init__.py").write_text(plugin_code) + + sys.path.insert(0, tmpdir) + + try: + import test_external_pkg.ext_nodescraper_plugins as test_ext_pkg + + 
plugin_registry = PluginRegistry(plugin_pkg=[test_ext_pkg]) + + assert ( + "TestExternalPlugin" in plugin_registry.plugins + ), f"External plugin not found. Available plugins: {list(plugin_registry.plugins.keys())}" + + plugin_class = plugin_registry.plugins["TestExternalPlugin"] + assert plugin_class.__name__ == "TestExternalPlugin" + + finally: + sys.path.remove(tmpdir) + modules_to_remove = [ + key for key in sys.modules.keys() if key.startswith("test_external_pkg") + ] + for module in modules_to_remove: + del sys.modules[module] + + +def test_discover_and_load_external_plugins(): + """Test the full flow: discover external plugins using mocked modules.""" + mock_plugin_module = types.ModuleType("mock_ext_nodescraper_plugins") + mock_plugin_module.__file__ = "/fake/path/mock_ext_nodescraper_plugins/__init__.py" + mock_plugin_module.__path__ = ["/fake/path/mock_ext_nodescraper_plugins"] + + mock_submodule = types.ModuleType("mock_ext_nodescraper_plugins.mock_plugin") + mock_submodule.__file__ = "/fake/path/mock_ext_nodescraper_plugins/mock_plugin.py" + + class MockAnalyzer(DataAnalyzer): + DATA_MODEL = dict + + def analyze_data(self, data): + return ExecutionStatus.SUCCESS, None + + class MockExternalPlugin(InBandDataPlugin): + DATA_MODEL = dict + ANALYZER = MockAnalyzer + + def run(self): + return ExecutionStatus.SUCCESS, {} + + mock_submodule.MockExternalPlugin = MockExternalPlugin + + def mock_iter_modules(path, prefix=""): + yield None, f"{prefix}mock_plugin", False + + def mock_import_module(name): + if "mock_plugin" in name: + return mock_submodule + raise ImportError(f"No module named {name}") + + with mock.patch("pkgutil.iter_modules", side_effect=mock_iter_modules): + with mock.patch("importlib.import_module", side_effect=mock_import_module): + plugin_registry = PluginRegistry(plugin_pkg=[mock_plugin_module]) + + assert "MockExternalPlugin" in plugin_registry.plugins + assert plugin_registry.plugins["MockExternalPlugin"] == MockExternalPlugin From 
4142cf1b40587d7f550158f022246617c8b1147e Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Thu, 8 Jan 2026 14:14:16 -0600 Subject: [PATCH 10/29] cleanup --- test/unit/framework/test_cli.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/test/unit/framework/test_cli.py b/test/unit/framework/test_cli.py index 095a852b..49cd8990 100644 --- a/test/unit/framework/test_cli.py +++ b/test/unit/framework/test_cli.py @@ -285,17 +285,16 @@ def test_external_plugins_integration(): with tempfile.TemporaryDirectory() as tmpdir: pkg_dir = Path(tmpdir) / "test_external_pkg" pkg_dir.mkdir() - (pkg_dir / "__init__.py").write_text("# Test external package\n") + (pkg_dir / "__init__.py").write_text("") ext_plugins_dir = pkg_dir / "ext_nodescraper_plugins" ext_plugins_dir.mkdir() - (ext_plugins_dir / "__init__.py").write_text("# External plugins package\n") + (ext_plugins_dir / "__init__.py").write_text("") plugin_module_dir = ext_plugins_dir / "test_plugin" plugin_module_dir.mkdir() - plugin_code = ''' -"""Test external plugin module""" + plugin_code = """ from nodescraper.base import InBandDataPlugin from nodescraper.enums import ExecutionStatus from nodescraper.interfaces import DataAnalyzer @@ -312,7 +311,7 @@ class TestExternalPlugin(InBandDataPlugin): def run(self): return ExecutionStatus.SUCCESS, {"test": "data"} -''' +""" (plugin_module_dir / "__init__.py").write_text(plugin_code) sys.path.insert(0, tmpdir) From fb9cef1cc55887cfc1cd696638545d32adc6ba88 Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Thu, 8 Jan 2026 14:42:41 -0600 Subject: [PATCH 11/29] formatting --- nodescraper/cli/cli.py | 43 +++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/nodescraper/cli/cli.py b/nodescraper/cli/cli.py index 8caa1855..072703ca 100644 --- a/nodescraper/cli/cli.py +++ b/nodescraper/cli/cli.py @@ -58,68 +58,69 @@ def discover_external_plugins(): """Discover ext_nodescraper_plugins from all installed 
packages. - + Returns: list: List of discovered plugin packages """ extra_pkgs = [] seen_paths = set() # Track paths to avoid duplicates - + try: import ext_nodescraper_plugins as ext_pkg + extra_pkgs.append(ext_pkg) - if hasattr(ext_pkg, '__file__') and ext_pkg.__file__: + if hasattr(ext_pkg, "__file__") and ext_pkg.__file__: seen_paths.add(ext_pkg.__file__) except ImportError: pass - + # Discover ext_nodescraper_plugins from installed packages try: from importlib.metadata import distributions - + for dist in distributions(): - pkg_name = dist.metadata.get('Name', '') + pkg_name = dist.metadata.get("Name", "") if not pkg_name: continue - + name_variants = [ - pkg_name.replace('-', '_'), - pkg_name.replace('_', '-'), + pkg_name.replace("-", "_"), + pkg_name.replace("_", "-"), ] - + try: - top_level = dist.read_text('top_level.txt') + top_level = dist.read_text("top_level.txt") if top_level: - name_variants.extend(top_level.strip().split('\n')) + name_variants.extend(top_level.strip().split("\n")) except Exception: pass - + for variant in name_variants: if not variant: continue - + try: module_path = f"{variant}.ext_nodescraper_plugins" ext_pkg = import_module(module_path) - + # Check if we already have this package (by file path) - pkg_path = getattr(ext_pkg, '__file__', None) + pkg_path = getattr(ext_pkg, "__file__", None) if pkg_path and pkg_path in seen_paths: continue - + # Add the package extra_pkgs.append(ext_pkg) if pkg_path: seen_paths.add(pkg_path) - + break - + except (ImportError, AttributeError, ModuleNotFoundError): continue - + except Exception: pass - + return extra_pkgs From 32cbe2a087f5bca64940b4fe5de3f2b88b1f2359 Mon Sep 17 00:00:00 2001 From: jaspals Date: Thu, 8 Jan 2026 20:58:02 +0000 Subject: [PATCH 12/29] fabrics plugin --- .../plugins/inband/fabrics/__init__.py | 28 + .../inband/fabrics/fabrics_collector.py | 635 ++++++++++++++++++ .../plugins/inband/fabrics/fabrics_plugin.py | 37 + .../plugins/inband/fabrics/fabricsdata.py | 140 ++++ 4 
files changed, 840 insertions(+) create mode 100644 nodescraper/plugins/inband/fabrics/__init__.py create mode 100644 nodescraper/plugins/inband/fabrics/fabrics_collector.py create mode 100644 nodescraper/plugins/inband/fabrics/fabrics_plugin.py create mode 100644 nodescraper/plugins/inband/fabrics/fabricsdata.py diff --git a/nodescraper/plugins/inband/fabrics/__init__.py b/nodescraper/plugins/inband/fabrics/__init__.py new file mode 100644 index 00000000..9edfbcda --- /dev/null +++ b/nodescraper/plugins/inband/fabrics/__init__.py @@ -0,0 +1,28 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# +############################################################################### +from .fabrics_plugin import FabricsPlugin + +__all__ = ["FabricsPlugin"] diff --git a/nodescraper/plugins/inband/fabrics/fabrics_collector.py b/nodescraper/plugins/inband/fabrics/fabrics_collector.py new file mode 100644 index 00000000..c0cdac31 --- /dev/null +++ b/nodescraper/plugins/inband/fabrics/fabrics_collector.py @@ -0,0 +1,635 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# +############################################################################### +import re +from typing import Dict, List, Optional, Tuple + +from nodescraper.base import InBandDataCollector +from nodescraper.enums import EventCategory, EventPriority, ExecutionStatus +from nodescraper.models import TaskResult + +from .fabricsdata import ( + FabricsDataModel, + IbdevNetdevMapping, + IbstatDevice, + IbvDeviceInfo, + MstDevice, + MstStatus, + OfedInfo, + RdmaDevice, + RdmaInfo, + RdmaLink, +) + + +class FabricsCollector(InBandDataCollector[FabricsDataModel, None]): + """Collect InfiniBand/RDMA fabrics configuration details""" + + DATA_MODEL = FabricsDataModel + CMD_IBSTAT = "ibstat" + CMD_IBV_DEVINFO = "ibv_devinfo" + CMD_IBDEV2NETDEV = "ibdev2netdev -v" + CMD_OFED_INFO = "/usr/bin/ofed_info -s" + CMD_MST_START = "mst start" + CMD_MST_STATUS = "mst status -v" + CMD_RDMA_DEV = "rdma dev" + CMD_RDMA_LINK = "rdma link" + + def _parse_ibstat(self, output: str) -> List[IbstatDevice]: + """Parse 'ibstat' output into IbstatDevice objects. 
+ + Args: + output: Raw output from 'ibstat' command + + Returns: + List of IbstatDevice objects + """ + devices = [] + current_device = None + current_port = None + current_port_attrs: Dict[str, str] = {} + + for line in output.splitlines(): + line_stripped = line.strip() + + # CA name line (e.g., "CA 'mlx5_0'") + if line.startswith("CA "): + # Save previous device if exists + if current_device: + devices.append(current_device) + + # Extract CA name + match = re.search(r"CA\s+'([^']+)'", line) + if match: + ca_name = match.group(1) + current_device = IbstatDevice(ca_name=ca_name, raw_output=output) + current_port = None + current_port_attrs = {} + + # Port line (e.g., "Port 1:") + elif line.startswith("Port ") and ":" in line: + # Save previous port if exists + if current_device and current_port is not None: + current_device.ports[current_port] = current_port_attrs + + # Extract port number + match = re.search(r"Port\s+(\d+):", line) + if match: + current_port = int(match.group(1)) + current_port_attrs = {} + + # Attribute lines (indented with key: value format) + elif ":" in line_stripped and current_device: + parts = line_stripped.split(":", 1) + if len(parts) == 2: + key = parts[0].strip() + value = parts[1].strip() + + # Store port-specific attributes + if current_port is not None: + current_port_attrs[key] = value + else: + # Store device-level attributes + if key == "CA type": + current_device.ca_type = value + elif key == "Number of ports": + try: + current_device.number_of_ports = int(value) + except ValueError: + pass + elif key == "Firmware version": + current_device.firmware_version = value + elif key == "Hardware version": + current_device.hardware_version = value + elif key == "Node GUID": + current_device.node_guid = value + elif key == "System image GUID": + current_device.system_image_guid = value + + # Save last device and port + if current_device: + if current_port is not None: + current_device.ports[current_port] = current_port_attrs + 
devices.append(current_device) + + return devices + + def _parse_ibv_devinfo(self, output: str) -> List[IbvDeviceInfo]: + """Parse 'ibv_devinfo' output into IbvDeviceInfo objects. + + Args: + output: Raw output from 'ibv_devinfo' command + + Returns: + List of IbvDeviceInfo objects + """ + devices = [] + current_device = None + current_port = None + current_port_attrs: Dict[str, str] = {} + + for line in output.splitlines(): + line_stripped = line.strip() + + # Device header (e.g., "hca_id: mlx5_0") + if line.startswith("hca_id:"): + # Save previous device if exists + if current_device: + devices.append(current_device) + + parts = line.split(":", 1) + if len(parts) == 2: + device_name = parts[1].strip() + current_device = IbvDeviceInfo(device=device_name, raw_output=output) + current_port = None + current_port_attrs = {} + + # Port line (e.g., "port: 1") + elif line_stripped.startswith("port:") and current_device: + # Save previous port if exists + if current_port is not None: + current_device.ports[current_port] = current_port_attrs + + parts = line_stripped.split(":", 1) + if len(parts) == 2: + try: + current_port = int(parts[1].strip()) + current_port_attrs = {} + except ValueError: + pass + + # Attribute lines (with key: value format) + elif ":" in line_stripped and current_device: + parts = line_stripped.split(":", 1) + if len(parts) == 2: + key = parts[0].strip() + value = parts[1].strip() + + # Store port-specific attributes + if current_port is not None: + current_port_attrs[key] = value + else: + # Store device-level attributes + if key == "node_guid": + current_device.node_guid = value + elif key == "sys_image_guid": + current_device.sys_image_guid = value + elif key == "vendor_id": + current_device.vendor_id = value + elif key == "vendor_part_id": + current_device.vendor_part_id = value + elif key == "hw_ver": + current_device.hw_ver = value + elif key == "fw_ver": + current_device.fw_ver = value + elif key == "node_type": + current_device.node_type = 
value + elif key == "transport_type" or key == "transport": + current_device.transport_type = value + + # Save last device and port + if current_device: + if current_port is not None: + current_device.ports[current_port] = current_port_attrs + devices.append(current_device) + + return devices + + def _parse_ibdev2netdev(self, output: str) -> List[IbdevNetdevMapping]: + """Parse 'ibdev2netdev -v' output into IbdevNetdevMapping objects. + + Args: + output: Raw output from 'ibdev2netdev -v' command + + Returns: + List of IbdevNetdevMapping objects + """ + mappings = [] + + for line in output.splitlines(): + line = line.strip() + if not line: + continue + + # Example format: mlx5_0 port 1 ==> ib0 (Up) + # Example format: mlx5_1 port 1 ==> N/A (Down) + match = re.match(r"(\S+)\s+port\s+(\d+)\s+==>\s+(\S+)\s+\(([^)]+)\)", line) + if match: + ib_device = match.group(1) + port = int(match.group(2)) + netdev = match.group(3) if match.group(3) != "N/A" else None + state = match.group(4) + + mapping = IbdevNetdevMapping( + ib_device=ib_device, port=port, netdev=netdev, state=state + ) + mappings.append(mapping) + + return mappings + + def _parse_ofed_info(self, output: str) -> OfedInfo: + """Parse '/usr/bin/ofed_info -s' output into OfedInfo object. + + Args: + output: Raw output from 'ofed_info -s' command + + Returns: + OfedInfo object + """ + version = None + + # The output is typically just a version string + output_stripped = output.strip() + if output_stripped: + version = output_stripped + + return OfedInfo(version=version, raw_output=output) + + def _parse_mst_status(self, output: str) -> MstStatus: + """Parse 'sudo mst status -v' output into MstStatus object. 
+ + Args: + output: Raw output from 'mst status -v' command + + Returns: + MstStatus object + """ + mst_status = MstStatus(raw_output=output) + devices = [] + + # Check if MST is started + if "MST modules:" in output or "MST devices:" in output: + mst_status.mst_started = True + + for line in output.splitlines(): + line = line.strip() + if not line: + continue + + # Look for device lines (e.g., "/dev/mst/mt4123_pciconf0") + if line.startswith("/dev/mst/"): + parts = line.split() + if parts: + device_path = parts[0] + device = MstDevice(device=device_path) + + # Try to parse additional fields + for part in parts[1:]: + if "=" in part: + key, value = part.split("=", 1) + if key == "rdma": + device.rdma_device = value + elif key == "net": + device.net_device = value + elif ":" in value and "." in value: + # Looks like a PCI address + device.pci_address = value + else: + device.attributes[key] = value + elif re.match(r"[0-9a-f]{4}:[0-9a-f]{2}:[0-9a-f]{2}\.[0-9]", part): + # PCI address format + device.pci_address = part + + devices.append(device) + + mst_status.devices = devices + return mst_status + + def _parse_rdma_dev(self, output: str) -> List[RdmaDevice]: + """Parse 'rdma dev' output into RdmaDevice objects. 
+ + Args: + output: Raw output from 'rdma dev' command + + Returns: + List of RdmaDevice objects + """ + devices = [] + + for line in output.splitlines(): + line = line.strip() + if not line: + continue + + # Example format: 0: mlx5_0: node_type ca fw 16.28.2006 node_guid 0c42:a103:00b3:bfa0 sys_image_guid 0c42:a103:00b3:bfa0 + parts = line.split() + if len(parts) < 2: + continue + + # First part might be index followed by colon + device_name = None + start_idx = 0 + + if parts[0].endswith(":"): + # Skip index + start_idx = 1 + + if start_idx < len(parts): + device_name = parts[start_idx].rstrip(":") + start_idx += 1 + + if not device_name: + continue + + device = RdmaDevice(device=device_name) + + # Parse remaining attributes + i = start_idx + while i < len(parts): + if parts[i] == "node_type" and i + 1 < len(parts): + device.node_type = parts[i + 1] + i += 2 + elif parts[i] == "fw" and i + 1 < len(parts): + device.attributes["fw_version"] = parts[i + 1] + i += 2 + elif parts[i] == "node_guid" and i + 1 < len(parts): + device.node_guid = parts[i + 1] + i += 2 + elif parts[i] == "sys_image_guid" and i + 1 < len(parts): + device.sys_image_guid = parts[i + 1] + i += 2 + elif parts[i] == "state" and i + 1 < len(parts): + device.state = parts[i + 1] + i += 2 + else: + # Store as generic attribute + if i + 1 < len(parts) and not parts[i + 1].startswith("-"): + device.attributes[parts[i]] = parts[i + 1] + i += 2 + else: + i += 1 + + devices.append(device) + + return devices + + def _parse_rdma_link(self, output: str) -> List[RdmaLink]: + """Parse 'rdma link' output into RdmaLink objects. 
+ + Args: + output: Raw output from 'rdma link' command + + Returns: + List of RdmaLink objects + """ + links = [] + + for line in output.splitlines(): + line = line.strip() + if not line: + continue + + # Example format: link mlx5_0/1 state ACTIVE physical_state LINK_UP netdev ib0 + # Example format: 0/1: mlx5_0/1: state ACTIVE physical_state LINK_UP + match = re.search(r"(\S+)/(\d+)", line) + if not match: + continue + + device_name = match.group(1) + port = int(match.group(2)) + + link = RdmaLink(device=device_name, port=port) + + # Parse remaining attributes + parts = line.split() + i = 0 + while i < len(parts): + if parts[i] == "state" and i + 1 < len(parts): + link.state = parts[i + 1] + i += 2 + elif parts[i] == "physical_state" and i + 1 < len(parts): + link.physical_state = parts[i + 1] + i += 2 + elif parts[i] == "netdev" and i + 1 < len(parts): + link.netdev = parts[i + 1] + i += 2 + else: + # Store as generic attribute if it's a key-value pair + if i + 1 < len(parts) and not parts[i + 1].startswith("-"): + link.attributes[parts[i]] = parts[i + 1] + i += 2 + else: + i += 1 + + links.append(link) + + return links + + def collect_data( + self, + args=None, + ) -> Tuple[TaskResult, Optional[FabricsDataModel]]: + """Collect InfiniBand/RDMA fabrics configuration from the system. + + Returns: + Tuple[TaskResult, Optional[FabricsDataModel]]: tuple containing the task result + and an instance of FabricsDataModel or None if collection failed. 
+ """ + ibstat_devices = [] + ibv_devices = [] + ibdev_netdev_mappings = [] + ofed_info = None + mst_status = None + rdma_info = None + + # Collect ibstat information + res_ibstat = self._run_sut_cmd(self.CMD_IBSTAT) + if res_ibstat.exit_code == 0: + ibstat_devices = self._parse_ibstat(res_ibstat.stdout) + self._log_event( + category=EventCategory.NETWORK, + description=f"Collected {len(ibstat_devices)} IB devices from ibstat", + priority=EventPriority.INFO, + ) + else: + self._log_event( + category=EventCategory.NETWORK, + description="Error collecting ibstat information", + data={"command": res_ibstat.command, "exit_code": res_ibstat.exit_code}, + priority=EventPriority.WARNING, + ) + + # Collect ibv_devinfo information + res_ibv = self._run_sut_cmd(self.CMD_IBV_DEVINFO) + if res_ibv.exit_code == 0: + ibv_devices = self._parse_ibv_devinfo(res_ibv.stdout) + self._log_event( + category=EventCategory.NETWORK, + description=f"Collected {len(ibv_devices)} IB devices from ibv_devinfo", + priority=EventPriority.INFO, + ) + else: + self._log_event( + category=EventCategory.NETWORK, + description="Error collecting ibv_devinfo information", + data={"command": res_ibv.command, "exit_code": res_ibv.exit_code}, + priority=EventPriority.WARNING, + ) + + # Collect ibdev2netdev mappings + res_ibdev2netdev = self._run_sut_cmd(self.CMD_IBDEV2NETDEV) + if res_ibdev2netdev.exit_code == 0: + ibdev_netdev_mappings = self._parse_ibdev2netdev(res_ibdev2netdev.stdout) + self._log_event( + category=EventCategory.NETWORK, + description=f"Collected {len(ibdev_netdev_mappings)} IB to netdev mappings", + priority=EventPriority.INFO, + ) + else: + self._log_event( + category=EventCategory.NETWORK, + description="Error collecting ibdev2netdev mappings", + data={ + "command": res_ibdev2netdev.command, + "exit_code": res_ibdev2netdev.exit_code, + }, + priority=EventPriority.WARNING, + ) + + # Collect OFED version info + res_ofed = self._run_sut_cmd(self.CMD_OFED_INFO) + if res_ofed.exit_code == 
0: + ofed_info = self._parse_ofed_info(res_ofed.stdout) + self._log_event( + category=EventCategory.NETWORK, + description=f"Collected OFED version: {ofed_info.version}", + priority=EventPriority.INFO, + ) + else: + self._log_event( + category=EventCategory.NETWORK, + description="Error collecting OFED info", + data={"command": res_ofed.command, "exit_code": res_ofed.exit_code}, + priority=EventPriority.WARNING, + ) + + # Start MST and collect status + # First start MST + res_mst_start = self._run_sut_cmd(self.CMD_MST_START, sudo=True) + if res_mst_start.exit_code == 0: + self._log_event( + category=EventCategory.NETWORK, + description="MST service started successfully", + priority=EventPriority.INFO, + ) + else: + self._log_event( + category=EventCategory.NETWORK, + description="Error starting MST service (might already be running)", + data={"command": res_mst_start.command, "exit_code": res_mst_start.exit_code}, + priority=EventPriority.WARNING, + ) + + # Get MST status + res_mst_status = self._run_sut_cmd(self.CMD_MST_STATUS, sudo=True) + if res_mst_status.exit_code == 0: + mst_status = self._parse_mst_status(res_mst_status.stdout) + self._log_event( + category=EventCategory.NETWORK, + description=f"Collected MST status: {len(mst_status.devices)} devices", + priority=EventPriority.INFO, + ) + else: + self._log_event( + category=EventCategory.NETWORK, + description="Error collecting MST status", + data={"command": res_mst_status.command, "exit_code": res_mst_status.exit_code}, + priority=EventPriority.WARNING, + ) + + # Collect RDMA device information + rdma_devices = [] + res_rdma_dev = self._run_sut_cmd(self.CMD_RDMA_DEV) + if res_rdma_dev.exit_code == 0: + rdma_devices = self._parse_rdma_dev(res_rdma_dev.stdout) + self._log_event( + category=EventCategory.NETWORK, + description=f"Collected {len(rdma_devices)} RDMA devices", + priority=EventPriority.INFO, + ) + else: + self._log_event( + category=EventCategory.NETWORK, + description="Error collecting RDMA 
device information", + data={"command": res_rdma_dev.command, "exit_code": res_rdma_dev.exit_code}, + priority=EventPriority.WARNING, + ) + + # Collect RDMA link information + rdma_links = [] + res_rdma_link = self._run_sut_cmd(self.CMD_RDMA_LINK) + if res_rdma_link.exit_code == 0: + rdma_links = self._parse_rdma_link(res_rdma_link.stdout) + self._log_event( + category=EventCategory.NETWORK, + description=f"Collected {len(rdma_links)} RDMA links", + priority=EventPriority.INFO, + ) + else: + self._log_event( + category=EventCategory.NETWORK, + description="Error collecting RDMA link information", + data={"command": res_rdma_link.command, "exit_code": res_rdma_link.exit_code}, + priority=EventPriority.WARNING, + ) + + # Combine RDMA information + if rdma_devices or rdma_links: + rdma_info = RdmaInfo( + devices=rdma_devices, + links=rdma_links, + raw_output=res_rdma_dev.stdout + "\n" + res_rdma_link.stdout, + ) + + # Build the data model if we collected any data + if ( + ibstat_devices + or ibv_devices + or ibdev_netdev_mappings + or ofed_info + or mst_status + or rdma_info + ): + fabrics_data = FabricsDataModel( + ibstat_devices=ibstat_devices, + ibv_devices=ibv_devices, + ibdev_netdev_mappings=ibdev_netdev_mappings, + ofed_info=ofed_info, + mst_status=mst_status, + rdma_info=rdma_info, + ) + self.result.message = ( + f"Collected fabrics data: {len(ibstat_devices)} ibstat devices, " + f"{len(ibv_devices)} ibv devices, {len(ibdev_netdev_mappings)} mappings, " + f"OFED: {ofed_info.version if ofed_info else 'N/A'}, " + f"MST devices: {len(mst_status.devices) if mst_status else 0}, " + f"RDMA devices: {len(rdma_info.devices) if rdma_info else 0}" + ) + self.result.status = ExecutionStatus.OK + return self.result, fabrics_data + else: + self.result.message = "Failed to collect fabrics data" + self.result.status = ExecutionStatus.ERROR + return self.result, None diff --git a/nodescraper/plugins/inband/fabrics/fabrics_plugin.py 
b/nodescraper/plugins/inband/fabrics/fabrics_plugin.py new file mode 100644 index 00000000..6c51b9f6 --- /dev/null +++ b/nodescraper/plugins/inband/fabrics/fabrics_plugin.py @@ -0,0 +1,37 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# +############################################################################### +from nodescraper.base import InBandDataPlugin + +from .fabrics_collector import FabricsCollector +from .fabricsdata import FabricsDataModel + + +class FabricsPlugin(InBandDataPlugin[FabricsDataModel, None, None]): + """Plugin for collection of InfiniBand/RDMA fabrics configuration data""" + + DATA_MODEL = FabricsDataModel + + COLLECTOR = FabricsCollector diff --git a/nodescraper/plugins/inband/fabrics/fabricsdata.py b/nodescraper/plugins/inband/fabrics/fabricsdata.py new file mode 100644 index 00000000..01061b3c --- /dev/null +++ b/nodescraper/plugins/inband/fabrics/fabricsdata.py @@ -0,0 +1,140 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# +############################################################################### +from typing import Dict, List, Optional + +from pydantic import BaseModel, Field + +from nodescraper.models import DataModel + + +class IbstatDevice(BaseModel): + """InfiniBand device information from ibstat""" + + ca_name: Optional[str] = None # CA name (e.g., "mlx5_0") + ca_type: Optional[str] = None # CA type + number_of_ports: Optional[int] = None # Number of physical ports + firmware_version: Optional[str] = None # Firmware version + hardware_version: Optional[str] = None # Hardware version + node_guid: Optional[str] = None # Node GUID + system_image_guid: Optional[str] = None # System image GUID + ports: Dict[int, Dict[str, str]] = Field(default_factory=dict) # Port number -> port attributes + raw_output: str = "" # Raw command output + + +class IbvDeviceInfo(BaseModel): + """InfiniBand verbs device information from ibv_devinfo""" + + device: Optional[str] = None # Device name (e.g., "mlx5_0") + node_guid: Optional[str] = None # Node GUID + sys_image_guid: Optional[str] = None # System image GUID + vendor_id: Optional[str] = None # Vendor ID + vendor_part_id: Optional[str] = None # Vendor part ID + hw_ver: Optional[str] = None # Hardware version + fw_ver: Optional[str] = None # Firmware version + node_type: Optional[str] = None # Node type + transport_type: Optional[str] = None # Transport type (e.g., "InfiniBand", "Ethernet") + ports: Dict[int, Dict[str, str]] = Field(default_factory=dict) # Port number -> port attributes + raw_output: str = "" # Raw command output + + +class IbdevNetdevMapping(BaseModel): + """Mapping between IB device and network interface""" + + ib_device: str # InfiniBand device name (e.g., "mlx5_0") + port: int # Port number + netdev: Optional[str] = None # Network device name (e.g., "ib0", "eth0") + state: Optional[str] = None # Port state (e.g., "Up", "Down") + pkey: Optional[str] = None # Partition key + guid: Optional[str] = None # GUID + + +class 
OfedInfo(BaseModel): + """OFED version and information""" + + version: Optional[str] = None # OFED version + raw_output: str = "" # Raw command output + + +class MstDevice(BaseModel): + """Mellanox Software Tools device information""" + + device: str # Device path (e.g., "/dev/mst/mt4123_pciconf0") + pci_address: Optional[str] = None # PCI address + rdma_device: Optional[str] = None # RDMA device name + net_device: Optional[str] = None # Network device name + attributes: Dict[str, str] = Field(default_factory=dict) # Additional attributes + + +class MstStatus(BaseModel): + """Mellanox Software Tools status""" + + mst_started: bool = False # Whether MST service is started + devices: List[MstDevice] = Field(default_factory=list) # List of MST devices + raw_output: str = "" # Raw command output + + +class RdmaDevice(BaseModel): + """RDMA device information from rdma command""" + + device: str # Device name (e.g., "mlx5_0") + node_type: Optional[str] = None # Node type + transport: Optional[str] = None # Transport type + node_guid: Optional[str] = None # Node GUID + sys_image_guid: Optional[str] = None # System image GUID + state: Optional[str] = None # Device state + attributes: Dict[str, str] = Field(default_factory=dict) # Additional attributes + + +class RdmaLink(BaseModel): + """RDMA link information""" + + device: str # Device name + port: int # Port number + state: Optional[str] = None # Link state + physical_state: Optional[str] = None # Physical state + netdev: Optional[str] = None # Associated network device + attributes: Dict[str, str] = Field(default_factory=dict) # Additional attributes + + +class RdmaInfo(BaseModel): + """Complete RDMA information from rdma command""" + + devices: List[RdmaDevice] = Field(default_factory=list) # RDMA devices + links: List[RdmaLink] = Field(default_factory=list) # RDMA links + raw_output: str = "" # Raw command output + + +class FabricsDataModel(DataModel): + """Complete InfiniBand/RDMA fabrics configuration data""" + + 
ibstat_devices: List[IbstatDevice] = Field(default_factory=list) # ibstat output + ibv_devices: List[IbvDeviceInfo] = Field(default_factory=list) # ibv_devinfo output + ibdev_netdev_mappings: List[IbdevNetdevMapping] = Field( + default_factory=list + ) # ibdev2netdev output + ofed_info: Optional[OfedInfo] = None # OFED version info + mst_status: Optional[MstStatus] = None # MST status + rdma_info: Optional[RdmaInfo] = None # RDMA information From 55920b2ebf40cad0932ab26edb27c196724c5eba Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Thu, 8 Jan 2026 15:06:23 -0600 Subject: [PATCH 13/29] Relax paramiko requirement to >=3.2.0,<4.0.0 - Changed from paramiko~=3.5.1 to paramiko>=3.2.0,<4.0.0 - Allows broader compatibility with downstream dependencies - Uses only basic paramiko features stable since 3.2.0 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 358d9cf2..7b0c0f35 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ classifiers = ["Topic :: Software Development"] dependencies = [ "pydantic>=2.8.2", - "paramiko~=3.5.1", + "paramiko>=3.2.0,<4.0.0", "requests", "pytz" ] From 2d13b1795e57d3cb4ca7dc5939c96279427dcb35 Mon Sep 17 00:00:00 2001 From: jaspals Date: Thu, 8 Jan 2026 21:13:50 +0000 Subject: [PATCH 14/29] class var fix --- .../inband/network/network_collector.py | 85 ++++++++----------- 1 file changed, 37 insertions(+), 48 deletions(-) diff --git a/nodescraper/plugins/inband/network/network_collector.py b/nodescraper/plugins/inband/network/network_collector.py index 94a48026..21f571da 100644 --- a/nodescraper/plugins/inband/network/network_collector.py +++ b/nodescraper/plugins/inband/network/network_collector.py @@ -74,17 +74,15 @@ class NetworkCollector(InBandDataCollector[NetworkDataModel, None]): CMD_NICCLI_GETQOS_TEMPLATE = "niccli --dev {device_num} qos --ets --show" # Pensando NIC commands - CMD_NICCTL_COMMANDS = [ - "nicctl show card", - "nicctl show dcqcn", - 
"nicctl show environment", - "nicctl show pcie ats", - "nicctl show port", - "nicctl show qos", - "nicctl show rdma statistics", - "nicctl show version host-software", - "nicctl show version firmware", - ] + CMD_NICCTL_CARD = "nicctl show card" + CMD_NICCTL_DCQCN = "nicctl show dcqcn" + CMD_NICCTL_ENVIRONMENT = "nicctl show environment" + CMD_NICCTL_PCIE_ATS = "nicctl show pcie ats" + CMD_NICCTL_PORT = "nicctl show port" + CMD_NICCTL_QOS = "nicctl show qos" + CMD_NICCTL_RDMA_STATISTICS = "nicctl show rdma statistics" + CMD_NICCTL_VERSION_HOST_SOFTWARE = "nicctl show version host-software" + CMD_NICCTL_VERSION_FIRMWARE = "nicctl show version firmware" def _parse_ip_addr(self, output: str) -> List[NetworkInterface]: """Parse 'ip addr show' output into NetworkInterface objects. @@ -1544,8 +1542,7 @@ def _collect_pensando_nic_info( uncollected_commands = [] # Parse nicctl show card output - cmd = "nicctl show card" - res_card = self._run_sut_cmd(cmd, sudo=True) + res_card = self._run_sut_cmd(self.CMD_NICCTL_CARD, sudo=True) if res_card.exit_code == 0: cards = self._parse_nicctl_card(res_card.stdout) self._log_event( @@ -1554,13 +1551,12 @@ def _collect_pensando_nic_info( priority=EventPriority.INFO, ) collected_count += 1 - collected_commands.append(cmd) + collected_commands.append(self.CMD_NICCTL_CARD) else: - uncollected_commands.append(cmd) + uncollected_commands.append(self.CMD_NICCTL_CARD) # Parse nicctl show dcqcn output - cmd = "nicctl show dcqcn" - res_dcqcn = self._run_sut_cmd(cmd, sudo=True) + res_dcqcn = self._run_sut_cmd(self.CMD_NICCTL_DCQCN, sudo=True) if res_dcqcn.exit_code == 0: dcqcn_entries = self._parse_nicctl_dcqcn(res_dcqcn.stdout) self._log_event( @@ -1569,13 +1565,12 @@ def _collect_pensando_nic_info( priority=EventPriority.INFO, ) collected_count += 1 - collected_commands.append(cmd) + collected_commands.append(self.CMD_NICCTL_DCQCN) else: - uncollected_commands.append(cmd) + uncollected_commands.append(self.CMD_NICCTL_DCQCN) # Parse nicctl show 
environment output - cmd = "nicctl show environment" - res_environment = self._run_sut_cmd(cmd, sudo=True) + res_environment = self._run_sut_cmd(self.CMD_NICCTL_ENVIRONMENT, sudo=True) if res_environment.exit_code == 0: environment_entries = self._parse_nicctl_environment(res_environment.stdout) self._log_event( @@ -1584,13 +1579,12 @@ def _collect_pensando_nic_info( priority=EventPriority.INFO, ) collected_count += 1 - collected_commands.append(cmd) + collected_commands.append(self.CMD_NICCTL_ENVIRONMENT) else: - uncollected_commands.append(cmd) + uncollected_commands.append(self.CMD_NICCTL_ENVIRONMENT) # Parse nicctl show pcie ats output - cmd = "nicctl show pcie ats" - res_pcie_ats = self._run_sut_cmd(cmd, sudo=True) + res_pcie_ats = self._run_sut_cmd(self.CMD_NICCTL_PCIE_ATS, sudo=True) if res_pcie_ats.exit_code == 0: pcie_ats_entries = self._parse_nicctl_pcie_ats(res_pcie_ats.stdout) self._log_event( @@ -1599,13 +1593,12 @@ def _collect_pensando_nic_info( priority=EventPriority.INFO, ) collected_count += 1 - collected_commands.append(cmd) + collected_commands.append(self.CMD_NICCTL_PCIE_ATS) else: - uncollected_commands.append(cmd) + uncollected_commands.append(self.CMD_NICCTL_PCIE_ATS) # Parse nicctl show port output - cmd = "nicctl show port" - res_port = self._run_sut_cmd(cmd, sudo=True) + res_port = self._run_sut_cmd(self.CMD_NICCTL_PORT, sudo=True) if res_port.exit_code == 0: port_entries = self._parse_nicctl_port(res_port.stdout) self._log_event( @@ -1614,13 +1607,12 @@ def _collect_pensando_nic_info( priority=EventPriority.INFO, ) collected_count += 1 - collected_commands.append(cmd) + collected_commands.append(self.CMD_NICCTL_PORT) else: - uncollected_commands.append(cmd) + uncollected_commands.append(self.CMD_NICCTL_PORT) # Parse nicctl show qos output - cmd = "nicctl show qos" - res_qos = self._run_sut_cmd(cmd, sudo=True) + res_qos = self._run_sut_cmd(self.CMD_NICCTL_QOS, sudo=True) if res_qos.exit_code == 0: qos_entries = 
self._parse_nicctl_qos(res_qos.stdout) self._log_event( @@ -1629,13 +1621,12 @@ def _collect_pensando_nic_info( priority=EventPriority.INFO, ) collected_count += 1 - collected_commands.append(cmd) + collected_commands.append(self.CMD_NICCTL_QOS) else: - uncollected_commands.append(cmd) + uncollected_commands.append(self.CMD_NICCTL_QOS) # Parse nicctl show rdma statistics output - cmd = "nicctl show rdma statistics" - res_rdma_stats = self._run_sut_cmd(cmd, sudo=True) + res_rdma_stats = self._run_sut_cmd(self.CMD_NICCTL_RDMA_STATISTICS, sudo=True) if res_rdma_stats.exit_code == 0: rdma_statistics_entries = self._parse_nicctl_rdma_statistics(res_rdma_stats.stdout) self._log_event( @@ -1644,13 +1635,12 @@ def _collect_pensando_nic_info( priority=EventPriority.INFO, ) collected_count += 1 - collected_commands.append(cmd) + collected_commands.append(self.CMD_NICCTL_RDMA_STATISTICS) else: - uncollected_commands.append(cmd) + uncollected_commands.append(self.CMD_NICCTL_RDMA_STATISTICS) # Parse nicctl show version host-software output - cmd = "nicctl show version host-software" - res_version_host = self._run_sut_cmd(cmd, sudo=True) + res_version_host = self._run_sut_cmd(self.CMD_NICCTL_VERSION_HOST_SOFTWARE, sudo=True) if res_version_host.exit_code == 0: version_host_software = self._parse_nicctl_version_host_software( res_version_host.stdout @@ -1662,15 +1652,14 @@ def _collect_pensando_nic_info( priority=EventPriority.INFO, ) collected_count += 1 - collected_commands.append(cmd) + collected_commands.append(self.CMD_NICCTL_VERSION_HOST_SOFTWARE) else: - uncollected_commands.append(cmd) + uncollected_commands.append(self.CMD_NICCTL_VERSION_HOST_SOFTWARE) else: - uncollected_commands.append(cmd) + uncollected_commands.append(self.CMD_NICCTL_VERSION_HOST_SOFTWARE) # Parse nicctl show version firmware output - cmd = "nicctl show version firmware" - res_version_firmware = self._run_sut_cmd(cmd, sudo=True) + res_version_firmware = 
self._run_sut_cmd(self.CMD_NICCTL_VERSION_FIRMWARE, sudo=True) if res_version_firmware.exit_code == 0: version_firmware_entries = self._parse_nicctl_version_firmware( res_version_firmware.stdout @@ -1681,9 +1670,9 @@ def _collect_pensando_nic_info( priority=EventPriority.INFO, ) collected_count += 1 - collected_commands.append(cmd) + collected_commands.append(self.CMD_NICCTL_VERSION_FIRMWARE) else: - uncollected_commands.append(cmd) + uncollected_commands.append(self.CMD_NICCTL_VERSION_FIRMWARE) # Log summary of collected and uncollected commands if collected_commands: From 4493e17776524075fe0d93efd6d70c856dc32c9c Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Fri, 9 Jan 2026 11:05:41 -0600 Subject: [PATCH 15/29] urllib3 version downgrade --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 7b0c0f35..831d83ad 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,8 @@ dependencies = [ "pydantic>=2.8.2", "paramiko>=3.2.0,<4.0.0", "requests", - "pytz" + "pytz", + "urllib3>=1.26.15,<2.0.0" ] [project.optional-dependencies] From f409344b7796ba1166841e65e34068d695926b9b Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Fri, 9 Jan 2026 11:49:22 -0600 Subject: [PATCH 16/29] When node-scraper runs as an entry point script, Python adds venv/bin (or venv\Scripts on Windows) to sys.path[0] This breaks importlib.metadata.distributions() from discovering editable installs which resulted in external plugins from packages like amd-error-scraper not being found --- nodescraper/cli/cli.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/nodescraper/cli/cli.py b/nodescraper/cli/cli.py index 072703ca..ff9eddd6 100644 --- a/nodescraper/cli/cli.py +++ b/nodescraper/cli/cli.py @@ -124,8 +124,16 @@ def discover_external_plugins(): return extra_pkgs +# Fix sys.path[0] if it's the venv/bin directory to avoid breaking editable install discovery +_original_syspath0 = sys.path[0] +if
_original_syspath0.endswith("/bin") or _original_syspath0.endswith("\\Scripts"): + sys.path[0] = "" + extra_pkgs = discover_external_plugins() +# Restore original sys.path[0] +sys.path[0] = _original_syspath0 + def build_parser( plugin_reg: PluginRegistry, From 43d4584eba30d30834956e8196bc2cb351e3bbee Mon Sep 17 00:00:00 2001 From: jaspals Date: Mon, 12 Jan 2026 17:52:50 +0000 Subject: [PATCH 17/29] fixed output logging --- .../inband/network/network_collector.py | 75 +++++-------------- 1 file changed, 18 insertions(+), 57 deletions(-) diff --git a/nodescraper/plugins/inband/network/network_collector.py b/nodescraper/plugins/inband/network/network_collector.py index 21f571da..1dac9ac4 100644 --- a/nodescraper/plugins/inband/network/network_collector.py +++ b/nodescraper/plugins/inband/network/network_collector.py @@ -1515,6 +1515,7 @@ def _collect_pensando_nic_info( List[PensandoNicRdmaStatistics], Optional[PensandoNicVersionHostSoftware], List[PensandoNicVersionFirmware], + List[str], ]: """Collect Pensando NIC information using nicctl commands. 
@@ -1524,7 +1525,8 @@ def _collect_pensando_nic_info( list of PensandoNicPort, list of PensandoNicQos, list of PensandoNicRdmaStatistics, PensandoNicVersionHostSoftware object, - list of PensandoNicVersionFirmware) + list of PensandoNicVersionFirmware, + list of uncollected command names) """ cards = [] dcqcn_entries = [] @@ -1535,10 +1537,8 @@ def _collect_pensando_nic_info( rdma_statistics_entries = [] version_host_software = None version_firmware_entries = [] - collected_count = 0 - # Track which commands succeeded and which failed - collected_commands = [] + # Track which commands failed uncollected_commands = [] # Parse nicctl show card output @@ -1550,8 +1550,6 @@ def _collect_pensando_nic_info( description=f"Collected Pensando NIC card list: {len(cards)} cards", priority=EventPriority.INFO, ) - collected_count += 1 - collected_commands.append(self.CMD_NICCTL_CARD) else: uncollected_commands.append(self.CMD_NICCTL_CARD) @@ -1564,8 +1562,6 @@ def _collect_pensando_nic_info( description=f"Collected Pensando NIC DCQCN info: {len(dcqcn_entries)} entries", priority=EventPriority.INFO, ) - collected_count += 1 - collected_commands.append(self.CMD_NICCTL_DCQCN) else: uncollected_commands.append(self.CMD_NICCTL_DCQCN) @@ -1578,8 +1574,6 @@ def _collect_pensando_nic_info( description=f"Collected Pensando NIC environment info: {len(environment_entries)} entries", priority=EventPriority.INFO, ) - collected_count += 1 - collected_commands.append(self.CMD_NICCTL_ENVIRONMENT) else: uncollected_commands.append(self.CMD_NICCTL_ENVIRONMENT) @@ -1592,8 +1586,6 @@ def _collect_pensando_nic_info( description=f"Collected Pensando NIC PCIe ATS info: {len(pcie_ats_entries)} entries", priority=EventPriority.INFO, ) - collected_count += 1 - collected_commands.append(self.CMD_NICCTL_PCIE_ATS) else: uncollected_commands.append(self.CMD_NICCTL_PCIE_ATS) @@ -1606,8 +1598,6 @@ def _collect_pensando_nic_info( description=f"Collected Pensando NIC port info: {len(port_entries)} ports", 
priority=EventPriority.INFO, ) - collected_count += 1 - collected_commands.append(self.CMD_NICCTL_PORT) else: uncollected_commands.append(self.CMD_NICCTL_PORT) @@ -1620,8 +1610,6 @@ def _collect_pensando_nic_info( description=f"Collected Pensando NIC QoS info: {len(qos_entries)} entries", priority=EventPriority.INFO, ) - collected_count += 1 - collected_commands.append(self.CMD_NICCTL_QOS) else: uncollected_commands.append(self.CMD_NICCTL_QOS) @@ -1634,8 +1622,6 @@ def _collect_pensando_nic_info( description=f"Collected Pensando NIC RDMA statistics: {len(rdma_statistics_entries)} entries", priority=EventPriority.INFO, ) - collected_count += 1 - collected_commands.append(self.CMD_NICCTL_RDMA_STATISTICS) else: uncollected_commands.append(self.CMD_NICCTL_RDMA_STATISTICS) @@ -1651,8 +1637,6 @@ def _collect_pensando_nic_info( description="Collected Pensando NIC host software version", priority=EventPriority.INFO, ) - collected_count += 1 - collected_commands.append(self.CMD_NICCTL_VERSION_HOST_SOFTWARE) else: uncollected_commands.append(self.CMD_NICCTL_VERSION_HOST_SOFTWARE) else: @@ -1669,33 +1653,9 @@ def _collect_pensando_nic_info( description=f"Collected Pensando NIC firmware versions: {len(version_firmware_entries)} entries", priority=EventPriority.INFO, ) - collected_count += 1 - collected_commands.append(self.CMD_NICCTL_VERSION_FIRMWARE) else: uncollected_commands.append(self.CMD_NICCTL_VERSION_FIRMWARE) - # Log summary of collected and uncollected commands - if collected_commands: - self._log_event( - category=EventCategory.NETWORK, - description=f"Successfully collected {len(collected_commands)} nicctl commands: {', '.join(collected_commands)}", - priority=EventPriority.INFO, - ) - - if uncollected_commands: - self._log_event( - category=EventCategory.NETWORK, - description=f"Failed to collect {len(uncollected_commands)} nicctl commands: {', '.join(uncollected_commands)}", - priority=EventPriority.WARNING, - ) - - if not collected_commands and not 
uncollected_commands: - self._log_event( - category=EventCategory.NETWORK, - description="Pensando NIC collection failed or nicctl not available", - priority=EventPriority.INFO, - ) - return ( cards, dcqcn_entries, @@ -1706,6 +1666,7 @@ def _collect_pensando_nic_info( rdma_statistics_entries, version_host_software, version_firmware_entries, + uncollected_commands, ) def collect_data( @@ -1830,8 +1791,21 @@ def collect_data( pensando_rdma_statistics, pensando_version_host_software, pensando_version_firmware, + uncollected_commands, ) = self._collect_pensando_nic_info() + # Log summary of uncollected commands or success + if uncollected_commands: + self.result.message = "Network data collection failed" + self._log_event( + category=EventCategory.NETWORK, + description=f"Failed to collect {len(uncollected_commands)} nicctl commands: {', '.join(uncollected_commands)}", + priority=EventPriority.WARNING, + ) + + else: + self.result.message = "Network data collected successfully" + network_data = NetworkDataModel( interfaces=interfaces, routes=routes, @@ -1850,18 +1824,5 @@ def collect_data( pensando_nic_version_host_software=pensando_version_host_software, pensando_nic_version_firmware=pensando_version_firmware, ) - self.result.message = ( - f"Collected network data: {len(interfaces)} interfaces, " - f"{len(routes)} routes, {len(rules)} rules, {len(neighbors)} neighbors, " - f"{len(ethtool_data)} ethtool entries, {len(broadcom_devices)} Broadcom NICs, " - f"{len(pensando_cards)} Pensando NICs, {len(pensando_dcqcn)} Pensando DCQCN entries, " - f"{len(pensando_environment)} Pensando environment entries, " - f"{len(pensando_pcie_ats)} Pensando PCIe ATS entries, " - f"{len(pensando_ports)} Pensando ports, " - f"{len(pensando_qos)} Pensando QoS entries, " - f"{len(pensando_rdma_statistics)} Pensando RDMA statistics, " - f"Pensando host software version: {'Yes' if pensando_version_host_software else 'No'}, " - f"{len(pensando_version_firmware)} Pensando firmware versions" - ) 
self.result.status = ExecutionStatus.OK return self.result, network_data From 5e19a0223de297e8e6322e387212f2dc49c587f3 Mon Sep 17 00:00:00 2001 From: jaspals Date: Mon, 12 Jan 2026 18:06:12 +0000 Subject: [PATCH 18/29] test fix --- test/unit/plugin/test_network_collector.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/test/unit/plugin/test_network_collector.py b/test/unit/plugin/test_network_collector.py index ba5a151d..222c1fc0 100644 --- a/test/unit/plugin/test_network_collector.py +++ b/test/unit/plugin/test_network_collector.py @@ -336,11 +336,8 @@ def run_sut_cmd_side_effect(cmd, **kwargs): assert len(data.routes) == 3 assert len(data.rules) == 3 assert len(data.neighbors) == 2 - assert "2 interfaces" in result.message - assert "3 routes" in result.message - assert "3 rules" in result.message - assert "2 neighbors" in result.message - assert "ethtool" in result.message + # Since nicctl commands fail in this test, we expect the failure message + assert "Network data collection failed" in result.message def test_collect_data_addr_failure(collector, conn_mock): @@ -1093,8 +1090,12 @@ def run_sut_cmd_side_effect(cmd, **kwargs): rdma_statistics_entries, version_host_software, version_firmware_entries, + uncollected_commands, ) = collector._collect_pensando_nic_info() + # All commands succeeded, so uncollected_commands should be empty + assert len(uncollected_commands) == 0 + assert len(cards) == 2 assert cards[0].id == "1111111-4c32-3533-3330-12345000000" assert cards[0].pcie_bdf == "0000:06:00.0" From 7b92d8472dc4f908a0c52b5fb5439948d7192cbc Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Mon, 12 Jan 2026 16:17:31 -0600 Subject: [PATCH 19/29] cleanup --- nodescraper/cli/cli.py | 82 +----------- nodescraper/pluginregistry.py | 46 +++++++ test/unit/framework/test_cli.py | 227 +------------------------------- 3 files changed, 48 insertions(+), 307 deletions(-) diff --git a/nodescraper/cli/cli.py b/nodescraper/cli/cli.py index 
ff9eddd6..94a1f0fb 100644 --- a/nodescraper/cli/cli.py +++ b/nodescraper/cli/cli.py @@ -30,7 +30,6 @@ import os import platform import sys -from importlib import import_module from typing import Optional import nodescraper @@ -56,85 +55,6 @@ from nodescraper.pluginregistry import PluginRegistry -def discover_external_plugins(): - """Discover ext_nodescraper_plugins from all installed packages. - - Returns: - list: List of discovered plugin packages - """ - extra_pkgs = [] - seen_paths = set() # Track paths to avoid duplicates - - try: - import ext_nodescraper_plugins as ext_pkg - - extra_pkgs.append(ext_pkg) - if hasattr(ext_pkg, "__file__") and ext_pkg.__file__: - seen_paths.add(ext_pkg.__file__) - except ImportError: - pass - - # Discover ext_nodescraper_plugins from installed packages - try: - from importlib.metadata import distributions - - for dist in distributions(): - pkg_name = dist.metadata.get("Name", "") - if not pkg_name: - continue - - name_variants = [ - pkg_name.replace("-", "_"), - pkg_name.replace("_", "-"), - ] - - try: - top_level = dist.read_text("top_level.txt") - if top_level: - name_variants.extend(top_level.strip().split("\n")) - except Exception: - pass - - for variant in name_variants: - if not variant: - continue - - try: - module_path = f"{variant}.ext_nodescraper_plugins" - ext_pkg = import_module(module_path) - - # Check if we already have this package (by file path) - pkg_path = getattr(ext_pkg, "__file__", None) - if pkg_path and pkg_path in seen_paths: - continue - - # Add the package - extra_pkgs.append(ext_pkg) - if pkg_path: - seen_paths.add(pkg_path) - - break - - except (ImportError, AttributeError, ModuleNotFoundError): - continue - - except Exception: - pass - - return extra_pkgs - - -# Fix sys.path[0] if it's the venv/bin directory to avoid breaking editable install discovery -_original_syspath0 = sys.path[0] -if _original_syspath0.endswith("/bin") or _original_syspath0.endswith("\\Scripts"): - sys.path[0] = "" - -extra_pkgs 
= discover_external_plugins() - -# Restore original sys.path[0] -sys.path[0] = _original_syspath0 - - def build_parser( plugin_reg: PluginRegistry, config_reg: ConfigRegistry, @@ -449,7 +369,7 @@ def main(arg_input: Optional[list[str]] = None): if arg_input is None: arg_input = sys.argv[1:] - plugin_reg = PluginRegistry(plugin_pkg=extra_pkgs) + plugin_reg = PluginRegistry() config_reg = ConfigRegistry() parser, plugin_subparser_map = build_parser(plugin_reg, config_reg) diff --git a/nodescraper/pluginregistry.py b/nodescraper/pluginregistry.py index 7d3e24dc..59545a40 100644 --- a/nodescraper/pluginregistry.py +++ b/nodescraper/pluginregistry.py @@ -24,6 +24,7 @@ # ############################################################################### import importlib +import importlib.metadata import inspect import pkgutil import types @@ -45,12 +46,14 @@ def __init__( self, plugin_pkg: Optional[list[types.ModuleType]] = None, load_internal_plugins: bool = True, + load_entry_point_plugins: bool = True, ) -> None: """Initialize the PluginRegistry with optional plugin packages. Args: plugin_pkg (Optional[list[types.ModuleType]], optional): The module to search for plugins in. Defaults to None. load_internal_plugins (bool, optional): Whether internal plugin should be loaded. Defaults to True. + load_entry_point_plugins (bool, optional): Whether to load plugins from entry points. Defaults to True. 
""" if load_internal_plugins: self.plugin_pkg = [internal_plugins, internal_connections, internal_collators] @@ -70,6 +73,10 @@ def __init__( PluginResultCollator, self.plugin_pkg ) + if load_entry_point_plugins: + entry_point_plugins = self.load_plugins_from_entry_points() + self.plugins.update(entry_point_plugins) + @staticmethod def load_plugins( base_class: type, @@ -104,3 +111,42 @@ def _recurse_pkg(pkg: types.ModuleType, base_class: type) -> None: for pkg in search_modules: _recurse_pkg(pkg, base_class) return registry + + @staticmethod + def load_plugins_from_entry_points() -> dict[str, type]: + """Load plugins registered via entry points. + + Returns: + dict[str, type]: A dictionary mapping plugin names to their classes. + """ + plugins = {} + + try: + # Python 3.10+ supports group parameter + try: + plugin_entry_points = importlib.metadata.entry_points(group="nodescraper.plugins") # type: ignore[call-arg] + except TypeError: + # Python 3.9 - entry_points() returns dict-like object + all_entry_points = importlib.metadata.entry_points() + plugin_entry_points = all_entry_points.get("nodescraper.plugins", []) # type: ignore[attr-defined] + + for entry_point in plugin_entry_points: + try: + plugin_class = entry_point.load() + + if ( + inspect.isclass(plugin_class) + and issubclass(plugin_class, PluginInterface) + and not inspect.isabstract(plugin_class) + ): + if hasattr(plugin_class, "is_valid") and not plugin_class.is_valid(): + continue + + plugins[plugin_class.__name__] = plugin_class + except Exception: + pass + + except Exception: + pass + + return plugins diff --git a/test/unit/framework/test_cli.py b/test/unit/framework/test_cli.py index 49cd8990..cd266ed9 100644 --- a/test/unit/framework/test_cli.py +++ b/test/unit/framework/test_cli.py @@ -25,21 +25,13 @@ ############################################################################### import argparse import os -import sys -import tempfile -import types -from pathlib import Path -from unittest import 
mock import pytest from pydantic import BaseModel -from nodescraper.base import InBandDataPlugin from nodescraper.cli import cli, inputargtypes -from nodescraper.enums import ExecutionStatus, SystemLocation -from nodescraper.interfaces import DataAnalyzer +from nodescraper.enums import SystemLocation from nodescraper.models import SystemInfo -from nodescraper.pluginregistry import PluginRegistry def test_log_path_arg(): @@ -158,220 +150,3 @@ def test_system_info_builder(): ) def test_process_args(raw_arg_input, plugin_names, exp_output): assert cli.process_args(raw_arg_input, plugin_names) == exp_output - - -def test_discover_external_plugins_top_level(): - """Test discovering ext_nodescraper_plugins as a top-level import.""" - mock_ext_pkg = mock.MagicMock() - mock_ext_pkg.__file__ = "/path/to/ext_nodescraper_plugins/__init__.py" - - with mock.patch("nodescraper.cli.cli.import_module"): - with mock.patch.dict("sys.modules", {"ext_nodescraper_plugins": mock_ext_pkg}): - result = cli.discover_external_plugins() - - assert len(result) >= 1 - assert mock_ext_pkg in result - - -def test_discover_external_plugins_no_plugins(): - """Test when no external plugins are installed.""" - with mock.patch("nodescraper.cli.cli.import_module") as mock_import: - mock_import.side_effect = ImportError("No module named 'ext_nodescraper_plugins'") - - with mock.patch("importlib.metadata.distributions", return_value=[]): - result = cli.discover_external_plugins() - - assert result == [] - - -def test_discover_external_plugins_from_installed_package(): - """Test discovering plugins from installed packages (not top-level).""" - mock_dist = mock.MagicMock() - mock_dist.metadata.get.return_value = "amd-custom-package" - mock_dist.read_text.return_value = "custompackage" - - mock_plugin = mock.MagicMock() - mock_plugin.__file__ = "/path/to/custompackage/ext_nodescraper_plugins/__init__.py" - - def mock_import_func(module_path): - if module_path == "custompackage.ext_nodescraper_plugins": - 
return mock_plugin - raise ImportError(f"No module named '{module_path}'") - - with mock.patch("nodescraper.cli.cli.import_module", side_effect=mock_import_func): - with mock.patch("importlib.metadata.distributions", return_value=[mock_dist]): - result = cli.discover_external_plugins() - - assert mock_plugin in result - - -def test_discover_external_plugins_deduplication(): - """Test that duplicate plugins are not added multiple times.""" - mock_ext_pkg = mock.MagicMock() - mock_ext_pkg.__file__ = "/path/to/ext_nodescraper_plugins/__init__.py" - - mock_dist1 = mock.MagicMock() - mock_dist1.metadata.get.return_value = "package-one" - mock_dist1.read_text.return_value = "package_one" - - mock_dist2 = mock.MagicMock() - mock_dist2.metadata.get.return_value = "package-two" - mock_dist2.read_text.return_value = "package_one" - - def mock_import_func(module_path): - if "package_one.ext_nodescraper_plugins" in module_path: - return mock_ext_pkg - raise ImportError(f"No module named '{module_path}'") - - with mock.patch("nodescraper.cli.cli.import_module", side_effect=mock_import_func): - with mock.patch("importlib.metadata.distributions", return_value=[mock_dist1, mock_dist2]): - result = cli.discover_external_plugins() - - file_paths = [pkg.__file__ for pkg in result if hasattr(pkg, "__file__")] - assert file_paths.count(mock_ext_pkg.__file__) == 1 - - -def test_discover_external_plugins_name_variants(): - """Test that different package name variants are tried (hyphens vs underscores).""" - mock_dist = mock.MagicMock() - mock_dist.metadata.get.return_value = "amd-error-scraper" - mock_dist.read_text.side_effect = Exception("No top_level.txt") - - mock_plugin = mock.MagicMock() - mock_plugin.__file__ = "/path/to/amd_error_scraper/ext_nodescraper_plugins/__init__.py" - - call_count = {"count": 0} - - def mock_import_func(module_path): - call_count["count"] += 1 - if module_path == "amd_error_scraper.ext_nodescraper_plugins": - return mock_plugin - raise ImportError(f"No 
module named '{module_path}'") - - with mock.patch("nodescraper.cli.cli.import_module", side_effect=mock_import_func): - with mock.patch("importlib.metadata.distributions", return_value=[mock_dist]): - result = cli.discover_external_plugins() - - assert mock_plugin in result - assert call_count["count"] >= 1 - - -def test_discover_external_plugins_handles_exceptions(): - """Test that discovery continues even if some packages fail.""" - mock_dist1 = mock.MagicMock() - mock_dist1.metadata.get.return_value = "good-package" - mock_dist1.read_text.return_value = "goodpackage" - - mock_dist2 = mock.MagicMock() - mock_dist2.metadata.get.side_effect = Exception("Corrupted metadata") - - mock_plugin = mock.MagicMock() - mock_plugin.__file__ = "/path/to/goodpackage/ext_nodescraper_plugins/__init__.py" - - def mock_import_func(module_path): - if module_path == "goodpackage.ext_nodescraper_plugins": - return mock_plugin - raise ImportError(f"No module named '{module_path}'") - - with mock.patch("nodescraper.cli.cli.import_module", side_effect=mock_import_func): - with mock.patch("importlib.metadata.distributions", return_value=[mock_dist1, mock_dist2]): - result = cli.discover_external_plugins() - - assert mock_plugin in result - - -def test_external_plugins_integration(): - """Integration test: Create a temporary external plugin and verify it's picked up.""" - with tempfile.TemporaryDirectory() as tmpdir: - pkg_dir = Path(tmpdir) / "test_external_pkg" - pkg_dir.mkdir() - (pkg_dir / "__init__.py").write_text("") - - ext_plugins_dir = pkg_dir / "ext_nodescraper_plugins" - ext_plugins_dir.mkdir() - (ext_plugins_dir / "__init__.py").write_text("") - - plugin_module_dir = ext_plugins_dir / "test_plugin" - plugin_module_dir.mkdir() - - plugin_code = """ -from nodescraper.base import InBandDataPlugin -from nodescraper.enums import ExecutionStatus -from nodescraper.interfaces import DataAnalyzer - -class TestAnalyzer(DataAnalyzer): - DATA_MODEL = dict - - def analyze_data(self, 
data): - return ExecutionStatus.SUCCESS, None - -class TestExternalPlugin(InBandDataPlugin): - DATA_MODEL = dict - ANALYZER = TestAnalyzer - - def run(self): - return ExecutionStatus.SUCCESS, {"test": "data"} -""" - (plugin_module_dir / "__init__.py").write_text(plugin_code) - - sys.path.insert(0, tmpdir) - - try: - import test_external_pkg.ext_nodescraper_plugins as test_ext_pkg - - plugin_registry = PluginRegistry(plugin_pkg=[test_ext_pkg]) - - assert ( - "TestExternalPlugin" in plugin_registry.plugins - ), f"External plugin not found. Available plugins: {list(plugin_registry.plugins.keys())}" - - plugin_class = plugin_registry.plugins["TestExternalPlugin"] - assert plugin_class.__name__ == "TestExternalPlugin" - - finally: - sys.path.remove(tmpdir) - modules_to_remove = [ - key for key in sys.modules.keys() if key.startswith("test_external_pkg") - ] - for module in modules_to_remove: - del sys.modules[module] - - -def test_discover_and_load_external_plugins(): - """Test the full flow: discover external plugins using mocked modules.""" - mock_plugin_module = types.ModuleType("mock_ext_nodescraper_plugins") - mock_plugin_module.__file__ = "/fake/path/mock_ext_nodescraper_plugins/__init__.py" - mock_plugin_module.__path__ = ["/fake/path/mock_ext_nodescraper_plugins"] - - mock_submodule = types.ModuleType("mock_ext_nodescraper_plugins.mock_plugin") - mock_submodule.__file__ = "/fake/path/mock_ext_nodescraper_plugins/mock_plugin.py" - - class MockAnalyzer(DataAnalyzer): - DATA_MODEL = dict - - def analyze_data(self, data): - return ExecutionStatus.SUCCESS, None - - class MockExternalPlugin(InBandDataPlugin): - DATA_MODEL = dict - ANALYZER = MockAnalyzer - - def run(self): - return ExecutionStatus.SUCCESS, {} - - mock_submodule.MockExternalPlugin = MockExternalPlugin - - def mock_iter_modules(path, prefix=""): - yield None, f"{prefix}mock_plugin", False - - def mock_import_module(name): - if "mock_plugin" in name: - return mock_submodule - raise ImportError(f"No 
module named {name}") - - with mock.patch("pkgutil.iter_modules", side_effect=mock_iter_modules): - with mock.patch("importlib.import_module", side_effect=mock_import_module): - plugin_registry = PluginRegistry(plugin_pkg=[mock_plugin_module]) - - assert "MockExternalPlugin" in plugin_registry.plugins - assert plugin_registry.plugins["MockExternalPlugin"] == MockExternalPlugin From 53a7666c815e3b6beccbffee87ee9edca6401732 Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Tue, 13 Jan 2026 13:50:43 -0600 Subject: [PATCH 20/29] updated documentation with entry points --- docs/node-scraper-external/README.md | 123 ++++++++++++++++++---- docs/node-scraper-external/pyproject.toml | 3 + 2 files changed, 108 insertions(+), 18 deletions(-) diff --git a/docs/node-scraper-external/README.md b/docs/node-scraper-external/README.md index f3d67f2e..98bccab4 100644 --- a/docs/node-scraper-external/README.md +++ b/docs/node-scraper-external/README.md @@ -1,7 +1,12 @@ # node-scraper external plugins (example) This directory lives at **`/docs/node-scraper-external`** in the `node-scraper` repo and contains -an example external plugin package you can install in editable mode. +an example external plugin package that demonstrates how to create plugins for node-scraper. + +## Overview + +External plugins are discovered by node-scraper via **Python entry points**. This allows plugins +to be distributed as separate packages and automatically discovered when installed. ## Installation @@ -12,44 +17,126 @@ cd ~/node-scraper source venv/bin/activate pip install -e ./docs/node-scraper-external ``` -You should see `ext-nodescraper-plugins` installed in editable mode. +This installs `ext-nodescraper-plugins` in editable mode and registers the plugin entry points. 
-## Verify the external package is importable +## Verify Plugin Discovery + +Check that node-scraper discovered the external plugin: ```bash -python - <<'PY' -import ext_nodescraper_plugins -print("ext_nodescraper_plugins loaded from:", ext_nodescraper_plugins.__file__) -PY +node-scraper run-plugins -h ``` -## Run external plugins +You should see `SamplePlugin` listed alongside built-in plugins. -Confirm the CLI sees your external plugin(s): +## Run the Example Plugin ```bash -node-scraper run-plugins -h node-scraper run-plugins SamplePlugin ``` -## Add your own plugins +## How It Works + +### Entry Points -Add new modules under the **`ext_nodescraper_plugins/`** package. Example layout: +Plugins are registered in `pyproject.toml` using entry points: + +```toml +[project.entry-points."nodescraper.plugins"] +SamplePlugin = "ext_nodescraper_plugins.sample.sample_plugin:SamplePlugin" +``` + +When you install the package, Python registers these entry points in the package metadata. +Node-scraper automatically discovers and loads plugins from the `nodescraper.plugins` entry point group. 
+ +### Plugin Structure ``` /docs/node-scraper-external -├─ pyproject.toml -└─ ext_nodescraper_plugins/ - └─ sample/ +├─ pyproject.toml # Package metadata + entry points +└─ ext_nodescraper_plugins/ # Plugin package + └─ sample/ # Plugin module ├─ __init__.py - └─ sample_plugin.py + ├─ sample_plugin.py # Plugin class + ├─ sample_collector.py # Data collector + ├─ sample_analyzer.py # Data analyzer + └─ sample_data.py # Data model +``` + +## Creating Your Own External Plugins + +### Step 1: Create Package Structure + +```bash +mkdir my-plugin-package +cd my-plugin-package +mkdir -p ext_nodescraper_plugins/my_plugin ``` +### Step 2: Create pyproject.toml + +```toml +[project] +name = "my-plugin-package" +version = "0.1.0" +requires-python = ">=3.10" +dependencies = ["amd-node-scraper"] + +[project.entry-points."nodescraper.plugins"] +MyPlugin = "ext_nodescraper_plugins.my_plugin:MyPlugin" + +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" ``` -Re-install (editable mode picks up code changes automatically, but if you add new files you may -need to re-run): +### Step 3: Implement Your Plugin + +Create `ext_nodescraper_plugins/my_plugin/__init__.py`: + +```python +from nodescraper.base import InBandDataPlugin, InBandDataCollector +from pydantic import BaseModel + +class MyDataModel(BaseModel): + """Your data model""" + data: dict + +class MyCollector(InBandDataCollector[MyDataModel, None]): + """Your data collector""" + DATA_MODEL = MyDataModel + + def collect_data(self, args=None): + # Collection logic + return MyDataModel(data={}) + +class MyPlugin(InBandDataPlugin[MyDataModel, None, None]): + """Your plugin""" + DATA_MODEL = MyDataModel + COLLECTOR = MyCollector +``` + +### Step 4: Install and Test + ```bash pip install -e . +node-scraper run-plugins -h # Should show MyPlugin +node-scraper run-plugins MyPlugin ``` + +## Adding More Plugins to This Package + +To add additional plugins to this example package: + +1. 
**Create a new module** under `ext_nodescraper_plugins/` +2. **Register the entry point** in `pyproject.toml`: + ```toml + [project.entry-points."nodescraper.plugins"] + SamplePlugin = "ext_nodescraper_plugins.sample.sample_plugin:SamplePlugin" + AnotherPlugin = "ext_nodescraper_plugins.another:AnotherPlugin" + ``` +3. **Reinstall** to register the new entry point: + ```bash + pip install -e . --force-reinstall --no-deps + ``` diff --git a/docs/node-scraper-external/pyproject.toml b/docs/node-scraper-external/pyproject.toml index b07273ab..45eadcdc 100644 --- a/docs/node-scraper-external/pyproject.toml +++ b/docs/node-scraper-external/pyproject.toml @@ -4,6 +4,9 @@ version = "0.1.0" requires-python = ">=3.10" dependencies = ["node-scraper"] +[project.entry-points."nodescraper.plugins"] +SamplePlugin = "ext_nodescraper_plugins.sample.sample_plugin:SamplePlugin" + [build-system] requires = ["setuptools", "wheel"] build-backend = "setuptools.build_meta" From 4cf6919a6806f5802c1818ae5666705183a23efc Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Tue, 13 Jan 2026 14:27:04 -0600 Subject: [PATCH 21/29] mypy fix --- nodescraper/pluginregistry.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nodescraper/pluginregistry.py b/nodescraper/pluginregistry.py index 59545a40..84e50205 100644 --- a/nodescraper/pluginregistry.py +++ b/nodescraper/pluginregistry.py @@ -124,13 +124,13 @@ def load_plugins_from_entry_points() -> dict[str, type]: try: # Python 3.10+ supports group parameter try: - plugin_entry_points = importlib.metadata.entry_points(group="nodescraper.plugins") # type: ignore[call-arg] + eps = importlib.metadata.entry_points(group="nodescraper.plugins") # type: ignore[call-arg] except TypeError: # Python 3.9 - entry_points() returns dict-like object - all_entry_points = importlib.metadata.entry_points() - plugin_entry_points = all_entry_points.get("nodescraper.plugins", []) # type: ignore[attr-defined] + all_eps = 
importlib.metadata.entry_points() + eps = all_eps.get("nodescraper.plugins", []) # type: ignore[assignment, attr-defined] - for entry_point in plugin_entry_points: + for entry_point in eps: try: plugin_class = entry_point.load() From 1d56d7f0b0e740731a6a88f8f5ceaaf17f4fa12a Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Tue, 13 Jan 2026 14:56:48 -0600 Subject: [PATCH 22/29] mypy --- nodescraper/pluginregistry.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nodescraper/pluginregistry.py b/nodescraper/pluginregistry.py index 84e50205..2bb36a25 100644 --- a/nodescraper/pluginregistry.py +++ b/nodescraper/pluginregistry.py @@ -28,7 +28,7 @@ import inspect import pkgutil import types -from typing import Optional +from typing import Optional, Sequence import nodescraper.connection as internal_connections import nodescraper.plugins as internal_plugins @@ -123,6 +123,7 @@ def load_plugins_from_entry_points() -> dict[str, type]: try: # Python 3.10+ supports group parameter + eps: Sequence[importlib.metadata.EntryPoint] try: eps = importlib.metadata.entry_points(group="nodescraper.plugins") # type: ignore[call-arg] except TypeError: From ffb4298194434ed71c58ccf678b2d56a67528cfe Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Tue, 13 Jan 2026 17:45:08 -0600 Subject: [PATCH 23/29] mypy --- nodescraper/pluginregistry.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/nodescraper/pluginregistry.py b/nodescraper/pluginregistry.py index 2bb36a25..d43a41dd 100644 --- a/nodescraper/pluginregistry.py +++ b/nodescraper/pluginregistry.py @@ -28,7 +28,7 @@ import inspect import pkgutil import types -from typing import Optional, Sequence +from typing import Optional import nodescraper.connection as internal_connections import nodescraper.plugins as internal_plugins @@ -123,12 +123,11 @@ def load_plugins_from_entry_points() -> dict[str, type]: try: # Python 3.10+ supports group parameter - eps: Sequence[importlib.metadata.EntryPoint] 
try: eps = importlib.metadata.entry_points(group="nodescraper.plugins") # type: ignore[call-arg] except TypeError: # Python 3.9 - entry_points() returns dict-like object - all_eps = importlib.metadata.entry_points() + all_eps = importlib.metadata.entry_points() # type: ignore[assignment] eps = all_eps.get("nodescraper.plugins", []) # type: ignore[assignment, attr-defined] for entry_point in eps: From 025a010724ca3b2a5ae8ca530780c44e748ec548 Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Wed, 14 Jan 2026 09:30:05 -0600 Subject: [PATCH 24/29] mypy --- nodescraper/pluginregistry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nodescraper/pluginregistry.py b/nodescraper/pluginregistry.py index d43a41dd..6822d329 100644 --- a/nodescraper/pluginregistry.py +++ b/nodescraper/pluginregistry.py @@ -132,7 +132,7 @@ def load_plugins_from_entry_points() -> dict[str, type]: for entry_point in eps: try: - plugin_class = entry_point.load() + plugin_class = entry_point.load() # type: ignore[attr-defined] if ( inspect.isclass(plugin_class) From 250bb49599ba40c112fad56a8d26a8f8e932ff47 Mon Sep 17 00:00:00 2001 From: jaspals Date: Wed, 14 Jan 2026 15:37:02 +0000 Subject: [PATCH 25/29] mellanox changes --- .../inband/fabrics/fabrics_collector.py | 183 +++++++++++++----- 1 file changed, 137 insertions(+), 46 deletions(-) diff --git a/nodescraper/plugins/inband/fabrics/fabrics_collector.py b/nodescraper/plugins/inband/fabrics/fabrics_collector.py index c0cdac31..a91a6588 100644 --- a/nodescraper/plugins/inband/fabrics/fabrics_collector.py +++ b/nodescraper/plugins/inband/fabrics/fabrics_collector.py @@ -50,8 +50,8 @@ class FabricsCollector(InBandDataCollector[FabricsDataModel, None]): DATA_MODEL = FabricsDataModel CMD_IBSTAT = "ibstat" CMD_IBV_DEVINFO = "ibv_devinfo" - CMD_IBDEV2NETDEV = "ibdev2netdev -v" - CMD_OFED_INFO = "/usr/bin/ofed_info -s" + CMD_IB_DEV_NETDEVS = "ls -l /sys/class/infiniband/*/device/net" + CMD_OFED_INFO = "ofed_info -s" CMD_MST_START = 
"mst start" CMD_MST_STATUS = "mst status -v" CMD_RDMA_DEV = "rdma dev" @@ -217,40 +217,55 @@ def _parse_ibv_devinfo(self, output: str) -> List[IbvDeviceInfo]: return devices - def _parse_ibdev2netdev(self, output: str) -> List[IbdevNetdevMapping]: - """Parse 'ibdev2netdev -v' output into IbdevNetdevMapping objects. + def _parse_ib_dev_netdevs(self, output: str) -> List[IbdevNetdevMapping]: + """Parse 'ls -l /sys/class/infiniband/*/device/net' output into IbdevNetdevMapping objects. Args: - output: Raw output from 'ibdev2netdev -v' command + output: Raw output from 'ls -l /sys/class/infiniband/*/device/net' command Returns: List of IbdevNetdevMapping objects """ mappings = [] + current_ib_device = None for line in output.splitlines(): line = line.strip() if not line: continue - # Example format: mlx5_0 port 1 ==> ib0 (Up) - # Example format: mlx5_1 port 1 ==> N/A (Down) - match = re.match(r"(\S+)\s+port\s+(\d+)\s+==>\s+(\S+)\s+\(([^)]+)\)", line) - if match: - ib_device = match.group(1) - port = int(match.group(2)) - netdev = match.group(3) if match.group(3) != "N/A" else None - state = match.group(4) - - mapping = IbdevNetdevMapping( - ib_device=ib_device, port=port, netdev=netdev, state=state - ) - mappings.append(mapping) + # Check if this is a directory path line + # Example: /sys/class/infiniband/rocep105s0/device/net: + if line.startswith("/sys/class/infiniband/") and line.endswith(":"): + # Extract IB device name from path + path_match = re.search(r"/sys/class/infiniband/([^/]+)/device/net:", line) + if path_match: + current_ib_device = path_match.group(1) + continue + + # Skip "total" lines + if line.startswith("total"): + continue + + # Parse directory listing lines (network device names) + # Example: drwxr-xr-x 5 root root 0 Jan 8 18:01 benic5p1 + if current_ib_device and line.startswith("d"): + parts = line.split() + if len(parts) >= 9: + # The last part is the network device name + netdev = parts[-1] + + # Create mapping with default port 1 (most common 
for single-port devices) + # State is unknown from ls output + mapping = IbdevNetdevMapping( + ib_device=current_ib_device, port=1, netdev=netdev, state=None + ) + mappings.append(mapping) return mappings def _parse_ofed_info(self, output: str) -> OfedInfo: - """Parse '/usr/bin/ofed_info -s' output into OfedInfo object. + """Parse 'ofed_info -s' output into OfedInfo object. Args: output: Raw output from 'ofed_info -s' command @@ -260,15 +275,17 @@ def _parse_ofed_info(self, output: str) -> OfedInfo: """ version = None - # The output is typically just a version string + # The output is typically just a version string, possibly with trailing colon + # Example: OFED-internal-25.10-1.7.1: output_stripped = output.strip() if output_stripped: - version = output_stripped + # Remove trailing colon if present + version = output_stripped.rstrip(":") return OfedInfo(version=version, raw_output=output) def _parse_mst_status(self, output: str) -> MstStatus: - """Parse 'sudo mst status -v' output into MstStatus object. + """Parse 'mst status -v' output into MstStatus object. 
Args: output: Raw output from 'mst status -v' command @@ -280,7 +297,7 @@ def _parse_mst_status(self, output: str) -> MstStatus: devices = [] # Check if MST is started - if "MST modules:" in output or "MST devices:" in output: + if "MST modules:" in output or "MST devices:" in output or "PCI devices:" in output: mst_status.mst_started = True for line in output.splitlines(): @@ -288,14 +305,26 @@ def _parse_mst_status(self, output: str) -> MstStatus: if not line: continue - # Look for device lines (e.g., "/dev/mst/mt4123_pciconf0") - if line.startswith("/dev/mst/"): + # Skip header lines + if ( + line.startswith("MST modules:") + or line.startswith("PCI devices:") + or line.startswith("---") + ): + continue + if line.startswith("DEVICE_TYPE") or line.startswith("MST PCI module"): + continue + + # Look for device lines containing "/dev/mst/" + if "/dev/mst/" in line: parts = line.split() - if parts: + + # Handle old format: "/dev/mst/device_path" at the beginning + if line.startswith("/dev/mst/"): device_path = parts[0] device = MstDevice(device=device_path) - # Try to parse additional fields + # Try to parse additional fields (old format with key=value) for part in parts[1:]: if "=" in part: key, value = part.split("=", 1) @@ -304,16 +333,66 @@ def _parse_mst_status(self, output: str) -> MstStatus: elif key == "net": device.net_device = value elif ":" in value and "." 
in value: - # Looks like a PCI address device.pci_address = value else: device.attributes[key] = value - elif re.match(r"[0-9a-f]{4}:[0-9a-f]{2}:[0-9a-f]{2}\.[0-9]", part): - # PCI address format + elif re.match(r"[0-9a-f]{2,4}:[0-9a-f]{2}:[0-9a-f]{2}\.[0-9]", part): device.pci_address = part devices.append(device) + # Handle new tabular format: DEVICE_TYPE MST PCI RDMA NET NUMA [VFIO] + # Example: ConnectX7(rev:0) /dev/mst/mt4129_pciconf9 ec:00.0 mlx5_4 net-enp235s0np0 1 + else: + # Find the index of the /dev/mst/ device path + mst_idx = None + for i, part in enumerate(parts): + if part.startswith("/dev/mst/"): + mst_idx = i + break + + if mst_idx is not None and len(parts) >= mst_idx + 3: + device_path = parts[mst_idx] + device = MstDevice(device=device_path) + + # Store device type if available (before mst path) + if mst_idx > 0: + device.attributes["device_type"] = " ".join(parts[:mst_idx]) + + # PCI address (next column after MST path) + if mst_idx + 1 < len(parts): + pci_addr = parts[mst_idx + 1] + # Validate PCI address format (short or long form) + if re.match(r"[0-9a-f]{2,4}:[0-9a-f]{2}:[0-9a-f]{2}\.[0-9]", pci_addr): + device.pci_address = pci_addr + + # RDMA device (column after PCI) + if mst_idx + 2 < len(parts): + rdma_dev = parts[mst_idx + 2] + if rdma_dev.startswith("mlx") or rdma_dev != "-": + device.rdma_device = rdma_dev + + # NET device (column after RDMA) + if mst_idx + 3 < len(parts): + net_dev = parts[mst_idx + 3] + # Remove "net-" prefix if present + if net_dev.startswith("net-"): + net_dev = net_dev[4:] + if net_dev != "-": + device.net_device = net_dev + + # NUMA node (column after NET) + if mst_idx + 4 < len(parts): + numa = parts[mst_idx + 4] + if numa.isdigit(): + device.attributes["numa_node"] = numa + + # VFIO or other attributes (remaining columns) + if mst_idx + 5 < len(parts): + device.attributes["vfio"] = " ".join(parts[mst_idx + 5 :]) + + devices.append(device) + mst_status.devices = devices return mst_status @@ -333,7 +412,8 @@ 
def _parse_rdma_dev(self, output: str) -> List[RdmaDevice]: if not line: continue - # Example format: 0: mlx5_0: node_type ca fw 16.28.2006 node_guid 0c42:a103:00b3:bfa0 sys_image_guid 0c42:a103:00b3:bfa0 + # Example InfiniBand format: 0: mlx5_0: node_type ca fw 16.28.2006 node_guid 0c42:a103:00b3:bfa0 sys_image_guid 0c42:a103:00b3:bfa0 + # Example RoCE format: 0: rocep9s0: node_type ca fw 1.117.1-a-63 node_guid 0690:81ff:fe4a:6c40 sys_image_guid 0690:81ff:fe4a:6c40 parts = line.split() if len(parts) < 2: continue @@ -343,7 +423,7 @@ def _parse_rdma_dev(self, output: str) -> List[RdmaDevice]: start_idx = 0 if parts[0].endswith(":"): - # Skip index + # Skip index (e.g., "0:") start_idx = 1 if start_idx < len(parts): @@ -401,8 +481,9 @@ def _parse_rdma_link(self, output: str) -> List[RdmaLink]: if not line: continue - # Example format: link mlx5_0/1 state ACTIVE physical_state LINK_UP netdev ib0 - # Example format: 0/1: mlx5_0/1: state ACTIVE physical_state LINK_UP + # Example InfiniBand format: link mlx5_0/1 state ACTIVE physical_state LINK_UP netdev ib0 + # Example RoCE format: link rocep9s0/1 state DOWN physical_state POLLING netdev benic8p1 + # Example alternate format: 0/1: mlx5_0/1: state ACTIVE physical_state LINK_UP match = re.search(r"(\S+)/(\d+)", line) if not match: continue @@ -488,10 +569,10 @@ def collect_data( priority=EventPriority.WARNING, ) - # Collect ibdev2netdev mappings - res_ibdev2netdev = self._run_sut_cmd(self.CMD_IBDEV2NETDEV) - if res_ibdev2netdev.exit_code == 0: - ibdev_netdev_mappings = self._parse_ibdev2netdev(res_ibdev2netdev.stdout) + # Collect IB device to netdev mappings + res_ib_dev_netdevs = self._run_sut_cmd(self.CMD_IB_DEV_NETDEVS) + if res_ib_dev_netdevs.exit_code == 0: + ibdev_netdev_mappings = self._parse_ib_dev_netdevs(res_ib_dev_netdevs.stdout) self._log_event( category=EventCategory.NETWORK, description=f"Collected {len(ibdev_netdev_mappings)} IB to netdev mappings", @@ -500,10 +581,10 @@ def collect_data( else: 
self._log_event( category=EventCategory.NETWORK, - description="Error collecting ibdev2netdev mappings", + description="Error collecting IB device to netdev mappings", data={ - "command": res_ibdev2netdev.command, - "exit_code": res_ibdev2netdev.exit_code, + "command": res_ib_dev_netdevs.command, + "exit_code": res_ib_dev_netdevs.exit_code, }, priority=EventPriority.WARNING, ) @@ -529,11 +610,21 @@ def collect_data( # First start MST res_mst_start = self._run_sut_cmd(self.CMD_MST_START, sudo=True) if res_mst_start.exit_code == 0: - self._log_event( - category=EventCategory.NETWORK, - description="MST service started successfully", - priority=EventPriority.INFO, - ) + # Check output for success indicators + output_lower = res_mst_start.stdout.lower() + if "success" in output_lower or "loading mst" in output_lower: + self._log_event( + category=EventCategory.NETWORK, + description="MST service started successfully", + priority=EventPriority.INFO, + ) + else: + self._log_event( + category=EventCategory.NETWORK, + description="MST service command completed but status unclear", + data={"output": res_mst_start.stdout[:200]}, + priority=EventPriority.INFO, + ) else: self._log_event( category=EventCategory.NETWORK, From dcdfe1d0dfbf16a351965e0ae769925203f7dd25 Mon Sep 17 00:00:00 2001 From: jaspals Date: Wed, 14 Jan 2026 17:27:21 +0000 Subject: [PATCH 26/29] unit and functional tests --- .../inband/fabrics/fabrics_collector.py | 25 +- .../fixtures/fabrics_plugin_config.json | 11 + test/functional/test_fabrics_plugin.py | 106 +++++ test/unit/plugin/test_fabrics_collector.py | 406 ++++++++++++++++++ 4 files changed, 536 insertions(+), 12 deletions(-) create mode 100644 test/functional/fixtures/fabrics_plugin_config.json create mode 100644 test/functional/test_fabrics_plugin.py create mode 100644 test/unit/plugin/test_fabrics_collector.py diff --git a/nodescraper/plugins/inband/fabrics/fabrics_collector.py b/nodescraper/plugins/inband/fabrics/fabrics_collector.py index 
a91a6588..731ed239 100644 --- a/nodescraper/plugins/inband/fabrics/fabrics_collector.py +++ b/nodescraper/plugins/inband/fabrics/fabrics_collector.py @@ -564,9 +564,9 @@ def collect_data( else: self._log_event( category=EventCategory.NETWORK, - description="Error collecting ibv_devinfo information", + description="ibv_devinfo command not available or failed", data={"command": res_ibv.command, "exit_code": res_ibv.exit_code}, - priority=EventPriority.WARNING, + priority=EventPriority.INFO, ) # Collect IB device to netdev mappings @@ -581,12 +581,12 @@ def collect_data( else: self._log_event( category=EventCategory.NETWORK, - description="Error collecting IB device to netdev mappings", + description="No InfiniBand devices found in sysfs", data={ "command": res_ib_dev_netdevs.command, "exit_code": res_ib_dev_netdevs.exit_code, }, - priority=EventPriority.WARNING, + priority=EventPriority.INFO, ) # Collect OFED version info @@ -601,9 +601,9 @@ def collect_data( else: self._log_event( category=EventCategory.NETWORK, - description="Error collecting OFED info", + description="OFED not installed or ofed_info command not available", data={"command": res_ofed.command, "exit_code": res_ofed.exit_code}, - priority=EventPriority.WARNING, + priority=EventPriority.INFO, ) # Start MST and collect status @@ -628,9 +628,9 @@ def collect_data( else: self._log_event( category=EventCategory.NETWORK, - description="Error starting MST service (might already be running)", + description="MST tools not available (Mellanox-specific)", data={"command": res_mst_start.command, "exit_code": res_mst_start.exit_code}, - priority=EventPriority.WARNING, + priority=EventPriority.INFO, ) # Get MST status @@ -645,9 +645,9 @@ def collect_data( else: self._log_event( category=EventCategory.NETWORK, - description="Error collecting MST status", + description="MST status not available (Mellanox-specific)", data={"command": res_mst_status.command, "exit_code": res_mst_status.exit_code}, - 
priority=EventPriority.WARNING, + priority=EventPriority.INFO, ) # Collect RDMA device information @@ -694,7 +694,7 @@ def collect_data( raw_output=res_rdma_dev.stdout + "\n" + res_rdma_link.stdout, ) - # Build the data model if we collected any data + # Build the data model only if we collected any data if ( ibstat_devices or ibv_devices @@ -721,6 +721,7 @@ def collect_data( self.result.status = ExecutionStatus.OK return self.result, fabrics_data else: - self.result.message = "Failed to collect fabrics data" + # No fabrics hardware detected - this is not an error for optional hardware + self.result.message = "No InfiniBand/RDMA fabrics hardware detected on this system" self.result.status = ExecutionStatus.ERROR return self.result, None diff --git a/test/functional/fixtures/fabrics_plugin_config.json b/test/functional/fixtures/fabrics_plugin_config.json new file mode 100644 index 00000000..a60d56b7 --- /dev/null +++ b/test/functional/fixtures/fabrics_plugin_config.json @@ -0,0 +1,11 @@ +{ + "global_args": {}, + "plugins": { + "FabricsPlugin": { + "analysis_args": {} + } + }, + "result_collators": {}, + "name": "FabricsPlugin config", + "desc": "Config for testing FabricsPlugin" +} diff --git a/test/functional/test_fabrics_plugin.py b/test/functional/test_fabrics_plugin.py new file mode 100644 index 00000000..a8f0cd62 --- /dev/null +++ b/test/functional/test_fabrics_plugin.py @@ -0,0 +1,106 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# +############################################################################### +"""Functional tests for FabricsPlugin with --plugin-configs.""" + +from pathlib import Path + +import pytest + + +@pytest.fixture +def fixtures_dir(): + """Return path to fixtures directory.""" + return Path(__file__).parent / "fixtures" + + +@pytest.fixture +def fabrics_config_file(fixtures_dir): + """Return path to FabricsPlugin config file.""" + return fixtures_dir / "fabrics_plugin_config.json" + + +def test_fabrics_plugin_with_basic_config(run_cli_command, fabrics_config_file, tmp_path): + """Test FabricsPlugin using basic config file.""" + assert fabrics_config_file.exists(), f"Config file not found: {fabrics_config_file}" + + log_path = str(tmp_path / "logs_fabrics_basic") + result = run_cli_command( + ["--log-path", log_path, "--plugin-configs", str(fabrics_config_file)], check=False + ) + + assert result.returncode in [0, 1, 2] + output = result.stdout + result.stderr + assert len(output) > 0 + assert "fabricsplugin" in output.lower() or "fabrics" in output.lower() + + +def test_fabrics_plugin_with_run_plugins_subcommand(run_cli_command, tmp_path): + """Test FabricsPlugin using run-plugins subcommand.""" + log_path = str(tmp_path / "logs_fabrics_subcommand") + result = run_cli_command(["--log-path", log_path, "run-plugins", "FabricsPlugin"], check=False) + + assert result.returncode in [0, 1, 2] + output = result.stdout + result.stderr + assert len(output) > 0 + + +def test_fabrics_plugin_with_passive_interaction(run_cli_command, fabrics_config_file, tmp_path): + """Test FabricsPlugin with PASSIVE system interaction level.""" + log_path = str(tmp_path / "logs_fabrics_passive") + result = run_cli_command( + [ + "--log-path", + log_path, + "--sys-interaction-level", + "PASSIVE", + "--plugin-configs", + str(fabrics_config_file), + ], + check=False, + ) + + assert result.returncode in [0, 1, 2] + output = result.stdout + result.stderr + assert len(output) > 0 + + +def 
test_fabrics_plugin_skip_sudo(run_cli_command, fabrics_config_file, tmp_path): + """Test FabricsPlugin with --skip-sudo flag.""" + log_path = str(tmp_path / "logs_fabrics_no_sudo") + result = run_cli_command( + [ + "--log-path", + log_path, + "--skip-sudo", + "--plugin-configs", + str(fabrics_config_file), + ], + check=False, + ) + + assert result.returncode in [0, 1, 2] + output = result.stdout + result.stderr + assert len(output) > 0 diff --git a/test/unit/plugin/test_fabrics_collector.py b/test/unit/plugin/test_fabrics_collector.py new file mode 100644 index 00000000..884a7a88 --- /dev/null +++ b/test/unit/plugin/test_fabrics_collector.py @@ -0,0 +1,406 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+# +############################################################################### + +import pytest + +from nodescraper.enums.systeminteraction import SystemInteractionLevel +from nodescraper.plugins.inband.fabrics.fabrics_collector import FabricsCollector +from nodescraper.plugins.inband.fabrics.fabricsdata import ( + FabricsDataModel, + IbdevNetdevMapping, + IbstatDevice, + IbvDeviceInfo, + MstDevice, + MstStatus, + OfedInfo, + RdmaDevice, + RdmaInfo, + RdmaLink, +) + + +@pytest.fixture +def collector(system_info, conn_mock): + return FabricsCollector( + system_info=system_info, + system_interaction_level=SystemInteractionLevel.PASSIVE, + connection=conn_mock, + ) + + +# Sample command outputs for testing (mock data) + +IBSTAT_OUTPUT = """CA 'mlx5_0' + CA type: AB5678 + Number of ports: 1 + Firmware version: 10.10.1010 + Hardware version: 0X0 + Node GUID: 0x506b4b0300abcdef + System image GUID: 0x506b4b0300abcdef +Port 1: + State: Active + Physical state: LinkUp + Rate: 200 + Base lid: 0 + LMC: 0 + SM lid: 0 + Capability mask: 0x2651e848 + Port GUID: 0x506b4b0300abcdef + Link layer: MockBand""" + +IBSTAT_EMPTY_OUTPUT = "" + +# ibv_devinfo output +IBV_DEVINFO_OUTPUT = """hca_id: mlx5_0 + transport: MockBand (0) + fw_ver: 20.32.1010 + node_guid: 1234:7891:00ab:cdef + sys_image_guid: 1234:7891:00ab:cdef + vendor_id: 0x6789 + vendor_part_id: 4123 + hw_ver: 0x0 + board_id: MT_0000000010 + phys_port_cnt: 1 + port: 1 + state: PORT_ACTIVE (4) + max_mtu: 4096 (5) + active_mtu: 4096 (5) + sm_lid: 0 + port_lid: 0 + port_lmc: 0x00 + link_layer: MockBand""" + +# ls -l /sys/class/infiniband/*/device/net output - RoCE devices +IB_DEV_NETDEVS_OUTPUT = """/sys/class/infiniband/rocep105s0/device/net: +total 0 +drwxr-xr-x 5 root root 0 Jan 8 18:01 benic5p1 + +/sys/class/infiniband/rocep121s0/device/net: +total 0 +drwxr-xr-x 5 root root 0 Jan 8 18:01 benic6p1 +""" + +IB_DEV_NETDEVS_EMPTY = "" + +# ofed_info output +OFED_INFO_OUTPUT = "OFED-internal-25.11-1.2.3:" + +# mst status -v 
output - new tabular format +MST_STATUS_OUTPUT = """MST modules: +------------ + MST PCI module is not loaded + MST PCI configuration module loaded + +PCI devices: +------------ +DEVICE_TYPE MST PCI RDMA NET NUMA +ConnectX7(rev:0) /dev/mst/ab1234_pciconf9 0000:ec:00.0 mlx5_4 net-mock235s0np0 1 +ConnectX7(rev:0) /dev/mst/cd5678_pciconf8 0000:d4:00.0 mlx5_6 net-mock211s0np0 1""" + +MST_STATUS_EMPTY = "" + +# rdma dev output - RoCE devices +RDMA_DEV_OUTPUT = """0: abcdef25s0: node_type ca fw 1.117.1-a-63 node_guid 1234:56ff:890f:1111 sys_image_guid 1234:56ff:890f:1111 +1: abcdef105s0: node_type ca fw 1.117.1-a-63 node_guid 2222:81ff:3333:b450 sys_image_guid 2222:81ff:3333:b450""" + +RDMA_DEV_EMPTY = "" + +# rdma link output - RoCE devices +RDMA_LINK_OUTPUT = """link rocep9s0/1 state DOWN physical_state POLLING netdev benic8p1 +link abcdef25s0/1 state DOWN physical_state POLLING netdev mock7p1 +""" + +RDMA_LINK_EMPTY = "" + + +def test_parse_ibstat_basic(collector): + """Test parsing basic ibstat output""" + devices = collector._parse_ibstat(IBSTAT_OUTPUT) + + assert len(devices) == 1 + device = devices[0] + assert device.ca_name == "mlx5_0" + assert device.ca_type == "AB5678" + assert device.number_of_ports == 1 + assert device.firmware_version == "10.10.1010" + assert device.hardware_version == "0X0" + assert device.node_guid == "0x506b4b0300abcdef" + assert device.system_image_guid == "0x506b4b0300abcdef" + + +def test_parse_ibstat_port(collector): + """Test parsing ibstat port information""" + devices = collector._parse_ibstat(IBSTAT_OUTPUT) + + assert len(devices) == 1 + device = devices[0] + assert 1 in device.ports + port_attrs = device.ports[1] + assert port_attrs["State"] == "Active" + assert port_attrs["Physical state"] == "LinkUp" + assert port_attrs["Rate"] == "200" + assert port_attrs["Link layer"] == "MockBand" + + +def test_parse_ibstat_empty(collector): + """Test parsing empty ibstat output""" + devices = collector._parse_ibstat(IBSTAT_EMPTY_OUTPUT) + 
assert len(devices) == 0 + + +def test_parse_ibv_devinfo_basic(collector): + """Test parsing basic ibv_devinfo output""" + devices = collector._parse_ibv_devinfo(IBV_DEVINFO_OUTPUT) + + assert len(devices) == 1 + device = devices[0] + assert device.device == "mlx5_0" + assert device.transport_type == "MockBand (0)" + assert device.fw_ver == "20.32.1010" + assert device.node_guid == "1234:7891:00ab:cdef" + assert device.sys_image_guid == "1234:7891:00ab:cdef" + assert device.vendor_id == "0x6789" + assert device.vendor_part_id == "4123" + assert device.hw_ver == "0x0" + + +def test_parse_ibv_devinfo_port(collector): + """Test parsing ibv_devinfo port information""" + devices = collector._parse_ibv_devinfo(IBV_DEVINFO_OUTPUT) + + assert len(devices) == 1 + device = devices[0] + assert 1 in device.ports + port_attrs = device.ports[1] + assert port_attrs["state"] == "PORT_ACTIVE (4)" + assert port_attrs["link_layer"] == "MockBand" + + +def test_parse_ib_dev_netdevs(collector): + """Test parsing ls -l /sys/class/infiniband/*/device/net output""" + mappings = collector._parse_ib_dev_netdevs(IB_DEV_NETDEVS_OUTPUT) + + assert len(mappings) == 2 + + # Check first mapping + mapping1 = next((m for m in mappings if m.ib_device == "rocep105s0"), None) + assert mapping1 is not None + assert mapping1.port == 1 + assert mapping1.netdev == "benic5p1" + assert mapping1.state is None + + # Check second mapping + mapping2 = next((m for m in mappings if m.ib_device == "rocep121s0"), None) + assert mapping2 is not None + assert mapping2.netdev == "benic6p1" + + +def test_parse_ib_dev_netdevs_empty(collector): + """Test parsing empty IB device netdev output""" + mappings = collector._parse_ib_dev_netdevs(IB_DEV_NETDEVS_EMPTY) + assert len(mappings) == 0 + + +def test_parse_ofed_info(collector): + """Test parsing ofed_info output""" + ofed_info = collector._parse_ofed_info(OFED_INFO_OUTPUT) + + assert ofed_info.version == "OFED-internal-25.11-1.2.3" + assert ofed_info.raw_output == 
OFED_INFO_OUTPUT + + +def test_parse_mst_status_tabular(collector): + """Test parsing mst status -v output in new tabular format""" + mst_status = collector._parse_mst_status(MST_STATUS_OUTPUT) + + assert mst_status.mst_started is True + assert len(mst_status.devices) == 2 + + # Check first device + device1 = next((d for d in mst_status.devices if d.device == "/dev/mst/ab1234_pciconf9"), None) + assert device1 is not None + assert device1.pci_address == "0000:ec:00.0" + assert device1.rdma_device == "mlx5_4" + assert device1.net_device == "mock235s0np0" + assert device1.attributes["numa_node"] == "1" + assert device1.attributes["device_type"] == "ConnectX7(rev:0)" + + # Check second device + device2 = next((d for d in mst_status.devices if d.device == "/dev/mst/cd5678_pciconf8"), None) + assert device2 is not None + assert device2.pci_address == "0000:d4:00.0" + assert device2.rdma_device == "mlx5_6" + assert device2.net_device == "mock211s0np0" + + +def test_parse_mst_status_empty(collector): + """Test parsing empty mst status output""" + mst_status = collector._parse_mst_status(MST_STATUS_EMPTY) + + assert mst_status.mst_started is False + assert len(mst_status.devices) == 0 + + +def test_parse_rdma_dev_roce(collector): + """Test parsing rdma dev output with RoCE devices""" + devices = collector._parse_rdma_dev(RDMA_DEV_OUTPUT) + + assert len(devices) == 2 + + # Check first device + device1 = devices[0] + assert device1.device == "abcdef25s0" + assert device1.node_type == "ca" + assert device1.attributes["fw_version"] == "1.117.1-a-63" + assert device1.node_guid == "1234:56ff:890f:1111" + assert device1.sys_image_guid == "1234:56ff:890f:1111" + + # Check second device + device2 = devices[1] + assert device2.device == "abcdef105s0" + assert device2.node_type == "ca" + assert device2.node_guid == "2222:81ff:3333:b450" + assert device2.sys_image_guid == "2222:81ff:3333:b450" + + +def test_parse_rdma_dev_empty(collector): + """Test parsing empty rdma dev output""" + 
devices = collector._parse_rdma_dev(RDMA_DEV_EMPTY) + assert len(devices) == 0 + + +def test_parse_rdma_link_roce(collector): + """Test parsing rdma link output with RoCE devices""" + links = collector._parse_rdma_link(RDMA_LINK_OUTPUT) + + assert len(links) == 2 + + # Check first link + link1 = next((link for link in links if link.device == "rocep9s0"), None) + assert link1 is not None + assert link1.port == 1 + assert link1.state == "DOWN" + assert link1.physical_state == "POLLING" + assert link1.netdev == "benic8p1" + + # Check second link + link2 = next((link for link in links if link.device == "abcdef25s0"), None) + assert link2 is not None + assert link2.netdev == "mock7p1" + + +def test_parse_rdma_link_empty(collector): + """Test parsing empty rdma link output""" + links = collector._parse_rdma_link(RDMA_LINK_EMPTY) + assert len(links) == 0 + + +def test_fabrics_data_model_creation(collector): + """Test creating FabricsDataModel with all components""" + ibstat_device = IbstatDevice( + ca_name="mlx5_0", + ca_type="AB5678", + number_of_ports=1, + firmware_version="10.10.1010", + node_guid="0x506b4b0300abcdef", + ports={1: {"State": "Active"}}, + raw_output=IBSTAT_OUTPUT, + ) + + ibv_device = IbvDeviceInfo( + device="mlx5_0", + node_guid="1234:7891:00ab:cdef", + fw_ver="20.32.1010", + transport_type="MockBand", + ports={1: {"state": "PORT_ACTIVE"}}, + raw_output=IBV_DEVINFO_OUTPUT, + ) + + mapping = IbdevNetdevMapping(ib_device="rocep105s0", port=1, netdev="benic5p1", state=None) + + ofed_info = OfedInfo(version="OFED-internal-25.11-1.2.3", raw_output=OFED_INFO_OUTPUT) + + mst_device = MstDevice( + device="/dev/mst/ab1234_pciconf9", + pci_address="0000:ec:00.0", + rdma_device="mlx5_4", + net_device="mock235s0np0", + attributes={"numa_node": "1", "device_type": "ConnectX7(rev:0)"}, + ) + mst_status = MstStatus(mst_started=True, devices=[mst_device], raw_output=MST_STATUS_OUTPUT) + + rdma_device = RdmaDevice( + device="abcdef25s0", + node_type="ca", + 
node_guid="1234:56ff:890f:1111", + attributes={"fw_version": "1.117.1-a-63"}, + ) + + rdma_link = RdmaLink( + device="abcdef25s0", + port=1, + state="DOWN", + physical_state="POLLING", + netdev="mock7p1", + ) + + rdma_info = RdmaInfo(devices=[rdma_device], links=[rdma_link], raw_output=RDMA_DEV_OUTPUT) + + data = FabricsDataModel( + ibstat_devices=[ibstat_device], + ibv_devices=[ibv_device], + ibdev_netdev_mappings=[mapping], + ofed_info=ofed_info, + mst_status=mst_status, + rdma_info=rdma_info, + ) + + assert len(data.ibstat_devices) == 1 + assert len(data.ibv_devices) == 1 + assert len(data.ibdev_netdev_mappings) == 1 + assert data.ofed_info.version == "OFED-internal-25.11-1.2.3" + assert len(data.mst_status.devices) == 1 + assert len(data.rdma_info.devices) == 1 + assert len(data.rdma_info.links) == 1 + + +def test_fabrics_data_model_empty(collector): + """Test creating empty FabricsDataModel""" + data = FabricsDataModel( + ibstat_devices=[], + ibv_devices=[], + ibdev_netdev_mappings=[], + ofed_info=None, + mst_status=None, + rdma_info=None, + ) + + assert len(data.ibstat_devices) == 0 + assert len(data.ibv_devices) == 0 + assert len(data.ibdev_netdev_mappings) == 0 + assert data.ofed_info is None + assert data.mst_status is None + assert data.rdma_info is None From 3f25417d9f8a23db1648b42d1ae1bee3187f90d1 Mon Sep 17 00:00:00 2001 From: jaspals3123 Date: Thu, 15 Jan 2026 21:50:31 +0000 Subject: [PATCH 27/29] addressed review comments --- nodescraper/plugins/inband/fabrics/fabrics_collector.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nodescraper/plugins/inband/fabrics/fabrics_collector.py b/nodescraper/plugins/inband/fabrics/fabrics_collector.py index 731ed239..a86c753f 100644 --- a/nodescraper/plugins/inband/fabrics/fabrics_collector.py +++ b/nodescraper/plugins/inband/fabrics/fabrics_collector.py @@ -622,7 +622,7 @@ def collect_data( self._log_event( category=EventCategory.NETWORK, description="MST service command completed but status 
unclear", - data={"output": res_mst_start.stdout[:200]}, + data={"output": res_mst_start.stdout}, priority=EventPriority.INFO, ) else: @@ -721,7 +721,6 @@ def collect_data( self.result.status = ExecutionStatus.OK return self.result, fabrics_data else: - # No fabrics hardware detected - this is not an error for optional hardware self.result.message = "No InfiniBand/RDMA fabrics hardware detected on this system" self.result.status = ExecutionStatus.ERROR return self.result, None From 24eeeab7730bc04da1135e118e6c86b2c567b29c Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Fri, 16 Jan 2026 10:40:15 -0600 Subject: [PATCH 28/29] clearning cache --- .github/workflows/update-plugin-docs.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/update-plugin-docs.yml b/.github/workflows/update-plugin-docs.yml index 2778efb1..b5479130 100644 --- a/.github/workflows/update-plugin-docs.yml +++ b/.github/workflows/update-plugin-docs.yml @@ -40,9 +40,16 @@ jobs: --package nodescraper.plugins.inband \ --output docs/PLUGIN_DOC.md + - name: Clean pre-commit cache + run: | + rm -rf /tmp/github-actions-home/.cache/pre-commit + source venv/bin/activate + pre-commit clean || true + - name: Format documentation with pre-commit run: | source venv/bin/activate + pre-commit install-hooks || true pre-commit run --files docs/PLUGIN_DOC.md || true - name: Create Pull Request From 3d24fa0d209cd06ffec956fa605c9db1e79402e6 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 16 Jan 2026 11:29:22 -0600 Subject: [PATCH 29/29] docs: Update plugin documentation [automated] --- docs/PLUGIN_DOC.md | 100 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 97 insertions(+), 3 deletions(-) diff --git a/docs/PLUGIN_DOC.md b/docs/PLUGIN_DOC.md index ffe6a7b8..a5de7017 100644 --- a/docs/PLUGIN_DOC.md +++ b/docs/PLUGIN_DOC.md @@ -11,11 +11,12 @@ | DimmPlugin | sh -c 'dmidecode -t 17 \| tr -s " " \| grep -v "Volatile\\|None\\|Module" \| grep Size' 2>/dev/null
dmidecode
wmic memorychip get Capacity | - | [DimmDataModel](#DimmDataModel-Model) | [DimmCollector](#Collector-Class-DimmCollector) | - | | DkmsPlugin | dkms status
dkms --version | **Analyzer Args:**
- `dkms_status`: Union[str, list]
- `dkms_version`: Union[str, list]
- `regex_match`: bool | [DkmsDataModel](#DkmsDataModel-Model) | [DkmsCollector](#Collector-Class-DkmsCollector) | [DkmsAnalyzer](#Data-Analyzer-Class-DkmsAnalyzer) | | DmesgPlugin | dmesg --time-format iso -x
ls -1 /var/log/dmesg* 2>/dev/null \| grep -E '^/var/log/dmesg(\.[0-9]+(\.gz)?)?$' \|\| true | **Built-in Regexes:**
- Out of memory error: `(?:oom_kill_process.*)\|(?:Out of memory.*)`
- I/O Page Fault: `IO_PAGE_FAULT`
- Kernel Panic: `\bkernel panic\b.*`
- SQ Interrupt: `sq_intr`
- SRAM ECC: `sram_ecc.*`
- Failed to load driver. IP hardware init error.: `\[amdgpu\]\] \*ERROR\* hw_init of IP block.*`
- Failed to load driver. IP software init error.: `\[amdgpu\]\] \*ERROR\* sw_init of IP block.*`
- Real Time throttling activated: `sched: RT throttling activated.*`
- RCU preempt detected stalls: `rcu_preempt detected stalls.*`
- RCU preempt self-detected stall: `rcu_preempt self-detected stall.*`
- QCM fence timeout: `qcm fence wait loop timeout.*`
- General protection fault: `(?:[\w-]+(?:\[[0-9.]+\])?\s+)?general protectio...`
- Segmentation fault: `(?:segfault.*in .*\[)\|(?:[Ss]egmentation [Ff]au...`
- Failed to disallow cf state: `amdgpu: Failed to disallow cf state.*`
- Failed to terminate tmr: `\*ERROR\* Failed to terminate tmr.*`
- Suspend of IP block failed: `\*ERROR\* suspend of IP block <\w+> failed.*`
- amdgpu Page Fault: `(amdgpu \w{4}:\w{2}:\w{2}\.\w:\s+amdgpu:\s+\[\S...`
- Page Fault: `page fault for address.*`
- Fatal error during GPU init: `(?:amdgpu)(.*Fatal error during GPU init)\|(Fata...`
- PCIe AER Error: `(?:pcieport )(.*AER: aer_status.*)\|(aer_status.*)`
- Failed to read journal file: `Failed to read journal file.*`
- Journal file corrupted or uncleanly shut down: `journal corrupted or uncleanly shut down.*`
- ACPI BIOS Error: `ACPI BIOS Error`
- ACPI Error: `ACPI Error`
- Filesystem corrupted!: `EXT4-fs error \(device .*\):`
- Error in buffered IO, check filesystem integrity: `(Buffer I\/O error on dev)(?:ice)? (\w+)`
- PCIe card no longer present: `pcieport (\w+:\w+:\w+\.\w+):\s+(\w+):\s+(Slot\(...`
- PCIe Link Down: `pcieport (\w+:\w+:\w+\.\w+):\s+(\w+):\s+(Slot\(...`
- Mismatched clock configuration between PCIe device and host: `pcieport (\w+:\w+:\w+\.\w+):\s+(\w+):\s+(curren...`
- RAS Correctable Error: `(?:\d{4}-\d+-\d+T\d+:\d+:\d+,\d+[+-]\d+:\d+)?(....`
- RAS Uncorrectable Error: `(?:\d{4}-\d+-\d+T\d+:\d+:\d+,\d+[+-]\d+:\d+)?(....`
- RAS Deferred Error: `(?:\d{4}-\d+-\d+T\d+:\d+:\d+,\d+[+-]\d+:\d+)?(....`
- RAS Corrected PCIe Error: `((?:\[Hardware Error\]:\s+)?event severity: cor...`
- GPU Reset: `(?:\d{4}-\d+-\d+T\d+:\d+:\d+,\d+[+-]\d+:\d+)?(....`
- GPU reset failed: `(?:\d{4}-\d+-\d+T\d+:\d+:\d+,\d+[+-]\d+:\d+)?(....`
- ACA Error: `(Accelerator Check Architecture[^\n]*)(?:\n[^\n...`
- ACA Error: `(Accelerator Check Architecture[^\n]*)(?:\n[^\n...`
- MCE Error: `\[Hardware Error\]:.+MC\d+_STATUS.*(?:\n.*){0,5}`
- Mode 2 Reset Failed: `(?:\d{4}-\d+-\d+T\d+:\d+:\d+,\d+[+-]\d+:\d+)? (...`
- RAS Corrected Error: `(?:\d{4}-\d+-\d+T\d+:\d+:\d+,\d+[+-]\d+:\d+)?(....`
- SGX Error: `x86/cpu: SGX disabled by BIOS`
- GPU Throttled: `amdgpu \w{4}:\w{2}:\w{2}.\w: amdgpu: WARN: GPU ...`
- LNet: ko2iblnd has no matching interfaces: `(?:\[[^\]]+\]\s*)?LNetError:.*ko2iblnd:\s*No ma...`
- LNet: Error starting up LNI: `(?:\[[^\]]+\]\s*)?LNetError:\s*.*Error\s*-?\d+\...`
- Lustre: network initialisation failed: `LustreError:.*ptlrpc_init_portals\(\).*network ...` | [DmesgData](#DmesgData-Model) | [DmesgCollector](#Collector-Class-DmesgCollector) | [DmesgAnalyzer](#Data-Analyzer-Class-DmesgAnalyzer) | +| FabricsPlugin | ibstat
ibv_devinfo
ls -l /sys/class/infiniband/*/device/net
mst start
mst status -v
ofed_info -s
rdma dev
rdma link | - | [FabricsDataModel](#FabricsDataModel-Model) | [FabricsCollector](#Collector-Class-FabricsCollector) | - | | JournalPlugin | journalctl --no-pager --system --output=short-iso | - | [JournalData](#JournalData-Model) | [JournalCollector](#Collector-Class-JournalCollector) | - | | KernelPlugin | sh -c 'uname -a'
wmic os get Version /Value | **Analyzer Args:**
- `exp_kernel`: Union[str, list]
- `regex_match`: bool | [KernelDataModel](#KernelDataModel-Model) | [KernelCollector](#Collector-Class-KernelCollector) | [KernelAnalyzer](#Data-Analyzer-Class-KernelAnalyzer) | | KernelModulePlugin | cat /proc/modules
modinfo amdgpu
wmic os get Version /Value | **Analyzer Args:**
- `kernel_modules`: dict[str, dict]
- `regex_filter`: list[str] | [KernelModuleDataModel](#KernelModuleDataModel-Model) | [KernelModuleCollector](#Collector-Class-KernelModuleCollector) | [KernelModuleAnalyzer](#Data-Analyzer-Class-KernelModuleAnalyzer) | | MemoryPlugin | free -b
lsmem
numactl -H
wmic OS get FreePhysicalMemory /Value; wmic ComputerSystem get TotalPhysicalMemory /Value | **Analyzer Args:**
- `ratio`: float
- `memory_threshold`: str | [MemoryDataModel](#MemoryDataModel-Model) | [MemoryCollector](#Collector-Class-MemoryCollector) | [MemoryAnalyzer](#Data-Analyzer-Class-MemoryAnalyzer) | -| NetworkPlugin | ip addr show
sudo ethtool {interface}
ip neighbor show
ip route show
ip rule show | - | [NetworkDataModel](#NetworkDataModel-Model) | [NetworkCollector](#Collector-Class-NetworkCollector) | - | +| NetworkPlugin | ip addr show
ethtool {interface}
lldpcli show neighbor
lldpctl
ip neighbor show
niccli --dev {device_num} qos --ets --show
niccli --list_devices
nicctl show card
nicctl show dcqcn
nicctl show environment
nicctl show pcie ats
nicctl show port
nicctl show qos
nicctl show rdma statistics
nicctl show version firmware
nicctl show version host-software
ip route show
ip rule show | - | [NetworkDataModel](#NetworkDataModel-Model) | [NetworkCollector](#Collector-Class-NetworkCollector) | - | | NvmePlugin | nvme smart-log {dev}
nvme error-log {dev} --log-entries=256
nvme id-ctrl {dev}
nvme id-ns {dev}{ns}
nvme fw-log {dev}
nvme self-test-log {dev}
nvme get-log {dev} --log-id=6 --log-len=512
nvme telemetry-log {dev} --output-file={dev}_{f_name} | - | [NvmeDataModel](#NvmeDataModel-Model) | [NvmeCollector](#Collector-Class-NvmeCollector) | - | | OsPlugin | sh -c '( lsb_release -ds \|\| (cat /etc/*release \| grep PRETTY_NAME) \|\| uname -om ) 2>/dev/null \| head -n1'
cat /etc/*release \| grep VERSION_ID
wmic os get Version /value
wmic os get Caption /Value | **Analyzer Args:**
- `exp_os`: Union[str, list]
- `exact_match`: bool | [OsDataModel](#OsDataModel-Model) | [OsCollector](#Collector-Class-OsCollector) | [OsAnalyzer](#Data-Analyzer-Class-OsAnalyzer) | | PackagePlugin | dnf list --installed
dpkg-query -W
pacman -Q
cat /etc/*release
wmic product get name,version | **Analyzer Args:**
- `exp_package_ver`: Dict[str, Optional[str]]
- `regex_match`: bool
- `rocm_regex`: Optional[str]
- `enable_rocm_regex`: bool | [PackageDataModel](#PackageDataModel-Model) | [PackageCollector](#Collector-Class-PackageCollector) | [PackageAnalyzer](#Data-Analyzer-Class-PackageAnalyzer) | @@ -224,6 +225,42 @@ DmesgData - dmesg --time-format iso -x - ls -1 /var/log/dmesg* 2>/dev/null | grep -E '^/var/log/dmesg(\.[0-9]+(\.gz)?)?$' || true +## Collector Class FabricsCollector + +### Description + +Collect InfiniBand/RDMA fabrics configuration details + +**Bases**: ['InBandDataCollector'] + +**Link to code**: [fabrics_collector.py](https://github.com/amd/node-scraper/blob/HEAD/nodescraper/plugins/inband/fabrics/fabrics_collector.py) + +### Class Variables + +- **CMD_IBSTAT**: `ibstat` +- **CMD_IBV_DEVINFO**: `ibv_devinfo` +- **CMD_IB_DEV_NETDEVS**: `ls -l /sys/class/infiniband/*/device/net` +- **CMD_OFED_INFO**: `ofed_info -s` +- **CMD_MST_START**: `mst start` +- **CMD_MST_STATUS**: `mst status -v` +- **CMD_RDMA_DEV**: `rdma dev` +- **CMD_RDMA_LINK**: `rdma link` + +### Provides Data + +FabricsDataModel + +### Commands + +- ibstat +- ibv_devinfo +- ls -l /sys/class/infiniband/*/device/net +- mst start +- mst status -v +- ofed_info -s +- rdma dev +- rdma link + ## Collector Class JournalCollector ### Description @@ -341,7 +378,20 @@ Collect network configuration details using ip command - **CMD_ROUTE**: `ip route show` - **CMD_RULE**: `ip rule show` - **CMD_NEIGHBOR**: `ip neighbor show` -- **CMD_ETHTOOL_TEMPLATE**: `sudo ethtool {interface}` +- **CMD_ETHTOOL_TEMPLATE**: `ethtool {interface}` +- **CMD_LLDPCLI_NEIGHBOR**: `lldpcli show neighbor` +- **CMD_LLDPCTL**: `lldpctl` +- **CMD_NICCLI_LISTDEV**: `niccli --list_devices` +- **CMD_NICCLI_GETQOS_TEMPLATE**: `niccli --dev {device_num} qos --ets --show` +- **CMD_NICCTL_CARD**: `nicctl show card` +- **CMD_NICCTL_DCQCN**: `nicctl show dcqcn` +- **CMD_NICCTL_ENVIRONMENT**: `nicctl show environment` +- **CMD_NICCTL_PCIE_ATS**: `nicctl show pcie ats` +- **CMD_NICCTL_PORT**: `nicctl show port` +- **CMD_NICCTL_QOS**: `nicctl 
show qos` +- **CMD_NICCTL_RDMA_STATISTICS**: `nicctl show rdma statistics` +- **CMD_NICCTL_VERSION_HOST_SOFTWARE**: `nicctl show version host-software` +- **CMD_NICCTL_VERSION_FIRMWARE**: `nicctl show version firmware` ### Provides Data @@ -350,8 +400,21 @@ NetworkDataModel ### Commands - ip addr show -- sudo ethtool {interface} +- ethtool {interface} +- lldpcli show neighbor +- lldpctl - ip neighbor show +- niccli --dev {device_num} qos --ets --show +- niccli --list_devices +- nicctl show card +- nicctl show dcqcn +- nicctl show environment +- nicctl show pcie ats +- nicctl show port +- nicctl show qos +- nicctl show rdma statistics +- nicctl show version firmware +- nicctl show version host-software - ip route show - ip rule show @@ -769,6 +832,26 @@ Data model for in band dmesg log ### Model annotations and fields - **dmesg_content**: `str` +- **skip_log_file**: `bool` + +## FabricsDataModel Model + +### Description + +Complete InfiniBand/RDMA fabrics configuration data + +**Link to code**: [fabricsdata.py](https://github.com/amd/node-scraper/blob/HEAD/nodescraper/plugins/inband/fabrics/fabricsdata.py) + +**Bases**: ['DataModel'] + +### Model annotations and fields + +- **ibstat_devices**: `List[nodescraper.plugins.inband.fabrics.fabricsdata.IbstatDevice]` +- **ibv_devices**: `List[nodescraper.plugins.inband.fabrics.fabricsdata.IbvDeviceInfo]` +- **ibdev_netdev_mappings**: `List[nodescraper.plugins.inband.fabrics.fabricsdata.IbdevNetdevMapping]` +- **ofed_info**: `Optional[nodescraper.plugins.inband.fabrics.fabricsdata.OfedInfo]` +- **mst_status**: `Optional[nodescraper.plugins.inband.fabrics.fabricsdata.MstStatus]` +- **rdma_info**: `Optional[nodescraper.plugins.inband.fabrics.fabricsdata.RdmaInfo]` ## JournalData Model @@ -840,6 +923,17 @@ Complete network configuration data - **rules**: `List[nodescraper.plugins.inband.network.networkdata.RoutingRule]` - **neighbors**: `List[nodescraper.plugins.inband.network.networkdata.Neighbor]` - **ethtool_info**: 
`Dict[str, nodescraper.plugins.inband.network.networkdata.EthtoolInfo]` +- **broadcom_nic_devices**: `List[nodescraper.plugins.inband.network.networkdata.BroadcomNicDevice]` +- **broadcom_nic_qos**: `Dict[int, nodescraper.plugins.inband.network.networkdata.BroadcomNicQos]` +- **pensando_nic_cards**: `List[nodescraper.plugins.inband.network.networkdata.PensandoNicCard]` +- **pensando_nic_dcqcn**: `List[nodescraper.plugins.inband.network.networkdata.PensandoNicDcqcn]` +- **pensando_nic_environment**: `List[nodescraper.plugins.inband.network.networkdata.PensandoNicEnvironment]` +- **pensando_nic_pcie_ats**: `List[nodescraper.plugins.inband.network.networkdata.PensandoNicPcieAts]` +- **pensando_nic_ports**: `List[nodescraper.plugins.inband.network.networkdata.PensandoNicPort]` +- **pensando_nic_qos**: `List[nodescraper.plugins.inband.network.networkdata.PensandoNicQos]` +- **pensando_nic_rdma_statistics**: `List[nodescraper.plugins.inband.network.networkdata.PensandoNicRdmaStatistics]` +- **pensando_nic_version_host_software**: `Optional[nodescraper.plugins.inband.network.networkdata.PensandoNicVersionHostSoftware]` +- **pensando_nic_version_firmware**: `List[nodescraper.plugins.inband.network.networkdata.PensandoNicVersionFirmware]` ## NvmeDataModel Model