Skip to content

Commit 72231fa

Browse files
Yogibaer75 and smeagol91 committed
19552 Redfish: add new checks and bugfixes from PR #892
Add PDU status, power consumption, power redundancy, and storage battery monitoring. Enhance ethernet interfaces with link change detection, add configurable drive/volume naming schemes, and fix SSD endurance reporting. Harden special agents against missing Redfish endpoints. Closes: #892 Jira-Ref: CMK-32388 Change-Id: Id0122e82b7eeef1ddfb099584269e8f372e30624 Co-authored-by: Sebastian Groemcke <sebastian.groemcke@checkmk.com>
1 parent 05f8433 commit 72231fa

33 files changed

+1542
-114
lines changed

.werks/19552.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
[//]: # (werk v3)
2+
# Redfish: add PDU, power consumption, and power redundancy monitoring
3+
4+
key | value
5+
---------- | ---
6+
date | 2026-03-24T08:57:36.083866+00:00
7+
version | 2.5.0b2
8+
class | feature
9+
edition | community
10+
component | checks
11+
level | 2
12+
compatible | yes
13+
14+
The Redfish integration now supports several new monitoring capabilities and improvements.
15+
16+
## New checks
17+
18+
- _PDU %s_: Monitors Power Distribution Unit status including firmware, serial number, model, and health state.
19+
- _Power consumption %s_: Tracks average, minimum, and maximum power consumption with per-system metrics and graphs.
20+
- _Power redundancy %s_: Verifies power supply redundancy configuration and health.
21+
- _Storage controller %s battery_: Monitors the battery status of Dell storage controllers (based on Dell OEM data).
22+
23+
## Enhanced checks
24+
25+
- _Physical port %s_: Now detects link status and speed changes compared to what was seen at discovery. Alert severity is configurable via the new _Redfish Ethernet Interface_ ruleset (default: `CRIT` for link status change, `WARN` for speed change). A (periodic) service rediscovery resets the baseline values.
26+
- _Storage controller %s_: New _Redfish Storage Controller_ ruleset lets you choose between full detail and rollup-only health reporting.
27+
- _Drive %s_ and _Volume %s_: New discovery rulesets let you choose between classic item naming and a structured controller-ID format.
28+
- _System state %s_: Now shows serial number, SKU, and Dell OEM rollup details.
29+
30+
## Bugfixes
31+
32+
- _Drive %s_ (physical drives): SSD endurance now correctly displays "Media Life Left" instead of raw utilization percentage.
33+
- The Redfish power agent now fetches chassis sensor data (temperature, humidity) from PDUs. Previously, only outlets and mains were collected.
34+
- The Redfish special agents no longer crash when a system, chassis, or power equipment endpoint is missing or a section has no data.
35+
36+
## Inventory
37+
38+
A new Redfish drives inventory view is available under _Hardware > Storage > Redfish drives_, showing manufacturer, model, serial number, firmware version, capacity, and media type.
39+
40+
Thanks to Andreas Döhler for the contribution.

packages/cmk-plugins/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,7 @@ py_library(
386386
":lib",
387387
"//packages/cmk-plugin-apis:agent_based",
388388
"//packages/cmk-plugin-apis:graphing",
389+
"//packages/cmk-plugin-apis:inventory_ui",
389390
"//packages/cmk-plugin-apis:password_store",
390391
"//packages/cmk-plugin-apis:rulesets",
391392
"//packages/cmk-plugin-apis:server_side_calls",

packages/cmk-plugins/cmk/plugins/redfish/agent_based/inv_redfish_firmware_hpe_ilo4.py

Lines changed: 0 additions & 47 deletions
This file was deleted.
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#!/usr/bin/env python3
2+
# Copyright (C) 2026 Checkmk GmbH - License: GNU General Public License v2
3+
# This file is part of Checkmk (https://checkmk.com). It is subject to the terms and
4+
# conditions defined in the file COPYING, which is part of this source code package.
5+
"""Redfish storage drives inventory plugin.
6+
7+
Extracts hardware component data from Redfish drive sections to build
8+
a consolidated inventory view of physical drives.
9+
"""
10+
11+
from cmk.agent_based.v2 import (
12+
InventoryPlugin,
13+
InventoryResult,
14+
TableRow,
15+
)
16+
from cmk.plugins.redfish.lib import RedfishAPIData
17+
18+
19+
def _extract_component_name(entry: RedfishAPIData) -> str:
    """Build a human-readable component name for one Redfish entry.

    Returns ``"<Id>-<Name>"`` when both fields are present and differ,
    otherwise whichever of the two is available, and ``"Unknown"`` when
    neither is.

    JSON ``null`` values are treated like missing fields so that they never
    leak into the name as the literal string ``"None"``.
    """

    def _as_text(field: str) -> str:
        # Only None is mapped to "": a numeric Id of 0 must stay "0".
        value = entry.get(field)
        return "" if value is None else str(value)

    entry_id = _as_text("Id")
    name = _as_text("Name")
    if entry_id and name and entry_id != name:
        return f"{entry_id}-{name}"
    return name or entry_id or "Unknown"
26+
27+
28+
def inventorize_redfish_drives(section: RedfishAPIData) -> InventoryResult:
    """Yield one inventory table row per drive found in the Redfish section.

    Rows are placed under ``hardware > storage > redfish_drives`` and keyed by
    the component name derived from the entry. Only fields with a truthy value
    are added as columns; entries that are not dicts, or that provide none of
    the known fields, produce no row.
    """
    # (inventory column, Redfish field) pairs, in the order the columns are filled.
    column_sources = (
        ("manufacturer", "Manufacturer"),
        ("model", "Model"),
        ("serial", "SerialNumber"),
        ("firmware_version", "FirmwareVersion"),
        ("capacity_bytes", "CapacityBytes"),
        ("media_type", "MediaType"),
    )

    for entry in section.values():
        if not isinstance(entry, dict):
            continue
        component_name = _extract_component_name(entry)

        inventory_columns: dict[str, int | float | str | bool | None] = {}
        for column, redfish_field in column_sources:
            if value := entry.get(redfish_field):
                inventory_columns[column] = value

        if inventory_columns:
            yield TableRow(
                path=["hardware", "storage", "redfish_drives"],
                key_columns={"component": component_name},
                inventory_columns=inventory_columns,
            )
57+
58+
59+
# Plugin registration: feeds the "redfish_drives" section into
# inventorize_redfish_drives to produce the drive inventory table rows.
inventory_plugin_redfish_storage_inventory = InventoryPlugin(
    name="redfish_storage_inventory",
    sections=["redfish_drives"],
    inventory_function=inventorize_redfish_drives,
)

packages/cmk-plugins/cmk/plugins/redfish/agent_based/redfish_drives.py

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
# This file is part of Checkmk (https://checkmk.com). It is subject to the terms and
44
# conditions defined in the file COPYING, which is part of this source code package.
55

6+
from collections.abc import Mapping
7+
from typing import Any
8+
69
from cmk.agent_based.v2 import (
710
AgentSection,
811
CheckPlugin,
@@ -26,21 +29,53 @@
2629
)
2730

2831

29-
def discovery_redfish_drives(section: RedfishAPIData) -> DiscoveryResult:
30-
for key in section.keys():
32+
def _build_drive_item(data: RedfishAPIData) -> tuple[str, str]:
    """Build the classic and ctrlid item names for a drive.

    Classic: "Id-Name" (e.g., "0-1.2TB 12G SAS HDD"). A missing or null
    ``Id`` becomes "0", a missing or null ``Name`` becomes "".

    Ctrlid: derived from the ``@odata.id`` path by taking the member IDs
    leading up to the "Drives" collection plus the drive ID, e.g.

    * /redfish/v1/Systems/0/Storage/1/Drives/4 -> "0:1:4"
    * /redfish/v1/Chassis/DE00B000/Drives/0    -> "DE00B000:0"

    Paths without a usable "Drives" segment fall back to the last two path
    segments (when the path has at least five segments) or to the classic
    name.

    Returns:
        A ``(classic, ctrlid)`` tuple.
    """
    drive_id = data.get("Id")
    drive_name = data.get("Name")
    # Format instead of concatenating: null or non-string values must not raise.
    classic = f"{'0' if drive_id is None else drive_id}-{'' if drive_name is None else drive_name}"

    odata_id = data.get("@odata.id") or ""
    if not odata_id:
        return classic, classic

    parts = str(odata_id).strip("/").split("/")
    if "Drives" in parts:
        drive_idx = parts.index("Drives")
        # Require a segment after "Drives" (the drive ID itself) so a path that
        # ends in the collection name cannot index out of range.
        if drive_idx >= 4 and drive_idx + 1 < len(parts):
            # parts[3:drive_idx:2] picks the member IDs (system/storage or
            # chassis) and skips the collection names between them.
            return classic, ":".join([*parts[3:drive_idx:2], parts[drive_idx + 1]])

    if len(parts) >= 5:
        return classic, ":".join(parts[-2:])
    return classic, classic
59+
60+
61+
def discovery_redfish_drives(params: Mapping[str, Any], section: RedfishAPIData) -> DiscoveryResult:
    """Discover one service per present, named drive.

    Args:
        params: Discovery parameters. ``params["item"]`` selects the item
            naming scheme: ``"ctrlid"`` for the structured controller-ID
            format, anything else (default ``"classic"``) for "Id-Name".
        section: Parsed Redfish drive data, one entry per drive.

    Yields:
        A ``Service`` for every drive that is not reported as absent and has
        a non-empty name.
    """
    use_ctrlid = params.get("item", "classic") == "ctrlid"
    for drive in section.values():
        # "Status" can be missing or null; treat both as "no state known"
        # instead of failing on the chained lookup.
        status = drive.get("Status") or {}
        if status.get("State") == "Absent":
            continue
        if not drive.get("Name"):
            continue
        classic, ctrlid = _build_drive_item(drive)
        yield Service(item=ctrlid if use_ctrlid else classic)
3771

3872

3973
def check_redfish_drives(item: str, section: RedfishAPIData) -> CheckResult:
4074
data = None
41-
for key in section.keys():
42-
if item == section[key].get("Id", "0") + "-" + section[key]["Name"]:
43-
data = section.get(key, None)
75+
for key in section:
76+
classic, ctrlid = _build_drive_item(section[key])
77+
if item in (classic, ctrlid):
78+
data = section.get(key)
4479
break
4580
if data is None:
4681
return
@@ -70,5 +105,7 @@ def check_redfish_drives(item: str, section: RedfishAPIData) -> CheckResult:
70105
service_name="Drive %s",
71106
sections=["redfish_drives"],
72107
discovery_function=discovery_redfish_drives,
108+
discovery_ruleset_name="discovery_redfish_drives",
109+
discovery_default_parameters={"item": "classic"},
73110
check_function=check_redfish_drives,
74111
)

packages/cmk-plugins/cmk/plugins/redfish/agent_based/redfish_ethernetinterfaces.py

Lines changed: 55 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,6 @@
33
# This file is part of Checkmk (https://checkmk.com). It is subject to the terms and
44
# conditions defined in the file COPYING, which is part of this source code package.
55

6-
# mypy: disable-error-code="comparison-overlap"
7-
8-
# mypy: disable-error-code="unreachable"
9-
106
from collections.abc import Mapping
117
from typing import Any
128

@@ -32,12 +28,19 @@
3228
)
3329

3430

31+
def _render_speed(speed: int) -> str:
    """Format a link speed given in Mbps, switching to Gbps from 1000 Mbps up."""
    in_gigabits = speed >= 1000
    # ":g" drops trailing zeros, so 1000 renders as "1" and 2500 as "2.5".
    value, unit = (f"{speed / 1000:g}", "Gbps") if in_gigabits else (f"{speed}", "Mbps")
    return f"{value} {unit}"
36+
37+
3538
def discovery_redfish_ethernetinterfaces(
3639
params: Mapping[str, Any], section: RedfishAPIData
3740
) -> DiscoveryResult:
3841
"""Discover single interfaces"""
3942
disc_state = params.get("state")
40-
for key in section.keys():
43+
for key in section:
4144
if not section[key].get("Status"):
4245
continue
4346
if section[key].get("Status", {}).get("State") in [
@@ -52,47 +55,66 @@ def discovery_redfish_ethernetinterfaces(
5255
continue
5356
if section[key].get("LinkStatus", "NOLINK") in ["LinkUp"] and disc_state == "down":
5457
continue
55-
yield Service(item=section[key]["Id"])
5658

59+
speed = section[key].get("SpeedMbps", 0)
60+
if speed == 0:
61+
speed = section[key].get("CurrentLinkSpeedMbps", 0)
62+
63+
yield Service(
64+
item=section[key]["Id"],
65+
parameters={
66+
"discover_speed": speed if speed else 0,
67+
"discover_link_status": section[key].get("LinkStatus", "NOLINK"),
68+
},
69+
)
5770

58-
def check_redfish_ethernetinterfaces(item: str, section: RedfishAPIData) -> CheckResult:
71+
72+
def check_redfish_ethernetinterfaces(
    item: str, params: Mapping[str, Any], section: RedfishAPIData
) -> CheckResult:
    """Check a single Redfish ethernet interface.

    Yields, in this order: the link status, the link speed, the MAC
    address(es) if any are known, and the health state. Link status and speed
    are compared with the values stored in ``params`` at discovery time
    (``discover_link_status`` / ``discover_speed``); a difference is reported
    with the state configured in ``state_if_link_status_changed`` (default
    CRIT) or ``state_if_link_speed_changed`` (default WARN).

    Nothing is yielded when ``item`` is not present in ``section``.
    """
    data = section.get(item)
    if data is None:
        return

    # Link status
    link_state = State.OK
    link_summary = "Link: No info"
    if (link_status := data.get("LinkStatus")) is not None:
        link_summary = f"Link: {link_status}"
        discover_link = params.get("discover_link_status")
        if discover_link and discover_link != link_status:
            link_state = State(params.get("state_if_link_status_changed", 2))
            link_summary = f"Link: {link_status} (changed from {discover_link})"
    yield Result(state=link_state, summary=link_summary)

    # Speed
    # NOTE(review): discovery_redfish_ethernetinterfaces stores SpeedMbps and only
    # falls back to CurrentLinkSpeedMbps, while this check prefers
    # CurrentLinkSpeedMbps. If a device reports both with different values the
    # service would flag a speed change right after discovery - confirm which
    # field should win and align both functions.
    link_speed = data.get("CurrentLinkSpeedMbps") or data.get("SpeedMbps") or 0
    speed_state = State.OK
    speed_summary = f"Speed: {_render_speed(link_speed)}" if link_speed else "Speed: Unknown"
    discover_speed = params.get("discover_speed")
    # A speed of 0/None on either side means "unknown", not "changed".
    if discover_speed and link_speed and discover_speed != link_speed:
        speed_state = State(params.get("state_if_link_speed_changed", 1))
        speed_summary = (
            f"Speed: {_render_speed(link_speed)} (changed from {_render_speed(discover_speed)})"
        )
    yield Result(state=speed_state, summary=speed_summary)

    # MAC address
    # The address list may contain null entries; drop them before joining so
    # the join cannot fail, and fall back to MACAddress if nothing is left.
    addresses = [
        str(address)
        for address in data.get("AssociatedNetworkAddresses") or []
        if address is not None
    ]
    mac_addr = ", ".join(addresses) or data.get("MACAddress") or ""
    if mac_addr:
        yield Result(state=State.OK, summary=f"MAC: {mac_addr}")

    # Health state
    if status := data.get("Status"):
        dev_state, dev_msg = redfish_health_state(status)
        yield Result(state=State(dev_state), notice=dev_msg)
    else:
        yield Result(state=State.OK, notice="No known status value found")
96118

97119

98120
check_plugin_redfish_ethernetinterfaces = CheckPlugin(
@@ -103,4 +125,6 @@ def check_redfish_ethernetinterfaces(item: str, section: RedfishAPIData) -> Chec
103125
discovery_ruleset_name="discovery_redfish_ethernetinterfaces",
104126
discovery_default_parameters={"state": "updown"},
105127
check_function=check_redfish_ethernetinterfaces,
128+
check_ruleset_name="check_redfish_ethernetinterfaces",
129+
check_default_parameters={},
106130
)

0 commit comments

Comments
 (0)