diff --git a/api/v1alpha1/bmc_types.go b/api/v1alpha1/bmc_types.go
index 5c2d04d60..d6b7d5a5c 100644
--- a/api/v1alpha1/bmc_types.go
+++ b/api/v1alpha1/bmc_types.go
@@ -206,6 +206,10 @@ type BMCStatus struct {
// +optional
LastResetTime *metav1.Time `json:"lastResetTime,omitempty"`
+ // Tasks tracks ongoing and recent BMC operations.
+ // +optional
+ Tasks []BMCTask `json:"tasks,omitempty"`
+
// Conditions represents the latest available observations of the BMC's current state.
// +patchStrategy=merge
// +patchMergeKey=type
@@ -227,6 +231,67 @@ const (
BMCStatePending BMCState = "Pending"
)
+// BMCTask represents a single BMC operation task.
+type BMCTask struct {
+ // TaskURI is the URI to monitor the task on the BMC.
+ // +required
+ TaskURI string `json:"taskURI"`
+
+ // TaskType indicates the type of operation.
+ // +required
+ // +kubebuilder:validation:Enum=DiskErase;BIOSReset;BMCReset;NetworkClear;FirmwareUpdate;ConfigurationChange;AccountManagement;Other
+ TaskType BMCTaskType `json:"taskType"`
+
+ // TargetID identifies what the task is operating on (e.g., "BIOS", "BMC", "Drive-1").
+ // +optional
+ TargetID string `json:"targetID,omitempty"`
+
+ // State is the current state of the task.
+ // +optional
+ State string `json:"state,omitempty"`
+
+ // PercentComplete indicates completion percentage (0-100).
+ // +optional
+ PercentComplete int32 `json:"percentComplete,omitempty"`
+
+ // Message provides additional information about the task.
+ // +optional
+ Message string `json:"message,omitempty"`
+
+ // LastUpdateTime is when this task status was last updated.
+ // +optional
+ LastUpdateTime metav1.Time `json:"lastUpdateTime,omitempty"`
+}
+
+// BMCTaskType defines the type of BMC task.
+type BMCTaskType string
+
+const (
+ // BMCTaskTypeDiskErase indicates a disk erasing task.
+ BMCTaskTypeDiskErase BMCTaskType = "DiskErase"
+
+ // BMCTaskTypeBIOSReset indicates a BIOS reset task.
+ BMCTaskTypeBIOSReset BMCTaskType = "BIOSReset"
+
+ // BMCTaskTypeBMCReset indicates a BMC reset task.
+ BMCTaskTypeBMCReset BMCTaskType = "BMCReset"
+
+ // BMCTaskTypeNetworkClear indicates a network configuration clear task.
+ BMCTaskTypeNetworkClear BMCTaskType = "NetworkClear"
+
+ // BMCTaskTypeFirmwareUpdate indicates a firmware update task (BIOS or BMC).
+ BMCTaskTypeFirmwareUpdate BMCTaskType = "FirmwareUpdate"
+
+ // BMCTaskTypeConfigurationChange indicates a configuration change task.
+ BMCTaskTypeConfigurationChange BMCTaskType = "ConfigurationChange"
+
+ // BMCTaskTypeAccountManagement indicates an account management task.
+ BMCTaskTypeAccountManagement BMCTaskType = "AccountManagement"
+
+ // BMCTaskTypeOther indicates a task type not covered by the specific types.
+ BMCTaskTypeOther BMCTaskType = "Other"
+)
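+
+// As a sketch of how a tracked task serializes under .status.tasks (all
+// values are illustrative, not taken from a live BMC):
+//
+//   tasks:
+//   - taskURI: /redfish/v1/TaskService/Tasks/545
+//     taskType: DiskErase
+//     targetID: Drive-1
+//     state: Running
+//     percentComplete: 40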
+
// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:resource:scope=Cluster
diff --git a/api/v1alpha1/server_types.go b/api/v1alpha1/server_types.go
index e280f9570..0cc0204a3 100644
--- a/api/v1alpha1/server_types.go
+++ b/api/v1alpha1/server_types.go
@@ -128,6 +128,10 @@ type ServerSpec struct {
// the BIOS configuration for this server.
// +optional
BIOSSettingsRef *v1.LocalObjectReference `json:"biosSettingsRef,omitempty"`
+
+ // Taints is a list of taints that affect this server.
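+ // A minimal example (the taint key shown is illustrative, not a defined
+ // constant):
+ //
+ //   taints:
+ //   - key: example.org/needs-cleaning
+ //     effect: NoSchedule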
+ // +optional
+ Taints []v1.Taint `json:"taints,omitempty"`
}
// ServerState defines the possible states of a server.
@@ -146,6 +150,10 @@ const (
// ServerStateReserved indicates that the server is reserved for a specific use or user.
ServerStateReserved ServerState = "Reserved"
+ // ServerStateTainted indicates that the server is tainted and requires cleaning
+ // before transitioning back to Available.
+ ServerStateTainted ServerState = "Tainted"
+
// ServerStateError indicates that there is an error with the server.
ServerStateError ServerState = "Error"
diff --git a/api/v1alpha1/servercleaning_types.go b/api/v1alpha1/servercleaning_types.go
new file mode 100644
index 000000000..f4fd147e5
--- /dev/null
+++ b/api/v1alpha1/servercleaning_types.go
@@ -0,0 +1,175 @@
+// SPDX-FileCopyrightText: 2025 SAP SE or an SAP affiliate company and IronCore contributors
+// SPDX-License-Identifier: Apache-2.0
+
+package v1alpha1
+
+import (
+ corev1 "k8s.io/api/core/v1"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// ServerCleaningSpec defines the desired cleaning operations
+// +kubebuilder:validation:XValidation:rule="has(self.serverRef) != has(self.serverSelector)", message="exactly one of serverRef or serverSelector must be specified"
+type ServerCleaningSpec struct {
+ // ServerRef references a specific Server to be cleaned.
+ // Mutually exclusive with ServerSelector.
+ // +optional
+ ServerRef *corev1.LocalObjectReference `json:"serverRef,omitempty"`
+
+ // ServerSelector specifies a label selector to identify servers to be cleaned.
+ // Mutually exclusive with ServerRef.
+ // +optional
+ ServerSelector *metav1.LabelSelector `json:"serverSelector,omitempty"`
+
+ // DiskWipe specifies disk erasing configuration
+ // +optional
+ DiskWipe *DiskWipeConfig `json:"diskWipe,omitempty"`
+
+ // BMCReset specifies if BMC should be reset to defaults
+ // +optional
+ BMCReset bool `json:"bmcReset,omitempty"`
+
+ // BIOSReset specifies if BIOS should be reset to defaults
+ // +optional
+ BIOSReset bool `json:"biosReset,omitempty"`
+
+ // NetworkCleanup specifies if network configurations should be cleared
+ // +optional
+ NetworkCleanup bool `json:"networkCleanup,omitempty"`
+
+ // ServerBootConfigurationTemplate defines the boot configuration for the cleaning agent
+ // If not specified, cleaning operations are performed directly via BMC APIs
+ // +optional
+ ServerBootConfigurationTemplate *ServerBootConfigurationTemplate `json:"serverBootConfigurationTemplate,omitempty"`
+}
+
+// DiskWipeConfig defines disk erasing behavior
+type DiskWipeConfig struct {
+ // Method specifies the disk erasing method
+ // +kubebuilder:validation:Enum=quick;secure;dod
+ // +kubebuilder:default=quick
+ Method DiskWipeMethod `json:"method"`
+
+ // IncludeBootDrives specifies whether to erase boot drives
+ // +optional
+ IncludeBootDrives bool `json:"includeBootDrives,omitempty"`
+}
+
+// DiskWipeMethod defines the available disk erasing methods
+type DiskWipeMethod string
+
+const (
+ // DiskWipeMethodQuick performs a quick erase (single pass)
+ DiskWipeMethodQuick DiskWipeMethod = "quick"
+
+ // DiskWipeMethodSecure performs a secure erase (3 passes)
+ DiskWipeMethodSecure DiskWipeMethod = "secure"
+
+ // DiskWipeMethodDoD performs DoD 5220.22-M standard erase (7 passes)
+ DiskWipeMethodDoD DiskWipeMethod = "dod"
+)
+
+// ServerCleaningState defines the state of the cleaning process
+type ServerCleaningState string
+
+const (
+ // ServerCleaningStatePending indicates cleaning is waiting to start
+ ServerCleaningStatePending ServerCleaningState = "Pending"
+
+ // ServerCleaningStateInProgress indicates cleaning is in progress
+ ServerCleaningStateInProgress ServerCleaningState = "InProgress"
+
+ // ServerCleaningStateCompleted indicates cleaning completed successfully
+ ServerCleaningStateCompleted ServerCleaningState = "Completed"
+
+ // ServerCleaningStateFailed indicates cleaning failed
+ ServerCleaningStateFailed ServerCleaningState = "Failed"
+)
+
+// ServerCleaningStatus defines the observed state of ServerCleaning
+type ServerCleaningStatus struct {
+ // State represents the current state of the cleaning process
+ // +optional
+ State ServerCleaningState `json:"state,omitempty"`
+
+ // SelectedServers is the total number of servers selected for cleaning
+ // +optional
+ SelectedServers int32 `json:"selectedServers,omitempty"`
+
+ // PendingCleanings is the number of servers with pending cleaning
+ // +optional
+ PendingCleanings int32 `json:"pendingCleanings,omitempty"`
+
+ // InProgressCleanings is the number of servers currently being cleaned
+ // +optional
+ InProgressCleanings int32 `json:"inProgressCleanings,omitempty"`
+
+ // CompletedCleanings is the number of servers successfully cleaned
+ // +optional
+ CompletedCleanings int32 `json:"completedCleanings,omitempty"`
+
+ // FailedCleanings is the number of servers where cleaning failed
+ // +optional
+ FailedCleanings int32 `json:"failedCleanings,omitempty"`
+
+ // ServerCleaningStatuses contains per-server cleaning status
+ // +optional
+ ServerCleaningStatuses []ServerCleaningStatusEntry `json:"serverCleaningStatuses,omitempty"`
+
+ // Conditions represents the latest available observations
+ // +patchStrategy=merge
+ // +patchMergeKey=type
+ // +optional
+ Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"`
+}
+
+// ServerCleaningStatusEntry represents the cleaning status for a single server
+type ServerCleaningStatusEntry struct {
+ // ServerName is the name of the server
+ // +required
+ ServerName string `json:"serverName"`
+
+ // State is the cleaning state for this server
+ // +required
+ State ServerCleaningState `json:"state"`
+
+ // Message provides additional information about the cleaning state
+ // +optional
+ Message string `json:"message,omitempty"`
+
+ // LastUpdateTime is the last time this status was updated
+ // +optional
+ LastUpdateTime metav1.Time `json:"lastUpdateTime,omitempty"`
+}
+
+// +kubebuilder:object:root=true
+// +kubebuilder:subresource:status
+// +kubebuilder:resource:scope=Namespaced,shortName=scl
+// +kubebuilder:printcolumn:name="Selected",type=integer,JSONPath=`.status.selectedServers`
+// +kubebuilder:printcolumn:name="Completed",type=integer,JSONPath=`.status.completedCleanings`
+// +kubebuilder:printcolumn:name="InProgress",type=integer,JSONPath=`.status.inProgressCleanings`
+// +kubebuilder:printcolumn:name="Failed",type=integer,JSONPath=`.status.failedCleanings`
+// +kubebuilder:printcolumn:name="State",type=string,JSONPath=`.status.state`
+// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp`
+
+// ServerCleaning is the Schema for the servercleanings API
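+//
+// A minimal manifest might look like the following (the API group is assumed
+// from this module; all values are illustrative):
+//
+//   apiVersion: metal.ironcore.dev/v1alpha1
+//   kind: ServerCleaning
+//   metadata:
+//     name: clean-pool-a
+//   spec:
+//     serverSelector:
+//       matchLabels:
+//         pool: a
+//     diskWipe:
+//       method: secure
+//     biosReset: true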
+type ServerCleaning struct {
+ metav1.TypeMeta `json:",inline"`
+ metav1.ObjectMeta `json:"metadata,omitempty"`
+
+ Spec ServerCleaningSpec `json:"spec,omitempty"`
+ Status ServerCleaningStatus `json:"status,omitempty"`
+}
+
+// +kubebuilder:object:root=true
+
+// ServerCleaningList contains a list of ServerCleaning
+type ServerCleaningList struct {
+ metav1.TypeMeta `json:",inline"`
+ metav1.ListMeta `json:"metadata,omitempty"`
+ Items []ServerCleaning `json:"items"`
+}
+
+func init() {
+ SchemeBuilder.Register(&ServerCleaning{}, &ServerCleaningList{})
+}
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index 8b96f5e64..2ade1a876 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -927,6 +927,13 @@ func (in *BMCStatus) DeepCopyInto(out *BMCStatus) {
in, out := &in.LastResetTime, &out.LastResetTime
*out = (*in).DeepCopy()
}
+ if in.Tasks != nil {
+ in, out := &in.Tasks, &out.Tasks
+ *out = make([]BMCTask, len(*in))
+ for i := range *in {
+ (*in)[i].DeepCopyInto(&(*out)[i])
+ }
+ }
if in.Conditions != nil {
in, out := &in.Conditions, &out.Conditions
*out = make([]metav1.Condition, len(*in))
@@ -946,6 +953,22 @@ func (in *BMCStatus) DeepCopy() *BMCStatus {
return out
}
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *BMCTask) DeepCopyInto(out *BMCTask) {
+ *out = *in
+ in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new BMCTask.
+func (in *BMCTask) DeepCopy() *BMCTask {
+ if in == nil {
+ return nil
+ }
+ out := new(BMCTask)
+ in.DeepCopyInto(out)
+ return out
+}
+
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *BMCUser) DeepCopyInto(out *BMCUser) {
*out = *in
@@ -1317,6 +1340,21 @@ func (in *ConsoleProtocol) DeepCopy() *ConsoleProtocol {
return out
}
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *DiskWipeConfig) DeepCopyInto(out *DiskWipeConfig) {
+ *out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DiskWipeConfig.
+func (in *DiskWipeConfig) DeepCopy() *DiskWipeConfig {
+ if in == nil {
+ return nil
+ }
+ out := new(DiskWipeConfig)
+ in.DeepCopyInto(out)
+ return out
+}
+
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Endpoint) DeepCopyInto(out *Endpoint) {
*out = *in
@@ -1783,6 +1821,145 @@ func (in *ServerClaimStatus) DeepCopy() *ServerClaimStatus {
return out
}
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ServerCleaning) DeepCopyInto(out *ServerCleaning) {
+ *out = *in
+ out.TypeMeta = in.TypeMeta
+ in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
+ in.Spec.DeepCopyInto(&out.Spec)
+ in.Status.DeepCopyInto(&out.Status)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServerCleaning.
+func (in *ServerCleaning) DeepCopy() *ServerCleaning {
+ if in == nil {
+ return nil
+ }
+ out := new(ServerCleaning)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *ServerCleaning) DeepCopyObject() runtime.Object {
+ if c := in.DeepCopy(); c != nil {
+ return c
+ }
+ return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ServerCleaningList) DeepCopyInto(out *ServerCleaningList) {
+ *out = *in
+ out.TypeMeta = in.TypeMeta
+ in.ListMeta.DeepCopyInto(&out.ListMeta)
+ if in.Items != nil {
+ in, out := &in.Items, &out.Items
+ *out = make([]ServerCleaning, len(*in))
+ for i := range *in {
+ (*in)[i].DeepCopyInto(&(*out)[i])
+ }
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServerCleaningList.
+func (in *ServerCleaningList) DeepCopy() *ServerCleaningList {
+ if in == nil {
+ return nil
+ }
+ out := new(ServerCleaningList)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *ServerCleaningList) DeepCopyObject() runtime.Object {
+ if c := in.DeepCopy(); c != nil {
+ return c
+ }
+ return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ServerCleaningSpec) DeepCopyInto(out *ServerCleaningSpec) {
+ *out = *in
+ if in.ServerRef != nil {
+ in, out := &in.ServerRef, &out.ServerRef
+ *out = new(v1.LocalObjectReference)
+ **out = **in
+ }
+ if in.ServerSelector != nil {
+ in, out := &in.ServerSelector, &out.ServerSelector
+ *out = new(metav1.LabelSelector)
+ (*in).DeepCopyInto(*out)
+ }
+ if in.DiskWipe != nil {
+ in, out := &in.DiskWipe, &out.DiskWipe
+ *out = new(DiskWipeConfig)
+ **out = **in
+ }
+ if in.ServerBootConfigurationTemplate != nil {
+ in, out := &in.ServerBootConfigurationTemplate, &out.ServerBootConfigurationTemplate
+ *out = new(ServerBootConfigurationTemplate)
+ (*in).DeepCopyInto(*out)
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServerCleaningSpec.
+func (in *ServerCleaningSpec) DeepCopy() *ServerCleaningSpec {
+ if in == nil {
+ return nil
+ }
+ out := new(ServerCleaningSpec)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ServerCleaningStatus) DeepCopyInto(out *ServerCleaningStatus) {
+ *out = *in
+ if in.ServerCleaningStatuses != nil {
+ in, out := &in.ServerCleaningStatuses, &out.ServerCleaningStatuses
+ *out = make([]ServerCleaningStatusEntry, len(*in))
+ for i := range *in {
+ (*in)[i].DeepCopyInto(&(*out)[i])
+ }
+ }
+ if in.Conditions != nil {
+ in, out := &in.Conditions, &out.Conditions
+ *out = make([]metav1.Condition, len(*in))
+ for i := range *in {
+ (*in)[i].DeepCopyInto(&(*out)[i])
+ }
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServerCleaningStatus.
+func (in *ServerCleaningStatus) DeepCopy() *ServerCleaningStatus {
+ if in == nil {
+ return nil
+ }
+ out := new(ServerCleaningStatus)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ServerCleaningStatusEntry) DeepCopyInto(out *ServerCleaningStatusEntry) {
+ *out = *in
+ in.LastUpdateTime.DeepCopyInto(&out.LastUpdateTime)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServerCleaningStatusEntry.
+func (in *ServerCleaningStatusEntry) DeepCopy() *ServerCleaningStatusEntry {
+ if in == nil {
+ return nil
+ }
+ out := new(ServerCleaningStatusEntry)
+ in.DeepCopyInto(out)
+ return out
+}
+
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ServerList) DeepCopyInto(out *ServerList) {
*out = *in
@@ -1977,6 +2154,13 @@ func (in *ServerSpec) DeepCopyInto(out *ServerSpec) {
*out = new(v1.LocalObjectReference)
**out = **in
}
+ if in.Taints != nil {
+ in, out := &in.Taints, &out.Taints
+ *out = make([]v1.Taint, len(*in))
+ for i := range *in {
+ (*in)[i].DeepCopyInto(&(*out)[i])
+ }
+ }
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServerSpec.
diff --git a/bmc/bmc.go b/bmc/bmc.go
index d9f6634b7..ff90de3b3 100644
--- a/bmc/bmc.go
+++ b/bmc/bmc.go
@@ -20,6 +20,60 @@ const (
ManufacturerSupermicro Manufacturer = "Supermicro"
)
+// DiskWipeMethod defines the disk wiping method
+type DiskWipeMethod string
+
+const (
+ // DiskWipeMethodQuick performs a quick wipe (single pass with zeros)
+ DiskWipeMethodQuick DiskWipeMethod = "quick"
+
+ // DiskWipeMethodSecure performs a secure wipe (3 passes)
+ DiskWipeMethodSecure DiskWipeMethod = "secure"
+
+ // DiskWipeMethodDoD performs DoD 5220.22-M standard wipe (7 passes)
+ DiskWipeMethodDoD DiskWipeMethod = "dod"
+)
+
+// CleaningTaskInfo contains information about a cleaning task
+type CleaningTaskInfo struct {
+ // TaskURI is the URI to monitor the task
+ TaskURI string
+ // TaskType indicates what type of cleaning task this is
+ TaskType CleaningTaskType
+ // TargetID identifies the target resource (e.g., drive ID for disk wipe)
+ TargetID string
+}
+
+// CleaningTaskType defines the type of cleaning task
+type CleaningTaskType string
+
+const (
+ // CleaningTaskTypeDiskErase indicates a disk erasing task
+ CleaningTaskTypeDiskErase CleaningTaskType = "DiskErase"
+ // CleaningTaskTypeBIOSReset indicates a BIOS reset task
+ CleaningTaskTypeBIOSReset CleaningTaskType = "BIOSReset"
+ // CleaningTaskTypeBMCReset indicates a BMC reset task
+ CleaningTaskTypeBMCReset CleaningTaskType = "BMCReset"
+ // CleaningTaskTypeNetworkClear indicates a network config clear task
+ CleaningTaskTypeNetworkClear CleaningTaskType = "NetworkClear"
+)
+
+// CleaningTaskStatus represents the status of a cleaning task
+type CleaningTaskStatus struct {
+ // TaskURI is the URI to monitor the task
+ TaskURI string
+ // State is the current state of the task
+ State string
+ // PercentComplete indicates the completion percentage (0-100)
+ PercentComplete int
+ // Message provides additional information about the task
+ Message string
+ // TaskType indicates what type of cleaning task this is
+ TaskType CleaningTaskType
+ // TargetID identifies the target resource
+ TargetID string
+}
+
// BMC defines an interface for interacting with a Baseboard Management Controller.
type BMC interface {
// PowerOn powers on the system.
@@ -109,6 +163,9 @@ type BMC interface {
// GetBMCUpgradeTask retrieves the task for the BMC upgrade.
GetBMCUpgradeTask(ctx context.Context, manufacturer string, taskURI string) (*schemas.Task, error)
+ // GetTaskStatus retrieves the status of a task by its URI.
+ GetTaskStatus(ctx context.Context, taskURI string) (*schemas.Task, error)
+
// CreateOrUpdateAccount creates or updates a BMC user account.
CreateOrUpdateAccount(ctx context.Context, userName, role, password string, enabled bool) error
@@ -120,6 +177,18 @@ type BMC interface {
// GetAccountService retrieves the account service.
GetAccountService() (*schemas.AccountService, error)
+
+ // EraseDisk initiates disk erasing operation via Redfish. Returns task URIs for long-running operations.
+ EraseDisk(ctx context.Context, systemURI string, method DiskWipeMethod) ([]CleaningTaskInfo, error)
+
+ // ResetBIOSToDefaults resets BIOS configuration to factory defaults. Returns task URI if operation is async.
+ ResetBIOSToDefaults(ctx context.Context, systemURI string) (*CleaningTaskInfo, error)
+
+ // ResetBMCToDefaults resets BMC configuration to factory defaults. Returns task URI if operation is async.
+ ResetBMCToDefaults(ctx context.Context, managerUUID string) (*CleaningTaskInfo, error)
+
+ // ClearNetworkConfiguration clears network configuration settings. Returns task URI if operation is async.
+ ClearNetworkConfiguration(ctx context.Context, systemURI string) (*CleaningTaskInfo, error)
}
type Entity struct {
diff --git a/bmc/cleaning.go b/bmc/cleaning.go
new file mode 100644
index 000000000..b807815d1
--- /dev/null
+++ b/bmc/cleaning.go
@@ -0,0 +1,31 @@
+// SPDX-FileCopyrightText: 2025 SAP SE or an SAP affiliate company and IronCore contributors
+// SPDX-License-Identifier: Apache-2.0
+
+package bmc
+
+import (
+ "context"
+ "net/http"
+
+ "github.com/stmcginnis/gofish/schemas"
+)
+
+// CleaningInterface defines methods for OEM-specific cleaning operations
+type CleaningInterface interface {
+ // EraseDisk erases disks using vendor-specific methods
+ EraseDisk(ctx context.Context, storages []*schemas.Storage, method DiskWipeMethod) error
+
+ // ResetBIOS resets BIOS to factory defaults
+ ResetBIOS(ctx context.Context, biosURI string) error
+
+ // ResetBMC resets BMC to factory defaults
+ ResetBMC(ctx context.Context, manager *schemas.Manager) error
+
+ // ClearNetworkConfig clears network configuration
+ ClearNetworkConfig(ctx context.Context, systemURI string) error
+}
+
+// HTTPClient interface for making HTTP requests
+type HTTPClient interface {
+ Post(uri string, payload any) (*http.Response, error)
+}
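+
+// cleaningForManufacturer is a sketch, not wired to a caller in this change,
+// showing how a vendor-specific CleaningInterface could be picked from the
+// manufacturer string reported by Redfish. Unknown vendors yield nil, in which
+// case callers should fall back to the generic paths on RedfishBaseBMC.
+func cleaningForManufacturer(m Manufacturer, client HTTPClient) CleaningInterface {
+ switch m {
+ case ManufacturerDell:
+ return NewDellCleaning(client)
+ case ManufacturerHPE:
+ return NewHPECleaning(client)
+ case ManufacturerLenovo:
+ return NewLenovoCleaning(client)
+ default:
+ return nil
+ }
+}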
diff --git a/bmc/cleaning_test.go b/bmc/cleaning_test.go
new file mode 100644
index 000000000..41d68bf65
--- /dev/null
+++ b/bmc/cleaning_test.go
@@ -0,0 +1,96 @@
+// SPDX-FileCopyrightText: 2025 SAP SE or an SAP affiliate company and IronCore contributors
+// SPDX-License-Identifier: Apache-2.0
+
+package bmc
+
+import (
+ . "github.com/onsi/ginkgo/v2"
+ . "github.com/onsi/gomega"
+)
+
+var _ = Describe("Server Cleaning Operations", func() {
+ Describe("Vendor-Specific Disk Wipe Configuration", func() {
+ Describe("Dell Disk Wipe Passes", func() {
+ It("should return correct pass count for quick wipe", func() {
+ passes := getDellWipePasses(DiskWipeMethodQuick)
+ Expect(passes).To(Equal(1))
+ })
+
+ It("should return correct pass count for secure wipe", func() {
+ passes := getDellWipePasses(DiskWipeMethodSecure)
+ Expect(passes).To(Equal(3))
+ })
+
+ It("should return correct pass count for DoD wipe", func() {
+ passes := getDellWipePasses(DiskWipeMethodDoD)
+ Expect(passes).To(Equal(7))
+ })
+
+ It("should default to 1 pass for unknown method", func() {
+ passes := getDellWipePasses("unknown")
+ Expect(passes).To(Equal(1))
+ })
+ })
+
+ Describe("HPE Wipe Type", func() {
+ It("should return correct type for quick wipe", func() {
+ wipeType := getHPEWipeType(DiskWipeMethodQuick)
+ Expect(wipeType).To(Equal("BlockErase"))
+ })
+
+ It("should return correct type for secure wipe", func() {
+ wipeType := getHPEWipeType(DiskWipeMethodSecure)
+ Expect(wipeType).To(Equal("Overwrite"))
+ })
+
+ It("should return correct type for DoD wipe", func() {
+ wipeType := getHPEWipeType(DiskWipeMethodDoD)
+ Expect(wipeType).To(Equal("CryptographicErase"))
+ })
+
+ It("should default to BlockErase for unknown method", func() {
+ wipeType := getHPEWipeType("unknown")
+ Expect(wipeType).To(Equal("BlockErase"))
+ })
+ })
+
+ Describe("Lenovo Wipe Method", func() {
+ It("should return correct method for quick wipe", func() {
+ method := getLenovoWipeMethod(DiskWipeMethodQuick)
+ Expect(method).To(Equal("Simple"))
+ })
+
+ It("should return correct method for secure wipe", func() {
+ method := getLenovoWipeMethod(DiskWipeMethodSecure)
+ Expect(method).To(Equal("Cryptographic"))
+ })
+
+ It("should return correct method for DoD wipe", func() {
+ method := getLenovoWipeMethod(DiskWipeMethodDoD)
+ Expect(method).To(Equal("Sanitize"))
+ })
+
+ It("should default to Simple for unknown method", func() {
+ method := getLenovoWipeMethod("unknown")
+ Expect(method).To(Equal("Simple"))
+ })
+ })
+ })
+
+ Describe("DiskWipeMethod Constants", func() {
+ It("should have expected constant values", func() {
+ Expect(DiskWipeMethodQuick).To(Equal(DiskWipeMethod("quick")))
+ Expect(DiskWipeMethodSecure).To(Equal(DiskWipeMethod("secure")))
+ Expect(DiskWipeMethodDoD).To(Equal(DiskWipeMethod("dod")))
+ })
+ })
+
+ Describe("Manufacturer Constants", func() {
+ It("should have expected manufacturer values", func() {
+ Expect(ManufacturerDell).To(Equal(Manufacturer("Dell Inc.")))
+ Expect(ManufacturerHPE).To(Equal(Manufacturer("HPE")))
+ Expect(ManufacturerLenovo).To(Equal(Manufacturer("Lenovo")))
+ Expect(ManufacturerSupermicro).To(Equal(Manufacturer("Supermicro")))
+ })
+ })
+})
diff --git a/bmc/dell_cleaning.go b/bmc/dell_cleaning.go
new file mode 100644
index 000000000..b0ba72197
--- /dev/null
+++ b/bmc/dell_cleaning.go
@@ -0,0 +1,143 @@
+// SPDX-FileCopyrightText: 2025 SAP SE or an SAP affiliate company and IronCore contributors
+// SPDX-License-Identifier: Apache-2.0
+
+package bmc
+
+import (
+ "context"
+ "fmt"
+ "io"
+
+ "github.com/stmcginnis/gofish/schemas"
+ ctrl "sigs.k8s.io/controller-runtime"
+)
+
+// DellCleaning implements cleaning operations for Dell servers
+type DellCleaning struct {
+ client HTTPClient
+}
+
+// NewDellCleaning creates a new DellCleaning instance
+func NewDellCleaning(client HTTPClient) *DellCleaning {
+ return &DellCleaning{client: client}
+}
+
+// EraseDisk performs disk erasing for Dell servers using iDRAC OEM extensions
+func (d *DellCleaning) EraseDisk(ctx context.Context, storages []*schemas.Storage, method DiskWipeMethod) error {
+ log := ctrl.LoggerFrom(ctx)
+
+ // Dell iDRAC supports secure erase via Storage Controller actions
+ for _, storage := range storages {
+ drives, err := storage.Drives()
+ if err != nil {
+ log.Error(err, "Failed to get drives for storage", "storage", storage.Name)
+ continue
+ }
+
+ for _, drive := range drives {
+ // Construct OEM action URI for Dell
+ // Dell uses: /redfish/v1/Systems/{id}/Storage/{storageId}/Drives/{driveId}/Actions/Drive.SecureErase
+ actionURI := fmt.Sprintf("%s/Actions/Drive.SecureErase", drive.ODataID)
+
+ payload := map[string]any{
+ "OverwritePasses": getDellWipePasses(method),
+ }
+
+ log.V(1).Info("Initiating Dell drive wipe", "drive", drive.Name, "uri", actionURI)
+
+ resp, err := d.client.Post(actionURI, payload)
+ if err != nil {
+ log.Error(err, "Failed to initiate disk wipe for drive", "drive", drive.Name)
+ continue
+ }
+ // Read the body for diagnostics before closing it.
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ _ = resp.Body.Close()
+ log.Error(fmt.Errorf("wipe request failed"), "Failed to wipe drive",
+ "drive", drive.Name, "status", resp.StatusCode, "body", string(body))
+ continue
+ }
+ _ = resp.Body.Close()
+ }
+ }
+
+ return nil
+}
+
+// ResetBIOS resets BIOS configuration to factory defaults for Dell servers
+func (d *DellCleaning) ResetBIOS(ctx context.Context, biosURI string) error {
+ log := ctrl.LoggerFrom(ctx)
+
+ // Dell iDRAC: POST to /redfish/v1/Systems/{id}/Bios/Actions/Bios.ResetBios
+ actionURI := fmt.Sprintf("%s/Actions/Bios.ResetBios", biosURI)
+
+ log.V(1).Info("Resetting Dell BIOS to defaults", "uri", actionURI)
+
+ resp, err := d.client.Post(actionURI, map[string]any{})
+ if err != nil {
+ return fmt.Errorf("failed to reset BIOS: %w", err)
+ }
+ defer func() { _ = resp.Body.Close() }()
+
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ return fmt.Errorf("BIOS reset failed with status %d: %s", resp.StatusCode, string(body))
+ }
+
+ return nil
+}
+
+// ResetBMC resets BMC configuration to factory defaults for Dell servers
+func (d *DellCleaning) ResetBMC(ctx context.Context, manager *schemas.Manager) error {
+ log := ctrl.LoggerFrom(ctx)
+
+ // Dell iDRAC: Use OEM action to reset to defaults
+ // /redfish/v1/Managers/{id}/Actions/Oem/DellManager.ResetToDefaults
+ actionURI := fmt.Sprintf("%s/Actions/Oem/DellManager.ResetToDefaults", manager.ODataID)
+
+ payload := map[string]any{
+ "ResetType": "ResetAllWithRootDefaults",
+ }
+
+ log.V(1).Info("Resetting Dell iDRAC to defaults", "uri", actionURI)
+
+ resp, err := d.client.Post(actionURI, payload)
+ if err != nil {
+ return fmt.Errorf("failed to reset BMC: %w", err)
+ }
+ defer func() { _ = resp.Body.Close() }()
+
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ return fmt.Errorf("BMC reset failed with status %d: %s", resp.StatusCode, string(body))
+ }
+
+ return nil
+}
+
+// ClearNetworkConfig clears network configuration for Dell servers
+func (d *DellCleaning) ClearNetworkConfig(ctx context.Context, systemURI string) error {
+ log := ctrl.LoggerFrom(ctx)
+
+ // Dell: Clear network adapters configuration via OEM extensions
+ // This typically involves resetting NIC settings to defaults
+ actionURI := fmt.Sprintf("%s/NetworkAdapters/Actions/Oem/DellNetworkAdapter.ClearConfiguration", systemURI)
+
+ log.V(1).Info("Clearing Dell network configuration", "uri", actionURI)
+
+ resp, err := d.client.Post(actionURI, map[string]any{})
+ if err != nil {
+ // Network config clear might not be critical, log and continue
+ log.Error(err, "Failed to clear network configuration (non-critical)")
+ return nil
+ }
+ defer func() { _ = resp.Body.Close() }()
+
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ log.Error(fmt.Errorf("network config clear failed"), "Failed with status",
+ "status", resp.StatusCode, "body", string(body))
+ }
+
+ return nil
+}
diff --git a/bmc/hpe_cleaning.go b/bmc/hpe_cleaning.go
new file mode 100644
index 000000000..9b9545026
--- /dev/null
+++ b/bmc/hpe_cleaning.go
@@ -0,0 +1,141 @@
+// SPDX-FileCopyrightText: 2025 SAP SE or an SAP affiliate company and IronCore contributors
+// SPDX-License-Identifier: Apache-2.0
+
+package bmc
+
+import (
+ "context"
+ "fmt"
+ "io"
+
+ "github.com/stmcginnis/gofish/schemas"
+ ctrl "sigs.k8s.io/controller-runtime"
+)
+
+// HPECleaning implements cleaning operations for HPE servers
+type HPECleaning struct {
+ client HTTPClient
+}
+
+// NewHPECleaning creates a new HPECleaning instance
+func NewHPECleaning(client HTTPClient) *HPECleaning {
+ return &HPECleaning{client: client}
+}
+
+// EraseDisk performs disk erasing for HPE servers using iLO OEM extensions
+func (h *HPECleaning) EraseDisk(ctx context.Context, storages []*schemas.Storage, method DiskWipeMethod) error {
+ log := ctrl.LoggerFrom(ctx)
+
+ // HPE iLO supports sanitize operations via OEM extensions
+ for _, storage := range storages {
+ drives, err := storage.Drives()
+ if err != nil {
+ log.Error(err, "Failed to get drives for storage", "storage", storage.Name)
+ continue
+ }
+
+ for _, drive := range drives {
+ // HPE OEM action: /redfish/v1/Systems/{id}/Storage/{storageId}/Drives/{driveId}/Actions/Oem/Hpe/HpeDrive.SecureErase
+ actionURI := fmt.Sprintf("%s/Actions/Oem/Hpe/HpeDrive.SecureErase", drive.ODataID)
+
+ payload := map[string]any{
+ "SanitizeType": getHPEWipeType(method),
+ }
+
+ log.V(1).Info("Initiating HPE drive wipe", "drive", drive.Name, "uri", actionURI)
+
+ resp, err := h.client.Post(actionURI, payload)
+ if err != nil {
+ log.Error(err, "Failed to initiate disk wipe for drive", "drive", drive.Name)
+ continue
+ }
+ // Read the body for diagnostics before closing it.
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ _ = resp.Body.Close()
+ log.Error(fmt.Errorf("wipe request failed"), "Failed to wipe drive",
+ "drive", drive.Name, "status", resp.StatusCode, "body", string(body))
+ continue
+ }
+ _ = resp.Body.Close()
+ }
+ }
+
+ return nil
+}
+
+// ResetBIOS resets BIOS configuration to factory defaults for HPE servers
+func (h *HPECleaning) ResetBIOS(ctx context.Context, biosURI string) error {
+ log := ctrl.LoggerFrom(ctx)
+
+ // HPE iLO: Use ResetBios action
+ // /redfish/v1/Systems/{id}/Bios/Actions/Bios.ResetBios
+ actionURI := fmt.Sprintf("%s/Actions/Bios.ResetBios", biosURI)
+
+ log.V(1).Info("Resetting HPE BIOS to defaults", "uri", actionURI)
+
+ resp, err := h.client.Post(actionURI, map[string]any{})
+ if err != nil {
+ return fmt.Errorf("failed to reset BIOS: %w", err)
+ }
+ defer func() { _ = resp.Body.Close() }()
+
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ return fmt.Errorf("BIOS reset failed with status %d: %s", resp.StatusCode, string(body))
+ }
+
+ return nil
+}
+
+// ResetBMC resets BMC configuration to factory defaults for HPE servers
+func (h *HPECleaning) ResetBMC(ctx context.Context, manager *schemas.Manager) error {
+ log := ctrl.LoggerFrom(ctx)
+
+ // HPE iLO: Use OEM action to reset to factory defaults
+ // /redfish/v1/Managers/{id}/Actions/Oem/Hpe/HpiLO.ResetToFactoryDefaults
+ actionURI := fmt.Sprintf("%s/Actions/Oem/Hpe/HpiLO.ResetToFactoryDefaults", manager.ODataID)
+
+ payload := map[string]any{
+ "ResetType": "Default",
+ }
+
+ log.V(1).Info("Resetting HPE iLO to defaults", "uri", actionURI)
+
+ resp, err := h.client.Post(actionURI, payload)
+ if err != nil {
+ return fmt.Errorf("failed to reset BMC: %w", err)
+ }
+ defer func() { _ = resp.Body.Close() }()
+
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ return fmt.Errorf("BMC reset failed with status %d: %s", resp.StatusCode, string(body))
+ }
+
+ return nil
+}
+
+// ClearNetworkConfig clears network configuration for HPE servers
+func (h *HPECleaning) ClearNetworkConfig(ctx context.Context, systemURI string) error {
+ log := ctrl.LoggerFrom(ctx)
+
+ // HPE: Clear network adapters configuration
+ actionURI := fmt.Sprintf("%s/NetworkAdapters/Actions/Oem/Hpe/HpeNetworkAdapter.ClearConfiguration", systemURI)
+
+ log.V(1).Info("Clearing HPE network configuration", "uri", actionURI)
+
+ resp, err := h.client.Post(actionURI, map[string]any{})
+ if err != nil {
+ log.Error(err, "Failed to clear network configuration (non-critical)")
+ return nil
+ }
+ defer func() { _ = resp.Body.Close() }()
+
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ log.Error(fmt.Errorf("network config clear failed"), "Failed with status",
+ "status", resp.StatusCode, "body", string(body))
+ }
+
+ return nil
+}
diff --git a/bmc/lenovo_cleaning.go b/bmc/lenovo_cleaning.go
new file mode 100644
index 000000000..be3bcc959
--- /dev/null
+++ b/bmc/lenovo_cleaning.go
@@ -0,0 +1,140 @@
+// SPDX-FileCopyrightText: 2025 SAP SE or an SAP affiliate company and IronCore contributors
+// SPDX-License-Identifier: Apache-2.0
+
+package bmc
+
+import (
+ "context"
+ "fmt"
+ "io"
+
+ "github.com/stmcginnis/gofish/schemas"
+ ctrl "sigs.k8s.io/controller-runtime"
+)
+
+// LenovoCleaning implements cleaning operations for Lenovo servers
+type LenovoCleaning struct {
+ client HTTPClient
+}
+
+// NewLenovoCleaning creates a new LenovoCleaning instance
+func NewLenovoCleaning(client HTTPClient) *LenovoCleaning {
+ return &LenovoCleaning{client: client}
+}
+
+// EraseDisk performs disk erasing for Lenovo servers using XClarity OEM extensions
+func (l *LenovoCleaning) EraseDisk(ctx context.Context, storages []*schemas.Storage, method DiskWipeMethod) error {
+ log := ctrl.LoggerFrom(ctx)
+
+ // Lenovo XClarity supports secure erase via OEM extensions
+ for _, storage := range storages {
+ drives, err := storage.Drives()
+ if err != nil {
+ log.Error(err, "Failed to get drives for storage", "storage", storage.Name)
+ continue
+ }
+
+ for _, drive := range drives {
+ // Lenovo OEM action path
+ actionURI := fmt.Sprintf("%s/Actions/Drive.SecureErase", drive.ODataID)
+
+ payload := map[string]any{
+ "EraseMethod": getLenovoWipeMethod(method),
+ }
+
+ log.V(1).Info("Initiating Lenovo drive wipe", "drive", drive.Name, "uri", actionURI)
+
+ resp, err := l.client.Post(actionURI, payload)
+ if err != nil {
+ log.Error(err, "Failed to initiate disk wipe for drive", "drive", drive.Name)
+ continue
+ }
+ // Read the body for diagnostics before closing it.
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ _ = resp.Body.Close()
+ log.Error(fmt.Errorf("wipe request failed"), "Failed to wipe drive",
+ "drive", drive.Name, "status", resp.StatusCode, "body", string(body))
+ continue
+ }
+ _ = resp.Body.Close()
+ }
+ }
+
+ return nil
+}
+
+// ResetBIOS resets BIOS configuration to factory defaults for Lenovo servers
+func (l *LenovoCleaning) ResetBIOS(ctx context.Context, biosURI string) error {
+ log := ctrl.LoggerFrom(ctx)
+
+ // Lenovo XClarity: POST to reset action
+ actionURI := fmt.Sprintf("%s/Actions/Bios.ResetBios", biosURI)
+
+ log.V(1).Info("Resetting Lenovo BIOS to defaults", "uri", actionURI)
+
+ resp, err := l.client.Post(actionURI, map[string]any{})
+ if err != nil {
+ return fmt.Errorf("failed to reset BIOS: %w", err)
+ }
+ defer func() { _ = resp.Body.Close() }()
+
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ return fmt.Errorf("BIOS reset failed with status %d: %s", resp.StatusCode, string(body))
+ }
+
+ return nil
+}
+
+// ResetBMC resets BMC configuration to factory defaults for Lenovo servers
+func (l *LenovoCleaning) ResetBMC(ctx context.Context, manager *schemas.Manager) error {
+ log := ctrl.LoggerFrom(ctx)
+
+ // Lenovo XClarity: use the standard Manager.ResetToDefaults action
+ // /redfish/v1/Managers/{id}/Actions/Manager.ResetToDefaults
+ actionURI := fmt.Sprintf("%s/Actions/Manager.ResetToDefaults", manager.ODataID)
+
+ payload := map[string]any{
+ "ResetToDefaultsType": "ResetAll",
+ }
+
+ log.V(1).Info("Resetting Lenovo XCC to defaults", "uri", actionURI)
+
+ resp, err := l.client.Post(actionURI, payload)
+ if err != nil {
+ return fmt.Errorf("failed to reset BMC: %w", err)
+ }
+ defer func() { _ = resp.Body.Close() }()
+
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ return fmt.Errorf("BMC reset failed with status %d: %s", resp.StatusCode, string(body))
+ }
+
+ return nil
+}
+
+// ClearNetworkConfig clears network configuration for Lenovo servers
+func (l *LenovoCleaning) ClearNetworkConfig(ctx context.Context, systemURI string) error {
+ log := ctrl.LoggerFrom(ctx)
+
+ // Lenovo: Clear network adapters configuration
+ actionURI := fmt.Sprintf("%s/NetworkAdapters/Actions/NetworkAdapter.ClearConfiguration", systemURI)
+
+ log.V(1).Info("Clearing Lenovo network configuration", "uri", actionURI)
+
+ resp, err := l.client.Post(actionURI, map[string]any{})
+ if err != nil {
+ log.Error(err, "Failed to clear network configuration (non-critical)")
+ return nil
+ }
+ defer func() { _ = resp.Body.Close() }()
+
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ log.Error(fmt.Errorf("network config clear failed"), "Failed with status",
+ "status", resp.StatusCode, "body", string(body))
+ }
+
+ return nil
+}
diff --git a/bmc/redfish.go b/bmc/redfish.go
index 08d3d195e..c754a1bd8 100644
--- a/bmc/redfish.go
+++ b/bmc/redfish.go
@@ -12,6 +12,7 @@ import (
"io"
"maps"
"math/big"
+ "net/http"
"slices"
"strings"
"time"
@@ -873,6 +874,33 @@ func (r *RedfishBaseBMC) GetBMCUpgradeTask(_ context.Context, _ string, _ string
return nil, fmt.Errorf("firmware upgrade task not supported for manufacturer %q", r.manufacturer)
}
+// GetTaskStatus retrieves the status of a task by its URI.
+func (r *RedfishBaseBMC) GetTaskStatus(ctx context.Context, taskURI string) (*schemas.Task, error) {
+ log := ctrl.LoggerFrom(ctx)
+ client := r.client.GetService().GetClient()
+
+ resp, err := client.Get(taskURI)
+ if err != nil {
+ return nil, fmt.Errorf("failed to get task status: %w", err)
+ }
+ defer func() {
+ if closeErr := resp.Body.Close(); closeErr != nil {
+ log.Error(closeErr, "Failed to close response body")
+ }
+ }()
+
+ // Task monitor URIs return 202 Accepted while the task is still running.
+ if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusAccepted {
+ return nil, fmt.Errorf("unexpected status code %d when getting task status", resp.StatusCode)
+ }
+
+ var task schemas.Task
+ if err := json.NewDecoder(resp.Body).Decode(&task); err != nil {
+ return nil, fmt.Errorf("failed to decode task response: %w", err)
+ }
+
+ return &task, nil
+}
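+
+// A typical polling loop over GetTaskStatus might look like this (sketch only;
+// TaskState values follow the Redfish Task schema):
+//
+//   task, err := bmc.GetTaskStatus(ctx, taskURI)
+//   if err == nil && task.TaskState == "Completed" {
+//       // task finished; stop polling
+//   }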
+
const (
charLower = "abcdefghijklmnopqrstuvwxyz"
charUpper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
@@ -975,3 +1003,832 @@ func shuffleRunes(a []rune) error {
}
return nil
}
+
+// extractTaskURIFromResponse extracts the task URI from the HTTP response headers or body.
+// Note that it consumes resp.Body when no Location header is present.
+func (r *RedfishBaseBMC) extractTaskURIFromResponse(resp *http.Response) string {
+ // Check Location header (standard Redfish async response)
+ if location := resp.Header.Get("Location"); location != "" {
+ return location
+ }
+
+ // Check for task monitor in response body
+ if resp.Body != nil {
+ body, err := io.ReadAll(resp.Body)
+ if err == nil {
+ var taskResponse struct {
+ TaskMonitor string `json:"@odata.id"`
+ }
+ if err := json.Unmarshal(body, &taskResponse); err == nil && taskResponse.TaskMonitor != "" {
+ return taskResponse.TaskMonitor
+ }
+ }
+ }
+
+ return ""
+}
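+
+// For reference, a typical Redfish asynchronous action response (values are
+// illustrative) carries the task URI in the Location header:
+//
+//   HTTP/1.1 202 Accepted
+//   Location: /redfish/v1/TaskService/Tasks/545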
+
+// EraseDisk initiates disk erasing operation via Redfish.
+// This implementation uses vendor-specific OEM extensions when available.
+func (r *RedfishBaseBMC) EraseDisk(ctx context.Context, systemURI string, method DiskWipeMethod) ([]CleaningTaskInfo, error) {
+ log := ctrl.LoggerFrom(ctx)
+ log.V(1).Info("Erasing disks", "systemURI", systemURI, "method", method)
+
+ system, err := r.getSystemFromUri(ctx, systemURI)
+ if err != nil {
+ return nil, fmt.Errorf("failed to get computer system: %w", err)
+ }
+
+ manufacturer := system.Manufacturer
+ log.V(1).Info("Detected manufacturer", "manufacturer", manufacturer)
+
+ // Get system storage
+ systemStorage, err := system.Storage()
+ if err != nil {
+ return nil, fmt.Errorf("failed to get storage: %w", err)
+ }
+
+ if len(systemStorage) == 0 {
+ log.V(1).Info("No storage devices found")
+ return nil, nil
+ }
+
+ // Use OEM-specific wipe if available
+ switch Manufacturer(manufacturer) {
+ case ManufacturerDell:
+ return r.wipeDiskDell(ctx, systemStorage, method)
+ case ManufacturerHPE:
+ return r.wipeDiskHPE(ctx, systemStorage, method)
+ case ManufacturerLenovo:
+ return r.wipeDiskLenovo(ctx, systemStorage, method)
+ default:
+ // Generic Redfish SecureErase
+ return r.wipeDiskGeneric(ctx, systemStorage, method)
+ }
+}
+
+// wipeDiskDell performs disk wiping for Dell servers using iDRAC OEM extensions
+func (r *RedfishBaseBMC) wipeDiskDell(ctx context.Context, storages []*schemas.Storage, method DiskWipeMethod) ([]CleaningTaskInfo, error) {
+ log := ctrl.LoggerFrom(ctx)
+ var tasks []CleaningTaskInfo
+
+ // Dell iDRAC supports secure erase via Storage Controller actions
+ for _, storage := range storages {
+ drives, err := storage.Drives()
+ if err != nil {
+ log.Error(err, "Failed to get drives for storage", "storage", storage.Name)
+ continue
+ }
+
+ for _, drive := range drives {
+ // Construct OEM action URI for Dell
+ // Dell uses: /redfish/v1/Systems/{id}/Storage/{storageId}/Drives/{driveId}/Actions/Drive.SecureErase
+ actionURI := fmt.Sprintf("%s/Actions/Drive.SecureErase", drive.ODataID)
+
+ payload := map[string]any{
+ "OverwritePasses": getDellWipePasses(method),
+ }
+
+ log.V(1).Info("Initiating Dell drive wipe", "drive", drive.Name, "uri", actionURI)
+
+ resp, err := r.client.Post(actionURI, payload)
+ if err != nil {
+ log.Error(err, "Failed to initiate disk wipe for drive", "drive", drive.Name)
+ continue
+ }
+
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ _ = resp.Body.Close()
+ log.Error(fmt.Errorf("wipe request failed"), "Failed to wipe drive",
+ "drive", drive.Name, "status", resp.StatusCode, "body", string(body))
+ continue
+ }
+
+ // Extract task URI from response
+ taskURI := r.extractTaskURIFromResponse(resp)
+ _ = resp.Body.Close()
+
+ if taskURI != "" {
+ tasks = append(tasks, CleaningTaskInfo{
+ TaskURI: taskURI,
+ TaskType: CleaningTaskTypeDiskErase,
+ TargetID: drive.ID,
+ })
+ log.V(1).Info("Dell disk wipe task created", "drive", drive.Name, "taskURI", taskURI)
+ } else {
+ log.V(1).Info("Dell disk wipe completed synchronously", "drive", drive.Name)
+ }
+ }
+ }
+
+ return tasks, nil
+}
+
+func getDellWipePasses(method DiskWipeMethod) int {
+ switch method {
+ case DiskWipeMethodQuick:
+ return 1
+ case DiskWipeMethodSecure:
+ return 3
+ case DiskWipeMethodDoD:
+ return 7
+ default:
+ return 1
+ }
+}
+
+// wipeDiskHPE performs disk wiping for HPE servers using iLO OEM extensions
+func (r *RedfishBaseBMC) wipeDiskHPE(ctx context.Context, storages []*schemas.Storage, method DiskWipeMethod) ([]CleaningTaskInfo, error) {
+ log := ctrl.LoggerFrom(ctx)
+ var tasks []CleaningTaskInfo
+
+ // HPE iLO supports sanitize operations via OEM extensions
+ for _, storage := range storages {
+ drives, err := storage.Drives()
+ if err != nil {
+ log.Error(err, "Failed to get drives for storage", "storage", storage.Name)
+ continue
+ }
+
+ for _, drive := range drives {
+ // HPE OEM action: /redfish/v1/Systems/{id}/Storage/{storageId}/Drives/{driveId}/Actions/Oem/Hpe/HpeDrive.SecureErase
+ actionURI := fmt.Sprintf("%s/Actions/Oem/Hpe/HpeDrive.SecureErase", drive.ODataID)
+
+ payload := map[string]any{
+ "SanitizeType": getHPEWipeType(method),
+ }
+
+ log.V(1).Info("Initiating HPE drive wipe", "drive", drive.Name, "uri", actionURI)
+
+ resp, err := r.client.Post(actionURI, payload)
+ if err != nil {
+ log.Error(err, "Failed to initiate disk wipe for drive", "drive", drive.Name)
+ continue
+ }
+
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ _ = resp.Body.Close()
+ log.Error(fmt.Errorf("wipe request failed"), "Failed to wipe drive",
+ "drive", drive.Name, "status", resp.StatusCode, "body", string(body))
+ continue
+ }
+
+ // Extract task URI from response
+ taskURI := r.extractTaskURIFromResponse(resp)
+ _ = resp.Body.Close()
+
+ if taskURI != "" {
+ tasks = append(tasks, CleaningTaskInfo{
+ TaskURI: taskURI,
+ TaskType: CleaningTaskTypeDiskErase,
+ TargetID: drive.ID,
+ })
+ log.V(1).Info("HPE disk wipe task created", "drive", drive.Name, "taskURI", taskURI)
+ } else {
+ log.V(1).Info("HPE disk wipe completed synchronously", "drive", drive.Name)
+ }
+ }
+ }
+
+ return tasks, nil
+}
+
+func getHPEWipeType(method DiskWipeMethod) string {
+ switch method {
+ case DiskWipeMethodQuick:
+ return "BlockErase"
+ case DiskWipeMethodSecure:
+ return "Overwrite"
+ case DiskWipeMethodDoD:
+ return "CryptographicErase"
+ default:
+ return "BlockErase"
+ }
+}
+
+// wipeDiskLenovo performs disk wiping for Lenovo servers using XClarity OEM extensions
+func (r *RedfishBaseBMC) wipeDiskLenovo(ctx context.Context, storages []*schemas.Storage, method DiskWipeMethod) ([]CleaningTaskInfo, error) {
+ log := ctrl.LoggerFrom(ctx)
+ var tasks []CleaningTaskInfo
+
+ // Lenovo XClarity supports secure erase via OEM extensions
+ for _, storage := range storages {
+ drives, err := storage.Drives()
+ if err != nil {
+ log.Error(err, "Failed to get drives for storage", "storage", storage.Name)
+ continue
+ }
+
+ for _, drive := range drives {
+ // Lenovo OEM action path
+ actionURI := fmt.Sprintf("%s/Actions/Drive.SecureErase", drive.ODataID)
+
+ payload := map[string]any{
+ "EraseMethod": getLenovoWipeMethod(method),
+ }
+
+ log.V(1).Info("Initiating Lenovo drive wipe", "drive", drive.Name, "uri", actionURI)
+
+ resp, err := r.client.Post(actionURI, payload)
+ if err != nil {
+ log.Error(err, "Failed to initiate disk wipe for drive", "drive", drive.Name)
+ continue
+ }
+
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ _ = resp.Body.Close()
+ log.Error(fmt.Errorf("wipe request failed"), "Failed to wipe drive",
+ "drive", drive.Name, "status", resp.StatusCode, "body", string(body))
+ continue
+ }
+
+ // Extract task URI from response
+ taskURI := r.extractTaskURIFromResponse(resp)
+ _ = resp.Body.Close()
+
+ if taskURI != "" {
+ tasks = append(tasks, CleaningTaskInfo{
+ TaskURI: taskURI,
+ TaskType: CleaningTaskTypeDiskErase,
+ TargetID: drive.ID,
+ })
+ log.V(1).Info("Lenovo disk wipe task created", "drive", drive.Name, "taskURI", taskURI)
+ } else {
+ log.V(1).Info("Lenovo disk wipe completed synchronously", "drive", drive.Name)
+ }
+ }
+ }
+
+ return tasks, nil
+}
+
+func getLenovoWipeMethod(method DiskWipeMethod) string {
+ switch method {
+ case DiskWipeMethodQuick:
+ return "Simple"
+ case DiskWipeMethodSecure:
+ return "Cryptographic"
+ case DiskWipeMethodDoD:
+ return "Sanitize"
+ default:
+ return "Simple"
+ }
+}
+
+// wipeDiskGeneric performs generic Redfish disk wiping for vendors without OEM
+// support. The standard Drive.SecureErase action takes no parameters, so the
+// requested wipe method is ignored on this path.
+func (r *RedfishBaseBMC) wipeDiskGeneric(ctx context.Context, storages []*schemas.Storage, _ DiskWipeMethod) ([]CleaningTaskInfo, error) {
+ log := ctrl.LoggerFrom(ctx)
+ log.V(1).Info("Using generic Redfish disk wipe")
+ var tasks []CleaningTaskInfo
+
+ // Standard Redfish SecureErase action
+ for _, storage := range storages {
+ drives, err := storage.Drives()
+ if err != nil {
+ log.Error(err, "Failed to get drives for storage", "storage", storage.Name)
+ continue
+ }
+
+ for _, drive := range drives {
+ actionURI := fmt.Sprintf("%s/Actions/Drive.SecureErase", drive.ODataID)
+
+ payload := map[string]any{}
+
+ log.V(1).Info("Initiating generic drive wipe", "drive", drive.Name, "uri", actionURI)
+
+ resp, err := r.client.Post(actionURI, payload)
+ if err != nil {
+ log.Error(err, "Failed to initiate disk wipe for drive", "drive", drive.Name)
+ continue
+ }
+
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ _ = resp.Body.Close()
+ log.Error(fmt.Errorf("wipe request failed"), "Failed to wipe drive",
+ "drive", drive.Name, "status", resp.StatusCode, "body", string(body))
+ continue
+ }
+
+ // Extract task URI from response
+ taskURI := r.extractTaskURIFromResponse(resp)
+ _ = resp.Body.Close()
+
+ if taskURI != "" {
+ tasks = append(tasks, CleaningTaskInfo{
+ TaskURI: taskURI,
+ TaskType: CleaningTaskTypeDiskErase,
+ TargetID: drive.ID,
+ })
+ log.V(1).Info("Generic disk wipe task created", "drive", drive.Name, "taskURI", taskURI)
+ } else {
+ log.V(1).Info("Generic disk wipe completed synchronously", "drive", drive.Name)
+ }
+ }
+ }
+
+ return tasks, nil
+}
+
+// ResetBIOSToDefaults resets BIOS configuration to factory defaults
+func (r *RedfishBaseBMC) ResetBIOSToDefaults(ctx context.Context, systemURI string) (*CleaningTaskInfo, error) {
+ log := ctrl.LoggerFrom(ctx)
+ log.V(1).Info("Resetting BIOS to defaults", "systemURI", systemURI)
+
+ system, err := r.getSystemFromUri(ctx, systemURI)
+ if err != nil {
+ return nil, fmt.Errorf("failed to get computer system: %w", err)
+ }
+
+ manufacturer := system.Manufacturer
+ log.V(1).Info("Detected manufacturer", "manufacturer", manufacturer)
+
+ // Get BIOS
+ bios, err := system.Bios()
+ if err != nil {
+ return nil, fmt.Errorf("failed to get BIOS for system %s: %w", systemURI, err)
+ }
+
+ biosURI := bios.ODataID
+ if biosURI == "" {
+ return nil, fmt.Errorf("BIOS URI not found for system %s", systemURI)
+ }
+
+ // Use vendor-specific reset methods
+ switch Manufacturer(manufacturer) {
+ case ManufacturerDell:
+ return r.resetBIOSDell(ctx, biosURI)
+ case ManufacturerHPE:
+ return r.resetBIOSHPE(ctx, biosURI)
+ case ManufacturerLenovo:
+ return r.resetBIOSLenovo(ctx, biosURI)
+ default:
+ return r.resetBIOSGeneric(ctx, biosURI)
+ }
+}
+
+func (r *RedfishBaseBMC) resetBIOSDell(ctx context.Context, biosURI string) (*CleaningTaskInfo, error) {
+ log := ctrl.LoggerFrom(ctx)
+
+ // Dell iDRAC: POST to /redfish/v1/Systems/{id}/Bios/Actions/Bios.ResetBios
+ actionURI := fmt.Sprintf("%s/Actions/Bios.ResetBios", biosURI)
+
+ log.V(1).Info("Resetting Dell BIOS to defaults", "uri", actionURI)
+
+ resp, err := r.client.Post(actionURI, map[string]any{})
+ if err != nil {
+ return nil, fmt.Errorf("failed to reset BIOS: %w", err)
+ }
+ defer func() { _ = resp.Body.Close() }()
+
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ return nil, fmt.Errorf("BIOS reset failed with status %d: %s", resp.StatusCode, string(body))
+ }
+
+ // Extract task URI from response
+ taskURI := r.extractTaskURIFromResponse(resp)
+ if taskURI != "" {
+ log.V(1).Info("Dell BIOS reset task created", "taskURI", taskURI)
+ return &CleaningTaskInfo{
+ TaskURI: taskURI,
+ TaskType: CleaningTaskTypeBIOSReset,
+ TargetID: biosURI,
+ }, nil
+ }
+
+ log.V(1).Info("Dell BIOS reset completed synchronously")
+ return nil, nil
+}
+
+func (r *RedfishBaseBMC) resetBIOSHPE(ctx context.Context, biosURI string) (*CleaningTaskInfo, error) {
+ log := ctrl.LoggerFrom(ctx)
+
+ // HPE iLO: Use the standard ResetBios action
+ // /redfish/v1/Systems/{id}/Bios/Actions/Bios.ResetBios
+ actionURI := fmt.Sprintf("%s/Actions/Bios.ResetBios", biosURI)
+
+ log.V(1).Info("Resetting HPE BIOS to defaults", "uri", actionURI)
+
+ resp, err := r.client.Post(actionURI, map[string]any{})
+ if err != nil {
+ return nil, fmt.Errorf("failed to reset BIOS: %w", err)
+ }
+ defer func() { _ = resp.Body.Close() }()
+
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ return nil, fmt.Errorf("BIOS reset failed with status %d: %s", resp.StatusCode, string(body))
+ }
+
+ // Extract task URI from response
+ taskURI := r.extractTaskURIFromResponse(resp)
+ if taskURI != "" {
+ log.V(1).Info("HPE BIOS reset task created", "taskURI", taskURI)
+ return &CleaningTaskInfo{
+ TaskURI: taskURI,
+ TaskType: CleaningTaskTypeBIOSReset,
+ TargetID: biosURI,
+ }, nil
+ }
+
+ log.V(1).Info("HPE BIOS reset completed synchronously")
+ return nil, nil
+}
+
+func (r *RedfishBaseBMC) resetBIOSLenovo(ctx context.Context, biosURI string) (*CleaningTaskInfo, error) {
+ log := ctrl.LoggerFrom(ctx)
+
+ // Lenovo XClarity: POST to reset action
+ actionURI := fmt.Sprintf("%s/Actions/Bios.ResetBios", biosURI)
+
+ log.V(1).Info("Resetting Lenovo BIOS to defaults", "uri", actionURI)
+
+ resp, err := r.client.Post(actionURI, map[string]any{})
+ if err != nil {
+ return nil, fmt.Errorf("failed to reset BIOS: %w", err)
+ }
+ defer func() { _ = resp.Body.Close() }()
+
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ return nil, fmt.Errorf("BIOS reset failed with status %d: %s", resp.StatusCode, string(body))
+ }
+
+ // Extract task URI from response
+ taskURI := r.extractTaskURIFromResponse(resp)
+ if taskURI != "" {
+ log.V(1).Info("Lenovo BIOS reset task created", "taskURI", taskURI)
+ return &CleaningTaskInfo{
+ TaskURI: taskURI,
+ TaskType: CleaningTaskTypeBIOSReset,
+ TargetID: biosURI,
+ }, nil
+ }
+
+ log.V(1).Info("Lenovo BIOS reset completed synchronously")
+ return nil, nil
+}
+
+func (r *RedfishBaseBMC) resetBIOSGeneric(ctx context.Context, biosURI string) (*CleaningTaskInfo, error) {
+ log := ctrl.LoggerFrom(ctx)
+
+ // Generic Redfish: Try standard ResetBios action
+ actionURI := fmt.Sprintf("%s/Actions/Bios.ResetBios", biosURI)
+
+ log.V(1).Info("Resetting BIOS to defaults (generic)", "uri", actionURI)
+
+ resp, err := r.client.Post(actionURI, map[string]any{})
+ if err != nil {
+ return nil, fmt.Errorf("failed to reset BIOS: %w", err)
+ }
+ defer func() { _ = resp.Body.Close() }()
+
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ return nil, fmt.Errorf("BIOS reset failed with status %d: %s", resp.StatusCode, string(body))
+ }
+
+ // Extract task URI from response
+ taskURI := r.extractTaskURIFromResponse(resp)
+ if taskURI != "" {
+ log.V(1).Info("Generic BIOS reset task created", "taskURI", taskURI)
+ return &CleaningTaskInfo{
+ TaskURI: taskURI,
+ TaskType: CleaningTaskTypeBIOSReset,
+ TargetID: biosURI,
+ }, nil
+ }
+
+ log.V(1).Info("Generic BIOS reset completed synchronously")
+ return nil, nil
+}
+
+// ResetBMCToDefaults resets BMC configuration to factory defaults
+func (r *RedfishBaseBMC) ResetBMCToDefaults(ctx context.Context, managerUUID string) (*CleaningTaskInfo, error) {
+ log := ctrl.LoggerFrom(ctx)
+ log.V(1).Info("Resetting BMC to defaults", "managerUUID", managerUUID)
+
+ manager, err := r.GetManager(managerUUID)
+ if err != nil {
+ return nil, fmt.Errorf("failed to get manager: %w", err)
+ }
+
+ manufacturer := manager.Manufacturer
+ log.V(1).Info("Detected manufacturer", "manufacturer", manufacturer)
+
+ // Use vendor-specific reset methods
+ switch Manufacturer(manufacturer) {
+ case ManufacturerDell:
+ return r.resetBMCDell(ctx, manager)
+ case ManufacturerHPE:
+ return r.resetBMCHPE(ctx, manager)
+ case ManufacturerLenovo:
+ return r.resetBMCLenovo(ctx, manager)
+ default:
+ return r.resetBMCGeneric(ctx, manager)
+ }
+}
+
+func (r *RedfishBaseBMC) resetBMCDell(ctx context.Context, manager *schemas.Manager) (*CleaningTaskInfo, error) {
+ log := ctrl.LoggerFrom(ctx)
+
+ // Dell iDRAC: Use OEM action to reset to defaults
+ // /redfish/v1/Managers/{id}/Actions/Oem/DellManager.ResetToDefaults
+ actionURI := fmt.Sprintf("%s/Actions/Oem/DellManager.ResetToDefaults", manager.ODataID)
+
+ payload := map[string]any{
+ "ResetType": "ResetAllWithRootDefaults",
+ }
+
+ log.V(1).Info("Resetting Dell iDRAC to defaults", "uri", actionURI)
+
+ resp, err := r.client.Post(actionURI, payload)
+ if err != nil {
+ return nil, fmt.Errorf("failed to reset BMC: %w", err)
+ }
+ defer func() { _ = resp.Body.Close() }()
+
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ return nil, fmt.Errorf("BMC reset failed with status %d: %s", resp.StatusCode, string(body))
+ }
+
+ // Extract task URI from response
+ taskURI := r.extractTaskURIFromResponse(resp)
+ if taskURI != "" {
+ log.V(1).Info("Dell BMC reset task created", "taskURI", taskURI)
+ return &CleaningTaskInfo{
+ TaskURI: taskURI,
+ TaskType: CleaningTaskTypeBMCReset,
+ TargetID: manager.ID,
+ }, nil
+ }
+
+ log.V(1).Info("Dell BMC reset completed synchronously")
+ return nil, nil
+}
+
+func (r *RedfishBaseBMC) resetBMCHPE(ctx context.Context, manager *schemas.Manager) (*CleaningTaskInfo, error) {
+ log := ctrl.LoggerFrom(ctx)
+
+ // HPE iLO: Use OEM action to reset to factory defaults
+ // /redfish/v1/Managers/{id}/Actions/Oem/Hpe/HpiLO.ResetToFactoryDefaults
+ actionURI := fmt.Sprintf("%s/Actions/Oem/Hpe/HpiLO.ResetToFactoryDefaults", manager.ODataID)
+
+ payload := map[string]any{
+ "ResetType": "Default",
+ }
+
+ log.V(1).Info("Resetting HPE iLO to defaults", "uri", actionURI)
+
+ resp, err := r.client.Post(actionURI, payload)
+ if err != nil {
+ return nil, fmt.Errorf("failed to reset BMC: %w", err)
+ }
+ defer func() { _ = resp.Body.Close() }()
+
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ return nil, fmt.Errorf("BMC reset failed with status %d: %s", resp.StatusCode, string(body))
+ }
+
+ // Extract task URI from response
+ taskURI := r.extractTaskURIFromResponse(resp)
+ if taskURI != "" {
+ log.V(1).Info("HPE BMC reset task created", "taskURI", taskURI)
+ return &CleaningTaskInfo{
+ TaskURI: taskURI,
+ TaskType: CleaningTaskTypeBMCReset,
+ TargetID: manager.ID,
+ }, nil
+ }
+
+ log.V(1).Info("HPE BMC reset completed synchronously")
+ return nil, nil
+}
+
+func (r *RedfishBaseBMC) resetBMCLenovo(ctx context.Context, manager *schemas.Manager) (*CleaningTaskInfo, error) {
+ log := ctrl.LoggerFrom(ctx)
+
+ // Lenovo XClarity: Use the standard Manager.ResetToDefaults action
+ // /redfish/v1/Managers/{id}/Actions/Manager.ResetToDefaults
+ actionURI := fmt.Sprintf("%s/Actions/Manager.ResetToDefaults", manager.ODataID)
+
+ payload := map[string]any{
+ "ResetToDefaultsType": "ResetAll",
+ }
+
+ log.V(1).Info("Resetting Lenovo XCC to defaults", "uri", actionURI)
+
+ resp, err := r.client.Post(actionURI, payload)
+ if err != nil {
+ return nil, fmt.Errorf("failed to reset BMC: %w", err)
+ }
+ defer func() { _ = resp.Body.Close() }()
+
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ return nil, fmt.Errorf("BMC reset failed with status %d: %s", resp.StatusCode, string(body))
+ }
+
+ // Extract task URI from response
+ taskURI := r.extractTaskURIFromResponse(resp)
+ if taskURI != "" {
+ log.V(1).Info("Lenovo BMC reset task created", "taskURI", taskURI)
+ return &CleaningTaskInfo{
+ TaskURI: taskURI,
+ TaskType: CleaningTaskTypeBMCReset,
+ TargetID: manager.ID,
+ }, nil
+ }
+
+ log.V(1).Info("Lenovo BMC reset completed synchronously")
+ return nil, nil
+}
+
+func (r *RedfishBaseBMC) resetBMCGeneric(ctx context.Context, manager *schemas.Manager) (*CleaningTaskInfo, error) {
+ log := ctrl.LoggerFrom(ctx)
+
+ // Generic Redfish: Try standard ResetToDefaults action
+ actionURI := fmt.Sprintf("%s/Actions/Manager.ResetToDefaults", manager.ODataID)
+
+ payload := map[string]any{
+ "ResetToDefaultsType": "ResetAll",
+ }
+
+ log.V(1).Info("Resetting BMC to defaults (generic)", "uri", actionURI)
+
+ resp, err := r.client.Post(actionURI, payload)
+ if err != nil {
+ return nil, fmt.Errorf("failed to reset BMC: %w", err)
+ }
+ defer func() { _ = resp.Body.Close() }()
+
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ return nil, fmt.Errorf("BMC reset failed with status %d: %s", resp.StatusCode, string(body))
+ }
+
+ // Extract task URI from response
+ taskURI := r.extractTaskURIFromResponse(resp)
+ if taskURI != "" {
+ log.V(1).Info("Generic BMC reset task created", "taskURI", taskURI)
+ return &CleaningTaskInfo{
+ TaskURI: taskURI,
+ TaskType: CleaningTaskTypeBMCReset,
+ TargetID: manager.ID,
+ }, nil
+ }
+
+ log.V(1).Info("Generic BMC reset completed synchronously")
+ return nil, nil
+}
+
+// ClearNetworkConfiguration clears network configuration settings
+func (r *RedfishBaseBMC) ClearNetworkConfiguration(ctx context.Context, systemURI string) (*CleaningTaskInfo, error) {
+ log := ctrl.LoggerFrom(ctx)
+ log.V(1).Info("Clearing network configuration", "systemURI", systemURI)
+
+ system, err := r.getSystemFromUri(ctx, systemURI)
+ if err != nil {
+ return nil, fmt.Errorf("failed to get computer system: %w", err)
+ }
+
+ manufacturer := system.Manufacturer
+ log.V(1).Info("Detected manufacturer", "manufacturer", manufacturer)
+
+ // Use vendor-specific methods when available
+ switch Manufacturer(manufacturer) {
+ case ManufacturerDell:
+ return r.clearNetworkConfigDell(ctx, systemURI)
+ case ManufacturerHPE:
+ return r.clearNetworkConfigHPE(ctx, systemURI)
+ case ManufacturerLenovo:
+ return r.clearNetworkConfigLenovo(ctx, systemURI)
+ default:
+ return r.clearNetworkConfigGeneric(ctx, systemURI)
+ }
+}
+
+func (r *RedfishBaseBMC) clearNetworkConfigDell(ctx context.Context, systemURI string) (*CleaningTaskInfo, error) {
+ log := ctrl.LoggerFrom(ctx)
+
+ // Dell: Clear network adapters configuration via OEM extensions
+ // This typically involves resetting NIC settings to defaults
+ actionURI := fmt.Sprintf("%s/NetworkAdapters/Actions/Oem/DellNetworkAdapter.ClearConfiguration", systemURI)
+
+ log.V(1).Info("Clearing Dell network configuration", "uri", actionURI)
+
+ resp, err := r.client.Post(actionURI, map[string]any{})
+ if err != nil {
+ // Network config clear is non-critical; log and continue
+ log.Error(err, "Failed to clear network configuration (non-critical)")
+ return nil, nil
+ }
+ defer func() { _ = resp.Body.Close() }()
+
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ log.Error(fmt.Errorf("network config clear failed"), "Failed with status",
+ "status", resp.StatusCode, "body", string(body))
+ return nil, nil
+ }
+
+ // Extract task URI from response
+ taskURI := r.extractTaskURIFromResponse(resp)
+ if taskURI != "" {
+ log.V(1).Info("Dell network config clear task created", "taskURI", taskURI)
+ return &CleaningTaskInfo{
+ TaskURI: taskURI,
+ TaskType: CleaningTaskTypeNetworkClear,
+ TargetID: systemURI,
+ }, nil
+ }
+
+ log.V(1).Info("Dell network config clear completed synchronously")
+ return nil, nil
+}
+
+func (r *RedfishBaseBMC) clearNetworkConfigHPE(ctx context.Context, systemURI string) (*CleaningTaskInfo, error) {
+ log := ctrl.LoggerFrom(ctx)
+
+ // HPE: Clear network adapters configuration
+ actionURI := fmt.Sprintf("%s/NetworkAdapters/Actions/Oem/Hpe/HpeNetworkAdapter.ClearConfiguration", systemURI)
+
+ log.V(1).Info("Clearing HPE network configuration", "uri", actionURI)
+
+ resp, err := r.client.Post(actionURI, map[string]any{})
+ if err != nil {
+ log.Error(err, "Failed to clear network configuration (non-critical)")
+ return nil, nil
+ }
+ defer func() { _ = resp.Body.Close() }()
+
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ log.Error(fmt.Errorf("network config clear failed"), "Failed with status",
+ "status", resp.StatusCode, "body", string(body))
+ return nil, nil
+ }
+
+ // Extract task URI from response
+ taskURI := r.extractTaskURIFromResponse(resp)
+ if taskURI != "" {
+ log.V(1).Info("HPE network config clear task created", "taskURI", taskURI)
+ return &CleaningTaskInfo{
+ TaskURI: taskURI,
+ TaskType: CleaningTaskTypeNetworkClear,
+ TargetID: systemURI,
+ }, nil
+ }
+
+ log.V(1).Info("HPE network config clear completed synchronously")
+ return nil, nil
+}
+
+func (r *RedfishBaseBMC) clearNetworkConfigLenovo(ctx context.Context, systemURI string) (*CleaningTaskInfo, error) {
+ log := ctrl.LoggerFrom(ctx)
+
+ // Lenovo: Clear network adapters configuration
+ actionURI := fmt.Sprintf("%s/NetworkAdapters/Actions/NetworkAdapter.ClearConfiguration", systemURI)
+
+ log.V(1).Info("Clearing Lenovo network configuration", "uri", actionURI)
+
+ resp, err := r.client.Post(actionURI, map[string]any{})
+ if err != nil {
+ log.Error(err, "Failed to clear network configuration (non-critical)")
+ return nil, nil
+ }
+ defer func() { _ = resp.Body.Close() }()
+
+ if resp.StatusCode >= 300 {
+ body, _ := io.ReadAll(resp.Body)
+ log.Error(fmt.Errorf("network config clear failed"), "Failed with status",
+ "status", resp.StatusCode, "body", string(body))
+ return nil, nil
+ }
+
+ // Extract task URI from response
+ taskURI := r.extractTaskURIFromResponse(resp)
+ if taskURI != "" {
+ log.V(1).Info("Lenovo network config clear task created", "taskURI", taskURI)
+ return &CleaningTaskInfo{
+ TaskURI: taskURI,
+ TaskType: CleaningTaskTypeNetworkClear,
+ TargetID: systemURI,
+ }, nil
+ }
+
+ log.V(1).Info("Lenovo network config clear completed synchronously")
+ return nil, nil
+}
+
+func (r *RedfishBaseBMC) clearNetworkConfigGeneric(ctx context.Context, _ string) (*CleaningTaskInfo, error) {
+ log := ctrl.LoggerFrom(ctx)
+ log.V(1).Info("Network configuration clearing not supported for this vendor (generic)")
+ // For generic vendors, this operation is optional and non-critical
+ return nil, nil
+}
diff --git a/bmc/redfish_kube.go b/bmc/redfish_kube.go
index 7e31cbda0..2acaa92a9 100644
--- a/bmc/redfish_kube.go
+++ b/bmc/redfish_kube.go
@@ -367,6 +367,12 @@ func (r *RedfishKubeBMC) GetBMCUpgradeTask(ctx context.Context, manufacturer, ta
return task, nil
}
+// GetTaskStatus retrieves the status of a task by its URI.
+func (r *RedfishKubeBMC) GetTaskStatus(ctx context.Context, taskURI string) (*schemas.Task, error) {
+ // Delegate to the underlying RedfishBaseBMC implementation
+ return r.RedfishBaseBMC.GetTaskStatus(ctx, taskURI)
+}
+
// SetPXEBootOnce sets the boot device for the next system boot using Redfish.
func (r *RedfishKubeBMC) SetPXEBootOnce(ctx context.Context, systemURI string) error {
system, err := r.getSystemFromUri(ctx, systemURI)
@@ -455,3 +461,23 @@ func (r *RedfishKubeBMC) createJob(
}
return nil
}
+
+// EraseDisk delegates to the underlying RedfishBaseBMC
+func (r *RedfishKubeBMC) EraseDisk(ctx context.Context, systemURI string, method DiskWipeMethod) ([]CleaningTaskInfo, error) {
+ return r.RedfishBaseBMC.EraseDisk(ctx, systemURI, method)
+}
+
+// ResetBIOSToDefaults delegates to the underlying RedfishBaseBMC
+func (r *RedfishKubeBMC) ResetBIOSToDefaults(ctx context.Context, systemURI string) (*CleaningTaskInfo, error) {
+ return r.RedfishBaseBMC.ResetBIOSToDefaults(ctx, systemURI)
+}
+
+// ResetBMCToDefaults delegates to the underlying RedfishBaseBMC
+func (r *RedfishKubeBMC) ResetBMCToDefaults(ctx context.Context, managerUUID string) (*CleaningTaskInfo, error) {
+ return r.RedfishBaseBMC.ResetBMCToDefaults(ctx, managerUUID)
+}
+
+// ClearNetworkConfiguration delegates to the underlying RedfishBaseBMC
+func (r *RedfishKubeBMC) ClearNetworkConfiguration(ctx context.Context, systemURI string) (*CleaningTaskInfo, error) {
+ return r.RedfishBaseBMC.ClearNetworkConfiguration(ctx, systemURI)
+}
diff --git a/bmc/redfish_local.go b/bmc/redfish_local.go
index 8839f5a41..ee026c39d 100644
--- a/bmc/redfish_local.go
+++ b/bmc/redfish_local.go
@@ -267,3 +267,50 @@ func (r *RedfishLocalBMC) GetBMCUpgradeTask(ctx context.Context, manufacturer, t
}
return task, nil
}
+
+// EraseDisk simulates disk erasing for testing
+func (r *RedfishLocalBMC) EraseDisk(ctx context.Context, systemURI string, method DiskWipeMethod) ([]CleaningTaskInfo, error) {
+ log := ctrl.LoggerFrom(ctx)
+ log.V(1).Info("Simulating disk erase", "systemURI", systemURI, "method", method)
+ // Mock implementation - does nothing but succeeds
+ return nil, nil
+}
+
+// ResetBIOSToDefaults simulates BIOS reset for testing
+func (r *RedfishLocalBMC) ResetBIOSToDefaults(ctx context.Context, systemURI string) (*CleaningTaskInfo, error) {
+ log := ctrl.LoggerFrom(ctx)
+ log.V(1).Info("Simulating BIOS reset", "systemURI", systemURI)
+ // Mock implementation - does nothing but succeeds
+ return nil, nil
+}
+
+// ResetBMCToDefaults simulates BMC reset for testing
+func (r *RedfishLocalBMC) ResetBMCToDefaults(ctx context.Context, managerUUID string) (*CleaningTaskInfo, error) {
+ log := ctrl.LoggerFrom(ctx)
+ log.V(1).Info("Simulating BMC reset", "managerUUID", managerUUID)
+ // Mock implementation - does nothing but succeeds
+ return nil, nil
+}
+
+// ClearNetworkConfiguration simulates network config clearing for testing
+func (r *RedfishLocalBMC) ClearNetworkConfiguration(ctx context.Context, systemURI string) (*CleaningTaskInfo, error) {
+ log := ctrl.LoggerFrom(ctx)
+ log.V(1).Info("Simulating network config clear", "systemURI", systemURI)
+ // Mock implementation - does nothing but succeeds
+ return nil, nil
+}
+
+// GetTaskStatus simulates task status retrieval for testing
+func (r *RedfishLocalBMC) GetTaskStatus(ctx context.Context, taskURI string) (*schemas.Task, error) {
+ log := ctrl.LoggerFrom(ctx)
+ log.V(1).Info("Simulating task status check", "taskURI", taskURI)
+ // Mock implementation - returns completed status
+ percentComplete := uint(100)
+ return &schemas.Task{
+ TaskState: schemas.CompletedTaskState,
+ PercentComplete: &percentComplete,
+ Messages: []schemas.Message{
+ {Message: "Mock task completed"},
+ },
+ }, nil
+}
diff --git a/cmd/main.go b/cmd/main.go
index 5610fd63f..4ee2abac2 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -93,6 +93,7 @@ func main() { // nolint: gocyclo
serverMaxConcurrentReconciles int
serverClaimMaxConcurrentReconciles int
dnsRecordTemplatePath string
+ taskPollInterval time.Duration
)
flag.IntVar(&serverMaxConcurrentReconciles, "server-max-concurrent-reconciles", 5,
@@ -153,6 +154,8 @@ func main() { // nolint: gocyclo
"Timeout for BIOS Settings Controller")
flag.StringVar(&dnsRecordTemplatePath, "dns-record-template-path", "",
"Path to the DNS record template file used for creating DNS records for Servers.")
+ flag.DurationVar(&taskPollInterval, "task-poll-interval", 30*time.Second,
+ "Interval for polling BMC task status.")
opts := zap.Options{
Development: true,
@@ -407,6 +410,13 @@ func main() { // nolint: gocyclo
setupLog.Error(err, "Failed to create controller", "controller", "ServerMaintenance")
os.Exit(1)
}
+ if err = (&controller.ServerCleaningReconciler{
+ Client: mgr.GetClient(),
+ Scheme: mgr.GetScheme(),
+ }).SetupWithManager(mgr); err != nil {
+ setupLog.Error(err, "Failed to create controller", "controller", "ServerCleaning")
+ os.Exit(1)
+ }
if err = (&controller.BIOSSettingsReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
@@ -527,6 +537,18 @@ func main() { // nolint: gocyclo
setupLog.Error(err, "Failed to create controller", "controller", "BMCUser")
os.Exit(1)
}
+ if err = (&controller.BMCTaskReconciler{
+ Client: mgr.GetClient(),
+ Scheme: mgr.GetScheme(),
+ Insecure: insecure,
+ PollInterval: taskPollInterval,
+ BMCOptions: bmc.Options{
+ BasicAuth: true,
+ },
+ }).SetupWithManager(mgr); err != nil {
+ setupLog.Error(err, "Failed to create controller", "controller", "BMCTask")
+ os.Exit(1)
+ }
// nolint:goconst
if os.Getenv("ENABLE_WEBHOOKS") != "false" {
diff --git a/config/crd/bases/metal.ironcore.dev_bmcs.yaml b/config/crd/bases/metal.ironcore.dev_bmcs.yaml
index efdefb8fa..2404d3fcb 100644
--- a/config/crd/bases/metal.ironcore.dev_bmcs.yaml
+++ b/config/crd/bases/metal.ironcore.dev_bmcs.yaml
@@ -284,6 +284,52 @@ spec:
State represents the current state of the BMC.
kubebuilder:validation:Enum=Enabled;Error;Pending
type: string
+ tasks:
+ description: Tasks tracks ongoing and recent BMC operations.
+ items:
+ description: BMCTask represents a single BMC operation task.
+ properties:
+ lastUpdateTime:
+ description: LastUpdateTime is when this task status was last
+ updated.
+ format: date-time
+ type: string
+ message:
+ description: Message provides additional information about the
+ task.
+ type: string
+ percentComplete:
+ description: PercentComplete indicates completion percentage
+ (0-100).
+ format: int32
+ type: integer
+ state:
+ description: State is the current state of the task.
+ type: string
+ targetID:
+ description: TargetID identifies what the task is operating
+ on (e.g., "BIOS", "BMC", "Drive-1").
+ type: string
+ taskType:
+ description: TaskType indicates the type of operation.
+ enum:
+ - DiskErase
+ - BIOSReset
+ - BMCReset
+ - NetworkClear
+ - FirmwareUpdate
+ - ConfigurationChange
+ - AccountManagement
+ - Other
+ type: string
+ taskURI:
+ description: TaskURI is the URI to monitor the task on the BMC.
+ type: string
+ required:
+ - taskType
+ - taskURI
+ type: object
+ type: array
type: object
type: object
served: true
diff --git a/config/crd/bases/metal.ironcore.dev_servercleanings.yaml b/config/crd/bases/metal.ironcore.dev_servercleanings.yaml
new file mode 100644
index 000000000..180235be2
--- /dev/null
+++ b/config/crd/bases/metal.ironcore.dev_servercleanings.yaml
@@ -0,0 +1,331 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.20.1
+ name: servercleanings.metal.ironcore.dev
+spec:
+ group: metal.ironcore.dev
+ names:
+ kind: ServerCleaning
+ listKind: ServerCleaningList
+ plural: servercleanings
+ shortNames:
+ - scl
+ singular: servercleaning
+ scope: Namespaced
+ versions:
+ - additionalPrinterColumns:
+ - jsonPath: .status.selectedServers
+ name: Selected
+ type: integer
+ - jsonPath: .status.completedCleanings
+ name: Completed
+ type: integer
+ - jsonPath: .status.inProgressCleanings
+ name: InProgress
+ type: integer
+ - jsonPath: .status.failedCleanings
+ name: Failed
+ type: integer
+ - jsonPath: .status.state
+ name: State
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Age
+ type: date
+ name: v1alpha1
+ schema:
+ openAPIV3Schema:
+ description: ServerCleaning is the Schema for the servercleaning API
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: ServerCleaningSpec defines the desired cleaning operations
+ properties:
+ biosReset:
+ description: BIOSReset specifies if BIOS should be reset to defaults
+ type: boolean
+ bmcReset:
+ description: BMCReset specifies if BMC should be reset to defaults
+ type: boolean
+ diskWipe:
+ description: DiskWipe specifies disk erasing configuration
+ properties:
+ includeBootDrives:
+ description: IncludeBootDrives specifies whether to erase boot
+ drives
+ type: boolean
+ method:
+ default: quick
+ description: Method specifies the disk erasing method
+ enum:
+ - quick
+ - secure
+ - dod
+ type: string
+ required:
+ - method
+ type: object
+ networkCleanup:
+ description: NetworkCleanup specifies if network configurations should
+ be cleared
+ type: boolean
+ serverBootConfigurationTemplate:
+ description: |-
+ ServerBootConfigurationTemplate defines the boot configuration for cleaning agent
+ If not specified, cleaning operations are performed via BMC APIs
+ properties:
+ name:
+ description: Name specifies the name of the boot configuration.
+ type: string
+ spec:
+ description: Spec specifies the boot configuration to be rendered.
+ properties:
+ ignitionSecretRef:
+ description: |-
+ IgnitionSecretRef is a reference to the Secret object that contains
+ the ignition configuration for the server.
+ properties:
+ name:
+ default: ""
+ description: |-
+ Name of the referent.
+ This field is effectively required, but due to backwards compatibility is
+ allowed to be empty. Instances of this type with an empty value here are
+ almost certainly wrong.
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ type: string
+ type: object
+ x-kubernetes-map-type: atomic
+ image:
+ description: Image specifies the boot image to be used for
+ the server.
+ type: string
+ serverRef:
+ description: ServerRef is a reference to the server for which
+ this boot configuration is intended.
+ properties:
+ name:
+ default: ""
+ description: |-
+ Name of the referent.
+ This field is effectively required, but due to backwards compatibility is
+ allowed to be empty. Instances of this type with an empty value here are
+ almost certainly wrong.
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ type: string
+ type: object
+ x-kubernetes-map-type: atomic
+ required:
+ - serverRef
+ type: object
+ required:
+ - name
+ - spec
+ type: object
+ serverRef:
+ description: |-
+ ServerRef references a specific Server to be cleaned.
+ Mutually exclusive with ServerSelector.
+ properties:
+ name:
+ default: ""
+ description: |-
+ Name of the referent.
+ This field is effectively required, but due to backwards compatibility is
+ allowed to be empty. Instances of this type with an empty value here are
+ almost certainly wrong.
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ type: string
+ type: object
+ x-kubernetes-map-type: atomic
+ serverSelector:
+ description: |-
+ ServerSelector specifies a label selector to identify servers to be cleaned.
+ Mutually exclusive with ServerRef.
+ properties:
+ matchExpressions:
+ description: matchExpressions is a list of label selector requirements.
+ The requirements are ANDed.
+ items:
+ description: |-
+ A label selector requirement is a selector that contains values, a key, and an operator that
+ relates the key and values.
+ properties:
+ key:
+ description: key is the label key that the selector applies
+ to.
+ type: string
+ operator:
+ description: |-
+ operator represents a key's relationship to a set of values.
+ Valid operators are In, NotIn, Exists and DoesNotExist.
+ type: string
+ values:
+ description: |-
+ values is an array of string values. If the operator is In or NotIn,
+ the values array must be non-empty. If the operator is Exists or DoesNotExist,
+ the values array must be empty. This array is replaced during a strategic
+ merge patch.
+ items:
+ type: string
+ type: array
+ x-kubernetes-list-type: atomic
+ required:
+ - key
+ - operator
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ matchLabels:
+ additionalProperties:
+ type: string
+ description: |-
+ matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
+ map is equivalent to an element of matchExpressions, whose key field is "key", the
+ operator is "In", and the values array contains only "value". The requirements are ANDed.
+ type: object
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ x-kubernetes-validations:
+ - message: either serverRef or serverSelector must be specified
+ rule: has(self.serverRef) || has(self.serverSelector)
+ status:
+ description: ServerCleaningStatus defines the observed state of ServerCleaning
+ properties:
+ completedCleanings:
+ description: CompletedCleanings is the number of servers successfully
+ cleaned
+ format: int32
+ type: integer
+ conditions:
+ description: Conditions represents the latest available observations
+ items:
+ description: Condition contains details for one aspect of the current
+ state of this API Resource.
+ properties:
+ lastTransitionTime:
+ description: |-
+ lastTransitionTime is the last time the condition transitioned from one status to another.
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+ format: date-time
+ type: string
+ message:
+ description: |-
+ message is a human readable message indicating details about the transition.
+ This may be an empty string.
+ maxLength: 32768
+ type: string
+ observedGeneration:
+ description: |-
+ observedGeneration represents the .metadata.generation that the condition was set based upon.
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+ with respect to the current state of the instance.
+ format: int64
+ minimum: 0
+ type: integer
+ reason:
+ description: |-
+ reason contains a programmatic identifier indicating the reason for the condition's last transition.
+ Producers of specific condition types may define expected values and meanings for this field,
+ and whether the values are considered a guaranteed API.
+ The value should be a CamelCase string.
+ This field may not be empty.
+ maxLength: 1024
+ minLength: 1
+ pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+ type: string
+ status:
+ description: status of the condition, one of True, False, Unknown.
+ enum:
+ - "True"
+ - "False"
+ - Unknown
+ type: string
+ type:
+ description: type of condition in CamelCase or in foo.example.com/CamelCase.
+ maxLength: 316
+ pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+ type: string
+ required:
+ - lastTransitionTime
+ - message
+ - reason
+ - status
+ - type
+ type: object
+ type: array
+ failedCleanings:
+ description: FailedCleanings is the number of servers where cleaning
+ failed
+ format: int32
+ type: integer
+ inProgressCleanings:
+ description: InProgressCleanings is the number of servers currently
+ being cleaned
+ format: int32
+ type: integer
+ pendingCleanings:
+ description: PendingCleanings is the number of servers with pending
+ cleaning
+ format: int32
+ type: integer
+ selectedServers:
+ description: SelectedServers is the total number of servers selected
+ for cleaning
+ format: int32
+ type: integer
+ serverCleaningStatuses:
+ description: ServerCleaningStatuses contains per-server cleaning status
+ items:
+ description: ServerCleaningStatusEntry represents the cleaning status
+ for a single server
+ properties:
+ lastUpdateTime:
+ description: LastUpdateTime is the last time this status was
+ updated
+ format: date-time
+ type: string
+ message:
+ description: Message provides additional information about the
+ cleaning state
+ type: string
+ serverName:
+ description: ServerName is the name of the server
+ type: string
+ state:
+ description: State is the cleaning state for this server
+ type: string
+ required:
+ - serverName
+ - state
+ type: object
+ type: array
+ state:
+ description: State represents the current state of the cleaning process
+ type: string
+ type: object
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
diff --git a/config/crd/bases/metal.ironcore.dev_servers.yaml b/config/crd/bases/metal.ironcore.dev_servers.yaml
index f53eb3987..eb3c4f88e 100644
--- a/config/crd/bases/metal.ironcore.dev_servers.yaml
+++ b/config/crd/bases/metal.ironcore.dev_servers.yaml
@@ -294,6 +294,35 @@ spec:
systemUUID:
description: SystemUUID is the unique identifier for the server.
type: string
+ taints:
+ description: Taints is a list of taints that affect this server.
+ items:
+ description: |-
+ The node this Taint is attached to has the "effect" on
+ any pod that does not tolerate the Taint.
+ properties:
+ effect:
+ description: |-
+ Required. The effect of the taint on pods
+ that do not tolerate the taint.
+ Valid effects are NoSchedule, PreferNoSchedule and NoExecute.
+ type: string
+ key:
+ description: Required. The taint key to be applied to a node.
+ type: string
+ timeAdded:
+ description: TimeAdded represents the time at which the taint
+ was added.
+ format: date-time
+ type: string
+ value:
+ description: The taint value corresponding to the taint key.
+ type: string
+ required:
+ - effect
+ - key
+ type: object
+ type: array
required:
- systemUUID
type: object
diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml
index 7b33068c8..c83ef73b0 100644
--- a/config/rbac/role.yaml
+++ b/config/rbac/role.yaml
@@ -54,6 +54,7 @@ rules:
- endpoints
- serverbootconfigurations
- serverclaims
+ - servercleanings
- serverconfigurations
- servermaintenances
- servers
@@ -82,6 +83,7 @@ rules:
- endpoints/finalizers
- serverbootconfigurations/finalizers
- serverclaims/finalizers
+ - servercleanings/finalizers
- servermaintenances/finalizers
- servers/finalizers
verbs:
@@ -103,6 +105,7 @@ rules:
- endpoints/status
- serverbootconfigurations/status
- serverclaims/status
+ - servercleanings/status
- servermaintenances/status
- servers/status
verbs:
diff --git a/config/samples/metal_v1alpha1_servercleaning.yaml b/config/samples/metal_v1alpha1_servercleaning.yaml
new file mode 100644
index 000000000..d8a0cda20
--- /dev/null
+++ b/config/samples/metal_v1alpha1_servercleaning.yaml
@@ -0,0 +1,58 @@
+# Example 1: Cleaning a single server using serverRef
+apiVersion: metal.ironcore.dev/v1alpha1
+kind: ServerCleaning
+metadata:
+ name: servercleaning-single-server
+ namespace: default
+spec:
+ serverRef:
+ name: server-sample
+ diskWipe:
+ method: secure
+ includeBootDrives: true
+ bmcReset: true
+ biosReset: true
+ networkCleanup: true
+ serverBootConfigurationTemplate:
+ name: cleaning-boot-config
+ spec:
+ serverRef:
+ name: server-sample
+ image: "ghcr.io/ironcore-dev/metal-operator/cleaning-agent:latest"
+ ignitionSecretRef:
+ name: cleaning-ignition-secret
+---
+# Example 2: Cleaning multiple servers using serverSelector
+apiVersion: metal.ironcore.dev/v1alpha1
+kind: ServerCleaning
+metadata:
+ name: servercleaning-multi-server
+ namespace: default
+spec:
+ serverSelector:
+ matchLabels:
+ environment: staging
+ cleanup-required: "true"
+ diskWipe:
+ method: quick
+ includeBootDrives: false
+ bmcReset: false
+ biosReset: false
+ networkCleanup: true
+---
+# Example 3: DoD-compliant cleaning for multiple decommissioned servers
+apiVersion: metal.ironcore.dev/v1alpha1
+kind: ServerCleaning
+metadata:
+ name: servercleaning-decommission
+ namespace: default
+spec:
+ serverSelector:
+ matchLabels:
+ status: decommissioned
+ diskWipe:
+ method: dod
+ includeBootDrives: true
+ bmcReset: true
+ biosReset: true
+ networkCleanup: true
diff --git a/dist/chart/templates/crd/metal.ironcore.dev_servercleanings.yaml b/dist/chart/templates/crd/metal.ironcore.dev_servercleanings.yaml
new file mode 100644
index 000000000..87ac6d041
--- /dev/null
+++ b/dist/chart/templates/crd/metal.ironcore.dev_servercleanings.yaml
@@ -0,0 +1,376 @@
+{{- if .Values.crd.enable }}
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ labels:
+ {{- include "chart.labels" . | nindent 4 }}
+ annotations:
+ {{- if .Values.crd.keep }}
+ "helm.sh/resource-policy": keep
+ {{- end }}
+ controller-gen.kubebuilder.io/version: v0.20.1
+ name: servercleanings.metal.ironcore.dev
+spec:
+ group: metal.ironcore.dev
+ names:
+ kind: ServerCleaning
+ listKind: ServerCleaningList
+ plural: servercleanings
+ shortNames:
+ - scl
+ singular: servercleaning
+ scope: Namespaced
+ versions:
+ - additionalPrinterColumns:
+ - jsonPath: .status.selectedServers
+ name: Selected
+ type: integer
+ - jsonPath: .status.completedCleanings
+ name: Completed
+ type: integer
+ - jsonPath: .status.inProgressCleanings
+ name: InProgress
+ type: integer
+ - jsonPath: .status.failedCleanings
+ name: Failed
+ type: integer
+ - jsonPath: .status.state
+ name: State
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Age
+ type: date
+ name: v1alpha1
+ schema:
+ openAPIV3Schema:
+ description: ServerCleaning is the Schema for the servercleaning API
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: ServerCleaningSpec defines the desired cleaning operations
+ properties:
+ biosReset:
+ description: BIOSReset specifies if BIOS should be reset to defaults
+ type: boolean
+ bmcReset:
+ description: BMCReset specifies if BMC should be reset to defaults
+ type: boolean
+ diskWipe:
+ description: DiskWipe specifies disk erasing configuration
+ properties:
+ includeBootDrives:
+ description: IncludeBootDrives specifies whether to erase boot
+ drives
+ type: boolean
+ method:
+ default: quick
+ description: Method specifies the disk erasing method
+ enum:
+ - quick
+ - secure
+ - dod
+ type: string
+ required:
+ - method
+ type: object
+ networkCleanup:
+ description: NetworkCleanup specifies if network configurations should
+ be cleared
+ type: boolean
+ serverBootConfigurationTemplate:
+ description: |-
+ ServerBootConfigurationTemplate defines the boot configuration for cleaning agent
+ If not specified, cleaning operations are performed via BMC APIs
+ properties:
+ name:
+ description: Name specifies the name of the boot configuration.
+ type: string
+ spec:
+ description: Spec specifies the boot configuration to be rendered.
+ properties:
+ ignitionSecretRef:
+ description: |-
+ IgnitionSecretRef is a reference to the Secret object that contains
+ the ignition configuration for the server.
+ properties:
+ name:
+ default: ""
+ description: |-
+ Name of the referent.
+ This field is effectively required, but due to backwards compatibility is
+ allowed to be empty. Instances of this type with an empty value here are
+ almost certainly wrong.
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ type: string
+ type: object
+ x-kubernetes-map-type: atomic
+ image:
+ description: Image specifies the boot image to be used for
+ the server.
+ type: string
+ serverRef:
+ description: ServerRef is a reference to the server for which
+ this boot configuration is intended.
+ properties:
+ name:
+ default: ""
+ description: |-
+ Name of the referent.
+ This field is effectively required, but due to backwards compatibility is
+ allowed to be empty. Instances of this type with an empty value here are
+ almost certainly wrong.
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ type: string
+ type: object
+ x-kubernetes-map-type: atomic
+ required:
+ - serverRef
+ type: object
+ required:
+ - name
+ - spec
+ type: object
+ serverRef:
+ description: |-
+ ServerRef references a specific Server to be cleaned.
+ Mutually exclusive with ServerSelector.
+ properties:
+ name:
+ default: ""
+ description: |-
+ Name of the referent.
+ This field is effectively required, but due to backwards compatibility is
+ allowed to be empty. Instances of this type with an empty value here are
+ almost certainly wrong.
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
+ type: string
+ type: object
+ x-kubernetes-map-type: atomic
+ serverSelector:
+ description: |-
+ ServerSelector specifies a label selector to identify servers to be cleaned.
+ Mutually exclusive with ServerRef.
+ properties:
+ matchExpressions:
+ description: matchExpressions is a list of label selector requirements.
+ The requirements are ANDed.
+ items:
+ description: |-
+ A label selector requirement is a selector that contains values, a key, and an operator that
+ relates the key and values.
+ properties:
+ key:
+ description: key is the label key that the selector applies
+ to.
+ type: string
+ operator:
+ description: |-
+ operator represents a key's relationship to a set of values.
+ Valid operators are In, NotIn, Exists and DoesNotExist.
+ type: string
+ values:
+ description: |-
+ values is an array of string values. If the operator is In or NotIn,
+ the values array must be non-empty. If the operator is Exists or DoesNotExist,
+ the values array must be empty. This array is replaced during a strategic
+ merge patch.
+ items:
+ type: string
+ type: array
+ x-kubernetes-list-type: atomic
+ required:
+ - key
+ - operator
+ type: object
+ type: array
+ x-kubernetes-list-type: atomic
+ matchLabels:
+ additionalProperties:
+ type: string
+ description: |-
+ matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
+ map is equivalent to an element of matchExpressions, whose key field is "key", the
+ operator is "In", and the values array contains only "value". The requirements are ANDed.
+ type: object
+ type: object
+ x-kubernetes-map-type: atomic
+ type: object
+ x-kubernetes-validations:
+ - message: either serverRef or serverSelector must be specified
+ rule: has(self.serverRef) || has(self.serverSelector)
+ status:
+ description: ServerCleaningStatus defines the observed state of ServerCleaning
+ properties:
+ completedCleanings:
+ description: CompletedCleanings is the number of servers successfully
+ cleaned
+ format: int32
+ type: integer
+ conditions:
+ description: Conditions represents the latest available observations
+ items:
+ description: Condition contains details for one aspect of the current
+ state of this API Resource.
+ properties:
+ lastTransitionTime:
+ description: |-
+ lastTransitionTime is the last time the condition transitioned from one status to another.
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+ format: date-time
+ type: string
+ message:
+ description: |-
+ message is a human readable message indicating details about the transition.
+ This may be an empty string.
+ maxLength: 32768
+ type: string
+ observedGeneration:
+ description: |-
+ observedGeneration represents the .metadata.generation that the condition was set based upon.
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+ with respect to the current state of the instance.
+ format: int64
+ minimum: 0
+ type: integer
+ reason:
+ description: |-
+ reason contains a programmatic identifier indicating the reason for the condition's last transition.
+ Producers of specific condition types may define expected values and meanings for this field,
+ and whether the values are considered a guaranteed API.
+ The value should be a CamelCase string.
+ This field may not be empty.
+ maxLength: 1024
+ minLength: 1
+ pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+ type: string
+ status:
+ description: status of the condition, one of True, False, Unknown.
+ enum:
+ - "True"
+ - "False"
+ - Unknown
+ type: string
+ type:
+ description: type of condition in CamelCase or in foo.example.com/CamelCase.
+ maxLength: 316
+ pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+ type: string
+ required:
+ - lastTransitionTime
+ - message
+ - reason
+ - status
+ - type
+ type: object
+ type: array
+ failedCleanings:
+ description: FailedCleanings is the number of servers where cleaning
+ failed
+ format: int32
+ type: integer
+ inProgressCleanings:
+ description: InProgressCleanings is the number of servers currently
+ being cleaned
+ format: int32
+ type: integer
+ pendingCleanings:
+ description: PendingCleanings is the number of servers with pending
+ cleaning
+ format: int32
+ type: integer
+ selectedServers:
+ description: SelectedServers is the total number of servers selected
+ for cleaning
+ format: int32
+ type: integer
+ serverCleaningStatuses:
+ description: ServerCleaningStatuses contains per-server cleaning status
+ items:
+ description: ServerCleaningStatusEntry represents the cleaning status
+ for a single server
+ properties:
+ cleaningTasks:
+ description: CleaningTasks contains information about the cleaning
+ tasks for this server
+ items:
+ description: CleaningTaskStatus represents the status of a
+ cleaning task
+ properties:
+ lastUpdateTime:
+ description: LastUpdateTime is the last time this task
+ status was updated
+ format: date-time
+ type: string
+ message:
+ description: Message provides additional information about
+ the task
+ type: string
+ percentComplete:
+ description: PercentComplete indicates the completion
+ percentage (0-100)
+ type: integer
+ state:
+ description: State is the current state of the task
+ type: string
+ targetID:
+ description: TargetID identifies the target resource (e.g.,
+ drive ID for disk erase)
+ type: string
+ taskType:
+ description: TaskType indicates what type of cleaning
+ task this is
+ type: string
+ taskURI:
+ description: TaskURI is the URI to monitor the task
+ type: string
+ required:
+ - taskType
+ type: object
+ type: array
+ lastUpdateTime:
+ description: LastUpdateTime is the last time this status was
+ updated
+ format: date-time
+ type: string
+ message:
+ description: Message provides additional information about the
+ cleaning state
+ type: string
+ serverName:
+ description: ServerName is the name of the server
+ type: string
+ state:
+ description: State is the cleaning state for this server
+ type: string
+ required:
+ - serverName
+ - state
+ type: object
+ type: array
+ state:
+ description: State represents the current state of the cleaning process
+ type: string
+ type: object
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
+{{- end -}}
diff --git a/dist/chart/templates/crd/metal.ironcore.dev_servers.yaml b/dist/chart/templates/crd/metal.ironcore.dev_servers.yaml
index d48b6e80c..7b2cf18f5 100755
--- a/dist/chart/templates/crd/metal.ironcore.dev_servers.yaml
+++ b/dist/chart/templates/crd/metal.ironcore.dev_servers.yaml
@@ -300,6 +300,40 @@ spec:
systemUUID:
description: SystemUUID is the unique identifier for the server.
type: string
+ taints:
+ description: Taints is a list of taints that affect this server.
+ items:
+ description: |-
+ The node this Taint is attached to has the "effect" on
+ any pod that does not tolerate the Taint.
+ properties:
+ effect:
+ description: |-
+ Required. The effect of the taint on pods
+ that do not tolerate the taint.
+ Valid effects are NoSchedule, PreferNoSchedule and NoExecute.
+ type: string
+ key:
+ description: Required. The taint key to be applied to a node.
+ type: string
+ timeAdded:
+ description: TimeAdded represents the time at which the taint
+ was added.
+ format: date-time
+ type: string
+ value:
+ description: The taint value corresponding to the taint key.
+ type: string
+ required:
+ - effect
+ - key
+ type: object
+ type: array
+ uuid:
+ description: |-
+ UUID is the unique identifier for the server.
+ Deprecated in favor of systemUUID.
+ type: string
required:
- systemUUID
type: object
diff --git a/dist/chart/templates/rbac/role.yaml b/dist/chart/templates/rbac/role.yaml
index 96c75c9ae..899d532db 100755
--- a/dist/chart/templates/rbac/role.yaml
+++ b/dist/chart/templates/rbac/role.yaml
@@ -57,6 +57,7 @@ rules:
- endpoints
- serverbootconfigurations
- serverclaims
+ - servercleanings
- serverconfigurations
- servermaintenances
- servers
@@ -85,6 +86,7 @@ rules:
- endpoints/finalizers
- serverbootconfigurations/finalizers
- serverclaims/finalizers
+ - servercleanings/finalizers
- servermaintenances/finalizers
- servers/finalizers
verbs:
@@ -106,6 +108,7 @@ rules:
- endpoints/status
- serverbootconfigurations/status
- serverclaims/status
+ - servercleanings/status
- servermaintenances/status
- servers/status
verbs:
diff --git a/docs/api-reference/api.md b/docs/api-reference/api.md
index 0a09802c8..b50485c26 100644
--- a/docs/api-reference/api.md
+++ b/docs/api-reference/api.md
@@ -26,6 +26,7 @@ Package v1alpha1 contains API Schema definitions for the metal v1alpha1 API grou
- [Server](#server)
- [ServerBootConfiguration](#serverbootconfiguration)
- [ServerClaim](#serverclaim)
+- [ServerCleaning](#servercleaning)
- [ServerMaintenance](#servermaintenance)
@@ -904,6 +905,28 @@ _Appears in:_
| `device` _string_ | Device is the device to boot from. | | |
+#### CleaningTaskStatus
+
+
+
+CleaningTaskStatus represents the status of a cleaning task
+
+
+
+_Appears in:_
+- [ServerCleaningStatusEntry](#servercleaningstatusentry)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `taskURI` _string_ | TaskURI is the URI to monitor the task | | |
+| `taskType` _string_ | TaskType indicates what type of cleaning task this is | | |
+| `targetID` _string_ | TargetID identifies the target resource (e.g., drive ID for disk erase) | | |
+| `state` _string_ | State is the current state of the task | | |
+| `percentComplete` _integer_ | PercentComplete indicates the completion percentage (0-100) | | |
+| `message` _string_ | Message provides additional information about the task | | |
+| `lastUpdateTime` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#time-v1-meta)_ | LastUpdateTime is the last time this task status was updated | | |
+
+
#### ConsoleProtocol
@@ -939,6 +962,41 @@ _Appears in:_
| `SSHLenovo` | ConsoleProtocolNameSSHLenovo represents the SSH console protocol specific to Lenovo hardware. |
+#### DiskWipeConfig
+
+
+
+DiskWipeConfig defines disk erasing behavior
+
+
+
+_Appears in:_
+- [ServerCleaningSpec](#servercleaningspec)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `method` _[DiskWipeMethod](#diskwipemethod)_ | Method specifies the disk erasing method | quick | Enum: [quick secure dod] |
+| `includeBootDrives` _boolean_ | IncludeBootDrives specifies whether to erase boot drives | | |
+
+
+#### DiskWipeMethod
+
+_Underlying type:_ _string_
+
+DiskWipeMethod defines the available disk erasing methods
+
+
+
+_Appears in:_
+- [DiskWipeConfig](#diskwipeconfig)
+
+| Field | Description |
+| --- | --- |
+| `quick` | DiskWipeMethodQuick performs a quick erase (single pass) |
+| `secure` | DiskWipeMethodSecure performs a secure erase (3 passes) |
+| `dod` | DiskWipeMethodDoD performs DoD 5220.22-M standard erase (7 passes) |
+
+
#### Endpoint
@@ -1346,6 +1404,7 @@ ServerBootConfigurationTemplate defines the parameters to be used for rendering
_Appears in:_
+- [ServerCleaningSpec](#servercleaningspec)
- [ServerMaintenanceSpec](#servermaintenancespec)
| Field | Description | Default | Validation |
@@ -1409,6 +1468,110 @@ _Appears in:_
| `phase` _[Phase](#phase)_ | Phase represents the current phase of the server claim. | | |
+#### ServerCleaning
+
+
+
+ServerCleaning is the Schema for the servercleaning API
+
+
+
+
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `apiVersion` _string_ | `metal.ironcore.dev/v1alpha1` | | |
+| `kind` _string_ | `ServerCleaning` | | |
+| `metadata` _[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#objectmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | | |
+| `spec` _[ServerCleaningSpec](#servercleaningspec)_ | | | |
+| `status` _[ServerCleaningStatus](#servercleaningstatus)_ | | | |
+
+
+#### ServerCleaningSpec
+
+
+
+ServerCleaningSpec defines the desired cleaning operations
+
+
+
+_Appears in:_
+- [ServerCleaning](#servercleaning)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `serverRef` _[LocalObjectReference](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#localobjectreference-v1-core)_ | ServerRef references a specific Server to be cleaned. Mutually exclusive with ServerSelector. | | |
+| `serverSelector` _[LabelSelector](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#labelselector-v1-meta)_ | ServerSelector specifies a label selector to identify servers to be cleaned. Mutually exclusive with ServerRef. | | |
+| `diskWipe` _[DiskWipeConfig](#diskwipeconfig)_ | DiskWipe specifies disk erasing configuration | | |
+| `bmcReset` _boolean_ | BMCReset specifies if BMC should be reset to defaults | | |
+| `biosReset` _boolean_ | BIOSReset specifies if BIOS should be reset to defaults | | |
+| `networkCleanup` _boolean_ | NetworkCleanup specifies if network configurations should be cleared | | |
+| `serverBootConfigurationTemplate` _[ServerBootConfigurationTemplate](#serverbootconfigurationtemplate)_ | ServerBootConfigurationTemplate defines the boot configuration for the cleaning agent. If not specified, cleaning operations are performed via BMC APIs. | | |
+
+
+#### ServerCleaningState
+
+_Underlying type:_ _string_
+
+ServerCleaningState defines the state of the cleaning process
+
+
+
+_Appears in:_
+- [ServerCleaningStatus](#servercleaningstatus)
+- [ServerCleaningStatusEntry](#servercleaningstatusentry)
+
+| Field | Description |
+| --- | --- |
+| `Pending` | ServerCleaningStatePending indicates cleaning is waiting to start |
+| `InProgress` | ServerCleaningStateInProgress indicates cleaning is in progress |
+| `Completed` | ServerCleaningStateCompleted indicates cleaning completed successfully |
+| `Failed` | ServerCleaningStateFailed indicates cleaning failed |
+
+
+#### ServerCleaningStatus
+
+
+
+ServerCleaningStatus defines the observed state of ServerCleaning
+
+
+
+_Appears in:_
+- [ServerCleaning](#servercleaning)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `state` _[ServerCleaningState](#servercleaningstate)_ | State represents the current state of the cleaning process | | |
+| `selectedServers` _integer_ | SelectedServers is the total number of servers selected for cleaning | | |
+| `pendingCleanings` _integer_ | PendingCleanings is the number of servers with pending cleaning | | |
+| `inProgressCleanings` _integer_ | InProgressCleanings is the number of servers currently being cleaned | | |
+| `completedCleanings` _integer_ | CompletedCleanings is the number of servers successfully cleaned | | |
+| `failedCleanings` _integer_ | FailedCleanings is the number of servers where cleaning failed | | |
+| `serverCleaningStatuses` _[ServerCleaningStatusEntry](#servercleaningstatusentry) array_ | ServerCleaningStatuses contains per-server cleaning status | | |
+| `conditions` _[Condition](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#condition-v1-meta) array_ | Conditions represents the latest available observations | | |
+
+
+#### ServerCleaningStatusEntry
+
+
+
+ServerCleaningStatusEntry represents the cleaning status for a single server
+
+
+
+_Appears in:_
+- [ServerCleaningStatus](#servercleaningstatus)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `serverName` _string_ | ServerName is the name of the server | | |
+| `state` _[ServerCleaningState](#servercleaningstate)_ | State is the cleaning state for this server | | |
+| `message` _string_ | Message provides additional information about the cleaning state | | |
+| `lastUpdateTime` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#time-v1-meta)_ | LastUpdateTime is the last time this status was updated | | |
+| `cleaningTasks` _[CleaningTaskStatus](#cleaningtaskstatus) array_ | CleaningTasks contains information about the cleaning tasks for this server | | |
+
+
#### ServerMaintenance
@@ -1568,6 +1731,7 @@ _Appears in:_
| `maintenanceBootConfigurationRef` _[ObjectReference](#objectreference)_ | MaintenanceBootConfigurationRef is a reference to a BootConfiguration object that specifies the boot configuration for this server during maintenance. | | |
| `bootOrder` _[BootOrder](#bootorder) array_ | BootOrder specifies the boot order of the server. | | |
| `biosSettingsRef` _[LocalObjectReference](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#localobjectreference-v1-core)_ | BIOSSettingsRef is a reference to a biossettings object that specifies the BIOS configuration for this server. | | |
+| `taints` _[Taint](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#taint-v1-core) array_ | Taints is a list of taints that affect this server. | | |
#### ServerState
@@ -1587,6 +1751,7 @@ _Appears in:_
| `Discovery` | ServerStateDiscovery indicates that the server is in its discovery state. |
| `Available` | ServerStateAvailable indicates that the server is available for use. |
| `Reserved` | ServerStateReserved indicates that the server is reserved for a specific use or user. |
+| `Tainted` | ServerStateTainted indicates that the server is tainted and requires cleaning before transitioning back to Available. |
| `Error` | ServerStateError indicates that there is an error with the server. |
| `Maintenance` | ServerStateMaintenance indicates that the server is in maintenance. |
diff --git a/docs/bmc-task-tracking.md b/docs/bmc-task-tracking.md
new file mode 100644
index 000000000..7f0930c79
--- /dev/null
+++ b/docs/bmc-task-tracking.md
@@ -0,0 +1,553 @@
+# BMC Task Tracking
+
+## Overview
+
+All BMC operations are tracked centrally in `BMC.Status.Tasks[]`. This provides a single source of truth for all BMC operations across multiple controllers.
+
+## Architecture
+
+### Dedicated Task Controller (New in v0.x.x) - Initial Rollout for ServerCleaning
+
+The **BMCTask controller** is a dedicated controller responsible for monitoring BMC task progress. This separation of concerns provides:
+
+- ✅ **Consistent polling** - All tasks polled at configurable intervals (default 30s)
+- ✅ **Automatic monitoring** - Tasks update even when parent resources don't change
+- ✅ **Better performance** - No task polling overhead on cleaning operations
+- ✅ **Simplified controllers** - Controllers only create tasks; they don't poll
+
+**Current Implementation Status:**
+- ✅ **ServerCleaning Controller** - Uses BMCTask controller for task monitoring
+- 🔄 **Other Controllers** - Still use their own polling mechanisms (future enhancement)
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│ BMC Resource │
+│ ┌────────────────────────────────────────────────────────┐ │
+│ │ Status: │ │
+│ │ Tasks: []BMCTask ← Single source of truth │ │
+│ │ - TaskURI, Type, State, Progress, Message │ │
+│ └────────────────────────────────────────────────────────┘ │
+└─────────────────────────────────────────────────────────────┘
+ ▲ ▲
+ │ Creates tasks │ Polls & updates
+ │ │
+ ┌────┴─────┐ ┌──────┴────────┐
+ │SrvClean │ ◄─────watches───────│ BMCTask │
+ │ │ task updates │ Controller │
+ │ │ │ │
+ └──────────┘ │ • Watches BMC │
+ │ • Polls tasks │
+ │ • Updates │
+ │ progress │
+ │ • Requeues │
+ └───────────────┘
+```
+
+### Controller Responsibilities
+
+**BMCTask Controller (Dedicated Task Monitor):**
+- Watches BMC resources that have tasks
+- Polls BMC API for task status every 30s (configurable via `--task-poll-interval`)
+- Updates `BMC.Status.Tasks` with latest State, PercentComplete, Message
+- Automatically requeues when active tasks exist
+- Stops polling when all tasks reach terminal states
+- **Currently used by**: ServerCleaning controller
+
+**Controllers Using BMCTask Controller:**
+- **ServerCleaning Controller**: Creates tasks for cleaning operations, watches BMC for updates
+
+**Controllers Using Own Polling (Future Migration):**
+- **BMC Controller**: Still polls tasks during reconciliation (uses `updateBMCTaskStatus()`)
+- **BMCVersion Controller**: Still has 2-minute polling via `ResyncInterval`
+- **BMCSettings Controller**: Synchronous operations (no polling needed)
+
+**Interaction Pattern (ServerCleaning):**
+1. **Task Creation**: ServerCleaning adds task entry to `BMC.Status.Tasks` with initial state
+2. **Automatic Monitoring**: BMCTask controller automatically detects new task and begins polling
+3. **Progress Updates**: BMCTask controller updates task status every 30s
+4. **Completion Detection**: BMCTask controller stops polling when task reaches terminal state
+5. **Watch for Updates**: ServerCleaning controller watches BMC resources and reacts to task status changes (see the sketch below)
+
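+The watch in step 5 maps BMC events back to the ServerCleaning resources that own tasks on that BMC. A minimal sketch of such a mapping, assuming a hypothetical `cleaningTargetsBMC` helper and a plain list-and-filter lookup (this is illustrative, not the project's actual implementation):
+
+```go
+// enqueueCleaningsForBMC returns a handler that enqueues a reconcile
+// request for every ServerCleaning whose server is managed by the
+// changed BMC.
+func enqueueCleaningsForBMC(c client.Client) handler.EventHandler {
+	return handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []reconcile.Request {
+		bmcObj, ok := obj.(*metalv1alpha1.BMC)
+		if !ok {
+			return nil
+		}
+		cleanings := &metalv1alpha1.ServerCleaningList{}
+		if err := c.List(ctx, cleanings); err != nil {
+			return nil
+		}
+		var requests []reconcile.Request
+		for i := range cleanings.Items {
+			// cleaningTargetsBMC is a hypothetical helper resolving
+			// whether this cleaning's server belongs to the given BMC.
+			if cleaningTargetsBMC(&cleanings.Items[i], bmcObj) {
+				requests = append(requests, reconcile.Request{
+					NamespacedName: client.ObjectKeyFromObject(&cleanings.Items[i]),
+				})
+			}
+		}
+		return requests
+	})
+}
+```
+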
+### Task Structure
+
+Each `BMCTask` contains:
+
+```go
+type BMCTask struct {
+ TaskURI string // Unique identifier for the task
+ TaskType BMCTaskType // Type of operation
+ TargetID string // What the task operates on (e.g., "BMC", "BIOS", "Drive-1")
+ State string // Current state (e.g., "New", "Running", "Completed", "Failed")
+ PercentComplete int32 // Progress (0-100)
+ Message string // Additional information
+ LastUpdateTime metav1.Time // When task was last updated
+}
+```
+
+### Task Types
+
+- **FirmwareUpdate**: BMC/BIOS firmware upgrades
+- **ConfigurationChange**: BMC/BIOS attribute changes
+- **DiskErase**: Disk wiping operations
+- **BMCReset**: BMC reset operations
+- **BIOSReset**: BIOS reset to defaults
+- **NetworkClear**: Network configuration cleanup
+- **AccountManagement**: User account operations
+- **Other**: Operations not covered by the specific types
+
+## Task Lifecycle
+
+### Automatic Task Monitoring (BMCTask Controller)
+
+The **BMCTask controller** is a dedicated controller that automatically monitors all in-progress tasks:
+
+**How it works:**
+1. **Watches BMC resources** that have non-empty `Status.Tasks` arrays
+2. **Runs every 30 seconds** (configurable via `--task-poll-interval` flag)
+3. **Iterates through tasks** in `BMC.Status.Tasks`
+4. **Skips terminal states**: `Completed`, `Failed`, `Killed`, `Exception`, `Cancelled`
+5. **Polls the BMC** via `bmcClient.GetTaskStatus(taskURI)` for active tasks
+6. **Updates task status** with latest `State`, `PercentComplete`, `Message`, and `LastUpdateTime`
+7. **Persists changes** via `Status().Update()` if any tasks were updated
+8. **Automatic requeue**: Continues polling as long as active tasks exist
+
+**Key Benefits:**
+- ✅ **Automatic monitoring** - Tasks update even if BMC resource doesn't change
+- ✅ **Consistent frequency** - All tasks polled at same interval regardless of source
+- ✅ **No event dependency** - Doesn't rely on BMC reconciliation to trigger updates
+- ✅ **Works across restarts** - Tasks persisted in BMC status survive controller restarts
+- ✅ **Simplified controllers** - controllers that adopt it don't need their own polling logic (ServerCleaning today; BMCVersion and others as they migrate)
+
+**Terminal States** (tasks that are no longer polled):
+- `Completed` - Task finished successfully
+- `Failed` - Task encountered an error
+- `Killed` - Task was terminated
+- `Exception` - Task threw an exception
+- `Cancelled` - Task was cancelled
+
+**Configuration:**
+```bash
+# Default 30 second polling interval
+./manager
+
+# Custom interval (e.g., 15 seconds)
+./manager --task-poll-interval=15s
+
+# Longer interval for less frequent updates (e.g., 1 minute)
+./manager --task-poll-interval=1m
+```
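+
+The flag is wired into the manager's entrypoint. A minimal sketch, assuming a conventional kubebuilder `main.go` (manager construction and the `setupLog` logger are elided assumptions, not shown from the real file):
+
+```go
+var taskPollInterval time.Duration
+flag.DurationVar(&taskPollInterval, "task-poll-interval", 30*time.Second,
+	"Interval at which the BMCTask controller polls task status from the BMC.")
+flag.Parse()
+
+// ... manager construction elided ...
+
+if err := (&controller.BMCTaskReconciler{
+	Client:       mgr.GetClient(),
+	Scheme:       mgr.GetScheme(),
+	PollInterval: taskPollInterval,
+}).SetupWithManager(mgr); err != nil {
+	setupLog.Error(err, "unable to create controller", "controller", "BMCTask")
+	os.Exit(1)
+}
+```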
+
+### 1. Synchronous Operations
+
+For operations that complete immediately (e.g., BMC settings changes):
+
+```go
+task := metalv1alpha1.BMCTask{
+ TaskURI: fmt.Sprintf("config-change-%s-%s", name, time.Now().Format("20060102-150405")),
+ TaskType: metalv1alpha1.BMCTaskTypeConfigurationChange,
+ TargetID: "BMC",
+ State: "Completed",
+ PercentComplete: 100,
+ Message: fmt.Sprintf("Applied %d BMC attributes", len(attributes)),
+ LastUpdateTime: metav1.Now(),
+}
+```
+
+### 2. Asynchronous Operations
+
+For long-running operations (e.g., firmware updates):
+
+**Initial Creation:**
+```go
+task := metalv1alpha1.BMCTask{
+ TaskURI: taskMonitorURI, // From BMC client
+ TaskType: metalv1alpha1.BMCTaskTypeFirmwareUpdate,
+ TargetID: "BMC",
+ State: "New",
+ PercentComplete: 0,
+ Message: fmt.Sprintf("Upgrading BMC firmware to %s", version),
+ LastUpdateTime: metav1.Now(),
+}
+```
+
+**Progress Updates:**
+```go
+// Poll task status from BMC client
+taskStatus, err := bmcClient.GetBMCUpgradeTask(ctx, manufacturer, taskURI)
+if err != nil {
+ return err
+}
+
+// Update task in BMC status; guard the optional PercentComplete pointer
+updateBMCTask(ctx, bmcName, namespace, taskURI, func(bmcTask *metalv1alpha1.BMCTask) {
+ bmcTask.State = string(taskStatus.TaskState)
+ if taskStatus.PercentComplete != nil {
+ bmcTask.PercentComplete = int32(*taskStatus.PercentComplete)
+ }
+ bmcTask.Message = fmt.Sprintf("Status: %s", taskStatus.TaskStatus)
+})
+```
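+
+`updateBMCTask` is referenced above but not shown. A minimal sketch of such a helper, assuming it fetches the BMC, mutates the matching task, and patches the status (shown as a free function taking the client explicitly; a real implementation would live on the reconciler and handle update conflicts):
+
+```go
+func updateBMCTask(ctx context.Context, c client.Client, bmcName, namespace, taskURI string,
+	mutate func(*metalv1alpha1.BMCTask)) error {
+	bmcObj := &metalv1alpha1.BMC{}
+	if err := c.Get(ctx, types.NamespacedName{Name: bmcName, Namespace: namespace}, bmcObj); err != nil {
+		return err
+	}
+	base := bmcObj.DeepCopy()
+	for i := range bmcObj.Status.Tasks {
+		if bmcObj.Status.Tasks[i].TaskURI == taskURI {
+			mutate(&bmcObj.Status.Tasks[i])
+			bmcObj.Status.Tasks[i].LastUpdateTime = metav1.Now()
+			return c.Status().Patch(ctx, bmcObj, client.MergeFrom(base))
+		}
+	}
+	return fmt.Errorf("task %q not found on BMC %s", taskURI, bmcName)
+}
+```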
+
+## Controller-Specific Implementations
+
+### BMCTask Controller (Dedicated Task Monitor)
+
+**Responsibility:**
+- Automatic monitoring of all BMC tasks across all controllers
+
+**Operations:**
+- Polls task status from BMC API
+- Updates `BMC.Status.Tasks` with progress
+- Manages requeue for active tasks
+
+**Implementation Details:**
+```go
+// Only reconciles BMCs with tasks (via event filter)
+func hasTasksPredicate() predicate.Predicate {
+ return predicate.Funcs{
+ CreateFunc: func(e event.CreateEvent) bool {
+ bmc, ok := e.Object.(*metalv1alpha1.BMC)
+ return ok && len(bmc.Status.Tasks) > 0
+ },
+ UpdateFunc: func(e event.UpdateEvent) bool {
+ bmc, ok := e.ObjectNew.(*metalv1alpha1.BMC)
+ return ok && len(bmc.Status.Tasks) > 0
+ },
+ }
+}
+
+// Polls tasks and updates status
+func (r *BMCTaskReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+ // Fetch BMC, skip if no tasks
+ // Get BMC client
+ // Iterate through tasks, poll non-terminal ones
+ // Update BMC.Status.Tasks if changed
+ // Requeue if active tasks exist
+ return ctrl.Result{RequeueAfter: r.PollInterval}, nil
+}
+```
+
+**Configuration:**
+- `--task-poll-interval` flag controls polling frequency (default 30s)
+
+### BMC Controller
+
+**Operations Tracked:**
+- BMC reset operations
+
+**Helper Functions:**
+- `addBMCTask(bmcObj, task)` - Add new task to BMC status
+- `updateBMCTask(bmcObj, taskURI, updateFn)` - Update existing task
+- `getBMCTask(bmcObj, taskURI)` - Retrieve task by URI
+
+**Important:** Once migrated to the BMCTask controller, the BMC controller will no longer poll tasks itself and will only create tasks for its operations, with the BMCTask controller handling all polling automatically. As noted in the migration status above, it currently still polls during reconciliation via `updateBMCTaskStatus()`.
+
+**Example Usage:**
+```go
+func (r *BMCReconciler) resetBMC(ctx context.Context, bmcObj *metalv1alpha1.BMC) error {
+ // ... perform reset ...
+
+ task := metalv1alpha1.BMCTask{
+ TaskURI: fmt.Sprintf("bmc-reset-%s", time.Now().Format("20060102-150405")),
+ TaskType: metalv1alpha1.BMCTaskTypeBMCReset,
+ TargetID: "BMC",
+ State: "Completed",
+ PercentComplete: 100,
+ Message: "BMC reset initiated",
+ LastUpdateTime: metav1.Now(),
+ }
+ r.addBMCTask(bmcObj, task)
+
+ return r.updateBMCState(ctx, bmcObj, metalv1alpha1.BMCStatePending)
+}
+```
+
+### BMCVersion Controller
+
+**Operations Tracked:**
+- Firmware upgrade operations
+
+**Helper Functions:**
+- `addTaskToBMC(ctx, bmcName, namespace, task)` - Add task to referenced BMC
+
+**Important:** Once migrated, the BMCVersion controller will **no longer poll** for task progress itself (today it still polls via its `ResyncInterval`, as noted above). With the BMCTask controller monitoring all in-progress tasks, the BMCVersion controller only needs to:
+1. Create the task when starting a firmware upgrade
+2. Watch the BMC resource for task status updates
+3. React to task completion/failure
+
+**Example Usage:**
+```go
+// When issuing upgrade
+taskMonitor, _, err := bmcClient.UpgradeBMCVersion(ctx, manufacturer, params)
+if taskMonitor != "" {
+ r.addTaskToBMC(ctx, bmcVersion.Spec.BMCRef.Name, bmcVersion.Namespace, metalv1alpha1.BMCTask{
+ TaskURI: taskMonitor,
+ TaskType: metalv1alpha1.BMCTaskTypeFirmwareUpdate,
+ TargetID: "BMC",
+ State: "New",
+ PercentComplete: 0,
+ Message: fmt.Sprintf("Upgrading BMC firmware to %s", bmcVersion.Spec.Version),
+ LastUpdateTime: metav1.Now(),
+ })
+}
+
+// To check progress - read from BMC.Status.Tasks (BMCTask controller updates it automatically)
+bmc := &metalv1alpha1.BMC{}
+if err := r.Get(ctx, types.NamespacedName{Name: bmcName, Namespace: namespace}, bmc); err != nil {
+ return err
+}
+for _, task := range bmc.Status.Tasks {
+ if task.TaskURI == taskMonitor {
+ // Task is automatically updated by BMCTask controller
+ if task.State == "Completed" {
+ // Firmware upgrade complete
+ } else if task.State == "Failed" {
+ // Firmware upgrade failed
+ }
+ break
+ }
+}
+```
+
+### BMCSettings Controller
+
+**Operations Tracked:**
+- BMC attribute configuration changes
+
+**Helper Functions:**
+- `addTaskToBMC(ctx, bmcName, namespace, task)` - Add task to referenced BMC
+
+**Important:** For synchronous operations (immediate configuration changes), tasks are created with `State: "Completed"`. The BMCTask controller will not poll these since they're already in a terminal state.
+
+**Example Usage:**
+```go
+err = bmcClient.SetBMCAttributesImmediately(ctx, bmcObj.Spec.BMCUUID, attributes)
+if err != nil {
+ return fmt.Errorf("failed to set BMC settings: %w", err)
+}
+
+// Record configuration change (synchronous operation - already completed)
+taskURI := fmt.Sprintf("config-change-%s-%s", bmcSetting.Name, time.Now().Format("20060102-150405"))
+r.addTaskToBMC(ctx, bmcSetting.Spec.BMCRef.Name, bmcSetting.Namespace, metalv1alpha1.BMCTask{
+ TaskURI: taskURI,
+ TaskType: metalv1alpha1.BMCTaskTypeConfigurationChange,
+ TargetID: "BMC",
+ State: "Completed",
+ PercentComplete: 100,
+ Message: fmt.Sprintf("Applied %d BMC attributes", len(attributes)),
+ LastUpdateTime: metav1.Now(),
+})
+```
+
+### ServerCleaning Controller
+
+**Operations Tracked:**
+- Disk erase operations
+- BIOS reset operations
+- Network configuration cleanup
+- Account management operations
+
+**Helper Functions:**
+- `addTaskToBMC(ctx, bmcName, namespace, task)` - Add task to referenced BMC
+
+**Important:** The ServerCleaning controller **no longer polls** for task progress. The BMCTask controller automatically monitors all in-progress tasks. The ServerCleaning controller only needs to:
+1. Create tasks when starting cleaning operations
+2. Watch the BMC resource for task status updates
+3. React to task completion/failure to proceed with next cleaning steps
+
+**Example Usage:**
+```go
+// Start disk erase operation
+taskURI, err := bmcClient.ErasePhysicalDrive(ctx, driveURI)
+if err != nil {
+ return err
+}
+
+// Create task in BMC status
+r.addTaskToBMC(ctx, bmcName, namespace, metalv1alpha1.BMCTask{
+ TaskURI: taskURI,
+ TaskType: metalv1alpha1.BMCTaskTypeDiskErase,
+ TargetID: driveURI,
+ State: "New",
+ PercentComplete: 0,
+ Message: fmt.Sprintf("Erasing drive %s", driveURI),
+ LastUpdateTime: metav1.Now(),
+})
+
+// BMCTask controller will automatically poll and update this task
+// ServerCleaning controller watches BMC and reacts to task completion
+```
+
+## Task Cleanup
+
+Tasks are automatically pruned to prevent unbounded growth:
+- Only the **last 10 tasks** are retained per BMC
+- Older tasks are automatically removed when new tasks are added
+- This happens transparently in `addBMCTask()` helper functions (sketched below)
+
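+A minimal sketch of that pruning, assuming a `maxRetainedTasks` constant of 10 (the real helper may differ in detail):
+
+```go
+const maxRetainedTasks = 10 // assumed retention limit
+
+// addTask appends a task and drops the oldest entries beyond the limit.
+func addTask(bmcObj *metalv1alpha1.BMC, task metalv1alpha1.BMCTask) {
+	bmcObj.Status.Tasks = append(bmcObj.Status.Tasks, task)
+	if n := len(bmcObj.Status.Tasks); n > maxRetainedTasks {
+		bmcObj.Status.Tasks = bmcObj.Status.Tasks[n-maxRetainedTasks:]
+	}
+}
+```
+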
+## Querying Tasks
+
+### From CLI
+
+```bash
+# List all tasks for a BMC (a name is required; without it the output is a List object)
+kubectl get bmc <bmc-name> -o jsonpath='{.status.tasks[*]}' | jq
+
+# Get a specific task type
+kubectl get bmc <bmc-name> -o jsonpath='{.status.tasks[?(@.taskType=="FirmwareUpdate")]}' | jq
+
+# Watch task progress
+watch 'kubectl get bmc <bmc-name> -o jsonpath="{.status.tasks[0]}" | jq'
+
+# Get tasks in a specific state
+kubectl get bmc <bmc-name> -o jsonpath='{.status.tasks[?(@.state=="Running")]}' | jq
+```
+
+### From Code
+
+```go
+// Get BMC object
+bmc := &metalv1alpha1.BMC{}
+err := client.Get(ctx, types.NamespacedName{Name: bmcName}, bmc)
+
+// List all tasks
+for _, task := range bmc.Status.Tasks {
+ fmt.Printf("Task: %s, Type: %s, State: %s, Progress: %d%%\n",
+ task.TaskURI, task.TaskType, task.State, task.PercentComplete)
+}
+
+// Find specific task
+for _, task := range bmc.Status.Tasks {
+ if task.TaskURI == targetURI {
+ fmt.Printf("Found task: %s at %d%% complete\n", task.Message, task.PercentComplete)
+ break
+ }
+}
+```
+
+## Benefits
+
+### Single Source of Truth
+- All BMC operations tracked in one place
+- Eliminates duplication across controller status fields
+- Simplifies operational monitoring
+
+### Cross-Controller Awareness
+- See all operations affecting a BMC regardless of source
+- Better understanding of BMC state and activity
+- Prevents conflicting operations (see the guard sketched below)
+
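+For example, a controller could check for in-flight work before starting a new operation against the same BMC. A hypothetical guard, not part of the current codebase:
+
+```go
+// hasActiveTask reports whether any non-terminal task of the given type
+// is currently tracked on the BMC.
+func hasActiveTask(bmcObj *metalv1alpha1.BMC, taskType metalv1alpha1.BMCTaskType) bool {
+	for _, task := range bmcObj.Status.Tasks {
+		if task.TaskType == taskType && !isTerminalState(task.State) {
+			return true
+		}
+	}
+	return false
+}
+```
+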
+### Operational Transparency
+- Complete audit trail of BMC operations
+- Task history preserved (last 10 tasks)
+- Clear progress indicators for async operations
+
+### Better Failure Recovery
+- Tasks persist in BMC status across controller restarts
+- Can resume monitoring of long-running operations
+- Clear indication of failed operations
+
+## Migration Notes
+
+### Backward Compatibility
+
+**BMCVersion Controller:**
+- Still maintains `Status.UpgradeTask` field (deprecated but updated)
+- This allows existing monitoring/tooling to continue working
+- Plan to remove in a future version once consumers migrate (see the dual-write sketch below)
+
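+During the transition the controller effectively dual-writes task state. A hypothetical sketch of that pattern (the `*BMCTask` type of `Status.UpgradeTask` and the helper names are assumptions):
+
+```go
+func (r *BMCVersionReconciler) recordUpgradeTask(ctx context.Context, bmcVersion *metalv1alpha1.BMCVersion, task metalv1alpha1.BMCTask) error {
+	// Record the task centrally (new mechanism).
+	r.addTaskToBMC(ctx, bmcVersion.Spec.BMCRef.Name, bmcVersion.Namespace, task)
+
+	// Mirror it into the deprecated per-resource field so existing
+	// monitoring keeps working until consumers migrate.
+	base := bmcVersion.DeepCopy()
+	bmcVersion.Status.UpgradeTask = &task
+	return r.Status().Patch(ctx, bmcVersion, client.MergeFrom(base))
+}
+```
+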
+**BMCSettings Controller:**
+- No previous task tracking existed
+- Pure addition of functionality
+
+**BMC Controller:**
+- Tasks field was previously unused
+- Now actively populated
+
+### Architecture Changes (v0.x.x)
+
+**What Changed:**
+
+**Before (Old Architecture):**
+- BMC controller polled tasks during every reconciliation (event-driven, inconsistent)
+- BMCVersion controller had its own 2-minute polling loop
+- ServerCleaning controller had its own 30-second polling loop
+- Tasks only updated when reconciliation triggered
+- Redundant BMC API calls from multiple controllers
+
+**After (New Architecture):**
+- Dedicated BMCTask controller handles ALL task polling
+- Consistent 30-second polling interval (configurable)
+- Tasks update automatically even without reconciliation events
+- Single BMC API call per task per interval
+- Other controllers only create tasks and watch for updates
+
+**Migration Impact:**
+
+✅ **No API changes** - `BMC.Status.Tasks` structure unchanged
+✅ **No configuration changes** - Works with existing BMC resources
+✅ **New flag available** - `--task-poll-interval` (default 30s maintains similar behavior)
+✅ **Better consistency** - Tasks now update predictably every 30s
+✅ **Improved performance** - Eliminates redundant polling overhead
+
+**Deployment:**
+
+1. Deploy new controller version with BMCTask controller
+2. Verify task polling works as expected
+3. Monitor logs for any issues
+4. Roll back if needed (old architecture code preserved in git history)
+
+**Testing:**
+
+```bash
+# Verify BMCTask controller is running
+kubectl get pods -n metal-operator-system
+kubectl logs -n metal-operator-system deployment/controller-manager | grep BMCTaskReconciler
+
+# Test task polling
+kubectl apply -f test-bmcversion.yaml
+
+# Watch task progress (should update every 30s)
+watch 'kubectl get bmc <bmc-name> -o jsonpath="{.status.tasks[0]}" | jq'
+
+# Verify consistent updates
+kubectl get bmc <bmc-name> -o jsonpath='{.status.tasks[0].lastUpdateTime}'
+# Should update every ~30 seconds for active tasks
+```
+
+**Rollback Plan:**
+
+If issues are found:
+1. Revert to previous version
+2. BMC controller will resume event-driven polling
+3. No data loss - tasks persisted in BMC.Status.Tasks
+4. Report issue with logs and reproduction steps
+
+### Migrating Consumers
+
+If you're consuming BMC operation status:
+
+**Before:**
+```go
+// Old way - check specific controller status
+bmcVersion := &metalv1alpha1.BMCVersion{}
+client.Get(ctx, key, bmcVersion)
+progress := bmcVersion.Status.UpgradeTask.PercentComplete
+```
+
+**After:**
+```go
+// New way - check BMC tasks
+bmc := &metalv1alpha1.BMC{}
+client.Get(ctx, key, bmc)
+for _, task := range bmc.Status.Tasks {
+ if task.TaskType == metalv1alpha1.BMCTaskTypeFirmwareUpdate {
+ progress := task.PercentComplete
+ break
+ }
+}
+```
+
+## Future Enhancements
+
+Potential improvements:
+- Task filtering by date range
+- Task persistence to external storage for long-term audit
+- Webhooks/events when tasks complete
+- Task cancellation support
+- Task priority/scheduling
diff --git a/internal/controller/bmctask_controller.go b/internal/controller/bmctask_controller.go
new file mode 100644
index 000000000..adaef7970
--- /dev/null
+++ b/internal/controller/bmctask_controller.go
@@ -0,0 +1,191 @@
+// SPDX-FileCopyrightText: 2025 SAP SE or an SAP affiliate company and IronCore contributors
+// SPDX-License-Identifier: Apache-2.0
+
+package controller
+
+import (
+ "context"
+ "time"
+
+ metalv1alpha1 "github.com/ironcore-dev/metal-operator/api/v1alpha1"
+ "github.com/ironcore-dev/metal-operator/bmc"
+ "github.com/ironcore-dev/metal-operator/internal/bmcutils"
+ "github.com/stmcginnis/gofish/schemas"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/runtime"
+ ctrl "sigs.k8s.io/controller-runtime"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/event"
+ "sigs.k8s.io/controller-runtime/pkg/predicate"
+)
+
+// BMCTaskReconciler reconciles BMC tasks by polling task status from the BMC.
+// This controller is responsible for monitoring all in-progress BMC operations
+// and updating task status in BMC.Status.Tasks.
+type BMCTaskReconciler struct {
+ client.Client
+ Scheme *runtime.Scheme
+ // Insecure allows insecure connections to the BMC.
+ Insecure bool
+ // BMCOptions contains additional options for BMC clients.
+ BMCOptions bmc.Options
+ // PollInterval defines how often to poll task status from the BMC.
+ PollInterval time.Duration
+}
+
+// +kubebuilder:rbac:groups=metal.ironcore.dev,resources=bmcs,verbs=get;list;watch
+// +kubebuilder:rbac:groups=metal.ironcore.dev,resources=bmcs/status,verbs=get;update;patch
+// +kubebuilder:rbac:groups=metal.ironcore.dev,resources=bmcsecrets,verbs=get;list;watch
+// +kubebuilder:rbac:groups=metal.ironcore.dev,resources=endpoints,verbs=get;list;watch
+
+// Reconcile monitors BMC tasks and updates their status by polling the BMC.
+func (r *BMCTaskReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+ log := ctrl.LoggerFrom(ctx)
+ log.V(1).Info("Reconciling BMC tasks")
+
+ // Fetch the BMC object
+ bmcObj := &metalv1alpha1.BMC{}
+ if err := r.Get(ctx, req.NamespacedName, bmcObj); err != nil {
+ return ctrl.Result{}, client.IgnoreNotFound(err)
+ }
+
+ // Skip reconciliation if the BMC is being deleted
+ if !bmcObj.DeletionTimestamp.IsZero() {
+ return ctrl.Result{}, nil
+ }
+
+ // Skip if there are no tasks to monitor
+ if len(bmcObj.Status.Tasks) == 0 {
+ log.V(1).Info("No tasks to monitor")
+ return ctrl.Result{}, nil
+ }
+
+ // Check if there are any non-terminal tasks
+ hasActiveTasks := false
+ for i := range bmcObj.Status.Tasks {
+ task := &bmcObj.Status.Tasks[i]
+ if !isTerminalState(task.State) {
+ hasActiveTasks = true
+ break
+ }
+ }
+
+ if !hasActiveTasks {
+ log.V(1).Info("All tasks are in terminal state")
+ return ctrl.Result{}, nil
+ }
+
+ // Get BMC client
+ bmcClient, err := bmcutils.GetBMCClientFromBMC(ctx, r.Client, bmcObj, r.Insecure, r.BMCOptions)
+ if err != nil {
+ log.V(1).Info("Failed to get BMC client, will retry", "error", err)
+ // Don't fail the reconciliation, just requeue
+ return ctrl.Result{RequeueAfter: r.PollInterval}, nil
+ }
+ defer bmcClient.Logout()
+
+ // Snapshot the object before mutating tasks in place so the merge
+ // patch below actually contains the updates (a copy taken after
+ // mutation would diff two identical objects, producing an empty patch).
+ bmcBase := bmcObj.DeepCopy()
+
+ // Poll and update task statuses
+ needsUpdate := false
+ for i := range bmcObj.Status.Tasks {
+ task := &bmcObj.Status.Tasks[i]
+
+ // Skip tasks in terminal states
+ if isTerminalState(task.State) {
+ continue
+ }
+
+ // Poll task status from BMC
+ taskStatus, err := bmcClient.GetTaskStatus(ctx, task.TaskURI)
+ if err != nil {
+ log.V(1).Info("Failed to get task status", "taskURI", task.TaskURI, "error", err)
+ continue
+ }
+
+ // Update task if status changed
+ if taskStatus != nil {
+ oldState := task.State
+ oldPercent := task.PercentComplete
+
+ task.State = string(taskStatus.TaskState)
+ if taskStatus.PercentComplete != nil {
+ task.PercentComplete = int32(*taskStatus.PercentComplete)
+ }
+ if taskStatus.TaskStatus != "" {
+ task.Message = string(taskStatus.TaskStatus)
+ }
+ task.LastUpdateTime = metav1.Now()
+
+ // Log if status changed
+ if oldState != task.State || oldPercent != task.PercentComplete {
+ log.V(1).Info("Updated task status",
+ "taskURI", task.TaskURI,
+ "taskType", task.TaskType,
+ "state", task.State,
+ "percentComplete", task.PercentComplete)
+ needsUpdate = true
+ }
+ }
+ }
+
+ // Persist changes if any tasks were updated
+ if needsUpdate {
+ if err := r.Status().Patch(ctx, bmcObj, client.MergeFrom(bmcBase)); err != nil {
+ log.Error(err, "Failed to update BMC task status")
+ return ctrl.Result{}, err
+ }
+ log.V(1).Info("Successfully updated BMC task status")
+ }
+
+ // Requeue to continue monitoring active tasks
+ return ctrl.Result{RequeueAfter: r.PollInterval}, nil
+}
+
+// isTerminalState checks if a task state is terminal (no further updates expected).
+func isTerminalState(state string) bool {
+ return state == "Completed" ||
+ state == "Failed" ||
+ state == string(schemas.CompletedTaskState) ||
+ state == string(schemas.KilledTaskState) ||
+ state == string(schemas.ExceptionTaskState) ||
+ state == string(schemas.CancelledTaskState)
+}
+
+// SetupWithManager sets up the controller with the Manager.
+func (r *BMCTaskReconciler) SetupWithManager(mgr ctrl.Manager) error {
+ return ctrl.NewControllerManagedBy(mgr).
+ For(&metalv1alpha1.BMC{}).
+ WithEventFilter(hasTasksPredicate()).
+ Complete(r)
+}
+
+// hasTasksPredicate filters BMC events to only reconcile BMCs that have tasks.
+func hasTasksPredicate() predicate.Predicate {
+ return predicate.Funcs{
+ CreateFunc: func(e event.CreateEvent) bool {
+ bmc, ok := e.Object.(*metalv1alpha1.BMC)
+ if !ok {
+ return false
+ }
+ return len(bmc.Status.Tasks) > 0
+ },
+ UpdateFunc: func(e event.UpdateEvent) bool {
+ bmcNew, ok := e.ObjectNew.(*metalv1alpha1.BMC)
+ if !ok {
+ return false
+ }
+ return len(bmcNew.Status.Tasks) > 0
+ },
+ DeleteFunc: func(e event.DeleteEvent) bool {
+ // Don't reconcile on delete
+ return false
+ },
+ GenericFunc: func(e event.GenericEvent) bool {
+ bmc, ok := e.Object.(*metalv1alpha1.BMC)
+ if !ok {
+ return false
+ }
+ return len(bmc.Status.Tasks) > 0
+ },
+ }
+}
diff --git a/internal/controller/bmctask_controller_test.go b/internal/controller/bmctask_controller_test.go
new file mode 100644
index 000000000..19f2af7ee
--- /dev/null
+++ b/internal/controller/bmctask_controller_test.go
@@ -0,0 +1,908 @@
+// SPDX-FileCopyrightText: 2025 SAP SE or an SAP affiliate company and IronCore contributors
+// SPDX-License-Identifier: Apache-2.0
+
+package controller
+
+import (
+ "time"
+
+ metalv1alpha1 "github.com/ironcore-dev/metal-operator/api/v1alpha1"
+ . "github.com/onsi/ginkgo/v2"
+ . "github.com/onsi/gomega"
+ v1 "k8s.io/api/core/v1"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+ . "sigs.k8s.io/controller-runtime/pkg/envtest/komega"
+ "sigs.k8s.io/controller-runtime/pkg/event"
+)
+
+var _ = Describe("BMCTask Controller", func() {
+ _ = SetupTest(nil)
+
+ AfterEach(func(ctx SpecContext) {
+ EnsureCleanState()
+ })
+
+ It("Should update BMC.Status.Tasks when polling active tasks", func(ctx SpecContext) {
+ By("Creating a BMCSecret")
+ bmcSecret := &metalv1alpha1.BMCSecret{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-",
+ },
+ Data: map[string][]byte{
+ metalv1alpha1.BMCSecretUsernameKeyName: []byte("foo"),
+ metalv1alpha1.BMCSecretPasswordKeyName: []byte("bar"),
+ },
+ }
+ Expect(k8sClient.Create(ctx, bmcSecret)).To(Succeed())
+
+ By("Creating a BMC resource with active tasks")
+ bmc := &metalv1alpha1.BMC{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-bmc-",
+ },
+ Spec: metalv1alpha1.BMCSpec{
+ Endpoint: &metalv1alpha1.InlineEndpoint{
+ IP: metalv1alpha1.MustParseIP(MockServerIP),
+ MACAddress: "aa:bb:cc:dd:ee:ff",
+ },
+ Protocol: metalv1alpha1.Protocol{
+ Name: metalv1alpha1.ProtocolRedfishLocal,
+ Port: MockServerPort,
+ },
+ BMCSecretRef: v1.LocalObjectReference{
+ Name: bmcSecret.Name,
+ },
+ },
+ Status: metalv1alpha1.BMCStatus{
+ Tasks: []metalv1alpha1.BMCTask{
+ {
+ TaskURI: "/redfish/v1/TaskService/Tasks/1",
+ TaskType: metalv1alpha1.BMCTaskTypeDiskErase,
+ TargetID: "Drive-1",
+ State: "Running",
+ PercentComplete: 0,
+ Message: "Erasing disk",
+ LastUpdateTime: metav1.Now(),
+ },
+ },
+ },
+ }
+ Expect(k8sClient.Create(ctx, bmc)).To(Succeed())
+ Expect(k8sClient.Status().Update(ctx, bmc)).To(Succeed())
+
+ By("Ensuring that the task status is updated by the controller")
+ // The mock BMC will return Completed status
+ Eventually(Object(bmc)).Should(SatisfyAll(
+ HaveField("Status.Tasks", HaveLen(1)),
+ HaveField("Status.Tasks[0].State", "Completed"),
+ HaveField("Status.Tasks[0].PercentComplete", BeNumerically(">=", 0)),
+ ))
+
+ // cleanup
+ Expect(k8sClient.Delete(ctx, bmc)).To(Succeed())
+ Expect(k8sClient.Delete(ctx, bmcSecret)).To(Succeed())
+ })
+
+ It("Should only reconcile BMCs with tasks due to event filter", func(ctx SpecContext) {
+ By("Creating a BMCSecret")
+ bmcSecret := &metalv1alpha1.BMCSecret{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-",
+ },
+ Data: map[string][]byte{
+ metalv1alpha1.BMCSecretUsernameKeyName: []byte("foo"),
+ metalv1alpha1.BMCSecretPasswordKeyName: []byte("bar"),
+ },
+ }
+ Expect(k8sClient.Create(ctx, bmcSecret)).To(Succeed())
+
+ By("Creating a BMC resource without tasks")
+ bmcWithoutTasks := &metalv1alpha1.BMC{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-bmc-notasks-",
+ },
+ Spec: metalv1alpha1.BMCSpec{
+ Endpoint: &metalv1alpha1.InlineEndpoint{
+ IP: metalv1alpha1.MustParseIP(MockServerIP),
+ MACAddress: "aa:bb:cc:dd:ee:11",
+ },
+ Protocol: metalv1alpha1.Protocol{
+ Name: metalv1alpha1.ProtocolRedfishLocal,
+ Port: MockServerPort,
+ },
+ BMCSecretRef: v1.LocalObjectReference{
+ Name: bmcSecret.Name,
+ },
+ },
+ }
+ Expect(k8sClient.Create(ctx, bmcWithoutTasks)).To(Succeed())
+
+ By("Ensuring BMC without tasks remains unchanged")
+ Consistently(Object(bmcWithoutTasks)).Should(HaveField("Status.Tasks", BeEmpty()))
+
+ By("Creating a BMC resource with tasks")
+ bmcWithTasks := &metalv1alpha1.BMC{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-bmc-withtasks-",
+ },
+ Spec: metalv1alpha1.BMCSpec{
+ Endpoint: &metalv1alpha1.InlineEndpoint{
+ IP: metalv1alpha1.MustParseIP(MockServerIP),
+ MACAddress: "aa:bb:cc:dd:ee:22",
+ },
+ Protocol: metalv1alpha1.Protocol{
+ Name: metalv1alpha1.ProtocolRedfishLocal,
+ Port: MockServerPort,
+ },
+ BMCSecretRef: v1.LocalObjectReference{
+ Name: bmcSecret.Name,
+ },
+ },
+ Status: metalv1alpha1.BMCStatus{
+ Tasks: []metalv1alpha1.BMCTask{
+ {
+ TaskURI: "/redfish/v1/TaskService/Tasks/1",
+ TaskType: metalv1alpha1.BMCTaskTypeDiskErase,
+ State: "Running",
+ PercentComplete: 0,
+ LastUpdateTime: metav1.Now(),
+ },
+ },
+ },
+ }
+ Expect(k8sClient.Create(ctx, bmcWithTasks)).To(Succeed())
+ Expect(k8sClient.Status().Update(ctx, bmcWithTasks)).To(Succeed())
+
+ By("Ensuring BMC with tasks is reconciled")
+ Eventually(Object(bmcWithTasks)).Should(SatisfyAll(
+ HaveField("Status.Tasks", HaveLen(1)),
+ HaveField("Status.Tasks[0].State", "Completed"),
+ ))
+
+ // cleanup
+ Expect(k8sClient.Delete(ctx, bmcWithoutTasks)).To(Succeed())
+ Expect(k8sClient.Delete(ctx, bmcWithTasks)).To(Succeed())
+ Expect(k8sClient.Delete(ctx, bmcSecret)).To(Succeed())
+ })
+
+ It("Should automatically requeue when active tasks exist", func(ctx SpecContext) {
+ By("Creating a BMCSecret")
+ bmcSecret := &metalv1alpha1.BMCSecret{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-",
+ },
+ Data: map[string][]byte{
+ metalv1alpha1.BMCSecretUsernameKeyName: []byte("foo"),
+ metalv1alpha1.BMCSecretPasswordKeyName: []byte("bar"),
+ },
+ }
+ Expect(k8sClient.Create(ctx, bmcSecret)).To(Succeed())
+
+ By("Creating a BMC resource with an active task")
+ bmc := &metalv1alpha1.BMC{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-bmc-",
+ },
+ Spec: metalv1alpha1.BMCSpec{
+ Endpoint: &metalv1alpha1.InlineEndpoint{
+ IP: metalv1alpha1.MustParseIP(MockServerIP),
+ MACAddress: "aa:bb:cc:dd:ee:33",
+ },
+ Protocol: metalv1alpha1.Protocol{
+ Name: metalv1alpha1.ProtocolRedfishLocal,
+ Port: MockServerPort,
+ },
+ BMCSecretRef: v1.LocalObjectReference{
+ Name: bmcSecret.Name,
+ },
+ },
+ Status: metalv1alpha1.BMCStatus{
+ Tasks: []metalv1alpha1.BMCTask{
+ {
+ TaskURI: "/redfish/v1/TaskService/Tasks/active",
+ TaskType: metalv1alpha1.BMCTaskTypeFirmwareUpdate,
+ State: "Running",
+ PercentComplete: 25,
+ LastUpdateTime: metav1.Now(),
+ },
+ },
+ },
+ }
+ Expect(k8sClient.Create(ctx, bmc)).To(Succeed())
+ Expect(k8sClient.Status().Update(ctx, bmc)).To(Succeed())
+
+ By("Ensuring the task is polled multiple times due to requeue")
+ initialUpdateTime := metav1.Now()
+
+ // Since the mock returns completed, we verify the task was updated
+ Eventually(Object(bmc)).Should(SatisfyAll(
+ HaveField("Status.Tasks", HaveLen(1)),
+ HaveField("Status.Tasks[0].State", "Completed"),
+ HaveField("Status.Tasks[0].LastUpdateTime", Not(Equal(initialUpdateTime))),
+ ))
+
+ // cleanup
+ Expect(k8sClient.Delete(ctx, bmc)).To(Succeed())
+ Expect(k8sClient.Delete(ctx, bmcSecret)).To(Succeed())
+ })
+
+ It("Should not requeue when all tasks are in terminal state", func(ctx SpecContext) {
+ By("Creating a BMCSecret")
+ bmcSecret := &metalv1alpha1.BMCSecret{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-",
+ },
+ Data: map[string][]byte{
+ metalv1alpha1.BMCSecretUsernameKeyName: []byte("foo"),
+ metalv1alpha1.BMCSecretPasswordKeyName: []byte("bar"),
+ },
+ }
+ Expect(k8sClient.Create(ctx, bmcSecret)).To(Succeed())
+
+ By("Creating a BMC resource with only terminal tasks")
+ bmc := &metalv1alpha1.BMC{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-bmc-",
+ },
+ Spec: metalv1alpha1.BMCSpec{
+ Endpoint: &metalv1alpha1.InlineEndpoint{
+ IP: metalv1alpha1.MustParseIP(MockServerIP),
+ MACAddress: "aa:bb:cc:dd:ee:44",
+ },
+ Protocol: metalv1alpha1.Protocol{
+ Name: metalv1alpha1.ProtocolRedfishLocal,
+ Port: MockServerPort,
+ },
+ BMCSecretRef: v1.LocalObjectReference{
+ Name: bmcSecret.Name,
+ },
+ },
+ Status: metalv1alpha1.BMCStatus{
+ Tasks: []metalv1alpha1.BMCTask{
+ {
+ TaskURI: "/redfish/v1/TaskService/Tasks/completed",
+ TaskType: metalv1alpha1.BMCTaskTypeDiskErase,
+ State: "Completed",
+ PercentComplete: 100,
+ LastUpdateTime: metav1.Now(),
+ },
+ {
+ TaskURI: "/redfish/v1/TaskService/Tasks/failed",
+ TaskType: metalv1alpha1.BMCTaskTypeBIOSReset,
+ State: "Failed",
+ PercentComplete: 50,
+ Message: "Operation failed",
+ LastUpdateTime: metav1.Now(),
+ },
+ },
+ },
+ }
+ Expect(k8sClient.Create(ctx, bmc)).To(Succeed())
+ Expect(k8sClient.Status().Update(ctx, bmc)).To(Succeed())
+
+ By("Ensuring terminal tasks are not updated")
+ // Store the initial last update time
+ Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(bmc), bmc)).To(Succeed())
+ initialUpdateTime1 := bmc.Status.Tasks[0].LastUpdateTime
+ initialUpdateTime2 := bmc.Status.Tasks[1].LastUpdateTime
+
+ // Wait a bit and verify the tasks haven't changed
+ time.Sleep(200 * time.Millisecond)
+ Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(bmc), bmc)).To(Succeed())
+
+ Expect(bmc.Status.Tasks).To(HaveLen(2))
+ Expect(bmc.Status.Tasks[0].LastUpdateTime).To(Equal(initialUpdateTime1))
+ Expect(bmc.Status.Tasks[1].LastUpdateTime).To(Equal(initialUpdateTime2))
+
+ // cleanup
+ Expect(k8sClient.Delete(ctx, bmc)).To(Succeed())
+ Expect(k8sClient.Delete(ctx, bmcSecret)).To(Succeed())
+ })
+
+ It("Should handle BMC client errors gracefully", func(ctx SpecContext) {
+ By("Creating a BMCSecret with invalid credentials")
+ bmcSecret := &metalv1alpha1.BMCSecret{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-",
+ },
+ Data: map[string][]byte{
+ metalv1alpha1.BMCSecretUsernameKeyName: []byte("invalid"),
+ metalv1alpha1.BMCSecretPasswordKeyName: []byte("invalid"),
+ },
+ }
+ Expect(k8sClient.Create(ctx, bmcSecret)).To(Succeed())
+
+ By("Creating a BMC resource with active tasks")
+ bmc := &metalv1alpha1.BMC{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-bmc-",
+ },
+ Spec: metalv1alpha1.BMCSpec{
+ Endpoint: &metalv1alpha1.InlineEndpoint{
+ IP: metalv1alpha1.MustParseIP("192.0.2.1"), // TEST-NET-1 (unreachable)
+ MACAddress: "aa:bb:cc:dd:ee:55",
+ },
+ Protocol: metalv1alpha1.Protocol{
+ Name: metalv1alpha1.ProtocolRedfish,
+ Port: 8000,
+ },
+ BMCSecretRef: v1.LocalObjectReference{
+ Name: bmcSecret.Name,
+ },
+ },
+ Status: metalv1alpha1.BMCStatus{
+ Tasks: []metalv1alpha1.BMCTask{
+ {
+ TaskURI: "/redfish/v1/TaskService/Tasks/1",
+ TaskType: metalv1alpha1.BMCTaskTypeDiskErase,
+ State: "Running",
+ PercentComplete: 0,
+ LastUpdateTime: metav1.Now(),
+ },
+ },
+ },
+ }
+ Expect(k8sClient.Create(ctx, bmc)).To(Succeed())
+ Expect(k8sClient.Status().Update(ctx, bmc)).To(Succeed())
+
+ By("Ensuring the controller handles the error gracefully")
+ // The controller should not crash and should keep retrying
+ Consistently(Object(bmc), "2s", "100ms").Should(SatisfyAll(
+ HaveField("Status.Tasks", HaveLen(1)),
+ HaveField("Status.Tasks[0].State", "Running"),
+ ))
+
+ // cleanup
+ Expect(k8sClient.Delete(ctx, bmc)).To(Succeed())
+ Expect(k8sClient.Delete(ctx, bmcSecret)).To(Succeed())
+ })
+
+ It("Should only update changed tasks", func(ctx SpecContext) {
+ By("Creating a BMCSecret")
+ bmcSecret := &metalv1alpha1.BMCSecret{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-",
+ },
+ Data: map[string][]byte{
+ metalv1alpha1.BMCSecretUsernameKeyName: []byte("foo"),
+ metalv1alpha1.BMCSecretPasswordKeyName: []byte("bar"),
+ },
+ }
+ Expect(k8sClient.Create(ctx, bmcSecret)).To(Succeed())
+
+ By("Creating a BMC resource with mixed terminal and active tasks")
+ bmc := &metalv1alpha1.BMC{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-bmc-",
+ },
+ Spec: metalv1alpha1.BMCSpec{
+ Endpoint: &metalv1alpha1.InlineEndpoint{
+ IP: metalv1alpha1.MustParseIP(MockServerIP),
+ MACAddress: "aa:bb:cc:dd:ee:66",
+ },
+ Protocol: metalv1alpha1.Protocol{
+ Name: metalv1alpha1.ProtocolRedfishLocal,
+ Port: MockServerPort,
+ },
+ BMCSecretRef: v1.LocalObjectReference{
+ Name: bmcSecret.Name,
+ },
+ },
+ Status: metalv1alpha1.BMCStatus{
+ Tasks: []metalv1alpha1.BMCTask{
+ {
+ TaskURI: "/redfish/v1/TaskService/Tasks/completed",
+ TaskType: metalv1alpha1.BMCTaskTypeDiskErase,
+ State: "Completed",
+ PercentComplete: 100,
+ Message: "Disk erased successfully",
+ LastUpdateTime: metav1.Now(),
+ },
+ {
+ TaskURI: "/redfish/v1/TaskService/Tasks/active",
+ TaskType: metalv1alpha1.BMCTaskTypeBIOSReset,
+ State: "Running",
+ PercentComplete: 50,
+ LastUpdateTime: metav1.Now(),
+ },
+ },
+ },
+ }
+ Expect(k8sClient.Create(ctx, bmc)).To(Succeed())
+ Expect(k8sClient.Status().Update(ctx, bmc)).To(Succeed())
+
+ By("Getting the initial state")
+ Eventually(Get(bmc)).Should(Succeed())
+ initialTask1UpdateTime := bmc.Status.Tasks[0].LastUpdateTime
+ initialTask2UpdateTime := bmc.Status.Tasks[1].LastUpdateTime
+
+ By("Ensuring only active task is updated")
+ Eventually(Object(bmc)).Should(SatisfyAll(
+ HaveField("Status.Tasks", HaveLen(2)),
+ // First task (completed) should remain unchanged
+ HaveField("Status.Tasks[0].State", "Completed"),
+ HaveField("Status.Tasks[0].PercentComplete", BeNumerically("==", 100)),
+ // Second task (active) should be updated by the mock BMC
+ HaveField("Status.Tasks[1].State", "Completed"),
+ HaveField("Status.Tasks[1].LastUpdateTime", Not(Equal(initialTask2UpdateTime))),
+ ))
+
+ // Verify first task was not updated
+ Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(bmc), bmc)).To(Succeed())
+ Expect(bmc.Status.Tasks[0].LastUpdateTime).To(Equal(initialTask1UpdateTime))
+
+ // cleanup
+ Expect(k8sClient.Delete(ctx, bmc)).To(Succeed())
+ Expect(k8sClient.Delete(ctx, bmcSecret)).To(Succeed())
+ })
+
+ It("Should handle multiple tasks with mixed states correctly", func(ctx SpecContext) {
+ By("Creating a BMCSecret")
+ bmcSecret := &metalv1alpha1.BMCSecret{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-",
+ },
+ Data: map[string][]byte{
+ metalv1alpha1.BMCSecretUsernameKeyName: []byte("foo"),
+ metalv1alpha1.BMCSecretPasswordKeyName: []byte("bar"),
+ },
+ }
+ Expect(k8sClient.Create(ctx, bmcSecret)).To(Succeed())
+
+ By("Creating a BMC resource with multiple tasks in various states")
+ bmc := &metalv1alpha1.BMC{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-bmc-",
+ },
+ Spec: metalv1alpha1.BMCSpec{
+ Endpoint: &metalv1alpha1.InlineEndpoint{
+ IP: metalv1alpha1.MustParseIP(MockServerIP),
+ MACAddress: "aa:bb:cc:dd:ee:77",
+ },
+ Protocol: metalv1alpha1.Protocol{
+ Name: metalv1alpha1.ProtocolRedfishLocal,
+ Port: MockServerPort,
+ },
+ BMCSecretRef: v1.LocalObjectReference{
+ Name: bmcSecret.Name,
+ },
+ },
+ Status: metalv1alpha1.BMCStatus{
+ Tasks: []metalv1alpha1.BMCTask{
+ {
+ TaskURI: "/redfish/v1/TaskService/Tasks/task1",
+ TaskType: metalv1alpha1.BMCTaskTypeDiskErase,
+ TargetID: "Drive-1",
+ State: "Running",
+ PercentComplete: 10,
+ Message: "Erasing drive 1",
+ LastUpdateTime: metav1.Now(),
+ },
+ {
+ TaskURI: "/redfish/v1/TaskService/Tasks/task2",
+ TaskType: metalv1alpha1.BMCTaskTypeBMCReset,
+ State: "Completed",
+ PercentComplete: 100,
+ Message: "BMC reset completed",
+ LastUpdateTime: metav1.Now(),
+ },
+ {
+ TaskURI: "/redfish/v1/TaskService/Tasks/task3",
+ TaskType: metalv1alpha1.BMCTaskTypeFirmwareUpdate,
+ State: "Running",
+ PercentComplete: 75,
+ Message: "Updating firmware",
+ LastUpdateTime: metav1.Now(),
+ },
+ {
+ TaskURI: "/redfish/v1/TaskService/Tasks/task4",
+ TaskType: metalv1alpha1.BMCTaskTypeNetworkClear,
+ State: "Failed",
+ PercentComplete: 0,
+ Message: "Network clear failed",
+ LastUpdateTime: metav1.Now(),
+ },
+ },
+ },
+ }
+ Expect(k8sClient.Create(ctx, bmc)).To(Succeed())
+ Expect(k8sClient.Status().Update(ctx, bmc)).To(Succeed())
+
+ By("Ensuring only non-terminal tasks are updated")
+ Eventually(Object(bmc)).Should(SatisfyAll(
+ HaveField("Status.Tasks", HaveLen(4)),
+ // Task 1: was Running, should be updated to Completed by mock
+ HaveField("Status.Tasks[0].State", "Completed"),
+ // Task 2: was Completed, should remain Completed
+ HaveField("Status.Tasks[1].State", "Completed"),
+ HaveField("Status.Tasks[1].PercentComplete", BeNumerically("==", 100)),
+ // Task 3: was Running, should be updated to Completed by mock
+ HaveField("Status.Tasks[2].State", "Completed"),
+ // Task 4: was Failed, should remain Failed
+ HaveField("Status.Tasks[3].State", "Failed"),
+ HaveField("Status.Tasks[3].Message", "Network clear failed"),
+ ))
+
+ // cleanup
+ Expect(k8sClient.Delete(ctx, bmc)).To(Succeed())
+ Expect(k8sClient.Delete(ctx, bmcSecret)).To(Succeed())
+ })
+
+ It("Should skip reconciliation if BMC is being deleted", func(ctx SpecContext) {
+ By("Creating a BMCSecret")
+ bmcSecret := &metalv1alpha1.BMCSecret{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-",
+ },
+ Data: map[string][]byte{
+ metalv1alpha1.BMCSecretUsernameKeyName: []byte("foo"),
+ metalv1alpha1.BMCSecretPasswordKeyName: []byte("bar"),
+ },
+ }
+ Expect(k8sClient.Create(ctx, bmcSecret)).To(Succeed())
+
+ By("Creating a BMC resource with tasks and a finalizer")
+ bmc := &metalv1alpha1.BMC{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-bmc-",
+ Finalizers: []string{"test.finalizer"},
+ },
+ Spec: metalv1alpha1.BMCSpec{
+ Endpoint: &metalv1alpha1.InlineEndpoint{
+ IP: metalv1alpha1.MustParseIP(MockServerIP),
+ MACAddress: "aa:bb:cc:dd:ee:88",
+ },
+ Protocol: metalv1alpha1.Protocol{
+ Name: metalv1alpha1.ProtocolRedfishLocal,
+ Port: MockServerPort,
+ },
+ BMCSecretRef: v1.LocalObjectReference{
+ Name: bmcSecret.Name,
+ },
+ },
+ Status: metalv1alpha1.BMCStatus{
+ Tasks: []metalv1alpha1.BMCTask{
+ {
+ TaskURI: "/redfish/v1/TaskService/Tasks/1",
+ TaskType: metalv1alpha1.BMCTaskTypeDiskErase,
+ State: "Running",
+ PercentComplete: 0,
+ LastUpdateTime: metav1.Now(),
+ },
+ },
+ },
+ }
+ Expect(k8sClient.Create(ctx, bmc)).To(Succeed())
+ Expect(k8sClient.Status().Update(ctx, bmc)).To(Succeed())
+
+ By("Deleting the BMC")
+ Expect(k8sClient.Delete(ctx, bmc)).To(Succeed())
+
+ By("Ensuring tasks are not updated during deletion")
+ Eventually(Get(bmc)).Should(Succeed())
+ Expect(bmc.DeletionTimestamp).NotTo(BeNil())
+
+ // Store the task state when deletion started
+ initialTaskState := bmc.Status.Tasks[0].State
+ initialUpdateTime := bmc.Status.Tasks[0].LastUpdateTime
+
+ // Wait a bit and verify the task hasn't been updated
+ time.Sleep(200 * time.Millisecond)
+ Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(bmc), bmc)).To(Succeed())
+ Expect(bmc.Status.Tasks[0].State).To(Equal(initialTaskState))
+ Expect(bmc.Status.Tasks[0].LastUpdateTime).To(Equal(initialUpdateTime))
+
+ By("Removing finalizer to allow deletion")
+ Eventually(Update(bmc, func() {
+ bmc.Finalizers = []string{}
+ })).Should(Succeed())
+
+ // cleanup
+ Expect(k8sClient.Delete(ctx, bmcSecret)).To(Succeed())
+ })
+
+ It("Should handle BMCs with empty task list", func(ctx SpecContext) {
+ By("Creating a BMCSecret")
+ bmcSecret := &metalv1alpha1.BMCSecret{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-",
+ },
+ Data: map[string][]byte{
+ metalv1alpha1.BMCSecretUsernameKeyName: []byte("foo"),
+ metalv1alpha1.BMCSecretPasswordKeyName: []byte("bar"),
+ },
+ }
+ Expect(k8sClient.Create(ctx, bmcSecret)).To(Succeed())
+
+ By("Creating a BMC resource")
+ bmc := &metalv1alpha1.BMC{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-bmc-",
+ },
+ Spec: metalv1alpha1.BMCSpec{
+ Endpoint: &metalv1alpha1.InlineEndpoint{
+ IP: metalv1alpha1.MustParseIP(MockServerIP),
+ MACAddress: "aa:bb:cc:dd:ee:99",
+ },
+ Protocol: metalv1alpha1.Protocol{
+ Name: metalv1alpha1.ProtocolRedfishLocal,
+ Port: MockServerPort,
+ },
+ BMCSecretRef: v1.LocalObjectReference{
+ Name: bmcSecret.Name,
+ },
+ },
+ Status: metalv1alpha1.BMCStatus{
+ Tasks: []metalv1alpha1.BMCTask{},
+ },
+ }
+ Expect(k8sClient.Create(ctx, bmc)).To(Succeed())
+ Expect(k8sClient.Status().Update(ctx, bmc)).To(Succeed())
+
+ By("Ensuring the controller doesn't fail with empty task list")
+ Consistently(Object(bmc), "1s", "100ms").Should(HaveField("Status.Tasks", BeEmpty()))
+
+ // cleanup
+ Expect(k8sClient.Delete(ctx, bmc)).To(Succeed())
+ Expect(k8sClient.Delete(ctx, bmcSecret)).To(Succeed())
+ })
+
+ It("Should register BMCTask controller in the test setup", func(ctx SpecContext) {
+ By("Verifying the BMCTask controller is registered")
+ // This test verifies that the controller is properly set up in suite_test.go
+ // The fact that other tests pass indicates the controller is working
+ // This is a placeholder to ensure we remember to register it in suite_test.go
+
+ By("Creating a BMCSecret")
+ bmcSecret := &metalv1alpha1.BMCSecret{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-",
+ },
+ Data: map[string][]byte{
+ metalv1alpha1.BMCSecretUsernameKeyName: []byte("foo"),
+ metalv1alpha1.BMCSecretPasswordKeyName: []byte("bar"),
+ },
+ }
+ Expect(k8sClient.Create(ctx, bmcSecret)).To(Succeed())
+
+ By("Creating a BMC with tasks to trigger reconciliation")
+ bmc := &metalv1alpha1.BMC{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-bmc-controller-",
+ },
+ Spec: metalv1alpha1.BMCSpec{
+ Endpoint: &metalv1alpha1.InlineEndpoint{
+ IP: metalv1alpha1.MustParseIP(MockServerIP),
+ MACAddress: "aa:bb:cc:dd:ee:00",
+ },
+ Protocol: metalv1alpha1.Protocol{
+ Name: metalv1alpha1.ProtocolRedfishLocal,
+ Port: MockServerPort,
+ },
+ BMCSecretRef: v1.LocalObjectReference{
+ Name: bmcSecret.Name,
+ },
+ },
+ Status: metalv1alpha1.BMCStatus{
+ Tasks: []metalv1alpha1.BMCTask{
+ {
+ TaskURI: "/redfish/v1/TaskService/Tasks/1",
+ TaskType: metalv1alpha1.BMCTaskTypeDiskErase,
+ State: "Running",
+ PercentComplete: 0,
+ LastUpdateTime: metav1.Now(),
+ },
+ },
+ },
+ }
+ Expect(k8sClient.Create(ctx, bmc)).To(Succeed())
+ Expect(k8sClient.Status().Update(ctx, bmc)).To(Succeed())
+
+ By("Ensuring controller processes the BMC task")
+ Eventually(Object(bmc), "5s", "100ms").Should(SatisfyAll(
+ HaveField("Status.Tasks", HaveLen(1)),
+ HaveField("Status.Tasks[0].State", "Completed"),
+ ))
+
+ // cleanup
+ Expect(k8sClient.Delete(ctx, bmc)).To(Succeed())
+ Expect(k8sClient.Delete(ctx, bmcSecret)).To(Succeed())
+ })
+})
+
+var _ = Describe("BMCTask Event Filter", func() {
+ It("Should filter BMCs without tasks on create event", func() {
+ predicate := hasTasksPredicate()
+
+ By("Testing with BMC without tasks")
+ bmcWithoutTasks := &metalv1alpha1.BMC{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-bmc-no-tasks",
+ },
+ Status: metalv1alpha1.BMCStatus{
+ Tasks: []metalv1alpha1.BMCTask{},
+ },
+ }
+
+ // Create event should be filtered (return false)
+ Expect(predicate.Create(MockCreateEvent(bmcWithoutTasks))).To(BeFalse())
+
+ By("Testing with BMC with tasks")
+ bmcWithTasks := &metalv1alpha1.BMC{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-bmc-with-tasks",
+ },
+ Status: metalv1alpha1.BMCStatus{
+ Tasks: []metalv1alpha1.BMCTask{
+ {
+ TaskURI: "/redfish/v1/TaskService/Tasks/1",
+ TaskType: metalv1alpha1.BMCTaskTypeDiskErase,
+ State: "Running",
+ },
+ },
+ },
+ }
+
+ // Create event should pass (return true)
+ Expect(predicate.Create(MockCreateEvent(bmcWithTasks))).To(BeTrue())
+ })
+
+ It("Should filter BMCs without tasks on update event", func() {
+ predicate := hasTasksPredicate()
+
+ By("Testing update with old BMC having tasks, new BMC without tasks")
+ oldBMC := &metalv1alpha1.BMC{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-bmc",
+ },
+ Status: metalv1alpha1.BMCStatus{
+ Tasks: []metalv1alpha1.BMCTask{
+ {
+ TaskURI: "/redfish/v1/TaskService/Tasks/1",
+ TaskType: metalv1alpha1.BMCTaskTypeDiskErase,
+ State: "Running",
+ },
+ },
+ },
+ }
+ newBMC := &metalv1alpha1.BMC{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-bmc",
+ },
+ Status: metalv1alpha1.BMCStatus{
+ Tasks: []metalv1alpha1.BMCTask{},
+ },
+ }
+
+ // Update event should be filtered when new BMC has no tasks
+ Expect(predicate.Update(MockUpdateEvent(oldBMC, newBMC))).To(BeFalse())
+
+ By("Testing update with both BMCs having tasks")
+ newBMCWithTasks := &metalv1alpha1.BMC{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-bmc",
+ },
+ Status: metalv1alpha1.BMCStatus{
+ Tasks: []metalv1alpha1.BMCTask{
+ {
+ TaskURI: "/redfish/v1/TaskService/Tasks/1",
+ TaskType: metalv1alpha1.BMCTaskTypeDiskErase,
+ State: "Completed",
+ },
+ },
+ },
+ }
+
+ // Update event should pass when new BMC has tasks
+ Expect(predicate.Update(MockUpdateEvent(oldBMC, newBMCWithTasks))).To(BeTrue())
+ })
+
+ It("Should always filter delete events", func() {
+ predicate := hasTasksPredicate()
+
+ bmc := &metalv1alpha1.BMC{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-bmc",
+ },
+ Status: metalv1alpha1.BMCStatus{
+ Tasks: []metalv1alpha1.BMCTask{
+ {
+ TaskURI: "/redfish/v1/TaskService/Tasks/1",
+ TaskType: metalv1alpha1.BMCTaskTypeDiskErase,
+ State: "Running",
+ },
+ },
+ },
+ }
+
+ // Delete events should always be filtered regardless of tasks
+ Expect(predicate.Delete(MockDeleteEvent(bmc))).To(BeFalse())
+ })
+
+ It("Should filter generic events based on task presence", func() {
+ predicate := hasTasksPredicate()
+
+ By("Testing generic event without tasks")
+ bmcWithoutTasks := &metalv1alpha1.BMC{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-bmc",
+ },
+ Status: metalv1alpha1.BMCStatus{
+ Tasks: []metalv1alpha1.BMCTask{},
+ },
+ }
+
+ Expect(predicate.Generic(MockGenericEvent(bmcWithoutTasks))).To(BeFalse())
+
+ By("Testing generic event with tasks")
+ bmcWithTasks := &metalv1alpha1.BMC{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-bmc",
+ },
+ Status: metalv1alpha1.BMCStatus{
+ Tasks: []metalv1alpha1.BMCTask{
+ {
+ TaskURI: "/redfish/v1/TaskService/Tasks/1",
+ TaskType: metalv1alpha1.BMCTaskTypeDiskErase,
+ State: "Running",
+ },
+ },
+ },
+ }
+
+ Expect(predicate.Generic(MockGenericEvent(bmcWithTasks))).To(BeTrue())
+ })
+})
+
+var _ = Describe("isTerminalState", func() {
+ It("Should identify terminal states correctly", func() {
+ By("Testing completed state")
+ Expect(isTerminalState("Completed")).To(BeTrue())
+
+ By("Testing failed state")
+ Expect(isTerminalState("Failed")).To(BeTrue())
+
+ By("Testing Redfish terminal states")
+ Expect(isTerminalState("Killed")).To(BeTrue())
+ Expect(isTerminalState("Exception")).To(BeTrue())
+ Expect(isTerminalState("Cancelled")).To(BeTrue())
+
+ By("Testing non-terminal states")
+ Expect(isTerminalState("Running")).To(BeFalse())
+ Expect(isTerminalState("Pending")).To(BeFalse())
+ Expect(isTerminalState("Starting")).To(BeFalse())
+ Expect(isTerminalState("")).To(BeFalse())
+ })
+})
+
+// Helper functions for creating mock events for predicate testing
+
+// MockCreateEvent creates a mock CreateEvent for testing predicates.
+func MockCreateEvent(obj client.Object) event.CreateEvent {
+ return event.CreateEvent{
+ Object: obj,
+ }
+}
+
+// MockUpdateEvent creates a mock UpdateEvent for testing predicates.
+func MockUpdateEvent(oldObj, newObj client.Object) event.UpdateEvent {
+ return event.UpdateEvent{
+ ObjectOld: oldObj,
+ ObjectNew: newObj,
+ }
+}
+
+// MockDeleteEvent creates a mock DeleteEvent for testing predicates.
+func MockDeleteEvent(obj client.Object) event.DeleteEvent {
+ return event.DeleteEvent{
+ Object: obj,
+ }
+}
+
+// MockGenericEvent creates a mock GenericEvent for testing predicates.
+func MockGenericEvent(obj client.Object) event.GenericEvent {
+ return event.GenericEvent{
+ Object: obj,
+ }
+}
diff --git a/internal/controller/server_controller.go b/internal/controller/server_controller.go
index b8218cb6d..707054aa6 100644
--- a/internal/controller/server_controller.go
+++ b/internal/controller/server_controller.go
@@ -104,6 +104,7 @@ type ServerReconciler struct {
// +kubebuilder:rbac:groups=metal.ironcore.dev,resources=servers/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=metal.ironcore.dev,resources=servers/finalizers,verbs=update
// +kubebuilder:rbac:groups=metal.ironcore.dev,resources=serverconfigurations,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=metal.ironcore.dev,resources=servercleanings,verbs=get;list;watch
// +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups="batch",resources=jobs,verbs=get;list;watch;create;update;patch;delete
@@ -285,6 +286,8 @@ func (r *ServerReconciler) ensureServerStateTransition(ctx context.Context, bmcC
return r.handleAvailableState(ctx, bmcClient, server)
case metalv1alpha1.ServerStateReserved:
return r.handleReservedState(ctx, bmcClient, server)
+ case metalv1alpha1.ServerStateTainted:
+ return r.handleTaintedState(ctx, bmcClient, server)
case metalv1alpha1.ServerStateMaintenance:
return r.handleMaintenanceState(ctx, bmcClient, server)
default:
@@ -417,8 +420,17 @@ func (r *ServerReconciler) handleReservedState(ctx context.Context, bmcClient bm
// TODO: This needs be reworked later as the Server cleanup has to happen here. For now we just transition the server
// back to available state.
if server.Spec.ServerClaimRef == nil {
- if modified, err := r.patchServerState(ctx, server, metalv1alpha1.ServerStateAvailable); err != nil || modified {
- return true, err
+ // Check if server has taints
+ if len(server.Spec.Taints) > 0 {
+ log.V(1).Info("Server has taints, transitioning to Tainted state for cleaning")
+ if modified, err := r.patchServerState(ctx, server, metalv1alpha1.ServerStateTainted); err != nil || modified {
+ return true, err
+ }
+ } else {
+ // No taints, transition directly to Available
+ if modified, err := r.patchServerState(ctx, server, metalv1alpha1.ServerStateAvailable); err != nil || modified {
+ return true, err
+ }
}
}
@@ -467,6 +479,54 @@ func (r *ServerReconciler) handleReservedState(ctx context.Context, bmcClient bm
return true, nil
}
+func (r *ServerReconciler) handleTaintedState(ctx context.Context, _ bmc.BMC, server *metalv1alpha1.Server) (bool, error) {
+ log := ctrl.LoggerFrom(ctx)
+
+ // Check if ServerCleaning exists for this server
+ cleaningList := &metalv1alpha1.ServerCleaningList{}
+ if err := r.List(ctx, cleaningList); err != nil {
+ return false, fmt.Errorf("failed to list ServerCleaning resources: %w", err)
+ }
+
+ var activeCleaning *metalv1alpha1.ServerCleaning
+ for i := range cleaningList.Items {
+ cleaning := &cleaningList.Items[i]
+ if cleaning.Spec.ServerRef.Name != server.Name {
+ continue
+ }
+ if cleaning.Status.State == metalv1alpha1.ServerCleaningStateCompleted {
+ // Cleaning completed, remove taints and transition to Available
+ log.V(1).Info("Cleaning completed, removing taints")
+ serverBase := server.DeepCopy()
+ server.Spec.Taints = nil
+ if err := r.Patch(ctx, server, client.MergeFrom(serverBase)); err != nil {
+ return false, fmt.Errorf("failed to remove taints: %w", err)
+ }
+
+ // Transition to Available
+ if modified, err := r.patchServerState(ctx, server, metalv1alpha1.ServerStateAvailable); err != nil || modified {
+ return modified, err
+ }
+ return false, nil
+ }
+ if cleaning.Status.State == metalv1alpha1.ServerCleaningStatePending ||
+ cleaning.Status.State == metalv1alpha1.ServerCleaningStateInProgress {
+ activeCleaning = cleaning
+ break
+ }
+ }
+
+ if activeCleaning == nil {
+ log.V(1).Info("No active ServerCleaning found, waiting for cleaning to be created")
+ // A separate controller or operator should create ServerCleaning
+ // Requeue to check again
+ return true, nil
+ }
+
+ log.V(1).Info("Server cleaning in progress", "cleaningState", activeCleaning.Status.State)
+ return true, nil
+}
+
func (r *ServerReconciler) handleMaintenanceState(ctx context.Context, bmcClient bmc.BMC, server *metalv1alpha1.Server) (bool, error) {
log := ctrl.LoggerFrom(ctx)
if server.Spec.ServerMaintenanceRef == nil {
diff --git a/internal/controller/servercleaning_controller.go b/internal/controller/servercleaning_controller.go
new file mode 100644
index 000000000..3c256ad41
--- /dev/null
+++ b/internal/controller/servercleaning_controller.go
@@ -0,0 +1,766 @@
+// SPDX-FileCopyrightText: 2025 SAP SE or an SAP affiliate company and IronCore contributors
+// SPDX-License-Identifier: Apache-2.0
+
+package controller
+
+import (
+ "context"
+ "fmt"
+ "slices"
+ "time"
+
+ "github.com/ironcore-dev/controller-utils/clientutils"
+ metalv1alpha1 "github.com/ironcore-dev/metal-operator/api/v1alpha1"
+ "github.com/ironcore-dev/metal-operator/bmc"
+ "github.com/ironcore-dev/metal-operator/internal/bmcutils"
+ "k8s.io/apimachinery/pkg/api/meta"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/apimachinery/pkg/types"
+ ctrl "sigs.k8s.io/controller-runtime"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/handler"
+ "sigs.k8s.io/controller-runtime/pkg/reconcile"
+)
+
+const (
+ // ServerCleaningFinalizer is the finalizer for the ServerCleaning resource.
+ ServerCleaningFinalizer = "metal.ironcore.dev/servercleaning"
+
+ // ServerCleaningConditionTypeCleaning indicates that cleaning is in progress
+ ServerCleaningConditionTypeCleaning = "Cleaning"
+
+ // ServerCleaningConditionReasonInProgress indicates cleaning is in progress
+ ServerCleaningConditionReasonInProgress = "CleaningInProgress"
+
+ // ServerCleaningConditionReasonCompleted indicates cleaning is completed
+ ServerCleaningConditionReasonCompleted = "CleaningCompleted"
+
+ // ServerCleaningConditionReasonFailed indicates cleaning failed
+ ServerCleaningConditionReasonFailed = "CleaningFailed"
+
+ // Task state constants
+ taskStateCompleted = "Completed"
+ taskStateException = "Exception"
+ taskStateCancelled = "Cancelled"
+ taskStateKilled = "Killed"
+ taskStateFailed = "Failed"
+ taskStateNew = "New"
+)
+
+// ServerCleaningReconciler reconciles a ServerCleaning object
+type ServerCleaningReconciler struct {
+ client.Client
+ Scheme *runtime.Scheme
+}
+
+// +kubebuilder:rbac:groups=metal.ironcore.dev,resources=servercleanings,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=metal.ironcore.dev,resources=servercleanings/status,verbs=get;update;patch
+// +kubebuilder:rbac:groups=metal.ironcore.dev,resources=servercleanings/finalizers,verbs=update
+// +kubebuilder:rbac:groups=metal.ironcore.dev,resources=servermaintenances,verbs=get;list;watch;create;update;patch;delete
+
+// Reconcile is part of the main kubernetes reconciliation loop which aims to
+// move the current state of the cluster closer to the desired state.
+func (r *ServerCleaningReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+ cleaning := &metalv1alpha1.ServerCleaning{}
+ if err := r.Get(ctx, req.NamespacedName, cleaning); err != nil {
+ return ctrl.Result{}, client.IgnoreNotFound(err)
+ }
+ return r.reconcileExists(ctx, cleaning)
+}
+
+func (r *ServerCleaningReconciler) reconcileExists(ctx context.Context, cleaning *metalv1alpha1.ServerCleaning) (ctrl.Result, error) {
+ if !cleaning.DeletionTimestamp.IsZero() {
+ return r.delete(ctx, cleaning)
+ }
+ return r.reconcile(ctx, cleaning)
+}
+
+func (r *ServerCleaningReconciler) reconcile(ctx context.Context, cleaning *metalv1alpha1.ServerCleaning) (ctrl.Result, error) {
+ log := ctrl.LoggerFrom(ctx)
+ log.V(1).Info("Reconciling ServerCleaning")
+
+ // Ensure finalizer
+ if modified, err := clientutils.PatchEnsureFinalizer(ctx, r.Client, cleaning, ServerCleaningFinalizer); err != nil || modified {
+ return ctrl.Result{}, err
+ }
+
+ // Set initial state if not set
+ if cleaning.Status.State == "" {
+ if modified, err := r.patchCleaningState(ctx, cleaning, metalv1alpha1.ServerCleaningStatePending); err != nil || modified {
+ return ctrl.Result{}, err
+ }
+ }
+
+ return r.ensureServerCleaningStateTransition(ctx, cleaning)
+}
+
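+// ensureServerCleaningStateTransition invokes the handler for the current cleaning state.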
+func (r *ServerCleaningReconciler) ensureServerCleaningStateTransition(ctx context.Context, cleaning *metalv1alpha1.ServerCleaning) (ctrl.Result, error) {
+ log := ctrl.LoggerFrom(ctx)
+ switch cleaning.Status.State {
+ case metalv1alpha1.ServerCleaningStatePending:
+ return r.handlePendingState(ctx, cleaning)
+ case metalv1alpha1.ServerCleaningStateInProgress:
+ return r.handleInProgressState(ctx, cleaning)
+ case metalv1alpha1.ServerCleaningStateCompleted:
+ return r.handleCompletedState(ctx, cleaning)
+ case metalv1alpha1.ServerCleaningStateFailed:
+ return r.handleFailedState(ctx, cleaning)
+ default:
+ log.V(1).Info("Unknown ServerCleaning state, skipping reconciliation", "State", cleaning.Status.State)
+ return ctrl.Result{}, nil
+ }
+}
+
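+// handlePendingState resolves the target servers, initializes their status entries,
+// and initiates BMC cleaning for every server in Tainted state before transitioning
+// the ServerCleaning to InProgress.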
+func (r *ServerCleaningReconciler) handlePendingState(ctx context.Context, cleaning *metalv1alpha1.ServerCleaning) (ctrl.Result, error) {
+ log := ctrl.LoggerFrom(ctx)
+
+ // Get list of servers to clean
+ servers, err := r.getServersForCleaning(ctx, cleaning)
+ if err != nil {
+ return ctrl.Result{}, fmt.Errorf("failed to get servers for cleaning: %w", err)
+ }
+
+ if len(servers) == 0 {
+ log.V(1).Info("No servers found for cleaning")
+ return ctrl.Result{}, nil
+ }
+
+ // Update selected servers count
+ if err := r.updateSelectedServersCount(ctx, cleaning, int32(len(servers))); err != nil {
+ return ctrl.Result{}, err
+ }
+
+ // Initialize server status entries
+ if err := r.initializeServerStatuses(ctx, cleaning, servers); err != nil {
+ return ctrl.Result{}, err
+ }
+
+ // Initiate BMC cleaning operations for each server
+ pendingCount := int32(0)
+ inProgressCount := int32(0)
+ failedCount := int32(0)
+
+ for _, server := range servers {
+ if server.Status.State != metalv1alpha1.ServerStateTainted {
+ log.V(1).Info("Server is not in Tainted state, skipping", "Server", server.Name, "State", server.Status.State)
+ // Skipped servers keep their Pending status entry, so count them as pending.
+ pendingCount++
+ continue
+ }
+
+ // Initiate cleaning operations via BMC
+ if err := r.initiateBMCCleaning(ctx, cleaning, &server); err != nil {
+ log.Error(err, "Failed to initiate BMC cleaning for server", "Server", server.Name)
+ if err := r.updateServerStatus(ctx, cleaning, server.Name, metalv1alpha1.ServerCleaningStateFailed, fmt.Sprintf("Failed to initiate cleaning: %v", err)); err != nil {
+ return ctrl.Result{}, err
+ }
+ failedCount++
+ continue
+ }
+
+ inProgressCount++
+ if err := r.updateServerStatus(ctx, cleaning, server.Name, metalv1alpha1.ServerCleaningStateInProgress, "Cleaning initiated"); err != nil {
+ return ctrl.Result{}, err
+ }
+ }
+
+ // Update status counts
+ if err := r.updateCleaningCounts(ctx, cleaning, pendingCount, inProgressCount, 0, failedCount); err != nil {
+ return ctrl.Result{}, err
+ }
+
+ // Update status condition
+ if err := r.setCondition(ctx, cleaning, metav1.Condition{
+ Type: ServerCleaningConditionTypeCleaning,
+ Status: metav1.ConditionTrue,
+ Reason: ServerCleaningConditionReasonInProgress,
+ Message: fmt.Sprintf("Cleaning operations initiated for %d servers", inProgressCount),
+ ObservedGeneration: cleaning.Generation,
+ }); err != nil {
+ return ctrl.Result{}, err
+ }
+
+ // Transition to InProgress and requeue to monitor task progress.
+ if _, err := r.patchCleaningState(ctx, cleaning, metalv1alpha1.ServerCleaningStateInProgress); err != nil {
+ return ctrl.Result{}, err
+ }
+
+ return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
+}
+
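+// handleInProgressState monitors the BMC tasks for each server, updates per-server
+// progress, and moves the ServerCleaning to Completed or Failed once every server
+// has reached a terminal state.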
+func (r *ServerCleaningReconciler) handleInProgressState(ctx context.Context, cleaning *metalv1alpha1.ServerCleaning) (ctrl.Result, error) {
+ log := ctrl.LoggerFrom(ctx)
+
+ // Get servers for cleaning
+ servers, err := r.getServersForCleaning(ctx, cleaning)
+ if err != nil {
+ return ctrl.Result{}, fmt.Errorf("failed to get servers for cleaning: %w", err)
+ }
+
+ if len(servers) == 0 {
+ log.V(1).Info("No servers found for monitoring")
+ return ctrl.Result{}, nil
+ }
+
+ // Track counts
+ var inProgressCount, completedCount, failedCount int32
+ allComplete := true
+
+ // Monitor each server's cleaning tasks
+ for _, server := range servers {
+ // Find the server status entry
+ var serverStatus *metalv1alpha1.ServerCleaningStatusEntry
+ for i := range cleaning.Status.ServerCleaningStatuses {
+ if cleaning.Status.ServerCleaningStatuses[i].ServerName == server.Name {
+ serverStatus = &cleaning.Status.ServerCleaningStatuses[i]
+ break
+ }
+ }
+
+ if serverStatus == nil {
+ log.V(1).Info("No status entry found for server", "server", server.Name)
+ continue
+ }
+
+ // Skip servers that are already in terminal states
+ if serverStatus.State == metalv1alpha1.ServerCleaningStateCompleted {
+ completedCount++
+ continue
+ }
+ if serverStatus.State == metalv1alpha1.ServerCleaningStateFailed {
+ failedCount++
+ continue
+ }
+
+ // Check BMC tasks for this server.
+ // Tasks live in BMC.Status.Tasks and are monitored by the BMCTask controller.
+ tasks, err := r.getTasksForServer(ctx, &server, cleaning.Name)
+ if err != nil {
+ log.Error(err, "Failed to get BMC tasks for server", "server", server.Name)
+ allComplete = false
+ inProgressCount++
+ continue
+ }
+
+ // Check if all tasks are complete
+ tasksComplete, tasksFailed := r.checkTasksComplete(tasks)
+
+ if tasksComplete {
+ // All tasks finished - update server status
+ if tasksFailed {
+ log.Info("Cleaning completed with failures", "server", server.Name)
+ if err := r.updateServerStatus(ctx, cleaning, server.Name, metalv1alpha1.ServerCleaningStateFailed, "One or more cleaning tasks failed"); err != nil {
+ return ctrl.Result{}, err
+ }
+ failedCount++
+ } else {
+ log.Info("Cleaning completed successfully", "server", server.Name)
+ if err := r.updateServerStatus(ctx, cleaning, server.Name, metalv1alpha1.ServerCleaningStateCompleted, "All cleaning tasks completed successfully"); err != nil {
+ return ctrl.Result{}, err
+ }
+ completedCount++
+ }
+ } else {
+ // Tasks still in progress
+ inProgressCount++
+ allComplete = false
+
+ // Calculate progress
+ completedTaskCount := 0
+ totalPercent := int32(0)
+ for _, task := range tasks {
+ if task.State == taskStateCompleted {
+ completedTaskCount++
+ }
+ totalPercent += task.PercentComplete
+ }
+ avgPercent := int32(0)
+ if len(tasks) > 0 {
+ avgPercent = totalPercent / int32(len(tasks))
+ }
+ progressMsg := fmt.Sprintf("Cleaning in progress: %d%% (%d/%d tasks completed)", avgPercent, completedTaskCount, len(tasks))
+
+ if err := r.updateServerStatus(ctx, cleaning, server.Name, metalv1alpha1.ServerCleaningStateInProgress, progressMsg); err != nil {
+ return ctrl.Result{}, err
+ }
+ }
+ }
+
+ // Update counts
+ if err := r.updateCleaningCounts(ctx, cleaning, 0, inProgressCount, completedCount, failedCount); err != nil {
+ return ctrl.Result{}, err
+ }
+
+ // Check if all cleanings are complete
+ totalServers := cleaning.Status.SelectedServers
+ processedServers := completedCount + failedCount
+
+ if allComplete && processedServers >= totalServers {
+ // All servers processed
+ if failedCount > 0 {
+ log.V(1).Info("Cleaning completed with failures", "completed", completedCount, "failed", failedCount)
+ if err := r.setCondition(ctx, cleaning, metav1.Condition{
+ Type: ServerCleaningConditionTypeCleaning,
+ Status: metav1.ConditionFalse,
+ Reason: ServerCleaningConditionReasonFailed,
+ Message: fmt.Sprintf("Cleaning completed: %d succeeded, %d failed", completedCount, failedCount),
+ ObservedGeneration: cleaning.Generation,
+ }); err != nil {
+ return ctrl.Result{}, err
+ }
+ if modified, err := r.patchCleaningState(ctx, cleaning, metalv1alpha1.ServerCleaningStateFailed); err != nil || modified {
+ return ctrl.Result{}, err
+ }
+ } else {
+ log.V(1).Info("Cleaning completed successfully", "completed", completedCount)
+ if err := r.setCondition(ctx, cleaning, metav1.Condition{
+ Type: ServerCleaningConditionTypeCleaning,
+ Status: metav1.ConditionTrue,
+ Reason: ServerCleaningConditionReasonCompleted,
+ Message: fmt.Sprintf("Cleaning completed successfully for %d servers", completedCount),
+ ObservedGeneration: cleaning.Generation,
+ }); err != nil {
+ return ctrl.Result{}, err
+ }
+ if modified, err := r.patchCleaningState(ctx, cleaning, metalv1alpha1.ServerCleaningStateCompleted); err != nil || modified {
+ return ctrl.Result{}, err
+ }
+ }
+ return ctrl.Result{}, nil
+ }
+
+ // Still in progress, requeue to check again
+ log.V(1).Info("Cleaning still in progress", "inProgress", inProgressCount, "completed", completedCount, "failed", failedCount)
+ return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
+}
+
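+// handleCompletedState is terminal; completed cleanings are left untouched.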
+func (r *ServerCleaningReconciler) handleCompletedState(ctx context.Context, _ *metalv1alpha1.ServerCleaning) (ctrl.Result, error) {
+ log := ctrl.LoggerFrom(ctx)
+ log.V(1).Info("ServerCleaning completed, nothing to do")
+ return ctrl.Result{}, nil
+}
+
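+// handleFailedState is terminal; failed cleanings require manual intervention.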
+func (r *ServerCleaningReconciler) handleFailedState(ctx context.Context, _ *metalv1alpha1.ServerCleaning) (ctrl.Result, error) {
+ log := ctrl.LoggerFrom(ctx)
+ log.V(1).Info("ServerCleaning failed, manual intervention required")
+ return ctrl.Result{}, nil
+}
+
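+// delete removes the finalizer so the ServerCleaning object can be removed.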
+func (r *ServerCleaningReconciler) delete(ctx context.Context, cleaning *metalv1alpha1.ServerCleaning) (ctrl.Result, error) {
+ log := ctrl.LoggerFrom(ctx)
+ log.V(1).Info("Deleting ServerCleaning")
+
+ // Remove finalizer
+ if err := clientutils.PatchRemoveFinalizer(ctx, r.Client, cleaning, ServerCleaningFinalizer); err != nil {
+ return ctrl.Result{}, err
+ }
+
+ return ctrl.Result{}, nil
+}
+
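+// patchCleaningState patches Status.State and reports whether the object was modified.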
+func (r *ServerCleaningReconciler) patchCleaningState(ctx context.Context, cleaning *metalv1alpha1.ServerCleaning, state metalv1alpha1.ServerCleaningState) (bool, error) {
+ if cleaning.Status.State == state {
+ return false, nil
+ }
+
+ cleaningBase := cleaning.DeepCopy()
+ cleaning.Status.State = state
+ if err := r.Status().Patch(ctx, cleaning, client.MergeFrom(cleaningBase)); err != nil {
+ return false, fmt.Errorf("failed to patch ServerCleaning state: %w", err)
+ }
+
+ return true, nil
+}
+
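+// setCondition updates or inserts the given condition in Status.Conditions.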
+func (r *ServerCleaningReconciler) setCondition(ctx context.Context, cleaning *metalv1alpha1.ServerCleaning, condition metav1.Condition) error {
+ cleaningBase := cleaning.DeepCopy()
+ condition.LastTransitionTime = metav1.Now()
+ meta.SetStatusCondition(&cleaning.Status.Conditions, condition)
+ if err := r.Status().Patch(ctx, cleaning, client.MergeFrom(cleaningBase)); err != nil {
+ return fmt.Errorf("failed to update conditions: %w", err)
+ }
+ return nil
+}
+
+// SetupWithManager sets up the controller with the Manager.
+func (r *ServerCleaningReconciler) SetupWithManager(mgr ctrl.Manager) error {
+ return ctrl.NewControllerManagedBy(mgr).
+ For(&metalv1alpha1.ServerCleaning{}).
+ Owns(&metalv1alpha1.ServerMaintenance{}).
+ Watches(
+ &metalv1alpha1.Server{},
+ handler.EnqueueRequestsFromMapFunc(r.mapServerToServerCleaning),
+ ).
+ Watches(
+ &metalv1alpha1.BMC{},
+ handler.EnqueueRequestsFromMapFunc(r.mapBMCToServerCleaning),
+ ).
+ Complete(r)
+}
+
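+// mapServerToServerCleaning maps Server events to the ServerCleaning objects that reference the server.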
+func (r *ServerCleaningReconciler) mapServerToServerCleaning(ctx context.Context, obj client.Object) []reconcile.Request {
+ server := obj.(*metalv1alpha1.Server)
+
+ cleaningList := &metalv1alpha1.ServerCleaningList{}
+ if err := r.List(ctx, cleaningList); err != nil {
+ return nil
+ }
+
+ var requests []reconcile.Request
+ for _, cleaning := range cleaningList.Items {
+ if cleaning.Spec.ServerRef != nil && cleaning.Spec.ServerRef.Name == server.Name {
+ requests = append(requests, reconcile.Request{
+ NamespacedName: client.ObjectKeyFromObject(&cleaning),
+ })
+ }
+ }
+
+ return requests
+}
+
+// mapBMCToServerCleaning maps BMC updates (specifically task status changes) to ServerCleaning reconcile requests
+func (r *ServerCleaningReconciler) mapBMCToServerCleaning(ctx context.Context, obj client.Object) []reconcile.Request {
+ bmcObj := obj.(*metalv1alpha1.BMC)
+
+ // Find all servers that reference this BMC
+ serverList := &metalv1alpha1.ServerList{}
+ if err := r.List(ctx, serverList); err != nil {
+ return nil
+ }
+
+ var affectedServers []string
+ for _, server := range serverList.Items {
+ if server.Spec.BMCRef != nil && server.Spec.BMCRef.Name == bmcObj.Name {
+ affectedServers = append(affectedServers, server.Name)
+ }
+ }
+
+ // Find ServerCleaning objects that are working on these servers
+ cleaningList := &metalv1alpha1.ServerCleaningList{}
+ if err := r.List(ctx, cleaningList); err != nil {
+ return nil
+ }
+
+ var requests []reconcile.Request
+ for _, cleaning := range cleaningList.Items {
+ // Only reconcile if cleaning is in progress
+ if cleaning.Status.State != metalv1alpha1.ServerCleaningStateInProgress {
+ continue
+ }
+
+ // Check if this cleaning is working on any of the affected servers
+ if cleaning.Spec.ServerRef != nil {
+ if slices.Contains(affectedServers, cleaning.Spec.ServerRef.Name) {
+ requests = append(requests, reconcile.Request{
+ NamespacedName: client.ObjectKeyFromObject(&cleaning),
+ })
+ }
+ }
+ }
+
+ return requests
+}
+
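+// getServersForCleaning resolves the target servers from either spec.serverRef or spec.serverSelector.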
+func (r *ServerCleaningReconciler) getServersForCleaning(ctx context.Context, cleaning *metalv1alpha1.ServerCleaning) ([]metalv1alpha1.Server, error) {
+ // If ServerRef is specified, return that single server
+ if cleaning.Spec.ServerRef != nil {
+ server, err := GetServerByName(ctx, r.Client, cleaning.Spec.ServerRef.Name)
+ if err != nil {
+ return nil, fmt.Errorf("failed to get server %s: %w", cleaning.Spec.ServerRef.Name, err)
+ }
+ return []metalv1alpha1.Server{*server}, nil
+ }
+
+ // If ServerSelector is specified, list matching servers
+ if cleaning.Spec.ServerSelector != nil {
+ serverList := &metalv1alpha1.ServerList{}
+ selector, err := metav1.LabelSelectorAsSelector(cleaning.Spec.ServerSelector)
+ if err != nil {
+ return nil, fmt.Errorf("failed to convert label selector: %w", err)
+ }
+
+ if err := r.List(ctx, serverList, client.MatchingLabelsSelector{Selector: selector}); err != nil {
+ return nil, fmt.Errorf("failed to list servers: %w", err)
+ }
+
+ return serverList.Items, nil
+ }
+
+ return nil, fmt.Errorf("neither serverRef nor serverSelector is specified")
+}
+
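+// updateSelectedServersCount patches Status.SelectedServers.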
+func (r *ServerCleaningReconciler) updateSelectedServersCount(ctx context.Context, cleaning *metalv1alpha1.ServerCleaning, count int32) error {
+ cleaningBase := cleaning.DeepCopy()
+ cleaning.Status.SelectedServers = count
+ if err := r.Status().Patch(ctx, cleaning, client.MergeFrom(cleaningBase)); err != nil {
+ return fmt.Errorf("failed to update selected servers count: %w", err)
+ }
+ return nil
+}
+
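+// initializeServerStatuses creates a Pending status entry for every selected server.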
+func (r *ServerCleaningReconciler) initializeServerStatuses(ctx context.Context, cleaning *metalv1alpha1.ServerCleaning, servers []metalv1alpha1.Server) error {
+ cleaningBase := cleaning.DeepCopy()
+ cleaning.Status.ServerCleaningStatuses = make([]metalv1alpha1.ServerCleaningStatusEntry, 0, len(servers))
+
+ for _, server := range servers {
+ cleaning.Status.ServerCleaningStatuses = append(cleaning.Status.ServerCleaningStatuses, metalv1alpha1.ServerCleaningStatusEntry{
+ ServerName: server.Name,
+ State: metalv1alpha1.ServerCleaningStatePending,
+ Message: "Waiting to start cleaning",
+ LastUpdateTime: metav1.Now(),
+ })
+ }
+
+ if err := r.Status().Patch(ctx, cleaning, client.MergeFrom(cleaningBase)); err != nil {
+ return fmt.Errorf("failed to initialize server statuses: %w", err)
+ }
+ return nil
+}
+
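+// updateServerStatus updates (or adds) the status entry for the named server.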
+func (r *ServerCleaningReconciler) updateServerStatus(ctx context.Context, cleaning *metalv1alpha1.ServerCleaning, serverName string, state metalv1alpha1.ServerCleaningState, message string) error {
+ cleaningBase := cleaning.DeepCopy()
+
+ // Find and update the server status entry
+ found := false
+ for i := range cleaning.Status.ServerCleaningStatuses {
+ if cleaning.Status.ServerCleaningStatuses[i].ServerName == serverName {
+ cleaning.Status.ServerCleaningStatuses[i].State = state
+ cleaning.Status.ServerCleaningStatuses[i].Message = message
+ cleaning.Status.ServerCleaningStatuses[i].LastUpdateTime = metav1.Now()
+ found = true
+ break
+ }
+ }
+
+ // If not found, add new entry
+ if !found {
+ cleaning.Status.ServerCleaningStatuses = append(cleaning.Status.ServerCleaningStatuses, metalv1alpha1.ServerCleaningStatusEntry{
+ ServerName: serverName,
+ State: state,
+ Message: message,
+ LastUpdateTime: metav1.Now(),
+ })
+ }
+
+ if err := r.Status().Patch(ctx, cleaning, client.MergeFrom(cleaningBase)); err != nil {
+ return fmt.Errorf("failed to update server status: %w", err)
+ }
+ return nil
+}
+
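+// updateCleaningCounts patches the aggregate per-state counters in the status.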
+func (r *ServerCleaningReconciler) updateCleaningCounts(ctx context.Context, cleaning *metalv1alpha1.ServerCleaning, pending, inProgress, completed, failed int32) error {
+ cleaningBase := cleaning.DeepCopy()
+ cleaning.Status.PendingCleanings = pending
+ cleaning.Status.InProgressCleanings = inProgress
+ cleaning.Status.CompletedCleanings = completed
+ cleaning.Status.FailedCleanings = failed
+
+ if err := r.Status().Patch(ctx, cleaning, client.MergeFrom(cleaningBase)); err != nil {
+ return fmt.Errorf("failed to update cleaning counts: %w", err)
+ }
+ return nil
+}
+
+// addTaskToBMC adds a BMCTask to the specified BMC's status
+func (r *ServerCleaningReconciler) addTaskToBMC(ctx context.Context, bmcName string, task metalv1alpha1.BMCTask) error {
+ log := ctrl.LoggerFrom(ctx)
+
+ // Get the BMC resource
+ bmcObj := &metalv1alpha1.BMC{}
+ if err := r.Get(ctx, types.NamespacedName{Name: bmcName}, bmcObj); err != nil {
+ return fmt.Errorf("failed to get BMC %s: %w", bmcName, err)
+ }
+
+ // Add the task to BMC.Status.Tasks.
+ bmcBase := bmcObj.DeepCopy()
+ bmcObj.Status.Tasks = append(bmcObj.Status.Tasks, task)
+
+ // Keep only the last 10 tasks to prevent unbounded growth.
+ if len(bmcObj.Status.Tasks) > 10 {
+ bmcObj.Status.Tasks = bmcObj.Status.Tasks[len(bmcObj.Status.Tasks)-10:]
+ }
+
+ // Patch the BMC status, consistent with the other status updates in this controller.
+ if err := r.Status().Patch(ctx, bmcObj, client.MergeFrom(bmcBase)); err != nil {
+ return fmt.Errorf("failed to update BMC tasks: %w", err)
+ }
+
+ log.V(1).Info("Added task to BMC", "bmc", bmcName, "taskType", task.TaskType, "taskURI", task.TaskURI)
+ return nil
+}
+
+// getTasksForServer retrieves tasks from BMC.Status.Tasks for a specific server's cleaning operation
+func (r *ServerCleaningReconciler) getTasksForServer(ctx context.Context, server *metalv1alpha1.Server, _ string) ([]metalv1alpha1.BMCTask, error) {
+ // Get the BMC for this server
+ if server.Spec.BMCRef == nil {
+ return nil, fmt.Errorf("server %s has no BMCRef", server.Name)
+ }
+
+ bmcObj := &metalv1alpha1.BMC{}
+ if err := r.Get(ctx, types.NamespacedName{Name: server.Spec.BMCRef.Name}, bmcObj); err != nil {
+ return nil, fmt.Errorf("failed to get BMC %s: %w", server.Spec.BMCRef.Name, err)
+ }
+
+ // Filter tasks that belong to this cleaning operation.
+ // We identify our tasks by matching the cleaning-related task types.
+ var relevantTasks []metalv1alpha1.BMCTask
+ for _, task := range bmcObj.Status.Tasks {
+ // Check if this is a cleaning-related task type
+ if task.TaskType == metalv1alpha1.BMCTaskTypeDiskErase ||
+ task.TaskType == metalv1alpha1.BMCTaskTypeBIOSReset ||
+ task.TaskType == metalv1alpha1.BMCTaskTypeBMCReset ||
+ task.TaskType == metalv1alpha1.BMCTaskTypeNetworkClear {
+ relevantTasks = append(relevantTasks, task)
+ }
+ }
+
+ return relevantTasks, nil
+}
+
+// checkTasksComplete checks if all tasks are in terminal states and returns completion status
+func (r *ServerCleaningReconciler) checkTasksComplete(tasks []metalv1alpha1.BMCTask) (allComplete bool, anyFailed bool) {
+ if len(tasks) == 0 {
+ return true, false
+ }
+
+ allComplete = true
+ anyFailed = false
+
+ for _, task := range tasks {
+ switch task.State {
+ case taskStateCompleted:
+ // Terminal state, finished successfully.
+ case taskStateException, taskStateCancelled, taskStateKilled, taskStateFailed:
+ // Terminal state, finished unsuccessfully.
+ anyFailed = true
+ default:
+ // Any other state means the task is still in progress.
+ allComplete = false
+ }
+ }
+
+ return allComplete, anyFailed
+}
+
+// initiateBMCCleaning initiates cleaning operations directly via BMC and stores task information
+func (r *ServerCleaningReconciler) initiateBMCCleaning(ctx context.Context, cleaning *metalv1alpha1.ServerCleaning, server *metalv1alpha1.Server) error {
+ log := ctrl.LoggerFrom(ctx)
+
+ // Get BMC client for this server
+ bmcClient, err := bmcutils.GetBMCClientForServer(ctx, r.Client, server, false, bmc.Options{})
+ if err != nil {
+ return fmt.Errorf("failed to get BMC client: %w", err)
+ }
+ defer bmcClient.Logout()
+
+ systemURI := server.Spec.SystemURI
+ if systemURI == "" {
+ return fmt.Errorf("server %s has no system URI", server.Name)
+ }
+
+ // Get BMC reference for adding tasks
+ if server.Spec.BMCRef == nil {
+ return fmt.Errorf("server %s has no BMCRef", server.Name)
+ }
+ bmcName := server.Spec.BMCRef.Name
+ taskCount := 0
+
+ // Initiate disk wipe if requested
+ if cleaning.Spec.DiskWipe != nil {
+ log.V(1).Info("Initiating disk erase", "server", server.Name, "method", cleaning.Spec.DiskWipe.Method)
+ tasks, err := bmcClient.EraseDisk(ctx, systemURI, bmc.DiskWipeMethod(cleaning.Spec.DiskWipe.Method))
+ if err != nil {
+ return fmt.Errorf("failed to initiate disk wipe: %w", err)
+ }
+ // Add each disk erase task to BMC.Status.Tasks
+ for _, task := range tasks {
+ bmcTask := metalv1alpha1.BMCTask{
+ TaskURI: task.TaskURI,
+ TaskType: metalv1alpha1.BMCTaskTypeDiskErase,
+ TargetID: task.TargetID,
+ State: taskStateNew,
+ PercentComplete: 0,
+ LastUpdateTime: metav1.Now(),
+ }
+ if err := r.addTaskToBMC(ctx, bmcName, bmcTask); err != nil {
+ return fmt.Errorf("failed to add disk erase task to BMC: %w", err)
+ }
+ taskCount++
+ }
+ log.V(1).Info("Disk wipe tasks created", "server", server.Name, "count", len(tasks))
+ }
+
+ // Initiate BIOS reset if requested
+ if cleaning.Spec.BIOSReset {
+ log.V(1).Info("Initiating BIOS reset", "server", server.Name)
+ task, err := bmcClient.ResetBIOSToDefaults(ctx, systemURI)
+ if err != nil {
+ return fmt.Errorf("failed to initiate BIOS reset: %w", err)
+ }
+ if task != nil {
+ bmcTask := metalv1alpha1.BMCTask{
+ TaskURI: task.TaskURI,
+ TaskType: metalv1alpha1.BMCTaskTypeBIOSReset,
+ TargetID: task.TargetID,
+ State: taskStateNew,
+ PercentComplete: 0,
+ LastUpdateTime: metav1.Now(),
+ }
+ if err := r.addTaskToBMC(ctx, bmcName, bmcTask); err != nil {
+ return fmt.Errorf("failed to add BIOS reset task to BMC: %w", err)
+ }
+ taskCount++
+ log.V(1).Info("BIOS reset task created", "server", server.Name, "taskURI", task.TaskURI)
+ }
+ }
+
+ // Initiate BMC reset if requested
+ // TODO: BMC reset requires manager UUID which is not readily available from server spec.
+ // For now, BMC reset will be handled via ServerMaintenance or manual intervention.
+ if cleaning.Spec.BMCReset {
+ log.V(1).Info("BMC reset requested but not yet implemented via direct BMC access", "server", server.Name)
+ // Note: BMC reset is a critical operation that may disconnect the BMC client,
+ // so it should be done last or via ServerMaintenance with proper handling.
+ }
+
+ // Initiate network config clear if requested
+ if cleaning.Spec.NetworkCleanup {
+ log.V(1).Info("Initiating network configuration clear", "server", server.Name)
+ task, err := bmcClient.ClearNetworkConfiguration(ctx, systemURI)
+ if err != nil {
+ // Network cleanup is non-critical, log and continue
+ log.Error(err, "Failed to initiate network config clear (non-critical)", "server", server.Name)
+ } else if task != nil {
+ bmcTask := metalv1alpha1.BMCTask{
+ TaskURI: task.TaskURI,
+ TaskType: metalv1alpha1.BMCTaskTypeNetworkClear,
+ TargetID: task.TargetID,
+ State: taskStateNew,
+ PercentComplete: 0,
+ LastUpdateTime: metav1.Now(),
+ }
+ if err := r.addTaskToBMC(ctx, bmcName, bmcTask); err != nil {
+ log.Error(err, "Failed to add network clear task to BMC (non-critical)", "server", server.Name)
+ } else {
+ taskCount++
+ log.V(1).Info("Network config clear task created", "server", server.Name, "taskURI", task.TaskURI)
+ }
+ }
+ }
+
+ // Tasks now live in BMC.Status.Tasks and will be monitored by the BMCTask controller.
+ if taskCount > 0 {
+ log.Info("Cleaning tasks initiated and added to BMC", "server", server.Name, "bmc", bmcName, "taskCount", taskCount)
+ } else {
+ log.Info("No cleaning tasks created (all operations completed synchronously)", "server", server.Name)
+ }
+
+ return nil
+}
diff --git a/internal/controller/servercleaning_controller_test.go b/internal/controller/servercleaning_controller_test.go
new file mode 100644
index 000000000..f251c9b84
--- /dev/null
+++ b/internal/controller/servercleaning_controller_test.go
@@ -0,0 +1,670 @@
+// SPDX-FileCopyrightText: 2025 SAP SE or an SAP affiliate company and IronCore contributors
+// SPDX-License-Identifier: Apache-2.0
+
+package controller
+
+import (
+ "time"
+
+ metalv1alpha1 "github.com/ironcore-dev/metal-operator/api/v1alpha1"
+ . "github.com/onsi/ginkgo/v2"
+ . "github.com/onsi/gomega"
+ corev1 "k8s.io/api/core/v1"
+ apierrors "k8s.io/apimachinery/pkg/api/errors"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+ . "sigs.k8s.io/controller-runtime/pkg/envtest/komega"
+)
+
+var _ = Describe("ServerCleaning Controller", func() {
+ ns := SetupTest(nil)
+
+ AfterEach(func(ctx SpecContext) {
+ EnsureCleanState()
+ })
+
+ It("Should successfully create and reconcile a ServerCleaning resource with serverRef", func(ctx SpecContext) {
+ By("Creating a Server resource in Tainted state")
+ server := &metalv1alpha1.Server{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-server-",
+ Namespace: ns.Name,
+ Labels: map[string]string{
+ "test": "cleaning",
+ },
+ },
+ Spec: metalv1alpha1.ServerSpec{
+ SystemUUID: "test-system-uuid-1",
+ SystemURI: "/redfish/v1/Systems/1",
+ BMCRef: &corev1.LocalObjectReference{
+ Name: "test-bmc",
+ },
+ Taints: []corev1.Taint{
+ {
+ Key: "metal.ironcore.dev/tainted",
+ Effect: corev1.TaintEffectNoSchedule,
+ },
+ },
+ },
+ }
+ Expect(k8sClient.Create(ctx, server)).To(Succeed())
+
+ By("Setting Server state to Tainted")
+ Eventually(UpdateStatus(server, func() {
+ server.Status.State = metalv1alpha1.ServerStateTainted
+ })).Should(Succeed())
+
+ By("Creating a ServerCleaning resource")
+ cleaning := &metalv1alpha1.ServerCleaning{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-cleaning-",
+ Namespace: ns.Name,
+ },
+ Spec: metalv1alpha1.ServerCleaningSpec{
+ ServerRef: &corev1.LocalObjectReference{
+ Name: server.Name,
+ },
+ DiskWipe: &metalv1alpha1.DiskWipeConfig{
+ Method: metalv1alpha1.DiskWipeMethodQuick,
+ IncludeBootDrives: true,
+ },
+ BIOSReset: true,
+ NetworkCleanup: true,
+ },
+ }
+ Expect(k8sClient.Create(ctx, cleaning)).To(Succeed())
+
+ By("Ensuring ServerCleaning transitions to Pending state")
+ Eventually(Object(cleaning)).Should(SatisfyAll(
+ HaveField("Status.State", metalv1alpha1.ServerCleaningStatePending),
+ ))
+
+ By("Ensuring ServerCleaning has finalizer")
+ Eventually(Object(cleaning)).Should(SatisfyAll(
+ HaveField("Finalizers", ContainElement(ServerCleaningFinalizer)),
+ ))
+
+ By("Ensuring ServerCleaning transitions to InProgress state")
+ Eventually(Object(cleaning)).WithTimeout(2 * time.Minute).Should(SatisfyAll(
+ HaveField("Status.State", metalv1alpha1.ServerCleaningStateInProgress),
+ HaveField("Status.SelectedServers", BeNumerically(">", 0)),
+ ))
+
+ By("Ensuring ServerCleaning status has server status entry")
+ Eventually(func(g Gomega) {
+ g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(cleaning), cleaning)).To(Succeed())
+ g.Expect(cleaning.Status.ServerCleaningStatuses).NotTo(BeEmpty())
+ g.Expect(cleaning.Status.ServerCleaningStatuses[0].ServerName).To(Equal(server.Name))
+ }).Should(Succeed())
+
+ // Cleanup
+ Expect(k8sClient.Delete(ctx, cleaning)).To(Succeed())
+ Eventually(Get(cleaning)).Should(Satisfy(apierrors.IsNotFound))
+ Expect(k8sClient.Delete(ctx, server)).To(Succeed())
+ })
+
+ It("Should successfully create and reconcile a ServerCleaning resource with serverSelector", func(ctx SpecContext) {
+ By("Creating multiple Server resources in Tainted state")
+ server1 := &metalv1alpha1.Server{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-server-",
+ Namespace: ns.Name,
+ Labels: map[string]string{
+ "cleanup-group": "staging",
+ "region": "us-west",
+ },
+ },
+ Spec: metalv1alpha1.ServerSpec{
+ SystemUUID: "test-system-uuid-1",
+ SystemURI: "/redfish/v1/Systems/1",
+ BMCRef: &corev1.LocalObjectReference{
+ Name: "test-bmc",
+ },
+ Taints: []corev1.Taint{
+ {
+ Key: "metal.ironcore.dev/tainted",
+ Effect: corev1.TaintEffectNoSchedule,
+ },
+ },
+ },
+ }
+ Expect(k8sClient.Create(ctx, server1)).To(Succeed())
+
+ server2 := &metalv1alpha1.Server{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-server-",
+ Namespace: ns.Name,
+ Labels: map[string]string{
+ "cleanup-group": "staging",
+ "region": "us-east",
+ },
+ },
+ Spec: metalv1alpha1.ServerSpec{
+ SystemUUID: "test-system-uuid-2",
+ SystemURI: "/redfish/v1/Systems/2",
+ BMCRef: &corev1.LocalObjectReference{
+ Name: "test-bmc",
+ },
+ Taints: []corev1.Taint{
+ {
+ Key: "metal.ironcore.dev/tainted",
+ Effect: corev1.TaintEffectNoSchedule,
+ },
+ },
+ },
+ }
+ Expect(k8sClient.Create(ctx, server2)).To(Succeed())
+
+ By("Setting Server states to Tainted")
+ Eventually(UpdateStatus(server1, func() {
+ server1.Status.State = metalv1alpha1.ServerStateTainted
+ })).Should(Succeed())
+ Eventually(UpdateStatus(server2, func() {
+ server2.Status.State = metalv1alpha1.ServerStateTainted
+ })).Should(Succeed())
+
+ By("Creating a ServerCleaning resource with serverSelector")
+ cleaning := &metalv1alpha1.ServerCleaning{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-cleaning-",
+ Namespace: ns.Name,
+ },
+ Spec: metalv1alpha1.ServerCleaningSpec{
+ ServerSelector: &metav1.LabelSelector{
+ MatchLabels: map[string]string{
+ "cleanup-group": "staging",
+ },
+ },
+ DiskWipe: &metalv1alpha1.DiskWipeConfig{
+ Method: metalv1alpha1.DiskWipeMethodSecure,
+ IncludeBootDrives: false,
+ },
+ NetworkCleanup: true,
+ },
+ }
+ Expect(k8sClient.Create(ctx, cleaning)).To(Succeed())
+
+ By("Ensuring ServerCleaning transitions to InProgress state")
+ Eventually(Object(cleaning)).WithTimeout(2 * time.Minute).Should(SatisfyAll(
+ HaveField("Status.State", metalv1alpha1.ServerCleaningStateInProgress),
+ HaveField("Status.SelectedServers", BeNumerically("==", 2)),
+ ))
+
+ By("Ensuring ServerCleaning status has entries for both servers")
+ Eventually(func(g Gomega) {
+ g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(cleaning), cleaning)).To(Succeed())
+ g.Expect(cleaning.Status.ServerCleaningStatuses).To(HaveLen(2))
+ }).Should(Succeed())
+
+ // Cleanup
+ Expect(k8sClient.Delete(ctx, cleaning)).To(Succeed())
+ Eventually(Get(cleaning)).Should(Satisfy(apierrors.IsNotFound))
+ Expect(k8sClient.Delete(ctx, server1)).To(Succeed())
+ Expect(k8sClient.Delete(ctx, server2)).To(Succeed())
+ })
+
+ It("Should track cleaning tasks in status", func(ctx SpecContext) {
+ By("Creating a Server resource in Tainted state")
+ server := &metalv1alpha1.Server{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-server-",
+ Namespace: ns.Name,
+ },
+ Spec: metalv1alpha1.ServerSpec{
+ SystemUUID: "test-system-uuid-1",
+ SystemURI: "/redfish/v1/Systems/1",
+ BMCRef: &corev1.LocalObjectReference{
+ Name: "test-bmc",
+ },
+ Taints: []corev1.Taint{
+ {
+ Key: "metal.ironcore.dev/tainted",
+ Effect: corev1.TaintEffectNoSchedule,
+ },
+ },
+ },
+ }
+ Expect(k8sClient.Create(ctx, server)).To(Succeed())
+
+ By("Setting Server state to Tainted")
+ Eventually(UpdateStatus(server, func() {
+ server.Status.State = metalv1alpha1.ServerStateTainted
+ })).Should(Succeed())
+
+ By("Creating a ServerCleaning resource with multiple cleaning operations")
+ cleaning := &metalv1alpha1.ServerCleaning{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-cleaning-",
+ Namespace: ns.Name,
+ },
+ Spec: metalv1alpha1.ServerCleaningSpec{
+ ServerRef: &corev1.LocalObjectReference{
+ Name: server.Name,
+ },
+ DiskWipe: &metalv1alpha1.DiskWipeConfig{
+ Method: metalv1alpha1.DiskWipeMethodDoD,
+ IncludeBootDrives: true,
+ },
+ BIOSReset: true,
+ NetworkCleanup: true,
+ },
+ }
+ Expect(k8sClient.Create(ctx, cleaning)).To(Succeed())
+
+ By("Ensuring cleaning tasks are tracked in BMC status")
+ Eventually(func(g Gomega) {
+ g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(cleaning), cleaning)).To(Succeed())
+ // Check that cleaning is in progress
+ g.Expect(cleaning.Status.State).To(Equal(metalv1alpha1.ServerCleaningStateInProgress))
+
+ // Verify tasks are created in BMC.Status.Tasks (not in ServerCleaning status)
+ bmcObj := &metalv1alpha1.BMC{}
+ g.Expect(k8sClient.Get(ctx, client.ObjectKey{Name: server.Spec.BMCRef.Name}, bmcObj)).To(Succeed())
+ // Should have at least one task from the cleaning operations
+ g.Expect(bmcObj.Status.Tasks).ToNot(BeEmpty())
+ }).WithTimeout(2 * time.Minute).Should(Succeed())
+
+ // Cleanup
+ Expect(k8sClient.Delete(ctx, cleaning)).To(Succeed())
+ Eventually(Get(cleaning)).Should(Satisfy(apierrors.IsNotFound))
+ Expect(k8sClient.Delete(ctx, server)).To(Succeed())
+ })
+
+ It("Should update cleaning counts correctly", func(ctx SpecContext) {
+ By("Creating multiple Server resources")
+ servers := make([]*metalv1alpha1.Server, 3)
+ for i := range 3 {
+ servers[i] = &metalv1alpha1.Server{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-server-",
+ Namespace: ns.Name,
+ Labels: map[string]string{
+ "batch": "test",
+ },
+ },
+ Spec: metalv1alpha1.ServerSpec{
+ SystemUUID: "test-system-uuid-" + string(rune(i)),
+ SystemURI: "/redfish/v1/Systems/" + string(rune(i)),
+ BMCRef: &corev1.LocalObjectReference{
+ Name: "test-bmc",
+ },
+ },
+ }
+ Expect(k8sClient.Create(ctx, servers[i])).To(Succeed())
+
+ Eventually(UpdateStatus(servers[i], func() {
+ servers[i].Status.State = metalv1alpha1.ServerStateTainted
+ })).Should(Succeed())
+ }
+
+ By("Creating a ServerCleaning resource for all servers")
+ cleaning := &metalv1alpha1.ServerCleaning{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-cleaning-",
+ Namespace: ns.Name,
+ },
+ Spec: metalv1alpha1.ServerCleaningSpec{
+ ServerSelector: &metav1.LabelSelector{
+ MatchLabels: map[string]string{
+ "batch": "test",
+ },
+ },
+ DiskWipe: &metalv1alpha1.DiskWipeConfig{
+ Method: metalv1alpha1.DiskWipeMethodQuick,
+ },
+ },
+ }
+ Expect(k8sClient.Create(ctx, cleaning)).To(Succeed())
+
+ By("Ensuring cleaning counts are updated")
+ Eventually(func(g Gomega) {
+ g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(cleaning), cleaning)).To(Succeed())
+ // Should have selected all 3 servers
+ g.Expect(cleaning.Status.SelectedServers).To(BeNumerically("==", 3))
+ // Should have counts tracking progress
+ totalProcessed := cleaning.Status.InProgressCleanings +
+ cleaning.Status.CompletedCleanings +
+ cleaning.Status.FailedCleanings
+ g.Expect(totalProcessed).To(BeNumerically(">", 0))
+ }).WithTimeout(2 * time.Minute).Should(Succeed())
+
+ // Cleanup
+ Expect(k8sClient.Delete(ctx, cleaning)).To(Succeed())
+ Eventually(Get(cleaning)).Should(Satisfy(apierrors.IsNotFound))
+ for _, server := range servers {
+ Expect(k8sClient.Delete(ctx, server)).To(Succeed())
+ }
+ })
+
+ It("Should set proper conditions during cleaning lifecycle", func(ctx SpecContext) {
+ By("Creating a Server resource in Tainted state")
+ server := &metalv1alpha1.Server{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-server-",
+ Namespace: ns.Name,
+ },
+ Spec: metalv1alpha1.ServerSpec{
+ SystemUUID: "test-system-uuid-1",
+ SystemURI: "/redfish/v1/Systems/1",
+ BMCRef: &corev1.LocalObjectReference{
+ Name: "test-bmc",
+ },
+ Taints: []corev1.Taint{
+ {
+ Key: "metal.ironcore.dev/tainted",
+ Effect: corev1.TaintEffectNoSchedule,
+ },
+ },
+ },
+ }
+ Expect(k8sClient.Create(ctx, server)).To(Succeed())
+
+ By("Setting Server state to Tainted")
+ Eventually(UpdateStatus(server, func() {
+ server.Status.State = metalv1alpha1.ServerStateTainted
+ })).Should(Succeed())
+
+ By("Creating a ServerCleaning resource")
+ cleaning := &metalv1alpha1.ServerCleaning{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-cleaning-",
+ Namespace: ns.Name,
+ },
+ Spec: metalv1alpha1.ServerCleaningSpec{
+ ServerRef: &corev1.LocalObjectReference{
+ Name: server.Name,
+ },
+ DiskWipe: &metalv1alpha1.DiskWipeConfig{
+ Method: metalv1alpha1.DiskWipeMethodQuick,
+ },
+ },
+ }
+ Expect(k8sClient.Create(ctx, cleaning)).To(Succeed())
+
+ By("Ensuring Cleaning condition is set when in progress")
+ Eventually(func(g Gomega) {
+ g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(cleaning), cleaning)).To(Succeed())
+ if cleaning.Status.State == metalv1alpha1.ServerCleaningStateInProgress {
+ g.Expect(cleaning.Status.Conditions).NotTo(BeEmpty())
+ condition := findCondition(cleaning.Status.Conditions, ServerCleaningConditionTypeCleaning)
+ g.Expect(condition).NotTo(BeNil())
+ g.Expect(condition.Status).To(Equal(metav1.ConditionTrue))
+ g.Expect(condition.Reason).To(Equal(ServerCleaningConditionReasonInProgress))
+ }
+ }).WithTimeout(2 * time.Minute).Should(Succeed())
+
+ // Cleanup
+ Expect(k8sClient.Delete(ctx, cleaning)).To(Succeed())
+ Eventually(Get(cleaning)).Should(Satisfy(apierrors.IsNotFound))
+ Expect(k8sClient.Delete(ctx, server)).To(Succeed())
+ })
+
+ It("Should skip servers not in Tainted state", func(ctx SpecContext) {
+ By("Creating servers in different states")
+ taintedServer := &metalv1alpha1.Server{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "tainted-server-",
+ Namespace: ns.Name,
+ Labels: map[string]string{
+ "group": "mixed",
+ },
+ },
+ Spec: metalv1alpha1.ServerSpec{
+ SystemUUID: "test-system-uuid-1",
+ SystemURI: "/redfish/v1/Systems/1",
+ BMCRef: &corev1.LocalObjectReference{
+ Name: "test-bmc",
+ },
+ Taints: []corev1.Taint{
+ {
+ Key: "metal.ironcore.dev/tainted",
+ Effect: corev1.TaintEffectNoSchedule,
+ },
+ },
+ },
+ }
+ Expect(k8sClient.Create(ctx, taintedServer)).To(Succeed())
+ Eventually(UpdateStatus(taintedServer, func() {
+ taintedServer.Status.State = metalv1alpha1.ServerStateTainted
+ })).Should(Succeed())
+
+ availableServer := &metalv1alpha1.Server{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "available-server-",
+ Namespace: ns.Name,
+ Labels: map[string]string{
+ "group": "mixed",
+ },
+ },
+ Spec: metalv1alpha1.ServerSpec{
+ SystemUUID: "test-system-uuid-2",
+ SystemURI: "/redfish/v1/Systems/2",
+ BMCRef: &corev1.LocalObjectReference{
+ Name: "test-bmc",
+ },
+ },
+ }
+ Expect(k8sClient.Create(ctx, availableServer)).To(Succeed())
+ Eventually(UpdateStatus(availableServer, func() {
+ availableServer.Status.State = metalv1alpha1.ServerStateAvailable
+ })).Should(Succeed())
+
+ By("Creating a ServerCleaning resource targeting both servers")
+ cleaning := &metalv1alpha1.ServerCleaning{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-cleaning-",
+ Namespace: ns.Name,
+ },
+ Spec: metalv1alpha1.ServerCleaningSpec{
+ ServerSelector: &metav1.LabelSelector{
+ MatchLabels: map[string]string{
+ "group": "mixed",
+ },
+ },
+ DiskWipe: &metalv1alpha1.DiskWipeConfig{
+ Method: metalv1alpha1.DiskWipeMethodQuick,
+ },
+ },
+ }
+ Expect(k8sClient.Create(ctx, cleaning)).To(Succeed())
+
+ By("Ensuring only tainted server gets cleaning status entry")
+ Eventually(func(g Gomega) {
+ g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(cleaning), cleaning)).To(Succeed())
+ // Should select 2 servers but only process the tainted one
+ g.Expect(cleaning.Status.SelectedServers).To(BeNumerically("==", 2))
+ // Only tainted server should have a status entry
+ if len(cleaning.Status.ServerCleaningStatuses) > 0 {
+ for _, status := range cleaning.Status.ServerCleaningStatuses {
+ g.Expect(status.ServerName).To(Equal(taintedServer.Name))
+ }
+ }
+ }).WithTimeout(2 * time.Minute).Should(Succeed())
+
+ // Cleanup
+ Expect(k8sClient.Delete(ctx, cleaning)).To(Succeed())
+ Eventually(Get(cleaning)).Should(Satisfy(apierrors.IsNotFound))
+ Expect(k8sClient.Delete(ctx, taintedServer)).To(Succeed())
+ Expect(k8sClient.Delete(ctx, availableServer)).To(Succeed())
+ })
+
+ It("Should clean tainted server and transition from Reserved to Available", func(ctx SpecContext) {
+ By("Creating a ServerClaim resource")
+ claim := &metalv1alpha1.ServerClaim{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-claim-",
+ Namespace: ns.Name,
+ },
+ Spec: metalv1alpha1.ServerClaimSpec{
+ Power: metalv1alpha1.PowerOn,
+ ServerSelector: &metav1.LabelSelector{
+ MatchLabels: map[string]string{
+ "claim-test": "transition",
+ },
+ },
+ },
+ }
+ Expect(k8sClient.Create(ctx, claim)).To(Succeed())
+
+ By("Creating a Server resource that will be claimed")
+ server := &metalv1alpha1.Server{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-server-",
+ Namespace: ns.Name,
+ Labels: map[string]string{
+ "claim-test": "transition",
+ },
+ },
+ Spec: metalv1alpha1.ServerSpec{
+ SystemUUID: "test-system-uuid-claim",
+ SystemURI: "/redfish/v1/Systems/claim",
+ BMCRef: &corev1.LocalObjectReference{
+ Name: "test-bmc",
+ },
+ },
+ }
+ Expect(k8sClient.Create(ctx, server)).To(Succeed())
+
+ By("Setting Server state to Available initially")
+ Eventually(UpdateStatus(server, func() {
+ server.Status.State = metalv1alpha1.ServerStateAvailable
+ })).Should(Succeed())
+
+ By("Waiting for Server to be claimed")
+ Eventually(func(g Gomega) {
+ g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(server), server)).To(Succeed())
+ g.Expect(server.Spec.ServerClaimRef).NotTo(BeNil())
+ g.Expect(server.Spec.ServerClaimRef.Name).To(Equal(claim.Name))
+ }).Should(Succeed())
+
+ By("Setting Server state to Reserved")
+ Eventually(UpdateStatus(server, func() {
+ server.Status.State = metalv1alpha1.ServerStateReserved
+ })).Should(Succeed())
+
+ By("Adding taints to the Server before releasing")
+ Eventually(func(g Gomega) {
+ g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(server), server)).To(Succeed())
+ serverBase := server.DeepCopy()
+ server.Spec.Taints = []corev1.Taint{
+ {
+ Key: "metal.ironcore.dev/tainted",
+ Effect: corev1.TaintEffectNoSchedule,
+ },
+ }
+ g.Expect(k8sClient.Patch(ctx, server, client.MergeFrom(serverBase))).To(Succeed())
+ }).Should(Succeed())
+
+ By("Deleting the ServerClaim to release the server")
+ Expect(k8sClient.Delete(ctx, claim)).To(Succeed())
+ Eventually(Get(claim)).Should(Satisfy(apierrors.IsNotFound))
+
+ By("Ensuring ServerClaimRef is removed from Server")
+ Eventually(func(g Gomega) {
+ g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(server), server)).To(Succeed())
+ g.Expect(server.Spec.ServerClaimRef).To(BeNil())
+ }).Should(Succeed())
+
+ By("Ensuring Server transitions to Tainted state")
+ Eventually(func(g Gomega) {
+ g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(server), server)).To(Succeed())
+ g.Expect(server.Status.State).To(Equal(metalv1alpha1.ServerStateTainted))
+ }).WithTimeout(2 * time.Minute).Should(Succeed())
+
+ By("Creating a ServerCleaning resource for the tainted server")
+ cleaning := &metalv1alpha1.ServerCleaning{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-cleaning-",
+ Namespace: ns.Name,
+ },
+ Spec: metalv1alpha1.ServerCleaningSpec{
+ ServerRef: &corev1.LocalObjectReference{
+ Name: server.Name,
+ },
+ DiskWipe: &metalv1alpha1.DiskWipeConfig{
+ Method: metalv1alpha1.DiskWipeMethodQuick,
+ IncludeBootDrives: true,
+ },
+ BIOSReset: true,
+ NetworkCleanup: true,
+ },
+ }
+ Expect(k8sClient.Create(ctx, cleaning)).To(Succeed())
+
+ By("Ensuring ServerCleaning transitions through states")
+ Eventually(Object(cleaning)).Should(SatisfyAll(
+ HaveField("Status.State", metalv1alpha1.ServerCleaningStatePending),
+ ))
+
+ Eventually(Object(cleaning)).WithTimeout(2 * time.Minute).Should(SatisfyAll(
+ HaveField("Status.State", metalv1alpha1.ServerCleaningStateInProgress),
+ ))
+
+ By("Simulating cleaning completion by updating ServerCleaning status")
+ Eventually(func(g Gomega) {
+ g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(cleaning), cleaning)).To(Succeed())
+ cleaningBase := cleaning.DeepCopy()
+ cleaning.Status.State = metalv1alpha1.ServerCleaningStateCompleted
+ if len(cleaning.Status.ServerCleaningStatuses) > 0 {
+ cleaning.Status.ServerCleaningStatuses[0].State = metalv1alpha1.ServerCleaningStateCompleted
+ }
+ cleaning.Status.CompletedCleanings = 1
+ cleaning.Status.InProgressCleanings = 0
+ g.Expect(k8sClient.Status().Patch(ctx, cleaning, client.MergeFrom(cleaningBase))).To(Succeed())
+ }).Should(Succeed())
+
+ By("Ensuring Server taints are removed after cleaning completion")
+ Eventually(func(g Gomega) {
+ g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(server), server)).To(Succeed())
+ g.Expect(server.Spec.Taints).To(BeEmpty())
+ }).WithTimeout(2 * time.Minute).Should(Succeed())
+
+ By("Ensuring Server transitions to Available state")
+ Eventually(func(g Gomega) {
+ g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(server), server)).To(Succeed())
+ g.Expect(server.Status.State).To(Equal(metalv1alpha1.ServerStateAvailable))
+ }).WithTimeout(2 * time.Minute).Should(Succeed())
+
+ // Cleanup
+ Expect(k8sClient.Delete(ctx, cleaning)).To(Succeed())
+ Eventually(Get(cleaning)).Should(Satisfy(apierrors.IsNotFound))
+ Expect(k8sClient.Delete(ctx, server)).To(Succeed())
+ })
+
+ It("Should handle deletion with finalizer", func(ctx SpecContext) {
+ By("Creating a ServerCleaning resource")
+ cleaning := &metalv1alpha1.ServerCleaning{
+ ObjectMeta: metav1.ObjectMeta{
+ GenerateName: "test-cleaning-",
+ Namespace: ns.Name,
+ },
+ Spec: metalv1alpha1.ServerCleaningSpec{
+ ServerRef: &corev1.LocalObjectReference{
+ Name: "non-existent-server",
+ },
+ },
+ }
+ Expect(k8sClient.Create(ctx, cleaning)).To(Succeed())
+
+ By("Ensuring finalizer is added")
+ Eventually(Object(cleaning)).Should(SatisfyAll(
+ HaveField("Finalizers", ContainElement(ServerCleaningFinalizer)),
+ ))
+
+ By("Deleting the ServerCleaning resource")
+ Expect(k8sClient.Delete(ctx, cleaning)).To(Succeed())
+
+ By("Ensuring the resource is eventually deleted")
+ Eventually(Get(cleaning)).Should(Satisfy(apierrors.IsNotFound))
+ })
+})
+
+// Helper function to find a condition by type
+func findCondition(conditions []metav1.Condition, conditionType string) *metav1.Condition {
+ for i := range conditions {
+ if conditions[i].Type == conditionType {
+ return &conditions[i]
+ }
+ }
+ return nil
+}
diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go
index 9db3050f7..317e2ea95 100644
--- a/internal/controller/suite_test.go
+++ b/internal/controller/suite_test.go
@@ -219,6 +219,11 @@ func SetupTest(redfishMockServers []netip.AddrPort) *corev1.Namespace {
Scheme: k8sManager.GetScheme(),
}).SetupWithManager(k8sManager)).To(Succeed())
+ Expect((&ServerCleaningReconciler{
+ Client: k8sManager.GetClient(),
+ Scheme: k8sManager.GetScheme(),
+ }).SetupWithManager(k8sManager)).To(Succeed())
+
Expect((&BIOSSettingsReconciler{
Client: k8sManager.GetClient(),
ManagerNamespace: ns.Name,
@@ -311,6 +316,16 @@ func SetupTest(redfishMockServers []netip.AddrPort) *corev1.Namespace {
},
}).SetupWithManager(k8sManager)).To(Succeed())
+ Expect((&BMCTaskReconciler{
+ Client: k8sManager.GetClient(),
+ Scheme: k8sManager.GetScheme(),
+ Insecure: true,
+ PollInterval: 50 * time.Millisecond,
+ BMCOptions: bmc.Options{
+ BasicAuth: true,
+ },
+ }).SetupWithManager(k8sManager)).To(Succeed())
+
By("Starting the registry server")
Expect(k8sManager.Add(manager.RunnableFunc(func(ctx context.Context) error {
registryServer := registry.NewServer(GinkgoLogr, ":30000", k8sManager.GetClient())