From 31d468f0bb0f4c7a494b1b7b92f5f4e417d247aa Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Thu, 8 Jan 2026 12:02:03 +0100 Subject: [PATCH] Wait for eviction condition when offboarding and match eviction trigger It doesn't make sense to poll OpenStack for the hypervisors while we are still evicting, so wait for the eviction to finish first. The condition that triggers the eviction has to match the condition we wait on for the eviction to complete. --- internal/controller/decomission_controller.go | 13 ++++++++++++- .../controller/decomission_controller_test.go | 8 +++++++- .../hypervisor_maintenance_controller.go | 18 ++++++++++-------- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/internal/controller/decomission_controller.go b/internal/controller/decomission_controller.go index 852e3e38..3181adf4 100644 --- a/internal/controller/decomission_controller.go +++ b/internal/controller/decomission_controller.go @@ -78,7 +78,18 @@ func (r *NodeDecommissionReconciler) Reconcile(ctx context.Context, req ctrl.Req return ctrl.Result{}, nil } - log.Info("removing host from nova") + // Onboarding-condition needs to be either unset or set to false, so that we can continue + // The first means, onboarding has never started, the second means it has been aborted or finished + if meta.IsStatusConditionTrue(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) { + return ctrl.Result{}, nil + } + + // If the service id is set, there might be VMs either from onboarding or even from normal operation + // In that case we need to wait until those are evicted + if hv.Status.ServiceID != "" && !meta.IsStatusConditionFalse(hv.Status.Conditions, kvmv1.ConditionTypeEvicting) { + // Either has not evicted yet, or is still evicting VMs, so we have to wait for that to finish + return ctrl.Result{}, nil + } hypervisor, err := openstack.GetHypervisorByName(ctx, r.computeClient, hostname, true) if err != nil { diff --git a/internal/controller/decomission_controller_test.go 
b/internal/controller/decomission_controller_test.go index c45b52c9..8aed4ade 100644 --- a/internal/controller/decomission_controller_test.go +++ b/internal/controller/decomission_controller_test.go @@ -135,7 +135,7 @@ var _ = Describe("Decommission Controller", func() { Expect(k8sClient.Status().Update(ctx, hypervisor)).To(Succeed()) }) - When("the hypervisor was set to ready", func() { + When("the hypervisor was set to ready and has been evicted", func() { getHypervisorsCalled := 0 BeforeEach(func(ctx SpecContext) { hv := &kvmv1.Hypervisor{} @@ -148,6 +148,12 @@ var _ = Describe("Decommission Controller", func() { Message: "dontcare", }, ) + meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ + Type: kvmv1.ConditionTypeEvicting, + Status: metav1.ConditionFalse, + Reason: "dontcare", + Message: "dontcare", + }) Expect(k8sClient.Status().Update(ctx, hv)).To(Succeed()) fakeServer.Mux.HandleFunc("GET /os-hypervisors/detail", func(w http.ResponseWriter, r *http.Request) { diff --git a/internal/controller/hypervisor_maintenance_controller.go b/internal/controller/hypervisor_maintenance_controller.go index 5ca8fee2..928b7255 100644 --- a/internal/controller/hypervisor_maintenance_controller.go +++ b/internal/controller/hypervisor_maintenance_controller.go @@ -54,7 +54,6 @@ type HypervisorMaintenanceController struct { // +kubebuilder:rbac:groups=kvm.cloud.sap,resources=hypervisors,verbs=get;list;watch // +kubebuilder:rbac:groups=kvm.cloud.sap,resources=hypervisors/status,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=kvm.cloud.sap,resources=evictions,verbs=get;list;watch;create;update;patch;delete - func (hec *HypervisorMaintenanceController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { hv := &kvmv1.Hypervisor{} if err := hec.Get(ctx, req.NamespacedName, hv); err != nil { @@ -62,13 +61,10 @@ func (hec *HypervisorMaintenanceController) Reconcile(ctx context.Context, req c return ctrl.Result{}, 
k8sclient.IgnoreNotFound(err) } - // is onboarding completed? - if !meta.IsStatusConditionFalse(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) { - return ctrl.Result{}, nil - } - - // ensure serviceId is set - if hv.Status.ServiceID == "" { + // If onboarding hasn't even started, no value will be set + // If it has been started, but not finished yet, we need to wait for it to be aborted + // So we can continue, if the condition is either not set at all or false + if meta.IsStatusConditionTrue(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) { return ctrl.Result{}, nil } @@ -94,6 +90,12 @@ func (hec *HypervisorMaintenanceController) reconcileComputeService(ctx context. log := logger.FromContext(ctx) serviceId := hv.Status.ServiceID + // We can only do something here, if there is a service to begin with. + // The onboarding should take care of that + if serviceId == "" { + return nil + } + switch hv.Spec.Maintenance { case kvmv1.MaintenanceUnset: // Enable the compute service (in case we haven't done so already)