From 823a3a0a4dd1a1b6fea25b5b1f2b7be58c6d5130 Mon Sep 17 00:00:00 2001
From: Andy Kolibri Vendetti
Date: Mon, 5 May 2025 00:57:24 +0500
Subject: [PATCH] Add pod auto-removal for CrashLoopBackOff

Add an optional CrashLoopPolicy to NodeTainterConfig: when enabled, pods of the
listed Deployments that are stuck in CrashLoopBackOff with at least
restartThreshold container restarts are deleted so they can be rescheduled.
Includes the new PodCrash controller, RBAC additions, CRD/deepcopy updates,
and a sample configuration.
---
 api/v1alpha1/nodetainterconfig_types.go        |  23 +++
 api/v1alpha1/zz_generated.deepcopy.go          |  25 +++
 cmd/main.go                                    |   9 +
 ...r.andy.vendetti.ru_nodetainterconfigs.yaml  |  24 +++
 config/rbac/role.yaml                          |   9 +
 .../operator_v1alpha1_nodetainterconfig.yaml   |   8 +-
 internal/controller/podcrash_controller.go     | 163 ++++++++++++++++++
 7 files changed, 260 insertions(+), 1 deletion(-)
 create mode 100644 internal/controller/podcrash_controller.go

diff --git a/api/v1alpha1/nodetainterconfig_types.go b/api/v1alpha1/nodetainterconfig_types.go
index de5340a..9b76b76 100644
--- a/api/v1alpha1/nodetainterconfig_types.go
+++ b/api/v1alpha1/nodetainterconfig_types.go
@@ -63,6 +63,26 @@ type ImageUpdatePolicy struct {
     RestartAnnotation string `json:"restartAnnotation,omitempty"`
 }
 
+// CrashLoopPolicy defines the policy for handling pods in CrashLoopBackOff.
+type CrashLoopPolicy struct {
+    // Enabled toggles the CrashLoopBackOff handling feature.
+    // +optional
+    Enabled bool `json:"enabled,omitempty"`
+
+    // MonitoredDeployments is a list of Deployments (in "namespace/name" format)
+    // whose pods should be monitored for CrashLoopBackOff.
+    // +optional
+    MonitoredDeployments []string `json:"monitoredDeployments,omitempty"`
+
+    // RestartThreshold is the number of container restarts after which
+    // a pod in CrashLoopBackOff will be deleted to attempt rescheduling.
+    // A value of at least 3 is recommended to avoid deleting pods on transient failures.
+    // +kubebuilder:validation:Minimum=1
+    // +kubebuilder:default=5
+    // +optional
+    RestartThreshold int32 `json:"restartThreshold,omitempty"`
+}
+
 // NodeTainterConfigSpec defines the desired state of NodeTainterConfig.
 type NodeTainterConfigSpec struct {
     // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
@@ -95,6 +115,9 @@ type NodeTainterConfigSpec struct {
     // +optional
     ImageUpdatePolicy *ImageUpdatePolicy `json:"imageUpdatePolicy,omitempty"`
+
+    // +optional
+    CrashLoopPolicy *CrashLoopPolicy `json:"crashLoopPolicy,omitempty"`
 }
 
 // NodeTainterConfigStatus defines the observed state of NodeTainterConfig.
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index 36b2189..bc714f1 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -25,6 +25,26 @@ import (
     runtime "k8s.io/apimachinery/pkg/runtime"
 )
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *CrashLoopPolicy) DeepCopyInto(out *CrashLoopPolicy) {
+    *out = *in
+    if in.MonitoredDeployments != nil {
+        in, out := &in.MonitoredDeployments, &out.MonitoredDeployments
+        *out = make([]string, len(*in))
+        copy(*out, *in)
+    }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CrashLoopPolicy.
+func (in *CrashLoopPolicy) DeepCopy() *CrashLoopPolicy {
+    if in == nil {
+        return nil
+    }
+    out := new(CrashLoopPolicy)
+    in.DeepCopyInto(out)
+    return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *ImageUpdatePolicy) DeepCopyInto(out *ImageUpdatePolicy) {
     *out = *in
@@ -144,6 +164,11 @@ func (in *NodeTainterConfigSpec) DeepCopyInto(out *NodeTainterConfigSpec) {
         *out = new(ImageUpdatePolicy)
         (*in).DeepCopyInto(*out)
     }
+    if in.CrashLoopPolicy != nil {
+        in, out := &in.CrashLoopPolicy, &out.CrashLoopPolicy
+        *out = new(CrashLoopPolicy)
+        (*in).DeepCopyInto(*out)
+    }
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeTainterConfigSpec.
diff --git a/cmd/main.go b/cmd/main.go
index bfaa04f..7af880e 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -230,6 +230,15 @@ func main() {
         os.Exit(1)
     }
 
+    if err = (&controller.PodCrashReconciler{
+        Client:   mgr.GetClient(),
+        Scheme:   mgr.GetScheme(),
+        Recorder: mgr.GetEventRecorderFor("podcrash-controller"),
+    }).SetupWithManager(mgr); err != nil {
+        setupLog.Error(err, "unable to create controller", "controller", "PodCrash")
+        os.Exit(1)
+    }
+
     // +kubebuilder:scaffold:builder
 
     if metricsCertWatcher != nil {
diff --git a/config/crd/bases/operator.andy.vendetti.ru_nodetainterconfigs.yaml b/config/crd/bases/operator.andy.vendetti.ru_nodetainterconfigs.yaml
index 25f50fb..a25d480 100644
--- a/config/crd/bases/operator.andy.vendetti.ru_nodetainterconfigs.yaml
+++ b/config/crd/bases/operator.andy.vendetti.ru_nodetainterconfigs.yaml
@@ -41,6 +41,30 @@ spec:
           spec:
             description: NodeTainterConfigSpec defines the desired state of NodeTainterConfig.
             properties:
+              crashLoopPolicy:
+                description: CrashLoopPolicy defines the policy for handling pods
+                  in CrashLoopBackOff.
+                properties:
+                  enabled:
+                    description: Enabled toggles the CrashLoopBackOff handling feature.
+                    type: boolean
+                  monitoredDeployments:
+                    description: |-
+                      MonitoredDeployments is a list of Deployments (in "namespace/name" format)
+                      whose pods should be monitored for CrashLoopBackOff.
+                    items:
+                      type: string
+                    type: array
+                  restartThreshold:
+                    default: 5
+                    description: |-
+                      RestartThreshold is the number of container restarts after which
+                      a pod in CrashLoopBackOff will be deleted to attempt rescheduling.
+                      A value of at least 3 is recommended to avoid deleting pods on transient failures.
+                    format: int32
+                    minimum: 1
+                    type: integer
+                type: object
               imageUpdatePolicy:
                 description: ImageUpdatePolicy defines the policy for automatic image
                   updates.
diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml
index c7fed8d..f6efa14 100644
--- a/config/rbac/role.yaml
+++ b/config/rbac/role.yaml
@@ -26,6 +26,7 @@ rules:
   resources:
   - pods
   verbs:
+  - delete
   - get
   - list
   - watch
@@ -39,6 +40,14 @@ rules:
   - patch
   - update
   - watch
+- apiGroups:
+  - apps
+  resources:
+  - replicasets
+  verbs:
+  - get
+  - list
+  - watch
 - apiGroups:
   - operator.andy.vendetti.ru
   resources:
diff --git a/config/samples/operator_v1alpha1_nodetainterconfig.yaml b/config/samples/operator_v1alpha1_nodetainterconfig.yaml
index b5f6a5b..fd9caba 100644
--- a/config/samples/operator_v1alpha1_nodetainterconfig.yaml
+++ b/config/samples/operator_v1alpha1_nodetainterconfig.yaml
@@ -19,5 +19,11 @@ spec:
   imageUpdatePolicy:
     enabled: true
     checkInterval: "5m"
-    monitoredTags: ["latest", "dev"]
+    monitoredTags: ["latest", "dev", "master"]
     # restartAnnotation: "andy.vendetti.ru/restartedAt"
+  crashLoopPolicy:
+    enabled: true
+    restartThreshold: 5
+    monitoredDeployments:
+      - "default/hello-updater-test"
+      - "app-namespace/critical-app-deployment"
diff --git a/internal/controller/podcrash_controller.go b/internal/controller/podcrash_controller.go
new file mode 100644
index 0000000..80ecc09
--- /dev/null
+++ b/internal/controller/podcrash_controller.go
@@ -0,0 +1,163 @@
+// internal/controller/podcrash_controller.go
+package controller
+
+import (
+    "context"
+    "fmt"
+
+    appsv1 "k8s.io/api/apps/v1"
+    corev1 "k8s.io/api/core/v1"
+    "k8s.io/apimachinery/pkg/api/errors"
+    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+    "k8s.io/apimachinery/pkg/runtime"
+    "k8s.io/apimachinery/pkg/types"
+    "k8s.io/client-go/tools/record"
+    ctrl "sigs.k8s.io/controller-runtime"
+    "sigs.k8s.io/controller-runtime/pkg/client"
+    "sigs.k8s.io/controller-runtime/pkg/log"
+
+    configv1alpha1 "git.vendetti.ru/andy/operator/api/v1alpha1"
+)
+
+// PodCrashReconciler reconciles Pods to detect and handle CrashLoopBackOff state.
+type PodCrashReconciler struct {
+    client.Client
+    Scheme   *runtime.Scheme
+    Recorder record.EventRecorder
+}
+
+// +kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch;delete
+// +kubebuilder:rbac:groups=operator.andy.vendetti.ru,resources=nodetainterconfigs,verbs=get;list;watch
+// +kubebuilder:rbac:groups="",resources=events,verbs=create;patch
+// +kubebuilder:rbac:groups=apps,resources=replicasets,verbs=get;list;watch
+// +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch
+
+func (r *PodCrashReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+    log := log.FromContext(ctx).WithValues("pod", req.NamespacedName)
+
+    var config configv1alpha1.NodeTainterConfig
+    configKey := types.NamespacedName{Name: GlobalTaintConfigName}
+    if err := r.Get(ctx, configKey, &config); err != nil {
+        if !errors.IsNotFound(err) {
+            log.Error(err, "Failed to get NodeTainterConfig for crash loop policy", "configName", GlobalTaintConfigName)
+            return ctrl.Result{}, err // Requeue on real error
+        }
+        log.V(1).Info("Global NodeTainterConfig not found, crash loop handling skipped.", "configName", GlobalTaintConfigName)
+        return ctrl.Result{}, nil
+    }
+
+    if config.Spec.CrashLoopPolicy == nil || !config.Spec.CrashLoopPolicy.Enabled {
+        log.V(1).Info("Crash loop policy is disabled in NodeTainterConfig.")
+        return ctrl.Result{}, nil
+    }
+    policy := config.Spec.CrashLoopPolicy
+    if len(policy.MonitoredDeployments) == 0 {
+        log.V(1).Info("No monitored deployments configured in CrashLoopPolicy.")
+        return ctrl.Result{}, nil
+    }
+    monitoredSet := make(map[string]struct{}, len(policy.MonitoredDeployments))
+    for _, item := range policy.MonitoredDeployments {
+        monitoredSet[item] = struct{}{}
+    }
+
+    var pod corev1.Pod
+    if err := r.Get(ctx, req.NamespacedName, &pod); err != nil {
+        if errors.IsNotFound(err) {
+            log.Info("Pod not found. Ignoring.")
+            return ctrl.Result{}, nil
+        }
+        log.Error(err, "Failed to get Pod")
+        return ctrl.Result{}, err // Requeue on error
+    }
+
+    ownerDeploymentName, isOwnedByMonitoredDeployment := r.getOwnerDeploymentIfMonitored(ctx, &pod, monitoredSet)
+    if !isOwnedByMonitoredDeployment {
+        log.V(1).Info("Pod is not owned by a monitored Deployment, skipping.")
+        return ctrl.Result{}, nil
+    }
+    log = log.WithValues("deployment", ownerDeploymentName)
+
+    podShouldBeDeleted := false
+    var crashingContainerName string
+    var restartCount int32
+
+    for _, status := range pod.Status.ContainerStatuses {
+        if status.State.Waiting != nil && status.State.Waiting.Reason == "CrashLoopBackOff" {
+            if status.RestartCount >= policy.RestartThreshold {
+                podShouldBeDeleted = true
+                crashingContainerName = status.Name
+                restartCount = status.RestartCount
+                log.Info("Pod needs deletion due to CrashLoopBackOff threshold",
+                    "container", crashingContainerName,
+                    "restarts", restartCount,
+                    "threshold", policy.RestartThreshold)
+                break
+            } else {
+                log.V(1).Info("Container in CrashLoopBackOff but restart count below threshold",
+                    "container", status.Name,
+                    "restarts", status.RestartCount,
+                    "threshold", policy.RestartThreshold)
+            }
+        }
+    }
+
+    if podShouldBeDeleted {
+        log.Info("Deleting pod to attempt rescheduling", "reason", "CrashLoopBackOff threshold reached")
+        err := r.Delete(ctx, &pod)
+        if err != nil {
+            if errors.IsNotFound(err) || errors.IsConflict(err) {
+                log.Info("Pod likely already deleted or being deleted.")
+                return ctrl.Result{}, nil
+            }
+            log.Error(err, "Failed to delete pod in CrashLoopBackOff")
+            r.Recorder.Eventf(&pod, corev1.EventTypeWarning, "DeleteFailed", "Failed to delete pod (%s/%s) stuck in CrashLoopBackOff: %v", pod.Namespace, pod.Name, err)
+            return ctrl.Result{}, err // Requeue on deletion error
+        }
+        log.Info("Pod deleted successfully.")
+        r.Recorder.Eventf(&pod, corev1.EventTypeNormal, "PodDeleted", "Deleted pod (%s/%s) stuck in CrashLoopBackOff (container: %s, restarts: %d)", pod.Namespace, pod.Name, crashingContainerName, restartCount)
+    }
+
+    return ctrl.Result{}, nil
+}
+
+func (r *PodCrashReconciler) getOwnerDeploymentIfMonitored(ctx context.Context, pod *corev1.Pod, monitoredSet map[string]struct{}) (string, bool) {
+    log := log.FromContext(ctx).WithValues("pod", client.ObjectKeyFromObject(pod))
+
+    rsOwnerRef := metav1.GetControllerOf(pod)
+    if rsOwnerRef == nil || rsOwnerRef.APIVersion != appsv1.SchemeGroupVersion.String() || rsOwnerRef.Kind != "ReplicaSet" {
+        return "", false
+    }
+
+    var rs appsv1.ReplicaSet
+    rsKey := types.NamespacedName{Namespace: pod.Namespace, Name: rsOwnerRef.Name}
+    if err := r.Get(ctx, rsKey, &rs); err != nil {
+        log.V(1).Error(err, "Failed to get owner ReplicaSet", "replicaset", rsKey)
+        return "", false
+    }
+
+    depOwnerRef := metav1.GetControllerOf(&rs)
+    if depOwnerRef == nil || depOwnerRef.APIVersion != appsv1.SchemeGroupVersion.String() || depOwnerRef.Kind != "Deployment" {
+        return "", false
+    }
+
+    deploymentName := fmt.Sprintf("%s/%s", pod.Namespace, depOwnerRef.Name)
+    if _, exists := monitoredSet[deploymentName]; exists {
+        return deploymentName, true
+    }
+
+    return deploymentName, false
+}
+
+// SetupWithManager sets up the controller with the Manager.
+func (r *PodCrashReconciler) SetupWithManager(mgr ctrl.Manager) error {
+    r.Recorder = mgr.GetEventRecorderFor("podcrash-controller")
+
+    return ctrl.NewControllerManagedBy(mgr).
+        Named("podcrash").
+        For(&corev1.Pod{}).
+        // Watches(
+        //     &configv1alpha1.NodeTainterConfig{},
+        //     handler.EnqueueRequestsFromMapFunc(r.mapConfigToPods),
+        // ).
+        Complete(r)
+}
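
Note (not part of this patch): the commented-out Watches above references r.mapConfigToPods, which this change does not add. A minimal sketch of such a mapping function is shown below; it assumes controller-runtime >= 0.15 (context-aware handler.MapFunc) and two extra imports, "strings" and "sigs.k8s.io/controller-runtime/pkg/reconcile". It enqueues the pods of the monitored deployments whenever the NodeTainterConfig changes, so policy updates take effect without waiting for the next pod event.

// Hypothetical sketch, not part of the patch. Requires extra imports:
// "strings" and "sigs.k8s.io/controller-runtime/pkg/reconcile".
func (r *PodCrashReconciler) mapConfigToPods(ctx context.Context, obj client.Object) []reconcile.Request {
    config, ok := obj.(*configv1alpha1.NodeTainterConfig)
    if !ok || config.Spec.CrashLoopPolicy == nil || !config.Spec.CrashLoopPolicy.Enabled {
        return nil
    }

    var requests []reconcile.Request
    for _, item := range config.Spec.CrashLoopPolicy.MonitoredDeployments {
        parts := strings.SplitN(item, "/", 2)
        if len(parts) != 2 {
            continue // malformed entry, expected "namespace/name"
        }

        // Coarse approach: enqueue every pod in the namespace; Reconcile re-checks
        // deployment ownership anyway. A finer version would list by the
        // Deployment's selector labels instead.
        var pods corev1.PodList
        if err := r.List(ctx, &pods, client.InNamespace(parts[0])); err != nil {
            continue
        }
        for i := range pods.Items {
            requests = append(requests, reconcile.Request{
                NamespacedName: client.ObjectKeyFromObject(&pods.Items[i]),
            })
        }
    }
    return requests
}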
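Note (not part of this patch): the change ships without tests. A minimal test sketch for the deletion path could look like the following, assuming the GlobalTaintConfigName constant and configv1alpha1.AddToScheme are available from the existing packages, and that the global config is looked up by name only, as the controller does. File name internal/controller/podcrash_controller_test.go is hypothetical.

package controller

import (
    "context"
    "testing"

    appsv1 "k8s.io/api/apps/v1"
    corev1 "k8s.io/api/core/v1"
    apierrors "k8s.io/apimachinery/pkg/api/errors"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/runtime"
    "k8s.io/apimachinery/pkg/types"
    clientgoscheme "k8s.io/client-go/kubernetes/scheme"
    "k8s.io/client-go/tools/record"
    ctrl "sigs.k8s.io/controller-runtime"
    "sigs.k8s.io/controller-runtime/pkg/client/fake"

    configv1alpha1 "git.vendetti.ru/andy/operator/api/v1alpha1"
)

func TestPodCrashReconcilerDeletesPodOverThreshold(t *testing.T) {
    s := runtime.NewScheme()
    _ = clientgoscheme.AddToScheme(s)
    _ = configv1alpha1.AddToScheme(s)

    isController := true

    // Global config enabling the policy for default/demo.
    cfg := &configv1alpha1.NodeTainterConfig{
        ObjectMeta: metav1.ObjectMeta{Name: GlobalTaintConfigName},
        Spec: configv1alpha1.NodeTainterConfigSpec{
            CrashLoopPolicy: &configv1alpha1.CrashLoopPolicy{
                Enabled:              true,
                RestartThreshold:     5,
                MonitoredDeployments: []string{"default/demo"},
            },
        },
    }

    // Owner chain: Deployment "demo" -> ReplicaSet "demo-abc" -> Pod "demo-abc-xyz".
    rs := &appsv1.ReplicaSet{
        ObjectMeta: metav1.ObjectMeta{
            Name: "demo-abc", Namespace: "default",
            OwnerReferences: []metav1.OwnerReference{{
                APIVersion: "apps/v1", Kind: "Deployment", Name: "demo",
                UID: "dep-uid", Controller: &isController,
            }},
        },
    }
    pod := &corev1.Pod{
        ObjectMeta: metav1.ObjectMeta{
            Name: "demo-abc-xyz", Namespace: "default",
            OwnerReferences: []metav1.OwnerReference{{
                APIVersion: "apps/v1", Kind: "ReplicaSet", Name: "demo-abc",
                UID: "rs-uid", Controller: &isController,
            }},
        },
        Status: corev1.PodStatus{
            ContainerStatuses: []corev1.ContainerStatus{{
                Name:         "app",
                RestartCount: 6,
                State: corev1.ContainerState{
                    Waiting: &corev1.ContainerStateWaiting{Reason: "CrashLoopBackOff"},
                },
            }},
        },
    }

    c := fake.NewClientBuilder().WithScheme(s).WithObjects(cfg, rs, pod).Build()
    r := &PodCrashReconciler{Client: c, Scheme: s, Recorder: record.NewFakeRecorder(10)}

    req := ctrl.Request{NamespacedName: types.NamespacedName{Namespace: "default", Name: "demo-abc-xyz"}}
    if _, err := r.Reconcile(context.Background(), req); err != nil {
        t.Fatalf("Reconcile returned error: %v", err)
    }

    // The pod should be gone: 6 restarts >= threshold 5.
    err := c.Get(context.Background(), req.NamespacedName, &corev1.Pod{})
    if !apierrors.IsNotFound(err) {
        t.Fatalf("expected pod to be deleted, got err=%v", err)
    }
}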