From 823a3a0a4dd1a1b6fea25b5b1f2b7be58c6d5130 Mon Sep 17 00:00:00 2001
From: Andy Kolibri Vendetti
Date: Mon, 5 May 2025 00:57:24 +0500
Subject: [PATCH] Add pod auto-removal for CrashLoopBackOff

Add an optional CrashLoopPolicy to NodeTainterConfig: when enabled, pods of the
listed Deployments that are stuck in CrashLoopBackOff with at least
restartThreshold container restarts are deleted so they can be rescheduled.
Includes the new PodCrash controller, RBAC additions, CRD/deepcopy updates,
and a sample configuration.
---
 api/v1alpha1/nodetainterconfig_types.go        |  23 +++
 api/v1alpha1/zz_generated.deepcopy.go          |  25 +++
 cmd/main.go                                    |   9 +
 ...r.andy.vendetti.ru_nodetainterconfigs.yaml  |  24 +++
 config/rbac/role.yaml                          |   9 +
 .../operator_v1alpha1_nodetainterconfig.yaml   |   8 +-
 internal/controller/podcrash_controller.go     | 163 ++++++++++++++++++
 7 files changed, 260 insertions(+), 1 deletion(-)
 create mode 100644 internal/controller/podcrash_controller.go

diff --git a/api/v1alpha1/nodetainterconfig_types.go b/api/v1alpha1/nodetainterconfig_types.go
index de5340a..9b76b76 100644
--- a/api/v1alpha1/nodetainterconfig_types.go
+++ b/api/v1alpha1/nodetainterconfig_types.go
@@ -63,6 +63,26 @@ type ImageUpdatePolicy struct {
     RestartAnnotation string `json:"restartAnnotation,omitempty"`
 }
 
+// CrashLoopPolicy defines the policy for handling pods in CrashLoopBackOff.
+type CrashLoopPolicy struct {
+    // Enabled toggles the CrashLoopBackOff handling feature.
+    // +optional
+    Enabled bool `json:"enabled,omitempty"`
+
+    // MonitoredDeployments is a list of Deployments (in "namespace/name" format)
+    // whose pods should be monitored for CrashLoopBackOff.
+    // +optional
+    MonitoredDeployments []string `json:"monitoredDeployments,omitempty"`
+
+    // RestartThreshold is the number of container restarts after which
+    // a pod in CrashLoopBackOff will be deleted to attempt rescheduling.
+    // A value of at least 3 is recommended to avoid deleting pods on transient failures.
+    // +kubebuilder:validation:Minimum=1
+    // +kubebuilder:default=5
+    // +optional
+    RestartThreshold int32 `json:"restartThreshold,omitempty"`
+}
+
 // NodeTainterConfigSpec defines the desired state of NodeTainterConfig.
 type NodeTainterConfigSpec struct {
     // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
@@ -95,6 +115,9 @@ type NodeTainterConfigSpec struct {
     // +optional
     ImageUpdatePolicy *ImageUpdatePolicy `json:"imageUpdatePolicy,omitempty"`
+
+    // +optional
+    CrashLoopPolicy *CrashLoopPolicy `json:"crashLoopPolicy,omitempty"`
 }
 
 // NodeTainterConfigStatus defines the observed state of NodeTainterConfig.
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index 36b2189..bc714f1 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -25,6 +25,26 @@ import (
     runtime "k8s.io/apimachinery/pkg/runtime"
 )
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *CrashLoopPolicy) DeepCopyInto(out *CrashLoopPolicy) {
+    *out = *in
+    if in.MonitoredDeployments != nil {
+        in, out := &in.MonitoredDeployments, &out.MonitoredDeployments
+        *out = make([]string, len(*in))
+        copy(*out, *in)
+    }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CrashLoopPolicy.
+func (in *CrashLoopPolicy) DeepCopy() *CrashLoopPolicy {
+    if in == nil {
+        return nil
+    }
+    out := new(CrashLoopPolicy)
+    in.DeepCopyInto(out)
+    return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *ImageUpdatePolicy) DeepCopyInto(out *ImageUpdatePolicy) {
     *out = *in
@@ -144,6 +164,11 @@ func (in *NodeTainterConfigSpec) DeepCopyInto(out *NodeTainterConfigSpec) {
         *out = new(ImageUpdatePolicy)
         (*in).DeepCopyInto(*out)
     }
+    if in.CrashLoopPolicy != nil {
+        in, out := &in.CrashLoopPolicy, &out.CrashLoopPolicy
+        *out = new(CrashLoopPolicy)
+        (*in).DeepCopyInto(*out)
+    }
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeTainterConfigSpec.
diff --git a/cmd/main.go b/cmd/main.go
index bfaa04f..7af880e 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -230,6 +230,15 @@ func main() {
         os.Exit(1)
     }
 
+    if err = (&controller.PodCrashReconciler{
+        Client:   mgr.GetClient(),
+        Scheme:   mgr.GetScheme(),
+        Recorder: mgr.GetEventRecorderFor("podcrash-controller"),
+    }).SetupWithManager(mgr); err != nil {
+        setupLog.Error(err, "unable to create controller", "controller", "PodCrash")
+        os.Exit(1)
+    }
+
     // +kubebuilder:scaffold:builder
 
     if metricsCertWatcher != nil {
diff --git a/config/crd/bases/operator.andy.vendetti.ru_nodetainterconfigs.yaml b/config/crd/bases/operator.andy.vendetti.ru_nodetainterconfigs.yaml
index 25f50fb..a25d480 100644
--- a/config/crd/bases/operator.andy.vendetti.ru_nodetainterconfigs.yaml
+++ b/config/crd/bases/operator.andy.vendetti.ru_nodetainterconfigs.yaml
@@ -41,6 +41,30 @@ spec:
           spec:
             description: NodeTainterConfigSpec defines the desired state of NodeTainterConfig.
             properties:
+              crashLoopPolicy:
+                description: CrashLoopPolicy defines the policy for handling pods
+                  in CrashLoopBackOff.
+                properties:
+                  enabled:
+                    description: Enabled toggles the CrashLoopBackOff handling feature.
+                    type: boolean
+                  monitoredDeployments:
+                    description: |-
+                      MonitoredDeployments is a list of Deployments (in "namespace/name" format)
+                      whose pods should be monitored for CrashLoopBackOff.
+                    items:
+                      type: string
+                    type: array
+                  restartThreshold:
+                    default: 5
+                    description: |-
+                      RestartThreshold is the number of container restarts after which
+                      a pod in CrashLoopBackOff will be deleted to attempt rescheduling.
+                      A value of at least 3 is recommended to avoid deleting pods on transient failures.
+                    format: int32
+                    minimum: 1
+                    type: integer
+                type: object
               imageUpdatePolicy:
                 description: ImageUpdatePolicy defines the policy for automatic image
                   updates.
diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml
index c7fed8d..f6efa14 100644
--- a/config/rbac/role.yaml
+++ b/config/rbac/role.yaml
@@ -26,6 +26,7 @@ rules:
   resources:
   - pods
   verbs:
+  - delete
   - get
   - list
   - watch
@@ -39,6 +40,14 @@ rules:
   - patch
   - update
   - watch
+- apiGroups:
+  - apps
+  resources:
+  - replicasets
+  verbs:
+  - get
+  - list
+  - watch
 - apiGroups:
   - operator.andy.vendetti.ru
   resources:
diff --git a/config/samples/operator_v1alpha1_nodetainterconfig.yaml b/config/samples/operator_v1alpha1_nodetainterconfig.yaml
index b5f6a5b..fd9caba 100644
--- a/config/samples/operator_v1alpha1_nodetainterconfig.yaml
+++ b/config/samples/operator_v1alpha1_nodetainterconfig.yaml
@@ -19,5 +19,11 @@ spec:
   imageUpdatePolicy:
     enabled: true
     checkInterval: "5m"
-    monitoredTags: ["latest", "dev"]
+    monitoredTags: ["latest", "dev", "master"]
     # restartAnnotation: "andy.vendetti.ru/restartedAt"
+  crashLoopPolicy:
+    enabled: true
+    restartThreshold: 5
+    monitoredDeployments:
+      - "default/hello-updater-test"
+      - "app-namespace/critical-app-deployment"
diff --git a/internal/controller/podcrash_controller.go b/internal/controller/podcrash_controller.go
new file mode 100644
index 0000000..80ecc09
--- /dev/null
+++ b/internal/controller/podcrash_controller.go
@@ -0,0 +1,163 @@
+// internal/controller/podcrash_controller.go
+package controller
+
+import (
+    "context"
+    "fmt"
+
+    appsv1 "k8s.io/api/apps/v1"
+    corev1 "k8s.io/api/core/v1"
+    "k8s.io/apimachinery/pkg/api/errors"
+    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+    "k8s.io/apimachinery/pkg/runtime"
+    "k8s.io/apimachinery/pkg/types"
+    "k8s.io/client-go/tools/record"
+    ctrl "sigs.k8s.io/controller-runtime"
+    "sigs.k8s.io/controller-runtime/pkg/client"
+    "sigs.k8s.io/controller-runtime/pkg/log"
+
+    configv1alpha1 "git.vendetti.ru/andy/operator/api/v1alpha1"
+)
+
+// PodCrashReconciler reconciles Pods to detect and handle CrashLoopBackOff state.
+type PodCrashReconciler struct {
+    client.Client
+    Scheme   *runtime.Scheme
+    Recorder record.EventRecorder
+}
+
+// +kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch;delete
+// +kubebuilder:rbac:groups=operator.andy.vendetti.ru,resources=nodetainterconfigs,verbs=get;list;watch
+// +kubebuilder:rbac:groups="",resources=events,verbs=create;patch
+// +kubebuilder:rbac:groups=apps,resources=replicasets,verbs=get;list;watch
+// +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch
+
+func (r *PodCrashReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+    log := log.FromContext(ctx).WithValues("pod", req.NamespacedName)
+
+    var config configv1alpha1.NodeTainterConfig
+    configKey := types.NamespacedName{Name: GlobalTaintConfigName}
+    if err := r.Get(ctx, configKey, &config); err != nil {
+        if !errors.IsNotFound(err) {
+            log.Error(err, "Failed to get NodeTainterConfig for crash loop policy", "configName", GlobalTaintConfigName)
+            return ctrl.Result{}, err // Requeue on real error
+        }
+        log.V(1).Info("Global NodeTainterConfig not found, crash loop handling skipped.", "configName", GlobalTaintConfigName)
+        return ctrl.Result{}, nil
+    }
+
+    if config.Spec.CrashLoopPolicy == nil || !config.Spec.CrashLoopPolicy.Enabled {
+        log.V(1).Info("Crash loop policy is disabled in NodeTainterConfig.")
+        return ctrl.Result{}, nil
+    }
+    policy := config.Spec.CrashLoopPolicy
+    if len(policy.MonitoredDeployments) == 0 {
+        log.V(1).Info("No monitored deployments configured in CrashLoopPolicy.")
+        return ctrl.Result{}, nil
+    }
+    monitoredSet := make(map[string]struct{}, len(policy.MonitoredDeployments))
+    for _, item := range policy.MonitoredDeployments {
+        monitoredSet[item] = struct{}{}
+    }
+
+    var pod corev1.Pod
+    if err := r.Get(ctx, req.NamespacedName, &pod); err != nil {
+        if errors.IsNotFound(err) {
+            log.Info("Pod not found. Ignoring.")
+            return ctrl.Result{}, nil
+        }
+        log.Error(err, "Failed to get Pod")
+        return ctrl.Result{}, err // Requeue on error
+    }
+
+    ownerDeploymentName, isOwnedByMonitoredDeployment := r.getOwnerDeploymentIfMonitored(ctx, &pod, monitoredSet)
+    if !isOwnedByMonitoredDeployment {
+        log.V(1).Info("Pod is not owned by a monitored Deployment, skipping.")
+        return ctrl.Result{}, nil
+    }
+    log = log.WithValues("deployment", ownerDeploymentName)
+
+    podShouldBeDeleted := false
+    var crashingContainerName string
+    var restartCount int32
+
+    for _, status := range pod.Status.ContainerStatuses {
+        if status.State.Waiting != nil && status.State.Waiting.Reason == "CrashLoopBackOff" {
+            if status.RestartCount >= policy.RestartThreshold {
+                podShouldBeDeleted = true
+                crashingContainerName = status.Name
+                restartCount = status.RestartCount
+                log.Info("Pod needs deletion due to CrashLoopBackOff threshold",
+                    "container", crashingContainerName,
+                    "restarts", restartCount,
+                    "threshold", policy.RestartThreshold)
+                break
+            } else {
+                log.V(1).Info("Container in CrashLoopBackOff but restart count below threshold",
+                    "container", status.Name,
+                    "restarts", status.RestartCount,
+                    "threshold", policy.RestartThreshold)
+            }
+        }
+    }
+
+    if podShouldBeDeleted {
+        log.Info("Deleting pod to attempt rescheduling", "reason", "CrashLoopBackOff threshold reached")
+        err := r.Delete(ctx, &pod)
+        if err != nil {
+            if errors.IsNotFound(err) || errors.IsConflict(err) {
+                log.Info("Pod likely already deleted or being deleted.")
+                return ctrl.Result{}, nil
+            }
+            log.Error(err, "Failed to delete pod in CrashLoopBackOff")
+            r.Recorder.Eventf(&pod, corev1.EventTypeWarning, "DeleteFailed", "Failed to delete pod (%s/%s) stuck in CrashLoopBackOff: %v", pod.Namespace, pod.Name, err)
+            return ctrl.Result{}, err // Requeue on deletion error
+        }
+        log.Info("Pod deleted successfully.")
+        r.Recorder.Eventf(&pod, corev1.EventTypeNormal, "PodDeleted", "Deleted pod (%s/%s) stuck in CrashLoopBackOff (container: %s, restarts: %d)", pod.Namespace, pod.Name, crashingContainerName, restartCount)
+    }
+
+    return ctrl.Result{}, nil
+}
+
+func (r *PodCrashReconciler) getOwnerDeploymentIfMonitored(ctx context.Context, pod *corev1.Pod, monitoredSet map[string]struct{}) (string, bool) {
+    log := log.FromContext(ctx).WithValues("pod", client.ObjectKeyFromObject(pod))
+
+    rsOwnerRef := metav1.GetControllerOf(pod)
+    if rsOwnerRef == nil || rsOwnerRef.APIVersion != appsv1.SchemeGroupVersion.String() || rsOwnerRef.Kind != "ReplicaSet" {
+        return "", false
+    }
+
+    var rs appsv1.ReplicaSet
+    rsKey := types.NamespacedName{Namespace: pod.Namespace, Name: rsOwnerRef.Name}
+    if err := r.Get(ctx, rsKey, &rs); err != nil {
+        log.V(1).Error(err, "Failed to get owner ReplicaSet", "replicaset", rsKey)
+        return "", false
+    }
+
+    depOwnerRef := metav1.GetControllerOf(&rs)
+    if depOwnerRef == nil || depOwnerRef.APIVersion != appsv1.SchemeGroupVersion.String() || depOwnerRef.Kind != "Deployment" {
+        return "", false
+    }
+
+    deploymentName := fmt.Sprintf("%s/%s", pod.Namespace, depOwnerRef.Name)
+    if _, exists := monitoredSet[deploymentName]; exists {
+        return deploymentName, true
+    }
+
+    return deploymentName, false
+}
+
+// SetupWithManager sets up the controller with the Manager.
+func (r *PodCrashReconciler) SetupWithManager(mgr ctrl.Manager) error {
+    r.Recorder = mgr.GetEventRecorderFor("podcrash-controller")
+
+    return ctrl.NewControllerManagedBy(mgr).
+        Named("podcrash").
+        For(&corev1.Pod{}).
+        // Watches(
+        //     &configv1alpha1.NodeTainterConfig{},
+        //     handler.EnqueueRequestsFromMapFunc(r.mapConfigToPods),
+        // ).
+        Complete(r)
+}
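
Note (not part of this patch): the commented-out Watches above references r.mapConfigToPods, which this change does not add. A minimal sketch of such a mapping function is shown below; it assumes controller-runtime >= 0.15 (context-aware handler.MapFunc) and two extra imports, "strings" and "sigs.k8s.io/controller-runtime/pkg/reconcile". It enqueues the pods of the monitored deployments whenever the NodeTainterConfig changes, so policy updates take effect without waiting for the next pod event.

// Hypothetical sketch, not part of the patch. Requires extra imports:
// "strings" and "sigs.k8s.io/controller-runtime/pkg/reconcile".
func (r *PodCrashReconciler) mapConfigToPods(ctx context.Context, obj client.Object) []reconcile.Request {
    config, ok := obj.(*configv1alpha1.NodeTainterConfig)
    if !ok || config.Spec.CrashLoopPolicy == nil || !config.Spec.CrashLoopPolicy.Enabled {
        return nil
    }

    var requests []reconcile.Request
    for _, item := range config.Spec.CrashLoopPolicy.MonitoredDeployments {
        parts := strings.SplitN(item, "/", 2)
        if len(parts) != 2 {
            continue // malformed entry, expected "namespace/name"
        }

        // Coarse approach: enqueue every pod in the namespace; Reconcile re-checks
        // deployment ownership anyway. A finer version would list by the
        // Deployment's selector labels instead.
        var pods corev1.PodList
        if err := r.List(ctx, &pods, client.InNamespace(parts[0])); err != nil {
            continue
        }
        for i := range pods.Items {
            requests = append(requests, reconcile.Request{
                NamespacedName: client.ObjectKeyFromObject(&pods.Items[i]),
            })
        }
    }
    return requests
}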
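Note (not part of this patch): the change ships without tests. A minimal test sketch for the deletion path could look like the following, assuming the GlobalTaintConfigName constant and configv1alpha1.AddToScheme are available from the existing packages, and that the global config is looked up by name only, as the controller does. File name internal/controller/podcrash_controller_test.go is hypothetical.

package controller

import (
    "context"
    "testing"

    appsv1 "k8s.io/api/apps/v1"
    corev1 "k8s.io/api/core/v1"
    apierrors "k8s.io/apimachinery/pkg/api/errors"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/runtime"
    "k8s.io/apimachinery/pkg/types"
    clientgoscheme "k8s.io/client-go/kubernetes/scheme"
    "k8s.io/client-go/tools/record"
    ctrl "sigs.k8s.io/controller-runtime"
    "sigs.k8s.io/controller-runtime/pkg/client/fake"

    configv1alpha1 "git.vendetti.ru/andy/operator/api/v1alpha1"
)

func TestPodCrashReconcilerDeletesPodOverThreshold(t *testing.T) {
    s := runtime.NewScheme()
    _ = clientgoscheme.AddToScheme(s)
    _ = configv1alpha1.AddToScheme(s)

    isController := true

    // Global config enabling the policy for default/demo.
    cfg := &configv1alpha1.NodeTainterConfig{
        ObjectMeta: metav1.ObjectMeta{Name: GlobalTaintConfigName},
        Spec: configv1alpha1.NodeTainterConfigSpec{
            CrashLoopPolicy: &configv1alpha1.CrashLoopPolicy{
                Enabled:              true,
                RestartThreshold:     5,
                MonitoredDeployments: []string{"default/demo"},
            },
        },
    }

    // Owner chain: Deployment "demo" -> ReplicaSet "demo-abc" -> Pod "demo-abc-xyz".
    rs := &appsv1.ReplicaSet{
        ObjectMeta: metav1.ObjectMeta{
            Name: "demo-abc", Namespace: "default",
            OwnerReferences: []metav1.OwnerReference{{
                APIVersion: "apps/v1", Kind: "Deployment", Name: "demo",
                UID: "dep-uid", Controller: &isController,
            }},
        },
    }
    pod := &corev1.Pod{
        ObjectMeta: metav1.ObjectMeta{
            Name: "demo-abc-xyz", Namespace: "default",
            OwnerReferences: []metav1.OwnerReference{{
                APIVersion: "apps/v1", Kind: "ReplicaSet", Name: "demo-abc",
                UID: "rs-uid", Controller: &isController,
            }},
        },
        Status: corev1.PodStatus{
            ContainerStatuses: []corev1.ContainerStatus{{
                Name:         "app",
                RestartCount: 6,
                State: corev1.ContainerState{
                    Waiting: &corev1.ContainerStateWaiting{Reason: "CrashLoopBackOff"},
                },
            }},
        },
    }

    c := fake.NewClientBuilder().WithScheme(s).WithObjects(cfg, rs, pod).Build()
    r := &PodCrashReconciler{Client: c, Scheme: s, Recorder: record.NewFakeRecorder(10)}

    req := ctrl.Request{NamespacedName: types.NamespacedName{Namespace: "default", Name: "demo-abc-xyz"}}
    if _, err := r.Reconcile(context.Background(), req); err != nil {
        t.Fatalf("Reconcile returned error: %v", err)
    }

    // The pod should be gone: 6 restarts >= threshold 5.
    err := c.Get(context.Background(), req.NamespacedName, &corev1.Pod{})
    if !apierrors.IsNotFound(err) {
        t.Fatalf("expected pod to be deleted, got err=%v", err)
    }
}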