Merge pull request 'resources' (#2) from resources into main
Reviewed-on: #2
This commit is contained in:
		| @@ -23,6 +23,22 @@ import ( | ||||
| // EDIT THIS FILE!  THIS IS SCAFFOLDING FOR YOU TO OWN! | ||||
| // NOTE: json tags are required.  Any new fields you add must have json tags for the fields to be serialized. | ||||
|  | ||||
| // ResourceDefaults defines the default resource requests and limits. | ||||
| // Each field is a quantity string that the Deployment defaults controller | ||||
| // parses with resource.ParseQuantity. A field left empty (or containing only | ||||
| // whitespace) yields no default for that particular resource. | ||||
| type ResourceDefaults struct { | ||||
| 	// Default CPU request (e.g., "100m"). Applied if a container has no CPU request. | ||||
| 	// +optional | ||||
| 	CPURequest string `json:"cpuRequest,omitempty"` | ||||
| 	// Default Memory request (e.g., "128Mi"). Applied if a container has no Memory request. | ||||
| 	// +optional | ||||
| 	MemoryRequest string `json:"memoryRequest,omitempty"` | ||||
| 	// Default CPU limit (e.g., "500m"). Applied if a container has no CPU limit. | ||||
| 	// +optional | ||||
| 	CPULimit string `json:"cpuLimit,omitempty"` | ||||
| 	// Default Memory limit (e.g., "512Mi"). Applied if a container has no Memory limit. | ||||
| 	// +optional | ||||
| 	MemoryLimit string `json:"memoryLimit,omitempty"` | ||||
| } | ||||
|  | ||||
| // NodeTainterConfigSpec defines the desired state of NodeTainterConfig. | ||||
| type NodeTainterConfigSpec struct { | ||||
| 	// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster | ||||
| @@ -38,7 +54,20 @@ type NodeTainterConfigSpec struct { | ||||
|  | ||||
| 	// +kubebuilder:validation:Optional | ||||
| 	// +kubebuilder:validation:MinProperties=1 | ||||
| 	// +optional | ||||
| 	LabelRules map[string]string `json:"labelRules,omitempty"` | ||||
|  | ||||
| 	// ResourceDefaults contains the default requests/limits to apply. | ||||
| 	// If this section is omitted, resource defaulting is disabled. | ||||
| 	// +optional | ||||
| 	ResourceDefaults *ResourceDefaults `json:"resourceDefaults,omitempty"` | ||||
|  | ||||
| 	// OptOutLabelKey is the label key used to exempt Deployments from resource defaulting. | ||||
| 	// If a Deployment has a label with this key (any value), defaults won't be applied. | ||||
| 	// If empty or omitted, the opt-out mechanism is disabled. | ||||
| 	// Example: "my-operator.example.com/skip-resource-defaults" | ||||
| 	// +optional | ||||
| 	OptOutLabelKey string `json:"optOutLabelKey,omitempty"` | ||||
| } | ||||
|  | ||||
| // NodeTainterConfigStatus defines the observed state of NodeTainterConfig. | ||||
|   | ||||
| @@ -114,6 +114,11 @@ func (in *NodeTainterConfigSpec) DeepCopyInto(out *NodeTainterConfigSpec) { | ||||
| 			(*out)[key] = val | ||||
| 		} | ||||
| 	} | ||||
| 	if in.ResourceDefaults != nil { | ||||
| 		in, out := &in.ResourceDefaults, &out.ResourceDefaults | ||||
| 		*out = new(ResourceDefaults) | ||||
| 		**out = **in | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeTainterConfigSpec. | ||||
| @@ -154,3 +159,18 @@ func (in *NodeTainterConfigStatus) DeepCopy() *NodeTainterConfigStatus { | ||||
| 	in.DeepCopyInto(out) | ||||
| 	return out | ||||
| } | ||||
|  | ||||
| // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. | ||||
| func (in *ResourceDefaults) DeepCopyInto(out *ResourceDefaults) { | ||||
| 	// Plain assignment is a complete copy here: every ResourceDefaults field is a string. | ||||
| 	*out = *in | ||||
| } | ||||
|  | ||||
| // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResourceDefaults. | ||||
| // A nil receiver yields nil, so callers may invoke it on an unset optional field. | ||||
| func (in *ResourceDefaults) DeepCopy() *ResourceDefaults { | ||||
| 	if in == nil { | ||||
| 		return nil | ||||
| 	} | ||||
| 	// Allocate the destination and delegate the field copy to DeepCopyInto. | ||||
| 	out := new(ResourceDefaults) | ||||
| 	in.DeepCopyInto(out) | ||||
| 	return out | ||||
| } | ||||
|   | ||||
							
								
								
									
										16
									
								
								cmd/main.go
									
									
									
									
									
								
							
							
						
						
									
										16
									
								
								cmd/main.go
									
									
									
									
									
								
							| @@ -27,6 +27,7 @@ import ( | ||||
| 	_ "k8s.io/client-go/plugin/pkg/client/auth" | ||||
|  | ||||
| 	"k8s.io/apimachinery/pkg/runtime" | ||||
| 	// "k8s.io/client-go/kubernetes" | ||||
| 	utilruntime "k8s.io/apimachinery/pkg/util/runtime" | ||||
| 	clientgoscheme "k8s.io/client-go/kubernetes/scheme" | ||||
| 	ctrl "sigs.k8s.io/controller-runtime" | ||||
| @@ -203,12 +204,23 @@ func main() { | ||||
| 	} | ||||
|  | ||||
| 	if err = (&controller.NodeTainterConfigReconciler{ | ||||
| 		Client: mgr.GetClient(), | ||||
| 		Scheme: mgr.GetScheme(), | ||||
| 		Client:   mgr.GetClient(), | ||||
| 		Scheme:   mgr.GetScheme(), | ||||
| 		Recorder: mgr.GetEventRecorderFor("nodetainter-controller"), | ||||
| 	}).SetupWithManager(mgr); err != nil { | ||||
| 		setupLog.Error(err, "unable to create controller", "controller", "NodeTainterConfig") | ||||
| 		os.Exit(1) | ||||
| 	} | ||||
|  | ||||
| 	if err = (&controller.DeploymentDefaultsReconciler{ | ||||
| 		Client:   mgr.GetClient(), | ||||
| 		Scheme:   mgr.GetScheme(), | ||||
| 		Recorder: mgr.GetEventRecorderFor("deploymentdefaults-controller"), | ||||
| 	}).SetupWithManager(mgr); err != nil { | ||||
| 		setupLog.Error(err, "unable to create controller", "controller", "DeploymentDefaults") | ||||
| 		os.Exit(1) | ||||
| 	} | ||||
|  | ||||
| 	// +kubebuilder:scaffold:builder | ||||
|  | ||||
| 	if metricsCertWatcher != nil { | ||||
|   | ||||
| @@ -46,6 +46,35 @@ spec: | ||||
|                   type: string | ||||
|                 minProperties: 1 | ||||
|                 type: object | ||||
|               optOutLabelKey: | ||||
|                 description: |- | ||||
|                   OptOutLabelKey is the label key used to exempt Deployments from resource defaulting. | ||||
|                   If a Deployment has a label with this key (any value), defaults won't be applied. | ||||
|                   If empty or omitted, the opt-out mechanism is disabled. | ||||
|                   Example: "my-operator.example.com/skip-resource-defaults" | ||||
|                 type: string | ||||
|               resourceDefaults: | ||||
|                 description: |- | ||||
|                   ResourceDefaults contains the default requests/limits to apply. | ||||
|                   If this section is omitted, resource defaulting is disabled. | ||||
|                 properties: | ||||
|                   cpuLimit: | ||||
|                     description: Default CPU limit (e.g., "500m"). Applied if a container | ||||
|                       has no CPU limit. | ||||
|                     type: string | ||||
|                   cpuRequest: | ||||
|                     description: Default CPU request (e.g., "100m"). Applied if a | ||||
|                       container has no CPU request. | ||||
|                     type: string | ||||
|                   memoryLimit: | ||||
|                     description: Default Memory limit (e.g., "512Mi"). Applied if | ||||
|                       a container has no Memory limit. | ||||
|                     type: string | ||||
|                   memoryRequest: | ||||
|                     description: Default Memory request (e.g., "128Mi"). Applied if | ||||
|                       a container has no Memory request. | ||||
|                     type: string | ||||
|                 type: object | ||||
|             type: object | ||||
|           status: | ||||
|             description: NodeTainterConfigStatus defines the observed state of NodeTainterConfig. | ||||
|   | ||||
| @@ -21,6 +21,16 @@ rules: | ||||
|   - patch | ||||
|   - update | ||||
|   - watch | ||||
| - apiGroups: | ||||
|   - apps | ||||
|   resources: | ||||
|   - deployments | ||||
|   verbs: | ||||
|   - get | ||||
|   - list | ||||
|   - patch | ||||
|   - update | ||||
|   - watch | ||||
| - apiGroups: | ||||
|   - operator.andy.vendetti.ru | ||||
|   resources: | ||||
|   | ||||
| @@ -10,3 +10,9 @@ spec: | ||||
|     "andy.vendetti.ru/category=priority": "workload/priority=high:NoSchedule" | ||||
|     "andy.vendetti.ru/category=gpu": "nvidia.com/gpu=present:NoSchedule" | ||||
|     "andy.vendetti.ru/category=svc": "workload/type=service:NoSchedule" | ||||
|   resourceDefaults: | ||||
|     cpuRequest: "100m" | ||||
|     memoryRequest: "128Mi" | ||||
|     cpuLimit: "500m" | ||||
|     memoryLimit: "512Mi" | ||||
|   optOutLabelKey: "andy.vendetti.ru/skip-resource-defaults" | ||||
|   | ||||
							
								
								
									
										217
									
								
								internal/controller/deploymentdefaults_controller.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										217
									
								
								internal/controller/deploymentdefaults_controller.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,217 @@ | ||||
| // internal/controller/deploymentdefaults_controller.go | ||||
| package controller | ||||
|  | ||||
| import ( | ||||
| 	"context" | ||||
| 	"fmt" | ||||
| 	"strings" | ||||
|  | ||||
| 	appsv1 "k8s.io/api/apps/v1" | ||||
| 	corev1 "k8s.io/api/core/v1" | ||||
| 	"k8s.io/apimachinery/pkg/api/errors" | ||||
| 	"k8s.io/apimachinery/pkg/api/resource" | ||||
| 	"k8s.io/apimachinery/pkg/runtime" | ||||
| 	"k8s.io/apimachinery/pkg/types" | ||||
| 	"k8s.io/client-go/tools/record" | ||||
| 	ctrl "sigs.k8s.io/controller-runtime" | ||||
| 	"sigs.k8s.io/controller-runtime/pkg/client" | ||||
| 	"sigs.k8s.io/controller-runtime/pkg/handler" | ||||
| 	"sigs.k8s.io/controller-runtime/pkg/log" | ||||
| 	"sigs.k8s.io/controller-runtime/pkg/reconcile" | ||||
|  | ||||
| 	configv1alpha1 "git.vendetti.ru/andy/operator/api/v1alpha1" | ||||
| ) | ||||
|  | ||||
| // DeploymentDefaultsReconciler reconciles Deployment objects to apply default resources. | ||||
| // The defaults come from the NodeTainterConfig named GlobalTaintConfigName. | ||||
| type DeploymentDefaultsReconciler struct { | ||||
| 	// Embedded API client; its Get, List and Patch methods are called directly on the reconciler. | ||||
| 	client.Client | ||||
| 	// Scheme is supplied by the caller; the methods in this file do not read it. | ||||
| 	Scheme   *runtime.Scheme | ||||
| 	// Recorder emits Kubernetes events on the Deployment being reconciled. | ||||
| 	Recorder record.EventRecorder | ||||
| } | ||||
|  | ||||
| // +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;update;patch | ||||
| // +kubebuilder:rbac:groups=operator.andy.vendetti.ru,resources=nodetainterconfigs,verbs=get;list;watch | ||||
| // +kubebuilder:rbac:groups="",resources=events,verbs=create;patch | ||||
|  | ||||
| func (r *DeploymentDefaultsReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { | ||||
| 	log := log.FromContext(ctx).WithValues("deployment", req.NamespacedName) | ||||
|  | ||||
| 	var deployment appsv1.Deployment | ||||
| 	if err := r.Get(ctx, req.NamespacedName, &deployment); err != nil { | ||||
| 		if errors.IsNotFound(err) { | ||||
| 			log.Info("Deployment not found. Ignoring.") | ||||
| 			return ctrl.Result{}, nil | ||||
| 		} | ||||
| 		log.Error(err, "Failed to get Deployment") | ||||
| 		return ctrl.Result{}, err // Requeue on error | ||||
| 	} | ||||
|  | ||||
| 	var config configv1alpha1.NodeTainterConfig | ||||
| 	configKey := types.NamespacedName{Name: GlobalTaintConfigName} | ||||
| 	if err := r.Get(ctx, configKey, &config); err != nil { | ||||
| 		if errors.IsNotFound(err) { | ||||
| 			log.Info("Global NodeTainterConfig not found, skipping resource defaulting", "configName", GlobalTaintConfigName) | ||||
| 			return ctrl.Result{}, nil | ||||
| 		} | ||||
| 		log.Error(err, "Failed to get NodeTainterConfig for defaults", "configName", GlobalTaintConfigName) | ||||
| 		r.Recorder.Eventf(&deployment, corev1.EventTypeWarning, "ConfigError", "Failed to get config %s: %v", GlobalTaintConfigName, err) | ||||
| 		return ctrl.Result{}, err | ||||
| 	} | ||||
|  | ||||
| 	if config.Spec.ResourceDefaults == nil { | ||||
| 		log.V(1).Info("Resource defaulting is disabled in NodeTainterConfig.") | ||||
| 		return ctrl.Result{}, nil | ||||
| 	} | ||||
|  | ||||
| 	optOutKey := strings.TrimSpace(config.Spec.OptOutLabelKey) | ||||
| 	if optOutKey != "" { | ||||
| 		labels := deployment.GetLabels() | ||||
| 		if _, exists := labels[optOutKey]; exists { | ||||
| 			log.Info("Deployment has opt-out label, skipping resource defaulting", "labelKey", optOutKey) | ||||
| 			r.Recorder.Eventf(&deployment, corev1.EventTypeNormal, "OptedOut", "Skipping resource defaulting due to label %s", optOutKey) | ||||
| 			return ctrl.Result{}, nil | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	defaults := config.Spec.ResourceDefaults | ||||
| 	defaultCPUReq, errCPUReq := parseQuantity(defaults.CPURequest) | ||||
| 	defaultMemReq, errMemReq := parseQuantity(defaults.MemoryRequest) | ||||
| 	defaultCPULim, errCPULim := parseQuantity(defaults.CPULimit) | ||||
| 	defaultMemLim, errMemLim := parseQuantity(defaults.MemoryLimit) | ||||
|  | ||||
| 	var parseErrors []string | ||||
| 	if errCPUReq != nil { | ||||
| 		parseErrors = append(parseErrors, fmt.Sprintf("CPURequest: %v", errCPUReq)) | ||||
| 	} | ||||
| 	if errMemReq != nil { | ||||
| 		parseErrors = append(parseErrors, fmt.Sprintf("MemoryRequest: %v", errMemReq)) | ||||
| 	} | ||||
| 	if errCPULim != nil { | ||||
| 		parseErrors = append(parseErrors, fmt.Sprintf("CPULimit: %v", errCPULim)) | ||||
| 	} | ||||
| 	if errMemLim != nil { | ||||
| 		parseErrors = append(parseErrors, fmt.Sprintf("MemoryLimit: %v", errMemLim)) | ||||
| 	} | ||||
|  | ||||
| 	if len(parseErrors) > 0 { | ||||
| 		parsingError := fmt.Errorf("invalid resource quantity format in NodeTainterConfig %s: %s", config.Name, strings.Join(parseErrors, "; ")) | ||||
| 		log.Error(parsingError, "Default resource parsing failed", "configName", config.Name, "parsingErrors", parseErrors) | ||||
| 		r.Recorder.Eventf(&deployment, corev1.EventTypeWarning, "ConfigError", parsingError.Error()) | ||||
| 		return ctrl.Result{}, nil | ||||
| 	} | ||||
|  | ||||
| 	deploymentCopy := deployment.DeepCopy() | ||||
| 	mutated := false | ||||
|  | ||||
| 	for i, container := range deploymentCopy.Spec.Template.Spec.Containers { | ||||
| 		containerName := container.Name | ||||
| 		log := log.WithValues("container", containerName) | ||||
|  | ||||
| 		if deploymentCopy.Spec.Template.Spec.Containers[i].Resources.Requests == nil { | ||||
| 			deploymentCopy.Spec.Template.Spec.Containers[i].Resources.Requests = corev1.ResourceList{} | ||||
| 		} | ||||
| 		if deploymentCopy.Spec.Template.Spec.Containers[i].Resources.Limits == nil { | ||||
| 			deploymentCopy.Spec.Template.Spec.Containers[i].Resources.Limits = corev1.ResourceList{} | ||||
| 		} | ||||
|  | ||||
| 		requests := deploymentCopy.Spec.Template.Spec.Containers[i].Resources.Requests | ||||
| 		limits := deploymentCopy.Spec.Template.Spec.Containers[i].Resources.Limits | ||||
|  | ||||
| 		if _, exists := requests[corev1.ResourceCPU]; !exists && defaultCPUReq != nil { | ||||
| 			requests[corev1.ResourceCPU] = *defaultCPUReq | ||||
| 			log.V(1).Info("Applied default CPU request", "value", defaultCPUReq.String()) | ||||
| 			mutated = true | ||||
| 		} | ||||
| 		if _, exists := requests[corev1.ResourceMemory]; !exists && defaultMemReq != nil { | ||||
| 			requests[corev1.ResourceMemory] = *defaultMemReq | ||||
| 			log.V(1).Info("Applied default Memory request", "value", defaultMemReq.String()) | ||||
| 			mutated = true | ||||
| 		} | ||||
| 		if _, exists := limits[corev1.ResourceCPU]; !exists && defaultCPULim != nil { | ||||
| 			limits[corev1.ResourceCPU] = *defaultCPULim | ||||
| 			log.V(1).Info("Applied default CPU limit", "value", defaultCPULim.String()) | ||||
| 			mutated = true | ||||
| 		} | ||||
| 		if _, exists := limits[corev1.ResourceMemory]; !exists && defaultMemLim != nil { | ||||
| 			limits[corev1.ResourceMemory] = *defaultMemLim | ||||
| 			log.V(1).Info("Applied default Memory limit", "value", defaultMemLim.String()) | ||||
| 			mutated = true | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	if mutated { | ||||
| 		log.Info("Applying default resource requests/limits to Deployment") | ||||
| 		if err := r.Patch(ctx, deploymentCopy, client.MergeFrom(&deployment)); err != nil { | ||||
| 			log.Error(err, "Failed to patch Deployment with default resources") | ||||
| 			r.Recorder.Eventf(&deployment, corev1.EventTypeWarning, "UpdateFailed", "Failed to apply default resources: %v", err) | ||||
| 			return ctrl.Result{}, err | ||||
| 		} | ||||
| 		log.Info("Successfully applied default resources") | ||||
| 		r.Recorder.Eventf(&deployment, corev1.EventTypeNormal, "DefaultsApplied", "Default resource requests/limits applied") | ||||
| 	} else { | ||||
| 		log.V(1).Info("Deployment already has necessary resource requests/limits or no defaults configured.") | ||||
| 	} | ||||
|  | ||||
| 	return ctrl.Result{}, nil | ||||
| } | ||||
|  | ||||
| func parseQuantity(s string) (*resource.Quantity, error) { | ||||
| 	s = strings.TrimSpace(s) | ||||
| 	if s == "" { | ||||
| 		return nil, nil | ||||
| 	} | ||||
| 	q, err := resource.ParseQuantity(s) | ||||
| 	if err != nil { | ||||
| 		return nil, fmt.Errorf("invalid quantity format '%s': %w", s, err) | ||||
| 	} | ||||
| 	return &q, nil | ||||
| } | ||||
|  | ||||
| // Map function for NodeTainterConfig: Trigger reconcile for ALL Deployments when the specific config changes | ||||
| func (r *DeploymentDefaultsReconciler) mapConfigToDeployments(ctx context.Context, obj client.Object) []reconcile.Request { | ||||
| 	config, ok := obj.(*configv1alpha1.NodeTainterConfig) | ||||
| 	log := log.FromContext(ctx) | ||||
| 	if !ok || config.Name != GlobalTaintConfigName { | ||||
| 		return nil | ||||
| 	} | ||||
|  | ||||
| 	log.Info("Global NodeTainterConfig changed, queuing reconciliation for all deployments potentially affected by resource defaults", "configName", config.Name) | ||||
|  | ||||
| 	var deploymentList appsv1.DeploymentList | ||||
| 	if err := r.List(ctx, &deploymentList, client.InNamespace("")); err != nil { | ||||
| 		log.Error(err, "Failed to list deployments for config change") | ||||
| 		return nil | ||||
| 	} | ||||
|  | ||||
| 	requests := make([]reconcile.Request, 0, len(deploymentList.Items)) | ||||
| 	optOutKey := strings.TrimSpace(config.Spec.OptOutLabelKey) | ||||
|  | ||||
| 	for _, deployment := range deploymentList.Items { | ||||
| 		if optOutKey != "" { | ||||
| 			labels := deployment.GetLabels() | ||||
| 			if _, exists := labels[optOutKey]; exists { | ||||
| 				continue | ||||
| 			} | ||||
| 		} | ||||
| 		requests = append(requests, reconcile.Request{ | ||||
| 			NamespacedName: types.NamespacedName{ | ||||
| 				Name:      deployment.Name, | ||||
| 				Namespace: deployment.Namespace, | ||||
| 			}, | ||||
| 		}) | ||||
| 	} | ||||
| 	log.Info("Queued deployment reconcile requests", "count", len(requests)) | ||||
| 	return requests | ||||
| } | ||||
|  | ||||
| func (r *DeploymentDefaultsReconciler) SetupWithManager(mgr ctrl.Manager) error { | ||||
| 	r.Recorder = mgr.GetEventRecorderFor("deploymentdefaults-controller") | ||||
|  | ||||
| 	return ctrl.NewControllerManagedBy(mgr). | ||||
| 		For(&appsv1.Deployment{}). | ||||
| 		Watches( | ||||
| 			&configv1alpha1.NodeTainterConfig{}, | ||||
| 			handler.EnqueueRequestsFromMapFunc(r.mapConfigToDeployments), | ||||
| 		). | ||||
| 		Complete(r) | ||||
| } | ||||
		Reference in New Issue
	
	Block a user