Merge pull request 'resources' (#2) from resources into main
Reviewed-on: #2
This commit is contained in:
@@ -23,6 +23,22 @@ import (
|
||||
// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
|
||||
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
|
||||
|
||||
// ResourceDefaults holds the fallback resource requests and limits.
// Every value is carried as a string (the examples use Kubernetes quantity
// notation such as "100m" or "128Mi"); all four fields are optional and are
// omitted from the serialized form when empty.
type ResourceDefaults struct {
	// Default CPU request (e.g., "100m"). Applied if a container has no CPU request.
	// +optional
	CPURequest string `json:"cpuRequest,omitempty"`

	// Default Memory request (e.g., "128Mi"). Applied if a container has no Memory request.
	// +optional
	MemoryRequest string `json:"memoryRequest,omitempty"`

	// Default CPU limit (e.g., "500m"). Applied if a container has no CPU limit.
	// +optional
	CPULimit string `json:"cpuLimit,omitempty"`

	// Default Memory limit (e.g., "512Mi"). Applied if a container has no Memory limit.
	// +optional
	MemoryLimit string `json:"memoryLimit,omitempty"`
}
|
||||
|
||||
// NodeTainterConfigSpec defines the desired state of NodeTainterConfig.
|
||||
type NodeTainterConfigSpec struct {
|
||||
// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
|
||||
@@ -38,7 +54,20 @@ type NodeTainterConfigSpec struct {
|
||||
|
||||
// +kubebuilder:validation:Optional
|
||||
// +kubebuilder:validation:MinProperties=1
|
||||
// +optional
|
||||
LabelRules map[string]string `json:"labelRules,omitempty"`
|
||||
|
||||
// ResourceDefaults contains the default requests/limits to apply.
|
||||
// If this section is omitted, resource defaulting is disabled.
|
||||
// +optional
|
||||
ResourceDefaults *ResourceDefaults `json:"resourceDefaults,omitempty"`
|
||||
|
||||
// OptOutLabelKey is the label key used to exempt Deployments from resource defaulting.
|
||||
// If a Deployment has a label with this key (any value), defaults won't be applied.
|
||||
// If empty or omitted, the opt-out mechanism is disabled.
|
||||
// Example: "my-operator.example.com/skip-resource-defaults"
|
||||
// +optional
|
||||
OptOutLabelKey string `json:"optOutLabelKey,omitempty"`
|
||||
}
|
||||
|
||||
// NodeTainterConfigStatus defines the observed state of NodeTainterConfig.
|
||||
|
@@ -114,6 +114,11 @@ func (in *NodeTainterConfigSpec) DeepCopyInto(out *NodeTainterConfigSpec) {
|
||||
(*out)[key] = val
|
||||
}
|
||||
}
|
||||
if in.ResourceDefaults != nil {
|
||||
in, out := &in.ResourceDefaults, &out.ResourceDefaults
|
||||
*out = new(ResourceDefaults)
|
||||
**out = **in
|
||||
}
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeTainterConfigSpec.
|
||||
@@ -154,3 +159,18 @@ func (in *NodeTainterConfigStatus) DeepCopy() *NodeTainterConfigStatus {
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
// A plain value assignment is a complete copy here: every field of ResourceDefaults is a string.
|
||||
func (in *ResourceDefaults) DeepCopyInto(out *ResourceDefaults) {
|
||||
*out = *in
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResourceDefaults.
// A nil receiver yields nil; otherwise a freshly allocated copy is returned.
|
||||
func (in *ResourceDefaults) DeepCopy() *ResourceDefaults {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(ResourceDefaults)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
12
cmd/main.go
12
cmd/main.go
@@ -27,6 +27,7 @@ import (
|
||||
_ "k8s.io/client-go/plugin/pkg/client/auth"
|
||||
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
// "k8s.io/client-go/kubernetes"
|
||||
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
|
||||
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
|
||||
ctrl "sigs.k8s.io/controller-runtime"
|
||||
@@ -205,10 +206,21 @@ func main() {
|
||||
if err = (&controller.NodeTainterConfigReconciler{
|
||||
Client: mgr.GetClient(),
|
||||
Scheme: mgr.GetScheme(),
|
||||
Recorder: mgr.GetEventRecorderFor("nodetainter-controller"),
|
||||
}).SetupWithManager(mgr); err != nil {
|
||||
setupLog.Error(err, "unable to create controller", "controller", "NodeTainterConfig")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if err = (&controller.DeploymentDefaultsReconciler{
|
||||
Client: mgr.GetClient(),
|
||||
Scheme: mgr.GetScheme(),
|
||||
Recorder: mgr.GetEventRecorderFor("deploymentdefaults-controller"),
|
||||
}).SetupWithManager(mgr); err != nil {
|
||||
setupLog.Error(err, "unable to create controller", "controller", "DeploymentDefaults")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// +kubebuilder:scaffold:builder
|
||||
|
||||
if metricsCertWatcher != nil {
|
||||
|
@@ -46,6 +46,35 @@ spec:
|
||||
type: string
|
||||
minProperties: 1
|
||||
type: object
|
||||
optOutLabelKey:
|
||||
description: |-
|
||||
OptOutLabelKey is the label key used to exempt Deployments from resource defaulting.
|
||||
If a Deployment has a label with this key (any value), defaults won't be applied.
|
||||
If empty or omitted, the opt-out mechanism is disabled.
|
||||
Example: "my-operator.example.com/skip-resource-defaults"
|
||||
type: string
|
||||
resourceDefaults:
|
||||
description: |-
|
||||
ResourceDefaults contains the default requests/limits to apply.
|
||||
If this section is omitted, resource defaulting is disabled.
|
||||
properties:
|
||||
cpuLimit:
|
||||
description: Default CPU limit (e.g., "500m"). Applied if a container
|
||||
has no CPU limit.
|
||||
type: string
|
||||
cpuRequest:
|
||||
description: Default CPU request (e.g., "100m"). Applied if a
|
||||
container has no CPU request.
|
||||
type: string
|
||||
memoryLimit:
|
||||
description: Default Memory limit (e.g., "512Mi"). Applied if
|
||||
a container has no Memory limit.
|
||||
type: string
|
||||
memoryRequest:
|
||||
description: Default Memory request (e.g., "128Mi"). Applied if
|
||||
a container has no Memory request.
|
||||
type: string
|
||||
type: object
|
||||
type: object
|
||||
status:
|
||||
description: NodeTainterConfigStatus defines the observed state of NodeTainterConfig.
|
||||
|
@@ -21,6 +21,16 @@ rules:
|
||||
- patch
|
||||
- update
|
||||
- watch
|
||||
- apiGroups:
|
||||
- apps
|
||||
resources:
|
||||
- deployments
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- patch
|
||||
- update
|
||||
- watch
|
||||
- apiGroups:
|
||||
- operator.andy.vendetti.ru
|
||||
resources:
|
||||
|
@@ -10,3 +10,9 @@ spec:
|
||||
"andy.vendetti.ru/category=priority": "workload/priority=high:NoSchedule"
|
||||
"andy.vendetti.ru/category=gpu": "nvidia.com/gpu=present:NoSchedule"
|
||||
"andy.vendetti.ru/category=svc": "workload/type=service:NoSchedule"
|
||||
resourceDefaults:
|
||||
cpuRequest: "100m"
|
||||
memoryRequest: "128Mi"
|
||||
cpuLimit: "500m"
|
||||
memoryLimit: "512Mi"
|
||||
optOutLabelKey: "andy.vendetti.ru/skip-resource-defaults"
|
||||
|
217
internal/controller/deploymentdefaults_controller.go
Normal file
217
internal/controller/deploymentdefaults_controller.go
Normal file
@@ -0,0 +1,217 @@
|
||||
// internal/controller/deploymentdefaults_controller.go
|
||||
package controller
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
appsv1 "k8s.io/api/apps/v1"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/errors"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/client-go/tools/record"
|
||||
ctrl "sigs.k8s.io/controller-runtime"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||
"sigs.k8s.io/controller-runtime/pkg/handler"
|
||||
"sigs.k8s.io/controller-runtime/pkg/log"
|
||||
"sigs.k8s.io/controller-runtime/pkg/reconcile"
|
||||
|
||||
configv1alpha1 "git.vendetti.ru/andy/operator/api/v1alpha1"
|
||||
)
|
||||
|
||||
// DeploymentDefaultsReconciler reconciles Deployment objects to apply default resources.
|
||||
type DeploymentDefaultsReconciler struct {
|
||||
client.Client
|
||||
Scheme *runtime.Scheme
|
||||
Recorder record.EventRecorder
|
||||
}
|
||||
|
||||
// +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;update;patch
|
||||
// +kubebuilder:rbac:groups=operator.andy.vendetti.ru,resources=nodetainterconfigs,verbs=get;list;watch
|
||||
// +kubebuilder:rbac:groups="",resources=events,verbs=create;patch
|
||||
|
||||
func (r *DeploymentDefaultsReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
|
||||
log := log.FromContext(ctx).WithValues("deployment", req.NamespacedName)
|
||||
|
||||
var deployment appsv1.Deployment
|
||||
if err := r.Get(ctx, req.NamespacedName, &deployment); err != nil {
|
||||
if errors.IsNotFound(err) {
|
||||
log.Info("Deployment not found. Ignoring.")
|
||||
return ctrl.Result{}, nil
|
||||
}
|
||||
log.Error(err, "Failed to get Deployment")
|
||||
return ctrl.Result{}, err // Requeue on error
|
||||
}
|
||||
|
||||
var config configv1alpha1.NodeTainterConfig
|
||||
configKey := types.NamespacedName{Name: GlobalTaintConfigName}
|
||||
if err := r.Get(ctx, configKey, &config); err != nil {
|
||||
if errors.IsNotFound(err) {
|
||||
log.Info("Global NodeTainterConfig not found, skipping resource defaulting", "configName", GlobalTaintConfigName)
|
||||
return ctrl.Result{}, nil
|
||||
}
|
||||
log.Error(err, "Failed to get NodeTainterConfig for defaults", "configName", GlobalTaintConfigName)
|
||||
r.Recorder.Eventf(&deployment, corev1.EventTypeWarning, "ConfigError", "Failed to get config %s: %v", GlobalTaintConfigName, err)
|
||||
return ctrl.Result{}, err
|
||||
}
|
||||
|
||||
if config.Spec.ResourceDefaults == nil {
|
||||
log.V(1).Info("Resource defaulting is disabled in NodeTainterConfig.")
|
||||
return ctrl.Result{}, nil
|
||||
}
|
||||
|
||||
optOutKey := strings.TrimSpace(config.Spec.OptOutLabelKey)
|
||||
if optOutKey != "" {
|
||||
labels := deployment.GetLabels()
|
||||
if _, exists := labels[optOutKey]; exists {
|
||||
log.Info("Deployment has opt-out label, skipping resource defaulting", "labelKey", optOutKey)
|
||||
r.Recorder.Eventf(&deployment, corev1.EventTypeNormal, "OptedOut", "Skipping resource defaulting due to label %s", optOutKey)
|
||||
return ctrl.Result{}, nil
|
||||
}
|
||||
}
|
||||
|
||||
defaults := config.Spec.ResourceDefaults
|
||||
defaultCPUReq, errCPUReq := parseQuantity(defaults.CPURequest)
|
||||
defaultMemReq, errMemReq := parseQuantity(defaults.MemoryRequest)
|
||||
defaultCPULim, errCPULim := parseQuantity(defaults.CPULimit)
|
||||
defaultMemLim, errMemLim := parseQuantity(defaults.MemoryLimit)
|
||||
|
||||
var parseErrors []string
|
||||
if errCPUReq != nil {
|
||||
parseErrors = append(parseErrors, fmt.Sprintf("CPURequest: %v", errCPUReq))
|
||||
}
|
||||
if errMemReq != nil {
|
||||
parseErrors = append(parseErrors, fmt.Sprintf("MemoryRequest: %v", errMemReq))
|
||||
}
|
||||
if errCPULim != nil {
|
||||
parseErrors = append(parseErrors, fmt.Sprintf("CPULimit: %v", errCPULim))
|
||||
}
|
||||
if errMemLim != nil {
|
||||
parseErrors = append(parseErrors, fmt.Sprintf("MemoryLimit: %v", errMemLim))
|
||||
}
|
||||
|
||||
if len(parseErrors) > 0 {
|
||||
parsingError := fmt.Errorf("invalid resource quantity format in NodeTainterConfig %s: %s", config.Name, strings.Join(parseErrors, "; "))
|
||||
log.Error(parsingError, "Default resource parsing failed", "configName", config.Name, "parsingErrors", parseErrors)
|
||||
r.Recorder.Eventf(&deployment, corev1.EventTypeWarning, "ConfigError", parsingError.Error())
|
||||
return ctrl.Result{}, nil
|
||||
}
|
||||
|
||||
deploymentCopy := deployment.DeepCopy()
|
||||
mutated := false
|
||||
|
||||
for i, container := range deploymentCopy.Spec.Template.Spec.Containers {
|
||||
containerName := container.Name
|
||||
log := log.WithValues("container", containerName)
|
||||
|
||||
if deploymentCopy.Spec.Template.Spec.Containers[i].Resources.Requests == nil {
|
||||
deploymentCopy.Spec.Template.Spec.Containers[i].Resources.Requests = corev1.ResourceList{}
|
||||
}
|
||||
if deploymentCopy.Spec.Template.Spec.Containers[i].Resources.Limits == nil {
|
||||
deploymentCopy.Spec.Template.Spec.Containers[i].Resources.Limits = corev1.ResourceList{}
|
||||
}
|
||||
|
||||
requests := deploymentCopy.Spec.Template.Spec.Containers[i].Resources.Requests
|
||||
limits := deploymentCopy.Spec.Template.Spec.Containers[i].Resources.Limits
|
||||
|
||||
if _, exists := requests[corev1.ResourceCPU]; !exists && defaultCPUReq != nil {
|
||||
requests[corev1.ResourceCPU] = *defaultCPUReq
|
||||
log.V(1).Info("Applied default CPU request", "value", defaultCPUReq.String())
|
||||
mutated = true
|
||||
}
|
||||
if _, exists := requests[corev1.ResourceMemory]; !exists && defaultMemReq != nil {
|
||||
requests[corev1.ResourceMemory] = *defaultMemReq
|
||||
log.V(1).Info("Applied default Memory request", "value", defaultMemReq.String())
|
||||
mutated = true
|
||||
}
|
||||
if _, exists := limits[corev1.ResourceCPU]; !exists && defaultCPULim != nil {
|
||||
limits[corev1.ResourceCPU] = *defaultCPULim
|
||||
log.V(1).Info("Applied default CPU limit", "value", defaultCPULim.String())
|
||||
mutated = true
|
||||
}
|
||||
if _, exists := limits[corev1.ResourceMemory]; !exists && defaultMemLim != nil {
|
||||
limits[corev1.ResourceMemory] = *defaultMemLim
|
||||
log.V(1).Info("Applied default Memory limit", "value", defaultMemLim.String())
|
||||
mutated = true
|
||||
}
|
||||
}
|
||||
|
||||
if mutated {
|
||||
log.Info("Applying default resource requests/limits to Deployment")
|
||||
if err := r.Patch(ctx, deploymentCopy, client.MergeFrom(&deployment)); err != nil {
|
||||
log.Error(err, "Failed to patch Deployment with default resources")
|
||||
r.Recorder.Eventf(&deployment, corev1.EventTypeWarning, "UpdateFailed", "Failed to apply default resources: %v", err)
|
||||
return ctrl.Result{}, err
|
||||
}
|
||||
log.Info("Successfully applied default resources")
|
||||
r.Recorder.Eventf(&deployment, corev1.EventTypeNormal, "DefaultsApplied", "Default resource requests/limits applied")
|
||||
} else {
|
||||
log.V(1).Info("Deployment already has necessary resource requests/limits or no defaults configured.")
|
||||
}
|
||||
|
||||
return ctrl.Result{}, nil
|
||||
}
|
||||
|
||||
func parseQuantity(s string) (*resource.Quantity, error) {
|
||||
s = strings.TrimSpace(s)
|
||||
if s == "" {
|
||||
return nil, nil
|
||||
}
|
||||
q, err := resource.ParseQuantity(s)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid quantity format '%s': %w", s, err)
|
||||
}
|
||||
return &q, nil
|
||||
}
|
||||
|
||||
// Map function for NodeTainterConfig: Trigger reconcile for ALL Deployments when the specific config changes
|
||||
func (r *DeploymentDefaultsReconciler) mapConfigToDeployments(ctx context.Context, obj client.Object) []reconcile.Request {
|
||||
config, ok := obj.(*configv1alpha1.NodeTainterConfig)
|
||||
log := log.FromContext(ctx)
|
||||
if !ok || config.Name != GlobalTaintConfigName {
|
||||
return nil
|
||||
}
|
||||
|
||||
log.Info("Global NodeTainterConfig changed, queuing reconciliation for all deployments potentially affected by resource defaults", "configName", config.Name)
|
||||
|
||||
var deploymentList appsv1.DeploymentList
|
||||
if err := r.List(ctx, &deploymentList, client.InNamespace("")); err != nil {
|
||||
log.Error(err, "Failed to list deployments for config change")
|
||||
return nil
|
||||
}
|
||||
|
||||
requests := make([]reconcile.Request, 0, len(deploymentList.Items))
|
||||
optOutKey := strings.TrimSpace(config.Spec.OptOutLabelKey)
|
||||
|
||||
for _, deployment := range deploymentList.Items {
|
||||
if optOutKey != "" {
|
||||
labels := deployment.GetLabels()
|
||||
if _, exists := labels[optOutKey]; exists {
|
||||
continue
|
||||
}
|
||||
}
|
||||
requests = append(requests, reconcile.Request{
|
||||
NamespacedName: types.NamespacedName{
|
||||
Name: deployment.Name,
|
||||
Namespace: deployment.Namespace,
|
||||
},
|
||||
})
|
||||
}
|
||||
log.Info("Queued deployment reconcile requests", "count", len(requests))
|
||||
return requests
|
||||
}
|
||||
|
||||
func (r *DeploymentDefaultsReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
||||
r.Recorder = mgr.GetEventRecorderFor("deploymentdefaults-controller")
|
||||
|
||||
return ctrl.NewControllerManagedBy(mgr).
|
||||
For(&appsv1.Deployment{}).
|
||||
Watches(
|
||||
&configv1alpha1.NodeTainterConfig{},
|
||||
handler.EnqueueRequestsFromMapFunc(r.mapConfigToDeployments),
|
||||
).
|
||||
Complete(r)
|
||||
}
|
Reference in New Issue
Block a user