Compare commits

..

3 Commits

Author SHA1 Message Date
8029529f89 Merge pull request 'resources' (#2) from resources into main
All checks were successful
Lint / Run on Ubuntu (push) Successful in 29s
Tests / Run on Ubuntu (push) Successful in 1m28s
Reviewed-on: #2
2025-05-01 18:02:17 +03:00
2f05069e01 linter fixes
All checks were successful
Lint / Run on Ubuntu (push) Successful in 21s
Tests / Run on Ubuntu (push) Successful in 26s
Lint / Run on Ubuntu (pull_request) Successful in 20s
Tests / Run on Ubuntu (pull_request) Successful in 50s
2025-05-01 19:39:58 +05:00
23b333c07d alpha ver. of deployments resource controller
Some checks failed
Lint / Run on Ubuntu (push) Failing after 53s
Tests / Run on Ubuntu (push) Successful in 55s
2025-05-01 18:00:10 +05:00
7 changed files with 325 additions and 2 deletions

View File

@@ -23,6 +23,22 @@ import (
// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! // EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. // NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
// ResourceDefaults defines the default resource requests and limits.
// Each field holds a quantity as a string in the form shown by its example.
// All four fields are optional and independent of each other, so any subset
// may be set.
type ResourceDefaults struct {
// Default CPU request (e.g., "100m"). Applied if a container has no CPU request.
// +optional
CPURequest string `json:"cpuRequest,omitempty"`
// Default Memory request (e.g., "128Mi"). Applied if a container has no Memory request.
// +optional
MemoryRequest string `json:"memoryRequest,omitempty"`
// Default CPU limit (e.g., "500m"). Applied if a container has no CPU limit.
// +optional
CPULimit string `json:"cpuLimit,omitempty"`
// Default Memory limit (e.g., "512Mi"). Applied if a container has no Memory limit.
// +optional
MemoryLimit string `json:"memoryLimit,omitempty"`
}
// NodeTainterConfigSpec defines the desired state of NodeTainterConfig. // NodeTainterConfigSpec defines the desired state of NodeTainterConfig.
type NodeTainterConfigSpec struct { type NodeTainterConfigSpec struct {
// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
@@ -38,7 +54,20 @@ type NodeTainterConfigSpec struct {
// +kubebuilder:validation:Optional // +kubebuilder:validation:Optional
// +kubebuilder:validation:MinProperties=1 // +kubebuilder:validation:MinProperties=1
// +optional
LabelRules map[string]string `json:"labelRules,omitempty"` LabelRules map[string]string `json:"labelRules,omitempty"`
// ResourceDefaults contains the default requests/limits to apply.
// If this section is omitted, resource defaulting is disabled.
// +optional
ResourceDefaults *ResourceDefaults `json:"resourceDefaults,omitempty"`
// OptOutLabelKey is the label key used to exempt Deployments from resource defaulting.
// If a Deployment has a label with this key (any value), defaults won't be applied.
// If empty or omitted, the opt-out mechanism is disabled.
// Example: "my-operator.example.com/skip-resource-defaults"
// +optional
OptOutLabelKey string `json:"optOutLabelKey,omitempty"`
} }
// NodeTainterConfigStatus defines the observed state of NodeTainterConfig. // NodeTainterConfigStatus defines the observed state of NodeTainterConfig.

View File

@@ -114,6 +114,11 @@ func (in *NodeTainterConfigSpec) DeepCopyInto(out *NodeTainterConfigSpec) {
(*out)[key] = val (*out)[key] = val
} }
} }
if in.ResourceDefaults != nil {
in, out := &in.ResourceDefaults, &out.ResourceDefaults
*out = new(ResourceDefaults)
**out = **in
}
} }
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeTainterConfigSpec. // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeTainterConfigSpec.
@@ -154,3 +159,18 @@ func (in *NodeTainterConfigStatus) DeepCopy() *NodeTainterConfigStatus {
in.DeepCopyInto(out) in.DeepCopyInto(out)
return out return out
} }
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ResourceDefaults) DeepCopyInto(out *ResourceDefaults) {
// A plain struct assignment copies every field of the receiver into out.
*out = *in
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ResourceDefaults.
// A nil receiver yields nil, so callers may invoke it on an unset optional pointer.
func (in *ResourceDefaults) DeepCopy() *ResourceDefaults {
if in == nil {
return nil
}
// Allocate the destination and delegate the actual copying to DeepCopyInto.
out := new(ResourceDefaults)
in.DeepCopyInto(out)
return out
}

View File

@@ -27,6 +27,7 @@ import (
_ "k8s.io/client-go/plugin/pkg/client/auth" _ "k8s.io/client-go/plugin/pkg/client/auth"
"k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime"
// "k8s.io/client-go/kubernetes"
utilruntime "k8s.io/apimachinery/pkg/util/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme" clientgoscheme "k8s.io/client-go/kubernetes/scheme"
ctrl "sigs.k8s.io/controller-runtime" ctrl "sigs.k8s.io/controller-runtime"
@@ -203,12 +204,23 @@ func main() {
} }
if err = (&controller.NodeTainterConfigReconciler{ if err = (&controller.NodeTainterConfigReconciler{
Client: mgr.GetClient(), Client: mgr.GetClient(),
Scheme: mgr.GetScheme(), Scheme: mgr.GetScheme(),
Recorder: mgr.GetEventRecorderFor("nodetainter-controller"),
}).SetupWithManager(mgr); err != nil { }).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "NodeTainterConfig") setupLog.Error(err, "unable to create controller", "controller", "NodeTainterConfig")
os.Exit(1) os.Exit(1)
} }
if err = (&controller.DeploymentDefaultsReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Recorder: mgr.GetEventRecorderFor("deploymentdefaults-controller"),
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "DeploymentDefaults")
os.Exit(1)
}
// +kubebuilder:scaffold:builder // +kubebuilder:scaffold:builder
if metricsCertWatcher != nil { if metricsCertWatcher != nil {

View File

@@ -46,6 +46,35 @@ spec:
type: string type: string
minProperties: 1 minProperties: 1
type: object type: object
optOutLabelKey:
description: |-
OptOutLabelKey is the label key used to exempt Deployments from resource defaulting.
If a Deployment has a label with this key (any value), defaults won't be applied.
If empty or omitted, the opt-out mechanism is disabled.
Example: "my-operator.example.com/skip-resource-defaults"
type: string
resourceDefaults:
description: |-
ResourceDefaults contains the default requests/limits to apply.
If this section is omitted, resource defaulting is disabled.
properties:
cpuLimit:
description: Default CPU limit (e.g., "500m"). Applied if a container
has no CPU limit.
type: string
cpuRequest:
description: Default CPU request (e.g., "100m"). Applied if a
container has no CPU request.
type: string
memoryLimit:
description: Default Memory limit (e.g., "512Mi"). Applied if
a container has no Memory limit.
type: string
memoryRequest:
description: Default Memory request (e.g., "128Mi"). Applied if
a container has no Memory request.
type: string
type: object
type: object type: object
status: status:
description: NodeTainterConfigStatus defines the observed state of NodeTainterConfig. description: NodeTainterConfigStatus defines the observed state of NodeTainterConfig.

View File

@@ -21,6 +21,16 @@ rules:
- patch - patch
- update - update
- watch - watch
- apiGroups:
- apps
resources:
- deployments
verbs:
- get
- list
- patch
- update
- watch
- apiGroups: - apiGroups:
- operator.andy.vendetti.ru - operator.andy.vendetti.ru
resources: resources:

View File

@@ -10,3 +10,9 @@ spec:
"andy.vendetti.ru/category=priority": "workload/priority=high:NoSchedule" "andy.vendetti.ru/category=priority": "workload/priority=high:NoSchedule"
"andy.vendetti.ru/category=gpu": "nvidia.com/gpu=present:NoSchedule" "andy.vendetti.ru/category=gpu": "nvidia.com/gpu=present:NoSchedule"
"andy.vendetti.ru/category=svc": "workload/type=service:NoSchedule" "andy.vendetti.ru/category=svc": "workload/type=service:NoSchedule"
resourceDefaults:
cpuRequest: "100m"
memoryRequest: "128Mi"
cpuLimit: "500m"
memoryLimit: "512Mi"
optOutLabelKey: "andy.vendetti.ru/skip-resource-defaults"

View File

@@ -0,0 +1,217 @@
// internal/controller/deploymentdefaults_controller.go
package controller
import (
"context"
"fmt"
"strings"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
configv1alpha1 "git.vendetti.ru/andy/operator/api/v1alpha1"
)
// DeploymentDefaultsReconciler reconciles Deployment objects to apply default resources.
// The defaults are read from the NodeTainterConfig named GlobalTaintConfigName,
// which is looked up by name only (no namespace).
type DeploymentDefaultsReconciler struct {
// Client is the embedded API client used to get, list and patch objects.
client.Client
// Scheme is supplied by the caller; the methods of this type do not read it.
Scheme *runtime.Scheme
// Recorder emits Kubernetes events attached to the Deployment being reconciled.
Recorder record.EventRecorder
}
// +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;update;patch
// +kubebuilder:rbac:groups=operator.andy.vendetti.ru,resources=nodetainterconfigs,verbs=get;list;watch
// +kubebuilder:rbac:groups="",resources=events,verbs=create;patch

// Reconcile fills in missing CPU/memory requests and limits on every container
// of the requested Deployment, using the defaults from the global
// NodeTainterConfig.
//
// The reconcile is a no-op (nil error, no requeue) when:
//   - the Deployment or the global NodeTainterConfig no longer exists,
//   - the config has no resourceDefaults section,
//   - the Deployment carries the configured opt-out label key (any value),
//   - a default quantity cannot be parsed; a Warning event is recorded and the
//     Deployment is left alone, since retrying cannot help until the config is
//     edited.
//
// Values a container already sets are never overwritten. When at least one
// default was added, the change is sent as a merge patch computed against the
// Deployment as it was read. API errors from Get or Patch are returned so the
// request is retried.
func (r *DeploymentDefaultsReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	// Named logger rather than log so the imported log package is not shadowed.
	logger := log.FromContext(ctx).WithValues("deployment", req.NamespacedName)

	var deployment appsv1.Deployment
	if err := r.Get(ctx, req.NamespacedName, &deployment); err != nil {
		if errors.IsNotFound(err) {
			logger.Info("Deployment not found. Ignoring.")
			return ctrl.Result{}, nil
		}
		logger.Error(err, "Failed to get Deployment")
		return ctrl.Result{}, err // Requeue on error
	}

	// The config is fetched by name only; no namespace is set on the key.
	var config configv1alpha1.NodeTainterConfig
	configKey := types.NamespacedName{Name: GlobalTaintConfigName}
	if err := r.Get(ctx, configKey, &config); err != nil {
		if errors.IsNotFound(err) {
			logger.Info("Global NodeTainterConfig not found, skipping resource defaulting", "configName", GlobalTaintConfigName)
			return ctrl.Result{}, nil
		}
		logger.Error(err, "Failed to get NodeTainterConfig for defaults", "configName", GlobalTaintConfigName)
		r.Recorder.Eventf(&deployment, corev1.EventTypeWarning, "ConfigError", "Failed to get config %s: %v", GlobalTaintConfigName, err)
		return ctrl.Result{}, err
	}

	if config.Spec.ResourceDefaults == nil {
		logger.V(1).Info("Resource defaulting is disabled in NodeTainterConfig.")
		return ctrl.Result{}, nil
	}

	// An empty (or whitespace-only) key disables the opt-out mechanism.
	optOutKey := strings.TrimSpace(config.Spec.OptOutLabelKey)
	if optOutKey != "" {
		if _, exists := deployment.GetLabels()[optOutKey]; exists {
			logger.Info("Deployment has opt-out label, skipping resource defaulting", "labelKey", optOutKey)
			r.Recorder.Eventf(&deployment, corev1.EventTypeNormal, "OptedOut", "Skipping resource defaulting due to label %s", optOutKey)
			return ctrl.Result{}, nil
		}
	}

	// Parse all four defaults up front so every malformed value is reported in
	// a single event instead of one per reconcile attempt. A nil quantity
	// means the corresponding default is not configured.
	defaults := config.Spec.ResourceDefaults
	defaultCPUReq, errCPUReq := parseQuantity(defaults.CPURequest)
	defaultMemReq, errMemReq := parseQuantity(defaults.MemoryRequest)
	defaultCPULim, errCPULim := parseQuantity(defaults.CPULimit)
	defaultMemLim, errMemLim := parseQuantity(defaults.MemoryLimit)

	var parseErrors []string
	if errCPUReq != nil {
		parseErrors = append(parseErrors, fmt.Sprintf("CPURequest: %v", errCPUReq))
	}
	if errMemReq != nil {
		parseErrors = append(parseErrors, fmt.Sprintf("MemoryRequest: %v", errMemReq))
	}
	if errCPULim != nil {
		parseErrors = append(parseErrors, fmt.Sprintf("CPULimit: %v", errCPULim))
	}
	if errMemLim != nil {
		parseErrors = append(parseErrors, fmt.Sprintf("MemoryLimit: %v", errMemLim))
	}
	if len(parseErrors) > 0 {
		parsingError := fmt.Errorf("invalid resource quantity format in NodeTainterConfig %s: %s", config.Name, strings.Join(parseErrors, "; "))
		logger.Error(parsingError, "Default resource parsing failed", "configName", config.Name, "parsingErrors", parseErrors)
		// Event, not Eventf: the message embeds user-supplied config text and
		// must not be interpreted as a format string.
		r.Recorder.Event(&deployment, corev1.EventTypeWarning, "ConfigError", parsingError.Error())
		// Deliberately no error: a config edit re-queues this Deployment.
		return ctrl.Result{}, nil
	}

	// Mutate a copy so the untouched original can serve as the patch base.
	deploymentCopy := deployment.DeepCopy()
	mutated := false

	// NOTE(review): a default limit lower than an explicitly set request (or a
	// default request higher than an explicit limit) would presumably be
	// rejected by API validation and retried on every requeue - confirm and
	// consider guarding against it.
	for i := range deploymentCopy.Spec.Template.Spec.Containers {
		// Work through a pointer so edits land in the slice element itself.
		container := &deploymentCopy.Spec.Template.Spec.Containers[i]
		containerLog := logger.WithValues("container", container.Name)

		// Writing to a nil map panics, so make sure both lists exist.
		if container.Resources.Requests == nil {
			container.Resources.Requests = corev1.ResourceList{}
		}
		if container.Resources.Limits == nil {
			container.Resources.Limits = corev1.ResourceList{}
		}
		requests := container.Resources.Requests
		limits := container.Resources.Limits

		if _, exists := requests[corev1.ResourceCPU]; !exists && defaultCPUReq != nil {
			requests[corev1.ResourceCPU] = *defaultCPUReq
			containerLog.V(1).Info("Applied default CPU request", "value", defaultCPUReq.String())
			mutated = true
		}
		if _, exists := requests[corev1.ResourceMemory]; !exists && defaultMemReq != nil {
			requests[corev1.ResourceMemory] = *defaultMemReq
			containerLog.V(1).Info("Applied default Memory request", "value", defaultMemReq.String())
			mutated = true
		}
		if _, exists := limits[corev1.ResourceCPU]; !exists && defaultCPULim != nil {
			limits[corev1.ResourceCPU] = *defaultCPULim
			containerLog.V(1).Info("Applied default CPU limit", "value", defaultCPULim.String())
			mutated = true
		}
		if _, exists := limits[corev1.ResourceMemory]; !exists && defaultMemLim != nil {
			limits[corev1.ResourceMemory] = *defaultMemLim
			containerLog.V(1).Info("Applied default Memory limit", "value", defaultMemLim.String())
			mutated = true
		}
	}

	if !mutated {
		logger.V(1).Info("Deployment already has necessary resource requests/limits or no defaults configured.")
		return ctrl.Result{}, nil
	}

	logger.Info("Applying default resource requests/limits to Deployment")
	if err := r.Patch(ctx, deploymentCopy, client.MergeFrom(&deployment)); err != nil {
		logger.Error(err, "Failed to patch Deployment with default resources")
		r.Recorder.Eventf(&deployment, corev1.EventTypeWarning, "UpdateFailed", "Failed to apply default resources: %v", err)
		return ctrl.Result{}, err
	}
	logger.Info("Successfully applied default resources")
	r.Recorder.Eventf(&deployment, corev1.EventTypeNormal, "DefaultsApplied", "Default resource requests/limits applied")

	return ctrl.Result{}, nil
}
// parseQuantity converts a quantity string such as "100m" or "128Mi" into a
// resource.Quantity. Surrounding whitespace is ignored. A blank input yields
// (nil, nil), which lets callers tell "not configured" apart from a malformed
// value; the latter is reported as a wrapped error.
func parseQuantity(s string) (*resource.Quantity, error) {
	trimmed := strings.TrimSpace(s)
	if len(trimmed) == 0 {
		return nil, nil
	}

	quantity, parseErr := resource.ParseQuantity(trimmed)
	if parseErr != nil {
		return nil, fmt.Errorf("invalid quantity format '%s': %w", trimmed, parseErr)
	}
	return &quantity, nil
}
// mapConfigToDeployments is the map function for NodeTainterConfig events. When
// the global config (the one named GlobalTaintConfigName) changes, it returns a
// reconcile request for every Deployment in all namespaces, leaving out those
// that carry the config's opt-out label key. Events for any other object, and
// list failures, produce no requests.
func (r *DeploymentDefaultsReconciler) mapConfigToDeployments(ctx context.Context, obj client.Object) []reconcile.Request {
	logger := log.FromContext(ctx)

	config, isConfig := obj.(*configv1alpha1.NodeTainterConfig)
	if !isConfig {
		return nil
	}
	if config.Name != GlobalTaintConfigName {
		return nil
	}

	logger.Info("Global NodeTainterConfig changed, queuing reconciliation for all deployments potentially affected by resource defaults", "configName", config.Name)

	// An empty namespace selects Deployments across the whole cluster.
	var allDeployments appsv1.DeploymentList
	if listErr := r.List(ctx, &allDeployments, client.InNamespace("")); listErr != nil {
		logger.Error(listErr, "Failed to list deployments for config change")
		return nil
	}

	skipLabel := strings.TrimSpace(config.Spec.OptOutLabelKey)
	queued := make([]reconcile.Request, 0, len(allDeployments.Items))
	for idx := range allDeployments.Items {
		item := &allDeployments.Items[idx]

		// Opted-out Deployments would be skipped by Reconcile anyway.
		if skipLabel != "" {
			if _, optedOut := item.GetLabels()[skipLabel]; optedOut {
				continue
			}
		}

		key := types.NamespacedName{Name: item.Name, Namespace: item.Namespace}
		queued = append(queued, reconcile.Request{NamespacedName: key})
	}

	logger.Info("Queued deployment reconcile requests", "count", len(queued))
	return queued
}
// SetupWithManager registers the reconciler with mgr. The controller is driven
// by Deployment events and, through mapConfigToDeployments, by NodeTainterConfig
// events. It returns any error from building the controller.
func (r *DeploymentDefaultsReconciler) SetupWithManager(mgr ctrl.Manager) error {
	// Respect a recorder injected by the caller (for example a fake in tests)
	// and only fall back to the manager's recorder when none was provided.
	if r.Recorder == nil {
		r.Recorder = mgr.GetEventRecorderFor("deploymentdefaults-controller")
	}

	return ctrl.NewControllerManagedBy(mgr).
		For(&appsv1.Deployment{}).
		Watches(
			&configv1alpha1.NodeTainterConfig{},
			handler.EnqueueRequestsFromMapFunc(r.mapConfigToDeployments),
		).
		Complete(r)
}