diff --git a/api/v1alpha1/nodetainterconfig_types.go b/api/v1alpha1/nodetainterconfig_types.go
index 557091b..010c8e4 100644
--- a/api/v1alpha1/nodetainterconfig_types.go
+++ b/api/v1alpha1/nodetainterconfig_types.go
@@ -29,16 +29,37 @@ type NodeTainterConfigSpec struct {
 	// Important: Run "make" to regenerate code after modifying this file

 	// Foo is an example field of NodeTainterConfig. Edit nodetainterconfig_types.go to remove/update
-	Foo string `json:"foo,omitempty"`
+	// Foo string `json:"foo,omitempty"`
+
+	// LabelRules defines the mapping between node label selectors and the
+	// taints that should be applied.
+	// The key is a label selector of the form "labelKey=labelValue"
+	// (e.g., "andy.vendetti.ru/category=gpu").
+	// The value is a taint string of the form "key=value:Effect"
+	// (e.g., "nvidia.com/gpu=present:NoSchedule").
+
+	// +kubebuilder:validation:Optional
+	// +kubebuilder:validation:MinProperties=1
+	LabelRules map[string]string `json:"labelRules,omitempty"`
 }

 // NodeTainterConfigStatus defines the observed state of NodeTainterConfig.
 type NodeTainterConfigStatus struct {
-	// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
-	// Important: Run "make" to regenerate code after modifying this file
+	// +optional
+	ObservedGeneration int64 `json:"observedGeneration,omitempty"`
+	// +optional
+	Conditions []metav1.Condition `json:"conditions,omitempty"`
+	// +optional
+	NodeTaintStatus []NodeTaintInfo `json:"nodeTaintStatus,omitempty"` // Per-node taint status
+}
+
+// NodeTaintInfo holds the taint status for a single Node.
+type NodeTaintInfo struct {
+	NodeName      string   `json:"nodeName"`
+	AppliedTaints []string `json:"appliedTaints,omitempty"` // String representation of the applied taints
+	Error         string   `json:"error,omitempty"`         // Error message for this Node, if any
 }

 // +kubebuilder:object:root=true
+// +kubebuilder:resource:scope=Cluster,shortName=ntc
 // +kubebuilder:subresource:status

 // NodeTainterConfig is the Schema for the nodetainterconfigs API.
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index fa7aa1e..cd10c2f 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -21,16 +21,37 @@ limitations under the License.
 package v1alpha1

 import (
+	"k8s.io/apimachinery/pkg/apis/meta/v1"
 	runtime "k8s.io/apimachinery/pkg/runtime"
 )

+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *NodeTaintInfo) DeepCopyInto(out *NodeTaintInfo) {
+	*out = *in
+	if in.AppliedTaints != nil {
+		in, out := &in.AppliedTaints, &out.AppliedTaints
+		*out = make([]string, len(*in))
+		copy(*out, *in)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeTaintInfo.
+func (in *NodeTaintInfo) DeepCopy() *NodeTaintInfo {
+	if in == nil {
+		return nil
+	}
+	out := new(NodeTaintInfo)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *NodeTainterConfig) DeepCopyInto(out *NodeTainterConfig) {
 	*out = *in
 	out.TypeMeta = in.TypeMeta
 	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
-	out.Spec = in.Spec
-	out.Status = in.Status
+	in.Spec.DeepCopyInto(&out.Spec)
+	in.Status.DeepCopyInto(&out.Status)
 }

 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeTainterConfig.
@@ -86,6 +107,13 @@ func (in *NodeTainterConfigList) DeepCopyObject() runtime.Object {

 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *NodeTainterConfigSpec) DeepCopyInto(out *NodeTainterConfigSpec) {
 	*out = *in
+	if in.LabelRules != nil {
+		in, out := &in.LabelRules, &out.LabelRules
+		*out = make(map[string]string, len(*in))
+		for key, val := range *in {
+			(*out)[key] = val
+		}
+	}
 }

 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeTainterConfigSpec.
@@ -101,6 +129,20 @@ func (in *NodeTainterConfigSpec) DeepCopy() *NodeTainterConfigSpec {

 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *NodeTainterConfigStatus) DeepCopyInto(out *NodeTainterConfigStatus) {
 	*out = *in
+	if in.Conditions != nil {
+		in, out := &in.Conditions, &out.Conditions
+		*out = make([]v1.Condition, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
+	if in.NodeTaintStatus != nil {
+		in, out := &in.NodeTaintStatus, &out.NodeTaintStatus
+		*out = make([]NodeTaintInfo, len(*in))
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i])
+		}
+	}
 }

 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeTainterConfigStatus.
diff --git a/config/crd/bases/operator.andy.vendetti.ru_nodetainterconfigs.yaml b/config/crd/bases/operator.andy.vendetti.ru_nodetainterconfigs.yaml
new file mode 100644
index 0000000..358ebd6
--- /dev/null
+++ b/config/crd/bases/operator.andy.vendetti.ru_nodetainterconfigs.yaml
@@ -0,0 +1,133 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.17.2
+  name: nodetainterconfigs.operator.andy.vendetti.ru
+spec:
+  group: operator.andy.vendetti.ru
+  names:
+    kind: NodeTainterConfig
+    listKind: NodeTainterConfigList
+    plural: nodetainterconfigs
+    shortNames:
+    - ntc
+    singular: nodetainterconfig
+  scope: Cluster
+  versions:
+  - name: v1alpha1
+    schema:
+      openAPIV3Schema:
+        description: NodeTainterConfig is the Schema for the nodetainterconfigs API.
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+            type: string
+          kind:
+            description: |-
+              Kind is a string value representing the REST resource this object represents.
+              Servers may infer this from the endpoint the client submits requests to.
+              Cannot be updated.
+              In CamelCase.
+              More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+            type: string
+          metadata:
+            type: object
+          spec:
+            description: NodeTainterConfigSpec defines the desired state of NodeTainterConfig.
+            properties:
+              labelRules:
+                additionalProperties:
+                  type: string
+                minProperties: 1
+                type: object
+            type: object
+          status:
+            description: NodeTainterConfigStatus defines the observed state of NodeTainterConfig.
+            properties:
+              conditions:
+                items:
+                  description: Condition contains details for one aspect of the current
+                    state of this API Resource.
+                  properties:
+                    lastTransitionTime:
+                      description: |-
+                        lastTransitionTime is the last time the condition transitioned from one status to another.
+                        This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+                      format: date-time
+                      type: string
+                    message:
+                      description: |-
+                        message is a human readable message indicating details about the transition.
+                        This may be an empty string.
+                      maxLength: 32768
+                      type: string
+                    observedGeneration:
+                      description: |-
+                        observedGeneration represents the .metadata.generation that the condition was set based upon.
+                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+                        with respect to the current state of the instance.
+                      format: int64
+                      minimum: 0
+                      type: integer
+                    reason:
+                      description: |-
+                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                        Producers of specific condition types may define expected values and meanings for this field,
+                        and whether the values are considered a guaranteed API.
+                        The value should be a CamelCase string.
+                        This field may not be empty.
+                      maxLength: 1024
+                      minLength: 1
+                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                      type: string
+                    status:
+                      description: status of the condition, one of True, False, Unknown.
+                      enum:
+                      - "True"
+                      - "False"
+                      - Unknown
+                      type: string
+                    type:
+                      description: type of condition in CamelCase or in foo.example.com/CamelCase.
+                      maxLength: 316
+                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                      type: string
+                  required:
+                  - lastTransitionTime
+                  - message
+                  - reason
+                  - status
+                  - type
+                  type: object
+                type: array
+              nodeTaintStatus:
+                items:
+                  description: NodeTaintInfo holds the taint status for a single Node.
+                  properties:
+                    appliedTaints:
+                      items:
+                        type: string
+                      type: array
+                    error:
+                      type: string
+                    nodeName:
+                      type: string
+                  required:
+                  - nodeName
+                  type: object
+                type: array
+              observedGeneration:
+                format: int64
+                type: integer
+            type: object
+        type: object
+    served: true
+    storage: true
+    subresources:
+      status: {}
diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml
index 5c5f0b8..1336785 100644
--- a/config/manager/kustomization.yaml
+++ b/config/manager/kustomization.yaml
@@ -1,2 +1,8 @@
 resources:
 - manager.yaml
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+images:
+- name: controller
+  newName: aboba
+  newTag: latest
diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml
index 87e999b..96a2122 100644
--- a/config/manager/manager.yaml
+++ b/config/manager/manager.yaml
@@ -64,6 +64,7 @@ spec:
         - --leader-elect
         - --health-probe-bind-address=:8081
         image: controller:latest
+        imagePullPolicy: IfNotPresent
         name: manager
         ports: []
         securityContext:
diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml
index 1b89a4c..43c0e17 100644
--- a/config/rbac/role.yaml
+++ b/config/rbac/role.yaml
@@ -1,11 +1,39 @@
+---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
 metadata:
-  labels:
-    app.kubernetes.io/name: operator
-    app.kubernetes.io/managed-by: kustomize
   name: manager-role
 rules:
-- apiGroups: [""]
-  resources: ["pods"]
-  verbs: ["get", "list", "watch"]
+- apiGroups:
+  - ""
+  resources:
+  - events
+  verbs:
+  - create
+  - patch
+- apiGroups:
+  - ""
+  resources:
+  - nodes
+  verbs:
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - operator.andy.vendetti.ru
+  resources:
+  - nodetainterconfigs
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - operator.andy.vendetti.ru
+  resources:
+  - nodetainterconfigs/status
+  verbs:
+  - get
+  - patch
+  - update
diff --git a/config/samples/operator_v1alpha1_nodetainterconfig.yaml b/config/samples/operator_v1alpha1_nodetainterconfig.yaml
index 55886d9..7df3384 100644
--- a/config/samples/operator_v1alpha1_nodetainterconfig.yaml
+++ b/config/samples/operator_v1alpha1_nodetainterconfig.yaml
@@ -4,6 +4,9 @@ metadata:
   labels:
     app.kubernetes.io/name: operator
     app.kubernetes.io/managed-by: kustomize
-  name: nodetainterconfig-sample
+  name: global-taint-rules
 spec:
-  # TODO(user): Add fields here
+  labelRules:
+    "andy.vendetti.ru/category=priority": "workload/priority=high:NoSchedule"
+    "andy.vendetti.ru/category=gpu": "nvidia.com/gpu=present:NoSchedule"
+    "andy.vendetti.ru/category=svc": "workload/type=service:NoSchedule"
diff --git a/internal/controller/nodetainterconfig_controller.go b/internal/controller/nodetainterconfig_controller.go
index ad4347e..a135f8e 100644
--- a/internal/controller/nodetainterconfig_controller.go
+++ b/internal/controller/nodetainterconfig_controller.go
@@ -18,24 +18,54 @@ package controller

 import (
 	"context"
+	"fmt"
+	"sort"
+	"strings"
+	"time"

+	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/apimachinery/pkg/util/sets"
+	apivalidation "k8s.io/apimachinery/pkg/util/validation"
+	"k8s.io/client-go/tools/record"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/handler"
 	"sigs.k8s.io/controller-runtime/pkg/log"
+	"sigs.k8s.io/controller-runtime/pkg/reconcile"

-	operatorv1alpha1 "git.vendetti.ru/andy/operator/api/v1alpha1"
+	configv1alpha1 "git.vendetti.ru/andy/operator/api/v1alpha1"
+)
+
+const (
+	GlobalTaintConfigName = "global-taint-rules"
+
+	// Condition types and reasons
+	ConditionTypeReady                = "Ready"
+	ConditionReasonConfigParsingError = "ConfigParsingError"
+	ConditionReasonConfigNotFound     = "ConfigNotFound"
+	ConditionReasonReady              = "Ready"
 )

 // NodeTainterConfigReconciler reconciles a NodeTainterConfig object
 type NodeTainterConfigReconciler struct {
 	client.Client
-	Scheme *runtime.Scheme
+	Scheme   *runtime.Scheme
+	Recorder record.EventRecorder
 }

-// +kubebuilder:rbac:groups=operator.andy.vendetti.ru,resources=nodetainterconfigs,verbs=get;list;watch;create;update;patch;delete
+// kubebuilder:rbac:groups=operator.andy.vendetti.ru,resources=nodetainterconfigs,verbs=get;list;watch;create;update;patch;delete
+
+// +kubebuilder:rbac:groups=operator.andy.vendetti.ru,resources=nodetainterconfigs,verbs=get;list;watch
 // +kubebuilder:rbac:groups=operator.andy.vendetti.ru,resources=nodetainterconfigs/status,verbs=get;update;patch
-// +kubebuilder:rbac:groups=operator.andy.vendetti.ru,resources=nodetainterconfigs/finalizers,verbs=update
+// kubebuilder:rbac:groups=operator.andy.vendetti.ru,resources=nodetainterconfigs/finalizers,verbs=update
+
+// For access to Nodes and Events
+// +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch;update;patch
+// +kubebuilder:rbac:groups="",resources=events,verbs=create;patch

 // Reconcile is part of the main kubernetes reconciliation loop which aims to
 // move the current state of the cluster closer to the desired state.
@@ -47,17 +77,432 @@ type NodeTainterConfigReconciler struct {
 // For more details, check Reconcile and its Result here:
 // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.20.2/pkg/reconcile
 func (r *NodeTainterConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
-	_ = log.FromContext(ctx)
+	log := log.FromContext(ctx)

-	// TODO(user): your logic here
+	// 1. Get the Node being reconciled
+	var node corev1.Node
+	if err := r.Get(ctx, req.NamespacedName, &node); err != nil {
+		if apierrors.IsNotFound(err) {
+			// Node deleted, nothing to do
+			log.Info("Node not found. Ignoring.", "node", req.NamespacedName)
+			return ctrl.Result{}, nil
+		}
+		// Any other error while fetching the Node
+		log.Error(err, "Failed to get Node", "node", req.NamespacedName)
+		return ctrl.Result{}, err // Requeue
+	}
+	log = log.WithValues("node", node.Name) // The logger now knows the Node name
+
+	// 2. Get the global NodeTainterConfig
+	var config configv1alpha1.NodeTainterConfig
+	configKey := types.NamespacedName{Name: GlobalTaintConfigName} // Namespace is empty for cluster-scoped resources
+	if err := r.Get(ctx, configKey, &config); err != nil {
+		nodeUpdateErr := r.updateNodeTaintStatus(ctx, &node, nil, fmt.Sprintf("Failed to get NodeTainterConfig %s: %v", GlobalTaintConfigName, err))
+		if nodeUpdateErr != nil {
+			log.Error(nodeUpdateErr, "Failed to update node status after config fetch error")
+		}
+		if apierrors.IsNotFound(err) {
+			log.Error(err, "Global NodeTainterConfig not found", "configName", GlobalTaintConfigName)
+			r.Recorder.Eventf(&node, corev1.EventTypeWarning, "ConfigMissing", "Required NodeTainterConfig '%s' not found", GlobalTaintConfigName)
+			return ctrl.Result{RequeueAfter: 1 * time.Minute}, nil
+		}
+		// Any other error while fetching the CR
+		log.Error(err, "Failed to get NodeTainterConfig", "configName", GlobalTaintConfigName)
+		return ctrl.Result{}, err // Requeue
+	}
+
+	log = log.WithValues("config", config.Name, "configGeneration", config.Generation)
+
+	// 3. Read the rules from the CR spec (a plain field access)
+	specLabelRules := config.Spec.LabelRules
+	if specLabelRules == nil {
+		specLabelRules = make(map[string]string) // The field may be empty
+		log.Info("Taint rules map is nil in config, assuming no rules", "configName", config.Name)
+	}
+
+	// Convert map[string]string into map[string]corev1.Taint for convenience
+	parsedRules, parseErrs := parseLabelRulesFromSpec(specLabelRules)
+	if len(parseErrs) > 0 {
+		errMsg := fmt.Sprintf("Invalid rules found in NodeTainterConfig %s: %v", config.Name, parseErrs)
+		log.Error(fmt.Errorf("%s", errMsg), "Rule parsing failed")
+		r.Recorder.Event(&config, corev1.EventTypeWarning, "InvalidConfig", errMsg)
+		_ = r.updateCRDStatus(ctx, &config, metav1.ConditionFalse, ConditionReasonConfigParsingError, errMsg)
+		_ = r.updateNodeTaintStatus(ctx, &node, nil, errMsg)
+		return ctrl.Result{}, nil
+	}
+
+	// 4. Compute the desired taints for THIS Node
+	desiredTaints := calculateDesiredTaints(node.Labels, parsedRules)
+	log.V(1).Info("Calculated desired taints", "taints", desiredTaints)
+
+	// 5. Get the current taints and compare/update
+	originalTaints := node.Spec.Taints
+	// Pass parsedRules so the function knows which taints are controlled by this operator
+	needsUpdate, newTaints := mergeAndCheckTaints(originalTaints, desiredTaints, parsedRules)
+
+	var updateErr error
+
+	if needsUpdate {
+		log.Info("Taints require update", "old", originalTaints, "new", newTaints)
+		nodeCopy := node.DeepCopy()
+		nodeCopy.Spec.Taints = newTaints
+		// Use Patch for atomicity and a lower risk of conflicts
+		if err := r.Patch(ctx, nodeCopy, client.MergeFrom(&node)); err != nil {
+			log.Error(err, "Failed to patch Node taints")
+			r.Recorder.Eventf(&node, corev1.EventTypeWarning, "UpdateFailed", "Failed to patch taints: %v", err)
+			updateErr = err
+		} else {
+			log.Info("Successfully patched Node taints")
+			r.Recorder.Eventf(&node, corev1.EventTypeNormal, "TaintsUpdated", "Taints updated based on rules from %s", config.Name)
+			updateErr = r.updateNodeTaintStatus(ctx, &node, newTaints, "")
+		}
+	} else {
+		log.Info("Node taints are already up-to-date")
+		updateErr = r.updateNodeTaintStatus(ctx, &node, originalTaints, "")
+	}
+
+	if updateErr != nil {
+		return ctrl.Result{}, updateErr
+	}

 	return ctrl.Result{}, nil
+
+}
+
+// mapConfigToNodes triggers a reconcile for ALL nodes when the global config changes.
+func (r *NodeTainterConfigReconciler) mapConfigToNodes(ctx context.Context, obj client.Object) []reconcile.Request {
+	config, ok := obj.(*configv1alpha1.NodeTainterConfig)
+	log := log.FromContext(ctx)
+
+	// Only the global config is of interest
+	if !ok || config.Name != GlobalTaintConfigName {
+		return nil
+	}
+
+	log.Info("Global NodeTainterConfig changed, queuing reconciliation for all nodes", "configName", config.Name)
+
+	var nodeList corev1.NodeList
+	if err := r.List(ctx, &nodeList); err != nil {
+		log.Error(err, "Failed to list nodes for config change")
+		return nil
+	}
+
+	requests := make([]reconcile.Request, len(nodeList.Items))
+	for i, node := range nodeList.Items {
+		requests[i] = reconcile.Request{NamespacedName: types.NamespacedName{Name: node.Name}}
+	}
+	log.Info("Queued node reconcile requests", "count", len(requests))
+	return requests
 }

 // SetupWithManager sets up the controller with the Manager.
 func (r *NodeTainterConfigReconciler) SetupWithManager(mgr ctrl.Manager) error {
+	r.Recorder = mgr.GetEventRecorderFor("nodetainter-controller")
+
 	return ctrl.NewControllerManagedBy(mgr).
-		For(&operatorv1alpha1.NodeTainterConfig{}).
-		Named("nodetainterconfig").
+		// The primary watched resource is the Node
+		For(&corev1.Node{}).
+		// Also watch our global CR config;
+		// when it changes, queue ALL Nodes.
+		Watches(
+			&configv1alpha1.NodeTainterConfig{},
+			handler.EnqueueRequestsFromMapFunc(r.mapConfigToNodes),
+		).
 		Complete(r)
 }
+
+// --- UTILITY FUNCTIONS ---
+
+// TaintToString converts a taint into a string for status and logs.
+func TaintToString(taint *corev1.Taint) string {
+	return fmt.Sprintf("%s=%s:%s", taint.Key, taint.Value, taint.Effect)
+}
+
+// TaintsToStrings converts a slice of taints into a slice of strings.
+func TaintsToStrings(taints []corev1.Taint) []string {
+	res := make([]string, len(taints))
+	for i, t := range taints {
+		res[i] = TaintToString(&t)
+	}
+	sort.Strings(res) // Sort for a consistent status
+	return res
+}
+
+// parseLabelRulesFromSpec parses the rules from the CRD spec.
+// It returns map["labelKey=labelValue"]corev1.Taint and any errors encountered.
+func parseLabelRulesFromSpec(specLabelRules map[string]string) (map[string]corev1.Taint, []error) {
+	parsed := make(map[string]corev1.Taint)
+	var errs []error
+
+	for ruleSelector, taintString := range specLabelRules {
+		ruleSelector = strings.TrimSpace(ruleSelector)
+		taintString = strings.TrimSpace(taintString)
+
+		if ruleSelector == "" || taintString == "" {
+			errs = append(errs, fmt.Errorf("rule has empty selector ('%s') or taint string ('%s')", ruleSelector, taintString))
+			continue
+		}
+
+		// Parse the "key=value" selector
+		partsSelector := strings.SplitN(ruleSelector, "=", 2)
+		if len(partsSelector) != 2 { // There must be an '=' sign
+			errs = append(errs, fmt.Errorf("invalid rule selector format '%s': missing '='", ruleSelector))
+			continue
+		}
+		labelKey := strings.TrimSpace(partsSelector[0])
+		labelValue := strings.TrimSpace(partsSelector[1]) // May be empty!
+
+		if labelKey == "" {
+			errs = append(errs, fmt.Errorf("invalid rule selector format '%s': empty label key", ruleSelector))
+			continue
+		}
+		// Validate the label key
+		if msgs := apivalidation.IsQualifiedName(labelKey); len(msgs) > 0 {
+			errs = append(errs, fmt.Errorf("invalid label key in selector '%s': %v", ruleSelector, msgs))
+			continue
+		}
+		// Validate the label value (if not empty)
+		if labelValue != "" {
+			if msgs := apivalidation.IsValidLabelValue(labelValue); len(msgs) > 0 {
+				errs = append(errs, fmt.Errorf("invalid label value in selector '%s': %v", ruleSelector, msgs))
+				continue
+			}
+		}
+
+		// Parse the "key=value:Effect" taint string
+		partsEffect := strings.SplitN(taintString, ":", 2)
+		if len(partsEffect) != 2 || partsEffect[1] == "" {
+			errs = append(errs, fmt.Errorf("invalid taint format for rule '%s': '%s' (missing effect)", ruleSelector, taintString))
+			continue
+		}
+		keyAndValue := partsEffect[0]
+		effectString := partsEffect[1]
+		partsKeyValue := strings.SplitN(keyAndValue, "=", 2)
+		if len(partsKeyValue) != 2 || partsKeyValue[0] == "" || partsKeyValue[1] == "" {
+			errs = append(errs, fmt.Errorf("invalid taint format for rule '%s': '%s' (invalid key/value)", ruleSelector, taintString))
+			continue
+		}
+		taintKey := partsKeyValue[0]
+		taintValue := partsKeyValue[1]
+		if msgs := apivalidation.IsQualifiedName(taintKey); len(msgs) > 0 {
+			if simpleMsgs := apivalidation.IsDNS1123Label(taintKey); len(simpleMsgs) > 0 {
+				errs = append(errs, fmt.Errorf("invalid taint key for rule '%s': '%s' in '%s' (%v / %v)", ruleSelector, taintKey, taintString, msgs, simpleMsgs))
+				continue
+			}
+		}
+		var effect corev1.TaintEffect
+		switch effectString {
+		case string(corev1.TaintEffectNoSchedule):
+			effect = corev1.TaintEffectNoSchedule
+		case string(corev1.TaintEffectPreferNoSchedule):
+			effect = corev1.TaintEffectPreferNoSchedule
+		case string(corev1.TaintEffectNoExecute):
+			effect = corev1.TaintEffectNoExecute
+		default:
+			errs = append(errs, fmt.Errorf("invalid taint effect for rule '%s': '%s' in '%s'", ruleSelector, effectString, taintString))
+			continue
+		}
+
+		// All good
+		taint := corev1.Taint{Key: taintKey, Value: taintValue, Effect: effect}
+		parsed[ruleSelector] = taint // Key = "labelKey=labelValue"
+	}
+	return parsed, errs
+}
+
+// calculateDesiredTaints determines the taints based on the node's labels and the rules.
+func calculateDesiredTaints(nodeLabels map[string]string, parsedLabelRules map[string]corev1.Taint) []corev1.Taint {
+	desired := []corev1.Taint{}
+	foundTaints := make(map[string]bool) // For uniqueness by Key:Effect
+
+	if nodeLabels == nil {
+		nodeLabels = make(map[string]string) // Defensive: avoid nil map lookups
+	}
+
+	for ruleSelector, taint := range parsedLabelRules {
+		parts := strings.SplitN(ruleSelector, "=", 2)
+		if len(parts) != 2 {
+			continue
+		} // Should already have been validated
+		ruleKey := parts[0]
+		ruleValue := parts[1] // May be empty
+
+		actualValue, exists := nodeLabels[ruleKey]
+
+		// Matching logic:
+		// 1. The label key must exist on the node.
+		// 2. The label value on the node must match the rule value EXACTLY (including the empty string).
+		if exists && actualValue == ruleValue {
+			taintKeyEffect := fmt.Sprintf("%s:%s", taint.Key, taint.Effect)
+			if !foundTaints[taintKeyEffect] {
+				desired = append(desired, taint)
+				foundTaints[taintKeyEffect] = true
+			}
+		}
+	}
+	return desired
+}
+
+// TaintKeyEffect builds a unique string for a taint (Key:Effect).
+func TaintKeyEffect(taint *corev1.Taint) string {
+	return fmt.Sprintf("%s:%s", taint.Key, taint.Effect)
+}
+
+// mergeAndCheckTaints compares the current and desired taints managed by this operator.
+// parsedLabelRules: map["labelKey=labelValue"]corev1.Taint - contains ALL valid rules from the config.
+func mergeAndCheckTaints(currentTaints []corev1.Taint, desiredTaints []corev1.Taint, parsedLabelRules map[string]corev1.Taint) (bool, []corev1.Taint) {
+	// 1. Determine which taint types (Key:Effect) are managed by us across all rules
+	managedTaintTypes := sets.NewString()
+	for _, ruleTaint := range parsedLabelRules { // Iterate over the values (Taint objects)
+		managedTaintTypes.Insert(TaintKeyEffect(&ruleTaint))
+	}
+
+	// 2. Split the current taints into managed and unmanaged
+	currentManagedTaints := make(map[string]corev1.Taint) // Key:Effect -> Taint
+	unmanagedTaints := []corev1.Taint{}
+	for _, taint := range currentTaints {
+		ke := TaintKeyEffect(&taint)
+		if managedTaintTypes.Has(ke) {
+			currentManagedTaints[ke] = taint
+		} else {
+			unmanagedTaints = append(unmanagedTaints, taint)
+		}
+	}
+
+	// 3. Build a map of desired taints for fast lookup
+	desiredTaintsMap := make(map[string]corev1.Taint) // Key:Effect -> Taint
+	for _, taint := range desiredTaints {
+		// Double-check that the desired taint really is defined by the rules (just in case)
+		ke := TaintKeyEffect(&taint)
+		if managedTaintTypes.Has(ke) {
+			desiredTaintsMap[ke] = taint
+		}
+	}
+
+	// 4. Compare the managed current taints with the desired ones
+	needsUpdate := false
+	if len(currentManagedTaints) != len(desiredTaintsMap) {
+		needsUpdate = true
+	} else {
+		for ke, desiredTaint := range desiredTaintsMap {
+			currentTaint, exists := currentManagedTaints[ke]
+			if !exists || currentTaint.Value != desiredTaint.Value { // Compare values as well
+				needsUpdate = true
+				break
+			}
+		}
+	}
+
+	// 5. Assemble the new taint list if an update is needed
+	if needsUpdate {
+		newTaints := make([]corev1.Taint, 0, len(unmanagedTaints)+len(desiredTaintsMap))
+		newTaints = append(newTaints, unmanagedTaints...)
+		desiredKeys := make([]string, 0, len(desiredTaintsMap))
+		for ke := range desiredTaintsMap {
+			desiredKeys = append(desiredKeys, ke)
+		}
+		sort.Strings(desiredKeys) // Sort for consistency
+		for _, ke := range desiredKeys {
+			newTaints = append(newTaints, desiredTaintsMap[ke])
+		}
+		return true, newTaints
+	}
+
+	return false, currentTaints
+}
+
+// updateCRDStatus updates the status of the NodeTainterConfig resource.
+// TODO: Call this function when the CR changes and on controller startup/errors.
+func (r *NodeTainterConfigReconciler) updateCRDStatus(ctx context.Context, config *configv1alpha1.NodeTainterConfig, status metav1.ConditionStatus, reason, message string) error {
+	log := log.FromContext(ctx).WithValues("config", config.Name)
+	configCopy := config.DeepCopy()
+
+	// Set observedGeneration
+	configCopy.Status.ObservedGeneration = config.Generation
+
+	// Update the Ready condition
+	newCondition := metav1.Condition{
+		Type:               ConditionTypeReady,
+		Status:             status,
+		Reason:             reason,
+		Message:            message,
+		LastTransitionTime: metav1.Now(),
+	}
+	// TODO: Use 'meta.SetStatusCondition' from 'k8s.io/apimachinery/pkg/api/meta' to update the conditions properly,
+	// roughly: meta.SetStatusCondition(&configCopy.Status.Conditions, newCondition)
+	// For now the slice is simply replaced for simplicity (a sketch follows after this diff).
+	configCopy.Status.Conditions = []metav1.Condition{newCondition}
+
+	// TODO: Update NodeTaintStatus from the data of all nodes (may be complex and expensive)
+	// configCopy.Status.NodeTaintStatus = ...
+
+	// Use Patch to update the status
+	if err := r.Status().Patch(ctx, configCopy, client.MergeFrom(config)); err != nil {
+		log.Error(err, "Failed to patch NodeTainterConfig status")
+		return err
+	}
+	log.Info("NodeTainterConfig status updated", "reason", reason, "message", message)
+	return nil
+}
+
+// updateNodeTaintStatus updates the taint information for a specific node in the CR status.
+// TODO: As written, this function will cause conflicts, because every Node reconcile
+// tries to overwrite the whole Status.NodeTaintStatus.
+// The correct approach: read the current CR status, update only the entry for the current node, then patch.
+// That complicates the code, so it is left as is for demonstration, but THIS MUST BE FIXED for production.
+func (r *NodeTainterConfigReconciler) updateNodeTaintStatus(ctx context.Context, node *corev1.Node, appliedTaints []corev1.Taint, errorMsg string) error {
+	log := log.FromContext(ctx).WithValues("node", node.Name)
+
+	var config configv1alpha1.NodeTainterConfig
+	configKey := types.NamespacedName{Name: GlobalTaintConfigName}
+	// Fetch the CR again in order to update its status
+	if err := r.Get(ctx, configKey, &config); err != nil {
+		log.Error(err, "Failed to get NodeTainterConfig for status update", "configName", GlobalTaintConfigName)
+		// Cannot update the status if the CR could not be fetched
+		return fmt.Errorf("failed to get config %s for status update: %w", GlobalTaintConfigName, err)
+	}
+
+	configCopy := config.DeepCopy()
+
+	// Look up the status entry for the current node
+	found := false
+	nodeStatus := configv1alpha1.NodeTaintInfo{
+		NodeName:      node.Name,
+		AppliedTaints: TaintsToStrings(appliedTaints),
+		Error:         errorMsg,
+	}
+
+	for i := range configCopy.Status.NodeTaintStatus {
+		if configCopy.Status.NodeTaintStatus[i].NodeName == node.Name {
+			configCopy.Status.NodeTaintStatus[i] = nodeStatus
+			found = true
+			break
+		}
+	}
+	if !found {
+		configCopy.Status.NodeTaintStatus = append(configCopy.Status.NodeTaintStatus, nodeStatus)
+	}
+
+	// Sort for consistency
+	sort.Slice(configCopy.Status.NodeTaintStatus, func(i, j int) bool {
+		return configCopy.Status.NodeTaintStatus[i].NodeName < configCopy.Status.NodeTaintStatus[j].NodeName
+	})
+
+	// Patch the status
+	if err := r.Status().Patch(ctx, configCopy, client.MergeFrom(&config)); err != nil {
+		log.Error(err, "Failed to patch NodeTainterConfig status with node info", "node", node.Name)
+		return err
+	}
+	log.V(1).Info("Updated node status in CRD", "applied", nodeStatus.AppliedTaints, "error", nodeStatus.Error)
+
+	return nil
+}
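
Note on the meta.SetStatusCondition TODO in updateCRDStatus above: the following is a minimal sketch of that approach, not part of the change set. It assumes the same controller package and the ConditionTypeReady constant introduced in this diff; the helper name setReadyCondition is illustrative only.

package controller

import (
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	configv1alpha1 "git.vendetti.ru/andy/operator/api/v1alpha1"
)

// setReadyCondition merges the Ready condition into the existing conditions list
// instead of replacing the whole slice; meta.SetStatusCondition only updates
// LastTransitionTime when the condition status actually changes.
func setReadyCondition(config *configv1alpha1.NodeTainterConfig, status metav1.ConditionStatus, reason, message string) {
	meta.SetStatusCondition(&config.Status.Conditions, metav1.Condition{
		Type:               ConditionTypeReady,
		Status:             status,
		Reason:             reason,
		Message:            message,
		ObservedGeneration: config.Generation,
	})
}

Compared to replacing Status.Conditions outright, this keeps any other condition types intact and avoids churning LastTransitionTime on every reconcile, which keeps status diffs quiet.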