Skip to content

Commit

Permalink
Replace liveness and readiness with initialization node controller (#…
Browse files Browse the repository at this point in the history
…1186)

* Replace liveness and readiness with startup node controller

* Apply suggestions from code review

Co-authored-by: Ellis Tarn <[email protected]>

* add taints.HasKey

* rename startup to initialization

* Missed some comments during the renaming

Co-authored-by: Ellis Tarn <[email protected]>
  • Loading branch information
suket22 and ellistarn authored Jan 25, 2022
1 parent 21cb075 commit 2346ed5
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 141 deletions.
12 changes: 11 additions & 1 deletion pkg/apis/provisioning/v1alpha5/taints.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func (ts Taints) WithPod(pod *v1.Pod) Taints {
return ts
}

// Has returns true if taints has a taint for the given key
// Has returns true if taints has a taint for the given key and effect
func (ts Taints) Has(taint v1.Taint) bool {
for _, t := range ts {
if t.Key == taint.Key && t.Effect == taint.Effect {
Expand All @@ -62,6 +62,16 @@ func (ts Taints) Has(taint v1.Taint) bool {
return false
}

// HasKey returns true if taints has a taint for the given key
func (ts Taints) HasKey(taintKey string) bool {
for _, t := range ts {
if t.Key == taintKey {
return true
}
}
return false
}

// Tolerates returns true if the pod tolerates all taints
func (ts Taints) Tolerates(pod *v1.Pod) (errs error) {
for i := range ts {
Expand Down
22 changes: 10 additions & 12 deletions pkg/controllers/node/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,22 +41,21 @@ const controllerName = "node"
// NewController constructs a controller instance
func NewController(kubeClient client.Client) *Controller {
return &Controller{
kubeClient: kubeClient,
liveness: &Liveness{kubeClient: kubeClient},
emptiness: &Emptiness{kubeClient: kubeClient},
expiration: &Expiration{kubeClient: kubeClient},
kubeClient: kubeClient,
initialization: &Initialization{kubeClient: kubeClient},
emptiness: &Emptiness{kubeClient: kubeClient},
expiration: &Expiration{kubeClient: kubeClient},
}
}

// Controller manages a set of properties on karpenter provisioned nodes, such as
// taints, labels, finalizers.
type Controller struct {
kubeClient client.Client
readiness *Readiness
liveness *Liveness
emptiness *Emptiness
expiration *Expiration
finalizer *Finalizer
kubeClient client.Client
initialization *Initialization
emptiness *Emptiness
expiration *Expiration
finalizer *Finalizer
}

// Reconcile executes a reallocation control loop for the resource
Expand Down Expand Up @@ -93,8 +92,7 @@ func (c *Controller) Reconcile(ctx context.Context, req reconcile.Request) (reco
for _, reconciler := range []interface {
Reconcile(context.Context, *v1alpha5.Provisioner, *v1.Node) (reconcile.Result, error)
}{
c.readiness,
c.liveness,
c.initialization,
c.expiration,
c.emptiness,
c.finalizer,
Expand Down
65 changes: 65 additions & 0 deletions pkg/controllers/node/initialization.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package node

import (
"context"
"fmt"
"time"

"github.com/aws/karpenter/pkg/apis/provisioning/v1alpha5"
"github.com/aws/karpenter/pkg/utils/injectabletime"
"github.com/aws/karpenter/pkg/utils/node"
v1 "k8s.io/api/core/v1"
"knative.dev/pkg/logging"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
)

const InitializationTimeout = 15 * time.Minute

// Initialization is a subreconciler that
// 1. Removes the NotReady taint when the node is ready. This taint is originally applied on node creation.
// 2. Terminates nodes that don't transition to ready within InitializationTimeout
type Initialization struct {
kubeClient client.Client
}

// Reconcile reconciles the node
func (r *Initialization) Reconcile(ctx context.Context, _ *v1alpha5.Provisioner, n *v1.Node) (reconcile.Result, error) {
if !v1alpha5.Taints(n.Spec.Taints).HasKey(v1alpha5.NotReadyTaintKey) {
// At this point, the startup of the node is complete and no more evaluation is necessary.
return reconcile.Result{}, nil
}

if !node.IsReady(n) {
if age := injectabletime.Now().Sub(n.GetCreationTimestamp().Time); age < InitializationTimeout {
return reconcile.Result{RequeueAfter: InitializationTimeout - age}, nil
}
logging.FromContext(ctx).Infof("Triggering termination for node that failed to become ready")
if err := r.kubeClient.Delete(ctx, n); err != nil {
return reconcile.Result{}, fmt.Errorf("deleting node, %w", err)
}
return reconcile.Result{}, nil
}
taints := []v1.Taint{}
for _, taint := range n.Spec.Taints {
if taint.Key != v1alpha5.NotReadyTaintKey {
taints = append(taints, taint)
}
}
n.Spec.Taints = taints
return reconcile.Result{}, nil
}
57 changes: 0 additions & 57 deletions pkg/controllers/node/liveness.go

This file was deleted.

42 changes: 0 additions & 42 deletions pkg/controllers/node/readiness.go

This file was deleted.

35 changes: 6 additions & 29 deletions pkg/controllers/node/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ var _ = Describe("Controller", func() {
})
})

Context("Readiness", func() {
Context("Initialization", func() {
It("should not remove the readiness taint if not ready", func() {
n := test.Node(test.NodeOptions{
ReadyStatus: v1.ConditionUnknown,
Expand Down Expand Up @@ -181,52 +181,29 @@ var _ = Describe("Controller", func() {
n = ExpectNodeExists(ctx, env.Client, n.Name)
Expect(n.Spec.Taints).To(Equal(n.Spec.Taints))
})
})
Context("Liveness", func() {
It("should delete nodes if NodeStatusNeverUpdated after 5 minutes", func() {
It("should delete nodes if node not ready even after Initialization timeout ", func() {
n := test.Node(test.NodeOptions{
ObjectMeta: metav1.ObjectMeta{
Finalizers: []string{v1alpha5.TerminationFinalizer},
Labels: map[string]string{v1alpha5.ProvisionerNameLabelKey: provisioner.Name},
},
ReadyStatus: v1.ConditionUnknown,
ReadyReason: "NodeStatusNeverUpdated",
})
ExpectCreated(ctx, env.Client, provisioner)
ExpectCreatedWithStatus(ctx, env.Client, n)

ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(provisioner))

// Expect node not be deleted
n = ExpectNodeExists(ctx, env.Client, n.Name)
Expect(n.DeletionTimestamp.IsZero()).To(BeTrue())

// Simulate time passing and a n failing to join
injectabletime.Now = func() time.Time { return time.Now().Add(node.LivenessTimeout) }
ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(n))

n = ExpectNodeExists(ctx, env.Client, n.Name)
Expect(n.DeletionTimestamp.IsZero()).To(BeFalse())
})
It("should delete nodes if we never hear anything after 5 minutes", func() {
n := test.Node(test.NodeOptions{
ObjectMeta: metav1.ObjectMeta{
Finalizers: []string{v1alpha5.TerminationFinalizer},
Labels: map[string]string{v1alpha5.ProvisionerNameLabelKey: provisioner.Name},
Taints: []v1.Taint{
{Key: v1alpha5.NotReadyTaintKey, Effect: v1.TaintEffectNoSchedule},
},
ReadyStatus: v1.ConditionUnknown,
ReadyReason: "",
})
ExpectCreated(ctx, env.Client, provisioner)
ExpectCreatedWithStatus(ctx, env.Client, n)

ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(provisioner))

// Expect node not be deleted
n = ExpectNodeExists(ctx, env.Client, n.Name)
Expect(n.DeletionTimestamp.IsZero()).To(BeTrue())

// Simulate time passing and a n failing to join
injectabletime.Now = func() time.Time { return time.Now().Add(node.LivenessTimeout) }
injectabletime.Now = func() time.Time { return time.Now().Add(node.InitializationTimeout) }
ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(n))

n = ExpectNodeExists(ctx, env.Client, n.Name)
Expand Down

0 comments on commit 2346ed5

Please sign in to comment.