Skip to content

Commit

Permalink
CA: Debugging snapshot adding a new field for TemplateNode. This capt…
Browse files Browse the repository at this point in the history
…ures all the templates for nodegroups present
  • Loading branch information
jayantjain93 committed Jan 24, 2022
1 parent 5c741c8 commit 537e07f
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 41 deletions.
6 changes: 4 additions & 2 deletions cluster-autoscaler/core/static_autoscaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,8 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError
return autoscalerError.AddPrefix("failed to build node infos for node groups: ")
}

a.DebuggingSnapshotter.SetTemplateNodes(nodeInfosForGroups)

nodeInfosForGroups, err = a.processors.NodeInfoProcessor.Process(autoscalingContext, nodeInfosForGroups)
if err != nil {
klog.Errorf("Failed to process nodeInfos: %v", err)
Expand Down Expand Up @@ -416,9 +418,9 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) errors.AutoscalerError

l, err := a.ClusterSnapshot.NodeInfos().List()
if err != nil {
klog.Errorf("Unable to fetch NodeInfo List for Debugging Snapshot, %v", err)
klog.Errorf("Unable to fetch ClusterNode List for Debugging Snapshot, %v", err)
} else {
a.AutoscalingContext.DebuggingSnapshotter.SetNodeGroupInfo(l)
a.AutoscalingContext.DebuggingSnapshotter.SetClusterNodes(l)
}

unschedulablePodsToHelp, _ := a.processors.PodListProcessor.Process(a.AutoscalingContext, unschedulablePods)
Expand Down
73 changes: 47 additions & 26 deletions cluster-autoscaler/debuggingsnapshot/debugging_snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,23 @@ import (
"k8s.io/kubernetes/pkg/scheduler/framework"
)

// NodeInfo captures a single entity of nodeInfo. i.e. Node specs and all the pods on that node.
type NodeInfo struct {
Node *v1.Node `json:"Node"`
Pods []*framework.PodInfo `json:"Pods"`
// ClusterNode captures a single nodeInfo entity, i.e. the Node spec and all the pods on that node.
// It is the per-node element type stored in the debugging snapshot output.
type ClusterNode struct {
// Node is the node object; it is serialized under the "Node" JSON key.
Node *v1.Node `json:"Node"`
// Pods holds the pods associated with the node; it is serialized under the "Pods" JSON key.
Pods []*v1.Pod `json:"Pods"`
}

// DebuggingSnapshot is the interface used to define any debugging snapshot
// implementation, incl. any custom impl. to be used by DebuggingSnapshotter
type DebuggingSnapshot interface {
// SetNodeGroupInfo is a setter to capture all the NodeInfo
SetNodeGroupInfo([]*framework.NodeInfo)
// SetUnscheduledPodsCanBeScheduled is a setter for all pods which are unscheduled
// SetClusterNodes is a setter to capture all the ClusterNodes
SetClusterNodes([]*framework.NodeInfo)
// SetUnscheduledPodsCanBeScheduled is a setter for all pods which are unscheduled,
// but they can be scheduled. i.e. pods which aren't triggering scale-up
SetUnscheduledPodsCanBeScheduled([]*v1.Pod)
// SetTemplateNodes is a setter for all the TemplateNodes present in the cluster
// incl. templates for which there are no nodes
SetTemplateNodes(map[string]*framework.NodeInfo)
// SetErrorMessage sets the error message in the snapshot
SetErrorMessage(string)
// SetEndTimestamp sets the timestamp in the snapshot,
Expand All @@ -58,11 +61,12 @@ type DebuggingSnapshot interface {
// Please add all new output fields in this struct. This is to make the data
// encoding/decoding easier as the single object going into the decoder
type DebuggingSnapshotImpl struct {
NodeInfo []*NodeInfo `json:"NodeList"`
UnscheduledPodsCanBeScheduled []*v1.Pod `json:"UnscheduledPodsCanBeScheduled"`
Error string `json:"Error,omitempty"`
StartTimestamp time.Time `json:"StartTimestamp"`
EndTimestamp time.Time `json:"EndTimestamp"`
NodeList []*ClusterNode `json:"NodeList"`
UnscheduledPodsCanBeScheduled []*v1.Pod `json:"UnscheduledPodsCanBeScheduled"`
Error string `json:"Error,omitempty"`
StartTimestamp time.Time `json:"StartTimestamp"`
EndTimestamp time.Time `json:"EndTimestamp"`
TemplateNodes map[string]*ClusterNode `json:"TemplateNodes"`
}

// SetUnscheduledPodsCanBeScheduled is the setter for UnscheduledPodsCanBeScheduled
Expand All @@ -73,31 +77,48 @@ func (s *DebuggingSnapshotImpl) SetUnscheduledPodsCanBeScheduled(podList []*v1.P

s.UnscheduledPodsCanBeScheduled = nil
for _, pod := range podList {
s.UnscheduledPodsCanBeScheduled = append(s.UnscheduledPodsCanBeScheduled, pod)
s.UnscheduledPodsCanBeScheduled = append(s.UnscheduledPodsCanBeScheduled, pod.DeepCopy())
}
}

// SetNodeGroupInfo is the setter for Node Group Info
// SetTemplateNodes replaces the snapshot's TemplateNodes with ClusterNode
// copies of the given template NodeInfos, keyed exactly as in the input map.
// A nil input map is ignored and leaves the current TemplateNodes untouched.
func (s *DebuggingSnapshotImpl) SetTemplateNodes(templates map[string]*framework.NodeInfo) {
	if templates == nil {
		return
	}

	copies := make(map[string]*ClusterNode)
	for key, nodeInfo := range templates {
		copies[key] = GetClusterNodeCopy(nodeInfo)
	}
	s.TemplateNodes = copies
}

// GetClusterNodeCopy is a utility func that converts a framework.NodeInfo
// into a ClusterNode. The node object and every pod are duplicated with
// DeepCopy, and only the bare *v1.Pod of each PodInfo is kept.
//
// A nil template yields an empty ClusterNode rather than a panic, and nil
// entries in template.Pods are skipped. When the template carries no pods,
// Pods is left nil, so the JSON encoding stays the same as before.
func GetClusterNodeCopy(template *framework.NodeInfo) *ClusterNode {
	// Reading template.Pods below would dereference a nil pointer, so bail
	// out early with an empty result.
	if template == nil {
		return &ClusterNode{}
	}

	cNode := &ClusterNode{Node: template.Node().DeepCopy()}
	if len(template.Pods) == 0 {
		return cNode
	}

	// The final length is bounded by len(template.Pods); size the slice once.
	cNode.Pods = make([]*v1.Pod, 0, len(template.Pods))
	for _, p := range template.Pods {
		if p == nil {
			// Nothing to copy, and p.Pod would panic on a nil PodInfo.
			continue
		}
		cNode.Pods = append(cNode.Pods, p.Pod.DeepCopy())
	}
	return cNode
}

// SetClusterNodes is the setter for the cluster node list (NodeList)
// All filtering/prettifying of data should be done here.
func (s *DebuggingSnapshotImpl) SetNodeGroupInfo(nodeInfos []*framework.NodeInfo) {
func (s *DebuggingSnapshotImpl) SetClusterNodes(nodeInfos []*framework.NodeInfo) {
if nodeInfos == nil {
return
}

var NodeInfoList []*NodeInfo
var NodeInfoList []*ClusterNode

for _, n := range nodeInfos {
nClone := n.Clone()
node := nClone.Node()

nodeInfo := &NodeInfo{
Node: node,
Pods: nClone.Pods,
}

NodeInfoList = append(NodeInfoList, nodeInfo)
clusterNode := GetClusterNodeCopy(n)
NodeInfoList = append(NodeInfoList, clusterNode)
}
s.NodeInfo = NodeInfoList
s.NodeList = NodeInfoList
}

// SetEndTimestamp is the setter for end timestamp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func TestBasicSetterWorkflow(t *testing.T) {
nodeGroups = append(nodeGroups, nodeInfo)
nodeGroups[0].SetNode(node)
timestamp := time.Now().In(time.UTC)
snapshot.SetNodeGroupInfo(nodeGroups)
snapshot.SetClusterNodes(nodeGroups)
snapshot.SetEndTimestamp(timestamp)
op, err := snapshot.GetOutputBytes()
assert.False(t, err)
Expand Down Expand Up @@ -87,9 +87,7 @@ func TestBasicSetterWorkflow(t *testing.T) {
assert.IsType(t, JSONList{}, pNodeInfo["Pods"])
assert.Greater(t, len(pNodeInfo["Pods"].([]interface{})), 0)
assert.IsType(t, JSONMap{}, pNodeInfo["Pods"].([]interface{})[0])
pPodInfo := pNodeInfo["Pods"].([]interface{})[0].(map[string]interface{})
assert.IsType(t, JSONMap{}, pPodInfo["Pod"])
pPod := pPodInfo["Pod"].(map[string]interface{})
pPod := pNodeInfo["Pods"].([]interface{})[0].(map[string]interface{})
assert.IsType(t, JSONMap{}, pPod["metadata"])
pPodMeta := pPod["metadata"].(map[string]interface{})
assert.IsType(t, String, pPodMeta["name"])
Expand Down
28 changes: 21 additions & 7 deletions cluster-autoscaler/debuggingsnapshot/debugging_snapshotter.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,14 @@ type DebuggingSnapshotter interface {
// StartDataCollection will check the State(s) and enable data
// collection for the loop if applicable
StartDataCollection()
// SetNodeGroupInfo is a setter to capture all the NodeInfo
SetNodeGroupInfo([]*framework.NodeInfo)
// SetClusterNodes is a setter to capture all the ClusterNodes
SetClusterNodes([]*framework.NodeInfo)
// SetUnscheduledPodsCanBeScheduled is a setter for all pods which are unscheduled
// but they can be scheduled. i.e. pods which aren't triggering scale-up
SetUnscheduledPodsCanBeScheduled([]*v1.Pod)
// SetTemplateNodes is a setter for all the TemplateNodes present in the cluster
// incl. templates for which there are no nodes
SetTemplateNodes(map[string]*framework.NodeInfo)
// ResponseHandler is the http response handler to manage incoming requests
ResponseHandler(http.ResponseWriter, *http.Request)
// IsDataCollectionAllowed checks the internal State of the snapshotter
Expand Down Expand Up @@ -205,16 +208,16 @@ func (d *DebuggingSnapshotterImpl) Flush() {
}
}

// SetNodeGroupInfo is the setter for Node Group Info
// SetClusterNodes is the setter for the cluster nodes captured in the debugging snapshot
// All filtering/prettifying of data should be done here.
func (d *DebuggingSnapshotterImpl) SetNodeGroupInfo(nodeInfos []*framework.NodeInfo) {
func (d *DebuggingSnapshotterImpl) SetClusterNodes(nodeInfos []*framework.NodeInfo) {
if !d.IsDataCollectionAllowed() {
return
}
d.Mutex.Lock()
defer d.Mutex.Unlock()
klog.Infof("NodeGroupInfo is being set for the debugging snapshot")
d.DebuggingSnapshot.SetNodeGroupInfo(nodeInfos)
klog.V(4).Infof("NodeGroupInfo is being set for the debugging snapshot")
d.DebuggingSnapshot.SetClusterNodes(nodeInfos)
*d.State = DATA_COLLECTED
}

Expand All @@ -225,11 +228,22 @@ func (d *DebuggingSnapshotterImpl) SetUnscheduledPodsCanBeScheduled(podList []*v
}
d.Mutex.Lock()
defer d.Mutex.Unlock()
klog.Infof("UnscheduledPodsCanBeScheduled is being set for the debugging snapshot")
klog.V(4).Infof("UnscheduledPodsCanBeScheduled is being set for the debugging snapshot")
d.DebuggingSnapshot.SetUnscheduledPodsCanBeScheduled(podList)
*d.State = DATA_COLLECTED
}

// SetTemplateNodes hands the template NodeInfos to the underlying
// DebuggingSnapshot. It does nothing unless IsDataCollectionAllowed reports
// true; the hand-off itself runs while d.Mutex is held.
func (d *DebuggingSnapshotterImpl) SetTemplateNodes(templates map[string]*framework.NodeInfo) {
	if allowed := d.IsDataCollectionAllowed(); !allowed {
		return
	}
	klog.V(4).Infof("TemplateNodes is being set for the debugging snapshot")

	d.Mutex.Lock()
	defer d.Mutex.Unlock()

	d.DebuggingSnapshot.SetTemplateNodes(templates)
}

// Cleanup clears the internal data sets of the cluster
func (d *DebuggingSnapshotterImpl) Cleanup() {
if d.CancelRequest != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ func TestBasicSnapshotRequest(t *testing.T) {
for !snapshotter.IsDataCollectionAllowed() {
snapshotter.StartDataCollection()
}
snapshotter.SetNodeGroupInfo(nodeGroups)
snapshotter.SetClusterNodes(nodeGroups)
snapshotter.Flush()

wg.Wait()
Expand Down Expand Up @@ -152,7 +152,7 @@ func TestRejectParallelRequest(t *testing.T) {
snapshotter.ResponseHandler(w1, req1)
assert.Equal(t, http.StatusTooManyRequests, w1.Code)

snapshotter.SetNodeGroupInfo(nil)
snapshotter.SetClusterNodes(nil)
snapshotter.Flush()
wg.Wait()

Expand Down

0 comments on commit 537e07f

Please sign in to comment.