Skip to content

Commit

Permalink
Revert "Fixed the spec update issue in kubeflow#795 (kubeflow#804)" (kubeflow#805)
Browse files Browse the repository at this point in the history

This reverts commit 1687c06.
  • Loading branch information
liyinan926 authored Feb 12, 2020
1 parent 1687c06 commit 3df7030
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 34 deletions.
2 changes: 1 addition & 1 deletion pkg/batchscheduler/interface/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,5 @@ type BatchScheduler interface {
Name() string

ShouldSchedule(app *v1beta2.SparkApplication) bool
DoBatchSchedulingOnSubmission(app *v1beta2.SparkApplication) error
DoBatchSchedulingOnSubmission(app *v1beta2.SparkApplication) (*v1beta2.SparkApplication, error)
}
48 changes: 25 additions & 23 deletions pkg/batchscheduler/volcano/volcano_scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,18 @@ package volcano

import (
"fmt"

corev1 "k8s.io/api/core/v1"
apiextensionsclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/rest"

"volcano.sh/volcano/pkg/apis/scheduling/v1alpha2"
volcanoclient "volcano.sh/volcano/pkg/client/clientset/versioned"

"github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/apis/sparkoperator.k8s.io/v1beta2"
schedulerinterface "github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/batchscheduler/interface"
"github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/batchscheduler/interface"
)

const (
Expand All @@ -55,36 +54,39 @@ func (v *VolcanoBatchScheduler) ShouldSchedule(app *v1beta2.SparkApplication) bo
return true
}

func (v *VolcanoBatchScheduler) DoBatchSchedulingOnSubmission(app *v1beta2.SparkApplication) error {
if app.Spec.Executor.Annotations == nil {
app.Spec.Executor.Annotations = make(map[string]string)
func (v *VolcanoBatchScheduler) DoBatchSchedulingOnSubmission(app *v1beta2.SparkApplication) (*v1beta2.SparkApplication, error) {
newApp := app.DeepCopy()
if newApp.Spec.Executor.Annotations == nil {
newApp.Spec.Executor.Annotations = make(map[string]string)
}

if app.Spec.Driver.Annotations == nil {
app.Spec.Driver.Annotations = make(map[string]string)
if newApp.Spec.Driver.Annotations == nil {
newApp.Spec.Driver.Annotations = make(map[string]string)
}

if app.Spec.Mode == v1beta2.ClientMode {
return v.syncPodGroupInClientMode(app)
} else if app.Spec.Mode == v1beta2.ClusterMode {
return v.syncPodGroupInClusterMode(app)
if newApp.Spec.Mode == v1beta2.ClientMode {
return v.syncPodGroupInClientMode(newApp)
} else if newApp.Spec.Mode == v1beta2.ClusterMode {
return v.syncPodGroupInClusterMode(newApp)
}
return nil
return newApp, nil
}

func (v *VolcanoBatchScheduler) syncPodGroupInClientMode(app *v1beta2.SparkApplication) error {
// We only care about the executor pods in client mode
if _, ok := app.Spec.Executor.Annotations[v1alpha2.GroupNameAnnotationKey]; !ok {
if err := v.syncPodGroup(app, 1, getExecutorRequestResource(app)); err == nil {
app.Spec.Executor.Annotations[v1alpha2.GroupNameAnnotationKey] = v.getAppPodGroupName(app)
func (v *VolcanoBatchScheduler) syncPodGroupInClientMode(app *v1beta2.SparkApplication) (*v1beta2.SparkApplication, error) {
//We only care about the executor pods in client mode
newApp := app.DeepCopy()
if _, ok := newApp.Spec.Executor.Annotations[v1alpha2.GroupNameAnnotationKey]; !ok {
//Only executor resource will be considered.
if err := v.syncPodGroup(newApp, 1, getExecutorRequestResource(app)); err == nil {
newApp.Spec.Executor.Annotations[v1alpha2.GroupNameAnnotationKey] = v.getAppPodGroupName(newApp)
} else {
return err
return nil, err
}
}
return nil
return newApp, nil
}

func (v *VolcanoBatchScheduler) syncPodGroupInClusterMode(app *v1beta2.SparkApplication) error {
func (v *VolcanoBatchScheduler) syncPodGroupInClusterMode(app *v1beta2.SparkApplication) (*v1beta2.SparkApplication, error) {
//We need both mark Driver and Executor when submitting
//NOTE: In cluster mode, the initial size of PodGroup is set to 1 in order to schedule driver pod first.
if _, ok := app.Spec.Driver.Annotations[v1alpha2.GroupNameAnnotationKey]; !ok {
Expand All @@ -94,10 +96,10 @@ func (v *VolcanoBatchScheduler) syncPodGroupInClusterMode(app *v1beta2.SparkAppl
app.Spec.Executor.Annotations[v1alpha2.GroupNameAnnotationKey] = v.getAppPodGroupName(app)
app.Spec.Driver.Annotations[v1alpha2.GroupNameAnnotationKey] = v.getAppPodGroupName(app)
} else {
return err
return nil, err
}
}
return nil
return app, nil
}

func (v *VolcanoBatchScheduler) getAppPodGroupName(app *v1beta2.SparkApplication) string {
Expand Down
16 changes: 6 additions & 10 deletions pkg/controller/sparkapplication/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -618,30 +618,26 @@ func hasRetryIntervalPassed(retryInterval *int64, attemptsDone int32, lastEventT

// submitSparkApplication creates a new submission for the given SparkApplication and submits it using spark-submit.
func (c *Controller) submitSparkApplication(app *v1beta2.SparkApplication) *v1beta2.SparkApplication {
// Make a copy to avoid sending potential updates made to the SparkApplication spec
// to the API server if Prometheus monitoring and/or batch scheduling is enabled.
// Changes made to the SparkApplication spec are only needed to construct the right
// submission command at runtime.
appCopy := app.DeepCopy()
if app.PrometheusMonitoringEnabled() {
if err := configPrometheusMonitoring(appCopy, c.kubeClient); err != nil {
if err := configPrometheusMonitoring(app, c.kubeClient); err != nil {
glog.Error(err)
}
}

// Use batch scheduler to perform scheduling task before submitting (before build command arguments).
if needScheduling, scheduler := c.shouldDoBatchScheduling(appCopy); needScheduling {
err := scheduler.DoBatchSchedulingOnSubmission(appCopy)
if needScheduling, scheduler := c.shouldDoBatchScheduling(app); needScheduling {
newApp, err := scheduler.DoBatchSchedulingOnSubmission(app)
if err != nil {
glog.Errorf("failed to process batch scheduler BeforeSubmitSparkApplication with error %v", err)
return app
}
//Spark submit will use the updated app to submit tasks(Spec will not be updated into API server)
app = newApp
}

driverPodName := getDriverPodName(app)
submissionID := uuid.New().String()
// Use the copy that may contain runtime changes to construct the submission command.
submissionCmdArgs, err := buildSubmissionCommandArgs(appCopy, driverPodName, submissionID)
submissionCmdArgs, err := buildSubmissionCommandArgs(app, driverPodName, submissionID)
if err != nil {
app.Status = v1beta2.SparkApplicationStatus{
AppState: v1beta2.ApplicationState{
Expand Down

0 comments on commit 3df7030

Please sign in to comment.