Merge pull request kubeflow#405 from lightbend/promfix
Various Prometheus-related fixes and improvements
liyinan926 authored Feb 14, 2019
2 parents 822726d + 11f80c4 commit ba9258d
Showing 6 changed files with 33 additions and 32 deletions.
4 changes: 3 additions & 1 deletion examples/spark-pi-prometheus.yaml
@@ -26,6 +26,8 @@ spec:
imagePullPolicy: Always
mainClass: org.apache.spark.examples.SparkPi
mainApplicationFile: "local:///opt/spark/examples/jars/spark-examples_2.11-2.4.0.jar"
+ arguments:
+   - "100000"
sparkVersion: "2.4.0"
restartPolicy:
type: Never
@@ -47,4 +49,4 @@
exposeExecutorMetrics: true
prometheus:
jmxExporterJar: "/prometheus/jmx_prometheus_javaagent-0.3.1.jar"
- port: 8090
\ No newline at end of file
+ port: 8090
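For reference, a minimal sketch in Go of the scrape metadata this spec is expected to produce. The annotation keys come from the constants in pkg/controller/sparkapplication/monitoring_config.go further down this diff; the values simply mirror the example above and are assumptions, not captured operator output.

package main

import "fmt"

func main() {
	// Hypothetical: annotations the operator attaches to driver/executor
	// pods so a Prometheus scraper can discover the JMX exporter endpoint.
	annotations := map[string]string{
		"prometheus.io/scrape": "true", // prometheusScrapeAnnotation; value assumed
		"prometheus.io/port":   "8090", // prometheusPortAnnotation; from prometheus.port above
	}
	fmt.Println(annotations)
}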
26 changes: 13 additions & 13 deletions pkg/config/constants.go
@@ -177,84 +177,84 @@ const DefaultPrometheusConfiguration = `
lowercaseOutputName: true
attrNameSnakeCase: true
rules:
- - pattern: metrics<name=(\S+)/(\S+)\.driver\.(BlockManager|DAGScheduler|jvm)\.(\S+)><>Value
+ - pattern: metrics<name=(\S+)-(\S+)\.driver\.(BlockManager|DAGScheduler|jvm)\.(\S+)><>Value
name: spark_driver_$3_$4
type: GAUGE
labels:
app_namespace: "$1"
app_name: "$2"
- - pattern: metrics<name=(\S+)/(\S+)\.driver\.(\S+)\.StreamingMetrics\.streaming\.(\S+)><>Value
+ - pattern: metrics<name=(\S+)-(\S+)\.driver\.(\S+)\.StreamingMetrics\.streaming\.(\S+)><>Value
name: spark_streaming_driver_$4
type: GAUGE
labels:
app_namespace: "$1"
app_name: "$2"
- - pattern: metrics<name=(\S+)/(\S+)\.driver\.spark\.streaming\.(\S+)\.(\S+)><>Value
+ - pattern: metrics<name=(\S+)-(\S+)\.driver\.spark\.streaming\.(\S+)\.(\S+)><>Value
name: spark_structured_streaming_driver_$4
type: GAUGE
labels:
app_namespace: "$1"
app_name: "$2"
query_name: "$3"
- - pattern: metrics<name=(\S+)/(\S+)\.(\S+)\.executor\.(\S+)><>Value
+ - pattern: metrics<name=(\S+)-(\S+)\.(\S+)\.executor\.(\S+)><>Value
name: spark_executor_$4
type: GAUGE
labels:
app_namespace: "$1"
app_name: "$2"
executor_id: "$3"
- - pattern: metrics<name=(\S+)/(\S+)\.driver\.DAGScheduler\.(.*)><>Count
+ - pattern: metrics<name=(\S+)-(\S+)\.driver\.DAGScheduler\.(.*)><>Count
name: spark_driver_DAGScheduler_$3_count
type: COUNTER
labels:
app_namespace: "$1"
app_name: "$2"
- - pattern: metrics<name=(\S+)/(\S+)\.driver\.HiveExternalCatalog\.(.*)><>Count
+ - pattern: metrics<name=(\S+)-(\S+)\.driver\.HiveExternalCatalog\.(.*)><>Count
name: spark_driver_HiveExternalCatalog_$3_count
type: COUNTER
labels:
app_namespace: "$1"
app_name: "$2"
- - pattern: metrics<name=(\S+)/(\S+)\.driver\.CodeGenerator\.(.*)><>Count
+ - pattern: metrics<name=(\S+)-(\S+)\.driver\.CodeGenerator\.(.*)><>Count
name: spark_driver_CodeGenerator_$3_count
type: COUNTER
labels:
app_namespace: "$1"
app_name: "$2"
- - pattern: metrics<name=(\S+)/(\S+)\.driver\.LiveListenerBus\.(.*)><>Count
+ - pattern: metrics<name=(\S+)-(\S+)\.driver\.LiveListenerBus\.(.*)><>Count
name: spark_driver_LiveListenerBus_$3_count
type: COUNTER
labels:
app_namespace: "$1"
app_name: "$2"
- - pattern: metrics<name=(\S+)/(\S+)\.driver\.LiveListenerBus\.(.*)><>Value
+ - pattern: metrics<name=(\S+)-(\S+)\.driver\.LiveListenerBus\.(.*)><>Value
name: spark_driver_LiveListenerBus_$3
type: GAUGE
labels:
app_namespace: "$1"
app_name: "$2"
- - pattern: metrics<name=(\S+)/(\S+)\.(.*)\.executor\.(.*)><>Count
+ - pattern: metrics<name=(\S+)-(\S+)\.(.*)\.executor\.(.*)><>Count
name: spark_executor_$4_count
type: COUNTER
labels:
app_namespace: "$1"
app_name: "$2"
executor_id: "$3"
- - pattern: metrics<name=(\S+)/(\S+)\.([0-9]+)\.(jvm|NettyBlockTransfer)\.(.*)><>Value
+ - pattern: metrics<name=(\S+)-(\S+)\.([0-9]+)\.(jvm|NettyBlockTransfer)\.(.*)><>Value
name: spark_executor_$4_$5
type: GAUGE
labels:
app_namespace: "$1"
app_name: "$2"
executor_id: "$3"
- - pattern: metrics<name=(\S+)/(\S+)\.([0-9]+)\.HiveExternalCatalog\.(.*)><>Count
+ - pattern: metrics<name=(\S+)-(\S+)\.([0-9]+)\.HiveExternalCatalog\.(.*)><>Count
name: spark_executor_HiveExternalCatalog_$4_count
type: COUNTER
labels:
app_namespace: "$1"
app_name: "$2"
executor_id: "$3"
- - pattern: metrics<name=(\S+)/(\S+)\.([0-9]+)\.CodeGenerator\.(.*)><>Count
+ - pattern: metrics<name=(\S+)-(\S+)\.([0-9]+)\.CodeGenerator\.(.*)><>Count
name: spark_executor_CodeGenerator_$4_count
type: COUNTER
labels:
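A quick, hedged check of the central change in this file: the separator between app namespace and app name in the JMX metric pattern moves from "/" to "-". The sketch below runs the updated driver rule through Go's regexp package against a made-up MBean name (jmx_exporter itself matches with Java regex, which resolves this pattern the same way).

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Updated driver rule from DefaultPrometheusConfiguration ("-" separator).
	p := regexp.MustCompile(`metrics<name=(\S+)-(\S+)\.driver\.(BlockManager|DAGScheduler|jvm)\.(\S+)><>Value`)
	// Hypothetical MBean name for an app "sparkpi" in namespace "default".
	m := p.FindStringSubmatch(`metrics<name=default-sparkpi.driver.DAGScheduler.stage.failedStages><>Value`)
	// m[1]="default" (app_namespace), m[2]="sparkpi" (app_name),
	// m[3]="DAGScheduler", m[4]="stage.failedStages"
	fmt.Println(m[1:])
}

One caveat of the hyphen separator: both leading capture groups are greedy \S+, so a name that itself contains a hyphen splits at the last usable hyphen; for example, default-spark-pi.driver... parses as app_namespace="default-spark" and app_name="pi".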
3 changes: 1 addition & 2 deletions pkg/controller/sparkapplication/monitoring_config.go
@@ -18,7 +18,6 @@ package sparkapplication

import (
"fmt"

corev1 "k8s.io/api/core/v1"
apiErrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -32,7 +31,7 @@ import (
const (
metricsPropertiesKey = "metrics.properties"
prometheusConfigKey = "prometheus.yaml"
prometheusConfigMapNameSuffix = "prometheus-config"
prometheusConfigMapNameSuffix = "prom-conf"
prometheusConfigMapMountPath = "/etc/metrics/conf"
prometheusScrapeAnnotation = "prometheus.io/scrape"
prometheusPortAnnotation = "prometheus.io/port"
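The suffix shortening above ("prometheus-config" to "prom-conf", and "-volume" to "-vol" in the webhook below) plausibly exists to keep derived resource names inside Kubernetes' 63-character name limit when application names are long. A hedged sketch follows; the naming helper is an assumption for illustration, not the operator's actual code.

package main

import "fmt"

const prometheusConfigMapNameSuffix = "prom-conf" // from the constants above

// prometheusConfigMapName is hypothetical; it only illustrates how a
// shorter suffix leaves more headroom under the 63-character cap.
func prometheusConfigMapName(appName string) string {
	return fmt.Sprintf("%s-%s", appName, prometheusConfigMapNameSuffix)
}

func main() {
	name := prometheusConfigMapName("my-rather-long-spark-application-name")
	fmt.Println(name, len(name) <= 63) // still within the DNS-1123 limit
}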
2 changes: 1 addition & 1 deletion pkg/webhook/patch.go
@@ -205,7 +205,7 @@ func addGeneralConfigMaps(pod *corev1.Pod) []*patchOperation {
var patchOps []*patchOperation
namesToMountPaths := config.FindGeneralConfigMaps(pod.Annotations)
for name, mountPath := range namesToMountPaths {
volumeName := name + "-volume"
volumeName := name + "-vol"
patchOps = append(patchOps, addConfigMapVolume(pod, name, volumeName))
patchOps = append(patchOps, addConfigMapVolumeMount(pod, volumeName, mountPath))
}
2 changes: 1 addition & 1 deletion pkg/webhook/patch_test.go
@@ -223,7 +223,7 @@ func TestPatchSparkPod_ConfigMaps(t *testing.T) {
}

assert.Equal(t, 1, len(modifiedPod.Spec.Volumes))
assert.Equal(t, "foo-volume", modifiedPod.Spec.Volumes[0].Name)
assert.Equal(t, "foo-vol", modifiedPod.Spec.Volumes[0].Name)
assert.True(t, modifiedPod.Spec.Volumes[0].ConfigMap != nil)
assert.Equal(t, 1, len(modifiedPod.Spec.Containers[0].VolumeMounts))
assert.Equal(t, "/path/to/foo", modifiedPod.Spec.Containers[0].VolumeMounts[0].MountPath)
28 changes: 14 additions & 14 deletions spark-docker/conf/prometheus.yaml
@@ -20,22 +20,22 @@ attrNameSnakeCase: true
rules:
# These come from the application driver if it's a streaming application
# Example: default-streaming.driver.com.example.ClassName.StreamingMetrics.streaming.lastCompletedBatch_schedulingDelay
- - pattern: metrics<name=(\S+)/(\S+)\.driver\.(\S+)\.StreamingMetrics\.streaming\.(\S+)><>Value
+ - pattern: metrics<name=(\S+)-(\S+)\.driver\.(\S+)\.StreamingMetrics\.streaming\.(\S+)><>Value
name: spark_streaming_driver_$4
labels:
app_namespace: "$1"
app_name: "$2"
# These come from the application driver if it's a structured streaming application
# Example: default-sstreaming.driver.spark.streaming.QueryName.inputRate-total
- - pattern: metrics<name=(\S+)/(\S+)\.driver\.spark\.streaming\.(\S+)\.(\S+)><>Value
+ - pattern: metrics<name=(\S+)-(\S+)\.driver\.spark\.streaming\.(\S+)\.(\S+)><>Value
name: spark_structured_streaming_driver_$4
labels:
app_namespace: "$1"
app_name: "$2"
query_name: "$3"
# These come from the application executors
# Example: default-spark-pi.0.executor.threadpool.activeTasks
- - pattern: metrics<name=(\S+)/(\S+)\.(\S+)\.executor\.(\S+)><>Value
+ - pattern: metrics<name=(\S+)-(\S+)\.(\S+)\.executor\.(\S+)><>Value
name: spark_executor_$4
type: GAUGE
labels:
@@ -44,52 +44,52 @@ rules:
executor_id: "$3"
# These come from the application driver
# Example: default-spark-pi.driver.DAGScheduler.stage.failedStages
- - pattern: metrics<name=(\S+)/(\S+)\.driver\.(BlockManager|DAGScheduler|jvm)\.(\S+)><>Value
+ - pattern: metrics<name=(\S+)-(\S+)\.driver\.(BlockManager|DAGScheduler|jvm)\.(\S+)><>Value
name: spark_driver_$3_$4
type: GAUGE
labels:
app_namespace: "$1"
app_name: "$2"
# These come from the application driver
# Emulate timers for DAGScheduler like messageProcessingTime
- - pattern: metrics<name=(\S+)/(\S+)\.driver\.DAGScheduler\.(.*)><>Count
+ - pattern: metrics<name=(\S+)-(\S+)\.driver\.DAGScheduler\.(.*)><>Count
name: spark_driver_DAGScheduler_$3_count
type: COUNTER
labels:
app_namespace: "$1"
app_name: "$2"
# HiveExternalCatalog is of type counter
- - pattern: metrics<name=(\S+)/(\S+)\.driver\.HiveExternalCatalog\.(.*)><>Count
+ - pattern: metrics<name=(\S+)-(\S+)\.driver\.HiveExternalCatalog\.(.*)><>Count
name: spark_driver_HiveExternalCatalog_$3_count
type: COUNTER
labels:
app_namespace: "$1"
app_name: "$2"
# These come from the application driver
# Emulate histograms for CodeGenerator
- - pattern: metrics<name=(\S+)/(\S+)\.driver\.CodeGenerator\.(.*)><>Count
+ - pattern: metrics<name=(\S+)-(\S+)\.driver\.CodeGenerator\.(.*)><>Count
name: spark_driver_CodeGenerator_$3_count
type: COUNTER
labels:
app_namespace: "$1"
app_name: "$2"
# These come from the application driver
# Emulate timer (keep only count attribute) plus counters for LiveListenerBus
- - pattern: metrics<name=(\S+)/(\S+)\.driver\.LiveListenerBus\.(.*)><>Count
+ - pattern: metrics<name=(\S+)-(\S+)\.driver\.LiveListenerBus\.(.*)><>Count
name: spark_driver_LiveListenerBus_$3_count
type: COUNTER
labels:
app_namespace: "$1"
app_name: "$2"
# Get Gauge type metrics for LiveListenerBus
- - pattern: metrics<name=(\S+)/(\S+)\.driver\.LiveListenerBus\.(.*)><>Value
+ - pattern: metrics<name=(\S+)-(\S+)\.driver\.LiveListenerBus\.(.*)><>Value
name: spark_driver_LiveListenerBus_$3
type: GAUGE
labels:
app_namespace: "$1"
app_name: "$2"
# Executors counters
- - pattern: metrics<name=(\S+)/(\S+)\.(.*)\.executor\.(.*)><>Count
+ - pattern: metrics<name=(\S+)-(\S+)\.(.*)\.executor\.(.*)><>Count
name: spark_executor_$4_count
type: COUNTER
labels:
@@ -98,14 +98,14 @@ rules:
executor_id: "$3"
# These come from the application executors
# Example: app-20160809000059-0000.0.jvm.threadpool.activeTasks
- - pattern: metrics<name=(\S+)/(\S+)\.([0-9]+)\.(jvm|NettyBlockTransfer)\.(.*)><>Value
+ - pattern: metrics<name=(\S+)-(\S+)\.([0-9]+)\.(jvm|NettyBlockTransfer)\.(.*)><>Value
name: spark_executor_$4_$5
type: GAUGE
labels:
app_namespace: "$1"
app_name: "$2"
executor_id: "$3"
- - pattern: metrics<name=(\S+)/(\S+)\.([0-9]+)\.HiveExternalCatalog\.(.*)><>Count
+ - pattern: metrics<name=(\S+)-(\S+)\.([0-9]+)\.HiveExternalCatalog\.(.*)><>Count
name: spark_executor_HiveExternalCatalog_$4_count
type: COUNTER
labels:
@@ -114,10 +114,10 @@ rules:
executor_id: "$3"
# These come from the application driver
# Emulate histograms for CodeGenerator
- - pattern: metrics<name=(\S+)/(\S+)\.([0-9]+)\.CodeGenerator\.(.*)><>Count
+ - pattern: metrics<name=(\S+)-(\S+)\.([0-9]+)\.CodeGenerator\.(.*)><>Count
name: spark_executor_CodeGenerator_$4_count
type: COUNTER
labels:
app_namespace: "$1"
app_name: "$2"
executor_id: "$3"
executor_id: "$3"
