diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ee924d1bbc..91ab9818678 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,7 +37,12 @@ ### 💡 Enhancements 💡 - Update OTLP to v0.17.0 (#5335) - - Add optional min/max fields to histograms (#5399) +- Add optional min/max fields to histograms (#5399) +- User-defined Resource attributes can be specified under `service.telemetry.resource` + configuration key and will be included as metric lables for own telemetry. + If `service.instance.id` is not specified it will be auto-generated. Previously + `service.instance.id` was always auto-generated, so the default of the new + behavior matches the old behavior. (#5402) ## v0.51.0 Beta diff --git a/config/moved_service.go b/config/moved_service.go index 83bc3855c96..b4b0b9cbf88 100644 --- a/config/moved_service.go +++ b/config/moved_service.go @@ -38,6 +38,13 @@ type Service struct { type ServiceTelemetry struct { Logs ServiceTelemetryLogs `mapstructure:"logs"` Metrics ServiceTelemetryMetrics `mapstructure:"metrics"` + + // Resource specifies user-defined attributes to include with all emitted telemetry. + // For metrics the attributes become Prometheus labels. + // Note that some attributes are added automatically (e.g. service.version) even + // if they are not specified here. In order to suppress such attributes the + // attribute must be specified in this map with null YAML value (nil string pointer). + Resource map[string]*string `mapstructure:"resource"` } // ServiceTelemetryLogs defines the configurable settings for service telemetry logs. diff --git a/service/collector_test.go b/service/collector_test.go index 4d0899835ed..e79ccd80d5d 100644 --- a/service/collector_test.go +++ b/service/collector_test.go @@ -35,9 +35,9 @@ import ( "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/config" - "go.opentelemetry.io/collector/config/mapconverter/overwritepropertiesmapconverter" "go.opentelemetry.io/collector/internal/testcomponents" "go.opentelemetry.io/collector/internal/testutil" + "go.opentelemetry.io/collector/internal/version" "go.opentelemetry.io/collector/service/featuregate" ) @@ -178,7 +178,111 @@ func TestCollectorFailedShutdown(t *testing.T) { assert.Equal(t, Closed, col.GetState()) } -func testCollectorStartHelper(t *testing.T, telemetry collectorTelemetryExporter) { +// mapConverter applies extraMap of config settings. Useful for overriding the config +// for testing purposes. Keys must use "::" delimiter between levels. +type mapConverter struct { + extraMap map[string]*string +} + +func (m mapConverter) Convert(ctx context.Context, cfgMap *config.Map) error { + for k, v := range m.extraMap { + cfgMap.Set(k, v) + } + return nil +} + +type labelState int + +const ( + labelNotPresent labelState = iota + labelSpecificValue + labelAnyValue +) + +type labelValue struct { + label string + state labelState +} + +type ownMetricsTestCase struct { + name string + userDefinedResource map[string]*string + expectedLabels map[string]labelValue +} + +var testResourceAttrValue = "resource_attr_test_value" +var testInstanceID = "test_instance_id" +var testServiceVersion = "2022-05-20" + +var ownMetricsTestCases = []ownMetricsTestCase{ + { + name: "no resource", + userDefinedResource: nil, + // All labels added to all collector metrics by default are listed below. + // These labels are hard coded here in order to avoid inadvertent changes: + // at this point changing labels should be treated as a breaking changing + // and requires a good justification. The reason is that changes to metric + // names or labels can break alerting, dashboards, etc that are used to + // monitor the Collector in production deployments. + expectedLabels: map[string]labelValue{ + "service_instance_id": {state: labelAnyValue}, + "service_version": {label: version.Version, state: labelSpecificValue}, + }, + }, + { + name: "resource with custom attr", + userDefinedResource: map[string]*string{ + "custom_resource_attr": &testResourceAttrValue, + }, + expectedLabels: map[string]labelValue{ + "service_instance_id": {state: labelAnyValue}, + "service_version": {label: version.Version, state: labelSpecificValue}, + "custom_resource_attr": {label: "resource_attr_test_value", state: labelSpecificValue}, + }, + }, + { + name: "override service.instance.id", + userDefinedResource: map[string]*string{ + "service.instance.id": &testInstanceID, + }, + expectedLabels: map[string]labelValue{ + "service_instance_id": {label: "test_instance_id", state: labelSpecificValue}, + "service_version": {label: version.Version, state: labelSpecificValue}, + }, + }, + { + name: "suppress service.instance.id", + userDefinedResource: map[string]*string{ + "service.instance.id": nil, // nil value in config is used to suppress attributes. + }, + expectedLabels: map[string]labelValue{ + "service_instance_id": {state: labelNotPresent}, + "service_version": {label: version.Version, state: labelSpecificValue}, + }, + }, + { + name: "override service.version", + userDefinedResource: map[string]*string{ + "service.version": &testServiceVersion, + }, + expectedLabels: map[string]labelValue{ + "service_instance_id": {state: labelAnyValue}, + "service_version": {label: "2022-05-20", state: labelSpecificValue}, + }, + }, + { + name: "suppress service.version", + userDefinedResource: map[string]*string{ + "service.version": nil, // nil value in config is used to suppress attributes. + }, + expectedLabels: map[string]labelValue{ + "service_instance_id": {state: labelAnyValue}, + "service_version": {state: labelNotPresent}, + }, + }, +} + +func testCollectorStartHelper(t *testing.T, telemetry collectorTelemetryExporter, tc ownMetricsTestCase) { factories, err := testcomponents.NewDefaultFactories() require.NoError(t, err) var once sync.Once @@ -194,10 +298,19 @@ func testCollectorStartHelper(t *testing.T, telemetry collectorTelemetryExporter cfgSet := newDefaultConfigProviderSettings([]string{ filepath.Join("testdata", "otelcol-config.yaml"), }) + + // Prepare config properties to be merged with the main config. + extraCfgAsProps := map[string]*string{ + // Set the metrics address to expose own metrics on. + "service::telemetry::metrics::address": &metricsAddr, + } + // Also include resource attributes under the service.telemetry.resource key. + for k, v := range tc.userDefinedResource { + extraCfgAsProps["service::telemetry::resource::"+k] = v + } + cfgSet.MapConverters = append([]config.MapConverter{ - overwritepropertiesmapconverter.New( - []string{"service.telemetry.metrics.address=" + metricsAddr}, - )}, + mapConverter{extraCfgAsProps}}, cfgSet.MapConverters..., ) cfgProvider, err := NewConfigProvider(cfgSet) @@ -220,16 +333,7 @@ func testCollectorStartHelper(t *testing.T, telemetry collectorTelemetryExporter assert.Equal(t, col.telemetry.Logger, col.GetLogger()) assert.True(t, loggingHookCalled) - // All labels added to all collector metrics by default are listed below. - // These labels are hard coded here in order to avoid inadvertent changes: - // at this point changing labels should be treated as a breaking changing - // and requires a good justification. The reason is that changes to metric - // names or labels can break alerting, dashboards, etc that are used to - // monitor the Collector in production deployments. - mandatoryLabels := []string{ - "service_instance_id", - } - assertMetrics(t, metricsAddr, mandatoryLabels) + assertMetrics(t, metricsAddr, tc.expectedLabels) assertZPages(t) col.signalsChannel <- syscall.SIGTERM @@ -239,15 +343,23 @@ func testCollectorStartHelper(t *testing.T, telemetry collectorTelemetryExporter } func TestCollectorStartWithOpenCensusMetrics(t *testing.T) { - testCollectorStartHelper(t, newColTelemetry(featuregate.NewRegistry())) + for _, tc := range ownMetricsTestCases { + t.Run(tc.name, func(t *testing.T) { + testCollectorStartHelper(t, newColTelemetry(featuregate.NewRegistry()), tc) + }) + } } func TestCollectorStartWithOpenTelemetryMetrics(t *testing.T) { - colTel := newColTelemetry(featuregate.NewRegistry()) - colTel.registry.Apply(map[string]bool{ - useOtelForInternalMetricsfeatureGateID: true, - }) - testCollectorStartHelper(t, colTel) + for _, tc := range ownMetricsTestCases { + t.Run(tc.name, func(t *testing.T) { + colTel := newColTelemetry(featuregate.NewRegistry()) + colTel.registry.Apply(map[string]bool{ + useOtelForInternalMetricsfeatureGateID: true, + }) + testCollectorStartHelper(t, colTel, tc) + }) + } } func TestCollectorShutdownBeforeRun(t *testing.T) { @@ -316,7 +428,7 @@ func (tel *mockColTelemetry) shutdown() error { return errors.New("err1") } -func assertMetrics(t *testing.T, metricsAddr string, mandatoryLabels []string) { +func assertMetrics(t *testing.T, metricsAddr string, expectedLabels map[string]labelValue) { client := &http.Client{} resp, err := client.Get("https://" + metricsAddr + "/metrics") require.NoError(t, err) @@ -341,14 +453,21 @@ func assertMetrics(t *testing.T, metricsAddr string, mandatoryLabels []string) { metricName[:len(prefix)+1]+"...") for _, metric := range metricFamily.Metric { - var labelNames []string + labelMap := map[string]string{} for _, labelPair := range metric.Label { - labelNames = append(labelNames, *labelPair.Name) + labelMap[*labelPair.Name] = *labelPair.Value } - for _, mandatoryLabel := range mandatoryLabels { - // require is used here so test fails with a single message. - require.Contains(t, labelNames, mandatoryLabel, "mandatory label %q not present", mandatoryLabel) + for k, v := range expectedLabels { + switch v.state { + case labelNotPresent: + _, present := labelMap[k] + assert.False(t, present, "label %q must not be present", k) + case labelSpecificValue: + require.Equal(t, v.label, labelMap[k], "mandatory label %q value mismatch", k) + case labelAnyValue: + assert.NotEmpty(t, labelMap[k], "mandatory label %q not present", k) + } } } } diff --git a/service/telemetry.go b/service/telemetry.go index fb7cc6173af..96c6c3eb7f1 100644 --- a/service/telemetry.go +++ b/service/telemetry.go @@ -94,6 +94,7 @@ func (tel *colTelemetry) init(col *Collector) error { func (tel *colTelemetry) initOnce(col *Collector) error { logger := col.telemetry.Logger cfg := col.service.config.Telemetry + resource := cfg.Resource level := cfg.Metrics.Level metricsAddr := cfg.Metrics.Address @@ -109,8 +110,29 @@ func (tel *colTelemetry) initOnce(col *Collector) error { logger.Info("Setting up own telemetry...") - instanceUUID, _ := uuid.NewRandom() - instanceID := instanceUUID.String() + // Construct telemetry attributes from resource attributes. + telAttrs := map[string]string{} + for k, v := range resource { + // nil value indicates that the attribute should not be included in the telemetry. + if v != nil { + telAttrs[k] = *v + } + } + + if _, ok := resource[semconv.AttributeServiceInstanceID]; !ok { + // AttributeServiceInstanceID is not specified in the config. Auto-generate one. + instanceUUID, _ := uuid.NewRandom() + instanceID := instanceUUID.String() + telAttrs[semconv.AttributeServiceInstanceID] = instanceID + } + + if AddCollectorVersionTag { + if _, ok := resource[semconv.AttributeServiceVersion]; !ok { + // AttributeServiceVersion is not specified in the config. Use the actual + // build version. + telAttrs[semconv.AttributeServiceVersion] = version.Version + } + } var pe http.Handler if tel.registry.IsEnabled(useOtelForInternalMetricsfeatureGateID) { @@ -120,7 +142,7 @@ func (tel *colTelemetry) initOnce(col *Collector) error { } pe = otelHandler } else { - ocHandler, err := tel.initOpenCensus(col, instanceID) + ocHandler, err := tel.initOpenCensus(col, telAttrs) if err != nil { return err } @@ -131,8 +153,7 @@ func (tel *colTelemetry) initOnce(col *Collector) error { "Serving Prometheus metrics", zap.String(zapKeyTelemetryAddress, metricsAddr), zap.String(zapKeyTelemetryLevel, level.String()), - zap.String(semconv.AttributeServiceInstanceID, instanceID), - zap.String(semconv.AttributeServiceVersion, version.Version), + zap.Any("Resource", resource), ) mux := http.NewServeMux() @@ -152,7 +173,7 @@ func (tel *colTelemetry) initOnce(col *Collector) error { return nil } -func (tel *colTelemetry) initOpenCensus(col *Collector, instanceID string) (http.Handler, error) { +func (tel *colTelemetry) initOpenCensus(col *Collector, telAttrs map[string]string) (http.Handler, error) { processMetricsViews, err := telemetry2.NewProcessMetricsViews(getBallastSize(col.service.host)) if err != nil { return nil, err @@ -178,10 +199,8 @@ func (tel *colTelemetry) initOpenCensus(col *Collector, instanceID string) (http opts.ConstLabels = make(map[string]string) - opts.ConstLabels[sanitizePrometheusKey(semconv.AttributeServiceInstanceID)] = instanceID - - if AddCollectorVersionTag { - opts.ConstLabels[sanitizePrometheusKey(semconv.AttributeServiceVersion)] = version.Version + for k, v := range telAttrs { + opts.ConstLabels[sanitizePrometheusKey(k)] = v } pe, err := prometheus.NewExporter(opts)