Skip to content

Commit

Permalink
feat: use the otel collector (#874)
Browse files Browse the repository at this point in the history
* configure otel collector locally

* use env variable to set jaeger endpoint

* add code to send traces to lightstep

* add deployment for the collector

* change deploy configuration to point to collector

* fix documentation and fix test

* fix tests

* don't convert trace if it is empty

* enable telemetry in all deployments

* increase retry delay due to collector delay

* set as 5s

* set as 10s

* rollback retry delay

* add amount of retrieved spans in trace

* record errors

* add trace and span ID to trace

* downgrade otlp to 0.12

* go mod tidy

* increase time for polling in integration instance

* move deploy script from github actions to script file

* rename otel folder to local-config

* replace localhost with otel-collector in collector sample configuration

* remove networks from compose file
  • Loading branch information
mathnogueira authored Jul 6, 2022
1 parent 437f068 commit 40896a8
Show file tree
Hide file tree
Showing 17 changed files with 269 additions and 82 deletions.
10 changes: 6 additions & 4 deletions .github/workflows/deploy-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ jobs:
--set image.tag=sha-$(git rev-parse --short $GITHUB_SHA) \
--set tracingBackend=jaeger \
--set jaegerConnectionConfig.endpoint="jaeger-query.tracetest.svc.cluster.local:16685" \
--set telemetry.jaeger.host="jaeger-agent.tracetest.svc.cluster.local" \
--set telemetry.enabled="true" \
--set telemetry.otelCollectorEndpoint="otel-collector.tracetest.svc.cluster.local:4317" \
--set service.annotations."cloud\.google\.com/backend-config"='\{\"default\":\"tracetest-beta\"\}' \
--set ingress.enabled=true \
--set 'ingress.hosts[0].host=beta.tracetest.io,ingress.hosts[0].paths[0].path=/,ingress.hosts[0].paths[0].pathType=Prefix' \
Expand Down Expand Up @@ -135,9 +136,10 @@ jobs:
--namespace tracetest-integration \
--set analytics.enabled=false \
--set image.tag=sha-$(git rev-parse --short $GITHUB_SHA) \
--set poolingConfig.maxWaitTimeForTrace=15s \
--set poolingConfig.retryDelay=3s \
--set poolingConfig.maxWaitTimeForTrace=60s \
--set poolingConfig.retryDelay=6s \
--set tracingBackend=jaeger \
--set jaegerConnectionConfig.endpoint="jaeger-query.tracetest.svc.cluster.local:16685" \
--set telemetry.jaeger.host="jaeger-agent.tracetest.svc.cluster.local" \
--set telemetry.enabled="true" \
--set telemetry.otelCollectorEndpoint="otel-collector.tracetest.svc.cluster.local:4317" \
--set ingress.enabled=false
3 changes: 2 additions & 1 deletion .github/workflows/pull-request.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,8 @@ jobs:
--set image.pullPolicy=Always \
--set tracingBackend=jaeger \
--set jaegerConnectionConfig.endpoint="jaeger-query.tracetest.svc.cluster.local:16685" \
--set telemetry.jaeger.host="jaeger-agent.tracetest.svc.cluster.local" \
--set telemetry.enabled="true" \
--set telemetry.otelCollectorEndpoint="otel-collector.tracetest.svc.cluster.local:4317" \
--set poolingConfig.maxWaitTimeForTrace="60s" \
--set poolingConfig.retryDelay="1s" \
--set ingress.enabled=false \
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/release-server.yml
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,8 @@ jobs:
--set analytics.enabled=true \
--set tracingBackend=jaeger \
--set jaegerConnectionConfig.endpoint="jaeger-query.tracetest.svc.cluster.local:16685" \
--set telemetry.jaeger.host="jaeger-agent.tracetest.svc.cluster.local" \
--set telemetry.enabled="true" \
--set telemetry.otelCollectorEndpoint="otel-collector.tracetest.svc.cluster.local:4317" \
--set service.annotations."cloud\.google\.com/backend-config"='\{\"default\":\"tracetest-beta\"\}' \
--set ingress.enabled=true \
--set 'ingress.hosts[0].host=demo.tracetest.io,ingress.hosts[0].paths[0].path=/,ingress.hosts[0].paths[0].pathType=Prefix' \
Expand Down
35 changes: 35 additions & 0 deletions .github/workflows/update-dependency-deployments.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Re-applies the cluster dependencies kept under k8s/ (the OpenTelemetry
# collector manifests) whenever they change on the main branch.
name: Update dependency deployments

on:
  push:
    branches: [main]
    paths:
      # A bare `k8s` entry only matches a file literally named "k8s"; the
      # glob is needed to match files inside the directory.
      - "k8s/**"

jobs:
  apply:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v3

      # The hosted runner user is not root, so apt needs sudo. The package
      # index is refreshed first so the install does not hit a stale mirror.
      - name: Install envsubst
        run: |
          sudo apt-get update
          sudo apt-get install -y gettext-base

      - uses: google-github-actions/setup-gcloud@94337306dda8180d967a56932ceb4ddcf01edae7
        with:
          service_account_key: ${{ secrets.GKE_SA_KEY }}
          project_id: ${{ secrets.GKE_PROJECT }}

      - uses: google-github-actions/get-gke-credentials@fb08709ba27618c31c09e014e1d8364b02e5042e
        with:
          cluster_name: ${{ secrets.GKE_CLUSTER }}
          location: ${{ secrets.GKE_ZONE }}
          credentials: ${{ secrets.GKE_SA_KEY }}

      # The deploy script renders k8s/collector.yml with envsubst, and that
      # manifest contains a LIGHTSTEP_TOKEN placeholder. Without this env
      # entry the placeholder is rendered as an empty string.
      # NOTE(review): the secret name is an assumption - confirm it matches
      # the repository secret. An undefined secret evaluates to an empty
      # string, which is what the step received before this entry existed.
      - name: Apply
        env:
          LIGHTSTEP_TOKEN: ${{ secrets.LIGHTSTEP_TOKEN }}
        run: ./k8s/deploy-collector.sh




19 changes: 19 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,24 @@ services:
timeout: 5s
retries: 60

# Local OpenTelemetry collector. It loads the sample configuration mounted
# from ./local-config and starts only after the jaeger service is healthy.
otel-collector:
  image: otel/opentelemetry-collector:0.54.0
  command:
    - "--config"
    - "/otel-local-config.yaml"
  volumes:
    - "./local-config/collector.config.yaml:/otel-local-config.yaml"
  environment:
    # Both variables are referenced by the mounted collector configuration.
    - JAEGER_ENDPOINT=jaeger:14250
    - LIGHTSTEP_TOKEN=<PLACE-YOUR-TOKEN-HERE>
  ports:
    # 4317 is the port the server's otelCollectorEndpoint setting points at.
    - "4317:4317"
    - "8888:8888"
    - "55679:55679"
  depends_on:
    jaeger:
      condition: service_healthy

jaeger:
image: jaegertracing/all-in-one:latest
ports:
Expand All @@ -52,6 +70,7 @@ services:
interval: 1s
timeout: 3s
retries: 60

queue:
image: rabbitmq:3.9
ports:
Expand Down
11 changes: 3 additions & 8 deletions docs/server-configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ poolingConfig:
# and the test will be marked as failed.
maxWaitTimeForTrace: 90s

# How much time tracetest should wait before trying to fetch the trace since the last execution?
# How long tracetest should wait after the last attempt before trying to fetch the trace again.
retryDelay: 5s

# Server configuration
Expand All @@ -38,15 +38,10 @@ googleAnalytics:
telemetry:
serviceName: tracetest
sampling: 100
jaeger:
host: localhost
port: 6831
exporters:
- console
- jaeger
otelCollectorEndpoint: localhost:4317
```
## Providing a configuration when running a container
```cmd
docker run --volume "`pwd`/my-config-file.yaml:/app/config.yaml" kubeshop/tracetest
```
```
85 changes: 85 additions & 0 deletions k8s/collector.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Collector configuration mounted into the otel-collector Deployment.
# The manifest is rendered with envsubst before being applied, so the
# LIGHTSTEP_TOKEN placeholder below is replaced at deploy time. Do not write
# dollar-sign references in comments here: envsubst would expand them too.
apiVersion: v1
kind: ConfigMap
metadata:
  name: collector-config
data:
  collector.yaml: |
    receivers:
      otlp:
        protocols:
          grpc:
          http:
    processors:
      batch:
      # Data sources: traces
      probabilistic_sampler:
        hash_seed: 22
        sampling_percentage: 100
    exporters:
      # Defined for ad-hoc debugging; not part of any pipeline below.
      logging:
        logLevel: debug
      jaeger:
        endpoint: jaeger-collector.tracetest.svc.cluster.local:14250
        tls:
          insecure: true
      otlphttp:
        # Send directly to the Lightstep ingest host. The previous value routed
        # traces (and the access token header) through a third-party proxy domain.
        traces_endpoint: https://ingest.lightstep.com:443/traces/otlp/v0.6
        metrics_endpoint: https://ingest.lightstep.com:443/metrics/otlp/v0.9
        headers: {"lightstep-access-token": "${LIGHTSTEP_TOKEN}"}
        compression: gzip
    service:
      pipelines:
        traces:
          receivers: [otlp]
          # Sample first, then batch, so dropped spans are never batched.
          processors: [probabilistic_sampler, batch]
          exporters: [jaeger, otlphttp]
---

# Single-replica collector. The collector-config ConfigMap is mounted at
# /conf and passed to the binary through the --config argument.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: otel-collector
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: otel-collector
  template:
    metadata:
      labels:
        # Must stay equal to the selector above and to the Service selector.
        app.kubernetes.io/name: otel-collector
    spec:
      volumes:
        - name: collector-config
          configMap:
            name: collector-config
            items:
              - key: collector.yaml
                path: collector.yaml
      containers:
        - name: otelcol
          image: otel/opentelemetry-collector:0.54.0
          args:
            - --config=/conf/collector.yaml
          volumeMounts:
            - name: collector-config
              mountPath: /conf
---

# Cluster-internal endpoint for the collector pods. Port 4317 is the one the
# deployments reference through telemetry.otelCollectorEndpoint.
apiVersion: v1
kind: Service
metadata:
  name: otel-collector
spec:
  type: ClusterIP
  selector:
    app.kubernetes.io/name: otel-collector
  ports:
    - name: grpc-otlp
      protocol: TCP
      port: 4317
      targetPort: 4317
2 changes: 2 additions & 0 deletions k8s/deploy-collector.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/sh
# Renders the collector manifest with envsubst and applies it to the
# `tracetest` namespace.
#
# Environment:
#   LIGHTSTEP_TOKEN  value substituted into the manifest; an unset variable is
#                    rendered as an empty string (envsubst's own behavior).
set -eu

# Resolve the manifest next to this script so the working directory of the
# caller does not matter.
manifest="$(dirname "$0")/collector.yml"

# Render before applying: POSIX sh has no pipefail, so a failing envsubst in a
# pipeline would be masked by kubectl's exit status. Substitution is limited to
# the one placeholder the manifest uses, so any other dollar-sign text in the
# manifest is left untouched.
rendered="$(envsubst '${LIGHTSTEP_TOKEN}' < "$manifest")"

printf '%s\n' "$rendered" | kubectl apply -n tracetest -f -
34 changes: 34 additions & 0 deletions local-config/collector.config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Sample collector configuration for local development (mounted by the
# otel-collector service in docker-compose.yaml). JAEGER_ENDPOINT and
# LIGHTSTEP_TOKEN are read from the collector's environment.
receivers:
  otlp:
    protocols:
      grpc:
      http:

processors:
  batch:

  # Data sources: traces
  probabilistic_sampler:
    hash_seed: 22
    sampling_percentage: 100

exporters:
  # Defined for ad-hoc debugging; not part of any pipeline below.
  logging:
    logLevel: debug
  jaeger:
    endpoint: ${JAEGER_ENDPOINT}
    tls:
      insecure: true
  otlphttp:
    # Send directly to the Lightstep ingest host. The previous value routed
    # traces (and the access token header) through a third-party proxy domain.
    traces_endpoint: https://ingest.lightstep.com:443/traces/otlp/v0.6
    metrics_endpoint: https://ingest.lightstep.com:443/metrics/otlp/v0.9
    headers: {"lightstep-access-token": "${LIGHTSTEP_TOKEN}"}
    compression: gzip

service:
  pipelines:
    traces:
      receivers: [otlp]
      # Sample first, then batch, so dropped spans are never batched.
      processors: [probabilistic_sampler, batch]
      exporters: [jaeger, otlphttp]
8 changes: 2 additions & 6 deletions server/config.yaml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,7 @@ googleAnalytics:
enabled: true

telemetry:
enabled: true
serviceName: tracetest
sampling: 100 # 100%
jaeger:
host: localhost
port: 6831
exporters:
- console
- jaeger
otelCollectorEndpoint: otel-collector:4317
13 changes: 4 additions & 9 deletions server/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,10 @@ type (
}

TelemetryConfig struct {
ServiceName string `mapstructure:"serviceName"`
Sampling float64 `mapstructure:"sampling"`
Exporters []string `mapstructure:"exporters"`
Jaeger JaegerTelemetryConfig `mapstructure:"jaeger"`
}

JaegerTelemetryConfig struct {
Host string `mapstructure:"host"`
Port int `mapstructure:"port"`
Enabled bool `mapstructure:"enabled"`
ServiceName string `mapstructure:"serviceName"`
Sampling float64 `mapstructure:"sampling"`
OTelCollectorEndpoint string `mapstructure:"otelCollectorEndpoint"`
}
)

Expand Down
14 changes: 12 additions & 2 deletions server/executor/poller_executor.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package executor

import (
"encoding/hex"
"fmt"
"math"
"time"
Expand Down Expand Up @@ -28,10 +29,19 @@ func (pe InstrumentedPollerExecutor) ExecuteRequest(request *PollingRequest) (bo
defer span.End()

finished, run, err := pe.pollerExecutor.ExecuteRequest(request)
if err != nil {
span.RecordError(err)
return finished, run, err
}

spanCount := len(run.Trace.Flat)

span.SetAttributes(
attribute.String("tracetest.run.trace_poller.trace_id", hex.EncodeToString(request.run.TraceID[:])),
attribute.String("tracetest.run.trace_poller.span_id", hex.EncodeToString(request.run.SpanID[:])),
attribute.Bool("tracetest.run.trace_poller.succesful", finished),
attribute.String("tracetest.run.trace_poller.test_id", request.test.ID.String()),
attribute.Int("tracetest.run.trace_poller.amount_retrieved_spans", spanCount),
)

return finished, run, err
Expand Down Expand Up @@ -70,7 +80,7 @@ func (pe DefaultPollerExecutor) ExecuteRequest(request *PollingRequest) (bool, m
if !pe.donePollingTraces(request, trace) {
run.Trace = &trace
request.run = run
return false, model.Run{}, nil
return false, run, nil
}

trace = trace.Sort()
Expand All @@ -83,7 +93,7 @@ func (pe DefaultPollerExecutor) ExecuteRequest(request *PollingRequest) (bool, m

err = pe.updater.Update(request.ctx, run)
if err != nil {
return false, model.Run{}, nil
return false, model.Run{}, err
}

return true, run, nil
Expand Down
6 changes: 5 additions & 1 deletion server/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ require (
go.opentelemetry.io/contrib/propagators/jaeger v1.5.0
go.opentelemetry.io/contrib/propagators/ot v1.5.0
go.opentelemetry.io/otel v1.7.0
go.opentelemetry.io/otel/exporters/jaeger v1.7.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.6.0
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.3.0
go.opentelemetry.io/otel/sdk v1.7.0
go.opentelemetry.io/otel/trace v1.7.0
Expand All @@ -38,6 +38,7 @@ require (

require (
github.com/Microsoft/go-winio v0.5.2 // indirect
github.com/cenkalti/backoff/v4 v4.1.3 // indirect
github.com/containerd/containerd v1.6.6 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/docker/distribution v2.8.1+incompatible // indirect
Expand All @@ -50,6 +51,7 @@ require (
github.com/go-redis/redis/v7 v7.4.1 // indirect
github.com/golang/protobuf v1.5.2 // indirect
github.com/golang/snappy v0.0.4 // indirect
github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/klauspost/compress v1.15.6 // indirect
Expand All @@ -67,6 +69,8 @@ require (
github.com/stretchr/objx v0.2.0 // indirect
go.opentelemetry.io/collector/model v0.44.0 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.28.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.7.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.6.0 // indirect
go.opentelemetry.io/otel/internal/metric v0.26.0 // indirect
go.opentelemetry.io/otel/metric v0.26.0 // indirect
go.uber.org/atomic v1.9.0 // indirect
Expand Down
Loading

0 comments on commit 40896a8

Please sign in to comment.