Skip to content

Commit 636bbdd

Browse files
authored
Merge pull request #20604 from AwesomePatrol/add-tracing-to-robustness-tests
Add an option to collect traces from robustness tests
2 parents 66e4a11 + b067d82 commit 636bbdd

File tree

5 files changed

+90
-24
lines changed

5 files changed

+90
-24
lines changed

tests/framework/e2e/cluster.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,15 @@ func WithExtensiveMetrics() EPClusterOption {
390390
return func(c *EtcdProcessClusterConfig) { c.ServerConfig.Metrics = "extensive" }
391391
}
392392

393+
// WithEnableDistributedTracing returns an EPClusterOption that enables
// distributed tracing in the cluster's ServerConfig. It sets the service name
// to "etcd", the tracing address to addr, and the sampling rate to 1_000_000
// per million (presumably meaning every trace is sampled; confirm against the
// etcd flag documentation).
func WithEnableDistributedTracing(addr string) EPClusterOption {
394+
return func(c *EtcdProcessClusterConfig) {
395+
c.ServerConfig.EnableDistributedTracing = true
396+
c.ServerConfig.DistributedTracingServiceName = "etcd"
397+
c.ServerConfig.DistributedTracingAddress = addr
398+
c.ServerConfig.DistributedTracingSamplingRatePerMillion = 1_000_000
399+
}
400+
}
401+
393402
// NewEtcdProcessCluster launches a new cluster from etcd processes, returning
394403
// a new EtcdProcessCluster once all nodes are ready to accept client requests.
395404
func NewEtcdProcessCluster(ctx context.Context, tb testing.TB, opts ...EPClusterOption) (*EtcdProcessCluster, error) {
@@ -585,6 +594,14 @@ func (cfg *EtcdProcessClusterConfig) EtcdServerProcessConfig(tb testing.TB, i in
585594
if !cfg.ServerConfig.StrictReconfigCheck {
586595
args = append(args, "--strict-reconfig-check=false")
587596
}
597+
if cfg.ServerConfig.EnableDistributedTracing {
598+
args = append(args,
599+
"--enable-distributed-tracing",
600+
fmt.Sprintf("--distributed-tracing-address=%s", cfg.ServerConfig.DistributedTracingAddress),
601+
fmt.Sprintf("--distributed-tracing-service-name=%s", cfg.ServerConfig.DistributedTracingServiceName),
602+
fmt.Sprintf("--distributed-tracing-sampling-rate=%d", cfg.ServerConfig.DistributedTracingSamplingRatePerMillion),
603+
)
604+
}
588605

589606
var murl string
590607
if cfg.MetricsURLScheme != "" {

tests/robustness/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ Etcd provides strict serializability for KV operations and eventual consistency
110110
* `EXPECT_DEBUG=true` - to get logs from the cluster.
111111
* `RESULTS_DIR` - to change the location where the results report will be saved.
112112
* `PERSIST_RESULTS` - to persist the results report of the test. By default this will not be persisted in the case of a successful run.
113+
* `TRACING_SERVER_ADDR` - to export OpenTelemetry traces from test runs to the collector running at the given address, for example: `localhost:4317`.
113114

114115
## Re-evaluate existing report
115116

tests/robustness/coverage/README.md

Lines changed: 63 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -80,39 +80,79 @@ cluster](https://kind.sigs.k8s.io/docs/user/quick-start/#installation):
8080
1. Exercise the Kubernetes API. For example, build and run the Conformance tests from the
8181
Kubernetes repository (this usually takes 30-40m or will time out after 1 hour):
8282

83-
```shell
84-
make WHAT="test/e2e/e2e.test"
85-
./_output/bin/e2e.test \
86-
-context kind-kind-with-external-etcd \
87-
-ginkgo.focus="\[sig-apps\].*Conformance" \
88-
-num-nodes 2
89-
build/run.sh make test-cmd
90-
```
83+
```shell
84+
make WHAT="test/e2e/e2e.test"
85+
./_output/bin/e2e.test \
86+
-context kind-kind-with-external-etcd \
87+
-ginkgo.focus="\[sig-apps\].*Conformance" \
88+
-num-nodes 2
89+
build/run.sh make test-cmd
90+
```
9191

9292
1. Download traces and put them into `tests/robustness/coverage/testdata`
9393
directory in the etcd Git repository:
9494

95-
```shell
96-
curl -v --get --retry 10 --retry-connrefused -o testdata/demo_traces.json \
97-
-H "Content-Type: application/json" \
98-
--data-urlencode "query.start_time_min=$(date --date="5 days ago" -Ins)" \
99-
--data-urlencode "query.start_time_max=$(date --date="2 minutes ago" -Ins)" \
100-
--data-urlencode "query.service_name=etcd" \
101-
"http://192.168.32.1:16686/api/v3/traces"
102-
```
95+
```shell
96+
curl -v --get --retry 10 --retry-connrefused -o testdata/demo_traces.json \
97+
-H "Content-Type: application/json" \
98+
--data-urlencode "query.start_time_min=$(date --date="5 days ago" -Ins)" \
99+
--data-urlencode "query.start_time_max=$(date --date="2 minutes ago" -Ins)" \
100+
--data-urlencode "query.service_name=etcd" \
101+
"http://192.168.32.1:16686/api/v3/traces"
102+
```
103103

104104
1. Run Go test
105105

106-
```shell
107-
go test -v -timeout 60s go.etcd.io/etcd/tests/v3/robustness/coverage
108-
```
106+
```shell
107+
go test -v -timeout 60s go.etcd.io/etcd/tests/v3/robustness/coverage
108+
```
109109

110110
1. Clean up the environment
111111

112-
```shell
113-
kind delete cluster --name kind-with-external-etcd
114-
docker network rm kind-with-external-etcd
115-
```
112+
```shell
113+
kind delete cluster --name kind-with-external-etcd
114+
docker network rm kind-with-external-etcd
115+
```
116+
117+
### Manual trace collection from robustness tests
118+
119+
1. Run a [Jaeger](https://www.jaegertracing.io/) container:
120+
121+
```shell
122+
docker run --rm --name jaeger \
123+
-p 16686:16686 \
124+
-p 4317:4317 \
125+
jaegertracing/jaeger:2.6.0 --set=extensions.jaeger_storage.backends.some_storage.memory.max_traces=20000000
126+
```
127+
128+
1. Run robustness tests. For example:
129+
130+
```shell
131+
env \
132+
TRACING_SERVER_ADDR=localhost:4317 \
133+
GO_TEST_FLAGS='--timeout 10m --count=1 -v --run "^TestRobustness.*/Kubernetes.*"' \
134+
make test-robustness
135+
```
136+
137+
1. Download traces and put them into `tests/robustness/coverage/testdata`
138+
directory in the etcd Git repository:
139+
140+
```shell
141+
curl -v --get --retry 10 --retry-connrefused -o testdata/demo_traces.json \
142+
-H "Content-Type: application/json" \
143+
--data-urlencode "query.start_time_min=$(date --date="5 days ago" -Ins)" \
144+
--data-urlencode "query.start_time_max=$(date -Ins)" \
145+
--data-urlencode "query.service_name=etcd" \
146+
"http://localhost:16686/api/v3/traces"
147+
```
148+
149+
1. Run Go test
150+
151+
```shell
152+
go test -v -timeout 60s go.etcd.io/etcd/tests/v3/robustness/coverage
153+
```
154+
155+
It will show the coverage of the Kubernetes-Etcd surface by robustness tests.
116156

117157
### Automated test execution
118158

tests/robustness/coverage/coverage_test.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,14 +353,17 @@ func spansMap(t *testing.T, traces []*tracev1.ResourceSpans) map[string]*tracev1
353353
}
354354
}
355355
}
356+
if len(inApiserver) == 0 {
357+
t.Logf("WARN: no records of traces from the apiserver")
358+
}
356359

357360
// Map traces by their span ID.
358361
spansByID := make(map[string]*tracev1.Span)
359362
skipped := 0
360363
for _, trace := range traces {
361364
for _, scopeSpan := range trace.GetScopeSpans() {
362365
for _, span := range scopeSpan.GetSpans() {
363-
if !inApiserver[string(span.GetTraceId())] {
366+
if len(inApiserver) > 0 && !inApiserver[string(span.GetTraceId())] {
364367
skipped++
365368
continue
366369
}

tests/robustness/scenarios/scenarios.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
package scenarios
1616

1717
import (
18+
"os"
1819
"path/filepath"
1920
"testing"
2021
"time"
@@ -101,6 +102,10 @@ func Exploratory(_ *testing.T) []TestScenario {
101102
e2e.WithWatchProcessNotifyInterval(100 * time.Millisecond),
102103
}
103104

105+
if addr := os.Getenv("TRACING_SERVER_ADDR"); addr != "" {
106+
baseOptions = append(baseOptions, e2e.WithEnableDistributedTracing(addr))
107+
}
108+
104109
if e2e.CouldSetSnapshotCatchupEntries(e2e.BinPath.Etcd) {
105110
baseOptions = append(baseOptions, options.WithSnapshotCatchUpEntries(100, etcdserver.DefaultSnapshotCatchUpEntries))
106111
}

0 commit comments

Comments
 (0)