From 8e122f43938604f097ccdc2672c12c9a527f3c2a Mon Sep 17 00:00:00 2001
From: Max Leonov <mleonov@redhat.com>
Date: Tue, 6 Feb 2024 11:56:54 +0100
Subject: [PATCH] OBSDOCS-741: Span RED alerts

---
 ...istr-tracing-tempo-config-spanmetrics.adoc | 30 ++++++++++++-------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/modules/distr-tracing-tempo-config-spanmetrics.adoc b/modules/distr-tracing-tempo-config-spanmetrics.adoc
index d2b0d867ffff..50cae204f5ad 100644
--- a/modules/distr-tracing-tempo-config-spanmetrics.adoc
+++ b/modules/distr-tracing-tempo-config-spanmetrics.adoc
@@ -93,30 +93,38 @@ spec:
 <1> Enables the monitoring tab in the Jaeger console.
 <2> The service name for Thanos Querier from user-workload monitoring.
 
-== Enable alerting on span RED metrics
+== Span RED metrics and alerting rules
 
-The metrics generated by the `spanmetrics` connector can be used in alerting rules. For instance to alert on a slow service or define service level objectives (SLOs).
-The connector creates `duration_bucket` histogram and `calls` counter metric. These metrics have labels that identify service, API name, operation type and other attributes.
+The metrics generated by the `spanmetrics` connector are usable with alerting rules, for example, to alert about a slow service or to define service level objectives (SLOs). The connector creates a `duration_bucket` histogram and the `calls` counter metric. These metrics have labels that identify the service, API name, operation type, and other attributes.
 
-.Labels present on the metrics created oin the `spanmetrics` connector.
+.Labels of the metrics created in the `spanmetrics` connector
 [options="header"]
 [cols="l, a, a"]
 |===
 |Label |Description |Values
+
 |service_name
-| Service name set by `otel_service_name` environment variable.
+|Service name set by the `otel_service_name` environment variable.
 |`frontend`
 
 |span_name
 | Name of the operation.
-|`/`, `/customer`
+|
+* `/`
+* `/customer`
 
 |span_kind
-| Span kind identifies the server, client, messaging or internal operation.
-|`SPAN_KIND_SERVER`, `SPAN_KIND_CLIENT`, `SPAN_KIND_PRODUCER`, `SPAN_KIND_CONSUMER`, `SPAN_KIND_INTERNAL`
+|Identifies the server, client, messaging, or internal operation.
+|
+* `SPAN_KIND_SERVER`
+* `SPAN_KIND_CLIENT`
+* `SPAN_KIND_PRODUCER`
+* `SPAN_KIND_CONSUMER`
+* `SPAN_KIND_INTERNAL`
+
 |===
 
-.PrometheusRule custom resource to define an alert for SLO to serve 95% of requests within 2000ms on the frontend service.
+.Example `PrometheusRule` CR that defines an alerting rule that fires when the SLO of serving 95% of requests within 2000 ms on the front-end service is not met
 [source,yaml]
 ----
 apiVersion: monitoring.coreos.com/v1
@@ -128,11 +136,11 @@ spec:
   - name: server-side-latency
     rules:
     - alert: SpanREDFrontendAPIRequestLatency
-      expr: histogram_quantile(0.95, sum(rate(duration_bucket{service_name="frontend", span_kind="SPAN_KIND_SERVER"}[5m])) by (le, service_name, span_name)) > 2000 <1>
+      expr: histogram_quantile(0.95, sum(rate(duration_bucket{service_name="frontend", span_kind="SPAN_KIND_SERVER"}[5m])) by (le, service_name, span_name)) > 2000 # <1>
       labels:
         severity: Warning
       annotations:
         summary: "High request latency on {{$labels.service_name}} and {{$labels.span_name}}"
         description: "{{$labels.instance}} has 95th request latency above 2s (current value: {{$value}}s)"
 ----
-<1> The expression to check if 95% of frontend server response time is below 2000 ms. The time range (`[5m]`) should be at least four times the scrape interval and long enough to accommodate change in the metric.
+<1> The expression for checking if 95% of the front-end server response time values are below 2000 ms. The time range (`[5m]`) must be at least four times the scrape interval and long enough to accommodate a change in the metric.