Skip to content

Commit 942ecaf

Browse files
committed
[exporter][prometheusremotewrite] Additional metrics for the exporter
Often, we have to do analysis of the current state of the exporter, which includes the current level of concurrency and throughput. For the concurrency, we need to rely on reviewing each individual configuration, which is time-consuming. Here I'm proposing to have it as a metric that we can keep a record of and see through observability. For the throughput, while technically `otelcol_processor_batch_batch_send_size_count` exists, this is not divisible by type (e.g. metrics vs traces) and is not specific to the exporter. Whilst I'm also interested in splitting `batch_send_size_count` by type (because that also includes OTLP batches and this metric is only for RW), I think this is part of a separate conversation and it doesn't hurt to have a single counter that's specific to remote write. There's one more thing here (and please advise if you'd rather see a different PR for it) - I'm also adding the `url` as an attribute to _ALL_ the metrics, as I think this is key to identifying where the exporter is pointing to. This is technically my first contribution to the repo, so I'd appreciate any guidance on what else I need to do. Telemetry tests appear to be auto-generated so I didn't add any to this PR, but please let me know otherwise. Signed-off-by: gotjosh <[email protected]>
1 parent 236cc57 commit 942ecaf

File tree

6 files changed

+122
-3
lines changed

6 files changed

+122
-3
lines changed

exporter/prometheusremotewriteexporter/documentation.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,14 @@
66

77
The following telemetry is emitted by this component.
88

9+
### otelcol_exporter_prometheusremotewrite_consumers
10+
11+
Number of configured workers to use to fan out the outgoing requests
12+
13+
| Unit | Metric Type | Value Type |
14+
| ---- | ----------- | ---------- |
15+
| 1 | Gauge | Int |
16+
917
### otelcol_exporter_prometheusremotewrite_failed_translations
1018

1119
Number of translation operations that failed to translate metrics from Otel to Prometheus
@@ -14,6 +22,14 @@ Number of translation operations that failed to translate metrics from Otel to P
1422
| ---- | ----------- | ---------- | --------- |
1523
| 1 | Sum | Int | true |
1624

25+
### otelcol_exporter_prometheusremotewrite_sent_batch_count
26+
27+
Number of remote write request batches sent to the remote write endpoint
28+
29+
| Unit | Metric Type | Value Type | Monotonic |
30+
| ---- | ----------- | ---------- | --------- |
31+
| 1 | Sum | Int | true |
32+
1733
### otelcol_exporter_prometheusremotewrite_translated_time_series
1834

1935
Number of Prometheus time series that were translated from OTel metrics

exporter/prometheusremotewriteexporter/exporter.go

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,23 @@ import (
3838
type prwTelemetry interface {
3939
recordTranslationFailure(ctx context.Context)
4040
recordTranslatedTimeSeries(ctx context.Context, numTS int)
41+
recordRemoteWriteSentBatch(ctx context.Context)
42+
setNumberConsumer(ctx context.Context, n int64)
4143
}
4244

4345
type prwTelemetryOtel struct {
4446
telemetryBuilder *metadata.TelemetryBuilder
4547
otelAttrs []attribute.KeyValue
4648
}
4749

50+
func (p *prwTelemetryOtel) setNumberConsumer(ctx context.Context, n int64) {
51+
p.telemetryBuilder.ExporterPrometheusremotewriteConsumers.Record(ctx, n, metric.WithAttributes(p.otelAttrs...))
52+
}
53+
54+
func (p *prwTelemetryOtel) recordRemoteWriteSentBatch(ctx context.Context) {
55+
p.telemetryBuilder.ExporterPrometheusremotewriteSentBatchCount.Add(ctx, 1, metric.WithAttributes(p.otelAttrs...))
56+
}
57+
4858
func (p *prwTelemetryOtel) recordTranslationFailure(ctx context.Context) {
4959
p.telemetryBuilder.ExporterPrometheusremotewriteFailedTranslations.Add(ctx, 1, metric.WithAttributes(p.otelAttrs...))
5060
}
@@ -91,7 +101,7 @@ type prwExporter struct {
91101
batchStatePool sync.Pool
92102
}
93103

94-
func newPRWTelemetry(set exporter.Settings) (prwTelemetry, error) {
104+
func newPRWTelemetry(set exporter.Settings, endpointURL *url.URL) (prwTelemetry, error) {
95105
telemetryBuilder, err := metadata.NewTelemetryBuilder(set.TelemetrySettings)
96106
if err != nil {
97107
return nil, err
@@ -101,6 +111,7 @@ func newPRWTelemetry(set exporter.Settings) (prwTelemetry, error) {
101111
telemetryBuilder: telemetryBuilder,
102112
otelAttrs: []attribute.KeyValue{
103113
attribute.String("exporter", set.ID.String()),
114+
attribute.String("endpoint", endpointURL.String()),
104115
},
105116
}, nil
106117
}
@@ -117,7 +128,7 @@ func newPRWExporter(cfg *Config, set exporter.Settings) (*prwExporter, error) {
117128
return nil, errors.New("invalid endpoint")
118129
}
119130

120-
prwTelemetry, err := newPRWTelemetry(set)
131+
telemetry, err := newPRWTelemetry(set, endpointURL)
121132
if err != nil {
122133
return nil, err
123134
}
@@ -132,6 +143,9 @@ func newPRWExporter(cfg *Config, set exporter.Settings) (*prwExporter, error) {
132143
concurrency = *cfg.MaxBatchRequestParallelism
133144
}
134145

146+
// Set the desired number of consumers as a metric for the exporter.
147+
telemetry.setNumberConsumer(context.Background(), int64(concurrency))
148+
135149
prwe := &prwExporter{
136150
endpointURL: endpointURL,
137151
wg: new(sync.WaitGroup),
@@ -151,7 +165,7 @@ func newPRWExporter(cfg *Config, set exporter.Settings) (*prwExporter, error) {
151165
AddMetricSuffixes: cfg.AddMetricSuffixes,
152166
SendMetadata: cfg.SendMetadata,
153167
},
154-
telemetry: prwTelemetry,
168+
telemetry: telemetry,
155169
batchStatePool: sync.Pool{New: func() any { return newBatchTimeServicesState() }},
156170
}
157171

@@ -350,6 +364,7 @@ func (prwe *prwExporter) execute(ctx context.Context, writeReq *prompb.WriteRequ
350364
req.Header.Set("User-Agent", prwe.userAgentHeader)
351365

352366
resp, err := prwe.client.Do(req)
367+
prwe.telemetry.recordRemoteWriteSentBatch(ctx)
353368
if err != nil {
354369
return err
355370
}

exporter/prometheusremotewriteexporter/internal/metadata/generated_telemetry.go

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

exporter/prometheusremotewriteexporter/internal/metadatatest/generated_telemetrytest.go

Lines changed: 30 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

exporter/prometheusremotewriteexporter/internal/metadatatest/generated_telemetrytest_test.go

Lines changed: 30 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

exporter/prometheusremotewriteexporter/metadata.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,17 @@ telemetry:
2727
sum:
2828
value_type: int
2929
monotonic: true
30+
exporter_prometheusremotewrite_consumers:
31+
enabled: true
32+
description: Number of configured workers to use to fan out the outgoing requests
33+
unit: "1"
34+
gauge:
35+
value_type: int
36+
monotonic: false
37+
exporter_prometheusremotewrite_sent_batch_count:
38+
enabled: true
39+
description: Number of remote write request batches sent to the remote write endpoint
40+
unit: "1"
41+
sum:
42+
value_type: int
43+
monotonic: true

0 commit comments

Comments
 (0)