Skip to content

Commit bee7e37

Browse files
[receiver/prometheus] add feature flag to be able to disable Prometheus setting to fix high CPU for openmetrics (#40355)
<!--Ex. Fixing a bug - Describe the bug and how this fixes the issue. Ex. Adding a feature - Explain what this achieves.--> #### Description The PR #36660 enabled the setting in the scrape manager `EnableCreatedTimestampZeroIngestion` by default. This enabled the [Prometheus feature flag](https://prometheus.io/docs/prometheus/latest/feature_flags/#created-timestamps-zero-injection) by the same name. In Prometheus, when this is set to true, [a call to a new function](https://github.com/prometheus/prometheus/blob/1d9dfde9894f0c82b809b70aa5f9cecab25b8fea/scrape/scrape.go#L1770-L1799) CreatedTimestamp() is run. This currently isn't implemented for the Prometheus parser but is for the OpenMetrics parser and [has a comment](https://github.com/prometheus/prometheus/blob/main/model/textparse/openmetricsparse.go#L287) that it may use additional CPU and memory resources. We are seeing a 10x increase in CPU usage when there's a high volume of metrics scraped with this scenario. This PR puts the setting in the scrape manager behind a feature gate that allows it to be disabled. <!-- Issue number (e.g. #1234) or full URL to issue, if applicable. --> #### Link to tracking issue Fixes #40245 <!--Describe what testing was performed and which tests were added.--> #### Testing Tested with a custom build with this change. <!--Describe the documentation added.--> #### Documentation Documentation about the feature gate exists [here](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/receiver/prometheusreceiver/README.md#getting-started). <!--Please delete paragraphs that you did not use before submitting.--> --------- Co-authored-by: Arthur Silva Sens <[email protected]>
1 parent 3cf8edf commit bee7e37

File tree

5 files changed

+231
-12
lines changed

5 files changed

+231
-12
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: bug_fix
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
7+
component: prometheusreceiver
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: Add feature gate to allow enabling and disabling the Prometheus created timestamp zero ingestion feature flag.
11+
12+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
13+
issues: [40245]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext:
19+
20+
# If your change doesn't affect end users or the exported elements of any package,
21+
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
22+
# Optional: The change log or logs in which this entry should be included.
23+
# e.g. '[user]' or '[user, api]'
24+
# Include 'user' if the change is relevant to end users.
25+
# Include 'api' if there is a change to a library API.
26+
# Default: '[user]'
27+
change_logs: [user]

receiver/prometheusreceiver/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,11 @@ prometheus --config.file=prom.yaml
6868
```shell
6969
"--feature-gates=receiver.prometheusreceiver.UseCreatedMetric"
7070
```
71+
- `receiver.prometheusreceiver.EnableCreatedTimestampZeroIngestion`: Enables the Prometheus feature flag [created-timestamps-zero-injection](https://prometheus.io/docs/prometheus/latest/feature_flags/#created-timestamps-zero-injection). Currently, this behaviour is disabled by default due to worse CPU performance with higher metric volumes. To enable it, use the following feature gate option:
72+
73+
```shell
74+
"--feature-gates=receiver.prometheusreceiver.EnableCreatedTimestampZeroIngestion"
75+
```
7176
- `receiver.prometheusreceiver.UseCollectorStartTimeFallback`: enables using
7277
the collector start time as the metric start time if the
7378
process_start_time_seconds metric yields no result (for example if targets

receiver/prometheusreceiver/factory.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@ var enableNativeHistogramsGate = featuregate.GlobalRegistry().MustRegister(
3333
" those Prometheus classic histograms that have a native histogram alternative"),
3434
)
3535

36+
// enableCreatedTimestampZeroIngestionGate is the feature gate registered under
// the ID "receiver.prometheusreceiver.EnableCreatedTimestampZeroIngestion" at
// the alpha stage. initScrapeOptions reads its state to populate
// scrape.Options.EnableCreatedTimestampZeroIngestion.
var enableCreatedTimestampZeroIngestionGate = featuregate.GlobalRegistry().MustRegister(
37+
"receiver.prometheusreceiver.EnableCreatedTimestampZeroIngestion",
38+
featuregate.StageAlpha,
39+
featuregate.WithRegisterDescription("Enables ingestion of created timestamp."+
40+
" Created timestamps are injected as 0 valued samples when appropriate."),
41+
)
42+
3643
// NewFactory creates a new Prometheus receiver factory.
3744
func NewFactory() receiver.Factory {
3845
return receiver.NewFactory(

receiver/prometheusreceiver/metrics_receiver.go

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -184,18 +184,7 @@ func (r *pReceiver) initPrometheusComponents(ctx context.Context, logger *slog.L
184184
return err
185185
}
186186

187-
opts := &scrape.Options{
188-
PassMetadataInContext: true,
189-
ExtraMetrics: r.cfg.ReportExtraScrapeMetrics,
190-
HTTPClientOptions: []commonconfig.HTTPClientOption{
191-
commonconfig.WithUserAgent(r.settings.BuildInfo.Command + "/" + r.settings.BuildInfo.Version),
192-
},
193-
EnableCreatedTimestampZeroIngestion: true,
194-
}
195-
196-
if enableNativeHistogramsGate.IsEnabled() {
197-
opts.EnableNativeHistogramsIngestion = true
198-
}
187+
opts := r.initScrapeOptions()
199188

200189
// for testing only
201190
if r.skipOffsetting {
@@ -234,6 +223,20 @@ func (r *pReceiver) initPrometheusComponents(ctx context.Context, logger *slog.L
234223
return nil
235224
}
236225

226+
func (r *pReceiver) initScrapeOptions() *scrape.Options {
227+
opts := &scrape.Options{
228+
PassMetadataInContext: true,
229+
ExtraMetrics: r.cfg.ReportExtraScrapeMetrics,
230+
HTTPClientOptions: []commonconfig.HTTPClientOption{
231+
commonconfig.WithUserAgent(r.settings.BuildInfo.Command + "/" + r.settings.BuildInfo.Version),
232+
},
233+
EnableCreatedTimestampZeroIngestion: enableCreatedTimestampZeroIngestionGate.IsEnabled(),
234+
EnableNativeHistogramsIngestion: enableNativeHistogramsGate.IsEnabled(),
235+
}
236+
237+
return opts
238+
}
239+
237240
func (r *pReceiver) initAPIServer(ctx context.Context, host component.Host) error {
238241
r.settings.Logger.Info("Starting Prometheus API server")
239242

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package prometheusreceiver
5+
6+
import (
7+
"context"
8+
"testing"
9+
10+
"github.com/stretchr/testify/assert"
11+
"github.com/stretchr/testify/require"
12+
"go.opentelemetry.io/collector/consumer/consumertest"
13+
"go.opentelemetry.io/collector/featuregate"
14+
"go.opentelemetry.io/collector/pdata/pcommon"
15+
"go.opentelemetry.io/collector/pdata/pmetric"
16+
"go.opentelemetry.io/collector/receiver/receivertest"
17+
18+
"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver/internal/metadata"
19+
)
20+
21+
func TestEnableCreatedTimestampZeroIngestionGateUsage(t *testing.T) {
22+
ctx := context.Background()
23+
mockConsumer := new(consumertest.MetricsSink)
24+
cfg := createDefaultConfig().(*Config)
25+
settings := receivertest.NewNopSettings(metadata.Type)
26+
27+
// Test with feature gate enabled
28+
err := featuregate.GlobalRegistry().Set("receiver.prometheusreceiver.EnableCreatedTimestampZeroIngestion", true)
29+
require.NoError(t, err)
30+
r1, err := newPrometheusReceiver(settings, cfg, mockConsumer)
31+
require.NoError(t, err)
32+
33+
assert.True(t, enableCreatedTimestampZeroIngestionGate.IsEnabled(), "Feature gate should be enabled")
34+
opts := r1.initScrapeOptions()
35+
assert.True(t, opts.EnableCreatedTimestampZeroIngestion, "EnableCreatedTimestampZeroIngestion should be true when feature gate is enabled")
36+
37+
// Test with feature gate disabled
38+
err = featuregate.GlobalRegistry().Set("receiver.prometheusreceiver.EnableCreatedTimestampZeroIngestion", false)
39+
require.NoError(t, err)
40+
r2, err := newPrometheusReceiver(settings, cfg, mockConsumer)
41+
require.NoError(t, err)
42+
43+
assert.False(t, enableCreatedTimestampZeroIngestionGate.IsEnabled(), "Feature gate should be disabled")
44+
opts = r2.initScrapeOptions()
45+
assert.False(t, opts.EnableCreatedTimestampZeroIngestion, "EnableCreatedTimestampZeroIngestion should be false when feature gate is disabled")
46+
47+
// Reset the feature gate and shutdown the created receivers
48+
t.Cleanup(func() {
49+
err := featuregate.GlobalRegistry().Set("receiver.prometheusreceiver.EnableCreatedTimestampZeroIngestion", false)
50+
require.NoError(t, err, "Failed to reset feature gate to default state")
51+
52+
require.NoError(t, r1.Shutdown(ctx), "Failed to shutdown receiver 1")
53+
require.NoError(t, r2.Shutdown(ctx), "Failed to shutdown receiver 2")
54+
})
55+
}
56+
57+
// openMetricsCreatedTimestampMetrics is the OpenMetrics payload served by the
// mock targets in the created-timestamp tests in this file: a single counter
// family a_seconds (unit seconds) with an a_seconds_total sample of 1.0 and an
// a_seconds_created sample of 123.456.
var openMetricsCreatedTimestampMetrics = `# HELP a_seconds A counter
58+
# TYPE a_seconds counter
59+
# UNIT a_seconds seconds
60+
a_seconds_total 1.0
61+
a_seconds_created 123.456
62+
# EOF
63+
`
64+
65+
func TestOpenMetricsCreatedTimestampZeroIngestionEnabled(t *testing.T) {
66+
err := featuregate.GlobalRegistry().Set("receiver.prometheusreceiver.EnableCreatedTimestampZeroIngestion", true)
67+
require.NoError(t, err)
68+
t.Cleanup(func() {
69+
err := featuregate.GlobalRegistry().Set("receiver.prometheusreceiver.EnableCreatedTimestampZeroIngestion", false)
70+
require.NoError(t, err, "Failed to reset feature gate to default state")
71+
})
72+
73+
targets := []*testData{
74+
{
75+
name: "target1",
76+
pages: []mockPrometheusResponse{
77+
{code: 200, data: openMetricsCreatedTimestampMetrics, useOpenMetrics: true},
78+
},
79+
validateFunc: verifyOpenMetricsCreatedTimestampZeroIngestionEnabled,
80+
validateScrapes: true,
81+
normalizedName: true,
82+
},
83+
}
84+
85+
testComponent(t, targets, nil)
86+
}
87+
88+
func verifyOpenMetricsCreatedTimestampZeroIngestionEnabled(t *testing.T, td *testData, mds []pmetric.ResourceMetrics) {
89+
verifyNumValidScrapeResults(t, td, mds)
90+
ts1 := getTS(mds[0].ScopeMetrics().At(0).Metrics())
91+
e1 := []metricExpectation{
92+
{
93+
"a_seconds_total",
94+
pmetric.MetricTypeSum,
95+
"s",
96+
[]dataPointExpectation{
97+
{
98+
numberPointComparator: []numberPointComparator{
99+
compareStartTimestamp(timestampFromFloat64(123.456)),
100+
compareTimestamp(ts1),
101+
compareDoubleValue(1.0),
102+
},
103+
},
104+
},
105+
nil,
106+
},
107+
}
108+
doCompare(t, "created-timestamp-zero-ingestion-enabled", td.attributes, mds[0], e1)
109+
}
110+
111+
func TestOpenMetricsCreatedTimestampZeroIngestionDisabled(t *testing.T) {
112+
err := featuregate.GlobalRegistry().Set("receiver.prometheusreceiver.EnableCreatedTimestampZeroIngestion", false)
113+
require.NoError(t, err)
114+
t.Cleanup(func() {
115+
err := featuregate.GlobalRegistry().Set("receiver.prometheusreceiver.EnableCreatedTimestampZeroIngestion", false)
116+
require.NoError(t, err, "Failed to reset feature gate to default state")
117+
})
118+
119+
targets := []*testData{
120+
{
121+
name: "target2",
122+
pages: []mockPrometheusResponse{
123+
{code: 200, data: openMetricsCreatedTimestampMetrics, useOpenMetrics: true},
124+
},
125+
validateFunc: verifyOpenMetricsCreatedTimestampZeroIngestionDisabled,
126+
validateScrapes: true,
127+
normalizedName: false,
128+
},
129+
}
130+
131+
testComponent(t, targets, nil)
132+
}
133+
134+
func verifyOpenMetricsCreatedTimestampZeroIngestionDisabled(t *testing.T, td *testData, mds []pmetric.ResourceMetrics) {
135+
verifyNumValidScrapeResults(t, td, mds)
136+
ts1 := getTS(mds[0].ScopeMetrics().At(0).Metrics())
137+
e1 := []metricExpectation{
138+
{
139+
"a_seconds_total",
140+
pmetric.MetricTypeSum,
141+
"s",
142+
[]dataPointExpectation{
143+
{
144+
numberPointComparator: []numberPointComparator{
145+
compareStartTimestamp(ts1),
146+
compareTimestamp(ts1),
147+
compareDoubleValue(1.0),
148+
},
149+
},
150+
},
151+
nil,
152+
},
153+
{
154+
"a_seconds_created",
155+
pmetric.MetricTypeSum,
156+
"s",
157+
[]dataPointExpectation{
158+
{
159+
numberPointComparator: []numberPointComparator{
160+
compareStartTimestamp(timestampFromFloat64(123.456)),
161+
compareTimestamp(ts1),
162+
compareDoubleValue(0),
163+
},
164+
},
165+
},
166+
nil,
167+
},
168+
}
169+
170+
doCompare(t, "created-timestamp-zero-ingestion-disabled", td.attributes, mds[0], e1)
171+
}
172+
173+
func timestampFromFloat64(ts float64) pcommon.Timestamp {
174+
secs := int64(ts)
175+
nanos := int64((ts - float64(secs)) * 1e9)
176+
return pcommon.Timestamp(secs*1e9 + nanos)
177+
}

0 commit comments

Comments
 (0)