Skip to content

Commit 80b2e50

Browse files
receiver/prometheusreceiver: add option to fallback to collector starttime (#36365)
#### Description This change adds an option to the metric adjuster to use an approximation of the collector starttime as a fallback for the start time of scraped cumulative metrics. This is useful when no start time is found and when the collector starts up alongside its targets (like in serverless environments or sidecar approaches). <!-- Issue number (e.g. #1234) or full URL to issue, if applicable. --> #### Link to tracking issue Fixes #36364 <!--Describe what testing was performed and which tests were added.--> #### Testing Added unit test for this config option <!--Describe the documentation added.--> #### Documentation Config option added to the README. --------- Signed-off-by: Ridwan Sharif <[email protected]>
1 parent ac0f08e commit 80b2e50

File tree

4 files changed

+159
-1
lines changed

4 files changed

+159
-1
lines changed

.chloggen/starttime-fallback.yaml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: enhancement
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
7+
component: prometheusreceiver
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: Add `receiver.prometheusreceiver.UseCollectorStartTimeFallback` feature gate that lets the start time metric adjuster fall back to the collector start time, as an approximation of the process start time, when no start time is found.
11+
12+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
13+
issues: [36364]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext:
19+
20+
# If your change doesn't affect end users or the exported elements of any package,
21+
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
22+
# Optional: The change log or logs in which this entry should be included.
23+
# e.g. '[user]' or '[user, api]'
24+
# Include 'user' if the change is relevant to end users.
25+
# Include 'api' if there is a change to a library API.
26+
# Default: '[user]'
27+
change_logs: []

receiver/prometheusreceiver/README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,17 @@ prometheus --config.file=prom.yaml
6767
```shell
6868
"--feature-gates=receiver.prometheusreceiver.UseCreatedMetric"
6969
```
70+
- `receiver.prometheusreceiver.UseCollectorStartTimeFallback`: enables using
71+
the collector start time as the metric start time if the
72+
`process_start_time_seconds` metric yields no result (for example, if targets
73+
expose no `process_start_time_seconds` metric). This is useful when the collector
74+
start time is a good approximation of the process start time - for example, in
75+
serverless workloads when the collector is deployed as a sidecar. To enable it,
76+
use the following feature gate option:
7077

78+
```shell
79+
"--feature-gates=receiver.prometheusreceiver.UseCollectorStartTimeFallback"
80+
```
7181
- `receiver.prometheusreceiver.EnableNativeHistograms`: process and turn native histogram metrics into OpenTelemetry exponential histograms. For more details consult the [Prometheus native histograms](#prometheus-native-histograms) section.
7282

7383
```shell

receiver/prometheusreceiver/internal/starttimemetricadjuster.go

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@ package internal // import "github.com/open-telemetry/opentelemetry-collector-co
66
import (
77
"errors"
88
"regexp"
9+
"time"
910

11+
"go.opentelemetry.io/collector/featuregate"
1012
"go.opentelemetry.io/collector/pdata/pmetric"
1113
"go.uber.org/zap"
1214
)
@@ -15,8 +17,27 @@ var (
1517
errNoStartTimeMetrics = errors.New("start_time metric is missing")
1618
errNoDataPointsStartTimeMetric = errors.New("start time metric with no data points")
1719
errUnsupportedTypeStartTimeMetric = errors.New("unsupported data type for start time metric")
20+
21+
// approximateCollectorStartTime is the approximate start time of the
22+
// collector. Used as a fallback start time for metrics that don't have a
23+
// start time set (when the
24+
// receiver.prometheusreceiver.UseCollectorStartTimeFallback feature gate is
25+
// enabled). Set once in init, when this package is loaded.
26+
approximateCollectorStartTime time.Time
27+
)
28+
29+
var useCollectorStartTimeFallbackGate = featuregate.GlobalRegistry().MustRegister(
30+
"receiver.prometheusreceiver.UseCollectorStartTimeFallback",
31+
featuregate.StageAlpha,
32+
featuregate.WithRegisterDescription("When enabled, the Prometheus receiver's"+
33+
" start time metric adjuster will fallback to using the collector start time"+
34+
" when a start time is not available"),
1835
)
1936

37+
func init() {
38+
approximateCollectorStartTime = time.Now()
39+
}
40+
2041
type startTimeMetricAdjuster struct {
2142
startTimeMetricRegex *regexp.Regexp
2243
logger *zap.Logger
@@ -33,7 +54,11 @@ func NewStartTimeMetricAdjuster(logger *zap.Logger, startTimeMetricRegex *regexp
3354
func (stma *startTimeMetricAdjuster) AdjustMetrics(metrics pmetric.Metrics) error {
3455
startTime, err := stma.getStartTime(metrics)
3556
if err != nil {
36-
return err
57+
if !useCollectorStartTimeFallbackGate.IsEnabled() {
58+
return err
59+
}
60+
stma.logger.Info("Couldn't get start time for metrics. Using fallback start time.", zap.Error(err), zap.Time("fallback_start_time", approximateCollectorStartTime))
61+
startTime = float64(approximateCollectorStartTime.Unix())
3762
}
3863

3964
startTimeTs := timestampFromFloat64(startTime)

receiver/prometheusreceiver/internal/starttimemetricadjuster_test.go

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,14 @@ package internal
66
import (
77
"regexp"
88
"testing"
9+
"time"
910

1011
"github.com/stretchr/testify/assert"
1112
"go.opentelemetry.io/collector/pdata/pcommon"
1213
"go.opentelemetry.io/collector/pdata/pmetric"
1314
"go.uber.org/zap"
15+
16+
"github.com/open-telemetry/opentelemetry-collector-contrib/internal/common/testutil"
1417
)
1518

1619
func TestStartTimeMetricMatch(t *testing.T) {
@@ -154,3 +157,96 @@ func TestStartTimeMetricMatch(t *testing.T) {
154157
})
155158
}
156159
}
160+
161+
func TestStartTimeMetricFallback(t *testing.T) {
162+
const startTime = pcommon.Timestamp(123 * 1e9)
163+
const currentTime = pcommon.Timestamp(126 * 1e9)
164+
mockStartTime := time.Now().Add(-10 * time.Hour)
165+
mockStartTimeSeconds := float64(mockStartTime.Unix())
166+
processStartTime := mockStartTime.Add(-10 * time.Hour)
167+
processStartTimeSeconds := float64(processStartTime.Unix())
168+
169+
tests := []struct {
170+
name string
171+
inputs pmetric.Metrics
172+
startTimeMetricRegex *regexp.Regexp
173+
expectedStartTime pcommon.Timestamp
174+
expectedErr error
175+
}{
176+
{
177+
name: "regexp_match_metric_no_fallback",
178+
inputs: metrics(
179+
sumMetric("test_sum_metric", doublePoint(nil, startTime, currentTime, 16)),
180+
histogramMetric("test_histogram_metric", histogramPoint(nil, startTime, currentTime, []float64{1, 2}, []uint64{2, 3, 4})),
181+
summaryMetric("test_summary_metric", summaryPoint(nil, startTime, currentTime, 10, 100, []float64{10, 50, 90}, []float64{9, 15, 48})),
182+
sumMetric("example_process_start_time_seconds", doublePoint(nil, startTime, currentTime, processStartTimeSeconds)),
183+
sumMetric("process_start_time_seconds", doublePoint(nil, startTime, currentTime, processStartTimeSeconds)),
184+
exponentialHistogramMetric("test_exponential_histogram_metric", exponentialHistogramPointSimplified(nil, startTime, currentTime, 3, 1, -5, 3)),
185+
),
186+
startTimeMetricRegex: regexp.MustCompile("^.*_process_start_time_seconds$"),
187+
expectedStartTime: timestampFromFloat64(processStartTimeSeconds),
188+
},
189+
{
190+
name: "regexp_no_regex_match_metric_fallback",
191+
inputs: metrics(
192+
sumMetric("test_sum_metric", doublePoint(nil, startTime, currentTime, 16)),
193+
histogramMetric("test_histogram_metric", histogramPoint(nil, startTime, currentTime, []float64{1, 2}, []uint64{2, 3, 4})),
194+
summaryMetric("test_summary_metric", summaryPoint(nil, startTime, currentTime, 10, 100, []float64{10, 50, 90}, []float64{9, 15, 48})),
195+
),
196+
startTimeMetricRegex: regexp.MustCompile("^.*_process_start_time_seconds$"),
197+
expectedStartTime: timestampFromFloat64(mockStartTimeSeconds),
198+
},
199+
{
200+
name: "match_no_match_metric_fallback",
201+
inputs: metrics(
202+
sumMetric("test_sum_metric", doublePoint(nil, startTime, currentTime, 16)),
203+
histogramMetric("test_histogram_metric", histogramPoint(nil, startTime, currentTime, []float64{1, 2}, []uint64{2, 3, 4})),
204+
summaryMetric("test_summary_metric", summaryPoint(nil, startTime, currentTime, 10, 100, []float64{10, 50, 90}, []float64{9, 15, 48})),
205+
),
206+
expectedStartTime: timestampFromFloat64(mockStartTimeSeconds),
207+
},
208+
}
209+
210+
for _, tt := range tests {
211+
t.Run(tt.name, func(t *testing.T) {
212+
testutil.SetFeatureGateForTest(t, useCollectorStartTimeFallbackGate, true)
213+
stma := NewStartTimeMetricAdjuster(zap.NewNop(), tt.startTimeMetricRegex)
214+
if tt.expectedErr != nil {
215+
assert.ErrorIs(t, stma.AdjustMetrics(tt.inputs), tt.expectedErr)
216+
return
217+
}
218+
219+
// To test that the adjuster is using the fallback correctly, override the fallback time to use
220+
// directly.
221+
approximateCollectorStartTime = mockStartTime
222+
223+
assert.NoError(t, stma.AdjustMetrics(tt.inputs))
224+
for i := 0; i < tt.inputs.ResourceMetrics().Len(); i++ {
225+
rm := tt.inputs.ResourceMetrics().At(i)
226+
for j := 0; j < rm.ScopeMetrics().Len(); j++ {
227+
ilm := rm.ScopeMetrics().At(j)
228+
for k := 0; k < ilm.Metrics().Len(); k++ {
229+
metric := ilm.Metrics().At(k)
230+
switch metric.Type() {
231+
case pmetric.MetricTypeSum:
232+
dps := metric.Sum().DataPoints()
233+
for l := 0; l < dps.Len(); l++ {
234+
assert.Equal(t, tt.expectedStartTime, dps.At(l).StartTimestamp())
235+
}
236+
case pmetric.MetricTypeSummary:
237+
dps := metric.Summary().DataPoints()
238+
for l := 0; l < dps.Len(); l++ {
239+
assert.Equal(t, tt.expectedStartTime, dps.At(l).StartTimestamp())
240+
}
241+
case pmetric.MetricTypeHistogram:
242+
dps := metric.Histogram().DataPoints()
243+
for l := 0; l < dps.Len(); l++ {
244+
assert.Equal(t, tt.expectedStartTime, dps.At(l).StartTimestamp())
245+
}
246+
}
247+
}
248+
}
249+
}
250+
})
251+
}
252+
}

0 commit comments

Comments
 (0)