Skip to content

Commit 9893d5d

Browse files
authored
[pkg/datadog] add hostname_detection_timeout and set default (#40377)
<!--Ex. Fixing a bug - Describe the bug and how this fixes the issue. Ex. Adding a feature - Explain what this achieves.--> #### Description Adds `hostname_detection_timeout` config option and sets a reasonable default (25 seconds) <!-- Issue number (e.g. #1234) or full URL to issue, if applicable. --> #### Link to tracking issue Fixes #40373 <!--Describe what testing was performed and which tests were added.--> #### Testing manually run `make kind-build` and `make kind-install-daemonset` and verify collector does not crash after 30 seconds <!--Describe the documentation added.--> #### Documentation .chloggen and definition in datadogexporter examples folder <!--Please delete paragraphs that you did not use before submitting.-->
1 parent 074607f commit 9893d5d

File tree

9 files changed

+65
-24
lines changed

9 files changed

+65
-24
lines changed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: 'bug_fix'
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
7+
component: 'exporter/datadog'
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: Adds `hostname_detection_timeout` configuration option for Datadog Exporter and sets default to 25 seconds.
11+
12+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
13+
issues: [40373, 40377]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext: |
19+
- This fixes a bug on Kubernetes daemonset deployments where the collector hangs or is restarted on startup.
20+
- The underlying timeout setting already existed internally but was not exposed for user configuration.
21+
22+
# If your change doesn't affect end users or the exported elements of any package,
23+
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
24+
# Optional: The change log or logs in which this entry should be included.
25+
# e.g. '[user]' or '[user, api]'
26+
# Include 'user' if the change is relevant to end users.
27+
# Include 'api' if there is a change to a library API.
28+
# Default: '[user]'
29+
change_logs: [user]

exporter/datadogexporter/README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,3 +85,9 @@ processors:
8585
statements:
8686
- set(attributes["datadog.log.source"], "otel")
8787
```
88+
89+
### My Collector K8s pod is getting rebooted on startup when I don't manually set a hostname under `exporters::datadog::hostname`
90+
91+
This is due to a bug in the underlying hostname detection, which blocks the `health_check` extension from responding to liveness/readiness probes on startup. To fix this, either set `hostname_detection_timeout` to less than the pod/daemonset `livenessProbe` window (`failureThreshold * periodSeconds`), so that hostname detection on startup times out before the control plane restarts the pod, or leave `hostname_detection_timeout` at its default value of `25s` and double-check the `livenessProbe` and `readinessProbe` settings to ensure that the control plane will in fact wait long enough for startup to complete before restarting the pod.
92+
93+
Hostname detection is currently required to initialize the Datadog Exporter, unless a hostname is specified manually under `hostname`.

exporter/datadogexporter/examples/collector.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,12 @@ exporters:
451451
## The default is 30 minutes.
452452
# reporter_period: 30m
453453

454+
## @param hostname_detection_timeout - duration - optional - default: 25s
455+
## The maximum amount of time the exporter will wait for hostname detection to complete.
456+
## The Datadog Exporter requires a hostname to send telemetry (metrics, traces and logs) to the Datadog backend.
457+
#
458+
# hostname_detection_timeout: 25s
459+
454460
## @param logs - custom object - optional
455461
## Logs exporter specific configuration.
456462
#

exporter/datadogexporter/factory.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@ func (f *factory) createMetricsExporter(
268268
c component.Config,
269269
) (exporter.Metrics, error) {
270270
cfg := checkAndCastConfig(c, set.Logger)
271-
hostProvider, err := f.SourceProvider(set.TelemetrySettings, cfg.Hostname, cfg.HostMetadata.GetSourceTimeout())
271+
hostProvider, err := f.SourceProvider(set.TelemetrySettings, cfg.Hostname, cfg.HostnameDetectionTimeout)
272272
if err != nil {
273273
return nil, fmt.Errorf("failed to build hostname provider: %w", err)
274274
}
@@ -453,7 +453,7 @@ func (f *factory) createTracesExporter(
453453
wg sync.WaitGroup // waits for agent to exit
454454
)
455455

456-
hostProvider, err := f.SourceProvider(set.TelemetrySettings, cfg.Hostname, cfg.HostMetadata.GetSourceTimeout())
456+
hostProvider, err := f.SourceProvider(set.TelemetrySettings, cfg.Hostname, cfg.HostnameDetectionTimeout)
457457
if err != nil {
458458
return nil, fmt.Errorf("failed to build hostname provider: %w", err)
459459
}
@@ -557,7 +557,7 @@ func (f *factory) createLogsExporter(
557557

558558
var pusher consumer.ConsumeLogsFunc
559559
var logsAgent logsagentpipeline.LogsAgent
560-
hostProvider, err := f.SourceProvider(set.TelemetrySettings, cfg.Hostname, cfg.HostMetadata.GetSourceTimeout())
560+
hostProvider, err := f.SourceProvider(set.TelemetrySettings, cfg.Hostname, cfg.HostnameDetectionTimeout)
561561
if err != nil {
562562
return nil, fmt.Errorf("failed to build hostname provider: %w", err)
563563
}

exporter/datadogexporter/factory_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -303,8 +303,8 @@ func TestOnlyMetadata(t *testing.T) {
303303
Enabled: true,
304304
ReporterPeriod: 30 * time.Minute,
305305
},
306+
HostnameDetectionTimeout: 50 * time.Millisecond,
306307
}
307-
cfg.HostMetadata.SetSourceTimeout(50 * time.Millisecond)
308308

309309
expTraces, err := factory.CreateTraces(
310310
ctx,

exporter/datadogexporter/metrics_exporter_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,8 @@ func TestNewExporter(t *testing.T) {
6565
Enabled: true,
6666
ReporterPeriod: 30 * time.Minute,
6767
},
68+
HostnameDetectionTimeout: 50 * time.Millisecond,
6869
}
69-
cfg.HostMetadata.SetSourceTimeout(50 * time.Millisecond)
7070

7171
params := exportertest.NewNopSettings(metadata.Type)
7272
f := NewFactory()
@@ -114,8 +114,8 @@ func TestNewExporter_Serializer(t *testing.T) {
114114
Enabled: true,
115115
ReporterPeriod: 30 * time.Minute,
116116
},
117+
HostnameDetectionTimeout: 50 * time.Millisecond,
117118
}
118-
cfg.HostMetadata.SetSourceTimeout(50 * time.Millisecond)
119119

120120
params := exportertest.NewNopSettings(metadata.Type)
121121
var err error

pkg/datadog/config/config.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,14 @@ type Config struct {
9191
// HostMetadata defines the host metadata specific configuration
9292
HostMetadata HostMetadataConfig `mapstructure:"host_metadata"`
9393

94+
// HostnameDetectionTimeout defines the timeout for hostname detection.
95+
// Hostname detection is required to initialize the Datadog exporter.
96+
// On K8s, it must be set to less than `failureThreshold * periodSeconds` due to
97+
// initialization blocking health_check liveness probes on startup.
98+
// If set to zero duration, there will be no timeout applied.
99+
// Default is 25 seconds.
100+
HostnameDetectionTimeout time.Duration `mapstructure:"hostname_detection_timeout"`
101+
94102
// OnlyMetadata defines whether to only send metadata
95103
// This is useful for agent-collector setups, so that
96104
// metadata about a host is sent to the backend even
@@ -373,5 +381,7 @@ func CreateDefaultConfig() component.Config {
373381
HostnameSource: HostnameSourceConfigOrSystem,
374382
ReporterPeriod: 30 * time.Minute,
375383
},
384+
385+
HostnameDetectionTimeout: 25 * time.Second, // set to 25 to prevent 30-second pod restart on K8s as reported in issue #40372 and #40373
376386
}
377387
}

pkg/datadog/config/config_test.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,8 @@ func TestCreateDefaultConfig(t *testing.T) {
467467
HostnameSource: HostnameSourceConfigOrSystem,
468468
ReporterPeriod: 30 * time.Minute,
469469
},
470-
OnlyMetadata: false,
470+
HostnameDetectionTimeout: 25 * time.Second,
471+
OnlyMetadata: false,
471472
}, cfg, "failed to create default config")
472473

473474
assert.NoError(t, componenttest.CheckConfigStruct(cfg))
@@ -542,7 +543,8 @@ func TestLoadConfig(t *testing.T) {
542543
HostnameSource: HostnameSourceConfigOrSystem,
543544
ReporterPeriod: 30 * time.Minute,
544545
},
545-
OnlyMetadata: false,
546+
HostnameDetectionTimeout: 25 * time.Second,
547+
OnlyMetadata: false,
546548
},
547549
},
548550
{
@@ -610,6 +612,7 @@ func TestLoadConfig(t *testing.T) {
610612
HostnameSource: HostnameSourceConfigOrSystem,
611613
ReporterPeriod: 30 * time.Minute,
612614
},
615+
HostnameDetectionTimeout: 25 * time.Second,
613616
},
614617
},
615618
{
@@ -675,6 +678,7 @@ func TestLoadConfig(t *testing.T) {
675678
Tags: []string{"example:tag"},
676679
ReporterPeriod: 30 * time.Minute,
677680
},
681+
HostnameDetectionTimeout: 25 * time.Second,
678682
},
679683
},
680684
{
@@ -734,7 +738,8 @@ func TestLoadConfig(t *testing.T) {
734738
HostnameSource: HostnameSourceConfigOrSystem,
735739
ReporterPeriod: 10 * time.Minute,
736740
},
737-
OnlyMetadata: false,
741+
HostnameDetectionTimeout: 25 * time.Second,
742+
OnlyMetadata: false,
738743
},
739744
},
740745
}

pkg/datadog/config/host.go

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -70,21 +70,6 @@ type HostMetadataConfig struct {
7070
// To attach tags to telemetry signals regardless of the host, use a processor instead.
7171
Tags []string `mapstructure:"tags"`
7272

73-
// sourceTimeout is the timeout to fetch from each provider - for example AWS IMDS.
74-
// If unset, or set to zero duration, there will be no timeout applied.
75-
// Default is no timeout.
76-
sourceTimeout time.Duration
77-
7873
// ReporterPeriod is the period at which the host metadata reporter will run.
7974
ReporterPeriod time.Duration `mapstructure:"reporter_period"`
8075
}
81-
82-
// SetSourceTimeout sets the timeout to fetch from each provider - for example AWS IMDS.
83-
func (h *HostMetadataConfig) SetSourceTimeout(t time.Duration) {
84-
h.sourceTimeout = t
85-
}
86-
87-
// GetSourceTimeout returns the timeout to fetch from each provider - for example AWS IMDS.
88-
func (h *HostMetadataConfig) GetSourceTimeout() time.Duration {
89-
return h.sourceTimeout
90-
}

0 commit comments

Comments
 (0)