Skip to content

Commit 33db79c

Browse files
committed
feat (processor/k8sattributes): add timeout for waiting.
1 parent a6a1720 commit 33db79c

File tree

12 files changed

+115
-60
lines changed

12 files changed

+115
-60
lines changed

.chloggen/k8sattributes-block.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ component: processor/k8sattributes
1010
note: Block when starting until the metadata has been synced, to fix the issue that some data couldn't be associated with metadata when the agent had just started.
1111

1212
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
13-
issues: []
13+
issues: [32556]
1414

1515
# (Optional) One or more lines of additional information to render under the primary note.
1616
# These lines will be padded with 2 spaces and then inserted directly into the document.

processor/k8sattributesprocessor/README.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,21 @@ the processor associates the received trace to the pod, based on the connection
193193
}
194194
```
195195

196+
By default, the processor will be ready as soon as it starts, even if no metadata has been fetched yet.
197+
If data is sent to this processor before the metadata is synced, there will be no metadata to enrich the data with.
198+
199+
To wait for the metadata to be synced before the processor is ready, set the `wait_for_metadata` option to `true`.
200+
Then the processor will not be ready until the metadata has been synced.
201+
If a timeout is reached, the processor will fail to start and return an error, which will cause the collector to exit.
202+
The timeout defaults to 10s and can be configured with the `wait_for_metadata_timeout` option.
203+
204+
Example of configuring the processor to wait for metadata to be synced before it is ready:
205+
206+
```yaml
207+
wait_for_metadata: true
208+
wait_for_metadata_timeout: 10s
209+
```
210+
196211
## Extracting attributes from pod labels and annotations
197212

198213
The k8sattributesprocessor can also set resource attributes from k8s labels and annotations of pods, namespaces and nodes.

processor/k8sattributesprocessor/client_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
package k8sattributesprocessor
55

66
import (
7+
"time"
8+
79
"go.opentelemetry.io/collector/component"
810
"k8s.io/apimachinery/pkg/fields"
911
"k8s.io/apimachinery/pkg/labels"
@@ -35,7 +37,7 @@ func selectors() (labels.Selector, fields.Selector) {
3537
}
3638

3739
// newFakeClient instantiates a new FakeClient object and satisfies the ClientProvider type
38-
func newFakeClient(_ component.TelemetrySettings, _ k8sconfig.APIConfig, rules kube.ExtractionRules, filters kube.Filters, associations []kube.Association, _ kube.Excludes, _ kube.APIClientsetProvider, _ kube.InformerProvider, _ kube.InformerProviderNamespace, _ kube.InformerProviderReplicaSet) (kube.Client, error) {
40+
func newFakeClient(_ component.TelemetrySettings, _ k8sconfig.APIConfig, rules kube.ExtractionRules, filters kube.Filters, associations []kube.Association, _ kube.Excludes, _ kube.APIClientsetProvider, _ kube.InformerProvider, _ kube.InformerProviderNamespace, _ kube.InformerProviderReplicaSet, _ bool, _ time.Duration) (kube.Client, error) {
3941
cs := fake.NewSimpleClientset()
4042

4143
ls, fs := selectors()

processor/k8sattributesprocessor/config.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package k8sattributesprocessor // import "github.com/open-telemetry/opentelemetr
66
import (
77
"fmt"
88
"regexp"
9+
"time"
910

1011
"go.opentelemetry.io/collector/featuregate"
1112
conventions "go.opentelemetry.io/collector/semconv/v1.6.1"
@@ -46,6 +47,12 @@ type Config struct {
4647
// Exclude section allows to define names of pod that should be
4748
// ignored while tagging.
4849
Exclude ExcludeConfig `mapstructure:"exclude"`
50+
51+
// WaitForMetadata is a flag that determines if the processor should wait for k8s metadata to be synced when starting.
52+
WaitForMetadata bool `mapstructure:"wait_for_metadata"`
53+
54+
// WaitForMetadataTimeout is the maximum time the processor will wait for the k8s metadata to be synced.
55+
WaitForMetadataTimeout time.Duration `mapstructure:"wait_for_metadata_timeout"`
4956
}
5057

5158
func (cfg *Config) Validate() error {

processor/k8sattributesprocessor/config_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package k8sattributesprocessor
66
import (
77
"path/filepath"
88
"testing"
9+
"time"
910

1011
"github.com/stretchr/testify/assert"
1112
"github.com/stretchr/testify/require"
@@ -34,6 +35,7 @@ func TestLoadConfig(t *testing.T) {
3435
Extract: ExtractConfig{
3536
Metadata: enabledAttributes(),
3637
},
38+
WaitForMetadataTimeout: 10 * time.Second,
3739
},
3840
},
3941
{
@@ -105,6 +107,7 @@ func TestLoadConfig(t *testing.T) {
105107
{Name: "jaeger-collector"},
106108
},
107109
},
110+
WaitForMetadataTimeout: 10 * time.Second,
108111
},
109112
},
110113
{
@@ -127,6 +130,7 @@ func TestLoadConfig(t *testing.T) {
127130
{Name: "jaeger-collector"},
128131
},
129132
},
133+
WaitForMetadataTimeout: 10 * time.Second,
130134
},
131135
},
132136
{
@@ -149,6 +153,7 @@ func TestLoadConfig(t *testing.T) {
149153
{Name: "jaeger-collector"},
150154
},
151155
},
156+
WaitForMetadataTimeout: 10 * time.Second,
152157
},
153158
},
154159
{

processor/k8sattributesprocessor/factory.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package k8sattributesprocessor // import "github.com/open-telemetry/opentelemetr
55

66
import (
77
"context"
8+
"time"
89

910
"go.opentelemetry.io/collector/component"
1011
"go.opentelemetry.io/collector/consumer"
@@ -38,6 +39,7 @@ func createDefaultConfig() component.Config {
3839
Extract: ExtractConfig{
3940
Metadata: enabledAttributes(),
4041
},
42+
WaitForMetadataTimeout: 10 * time.Second,
4143
}
4244
}
4345

@@ -165,5 +167,10 @@ func createProcessorOpts(cfg component.Config) []option {
165167

166168
opts = append(opts, withExcludes(oCfg.Exclude))
167169

170+
opts = append(opts, withWaitForMetadataTimeout(oCfg.WaitForMetadataTimeout))
171+
if oCfg.WaitForMetadata {
172+
opts = append(opts, withWaitForMetadata(true))
173+
}
174+
168175
return opts
169176
}

processor/k8sattributesprocessor/internal/kube/client.go

Lines changed: 35 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -40,18 +40,20 @@ var enableRFC3339Timestamp = featuregate.GlobalRegistry().MustRegister(
4040

4141
// WatchClient is the main interface provided by this package to a kubernetes cluster.
4242
type WatchClient struct {
43-
m sync.RWMutex
44-
deleteMut sync.Mutex
45-
logger *zap.Logger
46-
kc kubernetes.Interface
47-
informer cache.SharedInformer
48-
namespaceInformer cache.SharedInformer
49-
nodeInformer cache.SharedInformer
50-
replicasetInformer cache.SharedInformer
51-
replicasetRegex *regexp.Regexp
52-
cronJobRegex *regexp.Regexp
53-
deleteQueue []deleteRequest
54-
stopCh chan struct{}
43+
m sync.RWMutex
44+
deleteMut sync.Mutex
45+
logger *zap.Logger
46+
kc kubernetes.Interface
47+
informer cache.SharedInformer
48+
namespaceInformer cache.SharedInformer
49+
nodeInformer cache.SharedInformer
50+
replicasetInformer cache.SharedInformer
51+
replicasetRegex *regexp.Regexp
52+
cronJobRegex *regexp.Regexp
53+
deleteQueue []deleteRequest
54+
stopCh chan struct{}
55+
waitForMetadata bool
56+
waitForMetadataTimeout time.Duration
5557

5658
// A map containing Pod related data, used to associate them with resources.
5759
// Key can be either an IP address or Pod UID
@@ -85,21 +87,23 @@ var rRegex = regexp.MustCompile(`^(.*)-[0-9a-zA-Z]+$`)
8587
var cronJobRegex = regexp.MustCompile(`^(.*)-[0-9]+$`)
8688

8789
// New initializes a new k8s Client.
88-
func New(set component.TelemetrySettings, apiCfg k8sconfig.APIConfig, rules ExtractionRules, filters Filters, associations []Association, exclude Excludes, newClientSet APIClientsetProvider, newInformer InformerProvider, newNamespaceInformer InformerProviderNamespace, newReplicaSetInformer InformerProviderReplicaSet) (Client, error) {
90+
func New(set component.TelemetrySettings, apiCfg k8sconfig.APIConfig, rules ExtractionRules, filters Filters, associations []Association, exclude Excludes, newClientSet APIClientsetProvider, newInformer InformerProvider, newNamespaceInformer InformerProviderNamespace, newReplicaSetInformer InformerProviderReplicaSet, waitForMetadata bool, waitForMetadataTimeout time.Duration) (Client, error) {
8991
telemetryBuilder, err := metadata.NewTelemetryBuilder(set)
9092
if err != nil {
9193
return nil, err
9294
}
9395
c := &WatchClient{
94-
logger: set.Logger,
95-
Rules: rules,
96-
Filters: filters,
97-
Associations: associations,
98-
Exclude: exclude,
99-
replicasetRegex: rRegex,
100-
cronJobRegex: cronJobRegex,
101-
stopCh: make(chan struct{}),
102-
telemetryBuilder: telemetryBuilder,
96+
logger: set.Logger,
97+
Rules: rules,
98+
Filters: filters,
99+
Associations: associations,
100+
Exclude: exclude,
101+
replicasetRegex: rRegex,
102+
cronJobRegex: cronJobRegex,
103+
stopCh: make(chan struct{}),
104+
telemetryBuilder: telemetryBuilder,
105+
waitForMetadata: waitForMetadata,
106+
waitForMetadataTimeout: waitForMetadataTimeout,
103107
}
104108
go c.deleteLoop(time.Second*30, defaultPodDeleteGracePeriod)
105109

@@ -240,10 +244,16 @@ func (c *WatchClient) Start() error {
240244
go c.nodeInformer.Run(c.stopCh)
241245
}
242246

243-
if !cache.WaitForCacheSync(c.stopCh, synced...) {
244-
return errors.New("failed to wait for caches to sync")
247+
if c.waitForMetadata {
248+
timeoutCh := make(chan struct{})
249+
t := time.AfterFunc(c.waitForMetadataTimeout, func() {
250+
close(timeoutCh)
251+
})
252+
defer t.Stop()
253+
if !cache.WaitForCacheSync(timeoutCh, synced...) {
254+
return errors.New("failed to wait for caches to sync")
255+
}
245256
}
246-
247257
return nil
248258
}
249259

processor/k8sattributesprocessor/internal/kube/client_test.go

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -144,29 +144,18 @@ func nodeAddAndUpdateTest(t *testing.T, c *WatchClient, handler func(obj any)) {
144144
}
145145

146146
func TestDefaultClientset(t *testing.T) {
147-
c, err := New(componenttest.NewNopTelemetrySettings(), k8sconfig.APIConfig{}, ExtractionRules{}, Filters{}, []Association{}, Excludes{}, nil, nil, nil, nil)
147+
c, err := New(componenttest.NewNopTelemetrySettings(), k8sconfig.APIConfig{}, ExtractionRules{}, Filters{}, []Association{}, Excludes{}, nil, nil, nil, nil, false, 10*time.Second)
148148
assert.Error(t, err)
149149
assert.Equal(t, "invalid authType for kubernetes: ", err.Error())
150150
assert.Nil(t, c)
151151

152-
c, err = New(componenttest.NewNopTelemetrySettings(), k8sconfig.APIConfig{}, ExtractionRules{}, Filters{}, []Association{}, Excludes{}, newFakeAPIClientset, nil, nil, nil)
152+
c, err = New(componenttest.NewNopTelemetrySettings(), k8sconfig.APIConfig{}, ExtractionRules{}, Filters{}, []Association{}, Excludes{}, newFakeAPIClientset, nil, nil, nil, false, 10*time.Second)
153153
assert.NoError(t, err)
154154
assert.NotNil(t, c)
155155
}
156156

157157
func TestBadFilters(t *testing.T) {
158-
c, err := New(
159-
componenttest.NewNopTelemetrySettings(),
160-
k8sconfig.APIConfig{},
161-
ExtractionRules{},
162-
Filters{Fields: []FieldFilter{{Op: selection.Exists}}},
163-
[]Association{},
164-
Excludes{},
165-
newFakeAPIClientset,
166-
NewFakeInformer,
167-
NewFakeNamespaceInformer,
168-
NewFakeReplicaSetInformer,
169-
)
158+
c, err := New(componenttest.NewNopTelemetrySettings(), k8sconfig.APIConfig{}, ExtractionRules{}, Filters{Fields: []FieldFilter{{Op: selection.Exists}}}, []Association{}, Excludes{}, newFakeAPIClientset, NewFakeInformer, NewFakeNamespaceInformer, NewFakeReplicaSetInformer, false, 10*time.Second)
170159
assert.Error(t, err)
171160
assert.Nil(t, c)
172161
}
@@ -202,7 +191,7 @@ func TestConstructorErrors(t *testing.T) {
202191
gotAPIConfig = c
203192
return nil, fmt.Errorf("error creating k8s client")
204193
}
205-
c, err := New(componenttest.NewNopTelemetrySettings(), apiCfg, er, ff, []Association{}, Excludes{}, clientProvider, NewFakeInformer, NewFakeNamespaceInformer, nil)
194+
c, err := New(componenttest.NewNopTelemetrySettings(), apiCfg, er, ff, []Association{}, Excludes{}, clientProvider, NewFakeInformer, NewFakeNamespaceInformer, nil, false, 10*time.Second)
206195
assert.Nil(t, c)
207196
assert.Error(t, err)
208197
assert.Equal(t, "error creating k8s client", err.Error())
@@ -1883,7 +1872,7 @@ func newTestClientWithRulesAndFilters(t *testing.T, f Filters) (*WatchClient, *o
18831872
},
18841873
},
18851874
}
1886-
c, err := New(set, k8sconfig.APIConfig{}, ExtractionRules{}, f, associations, exclude, newFakeAPIClientset, NewFakeInformer, NewFakeNamespaceInformer, NewFakeReplicaSetInformer)
1875+
c, err := New(set, k8sconfig.APIConfig{}, ExtractionRules{}, f, associations, exclude, newFakeAPIClientset, NewFakeInformer, NewFakeNamespaceInformer, NewFakeReplicaSetInformer, false, 10*time.Second)
18871876
require.NoError(t, err)
18881877
return c.(*WatchClient), logs
18891878
}

processor/k8sattributesprocessor/internal/kube/kube.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ type Client interface {
9696
}
9797

9898
// ClientProvider defines a func type that returns a new Client.
99-
type ClientProvider func(component.TelemetrySettings, k8sconfig.APIConfig, ExtractionRules, Filters, []Association, Excludes, APIClientsetProvider, InformerProvider, InformerProviderNamespace, InformerProviderReplicaSet) (Client, error)
99+
type ClientProvider func(component.TelemetrySettings, k8sconfig.APIConfig, ExtractionRules, Filters, []Association, Excludes, APIClientsetProvider, InformerProvider, InformerProviderNamespace, InformerProviderReplicaSet, bool, time.Duration) (Client, error)
100100

101101
// APIClientsetProvider defines a func type that initializes and return a new kubernetes
102102
// Clientset object.

processor/k8sattributesprocessor/options.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"fmt"
88
"os"
99
"regexp"
10+
"time"
1011

1112
conventions "go.opentelemetry.io/collector/semconv/v1.6.1"
1213
"k8s.io/apimachinery/pkg/selection"
@@ -381,3 +382,19 @@ func withExcludes(podExclude ExcludeConfig) option {
381382
return nil
382383
}
383384
}
385+
386+
// withWaitForMetadata allows specifying whether to wait for pod metadata to be synced.
387+
func withWaitForMetadata(wait bool) option {
388+
return func(p *kubernetesprocessor) error {
389+
p.waitForMetadata = wait
390+
return nil
391+
}
392+
}
393+
394+
// withWaitForMetadataTimeout allows specifying the timeout for waiting for pod metadata to be synced.
395+
func withWaitForMetadataTimeout(timeout time.Duration) option {
396+
return func(p *kubernetesprocessor) error {
397+
p.waitForMetadataTimeout = timeout
398+
return nil
399+
}
400+
}

0 commit comments

Comments
 (0)