
Commit 0fa58d2

[receiver/k8scluster] Do not keep metrics in memory (#24769)
Construct metrics on every scrape instead of keeping them in memory and copying them with modifications on each scrape. We keep the Kubernetes objects in cache anyway, so we can build metrics from that cache instead of storing pre-built metrics, which reduces RAM utilization. It also lets us extract a single metrics builder instance rather than creating one on every scrape, which is the recommended approach that all other receivers follow and ensures that any warnings defined in metadata.yaml are displayed only once, not on every scrape interval.
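For illustration, a minimal standalone sketch of the pattern described above, using hypothetical types (object, metricsBuilder, collector) rather than the receiver's real metadata.MetricsBuilder and metadata.Store APIs: only the Kubernetes objects are cached, one builder is created up front, and metrics are rebuilt from the cache on each scrape.

// Sketch only; names are hypothetical stand-ins for the receiver's generated code.
package main

import (
	"fmt"
	"time"
)

// object stands in for a cached Kubernetes API object (Pod, Node, ...).
type object struct {
	uid  string
	kind string
}

// metricsBuilder stands in for the generated builder that is now created once
// and reused on every scrape.
type metricsBuilder struct {
	points []string
}

func (mb *metricsBuilder) record(ts time.Time, o object) {
	mb.points = append(mb.points, fmt.Sprintf("%s/%s@%d", o.kind, o.uid, ts.Unix()))
}

// emit returns everything recorded since the last scrape and resets the
// builder, mirroring how a builder's Emit call behaves.
func (mb *metricsBuilder) emit() []string {
	out := mb.points
	mb.points = nil
	return out
}

// collector keeps only the object cache; there is no per-UID metrics cache.
type collector struct {
	cache map[string]object // kept up to date by informers, as in the receiver
	mb    *metricsBuilder   // constructed once, not on every scrape
}

// scrape rebuilds all metrics from the cached objects with the current timestamp.
func (c *collector) scrape(now time.Time) []string {
	for _, o := range c.cache {
		c.mb.record(now, o)
	}
	return c.mb.emit()
}

func main() {
	c := &collector{
		cache: map[string]object{"uid-1": {uid: "uid-1", kind: "Pod"}},
		mb:    &metricsBuilder{},
	}
	fmt.Println(c.scrape(time.Now())) // metrics are constructed fresh on each scrape
}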
1 parent b149984 commit 0fa58d2

37 files changed: +842 −966 lines
Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+# Use this changelog template to create an entry for release notes.
+
+# If your change doesn't affect end users, such as a test fix or a tooling change,
+# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
+
+# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
+change_type: enhancement
+
+# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
+component: k8sclusterreceiver
+
+# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
+note: Reduce memory utilization
+
+# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
+issues: [24769]
+
+change_logs: [user]

receiver/k8sclusterreceiver/internal/clusterresourcequota/clusterresourcequotas.go

Lines changed: 8 additions & 14 deletions
@@ -5,49 +5,43 @@ package clusterresourcequota // import "github.com/open-telemetry/opentelemetry-
 
 import (
 	"strings"
-	"time"
 
 	quotav1 "github.com/openshift/api/quota/v1"
 	"go.opentelemetry.io/collector/pdata/pcommon"
-	"go.opentelemetry.io/collector/pdata/pmetric"
-	"go.opentelemetry.io/collector/receiver"
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 
-	imetadataphase "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver/internal/metadata"
+	"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver/internal/metadata"
 )
 
-func GetMetrics(set receiver.CreateSettings, metricsBuilderConfig imetadataphase.MetricsBuilderConfig, crq *quotav1.ClusterResourceQuota) pmetric.Metrics {
-	mbphase := imetadataphase.NewMetricsBuilder(metricsBuilderConfig, set)
-	ts := pcommon.NewTimestampFromTime(time.Now())
-
+func RecordMetrics(mb *metadata.MetricsBuilder, crq *quotav1.ClusterResourceQuota, ts pcommon.Timestamp) {
 	for k, v := range crq.Status.Total.Hard {
 		val := extractValue(k, v)
-		mbphase.RecordOpenshiftClusterquotaLimitDataPoint(ts, val, string(k))
+		mb.RecordOpenshiftClusterquotaLimitDataPoint(ts, val, string(k))
 	}
 
 	for k, v := range crq.Status.Total.Used {
 		val := extractValue(k, v)
-		mbphase.RecordOpenshiftClusterquotaUsedDataPoint(ts, val, string(k))
+		mb.RecordOpenshiftClusterquotaUsedDataPoint(ts, val, string(k))
 	}
 
 	for _, ns := range crq.Status.Namespaces {
 		for k, v := range ns.Status.Hard {
 			val := extractValue(k, v)
-			mbphase.RecordOpenshiftAppliedclusterquotaLimitDataPoint(ts, val, ns.Namespace, string(k))
+			mb.RecordOpenshiftAppliedclusterquotaLimitDataPoint(ts, val, ns.Namespace, string(k))
 		}
 
 		for k, v := range ns.Status.Used {
 			val := extractValue(k, v)
-			mbphase.RecordOpenshiftAppliedclusterquotaUsedDataPoint(ts, val, ns.Namespace, string(k))
+			mb.RecordOpenshiftAppliedclusterquotaUsedDataPoint(ts, val, ns.Namespace, string(k))
 		}
 	}
 
-	rb := imetadataphase.NewResourceBuilder(metricsBuilderConfig.ResourceAttributes)
+	rb := mb.NewResourceBuilder()
 	rb.SetOpenshiftClusterquotaName(crq.Name)
 	rb.SetOpenshiftClusterquotaUID(string(crq.UID))
 	rb.SetOpencensusResourcetype("k8s")
-	return mbphase.Emit(imetadataphase.WithResource(rb.Emit()))
+	mb.EmitForResource(metadata.WithResource(rb.Emit()))
 }
 
 func extractValue(k v1.ResourceName, v resource.Quantity) int64 {

receiver/k8sclusterreceiver/internal/clusterresourcequota/clusterresourcequotas_test.go

Lines changed: 6 additions & 1 deletion
@@ -6,8 +6,10 @@ package clusterresourcequota
 import (
 	"path/filepath"
 	"testing"
+	"time"
 
 	"github.com/stretchr/testify/require"
+	"go.opentelemetry.io/collector/pdata/pcommon"
 	"go.opentelemetry.io/collector/receiver/receivertest"
 
 	"github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal/golden"
@@ -19,7 +21,10 @@ import (
 func TestClusterRequestQuotaMetrics(t *testing.T) {
 	crq := testutils.NewClusterResourceQuota("1")
 
-	m := GetMetrics(receivertest.NewNopCreateSettings(), metadata.DefaultMetricsBuilderConfig(), crq)
+	ts := pcommon.Timestamp(time.Now().UnixNano())
+	mb := metadata.NewMetricsBuilder(metadata.DefaultMetricsBuilderConfig(), receivertest.NewNopCreateSettings())
+	RecordMetrics(mb, crq, ts)
+	m := mb.Emit()
 
 	expected, err := golden.ReadMetrics(filepath.Join("testdata", "expected.yaml"))
 	require.NoError(t, err)

receiver/k8sclusterreceiver/internal/collection/collector.go

Lines changed: 65 additions & 125 deletions
@@ -4,29 +4,24 @@
 package collection // import "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver/internal/collection"
 
 import (
-	"reflect"
 	"time"
 
 	quotav1 "github.com/openshift/api/quota/v1"
+	"go.opentelemetry.io/collector/pdata/pcommon"
 	"go.opentelemetry.io/collector/pdata/pmetric"
 	"go.opentelemetry.io/collector/receiver"
-	"go.uber.org/zap"
 	appsv1 "k8s.io/api/apps/v1"
 	autoscalingv2 "k8s.io/api/autoscaling/v2"
 	autoscalingv2beta2 "k8s.io/api/autoscaling/v2beta2"
 	batchv1 "k8s.io/api/batch/v1"
 	batchv1beta1 "k8s.io/api/batch/v1beta1"
 	corev1 "k8s.io/api/core/v1"
-	"k8s.io/apimachinery/pkg/runtime"
-	"k8s.io/apimachinery/pkg/runtime/schema"
-	"k8s.io/apimachinery/pkg/types"
-	"k8s.io/client-go/tools/cache"
 
-	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/experimentalmetricmetadata"
 	"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver/internal/clusterresourcequota"
 	"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver/internal/cronjob"
 	"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver/internal/demonset"
 	"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver/internal/deployment"
+	"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver/internal/gvk"
 	"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver/internal/hpa"
 	"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver/internal/jobs"
 	"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver/internal/metadata"
@@ -42,137 +37,82 @@ import (
 // TODO: Consider moving some of these constants to
 // https://go.opentelemetry.io/collector/blob/main/model/semconv/opentelemetry.go.
 
-// DataCollector wraps around a metricsStore and a metadaStore exposing
-// methods to perform on the underlying stores. DataCollector also provides
-// an interface to interact with refactored code from SignalFx Agent which is
-// confined to the collection package.
+// DataCollector emits metrics with CollectMetricData based on the Kubernetes API objects in the metadata store.
 type DataCollector struct {
 	settings                 receiver.CreateSettings
-	metricsStore             *metricsStore
 	metadataStore            *metadata.Store
 	nodeConditionsToReport   []string
 	allocatableTypesToReport []string
-	metricsBuilderConfig     metadata.MetricsBuilderConfig
+	metricsBuilder           *metadata.MetricsBuilder
 }
 
 // NewDataCollector returns a DataCollector.
-func NewDataCollector(set receiver.CreateSettings, metricsBuilderConfig metadata.MetricsBuilderConfig, nodeConditionsToReport, allocatableTypesToReport []string) *DataCollector {
+func NewDataCollector(set receiver.CreateSettings, ms *metadata.Store,
+	metricsBuilderConfig metadata.MetricsBuilderConfig, nodeConditionsToReport, allocatableTypesToReport []string) *DataCollector {
 	return &DataCollector{
-		settings: set,
-		metricsStore: &metricsStore{
-			metricsCache: make(map[types.UID]pmetric.Metrics),
-		},
-		metadataStore: &metadata.Store{},
+		settings:                 set,
+		metadataStore:            ms,
 		nodeConditionsToReport:   nodeConditionsToReport,
 		allocatableTypesToReport: allocatableTypesToReport,
-		metricsBuilderConfig: metricsBuilderConfig,
-	}
-}
-
-// SetupMetadataStore initializes a metadata store for the kubernetes kind.
-func (dc *DataCollector) SetupMetadataStore(gvk schema.GroupVersionKind, store cache.Store) {
-	dc.metadataStore.Setup(gvk, store)
-}
-
-func (dc *DataCollector) RemoveFromMetricsStore(obj interface{}) {
-	if err := dc.metricsStore.remove(obj.(runtime.Object)); err != nil {
-		dc.settings.TelemetrySettings.Logger.Error(
-			"failed to remove from metric cache",
-			zap.String("obj", reflect.TypeOf(obj).String()),
-			zap.Error(err),
-		)
-	}
-}
-
-func (dc *DataCollector) UpdateMetricsStore(obj interface{}, md pmetric.Metrics) {
-	if err := dc.metricsStore.update(obj.(runtime.Object), md); err != nil {
-		dc.settings.TelemetrySettings.Logger.Error(
-			"failed to update metric cache",
-			zap.String("obj", reflect.TypeOf(obj).String()),
-			zap.Error(err),
-		)
+		metricsBuilder:           metadata.NewMetricsBuilder(metricsBuilderConfig, set),
 	}
 }
 
 func (dc *DataCollector) CollectMetricData(currentTime time.Time) pmetric.Metrics {
-	return dc.metricsStore.getMetricData(currentTime)
-}
-
-// SyncMetrics updates the metric store with latest metrics from the kubernetes object.
-func (dc *DataCollector) SyncMetrics(obj interface{}) {
-	var md pmetric.Metrics
-
-	switch o := obj.(type) {
-	case *corev1.Pod:
-		md = pod.GetMetrics(dc.settings, dc.metricsBuilderConfig, o)
-	case *corev1.Node:
-		md = node.GetMetrics(dc.settings, dc.metricsBuilderConfig, o, dc.nodeConditionsToReport, dc.allocatableTypesToReport)
-	case *corev1.Namespace:
-		md = namespace.GetMetrics(dc.settings, dc.metricsBuilderConfig, o)
-	case *corev1.ReplicationController:
-		md = replicationcontroller.GetMetrics(dc.settings, dc.metricsBuilderConfig, o)
-	case *corev1.ResourceQuota:
-		md = resourcequota.GetMetrics(dc.settings, dc.metricsBuilderConfig, o)
-	case *appsv1.Deployment:
-		md = deployment.GetMetrics(dc.settings, dc.metricsBuilderConfig, o)
-	case *appsv1.ReplicaSet:
-		md = replicaset.GetMetrics(dc.settings, dc.metricsBuilderConfig, o)
-	case *appsv1.DaemonSet:
-		md = demonset.GetMetrics(dc.settings, dc.metricsBuilderConfig, o)
-	case *appsv1.StatefulSet:
-		md = statefulset.GetMetrics(dc.settings, dc.metricsBuilderConfig, o)
-	case *batchv1.Job:
-		md = jobs.GetMetrics(dc.settings, dc.metricsBuilderConfig, o)
-	case *batchv1.CronJob:
-		md = cronjob.GetMetrics(dc.settings, dc.metricsBuilderConfig, o)
-	case *batchv1beta1.CronJob:
-		md = cronjob.GetMetricsBeta(dc.settings, dc.metricsBuilderConfig, o)
-	case *autoscalingv2.HorizontalPodAutoscaler:
-		md = hpa.GetMetrics(dc.settings, dc.metricsBuilderConfig, o)
-	case *autoscalingv2beta2.HorizontalPodAutoscaler:
-		md = hpa.GetMetricsBeta(dc.settings, dc.metricsBuilderConfig, o)
-	case *quotav1.ClusterResourceQuota:
-		md = clusterresourcequota.GetMetrics(dc.settings, dc.metricsBuilderConfig, o)
-	default:
-		return
-	}
-
-	if md.DataPointCount() == 0 {
-		return
-	}
-
-	dc.UpdateMetricsStore(obj, md)
-}
-
-// SyncMetadata updates the metric store with latest metrics from the kubernetes object
-func (dc *DataCollector) SyncMetadata(obj interface{}) map[experimentalmetricmetadata.ResourceID]*metadata.KubernetesMetadata {
-	km := map[experimentalmetricmetadata.ResourceID]*metadata.KubernetesMetadata{}
-	switch o := obj.(type) {
-	case *corev1.Pod:
-		km = pod.GetMetadata(o, dc.metadataStore, dc.settings.TelemetrySettings.Logger)
-	case *corev1.Node:
-		km = node.GetMetadata(o)
-	case *corev1.ReplicationController:
-		km = replicationcontroller.GetMetadata(o)
-	case *appsv1.Deployment:
-		km = deployment.GetMetadata(o)
-	case *appsv1.ReplicaSet:
-		km = replicaset.GetMetadata(o)
-	case *appsv1.DaemonSet:
-		km = demonset.GetMetadata(o)
-	case *appsv1.StatefulSet:
-		km = statefulset.GetMetadata(o)
-	case *batchv1.Job:
-		km = jobs.GetMetadata(o)
-	case *batchv1.CronJob:
-		km = cronjob.GetMetadata(o)
-	case *batchv1beta1.CronJob:
-		km = cronjob.GetMetadataBeta(o)
-	case *autoscalingv2.HorizontalPodAutoscaler:
-		km = hpa.GetMetadata(o)
-	case *autoscalingv2beta2.HorizontalPodAutoscaler:
-		km = hpa.GetMetadataBeta(o)
-	}
-
-	return km
+	ts := pcommon.NewTimestampFromTime(currentTime)
+	customRMs := pmetric.NewResourceMetricsSlice()
+
+	dc.metadataStore.ForEach(gvk.Pod, func(o any) {
+		pod.RecordMetrics(dc.settings.Logger, dc.metricsBuilder, o.(*corev1.Pod), ts)
+	})
+	dc.metadataStore.ForEach(gvk.Node, func(o any) {
+		crm := node.CustomMetrics(dc.settings, dc.metricsBuilder.NewResourceBuilder(), o.(*corev1.Node),
+			dc.nodeConditionsToReport, dc.allocatableTypesToReport, ts)
+		if crm.ScopeMetrics().Len() > 0 {
+			crm.MoveTo(customRMs.AppendEmpty())
+		}
+	})
+	dc.metadataStore.ForEach(gvk.Namespace, func(o any) {
+		namespace.RecordMetrics(dc.metricsBuilder, o.(*corev1.Namespace), ts)
+	})
+	dc.metadataStore.ForEach(gvk.ReplicationController, func(o any) {
+		replicationcontroller.RecordMetrics(dc.metricsBuilder, o.(*corev1.ReplicationController), ts)
+	})
+	dc.metadataStore.ForEach(gvk.ResourceQuota, func(o any) {
+		resourcequota.RecordMetrics(dc.metricsBuilder, o.(*corev1.ResourceQuota), ts)
+	})
+	dc.metadataStore.ForEach(gvk.Deployment, func(o any) {
+		deployment.RecordMetrics(dc.metricsBuilder, o.(*appsv1.Deployment), ts)
+	})
+	dc.metadataStore.ForEach(gvk.ReplicaSet, func(o any) {
+		replicaset.RecordMetrics(dc.metricsBuilder, o.(*appsv1.ReplicaSet), ts)
+	})
+	dc.metadataStore.ForEach(gvk.DaemonSet, func(o any) {
+		demonset.RecordMetrics(dc.metricsBuilder, o.(*appsv1.DaemonSet), ts)
+	})
+	dc.metadataStore.ForEach(gvk.StatefulSet, func(o any) {
+		statefulset.RecordMetrics(dc.metricsBuilder, o.(*appsv1.StatefulSet), ts)
+	})
+	dc.metadataStore.ForEach(gvk.Job, func(o any) {
+		jobs.RecordMetrics(dc.metricsBuilder, o.(*batchv1.Job), ts)
+	})
+	dc.metadataStore.ForEach(gvk.CronJob, func(o any) {
+		cronjob.RecordMetrics(dc.metricsBuilder, o.(*batchv1.CronJob), ts)
+	})
+	dc.metadataStore.ForEach(gvk.CronJobBeta, func(o any) {
+		cronjob.RecordMetricsBeta(dc.metricsBuilder, o.(*batchv1beta1.CronJob), ts)
+	})
+	dc.metadataStore.ForEach(gvk.HorizontalPodAutoscaler, func(o any) {
+		hpa.RecordMetrics(dc.metricsBuilder, o.(*autoscalingv2.HorizontalPodAutoscaler), ts)
+	})
+	dc.metadataStore.ForEach(gvk.HorizontalPodAutoscalerBeta, func(o any) {
+		hpa.RecordMetricsBeta(dc.metricsBuilder, o.(*autoscalingv2beta2.HorizontalPodAutoscaler), ts)
+	})
+	dc.metadataStore.ForEach(gvk.ClusterResourceQuota, func(o any) {
+		clusterresourcequota.RecordMetrics(dc.metricsBuilder, o.(*quotav1.ClusterResourceQuota), ts)
+	})
+
+	m := dc.metricsBuilder.Emit()
+	customRMs.MoveAndAppendTo(m.ResourceMetrics())
+	return m
 }
