Skip to content

Commit cabaf8f

Browse files
jinja2zeck-ops
authored and committed
[receiver/k8scluster] add attributes to node and pod entities (open-telemetry#36862)
#### Description Adds the following additional metadata attributes to the node and pod entities - k8s.pod.phase - This is similar to the existing metric k8s.pod.phase. The values can be Pending, Running, Succeeded, Failed, Unknown. k8s.pod.status_reason - Similar to the k8s.pod.status_reason metric. A brief CamelCase message indicating details about why the pod is in this state. Example values - Evicted, NodeLost, UnexpectedAdmissionError. k8s.node.condition_{type} - Similar to the existing metrics enabled by the config node_conditions_to_report, e.g. k8s.node.condition_ready. Adds [k8s default kubelet conditions](https://pkg.go.dev/k8s.io/api/core/v1#NodeConditionType) only. We'll be tracking changes to entity attributes for k8s in [this](open-telemetry/semantic-conventions#1693) SemConv issue. #### Link to tracking issue Fixes - open-telemetry#36859 #### Testing Added unit tests and verified in cluster
1 parent 3df9356 commit cabaf8f

File tree

6 files changed

+268
-18
lines changed

6 files changed

+268
-18
lines changed

.chloggen/add_k8scluster_attr.yaml

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: enhancement
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
7+
component: k8sclusterreceiver
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: Add additional attributes to node and pod entities
11+
12+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
13+
issues: [36859]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext: |
19+
Adds the following attributes to node and pod metadata/entities:
20+
- `k8s.pod.phase`: The phase of a Pod indicates where the Pod is in its lifecycle. E.g. 'Pending', 'Running'
21+
- `k8s.pod.status_reason`: A brief message indicating details about why the pod is in this state. E.g. 'Evicted'
22+
- `k8s.node.condition_*`: The condition of a node, e.g. `k8s.node.condition_ready`. The value can be `true`, `false`, or `unknown`.
23+
24+
# If your change doesn't affect end users or the exported elements of any package,
25+
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
26+
# Optional: The change log or logs in which this entry should be included.
27+
# e.g. '[user]' or '[user, api]'
28+
# Include 'user' if the change is relevant to end users.
29+
# Include 'api' if there is a change to a library API.
30+
# Default: '[user]'
31+
change_logs: [user]

receiver/k8sclusterreceiver/internal/node/nodes.go

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,9 @@ import (
2323
)
2424

2525
const (
26-
// Keys for node metadata.
27-
nodeCreationTime = "node.creation_timestamp"
26+
// Keys for node metadata and entity attributes. These are NOT used by resource attributes.
27+
nodeCreationTime = "node.creation_timestamp"
28+
k8sNodeConditionPrefix = "k8s.node.condition"
2829
)
2930

3031
// Transform transforms the node to remove the fields that we don't use to reduce RAM utilization.
@@ -151,6 +152,24 @@ func GetMetadata(node *corev1.Node) map[experimentalmetricmetadata.ResourceID]*m
151152
meta[conventions.AttributeK8SNodeName] = node.Name
152153
meta[nodeCreationTime] = node.GetCreationTimestamp().Format(time.RFC3339)
153154

155+
// Node can have many additional conditions (gke has 18 on v1.29). Bad thresholds/implementations
156+
// of custom conditions can cause value to oscillate between true/false frequently. So, only sending the node
157+
// conditions (ready, pressure, network-unavailable) that are set by kubelet to avoid noise.
158+
// https://pkg.go.dev/k8s.io/api/core/v1#NodeConditionType
159+
kubeletConditions := map[corev1.NodeConditionType]struct{}{
160+
corev1.NodeReady: {},
161+
corev1.NodeMemoryPressure: {},
162+
corev1.NodeDiskPressure: {},
163+
corev1.NodePIDPressure: {},
164+
corev1.NodeNetworkUnavailable: {},
165+
}
166+
167+
for _, c := range node.Status.Conditions {
168+
if _, ok := kubeletConditions[c.Type]; ok {
169+
meta[fmt.Sprintf("%s_%s", k8sNodeConditionPrefix, strcase.ToSnake(string(c.Type)))] = strings.ToLower(string(c.Status))
170+
}
171+
}
172+
154173
nodeID := experimentalmetricmetadata.ResourceID(node.UID)
155174
return map[experimentalmetricmetadata.ResourceID]*metadata.KubernetesMetadata{
156175
nodeID: {

receiver/k8sclusterreceiver/internal/node/nodes_test.go

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,12 @@ import (
1313
"go.opentelemetry.io/collector/pdata/pcommon"
1414
"go.opentelemetry.io/collector/pdata/pmetric"
1515
"go.opentelemetry.io/collector/receiver/receivertest"
16+
conventions "go.opentelemetry.io/collector/semconv/v1.18.0"
1617
corev1 "k8s.io/api/core/v1"
1718
"k8s.io/apimachinery/pkg/api/resource"
1819
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1920

21+
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/experimentalmetricmetadata"
2022
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/golden"
2123
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest/pmetrictest"
2224
"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8sclusterreceiver/internal/metadata"
@@ -263,3 +265,111 @@ func TestTransform(t *testing.T) {
263265
}
264266
assert.Equal(t, wantNode, Transform(originalNode))
265267
}
268+
269+
func TestNodeMetadata(t *testing.T) {
270+
creationTimestamp := time.Now()
271+
node := &corev1.Node{
272+
ObjectMeta: metav1.ObjectMeta{
273+
Name: "test-node",
274+
UID: "test-node-uid",
275+
Labels: map[string]string{"env": "production"},
276+
CreationTimestamp: metav1.Time{Time: creationTimestamp},
277+
},
278+
Status: corev1.NodeStatus{
279+
Conditions: []corev1.NodeCondition{
280+
{
281+
Type: "FrequentUnregisterNetDevice",
282+
Status: "False",
283+
LastHeartbeatTime: metav1.Time{
284+
Time: time.Now(),
285+
},
286+
LastTransitionTime: metav1.Time{
287+
Time: time.Now(),
288+
},
289+
Message: "node is functioning properly",
290+
Reason: "NoFrequentUnregisterNetDevice",
291+
},
292+
{
293+
Type: "MemoryPressure",
294+
Status: "False",
295+
LastHeartbeatTime: metav1.Time{
296+
Time: time.Now(),
297+
},
298+
LastTransitionTime: metav1.Time{
299+
Time: time.Now(),
300+
},
301+
Reason: "KubeletHasSufficientMemory",
302+
Message: "kubelet has sufficient memory available",
303+
},
304+
{
305+
Type: "DiskPressure",
306+
Status: "False",
307+
LastHeartbeatTime: metav1.Time{
308+
Time: time.Now(),
309+
},
310+
LastTransitionTime: metav1.Time{
311+
Time: time.Now(),
312+
},
313+
Reason: "KubeletHasNoDiskPressure",
314+
Message: "kubelet has no disk pressure",
315+
},
316+
{
317+
Type: "PIDPressure",
318+
Status: "False",
319+
LastHeartbeatTime: metav1.Time{
320+
Time: time.Now(),
321+
},
322+
LastTransitionTime: metav1.Time{
323+
Time: time.Now(),
324+
},
325+
Reason: "KubeletHasSufficientPID",
326+
Message: "kubelet has sufficient PID available",
327+
},
328+
{
329+
Type: "Ready",
330+
Status: "True",
331+
LastHeartbeatTime: metav1.Time{
332+
Time: time.Now(),
333+
},
334+
LastTransitionTime: metav1.Time{
335+
Time: time.Now(),
336+
},
337+
Reason: "KubeletReady",
338+
Message: "kubelet is posting ready status",
339+
},
340+
},
341+
NodeInfo: corev1.NodeSystemInfo{
342+
MachineID: "70ebe86154de42bda73a4ffe181afa3d",
343+
SystemUUID: "70ebe86154de42bda73a4ffe181afa3d",
344+
BootID: "541c31d4-d1e2-4660-a3b2-484abbb1cbce",
345+
KernelVersion: "6.10.4-linuxkit",
346+
OSImage: "Debian GNU/Linux 12 (bookworm)",
347+
ContainerRuntimeVersion: "containerd://1.7.15",
348+
KubeletVersion: "v1.30.0",
349+
OperatingSystem: "linux",
350+
Architecture: "amd64",
351+
},
352+
},
353+
}
354+
355+
expectedMeta := map[experimentalmetricmetadata.ResourceID]*metadata.KubernetesMetadata{
356+
experimentalmetricmetadata.ResourceID("test-node-uid"): {
357+
EntityType: "k8s.node",
358+
ResourceIDKey: "k8s.node.uid",
359+
ResourceID: experimentalmetricmetadata.ResourceID("test-node-uid"),
360+
Metadata: map[string]string{
361+
"env": "production",
362+
conventions.AttributeK8SNodeName: "test-node",
363+
"k8s.node.condition_ready": "true",
364+
"k8s.node.condition_memory_pressure": "false",
365+
"k8s.node.condition_disk_pressure": "false",
366+
"k8s.node.condition_pid_pressure": "false",
367+
"node.creation_timestamp": creationTimestamp.Format(time.RFC3339),
368+
},
369+
},
370+
}
371+
372+
actualMeta := GetMetadata(node)
373+
require.NotNil(t, actualMeta)
374+
require.Equal(t, expectedMeta, actualMeta)
375+
}

receiver/k8sclusterreceiver/internal/pod/pods.go

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,10 @@ import (
2828
)
2929

3030
const (
31-
// Keys for pod metadata.
31+
// Keys for pod metadata and entity attributes. These are NOT used by resource attributes.
3232
podCreationTime = "pod.creation_timestamp"
33+
podPhase = "k8s.pod.phase"
34+
podStatusReason = "k8s.pod.status_reason"
3335
)
3436

3537
// Transform transforms the pod to remove the fields that we don't use to reduce RAM utilization.
@@ -43,6 +45,7 @@ func Transform(pod *corev1.Pod) *corev1.Pod {
4345
Status: corev1.PodStatus{
4446
Phase: pod.Status.Phase,
4547
QOSClass: pod.Status.QOSClass,
48+
Reason: pod.Status.Reason,
4649
},
4750
}
4851
for _, cs := range pod.Status.ContainerStatuses {
@@ -126,6 +129,15 @@ func GetMetadata(pod *corev1.Pod, mc *metadata.Store, logger *zap.Logger) map[ex
126129
meta := maps.MergeStringMaps(map[string]string{}, pod.Labels)
127130

128131
meta[podCreationTime] = pod.CreationTimestamp.Format(time.RFC3339)
132+
phase := pod.Status.Phase
133+
if phase == "" {
134+
phase = corev1.PodUnknown
135+
}
136+
meta[podPhase] = string(phase)
137+
reason := pod.Status.Reason
138+
if reason != "" {
139+
meta[podStatusReason] = reason
140+
}
129141

130142
for _, or := range pod.OwnerReferences {
131143
kind := strings.ToLower(or.Kind)

receiver/k8sclusterreceiver/internal/pod/pods_test.go

Lines changed: 73 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -247,8 +247,9 @@ func expectedKubernetesMetadata(to testCaseOptions) map[experimentalmetricmetada
247247
ResourceIDKey: "k8s.pod.uid",
248248
ResourceID: experimentalmetricmetadata.ResourceID(podUIDLabel),
249249
Metadata: map[string]string{
250-
kindNameLabel: kindObjName,
251-
kindUIDLabel: kindObjUID,
250+
kindNameLabel: kindObjName,
251+
kindUIDLabel: kindObjUID,
252+
"k8s.pod.phase": "Unknown", // Default value when phase is not set.
252253
},
253254
},
254255
}
@@ -415,6 +416,7 @@ func TestTransform(t *testing.T) {
415416
},
416417
Status: corev1.PodStatus{
417418
Phase: corev1.PodRunning,
419+
Reason: "Evicted",
418420
HostIP: "192.168.1.100",
419421
PodIP: "10.244.0.5",
420422
StartTime: &v1.Time{Time: v1.Now().Add(-5 * time.Minute)},
@@ -463,7 +465,8 @@ func TestTransform(t *testing.T) {
463465
},
464466
},
465467
Status: corev1.PodStatus{
466-
Phase: corev1.PodRunning,
468+
Phase: corev1.PodRunning,
469+
Reason: "Evicted",
467470
ContainerStatuses: []corev1.ContainerStatus{
468471
{
469472
Name: "my-container",
@@ -478,3 +481,70 @@ func TestTransform(t *testing.T) {
478481
}
479482
assert.Equal(t, wantPod, Transform(originalPod))
480483
}
484+
485+
func TestPodMetadata(t *testing.T) {
486+
tests := []struct {
487+
name string
488+
statusPhase corev1.PodPhase
489+
statusReason string
490+
expectedMetadata map[string]string
491+
}{
492+
{
493+
name: "Pod with status reason",
494+
statusPhase: corev1.PodFailed,
495+
statusReason: "Evicted",
496+
expectedMetadata: map[string]string{
497+
"k8s.pod.phase": "Failed",
498+
"k8s.pod.status_reason": "Evicted",
499+
"k8s.workload.kind": "Deployment",
500+
"k8s.workload.name": "test-deployment-0",
501+
"k8s.replicaset.name": "test-replicaset-0",
502+
"k8s.replicaset.uid": "test-replicaset-0-uid",
503+
"k8s.deployment.name": "test-deployment-0",
504+
"k8s.deployment.uid": "test-deployment-0-uid",
505+
},
506+
},
507+
{
508+
name: "Pod without status reason",
509+
statusPhase: corev1.PodRunning,
510+
statusReason: "",
511+
expectedMetadata: map[string]string{
512+
"k8s.pod.phase": "Running",
513+
"k8s.workload.kind": "Deployment",
514+
"k8s.workload.name": "test-deployment-0",
515+
"k8s.replicaset.name": "test-replicaset-0",
516+
"k8s.replicaset.uid": "test-replicaset-0-uid",
517+
"k8s.deployment.name": "test-deployment-0",
518+
"k8s.deployment.uid": "test-deployment-0-uid",
519+
},
520+
},
521+
}
522+
523+
for _, tt := range tests {
524+
t.Run(tt.name, func(t *testing.T) {
525+
pod := podWithOwnerReference("ReplicaSet")
526+
pod.Status.Phase = tt.statusPhase
527+
pod.Status.Reason = tt.statusReason
528+
529+
metadataStore := mockMetadataStore(testCaseOptions{
530+
kind: "ReplicaSet",
531+
withParentOR: true,
532+
})
533+
logger := zap.NewNop()
534+
meta := GetMetadata(pod, metadataStore, logger)
535+
536+
require.NotNil(t, meta)
537+
require.Contains(t, meta, experimentalmetricmetadata.ResourceID("test-pod-0-uid"))
538+
podMeta := meta["test-pod-0-uid"].Metadata
539+
540+
allExpectedMetadata := make(map[string]string)
541+
for key, value := range commonPodMetadata {
542+
allExpectedMetadata[key] = value
543+
}
544+
for key, value := range tt.expectedMetadata {
545+
allExpectedMetadata[key] = value
546+
}
547+
assert.Equal(t, allExpectedMetadata, podMeta)
548+
})
549+
}
550+
}

0 commit comments

Comments
 (0)