Skip to content

istio k8s discovery #5854

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 27 commits into from
Mar 31, 2025
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
1cfa1b3
Spike: test istio k8s discovery
atoulme Feb 1, 2025
bd7e504
add more metrics
atoulme Mar 19, 2025
84d9177
Merge branch 'main' into istio
jinja2 Mar 20, 2025
636d324
Spike: test istio k8s discovery
atoulme Feb 1, 2025
b7e5042
add more metrics
atoulme Mar 19, 2025
08086a3
update discover rules and config
jinja2 Mar 26, 2025
771baee
Merge branch 'istio' of github.com:signalfx/splunk-otel-collector int…
jinja2 Mar 26, 2025
e95f3d7
Merge branch 'main' into istio
jinja2 Mar 26, 2025
125f8ae
add discovery test for istio
jinja2 Mar 27, 2025
60f06ab
Merge branch 'main' into istio
jinja2 Mar 27, 2025
6ab6cde
changelog
jinja2 Mar 27, 2025
d6661ef
Merge branch 'main' into istio
jinja2 Mar 27, 2025
1cecbd0
fix test
jinja2 Mar 27, 2025
029c9aa
fix lint
jinja2 Mar 27, 2025
08ef05e
add doc
jinja2 Mar 27, 2025
4b9565f
Merge branch 'main' into istio
jinja2 Mar 27, 2025
350050c
fix test
jinja2 Mar 27, 2025
99bdbea
Merge branch 'main' into istio
jinja2 Mar 27, 2025
fdd4157
multiline the config and check more attrs
jinja2 Mar 28, 2025
77dc0d6
Merge branch 'main' into istio
jinja2 Mar 28, 2025
7172616
add metrics to keep
jinja2 Mar 28, 2025
5de7670
update metric for envoy to a more common one
jinja2 Mar 28, 2025
96733df
Merge branch 'main' into istio
jinja2 Mar 28, 2025
5f9b0c8
Merge branch 'main' into istio
jinja2 Mar 31, 2025
701b3ac
disable prometheus/istio receiver
jinja2 Mar 31, 2025
d75f696
Merge branch 'main' into istio
jinja2 Mar 31, 2025
c19856e
update changelog
jinja2 Mar 31, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions .github/workflows/integration-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ jobs:
id: get-matrix-k8s
run: |
includes=""
for service in "envoy"; do
for service in "envoy" "istio"; do
for arch in "amd64"; do
includes="${includes},{\"SERVICE\": \"${service}\", \"ARCH\": \"${arch}\"}"
done
Expand Down Expand Up @@ -432,7 +432,9 @@ jobs:
cluster_name: kind
- name: Deploy service under test
run: |
kubectl apply -f k8s/${{ matrix.SERVICE }}/*.yaml
for f in k8s/${{ matrix.SERVICE }}/*.sh; do
bash "$f"
done
- uses: actions/setup-go@v5
with:
go-version: ${{ env.GO_VERSION }}
Expand Down
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,10 @@ smartagent-integration-test:
integration-test-envoy-discovery-k8s:
@set -e; cd tests && $(GOTEST_SERIAL) $(BUILD_INFO_TESTS) --tags=discovery_integration_envoy_k8s -v -timeout 5m -count 1 ./...

# Runs the Go tests under tests/ that are selected by the
# discovery_integration_istio_k8s build tag, verbosely, with a 5 minute timeout
# and test caching disabled (-count 1).
# GOTEST_SERIAL and BUILD_INFO_TESTS are defined elsewhere in this Makefile.
.PHONY: integration-test-istio-discovery-k8s
integration-test-istio-discovery-k8s:
	@set -e; cd tests && $(GOTEST_SERIAL) $(BUILD_INFO_TESTS) --tags=discovery_integration_istio_k8s -v -timeout 5m -count 1 ./...

.PHONY: test-with-cover
test-with-cover:
@echo Verifying that all packages have test files to count in coverage
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#####################################################################################
# This file is generated by the Splunk Distribution of the OpenTelemetry Collector. #
# #
# It reflects the default configuration bundled in the Collector executable for use #
# in discovery mode (--discovery) and is provided for reference or customization. #
# Please note that any changes made to this file will need to be reconciled during #
# upgrades of the Collector. #
#####################################################################################
# prometheus/istio:
# enabled: true
# rule:
# k8s_observer: type == "pod" and ("istio.io/rev" in annotations or name matches "istio.*")
# config:
# default:
# config:
# scrape_configs:
# - job_name: 'istio'
# metrics_path: '`"prometheus.io/path" in annotations ? annotations["prometheus.io/path"] : "/metrics"`'
# scrape_interval: 10s
# static_configs:
# - targets: ['`endpoint`:`"prometheus.io/port" in annotations ? annotations["prometheus.io/port"] : 15090`']
# metric_relabel_configs:
# - source_labels: [__name__]
# action: keep
# regex: (envoy_cluster_lb_healthy_panic|envoy_cluster_manager_warming_clusters|envoy_cluster_membership_healthy|envoy_cluster_membership_total|envoy_cluster_ssl_handshake|envoy_cluster_ssl_session_reused|envoy_cluster_ssl_versions_TLSv1_2|envoy_cluster_upstream_cx_active|envoy_cluster_upstream_cx_close_notify|envoy_cluster_upstream_cx_connect_attempts_exceeded|envoy_cluster_upstream_cx_connect_ms_sum|envoy_cluster_upstream_cx_connect_timeout|envoy_cluster_upstream_cx_destroy_local_with_active_rq|envoy_cluster_upstream_cx_http1_total|envoy_cluster_upstream_cx_http2_total|envoy_cluster_upstream_cx_idle_timeout|envoy_cluster_upstream_cx_max_requests|envoy_cluster_upstream_cx_none_healthy|envoy_cluster_upstream_cx_pool_overflow|envoy_cluster_upstream_cx_protocol_error|envoy_cluster_upstream_cx_total|envoy_cluster_upstream_rq_4xx|envoy_cluster_upstream_rq_5xx|envoy_cluster_upstream_rq_active|envoy_cluster_upstream_rq_cancelled|envoy_cluster_upstream_rq_completed|envoy_cluster_upstream_rq_pending_active|envoy_cluster_upstream_rq_retry|envoy_cluster_upstream_rq_retry_limit_exceeded|envoy_cluster_upstream_rq_timeout|envoy_cluster_upstream_rq_tx_reset|envoy_cluster_upstream_rq_time|envoy_cluster_upstream_rq_xx|envoy_listener_downstream_cx_total|envoy_listener_ssl_versions_TLSv1_3|envoy_server_live|envoy_server_memory_allocated|envoy_server_memory_heap_size|envoy_server_total_connections|envoy_server_uptime|istio_mesh_connections_from_logs|istio_monitor_pods_without_sidecars|istio_request_bytes|istio_request_duration_milliseconds|istio_request_messages_total|istio_requests_total|istio_response_messages_total|istio_tcp_connections_closed_total|istio_tcp_connections_opened_total|istio_tcp_received_bytes_total|istio_tcp_response_bytes_total|pilot_conflict_inbound_listener|pilot_eds_no_instances|pilot_k8s_cfg_events|pilot_k8s_endpoints_pending_pod|pilot_k8s_endpoints_with_no_pods|pilot_no_ip|pilot_proxy_convergence_time|pilot_proxy_queue_time|pilot_services|pilot_xds_cds_reject|pilot_xds_eds_reject|pilot_xds_expired_nonce|pilot_xds_lds_reject|pilot_xds_push_context_errors|pilot_xds_push_time|pilot_xds_rds_reject|pilot_xds_send_time|pilot_xds_write_timeout)(?:_sum|_count|_bucket)?
# - action: labeldrop
# regex: source_cluster|destination_cluster
# status:
# metrics:
# - status: successful
# strict: envoy_cluster_upstream_cx_active
# message: istio prometheus receiver is working for envoy!
# - status: successful
# strict: pilot_k8s_cfg_events
# message: istio prometheus receiver is working for istiod!
# statements:
# - status: failed
# regexp: "connection refused"
# message: The container is not serving http connections.
# - status: failed
# regexp: "dial tcp: lookup"
# message: Unable to resolve istio prometheus tcp endpoint
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#####################################################################################
# Do not edit manually! #
# All changes must be made to associated .tmpl file before running 'make bundle.d'. #
#####################################################################################
prometheus/istio:
  enabled: true
  rule:
    # Match pods that carry the "istio.io/rev" annotation or whose name matches "istio.*".
    k8s_observer: type == "pod" and ("istio.io/rev" in annotations or name matches "istio.*")
  config:
    default:
      config:
        scrape_configs:
          - job_name: 'istio'
            # Use the prometheus.io/path annotation when present, otherwise "/metrics".
            metrics_path: '`"prometheus.io/path" in annotations ? annotations["prometheus.io/path"] : "/metrics"`'
            scrape_interval: 10s
            static_configs:
              # Use the prometheus.io/port annotation when present, otherwise port 15090.
              - targets: ['`endpoint`:`"prometheus.io/port" in annotations ? annotations["prometheus.io/port"] : 15090`']
            metric_relabel_configs:
              # Keep only the listed envoy_*, istio_* and pilot_* metrics (optionally suffixed _sum/_count/_bucket).
              # The pattern must stay on a single line: a wrapped plain scalar would get a space folded into it.
              - source_labels: [__name__]
                action: keep
                regex: (envoy_cluster_lb_healthy_panic|envoy_cluster_manager_warming_clusters|envoy_cluster_membership_healthy|envoy_cluster_membership_total|envoy_cluster_ssl_handshake|envoy_cluster_ssl_session_reused|envoy_cluster_ssl_versions_TLSv1_2|envoy_cluster_upstream_cx_active|envoy_cluster_upstream_cx_close_notify|envoy_cluster_upstream_cx_connect_attempts_exceeded|envoy_cluster_upstream_cx_connect_ms_sum|envoy_cluster_upstream_cx_connect_timeout|envoy_cluster_upstream_cx_destroy_local_with_active_rq|envoy_cluster_upstream_cx_http1_total|envoy_cluster_upstream_cx_http2_total|envoy_cluster_upstream_cx_idle_timeout|envoy_cluster_upstream_cx_max_requests|envoy_cluster_upstream_cx_none_healthy|envoy_cluster_upstream_cx_pool_overflow|envoy_cluster_upstream_cx_protocol_error|envoy_cluster_upstream_cx_total|envoy_cluster_upstream_rq_4xx|envoy_cluster_upstream_rq_5xx|envoy_cluster_upstream_rq_active|envoy_cluster_upstream_rq_cancelled|envoy_cluster_upstream_rq_completed|envoy_cluster_upstream_rq_pending_active|envoy_cluster_upstream_rq_retry|envoy_cluster_upstream_rq_retry_limit_exceeded|envoy_cluster_upstream_rq_timeout|envoy_cluster_upstream_rq_tx_reset|envoy_cluster_upstream_rq_time|envoy_cluster_upstream_rq_xx|envoy_listener_downstream_cx_total|envoy_listener_ssl_versions_TLSv1_3|envoy_server_live|envoy_server_memory_allocated|envoy_server_memory_heap_size|envoy_server_total_connections|envoy_server_uptime|istio_mesh_connections_from_logs|istio_monitor_pods_without_sidecars|istio_request_bytes|istio_request_duration_milliseconds|istio_request_messages_total|istio_requests_total|istio_response_messages_total|istio_tcp_connections_closed_total|istio_tcp_connections_opened_total|istio_tcp_received_bytes_total|istio_tcp_response_bytes_total|pilot_conflict_inbound_listener|pilot_eds_no_instances|pilot_k8s_cfg_events|pilot_k8s_endpoints_pending_pod|pilot_k8s_endpoints_with_no_pods|pilot_no_ip|pilot_proxy_convergence_time|pilot_proxy_queue_time|pilot_services|pilot_xds_cds_reject|pilot_xds_eds_reject|pilot_xds_expired_nonce|pilot_xds_lds_reject|pilot_xds_push_context_errors|pilot_xds_push_time|pilot_xds_rds_reject|pilot_xds_send_time|pilot_xds_write_timeout)(?:_sum|_count|_bucket)?
              # Drop the source_cluster and destination_cluster labels from the kept metrics.
              - action: labeldrop
                regex: source_cluster|destination_cluster
  status:
    metrics:
      - status: successful
        strict: envoy_cluster_upstream_cx_active
        message: istio prometheus receiver is working for envoy!
      - status: successful
        strict: pilot_k8s_cfg_events
        message: istio prometheus receiver is working for istiod!
    statements:
      - status: failed
        regexp: "connection refused"
        message: The container is not serving http connections.
      - status: failed
        regexp: "dial tcp: lookup"
        message: Unable to resolve istio prometheus tcp endpoint
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{{ receiver "prometheus/istio" }}:
  enabled: true
  rule:
    k8s_observer: type == "pod" and ("istio.io/rev" in annotations or name matches "istio.*")
  config:
    default:
      config:
        scrape_configs:
          # NOTE(review): job_name shows up as service.name in the final metrics; feedback on a better name is welcome.
          - job_name: 'istio'
            metrics_path: '`"prometheus.io/path" in annotations ? annotations["prometheus.io/path"] : "/metrics"`'
            scrape_interval: 10s
            static_configs:
              - targets: ['`endpoint`:`"prometheus.io/port" in annotations ? annotations["prometheus.io/port"] : 15090`']
            metric_relabel_configs:
              # The keep pattern must stay on a single line: a wrapped plain scalar would get a space folded into it.
              - source_labels: [__name__]
                action: keep
                regex: (envoy_cluster_lb_healthy_panic|envoy_cluster_manager_warming_clusters|envoy_cluster_membership_healthy|envoy_cluster_membership_total|envoy_cluster_ssl_handshake|envoy_cluster_ssl_session_reused|envoy_cluster_ssl_versions_TLSv1_2|envoy_cluster_upstream_cx_active|envoy_cluster_upstream_cx_close_notify|envoy_cluster_upstream_cx_connect_attempts_exceeded|envoy_cluster_upstream_cx_connect_ms_sum|envoy_cluster_upstream_cx_connect_timeout|envoy_cluster_upstream_cx_destroy_local_with_active_rq|envoy_cluster_upstream_cx_http1_total|envoy_cluster_upstream_cx_http2_total|envoy_cluster_upstream_cx_idle_timeout|envoy_cluster_upstream_cx_max_requests|envoy_cluster_upstream_cx_none_healthy|envoy_cluster_upstream_cx_pool_overflow|envoy_cluster_upstream_cx_protocol_error|envoy_cluster_upstream_cx_total|envoy_cluster_upstream_rq_4xx|envoy_cluster_upstream_rq_5xx|envoy_cluster_upstream_rq_active|envoy_cluster_upstream_rq_cancelled|envoy_cluster_upstream_rq_completed|envoy_cluster_upstream_rq_pending_active|envoy_cluster_upstream_rq_retry|envoy_cluster_upstream_rq_retry_limit_exceeded|envoy_cluster_upstream_rq_timeout|envoy_cluster_upstream_rq_tx_reset|envoy_cluster_upstream_rq_time|envoy_cluster_upstream_rq_xx|envoy_listener_downstream_cx_total|envoy_listener_ssl_versions_TLSv1_3|envoy_server_live|envoy_server_memory_allocated|envoy_server_memory_heap_size|envoy_server_total_connections|envoy_server_uptime|istio_mesh_connections_from_logs|istio_monitor_pods_without_sidecars|istio_request_bytes|istio_request_duration_milliseconds|istio_request_messages_total|istio_requests_total|istio_response_messages_total|istio_tcp_connections_closed_total|istio_tcp_connections_opened_total|istio_tcp_received_bytes_total|istio_tcp_response_bytes_total|pilot_conflict_inbound_listener|pilot_eds_no_instances|pilot_k8s_cfg_events|pilot_k8s_endpoints_pending_pod|pilot_k8s_endpoints_with_no_pods|pilot_no_ip|pilot_proxy_convergence_time|pilot_proxy_queue_time|pilot_services|pilot_xds_cds_reject|pilot_xds_eds_reject|pilot_xds_expired_nonce|pilot_xds_lds_reject|pilot_xds_push_context_errors|pilot_xds_push_time|pilot_xds_rds_reject|pilot_xds_send_time|pilot_xds_write_timeout)(?:_sum|_count|_bucket)?
              - action: labeldrop
                regex: source_cluster|destination_cluster
  status:
    metrics:
      - status: successful
        strict: envoy_cluster_upstream_cx_active
        message: istio prometheus receiver is working for envoy!
      - status: successful
        strict: pilot_k8s_cfg_events
        message: istio prometheus receiver is working for istiod!
    statements:
      - status: failed
        regexp: "connection refused"
        message: The container is not serving http connections.
      - status: failed
        regexp: "dial tcp: lookup"
        message: Unable to resolve istio prometheus tcp endpoint
2 changes: 2 additions & 0 deletions internal/confmapprovider/discovery/bundle/bundle_gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
//go:generate discoverybundler --render --commented --dir ../../../../cmd/otelcol/config/collector/config.d.linux/receivers -t bundle.d/receivers/apache.discovery.yaml.tmpl
//go:generate discoverybundler --render --template bundle.d/receivers/envoy.discovery.yaml.tmpl
//go:generate discoverybundler --render --commented --dir ../../../../cmd/otelcol/config/collector/config.d.linux/receivers -t bundle.d/receivers/envoy.discovery.yaml.tmpl
//go:generate discoverybundler --render --template bundle.d/receivers/istio.discovery.yaml.tmpl
//go:generate discoverybundler --render --commented --dir ../../../../cmd/otelcol/config/collector/config.d.linux/receivers -t bundle.d/receivers/istio.discovery.yaml.tmpl
//go:generate discoverybundler --render --template bundle.d/receivers/jmx-cassandra.discovery.yaml.tmpl
//go:generate discoverybundler --render --commented --dir ../../../../cmd/otelcol/config/collector/config.d.linux/receivers -t bundle.d/receivers/jmx-cassandra.discovery.yaml.tmpl
//go:generate discoverybundler --render --template bundle.d/receivers/kafkametrics.discovery.yaml.tmpl
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ func TestBundleDir(t *testing.T) {
require.Equal(t, []string{
"bundle.d/receivers/apache.discovery.yaml",
"bundle.d/receivers/envoy.discovery.yaml",
"bundle.d/receivers/istio.discovery.yaml",
"bundle.d/receivers/jmx-cassandra.discovery.yaml",
"bundle.d/receivers/kafkametrics.discovery.yaml",
"bundle.d/receivers/mongodb.discovery.yaml",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
//go:embed bundle.d/extensions/k8s-observer.discovery.yaml
//go:embed bundle.d/receivers/apache.discovery.yaml
//go:embed bundle.d/receivers/envoy.discovery.yaml
//go:embed bundle.d/receivers/istio.discovery.yaml
//go:embed bundle.d/receivers/jmx-cassandra.discovery.yaml
//go:embed bundle.d/receivers/kafkametrics.discovery.yaml
//go:embed bundle.d/receivers/mongodb.discovery.yaml
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
//go:embed bundle.d/extensions/k8s-observer.discovery.yaml
//go:embed bundle.d/receivers/apache.discovery.yaml
//go:embed bundle.d/receivers/envoy.discovery.yaml
//go:embed bundle.d/receivers/istio.discovery.yaml
//go:embed bundle.d/receivers/jmx-cassandra.discovery.yaml
//go:embed bundle.d/receivers/kafkametrics.discovery.yaml
//go:embed bundle.d/receivers/mongodb.discovery.yaml
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ func TestBundleDir(t *testing.T) {
require.Equal(t, []string{
"bundle.d/receivers/apache.discovery.yaml",
"bundle.d/receivers/envoy.discovery.yaml",
"bundle.d/receivers/istio.discovery.yaml",
"bundle.d/receivers/jmx-cassandra.discovery.yaml",
"bundle.d/receivers/kafkametrics.discovery.yaml",
"bundle.d/receivers/mongodb.discovery.yaml",
Expand Down
2 changes: 2 additions & 0 deletions internal/confmapprovider/discovery/bundle/components.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ var (
receivers = []string{
"apache",
"envoy",
"istio",
"jmx-cassandra",
"kafkametrics",
"mongodb",
Expand Down Expand Up @@ -68,6 +69,7 @@ var (
windows := map[string]struct{}{
"apache": {},
"envoy": {},
"istio": {},
"jmx-cassandra": {},
"kafkametrics": {},
"mongodb": {},
Expand Down
13 changes: 13 additions & 0 deletions internal/receiver/discoveryreceiver/endpoint_tracker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,19 @@ var (
KubeletEndpointPort: 1,
},
}

// podContainerEndpoint is an observer.Endpoint fixture whose Details are an
// observer.PodContainer: image "redis:123", container ID "123", in a pod named "redis".
// NOTE(review): Name is "k8s.node.name", which reads like a leftover from a node
// fixture — confirm the container name is intentional.
podContainerEndpoint = observer.Endpoint{
ID: observer.EndpointID("pod.container.id"),
Target: "pod.container.target",
Details: &observer.PodContainer{
Name: "k8s.node.name",
Image: "redis:123",
ContainerID: "123",
Pod: observer.Pod{
Name: "redis",
},
},
}
)

func expectedPLogs() plog.Logs {
Expand Down
2 changes: 1 addition & 1 deletion internal/receiver/discoveryreceiver/rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func (r Rule) MarshalText() (text []byte, err error) {

// ruleRe is used to verify the rule starts type check.
var ruleRe = regexp.MustCompile(
fmt.Sprintf(`^type\s*==\s*(%q|%q|%q|%q|%q|%q)`, observer.PodType, observer.K8sServiceType, observer.PortType, observer.HostPortType, observer.ContainerType, observer.K8sNodeType),
fmt.Sprintf(`^type\s*==\s*(%q|%q|%q|%q|%q|%q|%q)`, observer.PodType, observer.K8sServiceType, observer.PortType, observer.HostPortType, observer.ContainerType, observer.K8sNodeType, observer.PodContainerType),
)

// newRule creates a new rule instance.
Expand Down
6 changes: 6 additions & 0 deletions internal/receiver/discoveryreceiver/rule_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,12 @@ func TestRuleEval(t *testing.T) {
want: true,
wantErr: false,
},
{
name: "basic pod.container",
args: args{podContainerEndpoint, `type == "pod.container" && container_image matches "redis"`},
want: true,
wantErr: false,
},
{
name: "relocated type builtin",
args: args{k8sNodeEndpoint, `type == "k8s.node" && typeOf("some string") == "string"`},
Expand Down
3 changes: 3 additions & 0 deletions k8s/envoy/envoy.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash
# Applies the envoy.yaml manifest located next to this script, independent of
# the caller's working directory.

# Abort on the first failing command, on unset variables, and on pipeline errors.
set -euo pipefail

# Absolute directory containing this script.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Quote the path so a checkout directory containing spaces is not word-split.
kubectl apply -f "${SCRIPT_DIR}/envoy.yaml"
12 changes: 12 additions & 0 deletions k8s/istio/istio.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash

# These steps install istio, with a demo profile, set up the default namespace to inject sidecars, CRDs, and the sample application under test.
# See https://istio.io/latest/docs/setup/getting-started/#bookinfo for more info.

# Abort on the first failing command, on unset variables, and on pipeline errors,
# so a failed download or install does not let later steps run against a broken cluster.
set -euo pipefail

BOOKINFO_URL="https://raw.githubusercontent.com/istio/istio/release-1.24/samples/bookinfo"
PROFILE_FILE="demo-profile-no-gateways.yaml"

# Download the Istio release into ./istio-<version>; -f makes HTTP errors fail the pipeline.
curl -fsSL https://istio.io/downloadIstio | sh -

# Fetch the install profile, overwriting any copy left by a previous run
# (wget would instead save a ".1" file and the stale profile would be used).
curl -fsSL -o "${PROFILE_FILE}" "${BOOKINFO_URL}/${PROFILE_FILE}"

# Locate the extracted release directory. The pattern is quoted so the shell does
# not expand it before find sees it; if several releases are present, use the last one in sort order.
ISTIO_DIR="$(find . -maxdepth 1 -type d -name 'istio-*' | sort | tail -n 1)"
if [[ -z "${ISTIO_DIR}" || ! -x "${ISTIO_DIR}/bin/istioctl" ]]; then
  echo "istioctl not found: expected an istio-* directory with bin/istioctl in $(pwd)" >&2
  exit 1
fi

"${ISTIO_DIR}/bin/istioctl" install -f "${PROFILE_FILE}" -y

# --overwrite keeps the step idempotent when the label already exists with another value.
kubectl label namespace default istio-injection=enabled --overwrite

# Install the Gateway API CRDs only when they are not already present.
kubectl get crd gateways.gateway.networking.k8s.io &> /dev/null || \
  { kubectl kustomize "github.com/kubernetes-sigs/gateway-api/config/crd?ref=v1.2.0" | kubectl apply -f -; }

# Deploy the bookinfo sample application.
kubectl apply -f "${BOOKINFO_URL}/platform/kube/bookinfo.yaml"
Loading
Loading