[processor/tailsampling] Add metric for sampled/not sampled spans (#30485)

ArthurSens · web-flow · commit 30eda261349c · 2024-02-16T15:52:25.000+01:00
**Description:** <Describe what has changed.> Add metrics to measure sampled/not sampled spans. **Link to tracking Issue:** Fixes #30482 **Testing:** <Describe what testing was performed and which tests were added.> None **Documentation:** <Describe the documentation added.> None --------- Signed-off-by: Arthur Silva Sens <arthur.sens@coralogix.com>
diff --git a/.chloggen/sampled_spans_metrics.yaml b/.chloggen/sampled_spans_metrics.yaml
@@ -0,0 +1,27 @@
+# Use this changelog template to create an entry for release notes.
+
+# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
+change_type: enhancement
+
+# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
+component: processor/tail_sampling
+
+# A brief description of the change.  Surround your text with quotes ("") if it needs to start with a backtick (`).
+note: "Add metrics that measure the number of sampled spans and the number of spans that are dropped due to sampling decisions."
+
+# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
+issues: [30482]
+
+# (Optional) One or more lines of additional information to render under the primary note.
+# These lines will be padded with 2 spaces and then inserted directly into the document.
+# Use pipe (|) for multiline entries.
+subtext:
+
+# If your change doesn't affect end users or the exported elements of any package,
+# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
+# Optional: The change log or logs in which this entry should be included.
+# e.g. '[user]' or '[user, api]'
+# Include 'user' if the change is relevant to end users.
+# Include 'api' if there is a change to a library API.
+# Default: '[user]'
+change_logs: [user]
diff --git a/processor/tailsamplingprocessor/factory.go b/processor/tailsamplingprocessor/factory.go
@@ -14,13 +14,24 @@ import (
 	"go.opentelemetry.io/collector/component"
 	"go.opentelemetry.io/collector/config/configtelemetry"
 	"go.opentelemetry.io/collector/consumer"
+	"go.opentelemetry.io/collector/featuregate"
 	"go.opentelemetry.io/collector/processor"
 
 	"github.com/open-telemetry/opentelemetry-collector-contrib/processor/tailsamplingprocessor/internal/metadata"
 )
 
 var onceMetrics sync.Once
 
+var metricStatCountSpansSampledFeatureGate = featuregate.GlobalRegistry().MustRegister(
+	"processor.tailsamplingprocessor.metricstatcountspanssampled",
+	featuregate.StageAlpha,
+	featuregate.WithRegisterDescription("When enabled, a new metric stat_count_spans_sampled will be available in the tail sampling processor. Differently from stat_count_traces_sampled, this metric will count the number of spans sampled or not per sampling policy, where the original counts traces."),
+)
+
+func isMetricStatCountSpansSampledEnabled() bool {
+	return metricStatCountSpansSampledFeatureGate.IsEnabled()
+}
+
 // NewFactory returns a new factory for the Tail Sampling processor.
 func NewFactory() processor.Factory {
 	onceMetrics.Do(func() {
diff --git a/processor/tailsamplingprocessor/go.mod b/processor/tailsamplingprocessor/go.mod
@@ -14,6 +14,7 @@ require (
 	go.opentelemetry.io/collector/config/configtelemetry v0.94.1
 	go.opentelemetry.io/collector/confmap v0.94.1
 	go.opentelemetry.io/collector/consumer v0.94.1
+	go.opentelemetry.io/collector/featuregate v1.1.0
 	go.opentelemetry.io/collector/pdata v1.1.0
 	go.opentelemetry.io/collector/processor v0.94.1
 	go.opentelemetry.io/otel/metric v1.23.1
@@ -33,6 +34,7 @@ require (
 	github.com/gobwas/glob v0.2.3 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/golang/protobuf v1.5.3 // indirect
+	github.com/hashicorp/go-version v1.6.0 // indirect
 	github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
 	github.com/iancoleman/strcase v0.3.0 // indirect
 	github.com/json-iterator/go v1.1.12 // indirect
diff --git a/processor/tailsamplingprocessor/go.sum b/processor/tailsamplingprocessor/go.sum
diff --git a/processor/tailsamplingprocessor/metrics.go b/processor/tailsamplingprocessor/metrics.go
@@ -28,6 +28,7 @@ var (
 	statPolicyEvaluationErrorCount = stats.Int64("sampling_policy_evaluation_error", "Count of sampling policy evaluation errors", stats.UnitDimensionless)
 
 	statCountTracesSampled       = stats.Int64("count_traces_sampled", "Count of traces that were sampled or not per sampling policy", stats.UnitDimensionless)
+	statCountSpansSampled        = stats.Int64("count_spans_sampled", "Count of spans that were sampled or not per sampling policy", stats.UnitDimensionless)
 	statCountGlobalTracesSampled = stats.Int64("global_count_traces_sampled", "Global count of traces that were sampled or not by at least one policy", stats.UnitDimensionless)
 
 	statDroppedTooEarlyCount    = stats.Int64("sampling_trace_dropped_too_early", "Count of traces that needed to be dropped before the configured wait time", stats.UnitDimensionless)
@@ -46,90 +47,82 @@ func samplingProcessorMetricViews(level configtelemetry.Level) []*view.View {
 	latencyDistributionAggregation := view.Distribution(1, 2, 5, 10, 25, 50, 75, 100, 150, 200, 300, 400, 500, 750, 1000, 2000, 3000, 4000, 5000, 10000, 20000, 30000, 50000)
 	ageDistributionAggregation := view.Distribution(1, 2, 5, 10, 20, 30, 40, 50, 60, 90, 120, 180, 300, 600, 1800, 3600, 7200)
 
-	decisionLatencyView := &view.View{
-		Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statDecisionLatencyMicroSec.Name()),
-		Measure:     statDecisionLatencyMicroSec,
-		Description: statDecisionLatencyMicroSec.Description(),
-		TagKeys:     policyTagKeys,
-		Aggregation: latencyDistributionAggregation,
-	}
-	overallDecisionLatencyView := &view.View{
-		Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statOverallDecisionLatencyUs.Name()),
-		Measure:     statOverallDecisionLatencyUs,
-		Description: statOverallDecisionLatencyUs.Description(),
-		Aggregation: latencyDistributionAggregation,
-	}
-
-	traceRemovalAgeView := &view.View{
-		Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statTraceRemovalAgeSec.Name()),
-		Measure:     statTraceRemovalAgeSec,
-		Description: statTraceRemovalAgeSec.Description(),
-		Aggregation: ageDistributionAggregation,
-	}
-	lateSpanArrivalView := &view.View{
-		Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statLateSpanArrivalAfterDecision.Name()),
-		Measure:     statLateSpanArrivalAfterDecision,
-		Description: statLateSpanArrivalAfterDecision.Description(),
-		Aggregation: ageDistributionAggregation,
-	}
-
-	countPolicyEvaluationErrorView := &view.View{
-		Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statPolicyEvaluationErrorCount.Name()),
-		Measure:     statPolicyEvaluationErrorCount,
-		Description: statPolicyEvaluationErrorCount.Description(),
-		Aggregation: view.Sum(),
-	}
-
+	views := make([]*view.View, 0)
 	sampledTagKeys := []tag.Key{tagPolicyKey, tagSampledKey}
-	countTracesSampledView := &view.View{
-		Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statCountTracesSampled.Name()),
-		Measure:     statCountTracesSampled,
-		Description: statCountTracesSampled.Description(),
-		TagKeys:     sampledTagKeys,
-		Aggregation: view.Sum(),
-	}
-
-	countGlobalTracesSampledView := &view.View{
-		Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statCountGlobalTracesSampled.Name()),
-		Measure:     statCountGlobalTracesSampled,
-		Description: statCountGlobalTracesSampled.Description(),
-		TagKeys:     []tag.Key{tagSampledKey},
-		Aggregation: view.Sum(),
+	views = append(views,
+		&view.View{
+			Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statDecisionLatencyMicroSec.Name()),
+			Measure:     statDecisionLatencyMicroSec,
+			Description: statDecisionLatencyMicroSec.Description(),
+			TagKeys:     policyTagKeys,
+			Aggregation: latencyDistributionAggregation,
+		},
+		&view.View{
+			Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statOverallDecisionLatencyUs.Name()),
+			Measure:     statOverallDecisionLatencyUs,
+			Description: statOverallDecisionLatencyUs.Description(),
+			Aggregation: latencyDistributionAggregation,
+		},
+		&view.View{
+			Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statTraceRemovalAgeSec.Name()),
+			Measure:     statTraceRemovalAgeSec,
+			Description: statTraceRemovalAgeSec.Description(),
+			Aggregation: ageDistributionAggregation,
+		},
+		&view.View{
+			Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statLateSpanArrivalAfterDecision.Name()),
+			Measure:     statLateSpanArrivalAfterDecision,
+			Description: statLateSpanArrivalAfterDecision.Description(),
+			Aggregation: ageDistributionAggregation,
+		},
+		&view.View{
+			Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statPolicyEvaluationErrorCount.Name()),
+			Measure:     statPolicyEvaluationErrorCount,
+			Description: statPolicyEvaluationErrorCount.Description(),
+			Aggregation: view.Sum(),
+		},
+		&view.View{
+			Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statCountTracesSampled.Name()),
+			Measure:     statCountTracesSampled,
+			Description: statCountTracesSampled.Description(),
+			TagKeys:     sampledTagKeys,
+			Aggregation: view.Sum(),
+		},
+		&view.View{
+			Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statCountGlobalTracesSampled.Name()),
+			Measure:     statCountGlobalTracesSampled,
+			Description: statCountGlobalTracesSampled.Description(),
+			TagKeys:     []tag.Key{tagSampledKey},
+			Aggregation: view.Sum(),
+		},
+		&view.View{
+			Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statDroppedTooEarlyCount.Name()),
+			Measure:     statDroppedTooEarlyCount,
+			Description: statDroppedTooEarlyCount.Description(),
+			Aggregation: view.Sum(),
+		},
+		&view.View{
+			Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statNewTraceIDReceivedCount.Name()),
+			Measure:     statNewTraceIDReceivedCount,
+			Description: statNewTraceIDReceivedCount.Description(),
+			Aggregation: view.Sum(),
+		},
+		&view.View{
+			Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statTracesOnMemoryGauge.Name()),
+			Measure:     statTracesOnMemoryGauge,
+			Description: statTracesOnMemoryGauge.Description(),
+			Aggregation: view.LastValue(),
+		})
+
+	if isMetricStatCountSpansSampledEnabled() {
+		views = append(views, &view.View{
+			Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statCountSpansSampled.Name()),
+			Measure:     statCountSpansSampled,
+			Description: statCountSpansSampled.Description(),
+			TagKeys:     sampledTagKeys,
+			Aggregation: view.Sum(),
+		})
 	}
 
-	countTraceDroppedTooEarlyView := &view.View{
-		Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statDroppedTooEarlyCount.Name()),
-		Measure:     statDroppedTooEarlyCount,
-		Description: statDroppedTooEarlyCount.Description(),
-		Aggregation: view.Sum(),
-	}
-	countTraceIDArrivalView := &view.View{
-		Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statNewTraceIDReceivedCount.Name()),
-		Measure:     statNewTraceIDReceivedCount,
-		Description: statNewTraceIDReceivedCount.Description(),
-		Aggregation: view.Sum(),
-	}
-	trackTracesOnMemorylView := &view.View{
-		Name:        processorhelper.BuildCustomMetricName(metadata.Type.String(), statTracesOnMemoryGauge.Name()),
-		Measure:     statTracesOnMemoryGauge,
-		Description: statTracesOnMemoryGauge.Description(),
-		Aggregation: view.LastValue(),
-	}
-
-	return []*view.View{
-		decisionLatencyView,
-		overallDecisionLatencyView,
-
-		traceRemovalAgeView,
-		lateSpanArrivalView,
-
-		countPolicyEvaluationErrorView,
-
-		countTracesSampledView,
-		countGlobalTracesSampledView,
-
-		countTraceDroppedTooEarlyView,
-		countTraceIDArrivalView,
-		trackTracesOnMemorylView,
-	}
+	return views
 }
diff --git a/processor/tailsamplingprocessor/processor.go b/processor/tailsamplingprocessor/processor.go
@@ -308,6 +308,13 @@ func (tsp *tailSamplingSpanProcessor) makeDecision(id pcommon.TraceID, trace *sa
 				mutators,
 				statCountTracesSampled.M(int64(1)),
 			)
+			if isMetricStatCountSpansSampledEnabled() {
+				_ = stats.RecordWithTags(
+					p.ctx,
+					mutators,
+					statCountSpansSampled.M(trace.SpanCount.Load()),
+				)
+			}
 			metrics.decisionSampled++
 
 		case sampling.NotSampled:
@@ -317,6 +324,13 @@ func (tsp *tailSamplingSpanProcessor) makeDecision(id pcommon.TraceID, trace *sa
 				mutators,
 				statCountTracesSampled.M(int64(1)),
 			)
+			if isMetricStatCountSpansSampledEnabled() {
+				_ = stats.RecordWithTags(
+					p.ctx,
+					mutators,
+					statCountSpansSampled.M(trace.SpanCount.Load()),
+				)
+			}
 			metrics.decisionNotSampled++
 		}
 	}

-Original file line number
+Diff line change
 	go.opentelemetry.io/collector/config/configtelemetry v0.94.1
 	go.opentelemetry.io/collector/confmap v0.94.1
 	go.opentelemetry.io/collector/consumer v0.94.1
 +	go.opentelemetry.io/collector/featuregate v1.1.0
 	go.opentelemetry.io/collector/pdata v1.1.0
 	go.opentelemetry.io/collector/processor v0.94.1
 	go.opentelemetry.io/otel/metric v1.23.1
 	github.com/gobwas/glob v0.2.3 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/golang/protobuf v1.5.3 // indirect
 +	github.com/hashicorp/go-version v1.6.0 // indirect
 	github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
 	github.com/iancoleman/strcase v0.3.0 // indirect
 	github.com/json-iterator/go v1.1.12 // indirect