open-telemetry · bogdandrutu · Aug 19, 2021 · Jun 24, 2021 · Jun 24, 2021 · Jun 24, 2021
@@ -4,7 +4,9 @@
 
 ## 💡 Enhancements 💡
 
-- `tailsampling` processor: Add new policy `status_code` (#3754)
+- `tailsampling` processor:
+  - Add new policy `status_code` (#3754)
+  - Add new policy `percentage` (#3876)
 
 ## v0.29.0
 

@@ -15,6 +15,7 @@ Multiple policies exist today and it is straight forward to add more. These incl
 - `always_sample`: Sample all traces
 - `latency`: Sample based on the duration of the trace. The duration is determined by looking at the earliest start time and latest end time, without taking into consideration what happened in between.
 - `numeric_attribute`: Sample based on number attributes
+- `percentage`: Sample a percentage of traces, configured as a fraction between 0 and 1 (for example, `0.1` samples 10%). Only traces that have not been sampled yet by another policy are taken into account.
 - `status_code`: Sample based upon the status code (`OK`, `ERROR` or `UNSET`)
 - `string_attribute`: Sample based on string attributes value matches, both exact and regex value matches are supported
 - `rate_limiting`: Sample based on rate
@@ -50,21 +51,26 @@ processors:
           },
           {
             name: test-policy-4,
+            type: percentage,
+            percentage: {percentage: 0.1}
+          },
+          {
+            name: test-policy-5,
             type: status_code,
             status_code: {status_codes: [ERROR, UNSET]}
           },
           {
-            name: test-policy-5,
+            name: test-policy-6,
             type: string_attribute,
             string_attribute: {key: key2, values: [value1, value2]}
           },
           {
-            name: test-policy-6,
+            name: test-policy-7,
             type: string_attribute,
             string_attribute: {key: key2, values: [value1, val*], enabled_regex_matching: true, cache_max_size: 10}
           },
           {
-            name: test-policy-7,
+            name: test-policy-8,
             type: rate_limiting,
             rate_limiting: {spans_per_second: 35}
          }

@@ -31,6 +31,8 @@ const (
 	// NumericAttribute sample traces that have a given numeric attribute in a specified
 	// range, e.g.: attribute "http.status_code" >= 399 and <= 999.
 	NumericAttribute PolicyType = "numeric_attribute"
+	// Percentage samples a given percentage of traces.
+	Percentage PolicyType = "percentage"
 	// StatusCode sample traces that have a given status code.
 	StatusCode PolicyType = "status_code"
 	// StringAttribute sample traces that a attribute, of type string, matching
@@ -50,6 +52,8 @@ type PolicyCfg struct {
 	LatencyCfg LatencyCfg `mapstructure:"latency"`
 	// Configs for numeric attribute filter sampling policy evaluator.
 	NumericAttributeCfg NumericAttributeCfg `mapstructure:"numeric_attribute"`
+	// Configs for percentage filter sampling policy evaluator.
+	PercentageCfg PercentageCfg `mapstructure:"percentage"`
 	// Configs for status code filter sampling policy evaluator.
 	StatusCodeCfg StatusCodeCfg `mapstructure:"status_code"`
 	// Configs for string attribute filter sampling policy evaluator.
@@ -76,6 +80,12 @@ type NumericAttributeCfg struct {
 	MaxValue int64 `mapstructure:"max_value"`
 }
 
+// PercentageCfg holds the configurable settings to create a percentage filter sampling
+// policy evaluator.
+type PercentageCfg struct {
+	// Percentage is the share of traces to sample, expressed as a fraction
+	// between 0 and 1 (for example 0.1). sampling.NewPercentageFilter rejects
+	// values outside that range.
+	Percentage float32 `mapstructure:"percentage"`
+}
+
 // StatusCodeCfg holds the configurable settings to create a status code filter sampling
 // policy evaluator.
 type StatusCodeCfg struct {

@@ -60,16 +60,21 @@ func TestLoadConfig(t *testing.T) {
 				},
 				{
 					Name:          "test-policy-4",
+					Type:          Percentage,
+					PercentageCfg: PercentageCfg{Percentage: 0.1},
+				},
+				{
+					Name:          "test-policy-5",
 					Type:          StatusCode,
 					StatusCodeCfg: StatusCodeCfg{StatusCodes: []string{"ERROR", "UNSET"}},
 				},
 				{
-					Name:               "test-policy-5",
+					Name:               "test-policy-6",
 					Type:               StringAttribute,
 					StringAttributeCfg: StringAttributeCfg{Key: "key2", Values: []string{"value1", "value2"}},
 				},
 				{
-					Name:            "test-policy-6",
+					Name:            "test-policy-7",
 					Type:            RateLimiting,
 					RateLimitingCfg: RateLimitingCfg{SpansPerSecond: 35},
 				},

@@ -123,6 +123,9 @@ func getPolicyEvaluator(logger *zap.Logger, cfg *PolicyCfg) (sampling.PolicyEval
 	case NumericAttribute:
 		nafCfg := cfg.NumericAttributeCfg
 		return sampling.NewNumericAttributeFilter(logger, nafCfg.Key, nafCfg.MinValue, nafCfg.MaxValue), nil
+	case Percentage:
+		pCfg := cfg.PercentageCfg
+		return sampling.NewPercentageFilter(logger, pCfg.Percentage)
 	case StringAttribute:
 		safCfg := cfg.StringAttributeCfg
 		return sampling.NewStringAttributeFilter(logger, safCfg.Key, safCfg.Values, safCfg.EnabledRegexMatching, safCfg.CacheMaxSize), nil

diff --git a/processor/tailsamplingprocessor/sampling/percentage.go b/processor/tailsamplingprocessor/sampling/percentage.go
@@ -0,0 +1,81 @@
+// Copyright The OpenTelemetry Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//       http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package sampling
+
+import (
+	"errors"
+
+	"go.opentelemetry.io/collector/consumer/pdata"
+	"go.uber.org/zap"
+)
+
+// percentageFilter is a policy evaluator that samples a configured share of
+// the traces it evaluates. It tracks its progress with two counters that are
+// reset once 1000 traces have been processed.
+type percentageFilter struct {
+	// logger receives the debug messages emitted by the evaluator.
+	logger          *zap.Logger
+	// percentage is the target share of sampled traces, a fraction in [0, 1].
+	percentage      float32
+	// tracesSampled counts the traces this filter sampled in the current window.
+	tracesSampled   int
+	// tracesProcessed counts the traces this filter evaluated in the current
+	// window; traces already sampled by another policy are not counted.
+	tracesProcessed int
+}
+
+var _ PolicyEvaluator = (*percentageFilter)(nil)
+
+// NewPercentageFilter creates a policy evaluator that samples a percentage of
+// traces.
+//
+// percentage is the fraction of traces to sample and must lie in [0, 1]. Any
+// other value, including NaN, yields an error and a nil evaluator.
+func NewPercentageFilter(logger *zap.Logger, percentage float32) (PolicyEvaluator, error) {
+	// Written as a negated range check so that NaN, for which every ordered
+	// comparison is false, is rejected as well.
+	if !(percentage >= 0 && percentage <= 1) {
+		return nil, errors.New("expected a percentage between 0 and 1")
+	}
+
+	return &percentageFilter{
+		logger:     logger,
+		percentage: percentage,
+	}, nil
+}
+
+// OnLateArrivingSpans notifies the evaluator that the given list of spans arrived
+// after the sampling decision was already taken for the trace.
+// This gives the evaluator a chance to log any message/metrics and/or update any
+// related internal state.
+// The percentage filter only emits a debug log message here; it ignores both
+// arguments and always returns nil.
+func (r *percentageFilter) OnLateArrivingSpans(Decision, []*pdata.Span) error {
+	r.logger.Debug("Triggering action for late arriving spans in percentage filter")
+	return nil
+}
+
+// Evaluate looks at the trace data and returns a corresponding SamplingDecision.
+func (r *percentageFilter) Evaluate(_ pdata.TraceID, trace *TraceData) (Decision, error) {
+	r.logger.Debug("Evaluating spans in percentage filter")
+
+	// ignore traces that have already been sampled before
+	for _, decision := range trace.Decisions {
+		if decision == Sampled {
+			return NotSampled, nil
+		}
+	}
+
+	decision := NotSampled
+
+	if float32(r.tracesSampled)/float32(r.tracesProcessed) <= r.percentage {
+		r.tracesSampled++
+		decision = Sampled
+	}
+	r.tracesProcessed++
+
+	// reset counters to avoid overflow
+	if r.tracesProcessed == 1000 {
+		r.tracesSampled = 0
+		r.tracesProcessed = 0
+	}
+
+	return decision, nil
+}
diff --git a/processor/tailsamplingprocessor/sampling/percentage_test.go b/processor/tailsamplingprocessor/sampling/percentage_test.go
@@ -0,0 +1,103 @@
+// Copyright  The OpenTelemetry Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package sampling
+
+import (
+	"fmt"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"go.opentelemetry.io/collector/consumer/pdata"
+	"go.uber.org/zap"
+)
+
+func TestNewPercentageFilter_errorHandling(t *testing.T) {
+	_, err := NewPercentageFilter(zap.NewNop(), -1)
+	assert.EqualError(t, err, "expected a percentage between 0 and 1")
+
+	_, err = NewPercentageFilter(zap.NewNop(), 1.5)
+	assert.EqualError(t, err, "expected a percentage between 0 and 1")
+}
+
+// TestPercentageSampling evaluates 2000 traces for each of several
+// percentages and checks that the number of sampled traces matches the
+// configured share within a delta of 0.001.
+func TestPercentageSampling(t *testing.T) {
+	const traceCount = 2000
+
+	noAttrs := map[string]pdata.AttributeValue{}
+	percentages := []float32{0.01, 0.1, 0.125, 0.33, 0.5, 0.66}
+
+	for _, pct := range percentages {
+		t.Run(fmt.Sprintf("sample %.2f", pct), func(t *testing.T) {
+			traceID := pdata.NewTraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16})
+			trace := newTraceStringAttrs(noAttrs, "example", "value")
+
+			filter, err := NewPercentageFilter(zap.NewNop(), pct)
+			assert.NoError(t, err)
+
+			hits := 0
+			for i := 0; i < traceCount; i++ {
+				decision, evalErr := filter.Evaluate(traceID, trace)
+				assert.NoError(t, evalErr)
+
+				if decision != Sampled {
+					continue
+				}
+				hits++
+			}
+
+			assert.InDelta(t, pct*float32(traceCount), hits, 0.001, "Amount of sampled traces")
+		})
+	}
+}
+
+// TestPercentageSampling_ignoreAlreadySampledTraces interleaves traces that no
+// policy has sampled yet with traces that already carry a Sampled decision.
+// The latter must always be reported as NotSampled and must not influence the
+// share of sampled traces among the former.
+func TestPercentageSampling_ignoreAlreadySampledTraces(t *testing.T) {
+	var empty = map[string]pdata.AttributeValue{}
+
+	trace := newTraceStringAttrs(empty, "example", "value")
+	traceID := pdata.NewTraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16})
+
+	var percentage float32 = 0.33
+
+	percentageFilter, err := NewPercentageFilter(zap.NewNop(), percentage)
+	assert.NoError(t, err)
+
+	traceCount := 100
+	sampled := 0
+
+	for i := 0; i < traceCount; i++ {
+		trace.Decisions = []Decision{NotSampled, NotSampled}
+		decision, err := percentageFilter.Evaluate(traceID, trace)
+		assert.NoError(t, err)
+
+		if decision == Sampled {
+			sampled++
+		}
+
+		// trace has been sampled, should be ignored
+		trace.Decisions = []Decision{NotSampled, Sampled}
+		decision, err = percentageFilter.Evaluate(traceID, trace)
+		assert.NoError(t, err)
+		// assert.Equal takes (expected, actual); keeping that order makes the
+		// failure output label the values correctly.
+		assert.Equal(t, NotSampled, decision)
+	}
+
+	assert.EqualValues(t, percentage*float32(traceCount), sampled)
+}
+
+// TestOnLateArrivingSpans_PercentageSampling checks that notifying the
+// percentage filter about late arriving spans does not return an error.
+func TestOnLateArrivingSpans_PercentageSampling(t *testing.T) {
+	percentageFilter, err := NewPercentageFilter(zap.NewNop(), 0.1)
+	// assert.NoError matches the error checks used by the other tests in this
+	// file and reports the error text on failure.
+	assert.NoError(t, err)
+
+	err = percentageFilter.OnLateArrivingSpans(NotSampled, nil)
+	assert.NoError(t, err)
+}
diff --git a/processor/tailsamplingprocessor/sampling/status_code_test.go b/processor/tailsamplingprocessor/sampling/status_code_test.go
@@ -30,7 +30,7 @@ func TestNewStatusCodeFilter_errorHandling(t *testing.T) {
 	assert.EqualError(t, err, "unknown status code \"ERR\", supported: OK, ERROR, UNSET")
 }
 
-func TestPercentageSampling(t *testing.T) {
+func TestStatusCodeSampling(t *testing.T) {
 	traceID := pdata.NewTraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16})
 
 	cases := []struct {
@@ -92,7 +92,7 @@ func TestPercentageSampling(t *testing.T) {
 	}
 }
 
-func TestOnLateArrivingSpans_PercentageSampling(t *testing.T) {
+func TestOnLateArrivingSpans_StatusCodeSampling(t *testing.T) {
 	statusCode, err := NewStatusCodeFilter(zap.NewNop(), []string{"ERROR"})
 	assert.Nil(t, err)
 

@@ -27,16 +27,21 @@ processors:
           },
           {
             name: test-policy-4,
+            type: percentage,
+            percentage: {percentage: 0.1}
+          },
+          {
+            name: test-policy-5,
             type: status_code,
             status_code: {status_codes: [ERROR, UNSET]}
           },
           {
-            name: test-policy-5,
+            name: test-policy-6,
             type: string_attribute,
             string_attribute: {key: key2, values: [value1, value2]}
           },
           {
-            name: test-policy-6,
+            name: test-policy-7,
             type: rate_limiting,
             rate_limiting: {spans_per_second: 35}
          },