Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ require (
github.com/opencontainers/runtime-spec v1.2.1
github.com/prometheus/client_golang v1.23.2
github.com/prometheus/client_model v0.6.2
github.com/prometheus/common v0.66.1
github.com/prometheus/procfs v0.17.0
github.com/sirupsen/logrus v1.9.3
github.com/spf13/cobra v1.10.1
Expand Down Expand Up @@ -116,7 +117,6 @@ require (
github.com/pelletier/go-toml/v2 v2.2.3 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/common v0.66.1 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/sagikazarmark/locafero v0.7.0 // indirect
github.com/sourcegraph/conc v0.3.0 // indirect
Expand Down
8 changes: 8 additions & 0 deletions tests/e2e/metricschecker/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Tetragon

//go:build !windows

// This package provides a Tetragon Metrics Checker that
// uses http GET requests to get metrics from all Tetragon pods.
package metricschecker
218 changes: 218 additions & 0 deletions tests/e2e/metricschecker/metricschecker.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
//go:build !windows

package metricschecker

import (
"context"
"errors"
"fmt"
"net/http"
"testing"
"time"

clmo "github.com/prometheus/client_model/go"
"github.com/prometheus/common/expfmt"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/assert"
"k8s.io/klog/v2"
"sigs.k8s.io/e2e-framework/pkg/envconf"
"sigs.k8s.io/e2e-framework/pkg/features"

"github.com/cilium/tetragon/tests/e2e/state"
)

// MetricsChecker checks prometheus metrics from one or more events streams.
type MetricsChecker struct {
name string
timeLimit time.Duration
}

// NewMetricsChecker constructs a new Metrics from a MultiEventChecker.
func NewMetricsChecker(name string) *MetricsChecker {
rc := &MetricsChecker{
name: name,
timeLimit: 30 * time.Second,
}
return rc
}

// WithTimeLimit sets the time limit for a MetricsChecker http GET calls.
func (rc *MetricsChecker) WithTimeLimit(limit time.Duration) *MetricsChecker {
rc.timeLimit = limit
return rc
}

type metricOp int

const (
opEqual metricOp = 1
opGreater = 1 << iota
opLess
)

type metricValue struct {
tp clmo.MetricType
val float64
}
type metricCheck struct {
value metricValue
op metricOp
}

func (rc *MetricsChecker) Equal(metric string, val int) features.Func {
return rc.checkWithOp(metric, metricCheck{
value: metricValue{
tp: clmo.MetricType_COUNTER,
val: float64(val),
},
op: opEqual,
})
}

func (rc *MetricsChecker) Less(metric string, val int) features.Func {
return rc.checkWithOp(metric, metricCheck{
value: metricValue{
tp: clmo.MetricType_COUNTER,
val: float64(val),
},
op: opLess,
})
}

func (rc *MetricsChecker) LessThanOrEqual(metric string, val int) features.Func {
return rc.checkWithOp(metric, metricCheck{
value: metricValue{
tp: clmo.MetricType_COUNTER,
val: float64(val),
},
op: opEqual | opLess,
})
}

func (rc *MetricsChecker) Greater(metric string, val int) features.Func {
return rc.checkWithOp(metric, metricCheck{
value: metricValue{
tp: clmo.MetricType_COUNTER,
val: float64(val),
},
op: opGreater,
})
}

func (rc *MetricsChecker) GreaterOrEqual(metric string, val int) features.Func {
return rc.checkWithOp(metric, metricCheck{
value: metricValue{
tp: clmo.MetricType_COUNTER,
val: float64(val),
},
op: opEqual | opGreater,
})
}

// getAndParseMetrics fetches metrics from a URL and parses them into MetricFamily objects.
func getAndParseMetrics(ctx context.Context, metricsURL string) (map[string]*clmo.MetricFamily, error) {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, metricsURL, nil)
if err != nil {
return nil, err
}
resp, err := http.DefaultClient.Do(req)
if err != nil {
return nil, fmt.Errorf("failed to fetch metrics from %s: %w", metricsURL, err)
}
defer resp.Body.Close()

if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("received non-OK status code %d from %s", resp.StatusCode, metricsURL)
}

parser := expfmt.NewTextParser(model.UTF8Validation)
mf, err := parser.TextToMetricFamilies(resp.Body)
if err != nil {
return nil, fmt.Errorf("failed to parse metrics body: %w", err)
}
return mf, nil
}

// Collect metrics from all forwarded metrics ports for all pods
func (rc *MetricsChecker) checkWithOp(metric string, check metricCheck) features.Func {
return func(ctx context.Context, t *testing.T, _ *envconf.Config) context.Context {
klog.InfoS("Gathering metrics clients", "metricschecker", rc.name)
ports, ok := ctx.Value(state.PromForwardedPorts).(map[string]int)
if !ok {
assert.Fail(t, "failed to find forwarded prometheus ports")
return ctx
}

timedCtx, cancel := context.WithTimeout(ctx, rc.timeLimit)
defer cancel()

results := make(map[string]*clmo.MetricFamily)
for podName, port := range ports {
metricFamilies, err := getAndParseMetrics(timedCtx, fmt.Sprintf("http://localhost:%d/metrics", port))
if err != nil {
assert.Fail(t, "Failed to fetch metrics families from metrics endpoint", "err", err, "podName", podName)
return ctx
}

if mf, ok := metricFamilies[metric]; ok {
results[podName] = mf
} else {
klog.InfoS("No metric found on pod", "metricschecker", rc.name, "metric", metric, "podName", podName)
}
}

// No metrics endpoint exported the requested metric. This is an error.
if len(results) == 0 {
assert.Fail(t, "Failed to fetch requested metrics from any pod", "metricName", metric)
return ctx
}

if err := rc.check(results, metric, check); !assert.NoError(t, err, "checks should pass") {
return ctx
}
return ctx
}
}

// Real check helper that implements the check logic for each metric type.
// For now, only COUNTER types are supported.
func (rc *MetricsChecker) check(results map[string]*clmo.MetricFamily, metric string, check metricCheck) error {
klog.InfoS("Running metrics checks", "metricschecker", rc.name)

for podName, result := range results {
if result.GetType() != check.value.tp {
return errors.New("result type and check type mismatch")
}
switch result.GetType() {
case clmo.MetricType_COUNTER:
var sum float64
// Accumulate values from all labels
for _, mf := range result.GetMetric() {
sum += mf.GetCounter().GetValue()
}

success := false
if check.op&opEqual != 0 {
success = sum == check.value.val
}
if check.op&opGreater != 0 {
success = success || (sum > check.value.val)
}
if check.op&opLess != 0 {
success = success || (sum < check.value.val)
}
if !success {
return fmt.Errorf("failed metricscheck for metric '%s' on pod '%s'", metric, podName)
}
default:
// TODO implement check logic for non-counter types
return errors.New("metrics checker unsupported for non-counter types")
}
}
return nil
}

// Name returns the name of the checker
func (rc *MetricsChecker) Name() string {
return rc.name
}
7 changes: 7 additions & 0 deletions tests/e2e/tests/labels/labels_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ import (
"sigs.k8s.io/e2e-framework/pkg/features"
"sigs.k8s.io/e2e-framework/third_party/helm"

"github.com/cilium/tetragon/tests/e2e/metricschecker"

ec "github.com/cilium/tetragon/api/v1/tetragon/codegen/eventchecker"
sm "github.com/cilium/tetragon/pkg/matchers/stringmatcher"
"github.com/cilium/tetragon/tests/e2e/checker"
Expand Down Expand Up @@ -115,8 +117,13 @@ func TestLabelsDemoApp(t *testing.T) {
uninstall := features.New("Uninstall Demo App").
Assess("Uninstall", uninstallDemoApp()).Feature()

metricsChecker := metricschecker.NewMetricsChecker("labelsMetricsChecker")
metrics := features.New("Run Metrics Checks").
Assess("Run Metrics Checks", metricsChecker.Greater("tetragon_events_total", 0)).Feature()

// Spawn workload and run checker
runner.TestInParallel(t, runEventChecker, runWorkload)
runner.Test(t, metrics)
runner.Test(t, uninstall)
}

Expand Down
4 changes: 4 additions & 0 deletions tests/e2e/tests/policyfilter/policyfilter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import (
"sigs.k8s.io/e2e-framework/pkg/envconf"
"sigs.k8s.io/e2e-framework/pkg/features"

"github.com/cilium/tetragon/tests/e2e/metricschecker"

"github.com/cilium/tetragon/api/v1/tetragon"
ec "github.com/cilium/tetragon/api/v1/tetragon/codegen/eventchecker"
"github.com/cilium/tetragon/tests/e2e/checker"
Expand Down Expand Up @@ -85,6 +87,7 @@ func TestMain(m *testing.M) {

func TestNamespacedPolicy(t *testing.T) {
checker := nsChecker().WithTimeLimit(30 * time.Second).WithEventLimit(20)
metricsChecker := metricschecker.NewMetricsChecker("policyMetricsChecker")

runEventChecker := features.New("Run Event Checks").
Assess("Run Event Checks", checker.CheckWithFilters(
Expand Down Expand Up @@ -127,6 +130,7 @@ func TestNamespacedPolicy(t *testing.T) {
}
return ctx
}).
Assess("Run Metrics Checks", metricsChecker.Greater("tetragon_policy_events_total", 0)).
Assess("Uninstall policy", func(ctx context.Context, _ *testing.T, c *envconf.Config) context.Context {
ctx, err := helpers.UnloadCRDString(policyNamespace, namespacedPolicy, false)(ctx, c)
if err != nil {
Expand Down
5 changes: 5 additions & 0 deletions tests/e2e/tests/skeleton/skeleton_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import (
"sigs.k8s.io/e2e-framework/pkg/envconf"
"sigs.k8s.io/e2e-framework/pkg/features"

"github.com/cilium/tetragon/tests/e2e/metricschecker"

ec "github.com/cilium/tetragon/api/v1/tetragon/codegen/eventchecker"
sm "github.com/cilium/tetragon/pkg/matchers/stringmatcher"
"github.com/cilium/tetragon/tests/e2e/checker"
Expand Down Expand Up @@ -88,6 +90,8 @@ func TestSkeletonBasic(t *testing.T) {
// Create an curl event checker with a limit or 10 events or 30 seconds, whichever comes first
curlChecker := curlEventChecker(kversion).WithEventLimit(100).WithTimeLimit(30 * time.Second)

metricsChecker := metricschecker.NewMetricsChecker("skeletonMetricsChecker")

// Define test features here. These can be used to perform actions like:
// - Spawning an event checker and running checks
// - Modifying resources in the cluster
Expand All @@ -110,6 +114,7 @@ func TestSkeletonBasic(t *testing.T) {
}
return ctx
}).
Assess("Run Metrics Checks", metricsChecker.Greater("tetragon_events_total", 0)).
Assess("Uninstall policy", func(ctx context.Context, _ *testing.T, c *envconf.Config) context.Context {
ctx, err := helpers.UnloadCRDString(namespace, curlPod, true)(ctx, c)
if err != nil {
Expand Down
Loading