Skip to content

Commit 1fdc18a

Browse files
pipiland2612 authored and amol-verma-allen committed
jaegertracing#5608 Improve SPM e2e test with test for error rate (jaegertracing#6991)
## Which problem is this PR solving?
- part of jaegertracing#5608

## Description of the changes
- Add smoke test for `/metrics/errors`
- validate correct service name
- validate correct labels (case `groupByOperation=true`)

## How was this change tested?
- run `scripts/e2e/spm.sh` locally

## Checklist
- [x] I have read https://github.com/jaegertracing/jaeger/blob/master/CONTRIBUTING_GUIDELINES.md
- [x] I have signed all commits
- [x] I have added unit tests for the new functionality
- [x] I have run lint and test steps successfully
- for `jaeger`: `make lint test`
- for `jaeger-ui`: `npm run lint` and `npm run test`

---------

Signed-off-by: pipiland <[email protected]>
1 parent 1577f9a commit 1fdc18a

File tree

1 file changed

+75
-26
lines changed

1 file changed

+75
-26
lines changed

scripts/e2e/spm.sh

Lines changed: 75 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -76,53 +76,102 @@ validate_service_metrics() {
7676
local fiveMinutes=300000
7777
local oneMinute=60000
7878
local fifteenSec=15000 # Prometheus is also configured to scrape every 15sec.
79+
7980
# When endTs=(blank) the server will default it to now().
8081
local url="http://localhost:16686/api/metrics/calls?service=${service}&endTs=&lookback=${fiveMinutes}&step=${fifteenSec}&ratePer=${oneMinute}"
8182
response=$(curl -s "$url")
82-
service_name=$(echo "$response" | jq -r 'if .metrics and .metrics[0] then .metrics[0].labels[] | select(.name=="service_name") | .value else empty end')
83-
if [ "$service_name" != "$service" ]; then
84-
echo "⏳ No metrics found for service '$service'"
83+
if ! assert_service_name_equals "$response" "$service" ; then
8584
return 1
8685
fi
87-
# Store the values in an array
88-
mapfile -t metric_points < <(echo "$response" | jq -r '.metrics[0].metricPoints[].gaugeValue.doubleValue')
89-
echo "Metric datapoints found for service '$service': " "${metric_points[@]}"
90-
# Check that atleast some values are non-zero after the threshold
91-
local non_zero_count=0
86+
87+
# Check that at least some values are non-zero after the threshold
88+
local non_zero_count
89+
non_zero_count=$(count_non_zero_metrics_point "$response")
9290
local expected_non_zero_count=4
93-
local zero_count=0
91+
local zero_count
92+
zero_count=$(count_zero_metrics_point "$response")
9493
local expected_max_zero_count=4
95-
for value in "${metric_points[@]}"; do
96-
if [[ $(echo "$value > 0.0" | bc) == "1" ]]; then
97-
non_zero_count=$((non_zero_count + 1))
98-
else
99-
zero_count=$((zero_count + 1))
100-
fi
94+
echo "⏳ Metrics data points found: ${zero_count} zero, ${non_zero_count} non-zero"
10195

102-
if [[ $zero_count -gt $expected_max_zero_count ]]; then
103-
echo "❌ ERROR: Zero values crossing threshold limit not expected (Threshold limit - '$expected_max_zero_count')"
104-
return 1
105-
fi
106-
done
107-
if [ $non_zero_count -lt $expected_non_zero_count ]; then
96+
if [[ $zero_count -gt $expected_max_zero_count ]]; then
97+
echo "❌ ERROR: Zero values crossing threshold limit not expected (Threshold limit - '$expected_max_zero_count')"
98+
return 1
99+
fi
100+
if [[ $non_zero_count -lt $expected_non_zero_count ]]; then
108101
echo "⏳ Expecting at least 4 non-zero data points"
109102
return 1
110103
fi
111104

112105
# Validate if labels are correct
113106
local url="http://localhost:16686/api/metrics/calls?service=${service}&groupByOperation=true&endTs=&lookback=${fiveMinutes}&step=${fifteenSec}&ratePer=${oneMinute}"
107+
response=$(curl -s "$url")
108+
if ! assert_labels_set_equals "$response" "operation service_name" ; then
109+
return 1
110+
fi
114111

115-
local labels
116-
labels=$(curl -s "$url" | jq -r '.metrics[0].labels[].name' | sort | tr '\n' ' ')
117-
local exp_labels="operation service_name "
112+
### Validate Errors Rate metrics
113+
local url="http://localhost:16686/api/metrics/errors?service=${service}&endTs=&lookback=${fiveMinutes}&step=${fifteenSec}&ratePer=${oneMinute}"
114+
response=$(curl -s "$url")
115+
if ! assert_service_name_equals "$response" "$service" ; then
116+
return 1
117+
fi
118118

119-
if [[ "$labels" != "$exp_labels" ]]; then
120-
echo "❌ ERROR: Obtained labels: '$labels' are not same as expected labels: '$exp_labels'"
119+
local url="http://localhost:16686/api/metrics/errors?service=${service}&groupByOperation=true&endTs=&lookback=${fiveMinutes}&step=${fifteenSec}&ratePer=${oneMinute}"
120+
response=$(curl -s "$url")
121+
if ! assert_labels_set_equals "$response" "operation service_name" ; then
121122
return 1
122123
fi
124+
125+
non_zero_count=$(count_non_zero_metrics_point "$response")
126+
local services_with_error="driver frontend ui redis"
127+
if [[ "$services_with_error" =~ $service ]]; then # the service is in the list
128+
if [[ $non_zero_count == "0" ]]; then
129+
echo "❌ ERROR: expect service $service to have positive errors rate"
130+
return 1
131+
fi
132+
else
133+
if [[ $non_zero_count != "0" ]]; then
134+
echo "❌ ERROR: expect service $service to have 0 errors, but have $non_zero_count data points with positive errors"
135+
return 1
136+
fi
137+
fi
138+
139+
123140
return 0
124141
}
125142

143+
#######################################
# Asserts that the first metric in a metrics API response carries the
# expected "service_name" label value.
# Arguments:
#   $1 - JSON response body
#   $2 - expected value of the "service_name" label
# Outputs:
#   Writes an error message to stdout when the values differ.
# Returns:
#   0 when the label matches; 1 otherwise (this includes a response with
#   no metrics, for which the extracted name is empty).
#######################################
assert_service_name_equals() {
  local response=$1
  local expected=$2
  # Kept local so that calling this helper never creates or overwrites a
  # global variable named service_name.
  local service_name
  service_name=$(echo "$response" | jq -r 'if .metrics and .metrics[0] then .metrics[0].labels[] | select(.name=="service_name") | .value else empty end')
  if [[ "$service_name" != "$expected" ]]; then
    echo "❌ ERROR: Obtained service_name: '$service_name' are not same as expected: '$expected'"
    return 1
  fi
  return 0
}
153+
154+
#######################################
# Asserts that the label names of the first metric in a metrics API
# response are exactly the expected set of names.
# Arguments:
#   $1 - JSON response body
#   $2 - expected label names, separated by spaces (any order)
# Outputs:
#   Writes an error message to stdout when the sets differ.
# Returns:
#   0 when the label names match; 1 otherwise.
#######################################
assert_labels_set_equals() {
  local response=$1
  local -a expected_names=()
  local expected labels

  # Both sides are normalised the same way: one name per line, byte-wise
  # sorted, then joined with a trailing space after every name. This makes
  # the comparison independent of the caller's ordering and of the locale.
  read -ra expected_names <<<"$2"
  expected=$(printf '%s\n' "${expected_names[@]}" | LC_ALL=C sort | tr '\n' ' ')
  labels=$(echo "$response" | jq -r '.metrics[0].labels[].name' | LC_ALL=C sort | tr '\n' ' ')

  if [[ "$labels" != "$expected" ]]; then
    echo "❌ ERROR: Obtained labels: '$labels' are not same as expected labels: '$expected'"
    return 1
  fi
  return 0
}
166+
167+
#######################################
# Prints how many data points of the first metric in a metrics API
# response have a gauge value equal to zero.
# Arguments:
#   $1 - JSON response body
# Outputs:
#   The count, on stdout.
#######################################
count_zero_metrics_point() {
  local payload=$1
  echo "$payload" \
    | jq -r '.metrics[0].metricPoints | map(.gaugeValue.doubleValue | select(. == 0)) | length'
}
170+
171+
#######################################
# Prints how many data points of the first metric in a metrics API
# response have a numeric, non-zero gauge value.
# Arguments:
#   $1 - JSON response body
# Outputs:
#   The count, on stdout.
#######################################
count_non_zero_metrics_point() {
  # In jq, `. != 0` is also true for null, for strings (e.g. "NaN") and for
  # NaN numbers, so a bare `select(. != 0)` would count missing or
  # non-numeric points as non-zero. Restrict to real numbers first.
  echo "$1" | jq -r '[.metrics[0].metricPoints[].gaugeValue.doubleValue | select(type == "number") | select((isnan | not) and . != 0)] | length'
}
174+
126175
check_spm() {
127176
local wait_seconds=10
128177
local successful_service=0

0 commit comments

Comments
 (0)