Skip to content

Commit 18cb683

Browse files
[v2] Configure health check extension for all configs (#5861)
**Which problem is this PR solving?** Part of #5633, part of #5859 **Description of the changes** * Integrate health check extension to monitor and report Jaeger V2 component's health * Enhance all-in-one CI test to ping the new health port **How was this change tested?** The changes were tested by running the following command: ```bash make test ``` ```bash CI actions and new Unit Tests ``` **Checklist** - [x] I have read [CONTRIBUTING_GUIDELINES.md](https://github.com/jaegertracing/jaeger/blob/master/CONTRIBUTING_GUIDELINES.md) - [x] I have signed all commits - [x] I have added unit tests for the new functionality - [x] I have run lint and test steps successfully - `for jaeger: make lint test` - `for jaeger-ui: yarn lint` and `yarn test` --------- Signed-off-by: Wise-Wizard <[email protected]> Signed-off-by: Yuri Shkuro <[email protected]> Co-authored-by: Yuri Shkuro <[email protected]> Co-authored-by: Yuri Shkuro <[email protected]>
1 parent 8f2543c commit 18cb683

14 files changed

+146
-51
lines changed

cmd/jaeger/config-badger.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
service:
2-
extensions: [jaeger_storage, jaeger_query]
2+
extensions: [jaeger_storage, jaeger_query, healthcheckv2]
33
pipelines:
44
traces:
55
receivers: [otlp]
66
processors: [batch]
77
exporters: [jaeger_storage_exporter]
88

99
extensions:
10+
healthcheckv2:
11+
use_v2: true
12+
http:
13+
1014
jaeger_query:
1115
trace_storage: some_store
1216
trace_storage_archive: another_store

cmd/jaeger/config-cassandra.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
service:
2-
extensions: [jaeger_storage, jaeger_query]
2+
extensions: [jaeger_storage, jaeger_query, healthcheckv2]
33
pipelines:
44
traces:
55
receivers: [otlp]
66
processors: [batch]
77
exporters: [jaeger_storage_exporter]
88

99
extensions:
10+
healthcheckv2:
11+
use_v2: true
12+
http:
13+
1014
jaeger_query:
1115
trace_storage: some_storage
1216
trace_storage_archive: another_storage

cmd/jaeger/config-elasticsearch.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
service:
2-
extensions: [jaeger_storage, jaeger_query]
2+
extensions: [jaeger_storage, jaeger_query, healthcheckv2]
33
pipelines:
44
traces:
55
receivers: [otlp]
66
processors: [batch]
77
exporters: [jaeger_storage_exporter]
88

99
extensions:
10+
healthcheckv2:
11+
use_v2: true
12+
http:
13+
1014
jaeger_query:
1115
trace_storage: some_storage
1216
trace_storage_archive: another_storage

cmd/jaeger/collector-with-kafka.yaml renamed to cmd/jaeger/config-kafka-collector.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
service:
2+
extensions: [healthcheckv2]
23
pipelines:
34
traces:
45
receivers: [otlp, jaeger]
@@ -10,6 +11,11 @@ service:
1011
metrics:
1112
level: detailed
1213

14+
extensions:
15+
healthcheckv2:
16+
use_v2: true
17+
http:
18+
1319
receivers:
1420
otlp:
1521
protocols:

cmd/jaeger/ingester-remote-storage.yaml renamed to cmd/jaeger/config-kafka-ingester.yaml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
service:
2-
extensions: [jaeger_storage, jaeger_query]
2+
extensions: [jaeger_storage, jaeger_query, healthcheckv2]
33
pipelines:
44
traces:
55
receivers: [kafka]
@@ -12,6 +12,12 @@ service:
1212
level: debug
1313

1414
extensions:
15+
healthcheckv2:
16+
use_v2: true
17+
http:
18+
# use different port to avoid conflict with collector
19+
endpoint: 0.0.0.0:14133
20+
1521
jaeger_query:
1622
trace_storage: some_storage
1723

@@ -21,7 +27,7 @@ extensions:
2127
memory:
2228
max_traces: 100000
2329

24-
receivers:
30+
receivers:
2531
kafka:
2632
brokers:
2733
- localhost:9092

cmd/jaeger/config-opensearch.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
service:
2-
extensions: [jaeger_storage, jaeger_query]
2+
extensions: [jaeger_storage, jaeger_query, healthcheckv2]
33
pipelines:
44
traces:
55
receivers: [otlp]
66
processors: [batch]
77
exporters: [jaeger_storage_exporter]
88

99
extensions:
10+
healthcheckv2:
11+
use_v2: true
12+
http:
13+
1014
jaeger_query:
1115
trace_storage: some_storage
1216
trace_storage_archive: another_storage

cmd/jaeger/config-remote-storage.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
service:
2-
extensions: [jaeger_storage, jaeger_query]
2+
extensions: [jaeger_storage, jaeger_query, healthcheckv2]
33
pipelines:
44
traces:
55
receivers: [otlp]
66
processors: [batch]
77
exporters: [jaeger_storage_exporter]
88

99
extensions:
10+
healthcheckv2:
11+
use_v2: true
12+
http:
13+
1014
jaeger_query:
1115
trace_storage: some-storage
1216
ui_config: ./cmd/jaeger/config-ui.json

cmd/jaeger/config.yaml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
service:
2-
extensions: [jaeger_storage, jaeger_query, remote_sampling]
2+
extensions: [jaeger_storage, jaeger_query, remote_sampling, healthcheckv2]
33
pipelines:
44
traces:
55
receivers: [otlp, jaeger, zipkin]
66
processors: [batch, adaptive_sampling]
77
exporters: [jaeger_storage_exporter]
88

99
extensions:
10-
# health_check:
10+
healthcheckv2:
11+
use_v2: true
12+
http:
13+
1114
# pprof:
1215
# endpoint: 0.0.0.0:1777
1316
# zpages:

cmd/jaeger/internal/integration/e2e_integration.go

Lines changed: 70 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,16 @@
44
package integration
55

66
import (
7+
"bytes"
78
"context"
9+
"encoding/json"
810
"fmt"
911
"io"
1012
"net/http"
1113
"os"
1214
"os/exec"
1315
"path/filepath"
16+
"strings"
1417
"testing"
1518
"time"
1619

@@ -41,6 +44,7 @@ type E2EStorageIntegration struct {
4144

4245
SkipStorageCleaner bool
4346
ConfigFile string
47+
BinaryName string
4448
HealthCheckEndpoint string
4549
}
4650

@@ -49,32 +53,34 @@ type E2EStorageIntegration struct {
4953
// This function should be called before any of the tests start.
5054
func (s *E2EStorageIntegration) e2eInitialize(t *testing.T, storage string) {
5155
logger := zaptest.NewLogger(t, zaptest.WrapOptions(zap.AddCaller()))
56+
if s.BinaryName == "" {
57+
s.BinaryName = "jaeger-v2"
58+
}
5259
configFile := s.ConfigFile
5360
if !s.SkipStorageCleaner {
5461
configFile = createStorageCleanerConfig(t, s.ConfigFile, storage)
5562
}
56-
5763
configFile, err := filepath.Abs(configFile)
5864
require.NoError(t, err, "Failed to get absolute path of the config file")
5965
require.FileExists(t, configFile, "Config file does not exist at the resolved path")
6066

61-
t.Logf("Starting Jaeger-v2 in the background with config file %s", configFile)
67+
t.Logf("Starting %s in the background with config file %s", s.BinaryName, configFile)
6268

6369
outFile, err := os.OpenFile(
6470
filepath.Join(t.TempDir(), "jaeger_output_logs.txt"),
6571
os.O_CREATE|os.O_WRONLY,
6672
os.ModePerm,
6773
)
6874
require.NoError(t, err)
69-
t.Logf("Writing the Jaeger-v2 output logs into %s", outFile.Name())
75+
t.Logf("Writing the %s output logs into %s", s.BinaryName, outFile.Name())
7076

7177
errFile, err := os.OpenFile(
7278
filepath.Join(t.TempDir(), "jaeger_error_logs.txt"),
7379
os.O_CREATE|os.O_WRONLY,
7480
os.ModePerm,
7581
)
7682
require.NoError(t, err)
77-
t.Logf("Writing the Jaeger-v2 error logs into %s", errFile.Name())
83+
t.Logf("Writing the %s error logs into %s", s.BinaryName, errFile.Name())
7884

7985
cmd := exec.Cmd{
8086
Path: "./cmd/jaeger/jaeger",
@@ -88,57 +94,38 @@ func (s *E2EStorageIntegration) e2eInitialize(t *testing.T, storage string) {
8894
}
8995
t.Logf("Running command: %v", cmd.Args)
9096
require.NoError(t, cmd.Start())
91-
92-
// Wait for the binary to start and become ready to serve requests.
93-
healthCheckEndpoint := s.HealthCheckEndpoint
94-
if healthCheckEndpoint == "" {
95-
healthCheckEndpoint = fmt.Sprintf("http://localhost:%d/", ports.QueryHTTP)
96-
}
97-
require.Eventually(t, func() bool {
98-
t.Logf("Checking if Jaeger-v2 is available on %s", healthCheckEndpoint)
99-
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
100-
defer cancel()
101-
req, err := http.NewRequestWithContext(ctx, http.MethodGet, healthCheckEndpoint, nil)
102-
if err != nil {
103-
t.Logf("HTTP request creation failed: %v", err)
104-
return false
105-
}
106-
resp, err := http.DefaultClient.Do(req)
107-
if err != nil {
108-
t.Logf("HTTP request failed: %v", err)
109-
return false
110-
}
111-
defer resp.Body.Close()
112-
return resp.StatusCode == http.StatusOK
113-
}, 60*time.Second, 3*time.Second, "Jaeger-v2 did not start")
114-
t.Log("Jaeger-v2 is ready")
11597
t.Cleanup(func() {
11698
if err := cmd.Process.Kill(); err != nil {
117-
t.Errorf("Failed to kill Jaeger-v2 process: %v", err)
99+
t.Errorf("Failed to kill %s process: %v", s.BinaryName, err)
118100
}
119101
if t.Failed() {
120102
// A Github Actions special annotation to create a foldable section
121103
// in the Github runner output.
122104
// https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#grouping-log-lines
123-
fmt.Println("::group::🚧 🚧 🚧 Jaeger-v2 binary logs")
105+
fmt.Printf("::group::🚧 🚧 🚧 %s binary logs\n", s.BinaryName)
124106
outLogs, err := os.ReadFile(outFile.Name())
125107
if err != nil {
126108
t.Errorf("Failed to read output logs: %v", err)
127109
} else {
128-
fmt.Printf("🚧 🚧 🚧 Jaeger-v2 output logs:\n%s", outLogs)
110+
fmt.Printf("🚧 🚧 🚧 %s output logs:\n%s", s.BinaryName, outLogs)
129111
}
130112

131113
errLogs, err := os.ReadFile(errFile.Name())
132114
if err != nil {
133115
t.Errorf("Failed to read error logs: %v", err)
134116
} else {
135-
fmt.Printf("🚧 🚧 🚧 Jaeger-v2 error logs:\n%s", errLogs)
117+
fmt.Printf("🚧 🚧 🚧 %s error logs:\n%s", s.BinaryName, errLogs)
136118
}
137119
// End of Github Actions foldable section annotation.
138120
fmt.Println("::endgroup::")
139121
}
140122
})
141123

124+
// Wait for the binary to start and become ready to serve requests.
125+
require.Eventually(t, func() bool { return s.doHealthCheck(t) },
126+
60*time.Second, 3*time.Second, "%s did not start", s.BinaryName)
127+
t.Logf("%s is ready", s.BinaryName)
128+
142129
s.SpanWriter, err = createSpanWriter(logger, otlpPort)
143130
require.NoError(t, err)
144131
s.SpanReader, err = createSpanReader(logger, ports.QueryGRPC)
@@ -150,6 +137,56 @@ func (s *E2EStorageIntegration) e2eInitialize(t *testing.T, storage string) {
150137
})
151138
}
152139

140+
func (s *E2EStorageIntegration) doHealthCheck(t *testing.T) bool {
141+
healthCheckEndpoint := s.HealthCheckEndpoint
142+
if healthCheckEndpoint == "" {
143+
healthCheckEndpoint = "http://localhost:13133/status"
144+
}
145+
t.Logf("Checking if %s is available on %s", s.BinaryName, healthCheckEndpoint)
146+
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
147+
defer cancel()
148+
req, err := http.NewRequestWithContext(ctx, http.MethodGet, healthCheckEndpoint, nil)
149+
if err != nil {
150+
t.Logf("HTTP request creation failed: %v", err)
151+
return false
152+
}
153+
resp, err := http.DefaultClient.Do(req)
154+
if err != nil {
155+
t.Logf("HTTP request failed: %v", err)
156+
return false
157+
}
158+
defer resp.Body.Close()
159+
body, err := io.ReadAll(resp.Body)
160+
if err != nil {
161+
t.Logf("Failed to read HTTP response body: %v", err)
162+
return false
163+
}
164+
if resp.StatusCode != http.StatusOK {
165+
t.Logf("HTTP response not OK: %v", string(body))
166+
return false
167+
}
168+
// for backwards compatibility with other healthchecks
169+
if !strings.HasSuffix(healthCheckEndpoint, "/status") {
170+
t.Logf("OK HTTP from endpoint that is not healthcheckv2")
171+
return true
172+
}
173+
174+
var healthResponse struct {
175+
Status string `json:"status"`
176+
}
177+
if err := json.NewDecoder(bytes.NewReader(body)).Decode(&healthResponse); err != nil {
178+
t.Logf("Failed to decode JSON response '%s': %v", string(body), err)
179+
return false
180+
}
181+
182+
// Check if the status field in the JSON is "StatusOK"
183+
if healthResponse.Status != "StatusOK" {
184+
t.Logf("Received non-K status %s: %s", healthResponse.Status, string(body))
185+
return false
186+
}
187+
return true
188+
}
189+
153190
// e2eCleanUp closes the SpanReader and SpanWriter gRPC connection.
154191
// This function should be called after all the tests are finished.
155192
func (s *E2EStorageIntegration) e2eCleanUp(t *testing.T) {
@@ -205,6 +242,7 @@ func createStorageCleanerConfig(t *testing.T, configFile string, storage string)
205242
err = os.WriteFile(tempFile, newData, 0o600)
206243
require.NoError(t, err)
207244

245+
t.Logf("Transformed configuration file %s to %s", configFile, tempFile)
208246
return tempFile
209247
}
210248

cmd/jaeger/internal/integration/grpc_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@
44
package integration
55

66
import (
7+
"fmt"
78
"testing"
89

910
"github.com/jaegertracing/jaeger/plugin/storage/integration"
11+
"github.com/jaegertracing/jaeger/ports"
1012
)
1113

1214
type GRPCStorageIntegration struct {
@@ -30,6 +32,8 @@ func TestGRPCStorage(t *testing.T) {
3032
s := &GRPCStorageIntegration{
3133
E2EStorageIntegration: E2EStorageIntegration{
3234
ConfigFile: "../../config-remote-storage.yaml",
35+
// TODO this should be removed in favor of default health check endpoint
36+
HealthCheckEndpoint: fmt.Sprintf("http://localhost:%d/", ports.QueryHTTP),
3337
},
3438
}
3539
s.CleanUp = s.cleanUp

0 commit comments

Comments
 (0)