Skip to content

Commit 7e94861

Browse files
BinaryFissionGamesdjaglowski
authored andcommitted
[cmd/opampsupervisor] Fix windows support (open-telemetry#34573)
**Description:** <Describe what has changed.> * Fixed windows improper signalling for the agent to shutdown. * Fixed not closing files in e2e tests (windows will error if a file is not closed when it attempts to delete the temp directory said file is in) * Fixed test config templates quoting (windows filepaths would have trouble since the backslash acts like a character escape) * Added a workflow to allow running e2e tests for windows **Link to tracking Issue:** Closes open-telemetry#34570 **Testing:** * Existing e2e tests cover this. * I've manually tested remotely configuring a windows supervisor with BindPlane. --------- Co-authored-by: Daniel Jaglowski <[email protected]>
1 parent e251e20 commit 7e94861

15 files changed

+185
-25
lines changed
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: bug_fix
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
7+
component: cmd/opampsupervisor
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: "Fix supervisor support for Windows."
11+
12+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
13+
issues: [34570]
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
name: e2e-tests-windows
2+
3+
on:
4+
push:
5+
branches:
6+
- main
7+
tags:
8+
- "v[0-9]+.[0-9]+.[0-9]+*"
9+
paths-ignore:
10+
- "**/README.md"
11+
pull_request:
12+
paths-ignore:
13+
- "**/README.md"
14+
merge_group:
15+
16+
env:
17+
# Make sure to exit early if cache segment download times out after 2 minutes.
18+
# We limit cache download as a whole to 5 minutes.
19+
SEGMENT_DOWNLOAD_TIMEOUT_MINS: 2
20+
21+
jobs:
22+
collector-build:
23+
runs-on: windows-latest
24+
if: ${{ github.actor != 'dependabot[bot]' && (contains(github.event.pull_request.labels.*.name, 'Run Windows') || github.event_name == 'push' || github.event_name == 'merge_group') }}
25+
steps:
26+
- name: Checkout
27+
uses: actions/checkout@v4
28+
- uses: actions/setup-go@v5
29+
with:
30+
go-version: "1.21.12"
31+
cache: false
32+
- name: Cache Go
33+
id: go-mod-cache
34+
timeout-minutes: 25
35+
uses: actions/cache@v4
36+
with:
37+
path: |
38+
~\go\pkg\mod
39+
~\AppData\Local\go-build
40+
key: go-build-cache-${{ runner.os }}-${{ matrix.group }}-go-${{ hashFiles('**/go.sum') }}
41+
- name: Install dependencies
42+
if: steps.go-mod-cache.outputs.cache-hit != 'true'
43+
run: make -j2 gomoddownload
44+
- name: Build Collector
45+
run: make otelcontribcol
46+
- name: Upload Collector Binary
47+
uses: actions/upload-artifact@v4
48+
with:
49+
name: collector-binary
50+
path: ./bin/*
51+
52+
supervisor-test:
53+
runs-on: windows-latest
54+
if: ${{ github.actor != 'dependabot[bot]' && (contains(github.event.pull_request.labels.*.name, 'Run Windows') || github.event_name == 'push' || github.event_name == 'merge_group') }}
55+
needs: [collector-build]
56+
steps:
57+
- uses: actions/checkout@v4
58+
- uses: actions/setup-go@v5
59+
with:
60+
go-version: "1.21.12"
61+
cache: false
62+
- name: Cache Go
63+
id: go-mod-cache
64+
timeout-minutes: 25
65+
uses: actions/cache@v4
66+
with:
67+
path: |
68+
~\go\pkg\mod
69+
~\AppData\Local\go-build
70+
key: go-build-cache-${{ runner.os }}-${{ matrix.group }}-go-${{ hashFiles('**/go.sum') }}
71+
- name: Install dependencies
72+
if: steps.go-mod-cache.outputs.cache-hit != 'true'
73+
run: make -j2 gomoddownload
74+
- name: Download Collector Binary
75+
uses: actions/download-artifact@v4
76+
with:
77+
name: collector-binary
78+
path: bin/
79+
- name: Run opampsupervisor e2e tests
80+
run: |
81+
cd cmd/opampsupervisor
82+
go test -v --tags=e2e

cmd/opampsupervisor/e2e_test.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ func getSupervisorConfig(t *testing.T, configType string, extraConfigData map[st
172172
if runtime.GOOS == "windows" {
173173
extension = ".exe"
174174
}
175+
175176
configData := map[string]string{
176177
"goos": runtime.GOOS,
177178
"goarch": runtime.GOARCH,
@@ -184,7 +185,10 @@ func getSupervisorConfig(t *testing.T, configType string, extraConfigData map[st
184185
}
185186
err = templ.Execute(&buf, configData)
186187
require.NoError(t, err)
187-
cfgFile, _ := os.CreateTemp(t.TempDir(), "config_*.yaml")
188+
cfgFile, err := os.CreateTemp(t.TempDir(), "config_*.yaml")
189+
require.NoError(t, err)
190+
t.Cleanup(func() { cfgFile.Close() })
191+
188192
_, err = cfgFile.Write(buf.Bytes())
189193
require.NoError(t, err)
190194

@@ -617,11 +621,13 @@ func TestSupervisorReportsEffectiveConfig(t *testing.T) {
617621
tempDir := t.TempDir()
618622
testKeyFile, err := os.CreateTemp(tempDir, "confKey")
619623
require.NoError(t, err)
624+
t.Cleanup(func() { testKeyFile.Close() })
625+
620626
n, err := testKeyFile.Write([]byte(testKeyFile.Name()))
621627
require.NoError(t, err)
622628
require.NotZero(t, n)
623629

624-
colCfgTpl, err := os.ReadFile(path.Join("testdata", "collector", "split_config.yaml"))
630+
colCfgTpl, err := os.ReadFile(filepath.Join("testdata", "collector", "split_config.yaml"))
625631
require.NoError(t, err)
626632

627633
templ, err := template.New("").Parse(string(colCfgTpl))
@@ -770,9 +776,11 @@ func createSimplePipelineCollectorConf(t *testing.T) (*bytes.Buffer, []byte, *os
770776
tempDir := t.TempDir()
771777
inputFile, err := os.CreateTemp(tempDir, "input_*.yaml")
772778
require.NoError(t, err)
779+
t.Cleanup(func() { inputFile.Close() })
773780

774781
outputFile, err := os.CreateTemp(tempDir, "output_*.yaml")
775782
require.NoError(t, err)
783+
t.Cleanup(func() { outputFile.Close() })
776784

777785
colCfgTpl, err := os.ReadFile(path.Join(wd, "testdata", "collector", "simple_pipeline.yaml"))
778786
require.NoError(t, err)

cmd/opampsupervisor/go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ require (
1717
go.opentelemetry.io/collector/semconv v0.107.1-0.20240816132030-9fd84668bb02
1818
go.uber.org/goleak v1.3.0
1919
go.uber.org/zap v1.27.0
20+
golang.org/x/sys v0.21.0
2021
google.golang.org/protobuf v1.34.2
2122
gopkg.in/yaml.v3 v3.0.1
2223
)
@@ -32,5 +33,4 @@ require (
3233
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
3334
go.uber.org/multierr v1.11.0 // indirect
3435
golang.org/x/net v0.23.0 // indirect
35-
golang.org/x/sys v0.21.0 // indirect
3636
)

cmd/opampsupervisor/supervisor/commander/commander.go

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111
"os/exec"
1212
"path/filepath"
1313
"sync/atomic"
14-
"syscall"
1514
"time"
1615

1716
"go.uber.org/zap"
@@ -73,26 +72,28 @@ func (c *Commander) Start(ctx context.Context) error {
7372
c.logger.Debug("Starting agent", zap.String("agent", c.cfg.Executable))
7473

7574
logFilePath := filepath.Join(c.logsDir, "agent.log")
76-
logFile, err := os.Create(logFilePath)
75+
stdoutFile, err := os.Create(logFilePath)
7776
if err != nil {
7877
return fmt.Errorf("cannot create %s: %w", logFilePath, err)
7978
}
8079

8180
c.cmd = exec.CommandContext(ctx, c.cfg.Executable, c.args...) // #nosec G204
81+
c.cmd.SysProcAttr = sysProcAttrs()
8282

8383
// Capture standard output and standard error.
8484
// https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/21072
85-
c.cmd.Stdout = logFile
86-
c.cmd.Stderr = logFile
85+
c.cmd.Stdout = stdoutFile
86+
c.cmd.Stderr = stdoutFile
8787

8888
if err := c.cmd.Start(); err != nil {
89+
stdoutFile.Close()
8990
return err
9091
}
9192

9293
c.logger.Debug("Agent process started", zap.Int("pid", c.cmd.Process.Pid))
9394
c.running.Store(1)
9495

95-
go c.watch()
96+
go c.watch(stdoutFile)
9697

9798
return nil
9899
}
@@ -106,7 +107,9 @@ func (c *Commander) Restart(ctx context.Context) error {
106107
return c.Start(ctx)
107108
}
108109

109-
func (c *Commander) watch() {
110+
func (c *Commander) watch(stdoutFile *os.File) {
111+
defer stdoutFile.Close()
112+
110113
err := c.cmd.Wait()
111114

112115
// cmd.Wait returns an exec.ExitError when the Collector exits unsuccessfully or stops
@@ -160,7 +163,7 @@ func (c *Commander) Stop(ctx context.Context) error {
160163
c.logger.Debug("Stopping agent process", zap.Int("pid", pid))
161164

162165
// Gracefully signal process to stop.
163-
if err := c.cmd.Process.Signal(syscall.SIGTERM); err != nil {
166+
if err := sendShutdownSignal(c.cmd.Process); err != nil {
164167
return err
165168
}
166169

@@ -181,7 +184,7 @@ func (c *Commander) Stop(ctx context.Context) error {
181184
c.logger.Debug(
182185
"Agent process is not responding to SIGTERM. Sending SIGKILL to kill forcibly.",
183186
zap.Int("pid", pid))
184-
if innerErr = c.cmd.Process.Signal(syscall.SIGKILL); innerErr != nil {
187+
if innerErr = c.cmd.Process.Signal(os.Kill); innerErr != nil {
185188
return
186189
}
187190
}()
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
//go:build !windows
5+
6+
package commander
7+
8+
import (
9+
"os"
10+
"syscall"
11+
)
12+
13+
func sendShutdownSignal(process *os.Process) error {
14+
return process.Signal(os.Interrupt)
15+
}
16+
17+
func sysProcAttrs() *syscall.SysProcAttr {
18+
// On non-windows systems, no extra attributes are needed.
19+
return nil
20+
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// Copyright The OpenTelemetry Authors
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
//go:build windows
5+
6+
package commander
7+
8+
import (
9+
"os"
10+
"syscall"
11+
12+
"golang.org/x/sys/windows"
13+
)
14+
15+
var (
16+
kernel32API = windows.NewLazySystemDLL("kernel32.dll")
17+
18+
ctrlEventProc = kernel32API.NewProc("GenerateConsoleCtrlEvent")
19+
)
20+
21+
func sendShutdownSignal(process *os.Process) error {
22+
// signaling with os.Interrupt is not supported on windows systems,
23+
// so we need to use the windows API to properly send a graceful shutdown signal.
24+
// See: https://learn.microsoft.com/en-us/windows/console/generateconsolectrlevent
25+
r, _, e := ctrlEventProc.Call(syscall.CTRL_BREAK_EVENT, uintptr(process.Pid))
26+
if r == 0 {
27+
return e
28+
}
29+
30+
return nil
31+
}
32+
33+
func sysProcAttrs() *syscall.SysProcAttr {
34+
// By default, a ctrl-break event applies to a whole process group, which ends up
35+
// shutting down the supervisor. Instead, we spawn the collector in its own process
36+
// group, so that sending a ctrl-break event shuts down just the collector.
37+
return &syscall.SysProcAttr{
38+
CreationFlags: syscall.CREATE_NEW_PROCESS_GROUP,
39+
}
40+
}

cmd/opampsupervisor/supervisor/supervisor.go

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -459,17 +459,11 @@ func (s *Supervisor) startOpAMPClient() error {
459459
func (s *Supervisor) startOpAMPServer() error {
460460
s.opampServer = server.New(newLoggerFromZap(s.logger))
461461

462-
var err error
463-
s.opampServerPort, err = s.findRandomPort()
464-
if err != nil {
465-
return err
466-
}
467-
468462
s.logger.Debug("Starting OpAMP server...")
469463

470464
connected := &atomic.Bool{}
471465

472-
err = s.opampServer.Start(flattenedSettings{
466+
err := s.opampServer.Start(flattenedSettings{
473467
endpoint: fmt.Sprintf("localhost:%d", s.opampServerPort),
474468
onConnectingFunc: func(_ *http.Request) (bool, int) {
475469
// Only allow one agent to be connected the this server at a time.

cmd/opampsupervisor/testdata/collector/split_config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,4 @@ service:
1111
exporters: [nop]
1212
telemetry:
1313
resource:
14-
test_key: "${file:{{.TestKeyFile}}}"
14+
test_key: '${file:{{.TestKeyFile}}}'

cmd/opampsupervisor/testdata/supervisor/supervisor_accepts_conn.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ capabilities:
1212
accepts_opamp_connection_settings: true
1313

1414
storage:
15-
directory: "{{.storage_dir}}"
15+
directory: '{{.storage_dir}}'
1616

1717
agent:
1818
executable: ../../bin/otelcontribcol_{{.goos}}_{{.goarch}}{{.extension}}

cmd/opampsupervisor/testdata/supervisor/supervisor_agent_description.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ capabilities:
1212
accepts_restart_command: true
1313

1414
storage:
15-
directory: "{{.storage_dir}}"
15+
directory: '{{.storage_dir}}'
1616

1717
agent:
1818
executable: ../../bin/otelcontribcol_{{.goos}}_{{.goarch}}{{.extension}}

cmd/opampsupervisor/testdata/supervisor/supervisor_basic.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ capabilities:
1212
accepts_restart_command: true
1313

1414
storage:
15-
directory: "{{.storage_dir}}"
15+
directory: '{{.storage_dir}}'
1616

1717
agent:
1818
executable: ../../bin/otelcontribcol_{{.goos}}_{{.goarch}}{{.extension}}

cmd/opampsupervisor/testdata/supervisor/supervisor_nocap.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ capabilities:
1111
reports_remote_config: false
1212

1313
storage:
14-
directory: "{{.storage_dir}}"
14+
directory: '{{.storage_dir}}'
1515

1616
agent:
1717
executable: ../../bin/otelcontribcol_{{.goos}}_{{.goarch}}{{.extension}}

cmd/opampsupervisor/testdata/supervisor/supervisor_persistence.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ capabilities:
1111
reports_remote_config: true
1212

1313
storage:
14-
directory: {{.storage_dir}}
14+
directory: '{{.storage_dir}}'
1515

1616
agent:
1717
executable: ../../bin/otelcontribcol_{{.goos}}_{{.goarch}}{{.extension}}

cmd/opampsupervisor/testdata/supervisor/supervisor_test.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ capabilities:
1111
reports_remote_config: true
1212

1313
storage:
14-
directory: "{{.storage_dir}}"
14+
directory: '{{.storage_dir}}'
1515

1616
agent:
1717
executable: ../../bin/otelcontribcol_darwin_arm64

0 commit comments

Comments
 (0)