Skip to content

Commit da15e5f

Browse files
authored
[observer/ecs] Don't report EC2 tasks with unassigned container instances (#23279)
When ECS task is in state Provisioning/Pending, it can contain container(s) which don't have EC2 instance yet. Such containers have `nil` instance arn. This change fixes service discovery error: ```error [email protected]/error.go:77 attachContainerInstance failed: describe container instanced failed offset=0: ecs.DescribeContainerInstance failed: InvalidParameterException: Container instance can not be blank. {"kind": "extension", "name": "ecs_observer", "ErrScope": "Unknown"} ``` **Testing:** Related unit test is added.
1 parent aeb0bc9 commit da15e5f

File tree

3 files changed

+82
-20
lines changed

3 files changed

+82
-20
lines changed
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Use this changelog template to create an entry for release notes.
2+
# If your change doesn't affect end users, such as a test fix or a tooling change,
3+
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
4+
5+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
6+
change_type: bug_fix
7+
8+
# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
9+
component: ecsobserver
10+
11+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
12+
note: Don't fail with error when finding a task of EC2 launch type and missing container instance, just ignore them. This fixes behavior when task is provisioning and its containers are not assigned to instances yet.
13+
14+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
15+
issues: [23279]
16+
17+
# (Optional) One or more lines of additional information to render under the primary note.
18+
# These lines will be padded with 2 spaces and then inserted directly into the document.
19+
# Use pipe (|) for multiline entries.
20+
subtext:

extension/observer/ecsobserver/fetcher.go

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -128,9 +128,9 @@ func newTaskFetcher(opts taskFetcherOptions) (*taskFetcher, error) {
128128

129129
func (f *taskFetcher) fetchAndDecorate(ctx context.Context) ([]*taskAnnotated, error) {
130130
// taskAnnotated
131-
rawTasks, err := f.getAllTasks(ctx)
131+
rawTasks, err := f.getDiscoverableTasks(ctx)
132132
if err != nil {
133-
return nil, fmt.Errorf("getAllTasks failed: %w", err)
133+
return nil, fmt.Errorf("getDiscoverableTasks failed: %w", err)
134134
}
135135
tasks, err := f.attachTaskDefinition(ctx, rawTasks)
136136
if err != nil {
@@ -151,9 +151,10 @@ func (f *taskFetcher) fetchAndDecorate(ctx context.Context) ([]*taskAnnotated, e
151151
return tasks, nil
152152
}
153153

154-
// getAllTasks get arns of all running tasks and describe those tasks.
154+
// getDiscoverableTasks get arns of all running tasks and describe those tasks
155+
// and filter only fargate tasks or EC2 task which container instance is known.
155156
// There is no API to list task detail without arn so we need to call two APIs.
156-
func (f *taskFetcher) getAllTasks(ctx context.Context) ([]*ecs.Task, error) {
157+
func (f *taskFetcher) getDiscoverableTasks(ctx context.Context) ([]*ecs.Task, error) {
157158
svc := f.ecs
158159
cluster := aws.String(f.cluster)
159160
req := ecs.ListTasksInput{Cluster: cluster}
@@ -171,7 +172,16 @@ func (f *taskFetcher) getAllTasks(ctx context.Context) ([]*ecs.Task, error) {
171172
if err != nil {
172173
return nil, fmt.Errorf("ecs.DescribeTasks failed: %w", err)
173174
}
174-
tasks = append(tasks, descRes.Tasks...)
175+
176+
for _, task := range descRes.Tasks {
177+
// Preserve only fargate tasks or EC2 tasks with non-nil ContainerInstanceArn.
178+
// When ECS task of EC2 launch type is in state Provisioning/Pending, it may
179+
// not have EC2 instance. Such tasks have `nil` instance arn and the
180+
// attachContainerInstance call will fail
181+
if task.ContainerInstanceArn != nil || aws.StringValue(task.LaunchType) != ecs.LaunchTypeEc2 {
182+
tasks = append(tasks, task)
183+
}
184+
}
175185
if listRes.NextToken == nil {
176186
break
177187
}

extension/observer/ecsobserver/fetcher_test.go

Lines changed: 47 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -69,15 +69,47 @@ func TestFetcher_FetchAndDecorate(t *testing.T) {
6969
assert.Equal(t, "s0", aws.StringValue(tasks[0].Service.ServiceArn))
7070
}
7171

72-
func TestFetcher_GetAllTasks(t *testing.T) {
73-
c := ecsmock.NewCluster()
74-
f := newTestTaskFetcher(t, c)
75-
const nTasks = 203
76-
c.SetTasks(ecsmock.GenTasks("p", nTasks, nil))
77-
ctx := context.Background()
78-
tasks, err := f.getAllTasks(ctx)
79-
require.NoError(t, err)
80-
assert.Equal(t, nTasks, len(tasks))
72+
func TestFetcher_GetDiscoverableTasks(t *testing.T) {
73+
t.Run("without non discoverable tasks", func(t *testing.T) {
74+
c := ecsmock.NewCluster()
75+
f := newTestTaskFetcher(t, c)
76+
const nTasks = 203
77+
c.SetTasks(ecsmock.GenTasks("p", nTasks, nil))
78+
ctx := context.Background()
79+
tasks, err := f.getDiscoverableTasks(ctx)
80+
require.NoError(t, err)
81+
assert.Equal(t, nTasks, len(tasks))
82+
})
83+
84+
t.Run("with non discoverable tasks", func(t *testing.T) {
85+
c := ecsmock.NewCluster()
86+
f := newTestTaskFetcher(t, c)
87+
nTasks := 3
88+
89+
c.SetTaskDefinitions(ecsmock.GenTaskDefinitions("d", 1, 1, nil))
90+
c.SetTasks(ecsmock.GenTasks("t", nTasks, func(i int, task *ecs.Task) {
91+
task.TaskDefinitionArn = aws.String("d0:1")
92+
switch i {
93+
case 0:
94+
task.LaunchType = aws.String(ecs.LaunchTypeEc2)
95+
task.ContainerInstanceArn = nil
96+
case 1:
97+
task.LaunchType = aws.String(ecs.LaunchTypeFargate)
98+
case 2:
99+
task.LaunchType = aws.String(ecs.LaunchTypeEc2)
100+
task.ContainerInstanceArn = aws.String("ci0")
101+
}
102+
}))
103+
104+
ctx := context.Background()
105+
tasks, err := f.getDiscoverableTasks(ctx)
106+
require.NoError(t, err)
107+
108+
// Expect 2 tasks, with LaunchType Fargate and EC2 with non-nil ContainerInstanceArn
109+
assert.Equal(t, 2, len(tasks))
110+
assert.Equal(t, ecs.LaunchTypeFargate, aws.StringValue(tasks[0].LaunchType))
111+
assert.Equal(t, ecs.LaunchTypeEc2, aws.StringValue(tasks[1].LaunchType))
112+
})
81113
}
82114

83115
func TestFetcher_AttachTaskDefinitions(t *testing.T) {
@@ -93,7 +125,7 @@ func TestFetcher_AttachTaskDefinitions(t *testing.T) {
93125
c.SetTaskDefinitions(ecsmock.GenTaskDefinitions("pdef", nTasks, 1, nil))
94126

95127
// no cache
96-
tasks, err := f.getAllTasks(ctx)
128+
tasks, err := f.getDiscoverableTasks(ctx)
97129
require.NoError(t, err)
98130
attached, err := f.attachTaskDefinition(ctx, tasks)
99131
stats := c.Stats()
@@ -102,7 +134,7 @@ func TestFetcher_AttachTaskDefinitions(t *testing.T) {
102134
assert.Equal(t, nTasks, stats.DescribeTaskDefinition.Called)
103135

104136
// all cached
105-
tasks, err = f.getAllTasks(ctx)
137+
tasks, err = f.getDiscoverableTasks(ctx)
106138
require.NoError(t, err)
107139
// do it again to trigger cache logic
108140
attached, err = f.attachTaskDefinition(ctx, tasks)
@@ -116,7 +148,7 @@ func TestFetcher_AttachTaskDefinitions(t *testing.T) {
116148
task.TaskDefinitionArn = aws.String(fmt.Sprintf("pdef%d:1", i))
117149
}))
118150
c.SetTaskDefinitions(ecsmock.GenTaskDefinitions("pdef", nTasks+1, 1, nil))
119-
tasks, err = f.getAllTasks(ctx)
151+
tasks, err = f.getDiscoverableTasks(ctx)
120152
require.NoError(t, err)
121153
_, err = f.attachTaskDefinition(ctx, tasks)
122154
stats = c.Stats()
@@ -144,7 +176,7 @@ func TestFetcher_AttachContainerInstance(t *testing.T) {
144176
c.SetEc2Instances(ecsmock.GenEc2Instances("i-", nInstances, nil))
145177

146178
ctx := context.Background()
147-
rawTasks, err := f.getAllTasks(ctx)
179+
rawTasks, err := f.getDiscoverableTasks(ctx)
148180
require.NoError(t, err)
149181
assert.Equal(t, nTasks, len(rawTasks))
150182

@@ -182,7 +214,7 @@ func TestFetcher_AttachContainerInstance(t *testing.T) {
182214
c.SetEc2Instances(ecsmock.GenEc2Instances("i-", nInstances, nil))
183215

184216
ctx := context.Background()
185-
rawTasks, err := f.getAllTasks(ctx)
217+
rawTasks, err := f.getDiscoverableTasks(ctx)
186218
require.NoError(t, err)
187219
assert.Equal(t, nTasks, len(rawTasks))
188220

@@ -235,7 +267,7 @@ func TestFetcher_AttachService(t *testing.T) {
235267
}))
236268

237269
ctx := context.Background()
238-
rawTasks, err := f.getAllTasks(ctx)
270+
rawTasks, err := f.getDiscoverableTasks(ctx)
239271
require.NoError(t, err)
240272
tasks, err := f.attachTaskDefinition(ctx, rawTasks)
241273
require.NoError(t, err)

0 commit comments

Comments
 (0)