Skip to content
This repository was archived by the owner on Dec 20, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,19 @@

import com.amazonaws.services.ec2.model.Instance;
import com.amazonaws.services.ecs.model.ContainerDefinition;
import com.amazonaws.services.ecs.model.LoadBalancer;
import com.amazonaws.services.ecs.model.NetworkBinding;
import com.amazonaws.services.ecs.model.TaskDefinition;
import com.netflix.spinnaker.clouddriver.ecs.cache.Keys;
import com.netflix.spinnaker.clouddriver.ecs.cache.client.ContainerInstanceCacheClient;
import com.netflix.spinnaker.clouddriver.ecs.cache.client.EcsInstanceCacheClient;
import com.netflix.spinnaker.clouddriver.ecs.cache.client.ServiceCacheClient;
import com.netflix.spinnaker.clouddriver.ecs.cache.client.TargetHealthCacheClient;
import com.netflix.spinnaker.clouddriver.ecs.cache.client.TaskCacheClient;
import com.netflix.spinnaker.clouddriver.ecs.cache.client.TaskDefinitionCacheClient;
import com.netflix.spinnaker.clouddriver.ecs.cache.client.TaskHealthCacheClient;
import com.netflix.spinnaker.clouddriver.ecs.cache.model.ContainerInstance;
import com.netflix.spinnaker.clouddriver.ecs.cache.model.EcsTargetHealth;
import com.netflix.spinnaker.clouddriver.ecs.cache.model.Service;
import com.netflix.spinnaker.clouddriver.ecs.cache.model.Task;
import com.netflix.spinnaker.clouddriver.ecs.cache.model.TaskHealth;
Expand All @@ -37,6 +40,7 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

Expand All @@ -50,6 +54,7 @@ public class ContainerInformationService {
private final TaskDefinitionCacheClient taskDefinitionCacheClient;
private final EcsInstanceCacheClient ecsInstanceCacheClient;
private final ContainerInstanceCacheClient containerInstanceCacheClient;
private final TargetHealthCacheClient targetHealthCacheClient;

@Autowired
public ContainerInformationService(
Expand All @@ -59,14 +64,16 @@ public ContainerInformationService(
TaskHealthCacheClient taskHealthCacheClient,
TaskDefinitionCacheClient taskDefinitionCacheClient,
EcsInstanceCacheClient ecsInstanceCacheClient,
ContainerInstanceCacheClient containerInstanceCacheClient) {
ContainerInstanceCacheClient containerInstanceCacheClient,
TargetHealthCacheClient targetHealthCacheClient) {
this.ecsCredentialsConfig = ecsCredentialsConfig;
this.taskCacheClient = taskCacheClient;
this.serviceCacheClient = serviceCacheClient;
this.taskHealthCacheClient = taskHealthCacheClient;
this.taskDefinitionCacheClient = taskDefinitionCacheClient;
this.ecsInstanceCacheClient = ecsInstanceCacheClient;
this.containerInstanceCacheClient = containerInstanceCacheClient;
this.targetHealthCacheClient = targetHealthCacheClient;
}

public List<Map<String, Object>> getHealthStatus(
Expand Down Expand Up @@ -101,8 +108,15 @@ public List<Map<String, Object>> getHealthStatus(
// Task-based health
if (task != null) {
boolean hasHealthCheck = false;
EcsTargetHealth targetHealth = null;
if (service != null) {
hasHealthCheck = taskHasHealthCheck(service, accountName, region);
LoadBalancer loadBalancer = service.getLoadBalancers().stream().findFirst().orElse(null);
if (loadBalancer != null) {
String targetGroupKey =
Keys.getTargetHealthKey(accountName, region, loadBalancer.getTargetGroupArn());
targetHealth = targetHealthCacheClient.get(targetGroupKey);
}
}

Map<String, Object> taskPlatformHealth = new HashMap<>();
Expand All @@ -111,7 +125,8 @@ public List<Map<String, Object>> getHealthStatus(
taskPlatformHealth.put("healthClass", "platform");
taskPlatformHealth.put(
"state",
toPlatformHealthState(task.getLastStatus(), task.getHealthStatus(), hasHealthCheck));
toPlatformHealthState(
task.getLastStatus(), task.getHealthStatus(), hasHealthCheck, targetHealth));
healthMetrics.add(taskPlatformHealth);
}

Expand All @@ -138,13 +153,20 @@ public boolean taskHasHealthCheck(Service service, String accountName, String re
}

private String toPlatformHealthState(
String ecsTaskStatus, String ecsTaskHealthStatus, boolean hasHealthCheck) {
String ecsTaskStatus,
String ecsTaskHealthStatus,
boolean hasHealthCheck,
EcsTargetHealth ecsTargetHealth) {
if (hasHealthCheck && "UNKNOWN".equals(ecsTaskHealthStatus)) {
return "Starting";
} else if ("UNHEALTHY".equals(ecsTaskHealthStatus)) {
return "Down";
}

if (ecsTargetHealth != null) {
return getPlatformHealthStateFromTargetGroup(ecsTargetHealth);
}

switch (ecsTaskStatus) {
case "PROVISIONING":
case "PENDING":
Expand All @@ -157,6 +179,27 @@ private String toPlatformHealthState(
}
}

/**
 * Derives the platform health state from the health of the targets cached for a target group.
 *
 * <p>Based on:
 * https://docs.aws.amazon.com/elasticloadbalancing/latest/application/target-group-health-checks.html#target-health-states
 *
 * <p>The precedence is explicit and does not depend on the iteration order of the collected set:
 *
 * <ol>
 *   <li>any target reporting {@code healthy} yields {@code "Up"};
 *   <li>otherwise any target reporting {@code initial} yields {@code "Starting"};
 *   <li>otherwise (unhealthy, unused, draining, unavailable, or no usable description at all) the
 *       result is {@code "Down"}.
 * </ol>
 *
 * <p>State names are compared case-insensitively. Descriptions that are {@code null} or carry no
 * target health are ignored rather than causing a {@link NullPointerException}.
 *
 * @param targetHealth cached target health of the target group; must not be {@code null}
 * @return {@code "Up"}, {@code "Starting"} or {@code "Down"}
 */
private String getPlatformHealthStateFromTargetGroup(EcsTargetHealth targetHealth) {
  // A cache entry without any descriptions is treated like an empty one.
  if (targetHealth.getTargetHealthDescriptions() == null) {
    return "Down";
  }

  Set<String> statuses =
      targetHealth.getTargetHealthDescriptions().stream()
          .filter(tg -> tg != null && tg.getTargetHealth() != null)
          .map(tg -> tg.getTargetHealth().getState())
          .collect(Collectors.toSet());

  // Check the states in a fixed order so that a mix of "healthy" and "initial" always
  // resolves the same way instead of depending on hash iteration order.
  if (statuses.stream().anyMatch("healthy"::equalsIgnoreCase)) {
    return "Up";
  }
  if (statuses.stream().anyMatch("initial"::equalsIgnoreCase)) {
    return "Starting";
  }

  // statuses: unhealthy, unused, draining, unavailable
  return "Down";
}

public String getClusterArn(String accountName, String region, String taskId) {
String key = Keys.getTaskKey(accountName, region, taskId);
Task task = taskCacheClient.get(key);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,11 @@ import com.amazonaws.services.ecs.model.HealthCheck
import com.amazonaws.services.ecs.model.LoadBalancer
import com.amazonaws.services.ecs.model.NetworkBinding
import com.amazonaws.services.ecs.model.TaskDefinition
import com.amazonaws.services.elasticloadbalancingv2.model.TargetHealth
import com.amazonaws.services.elasticloadbalancingv2.model.TargetHealthDescription
import com.netflix.spinnaker.clouddriver.ecs.cache.client.*
import com.netflix.spinnaker.clouddriver.ecs.cache.model.ContainerInstance
import com.netflix.spinnaker.clouddriver.ecs.cache.model.EcsTargetHealth
import com.netflix.spinnaker.clouddriver.ecs.cache.model.Service
import com.netflix.spinnaker.clouddriver.ecs.cache.model.Task
import com.netflix.spinnaker.clouddriver.ecs.cache.model.TaskHealth
Expand All @@ -42,6 +45,7 @@ class ContainerInformationServiceSpec extends Specification {
def taskDefinitionCacheClient = Mock(TaskDefinitionCacheClient)
def ecsInstanceCacheClient = Mock(EcsInstanceCacheClient)
def containerInstanceCacheClient = Mock(ContainerInstanceCacheClient)
def targetHealthCacheClient = Mock(TargetHealthCacheClient)

@Subject
def service = new ContainerInformationService(ecsCredentialsConfig,
Expand All @@ -50,7 +54,8 @@ class ContainerInformationServiceSpec extends Specification {
taskHealthCacheClient,
taskDefinitionCacheClient,
ecsInstanceCacheClient,
containerInstanceCacheClient)
containerInstanceCacheClient,
targetHealthCacheClient)

def 'should return a proper health status'() {
given:
Expand Down Expand Up @@ -247,6 +252,142 @@ class ContainerInformationServiceSpec extends Specification {
'HEALTHY' | 'Up' | 'RUNNING'
}

// Data-driven feature: the service has a load balancer, the container definition has no
// health check, and the target health cache returns null. The expected platform state then
// varies only with the task's lastStatus (see the where: table).
def 'should return Up health check status if task is running but healthcheck in container definition is null and targetHealthchecks related container is null'() {
given:
def taskId = 'task-id'
def serviceName = 'test-service-name'
def type = 'loadBalancer'

// Service with a single (empty) load balancer entry.
def cachedService = new Service(
serviceName: serviceName,
loadBalancers: [new LoadBalancer()]
)

serviceCacheClient.get(_) >> cachedService
taskCacheClient.get(_) >> new Task(lastStatus: lastStatus, healthStatus: healthStatus)
// Container definition without a health check.
taskDefinitionCacheClient.get(_) >> new TaskDefinition(containerDefinitions: Lists.newArrayList(new ContainerDefinition(healthCheck: null)))
// No target health cached for the target group.
targetHealthCacheClient.get(_) >> null

// First entry is the load-balancer health, second is the platform ('ecs') health under test.
def expectedHealthStatus = [
[
instanceId: taskId,
state : 'Unknown',
type : type
],
[
instanceId: taskId,
state : resultStatus,
type : 'ecs',
healthClass: 'platform'
]
]
def retrievedHealthStatus = service.getHealthStatus(taskId, serviceName, 'test-account', 'us-west-1')

expect:
retrievedHealthStatus == expectedHealthStatus

// Each row: task health status, expected platform state, task lastStatus.
where:
healthStatus | resultStatus | lastStatus
'UNKNOWN' | 'Starting' | 'PROVISIONING'
'UNKNOWN' | 'Starting' | 'PENDING'
'UNKNOWN' | 'Starting' | 'ACTIVATING'
'UNKNOWN' | 'Up' | 'RUNNING'
}

// Data-driven feature: the container definition has no health check, but the target health
// cache returns one target health description. The expected platform state is driven by that
// description's state (see the where: table).
def 'should return health status based on target group if task is running but healthcheck in container definition is null and container has a targetHealthcheck defined'() {
given:
def taskId = 'task-id'
def serviceName = 'test-service-name'
def type = 'loadBalancer'

// Service with a single (empty) load balancer entry.
def cachedService = new Service(
serviceName: serviceName,
loadBalancers: [new LoadBalancer()]
)

serviceCacheClient.get(_) >> cachedService
taskCacheClient.get(_) >> new Task(lastStatus: lastStatus, healthStatus: healthStatus)
// Container definition without a health check.
taskDefinitionCacheClient.get(_) >> new TaskDefinition(containerDefinitions: Lists.newArrayList(new ContainerDefinition(healthCheck: null)))
// Cached target health holding exactly one description with the state under test.
targetHealthCacheClient.get(_) >> new EcsTargetHealth(targetHealthDescriptions: List.of(
new TargetHealthDescription(targetHealth: new TargetHealth(state: targetHealthStatus))
))

// First entry is the load-balancer health, second is the platform ('ecs') health under test.
def expectedHealthStatus = [
[
instanceId: taskId,
state : 'Unknown',
type : type
],
[
instanceId: taskId,
state : resultStatus,
type : 'ecs',
healthClass: 'platform'
]
]
def retrievedHealthStatus = service.getHealthStatus(taskId, serviceName, 'test-account', 'us-west-1')

expect:
retrievedHealthStatus == expectedHealthStatus

// Rows: 'initial' expects Starting, 'healthy' expects Up, every other listed state expects Down.
where:
healthStatus | resultStatus | lastStatus | targetHealthStatus
'UNKNOWN' | 'Starting' | 'RUNNING' | 'initial'
'UNKNOWN' | 'Up' | 'RUNNING' | 'healthy'
'UNKNOWN' | 'Down' | 'RUNNING' | 'unhealthy'
'UNKNOWN' | 'Down' | 'RUNNING' | 'unused'
'UNKNOWN' | 'Down' | 'RUNNING' | 'draining'
'UNKNOWN' | 'Down' | 'RUNNING' | 'unavailable'

}

// Data-driven feature: the container definition has no health check and the target health
// cache returns two target health descriptions. The rows check the expected platform state
// for mixed states, in both orders.
def 'should return health status based on target group if task is running but healthcheck in container definition is null and container has multiple targetHealthcheck related'() {
given:
def taskId = 'task-id'
def serviceName = 'test-service-name'
def type = 'loadBalancer'

// Service with a single (empty) load balancer entry.
def cachedService = new Service(
serviceName: serviceName,
loadBalancers: [new LoadBalancer()]
)

serviceCacheClient.get(_) >> cachedService
taskCacheClient.get(_) >> new Task(lastStatus: lastStatus, healthStatus: healthStatus)
// Container definition without a health check.
taskDefinitionCacheClient.get(_) >> new TaskDefinition(containerDefinitions: Lists.newArrayList(new ContainerDefinition(healthCheck: null)))
// Cached target health holding two descriptions with the two states under test.
targetHealthCacheClient.get(_) >> new EcsTargetHealth(targetHealthDescriptions: List.of(
new TargetHealthDescription(targetHealth: new TargetHealth(state: targetHealthStatus1)),
new TargetHealthDescription(targetHealth: new TargetHealth(state: targetHealthStatus2))
))

// First entry is the load-balancer health, second is the platform ('ecs') health under test.
def expectedHealthStatus = [
[
instanceId: taskId,
state : 'Unknown',
type : type
],
[
instanceId: taskId,
state : resultStatus,
type : 'ecs',
healthClass: 'platform'
]
]
def retrievedHealthStatus = service.getHealthStatus(taskId, serviceName, 'test-account', 'us-west-1')

expect:
retrievedHealthStatus == expectedHealthStatus

// Rows pair 'initial' or 'healthy' with another state; no row combines 'healthy' with 'initial'.
where:
healthStatus | resultStatus | lastStatus | targetHealthStatus1 | targetHealthStatus2
'UNKNOWN' | 'Starting' | 'RUNNING' | 'initial' | 'draining'
'UNKNOWN' | 'Starting' | 'RUNNING' | 'draining' | 'initial'
'UNKNOWN' | 'Up' | 'RUNNING' | 'healthy' | 'draining'
'UNKNOWN' | 'Up' | 'RUNNING' | 'draining' | 'healthy'
'UNKNOWN' | 'Up' | 'RUNNING' | 'unhealthy' | 'healthy'

}

def 'should return a proper private address for a task'() {
given:
def account = 'test-account'
Expand Down
Loading