Skip to content

Commit fc2448f

Browse files
Include autoscaling for ECS service
- Ensure we handle both services that autoscale and services that do not.
- Scaling out at 5-minute intervals is likely sufficient based on past behavior of production.
- Our metric data has 1-minute resolution, so acting at 5x that resolution is responsive without being too sensitive to short spikes.
- A 60% target still leaves plenty of headroom for increases, as we do not see any massive spikes in production.
1 parent 9d28435 commit fc2448f

File tree

7 files changed

+78
-42
lines changed

7 files changed

+78
-42
lines changed

terraform/app/autoscaling.tf

Lines changed: 0 additions & 27 deletions
This file was deleted.

terraform/app/ecs.tf

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,20 @@ module "web_service" {
4141
target_group_arn = local.ecs_initial_lb_target_group
4242
container_port = 4000
4343
}
44+
autoscaling_policies = tomap({
45+
cpu = {
46+
predefined_metric_type = "ECSServiceAverageCPUUtilization"
47+
target_value = 60
48+
scale_in_cooldown = 600
49+
scale_out_cooldown = 300
50+
}
51+
})
4452
cluster_id = aws_ecs_cluster.cluster.id
53+
cluster_name = aws_ecs_cluster.cluster.name
54+
minimum_replica_count = var.minimum_web_replicas
55+
maximum_replica_count = var.maximum_web_replicas
4556
environment = var.environment
4657
server_type = "web"
47-
desired_count = var.minimum_web_replicas
4858
deployment_controller = "CODE_DEPLOY"
4959
}
5060

@@ -66,8 +76,9 @@ module "good_job_service" {
6676
subnets = [aws_subnet.private_subnet_a.id, aws_subnet.private_subnet_b.id]
6777
vpc_id = aws_vpc.application_vpc.id
6878
}
69-
cluster_id = aws_ecs_cluster.cluster.id
70-
environment = var.environment
71-
server_type = "good-job"
72-
desired_count = var.minimum_good_job_replicas
79+
minimum_replica_count = var.minimum_good_job_replicas
80+
cluster_id = aws_ecs_cluster.cluster.id
81+
cluster_name = aws_ecs_cluster.cluster.name
82+
environment = var.environment
83+
server_type = "good-job"
7384
}

terraform/app/env/poc.tfvars

Whitespace-only changes.
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
resource "aws_appautoscaling_target" "ecs_target" {
2+
count = length(var.autoscaling_policies) == 0 ? 0 : 1
3+
resource_id = "service/${var.cluster_name}/${aws_ecs_service.this.name}"
4+
max_capacity = var.maximum_replica_count
5+
min_capacity = var.minimum_replica_count
6+
service_namespace = "ecs"
7+
scalable_dimension = "ecs:service:DesiredCount"
8+
}
9+
10+
11+
resource "aws_appautoscaling_policy" "ecs_cpu" {
12+
for_each = var.autoscaling_policies
13+
name = "${var.server_type}-${each.key}-scaling-${var.environment}"
14+
policy_type = "TargetTrackingScaling"
15+
resource_id = aws_appautoscaling_target.ecs_target[0].resource_id
16+
scalable_dimension = aws_appautoscaling_target.ecs_target[0].scalable_dimension
17+
service_namespace = aws_appautoscaling_target.ecs_target[0].service_namespace
18+
19+
target_tracking_scaling_policy_configuration {
20+
predefined_metric_specification {
21+
predefined_metric_type = each.value.predefined_metric_type
22+
}
23+
target_value = each.value.target_value
24+
scale_in_cooldown = each.value.scale_in_cooldown
25+
scale_out_cooldown = each.value.scale_out_cooldown
26+
}
27+
}

terraform/app/modules/ecs_service/main.tf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ resource "aws_ecs_service" "this" {
3030
name = "mavis-${var.environment}-${var.server_type}"
3131
cluster = var.cluster_id
3232
task_definition = aws_ecs_task_definition.this.arn
33-
desired_count = var.desired_count
33+
desired_count = var.minimum_replica_count
3434
launch_type = "FARGATE"
3535
enable_execute_command = true
3636
health_check_grace_period_seconds = 60

terraform/app/modules/ecs_service/variables.tf

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,36 @@ variable "server_type" {
1010
nullable = false
1111
}
1212

13-
variable "desired_count" {
13+
variable "minimum_replica_count" {
1414
type = number
15-
description = "The initial amount of instances when creating the service"
15+
description = "Minimum amount of allowed replicas for the service. Also the replica count when creating th service."
1616
nullable = false
1717
}
1818

19+
variable "maximum_replica_count" {
20+
type = number
21+
description = "The maximum amount of instances by which the service can scale"
22+
default = null
23+
nullable = true
24+
validation {
25+
condition = length(var.autoscaling_policies) > 0 ? var.maximum_replica_count > var.minimum_replica_count : var.maximum_replica_count == null
26+
error_message = "Maximum replica count must be greater than initial replica count when autoscaling policies are defined and null otherwise"
27+
}
28+
}
29+
30+
variable "autoscaling_policies" {
31+
type = map(object({
32+
predefined_metric_type = string
33+
target_value = number
34+
scale_in_cooldown = number
35+
scale_out_cooldown = number
36+
}))
37+
description = "List of autoscaling policy configuration parameters for the ECS service"
38+
default = {}
39+
nullable = false
40+
}
41+
42+
1943
variable "task_config" {
2044
type = object({
2145
environment = list(object({
@@ -45,6 +69,12 @@ variable "cluster_id" {
4569
nullable = false
4670
}
4771

72+
variable "cluster_name" {
73+
type = string
74+
description = "The name of the ECS cluster."
75+
nullable = false
76+
}
77+
4878
variable "network_params" {
4979
type = object({
5080
subnets = list(string)

terraform/app/variables.tf

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -230,16 +230,11 @@ variable "backup_retention_period" {
230230
}
231231

232232
########## ECS/Scaling Configuration ##########
233-
variable "enable_autoscaling" {
234-
type = bool
235-
default = false
236-
description = "Boolean toggle to determine whether the ECS service should have autoscaling enabled."
237-
}
238233

239234
variable "minimum_web_replicas" {
240235
type = number
241236
default = 3
242-
description = "Minimum amount of allowed replicas for web service"
237+
description = "Minimum amount of allowed replicas for web service. Also the replica count when creating th service."
243238
}
244239

245240
variable "maximum_web_replicas" {
@@ -251,7 +246,7 @@ variable "maximum_web_replicas" {
251246
variable "minimum_good_job_replicas" {
252247
type = number
253248
default = 2
254-
description = "Minimum amount of allowed replicas for good-job service"
249+
description = "Minimum amount of allowed replicas for good-job service. Also the replica count when creating th service."
255250
}
256251

257252
variable "max_aurora_capacity_units" {

0 commit comments

Comments
 (0)