Skip to content

Commit 0b8296d

Browse files
Include autoscaling for ECS service, but enabled for qa environment only (#3429)
- Ensure we handle both services that autoscale and services that do not.
- Scaling out at 5-minute intervals is likely sufficient based on the past behavior of production.
- We have data resolution at 1 minute, so 5x that resolution is responsive without being too sensitive to spikes.
- A 60% target for web-service still gives us plenty of headroom for increases, as we do not see any massive spikes in production.
- Only enable on QA for now to evaluate the configuration in non-prod environments.
2 parents 03bb557 + 8dc4e77 commit 0b8296d

File tree

10 files changed

+94
-49
lines changed

10 files changed

+94
-49
lines changed

terraform/app/autoscaling.tf

Lines changed: 0 additions & 27 deletions
This file was deleted.

terraform/app/ecs.tf

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,20 @@ module "web_service" {
4141
target_group_arn = local.ecs_initial_lb_target_group
4242
container_port = 4000
4343
}
44+
autoscaling_policies = tomap({
45+
cpu = {
46+
predefined_metric_type = "ECSServiceAverageCPUUtilization"
47+
target_value = 60
48+
scale_in_cooldown = 600
49+
scale_out_cooldown = 300
50+
}
51+
})
4452
cluster_id = aws_ecs_cluster.cluster.id
53+
cluster_name = aws_ecs_cluster.cluster.name
54+
minimum_replica_count = var.minimum_web_replicas
55+
maximum_replica_count = var.maximum_web_replicas
4556
environment = var.environment
4657
server_type = "web"
47-
desired_count = var.minimum_web_replicas
4858
deployment_controller = "CODE_DEPLOY"
4959
}
5060

@@ -66,8 +76,10 @@ module "good_job_service" {
6676
subnets = [aws_subnet.private_subnet_a.id, aws_subnet.private_subnet_b.id]
6777
vpc_id = aws_vpc.application_vpc.id
6878
}
69-
cluster_id = aws_ecs_cluster.cluster.id
70-
environment = var.environment
71-
server_type = "good-job"
72-
desired_count = var.minimum_good_job_replicas
79+
minimum_replica_count = var.good_job_replicas
80+
maximum_replica_count = var.good_job_replicas
81+
cluster_id = aws_ecs_cluster.cluster.id
82+
cluster_name = aws_ecs_cluster.cluster.name
83+
environment = var.environment
84+
server_type = "good-job"
7385
}

terraform/app/env/preview.tfvars

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,4 @@ http_hosts = {
2424
MAVIS__GIVE_OR_REFUSE_CONSENT_HOST = "preview.mavistesting.com"
2525
}
2626

27-
appspec_bucket = "nhse-mavis-appspec-bucket-preview"
28-
active_lb_target_group = "green"
27+
appspec_bucket = "nhse-mavis-appspec-bucket-preview"

terraform/app/env/qa.tfvars

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,6 @@ http_hosts = {
2222
MAVIS__HOST = "qa.mavistesting.com"
2323
MAVIS__GIVE_OR_REFUSE_CONSENT_HOST = "qa.mavistesting.com"
2424
}
25-
appspec_bucket = "nhse-mavis-appspec-bucket-qa"
25+
appspec_bucket = "nhse-mavis-appspec-bucket-qa"
26+
minimum_web_replicas = 2
27+
maximum_web_replicas = 4

terraform/app/env/sandbox-alpha.tfvars

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,6 @@ enable_cis2 = false
2121
enable_pds_enqueue_bulk_updates = false
2222

2323
appspec_bucket = "nhse-mavis-appspec-bucket-sandbox-alpha"
24-
minimum_web_replicas = 2
24+
minimum_web_replicas = 1
25+
maximum_web_replicas = 2
26+
good_job_replicas = 1

terraform/app/env/sandbox-beta.tfvars

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,6 @@ enable_cis2 = false
2121
enable_pds_enqueue_bulk_updates = false
2222

2323
appspec_bucket = "nhse-mavis-appspec-bucket-sandbox-beta"
24-
minimum_web_replicas = 2
24+
minimum_web_replicas = 1
25+
maximum_web_replicas = 2
26+
good_job_replicas = 1
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
resource "aws_appautoscaling_target" "this" {
2+
count = length(var.autoscaling_policies) > 0 && local.autoscaling_enabled ? 1 : 0
3+
resource_id = "service/${var.cluster_name}/${aws_ecs_service.this.name}"
4+
max_capacity = var.maximum_replica_count
5+
min_capacity = var.minimum_replica_count
6+
service_namespace = "ecs"
7+
scalable_dimension = "ecs:service:DesiredCount"
8+
}
9+
10+
11+
resource "aws_appautoscaling_policy" "this" {
12+
for_each = local.autoscaling_enabled ? var.autoscaling_policies : {}
13+
name = "${var.server_type}-${each.key}-scaling-${var.environment}"
14+
policy_type = "TargetTrackingScaling"
15+
resource_id = aws_appautoscaling_target.this[0].resource_id
16+
scalable_dimension = aws_appautoscaling_target.this[0].scalable_dimension
17+
service_namespace = aws_appautoscaling_target.this[0].service_namespace
18+
19+
target_tracking_scaling_policy_configuration {
20+
predefined_metric_specification {
21+
predefined_metric_type = each.value.predefined_metric_type
22+
}
23+
target_value = each.value.target_value
24+
scale_in_cooldown = each.value.scale_in_cooldown
25+
scale_out_cooldown = each.value.scale_out_cooldown
26+
}
27+
}

terraform/app/modules/ecs_service/main.tf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ resource "aws_ecs_service" "this" {
3030
name = "mavis-${var.environment}-${var.server_type}"
3131
cluster = var.cluster_id
3232
task_definition = aws_ecs_task_definition.this.arn
33-
desired_count = var.desired_count
33+
desired_count = var.minimum_replica_count
3434
launch_type = "FARGATE"
3535
enable_execute_command = true
3636
health_check_grace_period_seconds = 60
@@ -63,7 +63,7 @@ resource "aws_ecs_service" "this" {
6363
ignore_changes = [
6464
task_definition,
6565
load_balancer,
66-
# desired_count TODO: Uncomment once we include autoscaling
66+
desired_count
6767
]
6868
create_before_destroy = true
6969
}

terraform/app/modules/ecs_service/variables.tf

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,35 @@ variable "server_type" {
1010
nullable = false
1111
}
1212

13-
variable "desired_count" {
13+
variable "minimum_replica_count" {
1414
type = number
15-
description = "The initial amount of instances when creating the service"
15+
description = "Minimum amount of allowed replicas for the service. Also the replica count when creating the service."
1616
nullable = false
1717
}
1818

19+
variable "maximum_replica_count" {
20+
type = number
21+
description = "The maximum amount of instances by which the service can scale. If equal to the minimum_replica_count, autoscaling will be disabled."
22+
nullable = false
23+
validation {
24+
condition = var.maximum_replica_count >= var.minimum_replica_count
25+
error_message = "Maximum replica count must be greater than or equal to the minimum replica count."
26+
}
27+
}
28+
29+
variable "autoscaling_policies" {
30+
type = map(object({
31+
predefined_metric_type = string
32+
target_value = number
33+
scale_in_cooldown = number
34+
scale_out_cooldown = number
35+
}))
36+
description = "Map of autoscaling policy configuration parameters for the ECS service, keyed by policy name"
37+
default = {}
38+
nullable = false
39+
}
40+
41+
1942
variable "task_config" {
2043
type = object({
2144
environment = list(object({
@@ -45,6 +68,12 @@ variable "cluster_id" {
4568
nullable = false
4669
}
4770

71+
variable "cluster_name" {
72+
type = string
73+
description = "The name of the ECS cluster."
74+
nullable = false
75+
}
76+
4877
variable "network_params" {
4978
type = object({
5079
subnets = list(string)
@@ -77,3 +106,7 @@ variable "container_name" {
77106
default = "application"
78107
nullable = false
79108
}
109+
110+
locals {
111+
autoscaling_enabled = var.maximum_replica_count > var.minimum_replica_count
112+
}

terraform/app/variables.tf

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -230,16 +230,11 @@ variable "backup_retention_period" {
230230
}
231231

232232
########## ECS/Scaling Configuration ##########
233-
variable "enable_autoscaling" {
234-
type = bool
235-
default = false
236-
description = "Boolean toggle to determine whether the ECS service should have autoscaling enabled."
237-
}
238233

239234
variable "minimum_web_replicas" {
240235
type = number
241236
default = 3
242-
description = "Minimum amount of allowed replicas for web service"
237+
description = "Minimum amount of allowed replicas for web service. Also the replica count when creating the service."
243238
}
244239

245240
variable "maximum_web_replicas" {
@@ -248,10 +243,10 @@ variable "maximum_web_replicas" {
248243
description = "Maximum amount of allowed replicas for web service"
249244
}
250245

251-
variable "minimum_good_job_replicas" {
246+
variable "good_job_replicas" {
252247
type = number
253248
default = 2
254-
description = "Minimum amount of allowed replicas for good-job service"
249+
description = "Amount of replicas for the good-job service"
255250
}
256251

257252
variable "max_aurora_capacity_units" {

0 commit comments

Comments
 (0)