diff --git a/Gemfile.lock b/Gemfile.lock index af16206436..96cbc90c2a 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -246,6 +246,7 @@ GEM concurrent-ruby (~> 1.1) webrick (~> 1.7) websocket-driver (~> 0.7) + ffi (1.17.1-aarch64-linux-gnu) ffi (1.17.1-arm64-darwin) ffi (1.17.1-x86_64-linux-gnu) fhir_models (5.0.0) @@ -394,6 +395,8 @@ GEM net-protocol nio4r (2.7.4) nkf (0.2.0) + nokogiri (1.18.9-aarch64-linux-gnu) + racc (~> 1.4) nokogiri (1.18.9-arm64-darwin) racc (~> 1.4) nokogiri (1.18.9-x86_64-linux-gnu) @@ -743,6 +746,7 @@ GEM zeitwerk (2.7.3) PLATFORMS + aarch64-linux arm64-darwin-22 arm64-darwin-23 arm64-darwin-24 diff --git a/app/controllers/api/reporting/one_time_tokens_controller.rb b/app/controllers/api/reporting/one_time_tokens_controller.rb index ef5d199f05..b1a6b706f8 100644 --- a/app/controllers/api/reporting/one_time_tokens_controller.rb +++ b/app/controllers/api/reporting/one_time_tokens_controller.rb @@ -42,7 +42,4 @@ def jwt(token) ) end - def ensure_reporting_api_feature_enabled - render status: :forbidden and return unless Flipper.enabled?(:reporting_api) - end end diff --git a/config/feature_flags.yml b/config/feature_flags.yml index fce2acebce..6b54819018 100644 --- a/config/feature_flags.yml +++ b/config/feature_flags.yml @@ -1,3 +1,8 @@ + +reporting_api: Enables the Commissioner reporting component to authenticate to Mavis via OAUTH 2.0 + Authorization Code Flow (https://datatracker.ietf.org/doc/html/rfc6749#section-4.1), and retrieve + statistics from /api/reporting/ + basic_auth: Require users to sign in with basic authentication before they can use the service. 
diff --git a/config/routes.rb b/config/routes.rb index 90b303a50b..8e223a6122 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -113,6 +113,10 @@ get "totals", controller: :totals, action: :index end end + namespace :reporting do + post "authorize", to: "one_time_tokens#authorize" + get "totals", controller: :totals, action: :index + end end resources :class_imports, path: "class-imports", except: %i[index destroy] diff --git a/docs/ops-tasks.md b/docs/ops-tasks.md index 758f648557..b7add36acf 100644 --- a/docs/ops-tasks.md +++ b/docs/ops-tasks.md @@ -19,7 +19,7 @@ session.patient_sessions.all?(&:safe_to_destroy?) session.patients.update_all( cohort_id: nil, home_educated: false, - school_id: nil + school_id: nil, ) # removes all patients from the session diff --git a/docs/reporting-component-authentication.adoc b/docs/reporting-component-authentication.adoc index c98cc2d82d..8b9c850569 100644 --- a/docs/reporting-component-authentication.adoc +++ b/docs/reporting-component-authentication.adoc @@ -10,7 +10,7 @@ ifdef::env-github[] // When PRing changes to the diagrams you can change this attributes // temporarily to the name of the branch you're working on. But don't forget // to change it back to main before merging!! 
-:github-branch: main +:github-branch: spike/MAV-1406-auth-sharing-with-reporting :github-repo: nhsuk/manage-vaccinations-in-schools diff --git a/spec/controllers/concerns/authentication_concern_spec.rb b/spec/controllers/concerns/authentication_concern_spec.rb index 8233de2e3d..bd4b97e166 100644 --- a/spec/controllers/concerns/authentication_concern_spec.rb +++ b/spec/controllers/concerns/authentication_concern_spec.rb @@ -8,6 +8,19 @@ Class .new do # rubocop:disable Style/BlockDelimiters include AuthenticationConcern + attr_accessor :request, :session + + def initialize(request: nil, session: {}) + @request = request + @session = session + end + + def params + {} + end + + def render(content = {}, args = {}) + end attr_accessor :request, :session
diff --git a/spec/fixtures/cohort_import/pds_extravaganza.csv b/spec/fixtures/cohort_import/pds_extravaganza.csv index ddd9116c75..eb90bf52df 100644 --- a/spec/fixtures/cohort_import/pds_extravaganza.csv +++ b/spec/fixtures/cohort_import/pds_extravaganza.csv @@ -7,4 +7,4 @@ CHILD_SCHOOL_URN,PARENT_1_NAME,PARENT_1_RELATIONSHIP,PARENT_1_EMAIL,PARENT_1_PHO 123456,Jane Doe,,,01234567896,,,,,Oliver,Green,,2010-08-15,8,789 Silent Street,,London,SW1W 8JL,9435753868 123456,,,,,,,,,Lara,Williams,,2010-05-15,8,,,,B1 1AA, 123456,,,,,,,,,Lucy,McCarthy,,2010-08-16,8,789 Silent Street,,London,SW7 5LE,9435815065 -123456,,,,,,,,,Maia,Smith,,2010-08-16,8,790 Silent Street,,London,W2 3PE,9435789102 \ No newline at end of file +123456,,,,,,,,,Maia,Smith,,2010-08-16,8,790 Silent Street,,London,W2 3PE,9435789102 diff --git a/terraform/account/deployment_permissions.tf b/terraform/account/deployment_permissions.tf index 5352642b2e..7d8847e615 100644 --- a/terraform/account/deployment_permissions.tf +++ b/terraform/account/deployment_permissions.tf @@ -3,7 +3,8 @@ resource "aws_iam_role" "mavis_deploy" { name = "GithubDeployMavisAndInfrastructure" description = "Role allowing terraform deployment from github workflows" assume_role_policy = templatefile("resources/iam_role_github_trust_policy_${var.environment}.json.tftpl", { - account_id = var.account_id + account_id = var.account_id, + repository = "nhsuk/manage-vaccinations-in-schools" }) } @@ -27,7 +28,8 @@ resource "aws_iam_role" "data_replication_deploy" { name = "GithubDeployDataReplicationInfrastructure" description = "Role to be assumed by github workflows dealing with the creation and destruction of the data-replication infrastructure." 
assume_role_policy = templatefile("resources/iam_role_github_trust_policy_${var.environment}.json.tftpl", { - account_id = var.account_id + account_id = var.account_id, + repository = "nhsuk/manage-vaccinations-in-schools" }) } @@ -46,28 +48,55 @@ resource "aws_iam_role_policy_attachment" "data_replication" { policy_arn = each.value } +################ Deploy ECS Service ################ + +resource "aws_iam_role" "deploy_ecs_service" { + name = "GithubDeployECSService" + description = "Role allowing terraform deployment of ECS services from github workflows" + assume_role_policy = templatefile("resources/iam_role_github_trust_policy_${var.environment}.json.tftpl", { + account_id = var.account_id, + repository = "NHSDigital/manage-vaccinations-in-schools-reporting" + }) +} + +resource "aws_iam_policy" "deploy_ecs_service" { + name = "DeployECSServiceResources" + description = "Permissions for GithubDeployECSService role" + policy = file("resources/iam_policy_DeployECSServiceResources.json") + lifecycle { + ignore_changes = [description] + } +} + +resource "aws_iam_role_policy_attachment" "deploy_ecs_service" { + for_each = local.ecs_deploy_policies + role = aws_iam_role.deploy_ecs_service.name + policy_arn = each.value +} + ################# Deploy Monitoring ################ -resource "aws_iam_role" "monitoring_deploy" { - name = "GithubDeployMonitoring" - description = "Role allowing terraform deployment of monitoring resources from github workflows" +resource "aws_iam_role" "deploy_ecs_service" { + name = "GithubDeployECSService" + description = "Role allowing terraform deployment of ECS services from github workflows" assume_role_policy = templatefile("resources/iam_role_github_trust_policy_${var.environment}.json.tftpl", { - account_id = var.account_id + account_id = var.account_id, + repository = "nhsuk/manage-vaccinations-in-schools" }) } -resource "aws_iam_policy" "monitoring_deploy" { - name = "DeployMonitoringResources" - description = "Permissions for 
GithubDeployMonitoring role" - policy = file("resources/iam_policy_DeployMonitoringResources.json") +resource "aws_iam_policy" "deploy_ecs_service" { + name = "DeployECSServiceResources" + description = "Permissions for GithubDeployECSService role" + policy = file("resources/iam_policy_DeployECSServiceResources.json") lifecycle { ignore_changes = [description] } } -resource "aws_iam_role_policy_attachment" "monitoring_deploy" { - for_each = local.monitoring_policies - role = aws_iam_role.monitoring_deploy.name +resource "aws_iam_role_policy_attachment" "deploy_ecs_service" { + for_each = local.ecs_deploy_policies + role = aws_iam_role.deploy_ecs_service.name policy_arn = each.value } diff --git a/terraform/account/main.tf b/terraform/account/main.tf index 64d754f3cb..e1552ae1c9 100644 --- a/terraform/account/main.tf +++ b/terraform/account/main.tf @@ -122,6 +122,11 @@ resource "aws_ecr_lifecycle_policy" "mavis" { }) } +resource "aws_ecr_repository" "mavis_reporting" { + name = "mavis/reporting" + image_tag_mutability = "MUTABLE" +} + #### Access Analyzer diff --git a/terraform/account/resources/iam_policy_DeployECSServiceResources.json b/terraform/account/resources/iam_policy_DeployECSServiceResources.json new file mode 100644 index 0000000000..8fe50c1090 --- /dev/null +++ b/terraform/account/resources/iam_policy_DeployECSServiceResources.json @@ -0,0 +1,61 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "ecs:UpdateService", + "ecs:DescribeServices", + "ecs:ListServices", + "ecs:RegisterTaskDefinition", + "ecs:DeregisterTaskDefinition", + "ecs:DescribeTaskDefinition", + "ecs:ListTaskDefinitions", + "ecs:DescribeClusters", + "ecs:DescribeTasks", + "ecs:DescribeTaskSets", + "ecs:StartTask", + "ecs:ListServiceDeployments", + "ecs:DescribeServiceDeployments", + "ecs:UntagResource", + "ecs:TagResource", + "ecs:ListClusters", + "ecs:ListContainerInstances", + "ecs:ListTaskDefinitionFamilies", + "ecs:ListTasks", + "ecr:PutImage", 
+ "ecr:InitiateLayerUpload", + "ecr:UploadLayerPart", + "ecr:CompleteLayerUpload", + "codedeploy:BatchGetApplicationRevisions", + "codedeploy:BatchGetApplications", + "codedeploy:BatchGetDeploymentGroups", + "codedeploy:BatchGetDeployments", + "codedeploy:ContinueDeployment", + "codedeploy:CreateApplication", + "codedeploy:CreateDeployment", + "codedeploy:CreateDeploymentGroup", + "codedeploy:GetApplication", + "codedeploy:GetApplicationRevision", + "codedeploy:GetDeployment", + "codedeploy:GetDeploymentConfig", + "codedeploy:GetDeploymentGroup", + "codedeploy:GetDeploymentTarget", + "codedeploy:ListApplicationRevisions", + "codedeploy:ListApplications", + "codedeploy:ListDeploymentConfigs", + "codedeploy:ListDeploymentGroups", + "codedeploy:ListDeployments", + "codedeploy:ListDeploymentTargets", + "codedeploy:RegisterApplicationRevision", + "codedeploy:StopDeployment", + "ssm:DescribeParameters", + "ssm:GetParameter", + "ssm:GetParameters", + "ssm:GetParametersByPath", + "iam:PassRole" + ], + "Resource": ["*"] + } + ] +} diff --git a/terraform/account/resources/iam_policy_DeployMavisResources.json b/terraform/account/resources/iam_policy_DeployMavisResources.json index e8a3e90c84..07a0931028 100644 --- a/terraform/account/resources/iam_policy_DeployMavisResources.json +++ b/terraform/account/resources/iam_policy_DeployMavisResources.json @@ -156,6 +156,12 @@ "elasticache:ModifyCacheParameterGroup", "elasticache:ModifyCacheSubnetGroup", - "elasticache:IncreaseReplicaCount" + "elasticache:IncreaseReplicaCount", + "lambda:InvokeFunction", + "lambda:DeleteFunction", + "lambda:CreateFunction", + "lambda:CreateAlias", + "lambda:DeleteAlias", + "lambda:UpdateAlias" ], "Resource": ["*"] } diff --git a/terraform/account/resources/iam_role_github_trust_policy_development.json.tftpl b/terraform/account/resources/iam_role_github_trust_policy_development.json.tftpl index c8b00eb1b5..378b9fea87 100644 --- a/terraform/account/resources/iam_role_github_trust_policy_development.json.tftpl +++ 
b/terraform/account/resources/iam_role_github_trust_policy_development.json.tftpl @@ -12,7 +12,7 @@ "token.actions.githubusercontent.com:aud": "sts.amazonaws.com" }, "StringLike": { - "token.actions.githubusercontent.com:sub": "repo:nhsuk/manage-vaccinations-in-schools:*" + "token.actions.githubusercontent.com:sub": "repo:${repository}:*" } } } diff --git a/terraform/account/resources/iam_role_github_trust_policy_production.json.tftpl b/terraform/account/resources/iam_role_github_trust_policy_production.json.tftpl index d1b2396034..ce256d6583 100644 --- a/terraform/account/resources/iam_role_github_trust_policy_production.json.tftpl +++ b/terraform/account/resources/iam_role_github_trust_policy_production.json.tftpl @@ -10,8 +10,8 @@ "Condition": { "StringEquals": { "token.actions.githubusercontent.com:sub": [ - "repo:nhsuk/manage-vaccinations-in-schools:ref:refs/heads/main", - "repo:nhsuk/manage-vaccinations-in-schools:environment:production" + "repo:${repository}:ref:refs/heads/main", + "repo:${repository}:environment:production" ], "token.actions.githubusercontent.com:aud": "sts.amazonaws.com" } diff --git a/terraform/account/variables.tf b/terraform/account/variables.tf index d7a1fa3f3c..592407e056 100644 --- a/terraform/account/variables.tf +++ b/terraform/account/variables.tf @@ -29,6 +29,11 @@ locals { mavis_deploy = aws_iam_policy.mavis_deploy.arn }) + ecs_deploy_policies = { + ecr_read = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly" + deploy_ecs_service = aws_iam_policy.deploy_ecs_service.arn + } + monitoring_policies = merge(local.base_policies, { monitoring_deploy = aws_iam_policy.monitoring_deploy.arn }) diff --git a/terraform/app/codedeploy.tf b/terraform/app/codedeploy.tf index 3a4190ef09..638f4f8541 100644 --- a/terraform/app/codedeploy.tf +++ b/terraform/app/codedeploy.tf @@ -52,6 +52,55 @@ resource "aws_codedeploy_deployment_group" "blue_green_deployment_group" { } } +resource "aws_codedeploy_deployment_group" "reporting" { + 
app_name = aws_codedeploy_app.mavis.name + deployment_config_name = "CodeDeployDefault.ECSAllAtOnce" + deployment_group_name = "reporting-${var.environment}" + service_role_arn = aws_iam_role.code_deploy.arn + + auto_rollback_configuration { + enabled = true + events = ["DEPLOYMENT_FAILURE"] + } + + blue_green_deployment_config { + deployment_ready_option { + action_on_timeout = "CONTINUE_DEPLOYMENT" + } + + terminate_blue_instances_on_deployment_success { + action = "TERMINATE" + termination_wait_time_in_minutes = 1 + } + } + + deployment_style { + deployment_option = "WITH_TRAFFIC_CONTROL" + deployment_type = "BLUE_GREEN" + } + + ecs_service { + cluster_name = aws_ecs_cluster.cluster.name + service_name = module.reporting_service.service.name + } + + load_balancer_info { + target_group_pair_info { + prod_traffic_route { + listener_arns = [aws_lb_listener.app_listener_https.arn] + } + + target_group { + name = aws_lb_target_group.reporting_blue.name + } + + target_group { + name = aws_lb_target_group.reporting_green.name + } + } + } +} + resource "aws_s3_bucket" "code_deploy_bucket" { bucket = var.appspec_bucket force_destroy = true diff --git a/terraform/app/ecs.tf b/terraform/app/ecs.tf index 8014eebcc8..da10d82b0a 100644 --- a/terraform/app/ecs.tf +++ b/terraform/app/ecs.tf @@ -1,7 +1,7 @@ resource "aws_security_group_rule" "web_service_alb_ingress" { type = "ingress" - from_port = 4000 - to_port = 4000 + from_port = local.container_ports.web + to_port = local.container_ports.web protocol = "tcp" security_group_id = module.web_service.security_group_id source_security_group_id = aws_security_group.lb_service_sg.id @@ -10,6 +10,30 @@ resource "aws_security_group_rule" "web_service_alb_ingress" { } } +resource "aws_security_group_rule" "reporting_service_alb_ingress" { + type = "ingress" + from_port = local.container_ports.reporting + to_port = local.container_ports.reporting + protocol = "tcp" + security_group_id = module.reporting_service.security_group_id + 
source_security_group_id = aws_security_group.lb_service_sg.id + lifecycle { + create_before_destroy = true + } +} + +resource "aws_security_group_rule" "reporting_to_web_service" { + type = "ingress" + from_port = local.container_ports.web + to_port = local.container_ports.web + protocol = "tcp" + security_group_id = module.web_service.security_group_id + source_security_group_id = module.reporting_service.security_group_id + lifecycle { + create_before_destroy = true + } +} + resource "aws_ecs_cluster" "cluster" { name = "mavis-${var.environment}" @@ -19,11 +43,50 @@ resource "aws_ecs_cluster" "cluster" { } } +resource "aws_service_discovery_private_dns_namespace" "internal" { + name = "mavis.${var.environment}.aws-int" + description = "Private namespace for ECS service discovery" + vpc = aws_vpc.application_vpc.id + + tags = { + Name = "ecs-service-discovery-${var.environment}" + } +} + +resource "aws_service_discovery_service" "web" { + name = "web" + + dns_config { + namespace_id = aws_service_discovery_private_dns_namespace.internal.id + dns_records { + ttl = 10 # TODO: Decide on optimal caching time for DNS records + type = "A" + } + routing_policy = "MULTIVALUE" # For multiple tasks; use "WEIGHTED" if custom weights needed + } + + tags = { + Name = "mavis-${var.environment}-web" + } +} + module "web_service" { source = "./modules/ecs_service" task_config = { - environment = local.task_envs - secrets = local.task_secrets + environment = concat(local.task_envs, [ + { + name = "MAVIS__REPORTING_API__CLIENT_APP__CLIENT_ID" + value = aws_secretsmanager_secret.jwt_sign.name + } + ] + ) + secrets = concat( + local.task_secrets, + [{ + name = "MAVIS__REPORTING_API__CLIENT_APP__SECRET" + valueFrom = aws_secretsmanager_secret.jwt_sign.arn + }] + ) cpu = 1024 memory = 2048 docker_image = "${var.account_id}.dkr.ecr.eu-west-2.amazonaws.com/${var.docker_image}@${var.image_digest}" @@ -31,7 +94,7 @@ module "web_service" { task_role_arn = aws_iam_role.ecs_task_role.arn 
log_group_name = aws_cloudwatch_log_group.ecs_log_group.name region = var.region - health_check_command = ["CMD-SHELL", "./bin/internal_healthcheck http://localhost:4000/health/database"] + health_check_command = ["CMD-SHELL", "./bin/internal_healthcheck http://localhost:${local.container_ports.web}/health/database"] } network_params = { subnets = [aws_subnet.private_subnet_a.id, aws_subnet.private_subnet_b.id] @@ -39,7 +102,7 @@ module "web_service" { } loadbalancer = { target_group_arn = local.ecs_initial_lb_target_group - container_port = 4000 + container_port = local.container_ports.web } autoscaling_policies = tomap({ cpu = { @@ -49,13 +112,14 @@ module "web_service" { scale_out_cooldown = 300 } }) - cluster_id = aws_ecs_cluster.cluster.id - cluster_name = aws_ecs_cluster.cluster.name - minimum_replica_count = var.minimum_web_replicas - maximum_replica_count = var.maximum_web_replicas - environment = var.environment - server_type = "web" - deployment_controller = "CODE_DEPLOY" + cluster_id = aws_ecs_cluster.cluster.id + cluster_name = aws_ecs_cluster.cluster.name + minimum_replica_count = var.minimum_web_replicas + maximum_replica_count = var.maximum_web_replicas + environment = var.environment + server_type = "web" + deployment_controller = "CODE_DEPLOY" + service_discovery_service_arn = aws_service_discovery_service.web.arn } module "good_job_service" { @@ -70,7 +134,7 @@ module "good_job_service" { task_role_arn = aws_iam_role.ecs_task_role.arn log_group_name = aws_cloudwatch_log_group.ecs_log_group.name region = var.region - health_check_command = ["CMD-SHELL", "./bin/internal_healthcheck http://localhost:4000/status/connected"] + health_check_command = ["CMD-SHELL", "./bin/internal_healthcheck http://localhost:${local.container_ports.good_job}/status/connected"] } network_params = { subnets = [aws_subnet.private_subnet_a.id, aws_subnet.private_subnet_b.id] @@ -84,14 +148,30 @@ module "good_job_service" { server_type = "good-job" } -module 
"sidekiq_service" { +module "reporting_service" { source = "./modules/ecs_service" task_config = { - environment = local.task_envs - secrets = local.task_secrets + environment = [ + { + name = "VALKEY_ADDRESS" + value = aws_elasticache_serverless_cache.reporting_service.endpoint[0].address + }, + { + name = "VALKEY_PORT" + value = aws_elasticache_serverless_cache.reporting_service.endpoint[0].port + }, + { + name = "CLIENT_ID" + value = aws_secretsmanager_secret.jwt_sign.name + } + ] + secrets = [{ + name = "CLIENT_SECRET" + valueFrom = aws_secretsmanager_secret.jwt_sign.arn + }] cpu = 1024 memory = 2048 - docker_image = "${var.account_id}.dkr.ecr.eu-west-2.amazonaws.com/${var.docker_image}@${var.image_digest}" + docker_image = local.reporting_image execution_role_arn = aws_iam_role.ecs_task_execution_role.arn task_role_arn = aws_iam_role.ecs_task_role.arn log_group_name = aws_cloudwatch_log_group.ecs_log_group.name @@ -102,14 +182,39 @@ module "sidekiq_service" { subnets = [aws_subnet.private_subnet_a.id, aws_subnet.private_subnet_b.id] vpc_id = aws_vpc.application_vpc.id } - minimum_replica_count = var.sidekiq_replicas - maximum_replica_count = var.sidekiq_replicas + loadbalancer = { + target_group_arn = local.reporting_initial_lb_target_group + container_port = local.container_ports.reporting + } + autoscaling_policies = tomap({ + cpu = { + predefined_metric_type = "ECSServiceAverageCPUUtilization" + target_value = 60 + scale_in_cooldown = 600 + scale_out_cooldown = 300 + } + }) + container_port = local.container_ports.reporting + minimum_replica_count = var.minimum_reporting_replicas + maximum_replica_count = var.maximum_reporting_replicas cluster_id = aws_ecs_cluster.cluster.id cluster_name = aws_ecs_cluster.cluster.name environment = var.environment - server_type = "sidekiq" + server_type = "reporting" + deployment_controller = "CODE_DEPLOY" +} + +# Fetch the task definition for the reporting service if it exists, as it has a separate deployment 
process/pipeline +# TODO: Remove this workaround when extracting infrastructure from the monorepo is complete +data "aws_ecs_task_definition" "reporting" { + count = var.reporting_digest == null ? 1 : 0 + task_definition = "arn:aws:ecs:${var.region}:${var.account_id}:task-definition/mavis-reporting-task-definition-${var.environment}" +} - depends_on = [ - aws_elasticache_replication_group.valkey - ] +locals { + reporting_image = ( + var.reporting_digest == null ? + jsondecode(data.aws_ecs_task_definition.reporting[0].container_definitions)[0]["image"] : + "${var.account_id}.dkr.ecr.eu-west-2.amazonaws.com/mavis/reporting@${var.reporting_digest}" + ) } diff --git a/terraform/app/iam_policy_documents.tf b/terraform/app/iam_policy_documents.tf index 622073eb80..b0425b31db 100644 --- a/terraform/app/iam_policy_documents.tf +++ b/terraform/app/iam_policy_documents.tf @@ -3,13 +3,16 @@ data "aws_iam_policy_document" "codedeploy" { statement { actions = ["ecs:DescribeServices", "ecs:UpdateServicePrimaryTaskSet"] - resources = [module.web_service.service.id] + resources = [module.web_service.service.id, module.reporting_service.service.id] effect = "Allow" } statement { - actions = ["ecs:CreateTaskSet", "ecs:DeleteTaskSet"] - resources = ["arn:aws:ecs:*:*:task-set/${aws_ecs_cluster.cluster.name}/${module.web_service.service.name}/*"] - effect = "Allow" + actions = ["ecs:CreateTaskSet", "ecs:DeleteTaskSet"] + resources = [ + "arn:aws:ecs:*:*:task-set/${aws_ecs_cluster.cluster.name}/${module.web_service.service.name}/*", + "arn:aws:ecs:*:*:task-set/${aws_ecs_cluster.cluster.name}/${module.reporting_service.service.name}/*" + ] + effect = "Allow" } statement { actions = [ @@ -51,16 +54,21 @@ data "aws_iam_policy_document" "ecs_secrets_access" { statement { sid = "railsKeySid" actions = ["ssm:GetParameters"] - resources = concat([ - "arn:aws:ssm:${var.region}:${var.account_id}:parameter${var.rails_master_key_path}" - ], local.parameter_store_arns) + resources = concat( + [ + 
"arn:aws:ssm:${var.region}:${var.account_id}:parameter${var.rails_master_key_path}", + ], + local.parameter_store_arns, + + ) effect = "Allow" } statement { sid = "dbSecretSid" actions = ["secretsmanager:GetSecretValue"] resources = [ - aws_rds_cluster.core.master_user_secret[0].secret_arn + aws_rds_cluster.core.master_user_secret[0].secret_arn, + aws_secretsmanager_secret.jwt_sign.arn ] effect = "Allow" } diff --git a/terraform/app/kms.tf b/terraform/app/kms.tf index 752bb752d5..fb6dc7f129 100644 --- a/terraform/app/kms.tf +++ b/terraform/app/kms.tf @@ -42,3 +42,22 @@ resource "aws_kms_key" "rds_cluster" { ] }) } + +resource "aws_kms_key" "reporting_valkey" { + description = "Custom KMS key for new Aurora cluster" + policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Sid = "AllowAccount" + Effect = "Allow" + Principal = { + AWS = ["arn:aws:iam::${var.account_id}:root"] + } + Action = "kms:*" + Resource = "*" + } + ] + }) +} + diff --git a/terraform/app/loadbalancer.tf b/terraform/app/loadbalancer.tf index c25240ecdc..a0178d473b 100644 --- a/terraform/app/loadbalancer.tf +++ b/terraform/app/loadbalancer.tf @@ -68,12 +68,13 @@ resource "aws_lb" "app_lb" { } security_groups = [aws_security_group.lb_service_sg.id] subnets = [aws_subnet.public_subnet_a.id, aws_subnet.public_subnet_b.id] + depends_on = [aws_security_group_rule.lb_ingress_https] #TODO: Delete after migration drop_invalid_header_fields = true } resource "aws_lb_target_group" "blue" { name = "mavis-blue-${var.environment}" - port = 4000 + port = local.container_ports.web protocol = "HTTP" vpc_id = aws_vpc.application_vpc.id target_type = "ip" @@ -91,7 +92,7 @@ resource "aws_lb_target_group" "blue" { resource "aws_lb_target_group" "green" { name = "mavis-green-${var.environment}" - port = 4000 + port = local.container_ports.web protocol = "HTTP" vpc_id = aws_vpc.application_vpc.id target_type = "ip" @@ -107,6 +108,42 @@ resource "aws_lb_target_group" "green" { } } +resource 
"aws_lb_target_group" "reporting_blue" { + name = "mavis-rep-blue-${var.environment}" + port = local.container_ports.reporting + protocol = "HTTP" + vpc_id = aws_vpc.application_vpc.id + target_type = "ip" + health_check { + path = "/reporting/healthcheck" + protocol = "HTTP" + port = "traffic-port" + matcher = "200" + interval = 10 + timeout = 5 + healthy_threshold = 2 + unhealthy_threshold = 2 + } +} + +resource "aws_lb_target_group" "reporting_green" { + name = "mavis-rep-green-${var.environment}" + port = local.container_ports.reporting + protocol = "HTTP" + vpc_id = aws_vpc.application_vpc.id + target_type = "ip" + health_check { + path = "/reporting/healthcheck" + protocol = "HTTP" + port = "traffic-port" + matcher = "200" + interval = 10 + timeout = 5 + healthy_threshold = 2 + unhealthy_threshold = 2 + } +} + resource "aws_lb_target_group" "dump" { name = "dump-${var.environment}" port = 80 @@ -168,6 +205,29 @@ resource "aws_lb_listener_rule" "forward_to_app" { } } +resource "aws_lb_listener_rule" "forward_to_reporting" { + listener_arn = aws_lb_listener.app_listener_https.arn + priority = 49000 + action { + type = "forward" + target_group_arn = local.reporting_initial_lb_target_group + } + condition { + path_pattern { + values = var.reporting_endpoints + } + } + condition { + host_header { + values = local.host_headers + } + } + + lifecycle { + ignore_changes = [action] + } +} + resource "aws_lb_listener_rule" "redirect_to_https" { listener_arn = aws_lb_listener.app_listener_http.arn priority = 50000 @@ -198,4 +258,4 @@ module "dns_route53" { zone_id = aws_lb.app_lb.zone_id zone_name = var.zone_name domain_names = tolist(toset(values(var.http_hosts))) -} +} \ No newline at end of file diff --git a/terraform/app/main.tf b/terraform/app/main.tf index 412cdc1c98..3ed6d41a21 100644 --- a/terraform/app/main.tf +++ b/terraform/app/main.tf @@ -9,6 +9,10 @@ terraform { source = "hashicorp/time" version = "~> 0.12" } + archive = { + source = "hashicorp/archive" + 
version = "~> 2.7" + } } backend "s3" { diff --git a/terraform/app/modules/ecs_service/main.tf b/terraform/app/modules/ecs_service/main.tf index 92647cd96d..11e28fe252 100644 --- a/terraform/app/modules/ecs_service/main.tf +++ b/terraform/app/modules/ecs_service/main.tf @@ -57,6 +57,12 @@ resource "aws_ecs_service" "this" { container_port = var.loadbalancer.container_port } } + dynamic "service_registries" { + for_each = var.service_discovery_service_arn != null ? [1] : [] + content { + registry_arn = var.service_discovery_service_arn + } + } deployment_minimum_healthy_percent = 100 deployment_maximum_percent = 200 lifecycle { @@ -85,8 +91,8 @@ resource "aws_ecs_task_definition" "this" { readonlyRootFileSystem = true portMappings = [ { - containerPort = 4000 - hostPort = 4000 + containerPort = var.container_port + hostPort = var.host_port == null ? var.container_port : var.host_port } ] environment = concat(var.task_config.environment, [{ name = "SERVER_TYPE", value = var.server_type }]) @@ -109,3 +115,15 @@ resource "aws_ecs_task_definition" "this" { } ]) } + +resource "aws_ssm_parameter" "container_variables" { + name = "/${var.environment}/mavis/ecs/${local.server_type_name}/container_variables" + type = "String" + + value = jsonencode({ + task_envs = concat(var.task_config.environment, [{ name = "SERVER_TYPE", value = var.server_type }]) + task_secrets = var.task_config.secrets + execution_role_arn = var.task_config.execution_role_arn + task_role_arn = var.task_config.task_role_arn + }) +} diff --git a/terraform/app/modules/ecs_service/variables.tf b/terraform/app/modules/ecs_service/variables.tf index 785da0c6c3..1c12832a13 100644 --- a/terraform/app/modules/ecs_service/variables.tf +++ b/terraform/app/modules/ecs_service/variables.tf @@ -90,6 +90,13 @@ variable "network_params" { nullable = false } +variable "service_discovery_service_arn" { + type = string + description = "Arn of the Service Discovery service for the ECS service. 
If this is not set, the service will not be discoverable via Cloud Map." + default = null + nullable = true +} + variable "loadbalancer" { type = object({ target_group_arn = string @@ -114,6 +121,20 @@ variable "container_name" { nullable = false } +variable "container_port" { + type = number + description = "The port on the container that the service will bind to. If not specified, it defaults to 4000." + default = 4000 + nullable = false +} + +variable "host_port" { + type = number + description = "The port on the host that the container will bind to. If not specified, it defaults to the same value as container_port." + default = null + nullable = true +} + locals { autoscaling_enabled = var.maximum_replica_count > var.minimum_replica_count server_type_name = var.server_type_name != null ? var.server_type_name : var.server_type diff --git a/terraform/app/rds.tf b/terraform/app/rds.tf index 2f3bc7b0ba..4f970edea1 100644 --- a/terraform/app/rds.tf +++ b/terraform/app/rds.tf @@ -12,13 +12,13 @@ resource "aws_security_group" "rds_security_group" { } resource "aws_security_group_rule" "rds_ecs_ingress" { - count = length(local.ecs_sg_ids) + count = length(local.db_access_sg_ids) type = "ingress" from_port = 5432 to_port = 5432 protocol = "tcp" security_group_id = aws_security_group.rds_security_group.id - source_security_group_id = local.ecs_sg_ids[count.index] + source_security_group_id = local.db_access_sg_ids[count.index] lifecycle { create_before_destroy = true } diff --git a/terraform/app/resources/.gitignore b/terraform/app/resources/.gitignore new file mode 100644 index 0000000000..6f66c74b0e --- /dev/null +++ b/terraform/app/resources/.gitignore @@ -0,0 +1 @@ +*.zip \ No newline at end of file diff --git a/terraform/app/resources/rotate_secret.py b/terraform/app/resources/rotate_secret.py new file mode 100644 index 0000000000..b09393b16a --- /dev/null +++ b/terraform/app/resources/rotate_secret.py @@ -0,0 +1,94 @@ +import boto3 +import logging +import os 
+import json + +logger = logging.getLogger() +logger.setLevel(logging.INFO) + +def lambda_handler(event, context): + arn = event['SecretId'] + token = event['ClientRequestToken'] + step = event['Step'] + + service_client = boto3.client('secretsmanager') + + metadata = service_client.describe_secret(SecretId=arn) + if not metadata['RotationEnabled']: + logger.error(f"Secret {arn} is not enabled for rotation") + raise ValueError(f"Secret {arn} is not enabled for rotation") + + versions = metadata['VersionIdsToStages'] + if token not in versions: + logger.error(f"Secret version {token} has no stage for rotation of secret {arn}.") + raise ValueError(f"Secret version {token} has no stage for rotation of secret {arn}.") + + if "AWSCURRENT" in versions[token]: + logger.info(f"Secret version {token} already set as AWSCURRENT for secret {arn}.") + return + + if "AWSPENDING" not in versions[token]: + logger.error(f"Secret version {token} not set as AWSPENDING for rotation of secret {arn}.") + raise ValueError(f"Secret version {token} not set as AWSPENDING for rotation of secret {arn}.") + + if step == "createSecret": + create_secret(service_client, arn, token) + elif step == "setSecret": + set_secret(service_client, arn, token) + elif step == "testSecret": + test_secret(service_client, arn, token) + elif step == "finishSecret": + finish_secret(service_client, arn, token) + else: + logger.error(f"Invalid step parameter: {step}") + raise ValueError("Invalid step parameter") + +def create_secret(service_client, arn, token): + try: + # Check if AWSPENDING secret exists + service_client.get_secret_value(SecretId=arn, VersionId=token, VersionStage="AWSPENDING") + logger.info(f"createSecret: Secret already exists for {arn} version {token} as AWSPENDING.") + except service_client.exceptions.ResourceNotFoundException: + # Generate a 32-character hexadecimal secret (0-9, a-f) + logger.info(f"createSecret: Generating new 32-character hexadecimal secret for {arn}.") + new_secret = 
service_client.get_random_password( + PasswordLength=32, + ExcludeUppercase=True, + ExcludePunctuation=True, + IncludeSpace=False, + RequireEachIncludedType=False, + ExcludeCharacters='ghijklmnopqrstuvwxyz' + )['RandomPassword'] + service_client.put_secret_value( + SecretId=arn, + ClientRequestToken=token, + SecretString=new_secret, + VersionStages=['AWSPENDING'] + ) + logger.info(f"createSecret: Successfully put secret for ARN {arn} and version {token}.") + +def set_secret(service_client, arn, token): + # No external service to update for a generic secret + pass + +def test_secret(service_client, arn, token): + # No validation required for a generic secret + pass + +def finish_secret(service_client, arn, token): + metadata = service_client.describe_secret(SecretId=arn) + current_version = None + for version, stages in metadata["VersionIdsToStages"].items(): + if "AWSCURRENT" in stages: + if version == token: + logger.info(f"finishSecret: Version {version} already marked as AWSCURRENT for {arn}") + return + current_version = version + break + service_client.update_secret_version_stage( + SecretId=arn, + VersionStage="AWSCURRENT", + MoveToVersionId=token, + RemoveFromVersionId=current_version if current_version else token + ) + logger.info(f"finishSecret: Successfully set AWSCURRENT stage to version {token} for secret {arn}.") \ No newline at end of file diff --git a/terraform/app/ssm_parameters.tf b/terraform/app/ssm_parameters.tf index fcd4e83708..19000b325c 100644 --- a/terraform/app/ssm_parameters.tf +++ b/terraform/app/ssm_parameters.tf @@ -5,3 +5,94 @@ resource "aws_ssm_parameter" "environment_config" { value = each.value } + +resource "aws_secretsmanager_secret" "jwt_sign" { + name = "rep-jwt-signing-secret-${var.environment}" + description = "Secret for JSON signing key" + recovery_window_in_days = 7 + tags = { + Name = "json-signing-${var.environment}" + } +} + +resource "aws_secretsmanager_secret_rotation" "jwt_sign" { + secret_id = 
aws_secretsmanager_secret.jwt_sign.arn + rotate_immediately = true + rotation_lambda_arn = aws_lambda_function.rotate_jwt_sign.arn + rotation_rules { + schedule_expression = "cron(0 1 ? * MON *)" # Rotate every Monday at 01:00 UTC + duration = "1h" + } +} + +data "archive_file" "jwt_sign_lambda_zip" { + type = "zip" + source_file = "${path.module}/resources/rotate_secret.py" # Directory containing the Lambda function code + output_path = "${path.module}/resources/rotate_secret.zip" +} + +resource "aws_lambda_function" "rotate_jwt_sign" { + function_name = "rep-jwt-secret-rotation-${var.environment}" + handler = "rotate_secret.lambda_handler" + runtime = "python3.12" + role = aws_iam_role.jwt_rotate_lambda.arn + filename = data.archive_file.jwt_sign_lambda_zip.output_path + source_code_hash = data.archive_file.jwt_sign_lambda_zip.output_base64sha256 +} + +resource "aws_iam_role" "jwt_rotate_lambda" { + name = "secret-rotation-lambda-role" + + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Action = "sts:AssumeRole" + Effect = "Allow" + Principal = { + Service = "lambda.amazonaws.com" + } + } + ] + }) +} + +resource "aws_iam_role_policy" "jwt_rotate_lambda" { + name = "secret-rotation-lambda-policy" + role = aws_iam_role.jwt_rotate_lambda.id + + policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Effect = "Allow" + Action = [ + "secretsmanager:RotateSecret", + "secretsmanager:GetSecretValue", + "secretsmanager:PutSecretValue", + "secretsmanager:DescribeSecret", + "secretsmanager:UpdateSecretVersionStage", + ] + Resource = aws_secretsmanager_secret.jwt_sign.arn + }, + { + Effect = "Allow" + Action = [ + "logs:CreateLogGroup", + "logs:CreateLogStream", + "logs:PutLogEvents", + "secretsmanager:GetRandomPassword", + ] + Resource = "*" + } + ] + }) +} + +resource "aws_lambda_permission" "jwt_sign" { + statement_id = "AllowExecutionFromSecretsManager" + action = "lambda:InvokeFunction" + function_name = 
aws_lambda_function.rotate_jwt_sign.function_name + principal = "secretsmanager.amazonaws.com" + source_arn = aws_secretsmanager_secret.jwt_sign.arn +} \ No newline at end of file diff --git a/terraform/app/valkey.tf b/terraform/app/valkey.tf index e9afc3080e..b400cbf0ba 100644 --- a/terraform/app/valkey.tf +++ b/terraform/app/valkey.tf @@ -1,6 +1,6 @@ -resource "aws_security_group" "valkey" { +resource "aws_security_group" "reporting_valkey" { name = "mavis-cache-${var.environment}" - description = "Security group for Valkey ElastiCache (self-designed cluster)" + description = "Security group for Valkey ElastiCache for the reporting service" vpc_id = aws_vpc.application_vpc.id tags = { @@ -12,101 +12,33 @@ resource "aws_security_group" "valkey" { } } -resource "aws_security_group_rule" "valkey_ecs_services_ingress" { - count = length(local.ecs_sg_ids) +resource "aws_security_group_rule" "reporting_valkey_ingress" { type = "ingress" - from_port = var.valkey_port - to_port = var.valkey_port + from_port = aws_elasticache_serverless_cache.reporting_service.endpoint[0].port + to_port = aws_elasticache_serverless_cache.reporting_service.endpoint[0].port protocol = "tcp" - security_group_id = aws_security_group.valkey.id - source_security_group_id = local.ecs_sg_ids[count.index] + security_group_id = aws_security_group.reporting_valkey.id + source_security_group_id = module.reporting_service.security_group_id lifecycle { create_before_destroy = true } } -resource "aws_elasticache_subnet_group" "valkey" { - name = "mavis-cache-subnet-group-${var.environment}" - subnet_ids = [aws_subnet.private_subnet_a.id, aws_subnet.private_subnet_b.id] - - tags = { - Name = "mavis-cache-subnet-group-${var.environment}" - } -} - -resource "aws_elasticache_replication_group" "valkey" { - replication_group_id = "mavis-cache-${var.environment}" - description = "Valkey cluster for Sidekiq" - - engine = "valkey" - engine_version = var.valkey_engine_version - node_type = var.valkey_node_type - 
port = var.valkey_port - parameter_group_name = aws_elasticache_parameter_group.valkey.name - - automatic_failover_enabled = var.valkey_failover_enabled - num_cache_clusters = length(local.valkey_cache_availability_zones) - subnet_group_name = aws_elasticache_subnet_group.valkey.name - security_group_ids = [aws_security_group.valkey.id] - preferred_cache_cluster_azs = local.valkey_cache_availability_zones - snapshot_retention_limit = var.valkey_snapshot_retention_limit - snapshot_window = var.valkey_snapshot_window - maintenance_window = var.valkey_maintenance_window - - at_rest_encryption_enabled = true - transit_encryption_enabled = true - - log_delivery_configuration { - destination = aws_cloudwatch_log_group.valkey_slow_log.name - destination_type = "cloudwatch-logs" - log_format = "json" - log_type = "slow-log" - } - - log_delivery_configuration { - destination = aws_cloudwatch_log_group.valkey_engine_log.name - destination_type = "cloudwatch-logs" - log_format = "json" - log_type = "engine-log" - } - - tags = { - Name = "mavis-cache-${var.environment}" - Purpose = "sidekiq-job-processing" - } - apply_immediately = true -} - -resource "aws_elasticache_parameter_group" "valkey" { - family = "valkey8" - name = "mavis-cache-params-${var.environment}" - - # Optimize for Sidekiq workload - parameter { - name = "maxmemory-policy" - value = "noeviction" - } - - tags = { - Name = "mavis-cache-params-${var.environment}" - } -} - -resource "aws_cloudwatch_log_group" "valkey_slow_log" { - name = "/aws/elasticache/valkey/${var.environment}/slow-log" - retention_in_days = var.valkey_log_retention_days - - tags = { - Name = "mavis-cache-slow-log-${var.environment}" - } -} - -resource "aws_cloudwatch_log_group" "valkey_engine_log" { - name = "/aws/elasticache/valkey/${var.environment}/engine-log" - retention_in_days = var.valkey_log_retention_days - - tags = { - Name = "mavis-cache-engine-log-${var.environment}" - } +resource "aws_elasticache_serverless_cache" 
"reporting_service" { + engine = "valkey" + name = "mavis-reporting-${var.environment}" + cache_usage_limits { + data_storage { + maximum = 1 + unit = "GB" + } + ecpu_per_second { + maximum = 1000 + } + } + kms_key_id = aws_kms_key.reporting_valkey.arn + major_engine_version = "8" + security_group_ids = [aws_security_group.reporting_valkey.id] + subnet_ids = [aws_subnet.private_subnet_a.id, aws_subnet.private_subnet_b.id] } \ No newline at end of file diff --git a/terraform/app/variables.tf b/terraform/app/variables.tf index 4064844bcc..9f5c396c07 100644 --- a/terraform/app/variables.tf +++ b/terraform/app/variables.tf @@ -133,6 +133,13 @@ variable "image_digest" { nullable = false } +variable "reporting_digest" { + type = string + description = "The docker image digest for the reporting container in the task definition." + default = null + nullable = true +} + variable "enable_cis2" { type = bool default = true @@ -257,6 +264,11 @@ locals { valueFrom = var.rails_master_key_path } ], local.parameter_store_config_list) + container_ports = { + web = 4000 + good_job = 4000 + reporting = 5000 + } } ########## RDS configuration ########## @@ -330,6 +342,18 @@ variable "sidekiq_replicas" { description = "Amount of replicas for the sidekiq service" } +variable "minimum_reporting_replicas" { + type = number + default = 2 + description = "Minimum amount of allowed replicas for reporting service. Also the replica count when creating the service." +} + +variable "maximum_reporting_replicas" { + type = number + default = 4 + description = "Maximum amount of allowed replicas for reporting service" +} + variable "max_aurora_capacity_units" { type = number default = 8 @@ -421,3 +445,16 @@ locals { ecs_sg_ids = [module.web_service.security_group_id, module.good_job_service.security_group_id, module.sidekiq_service.security_group_id] valkey_cache_availability_zones = var.valkey_failover_enabled ? 
[aws_subnet.private_subnet_a.availability_zone, aws_subnet.private_subnet_b.availability_zone] : [aws_subnet.private_subnet_a.availability_zone] } + +variable "reporting_endpoints" { + type = list(string) + description = "List of endpoints for the loadbalancer to forward to the reporting service" + default = ["/reporting", "/reporting/*"] + nullable = false +} + +locals { + ecs_initial_lb_target_group = var.active_lb_target_group == "green" ? aws_lb_target_group.green.arn : aws_lb_target_group.blue.arn + reporting_initial_lb_target_group = var.active_lb_target_group == "green" ? aws_lb_target_group.reporting_green.arn : aws_lb_target_group.reporting_blue.arn + db_access_sg_ids = [module.web_service.security_group_id, module.good_job_service.security_group_id] +}