From 7cb0a37d0cbdbf95cab45fbcc42ca11286d5f5bb Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Thu, 14 May 2026 12:46:48 +0100 Subject: [PATCH 01/34] chore: inject dependency for init plan --- REPO_INSTRUCTIONS.md | 3 +++ infra/live/dependencies/network.hcl | 30 ++++++++++++++++++++++ infra/live/dev/aws/network/terragrunt.hcl | 6 +++++ infra/live/prod/aws/network/terragrunt.hcl | 6 +++++ infra/modules/aws/network/README.md | 24 ++++++----------- infra/modules/aws/network/data.tf | 20 --------------- infra/modules/aws/network/main.tf | 10 ++++---- infra/modules/aws/network/variables.tf | 20 +++++++++++++++ 8 files changed, 78 insertions(+), 41 deletions(-) create mode 100644 infra/live/dependencies/network.hcl diff --git a/REPO_INSTRUCTIONS.md b/REPO_INSTRUCTIONS.md index a52d5af1..5f1a9d7f 100644 --- a/REPO_INSTRUCTIONS.md +++ b/REPO_INSTRUCTIONS.md @@ -37,6 +37,9 @@ These instructions apply to the entire repository. - verify required infra resources exist (CodeDeploy app/deployment group, listeners/target groups, alarms, VPC link if applicable) - when changing reusable workflow contracts, compare every caller `with:` block to the callee `workflow_call.inputs` - check apply/deploy/destroy, and avoid unnecessary `terraform_remote_state` coupling (especially for fast-changing outputs) +- for bootstrap-sensitive or plan-sensitive cross-stack contracts, prefer Terragrunt `dependency` inputs in the live stack and `mock_outputs` for non-mutating commands rather than reading upstream state directly inside Terraform modules +- if CI plan failures are caused by missing upstream state, fix the contract shape first instead of papering over the issue with more direct `terraform_remote_state` reads +- when the same Terragrunt dependency wiring or mocks are needed across environments, centralize that shared config under `infra/live/dependencies/` in a capability-scoped helper such as `network.hcl` and have environment stacks read it rather than duplicating the same blocks in 
`dev`, `prod`, or `ci` ## Terragrunt Plan Expectation diff --git a/infra/live/dependencies/network.hcl b/infra/live/dependencies/network.hcl new file mode 100644 index 00000000..b07fae22 --- /dev/null +++ b/infra/live/dependencies/network.hcl @@ -0,0 +1,30 @@ +dependency "security" { + config_path = "${get_original_terragrunt_dir()}/../security" + + mock_outputs = { + load_balancer_sg = "sg-00000000000000001" + api_vpc_link_sg = "sg-00000000000000002" + vpc_endpoint_sg = "sg-00000000000000003" + } + + mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] +} + +dependency "cognito" { + config_path = "${get_original_terragrunt_dir()}/../cognito" + + mock_outputs = { + user_pool_client_id = "mock-user-pool-client-id" + issuer_url = "https://cognito-idp.eu-west-2.amazonaws.com/eu-west-2_mock" + } + + mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] +} + +inputs = { + load_balancer_sg = dependency.security.outputs.load_balancer_sg + api_vpc_link_sg = dependency.security.outputs.api_vpc_link_sg + vpc_endpoint_sg = dependency.security.outputs.vpc_endpoint_sg + auth_user_pool_client_id = dependency.cognito.outputs.user_pool_client_id + auth_issuer_url = dependency.cognito.outputs.issuer_url +} diff --git a/infra/live/dev/aws/network/terragrunt.hcl b/infra/live/dev/aws/network/terragrunt.hcl index 92b17cab..860571f1 100644 --- a/infra/live/dev/aws/network/terragrunt.hcl +++ b/infra/live/dev/aws/network/terragrunt.hcl @@ -2,6 +2,12 @@ include "root" { path = find_in_parent_folders("root.hcl") } +locals { + network_dependencies = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) +} + terraform { source = "../../../../modules//aws//network" } + +inputs = local.network_dependencies.inputs diff --git a/infra/live/prod/aws/network/terragrunt.hcl b/infra/live/prod/aws/network/terragrunt.hcl index 92b17cab..860571f1 100644 --- a/infra/live/prod/aws/network/terragrunt.hcl +++ 
b/infra/live/prod/aws/network/terragrunt.hcl @@ -2,6 +2,12 @@ include "root" { path = find_in_parent_folders("root.hcl") } +locals { + network_dependencies = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) +} + terraform { source = "../../../../modules//aws//network" } + +inputs = local.network_dependencies.inputs diff --git a/infra/modules/aws/network/README.md b/infra/modules/aws/network/README.md index 02d76f72..e7c58a19 100644 --- a/infra/modules/aws/network/README.md +++ b/infra/modules/aws/network/README.md @@ -31,28 +31,20 @@ In the common ECS API shape used here: ## Dependencies - pre-existing tagged VPC and private subnets discovered with `data` lookups -- shared security groups from `security` -- `cognito` remote state for the shared JWT issuer and audience +- shared security-group outputs from the `security` live stack +- shared Cognito outputs from the `cognito` live stack for the JWT issuer and audience + +The live Terragrunt stack is expected to provide those upstream values as explicit module inputs. For plan and validate flows before upstream stacks exist, prefer Terragrunt `dependency` mocks in the live stack instead of reading cross-stack state directly inside the Terraform module. ## Bootstrap Notes -This module is not bootstrap-independent. It reads multiple outputs from the `security` stack through remote state, including `vpc_endpoint_sg` for the interface VPC endpoints and `api_vpc_link_sg` for the shared API Gateway VPC link. +This module still depends on upstream `security` and `cognito` stacks at apply time, but the bootstrap-sensitive contract should live in the Terragrunt wrapper rather than in Terraform `terraform_remote_state` blocks inside the module. 
That means: -- `security` must be applied successfully before `network` -- the `security` state file must contain the current outputs, not just an empty or partially initialized state -- a failed or stale bootstrap of `security` can surface here as an `Unsupported attribute` error when Terraform tries to read `data.terraform_remote_state.security.outputs.*` - -If you see an error like: - -```text -Error: Unsupported attribute -data.terraform_remote_state.security.outputs is object with no attributes -This object does not have an attribute named "vpc_endpoint_sg". -``` - -then the problem is usually not the `network` module itself. It means the upstream `security` stack has not produced readable outputs yet. In that case, apply `security` first and confirm its state includes `vpc_endpoint_sg`, `api_vpc_link_sg`, and the other expected outputs before retrying `network`. +- `security` and `cognito` still need to exist for real applies +- plan and validate flows can use Terragrunt `dependency` mocks when those upstream stacks are not available yet +- if apply-time values are missing, fix the upstream stack or the live-stack dependency wiring rather than adding direct cross-stack remote-state reads back into the module ## Feasibility Constraints diff --git a/infra/modules/aws/network/data.tf b/infra/modules/aws/network/data.tf index 918deda7..a346ce5a 100644 --- a/infra/modules/aws/network/data.tf +++ b/infra/modules/aws/network/data.tf @@ -23,23 +23,3 @@ data "aws_route_tables" "private" { values = data.aws_subnets.private.ids } } - -data "terraform_remote_state" "security" { - backend = "s3" - - config = { - bucket = var.state_bucket - key = "${var.environment}/aws/security/terraform.tfstate" - region = var.aws_region - } -} - -data "terraform_remote_state" "cognito" { - backend = "s3" - - config = { - bucket = var.state_bucket - key = "${var.environment}/aws/cognito/terraform.tfstate" - region = var.aws_region - } -} diff --git a/infra/modules/aws/network/main.tf 
b/infra/modules/aws/network/main.tf index d8fd0272..5c18afd5 100644 --- a/infra/modules/aws/network/main.tf +++ b/infra/modules/aws/network/main.tf @@ -2,7 +2,7 @@ resource "aws_lb" "this" { name = local.load_balancer_name internal = true load_balancer_type = "application" - security_groups = [data.terraform_remote_state.security.outputs.load_balancer_sg] + security_groups = [var.load_balancer_sg] subnets = data.aws_subnets.private.ids } @@ -14,7 +14,7 @@ resource "aws_apigatewayv2_api" "http_api" { resource "aws_apigatewayv2_vpc_link" "http_api" { name = "${var.project_name}-${var.environment}-http-vpc-link" subnet_ids = data.aws_subnets.private.ids - security_group_ids = [data.terraform_remote_state.security.outputs.api_vpc_link_sg] + security_group_ids = [var.api_vpc_link_sg] } resource "aws_apigatewayv2_stage" "default" { @@ -30,8 +30,8 @@ resource "aws_apigatewayv2_authorizer" "cognito_jwt" { identity_sources = ["$request.header.Authorization"] jwt_configuration { - audience = [data.terraform_remote_state.cognito.outputs.user_pool_client_id] - issuer = data.terraform_remote_state.cognito.outputs.issuer_url + audience = [var.auth_user_pool_client_id] + issuer = var.auth_issuer_url } } @@ -41,7 +41,7 @@ resource "aws_vpc_endpoint" "interface_endpoints" { vpc_id = data.aws_vpc.this.id service_name = "com.amazonaws.${var.aws_region}.${each.value}" vpc_endpoint_type = "Interface" - security_group_ids = [data.terraform_remote_state.security.outputs.vpc_endpoint_sg] + security_group_ids = [var.vpc_endpoint_sg] subnet_ids = data.aws_subnets.private.ids private_dns_enabled = true } diff --git a/infra/modules/aws/network/variables.tf b/infra/modules/aws/network/variables.tf index 6242bad8..f8830886 100644 --- a/infra/modules/aws/network/variables.tf +++ b/infra/modules/aws/network/variables.tf @@ -16,6 +16,26 @@ variable "state_bucket" { } ### end of static vars set in root.hcl ### +variable "load_balancer_sg" { + type = string +} + +variable "api_vpc_link_sg" { + type 
= string +} + +variable "vpc_endpoint_sg" { + type = string +} + +variable "auth_user_pool_client_id" { + type = string +} + +variable "auth_issuer_url" { + type = string +} + variable "vpc_name" { type = string } From 5a7cc12e62ccef441a75ff0ee60f217bf5ab4936 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Thu, 14 May 2026 13:03:09 +0100 Subject: [PATCH 02/34] chore: use mocked inputs for all dependencies --- README.md | 4 ++++ REPO_INSTRUCTIONS.md | 1 + infra/live/dependencies/ecs_runtime_security.hcl | 13 +++++++++++++ infra/live/dependencies/lambda_runtime_security.hcl | 13 +++++++++++++ infra/live/dev/aws/migrations/terragrunt.hcl | 6 ++++++ infra/live/dev/aws/service_api/terragrunt.hcl | 6 ++++++ infra/live/dev/aws/service_worker/terragrunt.hcl | 6 ++++++ infra/live/prod/aws/migrations/terragrunt.hcl | 6 ++++++ infra/live/prod/aws/service_api/terragrunt.hcl | 6 ++++++ infra/live/prod/aws/service_worker/terragrunt.hcl | 6 ++++++ infra/modules/aws/migrations/README.md | 1 + infra/modules/aws/migrations/data.tf | 10 ---------- infra/modules/aws/migrations/main.tf | 2 +- infra/modules/aws/migrations/variables.tf | 4 ++++ infra/modules/aws/service_api/README.md | 4 +++- infra/modules/aws/service_api/data.tf | 10 ---------- infra/modules/aws/service_api/main.tf | 2 +- infra/modules/aws/service_api/variables.tf | 4 ++++ infra/modules/aws/service_worker/README.md | 4 +++- infra/modules/aws/service_worker/data.tf | 10 ---------- infra/modules/aws/service_worker/main.tf | 2 +- infra/modules/aws/service_worker/variables.tf | 4 ++++ 22 files changed, 89 insertions(+), 35 deletions(-) create mode 100644 infra/live/dependencies/ecs_runtime_security.hcl create mode 100644 infra/live/dependencies/lambda_runtime_security.hcl diff --git a/README.md b/README.md index 08a8b9bc..79a21d26 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,10 @@ Lambda + ECS with CodeDeploy rollouts, plus provisioned concurrency controls for - shared deployment patterns for Lambda and ECS, with 
repo-local `just` commands for local and CI operations - runtime and infrastructure layouts designed to be extended without having to rediscover the whole repo each time +## Bootstrap-Friendly Plans + +For cross-stack contracts that often block CI plans before upstream stacks exist, this repo prefers Terragrunt `dependency` wiring in the live stack plus `mock_outputs` for non-mutating commands such as `plan` and `validate`. The Terraform modules should consume explicit inputs rather than reaching back into sibling stack state directly when the contract needs bootstrap-friendly plan behavior. + Use [CONTRIBUTING.md](CONTRIBUTING.md) for expectations when changing the repo itself. ## Prerequisites diff --git a/REPO_INSTRUCTIONS.md b/REPO_INSTRUCTIONS.md index 5f1a9d7f..3c60e78b 100644 --- a/REPO_INSTRUCTIONS.md +++ b/REPO_INSTRUCTIONS.md @@ -40,6 +40,7 @@ These instructions apply to the entire repository. - for bootstrap-sensitive or plan-sensitive cross-stack contracts, prefer Terragrunt `dependency` inputs in the live stack and `mock_outputs` for non-mutating commands rather than reading upstream state directly inside Terraform modules - if CI plan failures are caused by missing upstream state, fix the contract shape first instead of papering over the issue with more direct `terraform_remote_state` reads - when the same Terragrunt dependency wiring or mocks are needed across environments, centralize that shared config under `infra/live/dependencies/` in a capability-scoped helper such as `network.hcl` and have environment stacks read it rather than duplicating the same blocks in `dev`, `prod`, or `ci` +- keep this approach visible to users as well: when you introduce or expand this pattern, update the top-level `README.md` so the bootstrap-friendly mock strategy is documented outside agent-only instructions ## Terragrunt Plan Expectation diff --git a/infra/live/dependencies/ecs_runtime_security.hcl b/infra/live/dependencies/ecs_runtime_security.hcl new file mode 
100644 index 00000000..c550f972 --- /dev/null +++ b/infra/live/dependencies/ecs_runtime_security.hcl @@ -0,0 +1,13 @@ +dependency "security" { + config_path = "${get_original_terragrunt_dir()}/../security" + + mock_outputs = { + ecs_sg = "sg-00000000000000004" + } + + mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] +} + +inputs = { + ecs_security_group_id = dependency.security.outputs.ecs_sg +} diff --git a/infra/live/dependencies/lambda_runtime_security.hcl b/infra/live/dependencies/lambda_runtime_security.hcl new file mode 100644 index 00000000..595df973 --- /dev/null +++ b/infra/live/dependencies/lambda_runtime_security.hcl @@ -0,0 +1,13 @@ +dependency "security" { + config_path = "${get_original_terragrunt_dir()}/../security" + + mock_outputs = { + runtime_sg = "sg-00000000000000005" + } + + mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] +} + +inputs = { + runtime_security_group_id = dependency.security.outputs.runtime_sg +} diff --git a/infra/live/dev/aws/migrations/terragrunt.hcl b/infra/live/dev/aws/migrations/terragrunt.hcl index 0856befd..305b535f 100644 --- a/infra/live/dev/aws/migrations/terragrunt.hcl +++ b/infra/live/dev/aws/migrations/terragrunt.hcl @@ -2,6 +2,12 @@ include "root" { path = find_in_parent_folders("root.hcl") } +locals { + runtime_security = read_terragrunt_config(find_in_parent_folders("dependencies/lambda_runtime_security.hcl")) +} + terraform { source = "../../../../modules//aws//migrations" } + +inputs = local.runtime_security.inputs diff --git a/infra/live/dev/aws/service_api/terragrunt.hcl b/infra/live/dev/aws/service_api/terragrunt.hcl index 97be2f29..961e3092 100644 --- a/infra/live/dev/aws/service_api/terragrunt.hcl +++ b/infra/live/dev/aws/service_api/terragrunt.hcl @@ -2,6 +2,12 @@ include "root" { path = find_in_parent_folders("root.hcl") } +locals { + runtime_security = 
read_terragrunt_config(find_in_parent_folders("dependencies/ecs_runtime_security.hcl")) +} + terraform { source = "../../../../modules//aws//service_api" } + +inputs = local.runtime_security.inputs diff --git a/infra/live/dev/aws/service_worker/terragrunt.hcl b/infra/live/dev/aws/service_worker/terragrunt.hcl index 8e44b264..f44d7e37 100644 --- a/infra/live/dev/aws/service_worker/terragrunt.hcl +++ b/infra/live/dev/aws/service_worker/terragrunt.hcl @@ -2,6 +2,12 @@ include "root" { path = find_in_parent_folders("root.hcl") } +locals { + runtime_security = read_terragrunt_config(find_in_parent_folders("dependencies/ecs_runtime_security.hcl")) +} + terraform { source = "../../../../modules//aws//service_worker" } + +inputs = local.runtime_security.inputs diff --git a/infra/live/prod/aws/migrations/terragrunt.hcl b/infra/live/prod/aws/migrations/terragrunt.hcl index 0856befd..305b535f 100644 --- a/infra/live/prod/aws/migrations/terragrunt.hcl +++ b/infra/live/prod/aws/migrations/terragrunt.hcl @@ -2,6 +2,12 @@ include "root" { path = find_in_parent_folders("root.hcl") } +locals { + runtime_security = read_terragrunt_config(find_in_parent_folders("dependencies/lambda_runtime_security.hcl")) +} + terraform { source = "../../../../modules//aws//migrations" } + +inputs = local.runtime_security.inputs diff --git a/infra/live/prod/aws/service_api/terragrunt.hcl b/infra/live/prod/aws/service_api/terragrunt.hcl index 97be2f29..961e3092 100644 --- a/infra/live/prod/aws/service_api/terragrunt.hcl +++ b/infra/live/prod/aws/service_api/terragrunt.hcl @@ -2,6 +2,12 @@ include "root" { path = find_in_parent_folders("root.hcl") } +locals { + runtime_security = read_terragrunt_config(find_in_parent_folders("dependencies/ecs_runtime_security.hcl")) +} + terraform { source = "../../../../modules//aws//service_api" } + +inputs = local.runtime_security.inputs diff --git a/infra/live/prod/aws/service_worker/terragrunt.hcl b/infra/live/prod/aws/service_worker/terragrunt.hcl index 
8e44b264..f44d7e37 100644 --- a/infra/live/prod/aws/service_worker/terragrunt.hcl +++ b/infra/live/prod/aws/service_worker/terragrunt.hcl @@ -2,6 +2,12 @@ include "root" { path = find_in_parent_folders("root.hcl") } +locals { + runtime_security = read_terragrunt_config(find_in_parent_folders("dependencies/ecs_runtime_security.hcl")) +} + terraform { source = "../../../../modules//aws//service_worker" } + +inputs = local.runtime_security.inputs diff --git a/infra/modules/aws/migrations/README.md b/infra/modules/aws/migrations/README.md index 1fc35b48..35ec2507 100644 --- a/infra/modules/aws/migrations/README.md +++ b/infra/modules/aws/migrations/README.md @@ -17,5 +17,6 @@ Lambda wrapper for database migrations using packaged SQLAlchemy models. - `cloudwatch_log_group` This module is intended for manual or pipeline-triggered schema migrations against the shared Aurora PostgreSQL database. It runs inside the VPC and reuses the shared runtime security group from `security` so it can reach the database without introducing a second database-ingress rule pattern. +The live Terragrunt stack is expected to pass that runtime security group id as an explicit input. For bootstrap-friendly plan and validate flows, prefer Terragrunt dependency mocks in the live stack instead of direct `security` remote-state reads in the module. The current handler loads the packaged SQLAlchemy models, checks whether its owned table already exists, and creates the declared table metadata directly in the default schema when needed for the worker runtime. In this repo's reusable code deploy workflow, the function is also invoked automatically when `migrations` is part of the Lambda deployment matrix. 
diff --git a/infra/modules/aws/migrations/data.tf b/infra/modules/aws/migrations/data.tf index 2c4aca5e..e82a7380 100644 --- a/infra/modules/aws/migrations/data.tf +++ b/infra/modules/aws/migrations/data.tf @@ -8,16 +8,6 @@ data "terraform_remote_state" "database" { } } -data "terraform_remote_state" "security" { - backend = "s3" - - config = { - bucket = var.state_bucket - key = "${var.environment}/aws/security/terraform.tfstate" - region = var.aws_region - } -} - data "aws_vpc" "this" { filter { name = "tag:Name" diff --git a/infra/modules/aws/migrations/main.tf b/infra/modules/aws/migrations/main.tf index 283970b2..a3ec1ff8 100644 --- a/infra/modules/aws/migrations/main.tf +++ b/infra/modules/aws/migrations/main.tf @@ -27,6 +27,6 @@ module "migrations" { vpc_subnet_ids = data.aws_subnets.private.ids vpc_security_group_ids = [ - data.terraform_remote_state.security.outputs.runtime_sg, + var.runtime_security_group_id, ] } diff --git a/infra/modules/aws/migrations/variables.tf b/infra/modules/aws/migrations/variables.tf index 4bdb1076..eca02022 100644 --- a/infra/modules/aws/migrations/variables.tf +++ b/infra/modules/aws/migrations/variables.tf @@ -26,3 +26,7 @@ variable "otel_sample_rate" { variable "vpc_name" { type = string } + +variable "runtime_security_group_id" { + type = string +} diff --git a/infra/modules/aws/service_api/README.md b/infra/modules/aws/service_api/README.md index de5584d3..9bf935c8 100644 --- a/infra/modules/aws/service_api/README.md +++ b/infra/modules/aws/service_api/README.md @@ -40,8 +40,10 @@ Concrete ECS API service wrapper for the sample API service. 
## Dependency Notes - reads `task_api` remote state for the task definition -- reads `cluster`, `network`, and `security` remote state +- reads `cluster` and `network` remote state +- expects the live Terragrunt stack to pass the ECS runtime security group id as an explicit input - depends on the `network` stack owning the shared VPC link, ALB listener path, and JWT authorizer inputs +- for bootstrap-friendly plan and validate flows, prefer Terragrunt dependency mocks in the live stack instead of direct `security` remote-state reads in the module ## Inherits Behavior From diff --git a/infra/modules/aws/service_api/data.tf b/infra/modules/aws/service_api/data.tf index 4e912710..965b9c79 100644 --- a/infra/modules/aws/service_api/data.tf +++ b/infra/modules/aws/service_api/data.tf @@ -19,16 +19,6 @@ data "terraform_remote_state" "network" { } } -data "terraform_remote_state" "security" { - backend = "s3" - - config = { - bucket = var.state_bucket - key = "${var.environment}/aws/security/terraform.tfstate" - region = var.aws_region - } -} - data "terraform_remote_state" "cluster" { backend = "s3" diff --git a/infra/modules/aws/service_api/main.tf b/infra/modules/aws/service_api/main.tf index 5442302c..5e216fad 100644 --- a/infra/modules/aws/service_api/main.tf +++ b/infra/modules/aws/service_api/main.tf @@ -13,7 +13,7 @@ module "service_api" { cluster_id = data.terraform_remote_state.cluster.outputs.cluster_id cluster_name = data.terraform_remote_state.cluster.outputs.cluster_name - ecs_security_group_id = data.terraform_remote_state.security.outputs.ecs_sg + ecs_security_group_id = var.ecs_security_group_id default_target_group_arn = data.terraform_remote_state.network.outputs.default_target_group_arn load_balancer_arn = data.terraform_remote_state.network.outputs.load_balancer_arn diff --git a/infra/modules/aws/service_api/variables.tf b/infra/modules/aws/service_api/variables.tf index 7facdd20..b2885d71 100644 --- a/infra/modules/aws/service_api/variables.tf +++ 
b/infra/modules/aws/service_api/variables.tf @@ -75,3 +75,7 @@ variable "bootstrap_image_uri" { error_message = "bootstrap_image_uri must be set when bootstrap is true." } } + +variable "ecs_security_group_id" { + type = string +} diff --git a/infra/modules/aws/service_worker/README.md b/infra/modules/aws/service_worker/README.md index 820a4d3b..03cd7d3c 100644 --- a/infra/modules/aws/service_worker/README.md +++ b/infra/modules/aws/service_worker/README.md @@ -37,8 +37,10 @@ Concrete ECS worker service wrapper. - reads `task_worker` remote state - reads `worker_messaging` remote state -- reads `cluster`, `network`, and `security` remote state +- reads `cluster` and `network` remote state +- expects the live Terragrunt stack to pass the ECS runtime security group id as an explicit input - relies on `worker_messaging` owning the queue contract rather than duplicating queue state locally +- for bootstrap-friendly plan and validate flows, prefer Terragrunt dependency mocks in the live stack instead of direct `security` remote-state reads in the module It uses the shared ECS worker queue name exported by `worker_messaging` for service autoscaling. During bootstrap applies, it uses placeholder values instead of reading task outputs directly so the bootstrap path does not need a pre-existing task state file. 
diff --git a/infra/modules/aws/service_worker/data.tf b/infra/modules/aws/service_worker/data.tf index ae6015ee..00448982 100644 --- a/infra/modules/aws/service_worker/data.tf +++ b/infra/modules/aws/service_worker/data.tf @@ -30,16 +30,6 @@ data "terraform_remote_state" "network" { } } -data "terraform_remote_state" "security" { - backend = "s3" - - config = { - bucket = var.state_bucket - key = "${var.environment}/aws/security/terraform.tfstate" - region = var.aws_region - } -} - data "terraform_remote_state" "cluster" { backend = "s3" diff --git a/infra/modules/aws/service_worker/main.tf b/infra/modules/aws/service_worker/main.tf index 626599dc..84170267 100644 --- a/infra/modules/aws/service_worker/main.tf +++ b/infra/modules/aws/service_worker/main.tf @@ -13,7 +13,7 @@ module "service_worker" { cluster_id = data.terraform_remote_state.cluster.outputs.cluster_id cluster_name = data.terraform_remote_state.cluster.outputs.cluster_name - ecs_security_group_id = data.terraform_remote_state.security.outputs.ecs_sg + ecs_security_group_id = var.ecs_security_group_id default_target_group_arn = data.terraform_remote_state.network.outputs.default_target_group_arn default_http_listener_arn = data.terraform_remote_state.network.outputs.default_http_listener_arn diff --git a/infra/modules/aws/service_worker/variables.tf b/infra/modules/aws/service_worker/variables.tf index 4b065255..7878b66f 100644 --- a/infra/modules/aws/service_worker/variables.tf +++ b/infra/modules/aws/service_worker/variables.tf @@ -75,3 +75,7 @@ variable "bootstrap_image_uri" { error_message = "bootstrap_image_uri must be set when bootstrap is true." 
} } + +variable "ecs_security_group_id" { + type = string +} From d575747a9a26d0c23820ac68379709d631676be1 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Thu, 14 May 2026 13:21:23 +0100 Subject: [PATCH 03/34] docs: update readme --- infra/README.md | 50 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/infra/README.md b/infra/README.md index a7f407e9..440611b9 100644 --- a/infra/README.md +++ b/infra/README.md @@ -104,16 +104,46 @@ That `containers/lib` directory is helper code only and is not treated as a depl ## Dependency Notes -- many modules use `data.terraform_remote_state` to read outputs from other stacks -- prefer using `data.terraform_remote_state` only for outputs that are expected to stay stable or change rarely; avoid using it as the normal handoff for values that change as part of the same rollout, because downstream plans can then drift from the upstream state they were planned against -- because of that, workflow ordering matters for apply, deploy, and destroy -- `service_api` consumes the shared JWT authorizer output from `network`, so `cognito` and `network` must exist before that ECS API service stack applies, and the service must destroy before `network` is torn down -- on destroy, `network` can tear down once downstream consumers such as `frontend`, `service_*`, `task_*`, and `database` are gone -- on destroy, `cluster` can tear down in parallel with `network` once `service_*`, `task_*`, and other real cluster consumers are gone; `frontend` is not a cluster dependency -- on destroy, `security` must wait for VPC-attached lambdas such as `migrations` as well as `network`, otherwise the shared runtime security group can still be attached during Lambda ENI cleanup -- avoid making one runtime depend on another runtime's state ownership unnecessarily; for example, shared worker fanout state is owned by `worker_messaging` rather than by `lambda_worker` or `task_worker` -- some shared 
infrastructure, such as the landing-zone VPC and tagged private subnets, is discovered with `data` lookups and must already exist -- frontend custom-domain deploys also require the matching Route53 hosted zone to already exist +- modules use Terragrunt `dependency` blocks to consume outputs from other stacks instead of `data.terraform_remote_state` +- this allows Terragrunt to understand the dependency graph explicitly and manage ordering for apply and destroy operations + +### Dependency Strategy + +- prefer `dependency` blocks for all cross-stack communication +- use `mock_outputs` for dependencies during `plan`, `validate`, and other non-apply commands to allow independent iteration without requiring upstream stacks to be deployed +- restrict mocks using `mock_outputs_allowed_terraform_commands` to ensure real outputs are always used during `apply` + +### When to Use Remote State + +- avoid using `data.terraform_remote_state` as the default mechanism for passing values between stacks +- it may still be used for: + - infrastructure that is managed outside of Terragrunt + - globally stable/shared resources that rarely change + - cross-account or external dependencies where Terragrunt `dependency` is not practical + +### Workflow and Ordering + +- Terragrunt dependencies define ordering implicitly, but logical constraints still apply: + + - `service_api` consumes the shared JWT authorizer output from `network`, so `cognito` and `network` must exist before the ECS API service stack applies + - the API service must be destroyed before `network` is torn down + +- on destroy: + + - `network` can tear down once downstream consumers such as `frontend`, `service_*`, `task_*`, and `database` are gone + - `cluster` can tear down in parallel with `network` once `service_*`, `task_*`, and other cluster consumers are gone; `frontend` is not a cluster dependency + - `security` must wait for VPC-attached lambdas such as `migrations` as well as `network`, otherwise the shared 
runtime security group may still be attached during Lambda ENI cleanup + +### Design Guidelines + +- avoid making one runtime depend on another runtime's state ownership unnecessarily + - for example, shared worker fanout state is owned by `worker_messaging` rather than by `lambda_worker` or `task_worker` + +- prefer explicit ownership boundaries between stacks + +- some shared infrastructure, such as the landing-zone VPC and tagged private subnets, is discovered via `data` lookups and must already exist + +- frontend custom-domain deploys require the matching Route53 hosted zone to already exist ## Deployment Model From 613003bd742ae8d4489c6330f8971590e849c6c7 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Thu, 14 May 2026 13:42:28 +0100 Subject: [PATCH 04/34] chore: big bang convert to dependency + mock outputs --- .github/docs/README.md | 4 +- infra/live/dependencies/cluster.hcl | 15 ++++++ infra/live/dependencies/database.hcl | 21 ++++++++ infra/live/dependencies/network_runtime.hcl | 35 ++++++++++++ infra/live/dependencies/task_api.hcl | 13 +++++ infra/live/dependencies/task_worker.hcl | 13 +++++ infra/live/dependencies/worker_messaging.hcl | 33 ++++++++++++ infra/live/dev/aws/lambda_api/terragrunt.hcl | 53 +++++++++++-------- .../live/dev/aws/lambda_worker/terragrunt.hcl | 47 +++++++++------- infra/live/dev/aws/migrations/terragrunt.hcl | 3 +- .../dev/aws/rds_reader_tagger/terragrunt.hcl | 6 +++ infra/live/dev/aws/service_api/terragrunt.hcl | 10 +++- .../dev/aws/service_worker/terragrunt.hcl | 12 ++++- infra/live/dev/aws/task_worker/terragrunt.hcl | 7 +++ infra/live/prod/aws/lambda_api/terragrunt.hcl | 53 +++++++++++-------- .../prod/aws/lambda_worker/terragrunt.hcl | 47 +++++++++------- infra/live/prod/aws/migrations/terragrunt.hcl | 3 +- .../prod/aws/rds_reader_tagger/terragrunt.hcl | 6 +++ .../live/prod/aws/service_api/terragrunt.hcl | 10 +++- .../prod/aws/service_worker/terragrunt.hcl | 12 ++++- .../live/prod/aws/task_worker/terragrunt.hcl | 7 +++ 
infra/modules/aws/lambda_api/data.tf | 22 +------- infra/modules/aws/lambda_api/main.tf | 24 ++++----- infra/modules/aws/lambda_api/outputs.tf | 6 +-- infra/modules/aws/lambda_api/variables.tf | 34 +++++++++++- infra/modules/aws/lambda_worker/data.tf | 9 ---- infra/modules/aws/lambda_worker/main.tf | 12 ++--- infra/modules/aws/lambda_worker/outputs.tf | 8 +-- infra/modules/aws/lambda_worker/variables.tf | 28 +++++++++- infra/modules/aws/migrations/data.tf | 12 +---- infra/modules/aws/migrations/main.tf | 8 +-- infra/modules/aws/migrations/variables.tf | 16 ++++++ infra/modules/aws/rds_reader_tagger/README.md | 2 +- infra/modules/aws/rds_reader_tagger/data.tf | 10 ---- infra/modules/aws/rds_reader_tagger/main.tf | 2 +- .../aws/rds_reader_tagger/variables.tf | 4 ++ infra/modules/aws/service_api/README.md | 6 +-- infra/modules/aws/service_api/data.tf | 31 ----------- infra/modules/aws/service_api/main.tf | 26 ++++----- infra/modules/aws/service_api/outputs.tf | 2 +- infra/modules/aws/service_api/variables.tf | 52 ++++++++++++++++++ infra/modules/aws/service_worker/README.md | 8 +-- infra/modules/aws/service_worker/data.tf | 42 --------------- infra/modules/aws/service_worker/locals.tf | 7 +-- infra/modules/aws/service_worker/main.tf | 20 +++---- infra/modules/aws/service_worker/outputs.tf | 2 +- infra/modules/aws/service_worker/variables.tf | 48 +++++++++++++++++ infra/modules/aws/task_worker/README.md | 6 +-- infra/modules/aws/task_worker/data.tf | 22 +------- infra/modules/aws/task_worker/main.tf | 12 ++--- infra/modules/aws/task_worker/outputs.tf | 6 +-- infra/modules/aws/task_worker/variables.tf | 28 ++++++++++ 52 files changed, 605 insertions(+), 320 deletions(-) create mode 100644 infra/live/dependencies/cluster.hcl create mode 100644 infra/live/dependencies/database.hcl create mode 100644 infra/live/dependencies/network_runtime.hcl create mode 100644 infra/live/dependencies/task_api.hcl create mode 100644 infra/live/dependencies/task_worker.hcl create mode 
100644 infra/live/dependencies/worker_messaging.hcl delete mode 100644 infra/modules/aws/lambda_worker/data.tf diff --git a/.github/docs/README.md b/.github/docs/README.md index 98deb3dc..563cdc8f 100644 --- a/.github/docs/README.md +++ b/.github/docs/README.md @@ -88,7 +88,7 @@ flowchart LR - `shared_infra_apply_from_plan.yml` Apply-from-plan wrapper around `shared_infra.yml`. It takes `plan_artifact_run_id`, resolves the same artifact bucket split used by release artifacts (`dev` stays on `dev`, non-`dev` uses `ci`) inside its `metadata` job, configures artifact-account AWS credentials once for that job, derives the matching plan-artifact S3 prefix via `justfile.tg`, downloads `infra-plan-metadata` from that location via `justfile.ci`, reads the frozen graph inputs back out, and then calls `shared_infra.yml` with `tg_action: apply_plan` plus that same resolved prefix. - `shared_infra.yml` - Pure ordered infra graph executor. It applies shared stacks first, then runtime stacks, then frontend infrastructure. Shared stacks now include the CloudWatch observability dashboard. It accepts `tg_action` so the same graph can run a normal apply, upload derived per-stack plan artifacts to the resolved code bucket under `terragrunt_plan/`, or apply from previously uploaded plan artifacts. The wrapper workflows resolve one `plan_artifact_s3_prefix` and set it in the workflow env once, while each Terragrunt job configures AWS credentials at job start and then reuses that ambient session in the repo-local Terragrunt action. That means each infra run has one shared `plan-metadata.json` file for the whole graph and one separate saved plan bundle per Terragrunt stack or module. In `apply_plan` mode, each stack job first downloads its own saved plan files via `justfile.tg` through the Terragrunt action, then the Terragrunt action validates and applies those local files. 
Its visible step labels now follow the high-level operation, so both direct apply and apply-from-plan render as `Apply` while plan still renders as `Plan`. The `security -> network` edge is a real bootstrap dependency because `network` reads security outputs like `vpc_endpoint_sg` from remote state; if those outputs do not exist yet, `network` fails with an upstream `Unsupported attribute` error rather than a networking-specific error. + Pure ordered infra graph executor. It applies shared stacks first, then runtime stacks, then frontend infrastructure. Shared stacks now include the CloudWatch observability dashboard. It accepts `tg_action` so the same graph can run a normal apply, upload derived per-stack plan artifacts to the resolved code bucket under `terragrunt_plan/`, or apply from previously uploaded plan artifacts. The wrapper workflows resolve one `plan_artifact_s3_prefix` and set it in the workflow env once, while each Terragrunt job configures AWS credentials at job start and then reuses that ambient session in the repo-local Terragrunt action. That means each infra run has one shared `plan-metadata.json` file for the whole graph and one separate saved plan bundle per Terragrunt stack or module. In `apply_plan` mode, each stack job first downloads its own saved plan files via `justfile.tg` through the Terragrunt action, then the Terragrunt action validates and applies those local files. Its visible step labels now follow the high-level operation, so both direct apply and apply-from-plan render as `Apply` while plan still renders as `Plan`. Bootstrap-sensitive edges such as `security -> network` should be modeled with Terragrunt `dependency` blocks plus constrained `mock_outputs` in the live stack so `plan` and `validate` can run before upstream state exists, while `apply` still resolves real outputs. - The shared infra wrappers must forward the permissions required by the nested reusable call chain. 
In practice that means `id-token: write` everywhere the Terragrunt action may assume AWS OIDC and `contents: read` for checkout. The shared plan/apply wrappers now rely on AWS access to the shared code bucket rather than GitHub artifact permissions for cross-run recovery. - `shared_deploy.yml` Rolls out Lambda code, optional migrations, optional reconciliation Lambdas, ECS task and service updates, and optional frontend deploys. Its multi-step AWS jobs now configure credentials once at job start and let the local `just` and Terragrunt actions reuse that ambient session. The reusable workflow renders its Lambda and ECS CodeDeploy AppSpec files from the shared templates under `config/deploy/`, and its mutating `just` steps should target `justfile.deploy` rather than the repo-root `justfile`. @@ -188,7 +188,7 @@ Run these checks on every CI, workflow, or deploy-contract change. ### Dependency Safety - check apply, deploy, and destroy behavior, not just apply -- verify downstream consumers of remote state still exist and are ordered correctly +- verify Terragrunt dependencies and their downstream consumers still exist and are ordered correctly - confirm every `needs.<job>.outputs.*` reference is in scope - confirm matrix values still match the naming contract expected by workflows and modules - do not change CI ordering blindly; first check whether the real issue is avoidable cross-stack coupling diff --git a/infra/live/dependencies/cluster.hcl b/infra/live/dependencies/cluster.hcl new file mode 100644 index 00000000..2bbd9e09 --- /dev/null +++ b/infra/live/dependencies/cluster.hcl @@ -0,0 +1,15 @@ +dependency "cluster" { + config_path = "${get_original_terragrunt_dir()}/../cluster" + + mock_outputs = { + cluster_id = "mock-cluster-id" + cluster_name = "mock-cluster" + } + + mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] +} + +inputs = { + cluster_id = dependency.cluster.outputs.cluster_id + cluster_name =
dependency.cluster.outputs.cluster_name +} diff --git a/infra/live/dependencies/database.hcl b/infra/live/dependencies/database.hcl new file mode 100644 index 00000000..6ca280c3 --- /dev/null +++ b/infra/live/dependencies/database.hcl @@ -0,0 +1,21 @@ +dependency "database" { + config_path = "${get_original_terragrunt_dir()}/../database" + + mock_outputs = { + credentials_secret_arn = "arn:aws:secretsmanager:eu-west-2:111111111111:secret:mock-database-credentials" + readwrite_endpoint = "mock-database.cluster-abcdefghijkl.eu-west-2.rds.amazonaws.com" + database_name = "app" + database_port = 5432 + cluster_identifier = "mock-database-cluster" + } + + mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] +} + +inputs = { + database_credentials_secret_arn = dependency.database.outputs.credentials_secret_arn + database_readwrite_endpoint = dependency.database.outputs.readwrite_endpoint + database_name = dependency.database.outputs.database_name + database_port = dependency.database.outputs.database_port + database_cluster_identifier = dependency.database.outputs.cluster_identifier +} diff --git a/infra/live/dependencies/network_runtime.hcl b/infra/live/dependencies/network_runtime.hcl new file mode 100644 index 00000000..de2a1de5 --- /dev/null +++ b/infra/live/dependencies/network_runtime.hcl @@ -0,0 +1,35 @@ +dependency "network" { + config_path = "${get_original_terragrunt_dir()}/../network" + + mock_outputs = { + default_target_group_arn = "arn:aws:elasticloadbalancing:eu-west-2:111111111111:targetgroup/mock-default/1234567890abcdef" + load_balancer_arn = "arn:aws:elasticloadbalancing:eu-west-2:111111111111:loadbalancer/app/mock-internal/1234567890abcdef" + default_http_listener_arn = "arn:aws:elasticloadbalancing:eu-west-2:111111111111:listener/app/mock-internal/1234567890abcdef/abcdef1234567890" + load_balancer_arn_suffix = "app/mock-internal/1234567890abcdef" + target_group_arn_suffix = 
"targetgroup/mock-default/1234567890abcdef" + internal_invoke_url = "http://mock-internal-123456.eu-west-2.elb.amazonaws.com" + api_id = "mockapi123" + api_invoke_url = "https://mockapi123.execute-api.eu-west-2.amazonaws.com" + api_execution_arn = "arn:aws:execute-api:eu-west-2:111111111111:mockapi123" + api_stage_name = "$default" + vpc_link_id = "vpclink-mock123" + http_api_authorizer_id = "auth-mock123" + } + + mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] +} + +inputs = { + network_default_target_group_arn = dependency.network.outputs.default_target_group_arn + network_load_balancer_arn = dependency.network.outputs.load_balancer_arn + network_default_http_listener_arn = dependency.network.outputs.default_http_listener_arn + network_load_balancer_arn_suffix = dependency.network.outputs.load_balancer_arn_suffix + network_target_group_arn_suffix = dependency.network.outputs.target_group_arn_suffix + network_internal_invoke_url = dependency.network.outputs.internal_invoke_url + network_api_id = dependency.network.outputs.api_id + network_api_invoke_url = dependency.network.outputs.api_invoke_url + network_api_execution_arn = dependency.network.outputs.api_execution_arn + network_api_stage_name = dependency.network.outputs.api_stage_name + network_vpc_link_id = dependency.network.outputs.vpc_link_id + network_http_api_authorizer_id = dependency.network.outputs.http_api_authorizer_id +} diff --git a/infra/live/dependencies/task_api.hcl b/infra/live/dependencies/task_api.hcl new file mode 100644 index 00000000..18dccde7 --- /dev/null +++ b/infra/live/dependencies/task_api.hcl @@ -0,0 +1,13 @@ +dependency "task_api" { + config_path = "${get_original_terragrunt_dir()}/../task_api" + + mock_outputs = { + task_definition_arn = "arn:aws:ecs:eu-west-2:111111111111:task-definition/mock-task-api:1" + } + + mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] +} + +inputs = { + task_definition_arn 
= dependency.task_api.outputs.task_definition_arn +} diff --git a/infra/live/dependencies/task_worker.hcl b/infra/live/dependencies/task_worker.hcl new file mode 100644 index 00000000..95521892 --- /dev/null +++ b/infra/live/dependencies/task_worker.hcl @@ -0,0 +1,13 @@ +dependency "task_worker" { + config_path = "${get_original_terragrunt_dir()}/../task_worker" + + mock_outputs = { + task_definition_arn = "arn:aws:ecs:eu-west-2:111111111111:task-definition/mock-task-worker:1" + } + + mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] +} + +inputs = { + task_definition_arn = dependency.task_worker.outputs.task_definition_arn +} diff --git a/infra/live/dependencies/worker_messaging.hcl b/infra/live/dependencies/worker_messaging.hcl new file mode 100644 index 00000000..f0e8638e --- /dev/null +++ b/infra/live/dependencies/worker_messaging.hcl @@ -0,0 +1,33 @@ +dependency "worker_messaging" { + config_path = "${get_original_terragrunt_dir()}/../worker_messaging" + + mock_outputs = { + sns_topic_name = "mock-worker-events" + sns_topic_arn = "arn:aws:sns:eu-west-2:111111111111:mock-worker-events" + lambda_worker_queue_name = "mock-lambda-worker-queue" + lambda_worker_queue_arn = "arn:aws:sqs:eu-west-2:111111111111:mock-lambda-worker-queue" + lambda_worker_queue_url = "https://sqs.eu-west-2.amazonaws.com/111111111111/mock-lambda-worker-queue" + lambda_worker_queue_read_policy_arn = "arn:aws:iam::111111111111:policy/mock-lambda-worker-queue-read" + lambda_worker_dead_letter_queue_name = "mock-lambda-worker-dlq" + lambda_worker_dead_letter_queue_url = "https://sqs.eu-west-2.amazonaws.com/111111111111/mock-lambda-worker-dlq" + ecs_worker_queue_name = "mock-ecs-worker-queue" + ecs_worker_queue_url = "https://sqs.eu-west-2.amazonaws.com/111111111111/mock-ecs-worker-queue" + ecs_worker_queue_read_policy_arn = "arn:aws:iam::111111111111:policy/mock-ecs-worker-queue-read" + } + + mock_outputs_allowed_terraform_commands = ["validate", "plan", 
"destroy", "init", "show"] +} + +inputs = { + worker_topic_name = dependency.worker_messaging.outputs.sns_topic_name + worker_topic_arn = dependency.worker_messaging.outputs.sns_topic_arn + lambda_worker_queue_name = dependency.worker_messaging.outputs.lambda_worker_queue_name + lambda_worker_queue_arn = dependency.worker_messaging.outputs.lambda_worker_queue_arn + lambda_worker_queue_url = dependency.worker_messaging.outputs.lambda_worker_queue_url + lambda_worker_queue_read_policy_arn = dependency.worker_messaging.outputs.lambda_worker_queue_read_policy_arn + lambda_worker_dead_letter_queue_name = dependency.worker_messaging.outputs.lambda_worker_dead_letter_queue_name + lambda_worker_dead_letter_queue_url = dependency.worker_messaging.outputs.lambda_worker_dead_letter_queue_url + ecs_worker_queue_name = dependency.worker_messaging.outputs.ecs_worker_queue_name + ecs_worker_queue_url = dependency.worker_messaging.outputs.ecs_worker_queue_url + ecs_worker_queue_read_policy_arn = dependency.worker_messaging.outputs.ecs_worker_queue_read_policy_arn +} diff --git a/infra/live/dev/aws/lambda_api/terragrunt.hcl b/infra/live/dev/aws/lambda_api/terragrunt.hcl index 2b12aa0c..9c0a5612 100644 --- a/infra/live/dev/aws/lambda_api/terragrunt.hcl +++ b/infra/live/dev/aws/lambda_api/terragrunt.hcl @@ -2,30 +2,39 @@ include "root" { path = find_in_parent_folders("root.hcl") } -inputs = { - api_5xx_alarm_threshold = 20.0 - api_5xx_alarm_evaluation_periods = 1 - api_5xx_alarm_datapoints_to_alarm = 1 - - deployment_config = { - strategy = "canary" - percentage = 10 - interval_minutes = 3 - } - - provisioned_config = { - auto_scale = { - max = 2 - min = 1 - trigger_percent = 20 - scale_in_cooldown_seconds = 60 - scale_out_cooldown_seconds = 60 - } - - reserved_concurrency = 10 - } +locals { + network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network_runtime.hcl")) + worker_messaging = 
read_terragrunt_config(find_in_parent_folders("dependencies/worker_messaging.hcl")) } terraform { source = "../../../../modules//aws//lambda_api" } + +inputs = merge( + local.network_runtime.inputs, + local.worker_messaging.inputs, + { + api_5xx_alarm_threshold = 20.0 + api_5xx_alarm_evaluation_periods = 1 + api_5xx_alarm_datapoints_to_alarm = 1 + + deployment_config = { + strategy = "canary" + percentage = 10 + interval_minutes = 3 + } + + provisioned_config = { + auto_scale = { + max = 2 + min = 1 + trigger_percent = 20 + scale_in_cooldown_seconds = 60 + scale_out_cooldown_seconds = 60 + } + + reserved_concurrency = 10 + } + }, +) diff --git a/infra/live/dev/aws/lambda_worker/terragrunt.hcl b/infra/live/dev/aws/lambda_worker/terragrunt.hcl index e25cfa13..d33c0637 100644 --- a/infra/live/dev/aws/lambda_worker/terragrunt.hcl +++ b/infra/live/dev/aws/lambda_worker/terragrunt.hcl @@ -2,28 +2,35 @@ include "root" { path = find_in_parent_folders("root.hcl") } -inputs = { - sqs_dlq_alarm_threshold = 1 # fail when any messages are in the DLQ (quick fail for testing) - sqs_dlq_alarm_evaluation_periods = 1 - sqs_dlq_alarm_datapoints_to_alarm = 1 - - deployment_config = { - strategy = "canary" - percentage = 50 - interval_minutes = 3 # this should be > the CloudWatch alarm evaluation period to ensure we catch the alarm if it triggers - } - - provisioned_config = { - sqs_scale = { - min = 1 - max = 5 - visible_messages = 10 - scale_in_cooldown_seconds = 60 - scale_out_cooldown_seconds = 60 - } - } +locals { + worker_messaging = read_terragrunt_config(find_in_parent_folders("dependencies/worker_messaging.hcl")) } terraform { source = "../../../../modules//aws//lambda_worker" } + +inputs = merge( + local.worker_messaging.inputs, + { + sqs_dlq_alarm_threshold = 1 # fail when any messages are in the DLQ (quick fail for testing) + sqs_dlq_alarm_evaluation_periods = 1 + sqs_dlq_alarm_datapoints_to_alarm = 1 + + deployment_config = { + strategy = "canary" + percentage = 50 + 
interval_minutes = 3 # this should be > the CloudWatch alarm evaluation period to ensure we catch the alarm if it triggers + } + + provisioned_config = { + sqs_scale = { + min = 1 + max = 5 + visible_messages = 10 + scale_in_cooldown_seconds = 60 + scale_out_cooldown_seconds = 60 + } + } + }, +) diff --git a/infra/live/dev/aws/migrations/terragrunt.hcl b/infra/live/dev/aws/migrations/terragrunt.hcl index 305b535f..cf527510 100644 --- a/infra/live/dev/aws/migrations/terragrunt.hcl +++ b/infra/live/dev/aws/migrations/terragrunt.hcl @@ -4,10 +4,11 @@ include "root" { locals { runtime_security = read_terragrunt_config(find_in_parent_folders("dependencies/lambda_runtime_security.hcl")) + database = read_terragrunt_config(find_in_parent_folders("dependencies/database.hcl")) } terraform { source = "../../../../modules//aws//migrations" } -inputs = local.runtime_security.inputs +inputs = merge(local.runtime_security.inputs, local.database.inputs) diff --git a/infra/live/dev/aws/rds_reader_tagger/terragrunt.hcl b/infra/live/dev/aws/rds_reader_tagger/terragrunt.hcl index 6d8d6e98..bc618a77 100644 --- a/infra/live/dev/aws/rds_reader_tagger/terragrunt.hcl +++ b/infra/live/dev/aws/rds_reader_tagger/terragrunt.hcl @@ -2,6 +2,12 @@ include "root" { path = find_in_parent_folders("root.hcl") } +locals { + database = read_terragrunt_config(find_in_parent_folders("dependencies/database.hcl")) +} + terraform { source = "../../../../modules//aws//rds_reader_tagger" } + +inputs = local.database.inputs diff --git a/infra/live/dev/aws/service_api/terragrunt.hcl b/infra/live/dev/aws/service_api/terragrunt.hcl index 961e3092..9bcdd427 100644 --- a/infra/live/dev/aws/service_api/terragrunt.hcl +++ b/infra/live/dev/aws/service_api/terragrunt.hcl @@ -4,10 +4,18 @@ include "root" { locals { runtime_security = read_terragrunt_config(find_in_parent_folders("dependencies/ecs_runtime_security.hcl")) + task_api = read_terragrunt_config(find_in_parent_folders("dependencies/task_api.hcl")) + cluster = 
read_terragrunt_config(find_in_parent_folders("dependencies/cluster.hcl")) + network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network_runtime.hcl")) } terraform { source = "../../../../modules//aws//service_api" } -inputs = local.runtime_security.inputs +inputs = merge( + local.runtime_security.inputs, + local.task_api.inputs, + local.cluster.inputs, + local.network_runtime.inputs, +) diff --git a/infra/live/dev/aws/service_worker/terragrunt.hcl b/infra/live/dev/aws/service_worker/terragrunt.hcl index f44d7e37..bd0fa8c2 100644 --- a/infra/live/dev/aws/service_worker/terragrunt.hcl +++ b/infra/live/dev/aws/service_worker/terragrunt.hcl @@ -4,10 +4,20 @@ include "root" { locals { runtime_security = read_terragrunt_config(find_in_parent_folders("dependencies/ecs_runtime_security.hcl")) + task_worker = read_terragrunt_config(find_in_parent_folders("dependencies/task_worker.hcl")) + worker_messaging = read_terragrunt_config(find_in_parent_folders("dependencies/worker_messaging.hcl")) + cluster = read_terragrunt_config(find_in_parent_folders("dependencies/cluster.hcl")) + network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network_runtime.hcl")) } terraform { source = "../../../../modules//aws//service_worker" } -inputs = local.runtime_security.inputs +inputs = merge( + local.runtime_security.inputs, + local.task_worker.inputs, + local.worker_messaging.inputs, + local.cluster.inputs, + local.network_runtime.inputs, +) diff --git a/infra/live/dev/aws/task_worker/terragrunt.hcl b/infra/live/dev/aws/task_worker/terragrunt.hcl index b0a81635..723c7a10 100644 --- a/infra/live/dev/aws/task_worker/terragrunt.hcl +++ b/infra/live/dev/aws/task_worker/terragrunt.hcl @@ -2,6 +2,13 @@ include "root" { path = find_in_parent_folders("root.hcl") } +locals { + worker_messaging = read_terragrunt_config(find_in_parent_folders("dependencies/worker_messaging.hcl")) + database = 
read_terragrunt_config(find_in_parent_folders("dependencies/database.hcl")) +} + terraform { source = "../../../../modules//aws//task_worker" } + +inputs = merge(local.worker_messaging.inputs, local.database.inputs) diff --git a/infra/live/prod/aws/lambda_api/terragrunt.hcl b/infra/live/prod/aws/lambda_api/terragrunt.hcl index f17ea0ab..9d0d7665 100644 --- a/infra/live/prod/aws/lambda_api/terragrunt.hcl +++ b/infra/live/prod/aws/lambda_api/terragrunt.hcl @@ -2,30 +2,39 @@ include "root" { path = find_in_parent_folders("root.hcl") } -inputs = { - api_5xx_alarm_threshold = 5.0 - api_5xx_alarm_evaluation_periods = 3 - api_5xx_alarm_datapoints_to_alarm = 3 - - deployment_config = { - strategy = "canary" - percentage = 10 - interval_minutes = 5 - } - - provisioned_config = { - auto_scale = { - max = 2 - min = 1 - trigger_percent = 20 - scale_in_cooldown_seconds = 60 - scale_out_cooldown_seconds = 60 - } - - reserved_concurrency = 10 - } +locals { + network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network_runtime.hcl")) + worker_messaging = read_terragrunt_config(find_in_parent_folders("dependencies/worker_messaging.hcl")) } terraform { source = "../../../../modules//aws//lambda_api" } + +inputs = merge( + local.network_runtime.inputs, + local.worker_messaging.inputs, + { + api_5xx_alarm_threshold = 5.0 + api_5xx_alarm_evaluation_periods = 3 + api_5xx_alarm_datapoints_to_alarm = 3 + + deployment_config = { + strategy = "canary" + percentage = 10 + interval_minutes = 5 + } + + provisioned_config = { + auto_scale = { + max = 2 + min = 1 + trigger_percent = 20 + scale_in_cooldown_seconds = 60 + scale_out_cooldown_seconds = 60 + } + + reserved_concurrency = 10 + } + }, +) diff --git a/infra/live/prod/aws/lambda_worker/terragrunt.hcl b/infra/live/prod/aws/lambda_worker/terragrunt.hcl index 21cdef71..6d85a43e 100644 --- a/infra/live/prod/aws/lambda_worker/terragrunt.hcl +++ b/infra/live/prod/aws/lambda_worker/terragrunt.hcl @@ -2,28 +2,35 @@ 
include "root" { path = find_in_parent_folders("root.hcl") } -inputs = { - sqs_dlq_alarm_threshold = 5 # fail when there are 5 messages in the DLQ - sqs_dlq_alarm_evaluation_periods = 3 - sqs_dlq_alarm_datapoints_to_alarm = 3 - - deployment_config = { - strategy = "canary" - percentage = 10 - interval_minutes = 3 # this should be > the CloudWatch alarm evaluation period to ensure we catch the alarm if it triggers - } - - provisioned_config = { - sqs_scale = { - min = 1 - max = 5 - visible_messages = 10 - scale_in_cooldown_seconds = 60 - scale_out_cooldown_seconds = 60 - } - } +locals { + worker_messaging = read_terragrunt_config(find_in_parent_folders("dependencies/worker_messaging.hcl")) } terraform { source = "../../../../modules//aws//lambda_worker" } + +inputs = merge( + local.worker_messaging.inputs, + { + sqs_dlq_alarm_threshold = 5 # fail when there are 5 messages in the DLQ + sqs_dlq_alarm_evaluation_periods = 3 + sqs_dlq_alarm_datapoints_to_alarm = 3 + + deployment_config = { + strategy = "canary" + percentage = 10 + interval_minutes = 3 # this should be > the CloudWatch alarm evaluation period to ensure we catch the alarm if it triggers + } + + provisioned_config = { + sqs_scale = { + min = 1 + max = 5 + visible_messages = 10 + scale_in_cooldown_seconds = 60 + scale_out_cooldown_seconds = 60 + } + } + }, +) diff --git a/infra/live/prod/aws/migrations/terragrunt.hcl b/infra/live/prod/aws/migrations/terragrunt.hcl index 305b535f..cf527510 100644 --- a/infra/live/prod/aws/migrations/terragrunt.hcl +++ b/infra/live/prod/aws/migrations/terragrunt.hcl @@ -4,10 +4,11 @@ include "root" { locals { runtime_security = read_terragrunt_config(find_in_parent_folders("dependencies/lambda_runtime_security.hcl")) + database = read_terragrunt_config(find_in_parent_folders("dependencies/database.hcl")) } terraform { source = "../../../../modules//aws//migrations" } -inputs = local.runtime_security.inputs +inputs = merge(local.runtime_security.inputs, local.database.inputs) 
diff --git a/infra/live/prod/aws/rds_reader_tagger/terragrunt.hcl b/infra/live/prod/aws/rds_reader_tagger/terragrunt.hcl index 6d8d6e98..bc618a77 100644 --- a/infra/live/prod/aws/rds_reader_tagger/terragrunt.hcl +++ b/infra/live/prod/aws/rds_reader_tagger/terragrunt.hcl @@ -2,6 +2,12 @@ include "root" { path = find_in_parent_folders("root.hcl") } +locals { + database = read_terragrunt_config(find_in_parent_folders("dependencies/database.hcl")) +} + terraform { source = "../../../../modules//aws//rds_reader_tagger" } + +inputs = local.database.inputs diff --git a/infra/live/prod/aws/service_api/terragrunt.hcl b/infra/live/prod/aws/service_api/terragrunt.hcl index 961e3092..9bcdd427 100644 --- a/infra/live/prod/aws/service_api/terragrunt.hcl +++ b/infra/live/prod/aws/service_api/terragrunt.hcl @@ -4,10 +4,18 @@ include "root" { locals { runtime_security = read_terragrunt_config(find_in_parent_folders("dependencies/ecs_runtime_security.hcl")) + task_api = read_terragrunt_config(find_in_parent_folders("dependencies/task_api.hcl")) + cluster = read_terragrunt_config(find_in_parent_folders("dependencies/cluster.hcl")) + network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network_runtime.hcl")) } terraform { source = "../../../../modules//aws//service_api" } -inputs = local.runtime_security.inputs +inputs = merge( + local.runtime_security.inputs, + local.task_api.inputs, + local.cluster.inputs, + local.network_runtime.inputs, +) diff --git a/infra/live/prod/aws/service_worker/terragrunt.hcl b/infra/live/prod/aws/service_worker/terragrunt.hcl index f44d7e37..bd0fa8c2 100644 --- a/infra/live/prod/aws/service_worker/terragrunt.hcl +++ b/infra/live/prod/aws/service_worker/terragrunt.hcl @@ -4,10 +4,20 @@ include "root" { locals { runtime_security = read_terragrunt_config(find_in_parent_folders("dependencies/ecs_runtime_security.hcl")) + task_worker = read_terragrunt_config(find_in_parent_folders("dependencies/task_worker.hcl")) + worker_messaging = 
read_terragrunt_config(find_in_parent_folders("dependencies/worker_messaging.hcl")) + cluster = read_terragrunt_config(find_in_parent_folders("dependencies/cluster.hcl")) + network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network_runtime.hcl")) } terraform { source = "../../../../modules//aws//service_worker" } -inputs = local.runtime_security.inputs +inputs = merge( + local.runtime_security.inputs, + local.task_worker.inputs, + local.worker_messaging.inputs, + local.cluster.inputs, + local.network_runtime.inputs, +) diff --git a/infra/live/prod/aws/task_worker/terragrunt.hcl b/infra/live/prod/aws/task_worker/terragrunt.hcl index b0a81635..723c7a10 100644 --- a/infra/live/prod/aws/task_worker/terragrunt.hcl +++ b/infra/live/prod/aws/task_worker/terragrunt.hcl @@ -2,6 +2,13 @@ include "root" { path = find_in_parent_folders("root.hcl") } +locals { + worker_messaging = read_terragrunt_config(find_in_parent_folders("dependencies/worker_messaging.hcl")) + database = read_terragrunt_config(find_in_parent_folders("dependencies/database.hcl")) +} + terraform { source = "../../../../modules//aws//task_worker" } + +inputs = merge(local.worker_messaging.inputs, local.database.inputs) diff --git a/infra/modules/aws/lambda_api/data.tf b/infra/modules/aws/lambda_api/data.tf index 1a52292a..02351f14 100644 --- a/infra/modules/aws/lambda_api/data.tf +++ b/infra/modules/aws/lambda_api/data.tf @@ -1,23 +1,3 @@ -data "terraform_remote_state" "network" { - backend = "s3" - - config = { - bucket = var.state_bucket - key = "${var.environment}/aws/network/terraform.tfstate" - region = var.aws_region - } -} - -data "terraform_remote_state" "worker_messaging" { - backend = "s3" - - config = { - bucket = var.state_bucket - key = "${var.environment}/aws/worker_messaging/terraform.tfstate" - region = var.aws_region - } -} - data "aws_iam_policy_document" "worker_topic_publish" { statement { actions = [ @@ -25,7 +5,7 @@ data "aws_iam_policy_document" 
"worker_topic_publish" { ] resources = [ - data.terraform_remote_state.worker_messaging.outputs.sns_topic_arn, + var.worker_topic_arn, ] } } diff --git a/infra/modules/aws/lambda_api/main.tf b/infra/modules/aws/lambda_api/main.tf index cbdd57fb..37083e3d 100644 --- a/infra/modules/aws/lambda_api/main.tf +++ b/infra/modules/aws/lambda_api/main.tf @@ -15,8 +15,8 @@ module "lambda_api" { environment_variables = { DEBUG_DELAY_MS = 500 - WORKER_TOPIC_ARN = data.terraform_remote_state.worker_messaging.outputs.sns_topic_arn - WORKER_TOPIC_NAME = data.terraform_remote_state.worker_messaging.outputs.sns_topic_name + WORKER_TOPIC_ARN = var.worker_topic_arn + WORKER_TOPIC_NAME = var.worker_topic_name } additional_policy_arns = [ @@ -33,26 +33,26 @@ module "lambda_api" { } resource "aws_apigatewayv2_integration" "lambda_proxy" { - api_id = data.terraform_remote_state.network.outputs.api_id + api_id = var.network_api_id integration_type = "AWS_PROXY" integration_uri = module.lambda_api.alias_arn payload_format_version = "2.0" } resource "aws_apigatewayv2_route" "root" { - api_id = data.terraform_remote_state.network.outputs.api_id + api_id = var.network_api_id route_key = "ANY /" target = "integrations/${aws_apigatewayv2_integration.lambda_proxy.id}" authorization_type = "JWT" - authorizer_id = data.terraform_remote_state.network.outputs.http_api_authorizer_id + authorizer_id = var.network_http_api_authorizer_id } resource "aws_apigatewayv2_route" "proxy" { - api_id = data.terraform_remote_state.network.outputs.api_id + api_id = var.network_api_id route_key = "ANY /{proxy+}" target = "integrations/${aws_apigatewayv2_integration.lambda_proxy.id}" authorization_type = "JWT" - authorizer_id = data.terraform_remote_state.network.outputs.http_api_authorizer_id + authorizer_id = var.network_http_api_authorizer_id } resource "aws_lambda_permission" "allow_invoke" { @@ -60,7 +60,7 @@ resource "aws_lambda_permission" "allow_invoke" { action = "lambda:InvokeFunction" function_name = 
module.lambda_api.alias_arn principal = "apigateway.amazonaws.com" - source_arn = "${data.terraform_remote_state.network.outputs.api_execution_arn}/*/*" # all routes/stages + source_arn = "${var.network_api_execution_arn}/*/*" # all routes/stages } resource "aws_cloudwatch_metric_alarm" "api_5xx_rate" { @@ -97,8 +97,8 @@ resource "aws_cloudwatch_metric_alarm" "api_5xx_rate" { period = 60 # most aws metrics are emitted at 1-minute intervals, so using a shorter period can lead to more volatile alarms dimensions = { - ApiId = data.terraform_remote_state.network.outputs.api_id - Stage = data.terraform_remote_state.network.outputs.api_stage_name + ApiId = var.network_api_id + Stage = var.network_api_stage_name } } } @@ -115,8 +115,8 @@ resource "aws_cloudwatch_metric_alarm" "api_5xx_rate" { period = 60 dimensions = { - ApiId = data.terraform_remote_state.network.outputs.api_id - Stage = data.terraform_remote_state.network.outputs.api_stage_name + ApiId = var.network_api_id + Stage = var.network_api_stage_name } } } diff --git a/infra/modules/aws/lambda_api/outputs.tf b/infra/modules/aws/lambda_api/outputs.tf index 770a3861..3be5dfba 100644 --- a/infra/modules/aws/lambda_api/outputs.tf +++ b/infra/modules/aws/lambda_api/outputs.tf @@ -1,13 +1,13 @@ output "invoke_url" { - value = data.terraform_remote_state.network.outputs.api_invoke_url + value = var.network_api_invoke_url } output "api_id" { - value = data.terraform_remote_state.network.outputs.api_id + value = var.network_api_id } output "vpc_link_id" { - value = data.terraform_remote_state.network.outputs.vpc_link_id + value = var.network_vpc_link_id } output "cloudwatch_log_group" { diff --git a/infra/modules/aws/lambda_api/variables.tf b/infra/modules/aws/lambda_api/variables.tf index dba57dda..209139c2 100644 --- a/infra/modules/aws/lambda_api/variables.tf +++ b/infra/modules/aws/lambda_api/variables.tf @@ -11,7 +11,7 @@ variable "environment" { variable "aws_region" { type = string - description = "AWS region 
used for remote state lookups" + description = "AWS region used for provider resources" } variable "state_bucket" { @@ -79,3 +79,35 @@ variable "vpc_name" { type = string description = "VPC name tag used to look up private subnets for the shared API Gateway VPC link" } + +variable "network_api_id" { + type = string +} + +variable "network_api_invoke_url" { + type = string +} + +variable "network_api_execution_arn" { + type = string +} + +variable "network_api_stage_name" { + type = string +} + +variable "network_vpc_link_id" { + type = string +} + +variable "network_http_api_authorizer_id" { + type = string +} + +variable "worker_topic_arn" { + type = string +} + +variable "worker_topic_name" { + type = string +} diff --git a/infra/modules/aws/lambda_worker/data.tf b/infra/modules/aws/lambda_worker/data.tf deleted file mode 100644 index 5c9e2ae6..00000000 --- a/infra/modules/aws/lambda_worker/data.tf +++ /dev/null @@ -1,9 +0,0 @@ -data "terraform_remote_state" "worker_messaging" { - backend = "s3" - - config = { - bucket = var.state_bucket - key = "${var.environment}/aws/worker_messaging/terraform.tfstate" - region = var.aws_region - } -} diff --git a/infra/modules/aws/lambda_worker/main.tf b/infra/modules/aws/lambda_worker/main.tf index 7d2df03f..a340afa9 100644 --- a/infra/modules/aws/lambda_worker/main.tf +++ b/infra/modules/aws/lambda_worker/main.tf @@ -14,7 +14,7 @@ module "lambda_worker" { } additional_policy_arns = [ - data.terraform_remote_state.worker_messaging.outputs.lambda_worker_queue_read_policy_arn + var.lambda_worker_queue_read_policy_arn ] deployment_config = var.deployment_config @@ -29,7 +29,7 @@ module "lambda_worker" { sqs_scale = merge( var.provisioned_config.sqs_scale, { - queue_name = data.terraform_remote_state.worker_messaging.outputs.lambda_worker_queue_name + queue_name = var.lambda_worker_queue_name } ) } @@ -37,7 +37,7 @@ module "lambda_worker" { } resource "aws_lambda_event_source_mapping" "sqs" { - event_source_arn = 
data.terraform_remote_state.worker_messaging.outputs.lambda_worker_queue_arn + event_source_arn = var.lambda_worker_queue_arn function_name = module.lambda_worker.function_name batch_size = local.sqs_chunk_size @@ -47,8 +47,8 @@ resource "aws_lambda_event_source_mapping" "sqs" { } resource "aws_cloudwatch_metric_alarm" "dlq_new_messages" { - alarm_name = "${data.terraform_remote_state.worker_messaging.outputs.lambda_worker_dead_letter_queue_name}-new-messages" - alarm_description = "New messages sent to DLQ ${data.terraform_remote_state.worker_messaging.outputs.lambda_worker_dead_letter_queue_name}" + alarm_name = "${var.lambda_worker_dead_letter_queue_name}-new-messages" + alarm_description = "New messages sent to DLQ ${var.lambda_worker_dead_letter_queue_name}" actions_enabled = true namespace = "AWS/SQS" @@ -64,6 +64,6 @@ resource "aws_cloudwatch_metric_alarm" "dlq_new_messages" { treat_missing_data = "notBreaching" dimensions = { - QueueName = data.terraform_remote_state.worker_messaging.outputs.lambda_worker_dead_letter_queue_name + QueueName = var.lambda_worker_dead_letter_queue_name } } diff --git a/infra/modules/aws/lambda_worker/outputs.tf b/infra/modules/aws/lambda_worker/outputs.tf index 9a93388c..aadade6d 100644 --- a/infra/modules/aws/lambda_worker/outputs.tf +++ b/infra/modules/aws/lambda_worker/outputs.tf @@ -15,17 +15,17 @@ output "lambda_alias_name" { } output "sqs_queue_url" { - value = data.terraform_remote_state.worker_messaging.outputs.lambda_worker_queue_url + value = var.lambda_worker_queue_url } output "sqs_queue_name" { - value = data.terraform_remote_state.worker_messaging.outputs.lambda_worker_queue_name + value = var.lambda_worker_queue_name } output "sqs_queue_read_policy_arn" { - value = data.terraform_remote_state.worker_messaging.outputs.lambda_worker_queue_read_policy_arn + value = var.lambda_worker_queue_read_policy_arn } output "dead_letter_queue_url" { - value = 
data.terraform_remote_state.worker_messaging.outputs.lambda_worker_dead_letter_queue_url + value = var.lambda_worker_dead_letter_queue_url } diff --git a/infra/modules/aws/lambda_worker/variables.tf b/infra/modules/aws/lambda_worker/variables.tf index fa8be37f..d3550685 100644 --- a/infra/modules/aws/lambda_worker/variables.tf +++ b/infra/modules/aws/lambda_worker/variables.tf @@ -11,12 +11,12 @@ variable "environment" { variable "state_bucket" { type = string - description = "Remote state bucket used to read shared stack outputs" + description = "Terraform state bucket" } variable "aws_region" { type = string - description = "AWS region for remote state and provider resources" + description = "AWS region for provider resources" } variable "code_bucket" { @@ -75,3 +75,27 @@ variable "sqs_dlq_alarm_datapoints_to_alarm" { type = number description = "The number of evaluated periods that must be breaching to trigger ALARM" } + +variable "lambda_worker_queue_name" { + type = string +} + +variable "lambda_worker_queue_arn" { + type = string +} + +variable "lambda_worker_queue_url" { + type = string +} + +variable "lambda_worker_queue_read_policy_arn" { + type = string +} + +variable "lambda_worker_dead_letter_queue_name" { + type = string +} + +variable "lambda_worker_dead_letter_queue_url" { + type = string +} diff --git a/infra/modules/aws/migrations/data.tf b/infra/modules/aws/migrations/data.tf index e82a7380..9aa389d5 100644 --- a/infra/modules/aws/migrations/data.tf +++ b/infra/modules/aws/migrations/data.tf @@ -1,13 +1,3 @@ -data "terraform_remote_state" "database" { - backend = "s3" - - config = { - bucket = var.state_bucket - key = "${var.environment}/aws/database/terraform.tfstate" - region = var.aws_region - } -} - data "aws_vpc" "this" { filter { name = "tag:Name" @@ -34,7 +24,7 @@ data "aws_iam_policy_document" "database_secret_read" { ] resources = [ - data.terraform_remote_state.database.outputs.credentials_secret_arn, + 
var.database_credentials_secret_arn, ] } } diff --git a/infra/modules/aws/migrations/main.tf b/infra/modules/aws/migrations/main.tf index a3ec1ff8..376baf73 100644 --- a/infra/modules/aws/migrations/main.tf +++ b/infra/modules/aws/migrations/main.tf @@ -15,10 +15,10 @@ module "migrations" { lambda_name = local.lambda_name environment_variables = { - DB_HOST = data.terraform_remote_state.database.outputs.readwrite_endpoint - DB_NAME = data.terraform_remote_state.database.outputs.database_name - DB_PORT = tostring(data.terraform_remote_state.database.outputs.database_port) - DB_SECRET_ARN = data.terraform_remote_state.database.outputs.credentials_secret_arn + DB_HOST = var.database_readwrite_endpoint + DB_NAME = var.database_name + DB_PORT = tostring(var.database_port) + DB_SECRET_ARN = var.database_credentials_secret_arn } additional_policy_arns = [ diff --git a/infra/modules/aws/migrations/variables.tf b/infra/modules/aws/migrations/variables.tf index eca02022..75be630a 100644 --- a/infra/modules/aws/migrations/variables.tf +++ b/infra/modules/aws/migrations/variables.tf @@ -30,3 +30,19 @@ variable "vpc_name" { variable "runtime_security_group_id" { type = string } + +variable "database_readwrite_endpoint" { + type = string +} + +variable "database_name" { + type = string +} + +variable "database_port" { + type = number +} + +variable "database_credentials_secret_arn" { + type = string +} diff --git a/infra/modules/aws/rds_reader_tagger/README.md b/infra/modules/aws/rds_reader_tagger/README.md index 6f4c0719..5f23e197 100644 --- a/infra/modules/aws/rds_reader_tagger/README.md +++ b/infra/modules/aws/rds_reader_tagger/README.md @@ -30,7 +30,7 @@ EventBridge-triggered and directly invokable Lambda that syncs cluster tags onto ## Dependency Notes -- reads the shared `database` remote state to get the expected Aurora cluster identifier +- expects the live Terragrunt stack to pass the shared `database` cluster identifier through a `dependency` block - relies on the 
shared Lambda build and deploy flow for shipping the tagging code - when `rds_reader_tagger` is present in the Lambda deploy matrix, the reusable `deploy.yml` workflow invokes it once after Lambda rollout so existing readers are reconciled too - uses a shortened AWS resource-name prefix (`rds-tag-sync`) so the Lambda, IAM, CodeDeploy, and EventBridge resources stay within AWS name limits while the stack directory remains `rds_reader_tagger` diff --git a/infra/modules/aws/rds_reader_tagger/data.tf b/infra/modules/aws/rds_reader_tagger/data.tf index 941ad6ca..23579557 100644 --- a/infra/modules/aws/rds_reader_tagger/data.tf +++ b/infra/modules/aws/rds_reader_tagger/data.tf @@ -1,13 +1,3 @@ -data "terraform_remote_state" "database" { - backend = "s3" - - config = { - bucket = var.state_bucket - key = "${var.environment}/aws/database/terraform.tfstate" - region = var.aws_region - } -} - data "aws_iam_policy_document" "reader_tag_sync" { statement { actions = [ diff --git a/infra/modules/aws/rds_reader_tagger/main.tf b/infra/modules/aws/rds_reader_tagger/main.tf index fc365f2f..ea68c86a 100644 --- a/infra/modules/aws/rds_reader_tagger/main.tf +++ b/infra/modules/aws/rds_reader_tagger/main.tf @@ -15,7 +15,7 @@ module "rds_reader_tagger" { lambda_name = local.lambda_name environment_variables = { - EXPECTED_CLUSTER_IDENTIFIER = data.terraform_remote_state.database.outputs.cluster_identifier + EXPECTED_CLUSTER_IDENTIFIER = var.database_cluster_identifier } additional_policy_arns = [ diff --git a/infra/modules/aws/rds_reader_tagger/variables.tf b/infra/modules/aws/rds_reader_tagger/variables.tf index f9fcabc7..fdcc7224 100644 --- a/infra/modules/aws/rds_reader_tagger/variables.tf +++ b/infra/modules/aws/rds_reader_tagger/variables.tf @@ -22,3 +22,7 @@ variable "otel_sample_rate" { type = number default = 1.0 } + +variable "database_cluster_identifier" { + type = string +} diff --git a/infra/modules/aws/service_api/README.md b/infra/modules/aws/service_api/README.md index 
9bf935c8..73608c66 100644 --- a/infra/modules/aws/service_api/README.md +++ b/infra/modules/aws/service_api/README.md @@ -39,11 +39,11 @@ Concrete ECS API service wrapper for the sample API service. ## Dependency Notes -- reads `task_api` remote state for the task definition -- reads `cluster` and `network` remote state +- expects the live Terragrunt stack to pass the `task_api` task definition through a `dependency` block +- expects the live Terragrunt stack to pass the shared `cluster` and `network` outputs as explicit inputs - expects the live Terragrunt stack to pass the ECS runtime security group id as an explicit input - depends on the `network` stack owning the shared VPC link, ALB listener path, and JWT authorizer inputs -- for bootstrap-friendly plan and validate flows, prefer Terragrunt dependency mocks in the live stack instead of direct `security` remote-state reads in the module +- for bootstrap-friendly plan and validate flows, prefer Terragrunt dependency mocks in the live stack rather than sibling state reads inside the module ## Inherits Behavior From diff --git a/infra/modules/aws/service_api/data.tf b/infra/modules/aws/service_api/data.tf index 965b9c79..93d02b4c 100644 --- a/infra/modules/aws/service_api/data.tf +++ b/infra/modules/aws/service_api/data.tf @@ -1,34 +1,3 @@ -data "terraform_remote_state" "task_api" { - count = var.bootstrap ? 
0 : 1 - backend = "s3" - - config = { - bucket = var.state_bucket - key = "${var.environment}/aws/task_api/terraform.tfstate" - region = var.aws_region - } -} - -data "terraform_remote_state" "network" { - backend = "s3" - - config = { - bucket = var.state_bucket - key = "${var.environment}/aws/network/terraform.tfstate" - region = var.aws_region - } -} - -data "terraform_remote_state" "cluster" { - backend = "s3" - - config = { - bucket = var.state_bucket - key = "${var.environment}/aws/cluster/terraform.tfstate" - region = var.aws_region - } -} - data "aws_vpc" "this" { filter { name = "tag:Name" diff --git a/infra/modules/aws/service_api/main.tf b/infra/modules/aws/service_api/main.tf index 5e216fad..a7ad4e89 100644 --- a/infra/modules/aws/service_api/main.tf +++ b/infra/modules/aws/service_api/main.tf @@ -2,7 +2,7 @@ module "service_api" { source = "../_shared/service" service_name = var.service_name - task_definition_arn = var.bootstrap ? "" : data.terraform_remote_state.task_api[0].outputs.task_definition_arn + task_definition_arn = var.bootstrap ? 
"" : var.task_definition_arn container_port = var.container_port root_path = var.root_path connection_type = var.connection_type @@ -11,22 +11,22 @@ module "service_api" { vpc_id = data.aws_vpc.this.id private_subnet_ids = data.aws_subnets.private.ids - cluster_id = data.terraform_remote_state.cluster.outputs.cluster_id - cluster_name = data.terraform_remote_state.cluster.outputs.cluster_name + cluster_id = var.cluster_id + cluster_name = var.cluster_name ecs_security_group_id = var.ecs_security_group_id - default_target_group_arn = data.terraform_remote_state.network.outputs.default_target_group_arn - load_balancer_arn = data.terraform_remote_state.network.outputs.load_balancer_arn - default_http_listener_arn = data.terraform_remote_state.network.outputs.default_http_listener_arn - load_balancer_arn_suffix = data.terraform_remote_state.network.outputs.load_balancer_arn_suffix - target_group_arn_suffix = data.terraform_remote_state.network.outputs.target_group_arn_suffix + default_target_group_arn = var.network_default_target_group_arn + load_balancer_arn = var.network_load_balancer_arn + default_http_listener_arn = var.network_default_http_listener_arn + load_balancer_arn_suffix = var.network_load_balancer_arn_suffix + target_group_arn_suffix = var.network_target_group_arn_suffix - api_id = data.terraform_remote_state.network.outputs.api_id - vpc_link_id = data.terraform_remote_state.network.outputs.vpc_link_id - internal_invoke_url = data.terraform_remote_state.network.outputs.internal_invoke_url - api_invoke_url = data.terraform_remote_state.network.outputs.api_invoke_url + api_id = var.network_api_id + vpc_link_id = var.network_vpc_link_id + internal_invoke_url = var.network_internal_invoke_url + api_invoke_url = var.network_api_invoke_url authorization_type = "JWT" - authorizer_id = data.terraform_remote_state.network.outputs.http_api_authorizer_id + authorizer_id = var.network_http_api_authorizer_id bootstrap = var.bootstrap bootstrap_image_uri = 
var.bootstrap_image_uri diff --git a/infra/modules/aws/service_api/outputs.tf b/infra/modules/aws/service_api/outputs.tf index d2be3728..6981573c 100644 --- a/infra/modules/aws/service_api/outputs.tf +++ b/infra/modules/aws/service_api/outputs.tf @@ -3,7 +3,7 @@ output "service_name" { } output "cluster_name" { - value = data.terraform_remote_state.cluster.outputs.cluster_name + value = var.cluster_name } output "codedeploy_app_name" { diff --git a/infra/modules/aws/service_api/variables.tf b/infra/modules/aws/service_api/variables.tf index b2885d71..a3de6e79 100644 --- a/infra/modules/aws/service_api/variables.tf +++ b/infra/modules/aws/service_api/variables.tf @@ -79,3 +79,55 @@ variable "bootstrap_image_uri" { variable "ecs_security_group_id" { type = string } + +variable "task_definition_arn" { + type = string +} + +variable "cluster_id" { + type = string +} + +variable "cluster_name" { + type = string +} + +variable "network_default_target_group_arn" { + type = string +} + +variable "network_load_balancer_arn" { + type = string +} + +variable "network_default_http_listener_arn" { + type = string +} + +variable "network_load_balancer_arn_suffix" { + type = string +} + +variable "network_target_group_arn_suffix" { + type = string +} + +variable "network_api_id" { + type = string +} + +variable "network_vpc_link_id" { + type = string +} + +variable "network_internal_invoke_url" { + type = string +} + +variable "network_api_invoke_url" { + type = string +} + +variable "network_http_api_authorizer_id" { + type = string +} diff --git a/infra/modules/aws/service_worker/README.md b/infra/modules/aws/service_worker/README.md index 03cd7d3c..53e07b98 100644 --- a/infra/modules/aws/service_worker/README.md +++ b/infra/modules/aws/service_worker/README.md @@ -35,12 +35,12 @@ Concrete ECS worker service wrapper. 
## Dependency Notes -- reads `task_worker` remote state -- reads `worker_messaging` remote state -- reads `cluster` and `network` remote state +- expects the live Terragrunt stack to pass the `task_worker` task definition through a `dependency` block +- expects the live Terragrunt stack to pass the shared ECS worker queue name through a `dependency` block to drive autoscaling +- expects the live Terragrunt stack to pass the shared `cluster` and `network` outputs as explicit inputs - expects the live Terragrunt stack to pass the ECS runtime security group id as an explicit input - relies on `worker_messaging` owning the queue contract rather than duplicating queue state locally -- for bootstrap-friendly plan and validate flows, prefer Terragrunt dependency mocks in the live stack instead of direct `security` remote-state reads in the module +- for bootstrap-friendly plan and validate flows, prefer Terragrunt dependency mocks in the live stack rather than sibling state reads inside the module It uses the shared ECS worker queue name exported by `worker_messaging` for service autoscaling. During bootstrap applies, it uses placeholder values instead of reading task outputs directly so the bootstrap path does not need a pre-existing task state file. diff --git a/infra/modules/aws/service_worker/data.tf b/infra/modules/aws/service_worker/data.tf index 00448982..93d02b4c 100644 --- a/infra/modules/aws/service_worker/data.tf +++ b/infra/modules/aws/service_worker/data.tf @@ -1,45 +1,3 @@ -data "terraform_remote_state" "task_worker" { - count = var.bootstrap ? 0 : 1 - backend = "s3" - - config = { - bucket = var.state_bucket - key = "${var.environment}/aws/task_worker/terraform.tfstate" - region = var.aws_region - } -} - -data "terraform_remote_state" "worker_messaging" { - count = var.bootstrap ? 
0 : 1 - backend = "s3" - - config = { - bucket = var.state_bucket - key = "${var.environment}/aws/worker_messaging/terraform.tfstate" - region = var.aws_region - } -} - -data "terraform_remote_state" "network" { - backend = "s3" - - config = { - bucket = var.state_bucket - key = "${var.environment}/aws/network/terraform.tfstate" - region = var.aws_region - } -} - -data "terraform_remote_state" "cluster" { - backend = "s3" - - config = { - bucket = var.state_bucket - key = "${var.environment}/aws/cluster/terraform.tfstate" - region = var.aws_region - } -} - data "aws_vpc" "this" { filter { name = "tag:Name" diff --git a/infra/modules/aws/service_worker/locals.tf b/infra/modules/aws/service_worker/locals.tf index 7d4d849e..a1c86e06 100644 --- a/infra/modules/aws/service_worker/locals.tf +++ b/infra/modules/aws/service_worker/locals.tf @@ -1,7 +1,4 @@ locals { - task_worker_outputs = var.bootstrap ? null : one(data.terraform_remote_state.task_worker[*].outputs) - worker_messaging_outputs = var.bootstrap ? null : one(data.terraform_remote_state.worker_messaging[*].outputs) - - task_definition_arn = var.bootstrap ? "" : local.task_worker_outputs.task_definition_arn - autoscaling_queue_name = var.bootstrap ? "not_set" : local.worker_messaging_outputs.ecs_worker_queue_name + task_definition_arn = var.bootstrap ? "" : var.task_definition_arn + autoscaling_queue_name = var.bootstrap ? 
"not_set" : var.ecs_worker_queue_name } diff --git a/infra/modules/aws/service_worker/main.tf b/infra/modules/aws/service_worker/main.tf index 84170267..6aef2011 100644 --- a/infra/modules/aws/service_worker/main.tf +++ b/infra/modules/aws/service_worker/main.tf @@ -11,19 +11,19 @@ module "service_worker" { vpc_id = data.aws_vpc.this.id private_subnet_ids = data.aws_subnets.private.ids - cluster_id = data.terraform_remote_state.cluster.outputs.cluster_id - cluster_name = data.terraform_remote_state.cluster.outputs.cluster_name + cluster_id = var.cluster_id + cluster_name = var.cluster_name ecs_security_group_id = var.ecs_security_group_id - default_target_group_arn = data.terraform_remote_state.network.outputs.default_target_group_arn - default_http_listener_arn = data.terraform_remote_state.network.outputs.default_http_listener_arn - load_balancer_arn_suffix = data.terraform_remote_state.network.outputs.load_balancer_arn_suffix - target_group_arn_suffix = data.terraform_remote_state.network.outputs.target_group_arn_suffix + default_target_group_arn = var.network_default_target_group_arn + default_http_listener_arn = var.network_default_http_listener_arn + load_balancer_arn_suffix = var.network_load_balancer_arn_suffix + target_group_arn_suffix = var.network_target_group_arn_suffix - api_id = data.terraform_remote_state.network.outputs.api_id - vpc_link_id = data.terraform_remote_state.network.outputs.vpc_link_id - internal_invoke_url = data.terraform_remote_state.network.outputs.internal_invoke_url - api_invoke_url = data.terraform_remote_state.network.outputs.api_invoke_url + api_id = var.network_api_id + vpc_link_id = var.network_vpc_link_id + internal_invoke_url = var.network_internal_invoke_url + api_invoke_url = var.network_api_invoke_url bootstrap = var.bootstrap bootstrap_image_uri = var.bootstrap_image_uri diff --git a/infra/modules/aws/service_worker/outputs.tf b/infra/modules/aws/service_worker/outputs.tf index 38a97a9d..67794a66 100644 --- 
a/infra/modules/aws/service_worker/outputs.tf +++ b/infra/modules/aws/service_worker/outputs.tf @@ -3,7 +3,7 @@ output "service_name" { } output "cluster_name" { - value = data.terraform_remote_state.cluster.outputs.cluster_name + value = var.cluster_name } output "codedeploy_app_name" { diff --git a/infra/modules/aws/service_worker/variables.tf b/infra/modules/aws/service_worker/variables.tf index 7878b66f..871f9098 100644 --- a/infra/modules/aws/service_worker/variables.tf +++ b/infra/modules/aws/service_worker/variables.tf @@ -79,3 +79,51 @@ variable "bootstrap_image_uri" { variable "ecs_security_group_id" { type = string } + +variable "task_definition_arn" { + type = string +} + +variable "ecs_worker_queue_name" { + type = string +} + +variable "cluster_id" { + type = string +} + +variable "cluster_name" { + type = string +} + +variable "network_default_target_group_arn" { + type = string +} + +variable "network_default_http_listener_arn" { + type = string +} + +variable "network_load_balancer_arn_suffix" { + type = string +} + +variable "network_target_group_arn_suffix" { + type = string +} + +variable "network_api_id" { + type = string +} + +variable "network_vpc_link_id" { + type = string +} + +variable "network_internal_invoke_url" { + type = string +} + +variable "network_api_invoke_url" { + type = string +} diff --git a/infra/modules/aws/task_worker/README.md b/infra/modules/aws/task_worker/README.md index e132b56b..a09ed8bd 100644 --- a/infra/modules/aws/task_worker/README.md +++ b/infra/modules/aws/task_worker/README.md @@ -40,8 +40,8 @@ Concrete ECS worker task wrapper. 
## Dependency Notes -- reads queue details from `worker_messaging` remote state -- reads database connection details from the shared `database` stack +- expects the live Terragrunt stack to pass queue details from `worker_messaging` through a `dependency` block +- expects the live Terragrunt stack to pass shared `database` connection details as explicit inputs - publishes the task definition consumed by `service_worker` -This module is the image-driven deployment unit for the ECS worker. It reads the ECS worker queue from the `worker_messaging` stack so the task definition and service can consume the same fanout event stream as the Lambda worker, and it reads the shared `database` stack so the worker can persist consumed messages to Aurora PostgreSQL. +This module is the image-driven deployment unit for the ECS worker. It consumes the ECS worker queue contract owned by `worker_messaging` and the shared `database` contract passed in from the live Terragrunt stack so the task definition and service can use the same fanout event stream and Aurora PostgreSQL connection details without the module reading sibling stack state directly. 
diff --git a/infra/modules/aws/task_worker/data.tf b/infra/modules/aws/task_worker/data.tf index a67310da..4499a1c9 100644 --- a/infra/modules/aws/task_worker/data.tf +++ b/infra/modules/aws/task_worker/data.tf @@ -1,23 +1,3 @@ -data "terraform_remote_state" "worker_messaging" { - backend = "s3" - - config = { - bucket = var.state_bucket - key = "${var.environment}/aws/worker_messaging/terraform.tfstate" - region = var.aws_region - } -} - -data "terraform_remote_state" "database" { - backend = "s3" - - config = { - bucket = var.state_bucket - key = "${var.environment}/aws/database/terraform.tfstate" - region = var.aws_region - } -} - data "aws_iam_policy_document" "database_secret_read" { statement { actions = [ @@ -25,7 +5,7 @@ data "aws_iam_policy_document" "database_secret_read" { ] resources = [ - data.terraform_remote_state.database.outputs.credentials_secret_arn, + var.database_credentials_secret_arn, ] } } diff --git a/infra/modules/aws/task_worker/main.tf b/infra/modules/aws/task_worker/main.tf index 6b964062..04f3073b 100644 --- a/infra/modules/aws/task_worker/main.tf +++ b/infra/modules/aws/task_worker/main.tf @@ -24,23 +24,23 @@ module "task_worker" { additional_env_vars = [ { name = "AWS_SQS_QUEUE_URL" - value = data.terraform_remote_state.worker_messaging.outputs.ecs_worker_queue_url + value = var.ecs_worker_queue_url }, { name = "DB_HOST" - value = data.terraform_remote_state.database.outputs.readwrite_endpoint + value = var.database_readwrite_endpoint }, { name = "DB_NAME" - value = data.terraform_remote_state.database.outputs.database_name + value = var.database_name }, { name = "DB_PORT" - value = tostring(data.terraform_remote_state.database.outputs.database_port) + value = tostring(var.database_port) }, { name = "DB_SECRET_ARN" - value = data.terraform_remote_state.database.outputs.credentials_secret_arn + value = var.database_credentials_secret_arn }, { name = "HEARTBEAT_FILE" @@ -48,7 +48,7 @@ module "task_worker" { } ] 
additional_runtime_policy_arns = [ - data.terraform_remote_state.worker_messaging.outputs.ecs_worker_queue_read_policy_arn, + var.ecs_worker_queue_read_policy_arn, aws_iam_policy.database_secret_read.arn, ] diff --git a/infra/modules/aws/task_worker/outputs.tf b/infra/modules/aws/task_worker/outputs.tf index 3f353262..0e6dc60a 100644 --- a/infra/modules/aws/task_worker/outputs.tf +++ b/infra/modules/aws/task_worker/outputs.tf @@ -15,13 +15,13 @@ output "service_name" { } output "sqs_queue_name" { - value = data.terraform_remote_state.worker_messaging.outputs.ecs_worker_queue_name + value = var.ecs_worker_queue_name } output "sqs_queue_url" { - value = data.terraform_remote_state.worker_messaging.outputs.ecs_worker_queue_url + value = var.ecs_worker_queue_url } output "sqs_queue_read_policy_arn" { - value = data.terraform_remote_state.worker_messaging.outputs.ecs_worker_queue_read_policy_arn + value = var.ecs_worker_queue_read_policy_arn } diff --git a/infra/modules/aws/task_worker/variables.tf b/infra/modules/aws/task_worker/variables.tf index c4bd09a5..a629b8b1 100644 --- a/infra/modules/aws/task_worker/variables.tf +++ b/infra/modules/aws/task_worker/variables.tf @@ -62,3 +62,31 @@ variable "xray_enabled" { type = bool default = false } + +variable "ecs_worker_queue_name" { + type = string +} + +variable "ecs_worker_queue_url" { + type = string +} + +variable "ecs_worker_queue_read_policy_arn" { + type = string +} + +variable "database_readwrite_endpoint" { + type = string +} + +variable "database_name" { + type = string +} + +variable "database_port" { + type = number +} + +variable "database_credentials_secret_arn" { + type = string +} From 2b19dde800d6a654cbb7a7d0b3115e65ef0d5c94 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Thu, 14 May 2026 15:29:38 +0100 Subject: [PATCH 05/34] chore: swap out more remote state datablocks --- .github/workflows/shared_infra.yml | 12 ---------- infra/live/dependencies/database_security.hcl | 13 +++++++++++ 
infra/live/dev/aws/database/terragrunt.hcl | 21 +++++++++++------ infra/live/prod/aws/database/terragrunt.hcl | 23 ++++++++++++------- 4 files changed, 42 insertions(+), 27 deletions(-) create mode 100644 infra/live/dependencies/database_security.hcl diff --git a/.github/workflows/shared_infra.yml b/.github/workflows/shared_infra.yml index c0fd3f85..88ea29d7 100644 --- a/.github/workflows/shared_infra.yml +++ b/.github/workflows/shared_infra.yml @@ -221,8 +221,6 @@ jobs: security: needs: oidc runs-on: ubuntu-latest - outputs: - postgres_sg: ${{ steps.get_security_outputs.outputs.postgres_sg }} steps: - uses: actions/checkout@v6 with: @@ -240,14 +238,6 @@ jobs: tg_directory: infra/live/${{ inputs.environment }}/aws/security tg_action: ${{ inputs.tg_action }} - - name: Get security outputs - id: get_security_outputs - if: inputs.tg_action != 'destroy' - env: - TG_OUTPUTS: ${{ steps.deploy-security.outputs.tg_outputs }} - run: | - echo "postgres_sg=$(echo "$TG_OUTPUTS" | jq -r '.postgres_sg.value')" >> "$GITHUB_OUTPUT" - database: needs: - oidc @@ -265,8 +255,6 @@ jobs: - name: ${{ env.TG_ACTION_LABEL }} database infra uses: ./.github/actions/terragrunt - env: - TF_VAR_database_security_group_id: ${{ needs.security.outputs.postgres_sg }} with: tg_directory: infra/live/${{ inputs.environment }}/aws/database tg_action: ${{ inputs.tg_action }} diff --git a/infra/live/dependencies/database_security.hcl b/infra/live/dependencies/database_security.hcl new file mode 100644 index 00000000..d78237b1 --- /dev/null +++ b/infra/live/dependencies/database_security.hcl @@ -0,0 +1,13 @@ +dependency "security" { + config_path = "${get_original_terragrunt_dir()}/../security" + + mock_outputs = { + postgres_sg = "sg-00000000000000006" + } + + mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] +} + +inputs = { + database_security_group_id = dependency.security.outputs.postgres_sg +} diff --git a/infra/live/dev/aws/database/terragrunt.hcl 
b/infra/live/dev/aws/database/terragrunt.hcl index 87094184..6f0238aa 100644 --- a/infra/live/dev/aws/database/terragrunt.hcl +++ b/infra/live/dev/aws/database/terragrunt.hcl @@ -2,15 +2,22 @@ include "root" { path = find_in_parent_folders("root.hcl") } -inputs = { - database_name = "app" - backup_retention_period = 1 - rds_min_capacity = 0.5 - rds_max_capacity = 1.0 - rds_max_reader_count = 0 - performance_insights_enabled = false +locals { + database_security = read_terragrunt_config(find_in_parent_folders("dependencies/database_security.hcl")) } terraform { source = "../../../../modules//aws//database" } + +inputs = merge( + local.database_security.inputs, + { + database_name = "app" + backup_retention_period = 1 + rds_min_capacity = 0.5 + rds_max_capacity = 1.0 + rds_max_reader_count = 0 + performance_insights_enabled = false + }, +) diff --git a/infra/live/prod/aws/database/terragrunt.hcl b/infra/live/prod/aws/database/terragrunt.hcl index 4772e5da..af9ebc4a 100644 --- a/infra/live/prod/aws/database/terragrunt.hcl +++ b/infra/live/prod/aws/database/terragrunt.hcl @@ -2,16 +2,23 @@ include "root" { path = find_in_parent_folders("root.hcl") } -inputs = { - database_name = "app" - backup_retention_period = 7 - rds_min_capacity = 0.5 - rds_max_capacity = 2.0 - rds_max_reader_count = 1 - performance_insights_enabled = true - performance_insights_retention_period = 7 +locals { + database_security = read_terragrunt_config(find_in_parent_folders("dependencies/database_security.hcl")) } terraform { source = "../../../../modules//aws//database" } + +inputs = merge( + local.database_security.inputs, + { + database_name = "app" + backup_retention_period = 7 + rds_min_capacity = 0.5 + rds_max_capacity = 2.0 + rds_max_reader_count = 1 + performance_insights_enabled = true + performance_insights_retention_period = 7 + }, +) From 205c0eae1a0a57820e7c420540f69a2f389ab898 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Thu, 14 May 2026 15:33:14 +0100 Subject: [PATCH 06/34] 
chore: ask why terraform_remote_state used --- REPO_INSTRUCTIONS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/REPO_INSTRUCTIONS.md b/REPO_INSTRUCTIONS.md index 3c60e78b..aee29f28 100644 --- a/REPO_INSTRUCTIONS.md +++ b/REPO_INSTRUCTIONS.md @@ -41,6 +41,7 @@ These instructions apply to the entire repository. - if CI plan failures are caused by missing upstream state, fix the contract shape first instead of papering over the issue with more direct `terraform_remote_state` reads - when the same Terragrunt dependency wiring or mocks are needed across environments, centralize that shared config under `infra/live/dependencies/` in a capability-scoped helper such as `network.hcl` and have environment stacks read it rather than duplicating the same blocks in `dev`, `prod`, or `ci` - keep this approach visible to users as well: when you introduce or expand this pattern, update the top-level `README.md` so the bootstrap-friendly mock strategy is documented outside agent-only instructions +- if you intentionally add a Terraform `data "terraform_remote_state"` block, add a `# remote_state_reason: ...` comment immediately above it explaining why Terragrunt `dependency` plus `mock_outputs` is not practical for that case ## Terragrunt Plan Expectation From 98c71c8b16124c72e856c052281d7f2c4ef5242e Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Thu, 14 May 2026 15:37:37 +0100 Subject: [PATCH 07/34] fix: rm no longer required init to get outputs --- .github/workflows/shared_infra.yml | 42 -------------------- infra/live/dependencies/frontend_runtime.hcl | 30 ++++++++++++++ infra/live/dev/aws/frontend/terragrunt.hcl | 6 +++ infra/live/prod/aws/frontend/terragrunt.hcl | 6 +++ 4 files changed, 42 insertions(+), 42 deletions(-) create mode
100644 infra/live/dependencies/frontend_runtime.hcl diff --git a/.github/workflows/shared_infra.yml b/.github/workflows/shared_infra.yml index 88ea29d7..84bb0a44 100644 --- a/.github/workflows/shared_infra.yml +++ b/.github/workflows/shared_infra.yml @@ -149,52 +149,10 @@ jobs: role-to-assume: ${{ env.AWS_OIDC_ROLE_ARN }} aws-region: ${{ env.AWS_REGION }} - - name: Get network infra - if: inputs.tg_action != 'apply_plan' - uses: ./.github/actions/terragrunt - id: get-network - with: - tg_directory: infra/live/${{ inputs.environment }}/aws/network - tg_action: init - - - name: Get cognito infra - if: inputs.tg_action != 'apply_plan' - uses: ./.github/actions/terragrunt - id: get-cognito - env: - TF_VAR_domain_name: ${{ env.DOMAIN_NAME }} - with: - tg_directory: infra/live/${{ inputs.environment }}/aws/cognito - tg_action: init - - - name: Get api invoke url - if: inputs.tg_action != 'apply_plan' - id: get_api_vars - env: - TG_OUTPUTS: ${{ steps.get-network.outputs.tg_outputs }} - run: | - echo "invoke_url=$(echo $TG_OUTPUTS | jq -r '.api_invoke_url.value')" >> $GITHUB_OUTPUT - - - name: Get cognito values - if: inputs.tg_action != 'apply_plan' - id: get_cognito_vars - env: - TG_OUTPUTS: ${{ steps.get-cognito.outputs.tg_outputs }} - run: | - echo "user_pool_id=$(echo "$TG_OUTPUTS" | jq -r '.user_pool_id.value')" >> "$GITHUB_OUTPUT" - echo "user_pool_client_id=$(echo "$TG_OUTPUTS" | jq -r '.user_pool_client_id.value')" >> "$GITHUB_OUTPUT" - echo "hosted_ui_url=$(echo "$TG_OUTPUTS" | jq -r '.hosted_ui_url.value')" >> "$GITHUB_OUTPUT" - echo "readonly_group_name=$(echo "$TG_OUTPUTS" | jq -r '.readonly_group_name.value')" >> "$GITHUB_OUTPUT" - - name: ${{ env.TG_ACTION_LABEL }} frontend infra uses: ./.github/actions/terragrunt env: - TF_VAR_api_invoke_url: ${{ inputs.tg_action == 'apply_plan' && '' || steps.get_api_vars.outputs.invoke_url }} TF_VAR_domain_name: ${{ env.DOMAIN_NAME }} - TF_VAR_auth_user_pool_id: ${{ inputs.tg_action == 'apply_plan' && '' || 
steps.get_cognito_vars.outputs.user_pool_id }} - TF_VAR_auth_user_pool_client_id: ${{ inputs.tg_action == 'apply_plan' && '' || steps.get_cognito_vars.outputs.user_pool_client_id }} - TF_VAR_auth_hosted_ui_url: ${{ inputs.tg_action == 'apply_plan' && '' || steps.get_cognito_vars.outputs.hosted_ui_url }} - TF_VAR_auth_readonly_group_name: ${{ inputs.tg_action == 'apply_plan' && '' || steps.get_cognito_vars.outputs.readonly_group_name }} with: tg_directory: infra/live/${{ inputs.environment }}/aws/frontend tg_action: ${{ inputs.tg_action }} diff --git a/infra/live/dependencies/frontend_runtime.hcl b/infra/live/dependencies/frontend_runtime.hcl new file mode 100644 index 00000000..93b5dcaf --- /dev/null +++ b/infra/live/dependencies/frontend_runtime.hcl @@ -0,0 +1,30 @@ +dependency "network" { + config_path = "${get_original_terragrunt_dir()}/../network" + + mock_outputs = { + api_invoke_url = "https://mockapi123.execute-api.eu-west-2.amazonaws.com" + } + + mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] +} + +dependency "cognito" { + config_path = "${get_original_terragrunt_dir()}/../cognito" + + mock_outputs = { + user_pool_id = "eu-west-2_mock" + user_pool_client_id = "mock-user-pool-client-id" + hosted_ui_url = "https://mock-domain.auth.eu-west-2.amazoncognito.com" + readonly_group_name = "readonly" + } + + mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] +} + +inputs = { + api_invoke_url = dependency.network.outputs.api_invoke_url + auth_user_pool_id = dependency.cognito.outputs.user_pool_id + auth_user_pool_client_id = dependency.cognito.outputs.user_pool_client_id + auth_hosted_ui_url = dependency.cognito.outputs.hosted_ui_url + auth_readonly_group_name = dependency.cognito.outputs.readonly_group_name +} diff --git a/infra/live/dev/aws/frontend/terragrunt.hcl b/infra/live/dev/aws/frontend/terragrunt.hcl index 3086bb63..fd2d5c39 100644 --- 
a/infra/live/dev/aws/frontend/terragrunt.hcl +++ b/infra/live/dev/aws/frontend/terragrunt.hcl @@ -2,6 +2,12 @@ include "root" { path = find_in_parent_folders("root.hcl") } +locals { + frontend_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/frontend_runtime.hcl")) +} + terraform { source = "../../../../modules//aws//frontend" } + +inputs = local.frontend_runtime.inputs diff --git a/infra/live/prod/aws/frontend/terragrunt.hcl b/infra/live/prod/aws/frontend/terragrunt.hcl index 3086bb63..fd2d5c39 100644 --- a/infra/live/prod/aws/frontend/terragrunt.hcl +++ b/infra/live/prod/aws/frontend/terragrunt.hcl @@ -2,6 +2,12 @@ include "root" { path = find_in_parent_folders("root.hcl") } +locals { + frontend_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/frontend_runtime.hcl")) +} + terraform { source = "../../../../modules//aws//frontend" } + +inputs = local.frontend_runtime.inputs From a0c206abe690d88d3850676bee79e69a3926ba02 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Thu, 14 May 2026 15:58:06 +0100 Subject: [PATCH 08/34] docs: notes on plan artifacts with mocked inputs --- REPO_INSTRUCTIONS.md | 2 ++ infra/README.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/REPO_INSTRUCTIONS.md b/REPO_INSTRUCTIONS.md index aee29f28..52c094a4 100644 --- a/REPO_INSTRUCTIONS.md +++ b/REPO_INSTRUCTIONS.md @@ -49,6 +49,8 @@ These instructions apply to the entire repository.
- when a change touches `*.hcl`, Terraform modules, live Terragrunt stacks, or downstream dependencies that can affect Terraform evaluation or plan output, run the relevant `just tg plan` command before closing the task when feasible - choose the smallest relevant plan surface rather than defaulting to `run-all`; for example, plan only the affected `dev`, `ci`, or `prod` stack(s) - when shared modules or remote-state contracts change, consider the downstream consumer stacks too and run plans for the affected dependents, not just the module wrapper you edited +- treat saved plans as apply-intent artifacts, not as general previews: only keep a `plan` you expect to apply, because Terraform reuses the exact planned variable values during `apply_plan` +- be especially careful on first deploys or bootstrap-sensitive stacks that use Terragrunt `mock_outputs` for planability; if a saved plan captured mock values, discard it and create a fresh plan after the upstream real outputs exist - if a plan is not feasible because credentials, network, permissions, or state access are unavailable, say that explicitly in the final response and name the plan command that should be run manually ## High-Signal Edit Warnings diff --git a/infra/README.md b/infra/README.md index 440611b9..d632a168 100644 --- a/infra/README.md +++ b/infra/README.md @@ -112,6 +112,8 @@ That `containers/lib` directory is helper code only and is not treated as a depl - prefer `dependency` blocks for all cross-stack communication - use `mock_outputs` for dependencies during `plan`, `validate`, and other non-apply commands to allow independent iteration without requiring upstream stacks to be deployed - restrict mocks using `mock_outputs_allowed_terraform_commands` to ensure real outputs are always used during `apply` +- treat saved `plan` artifacts as apply-intent only: Terraform will reuse the exact variable values captured in the plan file during `apply_plan` +- for first deploys or other bootstrap-sensitive 
stacks, do not reuse a saved plan that captured `mock_outputs`; re-plan after the upstream real outputs exist before running `apply_plan` ### When to Use Remote State From 85d93c980abd1d02ff93e3767995561d2fd17df9 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Thu, 14 May 2026 15:59:07 +0100 Subject: [PATCH 09/34] fix: don't allow mocked plan values --- .github/actions/terragrunt/action.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/actions/terragrunt/action.yml b/.github/actions/terragrunt/action.yml index da37b6f8..bfd46349 100644 --- a/.github/actions/terragrunt/action.yml +++ b/.github/actions/terragrunt/action.yml @@ -109,6 +109,7 @@ runs: working-directory: ${{ inputs.tg_directory }} run: | PLAN_PATH="$(pwd)/terragrunt.tfplan" + PLAN_LOG_PATH="$(pwd)/terragrunt.plan.log" case "${{ inputs.tg_action }}" in apply) @@ -116,14 +117,19 @@ runs: ;; plan) set +e - terragrunt plan -input=false -lock=false -detailed-exitcode -compact-warnings -out="$PLAN_PATH" -var-file=override_tg_vars.tfvars.json - plan_exit_code=$? + terragrunt plan -input=false -lock=false -detailed-exitcode -compact-warnings -out="$PLAN_PATH" -var-file=override_tg_vars.tfvars.json 2>&1 | tee "$PLAN_LOG_PATH" + plan_exit_code=${PIPESTATUS[0]} set -e if [ "$plan_exit_code" -eq 1 ]; then exit 1 fi + if grep -Fq "mock outputs provided and returning those in dependency output" "$PLAN_LOG_PATH"; then + echo "::error title=Mock outputs used during plan::Terragrunt used dependency mock outputs while creating a saved plan. Discard this plan and re-run after the upstream real outputs exist." 
+ exit 1 + fi + echo "plan_exit_code=$plan_exit_code" >> "$GITHUB_OUTPUT" echo "Terragrunt binary plan path: $PLAN_PATH" ls -l "$PLAN_PATH" From c0f5238a96d99b7672771d6fabab1d00f7e5454d Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Thu, 14 May 2026 16:25:05 +0100 Subject: [PATCH 10/34] fix: guard for mocked outputs in plan file --- .github/actions/terragrunt/action.yml | 10 ++++++++-- justfile.tg | 9 ++++++++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/.github/actions/terragrunt/action.yml b/.github/actions/terragrunt/action.yml index bfd46349..1efab0f9 100644 --- a/.github/actions/terragrunt/action.yml +++ b/.github/actions/terragrunt/action.yml @@ -96,6 +96,9 @@ runs: if [ "$(jq -r '.has_changes' "$PLAN_META_PATH")" != "true" ]; then echo "::warning title=Empty saved plan::Saved plan '$PLAN_PATH' contains no mutating resource changes. Skipping apply." echo "should_apply=false" >> "$GITHUB_OUTPUT" + elif [ "$(jq -r '.contains_mocked_outputs // false' "$PLAN_META_PATH")" = "true" ]; then + echo "::error title=Saved plan contains mocked outputs::Saved plan '$PLAN_PATH' was created while Terragrunt dependency mock outputs were in use. Regenerate the plan after upstream real outputs exist." + exit 1 else echo "should_apply=true" >> "$GITHUB_OUTPUT" fi @@ -125,12 +128,14 @@ runs: exit 1 fi + plan_contains_mocked_outputs=false if grep -Fq "mock outputs provided and returning those in dependency output" "$PLAN_LOG_PATH"; then - echo "::error title=Mock outputs used during plan::Terragrunt used dependency mock outputs while creating a saved plan. Discard this plan and re-run after the upstream real outputs exist." - exit 1 + plan_contains_mocked_outputs=true + echo "::warning title=Mock outputs used during plan::Terragrunt used dependency mock outputs while creating a saved plan. This plan artifact should not be used with apply_plan until a fresh plan is created from real upstream outputs." 
fi echo "plan_exit_code=$plan_exit_code" >> "$GITHUB_OUTPUT" + echo "plan_contains_mocked_outputs=$plan_contains_mocked_outputs" >> "$GITHUB_OUTPUT" echo "Terragrunt binary plan path: $PLAN_PATH" ls -l "$PLAN_PATH" @@ -157,6 +162,7 @@ runs: env: TG_DIRECTORY: ${{ inputs.tg_directory }} TG_PLAN_EXIT_CODE: ${{ steps.terragrunt_action.outputs.plan_exit_code }} + TG_PLAN_CONTAINS_MOCKED_OUTPUTS: ${{ steps.terragrunt_action.outputs.plan_contains_mocked_outputs }} with: justfile_path: justfile.tg just_action: terragrunt-plan-render diff --git a/justfile.tg b/justfile.tg index 9491885b..85f050aa 100644 --- a/justfile.tg +++ b/justfile.tg @@ -14,6 +14,7 @@ PLAN_META_FILE := "terragrunt.plan.meta.json" # Expected environment variables: # - TG_DIRECTORY: directory containing the saved plan file # - TG_PLAN_EXIT_CODE: detailed-exitcode from `terragrunt plan` (0 or 2) +# - TG_PLAN_CONTAINS_MOCKED_OUTPUTS: whether Terragrunt reported dependency mock outputs during plan terragrunt-plan-render: #!/usr/bin/env bash set -euo pipefail @@ -28,6 +29,11 @@ terragrunt-plan-render: exit 1 fi + if [[ -z "${TG_PLAN_CONTAINS_MOCKED_OUTPUTS:-}" ]]; then + echo "❌ TG_PLAN_CONTAINS_MOCKED_OUTPUTS environment variable is not set." 
+ exit 1 + fi + cd "$TG_DIRECTORY" PLAN_PATH="$(pwd)/{{PLAN_FILE}}" @@ -45,7 +51,8 @@ terragrunt-plan-render: --arg tg_directory "$TG_DIRECTORY" \ --argjson exit_code "$TG_PLAN_EXIT_CODE" \ --argjson has_changes "$([ "$TG_PLAN_EXIT_CODE" -eq 2 ] && echo true || echo false)" \ - '{tg_directory: $tg_directory, exit_code: $exit_code, has_changes: $has_changes}' \ + --argjson contains_mocked_outputs "$TG_PLAN_CONTAINS_MOCKED_OUTPUTS" \ + '{tg_directory: $tg_directory, exit_code: $exit_code, has_changes: $has_changes, contains_mocked_outputs: $contains_mocked_outputs}' \ > "$PLAN_META_PATH" echo "Terragrunt binary plan path: $PLAN_PATH" From 31afbca8dd99bcc9b5d7a1def2ba9b9c3e68234f Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Thu, 14 May 2026 16:38:00 +0100 Subject: [PATCH 11/34] fix Deprecated Parameter warning --- .github/actions/terragrunt/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/terragrunt/action.yml b/.github/actions/terragrunt/action.yml index 1efab0f9..9e1d9f66 100644 --- a/.github/actions/terragrunt/action.yml +++ b/.github/actions/terragrunt/action.yml @@ -141,7 +141,7 @@ runs: ;; apply_plan) - terragrunt apply -auto-approve -compact-warnings "$PLAN_PATH" + terragrunt apply -auto-approve "$PLAN_PATH" ;; destroy) terragrunt destroy -auto-approve -compact-warnings -var-file=override_tg_vars.tfvars.json From 4cf8b304e8d76bf840f82b6dc07534a85bc9cbf5 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Fri, 15 May 2026 09:33:19 +0100 Subject: [PATCH 12/34] chore: fail on non-existent plan id --- .github/workflows/shared_infra_apply_from_plan.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/shared_infra_apply_from_plan.yml b/.github/workflows/shared_infra_apply_from_plan.yml index eca062ca..078362e4 100644 --- a/.github/workflows/shared_infra_apply_from_plan.yml +++ b/.github/workflows/shared_infra_apply_from_plan.yml @@ -66,6 +66,18 @@ jobs: justfile_path: justfile.tg
just_action: terragrunt-plan-base-s3-prefix + - name: Check requested plan artifact run id exists + shell: bash + env: + PLAN_ARTIFACT_S3_PREFIX: ${{ steps.plan_artifact_s3_prefix.outputs.just_outputs }} + PLAN_ARTIFACT_RUN_ID: ${{ inputs.plan_artifact_run_id }} + run: | + metadata_object="${PLAN_ARTIFACT_S3_PREFIX}/infra-plan-metadata/plan-metadata.json" + if ! aws s3 ls "$metadata_object" >/dev/null 2>&1; then + echo "::error title=Plan artifact run id not found::No saved plan metadata was found for plan_artifact_run_id '${PLAN_ARTIFACT_RUN_ID}' at '${metadata_object}'. Check the run id and make sure that earlier plan workflow completed and uploaded artifacts." + exit 1 + fi + - name: Download plan metadata from S3 uses: ./.github/actions/just env: From 7efb1f16260fabc031b475a8c6657e0baf75efa7 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Fri, 15 May 2026 14:46:15 +0100 Subject: [PATCH 13/34] chore: mv upload/download to hcl --- .github/actions/terragrunt/README.md | 21 ++- .github/actions/terragrunt/action.yml | 76 +------- .github/docs/README.md | 20 +- .github/workflows/shared_infra.yml | 7 +- .../shared_infra_apply_from_plan.yml | 61 +----- .github/workflows/shared_infra_plan.yml | 32 +--- REPO_INSTRUCTIONS.md | 2 +- infra/README.md | 10 +- infra/root.hcl | 37 ++++ infra/scripts/ensure-artifact-bucket.sh | 27 +++ infra/scripts/handle-plan-artifact.sh | 67 +++++++ justfile.ci | 30 --- justfile.tg | 174 ------------------ 13 files changed, 179 insertions(+), 385 deletions(-) create mode 100644 infra/scripts/ensure-artifact-bucket.sh create mode 100644 infra/scripts/handle-plan-artifact.sh delete mode 100644 justfile.tg diff --git a/.github/actions/terragrunt/README.md b/.github/actions/terragrunt/README.md index ff59465e..56c33109 100644 --- a/.github/actions/terragrunt/README.md +++ b/.github/actions/terragrunt/README.md @@ -10,7 +10,7 @@ This GitHub Action sets up **Terraform** and **Terragrunt** and runs a specified - Optionally passes Terragrunt 
variables via JSON tfvars - Supports `plan` mode for producing local saved plan files - Supports `init` mode for outputs-only reads -- Uses the repo-local `./.github/actions/just` action for saved plan artifact upload and download +- Relies on shared Terragrunt root hooks for per-stack saved plan artifact upload and download - Exports Terragrunt outputs as compact JSON when state exists The Terragrunt install step is kept in this repo-local action rather than hidden behind a third-party Terragrunt wrapper action so the repo can control the exact setup-action revision and react quickly to GitHub Actions runtime deprecations or nested dependency warnings. @@ -38,9 +38,9 @@ The Terragrunt install step is kept in this repo-local action rather than hidden - `apply` Runs `terragrunt apply -auto-approve` - `plan` - Runs `terragrunt plan -detailed-exitcode -out=/terragrunt.tfplan`, then renders `terragrunt.plan.txt` and writes `terragrunt.plan.meta.json` via the repo `justfile.tg` recipe `terragrunt-plan-render`. It then uploads those files to S3 through the repo-local `./.github/actions/just` action using the AWS credentials already configured in the job. + Runs `terragrunt plan -detailed-exitcode -out=/terragrunt.tfplan`. The shared Terragrunt root `after_hook` then renders `terragrunt.plan.txt`, writes `terragrunt.plan.meta.json`, and uploads the per-stack plan bundle to the derived plan bucket when `TG_ENABLE_PLAN_ARTIFACTS=true` and `PLAN_RUN_ID` is set. - `apply_plan` - Downloads the saved plan files into `tg_directory` via the repo-local `./.github/actions/just` action and `justfile.tg`, using the caller-provided `PLAN_ARTIFACT_S3_PREFIX` environment variable plus the stack-derived suffix from `tg_directory`. It then fails if the binary plan file or `terragrunt.plan.meta.json` is missing, reads `has_changes` from the saved metadata file, and skips apply with a GitHub Actions warning when the saved plan contains no mutating resource changes. 
Otherwise it runs `terragrunt apply` against the absolute stack-path plan file. + Runs `terragrunt apply` against the absolute stack-path plan file. The shared Terragrunt root `before_hook` downloads the saved plan bundle into `tg_directory` when `TG_ENABLE_PLAN_ARTIFACTS=true` and `PLAN_RUN_ID` is set, and fails early if the saved metadata reports mocked dependency outputs. - `destroy` Runs `terragrunt destroy -auto-approve` - `init` @@ -48,16 +48,17 @@ The Terragrunt install step is kept in this repo-local action rather than hidden ## Saved Plan Layout -- One run-level metadata file is stored separately by the shared infra wrapper at: - - `/infra-plan-metadata/plan-metadata.json` +- One run-level metadata file is stored separately by the shared infra wrapper as a GitHub Actions artifact: + - artifact name: `infra-plan-metadata` + - file: `plan-metadata.json` - Each Terragrunt stack or module stores its own plan bundle at: - - `/terragrunt-plan-/terragrunt.tfplan` - - `/terragrunt-plan-/terragrunt.plan.txt` - - `/terragrunt-plan-/terragrunt.plan.meta.json` + - `s3:///terragrunt_plan///terragrunt-plan-/terragrunt.tfplan` + - `s3:///terragrunt_plan///terragrunt-plan-/terragrunt.plan.txt` + - `s3:///terragrunt_plan///terragrunt-plan-/terragrunt.plan.meta.json` ## AWS Credentials -Configure AWS credentials in the workflow job before calling this action. The action then reuses those ambient credentials for Terragrunt itself and for any saved-plan upload or download steps. +Configure AWS credentials in the workflow job before calling this action. The action then reuses those ambient credentials for Terragrunt itself and for any Terragrunt-hook-driven saved-plan upload or download steps. ## Usage @@ -164,4 +165,4 @@ jobs: tg_action: apply_plan ``` -This action expects the workflow to download `terragrunt.tfplan`, `terragrunt.plan.txt`, and `terragrunt.plan.meta.json` into `tg_directory` before calling `tg_action: apply_plan`. 
+This action expects the workflow to set both `TG_ENABLE_PLAN_ARTIFACTS=true` and `PLAN_RUN_ID` when using cross-run saved plans so the shared Terragrunt root hooks can resolve the per-stack plan bundle location from the derived plan bucket and environment. diff --git a/.github/actions/terragrunt/action.yml b/.github/actions/terragrunt/action.yml index 9e1d9f66..a8188b7b 100644 --- a/.github/actions/terragrunt/action.yml +++ b/.github/actions/terragrunt/action.yml @@ -28,7 +28,7 @@ inputs: outputs: tg_outputs: description: "All Terraform outputs in JSON format" - value: ${{ steps.tg_outputs.outputs.terraform_json || steps.tg_outputs_skip.outputs.terraform_json }} + value: ${{ steps.tg_outputs.outputs.terraform_json }} runs: using: "composite" @@ -61,54 +61,12 @@ runs: run: | echo "$OVERRIDE_TG_VARS" | jq -c . > ${{ inputs.tg_directory }}/override_tg_vars.tfvars.json - - name: Download saved plan artifacts - if: inputs.tg_action == 'apply_plan' - uses: ./.github/actions/just - env: - TG_DIRECTORY: ${{ inputs.tg_directory }} - with: - aws_region: ${{ inputs.aws_region }} - justfile_path: justfile.tg - just_action: terragrunt-plan-download - - - name: Verify plan artifact files exist - if: inputs.tg_action == 'apply_plan' - shell: bash - run: | - test -f "${{ inputs.tg_directory }}/terragrunt.tfplan" || { - echo "Expected plan file '${{ inputs.tg_directory }}/terragrunt.tfplan' was not found before apply_plan." >&2 - exit 1 - } - test -f "${{ inputs.tg_directory }}/terragrunt.plan.meta.json" || { - echo "Expected plan metadata file '${{ inputs.tg_directory }}/terragrunt.plan.meta.json' was not found before apply_plan." 
>&2 - exit 1 - } - - - name: Check saved plan for mutating changes - if: inputs.tg_action == 'apply_plan' - id: apply_plan_guard - shell: bash - working-directory: ${{ inputs.tg_directory }} - run: | - PLAN_PATH="$(pwd)/terragrunt.tfplan" - PLAN_META_PATH="$(pwd)/terragrunt.plan.meta.json" - - if [ "$(jq -r '.has_changes' "$PLAN_META_PATH")" != "true" ]; then - echo "::warning title=Empty saved plan::Saved plan '$PLAN_PATH' contains no mutating resource changes. Skipping apply." - echo "should_apply=false" >> "$GITHUB_OUTPUT" - elif [ "$(jq -r '.contains_mocked_outputs // false' "$PLAN_META_PATH")" = "true" ]; then - echo "::error title=Saved plan contains mocked outputs::Saved plan '$PLAN_PATH' was created while Terragrunt dependency mock outputs were in use. Regenerate the plan after upstream real outputs exist." - exit 1 - else - echo "should_apply=true" >> "$GITHUB_OUTPUT" - fi - - name: Action Terragrunt - if: inputs.tg_action != 'apply_plan' || steps.apply_plan_guard.outputs.should_apply == 'true' id: terragrunt_action shell: bash env: TF_IN_AUTOMATION: true + TG_PLAN_LOG_FILENAME: terragrunt.plan.log working-directory: ${{ inputs.tg_directory }} run: | PLAN_PATH="$(pwd)/terragrunt.tfplan" @@ -156,29 +114,8 @@ runs: ;; esac - - name: Render plan sidecar artifacts - if: inputs.tg_action == 'plan' - uses: ./.github/actions/just - env: - TG_DIRECTORY: ${{ inputs.tg_directory }} - TG_PLAN_EXIT_CODE: ${{ steps.terragrunt_action.outputs.plan_exit_code }} - TG_PLAN_CONTAINS_MOCKED_OUTPUTS: ${{ steps.terragrunt_action.outputs.plan_contains_mocked_outputs }} - with: - justfile_path: justfile.tg - just_action: terragrunt-plan-render - - - name: Upload saved plan artifacts - if: inputs.tg_action == 'plan' - uses: ./.github/actions/just - env: - TG_DIRECTORY: ${{ inputs.tg_directory }} - with: - aws_region: ${{ inputs.aws_region }} - justfile_path: justfile.tg - just_action: terragrunt-plan-upload - - name: Capture Terraform Outputs - if: inputs.tg_action != 'destroy' && 
(inputs.tg_action != 'apply_plan' || steps.apply_plan_guard.outputs.should_apply == 'true') + if: inputs.tg_action != 'destroy' id: tg_outputs shell: bash working-directory: ${{ inputs.tg_directory }} @@ -192,10 +129,3 @@ runs: fi echo "terraform_json=$TERRAGRUNT_OUTPUTS" >> "$GITHUB_OUTPUT" echo "✅ Terraform outputs captured successfully." - - - name: Capture Terraform Outputs For Skipped Apply Plan - if: inputs.tg_action == 'apply_plan' && steps.apply_plan_guard.outputs.should_apply != 'true' - id: tg_outputs_skip - shell: bash - run: | - echo "terraform_json={}" >> "$GITHUB_OUTPUT" diff --git a/.github/docs/README.md b/.github/docs/README.md index 563cdc8f..3d7402de 100644 --- a/.github/docs/README.md +++ b/.github/docs/README.md @@ -82,13 +82,13 @@ flowchart LR ### Infra And Code Rollout - `shared_infra_plan.yml` - Plan wrapper around `shared_infra.yml`. It takes resolved workflow inputs directly, derives a single plan-artifact S3 prefix via `justfile.tg`, uploads one run-level metadata file under `/infra-plan-metadata/plan-metadata.json` via `justfile.ci`, and then calls `shared_infra.yml` with `tg_action: plan` plus that same resolved prefix. The bucket resolution follows the same artifact split as ECR and build outputs: `dev` uses the `dev` code bucket, while non-`dev` environments reuse the `ci` code bucket. After the plan completes, it prints the current workflow `github.run_id` into both the logs and the GitHub Actions step summary as `plan_artifact_run_id`, and exposes that value as a reusable-workflow output. + Plan wrapper around `shared_infra.yml`. It takes resolved workflow inputs directly, uploads one run-level `plan-metadata.json` file as a GitHub Actions artifact named `infra-plan-metadata`, and then calls `shared_infra.yml` with `tg_action: plan` plus `plan_run_id: ${{ github.run_id }}`. 
After the plan completes, it prints the current workflow `github.run_id` into both the logs and the GitHub Actions step summary as `plan_artifact_run_id`, and exposes that value as a reusable-workflow output. - `shared_infra_apply.yml` Direct-input apply wrapper around `shared_infra.yml`. It takes resolved workflow inputs directly and calls `shared_infra.yml` with `tg_action: apply`. - `shared_infra_apply_from_plan.yml` - Apply-from-plan wrapper around `shared_infra.yml`. It takes `plan_artifact_run_id`, resolves the same artifact bucket split used by release artifacts (`dev` stays on `dev`, non-`dev` uses `ci`) inside its `metadata` job, configures artifact-account AWS credentials once for that job, derives the matching plan-artifact S3 prefix via `justfile.tg`, downloads `infra-plan-metadata` from that location via `justfile.ci`, reads the frozen graph inputs back out, and then calls `shared_infra.yml` with `tg_action: apply_plan` plus that same resolved prefix. + Apply-from-plan wrapper around `shared_infra.yml`. It takes `plan_artifact_run_id`, downloads the `infra-plan-metadata` GitHub artifact from that earlier workflow run, reads the frozen graph inputs back out, and then calls `shared_infra.yml` with `tg_action: apply_plan` plus `plan_run_id: `. Per-stack plan bundle download still happens inside the shared Terragrunt root `before_hook`. - `shared_infra.yml` - Pure ordered infra graph executor. It applies shared stacks first, then runtime stacks, then frontend infrastructure. Shared stacks now include the CloudWatch observability dashboard. It accepts `tg_action` so the same graph can run a normal apply, upload derived per-stack plan artifacts to the resolved code bucket under `terragrunt_plan/`, or apply from previously uploaded plan artifacts. 
The wrapper workflows resolve one `plan_artifact_s3_prefix` and set it in the workflow env once, while each Terragrunt job configures AWS credentials at job start and then reuses that ambient session in the repo-local Terragrunt action. That means each infra run has one shared `plan-metadata.json` file for the whole graph and one separate saved plan bundle per Terragrunt stack or module. In `apply_plan` mode, each stack job first downloads its own saved plan files via `justfile.tg` through the Terragrunt action, then the Terragrunt action validates and applies those local files. Its visible step labels now follow the high-level operation, so both direct apply and apply-from-plan render as `Apply` while plan still renders as `Plan`. Bootstrap-sensitive edges such as `security -> network` should be modeled with Terragrunt `dependency` blocks plus constrained `mock_outputs` in the live stack so `plan` and `validate` can run before upstream state exists, while `apply` still resolves real outputs. + Pure ordered infra graph executor. It applies shared stacks first, then runtime stacks, then frontend infrastructure. Shared stacks now include the CloudWatch observability dashboard. It accepts `tg_action` so the same graph can run a normal apply, upload derived per-stack plan artifacts to the dedicated plan bucket under `terragrunt_plan/`, or apply from previously uploaded plan artifacts. The wrapper workflows now pass a single `plan_run_id`, while each Terragrunt job configures AWS credentials at job start and then reuses that ambient session in the repo-local Terragrunt action. That means each infra run has one shared run-level metadata artifact (`infra-plan-metadata`) for the whole graph and one separate saved plan bundle per Terragrunt stack or module. Saved-plan transfer is opt-in: the shared workflow sets `TG_ENABLE_PLAN_ARTIFACTS=true` only for `plan` and `apply_plan`. 
In `plan` mode, the shared Terragrunt root `after_hook` renders and uploads each per-stack plan bundle. In `apply_plan` mode, the shared Terragrunt root `before_hook` downloads the saved plan bundle before `terragrunt apply` runs and fails if the saved metadata says mocked outputs were used. Its visible step labels now follow the high-level operation, so both direct apply and apply-from-plan render as `Apply` while plan still renders as `Plan`. Bootstrap-sensitive edges such as `security -> network` should be modeled with Terragrunt `dependency` blocks plus constrained `mock_outputs` in the live stack so `plan` and `validate` can run before upstream state exists, while `apply` still resolves real outputs. - The shared infra wrappers must forward the permissions required by the nested reusable call chain. In practice that means `id-token: write` everywhere the Terragrunt action may assume AWS OIDC and `contents: read` for checkout. The shared plan/apply wrappers now rely on AWS access to the shared code bucket rather than GitHub artifact permissions for cross-run recovery. - `shared_deploy.yml` Rolls out Lambda code, optional migrations, optional reconciliation Lambdas, ECS task and service updates, and optional frontend deploys. Its multi-step AWS jobs now configure credentials once at job start and let the local `just` and Terragrunt actions reuse that ambient session. The reusable workflow renders its Lambda and ECS CodeDeploy AppSpec files from the shared templates under `config/deploy/`, and its mutating `just` steps should target `justfile.deploy` rather than the repo-root `justfile`. @@ -142,18 +142,17 @@ Run these checks on every CI, workflow, or deploy-contract change. 
- compare every caller `with:` block against the callee `workflow_call.inputs` - compare expected outputs against actual `jobs..outputs.*` - verify optional inputs are intentionally omitted, not accidentally missing -- the repo-local `./.github/actions/terragrunt` action supports `tg_action: plan` for producing the binary plan locally; it renders `terragrunt.plan.txt` and writes `terragrunt.plan.meta.json` via `justfile.tg` (`terragrunt-plan-render`) -- `./.github/actions/terragrunt` always uploads per-stack plan artifacts on `plan` and always downloads them on `apply_plan`, using the caller-provided `PLAN_ARTIFACT_S3_PREFIX` environment variable, so graph executors like `shared_infra.yml` do not need separate `./.github/actions/just` steps for those transfers +- the repo-local `./.github/actions/terragrunt` action supports `tg_action: plan` for producing the binary plan locally; the shared Terragrunt root `after_hook` then renders `terragrunt.plan.txt` and writes `terragrunt.plan.meta.json` +- shared Terragrunt root hooks now upload per-stack plan artifacts on `plan` and download them on `apply_plan` only when `TG_ENABLE_PLAN_ARTIFACTS=true`, using the caller-provided `PLAN_RUN_ID` plus the root-derived `plan_bucket`, so graph executors like `shared_infra.yml` do not need separate `./.github/actions/just` steps for those transfers - both repo-local composite actions, `./.github/actions/just` and `./.github/actions/terragrunt`, now assume AWS credentials are already configured in the current job when they need AWS access. 
The repo pattern is to run `aws-actions/configure-aws-credentials` at the top of each AWS-using job and then call the local actions without extra auth inputs - `./.github/actions/just` installs the requested `just` version through `extractions/setup-crate@v2` in the same minimal composite-action shape as `extractions/setup-just`, rather than depending on `extractions/setup-just` itself - `./.github/actions/terragrunt` installs the requested Terragrunt version through `jdx/mise-action@v4`, while Terraform stays pinned separately through `hashicorp/setup-terraform` - saved infra-plan storage is intentionally split into two levels: - - one run-level metadata file at `<plan_artifact_s3_prefix>/infra-plan-metadata/plan-metadata.json` - - one per-stack plan bundle under `<plan_artifact_s3_prefix>/terragrunt-plan-<sanitized_dir>/` -- plan artifact storage follows the same artifact environment split as ECR and build outputs: `dev` uses the `dev` code bucket, while non-`dev` environments read and write `terragrunt_plan/` in the shared `ci` code bucket -- `./.github/actions/terragrunt` skips `apply_plan` with a warning when the saved `terragrunt.plan.meta.json` reports `has_changes: false` + - one run-level metadata artifact named `infra-plan-metadata` containing `plan-metadata.json` + - one per-stack plan bundle under `s3://<plan_bucket>/terragrunt_plan/<environment>/<run_id>/terragrunt-plan-<sanitized_dir>/` +- the dedicated plan bucket is repo-wide, derived as `---tfplan`, and plan uniqueness comes from `terragrunt_plan/<environment>/<run_id>/...` - `./.github/actions/terragrunt` derives its plan artifact name from `tg_directory`, so callers do not need to pass artifact naming inputs -- if `apply_plan` is used across separate workflow runs, pass the earlier workflow `run_id` through `plan_artifact_run_id`; the shared wrappers recover both metadata and per-stack plan files by deriving the matching `plan_artifact_s3_prefix` and reading from the shared code bucket under `terragrunt_plan/<environment>/<run_id>/...` +- if `apply_plan` is used across separate workflow runs, pass the earlier workflow `run_id` through
`plan_artifact_run_id`; the shared wrappers recover both metadata and per-stack plan files from the dedicated plan bucket under `terragrunt_plan/<environment>/<run_id>/...` - if a cross-run apply should not ask the operator to re-enter versions or recompute artifact resolution, store both the input versions and the resolved reusable-workflow outputs in a metadata artifact during plan and recover them in the apply wrapper from the earlier `run_id` - keep `shared_infra.yml` as the pure graph executor and prefer handling metadata creation/recovery in the dedicated plan/apply wrappers - when using `./.github/actions/just`, check whether the caller needs the repo-root `justfile` or an explicit `justfile_path` @@ -161,7 +160,6 @@ Run these checks on every CI, workflow, or deploy-contract change. - keep the split `just` ownership clear: - repo-root `justfile` for local/developer commands - `justfile.ci` for read-only CI helpers - - `justfile.tg` for Terragrunt plan artifact helpers (render/upload/download) - `justfile.deploy` for mutating CI build and deploy steps - `justfile.destroy` for explicit teardown and post-destroy cleanup steps diff --git a/.github/workflows/shared_infra.yml b/.github/workflows/shared_infra.yml index 84bb0a44..98edbd73 100644 --- a/.github/workflows/shared_infra.yml +++ b/.github/workflows/shared_infra.yml @@ -33,8 +33,8 @@ on: required: false type: string default: "apply" - plan_artifact_s3_prefix: - description: "Optional resolved S3 prefix used for saved plan artifacts" + plan_run_id: + description: "Optional unique run id used to derive saved plan artifact paths" required: false type: string default: "" @@ -53,7 +53,8 @@ env: AWS_OIDC_ROLE_ARN: arn:aws:iam::${{ vars.AWS_ACCOUNT_ID }}:role/${{ vars.PROJECT_NAME }}-${{ inputs.environment }}-github-oidc-role AWS_REGION: ${{ vars.AWS_REGION }} DOMAIN_NAME: ${{ vars.DOMAIN_NAME }} - PLAN_ARTIFACT_S3_PREFIX: ${{ inputs.plan_artifact_s3_prefix }} + TG_ENABLE_PLAN_ARTIFACTS: ${{ (inputs.tg_action == 'plan' ||
inputs.tg_action == 'apply_plan') && 'true' || 'false' }} + PLAN_RUN_ID: ${{ inputs.plan_run_id }} TG_ACTION_LABEL: ${{ (inputs.tg_action == 'apply' || inputs.tg_action == 'apply_plan') && 'Apply' || inputs.tg_action == 'plan' && 'Plan' || inputs.tg_action == 'destroy' && 'Destroy' || inputs.tg_action == 'init' && 'Init' || 'Run' }} jobs: diff --git a/.github/workflows/shared_infra_apply_from_plan.yml b/.github/workflows/shared_infra_apply_from_plan.yml index 078362e4..08bfc836 100644 --- a/.github/workflows/shared_infra_apply_from_plan.yml +++ b/.github/workflows/shared_infra_apply_from_plan.yml @@ -20,8 +20,6 @@ permissions: env: AWS_OIDC_ROLE_ARN: arn:aws:iam::${{ vars.AWS_ACCOUNT_ID }}:role/${{ vars.PROJECT_NAME }}-${{ inputs.environment }}-github-oidc-role AWS_REGION: ${{ vars.AWS_REGION }} - ARTIFACT_ENVIRONMENT: ${{ inputs.environment == 'dev' && 'dev' || 'ci' }} - ARTIFACT_AWS_OIDC_ROLE_ARN: arn:aws:iam::${{ vars.AWS_ACCOUNT_ID }}:role/${{ vars.PROJECT_NAME }}-${{ inputs.environment == 'dev' && 'dev' || 'ci' }}-github-oidc-role jobs: metadata: @@ -31,66 +29,23 @@ jobs: code_bucket: ${{ steps.read_metadata.outputs.code_bucket }} lambda_matrix: ${{ steps.read_metadata.outputs.lambda_matrix }} bootstrap_image_uri: ${{ steps.read_metadata.outputs.bootstrap_image_uri }} - plan_artifact_s3_prefix: ${{ steps.plan_artifact_s3_prefix.outputs.just_outputs }} service_matrix: ${{ steps.read_metadata.outputs.service_matrix }} steps: - uses: actions/checkout@v6 - - uses: aws-actions/configure-aws-credentials@v6 + - name: Download plan metadata artifact + uses: actions/download-artifact@v5 with: - role-to-assume: ${{ env.ARTIFACT_AWS_OIDC_ROLE_ARN }} - aws-region: ${{ env.AWS_REGION }} - - - name: Get shared code bucket outputs - uses: ./.github/actions/terragrunt - id: code_action - with: - tg_directory: infra/live/${{ env.ARTIFACT_ENVIRONMENT }}/aws/code_bucket - tg_action: init - - - name: Get bucket name - id: get_bucket_name - env: - TG_OUTPUTS: ${{ 
steps.code_action.outputs.tg_outputs }} - run: | - echo "bucket=$(echo "$TG_OUTPUTS" | jq -r '.bucket.value // empty')" >> "$GITHUB_OUTPUT" - - - name: Get plan artifact S3 prefix - id: plan_artifact_s3_prefix - uses: ./.github/actions/just - env: - BUCKET_NAME: ${{ steps.get_bucket_name.outputs.bucket }} - ENVIRONMENT: ${{ inputs.environment }} - RUN_ID: ${{ inputs.plan_artifact_run_id }} - with: - justfile_path: justfile.tg - just_action: terragrunt-plan-base-s3-prefix - - - name: Check requested plan artifact run id exists - shell: bash - env: - PLAN_ARTIFACT_S3_PREFIX: ${{ steps.plan_artifact_s3_prefix.outputs.just_outputs }} - PLAN_ARTIFACT_RUN_ID: ${{ inputs.plan_artifact_run_id }} - run: | - metadata_object="${PLAN_ARTIFACT_S3_PREFIX}/infra-plan-metadata/plan-metadata.json" - if ! aws s3 ls "$metadata_object" >/dev/null 2>&1; then - echo "::error title=Plan artifact run id not found::No saved plan metadata was found for plan_artifact_run_id '${PLAN_ARTIFACT_RUN_ID}' at '${metadata_object}'. Check the run id and make sure that earlier plan workflow completed and uploaded artifacts." - exit 1 - fi - - - name: Download plan metadata from S3 - uses: ./.github/actions/just - env: - PLAN_ARTIFACT_S3_PREFIX: ${{ steps.plan_artifact_s3_prefix.outputs.just_outputs }} - with: - justfile_path: justfile.ci - just_action: infra-plan-metadata-download + name: infra-plan-metadata + github-token: ${{ github.token }} + run-id: ${{ inputs.plan_artifact_run_id }} + path: . - name: Check plan metadata artifact shell: bash run: | if [ ! -f plan-metadata.json ]; then - echo "::error title=Missing plan metadata artifact::Expected 'plan-metadata.json' at ${{ steps.plan_artifact_s3_prefix.outputs.just_outputs }}/infra-plan-metadata/plan-metadata.json, but it was not downloaded." 
+ echo "::error title=Missing plan metadata artifact::Expected artifact 'infra-plan-metadata' containing 'plan-metadata.json' from workflow run '${{ inputs.plan_artifact_run_id }}', but it was not downloaded." exit 1 fi @@ -116,4 +71,4 @@ jobs: bootstrap_image_uri: ${{ needs.metadata.outputs.bootstrap_image_uri }} service_matrix: ${{ needs.metadata.outputs.service_matrix }} tg_action: apply_plan - plan_artifact_s3_prefix: ${{ needs.metadata.outputs.plan_artifact_s3_prefix }} + plan_run_id: ${{ inputs.plan_artifact_run_id }} diff --git a/.github/workflows/shared_infra_plan.yml b/.github/workflows/shared_infra_plan.yml index 6ad67abf..9485b9d9 100644 --- a/.github/workflows/shared_infra_plan.yml +++ b/.github/workflows/shared_infra_plan.yml @@ -40,34 +40,15 @@ permissions: env: AWS_OIDC_ROLE_ARN: arn:aws:iam::${{ vars.AWS_ACCOUNT_ID }}:role/${{ vars.PROJECT_NAME }}-${{ inputs.environment }}-github-oidc-role - AWS_REGION: ${{ vars.AWS_REGION }} jobs: metadata: runs-on: ubuntu-latest - outputs: - plan_artifact_s3_prefix: ${{ steps.plan_artifact_s3_prefix.outputs.just_outputs }} steps: - uses: actions/checkout@v6 with: ref: ${{ inputs.infra_version }} - - uses: aws-actions/configure-aws-credentials@v6 - with: - role-to-assume: ${{ env.AWS_OIDC_ROLE_ARN }} - aws-region: ${{ env.AWS_REGION }} - - - name: Get plan artifact S3 prefix - id: plan_artifact_s3_prefix - uses: ./.github/actions/just - env: - BUCKET_NAME: ${{ inputs.code_bucket }} - ENVIRONMENT: ${{ inputs.environment }} - RUN_ID: ${{ github.run_id }} - with: - justfile_path: justfile.tg - just_action: terragrunt-plan-base-s3-prefix - - name: Write plan metadata from workflow inputs shell: bash run: | @@ -81,13 +62,12 @@ jobs: } EOF - - name: Upload plan metadata to S3 - uses: ./.github/actions/just - env: - PLAN_ARTIFACT_S3_PREFIX: ${{ steps.plan_artifact_s3_prefix.outputs.just_outputs }} + - name: Upload plan metadata artifact + uses: actions/upload-artifact@v4 with: - justfile_path: justfile.ci - just_action: 
infra-plan-metadata-upload + name: infra-plan-metadata + path: plan-metadata.json + retention-days: 14 infra: needs: @@ -101,7 +81,7 @@ jobs: bootstrap_image_uri: ${{ inputs.bootstrap_image_uri }} service_matrix: ${{ inputs.service_matrix }} tg_action: plan - plan_artifact_s3_prefix: ${{ needs.metadata.outputs.plan_artifact_s3_prefix }} + plan_run_id: ${{ github.run_id }} plan_context: name: Plan Context diff --git a/REPO_INSTRUCTIONS.md b/REPO_INSTRUCTIONS.md index 0edad44a..6aacb201 100644 --- a/REPO_INSTRUCTIONS.md +++ b/REPO_INSTRUCTIONS.md @@ -117,6 +117,6 @@ These instructions apply to the entire repository. ## High-Signal Edit Warnings -- before editing `justfile.destroy` or `justfile.tg`, print an explicit terminal warning in commentary (destroy/terragrunt command ownership boundary) +- before editing `justfile.destroy`, print an explicit terminal warning in commentary (destroy command ownership boundary) - before editing `.github/workflows/shared_*.yml`, print an explicit terminal warning in commentary (shared CI workflow blast radius) - before editing `infra/modules/aws/_shared/**`, print an explicit terminal warning in commentary (shared-contract blast radius) diff --git a/infra/README.md b/infra/README.md index d632a168..7f8cf925 100644 --- a/infra/README.md +++ b/infra/README.md @@ -28,13 +28,15 @@ The root Terragrunt file derives state paths from the live stack path: - bucket: `---tfstate` - key: `///terraform.tfstate` -Shared artifact names also follow environment-aware conventions from `infra/root.hcl`: +Shared artifact names also follow naming conventions from `infra/root.hcl`: - shared artifact base: `dev -> ...-dev`, otherwise `...-ci` +- dedicated saved-plan bucket: `---tfplan` - code bucket: `-code` - ECS ECR repository: `-ecr` -- saved Terragrunt plan artifacts: `s3:///terragrunt_plan///...` -- code-bucket lifecycle inputs: `code_artifact_expiration_days` for deployable code artifacts and `infra_plan_artifact_expiration_days` for 
`terragrunt_plan/` +- saved Terragrunt plan artifacts: `s3:///terragrunt_plan///...` +- code-bucket lifecycle inputs: `code_artifact_expiration_days` for deployable code artifacts and `infra_plan_artifact_expiration_days` for `terragrunt_plan/` when the code-bucket module is still used for plan retention +- during `terragrunt init`, the root hook ensures the dedicated saved-plan bucket exists; interactive runs prompt before creation and non-interactive runs create it automatically So a stack at: @@ -153,7 +155,7 @@ That `containers/lib` directory is helper code only and is not treated as a depl - build workflows produce Lambda zips and container images - `*_infra` wrappers need the inputs required to apply infra safely, such as directory-derived stack matrices and any artifact-derived bootstrap references - in `prod`, the `*_infra` wrappers read shared artifact resources from `ci` but only apply service and task stacks in `prod` -- saved `plan` / `apply_plan` artifacts live in the shared code bucket under `terragrunt_plan///...`; `dev` uses the `dev` code bucket, while non-`dev` environments reuse the shared `ci` code bucket +- saved `plan` / `apply_plan` artifacts live in the dedicated plan bucket under `terragrunt_plan///...` - deploy workflows: - publish Lambda versions and use Lambda CodeDeploy - optionally invoke the `migrations` Lambda when it is part of the Lambda deploy matrix diff --git a/infra/root.hcl b/infra/root.hcl index 6e713ae9..f0b91334 100644 --- a/infra/root.hcl +++ b/infra/root.hcl @@ -12,6 +12,7 @@ locals { global_vars = read_terragrunt_config(find_in_parent_folders("global_vars.hcl")) environment_vars = read_terragrunt_config(find_in_parent_folders("environment_vars.hcl")) + infra_root_dir = abspath(dirname(find_in_parent_folders("root.hcl"))) project_name = element(split("/", local.github_repo), 1) @@ -20,6 +21,7 @@ locals { deploy_role_name = "${local.project_name}-${local.environment}-github-oidc-role" deploy_role_arn = 
"arn:aws:iam::${local.aws_account_id}:role/${local.deploy_role_name}" state_bucket = "${local.base_reference}-tfstate" + plan_bucket = "${local.base_reference}-tfplan" state_key = "${local.environment}/${local.provider}/${local.module}/terraform.tfstate" state_lock_table = "${local.project_name}-tf-lockid" # separate shared artifact resources when dev, otherwise ci @@ -35,6 +37,40 @@ terraform { "bash", "-c", "echo STATE:${local.state_bucket}/${local.state_key} TABLE:${local.state_lock_table}" ] } + + before_hook "ensure_plan_bucket" { + commands = ["init"] + execute = [ + "bash", + "${local.infra_root_dir}/scripts/ensure-artifact-bucket.sh", + local.plan_bucket, + local.aws_region, + ] + } + + before_hook "download_saved_plan" { + commands = ["apply"] + execute = [ + "bash", + "${local.infra_root_dir}/scripts/handle-plan-artifact.sh", + "download", + get_terragrunt_dir(), + local.plan_bucket, + local.environment, + ] + } + + after_hook "upload_saved_plan" { + commands = ["plan"] + execute = [ + "bash", + "${local.infra_root_dir}/scripts/handle-plan-artifact.sh", + "upload", + get_terragrunt_dir(), + local.plan_bucket, + local.environment, + ] + } } remote_state { @@ -101,6 +137,7 @@ inputs = merge( deploy_role_name = local.deploy_role_name deploy_role_arn = local.deploy_role_arn state_bucket = local.state_bucket + plan_bucket = local.plan_bucket state_lock_table = local.state_lock_table code_bucket = local.code_bucket ecr_repository_name = local.ecr_repository_name diff --git a/infra/scripts/ensure-artifact-bucket.sh b/infra/scripts/ensure-artifact-bucket.sh new file mode 100644 index 00000000..9061c38d --- /dev/null +++ b/infra/scripts/ensure-artifact-bucket.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +set -euo pipefail + +bucket_name="${1:?bucket name is required}" +aws_region="${2:?aws region is required}" + +if aws s3api head-bucket --bucket "$bucket_name" >/dev/null 2>&1; then + exit 0 +fi + +if [ -t 0 ] && [ -t 1 ]; then + printf "Artifact bucket '%s' does not 
exist. Create it in %s? [y/N] " "$bucket_name" "$aws_region" >&2 + read -r response + case "$response" in + [yY]|[yY][eE][sS]) ;; + *) + echo "Artifact bucket creation declined." >&2 + exit 1 + ;; + esac +fi + +if [ "$aws_region" = "us-east-1" ]; then + aws s3api create-bucket --bucket "$bucket_name" >/dev/null +else + aws s3api create-bucket --bucket "$bucket_name" --create-bucket-configuration "LocationConstraint=$aws_region" >/dev/null +fi diff --git a/infra/scripts/handle-plan-artifact.sh b/infra/scripts/handle-plan-artifact.sh new file mode 100644 index 00000000..1b2276b3 --- /dev/null +++ b/infra/scripts/handle-plan-artifact.sh @@ -0,0 +1,67 @@ +#!/usr/bin/env bash +set -euo pipefail + +mode="${1:?mode is required}" +logical_tg_dir="${2:?terragrunt directory is required}" +plan_bucket="${3:?plan bucket is required}" +environment="${4:?environment is required}" +infra_plan_dir="${INFRA_PLAN_DIR:-terragrunt_plan}" + +plan_path="${PWD}/terragrunt.tfplan" +plan_text_path="${PWD}/terragrunt.plan.txt" +plan_meta_path="${PWD}/terragrunt.plan.meta.json" +plan_json_path="${PWD}/terragrunt.plan.json" +plan_log_path="${PWD}/${TG_PLAN_LOG_FILENAME:-terragrunt.plan.log}" + +if [[ "${TG_ENABLE_PLAN_ARTIFACTS:-false}" != "true" ]]; then + exit 0 +fi + +if [[ -z "${PLAN_RUN_ID:-}" ]]; then + exit 0 +fi + +sanitized_dir="$(echo "$logical_tg_dir" | tr '/.' '--')" +artifact_s3_prefix="s3://${plan_bucket}/${infra_plan_dir}/${environment}/${PLAN_RUN_ID}/terragrunt-plan-${sanitized_dir}" + +case "$mode" in + download) + aws s3 cp "${artifact_s3_prefix}/terragrunt.tfplan" "$plan_path" + aws s3 cp "${artifact_s3_prefix}/terragrunt.plan.txt" "$plan_text_path" + aws s3 cp "${artifact_s3_prefix}/terragrunt.plan.meta.json" "$plan_meta_path" + + if [[ "$(jq -r '.contains_mocked_outputs // false' "$plan_meta_path")" == "true" ]]; then + echo "Saved plan for '$logical_tg_dir' contains mocked outputs. Regenerate it after upstream real outputs exist." 
>&2 + exit 1 + fi + ;; + upload) + if [[ ! -f "$plan_path" ]]; then + exit 0 + fi + + terraform show -no-color "$plan_path" > "$plan_text_path" + terraform show -json "$plan_path" > "$plan_json_path" + + contains_mocked_outputs=false + if [[ -f "$plan_log_path" ]] && grep -Fq "mock outputs provided and returning those in dependency output" "$plan_log_path"; then + contains_mocked_outputs=true + fi + + jq -n \ + --arg tg_directory "$logical_tg_dir" \ + --argjson has_changes "$(jq -r '([(.resource_changes // [])[]?.change.actions[]?] | any(. != "no-op")) or ((.output_changes // {}) | length > 0)' "$plan_json_path")" \ + --argjson contains_mocked_outputs "$contains_mocked_outputs" \ + '{tg_directory: $tg_directory, has_changes: $has_changes, contains_mocked_outputs: $contains_mocked_outputs}' \ + > "$plan_meta_path" + + aws s3 cp "$plan_path" "${artifact_s3_prefix}/terragrunt.tfplan" + aws s3 cp "$plan_text_path" "${artifact_s3_prefix}/terragrunt.plan.txt" + aws s3 cp "$plan_meta_path" "${artifact_s3_prefix}/terragrunt.plan.meta.json" + rm -f "$plan_json_path" + ;; + *) + echo "Unknown mode '$mode'." >&2 + exit 2 + ;; +esac diff --git a/justfile.ci b/justfile.ci index 1e402741..c0ec334c 100644 --- a/justfile.ci +++ b/justfile.ci @@ -108,36 +108,6 @@ get-version-file-keys: | jq -s -c . -# Upload shared infra plan metadata to the shared code bucket. -infra-plan-metadata-upload: - #!/usr/bin/env bash - set -euo pipefail - - if [[ -z "${PLAN_ARTIFACT_S3_PREFIX:-}" ]]; then - echo "❌ PLAN_ARTIFACT_S3_PREFIX environment variable is not set." - exit 1 - fi - - artifact_s3_prefix="$(just --justfile "{{PROJECT_DIR}}/justfile.tg" terragrunt-plan-base-s3-prefix)" - - aws s3 cp "plan-metadata.json" "${artifact_s3_prefix}/infra-plan-metadata/plan-metadata.json" - - -# Download shared infra plan metadata from the shared code bucket. 
-infra-plan-metadata-download: - #!/usr/bin/env bash - set -euo pipefail - - if [[ -z "${PLAN_ARTIFACT_S3_PREFIX:-}" ]]; then - echo "❌ PLAN_ARTIFACT_S3_PREFIX environment variable is not set." - exit 1 - fi - - artifact_s3_prefix="$(just --justfile "{{PROJECT_DIR}}/justfile.tg" terragrunt-plan-base-s3-prefix)" - - aws s3 cp "${artifact_s3_prefix}/infra-plan-metadata/plan-metadata.json" "plan-metadata.json" - - # Return the Lambda artifact directory name from the repo-root justfile. code-bucket-get-lambda-artifact-dir: @just --justfile "{{PROJECT_DIR}}/justfile" code-bucket-get-lambda-artifact-dir diff --git a/justfile.tg b/justfile.tg deleted file mode 100644 index 85f050aa..00000000 --- a/justfile.tg +++ /dev/null @@ -1,174 +0,0 @@ -# Terragrunt plan artifact helpers. -# This file is for producing, downloading, and uploading saved Terragrunt plan files. - -PROJECT_DIR := `just --justfile justfile --evaluate PROJECT_DIR` -INFRA_PLAN_DIR := `just --justfile justfile --evaluate INFRA_PLAN_DIR` - -PLAN_FILE := "terragrunt.tfplan" -PLAN_TEXT_FILE := "terragrunt.plan.txt" -PLAN_META_FILE := "terragrunt.plan.meta.json" - - -# Render Terragrunt plan sidecars (plan text + metadata) for an existing binary plan. -# -# Expected environment variables: -# - TG_DIRECTORY: directory containing the saved plan file -# - TG_PLAN_EXIT_CODE: detailed-exitcode from `terragrunt plan` (0 or 2) -# - TG_PLAN_CONTAINS_MOCKED_OUTPUTS: whether Terragrunt reported dependency mock outputs during plan -terragrunt-plan-render: - #!/usr/bin/env bash - set -euo pipefail - - if [[ -z "${TG_DIRECTORY:-}" ]]; then - echo "❌ TG_DIRECTORY environment variable is not set." - exit 1 - fi - - if [[ -z "${TG_PLAN_EXIT_CODE:-}" ]]; then - echo "❌ TG_PLAN_EXIT_CODE environment variable is not set." - exit 1 - fi - - if [[ -z "${TG_PLAN_CONTAINS_MOCKED_OUTPUTS:-}" ]]; then - echo "❌ TG_PLAN_CONTAINS_MOCKED_OUTPUTS environment variable is not set." 
- exit 1 - fi - - cd "$TG_DIRECTORY" - - PLAN_PATH="$(pwd)/{{PLAN_FILE}}" - PLAN_TEXT_PATH="$(pwd)/{{PLAN_TEXT_FILE}}" - PLAN_META_PATH="$(pwd)/{{PLAN_META_FILE}}" - - if [[ ! -f "$PLAN_PATH" ]]; then - echo "❌ Expected plan file '$PLAN_PATH' was not found." - exit 1 - fi - - terragrunt show -no-color "$PLAN_PATH" > "$PLAN_TEXT_PATH" - - jq -n \ - --arg tg_directory "$TG_DIRECTORY" \ - --argjson exit_code "$TG_PLAN_EXIT_CODE" \ - --argjson has_changes "$([ "$TG_PLAN_EXIT_CODE" -eq 2 ] && echo true || echo false)" \ - --argjson contains_mocked_outputs "$TG_PLAN_CONTAINS_MOCKED_OUTPUTS" \ - '{tg_directory: $tg_directory, exit_code: $exit_code, has_changes: $has_changes, contains_mocked_outputs: $contains_mocked_outputs}' \ - > "$PLAN_META_PATH" - - echo "Terragrunt binary plan path: $PLAN_PATH" - ls -l "$PLAN_PATH" - echo "Terragrunt rendered plan path: $PLAN_TEXT_PATH" - cat "$PLAN_TEXT_PATH" - echo "Terragrunt plan metadata path: $PLAN_META_PATH" - cat "$PLAN_META_PATH" - - -# Derive the shared S3 base prefix for Terragrunt plan artifacts. -terragrunt-plan-base-prefix: - #!/usr/bin/env bash - set -euo pipefail - - if [[ -n "${PLAN_ARTIFACT_S3_PREFIX:-}" ]]; then - echo "${PLAN_ARTIFACT_S3_PREFIX#s3://*/}" - exit 0 - fi - - if [[ -z "${ENVIRONMENT:-}" ]]; then - echo "❌ ENVIRONMENT environment variable is not set." - exit 1 - fi - - if [[ -z "${RUN_ID:-}" ]]; then - echo "❌ RUN_ID environment variable is not set." - exit 1 - fi - - infra_plan_dir="${TF_VAR_infra_plan_dir:-{{INFRA_PLAN_DIR}}}" - - echo "${infra_plan_dir}/${ENVIRONMENT}/${RUN_ID}" - - -# Derive the shared S3 URI prefix for Terragrunt plan artifacts. -terragrunt-plan-base-s3-prefix: - #!/usr/bin/env bash - set -euo pipefail - - if [[ -n "${PLAN_ARTIFACT_S3_PREFIX:-}" ]]; then - echo "${PLAN_ARTIFACT_S3_PREFIX}" - exit 0 - fi - - if [[ -z "${BUCKET_NAME:-}" ]]; then - echo "❌ BUCKET_NAME environment variable is not set." 
- exit 1 - fi - - artifact_base_prefix="$(just --justfile "{{PROJECT_DIR}}/justfile.tg" terragrunt-plan-base-prefix)" - - echo "s3://${BUCKET_NAME}/${artifact_base_prefix}" - - -# Derive the shared S3 prefix for Terragrunt plan artifacts. -terragrunt-plan-prefix: - #!/usr/bin/env bash - set -euo pipefail - - if [[ -z "${TG_DIRECTORY:-}" ]]; then - echo "❌ TG_DIRECTORY environment variable is not set." - exit 1 - fi - - artifact_base_prefix="$(just --justfile "{{PROJECT_DIR}}/justfile.tg" terragrunt-plan-base-prefix)" - sanitized_dir="$(echo "$TG_DIRECTORY" | tr '/.' '--')" - artifact_name="terragrunt-plan-${sanitized_dir}" - artifact_prefix="${artifact_base_prefix}/${artifact_name}" - - echo "$artifact_prefix" - - -# Download saved Terragrunt plan files for a stack from the shared code bucket. -terragrunt-plan-download: - #!/usr/bin/env bash - set -euo pipefail - - if [[ -z "${PLAN_ARTIFACT_S3_PREFIX:-}" && -z "${BUCKET_NAME:-}" ]]; then - echo "❌ PLAN_ARTIFACT_S3_PREFIX or BUCKET_NAME environment variable is required." - exit 1 - fi - - if [[ -z "${TG_DIRECTORY:-}" ]]; then - echo "❌ TG_DIRECTORY environment variable is not set." - exit 1 - fi - - artifact_s3_base_prefix="$(just --justfile "{{PROJECT_DIR}}/justfile.tg" terragrunt-plan-base-s3-prefix)" - artifact_prefix="$(just --justfile "{{PROJECT_DIR}}/justfile.tg" terragrunt-plan-prefix)" - - mkdir -p "$TG_DIRECTORY" - - aws s3 cp "${artifact_s3_base_prefix}/${artifact_prefix##*/}/{{PLAN_FILE}}" "${TG_DIRECTORY}/{{PLAN_FILE}}" - aws s3 cp "${artifact_s3_base_prefix}/${artifact_prefix##*/}/{{PLAN_TEXT_FILE}}" "${TG_DIRECTORY}/{{PLAN_TEXT_FILE}}" - aws s3 cp "${artifact_s3_base_prefix}/${artifact_prefix##*/}/{{PLAN_META_FILE}}" "${TG_DIRECTORY}/{{PLAN_META_FILE}}" - - -# Upload saved Terragrunt plan files for a stack to the shared code bucket. 
-terragrunt-plan-upload: - #!/usr/bin/env bash - set -euo pipefail - - if [[ -z "${PLAN_ARTIFACT_S3_PREFIX:-}" && -z "${BUCKET_NAME:-}" ]]; then - echo "❌ PLAN_ARTIFACT_S3_PREFIX or BUCKET_NAME environment variable is required." - exit 1 - fi - - if [[ -z "${TG_DIRECTORY:-}" ]]; then - echo "❌ TG_DIRECTORY environment variable is not set." - exit 1 - fi - - artifact_s3_base_prefix="$(just --justfile "{{PROJECT_DIR}}/justfile.tg" terragrunt-plan-base-s3-prefix)" - artifact_prefix="$(just --justfile "{{PROJECT_DIR}}/justfile.tg" terragrunt-plan-prefix)" - - aws s3 cp "${TG_DIRECTORY}/{{PLAN_FILE}}" "${artifact_s3_base_prefix}/${artifact_prefix##*/}/{{PLAN_FILE}}" - aws s3 cp "${TG_DIRECTORY}/{{PLAN_TEXT_FILE}}" "${artifact_s3_base_prefix}/${artifact_prefix##*/}/{{PLAN_TEXT_FILE}}" - aws s3 cp "${TG_DIRECTORY}/{{PLAN_META_FILE}}" "${artifact_s3_base_prefix}/${artifact_prefix##*/}/{{PLAN_META_FILE}}" From a8487d5184731b3885bf951f0d0d5687642325fa Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Fri, 15 May 2026 15:52:20 +0100 Subject: [PATCH 14/34] chore: upload artifact fixes --- .github/actions/terragrunt/README.md | 6 +++--- .github/docs/README.md | 4 ++-- .github/workflows/shared_infra.yml | 2 +- infra/README.md | 14 +++++++++++++- infra/live/global_vars.hcl | 2 +- infra/root.hcl | 11 ++++++----- ...ct-bucket.sh => ensure-plan-artifact-bucket.sh} | 12 ++++++++---- infra/scripts/handle-plan-artifact.sh | 12 +++++++++--- justfile | 2 -- 9 files changed, 43 insertions(+), 22 deletions(-) rename infra/scripts/{ensure-artifact-bucket.sh => ensure-plan-artifact-bucket.sh} (54%) diff --git a/.github/actions/terragrunt/README.md b/.github/actions/terragrunt/README.md index 56c33109..1a968820 100644 --- a/.github/actions/terragrunt/README.md +++ b/.github/actions/terragrunt/README.md @@ -38,9 +38,9 @@ The Terragrunt install step is kept in this repo-local action rather than hidden - `apply` Runs `terragrunt apply -auto-approve` - `plan` - Runs `terragrunt plan 
-detailed-exitcode -out=/terragrunt.tfplan`. The shared Terragrunt root `after_hook` then renders `terragrunt.plan.txt`, writes `terragrunt.plan.meta.json`, and uploads the per-stack plan bundle to the derived plan bucket when `TG_ENABLE_PLAN_ARTIFACTS=true` and `PLAN_RUN_ID` is set. + Runs `terragrunt plan -detailed-exitcode -out=/terragrunt.tfplan`. The shared Terragrunt root `after_hook` then renders `terragrunt.plan.txt`, writes `terragrunt.plan.meta.json`, and uploads the per-stack plan bundle to the derived plan bucket when `TG_ENABLE_PLAN_ARTIFACTS=true` and `PLAN_ARTIFACT_RUN_ID` is set. - `apply_plan` - Runs `terragrunt apply` against the absolute stack-path plan file. The shared Terragrunt root `before_hook` downloads the saved plan bundle into `tg_directory` when `TG_ENABLE_PLAN_ARTIFACTS=true` and `PLAN_RUN_ID` is set, and fails early if the saved metadata reports mocked dependency outputs. + Runs `terragrunt apply` against the absolute stack-path plan file. The shared Terragrunt root `before_hook` downloads the saved plan bundle into `tg_directory` when `TG_ENABLE_PLAN_ARTIFACTS=true` and `PLAN_ARTIFACT_RUN_ID` is set, and fails early if the saved metadata reports mocked dependency outputs. - `destroy` Runs `terragrunt destroy -auto-approve` - `init` @@ -165,4 +165,4 @@ jobs: tg_action: apply_plan ``` -This action expects the workflow to set both `TG_ENABLE_PLAN_ARTIFACTS=true` and `PLAN_RUN_ID` when using cross-run saved plans so the shared Terragrunt root hooks can resolve the per-stack plan bundle location from the derived plan bucket and environment. +This action expects the workflow to set both `TG_ENABLE_PLAN_ARTIFACTS=true` and `PLAN_ARTIFACT_RUN_ID` when using cross-run saved plans so the shared Terragrunt root hooks can resolve the per-stack plan bundle location from the derived plan bucket and environment. 
diff --git a/.github/docs/README.md b/.github/docs/README.md index 3d7402de..952ffc81 100644 --- a/.github/docs/README.md +++ b/.github/docs/README.md @@ -88,7 +88,7 @@ flowchart LR - `shared_infra_apply_from_plan.yml` Apply-from-plan wrapper around `shared_infra.yml`. It takes `plan_artifact_run_id`, downloads the `infra-plan-metadata` GitHub artifact from that earlier workflow run, reads the frozen graph inputs back out, and then calls `shared_infra.yml` with `tg_action: apply_plan` plus `plan_run_id: `. Per-stack plan bundle download still happens inside the shared Terragrunt root `before_hook`. - `shared_infra.yml` - Pure ordered infra graph executor. It applies shared stacks first, then runtime stacks, then frontend infrastructure. Shared stacks now include the CloudWatch observability dashboard. It accepts `tg_action` so the same graph can run a normal apply, upload derived per-stack plan artifacts to the dedicated plan bucket under `terragrunt_plan/`, or apply from previously uploaded plan artifacts. The wrapper workflows now pass a single `plan_run_id`, while each Terragrunt job configures AWS credentials at job start and then reuses that ambient session in the repo-local Terragrunt action. That means each infra run has one shared run-level metadata artifact (`infra-plan-metadata`) for the whole graph and one separate saved plan bundle per Terragrunt stack or module. Saved-plan transfer is opt-in: the shared workflow sets `TG_ENABLE_PLAN_ARTIFACTS=true` only for `plan` and `apply_plan`. In `plan` mode, the shared Terragrunt root `after_hook` renders and uploads each per-stack plan bundle. In `apply_plan` mode, the shared Terragrunt root `before_hook` downloads the saved plan bundle before `terragrunt apply` runs and fails if the saved metadata says mocked outputs were used. Its visible step labels now follow the high-level operation, so both direct apply and apply-from-plan render as `Apply` while plan still renders as `Plan`. 
Bootstrap-sensitive edges such as `security -> network` should be modeled with Terragrunt `dependency` blocks plus constrained `mock_outputs` in the live stack so `plan` and `validate` can run before upstream state exists, while `apply` still resolves real outputs. + Pure ordered infra graph executor. It applies shared stacks first, then runtime stacks, then frontend infrastructure. Shared stacks now include the CloudWatch observability dashboard. It accepts `tg_action` so the same graph can run a normal apply, upload derived per-stack plan artifacts to the dedicated plan bucket under `terragrunt_plan/`, or apply from previously uploaded plan artifacts. The wrapper workflows now pass a single `plan_run_id`, exported to Terragrunt jobs as `PLAN_ARTIFACT_RUN_ID`, while each Terragrunt job configures AWS credentials at job start and then reuses that ambient session in the repo-local Terragrunt action. That means each infra run has one shared run-level metadata artifact (`infra-plan-metadata`) for the whole graph and one separate saved plan bundle per Terragrunt stack or module. Saved-plan transfer is opt-in: the shared workflow sets `TG_ENABLE_PLAN_ARTIFACTS=true` only for `plan` and `apply_plan`. In `plan` mode, the shared Terragrunt root `after_hook` renders and uploads each per-stack plan bundle. In `apply_plan` mode, the shared Terragrunt root `before_hook` downloads the saved plan bundle before `terragrunt apply` runs and fails if the saved metadata says mocked outputs were used. Its visible step labels now follow the high-level operation, so both direct apply and apply-from-plan render as `Apply` while plan still renders as `Plan`. Bootstrap-sensitive edges such as `security -> network` should be modeled with Terragrunt `dependency` blocks plus constrained `mock_outputs` in the live stack so `plan` and `validate` can run before upstream state exists, while `apply` still resolves real outputs. 
- The shared infra wrappers must forward the permissions required by the nested reusable call chain. In practice that means `id-token: write` everywhere the Terragrunt action may assume AWS OIDC and `contents: read` for checkout. The shared plan/apply wrappers now rely on AWS access to the shared code bucket rather than GitHub artifact permissions for cross-run recovery. - `shared_deploy.yml` Rolls out Lambda code, optional migrations, optional reconciliation Lambdas, ECS task and service updates, and optional frontend deploys. Its multi-step AWS jobs now configure credentials once at job start and let the local `just` and Terragrunt actions reuse that ambient session. The reusable workflow renders its Lambda and ECS CodeDeploy AppSpec files from the shared templates under `config/deploy/`, and its mutating `just` steps should target `justfile.deploy` rather than the repo-root `justfile`. @@ -143,7 +143,7 @@ Run these checks on every CI, workflow, or deploy-contract change. - compare expected outputs against actual `jobs..outputs.*` - verify optional inputs are intentionally omitted, not accidentally missing - the repo-local `./.github/actions/terragrunt` action supports `tg_action: plan` for producing the binary plan locally; the shared Terragrunt root `after_hook` then renders `terragrunt.plan.txt` and writes `terragrunt.plan.meta.json` -- shared Terragrunt root hooks now upload per-stack plan artifacts on `plan` and download them on `apply_plan` only when `TG_ENABLE_PLAN_ARTIFACTS=true`, using the caller-provided `PLAN_RUN_ID` plus the root-derived `plan_bucket`, so graph executors like `shared_infra.yml` do not need separate `./.github/actions/just` steps for those transfers +- shared Terragrunt root hooks now upload per-stack plan artifacts on `plan` and download them on `apply_plan` only when `TG_ENABLE_PLAN_ARTIFACTS=true`, using the caller-provided `PLAN_ARTIFACT_RUN_ID` plus the root-derived `plan_bucket`, so graph executors like `shared_infra.yml` do not 
need separate `./.github/actions/just` steps for those transfers - both repo-local composite actions, `./.github/actions/just` and `./.github/actions/terragrunt`, now assume AWS credentials are already configured in the current job when they need AWS access. The repo pattern is to run `aws-actions/configure-aws-credentials` at the top of each AWS-using job and then call the local actions without extra auth inputs - `./.github/actions/just` installs the requested `just` version through `extractions/setup-crate@v2` in the same minimal composite-action shape as `extractions/setup-just`, rather than depending on `extractions/setup-just` itself - `./.github/actions/terragrunt` installs the requested Terragrunt version through `jdx/mise-action@v4`, while Terraform stays pinned separately through `hashicorp/setup-terraform` diff --git a/.github/workflows/shared_infra.yml b/.github/workflows/shared_infra.yml index 98edbd73..b7e0a115 100644 --- a/.github/workflows/shared_infra.yml +++ b/.github/workflows/shared_infra.yml @@ -54,7 +54,7 @@ env: AWS_REGION: ${{ vars.AWS_REGION }} DOMAIN_NAME: ${{ vars.DOMAIN_NAME }} TG_ENABLE_PLAN_ARTIFACTS: ${{ (inputs.tg_action == 'plan' || inputs.tg_action == 'apply_plan') && 'true' || 'false' }} - PLAN_RUN_ID: ${{ inputs.plan_run_id }} + PLAN_ARTIFACT_RUN_ID: ${{ inputs.plan_run_id }} TG_ACTION_LABEL: ${{ (inputs.tg_action == 'apply' || inputs.tg_action == 'apply_plan') && 'Apply' || inputs.tg_action == 'plan' && 'Plan' || inputs.tg_action == 'destroy' && 'Destroy' || inputs.tg_action == 'init' && 'Init' || 'Run' }} jobs: diff --git a/infra/README.md b/infra/README.md index 7f8cf925..5a365464 100644 --- a/infra/README.md +++ b/infra/README.md @@ -36,7 +36,7 @@ Shared artifact names also follow naming conventions from `infra/root.hcl`: - ECS ECR repository: `-ecr` - saved Terragrunt plan artifacts: `s3:///terragrunt_plan///...` - code-bucket lifecycle inputs: `code_artifact_expiration_days` for deployable code artifacts and 
`infra_plan_artifact_expiration_days` for `terragrunt_plan/` when the code-bucket module is still used for plan retention -- during `terragrunt init`, the root hook ensures the dedicated saved-plan bucket exists; interactive runs prompt before creation and non-interactive runs create it automatically +- during `terragrunt init` and saved-plan `plan`, the root hook ensures the dedicated saved-plan bucket exists; interactive runs prompt before creation and non-interactive runs fail if no prompt is possible So a stack at: @@ -197,6 +197,18 @@ just --justfile justfile.deploy lambda-get-version just --justfile justfile.deploy frontend-build ``` +For a local saved-plan run that can upload plan artifacts through the normal repo wrapper, enable artifact mode, provide a unique run id, and pass the Terragrunt operation as one quoted argument: + +```sh +TG_ENABLE_PLAN_ARTIFACTS=true \ +PLAN_ARTIFACT_RUN_ID="local-$(date +%s)" \ +just tg dev aws/oidc 'plan -out=terragrunt.tfplan' +``` + +The `tg` recipe treats the final argument as the Terragrunt operation string, so quoting lets you pass flags such as `-out=...` through the wrapper. The current saved-plan hook expects the binary plan filename to be `terragrunt.tfplan`; if you choose a different `-out` filename, the upload hook will not find it. + +Per-stack saved-plan bundles in S3 use the live stack identity rather than your full local filesystem path, for example `terragrunt-plan-dev-aws-oidc`. 
+ ## Naming Conventions - `task_` diff --git a/infra/live/global_vars.hcl b/infra/live/global_vars.hcl index fac34513..4c04287b 100644 --- a/infra/live/global_vars.hcl +++ b/infra/live/global_vars.hcl @@ -24,7 +24,7 @@ locals { "secretsmanager:*", "kms:*", "acm:*", - "route53:*", + "route53:**", "cognito-idp:*", "tag:GetResources", ] diff --git a/infra/root.hcl b/infra/root.hcl index f0b91334..23959ece 100644 --- a/infra/root.hcl +++ b/infra/root.hcl @@ -23,6 +23,7 @@ locals { state_bucket = "${local.base_reference}-tfstate" plan_bucket = "${local.base_reference}-tfplan" state_key = "${local.environment}/${local.provider}/${local.module}/terraform.tfstate" + plan_artifact_stack_key = "${local.environment}/${local.provider}/${local.module}" state_lock_table = "${local.project_name}-tf-lockid" # separate shared artifact resources when dev, otherwise ci artifact_base = local.environment == "dev" ? "${local.base_reference}-${local.environment}" : "${local.base_reference}-ci" @@ -38,11 +39,11 @@ terraform { ] } - before_hook "ensure_plan_bucket" { - commands = ["init"] + before_hook "ensure_plan_artifact_bucket" { + commands = ["init", "plan"] execute = [ "bash", - "${local.infra_root_dir}/scripts/ensure-artifact-bucket.sh", + "${local.infra_root_dir}/scripts/ensure-plan-artifact-bucket.sh", local.plan_bucket, local.aws_region, ] @@ -54,7 +55,7 @@ terraform { "bash", "${local.infra_root_dir}/scripts/handle-plan-artifact.sh", "download", - get_terragrunt_dir(), + local.plan_artifact_stack_key, local.plan_bucket, local.environment, ] @@ -66,7 +67,7 @@ terraform { "bash", "${local.infra_root_dir}/scripts/handle-plan-artifact.sh", "upload", - get_terragrunt_dir(), + local.plan_artifact_stack_key, local.plan_bucket, local.environment, ] diff --git a/infra/scripts/ensure-artifact-bucket.sh b/infra/scripts/ensure-plan-artifact-bucket.sh similarity index 54% rename from infra/scripts/ensure-artifact-bucket.sh rename to infra/scripts/ensure-plan-artifact-bucket.sh index 
9061c38d..290ef556 100644 --- a/infra/scripts/ensure-artifact-bucket.sh +++ b/infra/scripts/ensure-plan-artifact-bucket.sh @@ -8,16 +8,20 @@ if aws s3api head-bucket --bucket "$bucket_name" >/dev/null 2>&1; then exit 0 fi -if [ -t 0 ] && [ -t 1 ]; then - printf "Artifact bucket '%s' does not exist. Create it in %s? [y/N] " "$bucket_name" "$aws_region" >&2 - read -r response +if [ -r /dev/tty ] && [ -w /dev/tty ]; then + printf "Plan bucket '%s' does not exist. Create it in %s? [y/N] " "$bucket_name" "$aws_region" > /dev/tty + read -r response < /dev/tty case "$response" in [yY]|[yY][eE][sS]) ;; *) - echo "Artifact bucket creation declined." >&2 + echo "Plan bucket creation declined." >&2 exit 1 ;; esac +else + echo "Plan bucket '$bucket_name' does not exist and no interactive terminal is available for confirmation." >&2 + echo "Create it manually or rerun from a terminal where Terragrunt hooks can prompt." >&2 + exit 1 fi if [ "$aws_region" = "us-east-1" ]; then diff --git a/infra/scripts/handle-plan-artifact.sh b/infra/scripts/handle-plan-artifact.sh index 1b2276b3..6cd801f3 100644 --- a/infra/scripts/handle-plan-artifact.sh +++ b/infra/scripts/handle-plan-artifact.sh @@ -14,21 +14,25 @@ plan_json_path="${PWD}/terragrunt.plan.json" plan_log_path="${PWD}/${TG_PLAN_LOG_FILENAME:-terragrunt.plan.log}" if [[ "${TG_ENABLE_PLAN_ARTIFACTS:-false}" != "true" ]]; then + echo "TG_ENABLE_PLAN_ARTIFACTS=false, skipping plan artifact ${mode}." >&2 exit 0 fi -if [[ -z "${PLAN_RUN_ID:-}" ]]; then - exit 0 +if [[ -z "${PLAN_ARTIFACT_RUN_ID:-}" ]]; then + echo "PLAN_ARTIFACT_RUN_ID is required when TG_ENABLE_PLAN_ARTIFACTS=true." >&2 + exit 1 fi sanitized_dir="$(echo "$logical_tg_dir" | tr '/.' 
'--')" -artifact_s3_prefix="s3://${plan_bucket}/${infra_plan_dir}/${environment}/${PLAN_RUN_ID}/terragrunt-plan-${sanitized_dir}" +artifact_s3_prefix="s3://${plan_bucket}/${infra_plan_dir}/${environment}/${PLAN_ARTIFACT_RUN_ID}/terragrunt-plan-${sanitized_dir}" case "$mode" in download) + echo "Downloading plan artifacts from ${artifact_s3_prefix}" >&2 aws s3 cp "${artifact_s3_prefix}/terragrunt.tfplan" "$plan_path" aws s3 cp "${artifact_s3_prefix}/terragrunt.plan.txt" "$plan_text_path" aws s3 cp "${artifact_s3_prefix}/terragrunt.plan.meta.json" "$plan_meta_path" + echo "Downloaded plan artifacts for ${logical_tg_dir}" >&2 if [[ "$(jq -r '.contains_mocked_outputs // false' "$plan_meta_path")" == "true" ]]; then echo "Saved plan for '$logical_tg_dir' contains mocked outputs. Regenerate it after upstream real outputs exist." >&2 @@ -55,9 +59,11 @@ case "$mode" in '{tg_directory: $tg_directory, has_changes: $has_changes, contains_mocked_outputs: $contains_mocked_outputs}' \ > "$plan_meta_path" + echo "Uploading plan artifacts for ${logical_tg_dir} to ${artifact_s3_prefix}" >&2 aws s3 cp "$plan_path" "${artifact_s3_prefix}/terragrunt.tfplan" aws s3 cp "$plan_text_path" "${artifact_s3_prefix}/terragrunt.plan.txt" aws s3 cp "$plan_meta_path" "${artifact_s3_prefix}/terragrunt.plan.meta.json" + echo "Uploaded plan artifacts for ${logical_tg_dir}" >&2 rm -f "$plan_json_path" ;; *) diff --git a/justfile b/justfile index 6a50aa06..f29abaa9 100644 --- a/justfile +++ b/justfile @@ -3,8 +3,6 @@ _default: @just --list @printf '\nCI recipes (`just --justfile justfile.ci --list`):\n' @just --justfile justfile.ci --list - @printf '\nTerragrunt recipes (`just --justfile justfile.tg --list`):\n' - @just --justfile justfile.tg --list @printf '\nDeploy recipes (`just --justfile justfile.deploy --list`):\n' @just --justfile justfile.deploy --list @printf '\nDestroy recipes (`just --justfile justfile.destroy --list`):\n' From 49a06d85269466cce413b308f218f489c02616bc Mon Sep 17 00:00:00 
2001 From: chrispsheehan Date: Fri, 15 May 2026 16:05:56 +0100 Subject: [PATCH 15/34] chore: error handling --- infra/README.md | 10 +++++++++- infra/scripts/handle-plan-artifact.sh | 6 ++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/infra/README.md b/infra/README.md index 5a365464..de3eee0c 100644 --- a/infra/README.md +++ b/infra/README.md @@ -201,7 +201,7 @@ For a local saved-plan run that can upload plan artifacts through the normal rep ```sh TG_ENABLE_PLAN_ARTIFACTS=true \ -PLAN_ARTIFACT_RUN_ID="local-$(date +%s)" \ +PLAN_ARTIFACT_RUN_ID="local-example-run" \ just tg dev aws/oidc 'plan -out=terragrunt.tfplan' ``` @@ -209,6 +209,14 @@ The `tg` recipe treats the final argument as the Terragrunt operation string, so Per-stack saved-plan bundles in S3 use the live stack identity rather than your full local filesystem path, for example `terragrunt-plan-dev-aws-oidc`. +To apply that same saved plan later, reuse the same run id: + +```sh +TG_ENABLE_PLAN_ARTIFACTS=true \ +PLAN_ARTIFACT_RUN_ID="local-example-ru" \ +just tg dev aws/oidc 'apply terragrunt.tfplan' +``` + ## Naming Conventions - `task_` diff --git a/infra/scripts/handle-plan-artifact.sh b/infra/scripts/handle-plan-artifact.sh index 6cd801f3..2135e601 100644 --- a/infra/scripts/handle-plan-artifact.sh +++ b/infra/scripts/handle-plan-artifact.sh @@ -29,6 +29,12 @@ artifact_s3_prefix="s3://${plan_bucket}/${infra_plan_dir}/${environment}/${PLAN_ case "$mode" in download) echo "Downloading plan artifacts from ${artifact_s3_prefix}" >&2 + if ! aws s3 ls "${artifact_s3_prefix}/terragrunt.tfplan" >/dev/null 2>&1; then + echo "Saved plan artifact not found for ${logical_tg_dir} and PLAN_ARTIFACT_RUN_ID=${PLAN_ARTIFACT_RUN_ID}." 
>&2 + echo "Expected plan bundle at ${artifact_s3_prefix}" >&2 + exit 1 + fi + aws s3 cp "${artifact_s3_prefix}/terragrunt.tfplan" "$plan_path" aws s3 cp "${artifact_s3_prefix}/terragrunt.plan.txt" "$plan_text_path" aws s3 cp "${artifact_s3_prefix}/terragrunt.plan.meta.json" "$plan_meta_path" From ae902b03cd0441b6d22a1dade97cc9e14f071263 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Fri, 15 May 2026 16:14:06 +0100 Subject: [PATCH 16/34] chore: ensure_lifecycle() --- infra/README.md | 6 ++--- infra/live/prod/environment_vars.hcl | 20 ++++++++------- infra/root.hcl | 21 +++++++++------ infra/scripts/ensure-plan-artifact-bucket.sh | 27 ++++++++++++++++++++ 4 files changed, 54 insertions(+), 20 deletions(-) diff --git a/infra/README.md b/infra/README.md index de3eee0c..6959949c 100644 --- a/infra/README.md +++ b/infra/README.md @@ -35,8 +35,8 @@ Shared artifact names also follow naming conventions from `infra/root.hcl`: - code bucket: `-code` - ECS ECR repository: `-ecr` - saved Terragrunt plan artifacts: `s3:///terragrunt_plan///...` -- code-bucket lifecycle inputs: `code_artifact_expiration_days` for deployable code artifacts and `infra_plan_artifact_expiration_days` for `terragrunt_plan/` when the code-bucket module is still used for plan retention -- during `terragrunt init` and saved-plan `plan`, the root hook ensures the dedicated saved-plan bucket exists; interactive runs prompt before creation and non-interactive runs fail if no prompt is possible +- plan-bucket retention: `infra_plan_artifact_expiration_days` applies an S3 lifecycle rule to `terragrunt_plan/` in the dedicated saved-plan bucket +- during `terragrunt init` and saved-plan `plan`, the root hook ensures the dedicated saved-plan bucket exists; interactive runs prompt before creation, non-interactive runs fail if no prompt is possible, and successful checks also enforce the configured plan-artifact retention rule So a stack at: @@ -213,7 +213,7 @@ To apply that same saved plan later, reuse the 
same run id: ```sh TG_ENABLE_PLAN_ARTIFACTS=true \ -PLAN_ARTIFACT_RUN_ID="local-example-ru" \ +PLAN_ARTIFACT_RUN_ID="local-example-run" \ just tg dev aws/oidc 'apply terragrunt.tfplan' ``` diff --git a/infra/live/prod/environment_vars.hcl b/infra/live/prod/environment_vars.hcl index e2f272bf..be4a6c71 100644 --- a/infra/live/prod/environment_vars.hcl +++ b/infra/live/prod/environment_vars.hcl @@ -1,14 +1,16 @@ locals { - log_retention_days = 14 - deploy_branches = ["main"] - cognito_callback_urls = ["http://localhost:5173"] - cognito_logout_urls = ["http://localhost:5173"] + log_retention_days = 14 + deploy_branches = ["main"] + cognito_callback_urls = ["http://localhost:5173"] + cognito_logout_urls = ["http://localhost:5173"] + infra_plan_artifact_expiration_days = 30 } inputs = { - log_retention_days = local.log_retention_days - deploy_branches = local.deploy_branches - otel_sample_rate = 0.1 # 10% of traces sampled - callback_urls = local.cognito_callback_urls - logout_urls = local.cognito_logout_urls + log_retention_days = local.log_retention_days + deploy_branches = local.deploy_branches + otel_sample_rate = 0.1 # 10% of traces sampled + callback_urls = local.cognito_callback_urls + logout_urls = local.cognito_logout_urls + infra_plan_artifact_expiration_days = local.infra_plan_artifact_expiration_days } diff --git a/infra/root.hcl b/infra/root.hcl index 23959ece..40d8460d 100644 --- a/infra/root.hcl +++ b/infra/root.hcl @@ -16,15 +16,19 @@ locals { project_name = element(split("/", local.github_repo), 1) - aws_region = local.global_vars.inputs.aws_region - base_reference = "${local.aws_account_id}-${local.aws_region}-${local.project_name}" - deploy_role_name = "${local.project_name}-${local.environment}-github-oidc-role" - deploy_role_arn = "arn:aws:iam::${local.aws_account_id}:role/${local.deploy_role_name}" - state_bucket = "${local.base_reference}-tfstate" - plan_bucket = "${local.base_reference}-tfplan" - state_key = 
"${local.environment}/${local.provider}/${local.module}/terraform.tfstate" + aws_region = local.global_vars.inputs.aws_region + base_reference = "${local.aws_account_id}-${local.aws_region}-${local.project_name}" + deploy_role_name = "${local.project_name}-${local.environment}-github-oidc-role" + deploy_role_arn = "arn:aws:iam::${local.aws_account_id}:role/${local.deploy_role_name}" + state_bucket = "${local.base_reference}-tfstate" + plan_bucket = "${local.base_reference}-tfplan" + state_key = "${local.environment}/${local.provider}/${local.module}/terraform.tfstate" plan_artifact_stack_key = "${local.environment}/${local.provider}/${local.module}" - state_lock_table = "${local.project_name}-tf-lockid" + state_lock_table = "${local.project_name}-tf-lockid" + plan_artifact_retention_days = try( + local.environment_vars.inputs.infra_plan_artifact_expiration_days, + 1, + ) # separate shared artifact resources when dev, otherwise ci artifact_base = local.environment == "dev" ? "${local.base_reference}-${local.environment}" : "${local.base_reference}-ci" code_bucket = "${local.artifact_base}-code" @@ -46,6 +50,7 @@ terraform { "${local.infra_root_dir}/scripts/ensure-plan-artifact-bucket.sh", local.plan_bucket, local.aws_region, + tostring(local.plan_artifact_retention_days), ] } diff --git a/infra/scripts/ensure-plan-artifact-bucket.sh b/infra/scripts/ensure-plan-artifact-bucket.sh index 290ef556..0adc6ca9 100644 --- a/infra/scripts/ensure-plan-artifact-bucket.sh +++ b/infra/scripts/ensure-plan-artifact-bucket.sh @@ -3,8 +3,33 @@ set -euo pipefail bucket_name="${1:?bucket name is required}" aws_region="${2:?aws region is required}" +retention_days="${3:-0}" +plan_prefix="${INFRA_PLAN_DIR:-terragrunt_plan/}" + +if [[ "$plan_prefix" != */ ]]; then + plan_prefix="${plan_prefix}/" +fi + +ensure_lifecycle() { + if [[ "$retention_days" =~ ^[0-9]+$ ]] && [ "$retention_days" -gt 0 ]; then + aws s3api put-bucket-lifecycle-configuration \ + --bucket "$bucket_name" \ + 
--lifecycle-configuration "{ + \"Rules\": [ + { + \"ID\": \"expire-plan-artifacts\", + \"Status\": \"Enabled\", + \"Filter\": {\"Prefix\": \"$plan_prefix\"}, + \"Expiration\": {\"Days\": $retention_days} + } + ] + }" >/dev/null + echo "Ensured plan artifact retention of ${retention_days} days on s3://${bucket_name}/${plan_prefix}" + fi +} if aws s3api head-bucket --bucket "$bucket_name" >/dev/null 2>&1; then + ensure_lifecycle exit 0 fi @@ -29,3 +54,5 @@ if [ "$aws_region" = "us-east-1" ]; then else aws s3api create-bucket --bucket "$bucket_name" --create-bucket-configuration "LocationConstraint=$aws_region" >/dev/null fi + +ensure_lifecycle From 563002f30289a5477e42d234e25b3b39de5534b2 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Fri, 15 May 2026 16:31:28 +0100 Subject: [PATCH 17/34] fix: annoying python cli output --- justfile.deploy | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/justfile.deploy b/justfile.deploy index f58f03e9..cd9cd0d3 100644 --- a/justfile.deploy +++ b/justfile.deploy @@ -122,8 +122,8 @@ lambda-build: exit 1 fi - python3 -m venv venv - source venv/bin/activate + python3 -m venv .venv + source .venv/bin/activate LAMBDA_BUILD_DIR="{{PROJECT_DIR}}/{{LAMBDA_DIR}}/build" From d8b4e26e875f00f3cf1a1f64f9d3db312d8fbd00 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Fri, 15 May 2026 16:32:03 +0100 Subject: [PATCH 18/34] chore: TG_RESET_PLAN_ARTIFACT_BUCKET=true --- .github/workflows/shared_infra.yml | 2 ++ infra/README.md | 3 ++- infra/scripts/ensure-plan-artifact-bucket.sh | 5 ++++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/shared_infra.yml b/.github/workflows/shared_infra.yml index b7e0a115..43af3482 100644 --- a/.github/workflows/shared_infra.yml +++ b/.github/workflows/shared_infra.yml @@ -72,6 +72,8 @@ jobs: - name: ${{ env.TG_ACTION_LABEL }} oidc role infra uses: ./.github/actions/terragrunt + env: + TG_RESET_PLAN_ARTIFACT_BUCKET: "true" # this ensures that the plan artifact 
bucket retention (lifecycle) rule is reapplied when the bucket already exists; it does not delete or reset stored plan artifacts with: tg_directory: infra/live/${{ inputs.environment }}/aws/oidc tg_action: ${{ inputs.tg_action }} diff --git a/infra/README.md b/infra/README.md index 6959949c..88ac6740 100644 --- a/infra/README.md +++ b/infra/README.md @@ -36,7 +36,8 @@ Shared artifact names also follow naming conventions from `infra/root.hcl`: - ECS ECR repository: `-ecr` - saved Terragrunt plan artifacts: `s3:///terragrunt_plan///...` - plan-bucket retention: `infra_plan_artifact_expiration_days` applies an S3 lifecycle rule to `terragrunt_plan/` in the dedicated saved-plan bucket -- during `terragrunt init` and saved-plan `plan`, the root hook ensures the dedicated saved-plan bucket exists; interactive runs prompt before creation, non-interactive runs fail if no prompt is possible, and successful checks also enforce the configured plan-artifact retention rule +- during `terragrunt init` and saved-plan `plan`, the root hook ensures the dedicated saved-plan bucket exists; interactive runs prompt before creation and non-interactive runs fail if no prompt is possible +- to reapply the configured `infra_plan_artifact_expiration_days` lifecycle rule locally for an existing bucket, rerun with `TG_RESET_PLAN_ARTIFACT_BUCKET=true` So a stack at: diff --git a/infra/scripts/ensure-plan-artifact-bucket.sh b/infra/scripts/ensure-plan-artifact-bucket.sh index 0adc6ca9..b1596b3e 100644 --- a/infra/scripts/ensure-plan-artifact-bucket.sh +++ b/infra/scripts/ensure-plan-artifact-bucket.sh @@ -5,6 +5,7 @@ bucket_name="${1:?bucket name is required}" aws_region="${2:?aws region is required}" retention_days="${3:-0}" plan_prefix="${INFRA_PLAN_DIR:-terragrunt_plan/}" +reset_flag="${TG_RESET_PLAN_ARTIFACT_BUCKET:-false}" if [[ "$plan_prefix" != */ ]]; then plan_prefix="${plan_prefix}/" @@ -29,7 +30,9 @@ ensure_lifecycle() { } if aws s3api head-bucket --bucket "$bucket_name" >/dev/null 2>&1; then - ensure_lifecycle + if [ "$reset_flag" =
"true" ]; then + ensure_lifecycle + fi exit 0 fi From 8d6578b0e7576b053eea2cb3d65e7438180757f4 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Fri, 15 May 2026 16:47:16 +0100 Subject: [PATCH 19/34] fix: tf plan path --- .github/actions/terragrunt/README.md | 4 ++-- .github/actions/terragrunt/action.yml | 6 +++--- infra/scripts/handle-plan-artifact.sh | 3 +++ 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/actions/terragrunt/README.md b/.github/actions/terragrunt/README.md index 1a968820..fbe58fcc 100644 --- a/.github/actions/terragrunt/README.md +++ b/.github/actions/terragrunt/README.md @@ -38,9 +38,9 @@ The Terragrunt install step is kept in this repo-local action rather than hidden - `apply` Runs `terragrunt apply -auto-approve` - `plan` - Runs `terragrunt plan -detailed-exitcode -out=/terragrunt.tfplan`. The shared Terragrunt root `after_hook` then renders `terragrunt.plan.txt`, writes `terragrunt.plan.meta.json`, and uploads the per-stack plan bundle to the derived plan bucket when `TG_ENABLE_PLAN_ARTIFACTS=true` and `PLAN_ARTIFACT_RUN_ID` is set. + Runs `terragrunt plan -detailed-exitcode -out=terragrunt.tfplan`. The shared Terragrunt root `after_hook` then renders `terragrunt.plan.txt`, writes `terragrunt.plan.meta.json`, and uploads the per-stack plan bundle to the derived plan bucket when `TG_ENABLE_PLAN_ARTIFACTS=true` and `PLAN_ARTIFACT_RUN_ID` is set. - `apply_plan` - Runs `terragrunt apply` against the absolute stack-path plan file. The shared Terragrunt root `before_hook` downloads the saved plan bundle into `tg_directory` when `TG_ENABLE_PLAN_ARTIFACTS=true` and `PLAN_ARTIFACT_RUN_ID` is set, and fails early if the saved metadata reports mocked dependency outputs. + Runs `terragrunt apply terragrunt.tfplan`. 
The shared Terragrunt root `before_hook` downloads the saved plan bundle into the Terragrunt working directory when `TG_ENABLE_PLAN_ARTIFACTS=true` and `PLAN_ARTIFACT_RUN_ID` is set, and fails early if the saved metadata reports mocked dependency outputs. - `destroy` Runs `terragrunt destroy -auto-approve` - `init` diff --git a/.github/actions/terragrunt/action.yml b/.github/actions/terragrunt/action.yml index a8188b7b..1b43bd44 100644 --- a/.github/actions/terragrunt/action.yml +++ b/.github/actions/terragrunt/action.yml @@ -67,9 +67,10 @@ runs: env: TF_IN_AUTOMATION: true TG_PLAN_LOG_FILENAME: terragrunt.plan.log + TG_PLAN_LOG_ABS_PATH: ${{ github.workspace }}/${{ inputs.tg_directory }}/terragrunt.plan.log working-directory: ${{ inputs.tg_directory }} run: | - PLAN_PATH="$(pwd)/terragrunt.tfplan" + PLAN_PATH="terragrunt.tfplan" PLAN_LOG_PATH="$(pwd)/terragrunt.plan.log" case "${{ inputs.tg_action }}" in @@ -94,8 +95,7 @@ runs: echo "plan_exit_code=$plan_exit_code" >> "$GITHUB_OUTPUT" echo "plan_contains_mocked_outputs=$plan_contains_mocked_outputs" >> "$GITHUB_OUTPUT" - echo "Terragrunt binary plan path: $PLAN_PATH" - ls -l "$PLAN_PATH" + echo "Terragrunt binary plan path in cache: $PLAN_PATH" ;; apply_plan) diff --git a/infra/scripts/handle-plan-artifact.sh b/infra/scripts/handle-plan-artifact.sh index 2135e601..8fa0b24e 100644 --- a/infra/scripts/handle-plan-artifact.sh +++ b/infra/scripts/handle-plan-artifact.sh @@ -12,6 +12,7 @@ plan_text_path="${PWD}/terragrunt.plan.txt" plan_meta_path="${PWD}/terragrunt.plan.meta.json" plan_json_path="${PWD}/terragrunt.plan.json" plan_log_path="${PWD}/${TG_PLAN_LOG_FILENAME:-terragrunt.plan.log}" +fallback_plan_log_path="${TG_PLAN_LOG_ABS_PATH:-}" if [[ "${TG_ENABLE_PLAN_ARTIFACTS:-false}" != "true" ]]; then echo "TG_ENABLE_PLAN_ARTIFACTS=false, skipping plan artifact ${mode}." 
>&2 @@ -56,6 +57,8 @@ case "$mode" in contains_mocked_outputs=false if [[ -f "$plan_log_path" ]] && grep -Fq "mock outputs provided and returning those in dependency output" "$plan_log_path"; then contains_mocked_outputs=true + elif [[ -n "$fallback_plan_log_path" ]] && [[ -f "$fallback_plan_log_path" ]] && grep -Fq "mock outputs provided and returning those in dependency output" "$fallback_plan_log_path"; then + contains_mocked_outputs=true fi jq -n \ From 08ad19f4a9c1c2dd95984f74c02a7c8567bdf297 Mon Sep 17 00:00:00 2001 From: Chris Sheehan Date: Sun, 17 May 2026 14:14:35 +0100 Subject: [PATCH 20/34] chore: proper error for gha --- .github/actions/terragrunt/action.yml | 14 +++++++++++++- infra/scripts/handle-plan-artifact.sh | 2 +- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/.github/actions/terragrunt/action.yml b/.github/actions/terragrunt/action.yml index 1b43bd44..0be77a40 100644 --- a/.github/actions/terragrunt/action.yml +++ b/.github/actions/terragrunt/action.yml @@ -99,7 +99,19 @@ runs: ;; apply_plan) - terragrunt apply -auto-approve "$PLAN_PATH" + set +e + APPLY_LOG_PATH="$(pwd)/terragrunt.apply.log" + terragrunt apply -auto-approve "$PLAN_PATH" 2>&1 | tee "$APPLY_LOG_PATH" + apply_exit_code=${PIPESTATUS[0]} + set -e + + if [ "$apply_exit_code" -ne 0 ]; then + if grep -Fq "contains mocked outputs. Regenerate it after upstream real outputs exist." "$APPLY_LOG_PATH"; then + err_line="$(grep -F \"contains mocked outputs.
Regenerate it after upstream real outputs exist.\" \"$APPLY_LOG_PATH\" | head -n 1)" + echo "::error title=Saved plan contains mocked outputs::${err_line}" + fi + exit "$apply_exit_code" + fi ;; destroy) terragrunt destroy -auto-approve -compact-warnings -var-file=override_tg_vars.tfvars.json diff --git a/infra/scripts/handle-plan-artifact.sh b/infra/scripts/handle-plan-artifact.sh index 8fa0b24e..4cee9200 100644 --- a/infra/scripts/handle-plan-artifact.sh +++ b/infra/scripts/handle-plan-artifact.sh @@ -42,7 +42,7 @@ case "$mode" in echo "Downloaded plan artifacts for ${logical_tg_dir}" >&2 if [[ "$(jq -r '.contains_mocked_outputs // false' "$plan_meta_path")" == "true" ]]; then - echo "Saved plan for '$logical_tg_dir' contains mocked outputs. Regenerate it after upstream real outputs exist." >&2 + echo "Saved plan for '${logical_tg_dir}' contains mocked outputs. Regenerate it after upstream real outputs exist." >&2 exit 1 fi ;; From 786b278b88d9bffc2ef626ce44b7c6edb074494c Mon Sep 17 00:00:00 2001 From: Chris Sheehan Date: Sun, 17 May 2026 14:22:30 +0100 Subject: [PATCH 21/34] chore: more error handling --- .github/actions/terragrunt/action.yml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/.github/actions/terragrunt/action.yml b/.github/actions/terragrunt/action.yml index 0be77a40..75acae60 100644 --- a/.github/actions/terragrunt/action.yml +++ b/.github/actions/terragrunt/action.yml @@ -110,6 +110,30 @@ runs: err_line="$(grep -F \"contains mocked outputs. 
Regenerate it after upstream real outputs exist.\" \"$APPLY_LOG_PATH\" | head -n 1)" echo "::error title=Saved plan contains mocked outputs::${err_line}" fi + if grep -Fq "Error: Saved plan is stale" "$APPLY_LOG_PATH" || grep -Fq "Saved plan is stale" "$APPLY_LOG_PATH"; then + err_line="$(grep -F \"Saved plan is stale\" \"$APPLY_LOG_PATH\" | head -n 1)" + echo "::error title=Saved plan is stale::${err_line}" + fi + if grep -Fq "Saved plan artifact not found for" "$APPLY_LOG_PATH"; then + err_line="$(grep -F \"Saved plan artifact not found for\" \"$APPLY_LOG_PATH\" | head -n 1)" + echo "::error title=Saved plan artifact missing::${err_line}" + fi + if grep -Fq "PLAN_ARTIFACT_RUN_ID is required when TG_ENABLE_PLAN_ARTIFACTS=true" "$APPLY_LOG_PATH"; then + err_line="$(grep -F \"PLAN_ARTIFACT_RUN_ID is required when TG_ENABLE_PLAN_ARTIFACTS=true\" \"$APPLY_LOG_PATH\" | head -n 1)" + echo "::error title=Missing PLAN_ARTIFACT_RUN_ID::${err_line}" + fi + if grep -Fq "Plan bucket '" "$APPLY_LOG_PATH" && grep -Fq "does not exist" "$APPLY_LOG_PATH"; then + err_line="$(grep -F \"Plan bucket '\" \"$APPLY_LOG_PATH\" | head -n 1)" + echo "::error title=Plan bucket missing::${err_line}" + fi + if grep -Fq "Plan bucket creation declined." 
"$APPLY_LOG_PATH"; then + err_line="$(grep -F \"Plan bucket creation declined.\" \"$APPLY_LOG_PATH\" | head -n 1)" + echo "::error title=Plan bucket creation declined::${err_line}" + fi + if grep -Fq "no interactive terminal is available for confirmation" "$APPLY_LOG_PATH"; then + err_line="$(grep -F \"no interactive terminal is available for confirmation\" \"$APPLY_LOG_PATH\" | head -n 1)" + echo "::error title=Plan bucket confirmation unavailable::${err_line}" + fi exit "$apply_exit_code" fi ;; From 069282fb0a5e441ba321ceca8bed6c22928f0b31 Mon Sep 17 00:00:00 2001 From: Chris Sheehan Date: Sun, 17 May 2026 14:27:55 +0100 Subject: [PATCH 22/34] fix: '' escaping --- .github/actions/terragrunt/action.yml | 50 +++++++++++++-------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/.github/actions/terragrunt/action.yml b/.github/actions/terragrunt/action.yml index 75acae60..274b858e 100644 --- a/.github/actions/terragrunt/action.yml +++ b/.github/actions/terragrunt/action.yml @@ -106,34 +106,32 @@ runs: set -e if [ "$apply_exit_code" -ne 0 ]; then - if grep -Fq "contains mocked outputs. Regenerate it after upstream real outputs exist." "$APPLY_LOG_PATH"; then - err_line="$(grep -F \"contains mocked outputs. 
Regenerate it after upstream real outputs exist.\" \"$APPLY_LOG_PATH\" | head -n 1)" - echo "::error title=Saved plan contains mocked outputs::${err_line}" - fi - if grep -Fq "Error: Saved plan is stale" "$APPLY_LOG_PATH" || grep -Fq "Saved plan is stale" "$APPLY_LOG_PATH"; then - err_line="$(grep -F \"Saved plan is stale\" \"$APPLY_LOG_PATH\" | head -n 1)" - echo "::error title=Saved plan is stale::${err_line}" - fi - if grep -Fq "Saved plan artifact not found for" "$APPLY_LOG_PATH"; then - err_line="$(grep -F \"Saved plan artifact not found for\" \"$APPLY_LOG_PATH\" | head -n 1)" - echo "::error title=Saved plan artifact missing::${err_line}" - fi - if grep -Fq "PLAN_ARTIFACT_RUN_ID is required when TG_ENABLE_PLAN_ARTIFACTS=true" "$APPLY_LOG_PATH"; then - err_line="$(grep -F \"PLAN_ARTIFACT_RUN_ID is required when TG_ENABLE_PLAN_ARTIFACTS=true\" \"$APPLY_LOG_PATH\" | head -n 1)" - echo "::error title=Missing PLAN_ARTIFACT_RUN_ID::${err_line}" - fi - if grep -Fq "Plan bucket '" "$APPLY_LOG_PATH" && grep -Fq "does not exist" "$APPLY_LOG_PATH"; then - err_line="$(grep -F \"Plan bucket '\" \"$APPLY_LOG_PATH\" | head -n 1)" - echo "::error title=Plan bucket missing::${err_line}" - fi - if grep -Fq "Plan bucket creation declined." "$APPLY_LOG_PATH"; then - err_line="$(grep -F \"Plan bucket creation declined.\" \"$APPLY_LOG_PATH\" | head -n 1)" - echo "::error title=Plan bucket creation declined::${err_line}" + emit_error() { + local title="$1" + local pattern="$2" + + if grep -Fq "$pattern" "$APPLY_LOG_PATH"; then + local err_line + err_line="$(grep -F "$pattern" "$APPLY_LOG_PATH" | head -n 1)" + echo "::error title=${title}::${err_line}" + fi + } + + emit_error "Saved plan contains mocked outputs" "contains mocked outputs. Regenerate it after upstream real outputs exist." 
+ + if grep -Fq "Saved plan is stale" "$APPLY_LOG_PATH"; then + emit_error "Saved plan is stale" "Saved plan is stale" fi - if grep -Fq "no interactive terminal is available for confirmation" "$APPLY_LOG_PATH"; then - err_line="$(grep -F \"no interactive terminal is available for confirmation\" \"$APPLY_LOG_PATH\" | head -n 1)" - echo "::error title=Plan bucket confirmation unavailable::${err_line}" + + emit_error "Saved plan artifact missing" "Saved plan artifact not found for" + emit_error "Missing PLAN_ARTIFACT_RUN_ID" "PLAN_ARTIFACT_RUN_ID is required when TG_ENABLE_PLAN_ARTIFACTS=true" + + if grep -Fq "Plan bucket" "$APPLY_LOG_PATH" && grep -Fq "does not exist" "$APPLY_LOG_PATH"; then + emit_error "Plan bucket missing" "Plan bucket" fi + + emit_error "Plan bucket creation declined" "Plan bucket creation declined." + emit_error "Plan bucket confirmation unavailable" "no interactive terminal is available for confirmation" exit "$apply_exit_code" fi ;; From 651155ef79ef315e6ab7945e9a49a7a39d41cc41 Mon Sep 17 00:00:00 2001 From: Chris Sheehan Date: Sun, 17 May 2026 14:34:12 +0100 Subject: [PATCH 23/34] chore: use latest upload/download action versions --- .github/workflows/shared_infra_apply_from_plan.yml | 2 +- .github/workflows/shared_infra_plan.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/shared_infra_apply_from_plan.yml b/.github/workflows/shared_infra_apply_from_plan.yml index 08bfc836..8b1a8bdb 100644 --- a/.github/workflows/shared_infra_apply_from_plan.yml +++ b/.github/workflows/shared_infra_apply_from_plan.yml @@ -34,7 +34,7 @@ jobs: - uses: actions/checkout@v6 - name: Download plan metadata artifact - uses: actions/download-artifact@v5 + uses: actions/download-artifact@v7 with: name: infra-plan-metadata github-token: ${{ github.token }} diff --git a/.github/workflows/shared_infra_plan.yml b/.github/workflows/shared_infra_plan.yml index 9485b9d9..1a31a7d0 100644 --- a/.github/workflows/shared_infra_plan.yml 
+++ b/.github/workflows/shared_infra_plan.yml @@ -63,7 +63,7 @@ jobs: EOF - name: Upload plan metadata artifact - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: infra-plan-metadata path: plan-metadata.json From 2417ba160e4c016df2be1aa6057debe64d8a2dd7 Mon Sep 17 00:00:00 2001 From: Chris Sheehan Date: Sun, 17 May 2026 20:18:14 +0100 Subject: [PATCH 24/34] fix: set dummy task arn var --- infra/live/dev/aws/service_api/terragrunt.hcl | 2 -- infra/live/dev/aws/service_worker/terragrunt.hcl | 1 - infra/live/prod/aws/service_api/terragrunt.hcl | 2 -- infra/live/prod/aws/service_worker/terragrunt.hcl | 1 - infra/modules/aws/service_api/variables.tf | 3 ++- infra/modules/aws/service_worker/variables.tf | 3 ++- 6 files changed, 4 insertions(+), 8 deletions(-) diff --git a/infra/live/dev/aws/service_api/terragrunt.hcl b/infra/live/dev/aws/service_api/terragrunt.hcl index 9bcdd427..b57bb8f9 100644 --- a/infra/live/dev/aws/service_api/terragrunt.hcl +++ b/infra/live/dev/aws/service_api/terragrunt.hcl @@ -4,7 +4,6 @@ include "root" { locals { runtime_security = read_terragrunt_config(find_in_parent_folders("dependencies/ecs_runtime_security.hcl")) - task_api = read_terragrunt_config(find_in_parent_folders("dependencies/task_api.hcl")) cluster = read_terragrunt_config(find_in_parent_folders("dependencies/cluster.hcl")) network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network_runtime.hcl")) } @@ -15,7 +14,6 @@ terraform { inputs = merge( local.runtime_security.inputs, - local.task_api.inputs, local.cluster.inputs, local.network_runtime.inputs, ) diff --git a/infra/live/dev/aws/service_worker/terragrunt.hcl b/infra/live/dev/aws/service_worker/terragrunt.hcl index bd0fa8c2..f9a0e67e 100644 --- a/infra/live/dev/aws/service_worker/terragrunt.hcl +++ b/infra/live/dev/aws/service_worker/terragrunt.hcl @@ -4,7 +4,6 @@ include "root" { locals { runtime_security = 
read_terragrunt_config(find_in_parent_folders("dependencies/ecs_runtime_security.hcl")) - task_worker = read_terragrunt_config(find_in_parent_folders("dependencies/task_worker.hcl")) worker_messaging = read_terragrunt_config(find_in_parent_folders("dependencies/worker_messaging.hcl")) cluster = read_terragrunt_config(find_in_parent_folders("dependencies/cluster.hcl")) network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network_runtime.hcl")) diff --git a/infra/live/prod/aws/service_api/terragrunt.hcl b/infra/live/prod/aws/service_api/terragrunt.hcl index 9bcdd427..b57bb8f9 100644 --- a/infra/live/prod/aws/service_api/terragrunt.hcl +++ b/infra/live/prod/aws/service_api/terragrunt.hcl @@ -4,7 +4,6 @@ include "root" { locals { runtime_security = read_terragrunt_config(find_in_parent_folders("dependencies/ecs_runtime_security.hcl")) - task_api = read_terragrunt_config(find_in_parent_folders("dependencies/task_api.hcl")) cluster = read_terragrunt_config(find_in_parent_folders("dependencies/cluster.hcl")) network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network_runtime.hcl")) } @@ -15,7 +14,6 @@ terraform { inputs = merge( local.runtime_security.inputs, - local.task_api.inputs, local.cluster.inputs, local.network_runtime.inputs, ) diff --git a/infra/live/prod/aws/service_worker/terragrunt.hcl b/infra/live/prod/aws/service_worker/terragrunt.hcl index bd0fa8c2..f9a0e67e 100644 --- a/infra/live/prod/aws/service_worker/terragrunt.hcl +++ b/infra/live/prod/aws/service_worker/terragrunt.hcl @@ -4,7 +4,6 @@ include "root" { locals { runtime_security = read_terragrunt_config(find_in_parent_folders("dependencies/ecs_runtime_security.hcl")) - task_worker = read_terragrunt_config(find_in_parent_folders("dependencies/task_worker.hcl")) worker_messaging = read_terragrunt_config(find_in_parent_folders("dependencies/worker_messaging.hcl")) cluster = read_terragrunt_config(find_in_parent_folders("dependencies/cluster.hcl")) 
network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network_runtime.hcl")) diff --git a/infra/modules/aws/service_api/variables.tf b/infra/modules/aws/service_api/variables.tf index a3de6e79..38f61457 100644 --- a/infra/modules/aws/service_api/variables.tf +++ b/infra/modules/aws/service_api/variables.tf @@ -81,7 +81,8 @@ variable "ecs_security_group_id" { } variable "task_definition_arn" { - type = string + type = string + default = "arn:aws:ecs:eu-west-2:111111111111:task-definition/mock-task-api:1" } variable "cluster_id" { diff --git a/infra/modules/aws/service_worker/variables.tf b/infra/modules/aws/service_worker/variables.tf index 871f9098..a2c58837 100644 --- a/infra/modules/aws/service_worker/variables.tf +++ b/infra/modules/aws/service_worker/variables.tf @@ -81,7 +81,8 @@ variable "ecs_security_group_id" { } variable "task_definition_arn" { - type = string + type = string + default = "arn:aws:ecs:eu-west-2:111111111111:task-definition/mock-task-worker:1" } variable "ecs_worker_queue_name" { From 0d946d20acd8f6906a3088a658c1d42fc42321ba Mon Sep 17 00:00:00 2001 From: Chris Sheehan Date: Sun, 17 May 2026 20:32:54 +0100 Subject: [PATCH 25/34] chore: rename deps files --- infra/live/dependencies/database_security.hcl | 13 ----- .../dependencies/ecs_runtime_security.hcl | 13 ----- .../{frontend_runtime.hcl => frontend.hcl} | 1 + .../dependencies/lambda_runtime_security.hcl | 13 ----- infra/live/dependencies/network.hcl | 47 ++++++++++--------- infra/live/dependencies/network_runtime.hcl | 35 -------------- infra/live/dependencies/security.hcl | 14 ++++++ infra/live/dependencies/task_api.hcl | 13 ----- infra/live/dependencies/task_worker.hcl | 13 ----- infra/live/dev/aws/database/terragrunt.hcl | 8 ++-- infra/live/dev/aws/frontend/terragrunt.hcl | 2 +- infra/live/dev/aws/lambda_api/terragrunt.hcl | 2 +- infra/live/dev/aws/migrations/terragrunt.hcl | 12 ++++- infra/live/dev/aws/network/terragrunt.hcl | 23 +++++++-- 
infra/live/dev/aws/service_api/terragrunt.hcl | 11 +++-- .../dev/aws/service_worker/terragrunt.hcl | 12 +++-- infra/live/prod/aws/database/terragrunt.hcl | 8 ++-- infra/live/prod/aws/frontend/terragrunt.hcl | 2 +- infra/live/prod/aws/lambda_api/terragrunt.hcl | 2 +- infra/live/prod/aws/migrations/terragrunt.hcl | 12 ++++- infra/live/prod/aws/network/terragrunt.hcl | 23 +++++++-- .../live/prod/aws/service_api/terragrunt.hcl | 11 +++-- .../prod/aws/service_worker/terragrunt.hcl | 12 +++-- 23 files changed, 147 insertions(+), 155 deletions(-) delete mode 100644 infra/live/dependencies/database_security.hcl delete mode 100644 infra/live/dependencies/ecs_runtime_security.hcl rename infra/live/dependencies/{frontend_runtime.hcl => frontend.hcl} (99%) delete mode 100644 infra/live/dependencies/lambda_runtime_security.hcl delete mode 100644 infra/live/dependencies/network_runtime.hcl create mode 100644 infra/live/dependencies/security.hcl delete mode 100644 infra/live/dependencies/task_api.hcl delete mode 100644 infra/live/dependencies/task_worker.hcl diff --git a/infra/live/dependencies/database_security.hcl b/infra/live/dependencies/database_security.hcl deleted file mode 100644 index d78237b1..00000000 --- a/infra/live/dependencies/database_security.hcl +++ /dev/null @@ -1,13 +0,0 @@ -dependency "security" { - config_path = "${get_original_terragrunt_dir()}/../security" - - mock_outputs = { - postgres_sg = "sg-00000000000000006" - } - - mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] -} - -inputs = { - database_security_group_id = dependency.security.outputs.postgres_sg -} diff --git a/infra/live/dependencies/ecs_runtime_security.hcl b/infra/live/dependencies/ecs_runtime_security.hcl deleted file mode 100644 index c550f972..00000000 --- a/infra/live/dependencies/ecs_runtime_security.hcl +++ /dev/null @@ -1,13 +0,0 @@ -dependency "security" { - config_path = "${get_original_terragrunt_dir()}/../security" - - mock_outputs = { - 
ecs_sg = "sg-00000000000000004" - } - - mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] -} - -inputs = { - ecs_security_group_id = dependency.security.outputs.ecs_sg -} diff --git a/infra/live/dependencies/frontend_runtime.hcl b/infra/live/dependencies/frontend.hcl similarity index 99% rename from infra/live/dependencies/frontend_runtime.hcl rename to infra/live/dependencies/frontend.hcl index 93b5dcaf..dd680a90 100644 --- a/infra/live/dependencies/frontend_runtime.hcl +++ b/infra/live/dependencies/frontend.hcl @@ -28,3 +28,4 @@ inputs = { auth_hosted_ui_url = dependency.cognito.outputs.hosted_ui_url auth_readonly_group_name = dependency.cognito.outputs.readonly_group_name } + diff --git a/infra/live/dependencies/lambda_runtime_security.hcl b/infra/live/dependencies/lambda_runtime_security.hcl deleted file mode 100644 index 595df973..00000000 --- a/infra/live/dependencies/lambda_runtime_security.hcl +++ /dev/null @@ -1,13 +0,0 @@ -dependency "security" { - config_path = "${get_original_terragrunt_dir()}/../security" - - mock_outputs = { - runtime_sg = "sg-00000000000000005" - } - - mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] -} - -inputs = { - runtime_security_group_id = dependency.security.outputs.runtime_sg -} diff --git a/infra/live/dependencies/network.hcl b/infra/live/dependencies/network.hcl index b07fae22..de2a1de5 100644 --- a/infra/live/dependencies/network.hcl +++ b/infra/live/dependencies/network.hcl @@ -1,30 +1,35 @@ -dependency "security" { - config_path = "${get_original_terragrunt_dir()}/../security" +dependency "network" { + config_path = "${get_original_terragrunt_dir()}/../network" mock_outputs = { - load_balancer_sg = "sg-00000000000000001" - api_vpc_link_sg = "sg-00000000000000002" - vpc_endpoint_sg = "sg-00000000000000003" - } - - mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] -} - -dependency "cognito" { - 
config_path = "${get_original_terragrunt_dir()}/../cognito" - - mock_outputs = { - user_pool_client_id = "mock-user-pool-client-id" - issuer_url = "https://cognito-idp.eu-west-2.amazonaws.com/eu-west-2_mock" + default_target_group_arn = "arn:aws:elasticloadbalancing:eu-west-2:111111111111:targetgroup/mock-default/1234567890abcdef" + load_balancer_arn = "arn:aws:elasticloadbalancing:eu-west-2:111111111111:loadbalancer/app/mock-internal/1234567890abcdef" + default_http_listener_arn = "arn:aws:elasticloadbalancing:eu-west-2:111111111111:listener/app/mock-internal/1234567890abcdef/abcdef1234567890" + load_balancer_arn_suffix = "app/mock-internal/1234567890abcdef" + target_group_arn_suffix = "targetgroup/mock-default/1234567890abcdef" + internal_invoke_url = "http://mock-internal-123456.eu-west-2.elb.amazonaws.com" + api_id = "mockapi123" + api_invoke_url = "https://mockapi123.execute-api.eu-west-2.amazonaws.com" + api_execution_arn = "arn:aws:execute-api:eu-west-2:111111111111:mockapi123" + api_stage_name = "$default" + vpc_link_id = "vpclink-mock123" + http_api_authorizer_id = "auth-mock123" } mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] } inputs = { - load_balancer_sg = dependency.security.outputs.load_balancer_sg - api_vpc_link_sg = dependency.security.outputs.api_vpc_link_sg - vpc_endpoint_sg = dependency.security.outputs.vpc_endpoint_sg - auth_user_pool_client_id = dependency.cognito.outputs.user_pool_client_id - auth_issuer_url = dependency.cognito.outputs.issuer_url + network_default_target_group_arn = dependency.network.outputs.default_target_group_arn + network_load_balancer_arn = dependency.network.outputs.load_balancer_arn + network_default_http_listener_arn = dependency.network.outputs.default_http_listener_arn + network_load_balancer_arn_suffix = dependency.network.outputs.load_balancer_arn_suffix + network_target_group_arn_suffix = dependency.network.outputs.target_group_arn_suffix + network_internal_invoke_url 
= dependency.network.outputs.internal_invoke_url + network_api_id = dependency.network.outputs.api_id + network_api_invoke_url = dependency.network.outputs.api_invoke_url + network_api_execution_arn = dependency.network.outputs.api_execution_arn + network_api_stage_name = dependency.network.outputs.api_stage_name + network_vpc_link_id = dependency.network.outputs.vpc_link_id + network_http_api_authorizer_id = dependency.network.outputs.http_api_authorizer_id } diff --git a/infra/live/dependencies/network_runtime.hcl b/infra/live/dependencies/network_runtime.hcl deleted file mode 100644 index de2a1de5..00000000 --- a/infra/live/dependencies/network_runtime.hcl +++ /dev/null @@ -1,35 +0,0 @@ -dependency "network" { - config_path = "${get_original_terragrunt_dir()}/../network" - - mock_outputs = { - default_target_group_arn = "arn:aws:elasticloadbalancing:eu-west-2:111111111111:targetgroup/mock-default/1234567890abcdef" - load_balancer_arn = "arn:aws:elasticloadbalancing:eu-west-2:111111111111:loadbalancer/app/mock-internal/1234567890abcdef" - default_http_listener_arn = "arn:aws:elasticloadbalancing:eu-west-2:111111111111:listener/app/mock-internal/1234567890abcdef/abcdef1234567890" - load_balancer_arn_suffix = "app/mock-internal/1234567890abcdef" - target_group_arn_suffix = "targetgroup/mock-default/1234567890abcdef" - internal_invoke_url = "http://mock-internal-123456.eu-west-2.elb.amazonaws.com" - api_id = "mockapi123" - api_invoke_url = "https://mockapi123.execute-api.eu-west-2.amazonaws.com" - api_execution_arn = "arn:aws:execute-api:eu-west-2:111111111111:mockapi123" - api_stage_name = "$default" - vpc_link_id = "vpclink-mock123" - http_api_authorizer_id = "auth-mock123" - } - - mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] -} - -inputs = { - network_default_target_group_arn = dependency.network.outputs.default_target_group_arn - network_load_balancer_arn = dependency.network.outputs.load_balancer_arn - 
network_default_http_listener_arn = dependency.network.outputs.default_http_listener_arn - network_load_balancer_arn_suffix = dependency.network.outputs.load_balancer_arn_suffix - network_target_group_arn_suffix = dependency.network.outputs.target_group_arn_suffix - network_internal_invoke_url = dependency.network.outputs.internal_invoke_url - network_api_id = dependency.network.outputs.api_id - network_api_invoke_url = dependency.network.outputs.api_invoke_url - network_api_execution_arn = dependency.network.outputs.api_execution_arn - network_api_stage_name = dependency.network.outputs.api_stage_name - network_vpc_link_id = dependency.network.outputs.vpc_link_id - network_http_api_authorizer_id = dependency.network.outputs.http_api_authorizer_id -} diff --git a/infra/live/dependencies/security.hcl b/infra/live/dependencies/security.hcl new file mode 100644 index 00000000..07ac0a9e --- /dev/null +++ b/infra/live/dependencies/security.hcl @@ -0,0 +1,14 @@ +dependency "security" { + config_path = "${get_original_terragrunt_dir()}/../security" + + mock_outputs = { + load_balancer_sg = "sg-00000000000000001" + api_vpc_link_sg = "sg-00000000000000002" + vpc_endpoint_sg = "sg-00000000000000003" + ecs_sg = "sg-00000000000000004" + runtime_sg = "sg-00000000000000005" + postgres_sg = "sg-00000000000000006" + } + + mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] +} diff --git a/infra/live/dependencies/task_api.hcl b/infra/live/dependencies/task_api.hcl deleted file mode 100644 index 18dccde7..00000000 --- a/infra/live/dependencies/task_api.hcl +++ /dev/null @@ -1,13 +0,0 @@ -dependency "task_api" { - config_path = "${get_original_terragrunt_dir()}/../task_api" - - mock_outputs = { - task_definition_arn = "arn:aws:ecs:eu-west-2:111111111111:task-definition/mock-task-api:1" - } - - mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] -} - -inputs = { - task_definition_arn = 
dependency.task_api.outputs.task_definition_arn -} diff --git a/infra/live/dependencies/task_worker.hcl b/infra/live/dependencies/task_worker.hcl deleted file mode 100644 index 95521892..00000000 --- a/infra/live/dependencies/task_worker.hcl +++ /dev/null @@ -1,13 +0,0 @@ -dependency "task_worker" { - config_path = "${get_original_terragrunt_dir()}/../task_worker" - - mock_outputs = { - task_definition_arn = "arn:aws:ecs:eu-west-2:111111111111:task-definition/mock-task-worker:1" - } - - mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] -} - -inputs = { - task_definition_arn = dependency.task_worker.outputs.task_definition_arn -} diff --git a/infra/live/dev/aws/database/terragrunt.hcl b/infra/live/dev/aws/database/terragrunt.hcl index 6f0238aa..c6ee9ddc 100644 --- a/infra/live/dev/aws/database/terragrunt.hcl +++ b/infra/live/dev/aws/database/terragrunt.hcl @@ -2,8 +2,8 @@ include "root" { path = find_in_parent_folders("root.hcl") } -locals { - database_security = read_terragrunt_config(find_in_parent_folders("dependencies/database_security.hcl")) +include "security" { + path = find_in_parent_folders("dependencies/security.hcl") } terraform { @@ -11,7 +11,9 @@ terraform { } inputs = merge( - local.database_security.inputs, + { + database_security_group_id = dependency.security.outputs.postgres_sg + }, { database_name = "app" backup_retention_period = 1 diff --git a/infra/live/dev/aws/frontend/terragrunt.hcl b/infra/live/dev/aws/frontend/terragrunt.hcl index fd2d5c39..562b6146 100644 --- a/infra/live/dev/aws/frontend/terragrunt.hcl +++ b/infra/live/dev/aws/frontend/terragrunt.hcl @@ -3,7 +3,7 @@ include "root" { } locals { - frontend_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/frontend_runtime.hcl")) + frontend_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/frontend.hcl")) } terraform { diff --git a/infra/live/dev/aws/lambda_api/terragrunt.hcl 
b/infra/live/dev/aws/lambda_api/terragrunt.hcl index 9c0a5612..3977103d 100644 --- a/infra/live/dev/aws/lambda_api/terragrunt.hcl +++ b/infra/live/dev/aws/lambda_api/terragrunt.hcl @@ -3,7 +3,7 @@ include "root" { } locals { - network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network_runtime.hcl")) + network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) worker_messaging = read_terragrunt_config(find_in_parent_folders("dependencies/worker_messaging.hcl")) } diff --git a/infra/live/dev/aws/migrations/terragrunt.hcl b/infra/live/dev/aws/migrations/terragrunt.hcl index cf527510..5cc2b041 100644 --- a/infra/live/dev/aws/migrations/terragrunt.hcl +++ b/infra/live/dev/aws/migrations/terragrunt.hcl @@ -2,8 +2,11 @@ include "root" { path = find_in_parent_folders("root.hcl") } +include "security" { + path = find_in_parent_folders("dependencies/security.hcl") +} + locals { - runtime_security = read_terragrunt_config(find_in_parent_folders("dependencies/lambda_runtime_security.hcl")) database = read_terragrunt_config(find_in_parent_folders("dependencies/database.hcl")) } @@ -11,4 +14,9 @@ terraform { source = "../../../../modules//aws//migrations" } -inputs = merge(local.runtime_security.inputs, local.database.inputs) +inputs = merge( + { + runtime_security_group_id = dependency.security.outputs.runtime_sg + }, + local.database.inputs, +) diff --git a/infra/live/dev/aws/network/terragrunt.hcl b/infra/live/dev/aws/network/terragrunt.hcl index 860571f1..ce31ee15 100644 --- a/infra/live/dev/aws/network/terragrunt.hcl +++ b/infra/live/dev/aws/network/terragrunt.hcl @@ -2,12 +2,29 @@ include "root" { path = find_in_parent_folders("root.hcl") } -locals { - network_dependencies = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) +include "security" { + path = find_in_parent_folders("dependencies/security.hcl") } terraform { source = "../../../../modules//aws//network" } -inputs = 
local.network_dependencies.inputs +dependency "cognito" { + config_path = "${get_original_terragrunt_dir()}/../cognito" + + mock_outputs = { + user_pool_client_id = "mock-user-pool-client-id" + issuer_url = "https://cognito-idp.eu-west-2.amazonaws.com/eu-west-2_mock" + } + + mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] +} + +inputs = { + load_balancer_sg = dependency.security.outputs.load_balancer_sg + api_vpc_link_sg = dependency.security.outputs.api_vpc_link_sg + vpc_endpoint_sg = dependency.security.outputs.vpc_endpoint_sg + auth_user_pool_client_id = dependency.cognito.outputs.user_pool_client_id + auth_issuer_url = dependency.cognito.outputs.issuer_url +} diff --git a/infra/live/dev/aws/service_api/terragrunt.hcl b/infra/live/dev/aws/service_api/terragrunt.hcl index b57bb8f9..25c92a99 100644 --- a/infra/live/dev/aws/service_api/terragrunt.hcl +++ b/infra/live/dev/aws/service_api/terragrunt.hcl @@ -2,10 +2,13 @@ include "root" { path = find_in_parent_folders("root.hcl") } +include "security" { + path = find_in_parent_folders("dependencies/security.hcl") +} + locals { - runtime_security = read_terragrunt_config(find_in_parent_folders("dependencies/ecs_runtime_security.hcl")) cluster = read_terragrunt_config(find_in_parent_folders("dependencies/cluster.hcl")) - network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network_runtime.hcl")) + network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) } terraform { @@ -13,7 +16,9 @@ terraform { } inputs = merge( - local.runtime_security.inputs, + { + ecs_security_group_id = dependency.security.outputs.ecs_sg + }, local.cluster.inputs, local.network_runtime.inputs, ) diff --git a/infra/live/dev/aws/service_worker/terragrunt.hcl b/infra/live/dev/aws/service_worker/terragrunt.hcl index f9a0e67e..2b39c22d 100644 --- a/infra/live/dev/aws/service_worker/terragrunt.hcl +++ b/infra/live/dev/aws/service_worker/terragrunt.hcl 
@@ -2,11 +2,14 @@ include "root" { path = find_in_parent_folders("root.hcl") } +include "security" { + path = find_in_parent_folders("dependencies/security.hcl") +} + locals { - runtime_security = read_terragrunt_config(find_in_parent_folders("dependencies/ecs_runtime_security.hcl")) worker_messaging = read_terragrunt_config(find_in_parent_folders("dependencies/worker_messaging.hcl")) cluster = read_terragrunt_config(find_in_parent_folders("dependencies/cluster.hcl")) - network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network_runtime.hcl")) + network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) } terraform { @@ -14,8 +17,9 @@ terraform { } inputs = merge( - local.runtime_security.inputs, - local.task_worker.inputs, + { + ecs_security_group_id = dependency.security.outputs.ecs_sg + }, local.worker_messaging.inputs, local.cluster.inputs, local.network_runtime.inputs, diff --git a/infra/live/prod/aws/database/terragrunt.hcl b/infra/live/prod/aws/database/terragrunt.hcl index af9ebc4a..5901ba70 100644 --- a/infra/live/prod/aws/database/terragrunt.hcl +++ b/infra/live/prod/aws/database/terragrunt.hcl @@ -2,8 +2,8 @@ include "root" { path = find_in_parent_folders("root.hcl") } -locals { - database_security = read_terragrunt_config(find_in_parent_folders("dependencies/database_security.hcl")) +include "security" { + path = find_in_parent_folders("dependencies/security.hcl") } terraform { @@ -11,7 +11,9 @@ terraform { } inputs = merge( - local.database_security.inputs, + { + database_security_group_id = dependency.security.outputs.postgres_sg + }, { database_name = "app" backup_retention_period = 7 diff --git a/infra/live/prod/aws/frontend/terragrunt.hcl b/infra/live/prod/aws/frontend/terragrunt.hcl index fd2d5c39..562b6146 100644 --- a/infra/live/prod/aws/frontend/terragrunt.hcl +++ b/infra/live/prod/aws/frontend/terragrunt.hcl @@ -3,7 +3,7 @@ include "root" { } locals { - frontend_runtime = 
read_terragrunt_config(find_in_parent_folders("dependencies/frontend_runtime.hcl")) + frontend_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/frontend.hcl")) } terraform { diff --git a/infra/live/prod/aws/lambda_api/terragrunt.hcl b/infra/live/prod/aws/lambda_api/terragrunt.hcl index 9d0d7665..ac499c4f 100644 --- a/infra/live/prod/aws/lambda_api/terragrunt.hcl +++ b/infra/live/prod/aws/lambda_api/terragrunt.hcl @@ -3,7 +3,7 @@ include "root" { } locals { - network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network_runtime.hcl")) + network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) worker_messaging = read_terragrunt_config(find_in_parent_folders("dependencies/worker_messaging.hcl")) } diff --git a/infra/live/prod/aws/migrations/terragrunt.hcl b/infra/live/prod/aws/migrations/terragrunt.hcl index cf527510..5cc2b041 100644 --- a/infra/live/prod/aws/migrations/terragrunt.hcl +++ b/infra/live/prod/aws/migrations/terragrunt.hcl @@ -2,8 +2,11 @@ include "root" { path = find_in_parent_folders("root.hcl") } +include "security" { + path = find_in_parent_folders("dependencies/security.hcl") +} + locals { - runtime_security = read_terragrunt_config(find_in_parent_folders("dependencies/lambda_runtime_security.hcl")) database = read_terragrunt_config(find_in_parent_folders("dependencies/database.hcl")) } @@ -11,4 +14,9 @@ terraform { source = "../../../../modules//aws//migrations" } -inputs = merge(local.runtime_security.inputs, local.database.inputs) +inputs = merge( + { + runtime_security_group_id = dependency.security.outputs.runtime_sg + }, + local.database.inputs, +) diff --git a/infra/live/prod/aws/network/terragrunt.hcl b/infra/live/prod/aws/network/terragrunt.hcl index 860571f1..ce31ee15 100644 --- a/infra/live/prod/aws/network/terragrunt.hcl +++ b/infra/live/prod/aws/network/terragrunt.hcl @@ -2,12 +2,29 @@ include "root" { path = find_in_parent_folders("root.hcl") } -locals { - 
network_dependencies = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) +include "security" { + path = find_in_parent_folders("dependencies/security.hcl") } terraform { source = "../../../../modules//aws//network" } -inputs = local.network_dependencies.inputs +dependency "cognito" { + config_path = "${get_original_terragrunt_dir()}/../cognito" + + mock_outputs = { + user_pool_client_id = "mock-user-pool-client-id" + issuer_url = "https://cognito-idp.eu-west-2.amazonaws.com/eu-west-2_mock" + } + + mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] +} + +inputs = { + load_balancer_sg = dependency.security.outputs.load_balancer_sg + api_vpc_link_sg = dependency.security.outputs.api_vpc_link_sg + vpc_endpoint_sg = dependency.security.outputs.vpc_endpoint_sg + auth_user_pool_client_id = dependency.cognito.outputs.user_pool_client_id + auth_issuer_url = dependency.cognito.outputs.issuer_url +} diff --git a/infra/live/prod/aws/service_api/terragrunt.hcl b/infra/live/prod/aws/service_api/terragrunt.hcl index b57bb8f9..25c92a99 100644 --- a/infra/live/prod/aws/service_api/terragrunt.hcl +++ b/infra/live/prod/aws/service_api/terragrunt.hcl @@ -2,10 +2,13 @@ include "root" { path = find_in_parent_folders("root.hcl") } +include "security" { + path = find_in_parent_folders("dependencies/security.hcl") +} + locals { - runtime_security = read_terragrunt_config(find_in_parent_folders("dependencies/ecs_runtime_security.hcl")) cluster = read_terragrunt_config(find_in_parent_folders("dependencies/cluster.hcl")) - network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network_runtime.hcl")) + network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) } terraform { @@ -13,7 +16,9 @@ terraform { } inputs = merge( - local.runtime_security.inputs, + { + ecs_security_group_id = dependency.security.outputs.ecs_sg + }, local.cluster.inputs, local.network_runtime.inputs, ) 
diff --git a/infra/live/prod/aws/service_worker/terragrunt.hcl b/infra/live/prod/aws/service_worker/terragrunt.hcl index f9a0e67e..2b39c22d 100644 --- a/infra/live/prod/aws/service_worker/terragrunt.hcl +++ b/infra/live/prod/aws/service_worker/terragrunt.hcl @@ -2,11 +2,14 @@ include "root" { path = find_in_parent_folders("root.hcl") } +include "security" { + path = find_in_parent_folders("dependencies/security.hcl") +} + locals { - runtime_security = read_terragrunt_config(find_in_parent_folders("dependencies/ecs_runtime_security.hcl")) worker_messaging = read_terragrunt_config(find_in_parent_folders("dependencies/worker_messaging.hcl")) cluster = read_terragrunt_config(find_in_parent_folders("dependencies/cluster.hcl")) - network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network_runtime.hcl")) + network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) } terraform { @@ -14,8 +17,9 @@ terraform { } inputs = merge( - local.runtime_security.inputs, - local.task_worker.inputs, + { + ecs_security_group_id = dependency.security.outputs.ecs_sg + }, local.worker_messaging.inputs, local.cluster.inputs, local.network_runtime.inputs, From e29828bcbf48daef5c066b8aef05a3695a7f9340 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Mon, 18 May 2026 10:19:03 +0100 Subject: [PATCH 26/34] chore: check for output/var check --- REPO_INSTRUCTIONS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/REPO_INSTRUCTIONS.md b/REPO_INSTRUCTIONS.md index 6aacb201..aa002a83 100644 --- a/REPO_INSTRUCTIONS.md +++ b/REPO_INSTRUCTIONS.md @@ -98,6 +98,8 @@ These instructions apply to the entire repository. 
- verify runtime type (Lambda/ECS), deploy mode, and (for ECS) connection type and load-balancer shape - verify required infra resources exist (CodeDeploy app/deployment group, listeners/target groups, alarms, VPC link if applicable) - when changing reusable workflow contracts, compare every caller `with:` block to the callee `workflow_call.inputs` +- when adding or renaming Terraform module `output` values that are intended for Terragrunt `dependency.<name>.outputs` passthrough, verify every downstream consumer wrapper declares a `variable` with the exact same name +- if that same-name output-to-variable contract does not hold yet, do not leave it implicit: either add the matching variables, or call out the mismatch explicitly before closing the task - check apply/deploy/destroy, and avoid unnecessary `terraform_remote_state` coupling (especially for fast-changing outputs) - for bootstrap-sensitive or plan-sensitive cross-stack contracts, prefer Terragrunt `dependency` inputs in the live stack and `mock_outputs` for non-mutating commands rather than reading upstream state directly inside Terraform modules - if CI plan failures are caused by missing upstream state, fix the contract shape first instead of papering over the issue with more direct `terraform_remote_state` reads From 9c4a69c86761373e3522decf3518a333e4c846d2 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Mon, 18 May 2026 10:27:29 +0100 Subject: [PATCH 27/34] chore: prefix database_ outs/vars --- infra/live/dependencies/database.hcl | 16 ++++++++-------- infra/modules/aws/database/README.md | 14 +++++++------- infra/modules/aws/database/outputs.tf | 14 +++++++------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/infra/live/dependencies/database.hcl b/infra/live/dependencies/database.hcl index 6ca280c3..02dd17c6 100644 --- a/infra/live/dependencies/database.hcl +++ b/infra/live/dependencies/database.hcl @@ -2,20 +2,20 @@ dependency "database" { config_path =
"${get_original_terragrunt_dir()}/../database" mock_outputs = { - credentials_secret_arn = "arn:aws:secretsmanager:eu-west-2:111111111111:secret:mock-database-credentials" - readwrite_endpoint = "mock-database.cluster-abcdefghijkl.eu-west-2.rds.amazonaws.com" - database_name = "app" - database_port = 5432 - cluster_identifier = "mock-database-cluster" + database_credentials_secret_arn = "arn:aws:secretsmanager:eu-west-2:111111111111:secret:mock-database-credentials" + database_readwrite_endpoint = "mock-database.cluster-abcdefghijkl.eu-west-2.rds.amazonaws.com" + database_name = "app" + database_port = 5432 + database_cluster_identifier = "mock-database-cluster" } mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] } inputs = { - database_credentials_secret_arn = dependency.database.outputs.credentials_secret_arn - database_readwrite_endpoint = dependency.database.outputs.readwrite_endpoint + database_credentials_secret_arn = dependency.database.outputs.database_credentials_secret_arn + database_readwrite_endpoint = dependency.database.outputs.database_readwrite_endpoint database_name = dependency.database.outputs.database_name database_port = dependency.database.outputs.database_port - database_cluster_identifier = dependency.database.outputs.cluster_identifier + database_cluster_identifier = dependency.database.outputs.database_cluster_identifier } diff --git a/infra/modules/aws/database/README.md b/infra/modules/aws/database/README.md index 3f4cf9c0..1d968481 100644 --- a/infra/modules/aws/database/README.md +++ b/infra/modules/aws/database/README.md @@ -27,15 +27,15 @@ Concrete Aurora PostgreSQL wrapper. 
## Key outputs -- `cluster_identifier` -- `security_group_id` -- `credentials_secret_arn` -- `readonly_endpoint_ssm_name` -- `readwrite_endpoint_ssm_name` +- `database_cluster_identifier` +- `database_security_group_id` +- `database_credentials_secret_arn` +- `database_readonly_endpoint_ssm_name` +- `database_readwrite_endpoint_ssm_name` - `database_name` - `database_port` -- `readonly_endpoint` -- `readwrite_endpoint` +- `database_readonly_endpoint` +- `database_readwrite_endpoint` This module keeps repo-specific network lookup logic out of `_shared/database`. It selects public or private subnets by `tag:Name` based on `publicly_accessible` and passes the resulting subnet ids into the shared Aurora module. The database credentials outputs point at the Aurora-managed master secret rather than a repo-created fixed-name secret. diff --git a/infra/modules/aws/database/outputs.tf b/infra/modules/aws/database/outputs.tf index 93afbf47..df108b78 100644 --- a/infra/modules/aws/database/outputs.tf +++ b/infra/modules/aws/database/outputs.tf @@ -1,20 +1,20 @@ -output "credentials_secret_arn" { +output "database_credentials_secret_arn" { value = module.database.credentials_secret_arn } -output "readonly_endpoint_ssm_name" { +output "database_readonly_endpoint_ssm_name" { value = module.database.readonly_endpoint_ssm_name } -output "readwrite_endpoint_ssm_name" { +output "database_readwrite_endpoint_ssm_name" { value = module.database.readwrite_endpoint_ssm_name } -output "cluster_identifier" { +output "database_cluster_identifier" { value = module.database.cluster_identifier } -output "security_group_id" { +output "database_security_group_id" { value = module.database.security_group_id } @@ -26,10 +26,10 @@ output "database_port" { value = module.database.database_port } -output "readonly_endpoint" { +output "database_readonly_endpoint" { value = module.database.readonly_endpoint } -output "readwrite_endpoint" { +output "database_readwrite_endpoint" { value = 
module.database.readwrite_endpoint } From 3e3f7534bbe3a0f56f71056e426c1dfcb72ce713 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Mon, 18 May 2026 10:35:12 +0100 Subject: [PATCH 28/34] chore: rename auth_ --- infra/live/dependencies/frontend.hcl | 17 ++++++++--------- infra/live/dev/aws/network/terragrunt.hcl | 8 ++++---- infra/live/prod/aws/network/terragrunt.hcl | 8 ++++---- infra/modules/aws/cognito/README.md | 14 +++++++------- infra/modules/aws/cognito/outputs.tf | 14 +++++++------- 5 files changed, 30 insertions(+), 31 deletions(-) diff --git a/infra/live/dependencies/frontend.hcl b/infra/live/dependencies/frontend.hcl index dd680a90..01dba858 100644 --- a/infra/live/dependencies/frontend.hcl +++ b/infra/live/dependencies/frontend.hcl @@ -12,10 +12,10 @@ dependency "cognito" { config_path = "${get_original_terragrunt_dir()}/../cognito" mock_outputs = { - user_pool_id = "eu-west-2_mock" - user_pool_client_id = "mock-user-pool-client-id" - hosted_ui_url = "https://mock-domain.auth.eu-west-2.amazoncognito.com" - readonly_group_name = "readonly" + auth_user_pool_id = "eu-west-2_mock" + auth_user_pool_client_id = "mock-user-pool-client-id" + auth_hosted_ui_url = "https://mock-domain.auth.eu-west-2.amazoncognito.com" + auth_readonly_group_name = "readonly" } mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] @@ -23,9 +23,8 @@ dependency "cognito" { inputs = { api_invoke_url = dependency.network.outputs.api_invoke_url - auth_user_pool_id = dependency.cognito.outputs.user_pool_id - auth_user_pool_client_id = dependency.cognito.outputs.user_pool_client_id - auth_hosted_ui_url = dependency.cognito.outputs.hosted_ui_url - auth_readonly_group_name = dependency.cognito.outputs.readonly_group_name + auth_user_pool_id = dependency.cognito.outputs.auth_user_pool_id + auth_user_pool_client_id = dependency.cognito.outputs.auth_user_pool_client_id + auth_hosted_ui_url = dependency.cognito.outputs.auth_hosted_ui_url + 
auth_readonly_group_name = dependency.cognito.outputs.auth_readonly_group_name } - diff --git a/infra/live/dev/aws/network/terragrunt.hcl b/infra/live/dev/aws/network/terragrunt.hcl index ce31ee15..93443bf3 100644 --- a/infra/live/dev/aws/network/terragrunt.hcl +++ b/infra/live/dev/aws/network/terragrunt.hcl @@ -14,8 +14,8 @@ dependency "cognito" { config_path = "${get_original_terragrunt_dir()}/../cognito" mock_outputs = { - user_pool_client_id = "mock-user-pool-client-id" - issuer_url = "https://cognito-idp.eu-west-2.amazonaws.com/eu-west-2_mock" + auth_user_pool_client_id = "mock-user-pool-client-id" + auth_issuer_url = "https://cognito-idp.eu-west-2.amazonaws.com/eu-west-2_mock" } mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] @@ -25,6 +25,6 @@ inputs = { load_balancer_sg = dependency.security.outputs.load_balancer_sg api_vpc_link_sg = dependency.security.outputs.api_vpc_link_sg vpc_endpoint_sg = dependency.security.outputs.vpc_endpoint_sg - auth_user_pool_client_id = dependency.cognito.outputs.user_pool_client_id - auth_issuer_url = dependency.cognito.outputs.issuer_url + auth_user_pool_client_id = dependency.cognito.outputs.auth_user_pool_client_id + auth_issuer_url = dependency.cognito.outputs.auth_issuer_url } diff --git a/infra/live/prod/aws/network/terragrunt.hcl b/infra/live/prod/aws/network/terragrunt.hcl index ce31ee15..93443bf3 100644 --- a/infra/live/prod/aws/network/terragrunt.hcl +++ b/infra/live/prod/aws/network/terragrunt.hcl @@ -14,8 +14,8 @@ dependency "cognito" { config_path = "${get_original_terragrunt_dir()}/../cognito" mock_outputs = { - user_pool_client_id = "mock-user-pool-client-id" - issuer_url = "https://cognito-idp.eu-west-2.amazonaws.com/eu-west-2_mock" + auth_user_pool_client_id = "mock-user-pool-client-id" + auth_issuer_url = "https://cognito-idp.eu-west-2.amazonaws.com/eu-west-2_mock" } mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] @@ -25,6 
+25,6 @@ inputs = { load_balancer_sg = dependency.security.outputs.load_balancer_sg api_vpc_link_sg = dependency.security.outputs.api_vpc_link_sg vpc_endpoint_sg = dependency.security.outputs.vpc_endpoint_sg - auth_user_pool_client_id = dependency.cognito.outputs.user_pool_client_id - auth_issuer_url = dependency.cognito.outputs.issuer_url + auth_user_pool_client_id = dependency.cognito.outputs.auth_user_pool_client_id + auth_issuer_url = dependency.cognito.outputs.auth_issuer_url } diff --git a/infra/modules/aws/cognito/README.md b/infra/modules/aws/cognito/README.md index 8447be65..1f635eb9 100644 --- a/infra/modules/aws/cognito/README.md +++ b/infra/modules/aws/cognito/README.md @@ -19,13 +19,13 @@ Concrete Cognito user-auth module for the frontend and HTTP API. ## Key outputs -- `user_pool_id` -- `user_pool_arn` -- `user_pool_client_id` -- `issuer_url` -- `hosted_ui_url` -- `hosted_ui_domain` -- `readonly_group_name` +- `auth_user_pool_id` +- `auth_user_pool_arn` +- `auth_user_pool_client_id` +- `auth_issuer_url` +- `auth_hosted_ui_url` +- `auth_hosted_ui_domain` +- `auth_readonly_group_name` This module intentionally creates infrastructure, not individual users. In this repo, user seeding is expected to happen operationally with AWS CLI or `just` recipes so access can be granted explicitly to a small allowlist such as the initial `readonly` user. The module derives the deployed frontend URL as `https://..` and adds it to the Hosted UI callback and logout URLs alongside any local development URLs. 
diff --git a/infra/modules/aws/cognito/outputs.tf b/infra/modules/aws/cognito/outputs.tf index 8ddbc2ef..d7d31e2f 100644 --- a/infra/modules/aws/cognito/outputs.tf +++ b/infra/modules/aws/cognito/outputs.tf @@ -1,27 +1,27 @@ -output "user_pool_id" { +output "auth_user_pool_id" { value = aws_cognito_user_pool.this.id } -output "user_pool_arn" { +output "auth_user_pool_arn" { value = aws_cognito_user_pool.this.arn } -output "user_pool_client_id" { +output "auth_user_pool_client_id" { value = aws_cognito_user_pool_client.frontend.id } -output "issuer_url" { +output "auth_issuer_url" { value = local.issuer_url } -output "hosted_ui_url" { +output "auth_hosted_ui_url" { value = local.hosted_ui_url } -output "hosted_ui_domain" { +output "auth_hosted_ui_domain" { value = aws_cognito_user_pool_domain.this.domain } -output "readonly_group_name" { +output "auth_readonly_group_name" { value = aws_cognito_user_group.readonly.name } From 25c3d580d08c7b5862141b8f4e3c4556c77abe3e Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Mon, 18 May 2026 10:47:20 +0100 Subject: [PATCH 29/34] chore: clearer dep naming --- infra/live/dev/aws/frontend/terragrunt.hcl | 4 ++-- infra/live/dev/aws/lambda_api/terragrunt.hcl | 4 ++-- infra/live/dev/aws/service_api/terragrunt.hcl | 6 +++--- infra/live/dev/aws/service_worker/terragrunt.hcl | 4 ++-- infra/live/prod/aws/frontend/terragrunt.hcl | 4 ++-- infra/live/prod/aws/lambda_api/terragrunt.hcl | 4 ++-- infra/live/prod/aws/service_api/terragrunt.hcl | 6 +++--- infra/live/prod/aws/service_worker/terragrunt.hcl | 4 ++-- 8 files changed, 18 insertions(+), 18 deletions(-) diff --git a/infra/live/dev/aws/frontend/terragrunt.hcl b/infra/live/dev/aws/frontend/terragrunt.hcl index 562b6146..fa22a838 100644 --- a/infra/live/dev/aws/frontend/terragrunt.hcl +++ b/infra/live/dev/aws/frontend/terragrunt.hcl @@ -3,11 +3,11 @@ include "root" { } locals { - frontend_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/frontend.hcl")) + frontend = 
read_terragrunt_config(find_in_parent_folders("dependencies/frontend.hcl")) } terraform { source = "../../../../modules//aws//frontend" } -inputs = local.frontend_runtime.inputs +inputs = local.frontend.inputs diff --git a/infra/live/dev/aws/lambda_api/terragrunt.hcl b/infra/live/dev/aws/lambda_api/terragrunt.hcl index 3977103d..524bbb8a 100644 --- a/infra/live/dev/aws/lambda_api/terragrunt.hcl +++ b/infra/live/dev/aws/lambda_api/terragrunt.hcl @@ -3,7 +3,7 @@ include "root" { } locals { - network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) + network = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) worker_messaging = read_terragrunt_config(find_in_parent_folders("dependencies/worker_messaging.hcl")) } @@ -12,7 +12,7 @@ terraform { } inputs = merge( - local.network_runtime.inputs, + local.network.inputs, local.worker_messaging.inputs, { api_5xx_alarm_threshold = 20.0 diff --git a/infra/live/dev/aws/service_api/terragrunt.hcl b/infra/live/dev/aws/service_api/terragrunt.hcl index 25c92a99..501e7a44 100644 --- a/infra/live/dev/aws/service_api/terragrunt.hcl +++ b/infra/live/dev/aws/service_api/terragrunt.hcl @@ -7,8 +7,8 @@ include "security" { } locals { - cluster = read_terragrunt_config(find_in_parent_folders("dependencies/cluster.hcl")) - network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) + cluster = read_terragrunt_config(find_in_parent_folders("dependencies/cluster.hcl")) + network = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) } terraform { @@ -20,5 +20,5 @@ inputs = merge( ecs_security_group_id = dependency.security.outputs.ecs_sg }, local.cluster.inputs, - local.network_runtime.inputs, + local.network.inputs, ) diff --git a/infra/live/dev/aws/service_worker/terragrunt.hcl b/infra/live/dev/aws/service_worker/terragrunt.hcl index 2b39c22d..becbcf76 100644 --- a/infra/live/dev/aws/service_worker/terragrunt.hcl +++ 
b/infra/live/dev/aws/service_worker/terragrunt.hcl @@ -9,7 +9,7 @@ include "security" { locals { worker_messaging = read_terragrunt_config(find_in_parent_folders("dependencies/worker_messaging.hcl")) cluster = read_terragrunt_config(find_in_parent_folders("dependencies/cluster.hcl")) - network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) + network = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) } terraform { @@ -22,5 +22,5 @@ inputs = merge( }, local.worker_messaging.inputs, local.cluster.inputs, - local.network_runtime.inputs, + local.network.inputs, ) diff --git a/infra/live/prod/aws/frontend/terragrunt.hcl b/infra/live/prod/aws/frontend/terragrunt.hcl index 562b6146..fa22a838 100644 --- a/infra/live/prod/aws/frontend/terragrunt.hcl +++ b/infra/live/prod/aws/frontend/terragrunt.hcl @@ -3,11 +3,11 @@ include "root" { } locals { - frontend_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/frontend.hcl")) + frontend = read_terragrunt_config(find_in_parent_folders("dependencies/frontend.hcl")) } terraform { source = "../../../../modules//aws//frontend" } -inputs = local.frontend_runtime.inputs +inputs = local.frontend.inputs diff --git a/infra/live/prod/aws/lambda_api/terragrunt.hcl b/infra/live/prod/aws/lambda_api/terragrunt.hcl index ac499c4f..766b1366 100644 --- a/infra/live/prod/aws/lambda_api/terragrunt.hcl +++ b/infra/live/prod/aws/lambda_api/terragrunt.hcl @@ -3,7 +3,7 @@ include "root" { } locals { - network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) + network = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) worker_messaging = read_terragrunt_config(find_in_parent_folders("dependencies/worker_messaging.hcl")) } @@ -12,7 +12,7 @@ terraform { } inputs = merge( - local.network_runtime.inputs, + local.network.inputs, local.worker_messaging.inputs, { api_5xx_alarm_threshold = 5.0 diff --git 
a/infra/live/prod/aws/service_api/terragrunt.hcl b/infra/live/prod/aws/service_api/terragrunt.hcl index 25c92a99..501e7a44 100644 --- a/infra/live/prod/aws/service_api/terragrunt.hcl +++ b/infra/live/prod/aws/service_api/terragrunt.hcl @@ -7,8 +7,8 @@ include "security" { } locals { - cluster = read_terragrunt_config(find_in_parent_folders("dependencies/cluster.hcl")) - network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) + cluster = read_terragrunt_config(find_in_parent_folders("dependencies/cluster.hcl")) + network = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) } terraform { @@ -20,5 +20,5 @@ inputs = merge( ecs_security_group_id = dependency.security.outputs.ecs_sg }, local.cluster.inputs, - local.network_runtime.inputs, + local.network.inputs, ) diff --git a/infra/live/prod/aws/service_worker/terragrunt.hcl b/infra/live/prod/aws/service_worker/terragrunt.hcl index 2b39c22d..becbcf76 100644 --- a/infra/live/prod/aws/service_worker/terragrunt.hcl +++ b/infra/live/prod/aws/service_worker/terragrunt.hcl @@ -9,7 +9,7 @@ include "security" { locals { worker_messaging = read_terragrunt_config(find_in_parent_folders("dependencies/worker_messaging.hcl")) cluster = read_terragrunt_config(find_in_parent_folders("dependencies/cluster.hcl")) - network_runtime = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) + network = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) } terraform { @@ -22,5 +22,5 @@ inputs = merge( }, local.worker_messaging.inputs, local.cluster.inputs, - local.network_runtime.inputs, + local.network.inputs, ) From d4565a82da955654a39933a433ecc69c8f029352 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Mon, 18 May 2026 10:49:10 +0100 Subject: [PATCH 30/34] fix: fmt --- infra/live/dev/aws/migrations/terragrunt.hcl | 2 +- infra/live/prod/aws/migrations/terragrunt.hcl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/infra/live/dev/aws/migrations/terragrunt.hcl b/infra/live/dev/aws/migrations/terragrunt.hcl index 5cc2b041..edc1b7f2 100644 --- a/infra/live/dev/aws/migrations/terragrunt.hcl +++ b/infra/live/dev/aws/migrations/terragrunt.hcl @@ -7,7 +7,7 @@ include "security" { } locals { - database = read_terragrunt_config(find_in_parent_folders("dependencies/database.hcl")) + database = read_terragrunt_config(find_in_parent_folders("dependencies/database.hcl")) } terraform { diff --git a/infra/live/prod/aws/migrations/terragrunt.hcl b/infra/live/prod/aws/migrations/terragrunt.hcl index 5cc2b041..edc1b7f2 100644 --- a/infra/live/prod/aws/migrations/terragrunt.hcl +++ b/infra/live/prod/aws/migrations/terragrunt.hcl @@ -7,7 +7,7 @@ include "security" { } locals { - database = read_terragrunt_config(find_in_parent_folders("dependencies/database.hcl")) + database = read_terragrunt_config(find_in_parent_folders("dependencies/database.hcl")) } terraform { From d3512c8bc740954188f4af142942dbe346e62286 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Mon, 18 May 2026 10:54:53 +0100 Subject: [PATCH 31/34] chore: rn worker_messaging -> messaging --- .github/workflows/destroy.yml | 4 +-- .github/workflows/shared_infra.yml | 10 +++--- infra/README.md | 8 ++--- infra/live/dependencies/messaging.hcl | 22 +++++++++++++ infra/live/dependencies/worker_messaging.hcl | 33 ------------------- infra/live/dev/aws/lambda_api/terragrunt.hcl | 6 ++-- .../live/dev/aws/lambda_worker/terragrunt.hcl | 4 +-- .../terragrunt.hcl | 2 +- .../dev/aws/service_worker/terragrunt.hcl | 8 ++--- infra/live/dev/aws/task_worker/terragrunt.hcl | 6 ++-- infra/live/prod/aws/lambda_api/terragrunt.hcl | 6 ++-- .../prod/aws/lambda_worker/terragrunt.hcl | 4 +-- .../terragrunt.hcl | 2 +- .../prod/aws/service_worker/terragrunt.hcl | 8 ++--- .../live/prod/aws/task_worker/terragrunt.hcl | 6 ++-- infra/modules/aws/lambda_api/README.md | 2 +- infra/modules/aws/lambda_worker/README.md | 2 +- .../{worker_messaging => 
messaging}/README.md | 2 +- .../{worker_messaging => messaging}/data.tf | 0 .../{worker_messaging => messaging}/local.tf | 0 .../{worker_messaging => messaging}/main.tf | 0 .../outputs.tf | 0 .../variables.tf | 0 .../versions.tf | 0 infra/modules/aws/service_worker/README.md | 6 ++-- infra/modules/aws/task_worker/README.md | 4 +-- 26 files changed, 67 insertions(+), 78 deletions(-) create mode 100644 infra/live/dependencies/messaging.hcl delete mode 100644 infra/live/dependencies/worker_messaging.hcl rename infra/live/dev/aws/{worker_messaging => messaging}/terragrunt.hcl (58%) rename infra/live/prod/aws/{worker_messaging => messaging}/terragrunt.hcl (58%) rename infra/modules/aws/{worker_messaging => messaging}/README.md (96%) rename infra/modules/aws/{worker_messaging => messaging}/data.tf (100%) rename infra/modules/aws/{worker_messaging => messaging}/local.tf (100%) rename infra/modules/aws/{worker_messaging => messaging}/main.tf (100%) rename infra/modules/aws/{worker_messaging => messaging}/outputs.tf (100%) rename infra/modules/aws/{worker_messaging => messaging}/variables.tf (100%) rename infra/modules/aws/{worker_messaging => messaging}/versions.tf (100%) diff --git a/.github/workflows/destroy.yml b/.github/workflows/destroy.yml index 8fde8de8..1dc6d880 100644 --- a/.github/workflows/destroy.yml +++ b/.github/workflows/destroy.yml @@ -190,10 +190,10 @@ jobs: role-to-assume: ${{ env.AWS_OIDC_ROLE_ARN }} aws-region: ${{ env.AWS_REGION }} - - name: Destroy worker messaging infra + - name: Destroy messaging infra uses: ./.github/actions/terragrunt with: - tg_directory: infra/live/${{ inputs.environment }}/aws/worker_messaging + tg_directory: infra/live/${{ inputs.environment }}/aws/messaging tg_action: destroy database: diff --git a/.github/workflows/shared_infra.yml b/.github/workflows/shared_infra.yml index 43af3482..aead7d34 100644 --- a/.github/workflows/shared_infra.yml +++ b/.github/workflows/shared_infra.yml @@ -78,7 +78,7 @@ jobs: tg_directory: 
infra/live/${{ inputs.environment }}/aws/oidc tg_action: ${{ inputs.tg_action }} - worker_messaging: + messaging: needs: oidc runs-on: ubuntu-latest steps: @@ -91,10 +91,10 @@ jobs: role-to-assume: ${{ env.AWS_OIDC_ROLE_ARN }} aws-region: ${{ env.AWS_REGION }} - - name: ${{ env.TG_ACTION_LABEL }} worker messaging infra + - name: ${{ env.TG_ACTION_LABEL }} messaging infra uses: ./.github/actions/terragrunt with: - tg_directory: infra/live/${{ inputs.environment }}/aws/worker_messaging + tg_directory: infra/live/${{ inputs.environment }}/aws/messaging tg_action: ${{ inputs.tg_action }} observability: @@ -247,7 +247,7 @@ jobs: - security - network - database - - worker_messaging + - messaging runs-on: ubuntu-latest strategy: fail-fast: false # this is to prevent terraform lock issues @@ -276,7 +276,7 @@ jobs: - cluster - network - database - - worker_messaging + - messaging runs-on: ubuntu-latest strategy: fail-fast: false diff --git a/infra/README.md b/infra/README.md index 88ac6740..5c9264c0 100644 --- a/infra/README.md +++ b/infra/README.md @@ -80,7 +80,7 @@ stores state at: Owns the VPC-attached Lambda used to run schema migrations against the shared Aurora PostgreSQL stack. - `rds_reader_tagger` Owns the EventBridge rule and Lambda that sync cluster tags onto new Aurora reader instances created later by scale-out. -- `worker_messaging` +- `messaging` Owns the shared worker SNS topic plus the Lambda-worker and ECS-worker SQS queues used for fanout. - `task_*` Register ECS task definitions. @@ -95,10 +95,10 @@ Current examples include: Shared CloudWatch dashboard shape for recent Lambda logs, ECS app logs, and ECS OTEL logs. - `rds_reader_tagger` Event-driven Aurora reader tag-sync shape: catch the RDS instance-created event, derive the parent cluster, and copy the cluster's non-AWS tags onto the new reader. 
-- `worker_messaging` +- `messaging` Shared worker fanout shape: one SNS topic publishes to two independent worker queues so Lambda and ECS consumers each receive the same event. - `task_worker` / `service_worker` - Internal ECS worker service shape, with the ECS worker queue owned by `worker_messaging` and a container health check based on a local worker heartbeat file. + Internal ECS worker service shape, with the ECS worker queue owned by `messaging` and a container health check based on a local worker heartbeat file. - `task_api` / `service_api` ECS API service shape exposed on the shared API Gateway at `/ecs` using `vpc_link` and `blue_green`, backed by a dedicated listener on the shared ALB. Through the frontend distribution it is reached at `/api/ecs/*`, while the Lambda API is reached at `/api/*`. @@ -142,7 +142,7 @@ That `containers/lib` directory is helper code only and is not treated as a depl ### Design Guidelines - avoid making one runtime depend on another runtime's state ownership unnecessarily - - for example, shared worker fanout state is owned by `worker_messaging` rather than by `lambda_worker` or `task_worker` + - for example, shared worker fanout state is owned by `messaging` rather than by `lambda_worker` or `task_worker` - prefer explicit ownership boundaries between stacks diff --git a/infra/live/dependencies/messaging.hcl b/infra/live/dependencies/messaging.hcl new file mode 100644 index 00000000..12399468 --- /dev/null +++ b/infra/live/dependencies/messaging.hcl @@ -0,0 +1,22 @@ +dependency "messaging" { + config_path = "${get_original_terragrunt_dir()}/../messaging" + + mock_outputs = { + worker_topic_name = "mock-worker-events" + worker_topic_arn = "arn:aws:sns:eu-west-2:111111111111:mock-worker-events" + worker_topic_publish_policy_arn = "arn:aws:iam::111111111111:policy/mock-worker-topic-publish" + lambda_worker_queue_name = "mock-lambda-worker-queue" + lambda_worker_queue_arn = 
"arn:aws:sqs:eu-west-2:111111111111:mock-lambda-worker-queue" + lambda_worker_queue_url = "https://sqs.eu-west-2.amazonaws.com/111111111111/mock-lambda-worker-queue" + lambda_worker_queue_read_policy_arn = "arn:aws:iam::111111111111:policy/mock-lambda-worker-queue-read" + lambda_worker_dead_letter_queue_name = "mock-lambda-worker-dlq" + lambda_worker_dead_letter_queue_url = "https://sqs.eu-west-2.amazonaws.com/111111111111/mock-lambda-worker-dlq" + ecs_worker_queue_name = "mock-ecs-worker-queue" + ecs_worker_queue_url = "https://sqs.eu-west-2.amazonaws.com/111111111111/mock-ecs-worker-queue" + ecs_worker_queue_read_policy_arn = "arn:aws:iam::111111111111:policy/mock-ecs-worker-queue-read" + } + + mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] +} + +inputs = dependency.messaging.outputs diff --git a/infra/live/dependencies/worker_messaging.hcl b/infra/live/dependencies/worker_messaging.hcl deleted file mode 100644 index f0e8638e..00000000 --- a/infra/live/dependencies/worker_messaging.hcl +++ /dev/null @@ -1,33 +0,0 @@ -dependency "worker_messaging" { - config_path = "${get_original_terragrunt_dir()}/../worker_messaging" - - mock_outputs = { - sns_topic_name = "mock-worker-events" - sns_topic_arn = "arn:aws:sns:eu-west-2:111111111111:mock-worker-events" - lambda_worker_queue_name = "mock-lambda-worker-queue" - lambda_worker_queue_arn = "arn:aws:sqs:eu-west-2:111111111111:mock-lambda-worker-queue" - lambda_worker_queue_url = "https://sqs.eu-west-2.amazonaws.com/111111111111/mock-lambda-worker-queue" - lambda_worker_queue_read_policy_arn = "arn:aws:iam::111111111111:policy/mock-lambda-worker-queue-read" - lambda_worker_dead_letter_queue_name = "mock-lambda-worker-dlq" - lambda_worker_dead_letter_queue_url = "https://sqs.eu-west-2.amazonaws.com/111111111111/mock-lambda-worker-dlq" - ecs_worker_queue_name = "mock-ecs-worker-queue" - ecs_worker_queue_url = "https://sqs.eu-west-2.amazonaws.com/111111111111/mock-ecs-worker-queue" 
- ecs_worker_queue_read_policy_arn = "arn:aws:iam::111111111111:policy/mock-ecs-worker-queue-read" - } - - mock_outputs_allowed_terraform_commands = ["validate", "plan", "destroy", "init", "show"] -} - -inputs = { - worker_topic_name = dependency.worker_messaging.outputs.sns_topic_name - worker_topic_arn = dependency.worker_messaging.outputs.sns_topic_arn - lambda_worker_queue_name = dependency.worker_messaging.outputs.lambda_worker_queue_name - lambda_worker_queue_arn = dependency.worker_messaging.outputs.lambda_worker_queue_arn - lambda_worker_queue_url = dependency.worker_messaging.outputs.lambda_worker_queue_url - lambda_worker_queue_read_policy_arn = dependency.worker_messaging.outputs.lambda_worker_queue_read_policy_arn - lambda_worker_dead_letter_queue_name = dependency.worker_messaging.outputs.lambda_worker_dead_letter_queue_name - lambda_worker_dead_letter_queue_url = dependency.worker_messaging.outputs.lambda_worker_dead_letter_queue_url - ecs_worker_queue_name = dependency.worker_messaging.outputs.ecs_worker_queue_name - ecs_worker_queue_url = dependency.worker_messaging.outputs.ecs_worker_queue_url - ecs_worker_queue_read_policy_arn = dependency.worker_messaging.outputs.ecs_worker_queue_read_policy_arn -} diff --git a/infra/live/dev/aws/lambda_api/terragrunt.hcl b/infra/live/dev/aws/lambda_api/terragrunt.hcl index 524bbb8a..957503d9 100644 --- a/infra/live/dev/aws/lambda_api/terragrunt.hcl +++ b/infra/live/dev/aws/lambda_api/terragrunt.hcl @@ -3,8 +3,8 @@ include "root" { } locals { - network = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) - worker_messaging = read_terragrunt_config(find_in_parent_folders("dependencies/worker_messaging.hcl")) + network = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) + messaging = read_terragrunt_config(find_in_parent_folders("dependencies/messaging.hcl")) } terraform { @@ -13,7 +13,7 @@ terraform { inputs = merge( local.network.inputs, - 
local.worker_messaging.inputs, + local.messaging.inputs, { api_5xx_alarm_threshold = 20.0 api_5xx_alarm_evaluation_periods = 1 diff --git a/infra/live/dev/aws/lambda_worker/terragrunt.hcl b/infra/live/dev/aws/lambda_worker/terragrunt.hcl index d33c0637..bdb94fdd 100644 --- a/infra/live/dev/aws/lambda_worker/terragrunt.hcl +++ b/infra/live/dev/aws/lambda_worker/terragrunt.hcl @@ -3,7 +3,7 @@ include "root" { } locals { - worker_messaging = read_terragrunt_config(find_in_parent_folders("dependencies/worker_messaging.hcl")) + messaging = read_terragrunt_config(find_in_parent_folders("dependencies/messaging.hcl")) } terraform { @@ -11,7 +11,7 @@ terraform { } inputs = merge( - local.worker_messaging.inputs, + local.messaging.inputs, { sqs_dlq_alarm_threshold = 1 # fail when any messages are in the DLQ (quick fail for testing) sqs_dlq_alarm_evaluation_periods = 1 diff --git a/infra/live/dev/aws/worker_messaging/terragrunt.hcl b/infra/live/dev/aws/messaging/terragrunt.hcl similarity index 58% rename from infra/live/dev/aws/worker_messaging/terragrunt.hcl rename to infra/live/dev/aws/messaging/terragrunt.hcl index fba5f066..b85cf717 100644 --- a/infra/live/dev/aws/worker_messaging/terragrunt.hcl +++ b/infra/live/dev/aws/messaging/terragrunt.hcl @@ -3,5 +3,5 @@ include "root" { } terraform { - source = "../../../../modules//aws//worker_messaging" + source = "../../../../modules//aws//messaging" } diff --git a/infra/live/dev/aws/service_worker/terragrunt.hcl b/infra/live/dev/aws/service_worker/terragrunt.hcl index becbcf76..7b23580e 100644 --- a/infra/live/dev/aws/service_worker/terragrunt.hcl +++ b/infra/live/dev/aws/service_worker/terragrunt.hcl @@ -7,9 +7,9 @@ include "security" { } locals { - worker_messaging = read_terragrunt_config(find_in_parent_folders("dependencies/worker_messaging.hcl")) - cluster = read_terragrunt_config(find_in_parent_folders("dependencies/cluster.hcl")) - network = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) + 
messaging = read_terragrunt_config(find_in_parent_folders("dependencies/messaging.hcl")) + cluster = read_terragrunt_config(find_in_parent_folders("dependencies/cluster.hcl")) + network = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) } terraform { @@ -20,7 +20,7 @@ inputs = merge( { ecs_security_group_id = dependency.security.outputs.ecs_sg }, - local.worker_messaging.inputs, + local.messaging.inputs, local.cluster.inputs, local.network.inputs, ) diff --git a/infra/live/dev/aws/task_worker/terragrunt.hcl b/infra/live/dev/aws/task_worker/terragrunt.hcl index 723c7a10..b28cbca9 100644 --- a/infra/live/dev/aws/task_worker/terragrunt.hcl +++ b/infra/live/dev/aws/task_worker/terragrunt.hcl @@ -3,12 +3,12 @@ include "root" { } locals { - worker_messaging = read_terragrunt_config(find_in_parent_folders("dependencies/worker_messaging.hcl")) - database = read_terragrunt_config(find_in_parent_folders("dependencies/database.hcl")) + messaging = read_terragrunt_config(find_in_parent_folders("dependencies/messaging.hcl")) + database = read_terragrunt_config(find_in_parent_folders("dependencies/database.hcl")) } terraform { source = "../../../../modules//aws//task_worker" } -inputs = merge(local.worker_messaging.inputs, local.database.inputs) +inputs = merge(local.messaging.inputs, local.database.inputs) diff --git a/infra/live/prod/aws/lambda_api/terragrunt.hcl b/infra/live/prod/aws/lambda_api/terragrunt.hcl index 766b1366..8e29bf96 100644 --- a/infra/live/prod/aws/lambda_api/terragrunt.hcl +++ b/infra/live/prod/aws/lambda_api/terragrunt.hcl @@ -3,8 +3,8 @@ include "root" { } locals { - network = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) - worker_messaging = read_terragrunt_config(find_in_parent_folders("dependencies/worker_messaging.hcl")) + network = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) + messaging = read_terragrunt_config(find_in_parent_folders("dependencies/messaging.hcl")) } 
terraform { @@ -13,7 +13,7 @@ terraform { inputs = merge( local.network.inputs, - local.worker_messaging.inputs, + local.messaging.inputs, { api_5xx_alarm_threshold = 5.0 api_5xx_alarm_evaluation_periods = 3 diff --git a/infra/live/prod/aws/lambda_worker/terragrunt.hcl b/infra/live/prod/aws/lambda_worker/terragrunt.hcl index 6d85a43e..294342f8 100644 --- a/infra/live/prod/aws/lambda_worker/terragrunt.hcl +++ b/infra/live/prod/aws/lambda_worker/terragrunt.hcl @@ -3,7 +3,7 @@ include "root" { } locals { - worker_messaging = read_terragrunt_config(find_in_parent_folders("dependencies/worker_messaging.hcl")) + messaging = read_terragrunt_config(find_in_parent_folders("dependencies/messaging.hcl")) } terraform { @@ -11,7 +11,7 @@ terraform { } inputs = merge( - local.worker_messaging.inputs, + local.messaging.inputs, { sqs_dlq_alarm_threshold = 5 # fail when there are 5 messages in the DLQ sqs_dlq_alarm_evaluation_periods = 3 diff --git a/infra/live/prod/aws/worker_messaging/terragrunt.hcl b/infra/live/prod/aws/messaging/terragrunt.hcl similarity index 58% rename from infra/live/prod/aws/worker_messaging/terragrunt.hcl rename to infra/live/prod/aws/messaging/terragrunt.hcl index fba5f066..b85cf717 100644 --- a/infra/live/prod/aws/worker_messaging/terragrunt.hcl +++ b/infra/live/prod/aws/messaging/terragrunt.hcl @@ -3,5 +3,5 @@ include "root" { } terraform { - source = "../../../../modules//aws//worker_messaging" + source = "../../../../modules//aws//messaging" } diff --git a/infra/live/prod/aws/service_worker/terragrunt.hcl b/infra/live/prod/aws/service_worker/terragrunt.hcl index becbcf76..7b23580e 100644 --- a/infra/live/prod/aws/service_worker/terragrunt.hcl +++ b/infra/live/prod/aws/service_worker/terragrunt.hcl @@ -7,9 +7,9 @@ include "security" { } locals { - worker_messaging = read_terragrunt_config(find_in_parent_folders("dependencies/worker_messaging.hcl")) - cluster = read_terragrunt_config(find_in_parent_folders("dependencies/cluster.hcl")) - network = 
read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) + messaging = read_terragrunt_config(find_in_parent_folders("dependencies/messaging.hcl")) + cluster = read_terragrunt_config(find_in_parent_folders("dependencies/cluster.hcl")) + network = read_terragrunt_config(find_in_parent_folders("dependencies/network.hcl")) } terraform { @@ -20,7 +20,7 @@ inputs = merge( { ecs_security_group_id = dependency.security.outputs.ecs_sg }, - local.worker_messaging.inputs, + local.messaging.inputs, local.cluster.inputs, local.network.inputs, ) diff --git a/infra/live/prod/aws/task_worker/terragrunt.hcl b/infra/live/prod/aws/task_worker/terragrunt.hcl index 723c7a10..b28cbca9 100644 --- a/infra/live/prod/aws/task_worker/terragrunt.hcl +++ b/infra/live/prod/aws/task_worker/terragrunt.hcl @@ -3,12 +3,12 @@ include "root" { } locals { - worker_messaging = read_terragrunt_config(find_in_parent_folders("dependencies/worker_messaging.hcl")) - database = read_terragrunt_config(find_in_parent_folders("dependencies/database.hcl")) + messaging = read_terragrunt_config(find_in_parent_folders("dependencies/messaging.hcl")) + database = read_terragrunt_config(find_in_parent_folders("dependencies/database.hcl")) } terraform { source = "../../../../modules//aws//task_worker" } -inputs = merge(local.worker_messaging.inputs, local.database.inputs) +inputs = merge(local.messaging.inputs, local.database.inputs) diff --git a/infra/modules/aws/lambda_api/README.md b/infra/modules/aws/lambda_api/README.md index ca353d15..b9195bf0 100644 --- a/infra/modules/aws/lambda_api/README.md +++ b/infra/modules/aws/lambda_api/README.md @@ -13,7 +13,7 @@ Lambda-backed public HTTP API module. 
## Dependencies - shared API Gateway HTTP API, VPC link, and JWT authorizer from `network` -- shared worker SNS topic from `worker_messaging` +- shared worker SNS topic from `messaging` ## Key outputs diff --git a/infra/modules/aws/lambda_worker/README.md b/infra/modules/aws/lambda_worker/README.md index 5fa82303..378f707d 100644 --- a/infra/modules/aws/lambda_worker/README.md +++ b/infra/modules/aws/lambda_worker/README.md @@ -15,4 +15,4 @@ Worker Lambda wrapper module. - SQS read policy ARN - log group -This is the concrete worker implementation on top of the shared Lambda primitives. It reads the Lambda worker queue from the `worker_messaging` stack so the same SNS event can fan out to both the Lambda and ECS worker consumers. +This is the concrete worker implementation on top of the shared Lambda primitives. It reads the Lambda worker queue from the `messaging` stack so the same SNS event can fan out to both the Lambda and ECS worker consumers. diff --git a/infra/modules/aws/worker_messaging/README.md b/infra/modules/aws/messaging/README.md similarity index 96% rename from infra/modules/aws/worker_messaging/README.md rename to infra/modules/aws/messaging/README.md index fa2a55d5..bbbfea40 100644 --- a/infra/modules/aws/worker_messaging/README.md +++ b/infra/modules/aws/messaging/README.md @@ -1,4 +1,4 @@ -# `worker_messaging` +# `messaging` Shared worker messaging stack. 
diff --git a/infra/modules/aws/worker_messaging/data.tf b/infra/modules/aws/messaging/data.tf similarity index 100% rename from infra/modules/aws/worker_messaging/data.tf rename to infra/modules/aws/messaging/data.tf diff --git a/infra/modules/aws/worker_messaging/local.tf b/infra/modules/aws/messaging/local.tf similarity index 100% rename from infra/modules/aws/worker_messaging/local.tf rename to infra/modules/aws/messaging/local.tf diff --git a/infra/modules/aws/worker_messaging/main.tf b/infra/modules/aws/messaging/main.tf similarity index 100% rename from infra/modules/aws/worker_messaging/main.tf rename to infra/modules/aws/messaging/main.tf diff --git a/infra/modules/aws/worker_messaging/outputs.tf b/infra/modules/aws/messaging/outputs.tf similarity index 100% rename from infra/modules/aws/worker_messaging/outputs.tf rename to infra/modules/aws/messaging/outputs.tf diff --git a/infra/modules/aws/worker_messaging/variables.tf b/infra/modules/aws/messaging/variables.tf similarity index 100% rename from infra/modules/aws/worker_messaging/variables.tf rename to infra/modules/aws/messaging/variables.tf diff --git a/infra/modules/aws/worker_messaging/versions.tf b/infra/modules/aws/messaging/versions.tf similarity index 100% rename from infra/modules/aws/worker_messaging/versions.tf rename to infra/modules/aws/messaging/versions.tf diff --git a/infra/modules/aws/service_worker/README.md b/infra/modules/aws/service_worker/README.md index 53e07b98..13447734 100644 --- a/infra/modules/aws/service_worker/README.md +++ b/infra/modules/aws/service_worker/README.md @@ -15,7 +15,7 @@ Concrete ECS worker service wrapper. 
## Inputs That Change Behavior - uses the worker task revision exported by `task_worker` -- uses autoscaling inputs derived from the shared ECS worker queue owned by `worker_messaging` +- uses autoscaling inputs derived from the shared ECS worker queue owned by `messaging` - uses placeholder values during bootstrap applies so the first service apply does not require pre-existing task state ## Outputs Consumers Rely On @@ -39,10 +39,10 @@ Concrete ECS worker service wrapper. - expects the live Terragrunt stack to pass the shared ECS worker queue name through a `dependency` block to drive autoscaling - expects the live Terragrunt stack to pass the shared `cluster` and `network` outputs as explicit inputs - expects the live Terragrunt stack to pass the ECS runtime security group id as an explicit input -- relies on `worker_messaging` owning the queue contract rather than duplicating queue state locally +- relies on `messaging` owning the queue contract rather than duplicating queue state locally - for bootstrap-friendly plan and validate flows, prefer Terragrunt dependency mocks in the live stack rather than sibling state reads inside the module -It uses the shared ECS worker queue name exported by `worker_messaging` for service autoscaling. +It uses the shared ECS worker queue name exported by `messaging` for service autoscaling. During bootstrap applies, it uses placeholder values instead of reading task outputs directly so the bootstrap path does not need a pre-existing task state file. ## Inherits Behavior From diff --git a/infra/modules/aws/task_worker/README.md b/infra/modules/aws/task_worker/README.md index a09ed8bd..25b6b622 100644 --- a/infra/modules/aws/task_worker/README.md +++ b/infra/modules/aws/task_worker/README.md @@ -40,8 +40,8 @@ Concrete ECS worker task wrapper. 
## Dependency Notes -- expects the live Terragrunt stack to pass queue details from `worker_messaging` through a `dependency` block +- expects the live Terragrunt stack to pass queue details from `messaging` through a `dependency` block - expects the live Terragrunt stack to pass shared `database` connection details as explicit inputs - publishes the task definition consumed by `service_worker` -This module is the image-driven deployment unit for the ECS worker. It consumes the ECS worker queue contract owned by `worker_messaging` and the shared `database` contract passed in from the live Terragrunt stack so the task definition and service can use the same fanout event stream and Aurora PostgreSQL connection details without the module reading sibling stack state directly. +This module is the image-driven deployment unit for the ECS worker. It consumes the ECS worker queue contract owned by `messaging` and the shared `database` contract passed in from the live Terragrunt stack so the task definition and service can use the same fanout event stream and Aurora PostgreSQL connection details without the module reading sibling stack state directly. From 6e3a68fbbdeeeae093edfc0f2a0c2d12024d18ba Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Mon, 18 May 2026 11:23:16 +0100 Subject: [PATCH 32/34] fix: output naming --- infra/modules/aws/messaging/README.md | 5 +++-- infra/modules/aws/messaging/outputs.tf | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/infra/modules/aws/messaging/README.md b/infra/modules/aws/messaging/README.md index bbbfea40..cf5910ab 100644 --- a/infra/modules/aws/messaging/README.md +++ b/infra/modules/aws/messaging/README.md @@ -11,8 +11,9 @@ Shared worker messaging stack. 
## Key outputs -- `sns_topic_arn` -- `sns_topic_publish_policy_arn` +- `worker_topic_name` +- `worker_topic_arn` +- `worker_topic_publish_policy_arn` - `lambda_worker_queue_name` - `lambda_worker_queue_url` - `lambda_worker_queue_read_policy_arn` diff --git a/infra/modules/aws/messaging/outputs.tf b/infra/modules/aws/messaging/outputs.tf index a365faf1..fe513772 100644 --- a/infra/modules/aws/messaging/outputs.tf +++ b/infra/modules/aws/messaging/outputs.tf @@ -1,12 +1,12 @@ -output "sns_topic_name" { +output "worker_topic_name" { value = aws_sns_topic.worker_events.name } -output "sns_topic_arn" { +output "worker_topic_arn" { value = aws_sns_topic.worker_events.arn } -output "sns_topic_publish_policy_arn" { +output "worker_topic_publish_policy_arn" { value = aws_iam_policy.topic_publish.arn } From 4b4dbf4b0b7cb02732be1a70ef8775dbc0946276 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Mon, 18 May 2026 11:38:26 +0100 Subject: [PATCH 33/34] chore: un-auth health ep --- infra/modules/aws/lambda_api/README.md | 1 + infra/modules/aws/lambda_api/main.tf | 6 ++++++ lambdas/lambda_api/README.md | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/infra/modules/aws/lambda_api/README.md b/infra/modules/aws/lambda_api/README.md index b9195bf0..212c0a50 100644 --- a/infra/modules/aws/lambda_api/README.md +++ b/infra/modules/aws/lambda_api/README.md @@ -25,3 +25,4 @@ Lambda-backed public HTTP API module. This module is Lambda-specific. The shared API surface and shared JWT authorizer now live in `network`. When accessed through the frontend CloudFront distribution, the public Lambda path is `/api/*` because CloudFront strips the leading `/api` prefix before forwarding to API Gateway. The packaged runtime can publish JSON payloads to the shared worker SNS topic via `POST /messages`, which fans the message out to both the Lambda and ECS worker queues. 
+The public `GET /health` route is intentionally left unauthenticated so external uptime checks do not need a JWT, while the catch-all API routes remain JWT-protected. diff --git a/infra/modules/aws/lambda_api/main.tf b/infra/modules/aws/lambda_api/main.tf index 37083e3d..ef453167 100644 --- a/infra/modules/aws/lambda_api/main.tf +++ b/infra/modules/aws/lambda_api/main.tf @@ -47,6 +47,12 @@ resource "aws_apigatewayv2_route" "root" { authorizer_id = var.network_http_api_authorizer_id } +resource "aws_apigatewayv2_route" "health" { + api_id = var.network_api_id + route_key = "GET /health" + target = "integrations/${aws_apigatewayv2_integration.lambda_proxy.id}" +} + resource "aws_apigatewayv2_route" "proxy" { api_id = var.network_api_id route_key = "ANY /{proxy+}" diff --git a/lambdas/lambda_api/README.md b/lambdas/lambda_api/README.md index 0b7d16c4..2e1b42f6 100644 --- a/lambdas/lambda_api/README.md +++ b/lambdas/lambda_api/README.md @@ -14,7 +14,7 @@ Public Lambda-backed HTTP API. - `GET /` Basic success response - `GET /health` - Health response + Health response. This route is intentionally unauthenticated at the API Gateway layer. 
- `GET /fail` - `GET /error` Forced 500 response for alarm and rollback testing From da3e7afe4d493bb2346c72ba1b5a387a4ffe7d14 Mon Sep 17 00:00:00 2001 From: chrispsheehan Date: Mon, 18 May 2026 12:33:33 +0100 Subject: [PATCH 34/34] chore: fix renaming --- .github/workflows/destroy.yml | 4 ++-- README.md | 2 +- infra/modules/aws/lambda_worker/README.md | 2 +- infra/modules/aws/messaging/README.md | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/destroy.yml b/.github/workflows/destroy.yml index 1dc6d880..0ef17300 100644 --- a/.github/workflows/destroy.yml +++ b/.github/workflows/destroy.yml @@ -175,8 +175,8 @@ jobs: tg_directory: infra/live/${{ inputs.environment }}/aws/${{ matrix.value }} tg_action: destroy - worker-messaging: - name: Worker Messaging + messaging: + name: Messaging runs-on: ubuntu-latest needs: - lambdas diff --git a/README.md b/README.md index e3a921c7..948b90d8 100644 --- a/README.md +++ b/README.md @@ -164,7 +164,7 @@ just frontend That Vite server is also started automatically by `just start`. It proxies `/api/*` to the local Lambda API and `/api/ecs/*` to the local ECS API with the same prefix stripping the deployed CloudFront distribution performs. It also serves `auth-config.json` with no-cache headers locally so frontend auth config changes are picked up immediately. When `frontend/public/auth-config.json` has `"enabled": false`, the frontend runs in a local unauthenticated mode instead of redirecting to Cognito. 
-The local ElasticMQ config now mirrors the shared AWS worker-messaging contract by exposing: +The local ElasticMQ config now mirrors the shared AWS messaging contract by exposing: - `lambda-worker-queue` for the Lambda worker consumer - `ecs-worker-queue` for the ECS worker consumer diff --git a/infra/modules/aws/lambda_worker/README.md b/infra/modules/aws/lambda_worker/README.md index 378f707d..9009bd15 100644 --- a/infra/modules/aws/lambda_worker/README.md +++ b/infra/modules/aws/lambda_worker/README.md @@ -5,7 +5,7 @@ Worker Lambda wrapper module. ## Owns - worker Lambda via `_shared/lambda` -- Lambda worker event-source mapping onto the shared worker messaging queue +- Lambda worker event-source mapping onto the shared messaging queue - DLQ alarming for the Lambda worker queue ## Key outputs diff --git a/infra/modules/aws/messaging/README.md b/infra/modules/aws/messaging/README.md index cf5910ab..8790208d 100644 --- a/infra/modules/aws/messaging/README.md +++ b/infra/modules/aws/messaging/README.md @@ -1,6 +1,6 @@ # `messaging` -Shared worker messaging stack. +Shared messaging stack. ## Owns