From c5f0c43dd40788fc6c85f686746517db0243fa17 Mon Sep 17 00:00:00 2001 From: Sandy Forrester Date: Thu, 29 Jan 2026 11:37:14 +0000 Subject: [PATCH 01/19] NRL-721 Create clone and delete scripts WIP --- README.md | 5 +- scripts/clone_dynamodb_table.py | 103 +++++++++++++++ scripts/delete_all_table_items.py | 39 ++++++ scripts/seed_sandbox_table.py | 204 ++++++++++++++++++++++++++++++ 4 files changed, 349 insertions(+), 2 deletions(-) create mode 100755 scripts/clone_dynamodb_table.py create mode 100755 scripts/delete_all_table_items.py create mode 100755 scripts/seed_sandbox_table.py diff --git a/README.md b/README.md index 9dd572102..b4bf5c83d 100644 --- a/README.md +++ b/README.md @@ -375,8 +375,9 @@ In order to deploy to a sandbox environment (`dev-sandbox`, `qa-sandbox`, `int-s ### Sandbox database clear and reseed -Any workspace suffixed with `-sandbox` has a small amount of additional infrastructure deployed to clear and reseed the DynamoDB tables (auth and document pointers) using a Lambda running -on a cron schedule that can be found in the `cron/seed_sandbox` directory in the root of this project. The data used to seed the DynamoDB tables can found in the `cron/seed_sandbox/data` directory. + + ### Sandbox authorisation diff --git a/scripts/clone_dynamodb_table.py b/scripts/clone_dynamodb_table.py new file mode 100755 index 000000000..326d25876 --- /dev/null +++ b/scripts/clone_dynamodb_table.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python +import boto3 +import fire + + +def clone_table(source_table_name, target_table_name, copy_items=True, max_items=None): + """ + Create a copy of a DynamoDB table for testing. + + Args: + source_table_name: Name of table to clone + target_table_name: Name for the new table + copy_items: Whether to copy data (default: True) + max_items: Max items to copy (None = all) + """ + dynamodb = boto3.client("dynamodb") + resource = boto3.resource("dynamodb") + + # Get source table schema + source = dynamodb.describe_table(TableName=source_table_name)["Table"] + + # Create new table with same schema + create_params = { + "TableName": target_table_name, + "KeySchema": source["KeySchema"], + "AttributeDefinitions": source["AttributeDefinitions"], + } + + # Copy billing mode from source table + if "BillingModeSummary" in source: + create_params["BillingMode"] = source["BillingModeSummary"]["BillingMode"] + # If provisioned, copy the capacity settings + if source["BillingModeSummary"]["BillingMode"] == "PROVISIONED": + create_params["ProvisionedThroughput"] = { + "ReadCapacityUnits": source["ProvisionedThroughput"][ + "ReadCapacityUnits" + ], + "WriteCapacityUnits": source["ProvisionedThroughput"][ + "WriteCapacityUnits" + ], + } + else: + # Older tables without BillingModeSummary default to provisioned + create_params["BillingMode"] = "PROVISIONED" + create_params["ProvisionedThroughput"] = { + "ReadCapacityUnits": source["ProvisionedThroughput"]["ReadCapacityUnits"], + "WriteCapacityUnits": source["ProvisionedThroughput"]["WriteCapacityUnits"], + } + + # Copy GSIs if they exist + if "GlobalSecondaryIndexes" in source: + create_params["GlobalSecondaryIndexes"] = [ + { + "IndexName": gsi["IndexName"], + "KeySchema": gsi["KeySchema"], + "Projection": gsi["Projection"], + } + for gsi in source["GlobalSecondaryIndexes"] + ] + + print(f"Creating table {target_table_name}...") + dynamodb.create_table(**create_params) + + # Wait for table to be active + waiter = dynamodb.get_waiter("table_exists") + waiter.wait(TableName=target_table_name) + print("Table created and 
active") + + # Copy items if requested + if copy_items: + source_table = resource.Table(source_table_name) + target_table = resource.Table(target_table_name) + + count = 0 + with target_table.batch_writer() as batch: + response = source_table.scan() + + for item in response["Items"]: + batch.put_item(Item=item) + count += 1 + if max_items and count >= max_items: + break + + # Paginate if needed + while "LastEvaluatedKey" in response and ( + not max_items or count < max_items + ): + response = source_table.scan( + ExclusiveStartKey=response["LastEvaluatedKey"] + ) + for item in response["Items"]: + batch.put_item(Item=item) + count += 1 + if max_items and count >= max_items: + break + + print(f"Copied {count} items") + + return target_table_name + + +if __name__ == "__main__": + fire.Fire(clone_table) diff --git a/scripts/delete_all_table_items.py b/scripts/delete_all_table_items.py new file mode 100755 index 000000000..dc6e17cd5 --- /dev/null +++ b/scripts/delete_all_table_items.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python +import boto3 +import fire + + +def delete_all_table_items(table_name): + """Delete all items from a DynamoDB table.""" + dynamodb = boto3.resource("dynamodb") + table = dynamodb.Table(table_name) + + # Get the table's key schema + key_names = [key["AttributeName"] for key in table.key_schema] + + # Scan and delete + with table.batch_writer() as batch: + scan_kwargs = { + "ProjectionExpression": ",".join(key_names), # Only fetch keys + "ConsistentRead": False, + } + + while True: + response = table.scan(**scan_kwargs) + + for item in response["Items"]: + batch.delete_item(Key=item) + + # Check for more items + if "LastEvaluatedKey" not in response: + break + scan_kwargs["ExclusiveStartKey"] = response["LastEvaluatedKey"] + + print(f"Cleared all items from {table_name}") + + +if __name__ == "__main__": + fire.Fire(delete_all_table_items) + +# Usage +# delete_all_table_items('my-sandbox-table') diff --git a/scripts/seed_sandbox_table.py b/scripts/seed_sandbox_table.py new file mode 100755 index 000000000..c346002b9 --- /dev/null +++ b/scripts/seed_sandbox_table.py @@ -0,0 +1,204 @@ +#!/usr/bin/env python +""" +Seeds a sandbox table with realistic pointer data using sample templates. +Creates 2 pointers of each type for 2 different custodians. 
+""" +import csv +import json +import os +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +import boto3 +import fire + +from nrlf.core.dynamodb.model import DocumentPointer +from nrlf.core.logger import logger +from nrlf.tests.data import load_document_reference +from tests.performance.perftest_environment import TestNhsNumbersIterator + +logger.setLevel("ERROR") + +resource = boto3.resource("dynamodb") + +# Mapping of sample files to pointer types +SAMPLE_TEMPLATES = { + "1382601000000107": "QUY_RESPECT_FORM_Feb25.json", + "16521000000101": "G3H9E_LLOYD_GEORGE_RECORD_FOLDER_Aug25.json", + "2181441000000107": "11X_PERSONALISED_CARE_AND_SUPPORT_PLAN_Feb25.json", + "735324008": "11X_TREATMENT_ESCALATION_PLAN_Feb25.json", + "736253002": "RAT_MENTAL_HEALTH_PLAN_Feb25.json", + "736366004": "11X_ADVANCE_CARE_PLAN_Feb25.json", + "861421000000109": "VM8W7_EOL_COORDINATION_SUMMARY_Feb25.json", + "887701000000100": "B3H2B_EMERGENCY_HEALTHCARE_PLAN_Feb25.json", +} + +CUSTODIANS = ["RX898", "Y12345"] # Two different custodians + + +def _load_sample_template(filename: str) -> dict: + """Load a sample JSON file as a template.""" + samples_dir = Path(__file__).parent.parent / "tests" / "data" / "samples" + filepath = samples_dir / filename + + with open(filepath, "r") as f: + return json.load(f) + + +def _make_realistic_pointer( + template: dict, + pointer_type: str, + custodian: str, + nhs_number: str, + counter: int, +) -> DocumentPointer: + """ + Create a pointer from a realistic template, customizing key fields. + """ + # Deep copy the template + doc_ref_dict = json.loads(json.dumps(template)) + + # Customize with test-specific values + doc_ref_dict["id"] = f"{custodian}-SANDBOX-{str(counter).zfill(6)}" + doc_ref_dict["subject"]["identifier"]["value"] = nhs_number + doc_ref_dict["custodian"]["identifier"]["value"] = custodian + doc_ref_dict["author"][0]["identifier"]["value"] = custodian + + # Ensure the masterIdentifier is unique + if "masterIdentifier" in doc_ref_dict: + doc_ref_dict["masterIdentifier"]["value"] = f"sandbox-{custodian}-{counter}" + + # Convert dict to DocumentReference and then to DocumentPointer + from nrlf.core.model import DocumentReference + + doc_ref = DocumentReference(**doc_ref_dict) + + pointer = DocumentPointer.from_document_reference(doc_ref, source="SANDBOX-SEED") + return pointer + + +def seed_sandbox_table(table_name: str, pointers_per_type: int = 2): + """ + Seed a sandbox table with realistic pointer data. 
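+
+    Items are written with DynamoDB batch_write_item in batches of 25 (the
+    service maximum for a single batch), and a CSV extract of the created
+    pointers is written under dist/sandbox/ for reference.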
+ + Args: + table_name: Name of the DynamoDB table to seed + pointers_per_type: Number of pointers per type per custodian (default: 2) + """ + print( + f"Seeding table {table_name} with {pointers_per_type} pointers per type per custodian" + ) + print(f"Total pointer types: {len(SAMPLE_TEMPLATES)}") + print(f"Total custodians: {len(CUSTODIANS)}") + print( + f"Total pointers to create: {len(SAMPLE_TEMPLATES) * len(CUSTODIANS) * pointers_per_type}" + ) + + table = resource.Table(table_name) + + # Set up NHS number iterator + testnum_cls = TestNhsNumbersIterator() + testnum_iter = iter(testnum_cls) + + counter = 0 + pointer_data: list[list[str]] = [] + batch_upsert_items: list[dict[str, Any]] = [] + + start_time = datetime.now(tz=timezone.utc) + + # Load templates once to avoid repeated file I/O + templates = {} + for pointer_type, filename in SAMPLE_TEMPLATES.items(): + try: + templates[pointer_type] = _load_sample_template(filename) + print(f"✓ Loaded template for type {pointer_type}") + except Exception as e: + print(f"✗ Failed to load template {filename}: {e}") + continue + + # Generate pointers + for pointer_type, template in templates.items(): + for custodian in CUSTODIANS: + for i in range(pointers_per_type): + counter += 1 + nhs_number = next(testnum_iter) + + try: + pointer = _make_realistic_pointer( + template, pointer_type, custodian, nhs_number, counter + ) + + put_req = {"PutRequest": {"Item": pointer.model_dump()}} + batch_upsert_items.append(put_req) + + pointer_data.append( + [ + pointer.id, + pointer_type, + pointer.custodian, + pointer.nhs_number, + ] + ) + + # Batch write every 25 items + if len(batch_upsert_items) >= 25: + response = resource.batch_write_item( + RequestItems={table_name: batch_upsert_items} + ) + + if response.get("UnprocessedItems"): + unprocessed = len( + response["UnprocessedItems"].get(table_name, []) + ) + print(f"Warning: {unprocessed} unprocessed items") + + batch_upsert_items = [] + print(".", end="", flush=True) + + except Exception as e: + print(f"\n✗ Error creating pointer {counter}: {e}") + continue + + # Write any remaining items + if batch_upsert_items: + response = resource.batch_write_item( + RequestItems={table_name: batch_upsert_items} + ) + if response.get("UnprocessedItems"): + unprocessed = len(response["UnprocessedItems"].get(table_name, [])) + print(f"\nWarning: {unprocessed} unprocessed items in final batch") + + print("\n✓ Done!") + + end_time = datetime.now(tz=timezone.utc) + duration = (end_time - start_time).total_seconds() + + print(f"\nCreated {counter} pointers in {duration:.2f} seconds") + print(f"Average: {counter/duration:.2f} pointers/second") + + # Write extract file + _write_pointer_extract(table_name, pointer_data) + + return counter + + +def _write_pointer_extract(table_name: str, pointer_data: list[list[str]]): + """Write pointer data to CSV file for reference.""" + output_dir = Path(__file__).parent.parent / "dist" / "sandbox" + output_dir.mkdir(parents=True, exist_ok=True) + + csv_file = ( + output_dir / f"sandbox-pointers-{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv" + ) + + with open(csv_file, "w") as f: + writer = csv.writer(f) + writer.writerow(["pointer_id", "pointer_type", "custodian", "nhs_number"]) + writer.writerows(pointer_data) + + print(f"Pointer data saved to {csv_file}") + + +if __name__ == "__main__": + fire.Fire(seed_sandbox_table) From 8a876b37e1c6ddef5501533949ffb9f0d3b10d78 Mon Sep 17 00:00:00 2001 From: Sandy Forrester Date: Fri, 6 Feb 2026 14:39:08 +0000 Subject: [PATCH 02/19] NRL-721 Reseed 
pointer scripts and tests --- layer/nrlf/core/constants.py | 1 + scripts/delete_all_table_items.py | 73 ++-- scripts/reset_sandbox_table.py | 48 +++ scripts/seed_nft_tables.py | 25 +- scripts/seed_sandbox_table.py | 282 +++++++++++----- scripts/seed_utils.py | 37 +++ scripts/tests/test_delete_all_table_items.py | 267 +++++++++++++++ scripts/tests/test_seed_sandbox_table.py | 329 +++++++++++++++++++ 8 files changed, 934 insertions(+), 128 deletions(-) create mode 100755 scripts/reset_sandbox_table.py create mode 100644 scripts/seed_utils.py create mode 100644 scripts/tests/test_delete_all_table_items.py create mode 100644 scripts/tests/test_seed_sandbox_table.py diff --git a/layer/nrlf/core/constants.py b/layer/nrlf/core/constants.py index 793c2458c..ec4485ddd 100644 --- a/layer/nrlf/core/constants.py +++ b/layer/nrlf/core/constants.py @@ -5,6 +5,7 @@ class Source(Enum): NRLF = "NRLF" LEGACY = "NRL" # not actually used PERFTEST = "NFT-SEED" + SANDBOX = "SANDBOX-SEED" VALID_SOURCES = frozenset(item.value for item in Source.__members__.values()) diff --git a/scripts/delete_all_table_items.py b/scripts/delete_all_table_items.py index dc6e17cd5..3c1e752a6 100755 --- a/scripts/delete_all_table_items.py +++ b/scripts/delete_all_table_items.py @@ -1,6 +1,9 @@ #!/usr/bin/env python +import sys + import boto3 import fire +from botocore.exceptions import ClientError def delete_all_table_items(table_name): @@ -8,32 +11,62 @@ def delete_all_table_items(table_name): dynamodb = boto3.resource("dynamodb") table = dynamodb.Table(table_name) - # Get the table's key schema - key_names = [key["AttributeName"] for key in table.key_schema] + try: + # Verify the table exists + key_names = [key["AttributeName"] for key in table.key_schema] + except ClientError as e: + error_code = e.response["Error"]["Code"] + if error_code == "ResourceNotFoundException": + print(f"Error: Table '{table_name}' does not exist") + sys.exit(1) + elif error_code == "AccessDeniedException": + print(f"Error: No permission to access table '{table_name}'") + sys.exit(1) + else: + print(f"Error accessing table: {e}") + sys.exit(1) + + # Scan and delete items in batches + deleted_count = 0 + try: + with table.batch_writer() as batch: + scan_kwargs = { + "ProjectionExpression": ",".join(key_names), + } + + while True: + try: + response = table.scan(**scan_kwargs) - # Scan and delete - with table.batch_writer() as batch: - scan_kwargs = { - "ProjectionExpression": ",".join(key_names), # Only fetch keys - "ConsistentRead": False, - } + for item in response["Items"]: + batch.delete_item(Key=item) + deleted_count += 1 - while True: - response = table.scan(**scan_kwargs) + if "LastEvaluatedKey" not in response: + break + scan_kwargs["ExclusiveStartKey"] = response["LastEvaluatedKey"] - for item in response["Items"]: - batch.delete_item(Key=item) + if deleted_count % 100 == 0: + print(f"Deleted {deleted_count} items...", end="\r") - # Check for more items - if "LastEvaluatedKey" not in response: - break - scan_kwargs["ExclusiveStartKey"] = response["LastEvaluatedKey"] + except ClientError as e: + error_code = e.response["Error"]["Code"] + if error_code == "ProvisionedThroughputExceededException": + print( + f"\nWarning: Throttled at {deleted_count} items. Retrying..." 
+ ) + continue + else: + raise - print(f"Cleared all items from {table_name}") + except Exception as e: + print(f"\nError during deletion: {e}") + print(f"Successfully deleted {deleted_count} items before error") + sys.exit(1) + + print(f"\n✓ Cleared {deleted_count} items from {table_name}") + return deleted_count if __name__ == "__main__": fire.Fire(delete_all_table_items) - -# Usage -# delete_all_table_items('my-sandbox-table') diff --git a/scripts/reset_sandbox_table.py b/scripts/reset_sandbox_table.py new file mode 100755 index 000000000..954e59432 --- /dev/null +++ b/scripts/reset_sandbox_table.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python +""" +Resets a sandbox table by clearing all items and reseeding with fresh data. +""" +import sys + +import fire +from delete_all_table_items import delete_all_table_items +from seed_sandbox_table import seed_sandbox_table + + +def reset_sandbox_table(table_name: str, pointers_per_type: int = 2): + """ + Reset a sandbox table by clearing all items and reseeding with fresh data. + + Args: + table_name: Name of the DynamoDB table to reset + pointers_per_type: Number of pointers per type per custodian (default: 2) + """ + print(f"=== Resetting Sandbox Table: {table_name} ===\n") + + print("Step 1: Deleting all existing items...") + try: + delete_all_table_items(table_name) + print() + except SystemExit as e: + print("✗ Failed to delete items. Aborting reset.") + sys.exit(e.code) + except Exception as e: + print(f"✗ Unexpected error during deletion: {e}") + sys.exit(1) + + print("Step 2: Seeding with fresh pointer data...") + try: + count = seed_sandbox_table(table_name, pointers_per_type, force=True) + print(f"\n=== ✓ Reset Complete ===") + print(f"Table '{table_name}' has been reset with {count} fresh pointers") + return count + except SystemExit as e: + print("✗ Failed to seed table after deletion.") + sys.exit(e.code) + except Exception as e: + print(f"✗ Unexpected error during seeding: {e}") + sys.exit(1) + + +if __name__ == "__main__": + fire.Fire(reset_sandbox_table) diff --git a/scripts/seed_nft_tables.py b/scripts/seed_nft_tables.py index 5c5d118bd..9b69ffd22 100644 --- a/scripts/seed_nft_tables.py +++ b/scripts/seed_nft_tables.py @@ -9,6 +9,7 @@ import boto3 import fire import numpy as np +from seed_utils import TestNhsNumbersIterator from nrlf.core.boto import get_s3_client from nrlf.core.constants import ( @@ -38,30 +39,6 @@ DOC_REF_TEMPLATE = load_document_reference("NFT-template") -class TestNhsNumbersIterator: - def __iter__(self): - self.first9 = 900000000 - return self - - def __next__(self): - if self.first9 > 999999999: - raise StopIteration - checksum = 10 - while checksum == 10: - self.first9 += 1 - nhs_no_digits = list(map(int, str(self.first9))) - checksum = ( - sum( - weight * digit - for weight, digit in zip(CHECKSUM_WEIGHTS, nhs_no_digits) - ) - * -1 - % 11 - ) - nhs_no = str(self.first9) + str(checksum) - return nhs_no - - def _make_seed_pointer( type_code: str, custodian: str, nhs_number: str, counter: int ) -> DocumentPointer: diff --git a/scripts/seed_sandbox_table.py b/scripts/seed_sandbox_table.py index c346002b9..daf6ee847 100755 --- a/scripts/seed_sandbox_table.py +++ b/scripts/seed_sandbox_table.py @@ -1,28 +1,31 @@ #!/usr/bin/env python """ Seeds a sandbox table with realistic pointer data using sample templates. -Creates 2 pointers of each type for 2 different custodians. +Creates 2 pointers of each type for 2 different custodians, one of which is the custodian that all sandbox users are represented by. 
""" +import copy import csv import json import os +import sys from datetime import datetime, timezone from pathlib import Path from typing import Any import boto3 import fire +from botocore.exceptions import ClientError +from seed_utils import TestNhsNumbersIterator from nrlf.core.dynamodb.model import DocumentPointer from nrlf.core.logger import logger +from nrlf.producer.fhir.r4.model import DocumentReference from nrlf.tests.data import load_document_reference -from tests.performance.perftest_environment import TestNhsNumbersIterator logger.setLevel("ERROR") resource = boto3.resource("dynamodb") -# Mapping of sample files to pointer types SAMPLE_TEMPLATES = { "1382601000000107": "QUY_RESPECT_FORM_Feb25.json", "16521000000101": "G3H9E_LLOYD_GEORGE_RECORD_FOLDER_Aug25.json", @@ -34,11 +37,12 @@ "887701000000100": "B3H2B_EMERGENCY_HEALTHCARE_PLAN_Feb25.json", } -CUSTODIANS = ["RX898", "Y12345"] # Two different custodians +# Y05868 is the test custodian required for int-sandbox, since it's the custodian that all sandbox users are represented by +CUSTODIANS = ["Y05868", "Y12345"] +AUTHOR = "X54321" def _load_sample_template(filename: str) -> dict: - """Load a sample JSON file as a template.""" samples_dir = Path(__file__).parent.parent / "tests" / "data" / "samples" filepath = samples_dir / filename @@ -53,24 +57,20 @@ def _make_realistic_pointer( nhs_number: str, counter: int, ) -> DocumentPointer: - """ - Create a pointer from a realistic template, customizing key fields. - """ - # Deep copy the template - doc_ref_dict = json.loads(json.dumps(template)) - # Customize with test-specific values - doc_ref_dict["id"] = f"{custodian}-SANDBOX-{str(counter).zfill(6)}" + doc_ref_dict = copy.deepcopy(template) + + doc_ref_dict["id"] = ( + f"{custodian}-SANDBOX-{str(counter).zfill(6)}" # Make this more realistic - currently similar to Y12345-SANDBOX-000016 + ) doc_ref_dict["subject"]["identifier"]["value"] = nhs_number doc_ref_dict["custodian"]["identifier"]["value"] = custodian - doc_ref_dict["author"][0]["identifier"]["value"] = custodian + doc_ref_dict["author"][0]["identifier"]["value"] = AUTHOR - # Ensure the masterIdentifier is unique if "masterIdentifier" in doc_ref_dict: - doc_ref_dict["masterIdentifier"]["value"] = f"sandbox-{custodian}-{counter}" - - # Convert dict to DocumentReference and then to DocumentPointer - from nrlf.core.model import DocumentReference + doc_ref_dict["masterIdentifier"][ + "value" + ] = f"sandbox-{custodian}-{counter}" # Make this more realistic - currently similar to "sandbox-Y12345-32" doc_ref = DocumentReference(**doc_ref_dict) @@ -78,51 +78,94 @@ def _make_realistic_pointer( return pointer -def seed_sandbox_table(table_name: str, pointers_per_type: int = 2): - """ - Seed a sandbox table with realistic pointer data. 
- - Args: - table_name: Name of the DynamoDB table to seed - pointers_per_type: Number of pointers per type per custodian (default: 2) - """ - print( - f"Seeding table {table_name} with {pointers_per_type} pointers per type per custodian" - ) - print(f"Total pointer types: {len(SAMPLE_TEMPLATES)}") - print(f"Total custodians: {len(CUSTODIANS)}") - print( - f"Total pointers to create: {len(SAMPLE_TEMPLATES) * len(CUSTODIANS) * pointers_per_type}" - ) - - table = resource.Table(table_name) - - # Set up NHS number iterator - testnum_cls = TestNhsNumbersIterator() - testnum_iter = iter(testnum_cls) - - counter = 0 - pointer_data: list[list[str]] = [] - batch_upsert_items: list[dict[str, Any]] = [] - - start_time = datetime.now(tz=timezone.utc) +def _validate_table_access(table_name: str): + """Validate that the table exists and can be accessed""" + try: + table = resource.Table(table_name) + table.load() + return table + except ClientError as e: + error_code = e.response["Error"]["Code"] + if error_code == "ResourceNotFoundException": + print(f"Error: Table '{table_name}' does not exist") + sys.exit(1) + elif error_code == "AccessDeniedException": + print(f"Error: No permission to access table '{table_name}'") + sys.exit(1) + else: + print(f"Error accessing table: {e}") + sys.exit(1) + + +def _check_for_existing_sandbox_pointers(table, force: bool): + if force: + print("⚠️ Force mode enabled - will overwrite existing sandbox pointers") + return + + try: + response = table.scan( + FilterExpression="begins_with(#src, :sandbox)", + ExpressionAttributeNames={"#src": "source"}, + ExpressionAttributeValues={":sandbox": "SANDBOX"}, + Limit=1, + ProjectionExpression="id", + ) - # Load templates once to avoid repeated file I/O + if response.get("Items"): + print("\n⚠️ Warning: Sandbox pointers already exist in this table.") + print( + "Running this script will OVERWRITE any existing sandbox pointers that have the same IDs." + ) + print("\nOptions:") + print(" 1. Use --force flag to overwrite existing pointers") + print(" 2. Use reset_sandbox_table.py to clear all items first") + print(" 3. Use delete_all_table_items.py to manually clear the table\n") + sys.exit(1) + except ClientError as e: + print(f"Warning: Could not check for existing pointers: {e}") + + +def _load_pointer_templates() -> dict[str, dict]: templates = {} for pointer_type, filename in SAMPLE_TEMPLATES.items(): try: templates[pointer_type] = _load_sample_template(filename) print(f"✓ Loaded template for type {pointer_type}") + except FileNotFoundError: + print(f"✗ Template file not found: {filename}") + continue + except json.JSONDecodeError as e: + print(f"✗ Invalid JSON in template {filename}: {e}") + continue except Exception as e: print(f"✗ Failed to load template {filename}: {e}") continue - # Generate pointers + if not templates: + print("Error: No templates could be loaded. 
Exiting.") + sys.exit(1) + + return templates + + +def _generate_and_write_pointers( + table_name: str, templates: dict[str, dict], pointers_per_type: int, testnum_iter +) -> tuple[list[list[str]], int]: + """Generate pointers and write them to DynamoDB in batches.""" + counter = 0 + pointer_data: list[list[str]] = [] + batch_upsert_items: list[dict[str, Any]] = [] + for pointer_type, template in templates.items(): for custodian in CUSTODIANS: for i in range(pointers_per_type): counter += 1 - nhs_number = next(testnum_iter) + + try: + nhs_number = next(testnum_iter) + except StopIteration: + print(f"\n✗ Error: Ran out of NHS numbers at pointer {counter}") + break try: pointer = _make_realistic_pointer( @@ -141,63 +184,134 @@ def seed_sandbox_table(table_name: str, pointers_per_type: int = 2): ] ) - # Batch write every 25 items if len(batch_upsert_items) >= 25: - response = resource.batch_write_item( - RequestItems={table_name: batch_upsert_items} - ) - - if response.get("UnprocessedItems"): - unprocessed = len( - response["UnprocessedItems"].get(table_name, []) + try: + response = resource.batch_write_item( + RequestItems={table_name: batch_upsert_items} ) - print(f"Warning: {unprocessed} unprocessed items") - - batch_upsert_items = [] - print(".", end="", flush=True) + if response.get("UnprocessedItems"): + unprocessed = len( + response["UnprocessedItems"].get(table_name, []) + ) + print(f"\nWarning: {unprocessed} unprocessed items") + + batch_upsert_items = [] + print(".", end="", flush=True) + + except ClientError as e: + error_code = e.response["Error"]["Code"] + if error_code == "ProvisionedThroughputExceededException": + print( + f"\n✗ Throttled at pointer {counter}. Retrying batch..." + ) + else: + print(f"\n✗ Error writing batch, batch cancelled: {e}") + batch_upsert_items = [] + + except ValueError as e: + print(f"\n✗ Validation error for pointer {counter}: {e}") + continue except Exception as e: print(f"\n✗ Error creating pointer {counter}: {e}") continue - # Write any remaining items if batch_upsert_items: - response = resource.batch_write_item( - RequestItems={table_name: batch_upsert_items} - ) - if response.get("UnprocessedItems"): - unprocessed = len(response["UnprocessedItems"].get(table_name, [])) - print(f"\nWarning: {unprocessed} unprocessed items in final batch") + try: + response = resource.batch_write_item( + RequestItems={table_name: batch_upsert_items} + ) + if response.get("UnprocessedItems"): + unprocessed = len(response["UnprocessedItems"].get(table_name, [])) + print(f"\nWarning: {unprocessed} unprocessed items in final batch") + except ClientError as e: + print(f"\n✗ Error writing final batch, batch cancelled: {e}") + + return pointer_data, counter + + +def seed_sandbox_table( + table_name: str, pointers_per_type: int = 2, force: bool = False +): + """ + Seed a sandbox table with realistic pointer data. 
+ + Args: + table_name: Name of the DynamoDB table to seed + pointers_per_type: Number of pointers per type per custodian (default: 2) + force: If True, overwrite existing sandbox pointers without prompting (default: False) + """ + print( + f"Seeding table {table_name} with {pointers_per_type} pointers per type per custodian" + ) + print(f"Total pointer types: {len(SAMPLE_TEMPLATES)}") + print(f"Total custodians: {len(CUSTODIANS)}") + print( + f"Total pointers to create: {len(SAMPLE_TEMPLATES) * len(CUSTODIANS) * pointers_per_type}" + ) + + table = _validate_table_access(table_name) + _check_for_existing_sandbox_pointers(table, force) + + testnum_cls = TestNhsNumbersIterator() + testnum_iter = iter(testnum_cls) + + start_time = datetime.now(tz=timezone.utc) + + templates = _load_pointer_templates() + pointer_data, total_attempts = _generate_and_write_pointers( + table_name, templates, pointers_per_type, testnum_iter + ) print("\n✓ Done!") end_time = datetime.now(tz=timezone.utc) duration = (end_time - start_time).total_seconds() - print(f"\nCreated {counter} pointers in {duration:.2f} seconds") - print(f"Average: {counter/duration:.2f} pointers/second") + total_pointers_created = len(pointer_data) + print( + f"\nAttempted {total_attempts} pointers, successfully created {total_pointers_created}" + ) + + if total_attempts > total_pointers_created: + failed = total_attempts - total_pointers_created + print(f"⚠️ {failed} pointer(s) failed to create") + + print(f"Completed in {duration:.2f} seconds") + if duration > 0: + print(f"Average: {total_pointers_created/duration:.2f} pointers/second") - # Write extract file - _write_pointer_extract(table_name, pointer_data) + try: + _write_pointer_extract(table_name, pointer_data) + except Exception as e: + print(f"Warning: Failed to write CSV extract: {e}") - return counter + return total_pointers_created def _write_pointer_extract(table_name: str, pointer_data: list[list[str]]): """Write pointer data to CSV file for reference.""" - output_dir = Path(__file__).parent.parent / "dist" / "sandbox" - output_dir.mkdir(parents=True, exist_ok=True) + try: + output_dir = Path(__file__).parent.parent / "dist" / "sandbox" + output_dir.mkdir(parents=True, exist_ok=True) - csv_file = ( - output_dir / f"sandbox-pointers-{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv" - ) - - with open(csv_file, "w") as f: - writer = csv.writer(f) - writer.writerow(["pointer_id", "pointer_type", "custodian", "nhs_number"]) - writer.writerows(pointer_data) + csv_file = ( + output_dir + / f"sandbox-pointers-{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv" + ) - print(f"Pointer data saved to {csv_file}") + with open(csv_file, "w") as f: + writer = csv.writer(f) + writer.writerow(["pointer_id", "pointer_type", "custodian", "nhs_number"]) + writer.writerows(pointer_data) + + print(f"Pointer data saved to {csv_file}") + except PermissionError: + print(f"Error: Permission denied writing to {output_dir}") + raise + except Exception as e: + print(f"Error writing CSV file: {e}") + raise if __name__ == "__main__": diff --git a/scripts/seed_utils.py b/scripts/seed_utils.py new file mode 100644 index 000000000..9d55c0389 --- /dev/null +++ b/scripts/seed_utils.py @@ -0,0 +1,37 @@ +import sys +from pathlib import Path + +# Add parent directory to path to allow imports from tests +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from tests.performance.seed_data_constants import CHECKSUM_WEIGHTS + +""" +Shared utilities for seeding DynamoDB tables with pointer data. 
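+
+Example usage (a sketch):
+
+    numbers = iter(TestNhsNumbersIterator())
+    nhs_number = next(numbers)  # ten-digit NHS number string starting "90000000..."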
+""" + + +class TestNhsNumbersIterator: + """Iterator that generates valid NHS numbers with proper checksums.""" + + def __iter__(self): + self.first9 = 900000000 + return self + + def __next__(self): + if self.first9 > 999999999: + raise StopIteration + checksum = 10 + while checksum == 10: + self.first9 += 1 + nhs_no_digits = list(map(int, str(self.first9))) + checksum = ( + sum( + weight * digit + for weight, digit in zip(CHECKSUM_WEIGHTS, nhs_no_digits) + ) + * -1 + % 11 + ) + nhs_no = str(self.first9) + str(checksum) + return nhs_no diff --git a/scripts/tests/test_delete_all_table_items.py b/scripts/tests/test_delete_all_table_items.py new file mode 100644 index 000000000..ea4261574 --- /dev/null +++ b/scripts/tests/test_delete_all_table_items.py @@ -0,0 +1,267 @@ +import sys +from unittest.mock import MagicMock, PropertyMock, patch + +import pytest +from botocore.exceptions import ClientError +from delete_all_table_items import delete_all_table_items + + +class TestDeleteAllTableItems: + + @patch("delete_all_table_items.boto3") + def test_successful_deletion_single_page(self, mock_boto3): + + mock_table = MagicMock() + mock_table.key_schema = [ + {"AttributeName": "id", "KeyType": "HASH"}, + ] + mock_table.scan.return_value = { + "Items": [{"id": "item1"}, {"id": "item2"}, {"id": "item3"}], + } + + mock_resource = MagicMock() + mock_resource.Table.return_value = mock_table + mock_boto3.resource.return_value = mock_resource + + result = delete_all_table_items("test-table") + + assert result == 3 + mock_resource.Table.assert_called_once_with("test-table") + mock_table.scan.assert_called_once() + + @patch("delete_all_table_items.boto3") + def test_successful_deletion_multiple_pages(self, mock_boto3): + + mock_table = MagicMock() + mock_table.key_schema = [ + {"AttributeName": "id", "KeyType": "HASH"}, + ] + + mock_table.scan.side_effect = [ + { + "Items": [{"id": f"item{i}"} for i in range(100)], + "LastEvaluatedKey": {"id": "item99"}, + }, + { + "Items": [{"id": f"item{i}"} for i in range(100, 150)], + }, + ] + + mock_resource = MagicMock() + mock_resource.Table.return_value = mock_table + mock_boto3.resource.return_value = mock_resource + + result = delete_all_table_items("test-table") + + assert result == 150 + assert mock_table.scan.call_count == 2 + + @patch("delete_all_table_items.boto3") + def test_successful_deletion_composite_key(self, mock_boto3): + """Test deletion with composite key (hash + range).""" + + mock_table = MagicMock() + mock_table.key_schema = [ + {"AttributeName": "pk", "KeyType": "HASH"}, + {"AttributeName": "sk", "KeyType": "RANGE"}, + ] + mock_table.scan.return_value = { + "Items": [ + {"pk": "cust1", "sk": "ptr1"}, + {"pk": "cust1", "sk": "ptr2"}, + ], + } + + mock_resource = MagicMock() + mock_resource.Table.return_value = mock_table + mock_boto3.resource.return_value = mock_resource + + result = delete_all_table_items("test-table") + + assert result == 2 + call_kwargs = mock_table.scan.call_args[1] + assert "pk,sk" in call_kwargs["ProjectionExpression"] + + @patch("delete_all_table_items.boto3") + def test_empty_table(self, mock_boto3): + + mock_table = MagicMock() + mock_table.key_schema = [{"AttributeName": "id", "KeyType": "HASH"}] + mock_table.scan.return_value = {"Items": []} + + mock_resource = MagicMock() + mock_resource.Table.return_value = mock_table + mock_boto3.resource.return_value = mock_resource + + result = delete_all_table_items("test-table") + + assert result == 0 + + @patch("delete_all_table_items.boto3") + @patch("builtins.print") + 
@patch("sys.exit") + def test_table_not_found(self, mock_exit, mock_print, mock_boto3): + + mock_exit.side_effect = SystemExit(1) + + mock_table = MagicMock() + type(mock_table).key_schema = PropertyMock( + side_effect=ClientError( + { + "Error": { + "Code": "ResourceNotFoundException", + "Message": "Table not found", + } + }, + "DescribeTable", + ) + ) + + mock_resource = MagicMock() + mock_resource.Table.return_value = mock_table + mock_boto3.resource.return_value = mock_resource + + with pytest.raises(SystemExit): + delete_all_table_items("nonexistent-table") + + mock_exit.assert_called_once_with(1) + mock_print.assert_called_with("Error: Table 'nonexistent-table' does not exist") + + @patch("delete_all_table_items.boto3") + @patch("builtins.print") + @patch("sys.exit") + def test_access_denied(self, mock_exit, mock_print, mock_boto3): + + mock_exit.side_effect = SystemExit(1) + + mock_table = MagicMock() + type(mock_table).key_schema = PropertyMock( + side_effect=ClientError( + { + "Error": { + "Code": "AccessDeniedException", + "Message": "Access denied", + } + }, + "DescribeTable", + ) + ) + + mock_resource = MagicMock() + mock_resource.Table.return_value = mock_table + mock_boto3.resource.return_value = mock_resource + + with pytest.raises(SystemExit): + delete_all_table_items("protected-table") + + mock_exit.assert_called_once_with(1) + mock_print.assert_called_with( + "Error: No permission to access table 'protected-table'" + ) + + @patch("delete_all_table_items.boto3") + @patch("builtins.print") + def test_throttling_warning(self, mock_print, mock_boto3): + + mock_table = MagicMock() + mock_table.key_schema = [{"AttributeName": "id", "KeyType": "HASH"}] + + throttle_count = [0] + + def scan_side_effect(**kwargs): + if throttle_count[0] == 0: + throttle_count[0] += 1 + raise ClientError( + {"Error": {"Code": "ProvisionedThroughputExceededException"}}, + "Scan", + ) + return {"Items": [{"id": "item1"}]} + + mock_table.scan.side_effect = scan_side_effect + + mock_resource = MagicMock() + mock_resource.Table.return_value = mock_table + mock_boto3.resource.return_value = mock_resource + + result = delete_all_table_items("test-table") + + assert result == 1 + warning_calls = [ + call for call in mock_print.call_args_list if "Throttled" in str(call) + ] + assert len(warning_calls) > 0 + + @patch("delete_all_table_items.boto3") + @patch("sys.exit") + def test_unexpected_error(self, mock_exit, mock_boto3): + + mock_exit.side_effect = SystemExit(1) + + mock_table = MagicMock() + mock_table.key_schema = [{"AttributeName": "id", "KeyType": "HASH"}] + mock_table.scan.side_effect = RuntimeError("Unexpected error") + + mock_resource = MagicMock() + mock_resource.Table.return_value = mock_table + mock_boto3.resource.return_value = mock_resource + + with pytest.raises(SystemExit): + delete_all_table_items("test-table") + + mock_exit.assert_called_once_with(1) + + @patch("delete_all_table_items.boto3") + @patch("builtins.print") + def test_progress_indicator(self, mock_print, mock_boto3): + + mock_table = MagicMock() + mock_table.key_schema = [{"AttributeName": "id", "KeyType": "HASH"}] + + mock_table.scan.side_effect = [ + { + "Items": [{"id": f"item{i}"} for i in range(100)], + "LastEvaluatedKey": {"id": "item99"}, + }, + { + "Items": [{"id": f"item{i}"} for i in range(100, 200)], + "LastEvaluatedKey": {"id": "item199"}, + }, + { + "Items": [{"id": f"item{i}"} for i in range(200, 250)], + }, + ] + + mock_resource = MagicMock() + mock_resource.Table.return_value = mock_table + 
mock_boto3.resource.return_value = mock_resource + + result = delete_all_table_items("test-table") + + assert result == 250 + + progress_calls = [ + call + for call in mock_print.call_args_list + if "Deleted" in str(call) and "items..." in str(call) + ] + assert len(progress_calls) == 2 + + @patch("delete_all_table_items.boto3") + def test_batch_writer_context_manager(self, mock_boto3): + + mock_batch_writer = MagicMock() + mock_table = MagicMock() + mock_table.key_schema = [{"AttributeName": "id", "KeyType": "HASH"}] + mock_table.scan.return_value = { + "Items": [{"id": "item1"}], + } + mock_table.batch_writer.return_value.__enter__.return_value = mock_batch_writer + + mock_resource = MagicMock() + mock_resource.Table.return_value = mock_table + mock_boto3.resource.return_value = mock_resource + + result = delete_all_table_items("test-table") + + mock_table.batch_writer.assert_called_once() + mock_batch_writer.delete_item.assert_called_once_with(Key={"id": "item1"}) diff --git a/scripts/tests/test_seed_sandbox_table.py b/scripts/tests/test_seed_sandbox_table.py new file mode 100644 index 000000000..f1a93eff7 --- /dev/null +++ b/scripts/tests/test_seed_sandbox_table.py @@ -0,0 +1,329 @@ +import json +import sys +from pathlib import Path +from unittest.mock import MagicMock, PropertyMock, mock_open, patch + +import pytest +from botocore.exceptions import ClientError +from seed_sandbox_table import ( + _check_for_existing_sandbox_pointers, + _generate_and_write_pointers, + _load_pointer_templates, + _load_sample_template, + _make_realistic_pointer, + _validate_table_access, + _write_pointer_extract, + seed_sandbox_table, +) + + +class TestLoadSampleTemplate: + + @patch("builtins.open", new_callable=mock_open, read_data='{"id": "test"}') + def test_load_valid_template(self, mock_file): + result = _load_sample_template("test.json") + + assert result == {"id": "test"} + mock_file.assert_called_once() + + @patch("builtins.open", side_effect=FileNotFoundError) + def test_load_missing_template(self, mock_file): + with pytest.raises(FileNotFoundError): + _load_sample_template("missing.json") + + @patch("builtins.open", new_callable=mock_open, read_data="invalid json") + def test_load_invalid_json(self, mock_file): + with pytest.raises(json.JSONDecodeError): + _load_sample_template("invalid.json") + + +class TestValidateTableAccess: + + @patch("seed_sandbox_table.resource") + def test_successful_table_access(self, mock_resource): + mock_table = MagicMock() + mock_resource.Table.return_value = mock_table + + result = _validate_table_access("test-table") + + assert result == mock_table + mock_resource.Table.assert_called_once_with("test-table") + mock_table.load.assert_called_once() + + @patch("seed_sandbox_table.resource") + @patch("builtins.print") + @patch("sys.exit") + def test_table_not_found(self, mock_exit, mock_print, mock_resource): + mock_exit.side_effect = SystemExit(1) + + mock_table = MagicMock() + mock_table.load.side_effect = ClientError( + { + "Error": { + "Code": "ResourceNotFoundException", + "Message": "Table not found", + } + }, + "DescribeTable", + ) + mock_resource.Table.return_value = mock_table + + with pytest.raises(SystemExit): + _validate_table_access("nonexistent-table") + + mock_exit.assert_called_once_with(1) + mock_print.assert_called_with("Error: Table 'nonexistent-table' does not exist") + + @patch("seed_sandbox_table.resource") + @patch("builtins.print") + @patch("sys.exit") + def test_access_denied(self, mock_exit, mock_print, mock_resource): + mock_exit.side_effect 
= SystemExit(1) + + mock_table = MagicMock() + mock_table.load.side_effect = ClientError( + {"Error": {"Code": "AccessDeniedException", "Message": "Access denied"}}, + "DescribeTable", + ) + mock_resource.Table.return_value = mock_table + + with pytest.raises(SystemExit): + _validate_table_access("protected-table") + + mock_exit.assert_called_once_with(1) + mock_print.assert_called_with( + "Error: No permission to access table 'protected-table'" + ) + + +class TestCheckForExistingSandboxPointers: + + @patch("builtins.print") + def test_force_mode_enabled(self, mock_print): + mock_table = MagicMock() + + _check_for_existing_sandbox_pointers(mock_table, force=True) + + mock_table.scan.assert_not_called() + mock_print.assert_called_with( + "⚠️ Force mode enabled - will overwrite existing sandbox pointers" + ) + + def test_no_existing_pointers(self): + mock_table = MagicMock() + mock_table.scan.return_value = {"Items": []} + + _check_for_existing_sandbox_pointers(mock_table, force=False) + + mock_table.scan.assert_called_once() + + @patch("builtins.print") + @patch("sys.exit") + def test_existing_pointers_found(self, mock_exit, mock_print): + mock_exit.side_effect = SystemExit(1) + + mock_table = MagicMock() + mock_table.scan.return_value = {"Items": [{"id": "existing"}]} + + with pytest.raises(SystemExit): + _check_for_existing_sandbox_pointers(mock_table, force=False) + + mock_exit.assert_called_once_with(1) + + print_calls = [str(call) for call in mock_print.call_args_list] + assert any( + "Warning: Sandbox pointers already exist" in call for call in print_calls + ) + + +class TestLoadPointerTemplates: + + @patch("seed_sandbox_table._load_sample_template") + @patch( + "seed_sandbox_table.SAMPLE_TEMPLATES", + {"type1": "file1.json", "type2": "file2.json"}, + ) + def test_load_all_templates_success(self, mock_load): + mock_load.side_effect = [{"template": "1"}, {"template": "2"}] + + result = _load_pointer_templates() + + assert len(result) == 2 + assert result["type1"] == {"template": "1"} + assert result["type2"] == {"template": "2"} + + @patch("seed_sandbox_table._load_sample_template") + @patch( + "seed_sandbox_table.SAMPLE_TEMPLATES", + {"type1": "file1.json", "type2": "file2.json"}, + ) + @patch("builtins.print") + def test_load_templates_with_failures(self, mock_print, mock_load): + mock_load.side_effect = [{"template": "1"}, FileNotFoundError()] + + result = _load_pointer_templates() + + assert len(result) == 1 + assert result["type1"] == {"template": "1"} + mock_print.assert_any_call("✗ Template file not found: file2.json") + + @patch("seed_sandbox_table._load_sample_template") + @patch("seed_sandbox_table.SAMPLE_TEMPLATES", {"type1": "file1.json"}) + @patch("builtins.print") + @patch("sys.exit") + def test_load_templates_all_fail(self, mock_exit, mock_print, mock_load): + mock_exit.side_effect = SystemExit(1) + mock_load.side_effect = FileNotFoundError() + + with pytest.raises(SystemExit): + _load_pointer_templates() + + mock_exit.assert_called_once_with(1) + mock_print.assert_any_call("Error: No templates could be loaded. 
Exiting.") + + +class TestMakeRealisticPointer: + + @patch("seed_sandbox_table.DocumentReference") + @patch("seed_sandbox_table.DocumentPointer") + def test_create_pointer_success(self, mock_pointer_class, mock_doc_ref_class): + template = { + "id": "original", + "subject": {"identifier": {"value": "0000000000"}}, + "custodian": {"identifier": {"value": "OLD"}}, + "author": [{"identifier": {"value": "OLD_AUTHOR"}}], + "masterIdentifier": {"value": "old-master"}, + } + + mock_doc_ref = MagicMock() + mock_doc_ref_class.return_value = mock_doc_ref + + mock_pointer = MagicMock() + mock_pointer_class.from_document_reference.return_value = mock_pointer + + result = _make_realistic_pointer(template, "type1", "Y12345", "9000000001", 1) + + assert result == mock_pointer + mock_doc_ref_class.assert_called_once() + mock_pointer_class.from_document_reference.assert_called_once_with( + mock_doc_ref, source="SANDBOX-SEED" + ) + + +class TestGenerateAndWritePointers: + + @patch("seed_sandbox_table.resource") + @patch("seed_sandbox_table._make_realistic_pointer") + @patch("seed_sandbox_table.CUSTODIANS", ["CUST1"]) + def test_generate_pointers_success(self, mock_make_pointer, mock_resource): + templates = {"type1": {"template": "data"}} + + mock_pointer = MagicMock() + mock_pointer.id = "TEST-001" + mock_pointer.custodian = "CUST1" + mock_pointer.nhs_number = "9000000001" + mock_pointer.model_dump.return_value = {"id": "TEST-001"} + mock_make_pointer.return_value = mock_pointer + + mock_resource.batch_write_item.return_value = {} + + nhs_iter = iter(["9000000001", "9000000002"]) + + pointer_data, total_attempts = _generate_and_write_pointers( + "test-table", templates, 2, nhs_iter + ) + + assert total_attempts == 2 + assert len(pointer_data) == 2 + assert pointer_data[0][0] == "TEST-001" + + @patch("seed_sandbox_table.resource") + @patch("seed_sandbox_table._make_realistic_pointer") + @patch("seed_sandbox_table.CUSTODIANS", ["CUST1"]) + def test_generate_pointers_with_validation_error( + self, mock_make_pointer, mock_resource + ): + templates = {"type1": {"template": "data"}} + + mock_make_pointer.side_effect = [ValueError("Invalid data"), MagicMock()] + + nhs_iter = iter(["9000000001", "9000000002"]) + + pointer_data, total_attempts = _generate_and_write_pointers( + "test-table", templates, 2, nhs_iter + ) + + # First attempt failed, second succeeded + assert total_attempts == 2 + assert len(pointer_data) == 1 + + +class TestSeedSandboxTable: + + @patch("seed_sandbox_table._validate_table_access") + @patch("seed_sandbox_table._check_for_existing_sandbox_pointers") + @patch("seed_sandbox_table._load_pointer_templates") + @patch("seed_sandbox_table._generate_and_write_pointers") + @patch("seed_sandbox_table._write_pointer_extract") + @patch("seed_sandbox_table.TestNhsNumbersIterator") + def test_seed_table_success( + self, + mock_nhs_iter_class, + mock_write_extract, + mock_generate, + mock_load_templates, + mock_check_pointers, + mock_validate, + ): + mock_table = MagicMock() + mock_validate.return_value = mock_table + + mock_templates = {"type1": {"template": "data"}} + mock_load_templates.return_value = mock_templates + + pointer_data = [["PTR-001", "type1", "CUST1", "9000000001"]] + mock_generate.return_value = (pointer_data, 1) + + result = seed_sandbox_table("test-table", pointers_per_type=1, force=False) + + assert result == 1 + mock_validate.assert_called_once_with("test-table") + mock_check_pointers.assert_called_once_with(mock_table, False) + mock_generate.assert_called_once() + 
mock_write_extract.assert_called_once() + + @patch("seed_sandbox_table._validate_table_access") + @patch("seed_sandbox_table._check_for_existing_sandbox_pointers") + @patch("seed_sandbox_table._load_pointer_templates") + @patch("seed_sandbox_table._generate_and_write_pointers") + @patch("seed_sandbox_table._write_pointer_extract") + @patch("seed_sandbox_table.TestNhsNumbersIterator") + @patch("builtins.print") + def test_seed_table_with_failures( + self, + mock_print, + mock_nhs_iter_class, + mock_write_extract, + mock_generate, + mock_load_templates, + mock_check_pointers, + mock_validate, + ): + mock_table = MagicMock() + mock_validate.return_value = mock_table + + mock_templates = {"type1": {"template": "data"}} + mock_load_templates.return_value = mock_templates + + # 5 attempts, 3 successful + pointer_data = [ + ["PTR-001", "type1", "CUST1", "9000000001"], + ["PTR-002", "type1", "CUST1", "9000000002"], + ["PTR-003", "type1", "CUST1", "9000000003"], + ] + mock_generate.return_value = (pointer_data, 5) + + result = seed_sandbox_table("test-table", pointers_per_type=1, force=False) + + assert result == 3 + mock_print.assert_any_call("⚠️ 2 pointer(s) failed to create") From 321d709d21bd9e29998cea97128bb0bb20590d39 Mon Sep 17 00:00:00 2001 From: Sandy Forrester Date: Fri, 6 Feb 2026 15:36:45 +0000 Subject: [PATCH 03/19] NRL-721 Add failed attempts to output --- scripts/reset_sandbox_table.py | 15 +++++++++++---- scripts/seed_sandbox_table.py | 6 +++++- scripts/tests/test_seed_sandbox_table.py | 7 +++++-- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/scripts/reset_sandbox_table.py b/scripts/reset_sandbox_table.py index 954e59432..f9a7b69de 100755 --- a/scripts/reset_sandbox_table.py +++ b/scripts/reset_sandbox_table.py @@ -1,6 +1,10 @@ #!/usr/bin/env python """ -Resets a sandbox table by clearing all items and reseeding with fresh data. 
+Resets a sandbox table by clearing all items and reseeding with fresh data.
+
+This script is intended for manual CLI use to reset a sandbox table.
+
+A separate Lambda function (../lambdas/seed_sandbox) performs the same reset operation on a weekly schedule; this script allows on-demand resets without waiting for the scheduled job.
 """
 import sys
 
@@ -32,10 +36,13 @@ def reset_sandbox_table(table_name: str, pointers_per_type: int = 2):
 
     print("Step 2: Seeding with fresh pointer data...")
     try:
-        count = seed_sandbox_table(table_name, pointers_per_type, force=True)
+        result = seed_sandbox_table(table_name, pointers_per_type, force=True)
         print(f"\n=== ✓ Reset Complete ===")
-        print(f"Table '{table_name}' has been reset with {count} fresh pointers")
-        return count
+        print(
+            f"Table '{table_name}' has been reset with {result['successful']} fresh pointers"
+        )
+        if result["failed"] > 0:
+            print(f"⚠️ {result['failed']} pointer(s) failed to create")
     except SystemExit as e:
         print("✗ Failed to seed table after deletion.")
         sys.exit(e.code)
diff --git a/scripts/seed_sandbox_table.py b/scripts/seed_sandbox_table.py
index daf6ee847..8e01365eb 100755
--- a/scripts/seed_sandbox_table.py
+++ b/scripts/seed_sandbox_table.py
@@ -286,7 +286,11 @@
     except Exception as e:
         print(f"Warning: Failed to write CSV extract: {e}")
 
-    return total_pointers_created
+    return {
+        "successful": total_pointers_created,
+        "attempted": total_attempts,
+        "failed": total_attempts - total_pointers_created,
+    }
 
 
 def _write_pointer_extract(table_name: str, pointer_data: list[list[str]]):
diff --git a/scripts/tests/test_seed_sandbox_table.py b/scripts/tests/test_seed_sandbox_table.py
index f1a93eff7..a0beb1a44 100644
--- a/scripts/tests/test_seed_sandbox_table.py
+++ b/scripts/tests/test_seed_sandbox_table.py
@@ -286,7 +286,7 @@
 
         result = seed_sandbox_table("test-table", pointers_per_type=1, force=False)
 
-        assert result == 1
+        assert result == {"successful": 1, "attempted": 1, "failed": 0}
         mock_validate.assert_called_once_with("test-table")
         mock_check_pointers.assert_called_once_with(mock_table, False)
         mock_generate.assert_called_once()
@@ -325,5 +325,8 @@
 
         result = seed_sandbox_table("test-table", pointers_per_type=1, force=False)
 
-        assert result == 3
+        # Should return dict with counts
+        assert result == {"successful": 3, "attempted": 5, "failed": 2}
+
+        # Verify error message about failures
         mock_print.assert_any_call("⚠️ 2 pointer(s) failed to create")

From f10dba1f983834144774436326ec11db08a4e8ab Mon Sep 17 00:00:00 2001
From: Sandy Forrester
Date: Fri, 6 Feb 2026 16:36:37 +0000
Subject: [PATCH 04/19] NRL-721 Implement lambda and eventbridge

---
 lambdas/seed_sandbox/Makefile                 | 28 +++++++
 lambdas/seed_sandbox/index.py                 | 82 +++++++++++++++
 .../modules/seed_sandbox_lambda/cloudwatch.tf | 27 ------
 .../seed_sandbox_lambda/eventbridge.tf        | 20 +++++
 .../modules/seed_sandbox_lambda/lambda.tf     | 10 ++-
 .../modules/seed_sandbox_lambda/logs.tf       |  5 ++
 .../modules/seed_sandbox_lambda/vars.tf       |  5 ++
 .../infrastructure/seed_sandbox_lambda.tf     | 40 +++++----
 8 files changed, 169 insertions(+), 48 deletions(-)
 create mode 100644 lambdas/seed_sandbox/Makefile
 create mode 100644 lambdas/seed_sandbox/index.py
 delete mode 100644 terraform/infrastructure/modules/seed_sandbox_lambda/cloudwatch.tf
 create mode 100644 terraform/infrastructure/modules/seed_sandbox_lambda/eventbridge.tf
 create mode 100644 
terraform/infrastructure/modules/seed_sandbox_lambda/logs.tf diff --git a/lambdas/seed_sandbox/Makefile b/lambdas/seed_sandbox/Makefile new file mode 100644 index 000000000..a9badf0d4 --- /dev/null +++ b/lambdas/seed_sandbox/Makefile @@ -0,0 +1,28 @@ +.PHONY: build clean + +build: clean + @echo "Building Lambda deployment package..." + mkdir -p dist + + # Copy the handler + cp index.py dist/ + + # Copy the required scripts + mkdir -p dist/scripts + cp ../../scripts/delete_all_table_items.py dist/scripts/ + cp ../../scripts/seed_sandbox_table.py dist/scripts/ + cp ../../scripts/seed_utils.py dist/scripts/ + + # Copy the pointer template data + mkdir -p dist/tests/data/samples + cp -r ../../tests/data/samples/*.json dist/tests/data/samples/ + + # Create the zip file + cd dist && zip -r seed_sandbox.zip . -x "*.pyc" -x "__pycache__/*" -x ".DS_Store" + + @echo "✓ Lambda package created: dist/seed_sandbox.zip" + +clean: + @echo "Cleaning build artifacts..." + rm -rf dist + @echo "✓ Clean complete" diff --git a/lambdas/seed_sandbox/index.py b/lambdas/seed_sandbox/index.py new file mode 100644 index 000000000..0900842d2 --- /dev/null +++ b/lambdas/seed_sandbox/index.py @@ -0,0 +1,82 @@ +""" +Lambda handler for resetting the sandbox DynamoDB table. + +This Lambda function runs on a weekly schedule to clear and reseed the +sandbox pointers table with fresh test data. +""" + +# flake8: noqa: T201 + +import json +import os +import sys +from pathlib import Path + +# Add scripts directory to path so we can import our scripts +SCRIPTS_DIR = Path(__file__).parent.parent.parent / "scripts" +sys.path.insert(0, str(SCRIPTS_DIR)) + +from delete_all_table_items import delete_all_table_items +from seed_sandbox_table import seed_sandbox_table + + +def handler(event, context): + """ + Lambda handler that orchestrates the reset of the sandbox table. + + The sandbox table to be reset is determined by the TABLE_NAME environment variable. 
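+
+    POINTERS_PER_TYPE is optional and defaults to "2". A hedged local smoke
+    test, assuming AWS credentials and the environment variables are set:
+
+        TABLE_NAME=my-sandbox-table python -c "from index import handler; print(handler({}, None))"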
+ + Args: + event: Lambda event (from EventBridge schedule) + context: Lambda context + + Returns: + dict: Response with status and details + """ + table_name = os.environ.get("TABLE_NAME") + pointers_per_type = int(os.environ.get("POINTERS_PER_TYPE", "2")) + + if not table_name: + error_msg = "TABLE_NAME environment variable is required" + print(f"ERROR: {error_msg}") + return {"statusCode": 500, "body": json.dumps({"error": error_msg})} + + print(f"Starting sandbox table reset for: {table_name}") + print(f"Pointers per type: {pointers_per_type}") + + try: + print("Step 1: Deleting all items from table...") + pointers_deleted_count = delete_all_table_items(table_name=table_name) + print("✓ All items deleted successfully") + + print("Step 2: Seeding table with fresh data...") + seed_result = seed_sandbox_table( + table_name=table_name, pointers_per_type=pointers_per_type, force=True + ) + print("✓ Table seeded successfully") + + result = { + "statusCode": 200, + "body": json.dumps( + { + "message": "Sandbox table reset completed successfully", + "table_name": table_name, + "pointers_deleted": pointers_deleted_count, + "pointers_created": seed_result["successful"], + "pointers_attempted": seed_result["attempted"], + "pointers_failed": seed_result["failed"], + "pointers_per_type": pointers_per_type, + } + ), + } + + print(f"SUCCESS: {result}") + return result + + except Exception as e: + error_msg = f"Failed to reset sandbox table: {str(e)}" + print(f"ERROR: {error_msg}") + return { + "statusCode": 500, + "body": json.dumps({"error": error_msg, "table_name": table_name}), + } diff --git a/terraform/infrastructure/modules/seed_sandbox_lambda/cloudwatch.tf b/terraform/infrastructure/modules/seed_sandbox_lambda/cloudwatch.tf deleted file mode 100644 index 8a2e36b96..000000000 --- a/terraform/infrastructure/modules/seed_sandbox_lambda/cloudwatch.tf +++ /dev/null @@ -1,27 +0,0 @@ -resource "aws_cloudwatch_log_group" "lambda_cloudwatch_log_group" { - name = "/aws/lambda/${aws_lambda_function.lambda_function.function_name}" - retention_in_days = local.lambda_log_retention_in_days - kms_key_id = var.kms_key_id -} - - -resource "aws_cloudwatch_event_rule" "event_rule" { - name = "${var.prefix}--event_rule" - description = "Rule to fire to clear and reseed sandbox data" - schedule_expression = "cron(0 3 ? * * *)" # 3am, every day -} - -resource "aws_cloudwatch_event_target" "event_target" { - target_id = "${var.prefix}--event_target" - rule = aws_cloudwatch_event_rule.event_rule.name - arn = aws_lambda_function.lambda_function.arn -} - - -resource "aws_lambda_permission" "allow_execution_from_cloudwatch" { - statement_id = "AllowExecutionFromCloudWatch" - action = "lambda:InvokeFunction" - function_name = aws_lambda_function.lambda_function.arn - principal = "events.amazonaws.com" - source_arn = aws_cloudwatch_event_rule.event_rule.arn -} diff --git a/terraform/infrastructure/modules/seed_sandbox_lambda/eventbridge.tf b/terraform/infrastructure/modules/seed_sandbox_lambda/eventbridge.tf new file mode 100644 index 000000000..c603d5384 --- /dev/null +++ b/terraform/infrastructure/modules/seed_sandbox_lambda/eventbridge.tf @@ -0,0 +1,20 @@ +resource "aws_eventbridge_rule" "event_rule" { + name = "${var.prefix}--event_rule" + description = "Rule to clear and reseed sandbox data" + schedule_expression = "cron(0 2 ? 
* SUN *)" # 2am UTC, every Sunday +} + +resource "aws_eventbridge_target" "event_target" { + target_id = "${var.prefix}--event_target" + rule = aws_eventbridge_rule.event_rule.name + arn = aws_lambda_function.lambda_function.arn +} + + +resource "aws_lambda_permission" "allow_execution_from_eventbridge" { + statement_id = "AllowExecutionFromEventBridge" + action = "lambda:InvokeFunction" + function_name = aws_lambda_function.lambda_function.arn + principal = "events.amazonaws.com" + source_arn = aws_eventbridge_rule.event_rule.arn +} diff --git a/terraform/infrastructure/modules/seed_sandbox_lambda/lambda.tf b/terraform/infrastructure/modules/seed_sandbox_lambda/lambda.tf index 8854454bf..879fb1bcc 100644 --- a/terraform/infrastructure/modules/seed_sandbox_lambda/lambda.tf +++ b/terraform/infrastructure/modules/seed_sandbox_lambda/lambda.tf @@ -1,15 +1,17 @@ resource "aws_lambda_function" "lambda_function" { function_name = "${var.prefix}--sandbox-seeder" runtime = "python3.9" - handler = "cron.seed_sandbox.index.handler" + handler = "index.handler" role = aws_iam_role.lambda_role.arn - filename = "${path.module}/../../../../cron/seed_sandbox/dist/seed_sandbox.zip" - source_code_hash = filebase64sha256("${path.module}/../../../../cron/seed_sandbox/dist/seed_sandbox.zip") + filename = "${path.module}/../../../../lambdas/seed_sandbox/dist/seed_sandbox.zip" + source_code_hash = filebase64sha256("${path.module}/../../../../lambdas/seed_sandbox/dist/seed_sandbox.zip") timeout = local.lambda_timeout memory_size = 128 environment { - variables = var.environment_variables + variables = merge(var.environment_variables, { + TABLE_NAME = var.table_name + }) } layers = var.layers diff --git a/terraform/infrastructure/modules/seed_sandbox_lambda/logs.tf b/terraform/infrastructure/modules/seed_sandbox_lambda/logs.tf new file mode 100644 index 000000000..7fb1b8838 --- /dev/null +++ b/terraform/infrastructure/modules/seed_sandbox_lambda/logs.tf @@ -0,0 +1,5 @@ +resource "aws_cloudwatch_log_group" "lambda_cloudwatch_log_group" { + name = "/aws/lambda/${aws_lambda_function.lambda_function.function_name}" + retention_in_days = local.lambda_log_retention_in_days + kms_key_id = var.kms_key_id +} diff --git a/terraform/infrastructure/modules/seed_sandbox_lambda/vars.tf b/terraform/infrastructure/modules/seed_sandbox_lambda/vars.tf index 65473ab61..7222cef88 100644 --- a/terraform/infrastructure/modules/seed_sandbox_lambda/vars.tf +++ b/terraform/infrastructure/modules/seed_sandbox_lambda/vars.tf @@ -11,3 +11,8 @@ variable "environment_variables" {} variable "additional_policies" { default = [] } + +variable "table_name" { + description = "The name of the DynamoDB table to reset" + type = string +} diff --git a/terraform/infrastructure/seed_sandbox_lambda.tf b/terraform/infrastructure/seed_sandbox_lambda.tf index 3b1ec0f71..b3e30f5be 100644 --- a/terraform/infrastructure/seed_sandbox_lambda.tf +++ b/terraform/infrastructure/seed_sandbox_lambda.tf @@ -1,18 +1,24 @@ +module "seed_sandbox_lambda" { + count = contains(["int", "dev"], local.environment) ? 1 : 0 + source = "./modules/seed_sandbox_lambda" + region = local.region + prefix = local.prefix + layers = [module.lambda-utils.layer_arn, module.nrlf.layer_arn, module.third_party.layer_arn] + kms_key_id = module.kms__cloudwatch.kms_arn -# module "seed_sandbox_lambda" { -# count = endswith(local.environment, "-sandbox") ? 
1 : 0 -# source = "./modules/seed_sandbox_lambda" -# region = local.region -# prefix = local.prefix -# layers = [module.lambda-utils.layer_arn, module.nrlf.layer_arn, module.third_party.layer_arn] -# kms_key_id = module.kms__cloudwatch.kms_arn -# environment_variables = { -# PREFIX = "${local.prefix}--" -# ENVIRONMENT = local.environment -# } -# additional_policies = [ -# aws_iam_policy.document-pointer__dynamodb-write.arn, -# aws_iam_policy.document-pointer__dynamodb-read.arn, -# aws_iam_policy.document-pointer__kms-read-write.arn -# ] -# } + # Use sandy-dev table for dev environment, int-sandboxcopy for int + # This will be changed to the int-sandbox table once development and testing is complete + table_name = local.environment == "dev" ? "${local.prefix}--sandy-dev-pointers-table" : "${local.prefix}--int-sandboxcopy-pointers-table" + + environment_variables = { + PREFIX = "${local.prefix}--" + ENVIRONMENT = local.environment + POINTERS_PER_TYPE = "2" + } + + additional_policies = [ + aws_iam_policy.document-pointer__dynamodb-write.arn, + aws_iam_policy.document-pointer__dynamodb-read.arn, + aws_iam_policy.document-pointer__kms-read-write.arn + ] +} From 9dae7ae5867cd9b7d87c33cdd503733e2158f927 Mon Sep 17 00:00:00 2001 From: Sandy Forrester Date: Mon, 9 Feb 2026 16:42:21 +0000 Subject: [PATCH 05/19] NRL-721 Get lambda working successfully --- Makefile | 6 ++- lambdas/seed_sandbox/index.py | 15 +++---- scripts/delete_all_table_items.py | 10 ++++- scripts/seed_nft_tables.py | 3 +- scripts/seed_sandbox_table.py | 45 ++++++++++++++----- scripts/seed_utils.py | 11 ++--- scripts/tests/test_seed_sandbox_table.py | 31 +++++++++++++ .../seed_sandbox_lambda/eventbridge.tf | 16 ++++--- .../modules/seed_sandbox_lambda/iam.tf | 22 +++++++++ .../modules/seed_sandbox_lambda/lambda.tf | 2 +- .../infrastructure/seed_sandbox_lambda.tf | 12 +++-- 11 files changed, 125 insertions(+), 48 deletions(-) diff --git a/Makefile b/Makefile index 394150e3a..34da8fb02 100644 --- a/Makefile +++ b/Makefile @@ -58,7 +58,11 @@ check-deploy: ## check the deploy environment is setup correctly check-deploy-warn: @SHOULD_WARN_ONLY=true ./scripts/check-deploy-environment.sh -build: check-warn build-api-packages build-layers build-dependency-layer ## Build the project +build: check-warn build-api-packages build-layers build-dependency-layer build-seed-sandbox-lambda ## Build the project + +build-seed-sandbox-lambda: + @echo "Building seed_sandbox Lambda" + @cd lambdas/seed_sandbox && make build build-dependency-layer: @echo "Building Lambda dependency layer" diff --git a/lambdas/seed_sandbox/index.py b/lambdas/seed_sandbox/index.py index 0900842d2..a798c66d4 100644 --- a/lambdas/seed_sandbox/index.py +++ b/lambdas/seed_sandbox/index.py @@ -9,15 +9,9 @@ import json import os -import sys -from pathlib import Path -# Add scripts directory to path so we can import our scripts -SCRIPTS_DIR = Path(__file__).parent.parent.parent / "scripts" -sys.path.insert(0, str(SCRIPTS_DIR)) - -from delete_all_table_items import delete_all_table_items -from seed_sandbox_table import seed_sandbox_table +from scripts.delete_all_table_items import delete_all_table_items +from scripts.seed_sandbox_table import seed_sandbox_table def handler(event, context): @@ -51,7 +45,10 @@ def handler(event, context): print("Step 2: Seeding table with fresh data...") seed_result = seed_sandbox_table( - table_name=table_name, pointers_per_type=pointers_per_type, force=True + table_name=table_name, + pointers_per_type=pointers_per_type, + force=True, + 
write_csv=False, ) print("✓ Table seeded successfully") diff --git a/scripts/delete_all_table_items.py b/scripts/delete_all_table_items.py index 3c1e752a6..3247c09c6 100755 --- a/scripts/delete_all_table_items.py +++ b/scripts/delete_all_table_items.py @@ -2,9 +2,14 @@ import sys import boto3 -import fire from botocore.exceptions import ClientError +# Needed for when the script is run in Lambda where modules are in scripts subdirectory +try: + import fire +except ImportError: + fire = None + def delete_all_table_items(table_name): """Delete all items from a DynamoDB table.""" @@ -69,4 +74,7 @@ def delete_all_table_items(table_name): if __name__ == "__main__": + if fire is None: + print("Error: fire module not available") + sys.exit(1) fire.Fire(delete_all_table_items) diff --git a/scripts/seed_nft_tables.py b/scripts/seed_nft_tables.py index 9b69ffd22..a39d13194 100644 --- a/scripts/seed_nft_tables.py +++ b/scripts/seed_nft_tables.py @@ -9,7 +9,7 @@ import boto3 import fire import numpy as np -from seed_utils import TestNhsNumbersIterator +from seed_utils import CHECKSUM_WEIGHTS, TestNhsNumbersIterator from nrlf.core.boto import get_s3_client from nrlf.core.constants import ( @@ -23,7 +23,6 @@ from nrlf.tests.data import load_document_reference from tests.performance.perftest_environment import create_extract_metadata_file from tests.performance.seed_data_constants import ( # DEFAULT_COUNT_DISTRIBUTIONS, - CHECKSUM_WEIGHTS, CUSTODIAN_DISTRIBUTION_PROFILES, TYPE_DISTRIBUTION_PROFILES, ) diff --git a/scripts/seed_sandbox_table.py b/scripts/seed_sandbox_table.py index 8e01365eb..d40bdc81f 100755 --- a/scripts/seed_sandbox_table.py +++ b/scripts/seed_sandbox_table.py @@ -13,16 +13,29 @@ from typing import Any import boto3 -import fire from botocore.exceptions import ClientError -from seed_utils import TestNhsNumbersIterator -from nrlf.core.dynamodb.model import DocumentPointer -from nrlf.core.logger import logger -from nrlf.producer.fhir.r4.model import DocumentReference -from nrlf.tests.data import load_document_reference +# Needed for when the script is run in Lambda where modules are in scripts subdirectory +try: + from seed_utils import TestNhsNumbersIterator +except ImportError: + # In Lambda, modules are in scripts subdirectory + from scripts.seed_utils import TestNhsNumbersIterator -logger.setLevel("ERROR") +try: + import fire +except ImportError: + fire = None + +try: + from nrlf.core.dynamodb.model import DocumentPointer + from nrlf.core.logger import logger + from nrlf.producer.fhir.r4.model import DocumentReference + + logger.setLevel("ERROR") +except ImportError as e: + print(f"Warning: Failed to import NRLF modules: {e}") + raise resource = boto3.resource("dynamodb") @@ -231,7 +244,10 @@ def _generate_and_write_pointers( def seed_sandbox_table( - table_name: str, pointers_per_type: int = 2, force: bool = False + table_name: str, + pointers_per_type: int = 2, + force: bool = False, + write_csv: bool = True, ): """ Seed a sandbox table with realistic pointer data. 
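
With the guarded imports in place, the same module now backs both the command line and the Lambda. A sketch of the two call paths, assuming fire is installed locally and the NRLF layers are present in Lambda; the table name is illustrative:

from seed_sandbox_table import seed_sandbox_table  # imported as scripts.seed_sandbox_table in Lambda

# Programmatic call, as the Lambda handler makes it: no prompt, no CSV extract.
result = seed_sandbox_table(
    table_name="nhsd-nrlf--example-sandbox-pointers-table",  # illustrative name
    pointers_per_type=2,
    force=True,
    write_csv=False,
)
print(result)  # e.g. {"successful": 16, "attempted": 16, "failed": 0}

# Command-line call via fire, which maps keyword arguments to flags:
#   python scripts/seed_sandbox_table.py --table_name=<table> --force
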
@@ -240,6 +256,7 @@ def seed_sandbox_table( table_name: Name of the DynamoDB table to seed pointers_per_type: Number of pointers per type per custodian (default: 2) force: If True, overwrite existing sandbox pointers without prompting (default: False) + write_csv: If True, write pointer data to CSV file (default: True) """ print( f"Seeding table {table_name} with {pointers_per_type} pointers per type per custodian" @@ -281,10 +298,11 @@ def seed_sandbox_table( if duration > 0: print(f"Average: {total_pointers_created/duration:.2f} pointers/second") - try: - _write_pointer_extract(table_name, pointer_data) - except Exception as e: - print(f"Warning: Failed to write CSV extract: {e}") + if write_csv: + try: + _write_pointer_extract(table_name, pointer_data) + except Exception as e: + print(f"Warning: Failed to write CSV extract: {e}") return { "successful": total_pointers_created, @@ -319,4 +337,7 @@ def _write_pointer_extract(table_name: str, pointer_data: list[list[str]]): if __name__ == "__main__": + if fire is None: + print("Error: fire module not available") + sys.exit(1) fire.Fire(seed_sandbox_table) diff --git a/scripts/seed_utils.py b/scripts/seed_utils.py index 9d55c0389..50f38e8e8 100644 --- a/scripts/seed_utils.py +++ b/scripts/seed_utils.py @@ -1,15 +1,10 @@ -import sys -from pathlib import Path - -# Add parent directory to path to allow imports from tests -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from tests.performance.seed_data_constants import CHECKSUM_WEIGHTS - """ Shared utilities for seeding DynamoDB tables with pointer data. """ +# NHS number checksum weights (10, 9, 8, 7, 6, 5, 4, 3, 2) +CHECKSUM_WEIGHTS = [i for i in range(10, 1, -1)] + class TestNhsNumbersIterator: """Iterator that generates valid NHS numbers with proper checksums.""" diff --git a/scripts/tests/test_seed_sandbox_table.py b/scripts/tests/test_seed_sandbox_table.py index a0beb1a44..52d50ec98 100644 --- a/scripts/tests/test_seed_sandbox_table.py +++ b/scripts/tests/test_seed_sandbox_table.py @@ -330,3 +330,34 @@ def test_seed_table_with_failures( # Verify error message about failures mock_print.assert_any_call("⚠️ 2 pointer(s) failed to create") + + @patch("seed_sandbox_table._validate_table_access") + @patch("seed_sandbox_table._check_for_existing_sandbox_pointers") + @patch("seed_sandbox_table._load_pointer_templates") + @patch("seed_sandbox_table._generate_and_write_pointers") + @patch("seed_sandbox_table._write_pointer_extract") + @patch("seed_sandbox_table.TestNhsNumbersIterator") + def test_seed_table_skip_csv_writing( + self, + mock_nhs_iter_class, + mock_write_extract, + mock_generate, + mock_load_templates, + mock_check_pointers, + mock_validate, + ): + mock_table = MagicMock() + mock_validate.return_value = mock_table + + mock_templates = {"type1": {"template": "data"}} + mock_load_templates.return_value = mock_templates + + pointer_data = [["PTR-001", "type1", "CUST1", "9000000001"]] + mock_generate.return_value = (pointer_data, 1) + + result = seed_sandbox_table( + "test-table", pointers_per_type=1, force=False, write_csv=False + ) + + assert result == {"successful": 1, "attempted": 1, "failed": 0} + mock_write_extract.assert_not_called() diff --git a/terraform/infrastructure/modules/seed_sandbox_lambda/eventbridge.tf b/terraform/infrastructure/modules/seed_sandbox_lambda/eventbridge.tf index c603d5384..a0393e77c 100644 --- a/terraform/infrastructure/modules/seed_sandbox_lambda/eventbridge.tf +++ b/terraform/infrastructure/modules/seed_sandbox_lambda/eventbridge.tf @@ -1,12 
+1,14 @@ -resource "aws_eventbridge_rule" "event_rule" { - name = "${var.prefix}--event_rule" - description = "Rule to clear and reseed sandbox data" - schedule_expression = "cron(0 2 ? * SUN *)" # 2am UTC, every Sunday +resource "aws_cloudwatch_event_rule" "event_rule" { + name = "${var.prefix}--event_rule" + description = "Rule to clear and reseed sandbox data" + # Set this to weekly once development and testing is complete + # schedule_expression = "cron(0 2 ? * SUN *)" # 2am UTC, every Sunday + schedule_expression = "cron(0 9-17 ? * MON-FRI *)" # Hourly, 9am-5pm UTC, Monday-Friday } -resource "aws_eventbridge_target" "event_target" { +resource "aws_cloudwatch_event_target" "event_target" { target_id = "${var.prefix}--event_target" - rule = aws_eventbridge_rule.event_rule.name + rule = aws_cloudwatch_event_rule.event_rule.name arn = aws_lambda_function.lambda_function.arn } @@ -16,5 +18,5 @@ resource "aws_lambda_permission" "allow_execution_from_eventbridge" { action = "lambda:InvokeFunction" function_name = aws_lambda_function.lambda_function.arn principal = "events.amazonaws.com" - source_arn = aws_eventbridge_rule.event_rule.arn + source_arn = aws_cloudwatch_event_rule.event_rule.arn } diff --git a/terraform/infrastructure/modules/seed_sandbox_lambda/iam.tf b/terraform/infrastructure/modules/seed_sandbox_lambda/iam.tf index 0e4a0c3fa..3b5d47db5 100644 --- a/terraform/infrastructure/modules/seed_sandbox_lambda/iam.tf +++ b/terraform/infrastructure/modules/seed_sandbox_lambda/iam.tf @@ -28,3 +28,25 @@ resource "aws_iam_role_policy_attachment" "additional_policies" { count = length(var.additional_policies) policy_arn = var.additional_policies[count.index] } + +# NOTE: These permissions (DescribeTable and BatchWriteItem) could be added to the core pointers-table module policies if we're happy for them to be in there: +# - DescribeTable could be added to modules/pointers-table/iam.tf "pointers-table-read" policy (used by the producer & consumer lambdas) +# - BatchWriteItem could be added to modules/pointers-table/iam.tf "pointers-table-write" policy (used by the producer lambdas) +resource "aws_iam_role_policy" "seed_sandbox_additional_permissions" { + name = "${var.prefix}--sandbox-seeder-additional" + role = aws_iam_role.lambda_role.id + + policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Effect = "Allow" + Action = [ + "dynamodb:DescribeTable", + "dynamodb:BatchWriteItem" + ] + Resource = "arn:aws:dynamodb:${var.region}:*:table/${var.table_name}" + } + ] + }) +} diff --git a/terraform/infrastructure/modules/seed_sandbox_lambda/lambda.tf b/terraform/infrastructure/modules/seed_sandbox_lambda/lambda.tf index 879fb1bcc..568175758 100644 --- a/terraform/infrastructure/modules/seed_sandbox_lambda/lambda.tf +++ b/terraform/infrastructure/modules/seed_sandbox_lambda/lambda.tf @@ -1,6 +1,6 @@ resource "aws_lambda_function" "lambda_function" { function_name = "${var.prefix}--sandbox-seeder" - runtime = "python3.9" + runtime = "python3.12" handler = "index.handler" role = aws_iam_role.lambda_role.arn filename = "${path.module}/../../../../lambdas/seed_sandbox/dist/seed_sandbox.zip" diff --git a/terraform/infrastructure/seed_sandbox_lambda.tf b/terraform/infrastructure/seed_sandbox_lambda.tf index b3e30f5be..e259f3264 100644 --- a/terraform/infrastructure/seed_sandbox_lambda.tf +++ b/terraform/infrastructure/seed_sandbox_lambda.tf @@ -3,12 +3,11 @@ module "seed_sandbox_lambda" { source = "./modules/seed_sandbox_lambda" region = local.region prefix = local.prefix - layers = 
[module.lambda-utils.layer_arn, module.nrlf.layer_arn, module.third_party.layer_arn] + layers = [module.nrlf.layer_arn, module.third_party.layer_arn, module.nrlf_permissions.layer_arn] kms_key_id = module.kms__cloudwatch.kms_arn - # Use sandy-dev table for dev environment, int-sandboxcopy for int - # This will be changed to the int-sandbox table once development and testing is complete - table_name = local.environment == "dev" ? "${local.prefix}--sandy-dev-pointers-table" : "${local.prefix}--int-sandboxcopy-pointers-table" + # Use int-sandboxcopy for int but change to int-sandbox table once development and testing is complete + table_name = local.environment == "dev" ? "${local.prefix}-pointers-table" : "${local.prefix}copy-pointers-table" environment_variables = { PREFIX = "${local.prefix}--" @@ -17,8 +16,7 @@ module "seed_sandbox_lambda" { } additional_policies = [ - aws_iam_policy.document-pointer__dynamodb-write.arn, - aws_iam_policy.document-pointer__dynamodb-read.arn, - aws_iam_policy.document-pointer__kms-read-write.arn + local.pointers_table_write_policy_arn, + local.pointers_table_read_policy_arn ] } From 1c7c0abbd7cb49c7aa4d6c482f6dcd6a538d0daa Mon Sep 17 00:00:00 2001 From: Sandy Forrester Date: Thu, 12 Feb 2026 16:09:05 +0000 Subject: [PATCH 06/19] NRL-721 Make seed lambda ac wide and enable ac wide layers --- lambdas/seed_sandbox/index.py | 127 ++++++++++++------ scripts/seed_sandbox_table.py | 8 +- .../dev/lambda-layers.tf | 6 + .../dev/lambda__seed-sandbox.tf | 22 +++ .../modules/lambda-layers/layers.tf | 41 ++++++ .../modules/lambda-layers/outputs.tf | 14 ++ .../modules/lambda-layers/variables.tf | 4 + .../seed_sandbox_lambda/eventbridge.tf | 0 .../modules/seed_sandbox_lambda/iam.tf | 16 +-- .../modules/seed_sandbox_lambda/lambda.tf | 2 +- .../modules/seed_sandbox_lambda/locals.tf | 0 .../modules/seed_sandbox_lambda/logs.tf | 0 .../modules/seed_sandbox_lambda/output.tf | 0 .../modules/seed_sandbox_lambda/vars.tf | 14 ++ .../test/lambda-layers.tf | 6 + .../test/lambda__seed-sandbox.tf | 24 ++++ .../modules/layer/lambda_layer.tf | 9 ++ .../modules/seed_sandbox_lambda/vars.tf | 18 --- .../infrastructure/seed_sandbox_lambda.tf | 22 --- 19 files changed, 235 insertions(+), 98 deletions(-) create mode 100644 terraform/account-wide-infrastructure/dev/lambda-layers.tf create mode 100644 terraform/account-wide-infrastructure/dev/lambda__seed-sandbox.tf create mode 100644 terraform/account-wide-infrastructure/modules/lambda-layers/layers.tf create mode 100644 terraform/account-wide-infrastructure/modules/lambda-layers/outputs.tf create mode 100644 terraform/account-wide-infrastructure/modules/lambda-layers/variables.tf rename terraform/{infrastructure => account-wide-infrastructure}/modules/seed_sandbox_lambda/eventbridge.tf (100%) rename terraform/{infrastructure => account-wide-infrastructure}/modules/seed_sandbox_lambda/iam.tf (57%) rename terraform/{infrastructure => account-wide-infrastructure}/modules/seed_sandbox_lambda/lambda.tf (93%) rename terraform/{infrastructure => account-wide-infrastructure}/modules/seed_sandbox_lambda/locals.tf (100%) rename terraform/{infrastructure => account-wide-infrastructure}/modules/seed_sandbox_lambda/logs.tf (100%) rename terraform/{infrastructure => account-wide-infrastructure}/modules/seed_sandbox_lambda/output.tf (100%) create mode 100644 terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/vars.tf create mode 100644 terraform/account-wide-infrastructure/test/lambda-layers.tf create mode 100644 
terraform/account-wide-infrastructure/test/lambda__seed-sandbox.tf delete mode 100644 terraform/infrastructure/modules/seed_sandbox_lambda/vars.tf delete mode 100644 terraform/infrastructure/seed_sandbox_lambda.tf diff --git a/lambdas/seed_sandbox/index.py b/lambdas/seed_sandbox/index.py index a798c66d4..a3a644c8b 100644 --- a/lambdas/seed_sandbox/index.py +++ b/lambdas/seed_sandbox/index.py @@ -1,8 +1,8 @@ """ -Lambda handler for resetting the sandbox DynamoDB table. +Lambda handler for resetting specified DynamoDB tables with seed test data. -This Lambda function runs on a weekly schedule to clear and reseed the -sandbox pointers table with fresh test data. +This Lambda function runs on a schedule to clear and reseed specified +pointers tables with fresh test data. """ # flake8: noqa: T201 @@ -16,64 +16,109 @@ def handler(event, context): """ - Lambda handler that orchestrates the reset of the sandbox table. + Lambda handler that orchestrates the reset of specified tables - The sandbox table to be reset is determined by the TABLE_NAME environment variable. + The tables to be reset are specified by the TABLE_NAMES environment variable + as a comma-separated list Args: event: Lambda event (from EventBridge schedule) context: Lambda context Returns: - dict: Response with status and details + dict: Response with status and details for each table """ - table_name = os.environ.get("TABLE_NAME") + table_names_str = os.environ.get("TABLE_NAMES", "") pointers_per_type = int(os.environ.get("POINTERS_PER_TYPE", "2")) - if not table_name: - error_msg = "TABLE_NAME environment variable is required" + if not table_names_str: + error_msg = "TABLE_NAMES environment variable is required" print(f"ERROR: {error_msg}") return {"statusCode": 500, "body": json.dumps({"error": error_msg})} - print(f"Starting sandbox table reset for: {table_name}") - print(f"Pointers per type: {pointers_per_type}") + table_names = [name.strip() for name in table_names_str.split(",") if name.strip()] - try: - print("Step 1: Deleting all items from table...") - pointers_deleted_count = delete_all_table_items(table_name=table_name) - print("✓ All items deleted successfully") - - print("Step 2: Seeding table with fresh data...") - seed_result = seed_sandbox_table( - table_name=table_name, - pointers_per_type=pointers_per_type, - force=True, - write_csv=False, - ) - print("✓ Table seeded successfully") + if not table_names: + error_msg = "No valid table names provided in TABLE_NAMES" + print(f"ERROR: {error_msg}") + return {"statusCode": 500, "body": json.dumps({"error": error_msg})} + + print( + f"Starting table reset for {len(table_names)} table(s): {', '.join(table_names)}" + ) + print(f"Pointers per type: {pointers_per_type}") - result = { - "statusCode": 200, - "body": json.dumps( + results = [] + failed_tables = [] + + for table_name in table_names: + print(f"\n{'='*60}") + print(f"Processing table: {table_name}") + print(f"{'='*60}") + + try: + print("Step 1: Deleting all items from table...") + pointers_deleted_count = delete_all_table_items(table_name=table_name) + print(f"✓ Deleted {pointers_deleted_count} items") + + print("Step 2: Seeding table with fresh data...") + seed_result = seed_sandbox_table( + table_name=table_name, + pointers_per_type=pointers_per_type, + force=True, + write_csv=False, + ) + print(f"✓ Created {seed_result['successful']} pointers") + + results.append( { - "message": "Sandbox table reset completed successfully", "table_name": table_name, + "status": "success", "pointers_deleted": 
pointers_deleted_count, "pointers_created": seed_result["successful"], "pointers_attempted": seed_result["attempted"], "pointers_failed": seed_result["failed"], - "pointers_per_type": pointers_per_type, } - ), - } + ) - print(f"SUCCESS: {result}") - return result - - except Exception as e: - error_msg = f"Failed to reset sandbox table: {str(e)}" - print(f"ERROR: {error_msg}") - return { - "statusCode": 500, - "body": json.dumps({"error": error_msg, "table_name": table_name}), - } + except Exception as e: + error_msg = f"Failed to reset table {table_name}: {str(e)}" + print(f"ERROR: {error_msg}") + failed_tables.append(table_name) + results.append( + { + "table_name": table_name, + "status": "failed", + "error": str(e), + } + ) + + if failed_tables: + status_code = ( + 500 if len(failed_tables) == len(table_names) else 207 + ) # 207 = Multi-Status + message = ( + f"Failed to reset {len(failed_tables)} table(s): {', '.join(failed_tables)}" + ) + else: + status_code = 200 + message = f"Successfully reset {len(table_names)} table(s)" + + result = { + "statusCode": status_code, + "body": json.dumps( + { + "message": message, + "tables_processed": len(table_names), + "tables_succeeded": len(table_names) - len(failed_tables), + "tables_failed": len(failed_tables), + "results": results, + "pointers_per_type": pointers_per_type, + } + ), + } + + print(f"\n{'='*60}") + print(f"RESULT: {message}") + print(f"{'='*60}") + return result diff --git a/scripts/seed_sandbox_table.py b/scripts/seed_sandbox_table.py index d40bdc81f..a366a8763 100755 --- a/scripts/seed_sandbox_table.py +++ b/scripts/seed_sandbox_table.py @@ -73,17 +73,13 @@ def _make_realistic_pointer( doc_ref_dict = copy.deepcopy(template) - doc_ref_dict["id"] = ( - f"{custodian}-SANDBOX-{str(counter).zfill(6)}" # Make this more realistic - currently similar to Y12345-SANDBOX-000016 - ) + doc_ref_dict["id"] = f"{custodian}-SANDBOX-{str(counter).zfill(6)}" doc_ref_dict["subject"]["identifier"]["value"] = nhs_number doc_ref_dict["custodian"]["identifier"]["value"] = custodian doc_ref_dict["author"][0]["identifier"]["value"] = AUTHOR if "masterIdentifier" in doc_ref_dict: - doc_ref_dict["masterIdentifier"][ - "value" - ] = f"sandbox-{custodian}-{counter}" # Make this more realistic - currently similar to "sandbox-Y12345-32" + doc_ref_dict["masterIdentifier"]["value"] = f"sandbox-{custodian}-{counter}" doc_ref = DocumentReference(**doc_ref_dict) diff --git a/terraform/account-wide-infrastructure/dev/lambda-layers.tf b/terraform/account-wide-infrastructure/dev/lambda-layers.tf new file mode 100644 index 000000000..697e28618 --- /dev/null +++ b/terraform/account-wide-infrastructure/dev/lambda-layers.tf @@ -0,0 +1,6 @@ +# Account-wide Lambda layers for shared infrastructure +# Built once per account, used by account-wide Lambdas like the seed Lambda +module "shared_lambda_layers" { + source = "../modules/lambda-layers" + name_prefix = local.prefix +} diff --git a/terraform/account-wide-infrastructure/dev/lambda__seed-sandbox.tf b/terraform/account-wide-infrastructure/dev/lambda__seed-sandbox.tf new file mode 100644 index 000000000..edf5d306f --- /dev/null +++ b/terraform/account-wide-infrastructure/dev/lambda__seed-sandbox.tf @@ -0,0 +1,22 @@ +# Lambda to reset specified DynamoDB tables with seed test data on a schedule +# Uses account-wide Lambda layers + +module "seed_sandbox_lambda" { + source = "../modules/seed_sandbox_lambda" + region = local.region + prefix = local.prefix + layers = [ + module.shared_lambda_layers.nrlf_layer_arn, + 
module.shared_lambda_layers.third_party_layer_arn, + module.shared_lambda_layers.nrlf_permissions_layer_arn + ] + kms_key_id = module.dev-sandbox-pointers-table.kms_key_arn #not sure about this + + table_names = ["nhsd-nrlf--dev-sandy-dev-pointers-table"] + + environment_variables = { + PREFIX = "${local.prefix}--" + ENVIRONMENT = local.environment + POINTERS_PER_TYPE = "2" + } +} diff --git a/terraform/account-wide-infrastructure/modules/lambda-layers/layers.tf b/terraform/account-wide-infrastructure/modules/lambda-layers/layers.tf new file mode 100644 index 000000000..cb2348749 --- /dev/null +++ b/terraform/account-wide-infrastructure/modules/lambda-layers/layers.tf @@ -0,0 +1,41 @@ +# Account-wide Lambda layers for shared infrastructure +# These reference the same pre-built zips as workspace layers +# but are deployed once per account rather than per workspace + +locals { + dist_dir = "${path.module}/../../../../dist" + + # IMPORTANT: These filenames must match: + # 1. The build outputs from Makefile (see: make build-lambda-layer) + # 2. The workspace layer module at terraform/infrastructure/modules/layer/lambda_layer.tf + # If you change the build process, update both locations. + layer_zips = { + nrlf = "nrlf.zip" + third_party = "dependency_layer.zip" + nrlf_permissions = "nrlf_permissions.zip" + } +} + +resource "aws_lambda_layer_version" "nrlf" { + layer_name = "${var.name_prefix}--nrlf-layer" + filename = "${local.dist_dir}/${local.layer_zips.nrlf}" + source_code_hash = filebase64sha256("${local.dist_dir}/${local.layer_zips.nrlf}") + compatible_runtimes = ["python3.12"] + description = "NRLF core library layer (account-wide)" +} + +resource "aws_lambda_layer_version" "third_party" { + layer_name = "${var.name_prefix}--dependency-layer" + filename = "${local.dist_dir}/${local.layer_zips.third_party}" + source_code_hash = filebase64sha256("${local.dist_dir}/${local.layer_zips.third_party}") + compatible_runtimes = ["python3.12"] + description = "Third party dependencies layer (account-wide)" +} + +resource "aws_lambda_layer_version" "nrlf_permissions" { + layer_name = "${var.name_prefix}--nrlf-permissions-layer" + filename = "${local.dist_dir}/${local.layer_zips.nrlf_permissions}" + source_code_hash = filebase64sha256("${local.dist_dir}/${local.layer_zips.nrlf_permissions}") + compatible_runtimes = ["python3.12"] + description = "NRLF permissions library layer (account-wide)" +} diff --git a/terraform/account-wide-infrastructure/modules/lambda-layers/outputs.tf b/terraform/account-wide-infrastructure/modules/lambda-layers/outputs.tf new file mode 100644 index 000000000..e86ad3467 --- /dev/null +++ b/terraform/account-wide-infrastructure/modules/lambda-layers/outputs.tf @@ -0,0 +1,14 @@ +output "nrlf_layer_arn" { + description = "ARN of the NRLF Lambda layer" + value = aws_lambda_layer_version.nrlf.arn +} + +output "third_party_layer_arn" { + description = "ARN of the third party dependencies Lambda layer" + value = aws_lambda_layer_version.third_party.arn +} + +output "nrlf_permissions_layer_arn" { + description = "ARN of the NRLF permissions Lambda layer" + value = aws_lambda_layer_version.nrlf_permissions.arn +} diff --git a/terraform/account-wide-infrastructure/modules/lambda-layers/variables.tf b/terraform/account-wide-infrastructure/modules/lambda-layers/variables.tf new file mode 100644 index 000000000..8c9de5637 --- /dev/null +++ b/terraform/account-wide-infrastructure/modules/lambda-layers/variables.tf @@ -0,0 +1,4 @@ +variable "name_prefix" { + description = "Resource name 
prefix (e.g., nhsd-nrlf--dev)" + type = string +} diff --git a/terraform/infrastructure/modules/seed_sandbox_lambda/eventbridge.tf b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/eventbridge.tf similarity index 100% rename from terraform/infrastructure/modules/seed_sandbox_lambda/eventbridge.tf rename to terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/eventbridge.tf diff --git a/terraform/infrastructure/modules/seed_sandbox_lambda/iam.tf b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/iam.tf similarity index 57% rename from terraform/infrastructure/modules/seed_sandbox_lambda/iam.tf rename to terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/iam.tf index 3b5d47db5..ce269bcf8 100644 --- a/terraform/infrastructure/modules/seed_sandbox_lambda/iam.tf +++ b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/iam.tf @@ -23,15 +23,6 @@ resource "aws_iam_role_policy_attachment" "lambda_policy_attachment" { ] } -resource "aws_iam_role_policy_attachment" "additional_policies" { - role = aws_iam_role.lambda_role.name - count = length(var.additional_policies) - policy_arn = var.additional_policies[count.index] -} - -# NOTE: These permissions (DescribeTable and BatchWriteItem) could be added to the core pointers-table module policies if we're happy for them to be in there: -# - DescribeTable could be added to modules/pointers-table/iam.tf "pointers-table-read" policy (used by the producer & consumer lambdas) -# - BatchWriteItem could be added to modules/pointers-table/iam.tf "pointers-table-write" policy (used by the producer lambdas) resource "aws_iam_role_policy" "seed_sandbox_additional_permissions" { name = "${var.prefix}--sandbox-seeder-additional" role = aws_iam_role.lambda_role.id @@ -43,9 +34,14 @@ resource "aws_iam_role_policy" "seed_sandbox_additional_permissions" { Effect = "Allow" Action = [ "dynamodb:DescribeTable", + "dynamodb:Scan", + "dynamodb:Query", + "dynamodb:GetItem", + "dynamodb:PutItem", + "dynamodb:DeleteItem", "dynamodb:BatchWriteItem" ] - Resource = "arn:aws:dynamodb:${var.region}:*:table/${var.table_name}" + Resource = [for table_name in var.table_names : "arn:aws:dynamodb:${var.region}:*:table/${table_name}"] } ] }) diff --git a/terraform/infrastructure/modules/seed_sandbox_lambda/lambda.tf b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/lambda.tf similarity index 93% rename from terraform/infrastructure/modules/seed_sandbox_lambda/lambda.tf rename to terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/lambda.tf index 568175758..88bfc9e4e 100644 --- a/terraform/infrastructure/modules/seed_sandbox_lambda/lambda.tf +++ b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/lambda.tf @@ -10,7 +10,7 @@ resource "aws_lambda_function" "lambda_function" { environment { variables = merge(var.environment_variables, { - TABLE_NAME = var.table_name + TABLE_NAMES = join(",", var.table_names) }) } diff --git a/terraform/infrastructure/modules/seed_sandbox_lambda/locals.tf b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/locals.tf similarity index 100% rename from terraform/infrastructure/modules/seed_sandbox_lambda/locals.tf rename to terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/locals.tf diff --git a/terraform/infrastructure/modules/seed_sandbox_lambda/logs.tf b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/logs.tf similarity index 100% rename from 
terraform/infrastructure/modules/seed_sandbox_lambda/logs.tf rename to terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/logs.tf diff --git a/terraform/infrastructure/modules/seed_sandbox_lambda/output.tf b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/output.tf similarity index 100% rename from terraform/infrastructure/modules/seed_sandbox_lambda/output.tf rename to terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/output.tf diff --git a/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/vars.tf b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/vars.tf new file mode 100644 index 000000000..c513acd91 --- /dev/null +++ b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/vars.tf @@ -0,0 +1,14 @@ +variable "prefix" {} + +variable "region" {} + +variable "layers" {} + +variable "kms_key_id" {} + +variable "environment_variables" {} + +variable "table_names" { + description = "List of DynamoDB table names to reset" + type = list(string) +} diff --git a/terraform/account-wide-infrastructure/test/lambda-layers.tf b/terraform/account-wide-infrastructure/test/lambda-layers.tf new file mode 100644 index 000000000..697e28618 --- /dev/null +++ b/terraform/account-wide-infrastructure/test/lambda-layers.tf @@ -0,0 +1,6 @@ +# Account-wide Lambda layers for shared infrastructure +# Built once per account, used by account-wide Lambdas like the seed Lambda +module "shared_lambda_layers" { + source = "../modules/lambda-layers" + name_prefix = local.prefix +} diff --git a/terraform/account-wide-infrastructure/test/lambda__seed-sandbox.tf b/terraform/account-wide-infrastructure/test/lambda__seed-sandbox.tf new file mode 100644 index 000000000..454e165ff --- /dev/null +++ b/terraform/account-wide-infrastructure/test/lambda__seed-sandbox.tf @@ -0,0 +1,24 @@ +# Lambda to reset specified DynamoDB tables with seed test data on a schedule +# Deployed at account level to avoid duplication across workspaces +# Uses account-wide Lambda layers + +module "seed_sandbox_lambda" { + source = "../modules/seed_sandbox_lambda" + region = local.region + prefix = local.prefix + layers = [ + module.shared_lambda_layers.nrlf_layer_arn, + module.shared_lambda_layers.third_party_layer_arn, + module.shared_lambda_layers.nrlf_permissions_layer_arn + ] + kms_key_id = module.int-sandbox-pointers-table.kms_key_arn + + # Use int-sandboxcopy for now, change to int-sandbox-pointers-table when ready + table_names = ["nhsd-nrlf--int-sandboxcopy-pointers-table"] + + environment_variables = { + PREFIX = "${local.prefix}--" + ENVIRONMENT = local.environment + POINTERS_PER_TYPE = "2" + } +} diff --git a/terraform/infrastructure/modules/layer/lambda_layer.tf b/terraform/infrastructure/modules/layer/lambda_layer.tf index f945794a1..f95856fa0 100644 --- a/terraform/infrastructure/modules/layer/lambda_layer.tf +++ b/terraform/infrastructure/modules/layer/lambda_layer.tf @@ -1,3 +1,12 @@ +# Workspace-level Lambda layer module +# Creates layers per workspace for API Lambdas +# +# IMPORTANT: The layer zip filenames (${var.name}.zip) must match: +# 1. The build outputs from Makefile (see: make build-lambda-layer) +# 2. The account-wide layer module at terraform/account-wide-infrastructure/modules/lambda-layers/layers.tf +# Expected filenames: nrlf.zip, dependency_layer.zip, nrlf_permissions.zip +# If you change the build process, update both locations. 
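
Nothing currently enforces the filename coupling this comment describes, so a small pre-plan check can fail fast if the build outputs drift. A sketch, assuming the repo-root dist/ layout used throughout this series; the script itself is illustrative, not part of the build:

from pathlib import Path

# Zip names hard-coded by both the workspace and account-wide layer modules.
EXPECTED_ZIPS = ("nrlf.zip", "dependency_layer.zip", "nrlf_permissions.zip")

def check_layer_artifacts(dist_dir: str = "dist") -> None:
    """Exit non-zero if any expected layer zip is missing before terraform runs."""
    missing = [z for z in EXPECTED_ZIPS if not (Path(dist_dir) / z).is_file()]
    if missing:
        raise SystemExit(f"Missing layer zip(s) in {dist_dir!r}: {', '.join(missing)}")

if __name__ == "__main__":
    check_layer_artifacts()
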
+ resource "aws_lambda_layer_version" "lambda_layer" { layer_name = "${var.prefix}--${replace(var.name, "_", "-")}" filename = "${path.module}/../../../../dist/${var.name}.zip" diff --git a/terraform/infrastructure/modules/seed_sandbox_lambda/vars.tf b/terraform/infrastructure/modules/seed_sandbox_lambda/vars.tf deleted file mode 100644 index 7222cef88..000000000 --- a/terraform/infrastructure/modules/seed_sandbox_lambda/vars.tf +++ /dev/null @@ -1,18 +0,0 @@ -variable "prefix" {} - -variable "region" {} - -variable "layers" {} - -variable "kms_key_id" {} - -variable "environment_variables" {} - -variable "additional_policies" { - default = [] -} - -variable "table_name" { - description = "The name of the DynamoDB table to reset" - type = string -} diff --git a/terraform/infrastructure/seed_sandbox_lambda.tf b/terraform/infrastructure/seed_sandbox_lambda.tf deleted file mode 100644 index e259f3264..000000000 --- a/terraform/infrastructure/seed_sandbox_lambda.tf +++ /dev/null @@ -1,22 +0,0 @@ -module "seed_sandbox_lambda" { - count = contains(["int", "dev"], local.environment) ? 1 : 0 - source = "./modules/seed_sandbox_lambda" - region = local.region - prefix = local.prefix - layers = [module.nrlf.layer_arn, module.third_party.layer_arn, module.nrlf_permissions.layer_arn] - kms_key_id = module.kms__cloudwatch.kms_arn - - # Use int-sandboxcopy for int but change to int-sandbox table once development and testing is complete - table_name = local.environment == "dev" ? "${local.prefix}-pointers-table" : "${local.prefix}copy-pointers-table" - - environment_variables = { - PREFIX = "${local.prefix}--" - ENVIRONMENT = local.environment - POINTERS_PER_TYPE = "2" - } - - additional_policies = [ - local.pointers_table_write_policy_arn, - local.pointers_table_read_policy_arn - ] -} From 3b56521bd8cc2e70e7282738b296bbdae3c011c1 Mon Sep 17 00:00:00 2001 From: Sandy Forrester Date: Thu, 12 Feb 2026 16:48:06 +0000 Subject: [PATCH 07/19] NRL-721 Add build to ac wide wf and deploy only when tables are listed --- .github/workflows/deploy-account-wide-infra.yml | 6 ++++++ lambdas/seed_sandbox/index.py | 4 +--- .../dev/lambda__seed-sandbox.tf | 9 +++++++-- .../modules/seed_sandbox_lambda/logs.tf | 1 - .../modules/seed_sandbox_lambda/vars.tf | 2 -- .../test/lambda__seed-sandbox.tf | 10 +++++++--- 6 files changed, 21 insertions(+), 11 deletions(-) diff --git a/.github/workflows/deploy-account-wide-infra.yml b/.github/workflows/deploy-account-wide-infra.yml index 564a8e1d1..7a1701264 100644 --- a/.github/workflows/deploy-account-wide-infra.yml +++ b/.github/workflows/deploy-account-wide-infra.yml @@ -51,6 +51,9 @@ jobs: echo "${HOME}/.asdf/bin" >> $GITHUB_PATH poetry install --no-root + - name: Build Lambda Layers + run: make build-lambda-layer + - name: Configure Management Credentials uses: aws-actions/configure-aws-credentials@7474bc4690e29a8392af63c5b98e7449536d5c3a #v4.3.1 with: @@ -109,6 +112,9 @@ jobs: echo "${HOME}/.asdf/bin" >> $GITHUB_PATH poetry install --no-root + - name: Build Lambda Layers + run: make build-lambda-layer + - name: Configure Management Credentials uses: aws-actions/configure-aws-credentials@7474bc4690e29a8392af63c5b98e7449536d5c3a #v4.3.1 with: diff --git a/lambdas/seed_sandbox/index.py b/lambdas/seed_sandbox/index.py index a3a644c8b..02946da06 100644 --- a/lambdas/seed_sandbox/index.py +++ b/lambdas/seed_sandbox/index.py @@ -94,9 +94,7 @@ def handler(event, context): ) if failed_tables: - status_code = ( - 500 if len(failed_tables) == len(table_names) else 207 - ) # 207 = 
Multi-Status + status_code = 500 if len(failed_tables) == len(table_names) else 207 message = ( f"Failed to reset {len(failed_tables)} table(s): {', '.join(failed_tables)}" ) diff --git a/terraform/account-wide-infrastructure/dev/lambda__seed-sandbox.tf b/terraform/account-wide-infrastructure/dev/lambda__seed-sandbox.tf index edf5d306f..652591c6b 100644 --- a/terraform/account-wide-infrastructure/dev/lambda__seed-sandbox.tf +++ b/terraform/account-wide-infrastructure/dev/lambda__seed-sandbox.tf @@ -1,7 +1,13 @@ # Lambda to reset specified DynamoDB tables with seed test data on a schedule # Uses account-wide Lambda layers +# Only deploys if tables are specified + +locals { + seed_table_names = ["nhsd-nrlf--dev-sandy-dev-pointers-table"] +} module "seed_sandbox_lambda" { + count = length(local.seed_table_names) > 0 ? 1 : 0 source = "../modules/seed_sandbox_lambda" region = local.region prefix = local.prefix @@ -10,9 +16,8 @@ module "seed_sandbox_lambda" { module.shared_lambda_layers.third_party_layer_arn, module.shared_lambda_layers.nrlf_permissions_layer_arn ] - kms_key_id = module.dev-sandbox-pointers-table.kms_key_arn #not sure about this - table_names = ["nhsd-nrlf--dev-sandy-dev-pointers-table"] + table_names = local.seed_table_names environment_variables = { PREFIX = "${local.prefix}--" diff --git a/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/logs.tf b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/logs.tf index 7fb1b8838..a9d01f3c7 100644 --- a/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/logs.tf +++ b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/logs.tf @@ -1,5 +1,4 @@ resource "aws_cloudwatch_log_group" "lambda_cloudwatch_log_group" { name = "/aws/lambda/${aws_lambda_function.lambda_function.function_name}" retention_in_days = local.lambda_log_retention_in_days - kms_key_id = var.kms_key_id } diff --git a/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/vars.tf b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/vars.tf index c513acd91..4b3962ad2 100644 --- a/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/vars.tf +++ b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/vars.tf @@ -4,8 +4,6 @@ variable "region" {} variable "layers" {} -variable "kms_key_id" {} - variable "environment_variables" {} variable "table_names" { diff --git a/terraform/account-wide-infrastructure/test/lambda__seed-sandbox.tf b/terraform/account-wide-infrastructure/test/lambda__seed-sandbox.tf index 454e165ff..76d9261dd 100644 --- a/terraform/account-wide-infrastructure/test/lambda__seed-sandbox.tf +++ b/terraform/account-wide-infrastructure/test/lambda__seed-sandbox.tf @@ -1,8 +1,14 @@ # Lambda to reset specified DynamoDB tables with seed test data on a schedule # Deployed at account level to avoid duplication across workspaces # Uses account-wide Lambda layers +# Only deploys if tables are specified + +locals { + seed_table_names = ["nhsd-nrlf--int-sandboxcopy-pointers-table"] # Change to int-sandbox-pointers-table when ready +} module "seed_sandbox_lambda" { + count = length(local.seed_table_names) > 0 ? 
1 : 0 source = "../modules/seed_sandbox_lambda" region = local.region prefix = local.prefix @@ -11,10 +17,8 @@ module "seed_sandbox_lambda" { module.shared_lambda_layers.third_party_layer_arn, module.shared_lambda_layers.nrlf_permissions_layer_arn ] - kms_key_id = module.int-sandbox-pointers-table.kms_key_arn - # Use int-sandboxcopy for now, change to int-sandbox-pointers-table when ready - table_names = ["nhsd-nrlf--int-sandboxcopy-pointers-table"] + table_names = local.seed_table_names environment_variables = { PREFIX = "${local.prefix}--" From 5aebcd6b07f468d3817ba67175ee537d70cc1cfe Mon Sep 17 00:00:00 2001 From: Sandy Forrester Date: Thu, 12 Feb 2026 16:55:26 +0000 Subject: [PATCH 08/19] NRL-721 Remove unused param --- scripts/seed_sandbox_table.py | 3 +-- scripts/tests/test_seed_sandbox_table.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/scripts/seed_sandbox_table.py b/scripts/seed_sandbox_table.py index a366a8763..688628200 100755 --- a/scripts/seed_sandbox_table.py +++ b/scripts/seed_sandbox_table.py @@ -65,7 +65,6 @@ def _load_sample_template(filename: str) -> dict: def _make_realistic_pointer( template: dict, - pointer_type: str, custodian: str, nhs_number: str, counter: int, @@ -178,7 +177,7 @@ def _generate_and_write_pointers( try: pointer = _make_realistic_pointer( - template, pointer_type, custodian, nhs_number, counter + template, custodian, nhs_number, counter ) put_req = {"PutRequest": {"Item": pointer.model_dump()}} diff --git a/scripts/tests/test_seed_sandbox_table.py b/scripts/tests/test_seed_sandbox_table.py index 52d50ec98..c7096d68d 100644 --- a/scripts/tests/test_seed_sandbox_table.py +++ b/scripts/tests/test_seed_sandbox_table.py @@ -201,7 +201,7 @@ def test_create_pointer_success(self, mock_pointer_class, mock_doc_ref_class): mock_pointer = MagicMock() mock_pointer_class.from_document_reference.return_value = mock_pointer - result = _make_realistic_pointer(template, "type1", "Y12345", "9000000001", 1) + result = _make_realistic_pointer(template, "Y12345", "9000000001", 1) assert result == mock_pointer mock_doc_ref_class.assert_called_once() From 02b98bf2dc7c2850990c4c268d89c6382dbaf578 Mon Sep 17 00:00:00 2001 From: Sandy Forrester Date: Thu, 12 Feb 2026 17:31:43 +0000 Subject: [PATCH 09/19] NRL-721 Correct make command --- .github/workflows/deploy-account-wide-infra.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy-account-wide-infra.yml b/.github/workflows/deploy-account-wide-infra.yml index 7a1701264..0379e4ea4 100644 --- a/.github/workflows/deploy-account-wide-infra.yml +++ b/.github/workflows/deploy-account-wide-infra.yml @@ -52,7 +52,7 @@ jobs: poetry install --no-root - name: Build Lambda Layers - run: make build-lambda-layer + run: make build-layers - name: Configure Management Credentials uses: aws-actions/configure-aws-credentials@7474bc4690e29a8392af63c5b98e7449536d5c3a #v4.3.1 @@ -113,7 +113,7 @@ jobs: poetry install --no-root - name: Build Lambda Layers - run: make build-lambda-layer + run: make build-layers - name: Configure Management Credentials uses: aws-actions/configure-aws-credentials@7474bc4690e29a8392af63c5b98e7449536d5c3a #v4.3.1 From 221c9bc623a9279481ac030e85d61ba4e75235f1 Mon Sep 17 00:00:00 2001 From: Sandy Forrester Date: Thu, 12 Feb 2026 19:08:45 +0000 Subject: [PATCH 10/19] NRL-721 Add build dependencies and perms --- .../workflows/deploy-account-wide-infra.yml | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git 
a/.github/workflows/deploy-account-wide-infra.yml b/.github/workflows/deploy-account-wide-infra.yml
index 0379e4ea4..c958dac1e 100644
--- a/.github/workflows/deploy-account-wide-infra.yml
+++ b/.github/workflows/deploy-account-wide-infra.yml
@@ -52,7 +52,9 @@ jobs:
         poetry install --no-root

       - name: Build Lambda Layers
-        run: make build-layers
+        run: |
+          make build-layers
+          make build-dependency-layer

       - name: Configure Management Credentials
         uses: aws-actions/configure-aws-credentials@7474bc4690e29a8392af63c5b98e7449536d5c3a #v4.3.1
@@ -61,6 +63,12 @@ jobs:
           role-to-assume: ${{ secrets.MGMT_ROLE_ARN }}
           role-session-name: github-actions-ci-${{ inputs.environment }}-${{ github.run_id }}

+      - name: Add S3 Permissions to Lambda Layer
+        env:
+          ACCOUNT_NAME: ${{ vars.ACCOUNT_NAME }}
+        run: |
+          make get-s3-perms ENV=${ACCOUNT_NAME}
+
       - name: Retrieve Server Certificates
         env:
           ACCOUNT_NAME: ${{ vars.ACCOUNT_NAME }}
@@ -113,7 +121,9 @@ jobs:
         poetry install --no-root

       - name: Build Lambda Layers
-        run: make build-layers
+        run: |
+          make build-layers
+          make build-dependency-layer

       - name: Configure Management Credentials
         uses: aws-actions/configure-aws-credentials@7474bc4690e29a8392af63c5b98e7449536d5c3a #v4.3.1
@@ -122,6 +132,12 @@ jobs:
           role-to-assume: ${{ secrets.MGMT_ROLE_ARN }}
           role-session-name: github-actions-ci-${{ inputs.environment }}-${{ github.run_id}}

+      - name: Add S3 Permissions to Lambda Layer
+        env:
+          ACCOUNT_NAME: ${{ vars.ACCOUNT_NAME }}
+        run: |
+          make get-s3-perms ENV=${ACCOUNT_NAME}
+
       - name: Download Terraform Plan Artifacts
         env:
           ACCOUNT_NAME: ${{ vars.ACCOUNT_NAME }}

From f13c88e0009a856e424e96bd3cc94957c8d8908c Mon Sep 17 00:00:00 2001
From: Sandy Forrester
Date: Thu, 12 Feb 2026 20:36:00 +0000
Subject: [PATCH 11/19] NRL-721 Add build seed lambda

---
 .github/workflows/deploy-account-wide-infra.yml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/.github/workflows/deploy-account-wide-infra.yml b/.github/workflows/deploy-account-wide-infra.yml
index c958dac1e..d81fe0d80 100644
--- a/.github/workflows/deploy-account-wide-infra.yml
+++ b/.github/workflows/deploy-account-wide-infra.yml
@@ -56,6 +56,9 @@ jobs:
           make build-layers
           make build-dependency-layer

+      - name: Build Seed Sandbox Lambda
+        run: make build-seed-sandbox-lambda
+
       - name: Configure Management Credentials
         uses: aws-actions/configure-aws-credentials@7474bc4690e29a8392af63c5b98e7449536d5c3a #v4.3.1
         with:
@@ -125,6 +128,9 @@ jobs:
           make build-layers
           make build-dependency-layer

+      - name: Build Seed Sandbox Lambda
+        run: make build-seed-sandbox-lambda
+
       - name: Configure Management Credentials
         uses: aws-actions/configure-aws-credentials@7474bc4690e29a8392af63c5b98e7449536d5c3a #v4.3.1
         with:

From 3f43b9663c467fb09a14d7e585a7a99660c4a16c Mon Sep 17 00:00:00 2001
From: Sandy Forrester
Date: Fri, 13 Feb 2026 09:23:42 +0000
Subject: [PATCH 12/19] NRL-721 Save lambda artifacts for apply and move lambda build to dist

---
 .../workflows/deploy-account-wide-infra.yml | 25 ++++++++-----------
 lambdas/seed_sandbox/Makefile | 25 ++++++++++---------
 .../modules/seed_sandbox_lambda/lambda.tf | 4 +--
 3 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/.github/workflows/deploy-account-wide-infra.yml b/.github/workflows/deploy-account-wide-infra.yml
index d81fe0d80..1752e5e0c 100644
--- a/.github/workflows/deploy-account-wide-infra.yml
+++ b/.github/workflows/deploy-account-wide-infra.yml
@@ -106,6 +106,11 @@ jobs:
           aws s3 cp terraform/account-wide-infrastructure/$ACCOUNT_NAME/tfplan.txt ­
s3://nhsd-nrlf--mgmt--github-ci-logging/acc-$ACCOUNT_NAME/${{ github.run_id }}/tfplan.txt aws s3 cp terraform/account-wide-infrastructure/modules/glue/files/src.zip s3://nhsd-nrlf--mgmt--github-ci-logging/acc-$ACCOUNT_NAME/${{ github.run_id }}/glue-src.zip + aws s3 cp dist/nrlf.zip s3://nhsd-nrlf--mgmt--github-ci-logging/acc-$ACCOUNT_NAME/${{ github.run_id }}/nrlf.zip + aws s3 cp dist/dependency_layer.zip s3://nhsd-nrlf--mgmt--github-ci-logging/acc-$ACCOUNT_NAME/${{ github.run_id }}/dependency_layer.zip + aws s3 cp dist/nrlf_permissions.zip s3://nhsd-nrlf--mgmt--github-ci-logging/acc-$ACCOUNT_NAME/${{ github.run_id }}/nrlf_permissions.zip + aws s3 cp dist/seed_sandbox.zip s3://nhsd-nrlf--mgmt--github-ci-logging/acc-$ACCOUNT_NAME/${{ github.run_id }}/seed_sandbox.zip + terraform-apply: name: Terraform Apply - ${{ inputs.environment }} needs: [terraform-plan] @@ -123,14 +128,6 @@ jobs: echo "${HOME}/.asdf/bin" >> $GITHUB_PATH poetry install --no-root - - name: Build Lambda Layers - run: | - make build-layers - make build-dependency-layer - - - name: Build Seed Sandbox Lambda - run: make build-seed-sandbox-lambda - - name: Configure Management Credentials uses: aws-actions/configure-aws-credentials@7474bc4690e29a8392af63c5b98e7449536d5c3a #v4.3.1 with: @@ -138,12 +135,6 @@ jobs: role-to-assume: ${{ secrets.MGMT_ROLE_ARN }} role-session-name: github-actions-ci-${{ inputs.environment }}-${{ github.run_id}} - - name: Add S3 Permissions to Lambda Layer - env: - ACCOUNT_NAME: ${{ vars.ACCOUNT_NAME }} - run: | - make get-s3-perms ENV=${ACCOUNT_NAME} - - name: Download Terraform Plan Artifacts env: ACCOUNT_NAME: ${{ vars.ACCOUNT_NAME }} @@ -154,6 +145,12 @@ jobs: mkdir -p terraform/account-wide-infrastructure/modules/glue/files aws s3 cp s3://nhsd-nrlf--mgmt--github-ci-logging/acc-$ACCOUNT_NAME/${{ github.run_id }}/glue-src.zip terraform/account-wide-infrastructure/modules/glue/files/src.zip + mkdir -p dist + aws s3 cp s3://nhsd-nrlf--mgmt--github-ci-logging/acc-$ACCOUNT_NAME/${{ github.run_id }}/nrlf.zip dist/nrlf.zip + aws s3 cp s3://nhsd-nrlf--mgmt--github-ci-logging/acc-$ACCOUNT_NAME/${{ github.run_id }}/dependency_layer.zip dist/dependency_layer.zip + aws s3 cp s3://nhsd-nrlf--mgmt--github-ci-logging/acc-$ACCOUNT_NAME/${{ github.run_id }}/nrlf_permissions.zip dist/nrlf_permissions.zip + aws s3 cp s3://nhsd-nrlf--mgmt--github-ci-logging/acc-$ACCOUNT_NAME/${{ github.run_id }}/seed_sandbox.zip dist/seed_sandbox.zip + - name: Retrieve Server Certificates env: ACCOUNT_NAME: ${{ vars.ACCOUNT_NAME }} diff --git a/lambdas/seed_sandbox/Makefile b/lambdas/seed_sandbox/Makefile index a9badf0d4..29965b148 100644 --- a/lambdas/seed_sandbox/Makefile +++ b/lambdas/seed_sandbox/Makefile @@ -2,27 +2,28 @@ build: clean @echo "Building Lambda deployment package..." 
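
The workflow hunks above publish every built artefact under a run-scoped S3 key in the plan job and pull the same keys back in the apply job, so the bytes that were planned are exactly the bytes that get applied. A sketch of that convention, assuming boto3 and credentials for the management account; the helper names are illustrative:

import boto3

s3 = boto3.client("s3")

BUCKET = "nhsd-nrlf--mgmt--github-ci-logging"  # bucket used by the workflow above
ARTIFACTS = ("nrlf.zip", "dependency_layer.zip", "nrlf_permissions.zip", "seed_sandbox.zip")

def run_key(account_name: str, run_id: str, artifact: str) -> str:
    # Mirrors the acc-<account>/<run-id>/<artifact> key layout in the workflow.
    return f"acc-{account_name}/{run_id}/{artifact}"

def upload_artifacts(account_name: str, run_id: str) -> None:
    # Plan job: push freshly built zips from dist/ to the run-scoped keys.
    for artifact in ARTIFACTS:
        s3.upload_file(f"dist/{artifact}", BUCKET, run_key(account_name, run_id, artifact))

def download_artifacts(account_name: str, run_id: str) -> None:
    # Apply job: restore the identical zips into dist/ before terraform apply.
    for artifact in ARTIFACTS:
        s3.download_file(BUCKET, run_key(account_name, run_id, artifact), f"dist/{artifact}")
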
-    mkdir -p dist
+    mkdir -p build
     # Copy the handler
-    cp index.py dist/
+    cp index.py build/
     # Copy the required scripts
-    mkdir -p dist/scripts
-    cp ../../scripts/delete_all_table_items.py dist/scripts/
-    cp ../../scripts/seed_sandbox_table.py dist/scripts/
-    cp ../../scripts/seed_utils.py dist/scripts/
+    mkdir -p build/scripts
+    cp ../../scripts/delete_all_table_items.py build/scripts/
+    cp ../../scripts/seed_sandbox_table.py build/scripts/
+    cp ../../scripts/seed_utils.py build/scripts/
     # Copy the pointer template data
-    mkdir -p dist/tests/data/samples
-    cp -r ../../tests/data/samples/*.json dist/tests/data/samples/
+    mkdir -p build/tests/data/samples
+    cp -r ../../tests/data/samples/*.json build/tests/data/samples/
-    # Create the zip file
-    cd dist && zip -r seed_sandbox.zip . -x "*.pyc" -x "__pycache__/*" -x ".DS_Store"
+    # Create the zip file in root dist
+    mkdir -p ../../dist
+    cd build && zip -r ../../../dist/seed_sandbox.zip . -x "*.pyc" -x "__pycache__/*" -x ".DS_Store"
-    @echo "✓ Lambda package created: dist/seed_sandbox.zip"
+    @echo "✓ Lambda package created: ../../dist/seed_sandbox.zip"
 clean:
     @echo "Cleaning build artifacts..."
-    rm -rf dist
+    rm -rf build
     @echo "✓ Clean complete"

diff --git a/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/lambda.tf b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/lambda.tf
index 88bfc9e4e..c261f8b41 100644
--- a/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/lambda.tf
+++ b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/lambda.tf
@@ -3,8 +3,8 @@ resource "aws_lambda_function" "lambda_function" {
   runtime          = "python3.12"
   handler          = "index.handler"
   role             = aws_iam_role.lambda_role.arn
-  filename         = "${path.module}/../../../../lambdas/seed_sandbox/dist/seed_sandbox.zip"
-  source_code_hash = filebase64sha256("${path.module}/../../../../lambdas/seed_sandbox/dist/seed_sandbox.zip")
+  filename         = "${path.module}/../../../../dist/seed_sandbox.zip"
+  source_code_hash = filebase64sha256("${path.module}/../../../../dist/seed_sandbox.zip")
   timeout          = local.lambda_timeout
   memory_size      = 128

From f630602208b69104381d89ab28230b31188399a7 Mon Sep 17 00:00:00 2001
From: Sandy Forrester
Date: Fri, 13 Feb 2026 11:08:59 +0000
Subject: [PATCH 13/19] NRL-721 Increase schedule, correct table name & sqube fixes

---
 scripts/seed_sandbox_table.py                               | 4 ++--
 scripts/seed_utils.py                                       | 2 +-
 scripts/tests/test_delete_all_table_items.py                | 2 +-
 .../account-wide-infrastructure/dev/lambda__seed-sandbox.tf | 2 +-
 .../modules/seed_sandbox_lambda/eventbridge.tf              | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/scripts/seed_sandbox_table.py b/scripts/seed_sandbox_table.py
index 688628200..6e5d66692 100755
--- a/scripts/seed_sandbox_table.py
+++ b/scripts/seed_sandbox_table.py
@@ -295,7 +295,7 @@ def seed_sandbox_table(

     if write_csv:
         try:
-            _write_pointer_extract(table_name, pointer_data)
+            _write_pointer_extract(pointer_data)
         except Exception as e:
             print(f"Warning: Failed to write CSV extract: {e}")

@@ -306,7 +306,7 @@
     }


-def _write_pointer_extract(table_name: str, pointer_data: list[list[str]]):
+def _write_pointer_extract(pointer_data: list[list[str]]):
     """Write pointer data to CSV file for reference."""
     try:
         output_dir = Path(__file__).parent.parent / "dist" / "sandbox"

diff --git a/scripts/seed_utils.py b/scripts/seed_utils.py
index 50f38e8e8..3d9e76ecf 100644
--- a/scripts/seed_utils.py
+++ b/scripts/seed_utils.py
@@ -3,7 +3,7 @@
 """

 # NHS number checksum weights (10, 9, 8, 7, 6, 5, 4, 3, 2)
-CHECKSUM_WEIGHTS = [i for i in range(10, 1, -1)]
+CHECKSUM_WEIGHTS = list(range(10, 1, -1))


 class TestNhsNumbersIterator:

diff --git a/scripts/tests/test_delete_all_table_items.py b/scripts/tests/test_delete_all_table_items.py
index ea4261574..ac260fdd3 100644
--- a/scripts/tests/test_delete_all_table_items.py
+++ b/scripts/tests/test_delete_all_table_items.py
@@ -261,7 +261,7 @@ def test_batch_writer_context_manager(self, mock_boto3):
         mock_resource.Table.return_value = mock_table
         mock_boto3.resource.return_value = mock_resource

-        result = delete_all_table_items("test-table")
+        delete_all_table_items("test-table")

         mock_table.batch_writer.assert_called_once()
         mock_batch_writer.delete_item.assert_called_once_with(Key={"id": "item1"})

diff --git a/terraform/account-wide-infrastructure/dev/lambda__seed-sandbox.tf b/terraform/account-wide-infrastructure/dev/lambda__seed-sandbox.tf
index 652591c6b..f479e971a 100644
--- a/terraform/account-wide-infrastructure/dev/lambda__seed-sandbox.tf
+++ b/terraform/account-wide-infrastructure/dev/lambda__seed-sandbox.tf
@@ -3,7 +3,7 @@
 # Only deploys if tables are specified

 locals {
-  seed_table_names = ["nhsd-nrlf--dev-sandy-dev-pointers-table"]
+  seed_table_names = ["nhsd-nrlf--sandy-dev-pointers-table"]
 }

 module "seed_sandbox_lambda" {

diff --git a/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/eventbridge.tf b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/eventbridge.tf
index a0393e77c..e60061832 100644
--- a/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/eventbridge.tf
+++ b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/eventbridge.tf
@@ -3,7 +3,7 @@ resource "aws_cloudwatch_event_rule" "event_rule" {
   description = "Rule to clear and reseed sandbox data"
   # Set this to weekly once development and testing is complete
   # schedule_expression = "cron(0 2 ? * SUN *)" # 2am UTC, every Sunday
-  schedule_expression = "cron(0 9-17 ? * MON-FRI *)" # Hourly, 9am-5pm UTC, Monday-Friday
+  schedule_expression = "cron(0/15 9-17 ? * MON-FRI *)" # Every 15 minutes, 9am-5pm UTC, Monday-Friday
 }

 resource "aws_cloudwatch_event_target" "event_target" {
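While the 15-minute schedule is in place for development, it can still be quicker to trigger the seed Lambda by hand than to wait for the next EventBridge tick. A minimal sketch using boto3; the function name is a placeholder, not something defined in this series — substitute whatever name the seed_sandbox_lambda module assigns to the deployed function:

    import boto3

    # Function name is illustrative; use the name Terraform actually assigns.
    response = boto3.client("lambda").invoke(FunctionName="<seed-sandbox-function-name>")
    print(response["StatusCode"])

The rule fires every 15 minutes only while development and testing are in progress; the commented-out weekly expression is the intended steady state.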
From 476c4c59919ba7269cfd5db465ea08a476f3e01c Mon Sep 17 00:00:00 2001
From: Sandy Forrester
Date: Fri, 13 Feb 2026 12:06:32 +0000
Subject: [PATCH 14/19] NRL-721 Add kms perms to lambda policy

---
 scripts/reset_sandbox_table.py         | 2 +-
 scripts/seed_sandbox_table.py          | 2 +-
 .../modules/seed_sandbox_lambda/iam.tf | 8 ++++++++
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/scripts/reset_sandbox_table.py b/scripts/reset_sandbox_table.py
index f9a7b69de..e743c9b59 100755
--- a/scripts/reset_sandbox_table.py
+++ b/scripts/reset_sandbox_table.py
@@ -37,7 +37,7 @@ def reset_sandbox_table(table_name: str, pointers_per_type: int = 2):
     print("Step 2: Seeding with fresh pointer data...")
     try:
         result = seed_sandbox_table(table_name, pointers_per_type, force=True)
-        print(f"\n=== ✓ Reset Complete ===")
+        print("\n=== ✓ Reset Complete ===")
         print(
             f"Table '{table_name}' has been reset with {result['successful']} fresh pointers"
         )

diff --git a/scripts/seed_sandbox_table.py b/scripts/seed_sandbox_table.py
index 6e5d66692..8ae5a2f2f 100755
--- a/scripts/seed_sandbox_table.py
+++ b/scripts/seed_sandbox_table.py
@@ -166,7 +166,7 @@ def _generate_and_write_pointers(

     for pointer_type, template in templates.items():
         for custodian in CUSTODIANS:
-            for i in range(pointers_per_type):
+            for _ in range(pointers_per_type):
                 counter += 1

                 try:

diff --git a/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/iam.tf b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/iam.tf
index ce269bcf8..5f06417d7 100644
--- a/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/iam.tf
+++ b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/iam.tf
@@ -42,6 +42,14 @@ resource "aws_iam_role_policy" "seed_sandbox_additional_permissions" {
           "dynamodb:BatchWriteItem"
         ]
         Resource = [for table_name in var.table_names : "arn:aws:dynamodb:${var.region}:*:table/${table_name}"]
+      },
+      {
+        Effect = "Allow"
+        Action = [
+          "kms:Decrypt",
+          "kms:DescribeKey"
+        ]
+        Resource = "*"
       }
     ]
   })

From 30cfd570f2cc7601c8fcf8cff8904306de02a639 Mon Sep 17 00:00:00 2001
From: Sandy Forrester
Date: Fri, 13 Feb 2026 14:58:34 +0000
Subject: [PATCH 15/19] NRL-721 Add second table

---
 .../account-wide-infrastructure/dev/lambda__seed-sandbox.tf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/terraform/account-wide-infrastructure/dev/lambda__seed-sandbox.tf b/terraform/account-wide-infrastructure/dev/lambda__seed-sandbox.tf
index f479e971a..edacc704c 100644
--- a/terraform/account-wide-infrastructure/dev/lambda__seed-sandbox.tf
+++ b/terraform/account-wide-infrastructure/dev/lambda__seed-sandbox.tf
@@ -3,7 +3,7 @@
 # Only deploys if tables are specified

 locals {
-  seed_table_names = ["nhsd-nrlf--sandy-dev-pointers-table"]
+  seed_table_names = ["nhsd-nrlf--sandy-dev-pointers-table", "nhsd-nrlf--sandycopy-dev-pointers-table"]
 }

 module "seed_sandbox_lambda" {

From f30fb60e92ba362d8f983f3fef5f0588fb755844 Mon Sep 17 00:00:00 2001
From: Sandy Forrester
Date: Fri, 13 Feb 2026 16:44:08 +0000
Subject: [PATCH 16/19] NRL-721 Sonarqube fixes

---
 scripts/delete_all_table_items.py        |  94 ++++++++-------
 scripts/seed_sandbox_table.py            | 112 +++++++++---------
 scripts/tests/test_seed_sandbox_table.py | 142 ++++++++++++++++++++++-
 3 files changed, 246 insertions(+), 102 deletions(-)

diff --git a/scripts/delete_all_table_items.py b/scripts/delete_all_table_items.py
index 3247c09c6..f2e287973 100755
--- a/scripts/delete_all_table_items.py
+++ b/scripts/delete_all_table_items.py
@@ -11,58 +11,66 @@
     fire = None


+def _handle_table_access_error(e, table_name):
+    error_code = e.response["Error"]["Code"]
+    if error_code == "ResourceNotFoundException":
+        print(f"Error: Table '{table_name}' does not exist")
+    elif error_code == "AccessDeniedException":
+        print(f"Error: No permission to access table '{table_name}'")
+    else:
+        print(f"Error accessing table: {e}")
+    sys.exit(1)
+
+
+def _scan_and_delete_batch(table, key_names, scan_kwargs, deleted_count):
+    try:
+        response = table.scan(**scan_kwargs)
+    except ClientError as e:
+        if e.response["Error"]["Code"] == "ProvisionedThroughputExceededException":
+            print(f"\nWarning: Throttled at {deleted_count} items. Retrying...")
+            return scan_kwargs.get("ExclusiveStartKey"), deleted_count, True
+        raise
+
+    with table.batch_writer() as batch:
+        for item in response["Items"]:
+            batch.delete_item(Key=item)
+            deleted_count += 1
+
+            if deleted_count % 100 == 0:
+                print(f"Deleted {deleted_count} items...", end="\r")
+
+    return response.get("LastEvaluatedKey"), deleted_count, False
+
+
 def delete_all_table_items(table_name):
-    """Delete all items from a DynamoDB table."""
     dynamodb = boto3.resource("dynamodb")
     table = dynamodb.Table(table_name)

     try:
-        # Verify the table exists
         key_names = [key["AttributeName"] for key in table.key_schema]
     except ClientError as e:
-        error_code = e.response["Error"]["Code"]
-        if error_code == "ResourceNotFoundException":
-            print(f"Error: Table '{table_name}' does not exist")
-            sys.exit(1)
-        elif error_code == "AccessDeniedException":
-            print(f"Error: No permission to access table '{table_name}'")
-            sys.exit(1)
-        else:
-            print(f"Error accessing table: {e}")
-            sys.exit(1)
-
-    # Scan and delete items in batches
+        _handle_table_access_error(e, table_name)
+
+    scan_kwargs = {"ProjectionExpression": ",".join(key_names)}
     deleted_count = 0
+
     try:
-        with table.batch_writer() as batch:
-            scan_kwargs = {
-                "ProjectionExpression": ",".join(key_names),
-            }
-
-            while True:
-                try:
-                    response = table.scan(**scan_kwargs)
-
-                    for item in response["Items"]:
-                        batch.delete_item(Key=item)
-                        deleted_count += 1
-
-                    if "LastEvaluatedKey" not in response:
-                        break
-                    scan_kwargs["ExclusiveStartKey"] = response["LastEvaluatedKey"]
-
-                    if deleted_count % 100 == 0:
-                        print(f"Deleted {deleted_count} items...", end="\r")
-
-                except ClientError as e:
-                    error_code = e.response["Error"]["Code"]
-                    if error_code == "ProvisionedThroughputExceededException":
-                        print(
-                            f"\nWarning: Throttled at {deleted_count} items. Retrying..."
-                        )
-                        continue
-                    else:
-                        raise
+        while True:
+            last_key, deleted_count, was_throttled = _scan_and_delete_batch(
+                table, key_names, scan_kwargs, deleted_count
+            )
+
+            if was_throttled:
+                if last_key:
+                    scan_kwargs["ExclusiveStartKey"] = last_key
+                elif "ExclusiveStartKey" in scan_kwargs:
+                    del scan_kwargs["ExclusiveStartKey"]
+                continue
+
+            if not last_key:
+                break
+
+            scan_kwargs["ExclusiveStartKey"] = last_key
     except Exception as e:
        print(f"\nError during deletion: {e}")
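The refactor keeps the script's fire entry point, so the command-line workflow is unchanged (python scripts/delete_all_table_items.py <table-name>), and the function can equally be called directly. A usage sketch, with the table name borrowed from the dev workspace configured above:

    from delete_all_table_items import delete_all_table_items

    # Table name is illustrative; any sandbox table works.
    delete_all_table_items("nhsd-nrlf--sandy-dev-pointers-table")

One caveat on the ProjectionExpression assembled from table.key_schema: if a key attribute is ever named after a DynamoDB reserved word, the scan will fail with a ValidationException unless the name is aliased through ExpressionAttributeNames.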
diff --git a/scripts/seed_sandbox_table.py b/scripts/seed_sandbox_table.py
index 8ae5a2f2f..f451ba587 100755
--- a/scripts/seed_sandbox_table.py
+++ b/scripts/seed_sandbox_table.py
@@ -156,6 +156,53 @@ def _load_pointer_templates() -> dict[str, dict]:
     return templates


+def _write_batch_to_dynamodb(table_name: str, batch_items: list[dict]) -> bool:
+    if not batch_items:
+        return True
+
+    try:
+        response = resource.batch_write_item(RequestItems={table_name: batch_items})
+
+        if response.get("UnprocessedItems"):
+            unprocessed = len(response["UnprocessedItems"].get(table_name, []))
+            print(f"\nWarning: {unprocessed} unprocessed items")
+
+        print(".", end="", flush=True)
+        return True
+
+    except ClientError as e:
+        error_code = e.response["Error"]["Code"]
+        if error_code == "ProvisionedThroughputExceededException":
+            print(f"\n✗ Throttled. Retrying batch...")
+        else:
+            print(f"\n✗ Error writing batch, batch cancelled: {e}")
+        return False
+
+
+def _create_pointer_item(
+    template: dict,
+    custodian: str,
+    nhs_number: str,
+    counter: int,
+    pointer_type: str,
+) -> tuple[dict | None, list[str] | None]:
+    """Create a single pointer and return the put request and CSV data."""
+    try:
+        pointer = _make_realistic_pointer(template, custodian, nhs_number, counter)
+
+        put_req = {"PutRequest": {"Item": pointer.model_dump()}}
+        csv_data = [pointer.id, pointer_type, pointer.custodian, pointer.nhs_number]
+
+        return put_req, csv_data
+
+    except ValueError as e:
+        print(f"\n✗ Validation error for pointer {counter}: {e}")
+        return None, None
+    except Exception as e:
+        print(f"\n✗ Error creating pointer {counter}: {e}")
+        return None, None
+
+
 def _generate_and_write_pointers(
     table_name: str, templates: dict[str, dict], pointers_per_type: int, testnum_iter
 ) -> tuple[list[list[str]], int]:
@@ -175,65 +222,19 @@ def _generate_and_write_pointers(
                     print(f"\n✗ Error: Ran out of NHS numbers at pointer {counter}")
                     break

-                try:
-                    pointer = _make_realistic_pointer(
-                        template, custodian, nhs_number, counter
-                    )
+                put_req, csv_data = _create_pointer_item(
+                    template, custodian, nhs_number, counter, pointer_type
+                )

-                    put_req = {"PutRequest": {"Item": pointer.model_dump()}}
+                if put_req and csv_data:
                     batch_upsert_items.append(put_req)
-
-                    pointer_data.append(
-                        [
-                            pointer.id,
-                            pointer_type,
-                            pointer.custodian,
-                            pointer.nhs_number,
-                        ]
-                    )
+                    pointer_data.append(csv_data)

                 if len(batch_upsert_items) >= 25:
-                    try:
-                        response = resource.batch_write_item(
-                            RequestItems={table_name: batch_upsert_items}
-                        )
-
-                        if response.get("UnprocessedItems"):
-                            unprocessed = len(
-                                response["UnprocessedItems"].get(table_name, [])
-                            )
-                            print(f"\nWarning: {unprocessed} unprocessed items")
-
-                        batch_upsert_items = []
-                        print(".", end="", flush=True)
-
-                    except ClientError as e:
-                        error_code = e.response["Error"]["Code"]
-                        if error_code == "ProvisionedThroughputExceededException":
-                            print(
-                                f"\n✗ Throttled at pointer {counter}. Retrying batch..."
-                            )
-                        else:
-                            print(f"\n✗ Error writing batch, batch cancelled: {e}")
-                        batch_upsert_items = []
-
-                except ValueError as e:
-                    print(f"\n✗ Validation error for pointer {counter}: {e}")
-                    continue
-                except Exception as e:
-                    print(f"\n✗ Error creating pointer {counter}: {e}")
-                    continue
-
-    if batch_upsert_items:
-        try:
-            response = resource.batch_write_item(
-                RequestItems={table_name: batch_upsert_items}
-            )
-            if response.get("UnprocessedItems"):
-                unprocessed = len(response["UnprocessedItems"].get(table_name, []))
-                print(f"\nWarning: {unprocessed} unprocessed items in final batch")
-        except ClientError as e:
-            print(f"\n✗ Error writing final batch, batch cancelled: {e}")
+                    _write_batch_to_dynamodb(table_name, batch_upsert_items)
+                    batch_upsert_items = []
+
+    _write_batch_to_dynamodb(table_name, batch_upsert_items)

     return pointer_data, counter

@@ -307,7 +308,6 @@ def seed_sandbox_table(


 def _write_pointer_extract(pointer_data: list[list[str]]):
-    """Write pointer data to CSV file for reference."""
     try:
         output_dir = Path(__file__).parent.parent / "dist" / "sandbox"
         output_dir.mkdir(parents=True, exist_ok=True)

diff --git a/scripts/tests/test_seed_sandbox_table.py b/scripts/tests/test_seed_sandbox_table.py
index c7096d68d..0813558df 100644
--- a/scripts/tests/test_seed_sandbox_table.py
+++ b/scripts/tests/test_seed_sandbox_table.py
@@ -7,11 +7,13 @@ from botocore.exceptions import ClientError
 from seed_sandbox_table import (
     _check_for_existing_sandbox_pointers,
+    _create_pointer_item,
     _generate_and_write_pointers,
     _load_pointer_templates,
     _load_sample_template,
     _make_realistic_pointer,
     _validate_table_access,
+    _write_batch_to_dynamodb,
     _write_pointer_extract,
     seed_sandbox_table,
 )
@@ -210,6 +212,143 @@ def test_create_pointer_success(self, mock_pointer_class, mock_doc_ref_class):
         )


+class TestWriteBatchToDynamodb:
+
+    @patch("seed_sandbox_table.resource")
+    def test_empty_batch_returns_true(self, mock_resource):
+        result = _write_batch_to_dynamodb("test-table", [])
+
+        assert result is True
+        mock_resource.batch_write_item.assert_not_called()
+
+    @patch("seed_sandbox_table.resource")
+    @patch("builtins.print")
+    def test_successful_batch_write(self, mock_print, mock_resource):
+        mock_resource.batch_write_item.return_value = {}
+
+        batch_items = [{"PutRequest": {"Item": {"id": "test"}}}]
+        result = _write_batch_to_dynamodb("test-table", batch_items)
+
+        assert result is True
+        mock_resource.batch_write_item.assert_called_once_with(
+            RequestItems={"test-table": batch_items}
+        )
+        mock_print.assert_called_once_with(".", end="", flush=True)
+
+    @patch("seed_sandbox_table.resource")
+    @patch("builtins.print")
+    def test_batch_write_with_unprocessed_items(self, mock_print, mock_resource):
+        mock_resource.batch_write_item.return_value = {
+            "UnprocessedItems": {"test-table": [{"PutRequest": {"Item": {"id": "1"}}}]}
+        }
+
+        batch_items = [{"PutRequest": {"Item": {"id": "test"}}}]
+        result = _write_batch_to_dynamodb("test-table", batch_items)
+
+        assert result is True
+        mock_print.assert_any_call("\nWarning: 1 unprocessed items")
+
+    @patch("seed_sandbox_table.resource")
+    @patch("builtins.print")
+    def test_batch_write_throttling_error(self, mock_print, mock_resource):
+        mock_resource.batch_write_item.side_effect = ClientError(
+            {
+                "Error": {
+                    "Code": "ProvisionedThroughputExceededException",
+                    "Message": "Throttled",
+                }
+            },
+            "BatchWriteItem",
+        )
+
+        batch_items = [{"PutRequest": {"Item": {"id": "test"}}}]
+        result = _write_batch_to_dynamodb("test-table", batch_items)
+
+        assert result is False
+        mock_print.assert_called_with("\n✗ Throttled. Retrying batch...")
+
+    @patch("seed_sandbox_table.resource")
+    @patch("builtins.print")
+    def test_batch_write_other_error(self, mock_print, mock_resource):
+        error = ClientError(
+            {"Error": {"Code": "ValidationException", "Message": "Invalid"}},
+            "BatchWriteItem",
+        )
+        mock_resource.batch_write_item.side_effect = error
+
+        batch_items = [{"PutRequest": {"Item": {"id": "test"}}}]
+        result = _write_batch_to_dynamodb("test-table", batch_items)
+
+        assert result is False
+        print_calls = [str(call) for call in mock_print.call_args_list]
+        assert any("Error writing batch" in call for call in print_calls)
+
+
+class TestCreatePointerItem:
+
+    @patch("seed_sandbox_table._make_realistic_pointer")
+    def test_successful_pointer_creation(self, mock_make_pointer):
+        template = {"id": "test"}
+        mock_pointer = MagicMock()
+        mock_pointer.id = "PTR-001"
+        mock_pointer.custodian = "CUST1"
+        mock_pointer.nhs_number = "9000000001"
+        mock_pointer.model_dump.return_value = {
+            "id": "PTR-001",
+            "custodian": "CUST1",
+            "nhs_number": "9000000001",
+        }
+        mock_make_pointer.return_value = mock_pointer
+
+        put_req, csv_data = _create_pointer_item(
+            template, "CUST1", "9000000001", 1, "type1"
+        )
+
+        assert put_req == {
+            "PutRequest": {
+                "Item": {
+                    "id": "PTR-001",
+                    "custodian": "CUST1",
+                    "nhs_number": "9000000001",
+                }
+            }
+        }
+        assert csv_data == ["PTR-001", "type1", "CUST1", "9000000001"]
+        mock_make_pointer.assert_called_once_with(template, "CUST1", "9000000001", 1)
+
+    @patch("seed_sandbox_table._make_realistic_pointer")
+    @patch("builtins.print")
+    def test_pointer_creation_value_error(self, mock_print, mock_make_pointer):
+        template = {"id": "test"}
+        mock_make_pointer.side_effect = ValueError("Invalid NHS number")
+
+        put_req, csv_data = _create_pointer_item(
+            template, "CUST1", "invalid", 1, "type1"
+        )
+
+        assert put_req is None
+        assert csv_data is None
+        mock_print.assert_called_once_with(
+            "\n✗ Validation error for pointer 1: Invalid NHS number"
+        )
+
+    @patch("seed_sandbox_table._make_realistic_pointer")
+    @patch("builtins.print")
+    def test_pointer_creation_general_error(self, mock_print, mock_make_pointer):
+        template = {"id": "test"}
+        mock_make_pointer.side_effect = Exception("Unexpected error")
+
+        put_req, csv_data = _create_pointer_item(
+            template, "CUST1", "9000000001", 1, "type1"
+        )
+
+        assert put_req is None
+        assert csv_data is None
+        mock_print.assert_called_once_with(
+            "\n✗ Error creating pointer 1: Unexpected error"
+        )
+
+
 class TestGenerateAndWritePointers:

     @patch("seed_sandbox_table.resource")
@@ -315,7 +454,6 @@ def test_seed_table_with_failures(
         mock_templates = {"type1": {"template": "data"}}
         mock_load_templates.return_value = mock_templates

-        # 5 attempts, 3 successful
         pointer_data = [
             ["PTR-001", "type1", "CUST1", "9000000001"],
             ["PTR-002", "type1", "CUST1", "9000000002"],
@@ -325,10 +463,8 @@

         result = seed_sandbox_table("test-table", pointers_per_type=1, force=False)

-        # Should return dict with counts
         assert result == {"successful": 3, "attempted": 5, "failed": 2}

-        # Verify error message about failures
         mock_print.assert_any_call("⚠️ 2 pointer(s) failed to create")

 @patch("seed_sandbox_table._validate_table_access")
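DynamoDB's BatchWriteItem API accepts at most 25 put or delete requests per call, which is why the loop flushes every 25 items. As committed, _write_batch_to_dynamodb prints its "Retrying batch..." message on throttling but does not actually retry, and UnprocessedItems are only counted, never re-submitted. A minimal sketch of what a retrying variant could look like; this helper and its backoff values are illustrative, not part of the patch:

    import time

    def write_with_retries(resource, table_name, batch_items, max_attempts=5):
        """Re-submit whatever DynamoDB reports as unprocessed, with backoff."""
        request_items = {table_name: batch_items}
        for attempt in range(max_attempts):
            response = resource.batch_write_item(RequestItems=request_items)
            unprocessed = response.get("UnprocessedItems") or {}
            if not unprocessed.get(table_name):
                return True  # the whole batch has been written
            request_items = unprocessed  # retry only the rejected requests
            time.sleep(0.1 * (2**attempt))  # simple exponential backoff
        return False

DynamoDB returns rejected requests in the same shape it accepts them, so the UnprocessedItems map can be fed straight back into the next call.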
From 490c0c59919ba7269cfd5db465ea08a476f3e01c Mon Sep 17 00:00:00 2001
From: Sandy Forrester
Date: Fri, 13 Feb 2026 17:01:43 +0000
Subject: [PATCH 17/19] NRL-721 Temp remove clone db script

---
 scripts/clone_dynamodb_table.py | 103 --------------------------------
 1 file changed, 103 deletions(-)
 delete mode 100755 scripts/clone_dynamodb_table.py

diff --git a/scripts/clone_dynamodb_table.py b/scripts/clone_dynamodb_table.py
deleted file mode 100755
index 326d25876..000000000
--- a/scripts/clone_dynamodb_table.py
+++ /dev/null
@@ -1,103 +0,0 @@
-#!/usr/bin/env python
-import boto3
-import fire
-
-
-def clone_table(source_table_name, target_table_name, copy_items=True, max_items=None):
-    """
-    Create a copy of a DynamoDB table for testing.
-
-    Args:
-        source_table_name: Name of table to clone
-        target_table_name: Name for the new table
-        copy_items: Whether to copy data (default: True)
-        max_items: Max items to copy (None = all)
-    """
-    dynamodb = boto3.client("dynamodb")
-    resource = boto3.resource("dynamodb")
-
-    # Get source table schema
-    source = dynamodb.describe_table(TableName=source_table_name)["Table"]
-
-    # Create new table with same schema
-    create_params = {
-        "TableName": target_table_name,
-        "KeySchema": source["KeySchema"],
-        "AttributeDefinitions": source["AttributeDefinitions"],
-    }
-
-    # Copy billing mode from source table
-    if "BillingModeSummary" in source:
-        create_params["BillingMode"] = source["BillingModeSummary"]["BillingMode"]
-        # If provisioned, copy the capacity settings
-        if source["BillingModeSummary"]["BillingMode"] == "PROVISIONED":
-            create_params["ProvisionedThroughput"] = {
-                "ReadCapacityUnits": source["ProvisionedThroughput"][
-                    "ReadCapacityUnits"
-                ],
-                "WriteCapacityUnits": source["ProvisionedThroughput"][
-                    "WriteCapacityUnits"
-                ],
-            }
-    else:
-        # Older tables without BillingModeSummary default to provisioned
-        create_params["BillingMode"] = "PROVISIONED"
-        create_params["ProvisionedThroughput"] = {
-            "ReadCapacityUnits": source["ProvisionedThroughput"]["ReadCapacityUnits"],
-            "WriteCapacityUnits": source["ProvisionedThroughput"]["WriteCapacityUnits"],
-        }
-
-    # Copy GSIs if they exist
-    if "GlobalSecondaryIndexes" in source:
-        create_params["GlobalSecondaryIndexes"] = [
-            {
-                "IndexName": gsi["IndexName"],
-                "KeySchema": gsi["KeySchema"],
-                "Projection": gsi["Projection"],
-            }
-            for gsi in source["GlobalSecondaryIndexes"]
-        ]
-
-    print(f"Creating table {target_table_name}...")
-    dynamodb.create_table(**create_params)
-
-    # Wait for table to be active
-    waiter = dynamodb.get_waiter("table_exists")
-    waiter.wait(TableName=target_table_name)
-    print("Table created and active")
-
-    # Copy items if requested
-    if copy_items:
-        source_table = resource.Table(source_table_name)
-        target_table = resource.Table(target_table_name)
-
-        count = 0
-        with target_table.batch_writer() as batch:
-            response = source_table.scan()
-
-            for item in response["Items"]:
-                batch.put_item(Item=item)
-                count += 1
-                if max_items and count >= max_items:
-                    break
-
-            # Paginate if needed
-            while "LastEvaluatedKey" in response and (
-                not max_items or count < max_items
-            ):
-                response = source_table.scan(
-                    ExclusiveStartKey=response["LastEvaluatedKey"]
-                )
-                for item in response["Items"]:
-                    batch.put_item(Item=item)
-                    count += 1
-                    if max_items and count >= max_items:
-                        break
-
-        print(f"Copied {count} items")
-
-    return target_table_name
-
-
-if __name__ == "__main__":
-    fire.Fire(clone_table)

From 1160b495a9b743d22f2a982d73836c6df61c34e6 Mon Sep 17 00:00:00 2001
From: Sandy Forrester
Date: Fri, 13 Feb 2026 17:44:50 +0000
Subject: [PATCH 18/19] NRL-721 Add retrieval mechanism to seed pointers

---
 scripts/seed_sandbox_table.py            |  27 ++++++
 scripts/tests/test_seed_sandbox_table.py | 111 +++++++++++++++++++++++
 2 files changed, 138 insertions(+)
diff --git a/scripts/seed_sandbox_table.py b/scripts/seed_sandbox_table.py
index f451ba587..a85ef74ea 100755
--- a/scripts/seed_sandbox_table.py
+++ b/scripts/seed_sandbox_table.py
@@ -80,6 +80,33 @@ def _make_realistic_pointer(
     if "masterIdentifier" in doc_ref_dict:
         doc_ref_dict["masterIdentifier"]["value"] = f"sandbox-{custodian}-{counter}"

+    retrieval_mechanism_ext = {
+        "url": "https://fhir.nhs.uk/England/StructureDefinition/Extension-England-NRLRetrievalMechanism",
+        "valueCodeableConcept": {
+            "coding": [
+                {
+                    "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLRetrievalMechanism",
+                    "code": "SSP",
+                    "display": "Spine Secure Proxy",
+                }
+            ]
+        },
+    }
+
+    if "content" not in doc_ref_dict or not doc_ref_dict["content"]:
+        doc_ref_dict["content"] = [{"extension": [retrieval_mechanism_ext]}]
+    else:
+        extensions = doc_ref_dict["content"][0].get("extension", [])
+        has_retrieval_mechanism = any(
+            ext.get("url")
+            == "https://fhir.nhs.uk/England/StructureDefinition/Extension-England-NRLRetrievalMechanism"
+            for ext in extensions
+        )
+        if not has_retrieval_mechanism:
+            if "extension" not in doc_ref_dict["content"][0]:
+                doc_ref_dict["content"][0]["extension"] = []
+            doc_ref_dict["content"][0]["extension"].append(retrieval_mechanism_ext)
+
     doc_ref = DocumentReference(**doc_ref_dict)

     pointer = DocumentPointer.from_document_reference(doc_ref, source="SANDBOX-SEED")

diff --git a/scripts/tests/test_seed_sandbox_table.py b/scripts/tests/test_seed_sandbox_table.py
index 0813558df..e7ec9660f 100644
--- a/scripts/tests/test_seed_sandbox_table.py
+++ b/scripts/tests/test_seed_sandbox_table.py
@@ -211,6 +211,117 @@ def test_create_pointer_success(self, mock_pointer_class, mock_doc_ref_class):
             mock_doc_ref, source="SANDBOX-SEED"
         )

+    @patch("seed_sandbox_table.DocumentReference")
+    @patch("seed_sandbox_table.DocumentPointer")
+    def test_creates_content_structure_when_missing(
+        self, mock_pointer_class, mock_doc_ref_class
+    ):
+        template = {
+            "id": "original",
+            "subject": {"identifier": {"value": "0000000000"}},
+            "custodian": {"identifier": {"value": "OLD"}},
+            "author": [{"identifier": {"value": "OLD_AUTHOR"}}],
+        }
+
+        mock_doc_ref = MagicMock()
+        mock_doc_ref_class.return_value = mock_doc_ref
+
+        mock_pointer = MagicMock()
+        mock_pointer_class.from_document_reference.return_value = mock_pointer
+
+        result = _make_realistic_pointer(template, "Y12345", "9000000001", 1)
+
+        call_args = mock_doc_ref_class.call_args[1]
+        assert "content" in call_args
+        assert len(call_args["content"]) > 0
+        assert "extension" in call_args["content"][0]
+        extensions = call_args["content"][0]["extension"]
+        assert any(
+            ext.get("url")
+            == "https://fhir.nhs.uk/England/StructureDefinition/Extension-England-NRLRetrievalMechanism"
+            for ext in extensions
+        )
+
+    @patch("seed_sandbox_table.DocumentReference")
+    @patch("seed_sandbox_table.DocumentPointer")
+    def test_adds_retrieval_mechanism_when_content_exists_without_it(
+        self, mock_pointer_class, mock_doc_ref_class
+    ):
+        template = {
+            "id": "original",
+            "subject": {"identifier": {"value": "0000000000"}},
+            "custodian": {"identifier": {"value": "OLD"}},
+            "author": [{"identifier": {"value": "OLD_AUTHOR"}}],
+            "content": [
+                {"extension": [{"url": "some-other-extension", "value": "test"}]}
+            ],
+        }
+
+        mock_doc_ref = MagicMock()
+        mock_doc_ref_class.return_value = mock_doc_ref
+
+        mock_pointer = MagicMock()
+        mock_pointer_class.from_document_reference.return_value = mock_pointer
+
+        result = _make_realistic_pointer(template, "Y12345", "9000000001", 1)
+
+        call_args = mock_doc_ref_class.call_args[1]
+        extensions = call_args["content"][0]["extension"]
+        assert len(extensions) == 2
+        assert any(
+            ext.get("url")
+            == "https://fhir.nhs.uk/England/StructureDefinition/Extension-England-NRLRetrievalMechanism"
+            for ext in extensions
+        )
+
+    @patch("seed_sandbox_table.DocumentReference")
+    @patch("seed_sandbox_table.DocumentPointer")
+    def test_does_not_duplicate_retrieval_mechanism_when_already_present(
+        self, mock_pointer_class, mock_doc_ref_class
+    ):
+        template = {
+            "id": "original",
+            "subject": {"identifier": {"value": "0000000000"}},
+            "custodian": {"identifier": {"value": "OLD"}},
+            "author": [{"identifier": {"value": "OLD_AUTHOR"}}],
+            "content": [
+                {
+                    "extension": [
+                        {
+                            "url": "https://fhir.nhs.uk/England/StructureDefinition/Extension-England-NRLRetrievalMechanism",
+                            "valueCodeableConcept": {
+                                "coding": [
+                                    {
+                                        "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLRetrievalMechanism",
+                                        "code": "SSP",
+                                        "display": "Spine Secure Proxy",
+                                    }
+                                ]
+                            },
+                        }
+                    ]
+                }
+            ],
+        }
+
+        mock_doc_ref = MagicMock()
+        mock_doc_ref_class.return_value = mock_doc_ref
+
+        mock_pointer = MagicMock()
+        mock_pointer_class.from_document_reference.return_value = mock_pointer
+
+        result = _make_realistic_pointer(template, "Y12345", "9000000001", 1)
+
+        call_args = mock_doc_ref_class.call_args[1]
+        extensions = call_args["content"][0]["extension"]
+        retrieval_mechanism_count = sum(
+            1
+            for ext in extensions
+            if ext.get("url")
+            == "https://fhir.nhs.uk/England/StructureDefinition/Extension-England-NRLRetrievalMechanism"
+        )
+        assert retrieval_mechanism_count == 1
+
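For a sample template that arrives without a content block, the first branch above produces a DocumentReference whose content is exactly this structure (a Python literal assembled from the retrieval_mechanism_ext value in the diff):

    doc_ref_dict["content"] = [
        {
            "extension": [
                {
                    "url": "https://fhir.nhs.uk/England/StructureDefinition/Extension-England-NRLRetrievalMechanism",
                    "valueCodeableConcept": {
                        "coding": [
                            {
                                "system": "https://fhir.nhs.uk/England/CodeSystem/England-NRLRetrievalMechanism",
                                "code": "SSP",
                                "display": "Spine Secure Proxy",
                            }
                        ]
                    },
                }
            ]
        }
    ]

Templates that already carry content keep their existing extensions; the SSP coding is appended only when no extension with that URL is present, which is what the three new tests pin down.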
From b494013664aa91cf1c44aa1588d33ced02c3ff94 Mon Sep 17 00:00:00 2001
From: Sandy Forrester
Date: Fri, 13 Feb 2026 17:57:07 +0000
Subject: [PATCH 19/19] NRL-721 Sonarqube fixes

---
 scripts/delete_all_table_items.py        | 4 ++--
 scripts/seed_sandbox_table.py            | 2 +-
 scripts/tests/test_seed_sandbox_table.py | 6 +++---
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/scripts/delete_all_table_items.py b/scripts/delete_all_table_items.py
index f2e287973..231fde055 100755
--- a/scripts/delete_all_table_items.py
+++ b/scripts/delete_all_table_items.py
@@ -22,7 +22,7 @@ def _handle_table_access_error(e, table_name):
     sys.exit(1)


-def _scan_and_delete_batch(table, key_names, scan_kwargs, deleted_count):
+def _scan_and_delete_batch(table, scan_kwargs, deleted_count):
     try:
         response = table.scan(**scan_kwargs)
     except ClientError as e:
@@ -57,7 +57,7 @@ def delete_all_table_items(table_name):
     try:
         while True:
             last_key, deleted_count, was_throttled = _scan_and_delete_batch(
-                table, key_names, scan_kwargs, deleted_count
+                table, scan_kwargs, deleted_count
             )

             if was_throttled:

diff --git a/scripts/seed_sandbox_table.py b/scripts/seed_sandbox_table.py
index a85ef74ea..1709252ad 100755
--- a/scripts/seed_sandbox_table.py
+++ b/scripts/seed_sandbox_table.py
@@ -200,7 +200,7 @@ def _write_batch_to_dynamodb(table_name: str, batch_items: list[dict]) -> bool:
     except ClientError as e:
         error_code = e.response["Error"]["Code"]
         if error_code == "ProvisionedThroughputExceededException":
-            print(f"\n✗ Throttled. Retrying batch...")
+            print("\n✗ Throttled. Retrying batch...")
         else:
             print(f"\n✗ Error writing batch, batch cancelled: {e}")
         return False

diff --git a/scripts/tests/test_seed_sandbox_table.py b/scripts/tests/test_seed_sandbox_table.py
index e7ec9660f..a6240b494 100644
--- a/scripts/tests/test_seed_sandbox_table.py
+++ b/scripts/tests/test_seed_sandbox_table.py
@@ -229,7 +229,7 @@ def test_creates_content_structure_when_missing(
         mock_pointer = MagicMock()
         mock_pointer_class.from_document_reference.return_value = mock_pointer

-        result = _make_realistic_pointer(template, "Y12345", "9000000001", 1)
+        _make_realistic_pointer(template, "Y12345", "9000000001", 1)

         call_args = mock_doc_ref_class.call_args[1]
         assert "content" in call_args
@@ -263,7 +263,7 @@ def test_adds_retrieval_mechanism_when_content_exists_without_it(
         mock_pointer = MagicMock()
         mock_pointer_class.from_document_reference.return_value = mock_pointer

-        result = _make_realistic_pointer(template, "Y12345", "9000000001", 1)
+        _make_realistic_pointer(template, "Y12345", "9000000001", 1)

         call_args = mock_doc_ref_class.call_args[1]
         extensions = call_args["content"][0]["extension"]
@@ -310,7 +310,7 @@ def test_does_not_duplicate_retrieval_mechanism_when_already_present(
         mock_pointer = MagicMock()
         mock_pointer_class.from_document_reference.return_value = mock_pointer

-        result = _make_realistic_pointer(template, "Y12345", "9000000001", 1)
+        _make_realistic_pointer(template, "Y12345", "9000000001", 1)

         call_args = mock_doc_ref_class.call_args[1]
         extensions = call_args["content"][0]["extension"]
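The behaviour added across this series is covered by the unit tests above. Assuming pytest is the project's test runner (the test files follow its conventions) and the repository's test configuration puts scripts/ on the import path, the affected suites can be exercised with a sketch like:

    import pytest

    # File paths are taken from the diffs above; -v mirrors a typical local run.
    pytest.main(
        [
            "scripts/tests/test_delete_all_table_items.py",
            "scripts/tests/test_seed_sandbox_table.py",
            "-v",
        ]
    )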