From 3c096156af41ec2c20a114f8c90cd44c8e329285 Mon Sep 17 00:00:00 2001
From: Kim Gustyr
Date: Fri, 8 May 2026 23:34:50 +0100
Subject: [PATCH 01/12] feat(segment_membership): Daily Snowflake-backed per-env segment counts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Backfills identities from Dynamo to Snowflake daily, then refreshes per-(segment, environment) match counts in the new `SegmentMembership` cache. The translator from `flagsmith-sql-flag-engine` turns each canonical segment into a SQL `WHERE` predicate; counts are materialised as `COUNT(*) ... GROUP BY environment_id` per segment. The serializer surfaces them as a list of `{environment, count, last_synced_at}`, ready to back per-env count badges in the Identities-tab environment dropdown.

Pipeline shape:

- `backfill_identities_to_snowflake` is the daily recurring task (`timeout=4h` to fit large environments). After backfilling each project's environments it dispatches one `refresh_project_segment_counts(project_id)` per project so the count refresh always sees the freshly backfilled snapshot rather than racing a separate schedule.
- `refresh_project_segment_counts` opens its own Snowpark session, re-checks the Flagsmith-on-Flagsmith (FoF) flag at execution time so a stale fan-out skips orgs that have since been disabled, and bulk-upserts via Postgres `ON CONFLICT` (single statement per project).
- `compute_segment_counts_for_project` returns a list of unsaved `SegmentMembership` instances; the task stamps `last_synced_at` consistently across the batch. Untranslatable segments emit a structlog `compute.segment.skipped` error event so we hear about predicate gaps rather than silently dropping rows.

Both tasks short-circuit when SNOWFLAKE_* env vars are unset and skip per-organisation when the `segment_membership_inspection` Flagsmith-on-Flagsmith flag is False, so SaaS rolls out gradually and self-hosted is unaffected.

DELETE-then-INSERT runs without an explicit transaction. Snowflake holds micro-partition locks for the lifetime of an open transaction, and at 10M+ identities a BEGIN/COMMIT around the whole env partition would keep that lock open for minutes. Per-statement implicit commits leave a brief mid-refresh window where readers see an empty partition; acceptable under the FoF flag's gradual rollout.

Backfill writes via Snowpark DataFrames against the canonical IDENTITIES schema, with `DynamoIdentity` documents projected through `segment_membership.mappers.map_identity_document_to_snowflake_row`. Refresh issues a single batched UNION ALL using parameterised SQL — env keys are bound, predicates from the engine are already escape-safe.

Schema setup is a `RunPython` migration gated on `is_snowflake_configured()`, so it no-ops on self-hosted and in the test suite.

The segment serializer surfaces cached counts via a new `memberships` list field; absence of an entry is the read-side signal, no flag check on the read path. `SegmentMembershipSerializer` gives drf-spectacular a typed schema.

Adds a generic `batched` helper to `api/util/util.py` for the per-INSERT batching.
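For illustration, each translatable segment contributes one member of that UNION ALL; the shape below mirrors the SQL assembled in `segment_membership/services.py` (segment id spliced in, env keys bound as `?` parameters, predicate elided):

    SELECT <segment_id> AS segment_id,
           i.environment_id AS env_key,
           COUNT(*) AS c
    FROM IDENTITIES i
    WHERE i.environment_id IN (?, ..., ?) AND (<predicate>)
    GROUP BY i.environment_id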
beep boop --- api/app/settings/common.py | 14 + api/segment_membership/__init__.py | 0 api/segment_membership/apps.py | 6 + api/segment_membership/mappers.py | 60 ++++ .../migrations/0001_initial.py | 57 ++++ .../0002_setup_snowflake_identities_schema.py | 43 +++ api/segment_membership/migrations/__init__.py | 0 api/segment_membership/models.py | 32 ++ api/segment_membership/services.py | 161 ++++++++++ api/segment_membership/tasks.py | 177 +++++++++++ api/segments/serializers.py | 15 + api/segments/views.py | 1 + api/tests/integration/segments/__init__.py | 0 .../segments/test_segment_membership_field.py | 65 ++++ api/tests/unit/segment_membership/__init__.py | 0 .../test_unit_segment_membership_mappers.py | 129 ++++++++ .../test_unit_segment_membership_migration.py | 47 +++ .../test_unit_segment_membership_services.py | 245 +++++++++++++++ .../test_unit_segment_membership_tasks.py | 290 ++++++++++++++++++ api/tests/unit/util/test_util.py | 23 +- api/util/util.py | 10 + .../observability/_events-catalogue.md | 67 +++- 22 files changed, 1440 insertions(+), 2 deletions(-) create mode 100644 api/segment_membership/__init__.py create mode 100644 api/segment_membership/apps.py create mode 100644 api/segment_membership/mappers.py create mode 100644 api/segment_membership/migrations/0001_initial.py create mode 100644 api/segment_membership/migrations/0002_setup_snowflake_identities_schema.py create mode 100644 api/segment_membership/migrations/__init__.py create mode 100644 api/segment_membership/models.py create mode 100644 api/segment_membership/services.py create mode 100644 api/segment_membership/tasks.py create mode 100644 api/tests/integration/segments/__init__.py create mode 100644 api/tests/integration/segments/test_segment_membership_field.py create mode 100644 api/tests/unit/segment_membership/__init__.py create mode 100644 api/tests/unit/segment_membership/test_unit_segment_membership_mappers.py create mode 100644 api/tests/unit/segment_membership/test_unit_segment_membership_migration.py create mode 100644 api/tests/unit/segment_membership/test_unit_segment_membership_services.py create mode 100644 api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py diff --git a/api/app/settings/common.py b/api/app/settings/common.py index 3653efc5d3b8..bf4e1cc84dcc 100644 --- a/api/app/settings/common.py +++ b/api/app/settings/common.py @@ -118,6 +118,7 @@ "features.workflows.core", "features.release_pipelines.core", "segments", + "segment_membership", "app", "e2etests", "simple_history", @@ -1424,3 +1425,16 @@ PYLON_IDENTITY_VERIFICATION_SECRET = env.str("PYLON_IDENTITY_VERIFICATION_SECRET", None) OSIC_UPDATE_BATCH_SIZE = env.int("OSIC_UPDATE_BATCH_SIZE", default=500) + +# --- Snowflake (segment membership inspection) ------------------------------- +# All-None default disables the segment_membership backfill and refresh tasks. +# When set, the api/segments/membership tasks open a Snowpark session and run +# against this account. See docs/deployment/observability/segment-membership.md +# for the operational shape. 
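+#
+# Illustrative example values (placeholders only — every name below is an
+# assumption, not a real account):
+#   SNOWFLAKE_ACCOUNT=myorg-myaccount
+#   SNOWFLAKE_USER=FLAGSMITH_SVC
+#   SNOWFLAKE_PRIVATE_KEY_PATH=/run/secrets/snowflake.p8
+#   SNOWFLAKE_ROLE=FLAGSMITH_RW
+#   SNOWFLAKE_WAREHOUSE=FLAGSMITH_WH
+#   SNOWFLAKE_DATABASE=FLAGSMITH
+#   SNOWFLAKE_SCHEMA=PUBLIC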
+SNOWFLAKE_ACCOUNT = env.str("SNOWFLAKE_ACCOUNT", default=None) +SNOWFLAKE_USER = env.str("SNOWFLAKE_USER", default=None) +SNOWFLAKE_PRIVATE_KEY_PATH = env.str("SNOWFLAKE_PRIVATE_KEY_PATH", default=None) +SNOWFLAKE_ROLE = env.str("SNOWFLAKE_ROLE", default=None) +SNOWFLAKE_WAREHOUSE = env.str("SNOWFLAKE_WAREHOUSE", default=None) +SNOWFLAKE_DATABASE = env.str("SNOWFLAKE_DATABASE", default=None) +SNOWFLAKE_SCHEMA = env.str("SNOWFLAKE_SCHEMA", default=None) diff --git a/api/segment_membership/__init__.py b/api/segment_membership/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/api/segment_membership/apps.py b/api/segment_membership/apps.py new file mode 100644 index 000000000000..42753fd75971 --- /dev/null +++ b/api/segment_membership/apps.py @@ -0,0 +1,6 @@ +from core.apps import BaseAppConfig + + +class SegmentMembershipConfig(BaseAppConfig): + name = "segment_membership" + default = True diff --git a/api/segment_membership/mappers.py b/api/segment_membership/mappers.py new file mode 100644 index 000000000000..245f893e7b68 --- /dev/null +++ b/api/segment_membership/mappers.py @@ -0,0 +1,60 @@ +import hashlib +from decimal import Decimal + +from flagsmith_schemas.dynamodb import Identity as DynamoIdentity +from flagsmith_schemas.dynamodb import Trait as DynamoTrait + +# (environment_id, id, identifier, identity_key, traits) +SnowflakeIdentityRow = tuple[str, int, str, str, dict[str, object] | None] + + +def map_identity_document_to_snowflake_row( + env_key: str, identity_doc: DynamoIdentity +) -> SnowflakeIdentityRow: + """Project a Dynamo identity document onto the canonical IDENTITIES + row tuple. The returned tuple aligns positionally with the schema + `(environment_id, id, identifier, identity_key, traits)`.""" + identity_uuid = str(identity_doc["identity_uuid"]) + identifier = str(identity_doc.get("identifier") or "") + composite_key = str(identity_doc.get("composite_key") or identity_uuid) + traits = _flatten_traits(identity_doc.get("identity_traits")) + return ( + env_key, + _identity_id(identity_uuid), + identifier, + composite_key, + traits or None, + ) + + +def _identity_id(identity_uuid: str) -> int: + """Stable 64-bit IDENTITIES.id derived from `identity_uuid`. Same + uuid always produces the same id, so re-runs of the backfill are + idempotent on the (environment_id, id) primary key.""" + digest = hashlib.md5(identity_uuid.encode("utf-8")).digest() + return int.from_bytes(digest[:8], "big", signed=False) + + +def _coerce_trait_value(value: object) -> object: + """Coerce Dynamo-decoded values for VARIANT serialisation. 
boto3 + returns `Decimal` for numbers; we narrow to int when whole, float + otherwise, so the VARIANT keeps a useful numeric type.""" + if isinstance(value, Decimal): + if value == value.to_integral_value(): + return int(value) + return float(value) + return value + + +def _flatten_traits( + identity_traits: list[DynamoTrait] | None, +) -> dict[str, object]: + """Convert Dynamo's `[{trait_key, trait_value}, ...]` list into a + flat trait map, dropping entries with falsy keys.""" + if not identity_traits: + return {} + return { + t["trait_key"]: _coerce_trait_value(t.get("trait_value")) + for t in identity_traits + if t.get("trait_key") + } diff --git a/api/segment_membership/migrations/0001_initial.py b/api/segment_membership/migrations/0001_initial.py new file mode 100644 index 000000000000..0024b54bb990 --- /dev/null +++ b/api/segment_membership/migrations/0001_initial.py @@ -0,0 +1,57 @@ +# Generated by Django 5.2.13 on 2026-05-08 22:03 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ("environments", "0037_add_uuid_field"), + ("segments", "0030_add_default_to_segment_version"), + ] + + operations = [ + migrations.CreateModel( + name="SegmentMembership", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("count", models.PositiveIntegerField()), + ("last_synced_at", models.DateTimeField()), + ( + "environment", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="+", + to="environments.environment", + ), + ), + ( + "segment", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="memberships", + to="segments.segment", + ), + ), + ], + options={ + "constraints": [ + models.UniqueConstraint( + fields=("segment", "environment"), + name="segment_membership_unique_segment_environment", + ) + ], + }, + ), + ] diff --git a/api/segment_membership/migrations/0002_setup_snowflake_identities_schema.py b/api/segment_membership/migrations/0002_setup_snowflake_identities_schema.py new file mode 100644 index 000000000000..a510036f339b --- /dev/null +++ b/api/segment_membership/migrations/0002_setup_snowflake_identities_schema.py @@ -0,0 +1,43 @@ +"""Create the canonical IDENTITIES table the SQL flag engine emits +against when a Snowflake account is configured. + +No-op when SNOWFLAKE_* settings are unset, so self-hosted installs +without Snowflake (and the test suite) migrate cleanly. +""" + +from django.db import migrations +from django.db.backends.base.schema import BaseDatabaseSchemaEditor +from django.db.migrations.state import StateApps +from flagsmith_sql_flag_engine.dialects import SnowflakeDialect + +from segment_membership.services import ( + is_snowflake_configured, + open_snowflake_session, +) + + +def setup_snowflake_identities_schema( + apps: StateApps, schema_editor: BaseDatabaseSchemaEditor +) -> None: + if not is_snowflake_configured(): + return + with open_snowflake_session() as sess: + sess.sql(SnowflakeDialect.schema_ddl).collect() + + +class Migration(migrations.Migration): + # The Snowflake DDL talks to a remote service; running it inside + # Django's default-atomic migration block would couple this Postgres + # migration to a Snowflake transaction we don't actually need. 
+ atomic = False + + dependencies = [ + ("segment_membership", "0001_initial"), + ] + + operations = [ + migrations.RunPython( + setup_snowflake_identities_schema, + reverse_code=migrations.RunPython.noop, + ), + ] diff --git a/api/segment_membership/migrations/__init__.py b/api/segment_membership/migrations/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/api/segment_membership/models.py b/api/segment_membership/models.py new file mode 100644 index 000000000000..cb80a20577f4 --- /dev/null +++ b/api/segment_membership/models.py @@ -0,0 +1,32 @@ +from django.db import models + +from environments.models import Environment +from segments.models import Segment + + +class SegmentMembership(models.Model): + """ + Cached count of identities matching a canonical segment within a + single environment. One row per (segment, environment) pair. + """ + + segment = models.ForeignKey( + Segment, + on_delete=models.CASCADE, + related_name="memberships", + ) + environment = models.ForeignKey( + Environment, + on_delete=models.CASCADE, + related_name="+", + ) + count = models.PositiveIntegerField() + last_synced_at = models.DateTimeField() + + class Meta: + constraints = [ + models.UniqueConstraint( + fields=["segment", "environment"], + name="segment_membership_unique_segment_environment", + ), + ] diff --git a/api/segment_membership/services.py b/api/segment_membership/services.py new file mode 100644 index 000000000000..82770206eff6 --- /dev/null +++ b/api/segment_membership/services.py @@ -0,0 +1,161 @@ +from contextlib import contextmanager +from typing import Iterator + +import structlog +from django.conf import settings +from flag_engine.context.types import EvaluationContext +from flagsmith_sql_flag_engine import TranslateContext, translate_segment +from flagsmith_sql_flag_engine.dialects import SnowflakeDialect +from snowflake.snowpark import Session + +from integrations.flagsmith.client import get_openfeature_client +from organisations.models import Organisation +from projects.models import Project +from segment_membership.models import SegmentMembership +from segments.models import Segment +from util.engine_models.context.mappers import map_segment_to_segment_context +from util.mappers.engine import map_segment_to_engine + +logger = structlog.get_logger("segment_membership") + + +def is_membership_enabled(organisation: Organisation) -> bool: + """Resolve the per-org Flagsmith-on-Flagsmith flag for segment- + membership inspection. Defaults False when the flag is missing.""" + return get_openfeature_client().get_boolean_value( + "segment_membership_inspection", + default_value=False, + evaluation_context=organisation.openfeature_evaluation_context, + ) + + +def is_snowflake_configured() -> bool: + """All SNOWFLAKE_* settings required to open a session must be + populated. 
Tasks short-circuit when this returns False.""" + return all( + getattr(settings, name) + for name in ( + "SNOWFLAKE_ACCOUNT", + "SNOWFLAKE_USER", + "SNOWFLAKE_PRIVATE_KEY_PATH", + "SNOWFLAKE_DATABASE", + "SNOWFLAKE_SCHEMA", + "SNOWFLAKE_WAREHOUSE", + ) + ) + + +@contextmanager +def open_snowflake_session() -> Iterator[Session]: + """Open a Snowpark session from `SNOWFLAKE_*` settings.""" + config: dict[str, str | None] = { + "account": settings.SNOWFLAKE_ACCOUNT, + "user": settings.SNOWFLAKE_USER, + "warehouse": settings.SNOWFLAKE_WAREHOUSE, + "database": settings.SNOWFLAKE_DATABASE, + "schema": settings.SNOWFLAKE_SCHEMA, + "private_key_file": settings.SNOWFLAKE_PRIVATE_KEY_PATH, + } + if settings.SNOWFLAKE_ROLE: + config["role"] = settings.SNOWFLAKE_ROLE + sess = Session.builder.configs(config).create() + try: + yield sess + finally: + sess.close() + + +def get_projects_to_process() -> Iterator[Project]: + """Yield projects that hold at least one canonical segment and whose + organisation has the segment-membership FoF flag enabled. Used by + both the backfill and refresh tasks to scope work.""" + project_ids = Segment.live_objects.values_list("project_id", flat=True).distinct() + for project in Project.objects.filter(id__in=project_ids).select_related( + "organisation" + ): + if not is_membership_enabled(project.organisation): + continue + yield project + + +def compute_segment_counts_for_project( + project: Project, session: Session +) -> list[SegmentMembership]: + """Run one batched `SELECT ... UNION ALL` counting identity matches + for every (canonical-segment, environment) pair in `project`. + + Returns a list of unsaved `SegmentMembership` instances — `count` + and the `(segment_id, environment_id)` keys are populated; + `last_synced_at` is left for the caller to stamp consistently + across the batch. + + The SQL groups by `environment_id` per segment, so cardinality is + one SELECT per segment rather than per (segment, env) pair. Pairs + with zero matches are absent from the result; the caller treats + absent pairs as "no row" rather than count = 0. + + Segments whose predicate is currently untranslatable — e.g. a + regex pattern unsupported by the active dialect — are skipped + entirely. + + Environment keys are bound as parameters, not f-string-spliced; + the predicate from `translate_segment` is already escape-safe per + the SQL flag engine's contract. + """ + segments = list(Segment.live_objects.filter(project=project)) + env_id_by_key: dict[str, int] = dict( + project.environments.values_list("api_key", "id"), + ) + if not segments or not env_id_by_key: + return [] + + env_keys = list(env_id_by_key) + env_placeholders = ",".join("?" 
* len(env_keys)) + dialect = SnowflakeDialect() + + select_clauses: list[str] = [] + for seg in segments: + translate_ctx = TranslateContext( + evaluation_context=EvaluationContext( + environment={"key": "_count", "name": project.name} + ), + dialect=dialect, + ) + predicate = translate_segment( + map_segment_to_segment_context(map_segment_to_engine(seg)), + translate_ctx, + ) + if predicate is None: + logger.error( + "compute.segment.skipped", + project__id=project.id, + segment__id=seg.id, + reason="untranslatable", + ) + continue + select_clauses.append( + f"SELECT {seg.id} AS segment_id, " + f"i.environment_id AS env_key, COUNT(*) AS c " + f"FROM IDENTITIES i " + f"WHERE i.environment_id IN ({env_placeholders}) AND ({predicate}) " + f"GROUP BY i.environment_id" + ) + + if not select_clauses: + return [] + + sql = "\nUNION ALL\n".join(select_clauses) + rows = session.sql(sql, params=env_keys * len(select_clauses)).collect() + memberships: list[SegmentMembership] = [] + for row in rows: + env_id = env_id_by_key.get(str(row["ENV_KEY"])) + if env_id is None: + continue + memberships.append( + SegmentMembership( + segment_id=int(row["SEGMENT_ID"]), + environment_id=env_id, + count=int(row["C"]), + ) + ) + return memberships diff --git a/api/segment_membership/tasks.py b/api/segment_membership/tasks.py new file mode 100644 index 000000000000..1fe17a7feed5 --- /dev/null +++ b/api/segment_membership/tasks.py @@ -0,0 +1,177 @@ +"""Tasks: backfill IDENTITIES from Dynamo to Snowflake daily, then +refresh per-segment counts in the `SegmentMembership` cache. + +The backfill recurs daily and, once it finishes, fans out one +`refresh_project_segment_counts` per project — guarantees the refresh +always reads the freshly backfilled snapshot rather than racing a +separate schedule. Both tasks short-circuit when SNOWFLAKE_* settings +are unset, and skip per-organisation when the +`segment_membership_inspection` FoF flag is False. +""" + +from datetime import timedelta +from typing import cast + +import structlog +from django.utils import timezone +from flagsmith_schemas.dynamodb import Identity as DynamoIdentity +from snowflake.snowpark.types import ( + LongType, + StringType, + StructField, + StructType, + VariantType, +) +from task_processor.decorators import ( + register_recurring_task, + register_task_handler, +) + +from environments.dynamodb.wrappers.identity_wrapper import DynamoIdentityWrapper +from projects.models import Project +from segment_membership.mappers import map_identity_document_to_snowflake_row +from segment_membership.models import SegmentMembership +from segment_membership.services import ( + compute_segment_counts_for_project, + get_projects_to_process, + is_membership_enabled, + is_snowflake_configured, + open_snowflake_session, +) +from util.util import batched + +logger = structlog.get_logger("segment_membership") + +# Per-INSERT row count; bounds memory while loading large environments. +_INSERT_BATCH_SIZE = 1000 + +_IDENTITIES_SCHEMA = StructType( + [ + StructField("environment_id", StringType()), + StructField("id", LongType()), + StructField("identifier", StringType()), + StructField("identity_key", StringType()), + StructField("traits", VariantType()), + ] +) + + +@register_recurring_task( + run_every=timedelta(days=1), + # The default timeout doesn't fit the per-environment + # backfill at SaaS scale; 4 hours leaves + # headroom for several large environments back-to-back without + # truncating the task processor's lease. 
+ timeout=timedelta(hours=4), +) +def backfill_identities_to_snowflake() -> None: + """Replace Snowflake's IDENTITIES rows for every relevant + environment with the current Dynamo state. Once the backfill + finishes, fans out one `refresh_project_segment_counts` task per + project so the count refresh always sees fresh data. + + Per-statement implicit commits leave a brief window where readers + see an empty partition mid-refresh — a PoC tradeoff later fixed + by CDC. + """ + if not is_snowflake_configured(): + logger.info("backfill.skipped", reason="snowflake_not_configured") + return + + wrapper = DynamoIdentityWrapper() + if not wrapper.is_enabled: + logger.info("backfill.skipped", reason="dynamo_disabled") + return + + refreshable_project_ids: list[int] = [] + with open_snowflake_session() as sess: + for project in get_projects_to_process(): + refreshable_project_ids.append(project.id) + for env in project.environments.all(): + env_key = env.api_key + row_count = 0 + try: + sess.sql( + "DELETE FROM IDENTITIES WHERE environment_id = ?", + params=[env_key], + ).collect() + for batch in batched( + wrapper.iter_all_items_paginated(env_key), + _INSERT_BATCH_SIZE, + ): + rows = [ + map_identity_document_to_snowflake_row( + env_key, cast(DynamoIdentity, doc) + ) + for doc in batch + ] + sess.create_dataframe( + rows, schema=_IDENTITIES_SCHEMA + ).write.mode("append").save_as_table("IDENTITIES") + row_count += len(rows) + except Exception: + logger.exception( + "backfill.environment.failed", + project__id=project.id, + environment__id=env.id, + ) + continue + logger.info( + "backfill.environment.completed", + project__id=project.id, + environment__id=env.id, + rows__count=row_count, + ) + + for project_id in refreshable_project_ids: + refresh_project_segment_counts.delay(args=(project_id,)) + + +@register_task_handler( + # One project's predicate matrix at SaaS scale takes seconds to a + # few minutes; 30 minutes bounds runaway queries without cutting + # legitimate ones short. + timeout=timedelta(minutes=30), +) +def refresh_project_segment_counts(project_id: int) -> None: + """Compute per-segment match counts for a single project and upsert + into `SegmentMembership`. 
Re-checks the FoF flag at execution time + so a stale fan-out skips orgs that have since been disabled.""" + if not is_snowflake_configured(): + logger.info( + "refresh.project.skipped", + project__id=project_id, + reason="snowflake_not_configured", + ) + return + + project = Project.objects.select_related("organisation").get(pk=project_id) + if not is_membership_enabled(project.organisation): + logger.info( + "refresh.project.skipped", + project__id=project_id, + reason="ff_disabled", + ) + return + + with open_snowflake_session() as sess: + try: + memberships = compute_segment_counts_for_project(project, sess) + except Exception: + logger.exception("refresh.project.failed", project__id=project_id) + return + + now = timezone.now() + for m in memberships: + m.last_synced_at = now + SegmentMembership.objects.bulk_create( + memberships, + update_conflicts=True, + unique_fields=["segment", "environment"], + update_fields=["count", "last_synced_at"], + ) + logger.info( + "refresh.project.completed", + project__id=project_id, + memberships__count=len(memberships), + ) diff --git a/api/segments/serializers.py b/api/segments/serializers.py index 6415d26bde73..114b43fbeef0 100644 --- a/api/segments/serializers.py +++ b/api/segments/serializers.py @@ -10,6 +10,7 @@ from edge_api.utils import is_edge_enabled from metadata.serializers import MetadataSerializer, MetadataSerializerMixin from projects.models import Project +from segment_membership.models import SegmentMembership from segments.models import Condition, Segment, SegmentRule logger = structlog.get_logger(__name__) @@ -17,6 +18,17 @@ DictList = list[dict[str, Any]] +class SegmentMembershipSerializer(serializers.ModelSerializer[SegmentMembership]): + """One materialised count of identities matching a canonical + segment within an environment, refreshed daily by + `segment_membership.tasks.refresh_project_segment_counts`.""" + + class Meta: + model = SegmentMembership + fields = ["environment", "count", "last_synced_at"] + read_only_fields = ["environment", "count", "last_synced_at"] + + class ConditionSerializer(serializers.ModelSerializer[Condition]): delete = serializers.BooleanField( write_only=True, @@ -82,6 +94,7 @@ class Meta: class SegmentSerializer(MetadataSerializerMixin, WritableNestedModelSerializer): rules = SegmentRuleSerializer(many=True, required=True, allow_empty=False) metadata = MetadataSerializer(required=False, many=True) + memberships = SegmentMembershipSerializer(many=True, read_only=True) def __init__(self, *args: Any, **kwargs: Any) -> None: """ @@ -112,7 +125,9 @@ class Meta: "version_of", "rules", "metadata", + "memberships", ] + read_only_fields = ["memberships"] def validate(self, attrs: dict[str, Any]) -> dict[str, Any]: attrs = super().validate(attrs) diff --git a/api/segments/views.py b/api/segments/views.py index ca8f421eb03b..e80fb360b4ba 100644 --- a/api/segments/views.py +++ b/api/segments/views.py @@ -108,6 +108,7 @@ def get_queryset(self): # type: ignore[no-untyped-def] # TODO: at the moment, the UI only shows the name and description of the segment in the list view. # we shouldn't return all of the rules and conditions in the list view. 
queryset = queryset.prefetch_related( + "memberships", "rules", "rules__conditions", "rules__rules", diff --git a/api/tests/integration/segments/__init__.py b/api/tests/integration/segments/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/api/tests/integration/segments/test_segment_membership_field.py b/api/tests/integration/segments/test_segment_membership_field.py new file mode 100644 index 000000000000..ad2bf9b32603 --- /dev/null +++ b/api/tests/integration/segments/test_segment_membership_field.py @@ -0,0 +1,65 @@ +from datetime import datetime, timezone +from typing import Any + +from django.urls import reverse +from rest_framework import status +from rest_framework.test import APIClient + +from segment_membership.models import SegmentMembership + + +def test_get_segment__no_memberships__returns_empty_list( + admin_client: APIClient, + project: int, + segment: int, +) -> None: + # Given a segment with no materialised SegmentMembership rows + # (the daily refresh has not yet run for this org, or the FoF flag + # is off so the refresh task skips it) + # When the segment is fetched + response = admin_client.get( + reverse( + "api-v1:projects:project-segments-detail", + args=[project, segment], + ) + ) + + # Then the memberships field is present and empty + assert response.status_code == status.HTTP_200_OK + body: dict[str, Any] = response.json() + assert body["memberships"] == [] + + +def test_get_segment__one_membership_per_environment__returns_per_env_counts( + admin_client: APIClient, + project: int, + segment: int, + environment: int, +) -> None: + # Given one SegmentMembership row in this segment's environment + synced_at = datetime(2026, 5, 1, tzinfo=timezone.utc) + SegmentMembership.objects.create( + segment_id=segment, + environment_id=environment, + count=42, + last_synced_at=synced_at, + ) + + # When the segment is fetched + response = admin_client.get( + reverse( + "api-v1:projects:project-segments-detail", + args=[project, segment], + ) + ) + + # Then the memberships field carries one entry keyed by environment id + assert response.status_code == status.HTTP_200_OK + body: dict[str, Any] = response.json() + assert body["memberships"] == [ + { + "environment": environment, + "count": 42, + "last_synced_at": synced_at.isoformat().replace("+00:00", "Z"), + } + ] diff --git a/api/tests/unit/segment_membership/__init__.py b/api/tests/unit/segment_membership/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/api/tests/unit/segment_membership/test_unit_segment_membership_mappers.py b/api/tests/unit/segment_membership/test_unit_segment_membership_mappers.py new file mode 100644 index 000000000000..d0ad2ab8c084 --- /dev/null +++ b/api/tests/unit/segment_membership/test_unit_segment_membership_mappers.py @@ -0,0 +1,129 @@ +from decimal import Decimal + +from flagsmith_schemas.dynamodb import Identity as DynamoIdentity + +from segment_membership.mappers import ( + _coerce_trait_value, + _flatten_traits, + _identity_id, + map_identity_document_to_snowflake_row, +) + + +def test_identity_id__same_uuid__produces_same_id() -> None: + # Given the same identity_uuid + uuid = "abc-123" + + # When the helper runs twice + a = _identity_id(uuid) + b = _identity_id(uuid) + + # Then the result is identical and fits in a non-negative 64-bit int + assert a == b + assert 0 <= a < 2**64 + + +def test_coerce_trait_value__decimal_int__narrows_to_int() -> None: + # Given a Decimal that's a whole number + # When coerced + # Then it becomes a plain 
int + assert _coerce_trait_value(Decimal("3")) == 3 + assert isinstance(_coerce_trait_value(Decimal("3")), int) + + +def test_coerce_trait_value__decimal_fraction__narrows_to_float() -> None: + # Given a Decimal with a fractional component + # When coerced + # Then it becomes a float + assert _coerce_trait_value(Decimal("1.5")) == 1.5 + assert isinstance(_coerce_trait_value(Decimal("1.5")), float) + + +def test_coerce_trait_value__non_decimal__passes_through_unchanged() -> None: + # Given a value that isn't a Decimal + # When coerced + # Then it passes through unchanged + assert _coerce_trait_value("growth") == "growth" + assert _coerce_trait_value(True) is True + + +def test_flatten_traits__none__returns_empty_dict() -> None: + # Given no traits + # When flattened + # Then the result is an empty dict + assert _flatten_traits(None) == {} + + +def test_flatten_traits__list__returns_dict_dropping_empty_keys() -> None: + # Given a Dynamo trait list with one well-formed and one empty-key entry + # When flattened + # Then only the well-formed entry survives + assert _flatten_traits( + [ + {"trait_key": "plan", "trait_value": "growth"}, + {"trait_key": "", "trait_value": "skipped"}, + ] + ) == {"plan": "growth"} + + +def test_map_identity_document_to_snowflake_row__with_traits__returns_tuple() -> None: + # Given a Dynamo identity document with traits + doc: DynamoIdentity = { + "identity_uuid": "uuid-1", + "identifier": "alice", + "environment_api_key": "env-key", + "composite_key": "env_x_alice", + "created_date": "2026-05-08T00:00:00Z", + "identity_traits": [{"trait_key": "plan", "trait_value": "growth"}], + } + + # When mapped + env_id, _id, identifier, identity_key, traits = ( + map_identity_document_to_snowflake_row("env-key", doc) + ) + + # Then the columns line up positionally with the IDENTITIES schema + assert env_id == "env-key" + assert _id == _identity_id("uuid-1") + assert identifier == "alice" + assert identity_key == "env_x_alice" + assert traits == {"plan": "growth"} + + +def test_map_identity_document_to_snowflake_row__no_traits__returns_none_for_traits() -> ( + None +): + # Given a Dynamo identity document with no trait entries + doc: DynamoIdentity = { + "identity_uuid": "uuid-1", + "identifier": "alice", + "environment_api_key": "env-key", + "composite_key": "env_x_alice", + "created_date": "2026-05-08T00:00:00Z", + "identity_traits": [], + } + + # When mapped + *_, traits = map_identity_document_to_snowflake_row("env-key", doc) + + # Then the traits VARIANT slot is None (NULL) + assert traits is None + + +def test_map_identity_document_to_snowflake_row__no_composite_key__falls_back_to_uuid() -> ( + None +): + # Given an identity document missing the composite_key + doc: DynamoIdentity = { # type: ignore[typeddict-item] + "identity_uuid": "uuid-1", + "identifier": "alice", + "environment_api_key": "env-key", + "created_date": "2026-05-08T00:00:00Z", + "identity_traits": [], + } + + # When mapped + *_, identity_key, _traits = map_identity_document_to_snowflake_row("env-key", doc) + + # Then identity_key falls back to identity_uuid + assert identity_key == "uuid-1" diff --git a/api/tests/unit/segment_membership/test_unit_segment_membership_migration.py b/api/tests/unit/segment_membership/test_unit_segment_membership_migration.py new file mode 100644 index 000000000000..7a5defcd36af --- /dev/null +++ b/api/tests/unit/segment_membership/test_unit_segment_membership_migration.py @@ -0,0 +1,47 @@ +import importlib +from unittest.mock import MagicMock + +from pytest_mock import 
MockerFixture + +migration_module = importlib.import_module( + "segment_membership.migrations.0002_setup_snowflake_identities_schema" +) + + +def test_setup_snowflake_identities_schema__unconfigured__skips( + mocker: MockerFixture, +) -> None: + # Given Snowflake settings unconfigured + mocker.patch.object( + migration_module, + "is_snowflake_configured", + return_value=False, + ) + open_sess = mocker.patch.object(migration_module, "open_snowflake_session") + + # When the migration's RunPython entry runs + migration_module.setup_snowflake_identities_schema(MagicMock(), MagicMock()) + + # Then it short-circuits without opening a session + open_sess.assert_not_called() + + +def test_setup_snowflake_identities_schema__configured__runs_dialect_ddl( + mocker: MockerFixture, +) -> None: + # Given Snowflake configured and a mocked Snowpark session + mocker.patch.object( + migration_module, + "is_snowflake_configured", + return_value=True, + ) + sess = MagicMock() + open_sess = mocker.patch.object(migration_module, "open_snowflake_session") + open_sess.return_value.__enter__.return_value = sess + + # When the migration's RunPython entry runs + migration_module.setup_snowflake_identities_schema(MagicMock(), MagicMock()) + + # Then the dialect's schema DDL was executed against the session + sess.sql.assert_called_once() + sess.sql.return_value.collect.assert_called_once_with() diff --git a/api/tests/unit/segment_membership/test_unit_segment_membership_services.py b/api/tests/unit/segment_membership/test_unit_segment_membership_services.py new file mode 100644 index 000000000000..5612ad969827 --- /dev/null +++ b/api/tests/unit/segment_membership/test_unit_segment_membership_services.py @@ -0,0 +1,245 @@ +from unittest.mock import MagicMock + +from pytest_django.fixtures import SettingsWrapper +from pytest_mock import MockerFixture + +from environments.models import Environment +from organisations.models import Organisation +from projects.models import Project +from segment_membership import services +from segment_membership.services import ( + compute_segment_counts_for_project, + get_projects_to_process, + is_membership_enabled, + is_snowflake_configured, + open_snowflake_session, +) +from segments.models import Segment, SegmentRule +from tests.types import EnableFeaturesFixture + + +def test_is_membership_enabled__flag_off__returns_false( + organisation: Organisation, +) -> None: + # Given the FoF flag is not enabled (default state of the test + # OpenFeature provider) + # When the helper resolves the flag for the organisation + # Then it returns False + assert is_membership_enabled(organisation) is False + + +def test_is_membership_enabled__flag_on__returns_true( + organisation: Organisation, + enable_features: EnableFeaturesFixture, +) -> None: + # Given the FoF flag is enabled + enable_features("segment_membership_inspection") + + # When the helper resolves the flag + # Then it returns True + assert is_membership_enabled(organisation) is True + + +def test_is_snowflake_configured__all_set__returns_true( + settings: SettingsWrapper, +) -> None: + # Given every required SNOWFLAKE_* setting is populated + settings.SNOWFLAKE_ACCOUNT = "acc" + settings.SNOWFLAKE_USER = "u" + settings.SNOWFLAKE_PRIVATE_KEY_PATH = "/key" + settings.SNOWFLAKE_DATABASE = "db" + settings.SNOWFLAKE_SCHEMA = "sch" + settings.SNOWFLAKE_WAREHOUSE = "wh" + + # When checked + # Then the helper reports the feature configured + assert is_snowflake_configured() is True + + +def 
test_is_snowflake_configured__missing_account__returns_false( + settings: SettingsWrapper, +) -> None: + # Given one required setting is unset + settings.SNOWFLAKE_ACCOUNT = None + settings.SNOWFLAKE_USER = "u" + settings.SNOWFLAKE_PRIVATE_KEY_PATH = "/key" + settings.SNOWFLAKE_DATABASE = "db" + settings.SNOWFLAKE_SCHEMA = "sch" + settings.SNOWFLAKE_WAREHOUSE = "wh" + + # When checked + # Then the helper reports the feature unconfigured + assert is_snowflake_configured() is False + + +def test_open_snowflake_session__configured__yields_session_and_closes( + mocker: MockerFixture, + settings: SettingsWrapper, +) -> None: + # Given populated SNOWFLAKE_* settings and a mocked Snowpark builder + settings.SNOWFLAKE_ACCOUNT = "acc" + settings.SNOWFLAKE_USER = "u" + settings.SNOWFLAKE_ROLE = "ACCOUNTADMIN" + settings.SNOWFLAKE_WAREHOUSE = "wh" + settings.SNOWFLAKE_DATABASE = "db" + settings.SNOWFLAKE_SCHEMA = "sch" + settings.SNOWFLAKE_PRIVATE_KEY_PATH = "/key" + + fake_session = MagicMock() + builder = MagicMock() + builder.configs.return_value.create.return_value = fake_session + mocker.patch.object(services, "Session", MagicMock(builder=builder)) + + # When the context manager is entered and exited + with open_snowflake_session() as sess: + # Then it yields the underlying Snowpark session... + assert sess is fake_session + + # ...and closes it on exit + fake_session.close.assert_called_once_with() + + +def test_get_projects_to_process__no_canonical_segments__yields_nothing( + project: Project, +) -> None: + # Given a project with no canonical segments + # When iterating projects to process + # Then nothing is yielded + assert list(get_projects_to_process()) == [] + + +def test_get_projects_to_process__ff_disabled__skips_organisation( + project: Project, + segment: Segment, +) -> None: + # Given a project with a canonical segment but FoF flag off + # When iterating projects to process + # Then the project is skipped + assert list(get_projects_to_process()) == [] + + +def test_get_projects_to_process__ff_enabled__yields_project( + project: Project, + segment: Segment, + enable_features: EnableFeaturesFixture, +) -> None: + # Given a project with a canonical segment and the FoF flag on + enable_features("segment_membership_inspection") + + # When iterating projects to process + # Then the project is yielded + assert list(get_projects_to_process()) == [project] + + +def test_compute_segment_counts_for_project__no_segments__returns_empty( + project: Project, +) -> None: + # Given a project with no canonical segments + sess = MagicMock() + + # When counts are computed + result = compute_segment_counts_for_project(project, sess) + + # Then the result is empty and Snowflake was not queried + assert result == [] + sess.sql.assert_not_called() + + +def test_compute_segment_counts_for_project__no_environments__returns_empty( + project: Project, + segment: Segment, +) -> None: + # Given a project with a segment but no environments + project.environments.all().delete() + sess = MagicMock() + + # When counts are computed + result = compute_segment_counts_for_project(project, sess) + + # Then the result is empty and Snowflake was not queried + assert result == [] + sess.sql.assert_not_called() + + +def test_compute_segment_counts_for_project__one_segment__returns_membership_instances( + project: Project, + environment: Environment, + segment: Segment, + segment_rule: SegmentRule, + mocker: MockerFixture, +) -> None: + # Given a project with one segment, one environment, and a stubbed + # SQL translator that 
emits a trivial predicate + mocker.patch( + "segment_membership.services.translate_segment", + return_value="TRUE", + ) + sess = MagicMock() + sess.sql.return_value.collect.return_value = [ + {"SEGMENT_ID": segment.id, "ENV_KEY": environment.api_key, "C": 7} + ] + + # When counts are computed + result = compute_segment_counts_for_project(project, sess) + + # Then Snowflake was queried once, the predicate landed in the SQL, + # and the row decodes into an unsaved SegmentMembership keyed by + # (segment, environment) — last_synced_at left for the caller + assert len(result) == 1 + [membership] = result + assert membership.segment_id == segment.id + assert membership.environment_id == environment.id + assert membership.count == 7 + assert membership.last_synced_at is None + sess.sql.assert_called_once() + sql = sess.sql.call_args.args[0] + assert f"SELECT {segment.id} AS segment_id" in sql + assert "GROUP BY i.environment_id" in sql + + +def test_compute_segment_counts_for_project__unknown_env_key_in_row__skips( + project: Project, + environment: Environment, + segment: Segment, + segment_rule: SegmentRule, + mocker: MockerFixture, +) -> None: + # Given a Snowflake row whose env_key isn't in this project — would + # only happen via stale/cross-project data, but we defend against it + mocker.patch( + "segment_membership.services.translate_segment", + return_value="TRUE", + ) + sess = MagicMock() + sess.sql.return_value.collect.return_value = [ + {"SEGMENT_ID": segment.id, "ENV_KEY": "ghost-env", "C": 99} + ] + + # When counts are computed + result = compute_segment_counts_for_project(project, sess) + + # Then the unknown-env row is skipped, no spurious membership emitted + assert result == [] + + +def test_compute_segment_counts_for_project__untranslatable_segment__skips( + project: Project, + environment: Environment, + segment: Segment, + segment_rule: SegmentRule, + mocker: MockerFixture, +) -> None: + # Given a project with a segment whose predicate the translator can't compile + mocker.patch( + "segment_membership.services.translate_segment", + return_value=None, + ) + sess = MagicMock() + + # When counts are computed + result = compute_segment_counts_for_project(project, sess) + + # Then the segment is skipped entirely (no row, not even count = 0) + # and Snowflake is not queried at all + assert result == [] + sess.sql.assert_not_called() diff --git a/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py b/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py new file mode 100644 index 000000000000..435a93be1f69 --- /dev/null +++ b/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py @@ -0,0 +1,290 @@ +from unittest.mock import MagicMock + +from pytest_mock import MockerFixture +from pytest_structlog import StructuredLogCapture + +from environments.models import Environment +from projects.models import Project +from segment_membership import tasks +from segment_membership.models import SegmentMembership +from segment_membership.tasks import ( + backfill_identities_to_snowflake, + refresh_project_segment_counts, +) +from segments.models import Segment +from tests.types import EnableFeaturesFixture + + +def test_backfill_identities_to_snowflake__no_snowflake_creds__skips( + mocker: MockerFixture, + log: StructuredLogCapture, +) -> None: + # Given Snowflake settings unconfigured + mocker.patch.object(tasks, "is_snowflake_configured", return_value=False) + spy = mocker.patch.object(tasks, "open_snowflake_session") + + # When the task 
runs + backfill_identities_to_snowflake() + + # Then it short-circuits without opening a session + spy.assert_not_called() + assert any(e["event"] == "backfill.skipped" for e in log.events) + + +def test_backfill_identities_to_snowflake__dynamo_disabled__skips( + mocker: MockerFixture, +) -> None: + # Given Snowflake configured but Dynamo wrapper disabled + mocker.patch.object(tasks, "is_snowflake_configured", return_value=True) + spy = mocker.patch.object(tasks, "open_snowflake_session") + mocker.patch.object( + tasks, + "DynamoIdentityWrapper", + return_value=MagicMock(is_enabled=False), + ) + + # When the task runs + backfill_identities_to_snowflake() + + # Then it skips without opening a session + spy.assert_not_called() + + +def test_backfill_identities_to_snowflake__happy_path__deletes_then_inserts( + mocker: MockerFixture, + project: Project, + environment: Environment, + segment: Segment, + enable_features: EnableFeaturesFixture, + log: StructuredLogCapture, +) -> None: + # Given a project with a canonical segment and a Dynamo wrapper + # yielding two identities for its environment + enable_features("segment_membership_inspection") + mocker.patch.object(tasks, "is_snowflake_configured", return_value=True) + sess = MagicMock() + mocker.patch.object( + tasks, "open_snowflake_session" + ).return_value.__enter__.return_value = sess + refresh_dispatch = mocker.patch.object(tasks, "refresh_project_segment_counts") + wrapper = MagicMock(is_enabled=True) + wrapper.iter_all_items_paginated.return_value = iter( + [ + { + "identity_uuid": "u-1", + "identifier": "a", + "composite_key": "k1", + "environment_api_key": environment.api_key, + "created_date": "2026-05-08T00:00:00Z", + "identity_traits": [], + }, + { + "identity_uuid": "u-2", + "identifier": "b", + "composite_key": "k2", + "environment_api_key": environment.api_key, + "created_date": "2026-05-08T00:00:00Z", + "identity_traits": [], + }, + ] + ) + mocker.patch.object(tasks, "DynamoIdentityWrapper", return_value=wrapper) + + # When the task runs + backfill_identities_to_snowflake() + + # Then DELETE binds the env api key as a parameter and the identities + # are written via the Snowpark DataFrame writer + delete_calls = [ + call + for call in sess.sql.call_args_list + if call.args and call.args[0].startswith("DELETE FROM IDENTITIES") + ] + assert len(delete_calls) == 1 + assert delete_calls[0].kwargs == {"params": [environment.api_key]} + + sess.create_dataframe.assert_called_once() + rows_arg = sess.create_dataframe.call_args.args[0] + assert {row[0] for row in rows_arg} == {environment.api_key} + assert {row[2] for row in rows_arg} == {"a", "b"} + sess.create_dataframe.return_value.write.mode.assert_called_once_with("append") + sess.create_dataframe.return_value.write.mode.return_value.save_as_table.assert_called_once_with( + "IDENTITIES" + ) + assert any( + e["event"] == "backfill.environment.completed" and e["rows__count"] == 2 + for e in log.events + ) + # And a per-project count refresh is dispatched once the backfill + # finishes. 
+ refresh_dispatch.delay.assert_called_once_with(args=(project.id,)) + + +def test_backfill_identities_to_snowflake__insert_fails__logs_and_continues( + mocker: MockerFixture, + project: Project, + environment: Environment, + segment: Segment, + enable_features: EnableFeaturesFixture, + log: StructuredLogCapture, +) -> None: + # Given the DataFrame write blows up mid-batch + enable_features("segment_membership_inspection") + mocker.patch.object(tasks, "is_snowflake_configured", return_value=True) + sess = MagicMock() + sess.create_dataframe.side_effect = RuntimeError("boom") + mocker.patch.object( + tasks, "open_snowflake_session" + ).return_value.__enter__.return_value = sess + wrapper = MagicMock(is_enabled=True) + wrapper.iter_all_items_paginated.return_value = iter( + [ + { + "identity_uuid": "u-1", + "identifier": "a", + "composite_key": "k1", + "environment_api_key": environment.api_key, + "created_date": "2026-05-08T00:00:00Z", + "identity_traits": [], + } + ] + ) + mocker.patch.object(tasks, "DynamoIdentityWrapper", return_value=wrapper) + + # When the task runs + backfill_identities_to_snowflake() + + # Then the failure is logged and the loop continues + assert any(e["event"] == "backfill.environment.failed" for e in log.events) + + +def test_backfill_identities_to_snowflake__multiple_projects__fans_out_refresh_per_project( + mocker: MockerFixture, + project: Project, + project_b: Project, + segment: Segment, + enable_features: EnableFeaturesFixture, +) -> None: + # Given two FoF-enabled projects with canonical segments + enable_features("segment_membership_inspection") + Segment.objects.create(name="seg-b", project=project_b) + mocker.patch.object(tasks, "is_snowflake_configured", return_value=True) + sess = MagicMock() + mocker.patch.object( + tasks, "open_snowflake_session" + ).return_value.__enter__.return_value = sess + refresh_dispatch = mocker.patch.object(tasks, "refresh_project_segment_counts") + wrapper = MagicMock(is_enabled=True) + wrapper.iter_all_items_paginated.return_value = iter([]) + mocker.patch.object(tasks, "DynamoIdentityWrapper", return_value=wrapper) + + # When the backfill runs + backfill_identities_to_snowflake() + + # Then a per-project refresh is dispatched for each project we + # actually processed (deduped) — once per project, not once per env + dispatched_ids = { + call.kwargs["args"][0] for call in refresh_dispatch.delay.call_args_list + } + assert dispatched_ids == {project.id, project_b.id} + + +def test_refresh_project_segment_counts__no_snowflake_creds__skips( + mocker: MockerFixture, + project: Project, + log: StructuredLogCapture, +) -> None: + # Given Snowflake unconfigured + mocker.patch.object(tasks, "is_snowflake_configured", return_value=False) + spy = mocker.patch.object(tasks, "open_snowflake_session") + + # When the per-project task runs + refresh_project_segment_counts(project.id) + + # Then it short-circuits without opening a session + spy.assert_not_called() + assert any( + e["event"] == "refresh.project.skipped" + and e["reason"] == "snowflake_not_configured" + for e in log.events + ) + + +def test_refresh_project_segment_counts__ff_disabled__skips( + mocker: MockerFixture, + project: Project, + log: StructuredLogCapture, +) -> None: + # Given Snowflake configured but FoF flag off (default) + mocker.patch.object(tasks, "is_snowflake_configured", return_value=True) + spy = mocker.patch.object(tasks, "open_snowflake_session") + + # When the per-project task runs + refresh_project_segment_counts(project.id) + + # Then it skips without 
opening a session + spy.assert_not_called() + assert any( + e["event"] == "refresh.project.skipped" and e["reason"] == "ff_disabled" + for e in log.events + ) + + +def test_refresh_project_segment_counts__compute_fails__logs( + mocker: MockerFixture, + project: Project, + segment: Segment, + enable_features: EnableFeaturesFixture, + log: StructuredLogCapture, +) -> None: + # Given a project where count compute throws + enable_features("segment_membership_inspection") + mocker.patch.object(tasks, "is_snowflake_configured", return_value=True) + sess = MagicMock() + mocker.patch.object( + tasks, "open_snowflake_session" + ).return_value.__enter__.return_value = sess + mocker.patch.object( + tasks, "compute_segment_counts_for_project", side_effect=RuntimeError("boom") + ) + + # When the per-project task runs + refresh_project_segment_counts(project.id) + + # Then the failure is logged + assert any(e["event"] == "refresh.project.failed" for e in log.events) + + +def test_refresh_project_segment_counts__counts_returned__upserts_per_env_rows( + mocker: MockerFixture, + project: Project, + environment: Environment, + segment: Segment, + enable_features: EnableFeaturesFixture, +) -> None: + # Given a project with a canonical segment and stubbed compute + enable_features("segment_membership_inspection") + mocker.patch.object(tasks, "is_snowflake_configured", return_value=True) + sess = MagicMock() + mocker.patch.object( + tasks, "open_snowflake_session" + ).return_value.__enter__.return_value = sess + mocker.patch.object( + tasks, + "compute_segment_counts_for_project", + return_value=[ + SegmentMembership( + segment_id=segment.id, + environment_id=environment.id, + count=42, + ), + ], + ) + + # When the per-project task runs + refresh_project_segment_counts(project.id) + + # Then a SegmentMembership row exists keyed by (segment, environment) + membership = SegmentMembership.objects.get(segment=segment, environment=environment) + assert membership.count == 42 + assert membership.last_synced_at is not None diff --git a/api/tests/unit/util/test_util.py b/api/tests/unit/util/test_util.py index 4f35ca8c098a..4aa75833918c 100644 --- a/api/tests/unit/util/test_util.py +++ b/api/tests/unit/util/test_util.py @@ -1,6 +1,6 @@ import pytest -from util.util import iter_chunked_concat, iter_paired_chunks +from util.util import batched, iter_chunked_concat, iter_paired_chunks def test_iter_paired_chunks__both_empty__returns_empty_list() -> None: @@ -121,3 +121,24 @@ def test_iter_chunked_concat__various_inputs__returns_expected_chunks( # Then assert list(result) == expected_result + + +def test_batched__empty_iterable__yields_nothing() -> None: + # Given an empty iterable + # When batched + # Then no batches are yielded + assert list(batched([], 3)) == [] + + +def test_batched__exact_multiple__yields_full_batches() -> None: + # Given an iterable whose length is a multiple of the batch size + # When batched + # Then every batch is full + assert list(batched(range(6), 2)) == [[0, 1], [2, 3], [4, 5]] + + +def test_batched__remainder__yields_smaller_final_batch() -> None: + # Given an iterable whose length isn't a multiple of the batch size + # When batched + # Then the final batch carries the remainder + assert list(batched([1, 2, 3, 4, 5], 2)) == [[1, 2], [3, 4], [5]] diff --git a/api/util/util.py b/api/util/util.py index 37cded499214..157fb11e3170 100644 --- a/api/util/util.py +++ b/api/util/util.py @@ -93,3 +93,13 @@ def truncate( separated by a delimiter. 
""" return delimiter.join([value[:ends_len], value[-ends_len:]]) + + +def batched(iterable: Iterable[T], size: int) -> Generator[list[T], None, None]: + """Yield consecutive batches of `size` items from `iterable`. The + final batch may be smaller. + + Backport from Python 3.12.""" + iterator = iter(iterable) + while batch := list(islice(iterator, size)): + yield batch diff --git a/docs/docs/deployment-self-hosting/observability/_events-catalogue.md b/docs/docs/deployment-self-hosting/observability/_events-catalogue.md index b7e979e6b046..ec998967329a 100644 --- a/docs/docs/deployment-self-hosting/observability/_events-catalogue.md +++ b/docs/docs/deployment-self-hosting/observability/_events-catalogue.md @@ -327,10 +327,75 @@ Logged at `warning` from: Attributes: +### `segment_membership.backfill.environment.completed` + +Logged at `info` from: + - `api/segment_membership/tasks.py:119` + +Attributes: + - `environment.id` + - `project.id` + - `rows.count` + +### `segment_membership.backfill.environment.failed` + +Logged at `exception` from: + - `api/segment_membership/tasks.py:113` + +Attributes: + - `environment.id` + - `project.id` + +### `segment_membership.backfill.skipped` + +Logged at `info` from: + - `api/segment_membership/tasks.py:78` + - `api/segment_membership/tasks.py:83` + +Attributes: + - `reason` + +### `segment_membership.compute.segment.skipped` + +Logged at `error` from: + - `api/segment_membership/services.py:129` + +Attributes: + - `project.id` + - `reason` + - `segment.id` + +### `segment_membership.refresh.project.completed` + +Logged at `info` from: + - `api/segment_membership/tasks.py:173` + +Attributes: + - `memberships.count` + - `project.id` + +### `segment_membership.refresh.project.failed` + +Logged at `exception` from: + - `api/segment_membership/tasks.py:161` + +Attributes: + - `project.id` + +### `segment_membership.refresh.project.skipped` + +Logged at `info` from: + - `api/segment_membership/tasks.py:141` + - `api/segment_membership/tasks.py:150` + +Attributes: + - `project.id` + - `reason` + ### `segments.serializers.segment_revision_created` Logged at `info` from: - - `api/segments/serializers.py:142` + - `api/segments/serializers.py:157` Attributes: - `revision_id` From 48afbd0001dd3b9ba4491259cc8e1daa7bf8e57c Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Sat, 9 May 2026 00:29:44 +0100 Subject: [PATCH 02/12] fix(segment_membership): Bump segment-list query counts for memberships prefetch The new `prefetch_related("memberships")` adds one IN-clause query per list response, even when no rows exist. Update the regression expectations so the existing test suite reflects the new baseline. 
beep boop --- api/tests/unit/segments/test_unit_segments_views.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/api/tests/unit/segments/test_unit_segments_views.py b/api/tests/unit/segments/test_unit_segments_views.py index 80cb26e3679a..c015d2e348f1 100644 --- a/api/tests/unit/segments/test_unit_segments_views.py +++ b/api/tests/unit/segments/test_unit_segments_views.py @@ -594,8 +594,8 @@ def test_get_segment_by_uuid__existing_segment__returns_segment_data( # type: i @pytest.mark.parametrize( "client, num_queries", [ - (lazy_fixture("admin_master_api_key_client"), 12), - (lazy_fixture("admin_client"), 14), + (lazy_fixture("admin_master_api_key_client"), 13), + (lazy_fixture("admin_client"), 15), ], ) def test_list_segments__without_rbac__expected_num_queries( @@ -651,8 +651,8 @@ def test_list_segments__system_segment_exists__excludes_system_segment( @pytest.mark.parametrize( "client, num_queries", [ - (lazy_fixture("admin_master_api_key_client"), 12), - (lazy_fixture("admin_client"), 15), + (lazy_fixture("admin_master_api_key_client"), 13), + (lazy_fixture("admin_client"), 16), ], ) def test_list_segments__with_rbac__expected_num_queries( From 1145359f6b0c486cd27fa656a791a48d2c9f6ece Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Sat, 9 May 2026 01:04:48 +0100 Subject: [PATCH 03/12] ci(segment_membership): Pin flagsmith-sql-flag-engine to CodeArtifact pre-release MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Switches the api dep from a private-repo git URL — which the Docker build can't clone in CI — to a versioned pin against Flagsmith's staging CodeArtifact PyPI (`flagsmith-pypi-staging`, account 302456015006, eu-west-2). Initial published release: 0.1.0a1. The reusable docker-build workflow now unconditionally assumes the OIDC role `arn:aws:iam::302456015006:role/codeartifact-github-actions-staging` (trust policy allows any `repo:Flagsmith/*`), fetches an authorisation token, and exposes it to every build as the `codeartifact_token` BuildKit secret. Builds that don't mount the secret simply ignore it; the OIDC + token cost is a couple of seconds per build. `Dockerfile`'s four `make install*` lines mount the `codeartifact_token` secret and export `POETRY_HTTP_BASIC_FLAGSMITH_PYPI_STAGING_*` so poetry resolves the dep from CodeArtifact. The header documents the `--secret="id=codeartifact_token,env=..."` incantation for local builds. 
beep boop --- .github/workflows/.reusable-docker-build.yml | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/.github/workflows/.reusable-docker-build.yml b/.github/workflows/.reusable-docker-build.yml index 049292e1bb76..254d91409698 100644 --- a/.github/workflows/.reusable-docker-build.yml +++ b/.github/workflows/.reusable-docker-build.yml @@ -97,6 +97,22 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} + - name: Configure AWS credentials for CodeArtifact + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: arn:aws:iam::302456015006:role/codeartifact-github-actions-staging + aws-region: eu-west-2 + + - name: Fetch CodeArtifact authorisation token + id: codeartifact + run: | + token=$(aws codeartifact get-authorization-token \ + --domain flagsmith-staging \ + --domain-owner 302456015006 \ + --query authorizationToken --output text) + echo "::add-mask::$token" + echo "token=$token" >> "$GITHUB_OUTPUT" + - name: Extract Docker metadata id: meta uses: docker/metadata-action@v5 @@ -113,7 +129,9 @@ jobs: save: ${{ inputs.ephemeral }} push: ${{ !inputs.ephemeral }} platforms: linux/amd64,linux/arm64 - secrets: ${{ secrets.secrets }} + secrets: | + ${{ secrets.secrets }} + codeartifact_token=${{ steps.codeartifact.outputs.token }} target: ${{ inputs.target }} build-args: | CI_COMMIT_SHA=${{ github.sha }} From 4775bf661dc733bcf0ecff4686af8653272a415b Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Sat, 9 May 2026 01:15:56 +0100 Subject: [PATCH 04/12] ci(segment_membership): Authenticate Poetry installs against CodeArtifact The unit-test, MCP-schema-push, makefile-target, and update-flagsmith workflows all run `make install-packages`, which now needs CodeArtifact credentials to resolve the `flagsmith-sql-flag-engine` pre-release. Encapsulate the OIDC role assumption + token fetch in a composite action, reuse it from the Docker build workflow, and wire it into every workflow that runs poetry install. 
beep boop --- .github/actions/codeartifact-login/action.yml | 29 +++++++++++++++++++ .github/workflows/.reusable-docker-build.yml | 16 ++-------- .../workflows/api-deploy-production-ecs.yml | 6 ++++ .github/workflows/api-pull-request.yml | 5 +++- .github/workflows/api-run-makefile-target.yml | 4 +++ .../api-tests-with-private-packages.yml | 5 +++- .../update-flagsmith-environment.yml | 7 +++++ 7 files changed, 56 insertions(+), 16 deletions(-) create mode 100644 .github/actions/codeartifact-login/action.yml diff --git a/.github/actions/codeartifact-login/action.yml b/.github/actions/codeartifact-login/action.yml new file mode 100644 index 000000000000..384b55221a1a --- /dev/null +++ b/.github/actions/codeartifact-login/action.yml @@ -0,0 +1,29 @@ +name: CodeArtifact login +description: Assume the staging CodeArtifact role via OIDC, fetch an authorisation token, and export Poetry HTTP basic auth env vars + +outputs: + token: + description: CodeArtifact authorisation token + value: ${{ steps.codeartifact.outputs.token }} + +runs: + using: composite + steps: + - name: Configure AWS credentials for CodeArtifact + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: arn:aws:iam::302456015006:role/codeartifact-github-actions-staging + aws-region: eu-west-2 + + - name: Fetch CodeArtifact authorisation token + id: codeartifact + shell: bash + run: | + token=$(aws codeartifact get-authorization-token \ + --domain flagsmith-staging \ + --domain-owner 302456015006 \ + --query authorizationToken --output text) + echo "::add-mask::$token" + echo "token=$token" >> "$GITHUB_OUTPUT" + echo "POETRY_HTTP_BASIC_FLAGSMITH_PYPI_STAGING_USERNAME=aws" >> "$GITHUB_ENV" + echo "POETRY_HTTP_BASIC_FLAGSMITH_PYPI_STAGING_PASSWORD=$token" >> "$GITHUB_ENV" diff --git a/.github/workflows/.reusable-docker-build.yml b/.github/workflows/.reusable-docker-build.yml index 254d91409698..5bae713b9be7 100644 --- a/.github/workflows/.reusable-docker-build.yml +++ b/.github/workflows/.reusable-docker-build.yml @@ -97,21 +97,9 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Configure AWS credentials for CodeArtifact - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: arn:aws:iam::302456015006:role/codeartifact-github-actions-staging - aws-region: eu-west-2 - - - name: Fetch CodeArtifact authorisation token + - name: Authenticate with CodeArtifact id: codeartifact - run: | - token=$(aws codeartifact get-authorization-token \ - --domain flagsmith-staging \ - --domain-owner 302456015006 \ - --query authorizationToken --output text) - echo "::add-mask::$token" - echo "token=$token" >> "$GITHUB_OUTPUT" + uses: ./.github/actions/codeartifact-login - name: Extract Docker metadata id: meta diff --git a/.github/workflows/api-deploy-production-ecs.yml b/.github/workflows/api-deploy-production-ecs.yml index 663ea0f666cc..d8f2883cb80d 100644 --- a/.github/workflows/api-deploy-production-ecs.yml +++ b/.github/workflows/api-deploy-production-ecs.yml @@ -24,6 +24,9 @@ jobs: needs: deploy-ecs name: Push MCP Schema to Gram runs-on: depot-ubuntu-latest + permissions: + contents: read + id-token: write # For CodeArtifact OIDC defaults: run: working-directory: api @@ -35,6 +38,9 @@ jobs: with: python-version: "3.12" + - name: Authenticate with CodeArtifact + uses: ./.github/actions/codeartifact-login + - name: Install dependencies run: | echo "https://${{ secrets.GH_PRIVATE_ACCESS_TOKEN }}:@github.com" > ${HOME}/.git-credentials diff --git 
a/.github/workflows/api-pull-request.yml b/.github/workflows/api-pull-request.yml index 2453f5de1513..a5ce9afc1d3d 100644 --- a/.github/workflows/api-pull-request.yml +++ b/.github/workflows/api-pull-request.yml @@ -2,7 +2,7 @@ name: API Pull Request permissions: contents: read # For actions/checkout - id-token: write # For Codecov OIDC + id-token: write # For Codecov and CodeArtifact OIDC on: pull_request: @@ -47,6 +47,9 @@ jobs: with: python-version: ${{ matrix.python-version }} + - name: Authenticate with CodeArtifact + uses: ./.github/actions/codeartifact-login + - name: Install Dependencies run: make install-packages opts='--extra dev' diff --git a/.github/workflows/api-run-makefile-target.yml b/.github/workflows/api-run-makefile-target.yml index 8a876c5ef0c1..f4a7ce051de5 100644 --- a/.github/workflows/api-run-makefile-target.yml +++ b/.github/workflows/api-run-makefile-target.yml @@ -21,6 +21,7 @@ on: permissions: contents: write pull-requests: write + id-token: write # For CodeArtifact OIDC defaults: run: @@ -38,6 +39,9 @@ jobs: with: python-version: 3.13 + - name: Authenticate with CodeArtifact + uses: ./.github/actions/codeartifact-login + - name: Install Dependencies run: make install-packages opts='--extra dev' diff --git a/.github/workflows/api-tests-with-private-packages.yml b/.github/workflows/api-tests-with-private-packages.yml index af2ebb6ad4a0..a809f492acb5 100644 --- a/.github/workflows/api-tests-with-private-packages.yml +++ b/.github/workflows/api-tests-with-private-packages.yml @@ -2,7 +2,7 @@ name: API Pull Request with Private Packages permissions: contents: read # For actions/checkout - id-token: write # For Codecov OIDC + id-token: write # For Codecov and CodeArtifact OIDC on: pull_request: @@ -48,6 +48,9 @@ jobs: - name: Install SAML Dependencies run: sudo apt-get install -y xmlsec1 + - name: Authenticate with CodeArtifact + uses: ./.github/actions/codeartifact-login + - name: Install packages and Tests shell: bash run: | diff --git a/.github/workflows/update-flagsmith-environment.yml b/.github/workflows/update-flagsmith-environment.yml index 87a9c9cf5a0f..1999357b2b0f 100644 --- a/.github/workflows/update-flagsmith-environment.yml +++ b/.github/workflows/update-flagsmith-environment.yml @@ -9,6 +9,10 @@ defaults: run: working-directory: api +permissions: + contents: read + id-token: write # For CodeArtifact OIDC + jobs: update_server_defaults: runs-on: depot-ubuntu-latest @@ -25,6 +29,9 @@ jobs: with: python-version: 3.12 + - name: Authenticate with CodeArtifact + uses: ./.github/actions/codeartifact-login + - name: Install Dependencies run: make install-packages From 0176170ba8c75cc09af28708f0610949d6ff8ca2 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Sat, 9 May 2026 01:31:29 +0100 Subject: [PATCH 05/12] fix(segment_membership): Derive IDENTITIES.id from UUID bytes, not MD5 CodeQL flagged the MD5 truncation as a sensitive-data hashing risk. UUIDv4 already gives us the random bits we need for a dedup key, so take the high 64 bits directly via int.from_bytes and drop the hash. 
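Worked sketch of the new derivation (duplicates the helper in the diff
below for illustration; `identity_id` here stands in for the real
`_identity_id`):

    import uuid

    def identity_id(identity_uuid: str) -> int:
        # High 8 bytes of the UUID read as a signed big-endian int:
        # deterministic, so backfill re-runs stay idempotent on the
        # (environment_id, id) primary key.
        return int.from_bytes(
            uuid.UUID(identity_uuid).bytes[:8], "big", signed=True
        )

    a = identity_id("f47ac10b-58cc-4372-a567-0e02b2c3d479")
    assert a == identity_id("f47ac10b-58cc-4372-a567-0e02b2c3d479")
    # Negative for this UUID (its top bit is set), but always within
    # the signed 64-bit range the tests assert.
    assert -(2**63) <= a < 2**63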
beep boop --- api/segment_membership/mappers.py | 9 +++------ .../test_unit_segment_membership_mappers.py | 16 ++++++++-------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/api/segment_membership/mappers.py b/api/segment_membership/mappers.py index 245f893e7b68..779c475f9173 100644 --- a/api/segment_membership/mappers.py +++ b/api/segment_membership/mappers.py @@ -1,4 +1,4 @@ -import hashlib +import uuid from decimal import Decimal from flagsmith_schemas.dynamodb import Identity as DynamoIdentity @@ -28,11 +28,8 @@ def map_identity_document_to_snowflake_row( def _identity_id(identity_uuid: str) -> int: - """Stable 64-bit IDENTITIES.id derived from `identity_uuid`. Same - uuid always produces the same id, so re-runs of the backfill are - idempotent on the (environment_id, id) primary key.""" - digest = hashlib.md5(identity_uuid.encode("utf-8")).digest() - return int.from_bytes(digest[:8], "big", signed=False) + """Project a UUID onto a stable signed 64-bit IDENTITIES.id.""" + return int.from_bytes(uuid.UUID(identity_uuid).bytes[:8], "big", signed=True) def _coerce_trait_value(value: object) -> object: diff --git a/api/tests/unit/segment_membership/test_unit_segment_membership_mappers.py b/api/tests/unit/segment_membership/test_unit_segment_membership_mappers.py index d0ad2ab8c084..8e823fcc0f76 100644 --- a/api/tests/unit/segment_membership/test_unit_segment_membership_mappers.py +++ b/api/tests/unit/segment_membership/test_unit_segment_membership_mappers.py @@ -12,15 +12,15 @@ def test_identity_id__same_uuid__produces_same_id() -> None: # Given the same identity_uuid - uuid = "abc-123" + uuid = "f47ac10b-58cc-4372-a567-0e02b2c3d479" # When the helper runs twice a = _identity_id(uuid) b = _identity_id(uuid) - # Then the result is identical and fits in a non-negative 64-bit int + # Then the result is identical and fits in a signed 64-bit int assert a == b - assert 0 <= a < 2**64 + assert -(2**63) <= a < 2**63 def test_coerce_trait_value__decimal_int__narrows_to_int() -> None: @@ -69,7 +69,7 @@ def test_flatten_traits__list__returns_dict_dropping_empty_keys() -> None: def test_map_identity_document_to_snowflake_row__with_traits__returns_tuple() -> None: # Given a Dynamo identity document with traits doc: DynamoIdentity = { - "identity_uuid": "uuid-1", + "identity_uuid": "f47ac10b-58cc-4372-a567-0e02b2c3d479", "identifier": "alice", "environment_api_key": "env-key", "composite_key": "env_x_alice", @@ -84,7 +84,7 @@ def test_map_identity_document_to_snowflake_row__with_traits__returns_tuple() -> # Then the columns line up positionally with the IDENTITIES schema assert env_id == "env-key" - assert _id == _identity_id("uuid-1") + assert _id == _identity_id("f47ac10b-58cc-4372-a567-0e02b2c3d479") assert identifier == "alice" assert identity_key == "env_x_alice" assert traits == {"plan": "growth"} @@ -95,7 +95,7 @@ def test_map_identity_document_to_snowflake_row__no_traits__returns_none_for_tra ): # Given a Dynamo identity document with no trait entries doc: DynamoIdentity = { - "identity_uuid": "uuid-1", + "identity_uuid": "f47ac10b-58cc-4372-a567-0e02b2c3d479", "identifier": "alice", "environment_api_key": "env-key", "composite_key": "env_x_alice", @@ -115,7 +115,7 @@ def test_map_identity_document_to_snowflake_row__no_composite_key__falls_back_to ): # Given an identity document missing the composite_key doc: DynamoIdentity = { # type: ignore[typeddict-item] - "identity_uuid": "uuid-1", + "identity_uuid": "f47ac10b-58cc-4372-a567-0e02b2c3d479", "identifier": "alice", 
"environment_api_key": "env-key", "created_date": "2026-05-08T00:00:00Z", @@ -126,4 +126,4 @@ def test_map_identity_document_to_snowflake_row__no_composite_key__falls_back_to *_, identity_key, _traits = map_identity_document_to_snowflake_row("env-key", doc) # Then identity_key falls back to identity_uuid - assert identity_key == "uuid-1" + assert identity_key == "f47ac10b-58cc-4372-a567-0e02b2c3d479" From a3942460c03becd0d1aac6ee9f1dd430c54a48b0 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Sat, 9 May 2026 04:03:46 +0100 Subject: [PATCH 06/12] feat(segment_membership): Observability for backfill and refresh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds four global Prometheus metrics covering the daily Dynamo→Snowflake backfill and the per-project count refresh: identities mirrored, per-environment backfill duration, refresh duration, and refresh failures. Metrics are global — env/project labels would blow Prometheus cardinality at SaaS scale. Snowpark sessions now carry a QUERY_TAG for spend attribution, set via Snowpark's `session.query_tag` setter. Backfill tags by org+project per env iteration; refresh tags by org+project. Spend grouped by tag is queryable from Snowflake's QUERY_HISTORY for 365 days. beep boop --- api/segment_membership/metrics.py | 26 ++++++++ api/segment_membership/tasks.py | 60 +++++++++++++------ .../test_unit_segment_membership_tasks.py | 6 +- .../observability/_events-catalogue.md | 16 ++--- .../observability/_metrics-catalogue.md | 32 ++++++++++ 5 files changed, 110 insertions(+), 30 deletions(-) create mode 100644 api/segment_membership/metrics.py diff --git a/api/segment_membership/metrics.py b/api/segment_membership/metrics.py new file mode 100644 index 000000000000..ba5cde4e2c44 --- /dev/null +++ b/api/segment_membership/metrics.py @@ -0,0 +1,26 @@ +import prometheus_client + +# All metrics are global — refresh and backfill cardinality scales with +# project + environment counts, which would blow up Prometheus storage. +# Drill-down lives in Snowflake's query history (tagged via QUERY_TAG) +# and in structlog events that carry per-project/env IDs. 
+ +flagsmith_segment_membership_backfill_identities_total = prometheus_client.Counter( + "flagsmith_segment_membership_backfill_identities_total", + "Total identities mirrored from Dynamo to Snowflake by the segment-membership backfill task across all environments.", +) + +flagsmith_segment_membership_backfill_duration_seconds = prometheus_client.Histogram( + "flagsmith_segment_membership_backfill_duration_seconds", + "Duration of a segment-membership backfill for one environment.", +) + +flagsmith_segment_membership_refresh_duration_seconds = prometheus_client.Histogram( + "flagsmith_segment_membership_refresh_duration_seconds", + "Duration of a single segment-membership count-refresh run for one project.", +) + +flagsmith_segment_membership_refresh_failures_total = prometheus_client.Counter( + "flagsmith_segment_membership_refresh_failures_total", + "Total segment-membership refresh runs that failed for any reason.", +) diff --git a/api/segment_membership/tasks.py b/api/segment_membership/tasks.py index 1fe17a7feed5..49cff07f1312 100644 --- a/api/segment_membership/tasks.py +++ b/api/segment_membership/tasks.py @@ -30,6 +30,12 @@ from environments.dynamodb.wrappers.identity_wrapper import DynamoIdentityWrapper from projects.models import Project from segment_membership.mappers import map_identity_document_to_snowflake_row +from segment_membership.metrics import ( + flagsmith_segment_membership_backfill_duration_seconds, + flagsmith_segment_membership_backfill_identities_total, + flagsmith_segment_membership_refresh_duration_seconds, + flagsmith_segment_membership_refresh_failures_total, +) from segment_membership.models import SegmentMembership from segment_membership.services import ( compute_segment_counts_for_project, @@ -90,25 +96,31 @@ def backfill_identities_to_snowflake() -> None: for env in project.environments.all(): env_key = env.api_key row_count = 0 + sess.query_tag = ( + "flagsmith:segment_membership:backfill" + f":org_{project.organisation_id}" + f":project_{project.id}" + ) try: - sess.sql( - "DELETE FROM IDENTITIES WHERE environment_id = ?", - params=[env_key], - ).collect() - for batch in batched( - wrapper.iter_all_items_paginated(env_key), - _INSERT_BATCH_SIZE, - ): - rows = [ - map_identity_document_to_snowflake_row( - env_key, cast(DynamoIdentity, doc) - ) - for doc in batch - ] - sess.create_dataframe( - rows, schema=_IDENTITIES_SCHEMA - ).write.mode("append").save_as_table("IDENTITIES") - row_count += len(rows) + with flagsmith_segment_membership_backfill_duration_seconds.time(): + sess.sql( + "DELETE FROM IDENTITIES WHERE environment_id = ?", + params=[env_key], + ).collect() + for batch in batched( + wrapper.iter_all_items_paginated(env_key), + _INSERT_BATCH_SIZE, + ): + rows = [ + map_identity_document_to_snowflake_row( + env_key, cast(DynamoIdentity, doc) + ) + for doc in batch + ] + sess.create_dataframe( + rows, schema=_IDENTITIES_SCHEMA + ).write.mode("append").save_as_table("IDENTITIES") + row_count += len(rows) except Exception: logger.exception( "backfill.environment.failed", @@ -116,6 +128,7 @@ def backfill_identities_to_snowflake() -> None: environment__id=env.id, ) continue + flagsmith_segment_membership_backfill_identities_total.inc(row_count) logger.info( "backfill.environment.completed", project__id=project.id, @@ -154,10 +167,19 @@ def refresh_project_segment_counts(project_id: int) -> None: ) return - with open_snowflake_session() as sess: + with ( + flagsmith_segment_membership_refresh_duration_seconds.time(), + open_snowflake_session() as sess, 
+ ): + sess.query_tag = ( + "flagsmith:segment_membership:refresh" + f":org_{project.organisation_id}" + f":project_{project.id}" + ) try: memberships = compute_segment_counts_for_project(project, sess) except Exception: + flagsmith_segment_membership_refresh_failures_total.inc() logger.exception("refresh.project.failed", project__id=project_id) return diff --git a/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py b/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py index 435a93be1f69..720c78a327f6 100644 --- a/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py +++ b/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py @@ -71,7 +71,7 @@ def test_backfill_identities_to_snowflake__happy_path__deletes_then_inserts( wrapper.iter_all_items_paginated.return_value = iter( [ { - "identity_uuid": "u-1", + "identity_uuid": "f47ac10b-58cc-4372-a567-0e02b2c3d479", "identifier": "a", "composite_key": "k1", "environment_api_key": environment.api_key, @@ -79,7 +79,7 @@ def test_backfill_identities_to_snowflake__happy_path__deletes_then_inserts( "identity_traits": [], }, { - "identity_uuid": "u-2", + "identity_uuid": "550e8400-e29b-41d4-a716-446655440000", "identifier": "b", "composite_key": "k2", "environment_api_key": environment.api_key, @@ -140,7 +140,7 @@ def test_backfill_identities_to_snowflake__insert_fails__logs_and_continues( wrapper.iter_all_items_paginated.return_value = iter( [ { - "identity_uuid": "u-1", + "identity_uuid": "f47ac10b-58cc-4372-a567-0e02b2c3d479", "identifier": "a", "composite_key": "k1", "environment_api_key": environment.api_key, diff --git a/docs/docs/deployment-self-hosting/observability/_events-catalogue.md b/docs/docs/deployment-self-hosting/observability/_events-catalogue.md index ec998967329a..af9b0fff05cb 100644 --- a/docs/docs/deployment-self-hosting/observability/_events-catalogue.md +++ b/docs/docs/deployment-self-hosting/observability/_events-catalogue.md @@ -330,7 +330,7 @@ Attributes: ### `segment_membership.backfill.environment.completed` Logged at `info` from: - - `api/segment_membership/tasks.py:119` + - `api/segment_membership/tasks.py:132` Attributes: - `environment.id` @@ -340,7 +340,7 @@ Attributes: ### `segment_membership.backfill.environment.failed` Logged at `exception` from: - - `api/segment_membership/tasks.py:113` + - `api/segment_membership/tasks.py:125` Attributes: - `environment.id` @@ -349,8 +349,8 @@ Attributes: ### `segment_membership.backfill.skipped` Logged at `info` from: - - `api/segment_membership/tasks.py:78` - - `api/segment_membership/tasks.py:83` + - `api/segment_membership/tasks.py:84` + - `api/segment_membership/tasks.py:89` Attributes: - `reason` @@ -368,7 +368,7 @@ Attributes: ### `segment_membership.refresh.project.completed` Logged at `info` from: - - `api/segment_membership/tasks.py:173` + - `api/segment_membership/tasks.py:195` Attributes: - `memberships.count` @@ -377,7 +377,7 @@ Attributes: ### `segment_membership.refresh.project.failed` Logged at `exception` from: - - `api/segment_membership/tasks.py:161` + - `api/segment_membership/tasks.py:183` Attributes: - `project.id` @@ -385,8 +385,8 @@ Attributes: ### `segment_membership.refresh.project.skipped` Logged at `info` from: - - `api/segment_membership/tasks.py:141` - - `api/segment_membership/tasks.py:150` + - `api/segment_membership/tasks.py:154` + - `api/segment_membership/tasks.py:163` Attributes: - `project.id` diff --git 
a/docs/docs/deployment-self-hosting/observability/_metrics-catalogue.md b/docs/docs/deployment-self-hosting/observability/_metrics-catalogue.md index 6cae297b29e4..f90a72b8a5c7 100644 --- a/docs/docs/deployment-self-hosting/observability/_metrics-catalogue.md +++ b/docs/docs/deployment-self-hosting/observability/_metrics-catalogue.md @@ -70,6 +70,38 @@ Labels: - `method` - `response_status` +### `flagsmith_segment_membership_backfill_duration_seconds` + +Histogram. + +Duration of a segment-membership backfill for one environment. + +Labels: + +### `flagsmith_segment_membership_backfill_identities` + +Counter. + +Total identities mirrored from Dynamo to Snowflake by the segment-membership backfill task across all environments. + +Labels: + +### `flagsmith_segment_membership_refresh_duration_seconds` + +Histogram. + +Duration of a single segment-membership count-refresh run for one project. + +Labels: + +### `flagsmith_segment_membership_refresh_failures` + +Counter. + +Total segment-membership refresh runs that failed for any reason. + +Labels: + ### `flagsmith_task_processor_enqueued_tasks` Counter. From b02f3d88e00afa57c45e6a4cfd5dbd8bbda80f2d Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Sun, 10 May 2026 01:00:31 +0100 Subject: [PATCH 07/12] chore(segment_membership): Local smoke-test harness Wires the segment-membership pipeline against DynamoDB Local + a real Snowflake account: seeds a project, environment, and segment in core Postgres; creates the EdgeIdentities table; seeds 25 matching + 25 non-matching identities; runs backfill + refresh tasks; asserts SegmentMembership.count equals the matching seed. Run with `make docker-up django-migrate` followed by `make smoke-test-segment-membership`. SNOWFLAKE_* env vars come from .env via Make's existing dotenv include; cleans the env's Snowflake rows on exit. beep boop --- api/Makefile | 24 ++ api/segment_membership/management/__init__.py | 0 .../management/commands/__init__.py | 0 .../commands/smoke_test_segment_membership.py | 233 ++++++++++++++++++ 4 files changed, 257 insertions(+) create mode 100644 api/segment_membership/management/__init__.py create mode 100644 api/segment_membership/management/commands/__init__.py create mode 100644 api/segment_membership/management/commands/smoke_test_segment_membership.py diff --git a/api/Makefile b/api/Makefile index cdecfa723564..ecdeb7943d82 100644 --- a/api/Makefile +++ b/api/Makefile @@ -106,6 +106,30 @@ django-collect-static: serve: docker-up wait-for-db uv run flagsmith start --reload api +.PHONY: smoke-test-segment-membership-up +smoke-test-segment-membership-up: + docker rm -f flagsmith-dynamodb-smoke 2>/dev/null || true + docker run -d --name flagsmith-dynamodb-smoke -p 8002:8000 amazon/dynamodb-local + +.PHONY: smoke-test-segment-membership-down +smoke-test-segment-membership-down: + docker rm -f flagsmith-dynamodb-smoke 2>/dev/null || true + +.PHONY: smoke-test-segment-membership +# Drives the segment-membership pipeline against DynamoDB Local + the +# Snowflake account configured via SNOWFLAKE_* env vars in .env. Requires +# `make docker-up django-migrate` to have run first so core Postgres and +# the Snowflake IDENTITIES schema are in place. 
+smoke-test-segment-membership: AWS_ENDPOINT_URL_DYNAMODB=http://localhost:8002 +smoke-test-segment-membership: IDENTITIES_TABLE_NAME_DYNAMO=flagsmith_smoke_identities +smoke-test-segment-membership: AWS_ACCESS_KEY_ID=local +smoke-test-segment-membership: AWS_SECRET_ACCESS_KEY=local +smoke-test-segment-membership: AWS_DEFAULT_REGION=us-east-1 +smoke-test-segment-membership: smoke-test-segment-membership-up + @sleep 2 + -poetry run python manage.py smoke_test_segment_membership $(opts) + $(MAKE) smoke-test-segment-membership-down + .PHONY: run-task-processor run-task-processor: docker-up wait-for-db uv run flagsmith start --reload --bind 0.0.0.0:8001 task-processor diff --git a/api/segment_membership/management/__init__.py b/api/segment_membership/management/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/api/segment_membership/management/commands/__init__.py b/api/segment_membership/management/commands/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/api/segment_membership/management/commands/smoke_test_segment_membership.py b/api/segment_membership/management/commands/smoke_test_segment_membership.py new file mode 100644 index 000000000000..2bde52d9ccbf --- /dev/null +++ b/api/segment_membership/management/commands/smoke_test_segment_membership.py @@ -0,0 +1,233 @@ +"""Smoke-test the segment membership pipeline against a local DynamoDB ++ a real Snowflake account configured via SNOWFLAKE_* env vars. + +The command: + +1. Creates the EdgeIdentities table in the configured DynamoDB endpoint. +2. Seeds a project, environment, and segment in core Postgres if missing. +3. Seeds identities in Dynamo whose traits match the segment predicate + (and an equal number that don't). +4. Runs `backfill_identities_to_snowflake()` synchronously. +5. Runs `refresh_project_segment_counts(project_id)` synchronously. +6. Asserts the resulting `SegmentMembership.count` equals the number of + matching identities seeded. + +`is_membership_enabled` is patched to True for the run so the +Flagsmith-on-Flagsmith flag isn't required. + +Usage: + + AWS_ENDPOINT_URL_DYNAMODB=http://localhost:8000 \ + IDENTITIES_TABLE_NAME_DYNAMO=flagsmith_smoke_identities \ + AWS_ACCESS_KEY_ID=local AWS_SECRET_ACCESS_KEY=local AWS_DEFAULT_REGION=us-east-1 \ + poetry run python manage.py smoke_test_segment_membership +""" + +import logging +import uuid +from datetime import datetime, timezone +from unittest.mock import patch + +import boto3 +from django.conf import settings +from django.core.management.base import BaseCommand, CommandError + +from environments.models import Environment +from organisations.models import Organisation +from projects.models import Project +from segment_membership import services as membership_services +from segment_membership import tasks as membership_tasks +from segment_membership.models import SegmentMembership +from segment_membership.tasks import ( + backfill_identities_to_snowflake, + refresh_project_segment_counts, +) +from segments.models import Condition, Segment, SegmentRule + +logger = logging.getLogger(__name__) + +PROJECT_NAME = "smoke-test-segment-membership" +ENV_NAME = "smoke-env" +SEGMENT_NAME = "growth-plan-users" +TRAIT_KEY = "plan" +MATCH_VALUE = "growth" +NON_MATCH_VALUE = "basic" +N_PER_BUCKET = 25 # 25 matches + 25 non-matches = 50 total + + +class Command(BaseCommand): + help = "End-to-end smoke test for segment membership against DynamoDB Local + Snowflake." 
+ + def add_arguments(self, parser): # type: ignore[no-untyped-def] + parser.add_argument( + "--keep", + action="store_true", + help="Skip Snowflake row cleanup at the end.", + ) + + def handle(self, *args, **options): # type: ignore[no-untyped-def] + if not settings.IDENTITIES_TABLE_NAME_DYNAMO: + raise CommandError("IDENTITIES_TABLE_NAME_DYNAMO must be set") + if not membership_services.is_snowflake_configured(): + raise CommandError("SNOWFLAKE_* settings must be populated") + + org, project, environment, segment = _ensure_fixtures() + self.stdout.write( + f"fixtures: project={project.id} env={environment.id} segment={segment.id}" + ) + + table_name = _ensure_dynamo_table() + self.stdout.write(f"dynamo table ready: {table_name}") + + _seed_identities(environment.api_key) + self.stdout.write( + f"seeded {N_PER_BUCKET} matching + {N_PER_BUCKET} non-matching identities" + ) + + with ( + patch.object( + membership_services, "is_membership_enabled", return_value=True + ), + patch.object(membership_tasks, "is_membership_enabled", return_value=True), + ): + backfill_identities_to_snowflake() + self.stdout.write("backfill complete") + + refresh_project_segment_counts(project.id) + self.stdout.write("refresh complete") + + with membership_services.open_snowflake_session() as sess: + sess.query_tag = "flagsmith:segment_membership:smoke_test:diag" + total = sess.sql( + "SELECT COUNT(*) AS c FROM IDENTITIES WHERE environment_id = ?", + params=[environment.api_key], + ).collect()[0]["C"] + self.stdout.write(f"snowflake IDENTITIES rows for env: {total}") + sample = sess.sql( + "SELECT identifier, traits FROM IDENTITIES WHERE environment_id = ? LIMIT 1", + params=[environment.api_key], + ).collect() + self.stdout.write(f"sample row: {sample}") + self.stdout.write( + f"live segments: {list(Segment.live_objects.filter(project=project).values_list('id', 'name'))}" + ) + sess.query_tag = "flagsmith:segment_membership:smoke_test:diag" + results = membership_services.compute_segment_counts_for_project( + project, sess + ) + self.stdout.write(f"compute_segment_counts_for_project: {results}") + + try: + membership = SegmentMembership.objects.get( + segment=segment, environment=environment + ) + self.stdout.write( + f"SegmentMembership: count={membership.count} last_synced_at={membership.last_synced_at}" + ) + if membership.count != N_PER_BUCKET: + raise CommandError( + f"Expected count={N_PER_BUCKET}, got {membership.count}" + ) + except SegmentMembership.DoesNotExist: + raise CommandError( + "SegmentMembership row not created — see diagnostics above" + ) + self.stdout.write(self.style.SUCCESS("✓ Counts match expected")) + + if not options["keep"]: + _cleanup_snowflake(environment.api_key) + self.stdout.write("snowflake rows cleaned up") + + +def _ensure_fixtures() -> tuple[Organisation, Project, Environment, Segment]: + org, _ = Organisation.objects.get_or_create(name="smoke-test") + project, _ = Project.objects.get_or_create(name=PROJECT_NAME, organisation=org) + environment, _ = Environment.objects.get_or_create(name=ENV_NAME, project=project) + segment, created = Segment.objects.get_or_create( + name=SEGMENT_NAME, + project=project, + defaults={"description": "smoke-test segment"}, + ) + if created or not segment.rules.exists(): + # ALL > ANY > condition tree mirroring how the dashboard builds segments. 
+ outer = SegmentRule.objects.create(segment=segment, type=SegmentRule.ALL_RULE) + inner = SegmentRule.objects.create(rule=outer, type=SegmentRule.ANY_RULE) + Condition.objects.create( + rule=inner, + property=TRAIT_KEY, + operator="EQUAL", + value=MATCH_VALUE, + ) + return org, project, environment, segment + + +def _ensure_dynamo_table() -> str: + name = settings.IDENTITIES_TABLE_NAME_DYNAMO + client = boto3.client("dynamodb") + try: + client.describe_table(TableName=name) + client.delete_table(TableName=name) + client.get_waiter("table_not_exists").wait(TableName=name) + except client.exceptions.ResourceNotFoundException: + pass + + client.create_table( + TableName=name, + KeySchema=[{"AttributeName": "composite_key", "KeyType": "HASH"}], + AttributeDefinitions=[ + {"AttributeName": "composite_key", "AttributeType": "S"}, + {"AttributeName": "environment_api_key", "AttributeType": "S"}, + {"AttributeName": "identifier", "AttributeType": "S"}, + {"AttributeName": "identity_uuid", "AttributeType": "S"}, + ], + GlobalSecondaryIndexes=[ + { + "IndexName": "environment_api_key-identifier-index", + "KeySchema": [ + {"AttributeName": "environment_api_key", "KeyType": "HASH"}, + {"AttributeName": "identifier", "KeyType": "RANGE"}, + ], + "Projection": {"ProjectionType": "ALL"}, + }, + { + "IndexName": "identity_uuid-index", + "KeySchema": [ + {"AttributeName": "identity_uuid", "KeyType": "HASH"}, + ], + "Projection": {"ProjectionType": "ALL"}, + }, + ], + BillingMode="PAY_PER_REQUEST", + ) + client.get_waiter("table_exists").wait(TableName=name) + return name + + +def _seed_identities(env_api_key: str) -> None: + table = boto3.resource("dynamodb").Table(settings.IDENTITIES_TABLE_NAME_DYNAMO) + now = datetime.now(timezone.utc).isoformat() + with table.batch_writer() as batch: + for i in range(N_PER_BUCKET): + for value, label in ((MATCH_VALUE, "match"), (NON_MATCH_VALUE, "no")): + identifier = f"{label}-{i}" + batch.put_item( + Item={ + "composite_key": f"{env_api_key}_{identifier}", + "environment_api_key": env_api_key, + "identifier": identifier, + "identity_uuid": str(uuid.uuid4()), + "created_date": now, + "identity_traits": [ + {"trait_key": TRAIT_KEY, "trait_value": value}, + ], + } + ) + + +def _cleanup_snowflake(env_api_key: str) -> None: + with membership_services.open_snowflake_session() as sess: + sess.query_tag = "flagsmith:segment_membership:smoke_test:cleanup" + sess.sql( + "DELETE FROM IDENTITIES WHERE environment_id = ?", + params=[env_api_key], + ).collect() From cf4c71a0acadbf63895ce0f912b79c48da56c393 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Sun, 10 May 2026 14:28:32 +0100 Subject: [PATCH 08/12] Revert "chore(segment_membership): Local smoke-test harness" This reverts commit 9d31cb3dad173369560f36f9fdd895707ced4088. 
--- api/Makefile | 24 -- api/segment_membership/management/__init__.py | 0 .../management/commands/__init__.py | 0 .../commands/smoke_test_segment_membership.py | 233 ------------------ 4 files changed, 257 deletions(-) delete mode 100644 api/segment_membership/management/__init__.py delete mode 100644 api/segment_membership/management/commands/__init__.py delete mode 100644 api/segment_membership/management/commands/smoke_test_segment_membership.py diff --git a/api/Makefile b/api/Makefile index ecdeb7943d82..cdecfa723564 100644 --- a/api/Makefile +++ b/api/Makefile @@ -106,30 +106,6 @@ django-collect-static: serve: docker-up wait-for-db uv run flagsmith start --reload api -.PHONY: smoke-test-segment-membership-up -smoke-test-segment-membership-up: - docker rm -f flagsmith-dynamodb-smoke 2>/dev/null || true - docker run -d --name flagsmith-dynamodb-smoke -p 8002:8000 amazon/dynamodb-local - -.PHONY: smoke-test-segment-membership-down -smoke-test-segment-membership-down: - docker rm -f flagsmith-dynamodb-smoke 2>/dev/null || true - -.PHONY: smoke-test-segment-membership -# Drives the segment-membership pipeline against DynamoDB Local + the -# Snowflake account configured via SNOWFLAKE_* env vars in .env. Requires -# `make docker-up django-migrate` to have run first so core Postgres and -# the Snowflake IDENTITIES schema are in place. -smoke-test-segment-membership: AWS_ENDPOINT_URL_DYNAMODB=http://localhost:8002 -smoke-test-segment-membership: IDENTITIES_TABLE_NAME_DYNAMO=flagsmith_smoke_identities -smoke-test-segment-membership: AWS_ACCESS_KEY_ID=local -smoke-test-segment-membership: AWS_SECRET_ACCESS_KEY=local -smoke-test-segment-membership: AWS_DEFAULT_REGION=us-east-1 -smoke-test-segment-membership: smoke-test-segment-membership-up - @sleep 2 - -poetry run python manage.py smoke_test_segment_membership $(opts) - $(MAKE) smoke-test-segment-membership-down - .PHONY: run-task-processor run-task-processor: docker-up wait-for-db uv run flagsmith start --reload --bind 0.0.0.0:8001 task-processor diff --git a/api/segment_membership/management/__init__.py b/api/segment_membership/management/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/api/segment_membership/management/commands/__init__.py b/api/segment_membership/management/commands/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/api/segment_membership/management/commands/smoke_test_segment_membership.py b/api/segment_membership/management/commands/smoke_test_segment_membership.py deleted file mode 100644 index 2bde52d9ccbf..000000000000 --- a/api/segment_membership/management/commands/smoke_test_segment_membership.py +++ /dev/null @@ -1,233 +0,0 @@ -"""Smoke-test the segment membership pipeline against a local DynamoDB -+ a real Snowflake account configured via SNOWFLAKE_* env vars. - -The command: - -1. Creates the EdgeIdentities table in the configured DynamoDB endpoint. -2. Seeds a project, environment, and segment in core Postgres if missing. -3. Seeds identities in Dynamo whose traits match the segment predicate - (and an equal number that don't). -4. Runs `backfill_identities_to_snowflake()` synchronously. -5. Runs `refresh_project_segment_counts(project_id)` synchronously. -6. Asserts the resulting `SegmentMembership.count` equals the number of - matching identities seeded. - -`is_membership_enabled` is patched to True for the run so the -Flagsmith-on-Flagsmith flag isn't required. 
- -Usage: - - AWS_ENDPOINT_URL_DYNAMODB=http://localhost:8000 \ - IDENTITIES_TABLE_NAME_DYNAMO=flagsmith_smoke_identities \ - AWS_ACCESS_KEY_ID=local AWS_SECRET_ACCESS_KEY=local AWS_DEFAULT_REGION=us-east-1 \ - poetry run python manage.py smoke_test_segment_membership -""" - -import logging -import uuid -from datetime import datetime, timezone -from unittest.mock import patch - -import boto3 -from django.conf import settings -from django.core.management.base import BaseCommand, CommandError - -from environments.models import Environment -from organisations.models import Organisation -from projects.models import Project -from segment_membership import services as membership_services -from segment_membership import tasks as membership_tasks -from segment_membership.models import SegmentMembership -from segment_membership.tasks import ( - backfill_identities_to_snowflake, - refresh_project_segment_counts, -) -from segments.models import Condition, Segment, SegmentRule - -logger = logging.getLogger(__name__) - -PROJECT_NAME = "smoke-test-segment-membership" -ENV_NAME = "smoke-env" -SEGMENT_NAME = "growth-plan-users" -TRAIT_KEY = "plan" -MATCH_VALUE = "growth" -NON_MATCH_VALUE = "basic" -N_PER_BUCKET = 25 # 25 matches + 25 non-matches = 50 total - - -class Command(BaseCommand): - help = "End-to-end smoke test for segment membership against DynamoDB Local + Snowflake." - - def add_arguments(self, parser): # type: ignore[no-untyped-def] - parser.add_argument( - "--keep", - action="store_true", - help="Skip Snowflake row cleanup at the end.", - ) - - def handle(self, *args, **options): # type: ignore[no-untyped-def] - if not settings.IDENTITIES_TABLE_NAME_DYNAMO: - raise CommandError("IDENTITIES_TABLE_NAME_DYNAMO must be set") - if not membership_services.is_snowflake_configured(): - raise CommandError("SNOWFLAKE_* settings must be populated") - - org, project, environment, segment = _ensure_fixtures() - self.stdout.write( - f"fixtures: project={project.id} env={environment.id} segment={segment.id}" - ) - - table_name = _ensure_dynamo_table() - self.stdout.write(f"dynamo table ready: {table_name}") - - _seed_identities(environment.api_key) - self.stdout.write( - f"seeded {N_PER_BUCKET} matching + {N_PER_BUCKET} non-matching identities" - ) - - with ( - patch.object( - membership_services, "is_membership_enabled", return_value=True - ), - patch.object(membership_tasks, "is_membership_enabled", return_value=True), - ): - backfill_identities_to_snowflake() - self.stdout.write("backfill complete") - - refresh_project_segment_counts(project.id) - self.stdout.write("refresh complete") - - with membership_services.open_snowflake_session() as sess: - sess.query_tag = "flagsmith:segment_membership:smoke_test:diag" - total = sess.sql( - "SELECT COUNT(*) AS c FROM IDENTITIES WHERE environment_id = ?", - params=[environment.api_key], - ).collect()[0]["C"] - self.stdout.write(f"snowflake IDENTITIES rows for env: {total}") - sample = sess.sql( - "SELECT identifier, traits FROM IDENTITIES WHERE environment_id = ? 
LIMIT 1", - params=[environment.api_key], - ).collect() - self.stdout.write(f"sample row: {sample}") - self.stdout.write( - f"live segments: {list(Segment.live_objects.filter(project=project).values_list('id', 'name'))}" - ) - sess.query_tag = "flagsmith:segment_membership:smoke_test:diag" - results = membership_services.compute_segment_counts_for_project( - project, sess - ) - self.stdout.write(f"compute_segment_counts_for_project: {results}") - - try: - membership = SegmentMembership.objects.get( - segment=segment, environment=environment - ) - self.stdout.write( - f"SegmentMembership: count={membership.count} last_synced_at={membership.last_synced_at}" - ) - if membership.count != N_PER_BUCKET: - raise CommandError( - f"Expected count={N_PER_BUCKET}, got {membership.count}" - ) - except SegmentMembership.DoesNotExist: - raise CommandError( - "SegmentMembership row not created — see diagnostics above" - ) - self.stdout.write(self.style.SUCCESS("✓ Counts match expected")) - - if not options["keep"]: - _cleanup_snowflake(environment.api_key) - self.stdout.write("snowflake rows cleaned up") - - -def _ensure_fixtures() -> tuple[Organisation, Project, Environment, Segment]: - org, _ = Organisation.objects.get_or_create(name="smoke-test") - project, _ = Project.objects.get_or_create(name=PROJECT_NAME, organisation=org) - environment, _ = Environment.objects.get_or_create(name=ENV_NAME, project=project) - segment, created = Segment.objects.get_or_create( - name=SEGMENT_NAME, - project=project, - defaults={"description": "smoke-test segment"}, - ) - if created or not segment.rules.exists(): - # ALL > ANY > condition tree mirroring how the dashboard builds segments. - outer = SegmentRule.objects.create(segment=segment, type=SegmentRule.ALL_RULE) - inner = SegmentRule.objects.create(rule=outer, type=SegmentRule.ANY_RULE) - Condition.objects.create( - rule=inner, - property=TRAIT_KEY, - operator="EQUAL", - value=MATCH_VALUE, - ) - return org, project, environment, segment - - -def _ensure_dynamo_table() -> str: - name = settings.IDENTITIES_TABLE_NAME_DYNAMO - client = boto3.client("dynamodb") - try: - client.describe_table(TableName=name) - client.delete_table(TableName=name) - client.get_waiter("table_not_exists").wait(TableName=name) - except client.exceptions.ResourceNotFoundException: - pass - - client.create_table( - TableName=name, - KeySchema=[{"AttributeName": "composite_key", "KeyType": "HASH"}], - AttributeDefinitions=[ - {"AttributeName": "composite_key", "AttributeType": "S"}, - {"AttributeName": "environment_api_key", "AttributeType": "S"}, - {"AttributeName": "identifier", "AttributeType": "S"}, - {"AttributeName": "identity_uuid", "AttributeType": "S"}, - ], - GlobalSecondaryIndexes=[ - { - "IndexName": "environment_api_key-identifier-index", - "KeySchema": [ - {"AttributeName": "environment_api_key", "KeyType": "HASH"}, - {"AttributeName": "identifier", "KeyType": "RANGE"}, - ], - "Projection": {"ProjectionType": "ALL"}, - }, - { - "IndexName": "identity_uuid-index", - "KeySchema": [ - {"AttributeName": "identity_uuid", "KeyType": "HASH"}, - ], - "Projection": {"ProjectionType": "ALL"}, - }, - ], - BillingMode="PAY_PER_REQUEST", - ) - client.get_waiter("table_exists").wait(TableName=name) - return name - - -def _seed_identities(env_api_key: str) -> None: - table = boto3.resource("dynamodb").Table(settings.IDENTITIES_TABLE_NAME_DYNAMO) - now = datetime.now(timezone.utc).isoformat() - with table.batch_writer() as batch: - for i in range(N_PER_BUCKET): - for value, label in 
((MATCH_VALUE, "match"), (NON_MATCH_VALUE, "no")): - identifier = f"{label}-{i}" - batch.put_item( - Item={ - "composite_key": f"{env_api_key}_{identifier}", - "environment_api_key": env_api_key, - "identifier": identifier, - "identity_uuid": str(uuid.uuid4()), - "created_date": now, - "identity_traits": [ - {"trait_key": TRAIT_KEY, "trait_value": value}, - ], - } - ) - - -def _cleanup_snowflake(env_api_key: str) -> None: - with membership_services.open_snowflake_session() as sess: - sess.query_tag = "flagsmith:segment_membership:smoke_test:cleanup" - sess.sql( - "DELETE FROM IDENTITIES WHERE environment_id = ?", - params=[env_api_key], - ).collect() From b0d4cf203d89baa2a8cdce909984f411ad5ad2f1 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Sun, 10 May 2026 15:21:58 +0100 Subject: [PATCH 09/12] test(segment_membership): Tighten mappers + migration tests Mappers: drop private-helper tests, replace with parametrised cases exercising `map_identity_document_to_snowflake_row` directly; trust TypedDict-required fields rather than caring for absent ones. Migration: assert the full DDL fed into `sess.sql(...)` and `spec` the Snowpark mock against the real Session class. beep boop --- api/segment_membership/mappers.py | 24 +- .../test_unit_segment_membership_mappers.py | 217 ++++++++++-------- .../test_unit_segment_membership_migration.py | 8 +- 3 files changed, 136 insertions(+), 113 deletions(-) diff --git a/api/segment_membership/mappers.py b/api/segment_membership/mappers.py index 779c475f9173..25b88bba9a37 100644 --- a/api/segment_membership/mappers.py +++ b/api/segment_membership/mappers.py @@ -1,29 +1,30 @@ import uuid from decimal import Decimal -from flagsmith_schemas.dynamodb import Identity as DynamoIdentity -from flagsmith_schemas.dynamodb import Trait as DynamoTrait +from flagsmith_schemas import dynamodb # (environment_id, id, identifier, identity_key, traits) SnowflakeIdentityRow = tuple[str, int, str, str, dict[str, object] | None] def map_identity_document_to_snowflake_row( - env_key: str, identity_doc: DynamoIdentity + env_key: str, + identity_doc: dynamodb.Identity, ) -> SnowflakeIdentityRow: """Project a Dynamo identity document onto the canonical IDENTITIES row tuple. 
The returned tuple aligns positionally with the schema `(environment_id, id, identifier, identity_key, traits)`.""" - identity_uuid = str(identity_doc["identity_uuid"]) - identifier = str(identity_doc.get("identifier") or "") - composite_key = str(identity_doc.get("composite_key") or identity_uuid) - traits = _flatten_traits(identity_doc.get("identity_traits")) + identity_uuid = identity_doc["identity_uuid"] + identifier = identity_doc["identifier"] + composite_key = identity_doc["composite_key"] + raw_traits = identity_doc.get("identity_traits") + traits = _flatten_traits(raw_traits) if raw_traits else None return ( env_key, _identity_id(identity_uuid), identifier, composite_key, - traits or None, + traits, ) @@ -44,14 +45,11 @@ def _coerce_trait_value(value: object) -> object: def _flatten_traits( - identity_traits: list[DynamoTrait] | None, + identity_traits: list[dynamodb.Trait], ) -> dict[str, object]: """Convert Dynamo's `[{trait_key, trait_value}, ...]` list into a - flat trait map, dropping entries with falsy keys.""" - if not identity_traits: - return {} + flat trait map.""" return { t["trait_key"]: _coerce_trait_value(t.get("trait_value")) for t in identity_traits - if t.get("trait_key") } diff --git a/api/tests/unit/segment_membership/test_unit_segment_membership_mappers.py b/api/tests/unit/segment_membership/test_unit_segment_membership_mappers.py index 8e823fcc0f76..fa6243a86709 100644 --- a/api/tests/unit/segment_membership/test_unit_segment_membership_mappers.py +++ b/api/tests/unit/segment_membership/test_unit_segment_membership_mappers.py @@ -1,129 +1,150 @@ from decimal import Decimal +import pytest from flagsmith_schemas.dynamodb import Identity as DynamoIdentity -from segment_membership.mappers import ( - _coerce_trait_value, - _flatten_traits, - _identity_id, - map_identity_document_to_snowflake_row, +from segment_membership.mappers import map_identity_document_to_snowflake_row + +UUID_A = "f47ac10b-58cc-4372-a567-0e02b2c3d479" +UUID_B = "550e8400-e29b-41d4-a716-446655440000" + + +@pytest.mark.parametrize( + "doc,expected", + [ + pytest.param( + { + "identity_uuid": UUID_A, + "identifier": "alice", + "environment_api_key": "env-key", + "composite_key": "env_x_alice", + "created_date": "2026-05-08T00:00:00Z", + "identity_traits": [ + {"trait_key": "plan", "trait_value": "growth"}, + ], + }, + ("env-key", "alice", "env_x_alice", {"plan": "growth"}), + id="single string trait", + ), + pytest.param( + { + "identity_uuid": UUID_A, + "identifier": "alice", + "environment_api_key": "env-key", + "composite_key": "env_x_alice", + "created_date": "2026-05-08T00:00:00Z", + "identity_traits": [], + }, + ("env-key", "alice", "env_x_alice", None), + id="empty traits collapse to NULL", + ), + pytest.param( + { + "identity_uuid": UUID_A, + "identifier": "alice", + "environment_api_key": "env-key", + "composite_key": "env_x_alice", + "created_date": "2026-05-08T00:00:00Z", + "identity_traits": [ + {"trait_key": "age", "trait_value": Decimal("18")}, + ], + }, + ("env-key", "alice", "env_x_alice", {"age": 18}), + id="whole-number Decimal narrows to int", + ), + pytest.param( + { + "identity_uuid": UUID_A, + "identifier": "alice", + "environment_api_key": "env-key", + "composite_key": "env_x_alice", + "created_date": "2026-05-08T00:00:00Z", + "identity_traits": [ + {"trait_key": "score", "trait_value": Decimal("1.5")}, + ], + }, + ("env-key", "alice", "env_x_alice", {"score": 1.5}), + id="fractional Decimal narrows to float", + ), + pytest.param( + { + "identity_uuid": UUID_A, + 
"identifier": "alice", + "environment_api_key": "env-key", + "composite_key": "env_x_alice", + "created_date": "2026-05-08T00:00:00Z", + "identity_traits": [ + {"trait_key": "plan", "trait_value": "growth"}, + {"trait_key": "team", "trait_value": "alpha"}, + ], + }, + ( + "env-key", + "alice", + "env_x_alice", + {"plan": "growth", "team": "alpha"}, + ), + id="multiple traits flatten to a single dict", + ), + ], ) +def test_map_identity_document_to_snowflake_row__cases__return_expected( + doc: DynamoIdentity, + expected: tuple[str, str, str, dict[str, object] | None], +) -> None: + # Given a Dynamo identity document + # When mapped onto an IDENTITIES row + env_id, _id, identifier, identity_key, traits = ( + map_identity_document_to_snowflake_row("env-key", doc) + ) - -def test_identity_id__same_uuid__produces_same_id() -> None: - # Given the same identity_uuid - uuid = "f47ac10b-58cc-4372-a567-0e02b2c3d479" - - # When the helper runs twice - a = _identity_id(uuid) - b = _identity_id(uuid) - - # Then the result is identical and fits in a signed 64-bit int - assert a == b - assert -(2**63) <= a < 2**63 - - -def test_coerce_trait_value__decimal_int__narrows_to_int() -> None: - # Given a Decimal that's a whole number - # When coerced - # Then it becomes a plain int - assert _coerce_trait_value(Decimal("3")) == 3 - assert isinstance(_coerce_trait_value(Decimal("3")), int) - - -def test_coerce_trait_value__decimal_fraction__narrows_to_float() -> None: - # Given a Decimal with a fractional component - # When coerced - # Then it becomes a float - assert _coerce_trait_value(Decimal("1.5")) == 1.5 - assert isinstance(_coerce_trait_value(Decimal("1.5")), float) - - -def test_coerce_trait_value__non_decimal__passes_through_unchanged() -> None: - # Given a value that isn't a Decimal - # When coerced - # Then it passes through unchanged - assert _coerce_trait_value("growth") == "growth" - assert _coerce_trait_value(True) is True - - -def test_flatten_traits__none__returns_empty_dict() -> None: - # Given no traits - # When flattened - # Then the result is an empty dict - assert _flatten_traits(None) == {} - - -def test_flatten_traits__list__returns_dict_dropping_empty_keys() -> None: - # Given a Dynamo trait list with one well-formed and one empty-key entry - # When flattened - # Then only the well-formed entry survives - assert _flatten_traits( - [ - {"trait_key": "plan", "trait_value": "growth"}, - {"trait_key": "", "trait_value": "skipped"}, - ] - ) == {"plan": "growth"} + # Then non-id columns line up positionally with the IDENTITIES schema + assert (env_id, identifier, identity_key, traits) == expected + # ...and the id column is a stable signed 64-bit projection of the UUID + assert -(2**63) <= _id < 2**63 -def test_map_identity_document_to_snowflake_row__with_traits__returns_tuple() -> None: - # Given a Dynamo identity document with traits +def test_map_identity_document_to_snowflake_row__same_uuid__same_id() -> None: + # Given two documents sharing an identity_uuid doc: DynamoIdentity = { - "identity_uuid": "f47ac10b-58cc-4372-a567-0e02b2c3d479", + "identity_uuid": UUID_A, "identifier": "alice", "environment_api_key": "env-key", "composite_key": "env_x_alice", "created_date": "2026-05-08T00:00:00Z", - "identity_traits": [{"trait_key": "plan", "trait_value": "growth"}], + "identity_traits": [], } - # When mapped - env_id, _id, identifier, identity_key, traits = ( - map_identity_document_to_snowflake_row("env-key", doc) - ) + # When mapped twice + a = map_identity_document_to_snowflake_row("env-a", 
doc) + b = map_identity_document_to_snowflake_row("env-b", doc) - # Then the columns line up positionally with the IDENTITIES schema - assert env_id == "env-key" - assert _id == _identity_id("f47ac10b-58cc-4372-a567-0e02b2c3d479") - assert identifier == "alice" - assert identity_key == "env_x_alice" - assert traits == {"plan": "growth"} + # Then the id projection is stable across calls + assert a[1] == b[1] -def test_map_identity_document_to_snowflake_row__no_traits__returns_none_for_traits() -> ( - None -): - # Given a Dynamo identity document with no trait entries - doc: DynamoIdentity = { - "identity_uuid": "f47ac10b-58cc-4372-a567-0e02b2c3d479", +def test_map_identity_document_to_snowflake_row__different_uuid__different_id() -> None: + # Given two documents with distinct identity_uuids + doc_a: DynamoIdentity = { + "identity_uuid": UUID_A, "identifier": "alice", "environment_api_key": "env-key", "composite_key": "env_x_alice", "created_date": "2026-05-08T00:00:00Z", "identity_traits": [], } - - # When mapped - *_, traits = map_identity_document_to_snowflake_row("env-key", doc) - - # Then the traits VARIANT slot is None (NULL) - assert traits is None - - -def test_map_identity_document_to_snowflake_row__no_composite_key__falls_back_to_uuid() -> ( - None -): - # Given an identity document missing the composite_key - doc: DynamoIdentity = { # type: ignore[typeddict-item] - "identity_uuid": "f47ac10b-58cc-4372-a567-0e02b2c3d479", - "identifier": "alice", + doc_b: DynamoIdentity = { + "identity_uuid": UUID_B, + "identifier": "bob", "environment_api_key": "env-key", + "composite_key": "env_x_bob", "created_date": "2026-05-08T00:00:00Z", "identity_traits": [], } # When mapped - *_, identity_key, _traits = map_identity_document_to_snowflake_row("env-key", doc) + a = map_identity_document_to_snowflake_row("env-key", doc_a) + b = map_identity_document_to_snowflake_row("env-key", doc_b) - # Then identity_key falls back to identity_uuid - assert identity_key == "f47ac10b-58cc-4372-a567-0e02b2c3d479" + # Then the id projections are distinct + assert a[1] != b[1] diff --git a/api/tests/unit/segment_membership/test_unit_segment_membership_migration.py b/api/tests/unit/segment_membership/test_unit_segment_membership_migration.py index 7a5defcd36af..a969906477e6 100644 --- a/api/tests/unit/segment_membership/test_unit_segment_membership_migration.py +++ b/api/tests/unit/segment_membership/test_unit_segment_membership_migration.py @@ -1,8 +1,12 @@ import importlib from unittest.mock import MagicMock +from flagsmith_sql_flag_engine.dialects import SnowflakeDialect from pytest_mock import MockerFixture +from snowflake.snowpark import Session +# Migration module names start with a digit, which `import` can't parse; +# `importlib.import_module` is the only way in. 
migration_module = importlib.import_module( "segment_membership.migrations.0002_setup_snowflake_identities_schema" ) @@ -35,7 +39,7 @@ def test_setup_snowflake_identities_schema__configured__runs_dialect_ddl( "is_snowflake_configured", return_value=True, ) - sess = MagicMock() + sess = MagicMock(spec=Session) open_sess = mocker.patch.object(migration_module, "open_snowflake_session") open_sess.return_value.__enter__.return_value = sess @@ -43,5 +47,5 @@ def test_setup_snowflake_identities_schema__configured__runs_dialect_ddl( migration_module.setup_snowflake_identities_schema(MagicMock(), MagicMock()) # Then the dialect's schema DDL was executed against the session - sess.sql.assert_called_once() + sess.sql.assert_called_once_with(SnowflakeDialect.schema_ddl) sess.sql.return_value.collect.assert_called_once_with() From 15d2c65b2106c0974d78c7582b70cdc0d74436f9 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Wed, 13 May 2026 22:36:46 +0100 Subject: [PATCH 10/12] refactor(segment_membership): Migrate PoC from Snowflake to ClickHouse Drops snowpark + the original Snowflake-pinned `flagsmith-sql-flag-engine` from the dependency tree; pins `flagsmith-sql-flag-engine = 0.1.0a2` (includes the new `ClickHouseDialect` from the engine-side stacked PR); adds `clickhouse-connect`. Highlights: - `is_clickhouse_configured()` short-circuits when `CLICKHOUSE_HOST` is unset. Other `CLICKHOUSE_*` settings have sensible defaults (port 8443, user `default`, secure HTTPS). - `open_clickhouse_client(log_comment=...)` is the CH session-level analogue of Snowflake's `query_tag`. The comment lands in `system.query_log` for per-(org, project) spend rollups. - Backfill: `ReplacingMergeTree(inserted_at)` table; daily INSERTs dedupe at merge time (most-recent `inserted_at` wins). Drops the per-environment DELETE the Snowpark path used. - Refresh: `FROM IDENTITIES FINAL i` forces dedup at read time so counts always reflect the latest backfill snapshot, regardless of where the merge cycle has gotten to. - The PoC's DDL is opinionated about ReplacingMergeTree + inserted_at version column; the engine's published `schema_ddl` stays `MergeTree` as the simplest correct shape, and the PoC overrides for its mutation-heavy workload. Translator output is engine- agnostic, so the override is invisible to the SQL flag engine. uv dep wiring: `[[tool.uv.index]]` adds the staging CodeArtifact PyPI; `[tool.uv.sources]` pins `flagsmith-sql-flag-engine` to that index. 
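For reviewers new to ReplacingMergeTree, a minimal sketch of the dedup
semantics the refresh leans on (toy table, illustration only, not part
of this patch):

    CREATE TABLE t (k String, v UInt8, inserted_at DateTime)
    ENGINE = ReplacingMergeTree(inserted_at) ORDER BY k;

    INSERT INTO t VALUES ('a', 1, now() - 60);  -- day-1 backfill
    INSERT INTO t VALUES ('a', 2, now());       -- day-2 re-insert, same key

    SELECT count() FROM t;        -- may still be 2: merges are async
    SELECT count() FROM t FINAL;  -- always 1: the newest inserted_at wins

Spend rollups read the log_comment attribution back out of the query
log, along the lines of (column names per stock system.query_log):

    SELECT log_comment, sum(read_rows), sum(query_duration_ms)
    FROM system.query_log
    WHERE type = 'QueryFinish'
      AND log_comment LIKE 'flagsmith:segment_membership:%'
    GROUP BY log_comment;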
beep boop
---
 api/app/settings/common.py                    |  25 +--
 api/pyproject.toml                            |  15 ++
 api/segment_membership/mappers.py             |  18 +-
 api/segment_membership/metrics.py             |   7 +-
 ...0002_setup_clickhouse_identities_schema.py |  78 ++++++++
 .../0002_setup_snowflake_identities_schema.py |  43 -----
 api/segment_membership/services.py            |  96 +++++-----
 api/segment_membership/tasks.py               | 104 ++++++-----
 .../test_unit_segment_membership_mappers.py   |  20 ++-
 .../test_unit_segment_membership_migration.py |  41 ++---
 .../test_unit_segment_membership_services.py  | 166 +++++++++++-------
 .../test_unit_segment_membership_tasks.py     | 153 ++++++++--------
 api/uv.lock                                   | 155 ++++++++++++++++
 .../observability/_events-catalogue.md        |  16 +-
 .../observability/_metrics-catalogue.md       |   2 +-
 15 files changed, 601 insertions(+), 338 deletions(-)
 create mode 100644 api/segment_membership/migrations/0002_setup_clickhouse_identities_schema.py
 delete mode 100644 api/segment_membership/migrations/0002_setup_snowflake_identities_schema.py

diff --git a/api/app/settings/common.py b/api/app/settings/common.py
index bf4e1cc84dcc..bcf65582a9cb 100644
--- a/api/app/settings/common.py
+++ b/api/app/settings/common.py
@@ -1426,15 +1426,16 @@
 OSIC_UPDATE_BATCH_SIZE = env.int("OSIC_UPDATE_BATCH_SIZE", default=500)
 
-# --- Snowflake (segment membership inspection) -------------------------------
-# All-None default disables the segment_membership backfill and refresh tasks.
-# When set, the api/segments/membership tasks open a Snowpark session and run
-# against this account. See docs/deployment/observability/segment-membership.md
-# for the operational shape.
-SNOWFLAKE_ACCOUNT = env.str("SNOWFLAKE_ACCOUNT", default=None)
-SNOWFLAKE_USER = env.str("SNOWFLAKE_USER", default=None)
-SNOWFLAKE_PRIVATE_KEY_PATH = env.str("SNOWFLAKE_PRIVATE_KEY_PATH", default=None)
-SNOWFLAKE_ROLE = env.str("SNOWFLAKE_ROLE", default=None)
-SNOWFLAKE_WAREHOUSE = env.str("SNOWFLAKE_WAREHOUSE", default=None)
-SNOWFLAKE_DATABASE = env.str("SNOWFLAKE_DATABASE", default=None)
-SNOWFLAKE_SCHEMA = env.str("SNOWFLAKE_SCHEMA", default=None)
+# --- ClickHouse (segment membership inspection) ------------------------------
+# An unset CLICKHOUSE_HOST disables the segment_membership backfill and refresh
+# tasks. When set, the api/segments/membership tasks open a clickhouse-connect
+# client and run against this host. See
+# docs/deployment/observability/segment-membership.md for the operational shape.
+CLICKHOUSE_HOST = env.str("CLICKHOUSE_HOST", default=None)
+CLICKHOUSE_PORT = env.int("CLICKHOUSE_PORT", default=8443)
+CLICKHOUSE_USER = env.str("CLICKHOUSE_USER", default="default")
+CLICKHOUSE_PASSWORD = env.str("CLICKHOUSE_PASSWORD", default="")
+CLICKHOUSE_DATABASE = env.str("CLICKHOUSE_DATABASE", default="default")
+# ClickHouse Cloud uses HTTPS on 8443; OSS deployments typically run HTTP on
+# 8123. Set CLICKHOUSE_SECURE=1 for HTTPS.
+CLICKHOUSE_SECURE = env.bool("CLICKHOUSE_SECURE", default=True) diff --git a/api/pyproject.toml b/api/pyproject.toml index d470d8644028..444cecc734d5 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -44,6 +44,8 @@ dependencies = [ "drf-writable-nested>=0.6.2,<0.7.0", "django-filter>=2.4.0,<2.5.0", "flagsmith-flag-engine>=10.1.0,<11.0.0", + "flagsmith-sql-flag-engine==0.1.0a2", + "clickhouse-connect>=0.15,<1.0", "boto3>=1.35.95,<1.36.0", "slack-sdk>=3.9.0,<3.10.0", "asgiref>=3.8.1,<3.9.0", @@ -177,6 +179,15 @@ flagsmith-ldap = { git = "https://github.com/flagsmith/flagsmith-ldap", tag = "v workflows-logic = { git = "https://github.com/flagsmith/flagsmith-workflows", tag = "v3.4.0" } licensing = { git = "https://github.com/flagsmith/licensing", tag = "v0.3.0" } flagsmith-private = { git = "https://github.com/Flagsmith/flagsmith-private", tag = "v0.4.4" } +flagsmith-sql-flag-engine = { index = "flagsmith-pypi-staging" } + +# CodeArtifact-hosted private index for pre-release Flagsmith packages. +# Auth token is fetched at install time via `aws codeartifact get-authorization-token` +# (see Makefile `install-packages` target). +[[tool.uv.index]] +name = "flagsmith-pypi-staging" +url = "https://flagsmith-staging-302456015006.d.codeartifact.eu-west-2.amazonaws.com/pypi/flagsmith-pypi-staging/simple/" +explicit = true [tool.uv] required-version = "==0.8.14" @@ -539,6 +550,10 @@ ignore_missing_imports = true module = ["openfeature_flagsmith.*"] ignore_missing_imports = true +[[tool.mypy.overrides]] +module = ["clickhouse_connect.*"] +ignore_missing_imports = true + [tool.django-stubs] django_settings_module = "app.settings.local" diff --git a/api/segment_membership/mappers.py b/api/segment_membership/mappers.py index 25b88bba9a37..8b21a6ce34cc 100644 --- a/api/segment_membership/mappers.py +++ b/api/segment_membership/mappers.py @@ -4,16 +4,21 @@ from flagsmith_schemas import dynamodb # (environment_id, id, identifier, identity_key, traits) -SnowflakeIdentityRow = tuple[str, int, str, str, dict[str, object] | None] +ClickHouseIdentityRow = tuple[str, int, str, str, dict[str, object] | None] -def map_identity_document_to_snowflake_row( +def map_identity_document_to_clickhouse_row( env_key: str, identity_doc: dynamodb.Identity, -) -> SnowflakeIdentityRow: +) -> ClickHouseIdentityRow: """Project a Dynamo identity document onto the canonical IDENTITIES row tuple. The returned tuple aligns positionally with the schema - `(environment_id, id, identifier, identity_key, traits)`.""" + `(environment_id, id, identifier, identity_key, traits)`. + + ClickHouse's `JSON` column accepts Python dicts directly via + `clickhouse-connect`'s bulk insert — clickhouse-connect serialises + on the way out; CH stores each top-level key as a typed subcolumn. + """ identity_uuid = identity_doc["identity_uuid"] identifier = identity_doc["identifier"] composite_key = identity_doc["composite_key"] @@ -34,9 +39,10 @@ def _identity_id(identity_uuid: str) -> int: def _coerce_trait_value(value: object) -> object: - """Coerce Dynamo-decoded values for VARIANT serialisation. boto3 + """Coerce Dynamo-decoded values for JSON serialisation. 
boto3
     returns `Decimal` for numbers; we narrow to int when whole, float
-    otherwise, so the VARIANT keeps a useful numeric type."""
+    otherwise, so the JSON column stores a meaningful numeric subcolumn
+    type (Int64 / UInt64 / Float64) rather than failing to serialise."""
     if isinstance(value, Decimal):
         if value == value.to_integral_value():
             return int(value)
diff --git a/api/segment_membership/metrics.py b/api/segment_membership/metrics.py
index ba5cde4e2c44..db44c47d2143 100644
--- a/api/segment_membership/metrics.py
+++ b/api/segment_membership/metrics.py
@@ -2,12 +2,13 @@
 
 # All metrics are global — refresh and backfill cardinality scales with
 # project + environment counts, which would blow up Prometheus storage.
-# Drill-down lives in Snowflake's query history (tagged via QUERY_TAG)
-# and in structlog events that carry per-project/env IDs.
+# Drill-down lives in ClickHouse's `system.query_log` (tagged via per-query
+# `log_comment` settings) and in structlog events that carry per-project/env
+# IDs.
 
 flagsmith_segment_membership_backfill_identities_total = prometheus_client.Counter(
     "flagsmith_segment_membership_backfill_identities_total",
-    "Total identities mirrored from Dynamo to Snowflake by the segment-membership backfill task across all environments.",
+    "Total identities mirrored from Dynamo to ClickHouse by the segment-membership backfill task across all environments.",
 )
 
 flagsmith_segment_membership_backfill_duration_seconds = prometheus_client.Histogram(
diff --git a/api/segment_membership/migrations/0002_setup_clickhouse_identities_schema.py b/api/segment_membership/migrations/0002_setup_clickhouse_identities_schema.py
new file mode 100644
index 000000000000..64a0d8e3d7f2
--- /dev/null
+++ b/api/segment_membership/migrations/0002_setup_clickhouse_identities_schema.py
@@ -0,0 +1,78 @@
+"""Create the canonical IDENTITIES table the SQL flag engine emits
+against when a ClickHouse cluster is configured.
+
+The engine's published `ClickHouseDialect.schema_ddl` is `MergeTree`
+with five columns — the "simplest correct shape" for any consumer.
+The PoC overrides to `ReplacingMergeTree(inserted_at)` over
+`(environment_id, id)` plus an `inserted_at` version column: daily
+backfill INSERTs into the same primary key get deduplicated at merge
+time (most-recent `inserted_at` wins), and the refresh task adds
+`FROM IDENTITIES FINAL` for strict reads. The translator's emitted
+predicates are engine-agnostic and work unchanged.
+
+No-op when `CLICKHOUSE_HOST` is unset, so self-hosted installs
+without ClickHouse (and the test suite) migrate cleanly.
+"""
+
+from django.db import migrations
+from django.db.backends.base.schema import BaseDatabaseSchemaEditor
+from django.db.migrations.state import StateApps
+
+from segment_membership.services import (
+    is_clickhouse_configured,
+    open_clickhouse_client,
+)
+
+_SCHEMA_DDL = """\
+CREATE TABLE IF NOT EXISTS IDENTITIES (
+    -- environment.key from EnvironmentContext; used as the env partition
+    environment_id String,
+
+    -- stable per-identity row id derived from identity_uuid bytes (signed 64-bit)
+    id Int64,
+
+    -- the identity's external identifier, exposed as $.identity.identifier
+    identifier String,
+
+    -- the composite identity key, exposed as $.identity.key
+    identity_key String,
+
+    -- the identity's full trait map. ClickHouse's `JSON` type stores each
+    -- path as a typed subcolumn so trait lookups are columnar reads, not
+    -- per-row JSON parses. SQL NULL for an identity with no traits.
+ traits JSON, + + -- version column for ReplacingMergeTree dedup. Defaults to insert time + -- so the most-recent backfill of a given (environment_id, id) wins. + inserted_at DateTime DEFAULT now() +) +ENGINE = ReplacingMergeTree(inserted_at) +ORDER BY (environment_id, id) +""" + + +def setup_clickhouse_identities_schema( + apps: StateApps, schema_editor: BaseDatabaseSchemaEditor +) -> None: + if not is_clickhouse_configured(): + return + with open_clickhouse_client() as client: + client.command(_SCHEMA_DDL) + + +class Migration(migrations.Migration): + # The ClickHouse DDL talks to a remote service; running it inside + # Django's default-atomic migration block would couple this Postgres + # migration to a ClickHouse transaction we don't actually need. + atomic = False + + dependencies = [ + ("segment_membership", "0001_initial"), + ] + + operations = [ + migrations.RunPython( + setup_clickhouse_identities_schema, + reverse_code=migrations.RunPython.noop, + ), + ] diff --git a/api/segment_membership/migrations/0002_setup_snowflake_identities_schema.py b/api/segment_membership/migrations/0002_setup_snowflake_identities_schema.py deleted file mode 100644 index a510036f339b..000000000000 --- a/api/segment_membership/migrations/0002_setup_snowflake_identities_schema.py +++ /dev/null @@ -1,43 +0,0 @@ -"""Create the canonical IDENTITIES table the SQL flag engine emits -against when a Snowflake account is configured. - -No-op when SNOWFLAKE_* settings are unset, so self-hosted installs -without Snowflake (and the test suite) migrate cleanly. -""" - -from django.db import migrations -from django.db.backends.base.schema import BaseDatabaseSchemaEditor -from django.db.migrations.state import StateApps -from flagsmith_sql_flag_engine.dialects import SnowflakeDialect - -from segment_membership.services import ( - is_snowflake_configured, - open_snowflake_session, -) - - -def setup_snowflake_identities_schema( - apps: StateApps, schema_editor: BaseDatabaseSchemaEditor -) -> None: - if not is_snowflake_configured(): - return - with open_snowflake_session() as sess: - sess.sql(SnowflakeDialect.schema_ddl).collect() - - -class Migration(migrations.Migration): - # The Snowflake DDL talks to a remote service; running it inside - # Django's default-atomic migration block would couple this Postgres - # migration to a Snowflake transaction we don't actually need. 
- atomic = False - - dependencies = [ - ("segment_membership", "0001_initial"), - ] - - operations = [ - migrations.RunPython( - setup_snowflake_identities_schema, - reverse_code=migrations.RunPython.noop, - ), - ] diff --git a/api/segment_membership/services.py b/api/segment_membership/services.py index 82770206eff6..9f9915fc1354 100644 --- a/api/segment_membership/services.py +++ b/api/segment_membership/services.py @@ -1,12 +1,13 @@ from contextlib import contextmanager from typing import Iterator +import clickhouse_connect import structlog +from clickhouse_connect.driver import Client from django.conf import settings from flag_engine.context.types import EvaluationContext from flagsmith_sql_flag_engine import TranslateContext, translate_segment -from flagsmith_sql_flag_engine.dialects import SnowflakeDialect -from snowflake.snowpark import Session +from flagsmith_sql_flag_engine.dialects import ClickHouseDialect from integrations.flagsmith.client import get_openfeature_client from organisations.models import Organisation @@ -29,40 +30,41 @@ def is_membership_enabled(organisation: Organisation) -> bool: ) -def is_snowflake_configured() -> bool: - """All SNOWFLAKE_* settings required to open a session must be - populated. Tasks short-circuit when this returns False.""" - return all( - getattr(settings, name) - for name in ( - "SNOWFLAKE_ACCOUNT", - "SNOWFLAKE_USER", - "SNOWFLAKE_PRIVATE_KEY_PATH", - "SNOWFLAKE_DATABASE", - "SNOWFLAKE_SCHEMA", - "SNOWFLAKE_WAREHOUSE", - ) - ) +def is_clickhouse_configured() -> bool: + """`CLICKHOUSE_HOST` is the gate — every other CLICKHOUSE_* setting + has a sensible default. Tasks short-circuit when this returns False.""" + return bool(settings.CLICKHOUSE_HOST) @contextmanager -def open_snowflake_session() -> Iterator[Session]: - """Open a Snowpark session from `SNOWFLAKE_*` settings.""" - config: dict[str, str | None] = { - "account": settings.SNOWFLAKE_ACCOUNT, - "user": settings.SNOWFLAKE_USER, - "warehouse": settings.SNOWFLAKE_WAREHOUSE, - "database": settings.SNOWFLAKE_DATABASE, - "schema": settings.SNOWFLAKE_SCHEMA, - "private_key_file": settings.SNOWFLAKE_PRIVATE_KEY_PATH, +def open_clickhouse_client(*, log_comment: str | None = None) -> Iterator[Client]: + """Open a clickhouse-connect client from `CLICKHOUSE_*` settings. + + `log_comment` lands on every query the client runs as a + `log_comment` session setting; it's our spend-attribution analogue + of Snowflake's `QUERY_TAG` and shows up in `system.query_log` for + per-org / per-project rollups. + """ + client_settings: dict[str, str | int] = { + # Required for `JSON`-column DDL on ClickHouse Cloud as of 25.12. + # No-op on OSS builds where the type is already GA. 
+ "allow_experimental_json_type": 1, } - if settings.SNOWFLAKE_ROLE: - config["role"] = settings.SNOWFLAKE_ROLE - sess = Session.builder.configs(config).create() + if log_comment: + client_settings["log_comment"] = log_comment + client = clickhouse_connect.get_client( + host=settings.CLICKHOUSE_HOST, + port=settings.CLICKHOUSE_PORT, + username=settings.CLICKHOUSE_USER, + password=settings.CLICKHOUSE_PASSWORD, + database=settings.CLICKHOUSE_DATABASE, + secure=settings.CLICKHOUSE_SECURE, + settings=client_settings, + ) try: - yield sess + yield client finally: - sess.close() + client.close() def get_projects_to_process() -> Iterator[Project]: @@ -79,7 +81,7 @@ def get_projects_to_process() -> Iterator[Project]: def compute_segment_counts_for_project( - project: Project, session: Session + project: Project, client: Client ) -> list[SegmentMembership]: """Run one batched `SELECT ... UNION ALL` counting identity matches for every (canonical-segment, environment) pair in `project`. @@ -98,9 +100,14 @@ def compute_segment_counts_for_project( regex pattern unsupported by the active dialect — are skipped entirely. - Environment keys are bound as parameters, not f-string-spliced; - the predicate from `translate_segment` is already escape-safe per - the SQL flag engine's contract. + Environment keys are bound as a named array parameter; the + predicate from `translate_segment` is already escape-safe per the + SQL flag engine's contract. + + The `FROM IDENTITIES FINAL` keyword forces ReplacingMergeTree to + dedupe rows at query time. Counts are read strictly against the + most-recent backfill, regardless of how many merge passes have + happened since. """ segments = list(Segment.live_objects.filter(project=project)) env_id_by_key: dict[str, int] = dict( @@ -109,10 +116,7 @@ def compute_segment_counts_for_project( if not segments or not env_id_by_key: return [] - env_keys = list(env_id_by_key) - env_placeholders = ",".join("?" 
* len(env_keys)) - dialect = SnowflakeDialect() - + dialect = ClickHouseDialect() select_clauses: list[str] = [] for seg in segments: translate_ctx = TranslateContext( @@ -135,9 +139,9 @@ def compute_segment_counts_for_project( continue select_clauses.append( f"SELECT {seg.id} AS segment_id, " - f"i.environment_id AS env_key, COUNT(*) AS c " - f"FROM IDENTITIES i " - f"WHERE i.environment_id IN ({env_placeholders}) AND ({predicate}) " + f"i.environment_id AS env_key, count() AS c " + f"FROM IDENTITIES FINAL i " + f"WHERE i.environment_id IN {{env_keys:Array(String)}} AND ({predicate}) " f"GROUP BY i.environment_id" ) @@ -145,17 +149,17 @@ def compute_segment_counts_for_project( return [] sql = "\nUNION ALL\n".join(select_clauses) - rows = session.sql(sql, params=env_keys * len(select_clauses)).collect() + result = client.query(sql, parameters={"env_keys": list(env_id_by_key)}) memberships: list[SegmentMembership] = [] - for row in rows: - env_id = env_id_by_key.get(str(row["ENV_KEY"])) + for row in result.result_rows: + env_id = env_id_by_key.get(str(row[1])) if env_id is None: continue memberships.append( SegmentMembership( - segment_id=int(row["SEGMENT_ID"]), + segment_id=int(row[0]), environment_id=env_id, - count=int(row["C"]), + count=int(row[2]), ) ) return memberships diff --git a/api/segment_membership/tasks.py b/api/segment_membership/tasks.py index 49cff07f1312..5d61b25e68f5 100644 --- a/api/segment_membership/tasks.py +++ b/api/segment_membership/tasks.py @@ -1,12 +1,17 @@ -"""Tasks: backfill IDENTITIES from Dynamo to Snowflake daily, then +"""Tasks: backfill IDENTITIES from Dynamo to ClickHouse daily, then refresh per-segment counts in the `SegmentMembership` cache. The backfill recurs daily and, once it finishes, fans out one `refresh_project_segment_counts` per project — guarantees the refresh always reads the freshly backfilled snapshot rather than racing a -separate schedule. Both tasks short-circuit when SNOWFLAKE_* settings -are unset, and skip per-organisation when the +separate schedule. Both tasks short-circuit when `CLICKHOUSE_HOST` is +unset, and skip per-organisation when the `segment_membership_inspection` FoF flag is False. + +ClickHouse's `IDENTITIES` table is `ReplacingMergeTree(inserted_at) +ORDER BY (environment_id, id)`. Daily re-inserts keep "most-recent +wins" semantics at merge time; `compute_segment_counts_for_project` +emits `FROM IDENTITIES FINAL` to dedupe at read time. 
""" from datetime import timedelta @@ -15,13 +20,6 @@ import structlog from django.utils import timezone from flagsmith_schemas.dynamodb import Identity as DynamoIdentity -from snowflake.snowpark.types import ( - LongType, - StringType, - StructField, - StructType, - VariantType, -) from task_processor.decorators import ( register_recurring_task, register_task_handler, @@ -29,7 +27,7 @@ from environments.dynamodb.wrappers.identity_wrapper import DynamoIdentityWrapper from projects.models import Project -from segment_membership.mappers import map_identity_document_to_snowflake_row +from segment_membership.mappers import map_identity_document_to_clickhouse_row from segment_membership.metrics import ( flagsmith_segment_membership_backfill_duration_seconds, flagsmith_segment_membership_backfill_identities_total, @@ -40,9 +38,9 @@ from segment_membership.services import ( compute_segment_counts_for_project, get_projects_to_process, + is_clickhouse_configured, is_membership_enabled, - is_snowflake_configured, - open_snowflake_session, + open_clickhouse_client, ) from util.util import batched @@ -51,14 +49,12 @@ # Per-INSERT row count; bounds memory while loading large environments. _INSERT_BATCH_SIZE = 1000 -_IDENTITIES_SCHEMA = StructType( - [ - StructField("environment_id", StringType()), - StructField("id", LongType()), - StructField("identifier", StringType()), - StructField("identity_key", StringType()), - StructField("traits", VariantType()), - ] +_IDENTITIES_COLUMN_NAMES = ( + "environment_id", + "id", + "identifier", + "identity_key", + "traits", ) @@ -70,18 +66,17 @@ # truncating the task processor's lease. timeout=timedelta(hours=4), ) -def backfill_identities_to_snowflake() -> None: - """Replace Snowflake's IDENTITIES rows for every relevant - environment with the current Dynamo state. Once the backfill - finishes, fans out one `refresh_project_segment_counts` task per - project so the count refresh always sees fresh data. - - Per-statement implicit commits leave a brief window where readers - see an empty partition mid-refresh — a PoC tradeoff later fixed - by CDC. +def backfill_identities_to_clickhouse() -> None: + """Insert the current Dynamo state for every relevant environment + into ClickHouse's IDENTITIES table. The table is a + `ReplacingMergeTree` keyed on `(environment_id, id)` — duplicates + from prior runs are deduplicated at merge time (most-recent + `inserted_at` wins). Once the backfill finishes, fans out one + `refresh_project_segment_counts` task per project so the count + refresh always sees fresh data. 
""" - if not is_snowflake_configured(): - logger.info("backfill.skipped", reason="snowflake_not_configured") + if not is_clickhouse_configured(): + logger.info("backfill.skipped", reason="clickhouse_not_configured") return wrapper = DynamoIdentityWrapper() @@ -90,36 +85,35 @@ def backfill_identities_to_snowflake() -> None: return refreshable_project_ids: list[int] = [] - with open_snowflake_session() as sess: + with open_clickhouse_client() as client: for project in get_projects_to_process(): refreshable_project_ids.append(project.id) + log_comment = ( + "flagsmith:segment_membership:backfill" + f":org_{project.organisation_id}" + f":project_{project.id}" + ) for env in project.environments.all(): env_key = env.api_key row_count = 0 - sess.query_tag = ( - "flagsmith:segment_membership:backfill" - f":org_{project.organisation_id}" - f":project_{project.id}" - ) try: with flagsmith_segment_membership_backfill_duration_seconds.time(): - sess.sql( - "DELETE FROM IDENTITIES WHERE environment_id = ?", - params=[env_key], - ).collect() for batch in batched( wrapper.iter_all_items_paginated(env_key), _INSERT_BATCH_SIZE, ): rows = [ - map_identity_document_to_snowflake_row( + map_identity_document_to_clickhouse_row( env_key, cast(DynamoIdentity, doc) ) for doc in batch ] - sess.create_dataframe( - rows, schema=_IDENTITIES_SCHEMA - ).write.mode("append").save_as_table("IDENTITIES") + client.insert( + "IDENTITIES", + rows, + column_names=list(_IDENTITIES_COLUMN_NAMES), + settings={"log_comment": log_comment}, + ) row_count += len(rows) except Exception: logger.exception( @@ -150,11 +144,11 @@ def refresh_project_segment_counts(project_id: int) -> None: """Compute per-segment match counts for a single project and upsert into `SegmentMembership`. Re-checks the FoF flag at execution time so a stale fan-out skips orgs that have since been disabled.""" - if not is_snowflake_configured(): + if not is_clickhouse_configured(): logger.info( "refresh.project.skipped", project__id=project_id, - reason="snowflake_not_configured", + reason="clickhouse_not_configured", ) return @@ -167,17 +161,17 @@ def refresh_project_segment_counts(project_id: int) -> None: ) return + log_comment = ( + "flagsmith:segment_membership:refresh" + f":org_{project.organisation_id}" + f":project_{project.id}" + ) with ( flagsmith_segment_membership_refresh_duration_seconds.time(), - open_snowflake_session() as sess, + open_clickhouse_client(log_comment=log_comment) as client, ): - sess.query_tag = ( - "flagsmith:segment_membership:refresh" - f":org_{project.organisation_id}" - f":project_{project.id}" - ) try: - memberships = compute_segment_counts_for_project(project, sess) + memberships = compute_segment_counts_for_project(project, client) except Exception: flagsmith_segment_membership_refresh_failures_total.inc() logger.exception("refresh.project.failed", project__id=project_id) diff --git a/api/tests/unit/segment_membership/test_unit_segment_membership_mappers.py b/api/tests/unit/segment_membership/test_unit_segment_membership_mappers.py index fa6243a86709..fca714975354 100644 --- a/api/tests/unit/segment_membership/test_unit_segment_membership_mappers.py +++ b/api/tests/unit/segment_membership/test_unit_segment_membership_mappers.py @@ -3,7 +3,7 @@ import pytest from flagsmith_schemas.dynamodb import Identity as DynamoIdentity -from segment_membership.mappers import map_identity_document_to_snowflake_row +from segment_membership.mappers import map_identity_document_to_clickhouse_row UUID_A = "f47ac10b-58cc-4372-a567-0e02b2c3d479" 
UUID_B = "550e8400-e29b-41d4-a716-446655440000" @@ -88,14 +88,14 @@ ), ], ) -def test_map_identity_document_to_snowflake_row__cases__return_expected( +def test_map_identity_document_to_clickhouse_row__cases__return_expected( doc: DynamoIdentity, expected: tuple[str, str, str, dict[str, object] | None], ) -> None: # Given a Dynamo identity document # When mapped onto an IDENTITIES row env_id, _id, identifier, identity_key, traits = ( - map_identity_document_to_snowflake_row("env-key", doc) + map_identity_document_to_clickhouse_row("env-key", doc) ) # Then non-id columns line up positionally with the IDENTITIES schema @@ -104,7 +104,7 @@ def test_map_identity_document_to_snowflake_row__cases__return_expected( assert -(2**63) <= _id < 2**63 -def test_map_identity_document_to_snowflake_row__same_uuid__same_id() -> None: +def test_map_identity_document_to_clickhouse_row__same_uuid__same_id() -> None: # Given two documents sharing an identity_uuid doc: DynamoIdentity = { "identity_uuid": UUID_A, @@ -116,14 +116,16 @@ def test_map_identity_document_to_snowflake_row__same_uuid__same_id() -> None: } # When mapped twice - a = map_identity_document_to_snowflake_row("env-a", doc) - b = map_identity_document_to_snowflake_row("env-b", doc) + a = map_identity_document_to_clickhouse_row("env-a", doc) + b = map_identity_document_to_clickhouse_row("env-b", doc) # Then the id projection is stable across calls assert a[1] == b[1] -def test_map_identity_document_to_snowflake_row__different_uuid__different_id() -> None: +def test_map_identity_document_to_clickhouse_row__different_uuid__different_id() -> ( + None +): # Given two documents with distinct identity_uuids doc_a: DynamoIdentity = { "identity_uuid": UUID_A, @@ -143,8 +145,8 @@ def test_map_identity_document_to_snowflake_row__different_uuid__different_id() } # When mapped - a = map_identity_document_to_snowflake_row("env-key", doc_a) - b = map_identity_document_to_snowflake_row("env-key", doc_b) + a = map_identity_document_to_clickhouse_row("env-key", doc_a) + b = map_identity_document_to_clickhouse_row("env-key", doc_b) # Then the id projections are distinct assert a[1] != b[1] diff --git a/api/tests/unit/segment_membership/test_unit_segment_membership_migration.py b/api/tests/unit/segment_membership/test_unit_segment_membership_migration.py index a969906477e6..dc9be961ca23 100644 --- a/api/tests/unit/segment_membership/test_unit_segment_membership_migration.py +++ b/api/tests/unit/segment_membership/test_unit_segment_membership_migration.py @@ -1,51 +1,52 @@ import importlib from unittest.mock import MagicMock -from flagsmith_sql_flag_engine.dialects import SnowflakeDialect +from clickhouse_connect.driver import Client from pytest_mock import MockerFixture -from snowflake.snowpark import Session # Migration module names start with a digit, which `import` can't parse; # `importlib.import_module` is the only way in. 
migration_module = importlib.import_module( - "segment_membership.migrations.0002_setup_snowflake_identities_schema" + "segment_membership.migrations.0002_setup_clickhouse_identities_schema" ) -def test_setup_snowflake_identities_schema__unconfigured__skips( +def test_setup_clickhouse_identities_schema__unconfigured__skips( mocker: MockerFixture, ) -> None: - # Given Snowflake settings unconfigured + # Given ClickHouse settings unconfigured mocker.patch.object( migration_module, - "is_snowflake_configured", + "is_clickhouse_configured", return_value=False, ) - open_sess = mocker.patch.object(migration_module, "open_snowflake_session") + open_client = mocker.patch.object(migration_module, "open_clickhouse_client") # When the migration's RunPython entry runs - migration_module.setup_snowflake_identities_schema(MagicMock(), MagicMock()) + migration_module.setup_clickhouse_identities_schema(MagicMock(), MagicMock()) - # Then it short-circuits without opening a session - open_sess.assert_not_called() + # Then it short-circuits without opening a client + open_client.assert_not_called() -def test_setup_snowflake_identities_schema__configured__runs_dialect_ddl( +def test_setup_clickhouse_identities_schema__configured__runs_ddl( mocker: MockerFixture, ) -> None: - # Given Snowflake configured and a mocked Snowpark session + # Given ClickHouse configured and a mocked client mocker.patch.object( migration_module, - "is_snowflake_configured", + "is_clickhouse_configured", return_value=True, ) - sess = MagicMock(spec=Session) - open_sess = mocker.patch.object(migration_module, "open_snowflake_session") - open_sess.return_value.__enter__.return_value = sess + client = MagicMock(spec=Client) + open_client = mocker.patch.object(migration_module, "open_clickhouse_client") + open_client.return_value.__enter__.return_value = client # When the migration's RunPython entry runs - migration_module.setup_snowflake_identities_schema(MagicMock(), MagicMock()) + migration_module.setup_clickhouse_identities_schema(MagicMock(), MagicMock()) - # Then the dialect's schema DDL was executed against the session - sess.sql.assert_called_once_with(SnowflakeDialect.schema_ddl) - sess.sql.return_value.collect.assert_called_once_with() + # Then the migration's DDL was executed against the client. The PoC's + # schema overrides the engine's default with ReplacingMergeTree + an + # inserted_at version column; assert on the full DDL string the + # migration owns rather than peeking inside it. 
+ client.command.assert_called_once_with(migration_module._SCHEMA_DDL) diff --git a/api/tests/unit/segment_membership/test_unit_segment_membership_services.py b/api/tests/unit/segment_membership/test_unit_segment_membership_services.py index 5612ad969827..28da10a8c893 100644 --- a/api/tests/unit/segment_membership/test_unit_segment_membership_services.py +++ b/api/tests/unit/segment_membership/test_unit_segment_membership_services.py @@ -1,18 +1,18 @@ from unittest.mock import MagicMock +from clickhouse_connect.driver import Client from pytest_django.fixtures import SettingsWrapper from pytest_mock import MockerFixture from environments.models import Environment from organisations.models import Organisation from projects.models import Project -from segment_membership import services from segment_membership.services import ( compute_segment_counts_for_project, get_projects_to_process, + is_clickhouse_configured, is_membership_enabled, - is_snowflake_configured, - open_snowflake_session, + open_clickhouse_client, ) from segments.models import Segment, SegmentRule from tests.types import EnableFeaturesFixture @@ -40,63 +40,97 @@ def test_is_membership_enabled__flag_on__returns_true( assert is_membership_enabled(organisation) is True -def test_is_snowflake_configured__all_set__returns_true( +def test_is_clickhouse_configured__host_set__returns_true( settings: SettingsWrapper, ) -> None: - # Given every required SNOWFLAKE_* setting is populated - settings.SNOWFLAKE_ACCOUNT = "acc" - settings.SNOWFLAKE_USER = "u" - settings.SNOWFLAKE_PRIVATE_KEY_PATH = "/key" - settings.SNOWFLAKE_DATABASE = "db" - settings.SNOWFLAKE_SCHEMA = "sch" - settings.SNOWFLAKE_WAREHOUSE = "wh" + # Given CLICKHOUSE_HOST is populated + settings.CLICKHOUSE_HOST = "ch.example.com" # When checked # Then the helper reports the feature configured - assert is_snowflake_configured() is True + assert is_clickhouse_configured() is True -def test_is_snowflake_configured__missing_account__returns_false( +def test_is_clickhouse_configured__host_unset__returns_false( settings: SettingsWrapper, ) -> None: - # Given one required setting is unset - settings.SNOWFLAKE_ACCOUNT = None - settings.SNOWFLAKE_USER = "u" - settings.SNOWFLAKE_PRIVATE_KEY_PATH = "/key" - settings.SNOWFLAKE_DATABASE = "db" - settings.SNOWFLAKE_SCHEMA = "sch" - settings.SNOWFLAKE_WAREHOUSE = "wh" + # Given CLICKHOUSE_HOST is unset + settings.CLICKHOUSE_HOST = None # When checked # Then the helper reports the feature unconfigured - assert is_snowflake_configured() is False + assert is_clickhouse_configured() is False -def test_open_snowflake_session__configured__yields_session_and_closes( +def test_open_clickhouse_client__no_log_comment__yields_client_and_closes( mocker: MockerFixture, settings: SettingsWrapper, ) -> None: - # Given populated SNOWFLAKE_* settings and a mocked Snowpark builder - settings.SNOWFLAKE_ACCOUNT = "acc" - settings.SNOWFLAKE_USER = "u" - settings.SNOWFLAKE_ROLE = "ACCOUNTADMIN" - settings.SNOWFLAKE_WAREHOUSE = "wh" - settings.SNOWFLAKE_DATABASE = "db" - settings.SNOWFLAKE_SCHEMA = "sch" - settings.SNOWFLAKE_PRIVATE_KEY_PATH = "/key" - - fake_session = MagicMock() - builder = MagicMock() - builder.configs.return_value.create.return_value = fake_session - mocker.patch.object(services, "Session", MagicMock(builder=builder)) + # Given populated CLICKHOUSE_* settings and a mocked client factory + settings.CLICKHOUSE_HOST = "ch.example.com" + settings.CLICKHOUSE_PORT = 8443 + settings.CLICKHOUSE_USER = "default" + settings.CLICKHOUSE_PASSWORD = 
"secret" + settings.CLICKHOUSE_DATABASE = "default" + settings.CLICKHOUSE_SECURE = True + + fake_client = MagicMock(spec=Client) + get_client = mocker.patch( + "clickhouse_connect.get_client", + return_value=fake_client, + ) # When the context manager is entered and exited - with open_snowflake_session() as sess: - # Then it yields the underlying Snowpark session... - assert sess is fake_session + with open_clickhouse_client() as client: + # Then it yields the underlying clickhouse-connect client... + assert client is fake_client + + # ...connects with the settings from `CLICKHOUSE_*` and the experimental + # JSON-type flag flipped, with no log_comment override + get_client.assert_called_once_with( + host="ch.example.com", + port=8443, + username="default", + password="secret", + database="default", + secure=True, + settings={"allow_experimental_json_type": 1}, + ) + # ...and closes the client on exit + fake_client.close.assert_called_once_with() + + +def test_open_clickhouse_client__with_log_comment__sets_session_attribution( + mocker: MockerFixture, + settings: SettingsWrapper, +) -> None: + # Given a log_comment passed through (the per-task attribution string) + settings.CLICKHOUSE_HOST = "ch.example.com" + settings.CLICKHOUSE_PORT = 8443 + settings.CLICKHOUSE_USER = "default" + settings.CLICKHOUSE_PASSWORD = "" + settings.CLICKHOUSE_DATABASE = "default" + settings.CLICKHOUSE_SECURE = True + + fake_client = MagicMock(spec=Client) + get_client = mocker.patch( + "clickhouse_connect.get_client", + return_value=fake_client, + ) + + # When the context manager opens with a log_comment + with open_clickhouse_client( + log_comment="flagsmith:segment_membership:refresh:org_1:project_2" + ): + pass - # ...and closes it on exit - fake_session.close.assert_called_once_with() + # Then the comment lands in the session-level `log_comment` setting so + # every query the client issues is attributable in CH's query_log. 
+ _, kwargs = get_client.call_args + assert kwargs["settings"] == { + "allow_experimental_json_type": 1, + "log_comment": "flagsmith:segment_membership:refresh:org_1:project_2", + } def test_get_projects_to_process__no_canonical_segments__yields_nothing( @@ -135,14 +169,14 @@ def test_compute_segment_counts_for_project__no_segments__returns_empty( project: Project, ) -> None: # Given a project with no canonical segments - sess = MagicMock() + client = MagicMock(spec=Client) # When counts are computed - result = compute_segment_counts_for_project(project, sess) + result = compute_segment_counts_for_project(project, client) - # Then the result is empty and Snowflake was not queried + # Then the result is empty and ClickHouse was not queried assert result == [] - sess.sql.assert_not_called() + client.query.assert_not_called() def test_compute_segment_counts_for_project__no_environments__returns_empty( @@ -151,14 +185,14 @@ def test_compute_segment_counts_for_project__no_environments__returns_empty( ) -> None: # Given a project with a segment but no environments project.environments.all().delete() - sess = MagicMock() + client = MagicMock(spec=Client) # When counts are computed - result = compute_segment_counts_for_project(project, sess) + result = compute_segment_counts_for_project(project, client) - # Then the result is empty and Snowflake was not queried + # Then the result is empty and ClickHouse was not queried assert result == [] - sess.sql.assert_not_called() + client.query.assert_not_called() def test_compute_segment_counts_for_project__one_segment__returns_membership_instances( @@ -174,15 +208,13 @@ def test_compute_segment_counts_for_project__one_segment__returns_membership_ins "segment_membership.services.translate_segment", return_value="TRUE", ) - sess = MagicMock() - sess.sql.return_value.collect.return_value = [ - {"SEGMENT_ID": segment.id, "ENV_KEY": environment.api_key, "C": 7} - ] + client = MagicMock(spec=Client) + client.query.return_value.result_rows = [(segment.id, environment.api_key, 7)] # When counts are computed - result = compute_segment_counts_for_project(project, sess) + result = compute_segment_counts_for_project(project, client) - # Then Snowflake was queried once, the predicate landed in the SQL, + # Then ClickHouse was queried once, the predicate landed in the SQL, # and the row decodes into an unsaved SegmentMembership keyed by # (segment, environment) — last_synced_at left for the caller assert len(result) == 1 @@ -191,9 +223,13 @@ def test_compute_segment_counts_for_project__one_segment__returns_membership_ins assert membership.environment_id == environment.id assert membership.count == 7 assert membership.last_synced_at is None - sess.sql.assert_called_once() - sql = sess.sql.call_args.args[0] + client.query.assert_called_once() + sql = client.query.call_args.args[0] assert f"SELECT {segment.id} AS segment_id" in sql + # The PoC's refresh query forces ReplacingMergeTree dedup at read + # time — without FINAL the most-recent backfill might not be visible + # until a merge pass runs. 
+ assert "FROM IDENTITIES FINAL i" in sql assert "GROUP BY i.environment_id" in sql @@ -204,19 +240,17 @@ def test_compute_segment_counts_for_project__unknown_env_key_in_row__skips( segment_rule: SegmentRule, mocker: MockerFixture, ) -> None: - # Given a Snowflake row whose env_key isn't in this project — would + # Given a ClickHouse row whose env_key isn't in this project — would # only happen via stale/cross-project data, but we defend against it mocker.patch( "segment_membership.services.translate_segment", return_value="TRUE", ) - sess = MagicMock() - sess.sql.return_value.collect.return_value = [ - {"SEGMENT_ID": segment.id, "ENV_KEY": "ghost-env", "C": 99} - ] + client = MagicMock(spec=Client) + client.query.return_value.result_rows = [(segment.id, "ghost-env", 99)] # When counts are computed - result = compute_segment_counts_for_project(project, sess) + result = compute_segment_counts_for_project(project, client) # Then the unknown-env row is skipped, no spurious membership emitted assert result == [] @@ -234,12 +268,12 @@ def test_compute_segment_counts_for_project__untranslatable_segment__skips( "segment_membership.services.translate_segment", return_value=None, ) - sess = MagicMock() + client = MagicMock(spec=Client) # When counts are computed - result = compute_segment_counts_for_project(project, sess) + result = compute_segment_counts_for_project(project, client) # Then the segment is skipped entirely (no row, not even count = 0) - # and Snowflake is not queried at all + # and ClickHouse is not queried at all assert result == [] - sess.sql.assert_not_called() + client.query.assert_not_called() diff --git a/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py b/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py index 720c78a327f6..f29adb538e76 100644 --- a/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py +++ b/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py @@ -1,5 +1,6 @@ from unittest.mock import MagicMock +from clickhouse_connect.driver import Client from pytest_mock import MockerFixture from pytest_structlog import StructuredLogCapture @@ -8,35 +9,35 @@ from segment_membership import tasks from segment_membership.models import SegmentMembership from segment_membership.tasks import ( - backfill_identities_to_snowflake, + backfill_identities_to_clickhouse, refresh_project_segment_counts, ) from segments.models import Segment from tests.types import EnableFeaturesFixture -def test_backfill_identities_to_snowflake__no_snowflake_creds__skips( +def test_backfill_identities_to_clickhouse__no_clickhouse_creds__skips( mocker: MockerFixture, log: StructuredLogCapture, ) -> None: - # Given Snowflake settings unconfigured - mocker.patch.object(tasks, "is_snowflake_configured", return_value=False) - spy = mocker.patch.object(tasks, "open_snowflake_session") + # Given ClickHouse settings unconfigured + mocker.patch.object(tasks, "is_clickhouse_configured", return_value=False) + spy = mocker.patch.object(tasks, "open_clickhouse_client") # When the task runs - backfill_identities_to_snowflake() + backfill_identities_to_clickhouse() - # Then it short-circuits without opening a session + # Then it short-circuits without opening a client spy.assert_not_called() assert any(e["event"] == "backfill.skipped" for e in log.events) -def test_backfill_identities_to_snowflake__dynamo_disabled__skips( +def test_backfill_identities_to_clickhouse__dynamo_disabled__skips( mocker: MockerFixture, ) -> None: - # Given 
Snowflake configured but Dynamo wrapper disabled - mocker.patch.object(tasks, "is_snowflake_configured", return_value=True) - spy = mocker.patch.object(tasks, "open_snowflake_session") + # Given ClickHouse configured but Dynamo wrapper disabled + mocker.patch.object(tasks, "is_clickhouse_configured", return_value=True) + spy = mocker.patch.object(tasks, "open_clickhouse_client") mocker.patch.object( tasks, "DynamoIdentityWrapper", @@ -44,13 +45,13 @@ def test_backfill_identities_to_snowflake__dynamo_disabled__skips( ) # When the task runs - backfill_identities_to_snowflake() + backfill_identities_to_clickhouse() - # Then it skips without opening a session + # Then it skips without opening a client spy.assert_not_called() -def test_backfill_identities_to_snowflake__happy_path__deletes_then_inserts( +def test_backfill_identities_to_clickhouse__happy_path__bulk_inserts( mocker: MockerFixture, project: Project, environment: Environment, @@ -61,11 +62,11 @@ def test_backfill_identities_to_snowflake__happy_path__deletes_then_inserts( # Given a project with a canonical segment and a Dynamo wrapper # yielding two identities for its environment enable_features("segment_membership_inspection") - mocker.patch.object(tasks, "is_snowflake_configured", return_value=True) - sess = MagicMock() + mocker.patch.object(tasks, "is_clickhouse_configured", return_value=True) + client = MagicMock(spec=Client) mocker.patch.object( - tasks, "open_snowflake_session" - ).return_value.__enter__.return_value = sess + tasks, "open_clickhouse_client" + ).return_value.__enter__.return_value = client refresh_dispatch = mocker.patch.object(tasks, "refresh_project_segment_counts") wrapper = MagicMock(is_enabled=True) wrapper.iter_all_items_paginated.return_value = iter( @@ -91,26 +92,31 @@ def test_backfill_identities_to_snowflake__happy_path__deletes_then_inserts( mocker.patch.object(tasks, "DynamoIdentityWrapper", return_value=wrapper) # When the task runs - backfill_identities_to_snowflake() - - # Then DELETE binds the env api key as a parameter and the identities - # are written via the Snowpark DataFrame writer - delete_calls = [ - call - for call in sess.sql.call_args_list - if call.args and call.args[0].startswith("DELETE FROM IDENTITIES") - ] - assert len(delete_calls) == 1 - assert delete_calls[0].kwargs == {"params": [environment.api_key]} - - sess.create_dataframe.assert_called_once() - rows_arg = sess.create_dataframe.call_args.args[0] + backfill_identities_to_clickhouse() + + # Then ReplacingMergeTree handles dedup — no DELETE, just one bulk + # INSERT for the two identities, tagged with the per-(org, project) + # log_comment for spend attribution. 
+ client.insert.assert_called_once() + args = client.insert.call_args + assert args.args[0] == "IDENTITIES" + rows_arg = args.args[1] assert {row[0] for row in rows_arg} == {environment.api_key} assert {row[2] for row in rows_arg} == {"a", "b"} - sess.create_dataframe.return_value.write.mode.assert_called_once_with("append") - sess.create_dataframe.return_value.write.mode.return_value.save_as_table.assert_called_once_with( - "IDENTITIES" - ) + assert args.kwargs["column_names"] == [ + "environment_id", + "id", + "identifier", + "identity_key", + "traits", + ] + assert args.kwargs["settings"] == { + "log_comment": ( + f"flagsmith:segment_membership:backfill" + f":org_{project.organisation_id}" + f":project_{project.id}" + ) + } assert any( e["event"] == "backfill.environment.completed" and e["rows__count"] == 2 for e in log.events @@ -120,7 +126,7 @@ def test_backfill_identities_to_snowflake__happy_path__deletes_then_inserts( refresh_dispatch.delay.assert_called_once_with(args=(project.id,)) -def test_backfill_identities_to_snowflake__insert_fails__logs_and_continues( +def test_backfill_identities_to_clickhouse__insert_fails__logs_and_continues( mocker: MockerFixture, project: Project, environment: Environment, @@ -128,14 +134,14 @@ def test_backfill_identities_to_snowflake__insert_fails__logs_and_continues( enable_features: EnableFeaturesFixture, log: StructuredLogCapture, ) -> None: - # Given the DataFrame write blows up mid-batch + # Given the bulk insert blows up mid-batch enable_features("segment_membership_inspection") - mocker.patch.object(tasks, "is_snowflake_configured", return_value=True) - sess = MagicMock() - sess.create_dataframe.side_effect = RuntimeError("boom") + mocker.patch.object(tasks, "is_clickhouse_configured", return_value=True) + client = MagicMock(spec=Client) + client.insert.side_effect = RuntimeError("boom") mocker.patch.object( - tasks, "open_snowflake_session" - ).return_value.__enter__.return_value = sess + tasks, "open_clickhouse_client" + ).return_value.__enter__.return_value = client wrapper = MagicMock(is_enabled=True) wrapper.iter_all_items_paginated.return_value = iter( [ @@ -152,13 +158,13 @@ def test_backfill_identities_to_snowflake__insert_fails__logs_and_continues( mocker.patch.object(tasks, "DynamoIdentityWrapper", return_value=wrapper) # When the task runs - backfill_identities_to_snowflake() + backfill_identities_to_clickhouse() # Then the failure is logged and the loop continues assert any(e["event"] == "backfill.environment.failed" for e in log.events) -def test_backfill_identities_to_snowflake__multiple_projects__fans_out_refresh_per_project( +def test_backfill_identities_to_clickhouse__multiple_projects__fans_out_refresh_per_project( mocker: MockerFixture, project: Project, project_b: Project, @@ -168,18 +174,18 @@ def test_backfill_identities_to_snowflake__multiple_projects__fans_out_refresh_p # Given two FoF-enabled projects with canonical segments enable_features("segment_membership_inspection") Segment.objects.create(name="seg-b", project=project_b) - mocker.patch.object(tasks, "is_snowflake_configured", return_value=True) - sess = MagicMock() + mocker.patch.object(tasks, "is_clickhouse_configured", return_value=True) + client = MagicMock(spec=Client) mocker.patch.object( - tasks, "open_snowflake_session" - ).return_value.__enter__.return_value = sess + tasks, "open_clickhouse_client" + ).return_value.__enter__.return_value = client refresh_dispatch = mocker.patch.object(tasks, "refresh_project_segment_counts") wrapper = 
MagicMock(is_enabled=True) wrapper.iter_all_items_paginated.return_value = iter([]) mocker.patch.object(tasks, "DynamoIdentityWrapper", return_value=wrapper) # When the backfill runs - backfill_identities_to_snowflake() + backfill_identities_to_clickhouse() # Then a per-project refresh is dispatched for each project we # actually processed (deduped) — once per project, not once per env @@ -189,23 +195,23 @@ def test_backfill_identities_to_snowflake__multiple_projects__fans_out_refresh_p assert dispatched_ids == {project.id, project_b.id} -def test_refresh_project_segment_counts__no_snowflake_creds__skips( +def test_refresh_project_segment_counts__no_clickhouse_creds__skips( mocker: MockerFixture, project: Project, log: StructuredLogCapture, ) -> None: - # Given Snowflake unconfigured - mocker.patch.object(tasks, "is_snowflake_configured", return_value=False) - spy = mocker.patch.object(tasks, "open_snowflake_session") + # Given ClickHouse unconfigured + mocker.patch.object(tasks, "is_clickhouse_configured", return_value=False) + spy = mocker.patch.object(tasks, "open_clickhouse_client") # When the per-project task runs refresh_project_segment_counts(project.id) - # Then it short-circuits without opening a session + # Then it short-circuits without opening a client spy.assert_not_called() assert any( e["event"] == "refresh.project.skipped" - and e["reason"] == "snowflake_not_configured" + and e["reason"] == "clickhouse_not_configured" for e in log.events ) @@ -215,14 +221,14 @@ def test_refresh_project_segment_counts__ff_disabled__skips( project: Project, log: StructuredLogCapture, ) -> None: - # Given Snowflake configured but FoF flag off (default) - mocker.patch.object(tasks, "is_snowflake_configured", return_value=True) - spy = mocker.patch.object(tasks, "open_snowflake_session") + # Given ClickHouse configured but FoF flag off (default) + mocker.patch.object(tasks, "is_clickhouse_configured", return_value=True) + spy = mocker.patch.object(tasks, "open_clickhouse_client") # When the per-project task runs refresh_project_segment_counts(project.id) - # Then it skips without opening a session + # Then it skips without opening a client spy.assert_not_called() assert any( e["event"] == "refresh.project.skipped" and e["reason"] == "ff_disabled" @@ -239,11 +245,11 @@ def test_refresh_project_segment_counts__compute_fails__logs( ) -> None: # Given a project where count compute throws enable_features("segment_membership_inspection") - mocker.patch.object(tasks, "is_snowflake_configured", return_value=True) - sess = MagicMock() + mocker.patch.object(tasks, "is_clickhouse_configured", return_value=True) + client = MagicMock(spec=Client) mocker.patch.object( - tasks, "open_snowflake_session" - ).return_value.__enter__.return_value = sess + tasks, "open_clickhouse_client" + ).return_value.__enter__.return_value = client mocker.patch.object( tasks, "compute_segment_counts_for_project", side_effect=RuntimeError("boom") ) @@ -264,11 +270,10 @@ def test_refresh_project_segment_counts__counts_returned__upserts_per_env_rows( ) -> None: # Given a project with a canonical segment and stubbed compute enable_features("segment_membership_inspection") - mocker.patch.object(tasks, "is_snowflake_configured", return_value=True) - sess = MagicMock() - mocker.patch.object( - tasks, "open_snowflake_session" - ).return_value.__enter__.return_value = sess + mocker.patch.object(tasks, "is_clickhouse_configured", return_value=True) + client = MagicMock(spec=Client) + open_client = mocker.patch.object(tasks, 
"open_clickhouse_client") + open_client.return_value.__enter__.return_value = client mocker.patch.object( tasks, "compute_segment_counts_for_project", @@ -288,3 +293,13 @@ def test_refresh_project_segment_counts__counts_returned__upserts_per_env_rows( membership = SegmentMembership.objects.get(segment=segment, environment=environment) assert membership.count == 42 assert membership.last_synced_at is not None + + # ...and the client was opened with a per-(org, project) log_comment so + # the refresh's CH spend attributes cleanly. + open_client.assert_called_once_with( + log_comment=( + f"flagsmith:segment_membership:refresh" + f":org_{project.organisation_id}" + f":project_{project.id}" + ) + ) diff --git a/api/uv.lock b/api/uv.lock index ee1e690eb986..82a4071103db 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -613,6 +613,45 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1a/70/e63223f8116931d365993d4a6b7ef653a4d920b41d03de7c59499962821f/click-8.1.6-py3-none-any.whl", hash = "sha256:fa244bb30b3b5ee2cae3da8f55c9e5e0c0e86093306301fb418eb9dc40fbded5", size = 97909, upload-time = "2023-07-18T20:05:12.481Z" }, ] +[[package]] +name = "clickhouse-connect" +version = "0.15.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "lz4" }, + { name = "pytz" }, + { name = "urllib3" }, + { name = "zstandard" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a5/b1/a17eb4409e2741286ccdac06b6ea15db178cdf1f0ed997bbf9ad3448f78e/clickhouse_connect-0.15.1.tar.gz", hash = "sha256:f2aaf5fc0bb3098c24f0d8ca7e4ecbe605a26957481dfca2c8cef9d1fad7b7ca", size = 126840, upload-time = "2026-03-30T18:58:31.113Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/6b/d9c222683e01469c14262a4f924c94ede359b654f656e8b993e0cf8ccaf4/clickhouse_connect-0.15.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9610ef6ff653f8a030f50e39cdeb1a39bea925c48f9196d787ea4b9f5eb1c8f0", size = 285313, upload-time = "2026-03-30T18:57:09.033Z" }, + { url = "https://files.pythonhosted.org/packages/66/cf/27d6ee5a15c0211bd13a1bc2b516b45e8b16055e877d3375f0c8cbbbd78a/clickhouse_connect-0.15.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cd41ebe8b7f1c2579b22bbc414a800f3f8f5c843928019aca27c81592f70c5a7", size = 278360, upload-time = "2026-03-30T18:57:10.338Z" }, + { url = "https://files.pythonhosted.org/packages/7d/83/d7d727a011580be3495dfb113d5a29f67745b1f7133aac87d19e205fa0c4/clickhouse_connect-0.15.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ff5d10c6e49d36ee6941f52c4233f2bfb4198e9c726fed224f725974a667e37", size = 1097483, upload-time = "2026-03-30T18:57:11.504Z" }, + { url = "https://files.pythonhosted.org/packages/01/1b/b69dde105ba12b8f3cb9bb7e36c3aae76a81d447a4b32294b036aa31aa3f/clickhouse_connect-0.15.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:99d55aab64fdeb53d74c16d2c46ae5491e90aa37ba55c24884a68a869418ee8e", size = 1102402, upload-time = "2026-03-30T18:57:12.63Z" }, + { url = "https://files.pythonhosted.org/packages/d5/fb/1cfea665564a232f9a79e271c1c58ec2b627c920f1a47483eed720eba307/clickhouse_connect-0.15.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:57ad606e878fd284242713449217a0c475fde6b9b7ab59e7ba9e9c388431f004", size = 1084076, upload-time = "2026-03-30T18:57:13.89Z" }, + { url = 
"https://files.pythonhosted.org/packages/5e/ad/80df558d79dd85a493948dd10f6ec1c95a029d7aa78725b35af9726b2131/clickhouse_connect-0.15.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:693a03e44256886ac5dd26dc708833913157ec72e3b3a44fb89fd5fc202f85dc", size = 1106768, upload-time = "2026-03-30T18:57:15.155Z" }, + { url = "https://files.pythonhosted.org/packages/57/c9/322f0e85e477d46b7eca58dd6b7b49668dafa574237a4a724db411df5552/clickhouse_connect-0.15.1-cp311-cp311-win32.whl", hash = "sha256:f03814b6e6a72892ce913eaef3931e6d011068480e9c19b80e5c640fdac55109", size = 257103, upload-time = "2026-03-30T18:57:16.494Z" }, + { url = "https://files.pythonhosted.org/packages/d4/4a/67bb7a8423b7ab2f26ec8a12b14f276a07be8ca113a81d40f88d31bb5df6/clickhouse_connect-0.15.1-cp311-cp311-win_amd64.whl", hash = "sha256:f13c34ad1ddb0d1efc92bc4039b50b534da94c51bbce25e61484bfd28b231cb5", size = 275290, upload-time = "2026-03-30T18:57:17.566Z" }, + { url = "https://files.pythonhosted.org/packages/d1/b6/d0881ac34617b13ad555a4749aae042e0242bedbf8a258373719089885cd/clickhouse_connect-0.15.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:0bef871fb9803ae82b4dc1f797b6e784de0a4dec351591191a0c1a6008548284", size = 287187, upload-time = "2026-03-30T18:57:18.962Z" }, + { url = "https://files.pythonhosted.org/packages/d6/6e/27823c38e54247ea22d96b3f4fde32831a10e5203761c0e2893bc2fc587f/clickhouse_connect-0.15.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:df93fa024d6ed46dbc3182b6202180be4cf2bbe9c331dcb21f85963b1b3fd1e5", size = 278086, upload-time = "2026-03-30T18:57:20.104Z" }, + { url = "https://files.pythonhosted.org/packages/6a/88/f1096e8b4f08e628674490e5d186c7bf09174bbbc5fefa530e28e6b39da3/clickhouse_connect-0.15.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d6e98c0cf53db3b24dc0ff9f522fcf13205b1d191c632567d1744fbd4671741f", size = 1122144, upload-time = "2026-03-30T18:57:21.205Z" }, + { url = "https://files.pythonhosted.org/packages/af/e5/027f8b94b54a39dcdf9b314a7cd66cb882d8ba166efc584908997c6d5acb/clickhouse_connect-0.15.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4bf70933ab860bd2f0a872db624603706bed400c915c7aeef382956cf8ebbdf3", size = 1138503, upload-time = "2026-03-30T18:57:22.554Z" }, + { url = "https://files.pythonhosted.org/packages/cb/46/a830bcb46f0081630a88cb932c29804553728645c17fd1cff874fe71b1ba/clickhouse_connect-0.15.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:60aa8c9c775d22db324260265f4c656f803fbc71de9193ef83cf8d8d0ef6ab9a", size = 1101890, upload-time = "2026-03-30T18:57:23.788Z" }, + { url = "https://files.pythonhosted.org/packages/4c/05/91cf7cc817ff91bc96f1e2afc84346b42e88831c9c0a7fd56e78907b5320/clickhouse_connect-0.15.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5462bad97d97919a4ed230e2ef28d0b76bec0354a343218647830aac7744a43b", size = 1133723, upload-time = "2026-03-30T18:57:25.105Z" }, + { url = "https://files.pythonhosted.org/packages/d7/b0/e7a71b96b7bc1df6bbacf9fa71f0cc3b8f195f58386535b72aa92304b1fb/clickhouse_connect-0.15.1-cp312-cp312-win32.whl", hash = "sha256:e1a157205efd47884c22bfe061fc6f8c9aea844929ee755c47b446093805d21a", size = 257279, upload-time = "2026-03-30T18:57:26.288Z" }, + { url = "https://files.pythonhosted.org/packages/b9/03/0ef116ef0efc6861d6e9674419709b9873603f330f95853220a145748576/clickhouse_connect-0.15.1-cp312-cp312-win_amd64.whl", hash = "sha256:5de299ada0f7eb9090bb5a6304d8d78163d4d9cc8eb04d8f552bfb82bafb61d5", size = 275916, 
upload-time = "2026-03-30T18:57:27.372Z" }, + { url = "https://files.pythonhosted.org/packages/dc/94/1c62f55439287999049ddb650fffcc0898ff7639865239d8e414984c7c6b/clickhouse_connect-0.15.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:08df7857ecd2e345abbbdfc54d80fa060732cf75c953940355140af9a73b730a", size = 285254, upload-time = "2026-03-30T18:57:28.765Z" }, + { url = "https://files.pythonhosted.org/packages/41/eb/00cf4967be5553b5eca53fbea491a0860816e8f867005d4a3c60b280595c/clickhouse_connect-0.15.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d3fca3e0781b664556690decc788e7d25691043bf67a0d241e9c29233a2990d5", size = 276341, upload-time = "2026-03-30T18:57:29.895Z" }, + { url = "https://files.pythonhosted.org/packages/19/d3/a3c9ce572d3766bfd44626e40ad94539e86516f0a4821417d728e1c01c4e/clickhouse_connect-0.15.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fa01fdb92db6bf72cb9509eecd0a0057a4558a4f40c02eebffbc2d61b644620e", size = 1089774, upload-time = "2026-03-30T18:57:31.005Z" }, + { url = "https://files.pythonhosted.org/packages/6b/45/f10a275a5798cf90fb3a88f64f7410571bab4579cdd443d0820697f0e685/clickhouse_connect-0.15.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c12d9f2b2fc57adaf5ea267804f00e520771794641227ed5285e38fdf36557a6", size = 1110275, upload-time = "2026-03-30T18:57:32.513Z" }, + { url = "https://files.pythonhosted.org/packages/c9/c3/654107d5be702761670eaa58b6882455c428eaf4acc69e0920d87c6d5061/clickhouse_connect-0.15.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a9d1e12bf86cd96626f74d21e3ac237abcda105f55cd2e78d139197d35f86209", size = 1072017, upload-time = "2026-03-30T18:57:33.847Z" }, + { url = "https://files.pythonhosted.org/packages/90/aa/0f3b8f18761d3f8ec7f31a09b0329d19509b7b8f0760a327890ca29f0526/clickhouse_connect-0.15.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b6d107b5f964af97f25a0d1bfd59fe3510f2a646c87ad4f9ab9014bb0c66aa1c", size = 1105152, upload-time = "2026-03-30T18:57:35.204Z" }, + { url = "https://files.pythonhosted.org/packages/28/23/684f6074bf682fd31b1449125d895054af0880bb8481a2902c84e2c9e03b/clickhouse_connect-0.15.1-cp313-cp313-win32.whl", hash = "sha256:46bcebd00aff52ea5f7433e9cee1157b411dba9187f6677a18378c799c27c8aa", size = 256905, upload-time = "2026-03-30T18:57:36.555Z" }, + { url = "https://files.pythonhosted.org/packages/9c/77/39518de3dfb5da2234b2748a133d7626430aa26d7d38bb390c4d8b299cd3/clickhouse_connect-0.15.1-cp313-cp313-win_amd64.whl", hash = "sha256:4f87d283399cbda676c8765605bf60dc6559df6fd38cbb9ea07048a4b34dda26", size = 274792, upload-time = "2026-03-30T18:57:37.79Z" }, +] + [[package]] name = "colorama" version = "0.4.6" @@ -1369,6 +1408,7 @@ dependencies = [ { name = "backoff" }, { name = "boto3" }, { name = "chargebee" }, + { name = "clickhouse-connect" }, { name = "coreapi" }, { name = "dj-database-url" }, { name = "django" }, @@ -1401,6 +1441,7 @@ dependencies = [ { name = "flagsmith" }, { name = "flagsmith-common", extra = ["common-core", "flagsmith-schemas", "task-processor"] }, { name = "flagsmith-flag-engine" }, + { name = "flagsmith-sql-flag-engine" }, { name = "google-api-python-client" }, { name = "google-re2" }, { name = "gunicorn" }, @@ -1508,6 +1549,7 @@ requires-dist = [ { name = "boto3", specifier = ">=1.35.95,<1.36.0" }, { name = "boto3-stubs", marker = "extra == 'dev'", specifier = ">=1.36.20,<2.0.0" }, { name = "chargebee", specifier = ">=3.10.0,<4.0.0" }, + { name = 
"clickhouse-connect", specifier = ">=0.15,<1.0" }, { name = "coreapi", specifier = ">=2.3.3,<2.4.0" }, { name = "cryptography", marker = "extra == 'licensing'", specifier = ">=44.0.1" }, { name = "datamodel-code-generator", marker = "extra == 'dev'", specifier = ">=0.25,<0.26.0" }, @@ -1553,6 +1595,7 @@ requires-dist = [ { name = "flagsmith-flag-engine", specifier = ">=10.1.0,<11.0.0" }, { name = "flagsmith-ldap", marker = "extra == 'ldap'", git = "https://github.com/flagsmith/flagsmith-ldap?tag=v0.1.2" }, { name = "flagsmith-private", marker = "extra == 'release-pipelines'", git = "https://github.com/Flagsmith/flagsmith-private?tag=v0.4.4" }, + { name = "flagsmith-sql-flag-engine", specifier = "==0.1.0a2", index = "https://flagsmith-staging-302456015006.d.codeartifact.eu-west-2.amazonaws.com/pypi/flagsmith-pypi-staging/simple/" }, { name = "google-api-python-client", specifier = ">=1.12.5,<1.13.0" }, { name = "google-re2", specifier = ">=1.0,<2.0.0" }, { name = "gunicorn", specifier = ">=23.0.0,<23.1.0" }, @@ -1695,6 +1738,19 @@ name = "flagsmith-private" version = "0.1.0" source = { git = "https://github.com/Flagsmith/flagsmith-private?tag=v0.4.4#a5318e69b5712307e3204a57ecebb64995526200" } +[[package]] +name = "flagsmith-sql-flag-engine" +version = "0.1.0a2" +source = { registry = "https://flagsmith-staging-302456015006.d.codeartifact.eu-west-2.amazonaws.com/pypi/flagsmith-pypi-staging/simple/" } +dependencies = [ + { name = "flagsmith-flag-engine" }, + { name = "jsonpath-rfc9535" }, +] +sdist = { url = "https://flagsmith-staging-302456015006.d.codeartifact.eu-west-2.amazonaws.com/pypi/flagsmith-pypi-staging/simple/flagsmith-sql-flag-engine/0.1.0a2/flagsmith_sql_flag_engine-0.1.0a2.tar.gz", hash = "sha256:822779efd32c2b55a279603f93c9cceceafeb449bf688308b74881f7eff8dbdd" } +wheels = [ + { url = "https://flagsmith-staging-302456015006.d.codeartifact.eu-west-2.amazonaws.com/pypi/flagsmith-pypi-staging/simple/flagsmith-sql-flag-engine/0.1.0a2/flagsmith_sql_flag_engine-0.1.0a2-py3-none-any.whl", hash = "sha256:82bbc592d31e430f2423d5928adedf243c5317a5fc2046e3590492e05bbfb836" }, +] + [[package]] name = "freezegun" version = "1.5.5" @@ -2180,6 +2236,46 @@ name = "licensing" version = "0.0.0" source = { git = "https://github.com/flagsmith/licensing?tag=v0.3.0#d38953cd93cb99eab7fd92a0467d8bfcccc4cf96" } +[[package]] +name = "lz4" +version = "4.4.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/51/f1b86d93029f418033dddf9b9f79c8d2641e7454080478ee2aab5123173e/lz4-4.4.5.tar.gz", hash = "sha256:5f0b9e53c1e82e88c10d7c180069363980136b9d7a8306c4dca4f760d60c39f0", size = 172886, upload-time = "2025-11-03T13:02:36.061Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/93/5b/6edcd23319d9e28b1bedf32768c3d1fd56eed8223960a2c47dacd2cec2af/lz4-4.4.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d6da84a26b3aa5da13a62e4b89ab36a396e9327de8cd48b436a3467077f8ccd4", size = 207391, upload-time = "2025-11-03T13:01:36.644Z" }, + { url = "https://files.pythonhosted.org/packages/34/36/5f9b772e85b3d5769367a79973b8030afad0d6b724444083bad09becd66f/lz4-4.4.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:61d0ee03e6c616f4a8b69987d03d514e8896c8b1b7cc7598ad029e5c6aedfd43", size = 207146, upload-time = "2025-11-03T13:01:37.928Z" }, + { url = "https://files.pythonhosted.org/packages/04/f4/f66da5647c0d72592081a37c8775feacc3d14d2625bbdaabd6307c274565/lz4-4.4.5-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = 
"sha256:33dd86cea8375d8e5dd001e41f321d0a4b1eb7985f39be1b6a4f466cd480b8a7", size = 1292623, upload-time = "2025-11-03T13:01:39.341Z" }, + { url = "https://files.pythonhosted.org/packages/85/fc/5df0f17467cdda0cad464a9197a447027879197761b55faad7ca29c29a04/lz4-4.4.5-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:609a69c68e7cfcfa9d894dc06be13f2e00761485b62df4e2472f1b66f7b405fb", size = 1279982, upload-time = "2025-11-03T13:01:40.816Z" }, + { url = "https://files.pythonhosted.org/packages/25/3b/b55cb577aa148ed4e383e9700c36f70b651cd434e1c07568f0a86c9d5fbb/lz4-4.4.5-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:75419bb1a559af00250b8f1360d508444e80ed4b26d9d40ec5b09fe7875cb989", size = 1368674, upload-time = "2025-11-03T13:01:42.118Z" }, + { url = "https://files.pythonhosted.org/packages/fb/31/e97e8c74c59ea479598e5c55cbe0b1334f03ee74ca97726e872944ed42df/lz4-4.4.5-cp311-cp311-win32.whl", hash = "sha256:12233624f1bc2cebc414f9efb3113a03e89acce3ab6f72035577bc61b270d24d", size = 88168, upload-time = "2025-11-03T13:01:43.282Z" }, + { url = "https://files.pythonhosted.org/packages/18/47/715865a6c7071f417bef9b57c8644f29cb7a55b77742bd5d93a609274e7e/lz4-4.4.5-cp311-cp311-win_amd64.whl", hash = "sha256:8a842ead8ca7c0ee2f396ca5d878c4c40439a527ebad2b996b0444f0074ed004", size = 99491, upload-time = "2025-11-03T13:01:44.167Z" }, + { url = "https://files.pythonhosted.org/packages/14/e7/ac120c2ca8caec5c945e6356ada2aa5cfabd83a01e3170f264a5c42c8231/lz4-4.4.5-cp311-cp311-win_arm64.whl", hash = "sha256:83bc23ef65b6ae44f3287c38cbf82c269e2e96a26e560aa551735883388dcc4b", size = 91271, upload-time = "2025-11-03T13:01:45.016Z" }, + { url = "https://files.pythonhosted.org/packages/1b/ac/016e4f6de37d806f7cc8f13add0a46c9a7cfc41a5ddc2bc831d7954cf1ce/lz4-4.4.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:df5aa4cead2044bab83e0ebae56e0944cc7fcc1505c7787e9e1057d6d549897e", size = 207163, upload-time = "2025-11-03T13:01:45.895Z" }, + { url = "https://files.pythonhosted.org/packages/8d/df/0fadac6e5bd31b6f34a1a8dbd4db6a7606e70715387c27368586455b7fc9/lz4-4.4.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6d0bf51e7745484d2092b3a51ae6eb58c3bd3ce0300cf2b2c14f76c536d5697a", size = 207150, upload-time = "2025-11-03T13:01:47.205Z" }, + { url = "https://files.pythonhosted.org/packages/b7/17/34e36cc49bb16ca73fb57fbd4c5eaa61760c6b64bce91fcb4e0f4a97f852/lz4-4.4.5-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7b62f94b523c251cf32aa4ab555f14d39bd1a9df385b72443fd76d7c7fb051f5", size = 1292045, upload-time = "2025-11-03T13:01:48.667Z" }, + { url = "https://files.pythonhosted.org/packages/90/1c/b1d8e3741e9fc89ed3b5f7ef5f22586c07ed6bb04e8343c2e98f0fa7ff04/lz4-4.4.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2c3ea562c3af274264444819ae9b14dbbf1ab070aff214a05e97db6896c7597e", size = 1279546, upload-time = "2025-11-03T13:01:50.159Z" }, + { url = "https://files.pythonhosted.org/packages/55/d9/e3867222474f6c1b76e89f3bd914595af69f55bf2c1866e984c548afdc15/lz4-4.4.5-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:24092635f47538b392c4eaeff14c7270d2c8e806bf4be2a6446a378591c5e69e", size = 1368249, upload-time = "2025-11-03T13:01:51.273Z" }, + { url = "https://files.pythonhosted.org/packages/b2/e7/d667d337367686311c38b580d1ca3d5a23a6617e129f26becd4f5dc458df/lz4-4.4.5-cp312-cp312-win32.whl", hash = 
"sha256:214e37cfe270948ea7eb777229e211c601a3e0875541c1035ab408fbceaddf50", size = 88189, upload-time = "2025-11-03T13:01:52.605Z" }, + { url = "https://files.pythonhosted.org/packages/a5/0b/a54cd7406995ab097fceb907c7eb13a6ddd49e0b231e448f1a81a50af65c/lz4-4.4.5-cp312-cp312-win_amd64.whl", hash = "sha256:713a777de88a73425cf08eb11f742cd2c98628e79a8673d6a52e3c5f0c116f33", size = 99497, upload-time = "2025-11-03T13:01:53.477Z" }, + { url = "https://files.pythonhosted.org/packages/6a/7e/dc28a952e4bfa32ca16fa2eb026e7a6ce5d1411fcd5986cd08c74ec187b9/lz4-4.4.5-cp312-cp312-win_arm64.whl", hash = "sha256:a88cbb729cc333334ccfb52f070463c21560fca63afcf636a9f160a55fac3301", size = 91279, upload-time = "2025-11-03T13:01:54.419Z" }, + { url = "https://files.pythonhosted.org/packages/2f/46/08fd8ef19b782f301d56a9ccfd7dafec5fd4fc1a9f017cf22a1accb585d7/lz4-4.4.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6bb05416444fafea170b07181bc70640975ecc2a8c92b3b658c554119519716c", size = 207171, upload-time = "2025-11-03T13:01:56.595Z" }, + { url = "https://files.pythonhosted.org/packages/8f/3f/ea3334e59de30871d773963997ecdba96c4584c5f8007fd83cfc8f1ee935/lz4-4.4.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b424df1076e40d4e884cfcc4c77d815368b7fb9ebcd7e634f937725cd9a8a72a", size = 207163, upload-time = "2025-11-03T13:01:57.721Z" }, + { url = "https://files.pythonhosted.org/packages/41/7b/7b3a2a0feb998969f4793c650bb16eff5b06e80d1f7bff867feb332f2af2/lz4-4.4.5-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:216ca0c6c90719731c64f41cfbd6f27a736d7e50a10b70fad2a9c9b262ec923d", size = 1292136, upload-time = "2025-11-03T13:02:00.375Z" }, + { url = "https://files.pythonhosted.org/packages/89/d1/f1d259352227bb1c185288dd694121ea303e43404aa77560b879c90e7073/lz4-4.4.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:533298d208b58b651662dd972f52d807d48915176e5b032fb4f8c3b6f5fe535c", size = 1279639, upload-time = "2025-11-03T13:02:01.649Z" }, + { url = "https://files.pythonhosted.org/packages/d2/fb/ba9256c48266a09012ed1d9b0253b9aa4fe9cdff094f8febf5b26a4aa2a2/lz4-4.4.5-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:451039b609b9a88a934800b5fc6ee401c89ad9c175abf2f4d9f8b2e4ef1afc64", size = 1368257, upload-time = "2025-11-03T13:02:03.35Z" }, + { url = "https://files.pythonhosted.org/packages/a5/6d/dee32a9430c8b0e01bbb4537573cabd00555827f1a0a42d4e24ca803935c/lz4-4.4.5-cp313-cp313-win32.whl", hash = "sha256:a5f197ffa6fc0e93207b0af71b302e0a2f6f29982e5de0fbda61606dd3a55832", size = 88191, upload-time = "2025-11-03T13:02:04.406Z" }, + { url = "https://files.pythonhosted.org/packages/18/e0/f06028aea741bbecb2a7e9648f4643235279a770c7ffaf70bd4860c73661/lz4-4.4.5-cp313-cp313-win_amd64.whl", hash = "sha256:da68497f78953017deb20edff0dba95641cc86e7423dfadf7c0264e1ac60dc22", size = 99502, upload-time = "2025-11-03T13:02:05.886Z" }, + { url = "https://files.pythonhosted.org/packages/61/72/5bef44afb303e56078676b9f2486f13173a3c1e7f17eaac1793538174817/lz4-4.4.5-cp313-cp313-win_arm64.whl", hash = "sha256:c1cfa663468a189dab510ab231aad030970593f997746d7a324d40104db0d0a9", size = 91285, upload-time = "2025-11-03T13:02:06.77Z" }, + { url = "https://files.pythonhosted.org/packages/49/55/6a5c2952971af73f15ed4ebfdd69774b454bd0dc905b289082ca8664fba1/lz4-4.4.5-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:67531da3b62f49c939e09d56492baf397175ff39926d0bd5bd2d191ac2bff95f", size = 207348, upload-time = 
"2025-11-03T13:02:08.117Z" }, + { url = "https://files.pythonhosted.org/packages/4e/d7/fd62cbdbdccc35341e83aabdb3f6d5c19be2687d0a4eaf6457ddf53bba64/lz4-4.4.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a1acbbba9edbcbb982bc2cac5e7108f0f553aebac1040fbec67a011a45afa1ba", size = 207340, upload-time = "2025-11-03T13:02:09.152Z" }, + { url = "https://files.pythonhosted.org/packages/77/69/225ffadaacb4b0e0eb5fd263541edd938f16cd21fe1eae3cd6d5b6a259dc/lz4-4.4.5-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a482eecc0b7829c89b498fda883dbd50e98153a116de612ee7c111c8bcf82d1d", size = 1293398, upload-time = "2025-11-03T13:02:10.272Z" }, + { url = "https://files.pythonhosted.org/packages/c6/9e/2ce59ba4a21ea5dc43460cba6f34584e187328019abc0e66698f2b66c881/lz4-4.4.5-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e099ddfaa88f59dd8d36c8a3c66bd982b4984edf127eb18e30bb49bdba68ce67", size = 1281209, upload-time = "2025-11-03T13:02:12.091Z" }, + { url = "https://files.pythonhosted.org/packages/80/4f/4d946bd1624ec229b386a3bc8e7a85fa9a963d67d0a62043f0af0978d3da/lz4-4.4.5-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a2af2897333b421360fdcce895c6f6281dc3fab018d19d341cf64d043fc8d90d", size = 1369406, upload-time = "2025-11-03T13:02:13.683Z" }, + { url = "https://files.pythonhosted.org/packages/02/a2/d429ba4720a9064722698b4b754fb93e42e625f1318b8fe834086c7c783b/lz4-4.4.5-cp313-cp313t-win32.whl", hash = "sha256:66c5de72bf4988e1b284ebdd6524c4bead2c507a2d7f172201572bac6f593901", size = 88325, upload-time = "2025-11-03T13:02:14.743Z" }, + { url = "https://files.pythonhosted.org/packages/4b/85/7ba10c9b97c06af6c8f7032ec942ff127558863df52d866019ce9d2425cf/lz4-4.4.5-cp313-cp313t-win_amd64.whl", hash = "sha256:cdd4bdcbaf35056086d910d219106f6a04e1ab0daa40ec0eeef1626c27d0fddb", size = 99643, upload-time = "2025-11-03T13:02:15.978Z" }, + { url = "https://files.pythonhosted.org/packages/77/4d/a175459fb29f909e13e57c8f475181ad8085d8d7869bd8ad99033e3ee5fa/lz4-4.4.5-cp313-cp313t-win_arm64.whl", hash = "sha256:28ccaeb7c5222454cd5f60fcd152564205bcb801bd80e125949d2dfbadc76bbd", size = 91504, upload-time = "2025-11-03T13:02:17.313Z" }, +] + [[package]] name = "markupsafe" version = "2.1.3" @@ -4132,3 +4228,62 @@ sdist = { url = "https://files.pythonhosted.org/packages/e3/02/0f2892c661036d50e wheels = [ { url = "https://files.pythonhosted.org/packages/2e/54/647ade08bf0db230bfea292f893923872fd20be6ac6f53b2b936ba839d75/zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e", size = 10276, upload-time = "2025-06-08T17:06:38.034Z" }, ] + +[[package]] +name = "zstandard" +version = "0.25.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fd/aa/3e0508d5a5dd96529cdc5a97011299056e14c6505b678fd58938792794b1/zstandard-0.25.0.tar.gz", hash = "sha256:7713e1179d162cf5c7906da876ec2ccb9c3a9dcbdffef0cc7f70c3667a205f0b", size = 711513, upload-time = "2025-09-14T22:15:54.002Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/83/c3ca27c363d104980f1c9cee1101cc8ba724ac8c28a033ede6aab89585b1/zstandard-0.25.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:933b65d7680ea337180733cf9e87293cc5500cc0eb3fc8769f4d3c88d724ec5c", size = 795254, upload-time = "2025-09-14T22:16:26.137Z" }, + { url = 
"https://files.pythonhosted.org/packages/ac/4d/e66465c5411a7cf4866aeadc7d108081d8ceba9bc7abe6b14aa21c671ec3/zstandard-0.25.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3f79487c687b1fc69f19e487cd949bf3aae653d181dfb5fde3bf6d18894706f", size = 640559, upload-time = "2025-09-14T22:16:27.973Z" }, + { url = "https://files.pythonhosted.org/packages/12/56/354fe655905f290d3b147b33fe946b0f27e791e4b50a5f004c802cb3eb7b/zstandard-0.25.0-cp311-cp311-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:0bbc9a0c65ce0eea3c34a691e3c4b6889f5f3909ba4822ab385fab9057099431", size = 5348020, upload-time = "2025-09-14T22:16:29.523Z" }, + { url = "https://files.pythonhosted.org/packages/3b/13/2b7ed68bd85e69a2069bcc72141d378f22cae5a0f3b353a2c8f50ef30c1b/zstandard-0.25.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:01582723b3ccd6939ab7b3a78622c573799d5d8737b534b86d0e06ac18dbde4a", size = 5058126, upload-time = "2025-09-14T22:16:31.811Z" }, + { url = "https://files.pythonhosted.org/packages/c9/dd/fdaf0674f4b10d92cb120ccff58bbb6626bf8368f00ebfd2a41ba4a0dc99/zstandard-0.25.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:5f1ad7bf88535edcf30038f6919abe087f606f62c00a87d7e33e7fc57cb69fcc", size = 5405390, upload-time = "2025-09-14T22:16:33.486Z" }, + { url = "https://files.pythonhosted.org/packages/0f/67/354d1555575bc2490435f90d67ca4dd65238ff2f119f30f72d5cde09c2ad/zstandard-0.25.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:06acb75eebeedb77b69048031282737717a63e71e4ae3f77cc0c3b9508320df6", size = 5452914, upload-time = "2025-09-14T22:16:35.277Z" }, + { url = "https://files.pythonhosted.org/packages/bb/1f/e9cfd801a3f9190bf3e759c422bbfd2247db9d7f3d54a56ecde70137791a/zstandard-0.25.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9300d02ea7c6506f00e627e287e0492a5eb0371ec1670ae852fefffa6164b072", size = 5559635, upload-time = "2025-09-14T22:16:37.141Z" }, + { url = "https://files.pythonhosted.org/packages/21/88/5ba550f797ca953a52d708c8e4f380959e7e3280af029e38fbf47b55916e/zstandard-0.25.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bfd06b1c5584b657a2892a6014c2f4c20e0db0208c159148fa78c65f7e0b0277", size = 5048277, upload-time = "2025-09-14T22:16:38.807Z" }, + { url = "https://files.pythonhosted.org/packages/46/c0/ca3e533b4fa03112facbe7fbe7779cb1ebec215688e5df576fe5429172e0/zstandard-0.25.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f373da2c1757bb7f1acaf09369cdc1d51d84131e50d5fa9863982fd626466313", size = 5574377, upload-time = "2025-09-14T22:16:40.523Z" }, + { url = "https://files.pythonhosted.org/packages/12/9b/3fb626390113f272abd0799fd677ea33d5fc3ec185e62e6be534493c4b60/zstandard-0.25.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6c0e5a65158a7946e7a7affa6418878ef97ab66636f13353b8502d7ea03c8097", size = 4961493, upload-time = "2025-09-14T22:16:43.3Z" }, + { url = "https://files.pythonhosted.org/packages/cb/d3/23094a6b6a4b1343b27ae68249daa17ae0651fcfec9ed4de09d14b940285/zstandard-0.25.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:c8e167d5adf59476fa3e37bee730890e389410c354771a62e3c076c86f9f7778", size = 5269018, upload-time = "2025-09-14T22:16:45.292Z" }, + { url = "https://files.pythonhosted.org/packages/8c/a7/bb5a0c1c0f3f4b5e9d5b55198e39de91e04ba7c205cc46fcb0f95f0383c1/zstandard-0.25.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:98750a309eb2f020da61e727de7d7ba3c57c97cf6213f6f6277bb7fb42a8e065", size = 
5443672, upload-time = "2025-09-14T22:16:47.076Z" }, + { url = "https://files.pythonhosted.org/packages/27/22/503347aa08d073993f25109c36c8d9f029c7d5949198050962cb568dfa5e/zstandard-0.25.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:22a086cff1b6ceca18a8dd6096ec631e430e93a8e70a9ca5efa7561a00f826fa", size = 5822753, upload-time = "2025-09-14T22:16:49.316Z" }, + { url = "https://files.pythonhosted.org/packages/e2/be/94267dc6ee64f0f8ba2b2ae7c7a2df934a816baaa7291db9e1aa77394c3c/zstandard-0.25.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:72d35d7aa0bba323965da807a462b0966c91608ef3a48ba761678cb20ce5d8b7", size = 5366047, upload-time = "2025-09-14T22:16:51.328Z" }, + { url = "https://files.pythonhosted.org/packages/7b/a3/732893eab0a3a7aecff8b99052fecf9f605cf0fb5fb6d0290e36beee47a4/zstandard-0.25.0-cp311-cp311-win32.whl", hash = "sha256:f5aeea11ded7320a84dcdd62a3d95b5186834224a9e55b92ccae35d21a8b63d4", size = 436484, upload-time = "2025-09-14T22:16:55.005Z" }, + { url = "https://files.pythonhosted.org/packages/43/a3/c6155f5c1cce691cb80dfd38627046e50af3ee9ddc5d0b45b9b063bfb8c9/zstandard-0.25.0-cp311-cp311-win_amd64.whl", hash = "sha256:daab68faadb847063d0c56f361a289c4f268706b598afbf9ad113cbe5c38b6b2", size = 506183, upload-time = "2025-09-14T22:16:52.753Z" }, + { url = "https://files.pythonhosted.org/packages/8c/3e/8945ab86a0820cc0e0cdbf38086a92868a9172020fdab8a03ac19662b0e5/zstandard-0.25.0-cp311-cp311-win_arm64.whl", hash = "sha256:22a06c5df3751bb7dc67406f5374734ccee8ed37fc5981bf1ad7041831fa1137", size = 462533, upload-time = "2025-09-14T22:16:53.878Z" }, + { url = "https://files.pythonhosted.org/packages/82/fc/f26eb6ef91ae723a03e16eddb198abcfce2bc5a42e224d44cc8b6765e57e/zstandard-0.25.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7b3c3a3ab9daa3eed242d6ecceead93aebbb8f5f84318d82cee643e019c4b73b", size = 795738, upload-time = "2025-09-14T22:16:56.237Z" }, + { url = "https://files.pythonhosted.org/packages/aa/1c/d920d64b22f8dd028a8b90e2d756e431a5d86194caa78e3819c7bf53b4b3/zstandard-0.25.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:913cbd31a400febff93b564a23e17c3ed2d56c064006f54efec210d586171c00", size = 640436, upload-time = "2025-09-14T22:16:57.774Z" }, + { url = "https://files.pythonhosted.org/packages/53/6c/288c3f0bd9fcfe9ca41e2c2fbfd17b2097f6af57b62a81161941f09afa76/zstandard-0.25.0-cp312-cp312-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:011d388c76b11a0c165374ce660ce2c8efa8e5d87f34996aa80f9c0816698b64", size = 5343019, upload-time = "2025-09-14T22:16:59.302Z" }, + { url = "https://files.pythonhosted.org/packages/1e/15/efef5a2f204a64bdb5571e6161d49f7ef0fffdbca953a615efbec045f60f/zstandard-0.25.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6dffecc361d079bb48d7caef5d673c88c8988d3d33fb74ab95b7ee6da42652ea", size = 5063012, upload-time = "2025-09-14T22:17:01.156Z" }, + { url = "https://files.pythonhosted.org/packages/b7/37/a6ce629ffdb43959e92e87ebdaeebb5ac81c944b6a75c9c47e300f85abdf/zstandard-0.25.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:7149623bba7fdf7e7f24312953bcf73cae103db8cae49f8154dd1eadc8a29ecb", size = 5394148, upload-time = "2025-09-14T22:17:03.091Z" }, + { url = "https://files.pythonhosted.org/packages/e3/79/2bf870b3abeb5c070fe2d670a5a8d1057a8270f125ef7676d29ea900f496/zstandard-0.25.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:6a573a35693e03cf1d67799fd01b50ff578515a8aeadd4595d2a7fa9f3ec002a", size = 
5451652, upload-time = "2025-09-14T22:17:04.979Z" }, + { url = "https://files.pythonhosted.org/packages/53/60/7be26e610767316c028a2cbedb9a3beabdbe33e2182c373f71a1c0b88f36/zstandard-0.25.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5a56ba0db2d244117ed744dfa8f6f5b366e14148e00de44723413b2f3938a902", size = 5546993, upload-time = "2025-09-14T22:17:06.781Z" }, + { url = "https://files.pythonhosted.org/packages/85/c7/3483ad9ff0662623f3648479b0380d2de5510abf00990468c286c6b04017/zstandard-0.25.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:10ef2a79ab8e2974e2075fb984e5b9806c64134810fac21576f0668e7ea19f8f", size = 5046806, upload-time = "2025-09-14T22:17:08.415Z" }, + { url = "https://files.pythonhosted.org/packages/08/b3/206883dd25b8d1591a1caa44b54c2aad84badccf2f1de9e2d60a446f9a25/zstandard-0.25.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:aaf21ba8fb76d102b696781bddaa0954b782536446083ae3fdaa6f16b25a1c4b", size = 5576659, upload-time = "2025-09-14T22:17:10.164Z" }, + { url = "https://files.pythonhosted.org/packages/9d/31/76c0779101453e6c117b0ff22565865c54f48f8bd807df2b00c2c404b8e0/zstandard-0.25.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1869da9571d5e94a85a5e8d57e4e8807b175c9e4a6294e3b66fa4efb074d90f6", size = 4953933, upload-time = "2025-09-14T22:17:11.857Z" }, + { url = "https://files.pythonhosted.org/packages/18/e1/97680c664a1bf9a247a280a053d98e251424af51f1b196c6d52f117c9720/zstandard-0.25.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:809c5bcb2c67cd0ed81e9229d227d4ca28f82d0f778fc5fea624a9def3963f91", size = 5268008, upload-time = "2025-09-14T22:17:13.627Z" }, + { url = "https://files.pythonhosted.org/packages/1e/73/316e4010de585ac798e154e88fd81bb16afc5c5cb1a72eeb16dd37e8024a/zstandard-0.25.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f27662e4f7dbf9f9c12391cb37b4c4c3cb90ffbd3b1fb9284dadbbb8935fa708", size = 5433517, upload-time = "2025-09-14T22:17:16.103Z" }, + { url = "https://files.pythonhosted.org/packages/5b/60/dd0f8cfa8129c5a0ce3ea6b7f70be5b33d2618013a161e1ff26c2b39787c/zstandard-0.25.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:99c0c846e6e61718715a3c9437ccc625de26593fea60189567f0118dc9db7512", size = 5814292, upload-time = "2025-09-14T22:17:17.827Z" }, + { url = "https://files.pythonhosted.org/packages/fc/5f/75aafd4b9d11b5407b641b8e41a57864097663699f23e9ad4dbb91dc6bfe/zstandard-0.25.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:474d2596a2dbc241a556e965fb76002c1ce655445e4e3bf38e5477d413165ffa", size = 5360237, upload-time = "2025-09-14T22:17:19.954Z" }, + { url = "https://files.pythonhosted.org/packages/ff/8d/0309daffea4fcac7981021dbf21cdb2e3427a9e76bafbcdbdf5392ff99a4/zstandard-0.25.0-cp312-cp312-win32.whl", hash = "sha256:23ebc8f17a03133b4426bcc04aabd68f8236eb78c3760f12783385171b0fd8bd", size = 436922, upload-time = "2025-09-14T22:17:24.398Z" }, + { url = "https://files.pythonhosted.org/packages/79/3b/fa54d9015f945330510cb5d0b0501e8253c127cca7ebe8ba46a965df18c5/zstandard-0.25.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffef5a74088f1e09947aecf91011136665152e0b4b359c42be3373897fb39b01", size = 506276, upload-time = "2025-09-14T22:17:21.429Z" }, + { url = "https://files.pythonhosted.org/packages/ea/6b/8b51697e5319b1f9ac71087b0af9a40d8a6288ff8025c36486e0c12abcc4/zstandard-0.25.0-cp312-cp312-win_arm64.whl", hash = "sha256:181eb40e0b6a29b3cd2849f825e0fa34397f649170673d385f3598ae17cca2e9", size = 462679, upload-time = "2025-09-14T22:17:23.147Z" }, + { url = 
"https://files.pythonhosted.org/packages/35/0b/8df9c4ad06af91d39e94fa96cc010a24ac4ef1378d3efab9223cc8593d40/zstandard-0.25.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ec996f12524f88e151c339688c3897194821d7f03081ab35d31d1e12ec975e94", size = 795735, upload-time = "2025-09-14T22:17:26.042Z" }, + { url = "https://files.pythonhosted.org/packages/3f/06/9ae96a3e5dcfd119377ba33d4c42a7d89da1efabd5cb3e366b156c45ff4d/zstandard-0.25.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a1a4ae2dec3993a32247995bdfe367fc3266da832d82f8438c8570f989753de1", size = 640440, upload-time = "2025-09-14T22:17:27.366Z" }, + { url = "https://files.pythonhosted.org/packages/d9/14/933d27204c2bd404229c69f445862454dcc101cd69ef8c6068f15aaec12c/zstandard-0.25.0-cp313-cp313-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:e96594a5537722fdfb79951672a2a63aec5ebfb823e7560586f7484819f2a08f", size = 5343070, upload-time = "2025-09-14T22:17:28.896Z" }, + { url = "https://files.pythonhosted.org/packages/6d/db/ddb11011826ed7db9d0e485d13df79b58586bfdec56e5c84a928a9a78c1c/zstandard-0.25.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bfc4e20784722098822e3eee42b8e576b379ed72cca4a7cb856ae733e62192ea", size = 5063001, upload-time = "2025-09-14T22:17:31.044Z" }, + { url = "https://files.pythonhosted.org/packages/db/00/87466ea3f99599d02a5238498b87bf84a6348290c19571051839ca943777/zstandard-0.25.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:457ed498fc58cdc12fc48f7950e02740d4f7ae9493dd4ab2168a47c93c31298e", size = 5394120, upload-time = "2025-09-14T22:17:32.711Z" }, + { url = "https://files.pythonhosted.org/packages/2b/95/fc5531d9c618a679a20ff6c29e2b3ef1d1f4ad66c5e161ae6ff847d102a9/zstandard-0.25.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:fd7a5004eb1980d3cefe26b2685bcb0b17989901a70a1040d1ac86f1d898c551", size = 5451230, upload-time = "2025-09-14T22:17:34.41Z" }, + { url = "https://files.pythonhosted.org/packages/63/4b/e3678b4e776db00f9f7b2fe58e547e8928ef32727d7a1ff01dea010f3f13/zstandard-0.25.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8e735494da3db08694d26480f1493ad2cf86e99bdd53e8e9771b2752a5c0246a", size = 5547173, upload-time = "2025-09-14T22:17:36.084Z" }, + { url = "https://files.pythonhosted.org/packages/4e/d5/ba05ed95c6b8ec30bd468dfeab20589f2cf709b5c940483e31d991f2ca58/zstandard-0.25.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3a39c94ad7866160a4a46d772e43311a743c316942037671beb264e395bdd611", size = 5046736, upload-time = "2025-09-14T22:17:37.891Z" }, + { url = "https://files.pythonhosted.org/packages/50/d5/870aa06b3a76c73eced65c044b92286a3c4e00554005ff51962deef28e28/zstandard-0.25.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:172de1f06947577d3a3005416977cce6168f2261284c02080e7ad0185faeced3", size = 5576368, upload-time = "2025-09-14T22:17:40.206Z" }, + { url = "https://files.pythonhosted.org/packages/5d/35/398dc2ffc89d304d59bc12f0fdd931b4ce455bddf7038a0a67733a25f550/zstandard-0.25.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:3c83b0188c852a47cd13ef3bf9209fb0a77fa5374958b8c53aaa699398c6bd7b", size = 4954022, upload-time = "2025-09-14T22:17:41.879Z" }, + { url = "https://files.pythonhosted.org/packages/9a/5c/36ba1e5507d56d2213202ec2b05e8541734af5f2ce378c5d1ceaf4d88dc4/zstandard-0.25.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1673b7199bbe763365b81a4f3252b8e80f44c9e323fc42940dc8843bfeaf9851", size = 
5267889, upload-time = "2025-09-14T22:17:43.577Z" }, + { url = "https://files.pythonhosted.org/packages/70/e8/2ec6b6fb7358b2ec0113ae202647ca7c0e9d15b61c005ae5225ad0995df5/zstandard-0.25.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:0be7622c37c183406f3dbf0cba104118eb16a4ea7359eeb5752f0794882fc250", size = 5433952, upload-time = "2025-09-14T22:17:45.271Z" }, + { url = "https://files.pythonhosted.org/packages/7b/01/b5f4d4dbc59ef193e870495c6f1275f5b2928e01ff5a81fecb22a06e22fb/zstandard-0.25.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:5f5e4c2a23ca271c218ac025bd7d635597048b366d6f31f420aaeb715239fc98", size = 5814054, upload-time = "2025-09-14T22:17:47.08Z" }, + { url = "https://files.pythonhosted.org/packages/b2/e5/fbd822d5c6f427cf158316d012c5a12f233473c2f9c5fe5ab1ae5d21f3d8/zstandard-0.25.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f187a0bb61b35119d1926aee039524d1f93aaf38a9916b8c4b78ac8514a0aaf", size = 5360113, upload-time = "2025-09-14T22:17:48.893Z" }, + { url = "https://files.pythonhosted.org/packages/8e/e0/69a553d2047f9a2c7347caa225bb3a63b6d7704ad74610cb7823baa08ed7/zstandard-0.25.0-cp313-cp313-win32.whl", hash = "sha256:7030defa83eef3e51ff26f0b7bfb229f0204b66fe18e04359ce3474ac33cbc09", size = 436936, upload-time = "2025-09-14T22:17:52.658Z" }, + { url = "https://files.pythonhosted.org/packages/d9/82/b9c06c870f3bd8767c201f1edbdf9e8dc34be5b0fbc5682c4f80fe948475/zstandard-0.25.0-cp313-cp313-win_amd64.whl", hash = "sha256:1f830a0dac88719af0ae43b8b2d6aef487d437036468ef3c2ea59c51f9d55fd5", size = 506232, upload-time = "2025-09-14T22:17:50.402Z" }, + { url = "https://files.pythonhosted.org/packages/d4/57/60c3c01243bb81d381c9916e2a6d9e149ab8627c0c7d7abb2d73384b3c0c/zstandard-0.25.0-cp313-cp313-win_arm64.whl", hash = "sha256:85304a43f4d513f5464ceb938aa02c1e78c2943b29f44a750b48b25ac999a049", size = 462671, upload-time = "2025-09-14T22:17:51.533Z" }, +] diff --git a/docs/docs/deployment-self-hosting/observability/_events-catalogue.md b/docs/docs/deployment-self-hosting/observability/_events-catalogue.md index af9b0fff05cb..8d12eb4125e6 100644 --- a/docs/docs/deployment-self-hosting/observability/_events-catalogue.md +++ b/docs/docs/deployment-self-hosting/observability/_events-catalogue.md @@ -330,7 +330,7 @@ Attributes: ### `segment_membership.backfill.environment.completed` Logged at `info` from: - - `api/segment_membership/tasks.py:132` + - `api/segment_membership/tasks.py:126` Attributes: - `environment.id` @@ -340,7 +340,7 @@ Attributes: ### `segment_membership.backfill.environment.failed` Logged at `exception` from: - - `api/segment_membership/tasks.py:125` + - `api/segment_membership/tasks.py:119` Attributes: - `environment.id` @@ -349,8 +349,8 @@ Attributes: ### `segment_membership.backfill.skipped` Logged at `info` from: + - `api/segment_membership/tasks.py:79` - `api/segment_membership/tasks.py:84` - - `api/segment_membership/tasks.py:89` Attributes: - `reason` @@ -358,7 +358,7 @@ Attributes: ### `segment_membership.compute.segment.skipped` Logged at `error` from: - - `api/segment_membership/services.py:129` + - `api/segment_membership/services.py:133` Attributes: - `project.id` @@ -368,7 +368,7 @@ Attributes: ### `segment_membership.refresh.project.completed` Logged at `info` from: - - `api/segment_membership/tasks.py:195` + - `api/segment_membership/tasks.py:189` Attributes: - `memberships.count` @@ -377,7 +377,7 @@ Attributes: ### `segment_membership.refresh.project.failed` Logged at `exception` from: - - `api/segment_membership/tasks.py:183` + - 
`api/segment_membership/tasks.py:177` Attributes: - `project.id` @@ -385,8 +385,8 @@ Attributes: ### `segment_membership.refresh.project.skipped` Logged at `info` from: - - `api/segment_membership/tasks.py:154` - - `api/segment_membership/tasks.py:163` + - `api/segment_membership/tasks.py:148` + - `api/segment_membership/tasks.py:157` Attributes: - `project.id` diff --git a/docs/docs/deployment-self-hosting/observability/_metrics-catalogue.md b/docs/docs/deployment-self-hosting/observability/_metrics-catalogue.md index f90a72b8a5c7..b931a958595e 100644 --- a/docs/docs/deployment-self-hosting/observability/_metrics-catalogue.md +++ b/docs/docs/deployment-self-hosting/observability/_metrics-catalogue.md @@ -82,7 +82,7 @@ Labels: Counter. -Total identities mirrored from Dynamo to Snowflake by the segment-membership backfill task across all environments. +Total identities mirrored from Dynamo to ClickHouse by the segment-membership backfill task across all environments. Labels: From 6e1584b91688fe88cc26fe2d76b4003b96f418d7 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Wed, 13 May 2026 22:54:30 +0100 Subject: [PATCH 11/12] fix(segment_membership): Make IDENTITIES.id fit UInt64 and FROM clause parse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit E2E smoke caught two bugs in the ClickHouse migration: - `_identity_id` was using `int.from_bytes(..., signed=True)`, which produces negative ints for half the UUID space. The CH schema has `id UInt64`, so negative IDs failed the bulk INSERT with `Unable to create Python array. ... trying to insert None values into a ClickHouse column that is not Nullable`. Switch to `signed=False`. - `FROM IDENTITIES FINAL i` is invalid CH SQL — the alias has to come before `FINAL`, not after it. Use `FROM IDENTITIES AS i FINAL`. beep boop --- api/segment_membership/mappers.py | 6 ++++-- api/segment_membership/services.py | 2 +- .../test_unit_segment_membership_mappers.py | 4 ++-- .../test_unit_segment_membership_services.py | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/api/segment_membership/mappers.py b/api/segment_membership/mappers.py index 8b21a6ce34cc..b53c1dbd041a 100644 --- a/api/segment_membership/mappers.py +++ b/api/segment_membership/mappers.py @@ -34,8 +34,10 @@ def map_identity_document_to_clickhouse_row( def _identity_id(identity_uuid: str) -> int: - """Project a UUID onto a stable signed 64-bit IDENTITIES.id.""" - return int.from_bytes(uuid.UUID(identity_uuid).bytes[:8], "big", signed=True) + """Project a UUID onto a stable unsigned 64-bit IDENTITIES.id.
+ Matches the `id UInt64` column; a signed projection goes negative + for half the UUID space, and ClickHouse rejects those on INSERT.""" + return int.from_bytes(uuid.UUID(identity_uuid).bytes[:8], "big", signed=False) def _coerce_trait_value(value: object) -> object: diff --git a/api/segment_membership/services.py b/api/segment_membership/services.py index 9f9915fc1354..57fd974a4847 100644 --- a/api/segment_membership/services.py +++ b/api/segment_membership/services.py @@ -140,7 +140,7 @@ def compute_segment_counts_for_project( select_clauses.append( f"SELECT {seg.id} AS segment_id, " f"i.environment_id AS env_key, count() AS c " - f"FROM IDENTITIES FINAL i " + f"FROM IDENTITIES AS i FINAL " f"WHERE i.environment_id IN {{env_keys:Array(String)}} AND ({predicate}) " f"GROUP BY i.environment_id" ) diff --git a/api/tests/unit/segment_membership/test_unit_segment_membership_mappers.py b/api/tests/unit/segment_membership/test_unit_segment_membership_mappers.py index fca714975354..174cc93b4f28 100644 --- a/api/tests/unit/segment_membership/test_unit_segment_membership_mappers.py +++ b/api/tests/unit/segment_membership/test_unit_segment_membership_mappers.py @@ -100,8 +100,8 @@ def test_map_identity_document_to_clickhouse_row__cases__return_expected( # Then non-id columns line up positionally with the IDENTITIES schema assert (env_id, identifier, identity_key, traits) == expected - # ...and the id column is a stable signed 64-bit projection of the UUID - assert -(2**63) <= _id < 2**63 + # ...and the id column is a stable unsigned 64-bit projection of the UUID + assert 0 <= _id < 2**64 def test_map_identity_document_to_clickhouse_row__same_uuid__same_id() -> None: diff --git a/api/tests/unit/segment_membership/test_unit_segment_membership_services.py b/api/tests/unit/segment_membership/test_unit_segment_membership_services.py index 28da10a8c893..7eff97aadaf1 100644 --- a/api/tests/unit/segment_membership/test_unit_segment_membership_services.py +++ b/api/tests/unit/segment_membership/test_unit_segment_membership_services.py @@ -229,7 +229,7 @@ def test_compute_segment_counts_for_project__one_segment__returns_membership_ins # The PoC's refresh query forces ReplacingMergeTree dedup at read # time — without FINAL the most-recent backfill might not be visible # until a merge pass runs. - assert "FROM IDENTITIES FINAL i" in sql + assert "FROM IDENTITIES AS i FINAL" in sql assert "GROUP BY i.environment_id" in sql From 280df5eab454d7f65a346f98e24ce783099fbefe Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Fri, 15 May 2026 20:27:23 +0100 Subject: [PATCH 12/12] chore(api): Bump flagsmith-common to 3.9.1 Picks up the test-tools plugin fix for `Counter.clear()` on parameterless metrics, which was breaking the segment-membership backfill tests.
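For the record, the breakage reproduces against plain prometheus_client, no Flagsmith code involved. A minimal sketch, assuming prometheus_client semantics (the metric names here are invented, and the actual reset logic lives in flagsmith-common's test-tools plugin, not below):

    from prometheus_client import CollectorRegistry, Counter

    registry = CollectorRegistry()
    labelled = Counter("rows_total", "Rows written", ["project"], registry=registry)
    plain = Counter("runs_total", "Backfill runs", registry=registry)

    labelled.labels(project="p1").inc()
    labelled.clear()  # supported: drops every labelset from the labelled parent

    plain.inc()
    plain.clear()  # blows up on current prometheus_client: label-less metrics
                   # never initialise the labelset bookkeeping clear() resets

Any between-test reset that iterates a registry and calls `clear()` on every collector trips over this as soon as one metric takes no labels, which is the case the 3.9.1 plugin fix addresses.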
beep boop --- api/pyproject.toml | 5 +---- api/uv.lock | 8 ++++---- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/api/pyproject.toml b/api/pyproject.toml index 2114e16bd6f4..c03d0ead74ec 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -73,7 +73,7 @@ dependencies = [ "hubspot-api-client>=12.0.0,<13.0.0", "djangorestframework-dataclasses>=1.3.1,<2.0.0", "pyotp>=2.9.0,<3.0.0", - "flagsmith-common[common-core,flagsmith-schemas,task-processor]>=3.9.0,<4", + "flagsmith-common[common-core,flagsmith-schemas,task-processor]>=3.9.1,<4", "django-stubs>=5.1.3,<6.0.0", "tzdata>=2024.1,<2025.0.0", "djangorestframework-simplejwt>=5.5.1,<6.0.0", @@ -189,9 +189,6 @@ licensing = { git = "https://github.com/flagsmith/licensing", tag = "v0.3.0" } flagsmith-private = { index = "flagsmith-pypi-production" } flagsmith-sql-flag-engine = { index = "flagsmith-pypi-staging" } -# CodeArtifact-hosted private index for pre-release Flagsmith packages. -# Auth token is fetched at install time via `aws codeartifact get-authorization-token` -# (see Makefile `install-packages` target). [[tool.uv.index]] name = "flagsmith-pypi-staging" url = "https://flagsmith-staging-302456015006.d.codeartifact.eu-west-2.amazonaws.com/pypi/flagsmith-pypi-staging/simple/" diff --git a/api/uv.lock b/api/uv.lock index c6d19ea03247..15512625654c 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -1606,7 +1606,7 @@ requires-dist = [ { name = "email-validator", marker = "extra == 'dev'", specifier = ">=2.0.0" }, { name = "environs", specifier = ">=14.1.1,<15.0.0" }, { name = "flagsmith", specifier = ">=5.3.0,<6.0.0" }, - { name = "flagsmith-common", extras = ["common-core", "flagsmith-schemas", "task-processor"], specifier = ">=3.9.0,<4" }, + { name = "flagsmith-common", extras = ["common-core", "flagsmith-schemas", "task-processor"], specifier = ">=3.9.1,<4" }, { name = "flagsmith-common", extras = ["test-tools"], marker = "extra == 'dev'" }, { name = "flagsmith-flag-engine", specifier = ">=10.1.0,<11.0.0" }, { name = "flagsmith-ldap", marker = "extra == 'ldap'", git = "https://github.com/flagsmith/flagsmith-ldap?tag=v0.1.2" }, @@ -1681,11 +1681,11 @@ provides-extras = ["auth-controller", "saml", "ldap", "workflows", "licensing", [[package]] name = "flagsmith-common" -version = "3.9.0" +version = "3.9.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f3/a8/c65b2989644c1a0acf45e63692b112be2b5f13a37753aba26460553cdc0d/flagsmith_common-3.9.0.tar.gz", hash = "sha256:b47b141d366a6714285a0768e08e24adbc9849400294d6fc4e6030087928d8e6", size = 59007, upload-time = "2026-05-01T11:02:08.999Z" } +sdist = { url = "https://files.pythonhosted.org/packages/77/02/6db44d9089832b0267f9b8bac73cf57eeb3769364e6a523112273ee5cbea/flagsmith_common-3.9.1.tar.gz", hash = "sha256:2b78015b290c571d20e2ba59ee621346cf7ec8340bfc01acf3620af3725d9318", size = 59166, upload-time = "2026-05-11T16:46:29.797Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e2/7e/ddf4be1f0cbd3c53a78c44e7aa78226949f8e290e5e9cc6fb873b00bbb70/flagsmith_common-3.9.0-py3-none-any.whl", hash = "sha256:782183d05b891ed5f19bebad2f281a2ebd6f69728c3d3f524c4bebc25a654cf6", size = 96586, upload-time = "2026-05-01T11:02:07.19Z" }, + { url = "https://files.pythonhosted.org/packages/06/fd/67f602c3859eba1baf30fc1916d0997cb3541b71acea5964d47c2b4b5a3e/flagsmith_common-3.9.1-py3-none-any.whl", hash = "sha256:32bd530a32ecd0ff7a6e868341b60d7e778d56ebf0a7ed0623ecdfcabd87fef8", size = 96771, upload-time = 
"2026-05-11T16:46:28.271Z" }, ] [package.optional-dependencies]