-
Notifications
You must be signed in to change notification settings - Fork 522
fix(Code References): Code references are slow to query #7463
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
4ead702
32be058
54744b3
4a59b20
2b0f99e
e9575cd
803642d
4fb1afb
29f3276
52e663c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,2 @@ | ||
| # TODO: Implement history cleanup? | ||
| FEATURE_FLAG_CODE_REFERENCES_RETENTION_DAYS = 30 | ||
|
gagantrivedi marked this conversation as resolved.
|
||
|
|
||
| # Linux maximum file path length, as per limits.h/PATH_MAX | ||
| MAX_FILE_PATH_LENGTH = 4096 | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,224 @@ | ||
| import hashlib | ||
| import json | ||
|
gagantrivedi marked this conversation as resolved.
|
||
| from itertools import groupby | ||
| from operator import attrgetter | ||
| from typing import TypedDict | ||
|
|
||
| import django.db.models.deletion | ||
| from django.apps.registry import Apps | ||
| from django.db import migrations, models | ||
| from django.db.models import Max | ||
|
|
||
|
|
||
| class LegacyCodeReference(TypedDict): | ||
| feature_name: str | ||
| file_path: str | ||
| line_number: int | ||
|
|
||
|
|
||
| class StoredCodeReference(TypedDict): | ||
| file_path: str | ||
| line_number: int | ||
|
|
||
|
|
||
| def _hash_references(references: list[StoredCodeReference]) -> str: | ||
| return hashlib.md5( | ||
| json.dumps(references, sort_keys=True).encode(), | ||
| usedforsecurity=False, | ||
| ).hexdigest() | ||
|
|
||
|
|
||
| def migrate_scans_forward(apps: Apps, _: object) -> None: | ||
| """Split each legacy scan into new cardinality (per-repository and per-feature)""" | ||
|
|
||
| LegacyScan = apps.get_model("code_references", "FeatureFlagCodeReferencesScan") | ||
| PerFeatureScan = apps.get_model("code_references", "ScannedCodeReferences") | ||
| Repository = apps.get_model("code_references", "VCSRepository") | ||
| Feature = apps.get_model("features", "Feature") | ||
|
|
||
| legacy_scans_summaries = LegacyScan.objects.values( | ||
| "project_id", | ||
| "repository_url", | ||
| "vcs_provider", | ||
| ).annotate(last_scanned_at=Max("created_at")) | ||
|
|
||
| repositories = { | ||
| (summary["project_id"], summary["repository_url"]): Repository.objects.create( | ||
| project_id=summary["project_id"], | ||
| url=summary["repository_url"], | ||
| vcs_provider=summary["vcs_provider"], | ||
| last_scanned_at=summary["last_scanned_at"], | ||
| ) | ||
| for summary in legacy_scans_summaries | ||
| } | ||
|
|
||
| # Oldest-first per project so the newest scan wins on hash collisions | ||
| legacy_scans = LegacyScan.objects.order_by("project_id", "created_at").iterator() | ||
| grouped_scans = groupby(legacy_scans, key=attrgetter("project_id")) | ||
| for project_id, project_scans in grouped_scans: | ||
| features = { | ||
| (feature.project_id, feature.name): feature | ||
| for feature in Feature.objects.filter( | ||
| project_id=project_id, | ||
| deleted_at__isnull=True, # Historical models drop SoftDeleteManager | ||
| ) | ||
| } | ||
| for legacy_scan in project_scans: | ||
| repository_url = legacy_scan.repository_url | ||
| repository = repositories[project_id, repository_url] | ||
|
|
||
| references_by_feature: dict[str, list[StoredCodeReference]] = {} | ||
| for reference in legacy_scan.code_references: | ||
| feature_name = reference["feature_name"] | ||
| references_by_feature.setdefault(feature_name, []).append( | ||
| StoredCodeReference( | ||
| file_path=reference["file_path"], | ||
| line_number=reference["line_number"], | ||
| ) | ||
| ) | ||
|
|
||
| for feature_name, references in references_by_feature.items(): | ||
| if not (feature := features.get((project_id, feature_name))): | ||
| continue | ||
| PerFeatureScan.objects.update_or_create( | ||
| feature=feature, | ||
| repository=repository, | ||
| code_references_hash=_hash_references(references), | ||
| defaults={ | ||
| "revision": legacy_scan.revision, | ||
| "code_references": references, | ||
| "created_at": legacy_scan.created_at, | ||
| }, | ||
| ) | ||
|
|
||
|
|
||
| def migrate_scans_backward(apps: Apps, _: object) -> None: | ||
| """Mirror each per-feature row back into the legacy single-table layout.""" | ||
| LegacyScan = apps.get_model("code_references", "FeatureFlagCodeReferencesScan") | ||
| PerFeatureScan = apps.get_model("code_references", "ScannedCodeReferences") | ||
| LegacyScan._meta.get_field("created_at").auto_now_add = False | ||
|
|
||
| per_feature_scans = PerFeatureScan.objects.select_related( | ||
| "repository", | ||
| "feature", | ||
| ).iterator(chunk_size=200) | ||
|
|
||
| for per_feature_scan in per_feature_scans: | ||
| repository = per_feature_scan.repository | ||
| feature_name = per_feature_scan.feature.name | ||
| LegacyScan.objects.create( | ||
| project_id=repository.project_id, | ||
| repository_url=repository.url, | ||
| vcs_provider=repository.vcs_provider, | ||
| revision=per_feature_scan.revision, | ||
| code_references=[ | ||
| {"feature_name": feature_name, **reference} | ||
| for reference in per_feature_scan.code_references | ||
| ], | ||
| created_at=per_feature_scan.created_at, | ||
| ) | ||
|
|
||
|
|
||
| class Migration(migrations.Migration): | ||
| dependencies = [ | ||
| ("code_references", "0002_add_project_repo_created_index"), | ||
| ("features", "0066_constrain_feature_type"), | ||
| ("projects", "0029_bump_default_project_limits"), | ||
| ] | ||
|
|
||
| operations = [ | ||
| migrations.CreateModel( | ||
| name="VCSRepository", | ||
| fields=[ | ||
| ( | ||
| "id", | ||
| models.AutoField( | ||
| auto_created=True, | ||
| primary_key=True, | ||
| serialize=False, | ||
| verbose_name="ID", | ||
| ), | ||
| ), | ||
| ("created_at", models.DateTimeField(auto_now_add=True)), | ||
| ("url", models.URLField()), | ||
| ( | ||
| "vcs_provider", | ||
| models.CharField( | ||
| choices=[("github", "GitHub")], | ||
| max_length=50, | ||
| ), | ||
| ), | ||
| ("last_scanned_at", models.DateTimeField(null=True)), | ||
| ( | ||
| "project", | ||
| models.ForeignKey( | ||
| on_delete=django.db.models.deletion.CASCADE, | ||
| related_name="vcs_repositories", | ||
| to="projects.project", | ||
| ), | ||
| ), | ||
| ], | ||
| ), | ||
| migrations.AddConstraint( | ||
| model_name="vcsrepository", | ||
| constraint=models.UniqueConstraint( | ||
| fields=("project", "url"), | ||
| name="unique_vcs_repository", | ||
| ), | ||
| ), | ||
| migrations.CreateModel( | ||
| name="ScannedCodeReferences", | ||
| fields=[ | ||
| ( | ||
| "id", | ||
| models.AutoField( | ||
| auto_created=True, | ||
| primary_key=True, | ||
| serialize=False, | ||
| verbose_name="ID", | ||
| ), | ||
| ), | ||
| ("created_at", models.DateTimeField()), | ||
| ("revision", models.CharField(max_length=100)), | ||
| ("code_references", models.JSONField(default=list)), | ||
| ("code_references_hash", models.CharField(max_length=32)), | ||
| ( | ||
| "feature", | ||
| models.ForeignKey( | ||
| on_delete=django.db.models.deletion.CASCADE, | ||
| related_name="scanned_code_references", | ||
| to="features.feature", | ||
| ), | ||
| ), | ||
| ( | ||
| "repository", | ||
| models.ForeignKey( | ||
| on_delete=django.db.models.deletion.CASCADE, | ||
| related_name="scanned_code_references", | ||
| to="code_references.vcsrepository", | ||
| ), | ||
| ), | ||
| ], | ||
| ), | ||
| migrations.AddConstraint( | ||
| model_name="scannedcodereferences", | ||
| constraint=models.UniqueConstraint( | ||
| fields=("feature", "repository", "code_references_hash"), | ||
| name="unique_scanned_code_references", | ||
| ), | ||
| ), | ||
| migrations.AddIndex( | ||
| model_name="scannedcodereferences", | ||
| index=models.Index( | ||
| fields=("feature", "repository", "created_at"), | ||
| name="cr_feature_repo_created_idx", | ||
| ), | ||
| ), | ||
| migrations.RunPython( | ||
| code=migrate_scans_forward, | ||
| reverse_code=migrate_scans_backward, | ||
| ), | ||
| migrations.DeleteModel( | ||
| name="FeatureFlagCodeReferencesScan", | ||
| ), | ||
| ] | ||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -1,37 +1,75 @@ | ||||||||||||||||||
| from django.db import models | ||||||||||||||||||
|
|
||||||||||||||||||
| from projects.code_references.types import JSONCodeReference, VCSProvider | ||||||||||||||||||
| from projects.code_references.types import StoredCodeReference, VCSProvider | ||||||||||||||||||
|
|
||||||||||||||||||
|
|
||||||||||||||||||
| class FeatureFlagCodeReferencesScan(models.Model): | ||||||||||||||||||
| class VCSRepository(models.Model): | ||||||||||||||||||
| """ | ||||||||||||||||||
| A scan of feature flag code references in a repository | ||||||||||||||||||
| A VCS repository that is scanned for feature flag code references | ||||||||||||||||||
| """ | ||||||||||||||||||
|
|
||||||||||||||||||
| created_at = models.DateTimeField(auto_now_add=True) | ||||||||||||||||||
|
|
||||||||||||||||||
| project = models.ForeignKey( | ||||||||||||||||||
| "projects.Project", | ||||||||||||||||||
| on_delete=models.CASCADE, | ||||||||||||||||||
| related_name="code_references", | ||||||||||||||||||
| related_name="vcs_repositories", | ||||||||||||||||||
| ) | ||||||||||||||||||
|
|
||||||||||||||||||
| # Provider-agnostic URL to the web UI of the repository, e.g. https://github.flagsmith.com/backend/ | ||||||||||||||||||
| repository_url = models.URLField() | ||||||||||||||||||
| url = models.URLField() | ||||||||||||||||||
|
|
||||||||||||||||||
| vcs_provider = models.CharField( | ||||||||||||||||||
| max_length=50, | ||||||||||||||||||
| choices=VCSProvider.choices, | ||||||||||||||||||
| default=VCSProvider.GITHUB, # TODO: Remove when adding other providers | ||||||||||||||||||
| ) | ||||||||||||||||||
|
|
||||||||||||||||||
| last_scanned_at = models.DateTimeField(null=True) | ||||||||||||||||||
|
|
||||||||||||||||||
| class Meta: | ||||||||||||||||||
| constraints = [ | ||||||||||||||||||
| models.UniqueConstraint( | ||||||||||||||||||
| fields=["project", "url"], | ||||||||||||||||||
| name="unique_vcs_repository", | ||||||||||||||||||
| ), | ||||||||||||||||||
| ] | ||||||||||||||||||
|
|
||||||||||||||||||
|
|
||||||||||||||||||
| class ScannedCodeReferences(models.Model): | ||||||||||||||||||
| """ | ||||||||||||||||||
| A list of code references for a feature scanned from a VCS repository | ||||||||||||||||||
| """ | ||||||||||||||||||
|
|
||||||||||||||||||
| created_at = models.DateTimeField() | ||||||||||||||||||
|
|
||||||||||||||||||
| feature = models.ForeignKey( | ||||||||||||||||||
| "features.Feature", | ||||||||||||||||||
| on_delete=models.CASCADE, | ||||||||||||||||||
| related_name="scanned_code_references", | ||||||||||||||||||
| ) | ||||||||||||||||||
|
|
||||||||||||||||||
| repository = models.ForeignKey( | ||||||||||||||||||
| VCSRepository, | ||||||||||||||||||
| on_delete=models.CASCADE, | ||||||||||||||||||
| related_name="scanned_code_references", | ||||||||||||||||||
| ) | ||||||||||||||||||
|
|
||||||||||||||||||
| revision = models.CharField(max_length=100) | ||||||||||||||||||
| code_references = models.JSONField[list[JSONCodeReference]](default=list) | ||||||||||||||||||
|
|
||||||||||||||||||
| created_at = models.DateTimeField(auto_now_add=True, db_index=True) | ||||||||||||||||||
| code_references = models.JSONField[list[StoredCodeReference]](default=list) | ||||||||||||||||||
|
|
||||||||||||||||||
| code_references_hash = models.CharField(max_length=32) | ||||||||||||||||||
|
|
||||||||||||||||||
| class Meta: | ||||||||||||||||||
| ordering = ["-created_at"] | ||||||||||||||||||
| constraints = [ | ||||||||||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Did you see this being used in your query plan? Mine isn't using it, which brings me to another important question — do you think we should test the query on the staging DB at least? The production DB is very different from a MacBook, and the query still looks complex enough to warrant testing on a prod-like DB.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Yes! The constraint index was used heavily in my local tests to help narrow down the row search (feature, repository). But seemingly not enough, so thanks for flagging.
I ran this scenario in staging, via direct database access, and temporary tables matching the ones created in this PR: "a project with 400 features, 350 of which are present in code, 10 merges/day (mostly dupes), over 6 months". Results revealed slowness would bite us again in the future for big customers running microservices, as
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sorry, I forgot to add this in my response above. Real benchmarking, formatted by LLM: Scenario 1, common: small project, steady scanning — 40 features, 2 repos, 5 unique scans/repo/week, 6 months retained, 10,400 rows in
All three queries use
|
||||||||||||||||||
| query | time |
|---|---|
| list endpoint (full history) | 1,805 ms |
| list endpoint (3-month window) | 1,488 ms (~18% faster) |
| detail endpoint × 100 features | 85 ms total (≈0.85 ms each) |
The list endpoint stays above 1 s at this scale even with the covering index, because the inner subplan still loops once per (feature, repository) pair (318,500 loops, 1 row each). The 3-month window helps less than expected because the seed distributes scans uniformly over 6 months.
EXPLAIN ANALYZE full output
--- LIST ANNOTATION (full project) ---
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Index Scan using features_feature_pkey on public.features_feature f (cost=0.3..564849.7 rows=77 width=36) (actual time=12.8..1804.5 rows=400 loops=1)
Output: f.id, (SubPlan 2)
Filter: ((f.deleted_at IS NULL) AND (f.project_id = 25968))
Rows Removed by Filter: 32295
Buffers: shared hit=1297014
SubPlan 2
-> Aggregate (cost=7315.6..7315.6 rows=1 width=32) (actual time=4.5..4.5 rows=1 loops=400)
Output: array_agg((jsonb_build_object('repository_url', r.url, 'last_successful_repository_scanned_at', r.last_scanned_at, 'last_feature_found_at', scr.created_at, 'count', COALESCE((SubPlan 1), 0))))
Buffers: shared hit=1283893
-> Unique (cost=7311.3..7315.5 rows=12 width=86) (actual time=4.4..4.5 rows=9 loops=400)
Output: (jsonb_build_object('repository_url', r.url, 'last_successful_repository_scanned_at', r.last_scanned_at, 'last_feature_found_at', scr.created_at, 'count', COALESCE((SubPlan 1), 0))), r.url, scr.created_at
Buffers: shared hit=1283893
-> Sort (cost=7311.3..7313.4 rows=843 width=86) (actual time=4.4..4.4 rows=796 loops=400)
Output: (jsonb_build_object('repository_url', r.url, 'last_successful_repository_scanned_at', r.last_scanned_at, 'last_feature_found_at', scr.created_at, 'count', COALESCE((SubPlan 1), 0))), r.url, scr.created_at
Sort Key: r.url, scr.created_at DESC
Sort Method: quicksort Memory: 25kB
Buffers: shared hit=1283893
-> Hash Join (cost=1.7..7270.3 rows=843 width=86) (actual time=0.0..3.7 rows=796 loops=400)
Output: jsonb_build_object('repository_url', r.url, 'last_successful_repository_scanned_at', r.last_scanned_at, 'last_feature_found_at', scr.created_at, 'count', COALESCE((SubPlan 1), 0)), r.url, scr.created_at
Inner Unique: true
Hash Cond: (scr.repository_id = r.id)
Buffers: shared hit=1283887
-> Index Only Scan using bench_cr_feature_repo_created_idx on public.bench_code_references_scannedcodereferences scr (cost=0.4..142.7 rows=843 width=16) (actual time=0.0..0.1 rows=796 loops=400)
Output: scr.feature_id, scr.repository_id, scr.created_at
Index Cond: (scr.feature_id = f.id)
Heap Fetches: 0
Buffers: shared hit=9886
-> Hash (cost=1.1..1.1 rows=12 width=58) (actual time=0.0..0.0 rows=12 loops=1)
Output: r.url, r.last_scanned_at, r.id
Buckets: 1024 Batches: 1 Memory Usage: 10kB
Buffers: shared hit=1
-> Seq Scan on public.bench_code_references_vcsrepository r (cost=0.0..1.1 rows=12 width=58) (actual time=0.0..0.0 rows=12 loops=1)
Output: r.url, r.last_scanned_at, r.id
Buffers: shared hit=1
SubPlan 1
-> Limit (cost=0.4..8.4 rows=1 width=12) (actual time=0.0..0.0 rows=1 loops=318500)
Output: (jsonb_array_length(inner_scr.code_references)), inner_scr.created_at
Buffers: shared hit=1274000
-> Index Scan using bench_cr_feature_repo_created_idx on public.bench_code_references_scannedcodereferences inner_scr (cost=0.4..8.4 rows=1 width=12) (actual time=0.0..0.0 rows=1 loops=318500)
Output: jsonb_array_length(inner_scr.code_references), inner_scr.created_at
Index Cond: ((inner_scr.feature_id = scr.feature_id) AND (inner_scr.repository_id = scr.repository_id) AND (inner_scr.created_at = r.last_scanned_at))
Buffers: shared hit=1274000
Query Identifier: -4966899296074004523
Planning:
Buffers: shared hit=411
Planning Time: 1.1 ms
Execution Time: 1804.6 ms
(47 rows)
Time: 2152.9 ms (00:2.2)
--- LIST ANNOTATION with 3-month window ---
QUERY PLAN
Index Scan using features_feature_pkey on public.features_feature f (cost=0.3..281069.1 rows=77 width=36) (actual time=9.6..1488.2 rows=400 loops=1)
Output: f.id, (SubPlan 2)
Filter: ((f.deleted_at IS NULL) AND (f.project_id = 25968))
Rows Removed by Filter: 32295
Buffers: shared hit=650730
SubPlan 2
-> Aggregate (cost=3630.2..3630.2 rows=1 width=32) (actual time=3.7..3.7 rows=1 loops=400)
Output: array_agg((jsonb_build_object('repository_url', r.url, 'last_successful_repository_scanned_at', r.last_scanned_at, 'last_feature_found_at', scr.created_at, 'count', COALESCE((SubPlan 1), 0))))
Buffers: shared hit=637609
-> Unique (cost=3627.9..3630.0 rows=12 width=86) (actual time=3.6..3.7 rows=9 loops=400)
Output: (jsonb_build_object('repository_url', r.url, 'last_successful_repository_scanned_at', r.last_scanned_at, 'last_feature_found_at', scr.created_at, 'count', COALESCE((SubPlan 1), 0))), r.url, scr.created_at
Buffers: shared hit=637609
-> Sort (cost=3627.9..3629.0 rows=416 width=86) (actual time=3.6..3.7 rows=394 loops=400)
Output: (jsonb_build_object('repository_url', r.url, 'last_successful_repository_scanned_at', r.last_scanned_at, 'last_feature_found_at', scr.created_at, 'count', COALESCE((SubPlan 1), 0))), r.url, scr.created_at
Sort Key: r.url, scr.created_at DESC
Sort Method: quicksort Memory: 25kB
Buffers: shared hit=637609
-> Hash Join (cost=1.7..3609.8 rows=416 width=86) (actual time=0.0..3.3 rows=394 loops=400)
Output: jsonb_build_object('repository_url', r.url, 'last_successful_repository_scanned_at', r.last_scanned_at, 'last_feature_found_at', scr.created_at, 'count', COALESCE((SubPlan 1), 0)), r.url, scr.created_at
Inner Unique: true
Hash Cond: (scr.repository_id = r.id)
Buffers: shared hit=637609
-> Index Only Scan using bench_cr_feature_repo_created_idx on public.bench_code_references_scannedcodereferences scr (cost=0.4..88.8 rows=416 width=16) (actual time=0.0..0.1 rows=394 loops=400)
Output: scr.feature_id, scr.repository_id, scr.created_at
Index Cond: ((scr.feature_id = f.id) AND (scr.created_at >= (now() - '3 mons'::interval)))
Heap Fetches: 0
Buffers: shared hit=7608
-> Hash (cost=1.1..1.1 rows=12 width=58) (actual time=0.0..0.0 rows=12 loops=1)
Output: r.url, r.last_scanned_at, r.id
Buckets: 1024 Batches: 1 Memory Usage: 10kB
Buffers: shared hit=1
-> Seq Scan on public.bench_code_references_vcsrepository r (cost=0.0..1.1 rows=12 width=58) (actual time=0.0..0.0 rows=12 loops=1)
Output: r.url, r.last_scanned_at, r.id
Buffers: shared hit=1
SubPlan 1
-> Limit (cost=0.4..8.5 rows=1 width=12) (actual time=0.0..0.0 rows=1 loops=157500)
Output: (jsonb_array_length(inner_scr.code_references)), inner_scr.created_at
Buffers: shared hit=630000
-> Index Scan using bench_cr_feature_repo_created_idx on public.bench_code_references_scannedcodereferences inner_scr (cost=0.4..8.5 rows=1 width=12) (actual time=0.0..0.0 rows=1 loops=157500)
Output: jsonb_array_length(inner_scr.code_references), inner_scr.created_at
Index Cond: ((inner_scr.feature_id = scr.feature_id) AND (inner_scr.repository_id = scr.repository_id) AND (inner_scr.created_at >= (now() - '3 mons'::interval)) AND (inner_scr.created_at = r.last_scanned_at))
Buffers: shared hit=630000
Query Identifier: 5778658752241488958
Planning:
Buffers: shared hit=12
Planning Time: 0.3 ms
Execution Time: 1488.4 ms
(47 rows)
Time: 1744.8 ms (00:1.7)
--- DETAIL QUERY across 100 features (single plan, 100 loops on the inner scan) ---
QUERY PLAN
Incremental Sort (cost=1041.6..16836.9 rows=729 width=1575) (actual time=78.2..84.4 rows=1000 loops=1)
Output: s.feature_id, scr.id, scr.created_at, scr.revision, scr.code_references, r.url, r.vcs_provider, r.last_scanned_at
Sort Key: s.feature_id, r.url
Presorted Key: s.feature_id
Full-sort Groups: 25 Sort Method: quicksort Average Memory: 88kB Peak Memory: 88kB
Buffers: shared hit=28571
-> Nested Loop (cost=1001.0..16810.9 rows=729 width=1575) (actual time=77.8..81.6 rows=1000 loops=1)
Output: s.feature_id, scr.id, scr.created_at, scr.revision, scr.code_references, r.url, r.vcs_provider, r.last_scanned_at
Buffers: shared hit=28571
-> Limit (cost=1000.6..6539.9 rows=100 width=4) (actual time=77.7..78.0 rows=100 loops=1)
Output: s.feature_id
Buffers: shared hit=23871
-> Unique (cost=1000.6..22603.9 rows=390 width=4) (actual time=77.7..78.0 rows=100 loops=1)
Output: s.feature_id
Buffers: shared hit=23871
-> Gather Merge (cost=1000.6..22602.0 rows=780 width=4) (actual time=77.7..78.0 rows=200 loops=1)
Output: s.feature_id
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=23871
-> Unique (cost=0.6..21511.9 rows=390 width=4) (actual time=0.2..45.3 rows=234 loops=3)
Output: s.feature_id
Buffers: shared hit=23871
Worker 0: actual time=0.2..66.3 rows=350 loops=1
Buffers: shared hit=17134
Worker 1: actual time=0.1..69.4 rows=350 loops=1
Buffers: shared hit=6440
-> Nested Loop (cost=0.6..21509.5 rows=988 width=4) (actual time=0.2..45.2 rows=1167 loops=3)
Output: s.feature_id
Inner Unique: true
Buffers: shared hit=23871
Worker 0: actual time=0.2..66.1 rows=1810 loops=1
Buffers: shared hit=17134
Worker 1: actual time=0.1..69.3 rows=1689 loops=1
Buffers: shared hit=6440
-> Parallel Index Only Scan using bench_cr_feature_repo_created_idx on public.bench_code_references_scannedcodereferences s (cost=0.4..17668.6 rows=137042 width=16) (actual time=0.0..12.1 rows=109633 loops=3)
Output: s.feature_id, s.repository_id, s.created_at
Heap Fetches: 10400
Buffers: shared hit=19127
Worker 0: actual time=0.0..20.4 rows=173049 loops=1
Buffers: shared hit=14793
Worker 1: actual time=0.0..16.0 rows=155706 loops=1
Buffers: shared hit=4327
-> Memoize (cost=0.1..0.2 rows=1 width=12) (actual time=0.0..0.0 rows=0 loops=328900)
Output: r_1.id, r_1.last_scanned_at
Cache Key: s.repository_id, s.created_at
Cache Mode: logical
Hits: 0 Misses: 145 Evictions: 0 Overflows: 0 Memory Usage: 12kB
Buffers: shared hit=4744
Worker 0: actual time=0.0..0.0 rows=0 loops=173049
Hits: 171879 Misses: 1170 Evictions: 0 Overflows: 0 Memory Usage: 92kB
Buffers: shared hit=2341
Worker 1: actual time=0.0..0.0 rows=0 loops=155706
Hits: 154650 Misses: 1056 Evictions: 0 Overflows: 0 Memory Usage: 83kB
Buffers: shared hit=2113
-> Index Scan using bench_code_references_vcsrepository_pkey on public.bench_code_references_vcsrepository r_1 (cost=0.1..0.2 rows=1 width=12) (actual time=0.0..0.0 rows=0 loops=2371)
Output: r_1.id, r_1.last_scanned_at
Index Cond: (r_1.id = s.repository_id)
Filter: ((r_1.project_id = 25968) AND (s.created_at = r_1.last_scanned_at))
Rows Removed by Filter: 1
Buffers: shared hit=4744
Worker 0: actual time=0.0..0.0 rows=0 loops=1170
Buffers: shared hit=2341
Worker 1: actual time=0.0..0.0 rows=0 loops=1056
Buffers: shared hit=2113
-> Nested Loop (cost=0.4..102.6 rows=12 width=1575) (actual time=0.0..0.0 rows=10 loops=100)
Output: scr.id, scr.created_at, scr.revision, scr.code_references, scr.feature_id, r.url, r.vcs_provider, r.last_scanned_at
Buffers: shared hit=4700
-> Seq Scan on public.bench_code_references_vcsrepository r (cost=0.0..1.1 rows=12 width=65) (actual time=0.0..0.0 rows=12 loops=100)
Output: r.url, r.vcs_provider, r.last_scanned_at, r.id
Buffers: shared hit=100
-> Index Scan using bench_cr_feature_repo_created_idx on public.bench_code_references_scannedcodereferences scr (cost=0.4..8.4 rows=1 width=1518) (actual time=0.0..0.0 rows=1 loops=1200)
Output: scr.id, scr.created_at, scr.revision, scr.code_references, scr.code_references_hash, scr.feature_id, scr.repository_id
Index Cond: ((scr.feature_id = s.feature_id) AND (scr.repository_id = r.id) AND (scr.created_at = r.last_scanned_at))
Buffers: shared hit=4600
Query Identifier: 180893212974011606
Planning:
Buffers: shared hit=32
Planning Time: 0.4 ms
Execution Time: 84.5 ms
(80 rows)
Time: 343.6 ms
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need this comment?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've added it to hint at where this is materialised, because I personally find it useful, though one could also find it by searching. Weakly held — let me know if you prefer the 🔪