Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""add unique constraint on replaces_id

Revision ID: 4dbf24ed1857
Revises: 398067c53257
Create Date: 2026-05-13 11:22:59.646605

"""
from alembic import op

# revision identifiers, used by Alembic.
revision = '4dbf24ed1857'
down_revision = '398067c53257'
branch_labels = None
depends_on = None


def upgrade():
    """Swap the non-unique index on ``scoresets.replaces_id`` for a unique one.

    The existing index is dropped and an index with the same name is created
    over the same column with ``unique=True``.

    NOTE(review): presumably the unique index cannot be built if existing rows
    already share a ``replaces_id`` -- confirm the data is clean before running.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    index_name = 'ix_scoresets_replaces_id'
    table = 'scoresets'
    indexed_columns = ['replaces_id']

    op.drop_index(index_name, table_name=table)
    op.create_index(op.f(index_name), table, indexed_columns, unique=True)
    # ### end Alembic commands ###


def downgrade():
    """Restore the non-unique index on ``scoresets.replaces_id``.

    Reverses ``upgrade``: the unique index is dropped and an index with the
    same name is recreated over the same column with ``unique=False``.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    index_name = 'ix_scoresets_replaces_id'
    table = 'scoresets'
    indexed_columns = ['replaces_id']

    op.drop_index(op.f(index_name), table_name=table)
    op.create_index(index_name, table, indexed_columns, unique=False)
    # ### end Alembic commands ###
14 changes: 7 additions & 7 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 21 additions & 1 deletion src/mavedb/lib/annotation/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@
See: https://va-spec.ga4gh.org/en/latest/va-standard-profiles/community-profiles/acmg-2015-profiles.html#variant-pathogenicity-statement-acmg-2015
"""

from typing import Optional
from typing import Optional, Union

from ga4gh.va_spec.acmg_2015 import VariantPathogenicityStatement
from ga4gh.va_spec.base.core import ExperimentalVariantFunctionalImpactStudyResult, Statement

from mavedb.lib.annotation.classification import functional_classification_of_variant
from mavedb.lib.annotation.exceptions import MappingDataDoesntExistException
from mavedb.lib.annotation.evidence_line import acmg_evidence_line, functional_evidence_line
from mavedb.lib.annotation.proposition import (
mapped_variant_to_experimental_variant_clinical_impact_proposition,
Expand Down Expand Up @@ -132,3 +133,22 @@ def variant_pathogenicity_statement(
return mapped_variant_to_pathogenicity_statement(
mapped_variant, clinical_proposition, clinical_evidence, strongest_calibration, strongest_range
)


def variant_highest_level_annotation(
    mapped_variant: MappedVariant,
) -> Optional[Union[ExperimentalVariantFunctionalImpactStudyResult, Statement, VariantPathogenicityStatement]]:
    """
    Build the highest VA-Spec layer that can be materialized for a mapped variant.

    The layers are tried from highest to lowest: a pathogenicity statement when
    ``can_annotate_variant_for_pathogenicity_evidence`` accepts the variant, otherwise a functional
    impact statement when ``can_annotate_variant_for_functional_statement`` accepts it, otherwise the
    study result.

    Returns None when any of those steps raises ``MappingDataDoesntExistException``.
    """
    annotation: Optional[Union[ExperimentalVariantFunctionalImpactStudyResult, Statement, VariantPathogenicityStatement]]

    try:
        # The eligibility checks stay inside the try block so that a missing-mapping error raised by a
        # check is handled the same way as one raised by a builder.
        if can_annotate_variant_for_pathogenicity_evidence(mapped_variant):
            annotation = variant_pathogenicity_statement(mapped_variant)
        elif can_annotate_variant_for_functional_statement(mapped_variant):
            annotation = variant_functional_impact_statement(mapped_variant)
        else:
            annotation = variant_study_result(mapped_variant)
    except MappingDataDoesntExistException:
        return None

    return annotation
19 changes: 14 additions & 5 deletions src/mavedb/lib/annotation/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,20 @@ def score_calibration_as_document(score_calibration: ScoreCalibration) -> Docume
name="MaveDB Score Calibration",
title=score_calibration.title,
extensions=[
Extension(
name="Baseline score",
value=score_calibration.baseline_score,
description=score_calibration.baseline_score_description
or "No description for this baseline score provided.",
# Omit the baseline-score extension when no baseline score exists: Extension.value is required,
# so an extension with a null value will be dropped by model_dump(exclude_none=True) and will
# not round trip when served by the API.
*(
[
Extension(
name="Baseline score",
value=score_calibration.baseline_score,
description=score_calibration.baseline_score_description
or "No description for this baseline score provided.",
)
]
if score_calibration.baseline_score is not None
else []
),
Extension(
name="Research use only",
Expand Down
61 changes: 53 additions & 8 deletions src/mavedb/lib/score_sets.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from pandas.testing import assert_index_equal
from sqlalchemy import Integer, and_, cast, func, or_, select
from sqlalchemy.orm import Query, Session, aliased, contains_eager, joinedload, selectinload
from sqlalchemy.exc import IntegrityError

from mavedb.lib.exceptions import ValidationError
from mavedb.lib.logging.context import logging_context, save_to_logging_context
Expand Down Expand Up @@ -409,9 +410,7 @@ def fetch_score_set_search_filter_options(
controlled_keywords_counter_list = []
for key, label_counter in controlled_keywords_counter.items():
for label, count in label_counter.items():
controlled_keywords_counter_list.append(
ControlledKeywordFilterOption(key=key, value=label, count=count)
)
controlled_keywords_counter_list.append(ControlledKeywordFilterOption(key=key, value=label, count=count))

logger.debug(msg="Score set search filter options were fetched.", extra=logging_context())

Expand Down Expand Up @@ -555,6 +554,39 @@ def find_publish_or_private_superseded_score_set_tail(
return score_set


def get_current_mapped_variants_for_annotation(db: Session, score_set: ScoreSet) -> Sequence[MappedVariant]:
    """
    Fetch every current mapped variant of ``score_set`` with its annotation relationships preloaded.

    The query joins ``MappedVariant -> Variant -> ScoreSet``, keeps rows whose variant belongs to
    ``score_set`` and whose ``current`` flag is true, and populates ``MappedVariant.variant`` and
    ``Variant.score_set`` from those joins. On the score set it additionally select-in loads the
    publication identifier associations, creator, modifier, license, experiment, and score
    calibrations, so callers that walk ``MappedVariant.variant.score_set`` do not lazy load per variant.

    Keeping the eager-load shape in one place lets every caller that builds annotations share it.
    """

    def score_set_path():
        # Build a fresh loader chain for each option, exactly as if it were written out by hand.
        return contains_eager(MappedVariant.variant).contains_eager(Variant.score_set)

    # Relationships hanging off the score set, in the order their loader options are applied.
    select_in_relationships = (
        ScoreSet.publication_identifier_associations,
        ScoreSet.created_by,
        ScoreSet.modified_by,
        ScoreSet.license,
        ScoreSet.experiment,
        ScoreSet.score_calibrations,
    )

    loader_options = [score_set_path()]
    for relationship_attr in select_in_relationships:
        loader_options.append(score_set_path().selectinload(relationship_attr))

    query = (
        db.query(MappedVariant)
        .join(MappedVariant.variant)
        .join(Variant.score_set)
        .filter(Variant.score_set_id == score_set.id)
        .filter(MappedVariant.current.is_(True))
        .options(*loader_options)
    )
    return query.all()


def get_score_set_variants_as_csv(
db: Session,
score_set: ScoreSet,
Expand Down Expand Up @@ -642,11 +674,11 @@ def get_score_set_variants_as_csv(
namespaced_score_set_columns[ns] = ["clinical_significance", "clinical_review_status"]

need_mappings = (
include_post_mapped_hgvs
or "clingen" in namespaces
or "vep" in namespaces
or "gnomad" in namespaces
or bool(clinvar_namespaces)
include_post_mapped_hgvs
or "clingen" in namespaces
or "vep" in namespaces
or "gnomad" in namespaces
or bool(clinvar_namespaces)
)
need_gnomad = "gnomad" in namespaces

Expand Down Expand Up @@ -807,6 +839,19 @@ def is_null(value):
return null_values_re.fullmatch(value) or not value


def is_replaces_id_unique_violation(exc: "IntegrityError") -> bool:
    """
    Return True if the IntegrityError was caused by the unique constraint on scoresets.replaces_id.

    The decision is made from the driver-level exception stored on ``exc.orig``:

    * If the driver reports a SQLSTATE (``orig.pgcode``, ``orig.sqlstate`` or ``orig.diag.sqlstate``)
      and it is not ``23505`` (unique_violation), the result is False. This keeps other integrity
      errors that mention the column, such as a foreign-key violation, from being reported as a
      supersession conflict.
    * Otherwise the result is True when ``replaces_id`` appears in the diagnostics' constraint name or
      detail message. psycopg exposes the detail as ``diag.message_detail``; ``diag.detail`` is still
      read for drivers or wrappers that use that name.

    Returns False when ``exc`` carries no underlying driver exception.
    """
    unique_violation_sqlstate = "23505"

    orig = getattr(exc, "orig", None)
    if orig is None:
        return False

    diag = getattr(orig, "diag", None)

    # Use the first SQLSTATE the driver exposes; when none is available, fall back to text matching.
    for holder, attr in ((orig, "pgcode"), (orig, "sqlstate"), (diag, "sqlstate")):
        sqlstate = getattr(holder, attr, None)
        if isinstance(sqlstate, str):
            if sqlstate != unique_violation_sqlstate:
                return False
            break

    for attr in ("constraint_name", "message_detail", "detail"):
        text = getattr(diag, attr, None)
        if isinstance(text, str) and "replaces_id" in text:
            return True

    return False


def variant_to_csv_row(
variant: Variant,
columns: dict[str, list[str]],
Expand Down
2 changes: 1 addition & 1 deletion src/mavedb/models/score_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ class ScoreSet(Base):
# TODO Standardize on US or GB spelling for licenc/se.
licence_id = Column(Integer, ForeignKey("licenses.id"), index=True, nullable=False)
license: Mapped["License"] = relationship("License")
superseded_score_set_id = Column("replaces_id", Integer, ForeignKey("scoresets.id"), index=True, nullable=True)
superseded_score_set_id = Column("replaces_id", Integer, ForeignKey("scoresets.id"), index=True, nullable=True, unique=True,)
superseded_score_set: Mapped[Optional["ScoreSet"]] = relationship(
"ScoreSet",
uselist=False,
Expand Down
84 changes: 27 additions & 57 deletions src/mavedb/routers/score_sets.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from ga4gh.va_spec.base.core import ExperimentalVariantFunctionalImpactStudyResult, Statement
from pydantic import ValidationError
from sqlalchemy import or_, select
from sqlalchemy.exc import MultipleResultsFound
from sqlalchemy.exc import MultipleResultsFound, IntegrityError
from sqlalchemy.orm import Session, contains_eager

from mavedb import deps
Expand Down Expand Up @@ -54,7 +54,9 @@
csv_data_to_df,
fetch_score_set_search_filter_options,
find_meta_analyses_for_experiment_sets,
get_current_mapped_variants_for_annotation,
get_score_set_variants_as_csv,
is_replaces_id_unique_violation,
refresh_variant_urns,
variants_to_csv_rows,
)
Expand Down Expand Up @@ -1287,24 +1289,7 @@ def get_score_set_annotated_variants(

assert_permission(user_data, score_set, Action.READ)

mapped_variants = (
db.query(MappedVariant)
.join(MappedVariant.variant)
.join(Variant.score_set)
.filter(ScoreSet.urn == urn)
.filter(MappedVariant.current.is_(True))
.options(
contains_eager(MappedVariant.variant).contains_eager(Variant.score_set),
contains_eager(MappedVariant.variant)
.contains_eager(Variant.score_set)
.selectinload(ScoreSet.publication_identifier_associations),
contains_eager(MappedVariant.variant).contains_eager(Variant.score_set).selectinload(ScoreSet.created_by),
contains_eager(MappedVariant.variant).contains_eager(Variant.score_set).selectinload(ScoreSet.modified_by),
contains_eager(MappedVariant.variant).contains_eager(Variant.score_set).selectinload(ScoreSet.license),
contains_eager(MappedVariant.variant).contains_eager(Variant.score_set).selectinload(ScoreSet.experiment),
)
.all()
)
mapped_variants = get_current_mapped_variants_for_annotation(db, score_set)

if not mapped_variants:
logger.info(msg="No mapped variants are associated with the requested score set.", extra=logging_context())
Expand Down Expand Up @@ -1396,24 +1381,7 @@ def get_score_set_annotated_variants_functional_statement(

assert_permission(user_data, score_set, Action.READ)

mapped_variants = (
db.query(MappedVariant)
.join(MappedVariant.variant)
.join(Variant.score_set)
.filter(ScoreSet.urn == urn)
.filter(MappedVariant.current.is_(True))
.options(
contains_eager(MappedVariant.variant).contains_eager(Variant.score_set),
contains_eager(MappedVariant.variant)
.contains_eager(Variant.score_set)
.selectinload(ScoreSet.publication_identifier_associations),
contains_eager(MappedVariant.variant).contains_eager(Variant.score_set).selectinload(ScoreSet.created_by),
contains_eager(MappedVariant.variant).contains_eager(Variant.score_set).selectinload(ScoreSet.modified_by),
contains_eager(MappedVariant.variant).contains_eager(Variant.score_set).selectinload(ScoreSet.license),
contains_eager(MappedVariant.variant).contains_eager(Variant.score_set).selectinload(ScoreSet.experiment),
)
.all()
)
mapped_variants = get_current_mapped_variants_for_annotation(db, score_set)

if not mapped_variants:
logger.info(msg="No mapped variants are associated with the requested score set.", extra=logging_context())
Expand Down Expand Up @@ -1509,24 +1477,7 @@ def get_score_set_annotated_variants_functional_study_result(

assert_permission(user_data, score_set, Action.READ)

mapped_variants = (
db.query(MappedVariant)
.join(MappedVariant.variant)
.join(Variant.score_set)
.filter(ScoreSet.urn == urn)
.filter(MappedVariant.current.is_(True))
.options(
contains_eager(MappedVariant.variant).contains_eager(Variant.score_set),
contains_eager(MappedVariant.variant)
.contains_eager(Variant.score_set)
.selectinload(ScoreSet.publication_identifier_associations),
contains_eager(MappedVariant.variant).contains_eager(Variant.score_set).selectinload(ScoreSet.created_by),
contains_eager(MappedVariant.variant).contains_eager(Variant.score_set).selectinload(ScoreSet.modified_by),
contains_eager(MappedVariant.variant).contains_eager(Variant.score_set).selectinload(ScoreSet.license),
contains_eager(MappedVariant.variant).contains_eager(Variant.score_set).selectinload(ScoreSet.experiment),
)
.all()
)
mapped_variants = get_current_mapped_variants_for_annotation(db, score_set)

if not mapped_variants:
logger.info(msg="No mapped variants are associated with the requested score set.", extra=logging_context())
Expand Down Expand Up @@ -1613,6 +1564,16 @@ async def create_score_set(
status_code=404,
detail="The requested superseded score set does not exist",
)

if superseded_score_set.superseding_score_set:
logger.info(
msg=f"Failed to create score set. This score set has been superseded by score set: {superseded_score_set.superseding_score_set.urn}.",
extra=logging_context(),
)
raise HTTPException(
status_code=409,
detail=f"This score set has been superseded by score set: {superseded_score_set.superseding_score_set.urn}.",
)
else:
superseded_score_set = None

Expand Down Expand Up @@ -1867,8 +1828,17 @@ async def create_score_set(
score_calibrations=score_calibrations,
) # type: ignore[call-arg]

db.add(item)
db.commit()
try:
db.add(item)
db.commit()
except IntegrityError as e:
db.rollback()
if is_replaces_id_unique_violation(e):
raise HTTPException(
status_code=409,
detail="The requested score set has already been superseded.",
)
raise
db.refresh(item)

save_to_logging_context({"created_resource": item.urn})
Expand Down
Loading
Loading