diff --git a/pyproject.toml b/pyproject.toml
index 09622e00e..01fd9aace 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,7 +32,8 @@ classifiers = [
 dependencies = [
     "backports.entry_points_selectable",
     "defusedxml", # For safely parsing XML files
-    "pydantic<2", # Locked to <2 by cygwin terminal
+    "pydantic>=2",
+    "pydantic-settings",
     "requests",
     "rich",
     "werkzeug",
@@ -47,7 +48,7 @@ client = [
     "websocket-client",
 ]
 developer = [
-    "bump-my-version<0.11.0", # Version control
+    "bump-my-version", # Version control
     "ipykernel", # Enable interactive coding with VS Code and Jupyter Notebook
     "pre-commit", # Formatting, linting, type checking, etc.
     "pytest", # Test code functionality
@@ -61,7 +62,7 @@ server = [
     "aiohttp",
     "cryptography",
     "fastapi[standard]",
-    "ispyb", # Responsible for setting requirements for SQLAlchemy and mysql-connector-python; v10.0.0: sqlalchemy <2, mysql-connector-python >=8.0.32
+    "ispyb>=10.2.4", # Responsible for setting requirements for SQLAlchemy and mysql-connector-python;
     "jinja2",
     "mrcfile",
     "numpy<2",
@@ -73,7 +74,7 @@ server = [
     "sqlalchemy[postgresql]", # Add as explicit dependency
     "sqlmodel",
     "stomp-py<=8.1.0", # 8.1.1 (released 2024-04-06) doesn't work with our project
-    "zocalo",
+    "zocalo>=1",
 ]
 [project.urls]
 Bug-Tracker = "https://github.com/DiamondLightSource/python-murfey/issues"
diff --git a/src/murfey/client/contexts/spa.py b/src/murfey/client/contexts/spa.py
index 004422bf9..99a182ea5 100644
--- a/src/murfey/client/contexts/spa.py
+++ b/src/murfey/client/contexts/spa.py
@@ -506,7 +506,8 @@ def post_transfer(
         except Exception as e:
             # try to continue if position information gathering fails so that movie is processed anyway
             logger.warning(
-                f"Unable to register foil hole for {str(file_transferred_to)}. Exception: {str(e)}"
+                f"Unable to register foil hole for {str(file_transferred_to)}. Exception: {str(e)}",
+                exc_info=True,
             )
             foil_hole = None

diff --git a/src/murfey/client/contexts/tomo.py b/src/murfey/client/contexts/tomo.py
index 2fec6eb91..08fe22c20 100644
--- a/src/murfey/client/contexts/tomo.py
+++ b/src/murfey/client/contexts/tomo.py
@@ -173,7 +173,7 @@ def register_tomography_data_collections(
                 )
         except Exception as e:
-            logger.error(f"ERROR {e}, {self.data_collection_parameters}")
+            logger.error(f"ERROR {e}, {self.data_collection_parameters}", exc_info=True)

     def _file_transferred_to(
         self, environment: MurfeyInstanceEnvironment, source: Path, file_path: Path
     )
@@ -533,7 +533,9 @@ def gather_metadata(
         try:
             for_parsing = xml.read()
         except Exception:
-            logger.warning(f"Failed to parse file {metadata_file}")
+            logger.warning(
+                f"Failed to parse file {metadata_file}", exc_info=True
+            )
             return OrderedDict({})
         data = xmltodict.parse(for_parsing)
         try:
@@ -628,7 +630,9 @@ def gather_metadata(
                    / int(mdoc_metadata["num_eer_frames"])
                )
        except Exception as e:
-            logger.error(
                f"Exception encountered in metadata gathering: {str(e)}")
+            logger.error(
+                f"Exception encountered in metadata gathering: {str(e)}", exc_info=True
+            )
            return OrderedDict({})

        return mdoc_metadata
diff --git a/src/murfey/client/instance_environment.py b/src/murfey/client/instance_environment.py
index df30d1da0..ad0b29557 100644
--- a/src/murfey/client/instance_environment.py
+++ b/src/murfey/client/instance_environment.py
@@ -8,7 +8,7 @@
 from typing import Dict, List, NamedTuple, Optional
 from urllib.parse import ParseResult

-from pydantic import BaseModel
+from pydantic import BaseModel, ConfigDict

 from murfey.client.watchdir import DirWatcher

@@ -56,9 +56,7 @@ class MurfeyInstanceEnvironment(BaseModel):
     samples: Dict[Path, SampleInfo] = {}
     rsync_url: str = ""

-    class Config:
-        validate_assignment: bool = True
-        arbitrary_types_allowed: bool = True
+    model_config = ConfigDict(arbitrary_types_allowed=True)

     def clear(self):
         self.sources = []
diff --git a/src/murfey/client/tui/screens.py b/src/murfey/client/tui/screens.py
index f20eb5bb0..2dec96595 100644
--- a/src/murfey/client/tui/screens.py
+++ b/src/murfey/client/tui/screens.py
@@ -196,7 +196,7 @@ def validate_form(form: dict, model: BaseModel) -> bool:
     try:
         convert = lambda x: None if x == "None" else x
         validated = model(**{k: convert(v) for k, v in form.items()})
-        log.info(validated.dict())
+        log.info(validated.model_dump())
         return True
     except (AttributeError, ValidationError) as e:
         log.warning(f"Form validation failed: {str(e)}")
diff --git a/src/murfey/instrument_server/api.py b/src/murfey/instrument_server/api.py
index 05b4d228b..3ba3a88e9 100644
--- a/src/murfey/instrument_server/api.py
+++ b/src/murfey/instrument_server/api.py
@@ -335,14 +335,14 @@ def register_processing_parameters(
     session_id: MurfeySessionID, proc_param_block: ProcessingParameterBlock
 ):
     data_collection_parameters[proc_param_block.label] = {}
-    for k, v in proc_param_block.params.dict().items():
+    for k, v in proc_param_block.params.model_dump().items():
         if v is not None:
             data_collection_parameters[proc_param_block.label][k] = v
     if controllers.get(session_id):
         controllers[session_id].data_collection_parameters.update(
             data_collection_parameters[proc_param_block.label]
         )
-        for k, v in proc_param_block.params.dict().items():
+        for k, v in proc_param_block.params.model_dump().items():
             if v is not None and hasattr(controllers[session_id]._environment, k):
                 setattr(controllers[session_id]._environment, k, v)
     return {"success": True}
diff --git a/src/murfey/server/api/clem.py b/src/murfey/server/api/clem.py
index a13b547b7..7f7fb28b2 100644
--- a/src/murfey/server/api/clem.py
+++ b/src/murfey/server/api/clem.py
@@ -10,7 +10,7 @@
 from backports.entry_points_selectable import entry_points
 from fastapi import APIRouter
-from pydantic import BaseModel, validator
+from pydantic import BaseModel, field_validator
 from sqlalchemy.exc import NoResultFound
 from sqlmodel import Session, select

@@ -820,10 +820,8 @@ class AlignAndMergeParams(BaseModel):
     flatten: Literal["mean", "min", "max", ""] = ""
     align_across: Literal["enabled", ""] = ""

-    @validator(
-        "images",
-        pre=True,
-    )
+    @field_validator("images", mode="before")
+    @classmethod
     def parse_stringified_list(cls, value):
         if isinstance(value, str):
             try:
diff --git a/src/murfey/server/api/prometheus.py b/src/murfey/server/api/prometheus.py
index 6457f25be..c93e49b27 100644
--- a/src/murfey/server/api/prometheus.py
+++ b/src/murfey/server/api/prometheus.py
@@ -39,7 +39,7 @@ def increment_rsync_file_count(
         logger.error(
             f"Failed to find rsync instance for visit {sanitise(visit_name)} "
             "with the following properties: \n"
-            f"{rsyncer_info.dict()}",
+            f"{rsyncer_info.model_dump()}",
             exc_info=True,
         )
         return None
diff --git a/src/murfey/server/api/session_control.py b/src/murfey/server/api/session_control.py
index 8be560b40..8ed27901c 100644
--- a/src/murfey/server/api/session_control.py
+++ b/src/murfey/server/api/session_control.py
@@ -96,7 +96,7 @@ def get_current_visits(instrument_name: str, db=ispyb_db):


 class SessionInfo(BaseModel):
-    session_id: Optional[int]
+    session_id: Optional[int] = None
     session_name: str = ""
     rescale: bool = True

diff --git a/src/murfey/server/api/workflow.py b/src/murfey/server/api/workflow.py
index fe2cc71b9..d1714cc51 100644
--- a/src/murfey/server/api/workflow.py
+++ b/src/murfey/server/api/workflow.py
@@ -81,7 +81,7 @@ class DCGroupParameters(BaseModel):
     tag: str
     atlas: str = ""
     sample: Optional[int] = None
-    atlas_pixel_size: int = 0
+    atlas_pixel_size: float = 0


 @router.post("/visits/{visit_name}/{session_id}/register_data_collection_group")
@@ -346,13 +346,13 @@ class SPAProcessFile(BaseModel):
     tag: str
     path: str
     description: str
-    processing_job: Optional[int]
-    data_collection_id: Optional[int]
+    processing_job: Optional[int] = None
+    data_collection_id: Optional[int] = None
     image_number: int
-    autoproc_program_id: Optional[int]
-    foil_hole_id: Optional[int]
-    pixel_size: Optional[float]
-    dose_per_frame: Optional[float]
+    autoproc_program_id: Optional[int] = None
+    foil_hole_id: Optional[int] = None
+    pixel_size: Optional[float] = None
+    dose_per_frame: Optional[float] = None
     mc_binning: Optional[int] = 1
     gain_ref: Optional[str] = None
     extract_downscale: bool = True
@@ -608,9 +608,9 @@ class TomoProcessFile(BaseModel):
     tag: str
     image_number: int
     pixel_size: float
-    dose_per_frame: Optional[float]
+    dose_per_frame: Optional[float] = None
     frame_count: int
-    tilt_axis: Optional[float]
+    tilt_axis: Optional[float] = None
     mc_uuid: Optional[int] = None
     voltage: float = 300
     mc_binning: int = 1
@@ -894,7 +894,7 @@ class Sample(BaseModel):
     sample_group_id: int
     sample_id: int
     subsample_id: int
-    image_path: Optional[Path]
+    image_path: Optional[Path] = None


 @correlative_router.get("/visit/{visit_name}/samples")
diff --git a/src/murfey/server/demo_api.py b/src/murfey/server/demo_api.py
index c10d6a14f..3328d7346 100644
--- a/src/murfey/server/demo_api.py
+++ b/src/murfey/server/demo_api.py
@@ -13,32 +13,38 @@
 from fastapi.responses import FileResponse, HTMLResponse
 from ispyb.sqlalchemy import BLSession
 from PIL import Image
-from pydantic import BaseModel, BaseSettings
+from pydantic import BaseModel
+from pydantic_settings import BaseSettings
 from sqlalchemy import func
 from sqlmodel import select
 from werkzeug.utils import secure_filename

 import murfey.server.api.bootstrap
 import murfey.server.prometheus as prom
-from murfey.server import (
-    _flush_grid_square_records,
-    _murfey_id,
-    get_hostname,
-    get_microscope,
-    sanitise,
-    sanitise_path,
-)
-from murfey.server import shutdown as _shutdown
-from murfey.server import templates
-from murfey.server.api.auth import MurfeySessionID, validate_token
+from murfey.server.api import templates
+from murfey.server.api.auth import MurfeySessionIDFrontend as MurfeySessionID
+from murfey.server.api.auth import validate_token
 from murfey.server.api.session_info import Visit
 from murfey.server.api.workflow import (
     DCGroupParameters,
     DCParameters,
     ProcessingJobParameters,
 )
+from murfey.server.feedback import (
+    _flush_grid_square_records,
+    _murfey_id,
+    get_microscope,
+    sanitise,
+)
 from murfey.server.murfey_db import murfey_db
-from murfey.util.config import MachineConfig, from_file, security_from_file
+from murfey.server.run import shutdown as _shutdown
+from murfey.util import sanitise_path
+from murfey.util.config import (
+    MachineConfig,
+    from_file,
+    get_hostname,
+    security_from_file,
+)
 from murfey.util.db import (
     AutoProcProgram,
     ClientEnvironment,
diff --git a/src/murfey/server/main.py b/src/murfey/server/main.py
index d588476af..b3fd3e3c1 100644
--- a/src/murfey/server/main.py
+++ b/src/murfey/server/main.py
@@ -7,7 +7,7 @@
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
 from prometheus_client import make_asgi_app
-from pydantic import BaseSettings
+from pydantic_settings import BaseSettings

 import murfey.server
 import murfey.server.api.auth
diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py
index b07452bb7..8ff64dc4a 100644
--- a/src/murfey/util/config.py
+++ b/src/murfey/util/config.py
@@ -4,11 +4,21 @@
 import socket
 from functools import lru_cache
 from pathlib import Path
-from typing import Literal, Optional, Union
+from typing import Any, Literal, Optional

 import yaml
 from backports.entry_points_selectable import entry_points
-from pydantic import BaseModel, BaseSettings, Extra, validator
+from pydantic import BaseModel, ConfigDict, RootModel, ValidationInfo, field_validator
+from pydantic_settings import BaseSettings
+
+
+class MagnificationTable(RootModel[dict[int, float]]):
+    pass
+
+
+CALIBRATIONS_VALIDATION_SCHEMAS = {
+    "magnification": MagnificationTable,
+}


 class MachineConfig(BaseModel): # type: ignore
@@ -26,7 +36,7 @@ class MachineConfig(BaseModel): # type: ignore
     # Hardware and software -----------------------------------------------------------
     camera: str = "FALCON"
     superres: bool = False
-    calibrations: dict[str, dict[str, Union[dict, float]]]
+    calibrations: dict[str, Any]
     acquisition_software: list[str]
     software_versions: dict[str, str] = {}
     software_settings_output_directories: dict[str, list[str]] = {}
@@ -72,7 +82,7 @@ class MachineConfig(BaseModel): # type: ignore

     # Particle picking setup
     default_model: Path
-    model_search_directory: str = "processing"
+    picking_model_search_directory: str = "processing"
     initial_model_search_directory: str = "processing/initial_model"

     # Data analysis plugins
@@ -93,15 +103,43 @@ class MachineConfig(BaseModel): # type: ignore
     node_creator_queue: str = "node_creator"
     notifications_queue: str = "pato_notification"
= "pato_notification" - class Config: - """ - Inner class that defines this model's parsing and serialising behaviour - """ + # Pydantic BaseModel settings + model_config = ConfigDict(extra="allow") + + @field_validator("calibrations", mode="before") + @classmethod + def validate_calibration_data( + cls, v: dict[str, dict[Any, Any]] + ) -> dict[str, dict[Any, Any]]: + # Pass the calibration dictionaries through their matching Pydantic models, if any are set + if isinstance(v, dict): + validated = {} + for ( + key, + value, + ) in v.items(): + model_cls = CALIBRATIONS_VALIDATION_SCHEMAS.get(key) + if model_cls: + try: + # Validate and store as a dict object with the corrected types + validated[key] = model_cls.model_validate(value).root + except Exception as e: + raise ValueError(f"Validation failed for key '{key}': {e}") + else: + validated[key] = value + return validated + # Let it validate and fail as-is + return v - extra = Extra.allow - json_encoders = { - Path: str, - } + @field_validator("software_versions", mode="before") + @classmethod + def validate_software_versions(cls, v: dict[str, Any]) -> dict[str, str]: + # Software versions should be numerical strings, even if they appear int- or float-like + if isinstance(v, dict): + validated = {key: str(value) for key, value in v.items()} + return validated + # Let it validate and fail as-is + return v def from_file(config_file_path: Path, instrument: str = "") -> dict[str, MachineConfig]: @@ -144,16 +182,13 @@ class Security(BaseModel): graylog_host: str = "" graylog_port: Optional[int] = None - class Config: - json_encoders = { - Path: str, - } + model_config = ConfigDict() - @validator("graylog_port") + @field_validator("graylog_port") def check_port_present_if_host_is( - cls, v: Optional[int], values: dict, **kwargs + cls, v: Optional[int], info: ValidationInfo, **kwargs ) -> Optional[int]: - if values["graylog_host"] and v is None: + if info.data.get("graylog_host") and v is None: raise ValueError("The Graylog port must be set if the Graylog host is") return v diff --git a/src/murfey/util/models.py b/src/murfey/util/models.py index 9c80204d3..de6f3bd43 100644 --- a/src/murfey/util/models.py +++ b/src/murfey/util/models.py @@ -1,10 +1,11 @@ from __future__ import annotations +import math from datetime import datetime from pathlib import Path from typing import Any, Dict, List, Optional -from pydantic import BaseModel +from pydantic import BaseModel, field_validator """ General Models @@ -46,6 +47,15 @@ class File(BaseModel): timestamp: datetime full_path: str + @field_validator("size", mode="before") + @classmethod + def round_file_size_correctly(cls, v: Any) -> int: + if isinstance(v, float): + if v - math.floor(v) == 0.5: + return math.ceil(v) + return round(v) + return v + class ConnectionFileParameters(BaseModel): filename: str @@ -77,8 +87,8 @@ class RsyncerInfo(BaseModel): class ProcessingParametersSPA(BaseModel): tag: str - dose_per_frame: Optional[float] - gain_ref: Optional[str] + dose_per_frame: Optional[float] = None + gain_ref: Optional[str] = None experiment_type: str voltage: float image_size_x: int @@ -97,8 +107,8 @@ class ProcessingParametersSPA(BaseModel): phase_plate: bool = False class Base(BaseModel): - dose_per_frame: Optional[float] - gain_ref: Optional[str] + dose_per_frame: Optional[float] = None + gain_ref: Optional[str] = None symmetry: str eer_fractionation: int @@ -156,10 +166,10 @@ class Token(BaseModel): class ProcessingParametersTomo(BaseModel): - dose_per_frame: Optional[float] + dose_per_frame: 
+    dose_per_frame: Optional[float] = None
     frame_count: int
     tilt_axis: float
-    gain_ref: Optional[str]
+    gain_ref: Optional[str] = None
     experiment_type: str
     voltage: float
     image_size_x: int
@@ -169,12 +179,12 @@ class ProcessingParametersTomo(BaseModel):
     file_extension: str
     tag: str
     tilt_series_tag: str
-    eer_fractionation_file: Optional[str]
+    eer_fractionation_file: Optional[str] = None
     eer_fractionation: int

     class Base(BaseModel):
-        dose_per_frame: Optional[float]
-        gain_ref: Optional[str]
+        dose_per_frame: Optional[float] = None
+        gain_ref: Optional[str] = None
         eer_fractionation: int

diff --git a/src/murfey/util/processing_params.py b/src/murfey/util/processing_params.py
index aa32f6934..65a51c20c 100644
--- a/src/murfey/util/processing_params.py
+++ b/src/murfey/util/processing_params.py
@@ -40,12 +40,14 @@ def cryolo_model_path(visit: str, instrument_name: str) -> Path:
     machine_config = get_machine_config(instrument_name=instrument_name)[
         instrument_name
     ]
-    if machine_config.model_search_directory:
+    if machine_config.picking_model_search_directory:
         visit_directory = (
             machine_config.rsync_basepath / str(datetime.now().year) / visit
         )
         possible_models = list(
-            (visit_directory / machine_config.model_search_directory).glob("*.h5")
+            (visit_directory / machine_config.picking_model_search_directory).glob(
+                "*.h5"
+            )
         )
         if possible_models:
             return sorted(possible_models, key=lambda x: x.stat().st_ctime)[-1]
diff --git a/src/murfey/util/rsync.py b/src/murfey/util/rsync.py
index 84953b39a..bcbb4b059 100644
--- a/src/murfey/util/rsync.py
+++ b/src/murfey/util/rsync.py
@@ -108,6 +108,7 @@ def _single_rsync(
         else:
             cmd.append(str(self._finaldir / sub_struct) + "/")
         self._transferring = True
+
         runner = subprocess.run(
             cmd,
             capture_output=True,
diff --git a/src/murfey/workflows/clem/register_align_and_merge_results.py b/src/murfey/workflows/clem/register_align_and_merge_results.py
index c2c7eb5a2..b9de46d64 100644
--- a/src/murfey/workflows/clem/register_align_and_merge_results.py
+++ b/src/murfey/workflows/clem/register_align_and_merge_results.py
@@ -7,7 +7,7 @@
 from pathlib import Path
 from typing import Optional

-from pydantic import BaseModel, validator
+from pydantic import BaseModel, field_validator
 from sqlmodel import Session

 from murfey.util.db import CLEMImageSeries
@@ -24,10 +24,8 @@ class AlignAndMergeResult(BaseModel):
     align_across: Optional[str] = None
     composite_image: Path

-    @validator(
-        "image_stacks",
-        pre=True,
-    )
+    @field_validator("image_stacks", mode="before")
+    @classmethod
     def parse_stringified_list(cls, value):
         if isinstance(value, str):
             try:
diff --git a/src/murfey/workflows/clem/register_preprocessing_results.py b/src/murfey/workflows/clem/register_preprocessing_results.py
index 642165bc2..738d36bce 100644
--- a/src/murfey/workflows/clem/register_preprocessing_results.py
+++ b/src/murfey/workflows/clem/register_preprocessing_results.py
@@ -13,7 +13,7 @@
 from ast import literal_eval
 from pathlib import Path

-from pydantic import BaseModel, validator
+from pydantic import BaseModel, field_validator
 from sqlmodel import Session, select

 from murfey.server import _transport_object
@@ -215,10 +215,8 @@ class TIFFPreprocessingResult(BaseModel):
     number_of_members: int
     parent_tiffs: list[Path]

-    @validator(
-        "parent_tiffs",
-        pre=True,
-    )
+    @field_validator("parent_tiffs", mode="before")
+    @classmethod
     def parse_stringified_list(cls, value):
         if isinstance(value, str):
             try:
diff --git a/tests/cli/test_decrypt_password.py b/tests/cli/test_decrypt_password.py
index 65952e5e8..f7b9a192a 100644
--- a/tests/cli/test_decrypt_password.py
+++ b/tests/cli/test_decrypt_password.py
@@ -13,7 +13,7 @@ def test_decrypt_password(capsys, tmp_path):
     crypto_key = Fernet.generate_key()
     security_config.crypto_key = crypto_key.decode("ascii")
     with open(tmp_path / "config.yaml", "w") as cfg:
-        yaml.dump(security_config.dict(), cfg)
+        yaml.dump(security_config.model_dump(), cfg)
     os.environ["MURFEY_SECURITY_CONFIGURATION"] = str(tmp_path / "config.yaml")
     password = "abcd"
     f = Fernet(crypto_key)
diff --git a/tests/cli/test_generate_password.py b/tests/cli/test_generate_password.py
index fa48e9cf2..e7d4ead05 100644
--- a/tests/cli/test_generate_password.py
+++ b/tests/cli/test_generate_password.py
@@ -12,7 +12,7 @@ def test_generate_password(capsys, tmp_path):
     crypto_key = Fernet.generate_key()
     security_config.crypto_key = crypto_key.decode("ascii")
     with open(tmp_path / "config.yaml", "w") as cfg:
-        yaml.dump(security_config.dict(), cfg)
+        yaml.dump(security_config.model_dump(), cfg)
     os.environ["MURFEY_SECURITY_CONFIGURATION"] = str(tmp_path / "config.yaml")
     run()
     captured = capsys.readouterr()
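
For reference, the following is a minimal sketch, not part of the diff, of the pydantic 2 idioms this changeset migrates to: model_config = ConfigDict(...) in place of the inner Config class, @field_validator(..., mode="before") stacked with @classmethod in place of @validator(..., pre=True), explicit = None defaults so Optional fields stay optional, and .model_dump() in place of .dict(). (BaseSettings itself now lives in the separate pydantic-settings package, hence the new dependency.) The model and field names below are illustrative only.

# Illustrative sketch of the pydantic 2 patterns used throughout this diff;
# the class and field names here are hypothetical, not taken from murfey.
from typing import Any, Optional

from pydantic import BaseModel, ConfigDict, field_validator


class ExampleParams(BaseModel):
    # pydantic 2: model_config replaces the inner "class Config"
    model_config = ConfigDict(extra="allow")

    # pydantic 2: Optional fields need an explicit default to remain optional
    session_id: Optional[int] = None
    images: list[str] = []

    # pydantic 2: @validator("images", pre=True) becomes field_validator + classmethod
    @field_validator("images", mode="before")
    @classmethod
    def parse_stringified_list(cls, value: Any) -> Any:
        if isinstance(value, str):
            return [item for item in value.split(",") if item]
        return value


print(ExampleParams(images="a.tiff,b.tiff").model_dump())  # .dict() -> .model_dump()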