From 8f05d01b954c354762309f860e69f6d7b63086b1 Mon Sep 17 00:00:00 2001 From: Dan Hatton Date: Fri, 17 Jan 2025 15:23:02 +0000 Subject: [PATCH 01/13] On reading EpuSession.dm post all the grid squares found --- src/murfey/client/contexts/spa_metadata.py | 51 ++++++++-------------- 1 file changed, 18 insertions(+), 33 deletions(-) diff --git a/src/murfey/client/contexts/spa_metadata.py b/src/murfey/client/contexts/spa_metadata.py index 7cd98b4e6..70d0edf1a 100644 --- a/src/murfey/client/contexts/spa_metadata.py +++ b/src/murfey/client/contexts/spa_metadata.py @@ -117,37 +117,22 @@ def post_transfer( "atlas_pixel_size": atlas_pixel_size, } capture_post(url, json=dcg_data) - registered_grid_squares = ( - requests.get( - f"{str(environment.url.geturl())}/sessions/{environment.murfey_session}/grid_squares" - ) - .json() - .get(str(source), []) + gs_pix_positions = _get_grid_square_atlas_positions( + _atlas_destination(environment, source, transferred_file) + / environment.samples[source].atlas ) - if registered_grid_squares: - gs_pix_positions = _get_grid_square_atlas_positions( - _atlas_destination(environment, source, transferred_file) - / environment.samples[source].atlas - ) - for gs in registered_grid_squares: - pos_data = gs_pix_positions.get(str(gs["name"])) - if pos_data: - capture_post( - f"{str(environment.url.geturl())}/sessions/{environment.murfey_session}/grid_square/{gs['name']}", - json={ - "tag": gs["tag"], - "readout_area_x": gs["readout_area_x"], - "readout_area_y": gs["readout_area_y"], - "thumbnail_size_x": gs["thumbnail_size_x"], - "thumbnail_size_y": gs["thumbnail_size_y"], - "pixel_size": gs["pixel_size"], - "image": gs["image"], - "x_location": pos_data[0], - "y_location": pos_data[1], - "x_stage_position": pos_data[2], - "y_stage_position": pos_data[3], - "width": pos_data[4], - "height": pos_data[5], - "angle": pos_data[6], - }, - ) + for gs, pos_data in gs_pix_positions.items(): + if pos_data: + capture_post( + f"{str(environment.url.geturl())}/sessions/{environment.murfey_session}/grid_square/{gs}", + json={ + "tag": str(source), + "x_location": pos_data[0], + "y_location": pos_data[1], + "x_stage_position": pos_data[2], + "y_stage_position": pos_data[3], + "width": pos_data[4], + "height": pos_data[5], + "angle": pos_data[6], + }, + ) From 96e4428a3895974e13981c9b1f7d59e774f32ea5 Mon Sep 17 00:00:00 2001 From: Dan Hatton Date: Tue, 21 Jan 2025 13:11:12 +0000 Subject: [PATCH 02/13] Record holes present in grid square .dm files when these files are seen This will mean that all foil holes will be recorded, not just those with data associated --- src/murfey/client/contexts/spa_metadata.py | 90 +++++++++++++++++++++- 1 file changed, 89 insertions(+), 1 deletion(-) diff --git a/src/murfey/client/contexts/spa_metadata.py b/src/murfey/client/contexts/spa_metadata.py index 70d0edf1a..d062d31d4 100644 --- a/src/murfey/client/contexts/spa_metadata.py +++ b/src/murfey/client/contexts/spa_metadata.py @@ -1,6 +1,6 @@ import logging from pathlib import Path -from typing import Optional +from typing import Dict, NamedTuple, Optional import requests import xmltodict @@ -15,6 +15,71 @@ requests.get, requests.post, requests.put, requests.delete = authorised_requests() +class FoilHole(NamedTuple): + x_location: int + y_location: int + diameter: int + x_stage_position: Optional[float] = None + y_stage_position: Optional[float] = None + readout_area_x: Optional[int] = None + readout_area_y: Optional[int] = None + thumbnail_size_x: Optional[int] = None + thumbnail_size_y: Optional[int] = None + pixel_size: Optional[float] = None + image: str = "" + + +def _foil_hole_positions(xml_path: Path, grid_square: int) -> Dict[str, FoilHole]: + with open(xml_path, "r") as xml: + for_parsing = xml.read() + data = xmltodict.parse(for_parsing) + data = data["GridSquareXml"] + readout_area = data["MicroscopeImage"]["microscopeData"]["acquisition"]["camera"][ + "ReadoutArea" + ] + pixel_size = data["MicroscopeImage"]["SpatialScale"]["pixelSize"]["x"][ + "numericValue" + ] + full_size = (int(readout_area["a:width"]), int(readout_area["a:height"])) + serialization_array = data["TargetLocations"]["TargetLocationsEfficient"][ + "a:m_serializationArray" + ] + required_key = "" + for key in serialization_array.keys(): + if key.startswith("b:KeyValuePairOfintTargetLocation"): + required_key = key + break + if not required_key: + return {} + foil_holes = {} + for fh_block in serialization_array[required_key]: + if fh_block["b:value"]["IsNearGridBar"] == "false": + image_paths = list( + (xml_path.parent.parent).glob( + f"Images-Disc*/GridSquare_{grid_square}/FoilHoles/FoilHole_{fh_block['b:key']}_*.jpg" + ) + ) + image_paths.sort(key=lambda x: x.stat().st_ctime) + image_path: str = str(image_paths[-1]) if image_paths else "" + stage = fh_block["b:value"]["PixelCenter"] + stage = fh_block["b:value"]["StagePosition"] + diameter = fh_block["b:value"]["PixelWidthHeight"]["c:width"] + foil_holes[fh_block["b:key"]] = FoilHole( + x_location=int(float(stage["c:x"])), + y_location=int(float(stage["c:y"])), + x_stage_position=float(stage["c:X"]), + y_stage_position=float(stage["c:Y"]), + readout_area_x=full_size[0] if image_path else None, + readout_area_y=full_size[1] if image_path else None, + thumbnail_size_x=None, + thumbnail_size_y=None, + pixel_size=float(pixel_size) if image_path else None, + image=str(image_path), + diameter=int(float(diameter)), + ) + return foil_holes + + def _atlas_destination( environment: MurfeyInstanceEnvironment, source: Path, file_path: Path ) -> Path: @@ -136,3 +201,26 @@ def post_transfer( "angle": pos_data[6], }, ) + + elif transferred_file.suffix == ".dm" and environment: + gs_name = transferred_file.name.split("_")[1] + fh_positions = _foil_hole_positions(transferred_file, int(gs_name)) + for fh, fh_data in fh_positions.items(): + capture_post( + f"{str(environment.url.geturl())}/sessions/{environment.murfey_session}/grid_square/{gs_name}/foil_hole", + json={ + "name": fh, + "x_location": fh_data.x_location, + "y_location": fh_data.y_location, + "x_stage_position": fh_data.x_stage_position, + "y_stage_position": fh_data.y_stage_position, + "readout_area_x": fh_data.readout_area_x, + "readout_area_y": fh_data.readout_area_y, + "thumbnail_size_x": fh_data.thumbnail_size_x, + "thumbnail_size_y": fh_data.thumbnail_size_y, + "pixel_size": fh_data.pixel_size, + "diameter": fh_data.diameter, + "tag": str(source), + "image": fh_data.image, + }, + ) From 1d10b93b888de4f446b19c246cd44daa6e74e7bc Mon Sep 17 00:00:00 2001 From: Dan Hatton Date: Tue, 21 Jan 2025 13:16:43 +0000 Subject: [PATCH 03/13] Need to get the correct source, which is the source for the fractions data rather than the metadata --- src/murfey/client/contexts/spa_metadata.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/murfey/client/contexts/spa_metadata.py b/src/murfey/client/contexts/spa_metadata.py index d062d31d4..c88700c86 100644 --- a/src/murfey/client/contexts/spa_metadata.py +++ b/src/murfey/client/contexts/spa_metadata.py @@ -205,6 +205,8 @@ def post_transfer( elif transferred_file.suffix == ".dm" and environment: gs_name = transferred_file.name.split("_")[1] fh_positions = _foil_hole_positions(transferred_file, int(gs_name)) + source = _get_source(transferred_file, environment=environment) + visitless_source = str(source).replace(f"/{environment.visit}", "") for fh, fh_data in fh_positions.items(): capture_post( f"{str(environment.url.geturl())}/sessions/{environment.murfey_session}/grid_square/{gs_name}/foil_hole", @@ -220,7 +222,7 @@ def post_transfer( "thumbnail_size_y": fh_data.thumbnail_size_y, "pixel_size": fh_data.pixel_size, "diameter": fh_data.diameter, - "tag": str(source), + "tag": visitless_source, "image": fh_data.image, }, ) From 29669cebd4e25aec5ea27e18d2c657fe137c116c Mon Sep 17 00:00:00 2001 From: Stephen Riggs <122790971+stephen-riggs@users.noreply.github.com> Date: Mon, 13 Jan 2025 10:06:13 +0000 Subject: [PATCH 04/13] Typo (#440) --- src/murfey/server/api/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/murfey/server/api/__init__.py b/src/murfey/server/api/__init__.py index d0e062416..2a19c7153 100644 --- a/src/murfey/server/api/__init__.py +++ b/src/murfey/server/api/__init__.py @@ -1377,7 +1377,7 @@ class Dest(BaseModel): @router.post("/sessions/{session_id}/make_rsyncer_destination") def make_rsyncer_destination(session_id: int, destination: Dest, db=murfey_db): - secure_path_parts = [secure_filename(p) for p in destination.destintion.parts] + secure_path_parts = [secure_filename(p) for p in destination.destination.parts] destination_path = "/".join(secure_path_parts) instrument_name = ( db.exec(select(Session).where(Session.id == session_id)).one().instrument_name From c06f050a42a9e12beec9df4bfcc127d80c87245f Mon Sep 17 00:00:00 2001 From: Daniel Hatton Date: Mon, 13 Jan 2025 15:30:30 +0000 Subject: [PATCH 05/13] Fixes for SPA flushing (#441) Use the recipes from Murfey configuration rather than hardcoding them. Also, add foil hole ID to preprocessing message during flush. --- src/murfey/server/__init__.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/murfey/server/__init__.py b/src/murfey/server/__init__.py index 335525851..823df3063 100644 --- a/src/murfey/server/__init__.py +++ b/src/murfey/server/__init__.py @@ -1947,6 +1947,7 @@ def _flush_spa_preprocessing(message: dict): machine_config = get_machine_config(instrument_name=instrument_name)[ instrument_name ] + recipe_name = machine_config.recipes.get("em-spa-preprocess", "em-spa-preprocess") collected_ids = murfey_db.exec( select( db.DataCollectionGroup, @@ -1959,7 +1960,7 @@ def _flush_spa_preprocessing(message: dict): .where(db.DataCollection.dcg_id == db.DataCollectionGroup.id) .where(db.ProcessingJob.dc_id == db.DataCollection.id) .where(db.AutoProcProgram.pj_id == db.ProcessingJob.id) - .where(db.ProcessingJob.recipe == "em-spa-preprocess") + .where(db.ProcessingJob.recipe == recipe_name) ).one() params = murfey_db.exec( select(db.SPARelionParameters, db.SPAFeedbackParameters) @@ -1973,7 +1974,7 @@ def _flush_spa_preprocessing(message: dict): f"No SPA processing parameters found for client processing job ID {collected_ids[2].id}" ) raise ValueError( - "No processing parameters were foudn in the database when flushing SPA preprocessing" + "No processing parameters were found in the database when flushing SPA preprocessing" ) murfey_ids = _murfey_id( @@ -2000,7 +2001,7 @@ def _flush_spa_preprocessing(message: dict): ) murfey_db.add(movie) zocalo_message: dict = { - "recipes": ["em-spa-preprocess"], + "recipes": [recipe_name], "parameters": { "node_creator_queue": machine_config.node_creator_queue, "dcid": collected_ids[1].id, @@ -2020,6 +2021,7 @@ def _flush_spa_preprocessing(message: dict): "particle_diameter": proc_params.particle_diameter or 0, "fm_int_file": f.eer_fractionation_file, "do_icebreaker_jobs": default_spa_parameters.do_icebreaker_jobs, + "foil_hole_id": f.foil_hole_id, }, } if _transport_object: @@ -2075,6 +2077,8 @@ def _flush_tomography_preprocessing(message: dict): ) return + recipe_name = machine_config.get("em-tomo-preprocess", "em-tomo-preprocess") + for f in stashed_files: collected_ids = murfey_db.exec( select( @@ -2089,7 +2093,7 @@ def _flush_tomography_preprocessing(message: dict): .where(db.DataCollection.tag == f.tag) .where(db.ProcessingJob.dc_id == db.DataCollection.id) .where(db.AutoProcProgram.pj_id == db.ProcessingJob.id) - .where(db.ProcessingJob.recipe == "em-tomo-preprocess") + .where(db.ProcessingJob.recipe == recipe_name) ).one() detached_ids = [c.id for c in collected_ids] @@ -2105,7 +2109,7 @@ def _flush_tomography_preprocessing(message: dict): ) murfey_db.add(movie) zocalo_message: dict = { - "recipes": ["em-tomo-preprocess"], + "recipes": [recipe_name], "parameters": { "node_creator_queue": machine_config.node_creator_queue, "dcid": detached_ids[1], From baba743e3bc185f0e288e3f7f77ab33c6540b5ad Mon Sep 17 00:00:00 2001 From: Daniel Hatton Date: Mon, 13 Jan 2025 15:34:35 +0000 Subject: [PATCH 06/13] Add logging for foil hole registration debugging (#442) --- src/murfey/client/contexts/spa.py | 5 ++++- src/murfey/server/api/__init__.py | 6 +++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/murfey/client/contexts/spa.py b/src/murfey/client/contexts/spa.py index 70c43dd6c..65cf34a35 100644 --- a/src/murfey/client/contexts/spa.py +++ b/src/murfey/client/contexts/spa.py @@ -785,8 +785,11 @@ def post_transfer( foil_hole: Optional[int] = self._position_analysis( transferred_file, environment, source, machine_config ) - except Exception: + except Exception as e: # try to continue if position information gathering fails so that movie is processed anyway + logger.warning( + f"Unable to register foil hole for {str(file_transferred_to)}. Exception: {str(e)}" + ) foil_hole = None preproc_url = f"{str(environment.url.geturl())}/visits/{environment.visit}/{environment.murfey_session}/spa_preprocess" diff --git a/src/murfey/server/api/__init__.py b/src/murfey/server/api/__init__.py index 2a19c7153..6f5d411e9 100644 --- a/src/murfey/server/api/__init__.py +++ b/src/murfey/server/api/__init__.py @@ -1135,7 +1135,11 @@ async def request_spa_preprocessing( .one()[0] .id ) - except Exception: + except Exception as e: + log.warning( + f"Foil hole ID not found for foil hole {sanitise(str(proc_file.foil_hole_id))}: {e}", + exc_info=True, + ) foil_hole_id = None if proc_params: From b44bac5ac69c32184456f40c8259c969391dcca9 Mon Sep 17 00:00:00 2001 From: Daniel Hatton Date: Mon, 13 Jan 2025 13:26:01 +0000 Subject: [PATCH 07/13] The client needs to read metadata from the original location, not the destination --- src/murfey/client/contexts/spa_metadata.py | 31 ++++++++-------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/src/murfey/client/contexts/spa_metadata.py b/src/murfey/client/contexts/spa_metadata.py index c88700c86..b8e3b84e0 100644 --- a/src/murfey/client/contexts/spa_metadata.py +++ b/src/murfey/client/contexts/spa_metadata.py @@ -126,23 +126,18 @@ def post_transfer( ]["#text"] visit_index = windows_path.split("\\").index(environment.visit) partial_path = "/".join(windows_path.split("\\")[visit_index + 1 :]) - visitless_path = Path( - str(transferred_file).replace(f"/{environment.visit}", "") - ) - visit_index_of_transferred_file = transferred_file.parts.index( - environment.visit - ) + + source = _get_source(transferred_file, environment) + if not source: + logger.warning( + f"Source could not be indentified for {str(transferred_file)}" + ) + return + + source_visit_dir = source.parent + atlas_xml_path = list( - ( - Path( - "/".join( - transferred_file.parts[ - : visit_index_of_transferred_file + 1 - ] - ) - ) - / partial_path - ).parent.glob("Atlas_*.xml") + (source_visit_dir / partial_path).parent.glob("Atlas_*.xml") )[0] with open(atlas_xml_path, "rb") as atlas_xml: atlas_xml_data = xmltodict.parse(atlas_xml) @@ -153,10 +148,6 @@ def post_transfer( # need to calculate the pixel size of the downscaled image atlas_pixel_size = atlas_original_pixel_size * 7.8 - source = _get_source( - visitless_path.parent / "Images-Disc1" / visitless_path.name, - environment, - ) sample = None for p in partial_path.split("/"): if p.startswith("Sample"): From 0bedee7cd9422a7de64a196e060e42bec6d1c4b9 Mon Sep 17 00:00:00 2001 From: Daniel Hatton Date: Mon, 13 Jan 2025 15:44:29 +0000 Subject: [PATCH 08/13] Allow the avoidance of the creation of a thread inside rsyncer finalisation --- src/murfey/client/multigrid_control.py | 1 + src/murfey/client/rsync.py | 21 ++++++++++++--------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/murfey/client/multigrid_control.py b/src/murfey/client/multigrid_control.py index 9833f2353..2d590956b 100644 --- a/src/murfey/client/multigrid_control.py +++ b/src/murfey/client/multigrid_control.py @@ -159,6 +159,7 @@ def _finalise_rsyncer(self, source: Path): finalise_thread = threading.Thread( name=f"Controller finaliser thread ({source})", target=self.rsync_processes[source].finalise, + kwargs={"thread": False}, daemon=True, ) finalise_thread.start() diff --git a/src/murfey/client/rsync.py b/src/murfey/client/rsync.py index 80e1c99a6..74a122f56 100644 --- a/src/murfey/client/rsync.py +++ b/src/murfey/client/rsync.py @@ -172,18 +172,21 @@ def stop(self): self.thread.join() logger.debug("RSync thread stop completed") - def finalise(self): + def finalise(self, thread: bool = True): self.stop() self._remove_files = True self._notify = False - self.thread = threading.Thread( - name=f"RSync finalisation {self._basepath}:{self._remote}", - target=self._process, - daemon=True, - ) - for f in self._basepath.glob("**/*"): - self.queue.put(f) - self.stop() + if thread: + self.thread = threading.Thread( + name=f"RSync finalisation {self._basepath}:{self._remote}", + target=self._process, + daemon=True, + ) + for f in self._basepath.glob("**/*"): + self.queue.put(f) + self.stop() + else: + self._transfer(list(self._basepath.glob("**/*"))) def enqueue(self, file_path: Path): if not self._stopping: From 6fab878ebc6e0d76a579ed60bca5ae7a15228561 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 14 Jan 2025 10:07:30 +0000 Subject: [PATCH 09/13] Version update (patch) (#444) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Bump version: 0.16.1 → 0.16.2 --------- Co-authored-by: DiamondLightSource-build-server --- .bumpclient.toml | 2 +- .bumpversion.toml | 2 +- pyproject.toml | 2 +- src/murfey/__init__.py | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.bumpclient.toml b/.bumpclient.toml index a1b3918e1..e41bfff0d 100644 --- a/.bumpclient.toml +++ b/.bumpclient.toml @@ -1,5 +1,5 @@ [tool.bumpversion] -current_version = "0.16.1" +current_version = "0.16.2" commit = true tag = false diff --git a/.bumpversion.toml b/.bumpversion.toml index 7829b27b0..a60fff56d 100644 --- a/.bumpversion.toml +++ b/.bumpversion.toml @@ -1,5 +1,5 @@ [tool.bumpversion] -current_version = "0.16.1" +current_version = "0.16.2" commit = true tag = true diff --git a/pyproject.toml b/pyproject.toml index 6fe0fb663..d956932fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ requires = [ [project] name = "murfey" -version = "0.16.1" +version = "0.16.2" description = "Client-Server architecture hauling Cryo-EM data" readme = "README.md" keywords = [ diff --git a/src/murfey/__init__.py b/src/murfey/__init__.py index 2b6d8e9c3..78d1067f9 100644 --- a/src/murfey/__init__.py +++ b/src/murfey/__init__.py @@ -1,4 +1,4 @@ from __future__ import annotations -__version__ = "0.16.1" -__supported_client_version__ = "0.16.1" +__version__ = "0.16.2" +__supported_client_version__ = "0.16.2" From 4db1b291a6f3bc4909f8c60a8a07094c75a6dc1f Mon Sep 17 00:00:00 2001 From: "Tien, Eu Pin" <48775880+tieneupin@users.noreply.github.com> Date: Wed, 15 Jan 2025 17:16:02 +0000 Subject: [PATCH 10/13] Resolved client-server import chain and added installation key for instrument server dependencies (#448) * Moves the `_midpoint()` function to `murfey.util.tomo` to keep client and server modules detached * Replaces remaining instances of `procrunner.run()` with `subprocess.run()` * Creates a new key for instrument server package dependencies --- pyproject.toml | 11 ++-- src/murfey/cli/transfer.py | 4 +- src/murfey/client/contexts/tomo.py | 17 +----- src/murfey/instrument_server/api.py | 19 +++--- src/murfey/server/__init__.py | 4 +- src/murfey/server/api/__init__.py | 4 +- src/murfey/util/rsync.py | 90 ++++++++++++++--------------- src/murfey/util/tomo.py | 16 +++++ 8 files changed, 81 insertions(+), 84 deletions(-) create mode 100644 src/murfey/util/tomo.py diff --git a/pyproject.toml b/pyproject.toml index d956932fa..1ee9240d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,17 +32,16 @@ classifiers = [ dependencies = [ "backports.entry_points_selectable", "defusedxml", # For safely parsing XML files - "pydantic<2", # Locked to <2 by zocalo + "pydantic<2", # Locked to <2 by cygwin terminal "requests", "rich", "werkzeug", ] [project.optional-dependencies] cicd = [ - "pytest-cov", # Used by Azure Pipelines for PyTest coverage reports + "pytest-cov", # Used for generating PyTest coverage reports ] client = [ - "procrunner", "textual==0.42.0", "websocket-client", "xmltodict", @@ -53,8 +52,12 @@ developer = [ "pre-commit", # Formatting, linting, type checking, etc. "pytest", # Test code functionality ] +instrument-server = [ + "fastapi[standard]", + "python-jose[cryptography]", + "uvicorn[standard]", +] server = [ - # "matplotlib", # For visual statistical analysis of images "aiohttp", "cryptography", "fastapi[standard]", diff --git a/src/murfey/cli/transfer.py b/src/murfey/cli/transfer.py index 3c03c8e05..4b23793c4 100644 --- a/src/murfey/cli/transfer.py +++ b/src/murfey/cli/transfer.py @@ -1,10 +1,10 @@ from __future__ import annotations import argparse +import subprocess from pathlib import Path from urllib.parse import urlparse -import procrunner import requests from rich.console import Console from rich.prompt import Confirm @@ -76,6 +76,6 @@ def run(): cmd.extend(list(Path(args.source or ".").glob("*"))) cmd.append(f"{murfey_url.hostname}::{args.destination}") - result = procrunner.run(cmd) + result = subprocess.run(cmd) if result.returncode: console.print(f"[red]rsync failed returning code {result.returncode}") diff --git a/src/murfey/client/contexts/tomo.py b/src/murfey/client/contexts/tomo.py index e9bcd2aaf..f09dad951 100644 --- a/src/murfey/client/contexts/tomo.py +++ b/src/murfey/client/contexts/tomo.py @@ -21,6 +21,7 @@ ) from murfey.util import authorised_requests, capture_post, get_machine_config_client from murfey.util.mdoc import get_block, get_global_data, get_num_blocks +from murfey.util.tomo import midpoint logger = logging.getLogger("murfey.client.contexts.tomo") @@ -64,20 +65,6 @@ def _construct_tilt_series_name(file_path: Path) -> str: return "_".join(split_name[:-5]) -def _midpoint(angles: List[float]) -> int: - if not angles: - return 0 - if len(angles) <= 2: - return round(angles[0]) - sorted_angles = sorted(angles) - return round( - sorted_angles[len(sorted_angles) // 2] - if sorted_angles[len(sorted_angles) // 2] - and sorted_angles[len(sorted_angles) // 2 + 1] - else 0 - ) - - class ProcessFileIncomplete(BaseModel): dest: Path source: Path @@ -738,7 +725,7 @@ def gather_metadata( if environment else None ) - mdoc_metadata["manual_tilt_offset"] = -_midpoint( + mdoc_metadata["manual_tilt_offset"] = -midpoint( [float(b["TiltAngle"]) for b in blocks] ) mdoc_metadata["source"] = str(self._basepath) diff --git a/src/murfey/instrument_server/api.py b/src/murfey/instrument_server/api.py index c2bee0516..229acfa9b 100644 --- a/src/murfey/instrument_server/api.py +++ b/src/murfey/instrument_server/api.py @@ -1,6 +1,7 @@ from __future__ import annotations import secrets +import subprocess import time from datetime import datetime from functools import partial @@ -9,7 +10,6 @@ from typing import Annotated, Dict, List, Optional, Union from urllib.parse import urlparse -import procrunner import requests from fastapi import APIRouter, Depends, HTTPException, status from fastapi.security import OAuth2PasswordBearer @@ -21,7 +21,7 @@ from murfey.client.multigrid_control import MultigridController from murfey.client.rsync import RSyncer from murfey.client.watchdir_multigrid import MultigridDirWatcher -from murfey.util import sanitise_nonpath, secure_path +from murfey.util import sanitise, sanitise_nonpath, secure_path from murfey.util.instrument_models import MultigridWatcherSpec from murfey.util.models import File, Token @@ -278,19 +278,16 @@ class GainReference(BaseModel): @router.post("/sessions/{session_id}/upload_gain_reference") def upload_gain_reference(session_id: MurfeySessionID, gain_reference: GainReference): + safe_gain_path = sanitise(str(gain_reference.gain_path)) + safe_visit_path = sanitise(gain_reference.visit_path) + safe_destination_dir = sanitise(gain_reference.gain_destination_dir) cmd = [ "rsync", - str(gain_reference.gain_path), - f"{urlparse(_get_murfey_url(), allow_fragments=False).hostname}::{gain_reference.visit_path}/{gain_reference.gain_destination_dir}/{secure_filename(gain_reference.gain_path.name)}", + safe_gain_path, + f"{urlparse(_get_murfey_url(), allow_fragments=False).hostname}::{safe_visit_path}/{safe_destination_dir}/{secure_filename(gain_reference.gain_path.name)}", ] - gain_rsync = procrunner.run(cmd) + gain_rsync = subprocess.run(cmd) if gain_rsync.returncode: - safe_gain_path = ( - str(gain_reference.gain_path).replace("\r\n", "").replace("\n", "") - ) - safe_visit_path = gain_reference.visit_path.replace("\r\n", "").replace( - "\n", "" - ) logger.warning( f"Gain reference file {safe_gain_path} was not successfully transferred to {safe_visit_path}/processing" ) diff --git a/src/murfey/server/__init__.py b/src/murfey/server/__init__.py index 823df3063..57b10dcc7 100644 --- a/src/murfey/server/__init__.py +++ b/src/murfey/server/__init__.py @@ -48,7 +48,6 @@ import murfey.server.prometheus as prom import murfey.server.websocket import murfey.util.db as db -from murfey.client.contexts.tomo import _midpoint from murfey.server.murfey_db import url # murfey_db from murfey.util import LogFilter from murfey.util.config import ( @@ -60,6 +59,7 @@ ) from murfey.util.processing_params import default_spa_parameters from murfey.util.state import global_state +from murfey.util.tomo import midpoint try: from murfey.server.ispyb import TransportManager # Session @@ -2576,7 +2576,7 @@ def feedback_callback(header: dict, message: dict) -> None: ) if not stack_file.parent.exists(): stack_file.parent.mkdir(parents=True) - tilt_offset = _midpoint([float(get_angle(t)) for t in tilts]) + tilt_offset = midpoint([float(get_angle(t)) for t in tilts]) zocalo_message = { "recipes": ["em-tomo-align"], "parameters": { diff --git a/src/murfey/server/api/__init__.py b/src/murfey/server/api/__init__.py index 6f5d411e9..d6232f9ac 100644 --- a/src/murfey/server/api/__init__.py +++ b/src/murfey/server/api/__init__.py @@ -33,7 +33,6 @@ import murfey.server.websocket as ws import murfey.util.eer from murfey.server import ( - _midpoint, _murfey_id, _transport_object, check_tilt_series_mc, @@ -108,6 +107,7 @@ ) from murfey.util.processing_params import default_spa_parameters from murfey.util.state import global_state +from murfey.util.tomo import midpoint log = logging.getLogger("murfey.server.api") @@ -840,7 +840,7 @@ def register_completed_tilt_series( ) if not stack_file.parent.exists(): stack_file.parent.mkdir(parents=True) - tilt_offset = _midpoint([float(get_angle(t)) for t in tilts]) + tilt_offset = midpoint([float(get_angle(t)) for t in tilts]) zocalo_message = { "recipes": ["em-tomo-align"], "parameters": { diff --git a/src/murfey/util/rsync.py b/src/murfey/util/rsync.py index 51836f692..84953b39a 100644 --- a/src/murfey/util/rsync.py +++ b/src/murfey/util/rsync.py @@ -1,11 +1,10 @@ from __future__ import annotations import logging +import subprocess from pathlib import Path from typing import Callable, Dict, List, Optional, Tuple, Union -import procrunner - from murfey.util import Processor from murfey.util.file_monitor import Monitor @@ -32,7 +31,7 @@ def __init__( self.received_bytes = 0 self.byte_rate: float = 0 self.total_size = 0 - self.runner_return: List[procrunner.ReturnObject] = [] + self.runner_return: List[subprocess.CompletedProcess] = [] self._root = root self._sub_structure: Optional[Path] = None self._notify = notify or (lambda f: None) @@ -53,7 +52,7 @@ def _run_rsync( retry: bool = True, ): """ - Run rsync -v on a list of files using procrunner. + Run rsync -v on a list of files using subprocess. :param root: root path of files for transferring; structure below the root is preserved :type root: pathlib.Path object @@ -109,17 +108,20 @@ def _single_rsync( else: cmd.append(str(self._finaldir / sub_struct) + "/") self._transferring = True - runner = procrunner.run( + runner = subprocess.run( cmd, - callback_stdout=self._parse_rsync_stdout, - callback_stderr=self._parse_rsync_stderr, + capture_output=True, ) + for line in runner.stdout.decode("utf-8", "replace").split("\n"): + self._parse_rsync_stdout(line) + for line in runner.stderr.decode("utf-8", "replace").split("\n"): + self._parse_rsync_stderr(line) self.runner_return.append(runner) self.failed.extend(root / sub_struct / f for f in self._failed_tmp) if retry: self._in.put(root / sub_struct / f for f in self._failed_tmp) - def _parse_rsync_stdout(self, stdout: bytes): + def _parse_rsync_stdout(self, line: str): """ Parse rsync stdout to collect information such as the paths of transferred files and the amount of data transferred. @@ -127,51 +129,43 @@ def _parse_rsync_stdout(self, stdout: bytes): :param stdout: stdout of rsync process :type stdout: bytes """ - stringy_stdout = str(stdout) - if stringy_stdout: - if self._transferring: - if stringy_stdout.startswith("sent"): - self._transferring = False - byte_info = stringy_stdout.split() - self.sent_bytes = int( - byte_info[byte_info.index("sent") + 1].replace(",", "") - ) - self.received_bytes = int( - byte_info[byte_info.index("received") + 1].replace(",", "") - ) - self.byte_rate = float( - byte_info[byte_info.index("bytes/sec") - 1].replace(",", "") - ) - elif len(stringy_stdout.split()) == 1: - if self._root and self._sub_structure: - self._notify( - self._finaldir / self._sub_structure / stringy_stdout - ) - self._out.put(self._root / self._sub_structure / stringy_stdout) - else: - logger.warning( - f"root or substructure not set for transfer of {stringy_stdout}" - ) - else: - if "total size" in stringy_stdout: - self.total_size = int( - stringy_stdout.replace("total size", "").split()[1] + if self._transferring: + if line.startswith("sent"): + self._transferring = False + byte_info = line.split() + self.sent_bytes = int( + byte_info[byte_info.index("sent") + 1].replace(",", "") + ) + self.received_bytes = int( + byte_info[byte_info.index("received") + 1].replace(",", "") + ) + self.byte_rate = float( + byte_info[byte_info.index("bytes/sec") - 1].replace(",", "") + ) + elif len(line.split()) == 1: + if self._root and self._sub_structure: + self._notify(self._finaldir / self._sub_structure / line) + self._out.put(self._root / self._sub_structure / line) + else: + logger.warning( + f"root or substructure not set for transfer of {line}" ) + else: + if "total size" in line: + self.total_size = int(line.replace("total size", "").split()[1]) - def _parse_rsync_stderr(self, stderr: bytes): + def _parse_rsync_stderr(self, line: str): """ Parse rsync stderr to collect information on any files that failed to transfer. :param stderr: stderr of rsync process :type stderr: bytes """ - stringy_stderr = str(stderr) - if stringy_stderr: - if ( - stringy_stderr.startswith("rsync: link_stat") - or stringy_stderr.startswith("rsync: [sender] link_stat") - ) and "failed" in stringy_stderr: - failed_msg = stringy_stderr.split() - self._failed_tmp.append( - failed_msg[failed_msg.index("failed:") - 1].replace('"', "") - ) + if ( + line.startswith("rsync: link_stat") + or line.startswith("rsync: [sender] link_stat") + ) and "failed" in line: + failed_msg = line.split() + self._failed_tmp.append( + failed_msg[failed_msg.index("failed:") - 1].replace('"', "") + ) diff --git a/src/murfey/util/tomo.py b/src/murfey/util/tomo.py new file mode 100644 index 000000000..dc7314ec1 --- /dev/null +++ b/src/murfey/util/tomo.py @@ -0,0 +1,16 @@ +def midpoint(angles: list[float]) -> int: + """ + Utility function to calculate the midpoint of the angles used in a tilt series. + Used primarily in the tomography workflow. + """ + if not angles: + return 0 + if len(angles) <= 2: + return round(angles[0]) + sorted_angles = sorted(angles) + return round( + sorted_angles[len(sorted_angles) // 2] + if sorted_angles[len(sorted_angles) // 2] + and sorted_angles[len(sorted_angles) // 2 + 1] + else 0 + ) From aaf3d5258d5fb908d9235b0f33e879a547d0dde9 Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Thu, 16 Jan 2025 09:10:02 +0000 Subject: [PATCH 11/13] Fix to get from the recipes during tomo flushing --- src/murfey/server/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/murfey/server/__init__.py b/src/murfey/server/__init__.py index 57b10dcc7..a258f71ae 100644 --- a/src/murfey/server/__init__.py +++ b/src/murfey/server/__init__.py @@ -2077,7 +2077,7 @@ def _flush_tomography_preprocessing(message: dict): ) return - recipe_name = machine_config.get("em-tomo-preprocess", "em-tomo-preprocess") + recipe_name = machine_config.recipes.get("em-tomo-preprocess", "em-tomo-preprocess") for f in stashed_files: collected_ids = murfey_db.exec( From 40058682fa59069f4c00ed471f53fdd1185e9d0f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 16 Jan 2025 18:25:51 +0000 Subject: [PATCH 12/13] Version update (patch) (#452) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Bump version: 0.16.2 → 0.16.3 * Bump supported client version: 0.16.2 → 0.16.3 --------- Co-authored-by: DiamondLightSource-build-server --- .bumpclient.toml | 2 +- .bumpversion.toml | 2 +- pyproject.toml | 2 +- src/murfey/__init__.py | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.bumpclient.toml b/.bumpclient.toml index e41bfff0d..32e4888c7 100644 --- a/.bumpclient.toml +++ b/.bumpclient.toml @@ -1,5 +1,5 @@ [tool.bumpversion] -current_version = "0.16.2" +current_version = "0.16.3" commit = true tag = false diff --git a/.bumpversion.toml b/.bumpversion.toml index a60fff56d..7c7e2aae8 100644 --- a/.bumpversion.toml +++ b/.bumpversion.toml @@ -1,5 +1,5 @@ [tool.bumpversion] -current_version = "0.16.2" +current_version = "0.16.3" commit = true tag = true diff --git a/pyproject.toml b/pyproject.toml index 1ee9240d2..2c6da691a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ requires = [ [project] name = "murfey" -version = "0.16.2" +version = "0.16.3" description = "Client-Server architecture hauling Cryo-EM data" readme = "README.md" keywords = [ diff --git a/src/murfey/__init__.py b/src/murfey/__init__.py index 78d1067f9..d054d53a6 100644 --- a/src/murfey/__init__.py +++ b/src/murfey/__init__.py @@ -1,4 +1,4 @@ from __future__ import annotations -__version__ = "0.16.2" -__supported_client_version__ = "0.16.2" +__version__ = "0.16.3" +__supported_client_version__ = "0.16.3" From f0fc894f9975a14dc5578668258c45ab1ec097c5 Mon Sep 17 00:00:00 2001 From: Dan Hatton Date: Tue, 21 Jan 2025 13:43:07 +0000 Subject: [PATCH 13/13] Add some logging --- src/murfey/client/contexts/spa_metadata.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/murfey/client/contexts/spa_metadata.py b/src/murfey/client/contexts/spa_metadata.py index b8e3b84e0..9c1974568 100644 --- a/src/murfey/client/contexts/spa_metadata.py +++ b/src/murfey/client/contexts/spa_metadata.py @@ -124,8 +124,10 @@ def post_transfer( windows_path = data["EpuSessionXml"]["Samples"]["_items"]["SampleXml"][0][ "AtlasId" ]["#text"] + logger.info(f"Windows path to atlas metadata found: {windows_path}") visit_index = windows_path.split("\\").index(environment.visit) partial_path = "/".join(windows_path.split("\\")[visit_index + 1 :]) + logger.info("Partial Linux path successfully constructed from Windows path") source = _get_source(transferred_file, environment) if not source: @@ -136,9 +138,13 @@ def post_transfer( source_visit_dir = source.parent + logger.info( + f"Looking for atlas XML file in metadata directory {str((source_visit_dir / partial_path).parent)}" + ) atlas_xml_path = list( (source_visit_dir / partial_path).parent.glob("Atlas_*.xml") )[0] + logger.info(f"Atlas XML path {str(atlas_xml_path)} found") with open(atlas_xml_path, "rb") as atlas_xml: atlas_xml_data = xmltodict.parse(atlas_xml) atlas_original_pixel_size = atlas_xml_data["MicroscopeImage"][ @@ -147,6 +153,7 @@ def post_transfer( # need to calculate the pixel size of the downscaled image atlas_pixel_size = atlas_original_pixel_size * 7.8 + logger.info(f"Atlas image pixel size determined to be {atlas_pixel_size}") sample = None for p in partial_path.split("/"): @@ -154,7 +161,7 @@ def post_transfer( sample = int(p.replace("Sample", "")) break else: - logger.warning(f"Sample could not be indetified for {transferred_file}") + logger.warning(f"Sample could not be identified for {transferred_file}") return if source: environment.samples[source] = SampleInfo(