Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions monailabel/datastore/cvat.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import tempfile
import time
import urllib.parse
from typing import Any, Dict

import numpy as np
import requests
Expand Down Expand Up @@ -318,6 +319,32 @@ def download_from_cvat(self, max_retry_count=5, retry_wait_time=10):
retry_count += 1
return None

def add_directory(self, directory_id: str, filename: str, info: Dict[str, Any]) -> str:
    """
    Unsupported for the CVAT datastore.

    CVAT samples are managed through CVAT tasks, so directory (multi-file)
    samples cannot be ingested here.

    :raises NotImplementedError: always
    """
    message = "This datastore does not support adding directories"
    raise NotImplementedError(message)

def get_is_multichannel(self) -> bool:
    """
    Multichannel (4D) samples are not supported by this datastore.

    Logs an informational note and always returns False.
    """
    msg = "The function get_is_multichannel is not implemented for this datastore"
    logger.info(msg)
    return False

def get_is_multi_file(self) -> bool:
    """
    Multi-file (directory-per-sample) data is not supported by this datastore.

    Logs an informational note and always returns False.
    """
    msg = "The function get_is_multi_file is not implemented for this datastore"
    logger.info(msg)
    return False


"""
def main():
Expand Down
26 changes: 26 additions & 0 deletions monailabel/datastore/dicom.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,3 +264,29 @@ def _download_labeled_data(self):
def datalist(self, full_path=True) -> List[Dict[str, Any]]:
self._download_labeled_data()
return super().datalist(full_path)

def add_directory(self, directory_id: str, filename: str, info: Dict[str, Any]) -> str:
    """
    Unsupported for the DICOMWeb datastore.

    DICOM studies are ingested via DICOMWeb, so directory (multi-file)
    samples cannot be added here.

    :raises NotImplementedError: always
    """
    message = "This datastore does not support adding directories"
    raise NotImplementedError(message)

def get_is_multichannel(self) -> bool:
    """
    Multichannel (4D) samples are not supported by this datastore.

    Logs an informational note and always returns False.
    """
    msg = "The function get_is_multichannel is not implemented for this datastore"
    logger.info(msg)
    return False

def get_is_multi_file(self) -> bool:
    """
    Multi-file (directory-per-sample) data is not supported by this datastore.

    Logs an informational note and always returns False.
    """
    msg = "The function get_is_multi_file is not implemented for this datastore"
    logger.info(msg)
    return False
26 changes: 26 additions & 0 deletions monailabel/datastore/dsa.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,32 @@ def status(self) -> Dict[str, Any]:
def json(self):
return self.datalist()

def add_directory(self, directory_id: str, filename: str, info: Dict[str, Any]) -> str:
    """
    Unsupported for the DSA datastore.

    DSA items live on the remote server, so directory (multi-file)
    samples cannot be added here.

    :raises NotImplementedError: always
    """
    message = "This datastore does not support adding directories"
    raise NotImplementedError(message)

def get_is_multichannel(self) -> bool:
    """
    Multichannel (4D) samples are not supported by this datastore.

    Logs an informational note and always returns False.
    """
    msg = "The function get_is_multichannel is not implemented for this datastore"
    logger.info(msg)
    return False

def get_is_multi_file(self) -> bool:
    """
    Multi-file (directory-per-sample) data is not supported by this datastore.

    Logs an informational note and always returns False.
    """
    msg = "The function get_is_multi_file is not implemented for this datastore"
    logger.info(msg)
    return False


"""
def main():
Expand Down
102 changes: 85 additions & 17 deletions monailabel/datastore/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,11 @@ def __init__(
images_dir: str = ".",
labels_dir: str = "labels",
datastore_config: str = "datastore_v2.json",
extensions=("*.nii.gz", "*.nii"),
extensions=("*.nii.gz", "*.nii", "*.nrrd"),
auto_reload=False,
read_only=False,
multichannel: bool = False,
multi_file: bool = False,
):
"""
Creates a `LocalDataset` object
Expand All @@ -124,6 +126,14 @@ def __init__(
self._ignore_event_config = False
self._config_ts = 0
self._auto_reload = auto_reload
if multichannel and multi_file:
raise ValueError(
"multichannel and multi_file are mutually exclusive: "
"multichannel expects a single 4D NIfTI volume per sample, "
"while multi_file expects a directory of separate modality files."
)
self._multichannel: bool = multichannel
self._multi_file: bool = multi_file

logging.getLogger("filelock").setLevel(logging.ERROR)

Expand Down Expand Up @@ -256,6 +266,18 @@ def datalist(self, full_path=True) -> List[Dict[str, Any]]:
ds = json.loads(json.dumps(ds).replace(f"{self._datastore_path.rstrip(os.pathsep)}{os.pathsep}", ""))
return ds

def get_is_multichannel(self) -> bool:
    """
    Report whether this dataset holds multichannel (4D) samples,
    as configured at construction time.
    """
    multichannel_flag = self._multichannel
    return multichannel_flag

def get_is_multi_file(self) -> bool:
    """
    Report whether each sample in this dataset is a directory of
    separate files, as configured at construction time.
    """
    multi_file_flag = self._multi_file
    return multi_file_flag

def get_image(self, image_id: str, params=None) -> Any:
"""
Retrieve image object based on image id
Expand Down Expand Up @@ -431,6 +453,43 @@ def refresh(self):
"""
self._reconcile_datastore()

def add_directory(self, directory_id: str, filename: str, info: Dict[str, Any]) -> str:
    """
    Add a directory-backed (multi-file) sample to the datastore.

    Copies ``filename`` — a directory of per-modality files, or a single
    file — into the datastore image folder under ``directory_id`` and
    registers it in the datastore index with an empty extension (a
    directory has no single file extension).

    :param directory_id: id to register the sample under; when empty,
        the basename of ``filename`` is used
    :param filename: path of the source directory (or file) to copy in
    :param info: additional metadata to persist with the sample
    :return: the id the sample was registered under
    """
    # `base_name` instead of `id` — avoid shadowing the builtin.
    base_name = os.path.basename(os.path.normpath(filename))
    if not directory_id:
        directory_id = base_name

    logger.info(f"Adding Image: {directory_id} => (unknown)")
    name = directory_id
    dest = os.path.realpath(os.path.join(self._datastore.image_path(), name))

    with FileLock(self._lock_file):
        logger.debug("Acquired the lock!")
        if os.path.isdir(filename):
            # Replace any stale copy so the stored sample mirrors the source.
            if os.path.exists(dest):
                shutil.rmtree(dest)
            shutil.copytree(filename, dest)
        else:
            shutil.copy2(filename, dest)

        info = info if info else {}
        info["ts"] = int(time.time())
        info["name"] = name

        # Directories carry no extension; register with ext="".
        self._datastore.objects[directory_id] = ImageLabelModel(image=DataModel(info=info, ext=""))
        self._update_datastore_file(lock=False)
    logger.debug("Released the lock!")
    return directory_id
Comment on lines +456 to +491
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

# First, let's examine the add_directory method and surrounding code
cat -n monailabel/datastore/local.py | sed -n '456,491p'

Repository: Project-MONAI/MONAILabel

Length of output: 1643


🏁 Script executed:

# Now check lines 654-673 mentioned as "Also applies to"
cat -n monailabel/datastore/local.py | sed -n '654,673p'

Repository: Project-MONAI/MONAILabel

Length of output: 1065


🏁 Script executed:

# Find the get_image method in the same file
rg -n "def get_image" monailabel/datastore/local.py

Repository: Project-MONAI/MONAILabel

Length of output: 253


🏁 Script executed:

# Find the get_dataset_archive method
rg -n "def get_dataset_archive" monailabel/datastore/local.py

Repository: Project-MONAI/MONAILabel

Length of output: 141


🏁 Script executed:

# Look for multi_file discovery path
rg -n "multi_file" monailabel/datastore/local.py

Repository: Project-MONAI/MONAILabel

Length of output: 514


🏁 Script executed:

# Check the /datastore/image endpoint
cat -n monailabel/endpoints/datastore.py

Repository: Project-MONAI/MONAILabel

Length of output: 13996


🏁 Script executed:

# Get get_image() implementation
sed -n '281,310p' monailabel/datastore/local.py | cat -n

Repository: Project-MONAI/MONAILabel

Length of output: 1377


🏁 Script executed:

# Get get_dataset_archive() implementation  
sed -n '398,450p' monailabel/datastore/local.py | cat -n

Repository: Project-MONAI/MONAILabel

Length of output: 2561


🏁 Script executed:

# Get the full get_image() method to see what it does
ast-grep --pattern 'def get_image($_) { $$$ }'

Repository: Project-MONAI/MONAILabel

Length of output: 50


🏁 Script executed:

# Better approach - look for get_image implementation with context
rg -A 20 "def get_image\(self, image_id" monailabel/datastore/local.py

Repository: Project-MONAI/MONAILabel

Length of output: 854


🏁 Script executed:

# Look for get_dataset_archive implementation with context
rg -A 40 "def get_dataset_archive" monailabel/datastore/local.py

Repository: Project-MONAI/MONAILabel

Length of output: 1838


🏁 Script executed:

# Find _to_bytes implementation
rg -A 10 "def _to_bytes" monailabel/datastore/local.py

Repository: Project-MONAI/MONAILabel

Length of output: 441


🏁 Script executed:

# Find _filename implementation
rg -A 10 "def _filename" monailabel/datastore/local.py

Repository: Project-MONAI/MONAILabel

Length of output: 431


🏁 Script executed:

# Check what datalist() returns - used by get_dataset_archive
rg -A 30 "def datalist" monailabel/datastore/local.py

Repository: Project-MONAI/MONAILabel

Length of output: 1180


🏁 Script executed:

# Check if there are any tests or usage of multi_file mode
fd -type f "\.py$" | xargs rg -l "multi_file.*True" | head -5

Repository: Project-MONAI/MONAILabel

Length of output: 491


Directory-backed images leak into APIs that assume file semantics.

Both add_directory() (line 488) and the multi_file discovery path (lines 658-673) persist samples with ext="", resulting in directory URIs instead of file paths. This breaks multiple APIs:

  1. get_image() will fail with IsADirectoryError when pathlib.Path(file).read_bytes() is called on a directory path in _to_bytes()
  2. /datastore/image endpoint correctly rejects non-files via os.path.isfile() check (line 145), but callers expecting to download images will receive 404 errors
  3. get_dataset_archive() uses zipfile.ZipFile.write(path) which only archives the directory entry itself, not its contents

Test and fix these read/export paths before merging multi_file support.


def add_image(self, image_id: str, image_filename: str, image_info: Dict[str, Any]) -> str:
id, image_ext = self._to_id(os.path.basename(image_filename))
if not image_id:
Expand Down Expand Up @@ -552,10 +611,17 @@ def _list_files(self, path, patterns):
files = os.listdir(path)

filtered = dict()
for pattern in patterns:
matching = fnmatch.filter(files, pattern)
for file in matching:
filtered[os.path.basename(file)] = file
if not self._multi_file:
for pattern in patterns:
matching = fnmatch.filter(files, pattern)
for file in matching:
filtered[os.path.basename(file)] = file
else:
ignored = {"labels", ".lock", os.path.basename(self._datastore_config_path).lower()}
for file in files:
abs_file = os.path.join(path, file)
if os.path.isdir(abs_file) and file.lower() not in ignored:
filtered[os.path.basename(file)] = file
return filtered

def _reconcile_datastore(self):
Expand Down Expand Up @@ -585,24 +651,26 @@ def _add_non_existing_images(self) -> int:
invalidate = 0
self._init_from_datastore_file()

local_images = self._list_files(self._datastore.image_path(), self._extensions)
local_files = self._list_files(self._datastore.image_path(), self._extensions)

image_ids = list(self._datastore.objects.keys())
for image_file in local_images:
image_id, image_ext = self._to_id(image_file)
if image_id not in image_ids:
logger.info(f"Adding New Image: {image_id} => {image_file}")
ids = list(self._datastore.objects.keys())
for file in local_files:
if self._multi_file:
# Directories have no extension — use the name as-is
file_id = file
file_ext_str = ""
else:
file_id, file_ext_str = self._to_id(file)

name = self._filename(image_id, image_ext)
image_info = {
if file_id not in ids:
logger.info(f"Adding New Image: {file_id} => {file}")
name = self._filename(file_id, file_ext_str)
file_info = {
"ts": int(time.time()),
# "checksum": file_checksum(os.path.join(self._datastore.image_path(), name)),
"name": name,
}

invalidate += 1
self._datastore.objects[image_id] = ImageLabelModel(image=DataModel(info=image_info, ext=image_ext))

self._datastore.objects[file_id] = ImageLabelModel(image=DataModel(info=file_info, ext=file_ext_str))
return invalidate

def _add_non_existing_labels(self, tag) -> int:
Expand Down
26 changes: 26 additions & 0 deletions monailabel/datastore/xnat.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,32 @@ def __upload_assessment(self, aiaa_model_name, image_id, file_path, type):

self._request_put(url, data, type=type)

def add_directory(self, directory_id: str, filename: str, info: Dict[str, Any]) -> str:
    """
    Unsupported for the XNAT datastore.

    XNAT sessions are managed on the remote server, so directory
    (multi-file) samples cannot be added here.

    :raises NotImplementedError: always
    """
    message = "This datastore does not support adding directories"
    raise NotImplementedError(message)

def get_is_multichannel(self) -> bool:
    """
    Multichannel (4D) samples are not supported by this datastore.

    Logs an informational note and always returns False.
    """
    msg = "The function get_is_multichannel is not implemented for this datastore"
    logger.info(msg)
    return False

def get_is_multi_file(self) -> bool:
    """
    Multi-file (directory-per-sample) data is not supported by this datastore.

    Logs an informational note and always returns False.
    """
    msg = "The function get_is_multi_file is not implemented for this datastore"
    logger.info(msg)
    return False


"""
def main():
Expand Down
16 changes: 12 additions & 4 deletions monailabel/endpoints/datastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,20 +66,28 @@ def add_image(
user: Optional[str] = None,
):
logger.info(f"Image: {image}; File: {file}; params: {params}")
file_ext = "".join(pathlib.Path(file.filename).suffixes) if file.filename else ".nii.gz"

image_id = image if image else os.path.basename(file.filename).replace(file_ext, "")
instance: MONAILabelApp = app_instance()
if instance.datastore().get_is_multi_file():
raise HTTPException(
status_code=400,
detail="Multi-file datastore does not support single-file uploads. "
"Data must be pre-staged as sample subdirectories on the server filesystem.",
)

file_ext = "".join(pathlib.Path(file.filename).suffixes) if file.filename else ".nii.gz"
id = image if image else os.path.basename(file.filename).replace(file_ext, "")
image_file = tempfile.NamedTemporaryFile(suffix=file_ext).name

with open(image_file, "wb") as buffer:
shutil.copyfileobj(file.file, buffer)
background_tasks.add_task(remove_file, image_file)

instance: MONAILabelApp = app_instance()
save_params: Dict[str, Any] = json.loads(params) if params else {}
if user:
save_params["user"] = user
image_id = instance.datastore().add_image(image_id, image_file, save_params)

image_id = instance.datastore().add_image(id, image_file, save_params)
return {"image": image_id}


Expand Down
15 changes: 13 additions & 2 deletions monailabel/interfaces/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,9 @@ def __init__(
self.app_dir = app_dir
self.studies = studies
self.conf = conf if conf else {}

self.multichannel: bool = strtobool(conf.get("multichannel", False))
self.multi_file: bool = strtobool(conf.get("multi_file", False))
self.input_channels = conf.get("input_channels", False)
self.name = name
self.description = description
self.version = version
Expand Down Expand Up @@ -146,6 +148,8 @@ def init_datastore(self) -> Datastore:
extensions=settings.MONAI_LABEL_DATASTORE_FILE_EXT,
auto_reload=settings.MONAI_LABEL_DATASTORE_AUTO_RELOAD,
read_only=settings.MONAI_LABEL_DATASTORE_READ_ONLY,
multichannel=self.multichannel,
multi_file=self.multi_file,
)

def init_remote_datastore(self) -> Datastore:
Expand Down Expand Up @@ -282,6 +286,9 @@ def infer(self, request, datastore=None):
)

request = copy.deepcopy(request)
request["multi_file"] = self.multi_file
request["multichannel"] = self.multichannel
request["input_channels"] = self.input_channels
request["description"] = task.description

image_id = request["image"]
Expand All @@ -292,7 +299,7 @@ def infer(self, request, datastore=None):
else:
request["image"] = datastore.get_image_uri(request["image"])

if os.path.isdir(request["image"]):
if os.path.isdir(request["image"]) and not self.multi_file:
logger.info("Input is a Directory; Consider it as DICOM")

logger.debug(f"Image => {request['image']}")
Expand Down Expand Up @@ -431,6 +438,10 @@ def train(self, request):
)

request = copy.deepcopy(request)
# 4D image support, send train task information regarding data
request["multi_file"] = self.multi_file
request["multichannel"] = self.multichannel
request["input_channels"] = self.input_channels
result = task(request, self.datastore())

# Run all scoring methods
Expand Down
Loading