Source code for darfix.core.data_selection
import os
import urllib.parse
import urllib.request
from typing import Optional
from typing import Union
from esrf_pathlib import ESRFPath
from silx.io.url import DataUrl
from darfix.core.dataset import ImageDataset
[docs]
def load_process_data(
    detector_url: Union[str, DataUrl],
    root_dir: Optional[str] = None,
    dark_detector_url: Optional[Union[str, DataUrl]] = None,
    title: str = "",
    metadata_url=None,
):
    """
    Build the image dataset for `detector_url` and, optionally, a dark dataset.

    `detector_url` may be:

    - a str: taken as a file pattern (for EDF files).
    - a DataUrl: taken as something the silx `get_data` function can read.

    :param detector_url: location of the detector data to load.
    :param root_dir: directory handed to the dataset as `_dir`. When empty or
        None, it is derived from `detector_url`: the directory of the DataUrl
        file path, or the directory of the file pattern.
    :param dark_detector_url: optional location of the dark frames (str or
        DataUrl). When falsy, no dark dataset is created.
    :param title: title forwarded to the main dataset.
    :param metadata_url: path to the scan metadata for HDF5 containing
        positioner information, used to load metadata for non-EDF files.
    :return: tuple `(dataset, bg_dataset)`; `bg_dataset` is None when no dark
        url was given.
    :raises ValueError: if `detector_url` is an empty string.
    :raises TypeError: if `detector_url` or `dark_detector_url` is neither a
        str nor a DataUrl.
    :raises AssertionError: if a DataUrl has no file path, or if the loaded
        dataset holds no data.
    """
    # NOTE(review): the sanity checks below are `assert` statements, so they
    # are skipped when Python runs with -O.
    derive_root_dir = not bool(root_dir)

    # Validate the main url and, if needed, work out the dataset directory.
    if isinstance(detector_url, DataUrl):
        assert detector_url.file_path() not in (
            "",
            None,
        ), "no file_path provided to the DataUrl"
        if derive_root_dir:
            root_dir = os.path.dirname(detector_url.file_path())
    elif isinstance(detector_url, str):
        if not detector_url:
            raise ValueError("'detector_url' cannot be an empty string")
        if derive_root_dir:
            root_dir = _get_root_dir(detector_url)
    else:
        raise TypeError(
            f"Expected detector_url to be a string or a silx DataUrl. Got {type(detector_url)} instead."
        )

    # Both accepted url types build the dataset the same way.
    dataset = ImageDataset(
        _dir=root_dir,
        detector_url=detector_url,
        title=title,
        metadata_url=metadata_url,
    )

    bg_dataset = None
    if dark_detector_url:
        if not isinstance(dark_detector_url, str):
            if not isinstance(dark_detector_url, DataUrl):
                raise TypeError(
                    f"Expected dark_detector_url to be a string or a silx DataUrl. Got {type(dark_detector_url)} instead."
                )
            assert dark_detector_url.file_path() not in (
                "",
                None,
            ), "no file_path provided to the DataUrl"

        # The dark dataset gets a "dark" sub-directory of the main dataset
        # directory, created on demand.
        dark_dir = os.path.join(dataset.dir, "dark")
        os.makedirs(dark_dir, exist_ok=True)
        bg_dataset = ImageDataset(
            _dir=dark_dir,
            detector_url=dark_detector_url,
            metadata_url=None,
        )

    assert dataset.data is not None and dataset.data.size > 0, "No data was loaded!"
    return dataset, bg_dataset
def _get_root_dir(filename: str) -> str:
url = urllib.parse.urlparse(filename, scheme="file")
return os.path.dirname(urllib.request.url2pathname(url.path))
def get_default_output_directory(raw_data_file: str) -> str:
    """
    Return the default output directory for `raw_data_file`.

    The path is wrapped in an `ESRFPath` and its `processed_dataset_path`
    attribute is used when available. If reading it raises `AttributeError`
    (the path is not an ESRF path), the current working directory is returned.

    :param raw_data_file: path to the raw data file.
    :return: output directory as a string.
    """
    raw_path = ESRFPath(raw_data_file)
    try:
        output_dir = str(raw_path.processed_dataset_path)
    except AttributeError:
        # Not an ESRF path: fall back to the current working directory.
        output_dir = os.getcwd()
    return output_dir