235 lines
7.3 KiB
Python
Executable file
235 lines
7.3 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""
|
|
|
|
"""
|
|
from datetime import datetime
|
|
import json
|
|
import math
|
|
import os
|
|
from pathlib import Path
|
|
import subprocess
|
|
import sys
|
|
import tempfile
|
|
from uuid import uuid4
|
|
|
|
from datalad.api import catalog_add
|
|
from datalad_catalog.webcatalog import WebCatalog
|
|
from datalad_catalog.schema_utils import get_metadata_item
|
|
|
|
# Base URL pointing to the top of the ICF data store.
# It is not used verbatim: generate_visit_entry() appends
# '/<study_id>/<visit_id>' to it when composing the access URL of a visit.
icfstore_baseurl = 'https://data.inm-icf.de'

# DICOM tags that are summarized for every visit.
# In this script, generate_visit_entry() collects the unique values of each
# of these tags across a visit's DICOM metadata records and shows them in the
# visit's catalog entry ('ProtocolName' and 'Modality' also become keywords).
# NOTE(review): the previous comment described these as tags stored as
# git-annex metadata; nothing in this file does that -- presumably the list
# is shared with a sibling script. Confirm before changing the keys.
dicom_metadata_keys = [
    "SeriesDescription",
    "SeriesNumber",
    "Modality",
    "MRAcquisitionType",
    "ProtocolName",
    "PulseSequenceName",
]
|
|
|
|
|
|
def main(store_dir: str,
         study_id: str,
         visit_id: str):
    """Add a visit to its study catalog and link it to the study entry.

    Parameters
    ----------
    store_dir:
        Root directory of the data store; the study directory is
        ``<store_dir>/<study_id>``.
    study_id:
        Study identifier; also used as the name of the study entry.
    visit_id:
        Visit identifier; prefix of the two metadata files that are read
        (``<visit_id>_metadata_tarball.json`` and
        ``<visit_id>_metadata_dicoms.json``).
    """
    study_dir = Path(store_dir) / study_id
    # the catalog lives next to the visit metadata, inside the study directory
    study_catalog_path = study_dir / 'catalog'
    tarball_meta_path = study_dir / f'{visit_id}_metadata_tarball.json'
    dicom_meta_path = study_dir / f'{visit_id}_metadata_dicoms.json'

    # Opens the catalog; on first use get_catalog() also creates it and
    # registers the study-level entry as the catalog's main dataset.
    catalog = get_catalog(study_id, study_catalog_path)

    # Build the visit-level entry and store it in the catalog.
    visit_entry = generate_visit_entry(
        study_id, visit_id, tarball_meta_path, dicom_meta_path)
    catalog_add(catalog=study_catalog_path, metadata=visit_entry)

    # Register the visit as a subdataset of the study (main) entry.
    study_entry = catalog.get_main_dataset()
    visit_link = dict(
        dataset_id=visit_entry['dataset_id'],
        dataset_version=visit_entry['dataset_version'],
        dataset_path=visit_id,
    )
    update_entry(
        study_entry['dataset_id'],
        study_entry['dataset_version'],
        study_id,
        'subdatasets',
        [visit_link],
        study_catalog_path,
    )
|
|
|
|
|
|
def get_catalog(study_id, catalog_path):
    """Return the WebCatalog at ``catalog_path``, creating it if needed.

    When the catalog does not exist yet it is created with the
    ``assets/catalog_config.json`` file found two directory levels above
    this script, a study-level entry is generated and added, and that entry
    is set as the catalog's main dataset (its home page).

    Parameters
    ----------
    study_id:
        Study identifier, passed on to generate_study_entry().
    catalog_path:
        Location of the catalog.

    Returns
    -------
    The WebCatalog instance for ``catalog_path``.
    """
    package_root = Path(__file__).resolve().parent.parent
    catalog = WebCatalog(location=str(catalog_path))

    # Nothing to set up when the catalog is already in place.
    if catalog.is_created():
        return catalog

    # 1. create the catalog
    config_file = package_root / 'assets' / 'catalog_config.json'
    catalog.create(config_file=str(config_file))

    # 2. generate and add the study-level entry
    study_entry = generate_study_entry(study_id)
    catalog_add(catalog=catalog_path, metadata=study_entry)

    # 3. make the study entry the catalog home page
    catalog.set_main_dataset(
        dataset_id=study_entry.get('dataset_id'),
        dataset_version=study_entry.get('dataset_version'),
    )
    return catalog
|
|
|
|
|
|
def generate_study_entry(study_id):
    """Create the study-level catalog entry.

    The entry is a 'dataset' metadata item with a freshly generated UUID as
    dataset id and the fixed version 'latest'; its name is ``study_id`` and
    its description points readers to the 'Subdatasets' tab.

    Returns
    -------
    The metadata item produced by get_metadata_item(), with 'name' and
    'description' set.
    """
    description = f"""This data catalog presents the DICOM data collected
for all visits of the study: {study_id}. Browse through details
of all study visits in the 'Subdatasets' tab below."""
    item_spec = dict(
        item_type='dataset',
        dataset_id=str(uuid4()),
        dataset_version='latest',
        source_name='automated_addition',
        source_version='0.1.0',
    )
    study_entry = get_metadata_item(**item_spec)
    study_entry['name'] = study_id
    study_entry['description'] = description
    return study_entry
|
|
|
|
|
|
def update_entry(ds_id, ds_version, ds_name, key, value, study_catalog_path):
    """Add a metadata item that sets one property of an existing dataset.

    A 'dataset' metadata item is generated for the given id/version, given
    the name ``ds_name`` and the single extra property ``key: value``, and
    then passed to catalog_add() for the catalog at ``study_catalog_path``.

    Returns
    -------
    The metadata item that was added.
    """
    entry = get_metadata_item(
        item_type='dataset',
        dataset_id=ds_id,
        dataset_version=ds_version,
        source_name='automated_addition',
        source_version='0.1.0',
    )
    entry['name'] = ds_name
    # the property to set on the dataset
    entry[key] = value
    catalog_add(catalog=study_catalog_path, metadata=entry)
    return entry
|
|
|
|
|
|
def generate_visit_entry(study_id, visit_id, metapath_dataset, metapath_file):
    """Create the catalog entry for a single visit.

    The entry is a 'dataset' metadata item with a freshly generated UUID and
    the fixed version 'latest'. It carries the visit name, a description, the
    access URL of the visit, and summary information derived from the two
    metadata files.

    Parameters
    ----------
    study_id:
        Study identifier; part of the description and the access URL.
    visit_id:
        Visit identifier; used as entry name and in the access URL.
    metapath_dataset:
        Path of the JSON file with the tarball metadata. It must contain the
        keys 'size', 'md5', 'dspath' and 'storepath'.
    metapath_file:
        Path of the JSON file with the DICOM metadata records.
        Presumably a list with one dict per DICOM file -- its length is
        reported as "Nr of DICOMs"; verify against the producer of the file.

    Returns
    -------
    The metadata item with 'name', 'description', 'url', 'keywords',
    'additional_display' and 'top_display' set.

    Raises
    ------
    ValueError
        If the tarball metadata lacks one of the expected keys.
    KeyError
        If a DICOM record lacks one of the tags in ``dicom_metadata_keys``.
    """
    # Create base visit entry
    desc = f"""This page presents the DICOM data collected for the visit
{visit_id} during the imaging study {study_id}. Browse through details
of this particular study visit in the 'DICOM' tab below."""
    meta_item = get_metadata_item(
        item_type='dataset',
        dataset_id=str(uuid4()),
        dataset_version='latest',
        source_name='automated_addition',
        source_version='0.1.0',
    )
    meta_item['name'] = visit_id
    meta_item['description'] = desc

    # Load tarball metadata
    tar_metadata = read_json_file(metapath_dataset)
    expected_keys = ('size', 'md5', 'dspath', 'storepath')
    if not all(k in tar_metadata for k in expected_keys):
        raise ValueError(f'incomplete tarball metadata at {metapath_dataset}')

    # add dataset url
    # NOTE: access_url_post is a plain (non-f) string, so the doubled braces
    # around annex_key end up verbatim in the final URL.
    access_url_pre = 'datalad-annex::?type=external&externaltype=uncurl&url='
    access_url_post = '_{{annex_key}}&encryption=none'
    access_url = f'{access_url_pre}{icfstore_baseurl}/{study_id}/{visit_id}{access_url_post}'
    meta_item.update(dict(url=access_url))

    # Load dicom metadata and derive some summary measures
    dicoms = read_json_file(metapath_file)
    nr_files = len(dicoms)
    # Unique, non-empty values per tag. dict.fromkeys() de-duplicates while
    # keeping first-seen order, so repeated runs over the same metadata yield
    # identical lists (iteration order of a set of strings is not stable
    # across interpreter runs).
    unique_tag_vals = {
        k: [v for v in dict.fromkeys(d[k] for d in dicoms) if v]
        for k in dicom_metadata_keys
    }
    additional_keyvals = {
        "keywords": unique_tag_vals['ProtocolName'] + unique_tag_vals['Modality'],
        "additional_display": [
            {
                "name": "DICOM",
                "icon": "far fa-file-image",
                "content": unique_tag_vals,
            },
        ],
        "top_display": [
            {
                "name": "TAR file size",
                "value": format_bytes(tar_metadata['size']),
            },
            {
                "name": "Nr of DICOMs",
                "value": nr_files,
            },
        ],
    }
    # add summary measures to visit entry
    meta_item.update(additional_keyvals)
    return meta_item
|
|
|
|
|
|
def read_json_file(file_path):
    """Load and return the JSON content of ``file_path``.

    Parameters
    ----------
    file_path:
        Path of the JSON file to read.

    Returns
    -------
    The decoded JSON document.

    Raises
    ------
    OSError
        If the file cannot be opened or read (e.g. FileNotFoundError). The
        original exception is propagated unchanged, so errno and filename
        stay available to callers.
    ValueError
        If the file content is not valid JSON (or cannot be decoded as text).
    """
    try:
        with open(file_path) as f:
            return json.load(f)
    except ValueError as err:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors;
        # re-raise with the offending path so the failure can be located.
        raise ValueError(
            f"Unexpected error: invalid JSON in {file_path}: {err}"
        ) from err
|
|
|
|
|
|
def format_bytes(bytes, decimals=2):
    """Format a byte count as a human-readable string.

    Units are 1024-based and labelled "Bytes", "KB", "MB", ... "YB"; values
    beyond the largest unit are expressed in "YB".

    Parameters
    ----------
    bytes:
        Non-negative number of bytes.
    decimals:
        Number of decimals to round to; negative values are treated as 0.

    Returns
    -------
    A string such as ``"1.5 KB"``; exactly ``"0 Bytes"`` for a zero input.

    Raises
    ------
    ValueError
        If ``bytes`` is negative.
    """
    if bytes == 0:
        return "0 Bytes"
    if bytes < 0:
        raise ValueError(f"byte count must not be negative, got {bytes}")
    k = 1024
    dm = 0 if decimals < 0 else decimals
    sizes = ["Bytes", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"]
    # Scale down by repeated division rather than a log() ratio: dividing by
    # a power of two is exact, the unit index can never leave the table, and
    # values below one byte stay in "Bytes".
    value = float(bytes)
    i = 0
    while value >= k and i < len(sizes) - 1:
        value /= k
        i += 1
    return f"{round(value, dm)} {sizes[i]}"
|
|
|
|
|
|
if __name__ == '__main__':
    # Command line entry point: parse the store location and the
    # study/visit identifiers, then hand over to main().
    import argparse

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "-o", "--store-dir",
        metavar='PATH',
        default=os.getcwd(),
        help="Root directory of the ICF data store. "
        "Visit data will be read from it, and the DataLad dataset will be "
        "deposited into it.",
    )
    parser.add_argument(
        '--id',
        nargs=2,
        metavar=('STUDY-ID', 'VISIT-ID'),
        required=True,
        help="The study and visit identifiers, used to "
        "locate the visit archive in the storage organization. ",
    )
    cli_args = parser.parse_args()
    # --id always carries exactly two values: study first, visit second
    study, visit = cli_args.id
    main(
        store_dir=cli_args.store_dir,
        study_id=study,
        visit_id=visit,
    )
|