inm-icf-utilities/tests/test_datalad_workflows/test_pipeline.py
Michael Hanke 5845c49a58 getmeta_studyvisit -> deposit_visit_metadata
Document more comprehensively and remove the ICF label from the help.
2023-05-31 14:00:01 +02:00

181 lines
5.4 KiB
Python

from __future__ import annotations
import json
import logging
import os
from pathlib import Path
from typing import Iterable
from urllib.parse import (
urlparse,
urlunparse,
ParseResult,
)
from datalad.api import clone
from datalad.distribution.dataset import Dataset
from datalad.runner.runner import WitlessRunner
from datalad.tests.utils_pytest import on_appveyor
from datalad_catalog.webcatalog import WebCatalog
from datalad_next.credman import CredentialManager
from .utils import get_restricted_realm
existing_visits = ['visit_a', 'visit_b']
def run_script(name: str,
working_directory: str | Path,
study_id: str,
visit_id: str,
):
script_path = Path(*(Path(__file__).parts[:-3] + ('bin',))) / name
runner = WitlessRunner(cwd=working_directory, env=dict(os.environ))
runner.run(
[
str(script_path),
'--id',
study_id,
visit_id
]
)
def process_visits(studies_dir: Path,
studies: list[str],
visits: list[str]
):
for study in studies:
for visit in visits:
# run metadata generation script
run_script(
'deposit_visit_metadata',
studies_dir,
study, visit
)
# run dataladification script
run_script(
'dataladify_studyvisit_from_meta',
studies_dir,
study, visit
)
# run catalogification script
run_script(
'catalogify_studyvisit_from_meta',
studies_dir,
study, visit
)
def get_clone_annex_url(source_url: str,
study: str,
visit: str
) -> tuple[str, str]:
if source_url.endswith('/'):
source_url = source_url[:-1]
access_url = f'{source_url}/{study}'
auth_realm = get_restricted_realm(access_url)
parsed_url = urlparse(access_url)
copy_attributes = ['scheme', 'netloc', 'path', 'params', 'query', 'fragment']
realm_parsed_url = ParseResult(**{
name: getattr(parsed_url, name) if name != 'path' else auth_realm
for name in copy_attributes
})
return (
'datalad-annex::?type=external&externaltype=uncurl'
f'&url={source_url}/{study}/{visit}_{{{{annex_key}}}}'
'&encryption=none',
urlunparse(realm_parsed_url),
)
def add_credentials(realms: Iterable[str],
credentials: dict,
credman: CredentialManager,
):
# Add credentials for all URLs to credential manager
for index, realm in enumerate(realms):
kwargs = {
**credentials,
'name': f'icf-test-credentials-{index}',
'realm': realm
}
result = credman.set(**kwargs)
assert result is not None
def clone_visit(path: Path,
source_url: str,
study: str,
visit: str,
credentials: dict,
credman: CredentialManager,
) -> Dataset:
# Get involved URLs and set credentials for `auth_url`
clone_url, auth_realm = get_clone_annex_url(source_url, study, visit)
add_credentials([auth_realm], credentials, credman)
# Clone a visit from the clone URL
return clone(source=clone_url, path=path)
def test_pipeline(tmp_path: Path,
test_studies_dir: str,
test_study_names: list[str],
data_webserver: str,
dataaccess_credential: dict,
credman: CredentialManager,
):
# Perform metadata generation, dataladification, and catalogification
process_visits(
Path(test_studies_dir),
test_study_names,
existing_visits,
)
# 1. Test metadata generation
# - assert generated metadata files exist and load their content
metadata_types = ['tarball', 'dicoms']
for study in test_study_names:
for visit in existing_visits:
for m in metadata_types:
metadata_path = Path(test_studies_dir) /\
study / f'{visit}_metadata_{m}.json'
assert metadata_path.exists()
with open(metadata_path) as f:
json.load(f)
# 2. Test dataset generation
# - Try to clone the datasets and fetch the dicom tarfile
for study in test_study_names:
for visit in existing_visits:
dataset = clone_visit(
tmp_path / f'ds_{study}_{visit}',
data_webserver,
study,
visit,
dataaccess_credential,
credman,
)
# TODO reenable once the server setup is actually compatible
# TODO swap the order of gets, or actually drop the tar get
# completely. Pulling individual files will do all that internally
# Try to get the tar file and the DICOMs
#dataset.get(f'icf/{visit}_dicom.tar')
#dataset.get(f'{study}_{visit}')
# 3. Test catalog generation
# - assert that study catalogs have been created using webcatalog method
for study in test_study_names:
catalog_path = Path(test_studies_dir) / study / 'catalog'
ctlg = WebCatalog(location=str(catalog_path))
assert ctlg.is_created()