-
Notifications
You must be signed in to change notification settings - Fork 535
Improved alternate DataGrabber interface #623
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
da3b725
d985f67
c5f6fcd
5fea91d
f32be88
537a7e2
010f121
90e8536
049139d
5e38643
632fd41
e85b023
cefcb7f
94077ce
4b29046
ef0aba7
748f8c7
70a2a06
bc6adbe
89cd205
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,7 +18,9 @@ | |
|
||
""" | ||
import glob | ||
import string | ||
import os | ||
import os.path as op | ||
import shutil | ||
import re | ||
import tempfile | ||
|
@@ -528,9 +530,9 @@ def _list_outputs(self): | |
filledtemplate = template | ||
if argtuple: | ||
try: | ||
filledtemplate = template%tuple(argtuple) | ||
filledtemplate = template % tuple(argtuple) | ||
except TypeError as e: | ||
raise TypeError(e.message + ": Template %s failed to convert with args %s"%(template, str(tuple(argtuple)))) | ||
raise TypeError(e.message + ": Template %s failed to convert with args %s" % (template, str(tuple(argtuple)))) | ||
outfiles = glob.glob(filledtemplate) | ||
if len(outfiles) == 0: | ||
msg = 'Output key: %s Template: %s returned no files' % (key, filledtemplate) | ||
|
@@ -551,18 +553,146 @@ def _list_outputs(self): | |
outputs[key] = outputs[key][0] | ||
return outputs | ||
|
||
class DataFinderInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): | ||
|
||
class SelectFilesInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): | ||
|
||
base_directory = Directory(exists=True, | ||
desc="Root path common to templates.") | ||
sort_filelist = traits.Bool(True, usedefault=True, | ||
desc="When matching multiple files, return them in sorted order.") | ||
raise_on_empty = traits.Bool(True, usedefault=True, | ||
desc="Raise an exception if a template pattern matches no files.") | ||
force_lists = traits.Bool(False, usedefault=True, | ||
desc="Return all values as lists even when matching a single file.") | ||
|
||
|
||
class SelectFiles(IOBase): | ||
"""Flexibly collect data from disk to feed into workflows. | ||
|
||
This interface uses the {}-based string formatting syntax to plug | ||
values (possibly known only at workflow execution time) into string | ||
templates and collect files from persistent storage. These templates | ||
can also be combined with glob wildcards. The field names in the | ||
formatting template (i.e. the terms in braces) will become inputs | ||
fields on the interface, and the keys in the templates dictionary | ||
will form the output fields. | ||
|
||
Examples | ||
-------- | ||
|
||
>>> from nipype import SelectFiles, Node | ||
>>> templates={"T1": "{subject_id}/struct/T1.nii", | ||
... "epi": "{subject_id}/func/f[0, 1].nii"} | ||
>>> dg = Node(SelectFiles(templates), "selectfiles") | ||
>>> dg.inputs.subject_id = "subj1" | ||
>>> dg.outputs.get() | ||
{'T1': <undefined>, 'epi': <undefined>} | ||
|
||
The same thing with dynamic grabbing of specific files: | ||
|
||
>>> templates["epi"] = "{subject_id}/func/f{run!s}.nii" | ||
>>> dg = Node(SelectFiles(templates), "selectfiles") | ||
>>> dg.inputs.subject_id = "subj1" | ||
>>> dg.inputs.run = [2, 4] | ||
|
||
""" | ||
input_spec = SelectFilesInputSpec | ||
output_spec = DynamicTraitedSpec | ||
_always_run = True | ||
|
||
def __init__(self, templates, **kwargs): | ||
"""Create an instance with specific input fields. | ||
|
||
Parameters | ||
---------- | ||
templates : dictionary | ||
Mapping from string keys to string template values. | ||
The keys become output fields on the interface. | ||
The templates should use {}-formatting syntax, where | ||
the names in curly braces become inputs fields on the interface. | ||
Format strings can also use glob wildcards to match multiple | ||
files. At runtime, the values of the interface inputs will be | ||
plugged into these templates, and the resulting strings will be | ||
used to select files. | ||
|
||
""" | ||
super(SelectFiles, self).__init__(**kwargs) | ||
|
||
# Infer the infields and outfields from the template | ||
infields = [] | ||
for name, template in templates.iteritems(): | ||
for _, field_name, _, _ in string.Formatter().parse(template): | ||
if field_name is not None and field_name not in infields: | ||
infields.append(field_name) | ||
|
||
self._infields = infields | ||
self._outfields = list(templates) | ||
self._templates = templates | ||
|
||
# Add the dynamic input fields | ||
undefined_traits = {} | ||
for field in infields: | ||
self.inputs.add_trait(field, traits.Any) | ||
undefined_traits[field] = Undefined | ||
self.inputs.trait_set(trait_change_notify=False, **undefined_traits) | ||
|
||
def _add_output_traits(self, base): | ||
"""Add the dynamic output fields""" | ||
return add_traits(base, self._templates.keys()) | ||
|
||
def _list_outputs(self): | ||
"""Find the files and expose them as interface outputs.""" | ||
outputs = {} | ||
info = dict([(k, v) for k, v in self.inputs.__dict__.items() | ||
if k in self._infields]) | ||
|
||
for field, template in self._templates.iteritems(): | ||
|
||
# Build the full template path | ||
if isdefined(self.inputs.base_directory): | ||
template = op.abspath(op.join( | ||
self.inputs.base_directory, template)) | ||
else: | ||
template = op.abspath(template) | ||
|
||
# Fill in the template and glob for files | ||
filled_template = template.format(**info) | ||
filelist = glob.glob(filled_template) | ||
|
||
# Handle the case where nothing matched | ||
if not filelist: | ||
msg = "No files were found matching %s template: %s" % ( | ||
field, template) | ||
if self.inputs.raise_on_empty: | ||
raise IOError(msg) | ||
else: | ||
warn(msg) | ||
|
||
# Possibly sort the list | ||
if self.inputs.sort_filelist: | ||
filelist.sort() | ||
|
||
# Handle whether this must be a list or not | ||
if not self.inputs.force_lists: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should force_list be output specific? or should people to multiple SelectFiles? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I assume you mean force_lists be a {field, bool} mapping? That seems to complicate things in the service of what is probably a rare need. Because SelectFiles is lightweight, in cases where you want to be differently particular about what results from the grab, it's probably better to define multiple interfaces. |
||
filelist = list_to_filename(filelist) | ||
|
||
outputs[field] = filelist | ||
|
||
return outputs | ||
|
||
|
||
class DataFinderInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): | ||
root_paths = traits.Either(traits.List(), | ||
traits.Str(), | ||
mandatory=True,) | ||
match_regex = traits.Str('(.+)', | ||
match_regex = traits.Str('(.+)', | ||
usedefault=True, | ||
desc=("Regular expression for matching " | ||
"paths.")) | ||
ignore_regexes = traits.List(desc=("List of regular expressions, " | ||
"if any match the path it will be " | ||
"ignored.") | ||
) | ||
) | ||
max_depth = traits.Int(desc="The maximum depth to search beneath " | ||
"the root_paths") | ||
min_depth = traits.Int(desc="The minimum depth to search beneath " | ||
|
@@ -573,23 +703,19 @@ class DataFinderInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec): | |
|
||
|
||
class DataFinder(IOBase): | ||
"""Search for paths that match a given regular expression. Allows a less | ||
"""Search for paths that match a given regular expression. Allows a less | ||
proscriptive approach to gathering input files compared to DataGrabber. | ||
Will recursively search any subdirectories by default. This can be limited | ||
with the min/max depth options. | ||
|
||
Matched paths are available in the output 'out_paths'. Any named groups of | ||
captured text from the regular expression are also available as outputs of | ||
Will recursively search any subdirectories by default. This can be limited | ||
with the min/max depth options. | ||
Matched paths are available in the output 'out_paths'. Any named groups of | ||
captured text from the regular expression are also available as outputs of | ||
the same name. | ||
|
||
Examples | ||
-------- | ||
|
||
>>> from nipype.interfaces.io import DataFinder | ||
|
||
Look for Nifti files in directories with "ep2d_fid" or "qT1" in the name, | ||
Look for Nifti files in directories with "ep2d_fid" or "qT1" in the name, | ||
starting in the current directory. | ||
|
||
>>> df = DataFinder() | ||
>>> df.inputs.root_paths = '.' | ||
>>> df.inputs.match_regex = '.+/(?P<series_dir>.+(qT1|ep2d_fid_T1).+)/(?P<basename>.+)\.nii.gz' | ||
|
@@ -599,45 +725,39 @@ class DataFinder(IOBase): | |
'./018-ep2d_fid_T1_Gd2/acquisition.nii.gz', | ||
'./016-ep2d_fid_T1_Gd1/acquisition.nii.gz', | ||
'./013-ep2d_fid_T1_pre/acquisition.nii.gz'] | ||
|
||
>>> print result.outputs.series_dir # doctest: +SKIP | ||
['027-ep2d_fid_T1_Gd4', | ||
'018-ep2d_fid_T1_Gd2', | ||
'016-ep2d_fid_T1_Gd1', | ||
'013-ep2d_fid_T1_pre'] | ||
|
||
>>> print result.outputs.basename # doctest: +SKIP | ||
['acquisition', | ||
'acquisition', | ||
'acquisition', | ||
'acquisition'] | ||
|
||
""" | ||
|
||
input_spec = DataFinderInputSpec | ||
output_spec = DynamicTraitedSpec | ||
_always_run = True | ||
|
||
def _match_path(self, target_path): | ||
#Check if we should ignore the path | ||
for ignore_re in self.ignore_regexes: | ||
if ignore_re.search(target_path): | ||
return | ||
|
||
#Check if we can match the path | ||
match = self.match_regex.search(target_path) | ||
if not match is None: | ||
match_dict = match.groupdict() | ||
|
||
if self.result is None: | ||
self.result = {'out_paths' : []} | ||
self.result = {'out_paths': []} | ||
for key in match_dict.keys(): | ||
self.result[key] = [] | ||
|
||
self.result['out_paths'].append(target_path) | ||
for key, val in match_dict.iteritems(): | ||
self.result[key].append(val) | ||
|
||
def _run_interface(self, runtime): | ||
#Prepare some of the inputs | ||
if isinstance(self.inputs.root_paths, str): | ||
|
@@ -655,56 +775,49 @@ def _run_interface(self, runtime): | |
self.ignore_regexes = [] | ||
else: | ||
self.ignore_regexes = \ | ||
[re.compile(regex) | ||
[re.compile(regex) | ||
for regex in self.inputs.ignore_regexes] | ||
|
||
self.result = None | ||
for root_path in self.inputs.root_paths: | ||
#Handle tilde/env variables and remove extra separators | ||
root_path = os.path.normpath(os.path.expandvars(os.path.expanduser(root_path))) | ||
|
||
#Check if the root_path is a file | ||
if os.path.isfile(root_path): | ||
if min_depth == 0: | ||
self._match_path(root_path) | ||
continue | ||
|
||
#Walk through directory structure checking paths | ||
#Walk through directory structure checking paths | ||
for curr_dir, sub_dirs, files in os.walk(root_path): | ||
#Determine the current depth from the root_path | ||
curr_depth = (curr_dir.count(os.sep) - | ||
curr_depth = (curr_dir.count(os.sep) - | ||
root_path.count(os.sep)) | ||
|
||
#If the max path depth has been reached, clear sub_dirs | ||
#If the max path depth has been reached, clear sub_dirs | ||
#and files | ||
if (not max_depth is None and | ||
curr_depth >= max_depth): | ||
if max_depth is not None and curr_depth >= max_depth: | ||
sub_dirs[:] = [] | ||
files = [] | ||
|
||
#Test the path for the curr_dir and all files | ||
if curr_depth >= min_depth: | ||
self._match_path(curr_dir) | ||
if curr_depth >= (min_depth - 1): | ||
for infile in files: | ||
full_path = os.path.join(curr_dir, infile) | ||
self._match_path(full_path) | ||
|
||
if (self.inputs.unpack_single and | ||
if (self.inputs.unpack_single and | ||
len(self.result['out_paths']) == 1 | ||
): | ||
): | ||
for key, vals in self.result.iteritems(): | ||
self.result[key] = vals[0] | ||
|
||
if not self.result: | ||
raise RuntimeError("Regular expression did not match any files!") | ||
return runtime | ||
|
||
def _list_outputs(self): | ||
outputs = self._outputs().get() | ||
outputs.update(self.result) | ||
return outputs | ||
|
||
|
||
class FSSourceInputSpec(BaseInterfaceInputSpec): | ||
subjects_dir = Directory(mandatory=True, | ||
desc='Freesurfer subjects directory.') | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
could you give another example of how one might use this to select specific files?
and then another example where those numbers are provided as an input.