Improved alternate DataGrabber interface #623

Merged · 20 commits · Aug 10, 2013
1 change: 1 addition & 0 deletions CHANGES
@@ -1,6 +1,7 @@
Next release
============

* ENH: SelectFiles: a streamlined version of DataGrabber
* ENH: New interfaces: spm.ResliceToReference

* FIX: Deals properly with 3d files in SPM Realign
Expand Down
2 changes: 1 addition & 1 deletion nipype/__init__.py
@@ -17,7 +17,7 @@

from pipeline import Node, MapNode, Workflow
from interfaces import (fsl, spm, freesurfer, afni, ants, slicer, dipy, nipy,
mrtrix, camino, DataGrabber, DataSink,
mrtrix, camino, DataGrabber, DataSink, SelectFiles,
IdentityInterface, Rename, Function, Select, Merge)


2 changes: 1 addition & 1 deletion nipype/interfaces/__init__.py
@@ -7,6 +7,6 @@
"""
__docformat__ = 'restructuredtext'

from io import DataGrabber, DataSink
from io import DataGrabber, DataSink, SelectFiles
from utility import IdentityInterface, Rename, Function, Select, Merge
import fsl, spm, freesurfer, afni, ants, slicer, dipy, nipy, mrtrix, camino
193 changes: 153 additions & 40 deletions nipype/interfaces/io.py
@@ -18,7 +18,9 @@

"""
import glob
import string
import os
import os.path as op
import shutil
import re
import tempfile
@@ -528,9 +530,9 @@ def _list_outputs(self):
filledtemplate = template
if argtuple:
try:
filledtemplate = template%tuple(argtuple)
filledtemplate = template % tuple(argtuple)
except TypeError as e:
raise TypeError(e.message + ": Template %s failed to convert with args %s"%(template, str(tuple(argtuple))))
raise TypeError(e.message + ": Template %s failed to convert with args %s" % (template, str(tuple(argtuple))))
outfiles = glob.glob(filledtemplate)
if len(outfiles) == 0:
msg = 'Output key: %s Template: %s returned no files' % (key, filledtemplate)
@@ -551,18 +553,146 @@ def _list_outputs(self):
outputs[key] = outputs[key][0]
return outputs

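For orientation, the hunk above touches DataGrabber's core mechanic: fill a %-style template with the input arguments, then glob for matching files. A minimal standalone sketch of that flow, using a hypothetical template and argument tuple rather than anything from the PR:

import glob

template = "%s/struct/T1.nii"    # hypothetical %-style template
argtuple = ("subj1",)            # hypothetical input arguments
try:
    filledtemplate = template % argtuple
except TypeError as e:
    raise TypeError(str(e) + ": Template %s failed to convert with args %s"
                    % (template, str(argtuple)))
outfiles = glob.glob(filledtemplate)
if len(outfiles) == 0:
    print("Output template %s returned no files" % filledtemplate)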

class SelectFilesInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec):

base_directory = Directory(exists=True,
desc="Root path common to templates.")
sort_filelist = traits.Bool(True, usedefault=True,
desc="When matching mutliple files, return them in sorted order.")
raise_on_empty = traits.Bool(True, usedefault=True,
desc="Raise an exception if a template pattern matches no files.")
force_lists = traits.Bool(False, usedefault=True,
desc="Return all values as lists even when matching a single file.")


class SelectFiles(IOBase):
"""Flexibly collect data from disk to feed into workflows.

This interface uses the {}-based string formatting syntax to plug
values (possibly known only at workflow execution time) into string
templates and collect files from persistent storage. These templates
can also be combined with glob wildcards. The field names in the
formatting template (i.e. the terms in braces) will become input
fields on the interface, and the keys in the templates dictionary
will form the output fields.

Examples
--------

>>> from nipype import SelectFiles, Node
>>> templates={"T1": "{subject_id}/struct/T1.nii",
... "epi": "{subject_id}/func/f[0, 1].nii"}
>>> dg = Node(SelectFiles(templates), "selectfiles")
>>> dg.inputs.subject_id = "subj1"
>>> dg.outputs.get()
{'T1': <undefined>, 'epi': <undefined>}

Review comment (Member):

could you give another example of how one might use this to select specific files?

"epi": "{subject_id}/func/f[3,5].nii"

and then another example where those numbers are provided as an input.

The same thing with dynamic grabbing of specific files:

>>> templates["epi"] = "{subject_id}/func/f{run!s}.nii"
>>> dg = Node(SelectFiles(templates), "selectfiles")
>>> dg.inputs.subject_id = "subj1"
>>> dg.inputs.run = [2, 4]

"""
input_spec = SelectFilesInputSpec
output_spec = DynamicTraitedSpec
_always_run = True

def __init__(self, templates, **kwargs):
"""Create an instance with specific input fields.

Parameters
----------
templates : dictionary
Mapping from string keys to string template values.
The keys become output fields on the interface.
The templates should use {}-formatting syntax, where
the names in curly braces become input fields on the interface.
Format strings can also use glob wildcards to match multiple
files. At runtime, the values of the interface inputs will be
plugged into these templates, and the resulting strings will be
used to select files.

"""
super(SelectFiles, self).__init__(**kwargs)

# Infer the infields and outfields from the template
infields = []
for name, template in templates.iteritems():
for _, field_name, _, _ in string.Formatter().parse(template):
if field_name is not None and field_name not in infields:
infields.append(field_name)

self._infields = infields
self._outfields = list(templates)
self._templates = templates

# Add the dynamic input fields
undefined_traits = {}
for field in infields:
self.inputs.add_trait(field, traits.Any)
undefined_traits[field] = Undefined
self.inputs.trait_set(trait_change_notify=False, **undefined_traits)
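To see what the field-name inference above yields, a quick interactive illustration of the standard-library call it relies on (note the trailing None entry for literal text after the last field, which is why the loop guards with `is not None`):

>>> import string
>>> [f for _, f, _, _ in string.Formatter().parse("{subject_id}/func/f{run!s}.nii")]
['subject_id', 'run', None]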

def _add_output_traits(self, base):
"""Add the dynamic output fields"""
return add_traits(base, self._templates.keys())

def _list_outputs(self):
"""Find the files and expose them as interface outputs."""
outputs = {}
info = dict([(k, v) for k, v in self.inputs.__dict__.items()
if k in self._infields])

for field, template in self._templates.iteritems():

# Build the full template path
if isdefined(self.inputs.base_directory):
template = op.abspath(op.join(
self.inputs.base_directory, template))
else:
template = op.abspath(template)

# Fill in the template and glob for files
filled_template = template.format(**info)
filelist = glob.glob(filled_template)

# Handle the case where nothing matched
if not filelist:
msg = "No files were found matching %s template: %s" % (
field, filled_template)
if self.inputs.raise_on_empty:
raise IOError(msg)
else:
warn(msg)

# Possibly sort the list
if self.inputs.sort_filelist:
filelist.sort()

# Handle whether this must be a list or not
if not self.inputs.force_lists:
Review comment (Member):
should force_lists be output-specific? or should people use multiple SelectFiles?

Reply (Member, Author):
I assume you mean making force_lists a {field: bool} mapping? That seems to complicate things in the service of what is probably a rare need. Because SelectFiles is lightweight, in cases where you want to be differently particular about what results from the grab, it's probably better to define multiple interfaces (see the sketch just after this class).

filelist = list_to_filename(filelist)

outputs[field] = filelist

return outputs

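Grounding the force_lists exchange above: since a per-field mapping was ruled out, per-field list behavior can be approximated with two lightweight SelectFiles nodes. A hedged usage sketch (the template strings and subject ID are hypothetical):

from nipype import Node, SelectFiles

# Default behavior: a singleton match is unpacked from its list.
single = Node(SelectFiles({"T1": "{subject_id}/struct/T1.nii"}),
              "select_single")
# Separate node for fields that should always come back as lists.
multi = Node(SelectFiles({"epi": "{subject_id}/func/f*.nii"}),
             "select_multi")
multi.inputs.force_lists = True
single.inputs.subject_id = "subj1"
multi.inputs.subject_id = "subj1"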

class DataFinderInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec):
root_paths = traits.Either(traits.List(),
traits.Str(),
mandatory=True,)
match_regex = traits.Str('(.+)',
usedefault=True,
desc=("Regular expression for matching "
"paths."))
ignore_regexes = traits.List(desc=("List of regular expressions, "
"if any match the path it will be "
"ignored.")
)
max_depth = traits.Int(desc="The maximum depth to search beneath "
"the root_paths")
min_depth = traits.Int(desc="The minimum depth to search beneath "
@@ -573,23 +703,19 @@ class DataFinderInputSpec(DynamicTraitedSpec, BaseInterfaceInputSpec):


class DataFinder(IOBase):
"""Search for paths that match a given regular expression. Allows a less
"""Search for paths that match a given regular expression. Allows a less
proscriptive approach to gathering input files compared to DataGrabber.
Will recursively search any subdirectories by default. This can be limited
with the min/max depth options.

Matched paths are available in the output 'out_paths'. Any named groups of
captured text from the regular expression are also available as ouputs of
Will recursively search any subdirectories by default. This can be limited
with the min/max depth options.
Matched paths are available in the output 'out_paths'. Any named groups of
captured text from the regular expression are also available as ouputs of
the same name.

Examples
--------

>>> from nipype.interfaces.io import DataFinder

Look for Nifti files in directories with "ep2d_fid" or "qT1" in the name,
starting in the current directory.

>>> df = DataFinder()
>>> df.inputs.root_paths = '.'
>>> df.inputs.match_regex = '.+/(?P<series_dir>.+(qT1|ep2d_fid_T1).+)/(?P<basename>.+)\.nii.gz'
@@ -599,45 +725,39 @@ class DataFinder(IOBase):
'./018-ep2d_fid_T1_Gd2/acquisition.nii.gz',
'./016-ep2d_fid_T1_Gd1/acquisition.nii.gz',
'./013-ep2d_fid_T1_pre/acquisition.nii.gz']

>>> print result.outputs.series_dir # doctest: +SKIP
['027-ep2d_fid_T1_Gd4',
'018-ep2d_fid_T1_Gd2',
'016-ep2d_fid_T1_Gd1',
'013-ep2d_fid_T1_pre']

>>> print result.outputs.basename # doctest: +SKIP
['acquisition',
'acquisition',
'acquisition',
'acquisition']

"""

input_spec = DataFinderInputSpec
output_spec = DynamicTraitedSpec
_always_run = True

def _match_path(self, target_path):
#Check if we should ignore the path
for ignore_re in self.ignore_regexes:
if ignore_re.search(target_path):
return

#Check if we can match the path
match = self.match_regex.search(target_path)
if not match is None:
match_dict = match.groupdict()

if self.result is None:
self.result = {'out_paths' : []}
self.result = {'out_paths': []}
for key in match_dict.keys():
self.result[key] = []

self.result['out_paths'].append(target_path)
for key, val in match_dict.iteritems():
self.result[key].append(val)

def _run_interface(self, runtime):
#Prepare some of the inputs
if isinstance(self.inputs.root_paths, str):
@@ -655,56 +775,49 @@ def _run_interface(self, runtime):
self.ignore_regexes = []
else:
self.ignore_regexes = \
[re.compile(regex)
for regex in self.inputs.ignore_regexes]

self.result = None
for root_path in self.inputs.root_paths:
#Handle tilde/env variables and remove extra separators
root_path = os.path.normpath(os.path.expandvars(os.path.expanduser(root_path)))

#Check if the root_path is a file
if os.path.isfile(root_path):
if min_depth == 0:
self._match_path(root_path)
continue

#Walk through directory structure checking paths
for curr_dir, sub_dirs, files in os.walk(root_path):
#Determine the current depth from the root_path
curr_depth = (curr_dir.count(os.sep) -
root_path.count(os.sep))

#If the max path depth has been reached, clear sub_dirs
#and files
if (not max_depth is None and
curr_depth >= max_depth):
if max_depth is not None and curr_depth >= max_depth:
sub_dirs[:] = []
files = []

#Test the path for the curr_dir and all files
if curr_depth >= min_depth:
self._match_path(curr_dir)
if curr_depth >= (min_depth - 1):
for infile in files:
full_path = os.path.join(curr_dir, infile)
self._match_path(full_path)

if (self.inputs.unpack_single and
len(self.result['out_paths']) == 1
):
for key, vals in self.result.iteritems():
self.result[key] = vals[0]

if not self.result:
raise RuntimeError("Regular expression did not match any files!")
return runtime

def _list_outputs(self):
outputs = self._outputs().get()
outputs.update(self.result)
return outputs

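The essence of DataFinder, reduced to the standard library as an illustrative sketch (the regex below is hypothetical, not from the PR): walk a directory tree, keep paths matching the pattern, and collect any named groups as parallel output lists.

import os
import re

match_regex = re.compile(r'(?P<basename>.+)\.nii\.gz')  # hypothetical pattern
result = {'out_paths': [], 'basename': []}
for curr_dir, sub_dirs, files in os.walk('.'):
    for infile in files:
        full_path = os.path.join(curr_dir, infile)
        match = match_regex.search(full_path)
        if match is not None:
            result['out_paths'].append(full_path)
            for key, val in match.groupdict().items():
                result[key].append(val)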

class FSSourceInputSpec(BaseInterfaceInputSpec):
subjects_dir = Directory(mandatory=True,
desc='Freesurfer subjects directory.')