# Source code for teneto.classes.bids

"""TenetoBIDS is a class to use Teneto functions with data organized with BIDS (neuroimaging data)."""
import os
import inspect
import json
# import bids
import importlib
import numpy as np
import pandas as pd
import bids
from .. import __path__ as tenetopath
from .. import __version__ as tenetoversion
from ..neuroimagingtools import load_tabular_file, get_sidecar
#from .network import TemporalNetwork

class TenetoBIDS:
    """
    Class for analysing data organized in BIDS.

    TenetoBIDS allows an analysis to be performed across a whole dataset.
    Teneto functions can be applied to all files in a dataset organized in BIDS.
    Data should first be preprocessed (e.g. with fMRIPrep).

    Parameters
    ----------
    bids_dir : str
        Path to the BIDS directory.
    selected_pipeline : str or dict
        The directory found in bids_dir/derivatives/<selected_pipeline>/.
        Files from this pipeline are used as the input (first argument)
        to any teneto function.
        If multiple inputs are required for a function, then you can specify:

            {'netin': 'tvc',
             'communities': 'coms'}

        With this, the input for netin will be from bids_dir/derivatives/[teneto-]tvc/,
        and the input for communities will be from bids_dir/derivatives/[teneto-]coms/.
        The keys in this dictionary must match the names of the teneto function inputs.
    bids_filter : dict
        Entity filters applied whenever files are selected. The dictionary is copied.
    bidsvalidator : bool
        Passed as ``validate`` to ``bids.BIDSLayout`` when the layout is created here.
    update_pipeline : bool
        If True, the output_pipeline becomes the new selected_pipeline.
    history : bool
        If not None, an empty ``history`` dictionary is created on the instance.
    exist_ok : bool
        If False, will raise an error if the output directory already exists.
        If True, will not raise an error.
        This can lead to files being overwritten, if desc is not set.
    layout : object
        An already constructed BIDS layout. If None, one is created from bids_dir.
    nettsv : str
        Can be 'nn-t' or 'ijt'.
        nn-t means networks are stored as (node * node) x time tables.
        ijt means dataframes with i, j, t columns.
    """

    def __init__(self, bids_dir, selected_pipeline, bids_filter=None, bidsvalidator=False,
                 update_pipeline=True, history=None, exist_ok=False, layout=None,
                 nettsv='nn-t'):
        if layout is None:
            self.BIDSLayout = bids.BIDSLayout(bids_dir, validate=bidsvalidator)
            self.BIDSLayout.add_derivatives(bids_dir)
        else:
            self.BIDSLayout = layout
        self.bids_dir = bids_dir
        self.selected_pipeline = selected_pipeline
        self.nettsv = nettsv
        # Copy so that later update_bids_filter calls never modify the caller's dict.
        self.bids_filter = {} if bids_filter is None else dict(bids_filter)
        if history is not None:
            self.history = {}
        self.exist_ok = exist_ok
        self.update_pipeline = update_pipeline

        config_dir = tenetopath[0] + '/config/tenetobids/'
        with open(config_dir + 'tenetobids_description.json') as f:
            self.tenetobids_description = json.load(f)
        self.tenetobids_description['PipelineDescription']['Version'] = tenetoversion
        with open(config_dir + 'tenetobids_structure.json') as f:
            self.tenetobids_structure = json.load(f)

    def update_bids_layout(self):
        """Rebuild ``self.BIDSLayout`` from ``self.bids_dir`` (derivatives included)."""
        # NOTE(review): the ``bidsvalidator`` value given to __init__ is not reused
        # here, so the rebuilt layout uses the BIDSLayout default for ``validate``.
        self.BIDSLayout = bids.BIDSLayout(self.bids_dir, derivatives=True)

    def create_output_pipeline(self, runc_func, output_pipeline_name, exist_ok=None):
        """Create the output directory of a pipeline and its dataset description.

        Parameters
        ----------
        runc_func : str
            Name of the teneto function. Only the part after the last '.' is used
            and underscores are replaced with hyphens.
        output_pipeline_name : str or None
            Optional suffix appended to the pipeline name as '_<output_pipeline_name>'.
        exist_ok : bool
            If False, will raise an error if the pipeline directory already exists.
            If True, will not raise an error.
            This can lead to files being overwritten, if desc is not set.
            If None, will use the exist_ok set during init.

        Returns
        -------
        output_pipeline : str
            Name of the pipeline. The directory is created in:
            bids_dir/derivatives/teneto-<function-name>[_<output_pipeline_name>]/

        Raises
        ------
        ValueError
            If the directory exists and exist_ok is False.
        """
        if exist_ok is not None:
            self.exist_ok = exist_ok
        output_pipeline = ('teneto-' + runc_func.split('.')[-1]).replace('_', '-')
        if output_pipeline_name is not None:
            output_pipeline += '_' + output_pipeline_name
        output_pipeline_path = self.bids_dir + '/derivatives/' + output_pipeline
        if os.path.exists(output_pipeline_path) and not self.exist_ok:
            raise ValueError(
                'Output_pipeline already exists. Set exist_ok to True if this is desired behaviour.')
        os.makedirs(output_pipeline_path, exist_ok=self.exist_ok)
        # Initiate with dataset_description. The nested PipelineDescription is
        # copied as well so the shared template on the instance is left untouched.
        datainfo = dict(self.tenetobids_description)
        datainfo['PipelineDescription'] = dict(
            datainfo['PipelineDescription'], Name=output_pipeline)
        with open(output_pipeline_path + '/dataset_description.json', 'w') as fs:
            json.dump(datainfo, fs)
        self.update_bids_layout()
        return output_pipeline

    @staticmethod
    def _to_table(result):
        """Convert a function result into a pandas DataFrame or Series for saving.

        3D arrays are flattened to 2D by merging the first two axes.
        Raises ValueError for arrays that are not 1D-3D and for unsupported types.
        """
        if isinstance(result, np.ndarray):
            if result.ndim == 3:
                # Should be made hdf5 at sometime.
                # Idea here is to make 3D array to 2D by concatenating node dimensions.
                shape = result.shape
                return pd.DataFrame(result.reshape([shape[0] * shape[1], shape[2]]))
            if result.ndim == 2:
                return pd.DataFrame(result)
            if result.ndim == 1:
                return pd.Series(result)
            raise ValueError(
                'Output was array with more than 3 dimensions (unexpected)')
        if isinstance(result, list):
            return pd.DataFrame(result)
        # np.number covers NumPy scalars that are not subclasses of int/float.
        if isinstance(result, (int, float, np.number)):
            return pd.Series(result)
        if isinstance(result, (pd.DataFrame, pd.Series)):
            return result
        raise ValueError('Unexpected output type')

    @staticmethod
    def _parameters_for_sidecar(call_params, get_confounds, is_jsonable):
        """Return a JSON-friendly description of the parameters used in a call.

        Automatically loaded inputs are replaced by a short note, numpy arrays
        are turned into lists and anything else that ``is_jsonable`` rejects
        is dropped with a printed warning. ``call_params`` is not modified.
        """
        described = dict(call_params)
        if get_confounds == 1:
            described['confounds'] = 'Loaded automatically via TenetoBIDS'
        elif 'confounds' in described:
            described['confounds'] = 'Passed as argument'
        if 'sidecar' in described:
            described['sidecar'] = 'Loaded automatically via TenetoBIDS'
        recorded = {}
        for key, value in described.items():
            if is_jsonable(value):
                recorded[key] = value
            elif isinstance(value, np.ndarray):
                recorded[key] = value.tolist()
            else:
                print('Warning: Dropping input (' + key + ') from sidecar (not JSONable).')
        return recorded

    def run(self, run_func, input_params, output_desc=None, output_pipeline_name=None,
            bids_filter=None, update_pipeline=True, exist_ok=None, troubleshoot=False):
        """Runs a function on the selected files.

        Parameters
        ----------
        run_func : str
            Name of a teneto function listed in the tenetobids structure.
            A dotted name (e.g. 'timeseries.derive_temporalnetwork') is accepted;
            only the part after the last '.' is used for the lookup.
        input_params : dict
            Keyword and value pairing of arguments for the function being run.
            The input data to each function is located automatically, so
            input_params does not need to include the input network.
            The dictionary is not modified. None is treated as an empty dictionary.
        output_desc : str
            If None, no desc is used (any previous desc is removed).
            If 'keep', then desc is preserved.
            If any other str, desc is set to that string.
        output_pipeline_name : str
            If set, the data is saved in
            teneto-<function-name>_<output_pipeline_name>.
        bids_filter : dict
            Currently unused by this method.
        update_pipeline : bool
            If set to True (default), then the selected_pipeline updates to the
            output of the function and bids_filter is reduced to
            subject/ses/run/task.
        exist_ok : bool
            If set to True, an existing output directory may be reused.
        troubleshoot : bool
            If True, prints out certain information during running.
            Useful to run if reporting a bug.

        Raises
        ------
        ValueError
            If no input files are found, the arguments do not match the function,
            an output file name cannot be built, or the result has an
            unexpected type.
        """
        if exist_ok is not None:
            self.exist_ok = exist_ok
        # Always bind the module locally: assigning it only when a global named
        # 'teneto' is absent would leave the local name unbound otherwise.
        teneto = importlib.import_module('teneto')
        func_name = run_func.split('.')[-1]
        structure = self.tenetobids_structure[func_name]
        func = teneto
        for part in structure['module'].split('.'):
            func = getattr(func, part)
        functype = structure['functype']
        func = getattr(func, func_name)
        # Work on a copy so per-file values never leak into the caller's dict
        # or into the call made for the next file.
        input_params = {} if input_params is None else dict(input_params)

        # Only set up an output pipeline if the functype is on_data
        if functype == 'on_data':
            output_pipeline = self.create_output_pipeline(
                run_func, output_pipeline_name, self.exist_ok)

        input_files = self.get_selected_files(func_name)
        if not input_files:
            raise ValueError('No input files')
        if troubleshoot:
            self.troubleshoot('Initial input files', {'input_files': input_files})

        # Check number of required arguments for the function
        funcparams, get_confounds = self._check_run_function_args(
            func, input_params, functype)
        wants_sidecar = 'sidecar' in dict(funcparams)

        # NOTE(review): the file-name part uses {ses} while the directory part
        # uses {session}; left unchanged, confirm which entity name is intended.
        output_pattern = '/sub-{subject}/[ses-{session}/]func/sub-{subject}[_ses-{ses}][_run-{run}]_task-{task}[_desc-{desc}]_{suffix}.{extension}'

        good_files = 0
        bad_files = 0
        for f in input_files:
            f_entities = f.get_entities()
            call_params = dict(input_params)
            if get_confounds == 1:
                call_params['confounds'] = self.get_aux_file(f, filetype='confounds')
            data, sidecar = self.load_file(f)
            if troubleshoot:
                self.troubleshoot('Input file name',
                                  {'f': f, 'f_entities': f_entities, 'sidecar': sidecar})
            if wants_sidecar:
                call_params['sidecar'] = sidecar
            if data is None:
                # Skip if data not found
                bad_files += 1
                continue

            if functype == 'on_data':
                result = func(data, **call_params)
                # if sidecar is in the function parameters, then sidecar is also returned
                if wants_sidecar:
                    result, sidecar = result
                if output_desc is None:
                    f_entities.pop('desc', None)
                elif output_desc != 'keep':
                    f_entities['desc'] = output_desc
                f_entities.update(structure['output'])
                save_name = self.BIDSLayout.build_path(
                    f_entities, path_patterns=output_pattern, validate=False)
                if save_name is None:
                    raise ValueError(
                        'Could not build an output file name from the file entities.')
                save_path = self.bids_dir + '/derivatives/' + output_pipeline
                if troubleshoot:
                    self.troubleshoot('File name consruction',
                                      {'f_entities': f_entities,
                                       'save_name': save_name,
                                       'save_path': save_path})
                # Exist ok here has to be true, otherwise multiple runs causes an error
                # Any exist_ok is caught in create pipeline.
                os.makedirs(os.path.dirname(save_path + save_name), exist_ok=True)
                # Table needs column header
                table = self._to_table(result)
                table.to_csv(save_path + save_name, sep='\t', header=True)
                # add information to sidecar
                sidecar['DerivativeSource'] = f.path
                sidecar['TenetoFunction'] = {
                    'Name': run_func,
                    'Parameters': self._parameters_for_sidecar(
                        call_params, get_confounds, teneto.utils.is_jsonable),
                }
            elif functype == 'on_sidecar':
                sidecar = func(**call_params)
                update_pipeline = False
                save_path = f.dirname + '/'
                save_name = f.filename
            # Save sidecar
            with open(save_path + save_name.replace('.tsv', '.json'), 'w') as fs:
                json.dump(sidecar, fs)
            good_files += 1

        report = '## ' + run_func + '\n'
        report += str(good_files) + ' files were included (' + \
            str(bad_files) + ' excluded from run)'
        self.report = report

        if update_pipeline:
            if functype == 'on_data':
                self.selected_pipeline = output_pipeline
            # Create new bids_filter dictionary that only contains
            # sub/ses/run/task as other tags are dropped.
            previous_filter = dict(self.bids_filter)
            self.bids_filter = {}
            for key in ('subject', 'ses', 'run', 'task'):
                if key in previous_filter:
                    self.update_bids_filter({key: previous_filter[key]})
        # Files were written after the layout was last built, so refresh it.
        self.update_bids_layout()

    def _check_run_function_args(self, func, input_params, functype):
        """
        Helper function for TenetoBIDS.run.

        Checks that the input parameters match the function: exactly one
        required argument (the data) may be left unspecified, plus 'sidecar'
        when the function takes one and functype is 'on_data'.

        Returns
        -------
        funcparams : items view
            (name, inspect.Parameter) pairs of the function signature.
        get_confounds : int
            1 if confound files need to be loaded automatically, otherwise 0.

        Raises
        ------
        ValueError
            If the number of unspecified required arguments is unexpected.
        """
        funcparams = inspect.signature(func).parameters.items()
        names = [p_name for p_name, _ in funcparams]
        variadic = (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
        # Only arguments without a default can be missing; *args/**kwargs never are.
        required = [p_name for p_name, p in funcparams
                    if p.default is inspect.Parameter.empty and p.kind not in variadic]
        # Number of required arguments that input_params does not provide.
        arg_diff = sum(1 for p_name in required if p_name not in input_params)

        expected_arg_defecit = 1
        if 'sidecar' in names and functype == 'on_data':
            expected_arg_defecit += 1

        get_confounds = 0
        if arg_diff != expected_arg_defecit:
            # Three conditionals to be met in order to get confounds
            confounds_not_input = 'confounds' not in input_params
            confounds_in_func = 'confounds' in names
            arg_needed = arg_diff == expected_arg_defecit + 1
            if confounds_not_input and confounds_in_func and arg_needed:
                # Get confounds automatically
                get_confounds = 1
            else:
                raise ValueError(
                    'Expecting one unspecified input argument. '
                    'Enter all required input arguments in input_params '
                    'except for the data files.')
        return funcparams, get_confounds

    def get_selected_files(self, output=None):
        """
        Returns the files in selected_pipeline that match the current filters.

        These are the files that will be processed when calling TenetoBIDS.run().
        If you specify a particular output (a function name in the tenetobids
        structure), the input filters of that function are used.
        Otherwise files with extension .tsv, .nii or .nii.gz are selected.
        In both cases TenetoBIDS.bids_filter is applied on top.
        """
        if output is not None:
            # Copy: updating the structure entry in place would make the
            # bids_filter a permanent part of the shared structure.
            filters = dict(self.tenetobids_structure[output]['input'])
        else:
            # input can only be these files
            filters = {'extension': ['.tsv', '.nii', '.nii.gz']}
        # Add predefined filters to the check
        filters.update(self.bids_filter)
        return self.BIDSLayout.derivatives[self.selected_pipeline].get(**filters)

    def get_run_options(self, for_selected=True):
        """Returns the different function names that can be called using TenetoBIDS.run()

        Parameters
        ----------
        for_selected : bool
            If True, only return run options for the selected files.
            If False, returns all options.

        Returns
        -------
        options : str
            Comma separated function names that can be run.
        """
        funcs = list(self.tenetobids_structure)
        if for_selected:
            files = self.get_selected_files()
            present = {f.get_entities()['suffix'] for f in files}
            matching = set()
            for name in funcs:
                accepted = self.tenetobids_structure[name]['input']['suffix']
                if isinstance(accepted, str):
                    accepted = [accepted]
                if any(suffix in accepted for suffix in present):
                    matching.add(name)
            funcs = sorted(matching)
        return ', '.join(funcs)

    def update_bids_filter(self, filter_addons):
        """Updates TenetoBIDS.bids_filter

        Parameters
        ----------
        filter_addons : dict
            dictionary that updates TenetoBIDS.bids_filter
        """
        self.bids_filter.update(filter_addons)

    def get_aux_file(self, bidsfile, filetype='confounds'):
        """Tries to automatically get auxiliary data for an input file, and loads it.

        Parameters
        ----------
        bidsfile : BIDSDataFile or BIDSImageFile
            The BIDS file that the auxiliary file is going to be matched to.
        filetype : str
            Can be 'confounds' or 'events'.
            Confounds are looked up in the 'fMRIPrep' derivative,
            events in the selected pipeline.

        Returns
        -------
        aux : object
            Whatever load_tabular_file returns for the matched .tsv file.

        Raises
        ------
        ValueError
            If filetype is unknown, or if not exactly one matching file is found.
        """
        if filetype == 'confounds':
            suffix = 'timeseries'
            derivative = 'fMRIPrep'
        elif filetype == 'events':
            suffix = 'events'
            derivative = self.selected_pipeline
        else:
            raise ValueError('unknown file type')
        # Get the entities of the filename
        file_entities = bidsfile.get_entities()
        # Ensure that the extension and suffix are correct
        file_entities['suffix'] = suffix
        file_entities['extension'] = '.tsv'
        file_entities.pop('desc', None)
        if filetype == 'confounds':
            # The confounds file is matched without space/atlas entities.
            file_entities.pop('space', None)
            file_entities.pop('atlas', None)
            file_entities['desc'] = 'confounds'
        auxfile = self.BIDSLayout.derivatives[derivative].get(**file_entities)
        if len(auxfile) == 0:
            raise ValueError('No auxiliary file (type: ' + filetype + ') found')
        if len(auxfile) > 1:
            raise ValueError('More than one auxiliary file (type: ' + filetype + ') found')
        # Load the aux file
        return load_tabular_file(
            auxfile[0].dirname + '/' + auxfile[0].filename, index_col=False)

    def load_data(self, bids_filter=None):
        """Returns data, default is the input data.

        Parameters
        ----------
        bids_filter : dict
            Default is None. If set, load_data will load all files found by
            the bids_filter. Any preset BIDS filter is used as well, but will
            get overwritten by this input.

        Returns
        -------
        data : dict
            Loaded data keyed by file name.

        Raises
        ------
        ValueError
            If two selected files share the same file name.
        """
        if bids_filter is None:
            files = self.get_selected_files()
        else:
            filters = dict(self.bids_filter)
            filters.update(bids_filter)
            files = self.BIDSLayout.derivatives[self.selected_pipeline].get(**filters)
        data = {}
        for f in files:
            if f.filename in data:
                raise ValueError('Same name appears twice in selected files')
            data[f.filename], _ = self.load_file(f)
        return data

    def load_file(self, bidsfile):
        """Aux function to load the data and sidecar from a BIDSFile

        Parameters
        ----------
        bidsfile : BIDSDataFile or BIDSImageFile
            The BIDS file to load.

        Returns
        -------
        data : object or None
            The loaded data. None if the sidecar marks the file as bad or
            the file object offers neither get_image nor get_df.
        sidecar : dict
            The sidecar returned by get_sidecar.
        """
        path = bidsfile.dirname + '/' + bidsfile.filename
        # Get sidecar and see if file has been rejected at a previous step
        # (note sidecar could be called in input_files, but this will require
        # loading sidecar twice)
        sidecar = get_sidecar(path)
        data = None
        if not sidecar['BadFile']:
            if hasattr(bidsfile, 'get_image'):
                data = bidsfile.get_image()
            elif hasattr(bidsfile, 'get_df'):
                # This can be changed if/when pybids is updated.
                # Assumes index_col=0 in tsv file
                data = load_tabular_file(path)
        # Since temporal networks are currently saved in 2D collapsed arrays
        # the following checks if they should be resized, and resizes.
        if data is not None and '_temporalconnectivity.tsv' in bidsfile.filename:
            # The sidecar is only consulted when nettsv does not already decide it.
            if (self.nettsv == 'nn-t' or
                    sidecar['TenetoFunction']['Parameters']['params']['dimord'] == 'node,time'):
                n_nodes = int(np.sqrt(data.shape[0]))
                n_time = data.shape[1]
                data = data.values.reshape([n_nodes, n_nodes, n_time])
        return data, sidecar

    def troubleshoot(self, stepname, status):
        """
        Prints ongoing info to assist with troubleshooting

        Parameters
        ----------
        stepname : str
            Name of the step, printed in the start and end banners.
        status : dict
            Names and values to print.
        """
        print('******** TROUBLESHOOT STEP: ' + stepname + ', start ********')
        for step, value in status.items():
            print('++++++++')
            print(step)
            print('------')
            print(value)
            print('++++++++')
        print('******** TROUBLESHOOT STEP: ' + stepname + ', end ********')

    def load_events(self):
        """
        Loads event data for selected files

        Returns
        -------
        events : dict
            Event data keyed by the file name of each selected file.
        """
        return {f.filename: self.get_aux_file(f, filetype='events')
                for f in self.get_selected_files()}