Source code for teneto.classes.bids

"""TenetoBIDS is a class to use Teneto functions with data organized with BIDS (neuroimaging data)."""
import os
import inspect
import json
# import bids
import importlib
import numpy as np
import pandas as pd
import bids
from .. import __path__ as tenetopath
from .. import __version__ as tenetoversion
from ..neuroimagingtools import load_tabular_file, get_sidecar
#from .network import TemporalNetwork


[docs]
class TenetoBIDS:
    """
    Class for analysing data in BIDS.

    TenetoBIDS allows for an analysis to be performed across a dataset.
    All different functions from Teneto can be applied to all files in a dataset organized in BIDS.
    Data should be first preprocessed (e.g. fMRIPrep).

    Parameters
    ----------

    bids_dir : str
        Path to the BIDS directory.
    selected_pipeline : str or dict
        The directory that is in bids_dir/derivatives/<selected_pipeline>/.
        The files found there will be used as the input to any teneto function (first argument).
        If multiple inputs are required for a function, then you can specify:
            {'netin': 'tvc',
            'communities': 'coms'}
        With this, the input for netin will be from bids_dir/derivatives/[teneto-]tvc/,
        and the input for communities will be from bids_dir/derivatives/[teneto-]coms/.
        The keys in this dictionary must match the names of the teneto function inputs.
    bids_filter : dict
        Entity filters (stored as TenetoBIDS.bids_filter) that are added to
        every file query. Defaults to an empty dictionary.
    bidsvalidator : bool
        Passed as ``validate`` to bids.BIDSLayout when the layout is created.
    update_pipeline : bool
        If True, the output_pipeline becomes the new selected_pipeline.
    history : optional
        If not None, TenetoBIDS.history is initialised as an empty dictionary.
    exist_ok : bool
        If False, will raise an error if the output directory already exists.
        If True, will not raise an error.
        This can lead to files being overwritten, if desc is not set.
    layout : bids.BIDSLayout, optional
        An existing layout to use instead of creating one from bids_dir.
    nettsv : str
        Can be 'nn-t' or 'ijt'.
        'nn-t' means networks are saved as (node-node) x time tables.
        'ijt' means dataframes have i, j, t columns.
    """

    def __init__(self, bids_dir, selected_pipeline, bids_filter=None, bidsvalidator=False,
                 update_pipeline=True, history=None, exist_ok=False, layout=None, nettsv='nn-t'):
        """Store the settings, set up the BIDS layout and load the teneto configuration files."""
        # Plain settings, stored as given.
        self.bids_dir = bids_dir
        self.selected_pipeline = selected_pipeline
        self.nettsv = nettsv
        self.exist_ok = exist_ok
        self.update_pipeline = update_pipeline
        self.bids_filter = bids_filter if bids_filter is not None else {}
        # NOTE(review): the value of ``history`` is discarded and the attribute
        # is only created when something was passed -- confirm this is intended.
        if history is not None:
            self.history = {}

        # Use the supplied layout when there is one, otherwise index bids_dir.
        if layout is not None:
            self.BIDSLayout = layout
        else:
            self.BIDSLayout = bids.BIDSLayout(bids_dir, validate=bidsvalidator)
            self.BIDSLayout.add_derivatives(bids_dir)

        # Template for the dataset_description.json of every output pipeline.
        with open(tenetopath[0] + '/config/tenetobids/tenetobids_description.json') as description_file:
            description = json.load(description_file)
        description['PipelineDescription']['Version'] = tenetoversion
        self.tenetobids_description = description

        # Per-function information (module, functype, input and output entities).
        with open(tenetopath[0] + '/config/tenetobids/tenetobids_structure.json') as structure_file:
            self.tenetobids_structure = json.load(structure_file)

    # def set_selected_pipeline(self, selected_pipeline):
    #    bids.


[docs]
    def update_bids_layout(self):
        """
        Re-index bids_dir (derivatives included) and replace TenetoBIDS.BIDSLayout.

        NOTE(review): the new layout is built with the pybids defaults, so the
        ``bidsvalidator`` and ``layout`` arguments given at initialisation are
        not carried over -- confirm this is intended.
        """
        refreshed_layout = bids.BIDSLayout(self.bids_dir, derivatives=True)
        self.BIDSLayout = refreshed_layout



[docs]
    def create_output_pipeline(self, runc_func, output_pipeline_name, exist_ok=None):
        """Creates the directories of the saved file.

        Parameters
        ----------
        output_pipeline : str
            name of output pipeline
        exist_ok : bool
            If False, will raise error if pipeline already exist_ok.
            If True, will not raise an error.
            This can lead to files being overwritten, if desc is not set.
            If None, will use the exist_ok set during init.

        Returns
        -------
        Creates the output pipeline directory in:
            bids_dir/teneto-[output_pipeline]/

        """
        if exist_ok is not None:
            self.exist_ok = exist_ok
        output_pipeline = 'teneto-'
        output_pipeline += runc_func.split('.')[-1]
        output_pipeline = output_pipeline.replace('_', '-')
        if output_pipeline_name is not None:
            output_pipeline += '_' + output_pipeline_name
        output_pipeline_path = self.bids_dir + '/derivatives/' + output_pipeline
        if os.path.exists(output_pipeline_path) and not self.exist_ok:
            raise ValueError(
                'Output_pipeline already exists. Set exist_ok to True if this is desired behaviour.')
        os.makedirs(output_pipeline_path, exist_ok=self.exist_ok)
        # Initiate with dataset_description
        datainfo = self.tenetobids_description.copy()
        datainfo['PipelineDescription']['Name'] = output_pipeline
        with open(output_pipeline_path + '/dataset_description.json', 'w') as fs:
            json.dump(datainfo, fs)
        self.update_bids_layout()
        return output_pipeline



[docs]
    def run(self, run_func, input_params, output_desc=None, output_pipeline_name=None, bids_filter=None, update_pipeline=True, exist_ok=None, troubleshoot=False):
        """Runs a teneto function on the selected files and saves the output.

        Parameters
        ---------------
        run_func : str
            Name of the teneto function to run. It is used as the key into
            tenetobids_structure, which supplies the module the function is
            fetched from, its functype ('on_data' or 'on_sidecar') and the
            input/output entities.
        input_params : dict
            keyword and value pairing of arguments for the function being run.
            The input data to each function will be located automatically,
            so input_params does not need to include the input network.
            If the function requires ``confounds`` and it is not given here,
            the confounds file of each input file is loaded automatically.
            If the function has a ``sidecar`` argument, the sidecar of each
            input file is passed automatically.
            The dictionary that is passed in is not modified.
        output_desc : str
            If None, no desc is used (any desc of the input file is removed).
            If 'keep', then desc is preserved.
            If any other str, desc is set to that string.
        output_pipeline_name : str
            If set, then the data is saved in teneto-[functionname]_[output_pipeline_name].
            (underscores in the function name are replaced with hyphens).
        bids_filter : dict
            Currently unused by this method.
        update_pipeline : bool
            If set to True (default), then the selected_pipeline updates to output of function.
            Always treated as False for 'on_sidecar' functions.
        exist_ok : bool
            If set to True, an already existing output directory is accepted.
            If not None, the value is stored as TenetoBIDS.exist_ok.
        troubleshoot : bool
            If True, prints out certain information during running.
            Useful to run if reporting a bug.

        Raises
        ------
        ValueError
            If no input files are found, if the arguments do not match the
            function, or if the function output cannot be saved as a table.
        """
        if exist_ok is not None:
            self.exist_ok = exist_ok
        # Work on a private copy: arguments are added below (confounds,
        # sidecar) and the dictionary of the caller must not be changed.
        input_params = {} if input_params is None else dict(input_params)
        # import_module returns the already imported package when there is one.
        # The assignment is unconditional because any assignment makes
        # ``teneto`` a local name; a conditional one leaves it unbound when the
        # condition is False.
        teneto = importlib.import_module('teneto')
        func = teneto
        for attr in self.tenetobids_structure[run_func]['module'].split('.'):
            func = getattr(func, attr)
        functype = self.tenetobids_structure[run_func]['functype']
        func = getattr(func, run_func)

        # Only set up an output pipeline if the functype is on_data
        if functype == 'on_data':
            output_pipeline = self.create_output_pipeline(
                run_func, output_pipeline_name, self.exist_ok)

        structure_key = run_func.split('.')[-1]
        input_files = self.get_selected_files(structure_key)

        if not input_files:
            raise ValueError('No input files')
        if troubleshoot:
            self.troubleshoot('Initial input files', {'input_files': input_files})

        # Check number of required arguments for the function
        funcparams, get_confounds = self._check_run_function_args(func, input_params, functype)
        wants_sidecar = 'sidecar' in dict(funcparams)

        good_files = bad_files = 0
        for f in input_files:
            f_entities = f.get_entities()
            if get_confounds == 1:
                input_params['confounds'] = self.get_aux_file(f, filetype='confounds')
            data, sidecar = self.load_file(f)
            if troubleshoot:
                self.troubleshoot('Input file name', {'f': f,
                                                    'f_entities': f_entities,
                                                    'sidecar': sidecar})
            if wants_sidecar:
                input_params['sidecar'] = sidecar
            if data is None:
                # Skip if data not found
                bad_files += 1
                continue
            if functype == 'on_data':
                result = func(data, **input_params)
                # if sidecar is in input_params, then sidecar is also returned
                if wants_sidecar:
                    result, sidecar = result
                # None removes desc, 'keep' leaves it, anything else replaces it
                if output_desc is None:
                    f_entities.pop('desc', None)
                elif output_desc != 'keep':
                    f_entities['desc'] = output_desc
                f_entities.update(self.tenetobids_structure[structure_key]['output'])
                # NOTE(review): the file name part uses {ses} while the
                # directory part uses {session} -- confirm which entity name
                # is intended.
                output_pattern = '/sub-{subject}/[ses-{session}/]func/sub-{subject}[_ses-{ses}][_run-{run}]_task-{task}[_desc-{desc}]_{suffix}.{extension}'
                save_name = self.BIDSLayout.build_path(
                    f_entities, path_patterns=output_pattern, validate=False)
                save_path = self.bids_dir + '/derivatives/' + output_pipeline
                if troubleshoot:
                    self.troubleshoot('File name consruction', {'f_entities': f_entities,
                                                                'save_name': save_name,
                                                                'save_path': save_path})

                # Exist ok here has to be true, otherwise multiple runs causes an error
                # Any exist_ok is caught in create pipeline.
                os.makedirs(os.path.dirname(save_path + save_name), exist_ok=True)
                # Save file
                # Probably should check the output type in tenetobidsstructure
                # Table needs column header
                result = self._result_to_table(result)
                result.to_csv(save_path + save_name, sep='\t', header=True)
                # add information to sidecar
                sidecar['DerivativeSource'] = f.path
                sidecar['TenetoFunction'] = {}
                sidecar['TenetoFunction']['Name'] = run_func
                # The sidecar records a description of the automatically
                # handled arguments. This is done on a copy so that the real
                # arguments are still intact for the next file.
                # For aux_input more is needed here too.
                recorded = dict(input_params)
                if get_confounds == 1:
                    recorded['confounds'] = 'Loaded automatically via TenetoBIDS'
                elif 'confounds' in recorded:
                    recorded['confounds'] = 'Passed as argument'
                if 'sidecar' in recorded:
                    recorded['sidecar'] = 'Loaded automatically via TenetoBIDS'
                # Loop through input params content and make any nparray input to list for sidecar
                parameters = {}
                for key, value in recorded.items():
                    if teneto.utils.is_jsonable(value):
                        parameters[key] = value
                    elif isinstance(value, np.ndarray):
                        parameters[key] = value.tolist()
                    else:
                        print('Warning: Dropping input (' + key + ') from sidecar (not JSONable).')
                sidecar['TenetoFunction']['Parameters'] = parameters
            elif functype == 'on_sidecar':
                # The function returns the new sidecar, which replaces the
                # sidecar next to the input file.
                sidecar = func(**input_params)
                update_pipeline = False
                save_path = f.dirname + '/'
                save_name = f.filename
            # Save sidecar
            with open(save_path + save_name.replace('.tsv', '.json'), 'w') as sidecar_file:
                json.dump(sidecar, sidecar_file)
            good_files += 1
        report = '## ' + run_func + '\n'
        report += str(good_files) + ' files were included (' + \
            str(bad_files) + ' excluded from run)'
        self.report = report

        if update_pipeline:
            if functype == 'on_data':
                self.selected_pipeline = output_pipeline
            # Create new bids_filter dictionary that only contains
            # sub/ses/run/task as other tags are dropped.
            previous_filter = dict(self.bids_filter)
            self.bids_filter = {}
            for entity in ('subject', 'ses', 'run', 'task'):
                if entity in previous_filter:
                    self.update_bids_filter({entity: previous_filter[entity]})

        self.update_bids_layout()

    @staticmethod
    def _result_to_table(result):
        """Helper for TenetoBIDS.run: converts a function output to a pandas object that can be saved as tsv.

        Arrays with three dimensions are collapsed to two dimensions by
        merging the first two dimensions. Lists become DataFrames, and
        int/float values and 1D arrays become Series.

        Raises
        ------
        ValueError
            If result is an array that does not have 1, 2 or 3 dimensions,
            or if it is of a type that cannot be converted.
        """
        if isinstance(result, np.ndarray):
            if len(result.shape) == 3:
                # Should be made hdf5 at sometime
                # Idea here is to make 3D array to 2D by concatenating node dimensions.
                shape = result.shape
                result = pd.DataFrame(result.reshape([shape[0] * shape[1], shape[2]]))
            elif len(result.shape) == 2:
                result = pd.DataFrame(result)
            elif len(result.shape) == 1:
                result = pd.Series(result)
            else:
                raise ValueError(
                    'Output was array with more than 3 dimensions (unexpected)')
        elif isinstance(result, list):
            result = pd.DataFrame(result)
        elif isinstance(result, (int, float)):
            result = pd.Series(result)
        if not isinstance(result, (pd.DataFrame, pd.Series)):
            raise ValueError('Unexpected output type')
        return result


    def _check_run_function_args(self, func, input_params, functype):
        """
        Helper function for TenetoBIDS.run. 
        
        Function checks that the input parametes match the function.

        Returns
        ========
        funcparams : dict 
            parameters of the input function
        get_confounds : bool
            1 if confound files need to be loaded.
        """
        sig = inspect.signature(func)
        funcparams = sig.parameters.items()
        required_args = 0
        input_args = 0
        for p_name, p in funcparams:
            if p.default == inspect._empty:
                required_args += 1
                if p_name in input_params:
                    input_args += 1
        get_confounds = 0
        expected_arg_defecit = 1
        if 'sidecar' in dict(funcparams) and functype == 'on_data':
            expected_arg_defecit += 1
        # Calculate the different betwee n required and input arguments
        arg_diff = required_args - input_args
        if arg_diff != expected_arg_defecit:
            # Three conditoinals to be met in order to get confounds
            confounds_not_input = 'confounds' not in input_params
            confounds_in_func = 'confounds' in  dict(funcparams)
            arg_needed = arg_diff == expected_arg_defecit + 1
            if confounds_not_input and confounds_in_func and arg_needed:
                # Get confounds automatically
                get_confounds = 1
            else:
                raise ValueError(
                    'Expecting one unspecified input argument.\
                    Enter all required input arguments in input_params except for the data files.')
        return funcparams, get_confounds



[docs]
    def get_selected_files(self, output=None):
        """
        Uses information in selected_pipeline and the bids layout and shows the files that will be processed when calling TenetoBIDS.run().

        If you specify a particular output, it will tell you which files will get selected for that output
        """
        if output is not None:
            filters = self.tenetobids_structure[output]['input']
        else:
            # input can only be these files
            filters = {'extension': ['.tsv', '.nii', '.nii.gz']}
        # Add predefined filters to the check
        filters.update(self.bids_filter)
        return self.BIDSLayout.derivatives[self.selected_pipeline].get(**filters)



[docs]
    def get_run_options(self, for_selected=True):
        """Returns the different function names that can be called using TenetoBIDS.run()

        Parameters
        ===========
        for_selected : bool
            If True, only return run options for the selected files.
            If False, returns all options.

        Returns
        ========
        options : str
            a list of options that can be run.
        """
        funcs = self.tenetobids_structure.keys()
        if for_selected:
            funcs_filter = []
            files = self.get_selected_files()
            suffix = [f.get_entities()['suffix'] for f in files]
            suffix = list(np.unique(suffix))
            for t in list(funcs):
                s = self.tenetobids_structure[t]['input']['suffix']
                if isinstance(s, str):
                    s = [s]
                for su in suffix:
                    if su in s:
                        funcs_filter.append(t)
            funcs = sorted(list(set(funcs_filter)))
        return ', '.join(funcs)



[docs]
    def update_bids_filter(self, filter_addons):
        """Updates TenetoBIDS.bids_filter

        Parameters
        ==========
        filter_addons : dict
            dictionary that updates TenetoBIDS.bids_filter
        """
        self.bids_filter.update(filter_addons)



[docs]
    def get_aux_file(self, bidsfile, filetype='confounds'):
        """Tries to automatically get auxiliary data for input files, and loads it

        Paramters
        ==========
        bidsfile : BIDSDataFile or BIDSImageFile
            The BIDS file that the confound file is gong to be matched.
        filetype : string
            Can be confounds, events. 
            Specified if you want to get the confound or events data.
        """
        if filetype == 'confounds':
            suffix = 'timeseries'
            desc = 'confounds'
            derivative = 'fMRIPrep'
        elif filetype == 'events': 
            suffix = 'events'
            derivative = self.selected_pipeline
        else:
            raise ValueError('unknown file type')
        # Get the entities of the filename
        print(bidsfile)
        file_entities = bidsfile.get_entities()
        # Ensure that the extension and suffix are correct
        file_entities['suffix'] = suffix
        file_entities['extension'] = '.tsv'
        if 'desc' in file_entities:
            file_entities.pop('desc')
        if filetype == 'confounds':
            if 'space' in file_entities:
                file_entities.pop('space')
            if 'atlas' in file_entities:
                file_entities.pop('atlas')
            file_entities['desc'] = desc
        print(file_entities)
        auxfile = self.BIDSLayout.derivatives[derivative].get(**file_entities)
        if len(auxfile) == 0:
            raise ValueError('Non auxiliary file (type: ' + filetype + ') found')
        elif len(auxfile) > 1:
            raise ValueError('More than one auxiliary file (type: ' + filetype + ') found')
        # Load the aux file
        aux = load_tabular_file(
            auxfile[0].dirname + '/' + auxfile[0].filename, index_col=False)
        return aux



[docs]
    def load_data(self, bids_filter=None):
        """Returns data, default is the input data.

        bids_filter : dict
            default is None. If set, load data will load all files found by the bids_filter.
            Any preset BIDS filter is used as well, but will get overwritten by this input.
        """
        if bids_filter is None:
            files = self.get_selected_files()
        else:
            filters = dict(self.bids_filter)
            filters.update(bids_filter)
            files = self.BIDSLayout.derivatives[self.selected_pipeline].get(**filters)
        data = {}
        for f in files:
            if f.filename in data:
                raise ValueError('Same name appears twice in selected files')
            data[f.filename], _ = self.load_file(f)
        return data



[docs]
    def load_file(self, bidsfile):
        """Aux function to load the data and sidecar from a BIDSFile

        Parameters
        ==========
        bidsfile : BIDSDataFile or BIDSImageFile
            The BIDS file to load.

        Returns
        ========
        data
            The loaded data, or None if the sidecar marks the file as a
            BadFile. Files with '_temporalconnectivity.tsv' in the name are
            reshaped to a (node, node, time) array when nettsv is 'nn-t' or
            the dimord in the sidecar is 'node,time'.
        sidecar : dict
            The sidecar of the file.

        Raises
        ======
        ValueError
            If bidsfile has neither a get_image nor a get_df method.
        """
        path = bidsfile.dirname + '/' + bidsfile.filename
        # Get sidecar and see if file has been rejected at a previous step
        # (note sidecar could be called in input_files, but this will require loading sidecar twice)
        sidecar = get_sidecar(path)
        if sidecar['BadFile']:
            # Rejected files are not loaded (and so cannot be reshaped below)
            return None, sidecar
        if hasattr(bidsfile, 'get_image'):
            data = bidsfile.get_image()
        elif hasattr(bidsfile, 'get_df'):
            # This can be changed if/when pybids is updated. Assumes index_col=0 in tsv file
            data = load_tabular_file(path)
        else:
            raise ValueError('Cannot load ' + bidsfile.filename + ' (neither image nor table)')
        # Since temporal networks are currently saved in 2D collapsed arrays
        # The following checks if they should be resized, and resizes
        if '_temporalconnectivity.tsv' in bidsfile.filename:
            dimord = sidecar['TenetoFunction']['Parameters']['params']['dimord']
            if self.nettsv == 'nn-t' or dimord == 'node,time':
                # Rows are the collapsed node x node dimensions
                n_nodes = int(np.sqrt(data.shape[0]))
                n_time = data.shape[1]
                data = data.values.reshape([n_nodes, n_nodes, n_time])
        return data, sidecar



[docs]
    def troubleshoot(self, stepname, status):
        """
        Prints ongoing info to assist with troubleshooting
        """
        print('******** TROUBLESHOOT STEP: ' + stepname + ', start ********')
        for step in status:
            print('++++++++')
            print(step)
            print('------')
            print(status[step])
            print('++++++++')
        print('******** TROUBLESHOOT STEP: ' + stepname + ', end ********')



[docs]
    def load_events(self):
        """
        Loads event data for selected files
        """
        input_files = self.get_selected_files()
        events = {}
        for f in input_files:
            events[f.filename] = self.get_aux_file(f, filetype='events')
        return events