# Source code for teneto.utils.utils

"""General utility functions."""
import collections
import itertools
import operator
import json
import numpy as np
import pandas as pd
import scipy.spatial.distance as distance
import teneto
#from ..classes import teneto.TemporalNetwork
#from ..trajectory import rdp



def graphlet2contact(tnet, params=None):
    """
    Converts array representation to contact representation.

    Contact representation are more efficient for memory storing. It also
    includes metadata which can make plotting easier. A contact
    representation contains all non-zero (off-diagonal) edges.

    Parameters
    ----------
    tnet : array_like
        Temporal network (node x node x time). A 2D (node x node) input is
        treated as a network with a single time point.
    params : dict, optional
        Dictionary of parameters for the contact representation. A copy of
        it becomes the foundation of the output; any additional keys are
        carried over unchanged. Recognised keys:

        *Fs* : int, default=1
            Sampling rate.
        *timeunit* : str, default=''
            Unit of the sampling rate (e.g. seconds, minutes, years).
        *nettype* : str, default='auto'
            Type of network. Can be 'auto' (detected automatically),
            'wd' (weighted, directed), 'bd' (binary, directed),
            'wu' (weighted, undirected) or 'bu' (binary, undirected).
        *diagonal* : int, default=0
            What the diagonal should be.
        *nodelabels* : list
            Node labels; must have one entry per node.
        *t0* : int, default=1
            Time label at first index.

    Returns
    -------
    C : dict
        Contact representation of temporal network. Includes 'contacts',
        'values' (if nettype[0] is 'w'), 'nettype', 'netshape', 'Fs',
        'dimord', 'timeunit' and 'timetype'.

    Raises
    ------
    ValueError
        If tnet is not a square (node x node x time) array, if the number
        of node labels does not match the number of nodes, if t0 is not a
        single value or if nettype is not recognised.

    Note
    ----
    Neither ``tnet`` nor ``params`` is modified; the function works on
    copies of both.
    """
    # Copy both inputs: the diagonal is zeroed below and the params become
    # the output dictionary, neither of which should leak back to the caller.
    params = {} if params is None else dict(params)
    tnet = np.array(tnet)

    # Check that temporal network is valid input.
    if tnet.ndim == 2:
        tnet = np.atleast_3d(tnet)
    if tnet.ndim != 3:
        raise ValueError(
            'Input tnet must be three dimensions (node x node x time)')
    if tnet.shape[0] != tnet.shape[1]:
        raise ValueError(
            'Input tnet (node x node x time), requires Rows and Columns to be the same size.')

    # Check number of nodes is correct, if specified
    if 'nodelabels' in params:
        if params['nodelabels']:
            if len(params['nodelabels']) != tnet.shape[0]:
                raise ValueError(
                    'Specified list of node names has to be equal in length to number of nodes')
    if 't0' in params:
        params['t0'] = np.atleast_1d(np.array(params['t0']))
        if len(params['t0']) != 1:
            raise ValueError(
                't0 must be sigular be either integer representing time at first temporal index)')
        params['t0'] = np.squeeze(params['t0'])

    # Check that all inputs in params are correct.
    if 'nettype' not in params or params['nettype'] == 'auto':
        params['nettype'] = gen_nettype(tnet)
    if params['nettype'] not in {'bd', 'bu', 'wd', 'wu'}:
        raise ValueError(
            '\'nettype\' (in params) must be a string \'wd\',\'bd\',\'wu\',\'bu\').')
    params.setdefault('Fs', 1)
    params.setdefault('timeunit', '')
    params.setdefault('diagonal', 0)
    if 'nodelabels' not in params:
        params['nodelabels'] = ''
    else:
        params['nodelabels'] = list(params['nodelabels'])
    params.setdefault('t0', 1)
    nt = params['nettype']

    # Self-connections are never contacts: zero the diagonal of the copy.
    diag = np.arange(tnet.shape[0])
    tnet[diag, diag, :] = 0
    # For undirected networks only the upper triangle is kept so that every
    # edge is listed once.
    if nt[1] == 'u':
        upper = np.triu(np.ones(tnet.shape[:2], dtype=bool), k=1)
        tnet[~upper, :] = 0

    # Indices of all non-zero edges, ordered by time. The stable sort keeps
    # the (i, j) ordering produced by np.where within each time point.
    edg = np.where(np.abs(tnet) > 0)
    order = np.argsort(edg[2], kind='stable')
    contacts = np.array([edg[0][order], edg[1][order], edg[2][order]]).T

    # build output dictionary
    C = params
    C['contacts'] = contacts
    C['netshape'] = tnet.shape
    C['dimord'] = 'node,node,time'
    # Only discrete time is supported at present.
    C['timetype'] = 'discrete'
    # Get each of the values if weighted matrix
    if nt[0] == 'w':
        C['values'] = list(tnet[edg[0][order], edg[1][order], edg[2][order]])
    return C
def contact2graphlet(C):
    """
    Converts contact representation to array representation.

    Graphlet representation discards all meta information in contacts.

    Parameters
    ----------
    C : dict
        A contact representation. Must include keys: 'dimord', 'netshape',
        'nettype', 'contacts' and, if weighted, 'values'. The optional key
        'diagonal' (default 0) sets the value written on the diagonal.

    Returns
    -------
    tnet : array
        Graphlet representation of temporal network.

    Raises
    ------
    ValueError
        If a required key is missing or has an unsupported value.

    Note
    ----
    Returning elements of tnet will be float, even if binary graph.
    A warning is printed when 'timetype' is missing or not 'discrete'.
    """
    # Check that contact sequence is valid input.
    if 'dimord' not in C:
        raise ValueError('\'dimord\' must be present in C.')
    if C['dimord'] != 'node,node,time':
        raise ValueError('\'dimord\' must be string \'node,node,time\'.')
    if 'nettype' not in C:
        raise ValueError(
            'C must include parameter \'nettype\' (wd,bd,wu,bu). '
            'w: weighted network. b: binary network. '
            'u: undirected network. d: directed network')
    if C['nettype'] not in {'bd', 'bu', 'wd', 'wu'}:
        raise ValueError(
            '\'nettype\' in (C) must be a string \'wd\',\'bd\',\'wu\',\'bu\').')
    if 'netshape' not in C:
        raise ValueError(
            'C must include netshape expressing size of target network (tuple)')
    if not isinstance(C['netshape'], tuple):
        raise ValueError('\'netshape\' (in C) should be a tuple')
    if len(C['netshape']) != 3:
        raise ValueError('\'netshape\' tuple should be of 3 dimensions')
    if C['nettype'][0] == 'w' and 'values' not in C:
        raise ValueError('values not in C and asked for weighted network')
    if 'contacts' not in C:
        raise ValueError('contacts must be expressed (list of tuples)')
    # .get() so that a missing key only triggers the warning, not a KeyError
    if C.get('timetype') != 'discrete':
        print('Warning: timetype is not discrete. '
              'In future updates timetype in dictionary should be \'discrete\' to be converted to grpahlets')

    nt = C['nettype']
    # Preallocate
    tnet = np.zeros(C['netshape'])
    # Convert indexes of C to numpy friendly idx array
    idx = np.array(list(map(list, C['contacts'])))
    # With no contacts idx has no columns to index, so tnet stays all zero.
    if idx.size > 0:
        rows, cols, times = idx[:, 0], idx[:, 1], idx[:, 2]
        fill = 1 if nt[0] == 'b' else C['values']
        tnet[rows, cols, times] = fill
        # Undirected contacts are stored once; mirror them.
        if nt[1] == 'u':
            tnet[cols, rows, times] = fill
    # If diagonal is not 0, fill it to whatever it is set to
    diagonal = C.get('diagonal', 0)
    if diagonal != 0:
        tnet = set_diagonal(tnet, diagonal)
    return tnet
def binarize_percent(netin, level, sign='pos', axis='time'):
    """
    Binarizes a network proportionally.

    When axis='time' the top values of each edge time series are kept.
    When axis='graphlet' the top values of each graphlet (time point) are
    kept.

    Parameters
    ----------
    netin : array or dict
        Network (graphlet or contact representation).
    level : float
        Percent to keep (expressed as decimal, e.g. 0.1 = top 10%). The
        number of entries kept is ``round(n * level)`` where n is the number
        of candidates along the chosen axis; if that is 0 nothing is kept.
    sign : str, default='pos'
        States the sign of the thresholding. Can be 'pos', 'neg' or 'both'.
        With 'pos' the largest values are kept, with 'neg' the smallest
        (most negative) and with 'both' the largest absolute values.
    axis : str, default='time'
        Specify which dimension thresholding is applied against.
        Can be 'time' (takes top % for each edge time-series) or 'graphlet'
        (takes top % for each graphlet).

    Returns
    -------
    netout : array or dict (depending on input)
        Binarized network.

    Raises
    ------
    ValueError
        If sign or axis has an unknown value.
    """
    if axis not in ('time', 'graphlet'):
        raise ValueError('Unknown value for parameter: axis')
    netin, netinfo = process_input(netin, ['C', 'G', 'TN'])
    # Set diagonal to 0 on a copy so a graphlet passed in by the caller is
    # left untouched (set_diagonal writes in place).
    netin = set_diagonal(np.array(netin), 0)
    undirected = netinfo['nettype'][-1] == 'u'

    if axis == 'graphlet':
        n_nodes = netinfo['netshape'][0]
        if undirected:
            # each undirected edge is ranked once (upper triangle)
            edge_ind = np.triu_indices(n_nodes, k=1)
        else:
            # directed: every off-diagonal entry is its own edge
            edge_ind = np.where(~np.eye(n_nodes, dtype=bool))
        # (edge, time) -> (time, edge) so ranking runs along the last axis
        netin = netin[edge_ind[0], edge_ind[1], :].transpose()

    if sign == 'both':
        net_sorted = np.argsort(np.abs(netin), axis=-1)
    elif sign == 'pos':
        net_sorted = np.argsort(netin, axis=-1)
    elif sign == 'neg':
        net_sorted = np.argsort(-1 * netin, axis=-1)
    else:
        raise ValueError('Unknown value for parameter: sign')

    # Number of entries to keep. Rounding the product (not the length)
    # avoids float artefacts such as 100 * 0.29 == 28.999...
    n_keep = int(round(net_sorted.shape[-1] * level))

    # Predefine
    netout = np.zeros(netinfo['netshape'])
    # n_keep == 0 must be skipped: a "-0:" slice would select everything.
    if n_keep > 0:
        top = net_sorted[..., -n_keep:]
        if axis == 'time':
            np.put_along_axis(netout, top, 1, axis=-1)
        else:
            netout_tmp = np.zeros(netin.shape)
            np.put_along_axis(netout_tmp, top, 1, axis=-1)
            netout_tmp = netout_tmp.transpose()
            netout[edge_ind[0], edge_ind[1], :] = netout_tmp
            if undirected:
                netout[edge_ind[1], edge_ind[0], :] = netout_tmp

    netout = set_diagonal(netout, 0)

    # If input is contact, output contact
    if netinfo['inputtype'] == 'C':
        netinfo['nettype'] = 'b' + netinfo['nettype'][1]
        netout = graphlet2contact(netout, netinfo)
        netout.pop('inputtype')
        # 'values' is only present when the input contact was weighted
        netout.pop('values', None)
        netout['diagonal'] = 0

    return netout
# To do: set diagonal to 0.
def binarize_rdp(netin, level, sign='pos', axis='time'):
    """
    Binarizes a network based on RDP compression.

    Parameters
    ----------
    netin : array or dict
        Network (graphlet or contact representation).
    level : float
        Delta parameter which is the tolerated error in RDP compression.
    sign : str, default='pos'
        States the sign of the thresholding. Can be 'pos', 'neg' or 'both'.
        With 'pos' only trajectory points with a positive value are kept,
        with 'neg' only those with a negative value. Any other value keeps
        all trajectory points (i.e. behaves like 'both').
    axis : str, default='time'
        Currently unused; accepted for signature compatibility with the
        other binarize functions.

    Returns
    -------
    netout : array or dict (depending on input)
        Binarized network. A graphlet is returned when the input was a
        graphlet; otherwise a contact representation is returned.
    """
    netin, netinfo = process_input(netin, ['C', 'G', 'TN'])
    trajectory = teneto.trajectory.rdp(netin, level)

    contacts = []
    # Use the trajectory points as threshold
    for n in range(trajectory['index'].shape[0]):
        points = trajectory['trajectory_points'][n]
        if sign == 'pos':
            sel = points[trajectory['trajectory'][n][points] > 0]
        elif sign == 'neg':
            sel = points[trajectory['trajectory'][n][points] < 0]
        else:
            # keep every trajectory point of this edge
            sel = points
        i_ind = np.repeat(trajectory['index'][n, 0], len(sel))
        j_ind = np.repeat(trajectory['index'][n, 1], len(sel))
        contacts.append(np.array([i_ind, j_ind, sel]).transpose())
    # np.concatenate cannot handle an empty list (network without edges)
    if contacts:
        contacts = np.concatenate(contacts)
    else:
        contacts = np.zeros((0, 3), dtype=int)

    # Create output dictionary
    netout = dict(netinfo)
    netout['contacts'] = contacts
    netout['nettype'] = 'b' + netout['nettype'][1]
    netout['dimord'] = 'node,node,time'
    netout['timetype'] = 'discrete'
    netout['diagonal'] = 0
    # If input is graphlet, output graphlet
    if netinfo['inputtype'] == 'G':
        netout = contact2graphlet(netout)
    else:
        netout.pop('inputtype')

    return netout
def binarize_magnitude(netin, level, sign='pos'):
    """
    Make binary network based on magnitude thresholding.

    Parameters
    ----------
    netin : array or dict
        Network (graphlet or contact representation).
    level : float
        Magnitude level to threshold at.
    sign : str, default='pos'
        States the sign of the thresholding. Can be 'pos', 'neg' or 'both'.
        'pos' keeps edges with value > level. 'neg' keeps edges with
        value < -abs(level), so the level may be given as a magnitude or as
        an already negative number. 'both' keeps edges that satisfy either
        criterion.

    Returns
    -------
    netout : array or dict (depending on input)
        Binarized network.

    Raises
    ------
    ValueError
        If sign has an unknown value.
    """
    if sign not in ('pos', 'neg', 'both'):
        raise ValueError('Unknown value for parameter: sign')
    netin, netinfo = process_input(netin, ['C', 'G', 'TN'])
    # Predefine
    netout = np.zeros(netinfo['netshape'])

    if sign in ('pos', 'both'):
        netout[netin > level] = 1
    if sign in ('neg', 'both'):
        # Mirror the threshold for the negative side; comparing against
        # +level here would mark zeros and small positive values as edges.
        netout[netin < -abs(level)] = 1

    # Set diagonal to 0
    netout = set_diagonal(netout, 0)

    # If input is contact, output contact
    if netinfo['inputtype'] == 'C':
        netinfo['nettype'] = 'b' + netinfo['nettype'][1]
        netout = graphlet2contact(netout, netinfo)
        netout.pop('inputtype')
        # 'values' is only present when the input contact was weighted
        netout.pop('values', None)
        netout['diagonal'] = 0

    return netout
def binarize(netin, threshold_type, threshold_level, outputformat='auto', sign='pos', axis='time'):
    """
    Binarizes a network, returning the network.

    General wrapper function for different binarization functions.

    Parameters
    ----------
    netin : array or dict
        Network (graphlet or contact representation).
    threshold_type : str
        What type of thresholds to make binarization.
        Options: 'rdp', 'percent', 'magnitude'.
    threshold_level : float
        Parameter dependent on threshold type.
        If 'rdp', it is the delta (i.e. error allowed in compression).
        If 'percent', it is the percentage to keep (e.g. 0.1 means keep 10%
        of signal).
        If 'magnitude', it is the amplitude of signal to keep.
    outputformat : str, default='auto'
        Specify what format you want the output in: G, C, TN, or DF.
        If 'auto', the input form is returned.
    sign : str, default='pos'
        States the sign of the thresholding. Can be 'pos', 'neg' or 'both'.
        If "neg", only negative values are thresholded and vice versa.
    axis : str, default='time'
        Threshold over specified axis. Passed on to the 'percent' and 'rdp'
        binarizers. Can be 'time' or 'graphlet'.

    Returns
    -------
    netout : array or dict (depending on input)
        Binarized network.

    Raises
    ------
    ValueError
        If threshold_type is unknown.
    """
    if outputformat == 'auto':
        outputformat = check_input(netin)
    if threshold_type == 'percent':
        netout = binarize_percent(netin, threshold_level, sign, axis)
    elif threshold_type == 'magnitude':
        netout = binarize_magnitude(netin, threshold_level, sign)
    elif threshold_type == 'rdp':
        netout = binarize_rdp(netin, threshold_level, sign, axis)
    else:
        raise ValueError('Unknown value to parameter: threshold_type.')
    # The binarizers hand back either a graphlet or a contact dictionary, so
    # both must be accepted here to allow conversion to the requested format.
    netout = process_input(netout, ['C', 'G'], outputformat=outputformat)
    # process_input returns (network, netinfo) for graphlet output
    if outputformat == 'G':
        netout = netout[0]
    return netout
def set_diagonal(tnet, val=0):
    """
    Set the diagonal of every time point of a graphlet to ``val``.

    Parameters
    ----------
    tnet : array
        Temporal network (graphlet), indexed as (node, node, time).
    val : value, default=0
        Value written to the diagonal.

    Returns
    -------
    tnet : array
        The same array object that was passed in, with the new diagonal.
        The array is modified in place.
    """
    # With the time axis moved to the front, iterating yields one
    # (node, node) view per time point; filling a view writes through to
    # the underlying array.
    for snapshot in np.moveaxis(tnet, 2, 0):
        np.fill_diagonal(snapshot, val)
    return tnet
def gen_nettype(tnet, weightonly=False):
    r"""
    Attempts to identify what nettype input graphlet tnet is.

    The diagonal is ignored when deciding between binary and weighted.

    Parameters
    ----------
    tnet : array
        Temporal network (graphlet). A 2D (node x node) array is treated as
        a single time point.
    weightonly : bool, default=False
        If True, only the weight part of the type is returned.

    Returns
    -------
    nettype : str
        \'wu\', \'bu\', \'wd\', or \'bd\'. If weightonly is True, only
        \'w\' or \'b\'.
    """
    tnet = np.atleast_3d(np.asarray(tnet))
    # Only off-diagonal entries decide whether the network is binary;
    # self-connections are handled separately via the 'diagonal' setting.
    offdiag = ~np.eye(tnet.shape[0], tnet.shape[1], dtype=bool)
    edge_values = tnet[offdiag]
    if np.array_equal(edge_values, edge_values.astype(bool)):
        nettype = 'b'
    else:
        nettype = 'w'

    if not weightonly:
        # Undirected networks are symmetric at every time point.
        if np.allclose(tnet.transpose(1, 0, 2), tnet):
            direction = 'u'
        else:
            direction = 'd'
        nettype = nettype + direction

    return nettype
def get_distance_function(requested_metric):
    """
    This function returns a specified distance function.

    Parameters
    ----------
    requested_metric : str
        Distance function. Can be one of the supported functions in
        https://docs.scipy.org/doc/scipy/reference/spatial.distance.html.

    Returns
    -------
    requested_metric : distance function
        The matching function from ``scipy.spatial.distance``.

    Raises
    ------
    ValueError
        If the metric is not supported, or is not provided by the installed
        SciPy version.
    """
    distance_options = (
        'braycurtis', 'canberra', 'chebyshev', 'cityblock', 'correlation',
        'cosine', 'euclidean', 'sqeuclidean', 'dice', 'hamming', 'jaccard',
        'kulsinski', 'matching', 'rogerstanimoto', 'russellrao',
        'sokalmichener', 'sokalsneath', 'yule',
    )
    # Resolve the attribute lazily: some of these metrics have been removed
    # from newer SciPy releases, and looking all of them up eagerly would
    # break the lookup of every other metric as well.
    if requested_metric in distance_options:
        metric = getattr(distance, requested_metric, None)
        if metric is not None:
            return metric
    raise ValueError('Distance function cannot be found.')
def clean_community_indexes(communityID):
    """
    Reindex community assignments to the smallest possible integers.

    Parameters
    ----------
    communityID : array-like
        List or array of integers. Output from community detection
        algorithms.

    Returns
    -------
    new_communityID : array
        Array with the same shape as the input whose labels run from 0 to
        len(np.unique(communityID))-1. The array is of float type.

    Note
    -----
    The ordering of the labels is preserved: the lowest community integer
    in communityID receives the lowest integer in new_communityID.
    """
    labels = np.array(communityID)
    original_shape = labels.shape
    is_multidim = len(original_shape) > 1
    # Relabelling is done on a flat vector; the shape is restored afterwards.
    flat_labels = labels.flatten() if is_multidim else labels
    relabelled = np.zeros(len(flat_labels))
    # np.unique returns sorted labels, so the rank is the new label.
    for new_label, old_label in enumerate(np.unique(flat_labels)):
        relabelled[flat_labels == old_label] = new_label
    if is_multidim:
        relabelled = relabelled.reshape(original_shape)
    return relabelled
def multiple_contacts_get_values(C):
    """
    Collapse repeated contacts into unique contacts with a count.

    Parameters
    ----------
    C : dict
        Contact representation with multiple repeated contacts.

    Returns
    -------
    C_out : dict
        The same dictionary object that was passed in. Its 'contacts' entry
        now holds each distinct contact once (as a tuple, in order of first
        appearance) and its 'values' entry holds how many times each of
        those contacts occurred.
    """
    occurrences = collections.OrderedDict()
    for contact in C['contacts']:
        key = tuple(contact)
        occurrences[key] = occurrences.get(key, 0) + 1

    # The input dictionary itself is updated and returned.
    C_out = C
    C_out['contacts'] = list(occurrences.keys())
    C_out['values'] = list(occurrences.values())
    return C_out
def is_jsonable(x):
    """
    Check if an object can be serialised with ``json.dumps``.

    Credit: https://stackoverflow.com/a/53112659

    Parameters
    ----------
    x : object
        Object to test (typically a dict).

    Returns
    -------
    bool
        True if ``json.dumps(x)`` succeeds, False if it fails because of an
        unsupported type, an overflow or a circular reference.
    """
    try:
        json.dumps(x)
    # ValueError is what json raises for circular references
    except (TypeError, OverflowError, ValueError):
        return False
    return True
def df_to_array(df, netshape, nettype, start_at='min'):
    """
    Returns a numpy array (snapshot representation) from the dataframe contact list.

    Parameters
    ----------
    df : pandas df
        Pandas df whose first three columns are i, j, t (in that order). An
        optional fourth column holds the edge weight. Columns are read by
        position.
    netshape : tuple
        Network shape, format: (node, time).
    nettype : str
        'wu', 'wd', 'bu', 'bd'.
    start_at : str or int, default='min'
        'min' or 'zero' or int.
        If 'min', the 0th time-point in the array is the min t value and the
        array has netshape[1] time points.
        If 'zero', the 0th time-point in the array is 0.
        If int, the 0th time-point in the array starts at int in df.

    Returns
    -------
    tnet : array
        (node,node,time) array for the network. If df has no rows an
        all-zero array with netshape[1] time points is returned. If df has
        neither three nor four columns the array is left all-zero.

    Raises
    ------
    ValueError
        If df is not a dataframe or start_at is not a valid option.
    """
    # Check input is dataframe
    if not isinstance(df, pd.DataFrame):
        raise ValueError('Input must be dataframe')
    int_start = isinstance(start_at, (int, np.integer))
    if not int_start and start_at not in ('zero', 'min'):
        raise ValueError('start_at must be \'min\', \'zero\' or an integer')

    n_nodes = netshape[0]
    # Nothing to place: the time extent cannot be derived from an empty
    # frame, so fall back on the declared network shape.
    if df.shape[0] == 0:
        return np.zeros([n_nodes, n_nodes, int(netshape[1])])

    # Fix the time indices
    if int_start:
        tlen = df['t'].max() + 1 - start_at
        idx_toffset = start_at
    elif start_at == 'zero':
        tlen = df['t'].max() + 1
        idx_toffset = 0
    else:
        tlen = netshape[1]
        idx_toffset = df['t'].min()

    # Get indices and values
    idx = np.array(list(map(list, df.values)))
    tnet = np.zeros([n_nodes, n_nodes, int(tlen)])
    idx[:, 2] = idx[:, 2] - idx_toffset
    # Three columns imply a binary network, four a weighted one.
    if idx.shape[1] == 3:
        # if undirected, copy the indices from j to i.
        if nettype[-1] == 'u':
            idx = np.vstack([idx, idx[:, [1, 0, 2]]])
        idx = idx.astype(int)
        tnet[idx[:, 0], idx[:, 1], idx[:, 2]] = 1
    elif idx.shape[1] == 4:
        if nettype[-1] == 'u':
            idx = np.vstack([idx, idx[:, [1, 0, 2, 3]]])
        weights = idx[:, 3]
        idx = np.array(idx[:, :3], dtype=int)
        tnet[idx[:, 0], idx[:, 1], idx[:, 2]] = weights
    return tnet
def check_distance_funciton_input(distance_func_name, netinfo):
    """
    Resolve the 'default' distance function name for a network.

    If distance_func_name is 'default', a distance function is chosen from
    the network type: Hamming for binary networks and Euclidean for weighted
    networks (a message is printed saying which one was chosen). Any other
    name is returned unchanged.

    Parameters
    ----------
    distance_func_name : str
        Distance function name.
    netinfo : dict
        The output of utils.process_input. Only its 'nettype' entry is
        read, and only when distance_func_name is 'default'.

    Returns
    -------
    distance_func_name : str
        Distance function name.
    """
    # Explicit choices pass straight through without looking at netinfo.
    if distance_func_name != 'default':
        return distance_func_name

    weight_type = netinfo['nettype'][0]
    if weight_type == 'b':
        print('Default distance funciton specified. As network is binary, using Hamming')
        return 'hamming'
    if weight_type == 'w':
        print('Default distance funciton specified. '
              'As network is weighted, using Euclidean')
        return 'euclidean'
    return distance_func_name
def get_dimord(measure, calc=None, community=None):
    """
    Get the dimension order of a network measure.

    The lookup key is built as ``measure + '_' + calc`` (the suffix is
    omitted when calc is empty), followed by 'community' when a community
    argument is given and calc does not itself mention 'community'.

    Parameters
    ----------
    measure : str
        Name of function in teneto.networkmeasures.
    calc : str, default=None
        Calc parameter for the function.
    community : bool, default=None
        If truthy, the measure is assumed to be calculated per community.

    Returns
    -------
    dimord : str
        Dimension order. So "node,node,time" would define the dimensions of
        the network measure. 'unknown' is returned (and a warning printed)
        if the combination is not known.
    """
    calc = '_' + calc if calc else ''
    # A community-based calc already encodes the community dimension.
    if community and 'community' not in calc:
        community = 'community'
    else:
        community = ''

    dimord_dict = {
        'temporal_closeness_centrality': 'node',
        'temporal_degree_centrality': 'node',
        'temporal_degree_centrality_avg': 'node',
        # misspelled key kept for backwards compatibility
        'temporal_degree_centralit_avg': 'node',
        'temporal_degree_centrality_time': 'node,time',
        'temporal_efficiency': 'global',
        'temporal_efficiency_global': 'global',
        'temporal_efficiency_node': 'node',
        'temporal_efficiency_to': 'node',
        'sid_global': 'global,time',
        'community_pairs': 'community,community,time',
        'community_avg': 'community,time',
        'sid': 'community,community,time',
        'reachability_latency_global': 'global',
        'reachability_latency': 'global',
        'reachability_latency_node': 'node',
        'fluctuability': 'node',
        'fluctuability_global': 'global',
        'bursty_coeff': 'edge,edge',
        'bursty_coeff_edge': 'edge,edge',
        'bursty_coeff_node': 'node',
        'bursty_coeff_meanEdgePerNode': 'node',
        'volatility_global': 'time',
    }
    key = measure + calc + community
    if key in dimord_dict:
        return dimord_dict[key]
    print('WARNINGL: get_dimord() returned unknown dimension labels')
    return 'unknown'
def get_network_when(tnet, i=None, j=None, t=None, ij=None, logic='and', copy=False, asarray=False, netshape=None, nettype=None):
    r"""
    Returns subset of the network's edges that match the given indices.

    Parameters
    ----------
    tnet : df, array or teneto.TemporalNetwork
        teneto.TemporalNetwork object, pandas dataframe edgelist (columns
        i, j, t) or a (node, node, time) array.
    i : list or int
        Get nodes in column i (source nodes in directed networks).
    j : list or int
        Get nodes in column j (target nodes in directed networks).
    t : list or int
        Get edges at these time-points.
    ij : list or int
        Get nodes for column i or j (logic can still apply for t).
        Cannot be specified along with i or j.
    logic : str
        Options: \'and\' or \'or\'. If \'and\', the function returns rows
        that match all of the given arguments. If \'or\', a row only has to
        match one of them. \'or\' is not implemented for array input.
    copy : bool
        Default False. If True, returns a copy of the dataframe.
    asarray : bool
        Default False. If True, the selected edges are returned as a numpy
        array (via df_to_array).
    netshape : tuple
        Network shape (node, time). Taken from tnet when it is a
        TemporalNetwork; derived from the array shape for array input.
    nettype : str
        Network type. Taken from tnet when it is a TemporalNetwork; detected
        with gen_nettype for array input when not given.

    Returns
    -------
    df : pandas dataframe
        Unless asarray is set to True. If no selector (i, j, t, ij) is
        given, all edges are returned.

    Raises
    ------
    ValueError
        If ij is combined with i or j, or if \'or\' logic is requested for
        array input.
    """
    if isinstance(tnet, pd.DataFrame):
        network = tnet
        hdf5 = False
        sparse = True
    elif isinstance(tnet, np.ndarray):
        network = tnet
        hdf5 = False
        sparse = False
    else:
        # presumably a teneto.TemporalNetwork; only these attributes are read
        network = tnet.network
        hdf5 = tnet.hdf5
        sparse = tnet.sparse
        nettype = tnet.nettype
        netshape = tnet.netshape
    if ij is not None and (i is not None or j is not None):
        raise ValueError('ij cannoed be specifed along with i or j')
    # Make non list inputs a list
    if i is not None and not isinstance(i, list):
        i = [i]
    if j is not None and not isinstance(j, list):
        j = [j]
    if t is not None and not isinstance(t, list):
        t = [t]
    if ij is not None and not isinstance(ij, list):
        ij = [ij]

    if hdf5:
        l = {'or': ' | ', 'and': ' & '}
        if i is not None and j is not None and t is not None:
            isinstr = 'i in ' + str(i) + l[logic] + 'j in ' + \
                str(j) + l[logic] + 't in ' + str(t)
        elif ij is not None and t is not None:
            isinstr = '(i in ' + str(ij) + ' | ' + 'j in ' + \
                str(ij) + ') & ' + 't in ' + str(t)
        elif i is not None and j is not None:
            isinstr = 'i in ' + str(i) + l[logic] + 'j in ' + str(j)
        elif i is not None and t is not None:
            isinstr = 'i in ' + str(i) + l[logic] + 't in ' + str(t)
        elif j is not None and t is not None:
            isinstr = 'j in ' + str(j) + l[logic] + 't in ' + str(t)
        elif i is not None:
            isinstr = 'i in ' + str(i)
        elif j is not None:
            isinstr = 'j in ' + str(j)
        elif t is not None:
            isinstr = 't in ' + str(t)
        elif ij is not None:
            isinstr = 'i in ' + str(ij) + l['or'] + 'j in ' + str(ij)
        else:
            # no selector: read everything
            isinstr = None
        df = pd.read_hdf(network, where=isinstr)
    elif not sparse:
        if logic == 'or':
            raise ValueError(
                'OR logic not implemented with array/dense format yet!')
        all_nodes = np.arange(network.shape[0])
        if t is None:
            t = np.arange(network.shape[-1])

        def index_grid(sources, targets, times):
            # (3, n) integer array holding every (i, j, t) combination
            combos = list(itertools.product(sources, targets, times))
            return np.array(combos, dtype=int).reshape(-1, 3).T

        if ij is not None:
            # ij matches a node in either column, so both the rows and the
            # columns of the selected nodes are needed.
            ind = np.hstack([index_grid(ij, all_nodes, t),
                             index_grid(all_nodes, ij, t)])
        else:
            if i is None:
                i = all_nodes
            if j is None:
                j = all_nodes
            ind = index_grid(i, j, t)
        edges = network[ind[0], ind[1], ind[2]]
        present = edges != 0
        df = pd.DataFrame(data={'i': ind[0][present],
                                'j': ind[1][present],
                                't': ind[2][present],
                                'weight': edges[present]})
        df['i'] = df['i'].astype(int)
        df['j'] = df['j'].astype(int)
        # the two ij grids overlap where both nodes are in ij
        df = df.drop_duplicates(['i', 'j', 't']).reset_index(drop=True)
        if nettype is None:
            nettype = gen_nettype(network)
        if netshape is None:
            netshape = (network.shape[0], network.shape[-1])
        if nettype[1] == 'u':
            df = df_drop_ij_duplicates(df)
    else:
        l = {'or': operator.__or__, 'and': operator.__and__}
        conditions = []
        if i is not None:
            conditions.append(network['i'].isin(i))
        if j is not None:
            conditions.append(network['j'].isin(j))
        if ij is not None:
            # node may sit in either column; this part is always an OR
            conditions.append(network['i'].isin(ij) | network['j'].isin(ij))
        if t is not None:
            conditions.append(network['t'].isin(t))
        if conditions:
            mask = conditions[0]
            for condition in conditions[1:]:
                mask = l[logic](mask, condition)
            df = network[mask]
        else:
            # no selector: every edge matches
            df = network.copy()
    if copy:
        df = df.copy()
    if asarray:
        df = df_to_array(df, netshape, nettype)
    return df
def create_supraadjacency_matrix(tnet, intersliceweight=1):
    """
    Returns a supraadjacency matrix from a temporal network structure.

    Every (node, time) pair becomes its own node with index
    ``node + n_nodes * time``. In addition, each such node is linked to the
    node with the same label at the next time point.

    Parameters
    --------
    tnet : teneto.TemporalNetwork
        Temporal network (any network type). Graphlet and contact input is
        converted with process_input.
    intersliceweight : int
        Weight that links the same node from adjacent time-points.

    Returns
    --------
    supranet : dataframe
        Supraadjacency matrix as an edge list with columns i, j and weight.
    """
    tnet = process_input(tnet, ['G', 'C', 'TN'], 'TN', forcesparse=True)

    # Shift node indices by one block of nodes per time point.
    slice_offset = tnet.netshape[0] * tnet.network['t']
    newnetwork = tnet.network.copy()
    newnetwork['i'] = tnet.network['i'] + slice_offset
    newnetwork['j'] = tnet.network['j'] + slice_offset
    if 'weight' not in newnetwork.columns:
        newnetwork['weight'] = 1
    newnetwork = newnetwork.drop('t', axis=1)

    # Connections between the same node at consecutive time points.
    n_supranodes = tnet.N * tnet.T
    timepointconns = pd.DataFrame({
        'i': np.arange(0, n_supranodes - tnet.N),
        'j': np.arange(tnet.N, n_supranodes),
    })
    timepointconns['weight'] = intersliceweight

    supranet = pd.concat([newnetwork, timepointconns])
    return supranet.reset_index(drop=True)
def df_drop_ij_duplicates(df):
    """
    Drop edges that repeat the same node pair at the same time point.

    Two rows count as duplicates when they have the same 't' and the same
    pair of nodes, regardless of which node is in column 'i' and which in
    'j'. The first occurrence is kept.

    Parameters
    ----------
    df : pandas dataframe
        Edge list with columns 'i', 'j' and 't'.

    Returns
    -------
    df : pandas dataframe
        The same dataframe object, modified in place, with duplicates
        removed and the index reset.
    """
    # Order-independent key for each edge: the sorted node pair.
    node_pairs = zip(df['i'].values, df['j'].values)
    df['ij'] = [tuple(sorted(pair)) for pair in node_pairs]
    df.drop_duplicates(['ij', 't'], inplace=True)
    df.reset_index(inplace=True, drop=True)
    # The helper column is only needed for the comparison.
    df.drop('ij', inplace=True, axis=1)
    return df
def check_input(netin, rasie_if_undirected=1, conmat=0):
    """
    This function identifies what kind of network representation netin is.

    Parameters
    ----------
    netin : array, dict, dataframe or object
        Temporal network (graphlet, contact, dataframe edge list, or an
        object with a ``network`` attribute).
    rasie_if_undirected : int, default=1
        Options 1 or 0. If 1, an error is raised when the input type cannot
        be identified.
    conmat : int, default=0
        Options 1 or 0. If 1, input is allowed to be a 2 dimensional
        connectivity matrix. Allows output to be 'M'.

    Returns
    -------
    inputtype : str
        String indicating input type: 'G' (graphlet), 'C' (contact),
        'TN' (object with a ``network`` attribute), 'DF' (pandas dataframe),
        'M' or 'U' (unknown). 'M' is a special case only allowed when
        conmat=1 for a square connectivity matrix.

    Raises
    ------
    ValueError
        If the input cannot be identified and rasie_if_undirected is 1.
    """
    inputis = 'U'
    if isinstance(netin, np.ndarray):
        netshape = netin.shape
        # guard against arrays with fewer than two dimensions
        is_square = len(netshape) >= 2 and netshape[0] == netshape[1]
        if len(netshape) == 3 and is_square:
            inputis = 'G'
        elif is_square and conmat == 1:
            inputis = 'M'
    elif isinstance(netin, dict):
        required = ('nettype', 'contacts', 'dimord', 'timetype')
        if all(key in netin for key in required):
            if (netin['nettype'] in {'bd', 'bu', 'wd', 'wu'}
                    and netin['timetype'] == 'discrete'
                    and netin['dimord'] == 'node,node,time'):
                inputis = 'C'
    # The dataframe test has to come before the generic attribute test:
    # every value is an instance of object, so a catch-all placed first
    # would make this branch unreachable.
    elif isinstance(netin, pd.DataFrame):
        inputis = 'DF'
    elif hasattr(netin, 'network'):
        inputis = 'TN'

    if rasie_if_undirected == 1 and inputis == 'U':
        raise ValueError(
            'Input cannot be identified as graphlet or contact representation')
    return inputis
def process_input(netin, allowedformats, outputformat='G', forcesparse=False):
    """
    Identify the type of the input network and convert it to the requested format.

    Parameters
    ----------
    netin : array, dict, dataframe or teneto.TemporalNetwork
        Network (graphlet, contact, dataframe or object).
    allowedformats : list or str
        Which formats of network objects are allowed.
        Options: 'C', 'TN', 'G'.
    outputformat : str, default='G'
        Target output format. Options: 'G', 'C', 'TN' or 'DF'. 'DF' is
        handled as 'TN' with forcesparse=True, and the network attribute of
        the resulting object is returned.
    forcesparse : bool, default=False
        Passed on to teneto.TemporalNetwork when a sparse representation
        should be enforced.

    Returns
    -------
    C : dict
        Contact representation, when outputformat is 'C'.

    OR

    tnet : array
        Graphlet representation, when outputformat is 'G'.
    netinfo : dict
        Metainformation about network (returned together with tnet).

    OR

    tnet : object
        Object of teneto.TemporalNetwork class (or its network attribute if
        'DF' was requested), when outputformat is 'TN'.

    Raises
    ------
    ValueError
        If the combination of input type, allowed formats and output format
        is not supported.
    """
    netinfo = {}
    return_df = outputformat == 'DF'
    if return_df:
        outputformat = 'TN'
        forcesparse = True

    inputtype = check_input(netin)
    # Dataframes are wrapped and then treated like any other TN object.
    if inputtype == 'DF':
        netin = teneto.TemporalNetwork(from_df=netin)
        inputtype = 'TN'

    if inputtype == 'TN' and 'TN' in allowedformats:
        if outputformat != 'TN':
            # Convert TN to array representation
            tnet = netin.df_to_array() if netin.sparse else netin.network
            netinfo = {
                'nettype': netin.nettype,
                'netshape': [netin.netshape[0], netin.netshape[0], netin.netshape[1]],
            }
        elif not netin.sparse and forcesparse:
            tnet = teneto.TemporalNetwork(
                from_array=netin.network, forcesparse=True)
        else:
            tnet = netin
    elif inputtype == 'C' and 'C' in allowedformats and outputformat == 'G':
        tnet = contact2graphlet(netin)
        netinfo = dict(netin)
        netinfo.pop('contacts')
    elif inputtype == 'C' and 'C' in allowedformats and outputformat == 'TN':
        tnet = teneto.TemporalNetwork(from_dict=netin)
    elif inputtype == 'G' and 'G' in allowedformats:
        if outputformat == 'TN':
            tnet = teneto.TemporalNetwork(
                from_array=netin, forcesparse=forcesparse)
        else:
            # Get network type and shape from the array itself
            netinfo = {'netshape': netin.shape, 'nettype': gen_nettype(netin)}
            tnet = netin
    elif inputtype == 'C' and outputformat == 'C':
        # contact in, contact out: nothing to convert
        pass
    else:
        raise ValueError('Input invalid.')

    if outputformat == 'TN' and isinstance(tnet.network, pd.DataFrame):
        for column in ('i', 'j', 't'):
            tnet.network[column] = tnet.network[column].astype(int)
    if outputformat in ('C', 'G'):
        netinfo['inputtype'] = inputtype
    if inputtype != 'C' and outputformat == 'C':
        return graphlet2contact(tnet, netinfo)
    if outputformat == 'G':
        return tnet, netinfo
    if outputformat == 'C':
        return netin
    if outputformat == 'TN':
        return tnet.network if return_df else tnet