#!/usr/bin/env python3
import collections
VALID_NORMALIZE_METHODS = [None, 'type']
DEFAULT_NORMALIZE_METHOD = VALID_NORMALIZE_METHODS[1]
# The drop-duplicates methods are named for the uniqueness condition they
# enforce on edges:
#   'type':  unique based on {node1, node2, type}
#   'node':  unique based on {node1, node2}
#   'exact': unique based on {node1, node2, weight, type}
# They are listed in order of preference/likelihood.
VALID_DROP_DUPLICATES_METHOD = [None, 'type', 'node', 'exact']
DROP_DUPLICATES_METHOD = VALID_DROP_DUPLICATES_METHOD[1]
def add_config_args(parser):
"""
Add arguments specific to this module.
Parameters:
        parser (argparse.ArgumentParser): the parser to add arguments to
Returns:
        argparse.ArgumentParser: the parser with the arguments added
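    Example (a minimal sketch; the flag values shown are hypothetical):
        >>> import argparse
        >>> parser = add_config_args(argparse.ArgumentParser())
        >>> args = parser.parse_args(['--make_undirected', '--normalize'])
        >>> args.make_undirected, args.normalize_method
        (True, 'type')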
"""
    parser.add_argument('-und', '--make_undirected', action='store_true',
                        help='add the reverse of every edge to the network')
    parser.add_argument('-unw', '--make_unweighted', action='store_true',
                        help='set the weight of every edge to 1')
    parser.add_argument('-norm', '--normalize', action='store_true',
                        help='normalize the edge weights')
    parser.add_argument('-normm', '--normalize_method', type=str,
                        choices=VALID_NORMALIZE_METHODS,
                        default=DEFAULT_NORMALIZE_METHOD,
                        help='the method used to normalize the edge weights')
    parser.add_argument('-na', '--ignore_nas', action='store_true',
                        help='ignore NA values in the input')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='print progress information while running')
return parser
def make_network_unweighted(n_df, wgt):
"""
Make the network unweighted, by setting the weights on all the edges to the
same value (1).
Parameters:
n_df (list): the data
wgt (int): the weight column
Returns:
list: the modified data
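    Example (a sketch assuming rows of the form [node1, node2, weight, type]):
        >>> make_network_unweighted([['a', 'b', 3.5, 'ppi']], 2)
        [['a', 'b', 1, 'ppi']]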
"""
return [n[:wgt] + [1] + n[wgt+1:] for n in n_df]
def make_network_undirected(n_df):
"""
    Make the network undirected. The input network is assumed to be
    symmetric, but with each edge listed in only one direction, so add the
    explicit reverse edge for every row. This assumes that the first two
    columns are the two nodes.
Parameters:
n_df (list): the data
Returns:
list: the modified data
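    Example (a sketch assuming rows of the form [node1, node2, weight, type]):
        >>> make_network_undirected([['a', 'b', 1, 'ppi']])
        [['a', 'b', 1, 'ppi'], ['b', 'a', 1, 'ppi']]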
"""
return n_df + [[n[1], n[0]] + n[2:] for n in n_df]
def sort_network(n_df):
"""
    Sort the network rows in descending order, so that duplicate edges are
    adjacent and the highest-weight row of each group of duplicates comes
    first (as drop_duplicates_by_type_or_node assumes).
Parameters:
n_df (list): the data
Returns:
list: the modified data
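    Example (rows compare lexicographically, so equal-node rows order by weight):
        >>> sort_network([['a', 'b', 1, 'ppi'], ['a', 'b', 3, 'ppi']])
        [['a', 'b', 3, 'ppi'], ['a', 'b', 1, 'ppi']]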
"""
return sorted(n_df, reverse=True)
def drop_duplicates_by_type_or_node(n_df, n1, n2, typ):
"""
    Drop the duplicates in the network, by type or by node. The input is
    assumed to be sorted (see sort_network), so that for each set of
    "duplicate" edges only the edge with the maximum weight is kept.
    By type, the duplicates are rows where the n1, n2, and typ columns are
    identical; by node, they are rows where n1 and n2 are identical.
Parameters:
n_df (list): the data
        n1 (int): the column for the first node
n2 (int): the column for the second node
typ (int): the column for the type
Returns:
list: the modified data
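    Example (with the module-level 'type' method; hypothetical rows, already
    sorted in descending order):
        >>> rows = [['a', 'b', 3, 'ppi'], ['a', 'b', 1, 'ppi']]
        >>> drop_duplicates_by_type_or_node(rows, 0, 1, 3)
        [['a', 'b', 3, 'ppi']]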
"""
    # Assumes n_df has been sorted by sort_network, so that within each group
    # of duplicate rows the highest-weight row comes first; iterate through
    # the rows and keep only the first row of each group.
prev_nd1_val = None
prev_nd2_val = None
prev_type_val = None
new_n_df = []
for row in n_df:
nd1_val = row[n1]
nd2_val = row[n2]
type_val = row[typ]
nodes_differ = nd1_val != prev_nd1_val or nd2_val != prev_nd2_val
type_differs = type_val != prev_type_val
        if (DROP_DUPLICATES_METHOD == 'node' and nodes_differ) or \
                (DROP_DUPLICATES_METHOD == 'type' and
                 (nodes_differ or type_differs)):
new_n_df.append(row)
prev_nd1_val = nd1_val
prev_nd2_val = nd2_val
prev_type_val = type_val
return new_n_df
def normalize_network_by_type(n_df, typ, wgt):
"""
    Normalize the edge weights of the network so that, within each type, the
    weights sum to 1. Currently normalizing by type is the only method
    implemented.
Parameters:
n_df (list): the data
typ (int): the type column
wgt (int): the weight column
Returns:
        list: the modified data, with each weight replaced by its normalized
        value formatted as a string
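    Example (a sketch with hypothetical rows; note the string weights in the
    result):
        >>> normalize_network_by_type([['a', 'b', 1, 'ppi'], ['c', 'd', 3, 'ppi']], 3, 2)
        [['a', 'b', '0.25', 'ppi'], ['c', 'd', '0.75', 'ppi']]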
"""
    # Total weight per edge type.
    sums = collections.Counter()
    for i in n_df:
        sums[i[typ]] += i[wgt]
    # Rows are lists, so splice the normalized weight back in as a
    # one-element list, formatted to six significant digits.
    return [i[:wgt] + ["{:.6g}".format(i[wgt] / sums[i[typ]])] + i[wgt+1:]
            for i in n_df]
def upper_triangle(n_df, n1, n2):
"""Makes a (sparse) matrix upper triangular.
"""
return [edge for edge in n_df if edge[n1] < edge[n2]]