-
Notifications
You must be signed in to change notification settings - Fork 0
/
augment_pair_result.py
62 lines (46 loc) · 1.51 KB
/
augment_pair_result.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
"""
Augment our results (seed pairs) with various statistics.

Seed pairs are excluded when:
- C1 or C2 is small (at most `threshold` nodes)
"""
import pickle
import sys

import networkx as nx
import numpy as np
import pandas as pd

from helpers import (
    pos_adj, neg_adj
)
from stat_helpers import populate_fields
# Command line: <graph> [k]
#   graph -- name used to build the input/output file paths
#   k     -- optional integer; when given, only rows with that 'k' are kept
graph = sys.argv[1]
k = int(sys.argv[2]) if len(sys.argv) > 2 else None
if k is None:
    print('dropping k')
else:
    print('restricting k={}'.format(k))
# Load the pickled graph. nx.read_gpickle was removed in NetworkX 3.0, so the
# file is read with pickle directly (works on old and new NetworkX alike).
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open('graphs/{}.pkl'.format(graph), 'rb') as graph_file:
    g = pickle.load(graph_file)

# Adjacency matrix whose entries come from the 'sign' edge attribute.
# nx.adj_matrix was removed in NetworkX 3.0; adjacency_matrix is the
# long-standing equivalent name.
A = nx.adjacency_matrix(g, weight='sign')

# Seed-pair results produced earlier for this graph.
df = pd.read_pickle('outputs/{}_seed_pair.pkl'.format(graph))
if k is not None:
    # Keep only the requested k. Copy so that later column assignments act on
    # an independent frame rather than on a slice of the loaded one.
    df = df[df['k'] == k].copy()
print(df.shape[0], ' rows')
# Split the signed adjacency matrix into its positive and negative parts.
pos_A = pos_adj(A)
neg_A = neg_adj(A)

# For a weighted matrix, make it unweighted: if any stored entry differs
# from 1, replace all stored entries with float64 ones. A matrix whose
# entries are already all 1 is left untouched (its dtype is not changed).
for signed_adj in (pos_A, neg_A):
    if (signed_adj.data != 1).any():
        signed_adj.data = np.ones(signed_adj.data.shape, dtype='float64')
# Size cutoff: a pair is kept only when BOTH C1 and C2 have more than this
# many entries.
threshold = 5

# Size of each side of the pair (first dimension of the stored C1 / C2 objects).
size1 = df['C1'].apply(lambda d: d.shape[0])
size2 = df['C2'].apply(lambda d: d.shape[0])
large_enough = (size1 > threshold) & (size2 > threshold)

# Filter first and take an explicit copy, then add the new columns. Assigning
# columns onto a boolean-filtered frame triggers pandas' SettingWithCopyWarning.
df = df[large_enough].copy()
df['size1'] = size1[large_enough]
df['size2'] = size2[large_enough]

# Share of the smaller side in the combined size; 0.5 means equal sizes.
df['balancedness'] = np.minimum(df['size1'], df['size2']) / (df['size1'] + df['size2'])

# Degree of each seed node in the graph.
df['q1deg'] = df['seed1'].apply(lambda n: g.degree(n))
df['q2deg'] = df['seed2'].apply(lambda n: g.degree(n))

# Remaining statistics are filled in by the project helper, which is given
# the positive and negative adjacency matrices.
df = populate_fields(df, pos_A, neg_A)
# Write the augmented frame; the file name carries a _k<k> suffix when the
# run was restricted to a single k.
if k is None:
    out_path = 'outputs/{}_seed_pair_aug.pkl'.format(graph)
else:
    out_path = 'outputs/{}_seed_pair_aug_k{}.pkl'.format(graph, k)
df.to_pickle(out_path)