Can you make the objective function better via gm, compared to the purported 1-1?
Running GM on time stamped data with a known 1-1 correspondance
# collapse
import sys
sys.path
sys.path.insert(0, '/Users/asaadeldin/Downloads/GitHub/scipy')
from scipy.optimize import quadratic_assignment
# collapse
%pylab inline
import pandas as pd
from graspy.utils import pass_to_ranks
import seaborn as sns
Experiment Summary
If is the adjacency matrix at time index , then with time indices, for do , where and are pre-matched based on the known 1-1 correspondance.
For each graph pair, run times, with each corresponding to a different random permutation on .
Internally in GM, is shuffled, that is where is sampled uniformly from the set of permutations matrices, where is the size of the vertex set.
is run from the barycenter ().
Compare the objective function values of the matrices with the known matching () and the average objective function resulting from runs of
# collapse
def load_adj(file):
df = pd.read_csv(f'org_sig1_max/{file}.csv', names = ['from', 'to', 'weight'])
return df
# collapse
times = [1,4,11,17,25,34,45,48,52,55,63,69,70,76,80,83,90,97,103,111,117,129,130,132,139,140,146,153,160,167,
174,181,188,192,195,202,209,216,223,229]
# collapse
from scipy.stats import sem
t = 20
ofvs = np.zeros((len(times)-1,3)) # [opt_ofv, gm_ofv]
for i in range(len(times)-1):
# constructing the adjacency matrices
Ael = load_adj(times[i])
Bel = load_adj(times[i+1])
nodes = np.concatenate((Ael['from'],Ael['to'],Bel['from'],Bel['to']), axis=0)
nodes = list(set(nodes))
n = len(nodes)
A = np.zeros((n,n))
B = np.zeros((n,n))
row_list_A = [nodes.index(x) for x in Ael['from']]
col_list_A = [nodes.index(x) for x in Ael['to']]
A[row_list_A, col_list_A] = Ael['weight']
row_list_B = [nodes.index(x) for x in Bel['from']]
col_list_B = [nodes.index(x) for x in Bel['to']]
B[row_list_B, col_list_B] = Bel['weight']
A = pass_to_ranks(A)
B = pass_to_ranks(B)
gm_ofvs = np.zeros(t)
for j in range(t):
gmp = {'maximize':True}
res = quadratic_assignment(A,B, options=gmp)
gm_ofvs[j] = res.fun
gm_ofv = np.mean(gm_ofvs)
gm_error = sem(gm_ofvs)
opt_ofv = (A*B).sum()
ofvs[i,:] = [opt_ofv, gm_ofv, 2*gm_error]
# collapse
sns.set_context('paper')
sns.set(rc={'figure.figsize':(12,8)})
plt.scatter(np.arange(len(times)-1), ofvs[:,0], label = 'opt ofv')
#plt.scatter(np.arange(len(times)), ofvs[:,1], label = 'gm ofv')
plt.errorbar(np.arange(len(times)-1),ofvs[:,1], ofvs[:,2],label = 'average gm ofv +/- 2 s.e.',marker='o', fmt = ' ' ,capsize=3, elinewidth=1, markeredgewidth=1,color='orange')
plt.legend()
plt.ylabel('objective function value')
plt.xlabel('time stamp (A_x & A_{x+1})')
# collapse
plt.scatter(np.arange(len(times)-1), ofvs[:,1]/ofvs[:,0])
plt.hlines(1,0,40,linestyles='dashed', color = 'red', label='y=1 (above means gm maximes ofv more)')
plt.legend()
plt.xlabel('Time Stamp')
plt.ylabel('gm_ofv / pre-matched_ofv')
# collapse
df = pd.DataFrame(ofvs,columns=["Pre Matched OFV","Avergae GM OFV", "2*s.e. GM OFV"])
print(df)