Can you make the objective function better via gm, compared to the purported 1-1?
Running GM on time stamped data with a known 1-1 correspondance
# collapse
import sys
sys.path
sys.path.insert(0, '/Users/asaadeldin/Downloads/GitHub/scipy')
from scipy.optimize import quadratic_assignment
# collapse
%pylab inline
import pandas as pd
from graspy.utils import pass_to_ranks
import seaborn as sns
Experiment Summary
If $A_i$ is the adjacency matrix at time index $i$, then with $n$ time indices, for $i = [1, n-1]$ do $GM(A_i, A_{i+1})$, where $A_i$ and $A_{i+1}$ are pre-matched based on the known 1-1 correspondance.
For each graph pair, run $GM$ $t = 20$ times, with each $t$ corresponding to a different random permutation on $A_{i+1}$.
Internally in GM, $A_{i+1}$ is shuffled, that is $A_{i+1}' = Q A_{i+1} Q^T,$ where $Q$ is sampled uniformly from the set of $m x m$ permutations matrices, where $m$ is the size of the vertex set.
$GM$ is run from the barycenter ($\gamma = 0$).
Compare the objective function values of the matrices with the known matching ($trace (A_i A_{i+1}^T)$) and the average objective function resulting from $t$ runs of $GM(A_i, A_{i+1})$
# collapse
def load_adj(file):
df = pd.read_csv(f'org_sig1_max/{file}.csv', names = ['from', 'to', 'weight'])
return df
# collapse
times = [1,4,11,17,25,34,45,48,52,55,63,69,70,76,80,83,90,97,103,111,117,129,130,132,139,140,146,153,160,167,
174,181,188,192,195,202,209,216,223,229]
# collapse
from scipy.stats import sem
t = 20
ofvs = np.zeros((len(times)-1,3)) # [opt_ofv, gm_ofv]
for i in range(len(times)-1):
# constructing the adjacency matrices
Ael = load_adj(times[i])
Bel = load_adj(times[i+1])
nodes = np.concatenate((Ael['from'],Ael['to'],Bel['from'],Bel['to']), axis=0)
nodes = list(set(nodes))
n = len(nodes)
A = np.zeros((n,n))
B = np.zeros((n,n))
row_list_A = [nodes.index(x) for x in Ael['from']]
col_list_A = [nodes.index(x) for x in Ael['to']]
A[row_list_A, col_list_A] = Ael['weight']
row_list_B = [nodes.index(x) for x in Bel['from']]
col_list_B = [nodes.index(x) for x in Bel['to']]
B[row_list_B, col_list_B] = Bel['weight']
A = pass_to_ranks(A)
B = pass_to_ranks(B)
gm_ofvs = np.zeros(t)
for j in range(t):
gmp = {'maximize':True}
res = quadratic_assignment(A,B, options=gmp)
gm_ofvs[j] = res.fun
gm_ofv = np.mean(gm_ofvs)
gm_error = sem(gm_ofvs)
opt_ofv = (A*B).sum()
ofvs[i,:] = [opt_ofv, gm_ofv, 2*gm_error]
# collapse
sns.set_context('paper')
sns.set(rc={'figure.figsize':(12,8)})
plt.scatter(np.arange(len(times)-1), ofvs[:,0], label = 'opt ofv')
#plt.scatter(np.arange(len(times)), ofvs[:,1], label = 'gm ofv')
plt.errorbar(np.arange(len(times)-1),ofvs[:,1], ofvs[:,2],label = 'average gm ofv +/- 2 s.e.',marker='o', fmt = ' ' ,capsize=3, elinewidth=1, markeredgewidth=1,color='orange')
plt.legend()
plt.ylabel('objective function value')
plt.xlabel('time stamp (A_x & A_{x+1})')
# collapse
plt.scatter(np.arange(len(times)-1), ofvs[:,1]/ofvs[:,0])
plt.hlines(1,0,40,linestyles='dashed', color = 'red', label='y=1 (above means gm maximes ofv more)')
plt.legend()
plt.xlabel('Time Stamp')
plt.ylabel('gm_ofv / pre-matched_ofv')
# collapse
df = pd.DataFrame(ofvs,columns=["Pre Matched OFV","Avergae GM OFV", "2*s.e. GM OFV"])
print(df)