Modularity¶
Modularity is a mathematical measure of how assortative a network is under a given partition of its nodes into groups (communities, modules, etc.). Here we apply a modularity-maximization algorithm to the maggot network, split by edge type, and compare the resulting partitions and their modularity scores.
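For reference, the quantity being maximized below is the usual Newman-style modularity. For an undirected, weighted graph with adjacency matrix $A$, total edge weight $m$, weighted node degrees $k_i$, community assignments $c_i$, and resolution parameter $\gamma$ (here $\gamma = 1$),

$$
Q = \frac{1}{2m} \sum_{ij} \left( A_{ij} - \gamma \frac{k_i k_j}{2m} \right) \delta(c_i, c_j).
$$

This is the standard textbook definition; the exact conventions used internally by graspologic's `leiden` and `modularity` (e.g., handling of self-loops) may differ in minor details.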
Preliminaries¶
from pkg.utils import set_warnings
import datetime
import time
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import seaborn as sns
from giskard.plot import crosstabplot
from graspologic.partition import leiden, modularity
from graspologic.utils import symmetrize
from pkg.data import load_maggot_graph, load_palette
from pkg.io import savefig
from pkg.plot import set_theme
from sklearn.metrics import adjusted_rand_score
from src.hierarchy import signal_flow
from src.visualization import adjplot
t0 = time.time()
set_theme()
def stashfig(name, **kwargs):
    foldername = "modularity"
    savefig(name, foldername=foldername, **kwargs)
Load the data¶
edge_types = [
    "ad",
    "aa",
    "dd",
    "da",
    "sum",
]
colors = sns.color_palette("deep", desat=None)
edge_type_palette = dict(zip(edge_types, colors))
nice_edge_types = dict(
    zip(edge_types, [r"A $\to$ D", r"A $\to$ A", r"D $\to$ D", r"D $\to$ A", "Sum"])
)
palette = load_palette()
mg = load_maggot_graph()
mg = mg[mg.nodes["paper_clustered_neurons"]]
mg.to_largest_connected_component()
adj = mg.sum.adj
sf = signal_flow(adj)
mg.nodes["neg_sf"] = -sf
mg.nodes["sf"] = sf
Optimize modularity¶
Here I use the Leiden algorithm to find partitions which maximize the modularity score, keeping the best partition over several random restarts.
def preprocess_for_leiden(adj):
    sym_adj = adj.copy()
    sym_adj = symmetrize(sym_adj)
    # convert the symmetrized adjacency matrix to an undirected networkx graph
    # NOTE: have to do some weirdness because leiden only wants str keys right now
    undirected_g = nx.from_numpy_array(sym_adj)
    str_arange = [f"{i}" for i in range(len(undirected_g))]
    arange = np.arange(len(undirected_g))
    str_node_map = dict(zip(arange, str_arange))
    nx.relabel_nodes(undirected_g, str_node_map, copy=False)
    return undirected_g
def optimize_leiden(g, n_restarts=25, resolution=1, randomness=0.001):
    best_modularity = -np.inf
    best_partition = {}
    # run Leiden several times, keeping the partition with the highest modularity
    for _ in range(n_restarts):
        partition = leiden(
            g,
            resolution=resolution,
            randomness=randomness,
            check_directed=False,
            extra_forced_iterations=10,
        )
        modularity_score = modularity(g, partitions=partition, resolution=resolution)
        if modularity_score > best_modularity:
            best_partition = partition
            best_modularity = modularity_score
    return best_partition, best_modularity
nodelist = mg.nodes.index
n_restarts = 50
randomness = 0.01
resolution = 1.0
rows = []
currtime = time.time()
for edge_type in edge_types:
    edge_type_mg = mg.to_edge_type_graph(edge_type)
    adj = edge_type_mg.adj
    sym_g = preprocess_for_leiden(adj)
    partition, modularity_score = optimize_leiden(
        sym_g,
        n_restarts=n_restarts,
        resolution=resolution,
        randomness=randomness,
    )
    str_arange = [f"{i}" for i in range(len(sym_g))]
    flat_partition = list(map(partition.get, str_arange))
    row = {
        "modularity_score": modularity_score,
        "resolution": resolution,
        "edge_type": edge_type,
        "nice_edge_type": nice_edge_types[edge_type],
        "partition": flat_partition,
        "adj": adj,
        "g": sym_g,
    }
    rows.append(row)
print(f"{time.time() - currtime:.3f} seconds elapsed to fit all partitions.")
results = pd.DataFrame(rows)
149.793 seconds elapsed to fit all partitions.
Plot results¶
Plot the adjacency matrices sorted by fit partition¶
for _, row in results.iterrows():
    edge_type = row["edge_type"]
    nice_edge_type = nice_edge_types[edge_type]
    adj = row["adj"]
    meta = mg.nodes
    meta["partition"] = row["partition"]
    meta["partition"] = meta["partition"].fillna(-1)
    meta["is_unpartitioned"] = meta["partition"] == -1
    modularity_score = row["modularity_score"]
    adjplot(
        adj,
        meta=meta,
        sort_class="partition",
        class_order=["is_unpartitioned", "neg_sf"],
        colors="simple_group",
        palette=palette,
        plot_type="scattermap",
        item_order=["simple_group", "neg_sf"],
        ticks=False,
        sizes=(1, 2),
        gridline_kws=dict(linewidth=0),
        title=f"{nice_edge_type}, Modularity = {modularity_score:.2f}",
        blocklines="partition",
    )
    stashfig(f"modularity-sorted-adj-edge_type={edge_type}")
Plot the modularity for each edge type¶
fig, ax = plt.subplots(1, 1, figsize=(8, 6))
sns.stripplot(
    data=results, jitter=False, x="nice_edge_type", y="modularity_score", ax=ax
)
ax.set(ylabel="Modularity", xlabel="")
stashfig("modularity-by-edge-type")
Conclusion
The different edge types have distinct modularities (we should be able to get p-values for this). In particular, the A → D graph appears to be the least modular.
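As a sketch of how such p-values might be obtained (not run here, and only one of several reasonable designs): refit Leiden many times on two edge-type graphs, keep every restart's modularity score rather than only the best, and compare the two score distributions with a nonparametric test. The helper `restart_modularities` below is hypothetical, `scipy` is assumed to be available, and treating restart-to-restart variability as the noise model is itself an assumption; `preprocess_for_leiden`, `leiden`, `modularity`, and `mg` are the objects defined above.

from scipy.stats import mannwhitneyu

def restart_modularities(adj, n_restarts=50, resolution=1.0, randomness=0.01):
    # keep the modularity score from every Leiden restart, not just the best one
    g = preprocess_for_leiden(adj)
    scores = []
    for _ in range(n_restarts):
        partition = leiden(g, resolution=resolution, randomness=randomness, check_directed=False)
        scores.append(modularity(g, partitions=partition, resolution=resolution))
    return np.array(scores)

# hypothetical comparison of the "ad" and "aa" graphs
ad_scores = restart_modularities(mg.to_edge_type_graph("ad").adj)
aa_scores = restart_modularities(mg.to_edge_type_graph("aa").adj)
stat, pvalue = mannwhitneyu(ad_scores, aa_scores, alternative="two-sided")
print(f"Mann-Whitney U p-value (ad vs. aa modularity across restarts): {pvalue:.3g}")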
Plot the similarities between modularity-maximizing partitions¶
def compute_pairwise_ari(results):
    pairwise = pd.DataFrame(index=results.index, columns=results.index, dtype=float)
    for idx1, row1 in results.iterrows():
        for idx2, row2 in results.iterrows():
            partition1 = np.array(row1["partition"])
            partition2 = np.array(row2["partition"])
            # mark unpartitioned nodes (None) and compare only nodes that both
            # partitions assigned to a community
            partition1[partition1 == None] = -1
            partition2[partition2 == None] = -1
            isvalid1 = partition1 != -1
            isvalid2 = partition2 != -1
            keep_inds = np.where(isvalid1 & isvalid2)[0]
            ari = adjusted_rand_score(partition1[keep_inds], partition2[keep_inds])
            pairwise.loc[idx1, idx2] = ari
    return pairwise
pairwise_ari = compute_pairwise_ari(results.set_index("nice_edge_type"))
fig, ax = plt.subplots(1, 1, figsize=(6, 6))
inds = np.tril_indices(len(pairwise_ari), k=0)
mask = np.zeros((len(pairwise_ari), len(pairwise_ari)), dtype=bool)
mask[inds] = 1
sns.heatmap(
    pairwise_ari,
    ax=ax,
    square=True,
    annot=True,
    mask=mask,
    vmin=0,
    vmax=1,
    center=0,
    cmap="RdBu_r",
    cbar=False,
)
ax.set(xlabel="Edge type", ylabel="Edge type", title="Partition ARIs")
plt.setp(ax.get_yticklabels(), rotation=0)
stashfig("pairwise-aris-modularity")
Conclusion
The modularity-maximizing partitions for the different edge types are quite different from one another. Put another way, the "modules" found by looking at each edge type separately do not appear very similar.
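For calibration (a toy check, not part of the analysis above): ARI is 1 for identical labelings and close to 0 for unrelated ones, so values near 0 in the heatmap indicate near-independent partitions. The seed and label counts below are arbitrary.

# toy calibration of ARI: identical labelings score 1, unrelated ones score ~0
rng = np.random.default_rng(8888)
labels = rng.integers(0, 5, size=1000)
shuffled = rng.permutation(labels)
print(adjusted_rand_score(labels, labels))    # 1.0
print(adjusted_rand_score(labels, shuffled))  # approximately 0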
Plot the modularity of one edge type graph under another’s fit partition¶
edge_sorted_results = results.set_index("nice_edge_type")
pairwise_modularity = pd.DataFrame(
    index=edge_sorted_results.index, columns=edge_sorted_results.index, dtype=float
)
for idx1, row1 in edge_sorted_results.iterrows():
    for idx2, row2 in edge_sorted_results.iterrows():
        # score the target edge type's graph under the source edge type's partition
        source_partition = [str(s) for s in row1["partition"]]
        nodes = [str(s) for s in range(len(source_partition))]
        partition_map = dict(zip(nodes, source_partition))
        target_g = row2["g"]
        modularity_score = modularity(target_g, partition_map)
        pairwise_modularity.loc[idx1, idx2] = modularity_score
pairwise_modularity
fig, ax = plt.subplots(1, 1, figsize=(6, 6))
sns.heatmap(
    pairwise_modularity,
    ax=ax,
    square=True,
    annot=True,
    vmin=0,
    vmax=1,
    center=0,
    cmap="RdBu_r",
    cbar=False,
)
ax.set(
    xlabel="Target graph (edge type)",
    ylabel="Source partition (edge type)",
    title="Modularity",
)
plt.setp(ax.get_yticklabels(), rotation=0)
stashfig("pairwise-modularity")
Plot the composition of the predicted modules¶
for _, row in results.iterrows():
    edge_type = row["edge_type"]
    nice_edge_type = nice_edge_types[edge_type]
    adj = row["adj"]
    meta = mg.nodes
    meta["partition"] = row["partition"]
    ax = crosstabplot(
        meta,
        group="partition",
        group_order="sf",
        hue="simple_group",
        hue_order="sf",
        palette=palette,
    )
    ax.set(xlabel="Module", xticks=[])
    stashfig(f"{edge_type}-crosstabplot")
End¶
elapsed = time.time() - t0
delta = datetime.timedelta(seconds=elapsed)
print("----")
print(f"Script took {delta}")
print(f"Completed at {datetime.datetime.now()}")
print("----")
----
Script took 0:03:36.493232
Completed at 2021-04-27 09:04:25.451133
----