Modularity

Modularity is a mathematical notion of how assortative a network is under some partition of the network into different groups (or communities, or modules, etc.). Here we apply a modularity maximization algorithm to the maggot network separated by edge types and compare various properties of the partitions and modularities.

Preliminaries

from pkg.utils import set_warnings

import datetime
import time

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import seaborn as sns
from giskard.plot import crosstabplot
from graspologic.partition import leiden, modularity
from graspologic.utils import symmetrize
from pkg.data import load_maggot_graph, load_palette
from pkg.io import savefig
from pkg.plot import set_theme
from sklearn.metrics import adjusted_rand_score
from src.hierarchy import signal_flow
from src.visualization import adjplot

t0 = time.time()

set_theme()


def stashfig(name, **kwargs):
    foldername = "modularity"
    savefig(name, foldername=foldername, **kwargs)

Load the data

edge_types = [
    "ad",
    "aa",
    "dd",
    "da",
    "sum",
]
colors = sns.color_palette("deep", desat=None)
edge_type_palette = dict(zip(edge_types, colors))
nice_edge_types = dict(
    zip(edge_types, [r"A $\to$ D", r"A $\to$ A", r"D $\to$ D", r"D $\to$ A", "Sum"])
)
palette = load_palette()
mg = load_maggot_graph()
mg = mg[mg.nodes["paper_clustered_neurons"]]
mg.to_largest_connected_component()
adj = mg.sum.adj
sf = signal_flow(adj)
mg.nodes["neg_sf"] = -sf
mg.nodes["sf"] = sf

Optimize modularity

Here I use Leiden to find partitions which maximize the modularity score.

def preprocess_for_leiden(adj):
    sym_adj = adj.copy()
    sym_adj = symmetrize(sym_adj)

    # convert datatype
    # NOTE: have to do some weirdness because leiden only wants str keys right now
    undirected_g = nx.from_numpy_array(sym_adj)
    str_arange = [f"{i}" for i in range(len(undirected_g))]
    arange = np.arange(len(undirected_g))
    str_node_map = dict(zip(arange, str_arange))
    nx.relabel_nodes(undirected_g, str_node_map, copy=False)
    return undirected_g


def optimize_leiden(g, n_restarts=25, resolution=1, randomness=0.001):
    best_modularity = -np.inf
    best_partition = {}
    for i in range(n_restarts):
        partition = leiden(
            g,
            resolution=resolution,
            randomness=randomness,
            check_directed=False,
            extra_forced_iterations=10,
        )
        modularity_score = modularity(g, partitions=partition, resolution=resolution)
        if modularity_score > best_modularity:
            best_partition = partition
            best_modularity = modularity_score
    return best_partition, best_modularity


nodelist = mg.nodes.index
n_restarts = 50
randomness = 0.01
resolution = 1.0
rows = []
currtime = time.time()
for edge_type in edge_types:
    edge_type_mg = mg.to_edge_type_graph(edge_type)
    adj = edge_type_mg.adj
    sym_g = preprocess_for_leiden(adj)
    partition, modularity_score = optimize_leiden(
        sym_g,
        n_restarts=n_restarts,
        resolution=resolution,
        randomness=randomness,
    )
    str_arange = [f"{i}" for i in range(len(sym_g))]
    flat_partition = list(map(partition.get, str_arange))
    row = {
        "modularity_score": modularity_score,
        "resolution": resolution,
        "edge_type": edge_type,
        "nice_edge_type": nice_edge_types[edge_type],
        "partition": flat_partition,
        "adj": adj,
        "g": sym_g,
    }
    rows.append(row)

print(f"{time.time() - currtime:.3f} seconds elapsed to fit all partitions.")
results = pd.DataFrame(rows)
149.793 seconds elapsed to fit all partitions.

Plot results

Plot the adjacency matrices sorted by fit partition

for _, row in results.iterrows():
    edge_type = row["edge_type"]
    nice_edge_type = nice_edge_types[edge_type]
    adj = row["adj"]
    meta = mg.nodes
    meta["partition"] = row["partition"]
    meta["partition"] = meta["partition"].fillna(-1)
    meta["is_unpartitioned"] = meta["partition"] == -1
    modularity_score = row["modularity_score"]
    adjplot(
        adj,
        meta=meta,
        sort_class="partition",
        class_order=["is_unpartitioned", "neg_sf"],
        colors="simple_group",
        palette=palette,
        plot_type="scattermap",
        item_order=["simple_group", "neg_sf"],
        ticks=False,
        sizes=(1, 2),
        gridline_kws=dict(linewidth=0),
        title=f"{nice_edge_type}, Modularity = {modularity_score:.2f}",
        blocklines="partition",
    )
    stashfig(f"modularity-sorted-adj-edge_type={edge_type}")
_images/modularity_9_0.png _images/modularity_9_1.png _images/modularity_9_2.png _images/modularity_9_3.png _images/modularity_9_4.png

Plot the modularity for each edge type

fig, ax = plt.subplots(1, 1, figsize=(8, 6))
sns.stripplot(
    data=results, jitter=False, x="nice_edge_type", y="modularity_score", ax=ax
)
ax.set(ylabel="Modularity", xlabel="")
stashfig("modularity-by-edge-type")
_images/modularity_11_0.png

Conclusion

The different edge types have distinct modularities (we should be able to get p-values for this). In particular, the A to D seems the least modular.

Plot the similarities between modularity-maximizing partitions

def compute_pairwise_ari(results):
    pairwise = pd.DataFrame(index=results.index, columns=results.index, dtype=float)
    # pairwise.index.name = None
    # pairwise.columns.name = None
    for idx1, row1 in results.iterrows():
        for idx2, row2 in results.iterrows():
            partition1 = row1["partition"]
            partition2 = row2["partition"]
            partition1 = np.array(partition1)
            partition2 = np.array(partition2)
            partition1[partition1 == None] = -1
            partition2[partition2 == None] = -1
            isvalid1 = partition1 != -1
            isvalid2 = partition2 != -1
            keep_inds = np.where(isvalid1 & isvalid2)[0]
            ari = adjusted_rand_score(partition1[keep_inds], partition2[keep_inds])
            pairwise.loc[idx1, idx2] = ari
    return pairwise


pairwise_ari = compute_pairwise_ari(results.set_index("nice_edge_type"))

fig, ax = plt.subplots(1, 1, figsize=(6, 6))

inds = np.tril_indices(len(pairwise_ari), k=0)
mask = np.zeros((len(pairwise_ari), len(pairwise_ari)), dtype=bool)
mask[inds] = 1
sns.heatmap(
    pairwise_ari,
    ax=ax,
    square=True,
    annot=True,
    mask=mask,
    vmin=0,
    vmax=1,
    center=0,
    cmap="RdBu_r",
    cbar=False,
)
ax.set(xlabel="Edge type", ylabel="Edge type", title="Partition ARIs")
plt.setp(ax.get_yticklabels(), rotation=0)
stashfig("pairwise-aris-modularity")
_images/modularity_14_0.png

Conclusion

The modularity-maximizing partitions between the different edge types are quite different. Put another way, the “modules” you get by looking at each edge type separately don’t seem very similar.

Plot the modularity of one edge type graph under another’s fit partition

edge_sorted_results = results.set_index("nice_edge_type")
pairwise_modularity = pd.DataFrame(
    index=edge_sorted_results.index, columns=edge_sorted_results.index, dtype=float
)
for idx1, row1 in edge_sorted_results.iterrows():
    for idx2, row2 in edge_sorted_results.iterrows():
        source_partition = row1["partition"]
        source_partition = [str(s) for s in source_partition]
        nodes = [str(s) for s in range(len(source_partition))]
        partition_map = dict(zip(nodes, source_partition))
        target_g = row2["g"]
        modularity_score = modularity(target_g, partition_map)
        pairwise_modularity.loc[idx1, idx2] = modularity_score
pairwise_modularity

fig, ax = plt.subplots(1, 1, figsize=(6, 6))
sns.heatmap(
    pairwise_modularity,
    ax=ax,
    square=True,
    annot=True,
    vmin=0,
    vmax=1,
    center=0,
    cmap="RdBu_r",
    cbar=False,
)
ax.set(
    xlabel="Target modularity partition",
    ylabel="Source modularity partition",
    title="Modularity",
)
plt.setp(ax.get_yticklabels(), rotation=0)
stashfig("pairwise-modularity")
_images/modularity_17_0.png

Plot the composition of the predicted modules

for _, row in results.iterrows():
    edge_type = row["edge_type"]
    nice_edge_type = nice_edge_types[edge_type]
    adj = row["adj"]
    meta = mg.nodes
    meta["partition"] = row["partition"]
    ax = crosstabplot(
        meta,
        group="partition",
        group_order="sf",
        hue="simple_group",
        hue_order="sf",
        palette=palette,
    )
    ax.set(xlabel="Module", xticks=[])
    stashfig(f"{edge_type}-crosstabplot")
_images/modularity_19_0.png _images/modularity_19_1.png _images/modularity_19_2.png _images/modularity_19_3.png _images/modularity_19_4.png

End

elapsed = time.time() - t0
delta = datetime.timedelta(seconds=elapsed)
print("----")
print(f"Script took {delta}")
print(f"Completed at {datetime.datetime.now()}")
print("----")
----
Script took 0:03:36.493232
Completed at 2021-04-27 09:04:25.451133
----