Wrangle the data

# collapse
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import seaborn as sns
from adjustText import adjust_text

from giskard.plot import palplot

# Calendar years iterated over by the per-year plots (2016 through 2021).
years = np.arange(2016, 2022)


# Keyword arguments forwarded to every ``plt.savefig`` call in ``stashfig``.
savefig_kws = {
    "dpi": 300,
    "pad_inches": 0.3,
    "transparent": False,
    "bbox_inches": "tight",
    "facecolor": "white",
}

# Keyword arguments unpacked into the ``layoutplot`` calls below.
layoutplot_kws = {"node_alpha": 0.6, "adjust": True}


def stashfig(name, **kwargs):
    """Save the current matplotlib figure as both a PDF and a PNG.

    Files are written to ``sandbox/results/kavli/{name}.pdf`` and
    ``sandbox/results/kavli/{name}.png``.

    Parameters
    ----------
    name : str
        File name stem (no extension).
    **kwargs
        Extra keyword arguments for ``plt.savefig``. They override the
        module-level ``savefig_kws`` defaults. A ``format`` entry is ignored
        because the format is fixed per output file.
    """
    # Caller-supplied options used to be accepted and then silently dropped;
    # merge them over the shared defaults instead.
    save_kws = {**savefig_kws, **kwargs}
    # ``format`` is passed explicitly below; a duplicate keyword would raise.
    save_kws.pop("format", None)
    for fmt in ("pdf", "png"):
        plt.savefig(f"sandbox/results/kavli/{name}.{fmt}", format=fmt, **save_kws)


class MapDict(dict):
    """Dictionary whose item lookup returns the key itself when it is absent."""

    def __missing__(self, key):
        # Called by ``dict.__getitem__`` for unknown keys; nothing is stored.
        return key


def flatten_muligraph(multigraph, filter_edge_type=None):
    """Collapse a multigraph into a simple undirected graph with summed weights.

    Parallel edges between the same pair of nodes are merged into a single
    edge whose ``"weight"`` is the sum of the merged edges' weights. An edge
    without a ``"weight"`` attribute counts as 1.0.

    Parameters
    ----------
    multigraph : networkx graph
        Graph whose ``edges(data=True)`` yields ``(u, v, attrs)`` triples.
    filter_edge_type : str, collection of str, or None
        If given, only edges whose ``"type"`` attribute is one of these values
        are kept. ``None`` keeps every edge.

    Returns
    -------
    networkx.Graph
        The flattened graph. Only nodes incident to at least one retained
        edge are present, because nodes are added solely through edges.
    """
    # REF: https://stackoverflow.com/questions/15590812/networkx-convert-multigraph-into-simple-graph-with-weighted-edges
    if isinstance(filter_edge_type, str):
        filter_edge_type = [filter_edge_type]
    g = nx.Graph()
    for i, j, data in multigraph.edges(data=True):
        # Look at "type" only when filtering, and tolerate its absence, so an
        # untyped edge no longer raises KeyError in the unfiltered case.
        if filter_edge_type is not None and data.get("type") not in filter_edge_type:
            continue
        w = data.get("weight", 1.0)
        if g.has_edge(i, j):
            g[i][j]["weight"] += w
        else:
            g.add_edge(i, j, weight=w)
    return g


# load the data
data_loc = "sandbox/data/kavli/Kavli Interconnections 2021 new.xlsx"
data = pd.read_excel(data_loc)
# full "First Last" names serve as the node identifiers
data["source"] = (data["source_first"] + " " + data["source_last"]).tolist()
data["target"] = (data["target_first"] + " " + data["target_last"]).tolist()

# ignore anything having to do with "joined" for the purposes of the edgelist
is_joined = data["type"] == "joined"
edges = data[~is_joined].copy()
# rows lacking a source or a target name cannot be used as edges
missing_endpoint = edges[["source", "target"]].isna().any(axis=1)
bad_edges = edges[missing_endpoint]
print("Ignoring bad edges:")
print()
# bare expression: shown as the cell output when run in a notebook
bad_edges

/Users/bpedigo/miniconda3/envs/sandbox/lib/python3.7/site-packages/umap/__init__.py:9: UserWarning: Tensorflow not installed; ParametricUMAP will be unavailable
  warn("Tensorflow not installed; ParametricUMAP will be unavailable")
Ignoring bad edges:

# collapse
# drop the rows flagged as bad edges, then drop seminar rows
edges = edges.loc[~edges.index.isin(bad_edges.index)]
edges = edges.loc[edges["type"] != "Seminar"]

# one multigraph edge per remaining row, then a weighted simple graph
mg = nx.from_pandas_edgelist(
    edges,
    edge_attr=["type", "ternal", "start", "end"],
    create_using=nx.MultiGraph,
)
g = flatten_muligraph(mg)

# some node metadata
targets = edges["target"].unique()
sources = edges["source"].unique()
node_ids = np.unique(np.array([*targets, *sources]))
nodes = pd.DataFrame(index=node_ids)
# most frequent department recorded for each person when listed as a target;
# people who never appear as a target fall back to "unk"
departments = edges.groupby("target")["target_dept"].agg(lambda x: x.mode()[0])
nodes["dept"] = nodes.index.map(departments).fillna("unk")

edge_types = edges["type"].unique()
# "joined" rows are indexed by person; anyone with such a row is flagged core
joined = data[data["type"] == "joined"].set_index("source")
nodes["core"] = nodes.index.isin(joined.index)
nodes["joined"] = nodes.index.map(joined["start"])

# mapping departments: names not listed here pass through unchanged
dept_map = MapDict(
    {
        "BME/JHM": "BME",
        "Mind Brain Institute": "MBI",
        "Mind Brain Institute, Neuroscience": "Neuroscience",
        "BME/Ophthalmology": "BME",
        "Neuro": "Neuroscience",
        "Neuro + BME": "Neuroscience",
        "Biostat": "Biostats",
        "ME": "Mechanical Engineering",
    }
)

nodes["dept"] = nodes["dept"].map(dept_map)
# manual override for one person
nodes.loc["Peter Searson", "dept"] = "Materials Science and Engineering"

Create a color palette by department/institute

# collapse
# one "husl" color per distinct department, assigned in sorted-name order
dept_names = sorted(nodes["dept"].unique())
colors = sns.color_palette("husl", nodes["dept"].nunique())
palette = {dept: color for dept, color in zip(dept_names, colors)}
nodes["color"] = nodes["dept"].map(palette)
sns.set_context("talk")
palplot(palette)
stashfig("palette")

Plot the complete network

# collapse


def layoutplot(
    g,
    pos,
    nodes,
    ax=None,
    figsize=(10, 10),
    weight_scale=1,
    adjust=True,
    log_weights=True,
    node_alpha=1,
    label_nodes=True,
    node_size=300,
):
    """Draw a weighted graph at fixed node positions.

    Parameters
    ----------
    g : networkx graph
        Graph to draw; every edge must carry a ``"weight"`` attribute.
    pos : mapping
        Node -> (x, y) position, with an entry for every node in ``nodes``.
    nodes : pandas.DataFrame
        Indexed by node name (strings), with a ``"color"`` column. Its index
        is the list of nodes that get drawn and labelled.
    ax : matplotlib axes, optional
        Axes to draw on. A new figure of size ``figsize`` is created if None.
    figsize : tuple
        Figure size, used only when ``ax`` is None.
    weight_scale : number
        Multiplier applied to the (optionally log-transformed) edge weights
        to obtain line widths.
    adjust : bool
        If True, run ``adjust_text`` on the node labels.
    log_weights : bool
        If True, edge widths use ``log(weight + 1)`` rather than raw weight.
    node_alpha : float
        Opacity of the colored node markers.
    label_nodes : bool
        If True, write each node's name at its position.
    node_size : number
        Marker size passed to the networkx drawing functions.

    Returns
    -------
    matplotlib axes
        The axes that were drawn on, with the axis frame turned off.
    """
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=figsize)

    edgelist = g.edges()
    # Force a float array: with integer weights and log_weights=False, an
    # in-place multiply by a float weight_scale raises a casting error.
    weights = np.array([g[u][v]["weight"] for u, v in edgelist], dtype=float)
    # weight transformations happen here, can be important
    if log_weights:
        weights = np.log(weights + 1)
    weights = weights * weight_scale

    # opaque white markers first, so translucent colored markers drawn on
    # top are blended against white
    nx.draw_networkx_nodes(
        g,
        pos,
        nodelist=nodes.index,
        node_color="white",
        zorder=-1,
        node_size=node_size,
        ax=ax,
    )
    nx.draw_networkx_nodes(
        g,
        pos,
        nodelist=nodes.index,
        node_color=nodes["color"],
        alpha=node_alpha,
        node_size=node_size,
        ax=ax,
    )
    nx.draw_networkx_edges(
        g,
        pos,
        edgelist=edgelist,
        nodelist=nodes.index,
        width=weights,
        zorder=-2,
        edge_color="lightgrey",
        alpha=1,
        ax=ax,
        node_size=node_size,
    )
    if label_nodes:
        texts = []
        for node in nodes.index:
            x, y = pos[node][0], pos[node][1]
            # break the label after the first word: "First\nrest of name"
            first, _, rest = node.partition(" ")
            text = ax.text(
                x,
                y,
                first + "\n" + rest,
                ha="center",
                va="center",
                fontname="DejaVu Sans",
            )
            texts.append(text)
        if adjust:
            adjust_text(
                texts,
                expand_text=(1.03, 1.1),
                lim=200,
                avoid_self=False,
                autoalign=False,
            )

    ax.axis("off")

    return ax


# switch the seaborn context to a reduced font scale before drawing
sns.set_context("talk", font_scale=0.55)
fig, ax = plt.subplots(1, 1, figsize=(12, 12))
# positions for the full flattened graph; weight=None means the "weight"
# edge attribute is not handed to the layout
pos = nx.kamada_kawai_layout(g, weight=None)
layoutplot(g, pos, nodes, ax=ax, **layoutplot_kws)
stashfig("kavli-all")

Plot the core Kavli group by year

# collapse
def calculate_bounds(pos, pad=0.05):
    """Return padded x and y limits that enclose every position in ``pos``.

    Parameters
    ----------
    pos : mapping
        Node -> position, where each position holds an x and a y coordinate.
    pad : float
        Fraction of each axis' extent added as margin on both sides.

    Returns
    -------
    (xlim, ylim) : tuple of 2-tuples
        ``(low, high)`` limits for the x axis and for the y axis.
    """
    coords = np.stack(list(pos.values()), axis=0)  # one row per node
    lower = coords.min(axis=0)
    upper = coords.max(axis=0)
    margin = (upper - lower) * pad
    xlim = (lower[0] - margin[0], upper[0] + margin[0])
    ylim = (lower[1] - margin[1], upper[1] + margin[1])
    return xlim, ylim


def extract_year_mgs(mg):
    """Build one cumulative edge subgraph of ``mg`` per entry of ``years``.

    For each year in the module-level ``years``, the subgraph holds every
    edge of ``mg`` whose ``"start"`` attribute is less than or equal to that
    year. An edge whose ``"start"`` is NaN fails that comparison and so is
    left out of every year.

    Parameters
    ----------
    mg : networkx multigraph
        Keyed multigraph whose edges carry a ``"start"`` attribute.

    Returns
    -------
    dict
        Year -> result of ``nx.edge_subgraph`` for that year.
    """

    def started_by(cutoff):
        # (u, v, key) triples of the edges that began on or before ``cutoff``
        return [
            (u, v, key)
            for u, v, key, attrs in mg.edges(data=True, keys=True)
            if attrs["start"] <= cutoff
        ]

    return {year: nx.edge_subgraph(mg, started_by(year)) for year in years}


# layoutplot_kws["adjust"] = True

# restrict the multigraph to the nodes flagged as core
core_mg = nx.subgraph(mg, nodes[nodes["core"]].index)
core_flat_g = flatten_muligraph(core_mg)
# layout and axis limits are computed once, from all core edges, and reused
# for every year so node positions stay fixed between figures
core_pos = nx.kamada_kawai_layout(core_flat_g, weight=None)
xlim, ylim = calculate_bounds(core_pos)

year_mgs = extract_year_mgs(core_mg)

# one figure per year, showing the core edges that started by that year
for year in years:
    year_mg = year_mgs[year]
    year_g = flatten_muligraph(year_mg)
    fig, ax = plt.subplots(1, 1, figsize=(12, 12))
    # the node table is filtered by the full core graph rather than year_g,
    # so the same set of core nodes is drawn in every year
    layoutplot(
        year_g,
        core_pos,
        nodes[nodes.index.isin(core_flat_g.nodes)],
        ax=ax,
        **layoutplot_kws,
    )
    ax.set_title(year, fontsize=30)
    ax.set(xlim=xlim, ylim=ylim)
    stashfig(f"kavli-core-year-{year}")

Plot the entire network by year

# collapse
# layout and axis limits for the whole network; pos, xlim and ylim are
# rebound here and shared by every figure in the loop
flat_g = flatten_muligraph(mg)
pos = nx.kamada_kawai_layout(flat_g, weight=None)
xlim, ylim = calculate_bounds(pos)
year_mgs = extract_year_mgs(mg)
# one figure per year, showing the edges that started by that year
for year in years:
    year_mg = year_mgs[year]
    year_g = flatten_muligraph(year_mg)
    fig, ax = plt.subplots(1, 1, figsize=(12, 12))
    # only nodes that have at least one edge in this year's graph are drawn
    layoutplot(
        year_g, pos, nodes[nodes.index.isin(year_g.nodes)], ax=ax, **layoutplot_kws
    )
    ax.set_title(year, fontsize=30)
    ax.set(xlim=xlim, ylim=ylim)
    stashfig(f"kavli-all-year-{year}")

Plot each type of connection separately

# collapse
# one figure per connection type, each drawn with the existing pos, xlim
# and ylim so the figures are directly comparable
for edge_type in edge_types:
    # keep only the edges of this type, merged into a weighted simple graph
    edge_type_g = flatten_muligraph(mg, filter_edge_type=edge_type)
    fig, ax = plt.subplots(1, 1, figsize=(12, 12))
    # only nodes that have at least one edge of this type are drawn
    layoutplot(
        edge_type_g,
        pos,
        nodes[nodes.index.isin(edge_type_g.nodes)],
        ax=ax,
        **layoutplot_kws,
    )
    ax.set_title(edge_type, fontsize=30)
    ax.set(xlim=xlim, ylim=ylim)
    # the edge type string becomes part of the output file name
    stashfig(f"kavli-all-edge-type-{edge_type}")

Plot each type of connection on the same set of axes

# one small, unlabeled panel per connection type on a 2 x 4 grid of axes
fig, axs = plt.subplots(2, 4, figsize=(20, 10))
for i, edge_type in enumerate(edge_types):
    ax = axs.flat[i]
    edge_type_g = flatten_muligraph(mg, filter_edge_type=edge_type)
    # only nodes that have at least one edge of this type are drawn
    panel_nodes = nodes[nodes.index.isin(edge_type_g.nodes)]
    layoutplot(
        edge_type_g,
        pos,
        panel_nodes,
        ax=ax,
        node_size=100,
        label_nodes=False,
        **layoutplot_kws,
    )
    ax.set_title(edge_type, fontsize=20)
    ax.set(xlim=xlim, ylim=ylim)
# turn the frame off on every panel, including any that were left unused
for ax in axs.flat:
    ax.axis("off")
stashfig("kavli-all-edge-types")

	source_first	source_last	target_first	target_last	ternal	target_dept	start	end	type	Unnamed: 9	...	Unnamed: 26	Unnamed: 27	Unnamed: 28	Unnamed: 29	Unnamed: 30	Unnamed: 31	Unnamed: 32	Unnamed: 33	source	target
5	Andreas	Andreou	NaN	NaN	External	Cornelia Fermuller, Shihab Shamma, Behtash Bab...	2021.0	2026	Funded Grant	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Andreas Andreou	NaN
6	Andreas	Andreou	NaN	NaN	External	Northrop Grumman	2021.0	2025	Funded Grant	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Andreas Andreou	NaN
42	Seth	Blackshaw	NaN	NaN	NaN	Neuroscience	2020.0	2025	Funded Grant	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Seth Blackshaw	NaN
43	Seth	Blackshaw	NaN	NaN	NaN	Neuroscience	2020.0	2022	Funded Grant	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Seth Blackshaw	NaN
44	Seth	Blackshaw	NaN	NaN	NaN	Neuroscience	2021.0	2026	Funded Grant	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Seth Blackshaw	NaN
83	Brian	Caffo	NaN	NaN	NaN	Biostat	2020.0	2023	Funded Grant	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Brian Caffo	NaN
123	Brian	Caffo	NaN	NaN	NaN	Biostat	NaN	2016	Publication	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Brian Caffo	NaN
125	Vikram	Chib	NaN	NaN	NaN	Neuroscience	2019.0	2023	Funded Grant	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Vikram Chib	NaN
126	Vikram	Chib	NaN	NaN	NaN	Neuroscience	2019.0	2024	Funded Grant	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Vikram Chib	NaN
302	James	Knierim	NaN	NaN	NaN	Neuroscience	2018.0	Ongoing	Funded Grant	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	James Knierim	NaN
369	Jonathan	Ling	NaN	NaN	NaN	Neuropathology	2021.0	Ongoing	Funded Grant	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Jonathan Ling	NaN
370	Jonathan	Ling	NaN	NaN	NaN	Neuropathology	2021.0	Ongoing	Funded Grant	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Jonathan Ling	NaN
371	Jonathan	Ling	NaN	NaN	NaN	Neuroscience	2019.0	2020	Funded Grant	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Jonathan Ling	NaN
378	Jonathan	Ling	NaN	NaN	NaN	Neuropathology	NaN	2020	Submitted Grant	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Jonathan Ling	NaN
379	Jonathan	Ling	NaN	NaN	NaN	Neuropathology	NaN	2020	Submitted Grant	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Jonathan Ling	NaN
380	Jonathan	Ling	NaN	NaN	NaN	Neuropathology	NaN	2020	Submitted Grant	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Jonathan Ling	NaN
482	Marshall	Shuler	NaN	NaN	NaN	Neuroscience	2020.0	Ongoing	Funded Grant	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Marshall Shuler	NaN
483	Marshall	Shuler	NaN	NaN	NaN	Neuroscience	2020.0	Ongoing	Funded Grant	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Marshall Shuler	NaN
484	Marshall	Shuler	NaN	NaN	NaN	Neuroscience	2019.0	2020	Funded Grant	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Marshall Shuler	NaN
510	Genevieve	Stein-O'Brien	NaN	NaN	External	Neuroscience	2021.0	Ongoing	Funded Grant	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Genevieve Stein-O'Brien	NaN
530	Genevieve	Stein-O'Brien	NaN	NaN	External	Neuroscience	2021.0	NaN	Submitted Grant	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Genevieve Stein-O'Brien	NaN
543	Joshua	Vogelstein	NaN	NaN	Internal	BME	2018.0	2020	Funded Grant	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Joshua Vogelstein	NaN
544	Joshua	Vogelstein	NaN	NaN	Internal	BME	NaN	NaN	Funded Grant	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Joshua Vogelstein	NaN
558	Joshua	Vogelstein	NaN	NaN	Internal	BME	2017.0	2021	Project	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Joshua Vogelstein	NaN
559	Joshua	Vogelstein	NaN	NaN	Internal	BME	2017.0	2020	Project	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	Joshua Vogelstein	NaN