{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Data Preprocessing\n", "\n", "The outputs from the `m2g` pipeline are available in our open-access AWS S3 bucket: `s3://open-neurodata/m2g`. You can browse the outputs using the file tree at [http://open-neurodata.s3-website-us-east-1.amazonaws.com/](http://open-neurodata.s3-website-us-east-1.amazonaws.com/)." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/j1c/miniconda3/envs/m2g/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ "import boto3\n", "from botocore import UNSIGNED\n", "from botocore.client import Config\n", "\n", "from pathlib import Path\n", "import numpy as np\n", "\n", "from graspologic.utils import import_edgelist, pass_to_ranks" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "modalities = [\"Diffusion\", \"Functional\"]\n", "diffusion_datasets = [\n", " \"SWU4\",\n", " \"HNU1\",\n", " \"NKIENH\",\n", " \"XHCUMS\",\n", " \"BNU1\",\n", " \"BNU3\",\n", " \"NKI1\",\n", " \"NKI24\",\n", " \"IPCAS8\",\n", " \"MRN_1\",\n", "]\n", "functional_datasets = [\n", " \"NYU_2\",\n", " \"SWU4\",\n", " \"HNU1\",\n", " \"XHCUMS\",\n", " \"UPSM_1\",\n", " \"BNU3\",\n", " \"IPCAS7\",\n", " \"SWU1\",\n", " \"IPCAS1\",\n", " \"BNU1\",\n", "]\n", "\n", "datasets = {\"Diffusion\": diffusion_datasets, \"Functional\": functional_datasets}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Fetch from S3 and Download to Local\n", "\n", "The files will be stored in the `m2g/docs/paper/data/` directory."
] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Downloading m2g/Diffusion/SWU4-8-27-20-m2g-native-csa-det/... Total files: 422\n", "Downloading m2g/Diffusion/HNU1-8-27-20-m2g-native-csa-det/... Total files: 300\n", "Downloading m2g/Diffusion/NKIENH-11-01-20-m2g-native-csa-det/... Total files: 129\n", "Downloading m2g/Diffusion/XHCUMS-8-27-20-m2g-native-csa-det/... Total files: 117\n", "Downloading m2g/Diffusion/BNU1-8-27-20-m2g-native-csa-det/... Total files: 114\n", "Downloading m2g/Diffusion/BNU3-11-01-20-m2g-native-csa-det/... Total files: 47\n", "Downloading m2g/Diffusion/NKI1-8-24-20-m2g-native-csa-det/... Total files: 40\n", "Downloading m2g/Diffusion/NKI24-11-01-20-m2g-native-csa-det/... Total files: 38\n", "Downloading m2g/Diffusion/IPCAS8-8-27-20-m2g-native-csa-det/... Total files: 26\n", "Downloading m2g/Diffusion/MRN_1-8-27-20-m2g-native-csa-det/... Total files: 19\n", "Downloading m2g/Functional/NYU_2-11-27-20-m2g-func/... Total files: 494\n", "Downloading m2g/Functional/SWU4-11-12-20-m2g-func/... Total files: 425\n", "Downloading m2g/Functional/HNU1-11-12-20-m2g-func/... Total files: 300\n", "Downloading m2g/Functional/XHCUMS-11-27-20-m2g-func/... Total files: 247\n", "Downloading m2g/Functional/UPSM_1-11-27-20-m2g-func/... Total files: 230\n", "Downloading m2g/Functional/BNU3-11-12-20-m2g-func/... Total files: 144\n", "Downloading m2g/Functional/IPCAS7-11-27-20-m2g-func/... Total files: 144\n", "Downloading m2g/Functional/SWU1-11-27-20-m2g-func/... Total files: 119\n", "Downloading m2g/Functional/IPCAS1-11-27-20-m2g-func/... Total files: 118\n", "Downloading m2g/Functional/BNU1-11-12-20-m2g-func/... 
Total files: 106\n" ] } ], "source": [
 "bucket = \"open-neurodata\"\n",
 "\n",
 "# The bucket is open-access, so requests are unsigned (no AWS credentials needed).\n",
 "s3 = boto3.client(\"s3\", config=Config(signature_version=UNSIGNED))\n",
 "# A single list_objects_v2 call returns at most 1000 entries; the paginator follows\n",
 "# the continuation tokens so long listings are not silently truncated.\n",
 "paginator = s3.get_paginator(\"list_objects_v2\")\n",
 "\n",
 "\n",
 "def list_s3(prefix, field, value_key):\n",
 "    \"\"\"List the entries found directly under an S3 prefix of `bucket`.\n",
 "\n",
 "    Parameters\n",
 "    ----------\n",
 "    prefix : str\n",
 "        Key prefix to list; \"/\" is the delimiter, so deeper levels are not expanded.\n",
 "    field : str\n",
 "        \"CommonPrefixes\" to list sub-folders or \"Contents\" to list objects.\n",
 "    value_key : str\n",
 "        Entry field to return: \"Prefix\" for sub-folders, \"Key\" for objects.\n",
 "\n",
 "    Returns\n",
 "    -------\n",
 "    list of str\n",
 "        Empty when nothing is listed under `prefix`.\n",
 "    \"\"\"\n",
 "    pages = paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter=\"/\")\n",
 "    return [entry[value_key] for page in pages for entry in page.get(field, [])]\n",
 "\n",
 "\n",
 "for modality in modalities:\n",
 "    # The functional connectome folder name carries a \".nii.gz\" suffix.\n",
 "    if modality == \"Diffusion\":\n",
 "        parcellation = \"DKT_space-MNI152NLin6_res-2x2x2\"\n",
 "    else:\n",
 "        parcellation = \"DKT_space-MNI152NLin6_res-2x2x2.nii.gz\"\n",
 "\n",
 "    modality_prefix = f\"m2g/{modality}/\"\n",
 "    remote_folders = list_s3(modality_prefix, \"CommonPrefixes\", \"Prefix\")\n",
 "\n",
 "    for dset_abbrev in datasets[modality]:\n",
 "        # Remote folders are named \"<dataset>-<date>-...\". Anchor the match at the\n",
 "        # start of the folder name and keep the abbreviation paired with its own\n",
 "        # matches, so a missing or duplicated folder cannot shift the pairing.\n",
 "        matches = [\n",
 "            folder\n",
 "            for folder in remote_folders\n",
 "            if folder.startswith(f\"{modality_prefix}{dset_abbrev}-\")\n",
 "        ]\n",
 "        if not matches:\n",
 "            raise ValueError(f\"No S3 folder found for {modality} {dset_abbrev}\")\n",
 "\n",
 "        for dset in matches:\n",
 "            keys = list_s3(f\"{dset}Connectomes/{parcellation}/\", \"Contents\", \"Key\")\n",
 "            # Functional data: keep only the CSV files whose key contains \"abs\".\n",
 "            files = [\n",
 "                key\n",
 "                for key in keys\n",
 "                if key.endswith(\".csv\") and (modality != \"Functional\" or \"abs\" in key)\n",
 "            ]\n",
 "\n",
 "            print(f\"Downloading {dset}... Total files: {len(files)}\")\n",
 "\n",
 "            # Save to data folder\n",
 "            p = Path(f\"./data/{modality}/{dset_abbrev}\")\n",
 "            p.mkdir(parents=True, exist_ok=True)\n",
 "\n",
 "            # Download files that are not already present locally\n",
 "            for f in files:\n",
 "                out = p / Path(f).name\n",
 "                if not out.exists():\n",
 "                    s3.download_file(bucket, f, str(out))"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Compute mean connectomes\n", "\n", "This data will be used for plotting in Figure 2." ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
 "Computing mean graph for Diffusion SWU4... Total files: 422\n",
 "Computing mean graph for Diffusion HNU1... Total files: 300\n",
 "Computing mean graph for Diffusion NKIENH... Total files: 129\n",
 "Computing mean graph for Diffusion XHCUMS... Total files: 117\n",
 "Computing mean graph for Diffusion BNU1... Total files: 114\n",
 "Computing mean graph for Diffusion BNU3... Total files: 47\n",
 "Computing mean graph for Diffusion NKI1... Total files: 40\n",
 "Computing mean graph for Diffusion NKI24... Total files: 38\n",
 "Computing mean graph for Diffusion IPCAS8... Total files: 26\n",
 "Computing mean graph for Diffusion MRN_1... Total files: 19\n",
 "Computing mean graph for Functional NYU_2... Total files: 494\n",
 "Computing mean graph for Functional SWU4... Total files: 425\n",
 "Computing mean graph for Functional HNU1... Total files: 300\n",
 "Computing mean graph for Functional XHCUMS... Total files: 247\n",
 "Computing mean graph for Functional UPSM_1... Total files: 230\n",
 "Computing mean graph for Functional BNU3... Total files: 144\n",
 "Computing mean graph for Functional IPCAS7... Total files: 144\n",
 "Computing mean graph for Functional SWU1... Total files: 119\n",
 "Computing mean graph for Functional IPCAS1... Total files: 118\n",
 "Computing mean graph for Functional BNU1... Total files: 106\n"
 ] } ], "source": [
 "out_dir = Path(\"./data/mean_connectomes/\")\n",
 "out_dir.mkdir(parents=True, exist_ok=True)\n",
 "\n",
 "for modality, dsets in datasets.items():\n",
 "    # Restrict the glob to CSV edge lists so stray files in the data folder\n",
 "    # (e.g. \".DS_Store\") are never parsed as graphs. Functional data additionally\n",
 "    # uses only the files whose name contains \"abs\".\n",
 "    keyword = \"*abs*.csv\" if modality == \"Functional\" else \"*.csv\"\n",
 "\n",
 "    for dset in dsets:\n",
 "        p = Path(f\"./data/{modality}/{dset}\")\n",
 "        # Sorted so the graphs are loaded in the same order on every run.\n",
 "        files = sorted(p.glob(keyword))\n",
 "\n",
 "        print(\n",
 "            f\"Computing mean graph for {modality} {dset}... Total files: {len(files)}\"\n",
 "        )\n",
 "        if not files:\n",
 "            raise FileNotFoundError(\n",
 "                f\"No connectome files found in {p}; run the download cell first.\"\n",
 "            )\n",
 "\n",
 "        graphs = import_edgelist(files, \"csv\")\n",
 "        # NOTE(review): import_edgelist appears to return a bare 2-D array rather\n",
 "        # than a list when given a single file - confirm. Wrap it so the loop below\n",
 "        # iterates over whole graphs, not over matrix rows.\n",
 "        if isinstance(graphs, np.ndarray) and graphs.ndim == 2:\n",
 "            graphs = [graphs]\n",
 "        graphs = [pass_to_ranks(g) for g in graphs]\n",
 "\n",
 "        # Compute mean graph (element-wise average over the loaded graphs)\n",
 "        mean_graph = np.array(graphs).mean(axis=0)\n",
 "\n",
 "        # Save mean graph; np.save appends the \".npy\" extension\n",
 "        np.save(out_dir / f\"{len(files):>03}_{modality}_{dset}\", mean_graph)"
 ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "m2g", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.14" } }, "nbformat": 4, "nbformat_minor": 2 }