{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "8012b1be-3b83-4b39-9621-93f4a1723f4c",
   "metadata": {},
   "source": [
    "# Spatial domain identification and UMAP visualization\n",
    "\n",
    "Cluster the spots of the selected slices on the stored `INSTINCT_latent` embedding, then draw UMAP views of the raw input matrix and of the integrated embedding, plus per-slice spatial maps coloured by cluster."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "78f1c525-ad32-4a2b-b41e-52731fc8c14b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import os\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import anndata as ad\n",
    "import scanpy as sc\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "from umap.umap_ import UMAP\n",
    "\n",
    "from matplotlib.lines import Line2D\n",
    "import matplotlib as mpl\n",
    "\n",
    "# Font type 42 embeds TrueType fonts in PDF/PS output.\n",
    "mpl.rcParams['pdf.fonttype'] = 42\n",
    "mpl.rcParams['ps.fonttype'] = 42\n",
    "\n",
    "# NOTE: this silences every warning for the rest of the session.\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7a1be113-cc58-4cae-9b41-90d6317993b9",
   "metadata": {},
   "source": [
    "### Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "68ed9061-e34c-4233-bf00-8d239c32fc11",
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- Output options ---\n",
    "save = True          # write clustered .h5ad files, embedding CSVs and figures\n",
    "file_format = 'png'  # file extension used for every saved figure\n",
    "\n",
    "# --- Paths and slice selection ---\n",
    "data_dir = '../../data/spCASdata/HumanMouse_Deng2022/preprocessed/'  # not read in this notebook\n",
    "results_root = '../../results/HumanMouse_Deng2022/'\n",
    "slice_name_list = ['GSM5238385_ME11_50um', 'GSM5238386_ME13_50um', 'GSM5238387_ME13_50um_2']\n",
    "label_list = ['GSM5238385', 'GSM5238386', 'GSM5238387']\n",
    "slice_used = [0, 1, 2]\n",
    "slice_name_list = [slice_name_list[i] for i in slice_used]\n",
    "label_list = [label_list[i] for i in slice_used]\n",
    "slice_index_list = list(range(len(slice_name_list)))\n",
    "\n",
    "# Inputs and outputs share one folder whose name is the repr of `slice_used`,\n",
    "# e.g. '.../[0, 1, 2]/'.\n",
    "save_dir = f'{results_root}{slice_used}/'\n",
    "\n",
    "# Clustering algorithm; also the name of the obs column that stores its labels.\n",
    "method = 'leiden'\n",
    "\n",
    "cas_list = [ad.read_h5ad(save_dir + f'filtered_merged_{sample}.h5ad') for sample in slice_name_list]\n",
    "result = ad.concat(cas_list, label='slice_name', keys=label_list)\n",
    "\n",
    "# Assumes the CSV rows follow the concatenation order of the slices -- TODO confirm\n",
    "# against the code that wrote INSTINCT_embed.csv.\n",
    "result.obsm['INSTINCT_latent'] = pd.read_csv(save_dir + 'INSTINCT_embed.csv', header=None).values"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fbda3387-b0dc-4259-a225-aa9a37533974",
   "metadata": {},
   "source": [
    "### Clustering"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2dec688f-dc17-40c9-8ed2-eca8e3e16c51",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cumulative spot offsets: slice i occupies rows spots_count[i]:spots_count[i + 1] of `result`.\n",
    "spots_count = np.cumsum([0] + [sample.shape[0] for sample in cas_list])\n",
    "\n",
    "sc.pp.neighbors(result, use_rep='INSTINCT_latent', random_state=1234)\n",
    "\n",
    "# Run the algorithm selected by `method`, so the column read by the plots below always exists.\n",
    "if method == 'leiden':\n",
    "    sc.tl.leiden(result, resolution=1, random_state=1234)\n",
    "elif method == 'louvain':\n",
    "    sc.tl.louvain(result, random_state=1234)\n",
    "else:\n",
    "    raise ValueError(f'Unsupported clustering method: {method!r}')\n",
    "\n",
    "for i, sample in enumerate(cas_list):\n",
    "    start, stop = spots_count[i], spots_count[i + 1]\n",
    "    # Copy labels by position: `.values` drops the index, so pandas does not re-align on\n",
    "    # spot names (which fails when names are duplicated) during the assignment.\n",
    "    sample.obs[method] = result.obs[method].iloc[start:stop].values.copy()\n",
    "    if save:\n",
    "        sample.write(save_dir + f'clustered_{slice_name_list[i]}.h5ad')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "527883aa-f5bc-4a9c-a316-a7efba680eb3",
   "metadata": {},
   "source": [
    "### Visualization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "30acf63f-0235-4ade-8cd7-a42d39a5fdab",
   "metadata": {},
   "outputs": [],
   "source": [
    "def save_embedding(embedding, filename):\n",
    "    \"\"\"Write a 2-D embedding to `save_dir + filename` as CSV, one row per spot.\"\"\"\n",
    "    with open(save_dir + filename, 'w', newline='') as file:\n",
    "        csv.writer(file).writerows(embedding)\n",
    "\n",
    "\n",
    "def save_figure(fig, filename):\n",
    "    \"\"\"Save `fig` to `save_dir + filename` when the notebook-level `save` flag is set.\"\"\"\n",
    "    if save:\n",
    "        fig.savefig(save_dir + filename)\n",
    "\n",
    "\n",
    "def legend_dots(labels, colors):\n",
    "    \"\"\"Build one round legend marker per (label, colour) pair.\"\"\"\n",
    "    return [Line2D([0], [0], marker='o', color='w', markersize=8, markerfacecolor=color, label=label)\n",
    "            for label, color in zip(labels, colors)]\n",
    "\n",
    "\n",
    "def draw_umap(embedding, point_colors, title, legend_handles, legend_title, **legend_kwargs):\n",
    "    \"\"\"Scatter a 2-D embedding with one colour per spot and a custom legend.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    embedding : array of shape (n_spots, 2)\n",
    "    point_colors : sequence of n_spots colours, in the same row order as `embedding`\n",
    "    title : axes title\n",
    "    legend_handles, legend_title : content of the legend\n",
    "    **legend_kwargs : forwarded to `Axes.legend` (e.g. `loc`, `bbox_to_anchor`)\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    (fig, ax) of the new 5x5 inch figure.\n",
    "    \"\"\"\n",
    "    n_points = embedding.shape[0]\n",
    "    # Spots are drawn last-row-first; the colours are reversed to match.\n",
    "    draw_order = np.arange(n_points)[::-1]\n",
    "    fig, ax = plt.subplots(figsize=(5, 5))\n",
    "    ax.scatter(embedding[draw_order, 0], embedding[draw_order, 1],\n",
    "               s=10000 / n_points, c=list(point_colors)[::-1])\n",
    "    ax.tick_params(axis='both', bottom=False, top=False, left=False, right=False,\n",
    "                   labelleft=False, labelbottom=False, grid_alpha=0)\n",
    "    ax.legend(handles=legend_handles, fontsize=8, title=legend_title, title_fontsize=10,\n",
    "              **legend_kwargs)\n",
    "    ax.set_title(title, fontsize=16)\n",
    "    return fig, ax"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "umap-by-slice",
   "metadata": {},
   "outputs": [],
   "source": [
    "reducer = UMAP(n_neighbors=30, n_components=2, metric='correlation', n_epochs=None, learning_rate=1.0,\n",
    "               min_dist=0.3, spread=1.0, set_op_mix_ratio=1.0, local_connectivity=1, repulsion_strength=1,\n",
    "               negative_sample_rate=5, a=None, b=None, random_state=1234, metric_kwds=None,\n",
    "               angular_rp_forest=False, verbose=True)\n",
    "\n",
    "colors_for_slices = [[0.2298057, 0.29871797, 0.75368315],\n",
    "                     [0.70567316, 0.01555616, 0.15023281],\n",
    "                     [0.2298057, 0.70567316, 0.15023281]]\n",
    "assert len(label_list) <= len(colors_for_slices), 'add a colour for every selected slice'\n",
    "slice_cmap = dict(zip(label_list, colors_for_slices))\n",
    "slice_colors = list(result.obs['slice_name'].astype('str').map(slice_cmap))\n",
    "slice_handles = legend_dots(label_list, [slice_cmap[label] for label in label_list])\n",
    "\n",
    "# raw\n",
    "raw_pca = np.load(save_dir + 'input_matrix.npy')\n",
    "assert raw_pca.shape[0] == result.shape[0], 'input_matrix.npy must have one row per spot'\n",
    "raw_umap = reducer.fit_transform(raw_pca)\n",
    "if save:\n",
    "    save_embedding(raw_umap, 'sp_embeddings_raw.csv')\n",
    "fig, _ = draw_umap(raw_umap, slice_colors, 'Raw', slice_handles, 'Slices', loc='upper left')\n",
    "save_figure(fig, f'raw_slices_umap.{file_format}')\n",
    "\n",
    "# integrated\n",
    "integrated_umap = reducer.fit_transform(result.obsm['INSTINCT_latent'])\n",
    "if save:\n",
    "    save_embedding(integrated_umap, 'sp_embeddings_integrated.csv')\n",
    "fig, _ = draw_umap(integrated_umap, slice_colors, 'Integrated', slice_handles, 'Slices', loc='lower left')\n",
    "save_figure(fig, f'integrated_slices_umap.{file_format}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "umap-by-cluster",
   "metadata": {},
   "outputs": [],
   "source": [
    "unique_labels = result.obs[method].unique()\n",
    "n_clusters = len(unique_labels)\n",
    "print(n_clusters)\n",
    "\n",
    "# Hand-picked palettes per method; tab20 is used when there are more clusters than colours.\n",
    "named_palettes = {\n",
    "    'louvain': ['gold', 'dodgerblue', 'orange', 'deepskyblue',\n",
    "                'g', 'limegreen', 'gainsboro', 'y',\n",
    "                'darkorange', 'darkgray', 'saddlebrown', 'chocolate'],\n",
    "    'leiden': ['orange', 'dodgerblue', 'wheat', 'deepskyblue', 'g',\n",
    "               'gold', 'crimson', 'limegreen', 'yellowgreen', 'lightcoral',\n",
    "               'fuchsia', 'sienna', 'lightgray', 'violet', 'hotpink'],\n",
    "}\n",
    "color_palette = named_palettes.get(method, [])\n",
    "if n_clusters > len(color_palette):\n",
    "    color_palette = sns.color_palette('tab20', n_colors=n_clusters)\n",
    "\n",
    "# Cluster labels are looked up as the strings '0' .. str(n_clusters - 1).\n",
    "color_list = list(color_palette[:n_clusters])\n",
    "color_dict = {str(i): color for i, color in enumerate(color_list)}\n",
    "legend_elements = legend_dots(range(n_clusters), color_list)\n",
    "cluster_colors = list(result.obs[method].astype('str').map(color_dict))\n",
    "\n",
    "# Scope the thicker axes frame to this figure instead of changing rcParams globally.\n",
    "with plt.rc_context({'axes.linewidth': 1}):\n",
    "    fig, _ = draw_umap(integrated_umap, cluster_colors, 'Identified Clusters',\n",
    "                       legend_elements, 'Clusters', bbox_to_anchor=(1, 1))\n",
    "# Leave room on the right for the legend anchored outside the axes.\n",
    "fig.subplots_adjust(left=0.05, top=None, bottom=None, right=0.85)\n",
    "save_figure(fig, f'{method}_identified_clusters_umap.{file_format}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "spatial-clusters",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One 4x4 inch panel per slice; squeeze=False keeps `axs` indexable for a single slice too.\n",
    "n_slices = len(cas_list)\n",
    "fig, axs = plt.subplots(1, n_slices, figsize=(4 * n_slices, 4), squeeze=False)\n",
    "axs = axs.ravel()\n",
    "fig.suptitle('Clustering Results', fontsize=16)\n",
    "\n",
    "for ax, sample, label in zip(axs, cas_list, label_list):\n",
    "    spot_colors = list(sample.obs[method].astype('str').map(color_dict))\n",
    "    coords = sample.obsm['spatial']\n",
    "    ax.scatter(coords[:, 0], coords[:, 1], linewidth=1, s=40,\n",
    "               marker='.', color=spot_colors, alpha=0.9)\n",
    "    ax.invert_yaxis()\n",
    "    ax.set_title(f'{label}', size=12)\n",
    "    ax.axis('off')\n",
    "\n",
    "# The shared legend hangs off the right-most panel.\n",
    "axs[-1].legend(handles=legend_elements,\n",
    "               fontsize=8, title='Clusters', title_fontsize=10, bbox_to_anchor=(1, 1))\n",
    "fig.subplots_adjust(left=0.05, top=0.8, bottom=0.05, right=0.90)\n",
    "save_figure(fig, f'{method}_clustering_results.{file_format}')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}