{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "46a5b4b9-bb70-4f66-a23f-9c90151695ec",
   "metadata": {},
   "source": [
    "# Cross-sample annotation\n",
    "Use the annotated S1 slice to annotate the corresponding unlabeled S2 slice based on the integration results."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "af73bb05-5c41-43fa-9356-2ce73e85dd49",
   "metadata": {},
   "outputs": [],
   "source": [
    "import warnings\n",
    "\n",
    "import anndata as ad\n",
    "import matplotlib as mpl\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from matplotlib.lines import Line2D\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from umap.umap_ import UMAP\n",
    "\n",
    "# Font type 42 (TrueType) for the PDF/PS backends used when saving figures.\n",
    "mpl.rcParams['pdf.fonttype'] = 42\n",
    "mpl.rcParams['ps.fonttype'] = 42\n",
    "\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "70da9086-0d49-490b-b08a-bc3f7d7aaa54",
   "metadata": {},
   "source": [
    "### Load the data\n",
    "Load the two slices and the latent embeddings of the spots, and set the current label of each spot in S2 to 'Unidentified'."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d0ca8494-610d-48ef-8b93-821798e0a44c",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = 'INSTINCT'\n",
    "mode_index = 3\n",
    "mode_list = ['E11_0', 'E13_5', 'E15_5', 'E18_5']\n",
    "mode = mode_list[mode_index]\n",
    "\n",
    "data_dir = '../../data/spMOdata/EpiTran_MouseBrain_Jiang2023/preprocessed/'\n",
    "save_dir = f'../../results/MouseBrain_Jiang2023/vertical/{mode}/'\n",
    "slice_name_list = [f'{mode}-S1', f'{mode}-S2']\n",
    "\n",
    "cas_list = [ad.read_h5ad(save_dir + f'filtered_merged_{sample}_atac.h5ad') for sample in slice_name_list]\n",
    "# S2 is treated as unlabeled: every spot gets the placeholder label.\n",
    "cas_list[1].obs['Annotation_for_Combined'] = 'Unidentified'\n",
    "adata_concat = ad.concat(cas_list, label='slice_name', keys=slice_name_list)\n",
    "\n",
    "# Cumulative row offsets of the slices inside the concatenated object: [0, n_S1, n_S1 + n_S2].\n",
    "spots_count = [0] + np.cumsum([sample.shape[0] for sample in cas_list]).tolist()\n",
    "\n",
    "# NOTE(review): the rows of the embedding file are assumed to follow the concatenation\n",
    "# order (S1 first, then S2) - confirm against the script that wrote the CSV.\n",
    "adata_concat.obsm['latent'] = pd.read_csv(save_dir + f'{model}/{mode}_INSTINCT_embed.csv', header=None).values\n",
    "for j in range(len(cas_list)):\n",
    "    cas_list[j].obsm['latent'] = adata_concat.obsm['latent'][spots_count[j]:spots_count[j + 1]].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2632f370-f334-4ce1-a7b3-911f0e231069",
   "metadata": {},
   "outputs": [],
   "source": [
    "def StrLabel2Idx(string_labels):\n",
    "    \"\"\"Encode labels as integer indices with scikit-learn's ``LabelEncoder``.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    string_labels : array-like\n",
    "        Labels to encode.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    numpy.ndarray\n",
    "        Integer code of each input label, in input order.\n",
    "    \"\"\"\n",
    "    label_encoder = LabelEncoder()\n",
    "    idx_labels = label_encoder.fit_transform(string_labels)\n",
    "\n",
    "    return np.array(idx_labels)\n",
    "\n",
    "\n",
    "def knn_label_translation(reference_X, reference_y, target_X, k=20):\n",
    "    \"\"\"Transfer labels from reference samples to target samples with a k-NN classifier.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    reference_X : array-like\n",
    "        Features of the labeled reference samples.\n",
    "    reference_y : array-like\n",
    "        Labels of the reference samples.\n",
    "    target_X : array-like\n",
    "        Features of the samples to annotate.\n",
    "    k : int, default 20\n",
    "        Requested number of neighbours. It is capped at the number of\n",
    "        reference samples so that small references do not raise.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    numpy.ndarray\n",
    "        Predicted label of each target sample, expressed in the original\n",
    "        label values of ``reference_y``.\n",
    "    \"\"\"\n",
    "    label_encoder = LabelEncoder()\n",
    "    reference_y_idx = label_encoder.fit_transform(reference_y)\n",
    "\n",
    "    # A k-NN query cannot use more neighbours than there are fitted samples.\n",
    "    n_neighbors = max(1, min(k, len(reference_y_idx)))\n",
    "    neigh = KNeighborsClassifier(n_neighbors=n_neighbors)\n",
    "    neigh.fit(reference_X, reference_y_idx)\n",
    "\n",
    "    target_y_idx = neigh.predict(target_X)\n",
    "    target_y = label_encoder.inverse_transform(target_y_idx)\n",
    "\n",
    "    return target_y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1e18df8f-9c8b-4696-bf6b-f912f3e7b530",
   "metadata": {},
   "outputs": [],
   "source": [
    "def _plot_umap_layers(sp_embedding, layers, size, title):\n",
    "    \"\"\"Create one 5x5 inch scatter figure; ``layers`` are (row_slice, colors) pairs drawn in order.\"\"\"\n",
    "    fig = plt.figure(figsize=(5, 5))\n",
    "    # Set before the axes are created by the first scatter call (this changes the global rc).\n",
    "    plt.rc('axes', linewidth=1)\n",
    "    for rows, layer_colors in layers:\n",
    "        plt.scatter(sp_embedding[rows, 0], sp_embedding[rows, 1], s=size, c=layer_colors)\n",
    "    plt.tick_params(axis='both', bottom=False, top=False, left=False, right=False,\n",
    "                    labelleft=False, labelbottom=False, grid_alpha=0)\n",
    "    plt.title(title, fontsize=14)\n",
    "    return fig\n",
    "\n",
    "\n",
    "def plot_mousebrain_verti(cas_list, adata_concat, ground_truth_key, annotation_key, cluster_to_color_map,\n",
    "                          slice_name_list, cls_list, sp_embedding, mode,\n",
    "                          save_root=None, save=False, plot=False):\n",
    "    \"\"\"Plot the spatial maps and the UMAP views of a cross-sample annotation.\n",
    "\n",
    "    Four figures are created:\n",
    "\n",
    "    1. two spatial scatter plots side by side: slice 1 coloured by\n",
    "       ``ground_truth_key`` and slice 2 coloured by ``annotation_key``;\n",
    "    2. the 2-D embedding coloured by slice;\n",
    "    3. the 2-D embedding with slice 1 coloured by ``ground_truth_key`` and slice 2 in gray;\n",
    "    4. the 2-D embedding of slice 2 only, coloured by ``annotation_key``.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    cas_list : list\n",
    "        The two slices (reference first); each needs ``.obs`` and ``.obsm['spatial']``.\n",
    "    adata_concat : AnnData\n",
    "        Concatenation of the slices; ``obs['slice_name']`` is read.\n",
    "    ground_truth_key : str\n",
    "        ``obs`` column of ``cas_list[0]`` holding the true labels.\n",
    "    annotation_key : str\n",
    "        ``obs`` column of ``cas_list[1]`` holding the transferred labels.\n",
    "    cluster_to_color_map : dict\n",
    "        Maps each label to a matplotlib colour.\n",
    "    slice_name_list : list of str\n",
    "        Names of the two slices, in the same order as ``cas_list``.\n",
    "    cls_list : list of str\n",
    "        Labels listed in the legend, in display order.\n",
    "    sp_embedding : numpy.ndarray\n",
    "        Embedding whose first two columns are plotted; rows are sliced with the\n",
    "        cumulative sizes of ``cas_list`` (slice 1 rows first, then slice 2).\n",
    "    mode : str\n",
    "        Name used in the titles. For ``'E13_5'`` the x-axis of the second\n",
    "        spatial panel is inverted instead of its y-axis.\n",
    "    save_root : str, optional\n",
    "        Prefix the output file names are appended to (include the trailing\n",
    "        path separator). Required when ``save`` is true.\n",
    "    save : bool, default False\n",
    "        Write ``annotation_results.pdf``, ``slices_umap.pdf``,\n",
    "        ``true_labels_umap.pdf`` and ``annotation_umap.pdf``.\n",
    "    plot : bool, default False\n",
    "        Call ``plt.show()``; otherwise the figures are closed after saving.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If ``save`` is true but ``save_root`` is ``None``.\n",
    "    \"\"\"\n",
    "    if save and save_root is None:\n",
    "        raise ValueError('save_root must be provided when save=True')\n",
    "\n",
    "    figures = []\n",
    "\n",
    "    # ---- spatial maps: S1 true labels next to S2 annotation ----\n",
    "    fig, axs = plt.subplots(1, 2, figsize=(10, 4))\n",
    "    figures.append(fig)\n",
    "    fig.suptitle(f'{mode} Annotation Results', fontsize=14)\n",
    "\n",
    "    real_colors = list(cas_list[0].obs[ground_truth_key].astype('str').map(cluster_to_color_map))\n",
    "    axs[0].scatter(cas_list[0].obsm['spatial'][:, 0], cas_list[0].obsm['spatial'][:, 1], linewidth=0.5, s=50,\n",
    "                   marker='.', color=real_colors, alpha=0.9)\n",
    "    axs[0].set_title(f'{slice_name_list[0]} (True Labels)', size=12)\n",
    "    axs[0].invert_yaxis()\n",
    "    axs[0].axis('off')\n",
    "\n",
    "    anno_colors = list(cas_list[1].obs[annotation_key].astype('str').map(cluster_to_color_map))\n",
    "    axs[1].scatter(cas_list[1].obsm['spatial'][:, 0], cas_list[1].obsm['spatial'][:, 1], linewidth=0.5, s=50,\n",
    "                   marker='.', color=anno_colors, alpha=0.9)\n",
    "    axs[1].set_title(f'{slice_name_list[1]} (Annotation)', size=12)\n",
    "    if mode == 'E13_5':\n",
    "        axs[1].invert_xaxis()\n",
    "    else:\n",
    "        axs[1].invert_yaxis()\n",
    "    axs[1].axis('off')\n",
    "\n",
    "    legend_handles = [\n",
    "        Line2D([0], [0], marker='o', color='w', markersize=8,\n",
    "               markerfacecolor=cluster_to_color_map[cluster], label=cluster)\n",
    "        for cluster in cls_list\n",
    "    ]\n",
    "    axs[1].legend(handles=legend_handles, fontsize=8, title='Spot-types', title_fontsize=10,\n",
    "                  bbox_to_anchor=(1, 1))\n",
    "    # Leave room on the right for the legend placed outside the second panel.\n",
    "    fig.subplots_adjust(left=0.05, top=0.8, bottom=0.1, right=0.75)\n",
    "    if save:\n",
    "        fig.savefig(save_root + 'annotation_results.pdf')\n",
    "\n",
    "    # ---- row ranges of the two slices inside the joint embedding ----\n",
    "    spots_count = [0] + np.cumsum([sample.shape[0] for sample in cas_list]).tolist()\n",
    "    s1_rows = slice(spots_count[0], spots_count[1])\n",
    "    s2_rows = slice(spots_count[1], spots_count[2])\n",
    "\n",
    "    n_spots = adata_concat.shape[0]\n",
    "    size = 10000 / n_spots  # marker area shrinks as the number of spots grows\n",
    "\n",
    "    # ---- UMAP coloured by slice (S2 drawn first, S1 on top) ----\n",
    "    colors_for_slices = [[0.70567316, 0.01555616, 0.15023281],\n",
    "                         [0.2298057, 0.70567316, 0.15023281]]\n",
    "    slice_cmap = {slice_name_list[i]: colors_for_slices[i] for i in range(len(slice_name_list))}\n",
    "    slice_colors = list(adata_concat.obs['slice_name'].astype('str').map(slice_cmap))\n",
    "    fig_slices = _plot_umap_layers(\n",
    "        sp_embedding,\n",
    "        [(s2_rows, slice_colors[s2_rows]), (s1_rows, slice_colors[s1_rows])],\n",
    "        size, f'{mode} Slices')\n",
    "    figures.append(fig_slices)\n",
    "    legend_handles = [\n",
    "        Line2D([0], [0], marker='o', color='w', markersize=8, markerfacecolor=slice_cmap[name], label=name)\n",
    "        for name in slice_name_list\n",
    "    ]\n",
    "    plt.legend(handles=legend_handles, fontsize=8, title='Slices', title_fontsize=10,\n",
    "               loc='upper left')\n",
    "    if save:\n",
    "        fig_slices.savefig(save_root + 'slices_umap.pdf')\n",
    "\n",
    "    # ---- UMAP with S1 true labels; S2 shown in gray underneath ----\n",
    "    fig_truth = _plot_umap_layers(\n",
    "        sp_embedding,\n",
    "        [(s2_rows, 'gray'), (s1_rows, real_colors)],\n",
    "        size, f'{mode} True Labels')\n",
    "    figures.append(fig_truth)\n",
    "    if save:\n",
    "        fig_truth.savefig(save_root + 'true_labels_umap.pdf')\n",
    "\n",
    "    # ---- UMAP of S2 coloured by the transferred labels ----\n",
    "    fig_anno = _plot_umap_layers(\n",
    "        sp_embedding,\n",
    "        [(s2_rows, anno_colors)],\n",
    "        size, f'{mode} Annotation')\n",
    "    figures.append(fig_anno)\n",
    "    if save:\n",
    "        fig_anno.savefig(save_root + 'annotation_umap.pdf')\n",
    "\n",
    "    if plot:\n",
    "        plt.show()\n",
    "    else:\n",
    "        # Release the figures so repeated calls do not accumulate open figures.\n",
    "        for created in figures:\n",
    "            plt.close(created)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4d62b9eb-8c16-4d46-a3e1-21efcdc7400d",
   "metadata": {},
   "source": [
    "### Annotation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bd6b7e4e-022c-403a-8816-0a4a3adc8680",
   "metadata": {},
   "outputs": [],
   "source": [
    "# annotation: transfer the S1 labels to the S2 spots through the shared latent space\n",
    "cas_list[1].obs['predicted_labels'] = knn_label_translation(cas_list[0].obsm['latent'].copy(),\n",
    "                                                            cas_list[0].obs['Annotation_for_Combined'].copy(),\n",
    "                                                            cas_list[1].obsm['latent'].copy(), k=20)\n",
    "\n",
    "# save the annotated S2 slice\n",
    "cas_list[1].write(save_dir + f'{model}/annotated_{slice_name_list[1]}_atac.h5ad')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d20d1752-3874-4c88-8306-84854f1d543f",
   "metadata": {},
   "source": [
    "### Plot spatial organization and UMAP visualization of annotation results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "98d69e8d-e95f-46d2-b30e-f844bd375baa",
   "metadata": {},
   "outputs": [],
   "source": [
    "reducer = UMAP(n_neighbors=30, n_components=2, metric='correlation', n_epochs=None, learning_rate=1.0,\n",
    "               min_dist=0.3, spread=1.0, set_op_mix_ratio=1.0, local_connectivity=1, repulsion_strength=1,\n",
    "               negative_sample_rate=5, a=None, b=None, random_state=1234, metric_kwds=None,\n",
    "               angular_rp_forest=False, verbose=False)\n",
    "\n",
    "sp_embedding = reducer.fit_transform(adata_concat.obsm['latent'])\n",
    "\n",
    "cls_list_all = ['Primary_brain_1', 'Primary_brain_2', 'Midbrain', 'Diencephalon_and_hindbrain', 'Basal_plate_of_hindbrain',\n",
    "                'Subpallium_1', 'Subpallium_2', 'Cartilage_1', 'Cartilage_2', 'Cartilage_3', 'Cartilage_4',\n",
    "                'Mesenchyme', 'Muscle', 'Thalamus', 'DPallm', 'DPallv']\n",
    "\n",
    "colors_for_all = ['red', 'tomato', 'chocolate', 'orange', 'goldenrod',\n",
    "                  'b', 'royalblue', 'g', 'limegreen', 'lime', 'springgreen',\n",
    "                  'deepskyblue', 'pink', 'fuchsia', 'yellowgreen', 'olivedrab']\n",
    "\n",
    "# One fixed colour per known cluster, paired by position in the two lists above.\n",
    "color_of_cluster = dict(zip(cls_list_all, colors_for_all))\n",
    "\n",
    "cls_list = list(set(cas_list[0].obs['Annotation_for_Combined']))\n",
    "\n",
    "# Fail early: a label without a colour would otherwise surface later as a matplotlib colour error.\n",
    "unknown_clusters = sorted(str(cls) for cls in cls_list if cls not in color_of_cluster)\n",
    "if unknown_clusters:\n",
    "    raise ValueError(f'No colour defined for cluster(s): {unknown_clusters}')\n",
    "\n",
    "# Keep the canonical order of cls_list_all, restricted to the clusters present in S1.\n",
    "cls_list_reordered = [cls for cls in cls_list_all if cls in cls_list]\n",
    "colors_for_clusters = [color_of_cluster[cls] for cls in cls_list_reordered]\n",
    "\n",
    "cluster_to_color_map = dict(zip(cls_list_reordered, colors_for_clusters))\n",
    "print(cluster_to_color_map)\n",
    "\n",
    "plot_mousebrain_verti(cas_list, adata_concat, 'Annotation_for_Combined', 'predicted_labels', cluster_to_color_map,\n",
    "                      slice_name_list, cls_list_reordered, sp_embedding, mode,\n",
    "                      save_root=save_dir+f'{model}/', save=True, plot=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}