{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "71d217ff-59c6-41a8-b3cf-6334b2411f9d",
   "metadata": {},
   "source": [
    "# Integrating two vertically adjacent slices from the same developmental stage\n",
    "In this case, we integrate the two vertically adjacent slices from the same developmental stage and prepare for cross-sample annotation and downstream analysis."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "181b4345-0499-40e9-a56d-ca685287fb32",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import csv\n",
    "import warnings\n",
    "\n",
    "import torch\n",
    "import numpy as np\n",
    "import anndata as ad\n",
    "\n",
    "from sklearn.decomposition import PCA\n",
    "\n",
    "# installed before importing INSTINCT so warnings raised during that import are suppressed too\n",
    "warnings.filterwarnings(\"ignore\")\n",
    "\n",
    "import INSTINCT\n",
    "\n",
    "# run on the GPU when one is available, otherwise fall back to the CPU\n",
    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e1b83e3c-56e0-4f7d-b08a-f5bf531f28ed",
   "metadata": {},
   "source": [
    "### Load the raw data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d4fbbbfb-a257-4263-9bab-0ab7900b7b0d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# set the mode_index to the index of a specific stage in the mode_list\n",
    "# mode_index = 3 means integrating the two slices from E18.5\n",
    "mode_index = 3\n",
    "mode_list = ['E11_0', 'E13_5', 'E15_5', 'E18_5']\n",
    "mode = mode_list[mode_index]\n",
    "\n",
    "# mouse brain\n",
    "data_dir = '../../data/spMOdata/EpiTran_MouseBrain_Jiang2023/preprocessed/'\n",
    "save_dir = f'../../results/MouseBrain_Jiang2023/vertical/{mode}/'\n",
    "\n",
    "# exist_ok=True makes these calls no-ops when the directories already exist,\n",
    "# so the cell can be re-run without a separate existence check\n",
    "os.makedirs(data_dir + f'{mode}/', exist_ok=True)\n",
    "os.makedirs(save_dir + 'INSTINCT/', exist_ok=True)\n",
    "\n",
    "slice_name_list = [f'{mode}-S1', f'{mode}-S2']\n",
    "\n",
    "# load raw data\n",
    "cas_dict = {}\n",
    "for sample in slice_name_list:\n",
    "    sample_data = ad.read_h5ad(data_dir + sample + '_atac.h5ad')\n",
    "\n",
    "    # no cell in this notebook reads obsm['insertion']; drop it when present\n",
    "    # (presumably to keep the merged files small - TODO confirm)\n",
    "    if 'insertion' in sample_data.obsm:\n",
    "        del sample_data.obsm['insertion']\n",
    "\n",
    "    cas_dict[sample] = sample_data\n",
    "\n",
    "# keep the slices in the same order as slice_name_list\n",
    "cas_list = [cas_dict[sample] for sample in slice_name_list]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3e730663-3319-439f-8405-f471538cb08b",
   "metadata": {},
   "source": [
    "### Merge the peaks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "609a506d-23a3-452c-ba36-53171f318fa4",
   "metadata": {},
   "outputs": [],
   "source": [
    "cas_list = INSTINCT.peak_sets_alignment(cas_list)\n",
    "\n",
    "# save the merged data, one file per slice\n",
    "for sample, adata in zip(slice_name_list, cas_list):\n",
    "    adata.write_h5ad(f'{data_dir}{mode}/merged_{sample}_atac.h5ad')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "547a0d5a-597b-4c9f-a66a-ed0c899ab89c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# load the merged data\n",
    "cas_list = [ad.read_h5ad(data_dir + mode + '/merged_' + sample + '_atac.h5ad') for sample in slice_name_list]\n",
    "\n",
    "# append the slice name to every observation name so that names shared by\n",
    "# both slices stay distinct after concatenation\n",
    "for sample, adata in zip(slice_name_list, cas_list):\n",
    "    adata.obs_names = [x + '_' + sample for x in adata.obs_names]\n",
    "\n",
    "# concatenation; the originating slice is recorded in obs['slice_name']\n",
    "adata_concat = ad.concat(cas_list, label=\"slice_name\", keys=slice_name_list)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c478c324-f71d-4ab0-b259-4def6d7094a5",
   "metadata": {},
   "source": [
    "### Data preprocessing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "20acf3c9-197f-4afd-8b92-ff7c81dfbfbd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# preprocess CAS data\n",
    "# NOTE(review): the return value is ignored and the same objects are saved in the\n",
    "# next cell, so preprocess_CAS is assumed to modify cas_list / adata_concat in place - confirm\n",
    "print('Start preprocessing')\n",
    "INSTINCT.preprocess_CAS(cas_list, adata_concat, use_fragment_count=True, min_cells_rate=0.03)\n",
    "print(adata_concat.shape)\n",
    "print('Done!')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "92aaf58a-cb9a-47bb-8af9-2ce16412cd05",
   "metadata": {},
   "outputs": [],
   "source": [
    "# write the preprocessed data to disk\n",
    "adata_concat.write_h5ad(save_dir + f\"{mode}_preprocessed_concat_atac.h5ad\")\n",
    "for sample, adata in zip(slice_name_list, cas_list):\n",
    "    adata.write_h5ad(save_dir + f\"filtered_merged_{sample}_atac.h5ad\")\n",
    "\n",
    "# read the files just written back in; the cells below work on these on-disk versions\n",
    "cas_list = [ad.read_h5ad(save_dir + f\"filtered_merged_{sample}_atac.h5ad\") for sample in slice_name_list]\n",
    "adata_concat = ad.read_h5ad(save_dir + f\"{mode}_preprocessed_concat_atac.h5ad\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4c8602eb-c71e-442f-8068-c77e860c5b14",
   "metadata": {},
   "source": [
    "### Perform PCA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "08965fbe-c0bc-446c-bd74-aebc17623539",
   "metadata": {},
   "outputs": [],
   "source": [
    "# number of principal components; used for both the message and the PCA itself\n",
    "n_components = 100\n",
    "\n",
    "print(f'Applying PCA to reduce the feature dimension to {n_components} ...')\n",
    "pca = PCA(n_components=n_components, random_state=1234)\n",
    "\n",
    "# densify only when X is sparse; a dense X has no .toarray()\n",
    "features = adata_concat.X\n",
    "if hasattr(features, 'toarray'):\n",
    "    features = features.toarray()\n",
    "\n",
    "input_matrix = pca.fit_transform(features)\n",
    "np.save(save_dir + f'{mode}_input_matrix_atac.npy', input_matrix)\n",
    "print('Done !')\n",
    "\n",
    "# reload the saved matrix and attach it to the concatenated object\n",
    "input_matrix = np.load(save_dir + f'{mode}_input_matrix_atac.npy')\n",
    "adata_concat.obsm['X_pca'] = input_matrix"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5dd41a21-af44-421b-baa7-bdead21e8c59",
   "metadata": {},
   "source": [
    "### Create neighbor graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5a1e8aa0-3b56-4545-8c1a-9698087b74e8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# calculate the spatial graph\n",
    "INSTINCT.create_neighbor_graph(cas_list, adata_concat)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8efdbf8b-0d9b-4106-9716-240e09f66ab7",
   "metadata": {},
   "source": [
    "### Run model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8493c9f9-34ba-461a-9dab-75006d8d2342",
   "metadata": {},
   "outputs": [],
   "source": [
    "INSTINCT_model = INSTINCT.INSTINCT_Model(cas_list, adata_concat, device=device)\n",
    "\n",
    "INSTINCT_model.train(report_loss=True, report_interval=100)\n",
    "\n",
    "# the next cell expects eval() to have stored 'INSTINCT_latent' and\n",
    "# 'INSTINCT_latent_noise' in the obsm of every slice in cas_list\n",
    "INSTINCT_model.eval(cas_list)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ddb93ffd-612f-4bea-8c29-841c80ff0992",
   "metadata": {},
   "source": [
    "### Save the latent embeddings\n",
    "Save the latent representations of spots for further analyses"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0ffa2926-ee3c-4e32-87e8-8354dd32889b",
   "metadata": {},
   "outputs": [],
   "source": [
    "result = ad.concat(cas_list, label=\"slice_name\", keys=slice_name_list)\n",
    "\n",
    "# obsm key -> output file (relative to save_dir)\n",
    "embedding_files = {\n",
    "    'INSTINCT_latent': f'INSTINCT/{mode}_INSTINCT_embed.csv',\n",
    "    'INSTINCT_latent_noise': f'INSTINCT/{mode}_INSTINCT_noise_embed.csv',\n",
    "}\n",
    "\n",
    "# each matrix is written without a header, one CSV row per matrix row\n",
    "for obsm_key, file_name in embedding_files.items():\n",
    "    with open(save_dir + file_name, 'w', newline='') as file:\n",
    "        writer = csv.writer(file)\n",
    "        writer.writerows(result.obsm[obsm_key])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}