Cross-sample annotation
Use the annotated S1 slice to annotate the corresponding unlabeled S2 slice, based on the integration results.
[ ]:
import pandas as pd
import anndata as ad
from umap.umap_ import UMAP
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42
import warnings
warnings.filterwarnings("ignore")
Load the data
Load the two slices and the latent embeddings of the spots, and set the current label of each spot in S2 to ‘Unidentified’.
[ ]:
# --- Run configuration -------------------------------------------------------
model = 'INSTINCT'
mode_list = ['E11_0', 'E13_5', 'E15_5', 'E18_5']
mode_index = 3
mode = mode_list[mode_index]

data_dir = '../../data/spMOdata/EpiTran_MouseBrain_Jiang2023/preprocessed/'
save_dir = f'../../results/MouseBrain_Jiang2023/vertical/{mode}/'

# --- Load the two slices (S1 carries labels, S2 is to be annotated) ----------
slice_name_list = [f'{mode}-S1', f'{mode}-S2']
cas_list = []
for sample in slice_name_list:
    cas_list.append(ad.read_h5ad(save_dir + f'filtered_merged_{sample}_atac.h5ad'))

# S2 starts out without labels; mark every spot as 'Unidentified'.
cas_list[1].obs['Annotation_for_Combined'] = 'Unidentified'
adata_concat = ad.concat(cas_list, label="slice_name", keys=slice_name_list)

# Cumulative spot counts: slice j occupies rows spots_count[j]:spots_count[j + 1]
# of the concatenated object.
spots_count = [0]
for sample in cas_list:
    spots_count.append(spots_count[-1] + sample.shape[0])
n = spots_count[-1]

# --- Attach the latent embeddings --------------------------------------------
# The CSV is read without a header row. The per-slice slicing below assumes its
# rows follow the same order as the concatenated slices (S1 first, then S2).
embed_path = save_dir + f'{model}/{mode}_INSTINCT_embed.csv'
adata_concat.obsm['latent'] = pd.read_csv(embed_path, header=None).values
for j, (start, stop) in enumerate(zip(spots_count[:-1], spots_count[1:])):
    cas_list[j].obsm['latent'] = adata_concat.obsm['latent'][start:stop].copy()
[ ]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
def StrLabel2Idx(string_labels):
    """Encode string labels as integer class indices.

    Parameters
    ----------
    string_labels : array-like
        One label per sample; passed straight to ``LabelEncoder.fit_transform``.

    Returns
    -------
    numpy.ndarray
        Integer index of each sample's label, as assigned by a freshly fitted
        ``LabelEncoder``.
    """
    # Imported locally: the notebook only imports numpy in a later cell, so a
    # global ``np`` is not guaranteed to exist when this helper is called.
    import numpy as np

    label_encoder = LabelEncoder()
    idx_labels = label_encoder.fit_transform(string_labels)
    return np.array(idx_labels)
def knn_label_translation(reference_X, reference_y, target_X, k=20):
    """Transfer labels from reference samples to target samples with k-NN.

    Parameters
    ----------
    reference_X : array-like
        Feature matrix of the labeled samples the classifier is fitted on.
    reference_y : array-like
        Labels of the reference samples, one per row of ``reference_X``.
    target_X : array-like
        Feature matrix of the samples to be labeled.
    k : int, default 20
        Number of neighbours given to ``KNeighborsClassifier``.

    Returns
    -------
    array
        Predicted label for every row of ``target_X``, expressed in the
        original label values of ``reference_y``.
    """
    encoder = LabelEncoder()
    classifier = KNeighborsClassifier(n_neighbors=k)

    # The classifier is fitted on integer codes; predictions are decoded back
    # to the original labels before returning.
    classifier.fit(reference_X, encoder.fit_transform(reference_y))
    predicted_codes = classifier.predict(target_X)
    return encoder.inverse_transform(predicted_codes)
[ ]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
def _slice_offsets(cas_list):
    """Return cumulative spot counts ``[0, n_0, n_0 + n_1, ...]`` over the slices."""
    offsets = [0]
    for sample in cas_list:
        offsets.append(offsets[-1] + sample.shape[0])
    return offsets


def _save_current_figure(save, save_root, file_name):
    """Write the current figure to ``save_root + file_name`` when ``save`` is true."""
    if save:
        plt.savefig(save_root + file_name)


def _umap_scatter(sp_embedding, layers, size, title):
    """Open a new 5x5 figure and draw one scatter per ``(start, stop, colors)`` layer.

    Layers are drawn in the given order, so later layers appear on top.
    """
    plt.figure(figsize=(5, 5))
    plt.rc('axes', linewidth=1)
    for start, stop, layer_colors in layers:
        plt.scatter(sp_embedding[start:stop, 0], sp_embedding[start:stop, 1],
                    s=size, c=layer_colors)
    plt.tick_params(axis='both', bottom=False, top=False, left=False, right=False,
                    labelleft=False, labelbottom=False, grid_alpha=0)
    plt.title(title, fontsize=14)


def plot_mousebrain_verti(cas_list, adata_concat, ground_truth_key, annotation_key, cluster_to_color_map,
                          slice_name_list, cls_list, sp_embedding, mode,
                          save_root=None, save=False, plot=False):
    """Plot the annotation results spatially and on a 2-D embedding.

    Four figures are created:

    1. side-by-side spatial maps of slice 0 (ground-truth labels) and slice 1
       (transferred labels), with a legend of spot types;
    2. the embedding coloured by slice;
    3. the embedding with slice 0 coloured by ground truth and slice 1 in gray;
    4. the embedding of slice 1 coloured by the transferred labels.

    Parameters
    ----------
    cas_list : sequence
        Slice objects; ``cas_list[0]`` is the labeled reference and
        ``cas_list[1]`` the annotated slice. Each must provide ``.shape``,
        ``.obs`` and ``.obsm['spatial']``.
    adata_concat : object
        Concatenation of the slices; ``.shape[0]`` and ``.obs['slice_name']``
        are read.
    ground_truth_key : str
        Column of ``cas_list[0].obs`` holding the true labels.
    annotation_key : str
        Column of ``cas_list[1].obs`` holding the transferred labels.
    cluster_to_color_map : dict
        Maps a label to a matplotlib colour.
    slice_name_list : sequence of str
        Names of the slices, used in titles and in the slice legend.
    cls_list : sequence
        Labels listed (in this order) in the spot-type legend; each must be a
        key of ``cluster_to_color_map``.
    sp_embedding : array
        2-D embedding indexed as ``sp_embedding[start:stop, col]``. Rows are
        assumed to follow the order of ``cas_list`` -- verify against caller.
    mode : str
        Dataset tag shown in titles; for ``'E13_5'`` the x axis of the second
        spatial panel is inverted instead of the y axis.
    save_root : str, optional
        Prefix that the output file names are appended to (plain string
        concatenation, so include a trailing separator for a directory).
    save : bool, default False
        Save every figure as PDF under ``save_root``.
    plot : bool, default False
        Call ``plt.show()`` once all figures are built.

    Raises
    ------
    TypeError
        If ``save`` is true but ``save_root`` is ``None``.
    """
    # Fail before any figure is created instead of with an opaque
    # "NoneType + str" error halfway through.
    if save and save_root is None:
        raise TypeError("save_root must be a path prefix string when save=True")

    reference, target = cas_list[0], cas_list[1]
    real_colors = list(reference.obs[ground_truth_key].astype('str').map(cluster_to_color_map))
    anno_colors = list(target.obs[annotation_key].astype('str').map(cluster_to_color_map))

    # ---- Figure 1: spatial maps ---------------------------------------------
    fig, axs = plt.subplots(1, 2, figsize=(10, 4))
    fig.suptitle(f'{mode} Annotation Results', fontsize=14)

    axs[0].scatter(reference.obsm['spatial'][:, 0], reference.obsm['spatial'][:, 1], linewidth=0.5, s=50,
                   marker=".", color=real_colors, alpha=0.9)
    axs[0].set_title(f'{slice_name_list[0]} (True Labels)', size=12)
    axs[0].invert_yaxis()
    axs[0].axis('off')

    axs[1].scatter(target.obsm['spatial'][:, 0], target.obsm['spatial'][:, 1], linewidth=0.5, s=50,
                   marker=".", color=anno_colors, alpha=0.9)
    axs[1].set_title(f'{slice_name_list[1]} (Annotation)', size=12)
    # The E13_5 target slice is flipped along x rather than y.
    if mode == 'E13_5':
        axs[1].invert_xaxis()
    else:
        axs[1].invert_yaxis()
    axs[1].axis('off')

    legend_handles = [
        Line2D([0], [0], marker='o', color='w', markersize=8,
               markerfacecolor=cluster_to_color_map[cluster], label=cluster)
        for cluster in cls_list
    ]
    axs[1].legend(
        handles=legend_handles,
        fontsize=8, title='Spot-types', title_fontsize=10, bbox_to_anchor=(1, 1))
    # Leave room on the right for the legend placed outside the axes.
    fig.subplots_adjust(left=0.05, top=0.8, bottom=0.1, right=0.75)
    _save_current_figure(save, save_root, 'annotation_results.pdf')

    # ---- Shared set-up for the embedding figures ----------------------------
    offsets = _slice_offsets(cas_list)
    ref_start, ref_stop = offsets[0], offsets[1]
    tgt_start, tgt_stop = offsets[1], offsets[2]

    n_spots = adata_concat.shape[0]
    size = 10000 / n_spots  # marker size shrinks as the number of spots grows

    # ---- Figure 2: embedding coloured by slice ------------------------------
    colors_for_slices = [[0.70567316, 0.01555616, 0.15023281],
                         [0.2298057, 0.70567316, 0.15023281]]
    slice_cmap = dict(zip(slice_name_list, colors_for_slices))
    slice_colors = list(adata_concat.obs['slice_name'].astype('str').map(slice_cmap))

    # The target slice is drawn first so the reference slice ends up on top.
    _umap_scatter(sp_embedding,
                  [(tgt_start, tgt_stop, slice_colors[tgt_start:tgt_stop]),
                   (ref_start, ref_stop, slice_colors[ref_start:ref_stop])],
                  size, f'{mode} Slices')
    slice_handles = [
        Line2D([0], [0], marker='o', color='w', markersize=8, markerfacecolor=slice_cmap[name],
               label=name)
        for name in slice_name_list
    ]
    plt.legend(handles=slice_handles, fontsize=8, title='Slices', title_fontsize=10,
               loc='upper left')
    _save_current_figure(save, save_root, "slices_umap.pdf")

    # ---- Figure 3: reference ground truth over a gray target ----------------
    _umap_scatter(sp_embedding,
                  [(tgt_start, tgt_stop, 'gray'),
                   (ref_start, ref_stop, real_colors)],
                  size, f'{mode} True Labels')
    _save_current_figure(save, save_root, "true_labels_umap.pdf")

    # ---- Figure 4: target slice coloured by transferred labels --------------
    _umap_scatter(sp_embedding,
                  [(tgt_start, tgt_stop, anno_colors)],
                  size, f'{mode} Annotation')
    _save_current_figure(save, save_root, "annotation_umap.pdf")

    if plot:
        plt.show()
Annotation
[ ]:
# Transfer the S1 labels onto S2 with a k-nearest-neighbour vote on the latent
# embeddings. Copies are passed so the stored arrays are not handed out directly.
reference_latent = cas_list[0].obsm['latent'].copy()
reference_labels = cas_list[0].obs['Annotation_for_Combined'].copy()
target_latent = cas_list[1].obsm['latent'].copy()
cas_list[1].obs['predicted_labels'] = knn_label_translation(reference_latent, reference_labels,
                                                            target_latent, k=20)

# Persist the annotated S2 slice next to the model outputs.
annotated_path = save_dir + f'{model}/annotated_{slice_name_list[1]}_atac.h5ad'
cas_list[1].write(annotated_path)
Plot spatial organization and UMAP visualization of annotation results
[ ]:
# 2-D UMAP of the joint latent space (fixed random_state for a repeatable layout).
reducer = UMAP(n_neighbors=30, n_components=2, metric="correlation", n_epochs=None, learning_rate=1.0,
               min_dist=0.3, spread=1.0, set_op_mix_ratio=1.0, local_connectivity=1, repulsion_strength=1,
               negative_sample_rate=5, a=None, b=None, random_state=1234, metric_kwds=None,
               angular_rp_forest=False, verbose=False)
sp_embedding = reducer.fit_transform(adata_concat.obsm['latent'])

# Master list of labels and the colour assigned to each, position by position.
cls_list_all = ['Primary_brain_1', 'Primary_brain_2', 'Midbrain', 'Diencephalon_and_hindbrain', 'Basal_plate_of_hindbrain',
                'Subpallium_1', 'Subpallium_2', 'Cartilage_1', 'Cartilage_2', 'Cartilage_3', 'Cartilage_4',
                'Mesenchyme', 'Muscle', 'Thalamus', 'DPallm', 'DPallv']
colors_for_all = ['red', 'tomato', 'chocolate', 'orange', 'goldenrod',
                  'b', 'royalblue', 'g', 'limegreen', 'lime', 'springgreen',
                  'deepskyblue', 'pink', 'fuchsia', 'yellowgreen', 'olivedrab']
color_of_label = dict(zip(cls_list_all, colors_for_all))

# Keep only the labels present in S1, in the order of the master list.
cls_list = list(set(cas_list[0].obs['Annotation_for_Combined']))
cls_list_reordered = [cls for cls in cls_list_all if cls in cls_list]
colors_for_clusters = [color_of_label[cls] for cls in cls_list_reordered]
cluster_to_color_map = dict(zip(cls_list_reordered, colors_for_clusters))
print(cluster_to_color_map)

plot_mousebrain_verti(cas_list, adata_concat, 'Annotation_for_Combined', 'predicted_labels', cluster_to_color_map,
                      slice_name_list, cls_list_reordered, sp_embedding, mode,
                      save_root=save_dir + f'{model}/', save=True, plot=True)