Find DEGs
[ ]:
import os
import anndata as ad
import scanpy as sc
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42
import warnings
warnings.filterwarnings("ignore")
[ ]:
save = True
slice_name_list = ["GSM6204623_MouseBrain_20um", "GSM6758284_MouseBrain_20um_repATAC", "GSM6758285_MouseBrain_20um_100barcodes_ATAC"]
rna_slice_name_list = ["GSM6204636_MouseBrain_20um", "GSM6753041_MouseBrain_20um_repATAC", "GSM6753043_MouseBrain_20um_100barcodes_ATAC"]
label_list = ["GSM6204623", "GSM6758284", "GSM6758285"]
slice_index_list = list(range(len(slice_name_list)))
data_dir = '../../data/spMOdata/EpiTran_HumanMouse_Zhang2023/preprocessed_from_fragments/'
save_dir = f'../../results/HumanMouse_Zhang2023/mb/'
method = 'leiden'
if not os.path.exists(save_dir + f'DEGs_concat/'):
os.makedirs(save_dir + f'DEGs_concat/')
for i in range(len(slice_name_list)):
if not os.path.exists(save_dir + f'DEGs_slice{i}/'):
os.makedirs(save_dir + f'DEGs_slice{i}/')
Load the spATAC-seq data and the corresponding SRT data
[ ]:
# read the filtered and clustered CAS data
cas_list = [ad.read_h5ad(save_dir + f'clustered_{sample}.h5ad') for sample in slice_name_list]
# read the raw RNA data
rna_list = [ad.read_h5ad(data_dir + f'{sample}.h5ad') for sample in rna_slice_name_list]
for j in range(len(rna_list)):
rna_list[j].obs_names = [x + '-1_' + slice_name_list[j] for x in rna_list[j].obs_names]
print(rna_list[j].shape)
# filter and reorder spots in rna slices
for i in range(len(slice_name_list)):
obs_list = [obs_name for obs_name in cas_list[i].obs_names if obs_name in rna_list[i].obs_names]
cas_list[i] = cas_list[i][obs_list, :]
rna_list[i] = rna_list[i][obs_list, :]
# transfer the cluster labels from cas slices to rna slice
rna_list[i].obs[method] = cas_list[i].obs[method].copy()
print(rna_list[i].shape)
Normalization
[ ]:
# normalization
for i in range(len(slice_name_list)):
sc.pp.normalize_total(rna_list[i], target_sum=1e4)
sc.pp.log1p(rna_list[i])
rna_list[i].var_names_make_unique()
# concatenate the rna slices
rna_concat = ad.concat(rna_list, label='slice_name', keys=slice_name_list)
print(rna_concat.shape)
Find DEGs
[ ]:
# find DEGs
group_list = list(set(rna_concat.obs[method]))
print(group_list)
rna_concat_degs_list = []
sc.tl.rank_genes_groups(rna_concat, method, groups=group_list, method='wilcoxon')
rna_concat_genes = pd.DataFrame(rna_concat.uns["rank_genes_groups"]["names"])
rna_concat_logfoldchanges = pd.DataFrame(rna_concat.uns["rank_genes_groups"]["logfoldchanges"])
rna_concat_pvals_adj = pd.DataFrame(rna_concat.uns["rank_genes_groups"]["pvals_adj"])
for col in list(rna_concat_genes.columns):
concat_genes = rna_concat_genes[col].tolist()
concat_logfoldchanges = rna_concat_logfoldchanges[col].tolist()
concat_pvals_adj = rna_concat_pvals_adj[col].tolist()
concat_degs_list = [concat_genes[i] for i in range(len(concat_genes)) if concat_logfoldchanges[i] > 1 and concat_pvals_adj[i] < 0.01]
rna_concat_degs_list.append(concat_degs_list)
# save DEGs
if save:
if not concat_degs_list:
with open(save_dir + f'DEGs_concat/{col}_DEGs.txt', 'w') as f:
pass
else:
with open(save_dir + f'DEGs_concat/{col}_DEGs.txt', 'w') as f:
# f.write(','.join(concat_degs_list))
for item in concat_degs_list:
f.write(item + '\n')
print(f"Label: {col}, Number of DEGs: {len(concat_degs_list)}")
# find DEGs slice specific
for i in range(len(slice_name_list)):
group_list = list(set(rna_list[i].obs[method]))
print(group_list)
rna_degs_list = []
sc.tl.rank_genes_groups(rna_list[i], method, groups=group_list, method='wilcoxon')
rna_genes = pd.DataFrame(rna_list[i].uns["rank_genes_groups"]["names"])
rna_logfoldchanges = pd.DataFrame(rna_list[i].uns["rank_genes_groups"]["logfoldchanges"])
rna_pvals_adj = pd.DataFrame(rna_list[i].uns["rank_genes_groups"]["pvals_adj"])
for col in list(rna_genes.columns):
genes = rna_genes[col].tolist()
logfoldchanges = rna_logfoldchanges[col].tolist()
pvals_adj = rna_pvals_adj[col].tolist()
degs_list = [genes[j] for j in range(len(genes)) if logfoldchanges[j] > 1 and pvals_adj[j] < 0.05]
rna_degs_list.append(degs_list)
# save DEGs
if save:
if not degs_list:
with open(save_dir + f'DEGs_slice{i}/{col}_DEGs.txt', 'w') as f:
pass
else:
with open(save_dir + f'DEGs_slice{i}/{col}_DEGs.txt', 'w') as f:
# f.write(','.join(degs_list))
for item in degs_list:
f.write(item + '\n')
print(f"Label: {col}, Number of DEGs: {len(degs_list)}")
Plot DEGs
[ ]:
import numpy as np
import matplotlib.pyplot as plt
save = True
slice_name_list = ["GSM6204623_MouseBrain_20um", "GSM6758284_MouseBrain_20um_repATAC", "GSM6758285_MouseBrain_20um_100barcodes_ATAC"]
rna_slice_name_list = ["GSM6204636_MouseBrain_20um", "GSM6753041_MouseBrain_20um_repATAC", "GSM6753043_MouseBrain_20um_100barcodes_ATAC"]
label_list = ["GSM6204623", "GSM6758284", "GSM6758285"]
slice_index_list = list(range(len(slice_name_list)))
data_dir = '../../data/spMOdata/EpiTran_HumanMouse_Zhang2023/preprocessed_from_fragments/'
save_dir = f'../../results/HumanMouse_Zhang2023/mb/'
marker_list = ['Rgs9', 'Pde10a', 'Gng7', 'Bcl11b', 'Foxp1',
'Plp1', 'Mbp', 'Tspan2',
'Dgkg',
'Sox4', 'Dlx1', # 'Zbtb20',
'Isl1', 'Rreb1',
'Mef2c',
]
marker = 'Plp1'
if not os.path.exists(save_dir + f'{marker}/') and save:
os.makedirs(save_dir + f'{marker}/')
[ ]:
# read the filtered and clustered CAS data
cas_list = [ad.read_h5ad(save_dir + f'clustered_{sample}.h5ad') for sample in slice_name_list]
cas_concat = ad.concat(cas_list, label='slice_name', keys=slice_name_list)
cas_concat.obsm['X_pca'] = np.load(save_dir + f'input_matrix.npy')
cas_concat.obsm['INSTINCT_latent'] = pd.read_csv(save_dir + f'INSTINCT_embed.csv', header=None).values
cas_concat.obsm['raw_emb'] = pd.read_csv(save_dir + f'sp_embeddings_raw.csv', header=None).values
cas_concat.obsm['inte_emb'] = pd.read_csv(save_dir + f'sp_embeddings_integrated.csv', header=None).values
# read the raw RNA data
rna_list = [ad.read_h5ad(data_dir + f'{sample}.h5ad') for sample in rna_slice_name_list]
for j in range(len(rna_list)):
rna_list[j].obs_names = [x + '-1_' + slice_name_list[j] for x in rna_list[j].obs_names]
print(rna_list[j].shape)
# filter and reorder spots in rna slices
for i in range(len(slice_name_list)):
obs_list = [obs_name for obs_name in cas_list[i].obs_names if obs_name in rna_list[i].obs_names]
cas_list[i] = cas_list[i][obs_list, :]
rna_list[i] = rna_list[i][obs_list, :]
print(rna_list[i].shape)
# concatenate the rna slices
rna_concat = ad.concat(rna_list, label='slice_name', keys=slice_name_list)
sc.pp.filter_genes(rna_concat, min_cells=500)
sc.pp.normalize_total(rna_concat, target_sum=1e4)
sc.pp.log1p(rna_concat)
print(rna_concat.shape)
cas_concat = cas_concat[rna_concat.obs_names, :]
rna_concat.obsm['X_pca'] = cas_concat.obsm['X_pca']
rna_concat.obsm['INSTINCT_latent'] = cas_concat.obsm['INSTINCT_latent']
rna_concat.obsm['raw_emb'] = cas_concat.obsm['raw_emb']
rna_concat.obsm['inte_emb'] = cas_concat.obsm['inte_emb']
data_column = rna_concat[:, marker].copy()
# raw
sp_embedding = rna_concat.obsm['raw_emb'].copy()
n_spots = rna_concat.shape[0]
size = 10000 / n_spots
order = np.arange(n_spots)
plt.figure(figsize=(6, 5))
plt.scatter(sp_embedding[order, 0], sp_embedding[order, 1], s=size, c=data_column.X.toarray(), cmap='viridis')
plt.tick_params(axis='both', bottom=False, top=False, left=False, right=False,
labelleft=False, labelbottom=False, grid_alpha=0)
plt.colorbar(label='Color')
plt.title(f'Raw', fontsize=16)
if save:
save_path = save_dir + f"{marker}/{marker}_raw_umap.pdf"
plt.savefig(save_path)
# integrated
sp_embedding = rna_concat.obsm['inte_emb'].copy()
n_spots = rna_concat.shape[0]
size = 10000 / n_spots
order = np.arange(n_spots)
plt.figure(figsize=(6, 5))
plt.scatter(sp_embedding[order, 0], sp_embedding[order, 1], s=size, c=data_column.X.toarray(), cmap='viridis')
plt.tick_params(axis='both', bottom=False, top=False, left=False, right=False,
labelleft=False, labelbottom=False, grid_alpha=0)
plt.colorbar(label='Color')
plt.title(f'Integrated', fontsize=16)
if save:
save_path = save_dir + f"{marker}/{marker}_integrated_umap.pdf"
plt.savefig(save_path)
scalar_mappable = plt.cm.ScalarMappable(cmap='viridis')
colors = scalar_mappable.to_rgba(data_column.X.toarray())
spots_count = [0]
n = 0
for sample in rna_list:
num = sample.shape[0]
n += num
spots_count.append(n)
# fig, axs = plt.subplots(1, 2, figsize=(8, 4))
fig, axs = plt.subplots(1, 3, figsize=(12, 4))
for i in range(len(rna_list)):
cluster_colors = list(colors[spots_count[i]: spots_count[i+1]])
if i == 2:
s = 10
else:
s = 40
axs[i].scatter(rna_list[i].obsm['spatial'][:, 1], rna_list[i].obsm['spatial'][:, 0], linewidth=1, s=s,
marker=".", color=cluster_colors, alpha=0.9)
if i != 1:
axs[i].invert_yaxis()
if i != 0:
axs[i].invert_xaxis()
axs[i].set_title(f'{label_list[i]}', size=12)
axs[i].axis('off')
# plt.colorbar(label='Color')
plt.gcf().subplots_adjust(left=0.05, top=0.8, bottom=0.05, right=0.90)
if save:
save_path = save_dir + f'{marker}/{marker}_slices.pdf'
plt.savefig(save_path)
plt.show()