{ "cells": [ { "cell_type": "markdown", "id": "3fdddf5c-d6f7-45e0-83ff-c9d0d98a4cac", "metadata": {}, "source": [ "# Partitioned heritability analysis\n", "Utilize the SNPs found in each set of spot-type-specific peaks and the set of background peaks to perform partitioned heritability analysis." ] }, { "cell_type": "code", "execution_count": null, "id": "a6132db9-c494-49bd-ab49-030bfbcc2cda", "metadata": {}, "outputs": [], "source": [ "options(warn=-1)\n", "library(scABC)\n", "library(chromVAR)\n", "library(motifmatchr)\n", "library(SummarizedExperiment)\n", "library(Matrix)\n", "# library(ggplot2)\n", "library(BiocParallel)\n", "library(BSgenome.Hsapiens.UCSC.hg19)\n", "library(JASPAR2016)\n", "library(data.table)\n", "library(parallel)\n", "set.seed(1234)" ] }, { "cell_type": "markdown", "id": "6353395c-4ebc-45fd-af26-e627166b73d1", "metadata": {}, "source": [ "### Untar the downloaded reference data\n", "The reference data can be downloaded [here](https://doi.org/10.5281/zenodo.7768714)." ] }, { "cell_type": "code", "execution_count": null, "id": "e37ff440-e7c1-4670-ae19-b0a0e2335957", "metadata": {}, "outputs": [], "source": [ "archive_file <- \"C:/University/FINAL/data/SNP_files/1000G_Phase3_plinkfiles.tgz\"\n", "extract_dir <- \"C:/University/FINAL/data/SNP_files/\"\n", "untar(archive_file, exdir = extract_dir)" ] }, { "cell_type": "code", "execution_count": null, "id": "68ec56cb-fa8e-4fee-9876-72052175f527", "metadata": {}, "outputs": [], "source": [ "# find and return indices for overlaps\n", "find.index <- function(df1,df2,type='reg'){\n", " #colnames(df1) <- colnames(df2) <- c('V1','V2','V3')\n", " library(GenomicRanges)\n", " df1.gr = GRanges (IRanges(start = df1$V2, end = df1$V3), seqnames=df1$V1)\n", " if(type=='reg'){\n", " df2.gr = GRanges(IRanges(start=df2$V2, end = df2$V3), seqnames = df2$V1)\n", " }\n", " if(type=='pos'){\n", " df2.gr = GRanges(IRanges(start=df2$V4, end = df2$V4), seqnames = df2$V1)\n", " }\n", " df1.git = GNCList(df1.gr)\n", " df2.git = GNCList(df2.gr)\n", " overlap_git = findOverlaps(df2.git, df1.git)\n", " overlap_git\n", " temp <- as.data.frame(overlap_git)\n", " colnames(temp) <- c('df2','df1')\n", " return(temp)\n", "}" ] }, { "cell_type": "markdown", "id": "d6d6ebad-c0bc-4c6f-982e-a5e820d292f3", "metadata": {}, "source": [ "### Find SNPs for each set of specific peaks\n", "The peaks should be first map to GRCh37/hg19 from mm10 using [LiftOver](http://genome.ucsc.edu/) and named accordingly." ] }, { "cell_type": "code", "execution_count": null, "id": "c71a87c7-0f6c-489f-80c9-cf5092f9d73c", "metadata": {}, "outputs": [], "source": [ "mode_list <- c('E11_0', 'E13_5', 'E15_5', 'E18_5')\n", "mode_index <- 4\n", "mode = mode_list[mode_index]\n", "\n", "\n", "# Load data\n", "data_path <- \"C:/University/FINAL/data/SNP_files/\"\n", "for(chr in 1:22){\n", " if(chr==1){\n", " # https://doi.org/10.5281/zenodo.7768714 -> 1000G_Phase3_plinkfiles.tgz\n", " bim <- fread(paste0(data_path, '1000G_EUR_Phase3_plink/1000G.EUR.QC.1.bim'))\n", " }else{\n", " bim <- rbind(bim,fread(paste0(data_path, '1000G_EUR_Phase3_plink/1000G.EUR.QC.',chr,'.bim')))\n", " }\n", "} ## 1000G bim file\n", "bim$V1 <- paste0('chr',bim$V1)\n", "\n", "\n", "bg_mode <- 'all'\n", "\n", "inputpath <- paste0(\"C:/University/FINAL/results/MouseBrain_Jiang2023/vertical/\", mode, '/INSTINCT/GRCh37hg19/S2/')\n", "outputpath <- paste0(\"C:/University/FINAL/results/MouseBrain_Jiang2023/vertical/\", mode, '/INSTINCT/SNPs_2/S2_', bg_mode, '/')\n", "if (!dir.exists(outputpath)) {\n", " dir.create(outputpath, recursive = TRUE)\n", "}\n", "\n", "\n", "bg_name <- paste0('bg_', bg_mode)\n", "bed_file <- fread(paste0(inputpath, bg_name, '.bed'), header = FALSE, sep = \"\\t\", quote = \"\")\n", "chromatin <- sub(\":.*\", \"\", bed_file$V1)\n", "positions <- gsub(\".*:\", \"\", bed_file$V1)\n", "start <- as.numeric(gsub(\"-.*\", \"\", positions))\n", "end <- as.numeric(gsub(\".*-\", \"\", positions))\n", "bg1 <- data.frame(V1 = chromatin, V2 = start, V3 = end)\n", "ind.bg <- find.index(bg1,bim,type='pos')\n", "snpset <- list()\n", "snpset[[1]] <- bim$V2[ind.bg$df2]\n", "\n", "\n", "## 2. specific annotations\n", "peak <- list()\n", "name <- c('Midbrain', 'Diencephalon_and_hindbrain', 'Subpallium_1', 'Subpallium_2', 'Cartilage_1', 'Cartilage_2', \n", " 'Cartilage_3', 'Cartilage_4', 'Mesenchyme', 'Muscle', 'Thalamus', 'DPallm', 'DPallv')\n", "for(k in 1:length(name)){\n", " print(name[k])\n", " bed_file <- fread(paste0(inputpath,name[k],'.bed'), header = FALSE, sep = \"\\t\", quote = \"\")\n", " chromatin <- sub(\":.*\", \"\", bed_file$V1)\n", " positions <- gsub(\".*:\", \"\", bed_file$V1)\n", " start <- as.numeric(gsub(\"-.*\", \"\", positions))\n", " end <- as.numeric(gsub(\".*-\", \"\", positions))\n", " peak[[k]] <- data.frame(V1 = chromatin, V2 = start, V3 = end)\n", " ind.temp <- find.index(peak[[k]],bim,type='pos')\n", " snpset[[k+1]] <- bim$V2[ind.temp$df2]\n", "}\n", "\n", "\n", "for(chr in 1:22){\n", " print(chr)\n", " bim2 <- fread(paste0(data_path, '1000G_EUR_Phase3_plink/1000G.EUR.QC.',chr,'.bim')) ## 1000G bim file for each chromosome\n", " for(j in 1:(length(name)+1)){\n", " index <- which(bim2$V2%in%snpset[[j]])\n", " anno <- rep(0,nrow(bim2))\n", " anno[index] <- 1\n", " if(j==1){\n", " anno1 <- cbind(rep(1,nrow(bim2)),anno)\n", " }else{\n", " anno1 <- cbind(anno1,anno)\n", " }\n", " }\n", " colnames(anno1) <- c('base',bg_name,name)\n", " write.table(anno1,paste0(outputpath,'bg.mm',chr,'.annot'),quote=F,row.names=F,col.names=T,sep='\\t')\n", "}" ] }, { "cell_type": "markdown", "id": "59ab4899-1406-4baa-be25-fb11eafd0d10", "metadata": {}, "source": [ "### Perform partitioned heritability analysis using [LDSC](http://www.github.com/bulik/ldsc)" ] }, { "cell_type": "code", "execution_count": null, "id": "a37ff671-679e-49ed-9995-d851b301d6d9", "metadata": {}, "outputs": [], "source": [ "# shell\n", "for i in {1..22}\n", "do /home/liuyy/anaconda3/envs/ldsc/bin/python2 /home/liuyy/Final/ldsc/ldsc.py --l2 --bfile /home/liuyy/Final/data/SNP_files/1000G_EUR_Phase3_plink/1000G.EUR.QC.$i --ld-wind-cm 1 --annot /home/liuyy/Final/results/MouseBrain_Jiang2023/vertical/E18_5/INSTINCT/SNPs_2/S2_all/bg.mm$i.annot --thin-annot --out /home/liuyy/Final/results/MouseBrain_Jiang2023/vertical/E18_5/INSTINCT/SNPs_2/S2_all/bg.mm$i \n", "done" ] }, { "cell_type": "code", "execution_count": null, "id": "1d88e4a8-ab73-4cae-859c-ac9eb4d03b25", "metadata": {}, "outputs": [], "source": [ "for name in `ls /home/liuyy/Final/data/SNP_files/sumstats/*.gz`;\n", "do \n", "echo ${name}\n", "mkdir /home/liuyy/Final/results/MouseBrain_Jiang2023/vertical/E18_5/INSTINCT/herit/S2_all/${name#*sumstats/}\n", "/home/liuyy/anaconda3/envs/ldsc/bin/python2 /home/liuyy/Final/ldsc/ldsc.py --h2 ${name} --ref-ld-chr /home/liuyy/Final/results/MouseBrain_Jiang2023/vertical/E18_5/INSTINCT/SNPs_2/S2_all/bg.mm --w-ld-chr /home/liuyy/Final/data/SNP_files/1000G_Phase3_weights_hm3_no_MHC/weights.hm3_noMHC. --overlap-annot --not-M-5-50 --out /home/liuyy/Final/results/MouseBrain_Jiang2023/vertical/E18_5/INSTINCT/herit/S2_all/${name#*sumstats/}/res\n", "done" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 5 }