## MethylKit and pvclust R code for generating bootstrapped dendrogram ----
#!/usr/bin/Rscript
library("methylKit")
library("pvclust")

# Row-wise standard deviation of a numeric matrix.
#
# x      : numeric matrix.
# center : optional per-row centre; defaults to rowMeans(x, ...).
# ...    : forwarded to rowMeans()/rowSums() (e.g. na.rm = TRUE).
#
# Returns sqrt(sum((x - center)^2) / (n - 1)) per row, where n is the number
# of non-NA entries in that row. Rows with n <= 1 yield NA.
rowSds <- function(x, center = NULL, ...) {
  n <- rowSums(!is.na(x))
  n[n <= 1] <- NA
  if (is.null(center)) {
    center <- rowMeans(x, ...)
  }
  x <- x - center
  x <- x * x
  x <- rowSums(x, ...)
  x <- x / (n - 1)
  sqrt(x)
}

# Column-wise standard deviation: rowSds() applied to the transpose of x.
colSds <- function(x, ...) {
  x <- t(x)
  rowSds(x, ...)
}

file.list <- list(
  "16H.bsseq.input.txt", "16L.bsseq.input.txt",
  "22H.bsseq.input.txt", "22L.bsseq.input.txt",
  "5H.bsseq.input.txt", "5L.bsseq.input.txt",
  "7H.bsseq.input.txt", "7L.bsseq.input.txt",
  "8H.bsseq.input.txt", "8L.bsseq.input.txt",
  "9H.bsseq.input.txt", "9L.bsseq.input.txt"
)

# treatment is 1 for the "H" sample and 0 for the "L" sample of each pair,
# in the same order as sample.id.
myobj <- methRead(
  file.list,
  sample.id = list(
    "16H", "16L", "22H", "22L", "5H", "5L",
    "7H", "7L", "8H", "8L", "9H", "9L"
  ),
  assembly = "Dm2.4",
  treatment = c(1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0),
  context = "CpG",
  pipeline = "bismarkCoverage"
)

filtered.myobj <- filterByCoverage(
  myobj,
  lo.count = 10, lo.perc = NULL,
  hi.count = NULL, hi.perc = 99.9
)

# NOTE(review): the original united the *unfiltered* `myobj`, leaving
# `filtered.myobj` unused. The coverage-filtered object is used here; confirm
# this matches the intended analysis.
meth <- unite(filtered.myobj, destrand = FALSE)

mat <- getData(meth)
mat <- mat[rowSums(is.na(mat)) == 0, ]

# Per-sample methylation fraction: numCs / (numCs + numTs).
meth.mat <- mat[, meth@numCs.index] /
  (mat[, meth@numCs.index] + mat[, meth@numTs.index])
names(meth.mat) <- meth@sample.ids

# Keep only the rows whose SD across samples is above the median SD.
sds <- rowSds(as.matrix(meth.mat))
cutoff <- quantile(sds, 0.5)
meth.mat <- meth.mat[sds > cutoff, ]

dist.boot.10000 <- pvclust(
  meth.mat,
  method.dist = "cor",
  method.hclust = "ward.D2",
  nboot = 10000,
  parallel = TRUE
)

svg(filename = "pvclust.10000.wald2.svg")
plot(dist.boot.10000)
dev.off()

pdf("pvclust.10000.wald2.pdf")
plot(dist.boot.10000)
dev.off()


## Bsseq Rscript: ----
#!/usr/bin/Rscript
library("bsseq")

meth <- read.bismark(
  files = c(
    "16H.bsseq.input.txt", "22H.bsseq.input.txt", "5H.bsseq.input.txt",
    "7H.bsseq.input.txt", "8H.bsseq.input.txt", "9H.bsseq.input.txt",
    "16L.bsseq.input.txt", "22L.bsseq.input.txt", "5L.bsseq.input.txt",
    "7L.bsseq.input.txt", "8L.bsseq.input.txt", "9L.bsseq.input.txt"
  ),
  sampleNames = c(
    "16H", "22H", "5H", "7H", "8H", "9H",
    "16L", "22L", "5L", "7L", "8L", "9L"
  ),
  fileType = "cov",
  mc.cores = 10,
  strandCollapse = FALSE
)

# Sample sheet: condition (H/L) and mother ID, in the order of sampleNames.
Design <- data.frame(
  row.names = sampleNames(meth),
  condition = c("H", "H", "H", "H", "H", "H", "L", "L", "L", "L", "L", "L"),
  mother = c("16", "22", "5", "7", "8", "9", "16", "22", "5", "7", "8", "9")
)
pData(meth) <- Design

BS.meth.smooth <- BSmooth(meth, mc.cores = 60, verbose = TRUE)
BS.cov <- getCoverage(BS.meth.smooth)
BS.meth <- (getMeth(BS.meth.smooth))

# Keep loci with coverage >= 6 in at least two H samples and at least two
# L samples, and with a non-NA methylation value in every sample.
keepLoci.ex <- which(
  rowSums(BS.cov[, BS.meth.smooth$condition == "H"] >= 6) >= 2 &
    rowSums(BS.cov[, BS.meth.smooth$condition == "L"] >= 6) >= 2 &
    Reduce(`&`, as.data.frame(!is.na(BS.meth)))
)
BS.meth.smooth <- BS.meth.smooth[keepLoci.ex, ]
summary(getMeth(BS.meth.smooth))

meth.tstat <- BSmooth.tstat(
  BS.meth.smooth,
  group1 = c("16H", "22H", "5H", "7H", "8H", "9H"),
  group2 = c("16L", "22L", "5L", "7L", "8L", "9L"),
  estimate.var = "paired",
  local.correct = TRUE,
  verbose = TRUE
)
# write.csv(as.data.frame(meth.tstat), file="meth.tsat.2.csv")

# DMRs from fixed t-statistic cutoffs.
dmrs0 <- dmrFinder(meth.tstat, cutoff = c(-4.6, 4.6))
write.csv(as.data.frame(dmrs0), file = "dmrs0.2.filtered.csv")
dmrs <- subset(dmrs0, n >= 3 & abs(meanDiff) >= 0.1)
write.csv(as.data.frame(dmrs), file = "dmrs.filtered.csv")

# The file holds an R workspace image (save()/load() format) despite its
# ".R" extension; the p-value section below load()s it by this exact name.
save(meth.tstat, file = "meth.tstat.filtered.R")

# Plot colours follow the condition column (H = red, L = blue). The original
# rep(c("red", "blue"), each = 3) produced 6 values for 12 samples.
sample.info <- pData(BS.meth.smooth)
sample.info$col <- ifelse(sample.info$condition == "H", "red", "blue")
pData(BS.meth.smooth) <- sample.info

pdf(file = "dmrs_top200.2.pdf", width = 10, height = 5)
# head() avoids NA rows when fewer than 200 DMRs pass the filter.
plotManyRegions(
  BS.meth.smooth,
  head(dmrs, 200),
  extend = 5000,
  addRegions = dmrs
)
dev.off()

# DMRs from quantile-based cutoffs.
dmrs1 <- dmrFinder(meth.tstat, qcutoff = c(0.01, 0.99))
# Fixed: the original wrote dmrs0 to this file.
write.csv(as.data.frame(dmrs1), file = "dmrs1.filtered.csv")
dmrsDiff <- subset(dmrs1, n >= 3 & abs(meanDiff) >= 0.1)
write.csv(as.data.frame(dmrsDiff), file = "dmrsDiff.filtered.csv")


## Generate per CpG p-values and convert to q-values ----
library(bsseq)
library(qvalue)

load("meth.tstat.filtered.R")
tstats <- getStats(meth.tstat)[, "tstat.corrected"]
# Two-sided p-value from a t distribution with 5 degrees of freedom.
p.value <- 2 * pt(-abs(as.vector(tstats)), df = 5)
qobj <- qvalue(p = p.value)
qvals <- qobj$qvalues
Values <- cbind(
  data.frame(meth.tstat@gr),
  data.frame(meth.tstat@stats),
  p.value,
  qvals
)
write.csv(file = "AllPvalues.Nov18.InRevision.csv", Values)


## TopGO code used to identify significantly enriched terms ----
library("topGO")

# Command-line arguments:
#   1. gene-to-GO mapping file (read with readMappings)
#   2. file listing the genes of interest, one per line
#   3. topGO algorithm name passed to runTest()
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 3) {
  stop(
    "Usage: <universeFile> <interestingGenesFile> <algorithm>",
    call. = FALSE
  )
}
universeFile <- args[1]
interestingGenesFile <- args[2]
algorithm <- args[3]

geneID2GO <- readMappings(file = universeFile)
geneNames <- names(geneID2GO)
myInterestingGenes <- readLines(interestingGenesFile)
# Two-level factor: 1 = gene of interest, 0 = background.
geneList <- factor(as.integer(geneNames %in% myInterestingGenes))
names(geneList) <- geneNames

# Build a topGOdata object for one ontology ("BP", "MF" or "CC").
build_go_data <- function(ontology) {
  new(
    "topGOdata",
    ontology = ontology,
    allGenes = geneList,
    annot = annFUN.gene2GO,
    gene2GO = geneID2GO,
    nodeSize = 5
  )
}

# Write the enrichment outputs for one ontology.
#
# go_data  : topGOdata object for the ontology.
# result   : topGOresult returned by runTest() on go_data.
# ontology : ontology label used in output file names ("BP", "MF", "CC").
# prefix   : file-name prefix (the interesting-genes file path).
# algorithm: algorithm name, used in file names and passed as orderBy.
# cutoff   : score threshold; terms with score <= cutoff are reported.
#
# Writes <prefix>.<algorithm>.<ontology>.txt (GenTable output), the
# printGraph() outputs with pdfSW = FALSE and pdfSW = TRUE, and
# <prefix>.<algorithm>.<ontology>.GO2genes.txt (one "GO-id gene1,gene2,..."
# line per reported term). If no term passes the cutoff, prints a message
# and writes nothing. Returns NULL invisibly.
write_enrichment <- function(go_data, result, ontology, prefix, algorithm,
                             cutoff = 0.01) {
  is_signif <- topGO::score(result) <= cutoff
  print(summary(is_signif))

  # Count significant terms directly instead of inferring the count from the
  # length/position of summary() output, which breaks when every term is
  # significant or when NAs are present.
  n_signif <- sum(is_signif, na.rm = TRUE)
  if (n_signif == 0L) {
    print(paste0("TRUE in mysummary", ontology, " does not exist!"))
    return(invisible(NULL))
  }

  stem <- paste(prefix, algorithm, ontology, sep = ".")

  res_table <- GenTable(
    go_data,
    weight01 = result,
    orderBy = algorithm,
    topNodes = n_signif
  )
  write.table(
    res_table,
    file = paste(stem, "txt", sep = "."),
    row.names = FALSE
  )

  # Same graph twice: first with pdfSW = FALSE, then with pdfSW = TRUE.
  for (pdf_switch in c(FALSE, TRUE)) {
    printGraph(
      go_data, result,
      firstSigNodes = n_signif,
      fn.prefix = stem,
      useInfo = "all",
      pdfSW = pdf_switch
    )
  }

  go_ids <- res_table$GO.ID
  genes_by_term <- genesInTerm(go_data, go_ids)
  go_lines <- vapply(
    go_ids,
    function(term) {
      paste(term, paste(genes_by_term[[term]], collapse = ","))
    },
    character(1),
    USE.NAMES = FALSE
  )
  write.table(
    matrix(go_lines, ncol = 1),
    file = paste(stem, "GO2genes.txt", sep = "."),
    row.names = FALSE,
    col.names = FALSE
  )
  invisible(NULL)
}

GOdataBP <- build_go_data("BP")
GOdataMF <- build_go_data("MF")
GOdataCC <- build_go_data("CC")

resultFisherBP <- runTest(GOdataBP, algorithm = algorithm, statistic = "fisher")
resultFisherMF <- runTest(GOdataMF, algorithm = algorithm, statistic = "fisher")
resultFisherCC <- runTest(GOdataCC, algorithm = algorithm, statistic = "fisher")

write_enrichment(
  GOdataCC, resultFisherCC, "CC",
  prefix = interestingGenesFile, algorithm = algorithm
)
write_enrichment(
  GOdataBP, resultFisherBP, "BP",
  prefix = interestingGenesFile, algorithm = algorithm
)
write_enrichment(
  GOdataMF, resultFisherMF, "MF",
  prefix = interestingGenesFile, algorithm = algorithm
)