########################################
## bash
# set scrolling of terminal to unlimited

########################################


########################################
############### R BASICS ###############

# assignments ("<-" or "=")
# vector
# variables (data$counts)
# dim()
# head(,param)
# sessionInfo()
# ?rtfm

########################################
########################################

source("http://www.bioconductor.org/biocLite.R")
biocLite("pheatmap")


library("edgeR")
library("DESeq2")
library("Rsubread")
library("GenomicAlignments")
library("BiocParallel")
library("pheatmap")
library("RColorBrewer")


########################################
## bash
# check number of CPUs
less /proc/cpuinfo
# check memory
less /proc/meminfo

########################################

loadDir<-"/seq/common/data/Course_BioTopIVB/bamtest/"
saveDir<-"/home/phpeters/biotopCourse/"


alignFile <- "/seq/common/data/Course_BioTopIVB/bamtest/Tni_noninf_01.bam"
alignFile <- paste0(loadDir,"Tni_noninf_01.bam")
alignFiles <- c(paste0(loadDir,"Tni_noninf_01.bam"))
annotFile <- paste0(loadDir,"Tni_transcripts.gtf")
outFile <- paste0(saveDir,"counts.txt")
file.exists(alignFile)
file.exists(annotFile)
file.exists(outFile)

########################################
## bash

# check the sam file (PG -> bowtie-command)
less /seq/common/data/Course_BioTopIVB/bamtest/Tni_transcripts.gtf
# check the gtf file (transcripts, gene_id)
less /seq/common/data/Course_BioTopIVB/bamtest/Tni_transcripts.gtf

########################################

# featureCount
# rtfm
# https://www.rdocumentation.org/packages/Rsubread/versions/1.22.2/topics/featureCounts

?featureCounts

features<-featureCounts(alignFile,annot.ext=annotFile,isGTFAnnotationFile = TRUE,GTF.featureType = "transcript", GTF.attrType = "gene_id", isPairedEnd=FALSE,strandSpecific=1,nthreads=16);
# Running time : 0.11 minutes

## get some information
summary(features)
head(features$counts,3)
colnames(features$counts)<-"noninf1"
head(features$counts,3)

# do it for all
alignFiles<-c(paste0(loadDir,"Tni_noninf_01.bam"),paste0(loadDir,"Tni_noninf_02.bam"),paste0(loadDir,"Tni_noninf_03.bam"),paste0(loadDir,"Tni_noninf_04.bam"),paste0(loadDir,"Tni_inf_01.bam"),paste0(loadDir,"Tni_inf_02.bam"),paste0(loadDir,"Tni_inf_03.bam"),paste0(loadDir,"Tni_inf_04.bam"))
file.exists(alignFiles)

features<-featureCounts(alignFiles,annot.ext=annotFile,isGTFAnnotationFile = TRUE,GTF.featureType = "transcript", GTF.attrType = "gene_id", isPairedEnd=FALSE,strandSpecific=1,nthreads=16);

colnames(features$counts)<-c("noninf1","noninf2","noninf3","noninf4","inf1","inf2","inf3","inf4")

summary(features$counts)
head(features$counts)

# access only one column
head(features$counts[,"noninf1"],3)
head(features$counts[,1],3)
# access only one row
features$counts[1,]
features$counts["GBKU01000001.1",]

# save results
outFile=paste0(saveDir,"countsAndLength.txt")
write.table(x=data.frame(features$annotation[,c("GeneID","Length")],features$counts,stringsAsFactors=FALSE),file=outFile,quote=FALSE,sep="\t",row.names=FALSE)

outFile=paste0(saveDir,"counts.txt")
write.table(x=data.frame(features$annotation[,c("GeneID")],features$counts,stringsAsFactors=FALSE),file=outFile,quote=FALSE,sep="\t",row.names=FALSE)

####################################################
#bash:
nano saveDir,"counts.txt"

####################################################
####################################################

## Normalisation
# TPM 
rate <-features$counts/features$annotation[,c("Length")]
denom <- sum(rate)/1000000
tpm<-rate/denom
head(tpm)

#RPKM
rate2 <-features$counts/(features$annotation[,c("Length")]/1000)
denom2 <- sum(features$counts)/1000000
rpkm<-rate2/denom2
head(rpkm)

# conversion RPKM to TPM
tpmtest<-rpkm/(sum(rpkm)/1000000)
head(tpmtest)


# check sample differences I
# heatmap
sampleDists <- dist(t(tpm))
sampleDistMatrix <- as.matrix(sampleDists)
pheatmap(sampleDistMatrix,clustering_distance_rows=sampleDists,clustering_distance_cols=sampleDists)
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,clustering_distance_rows=sampleDists,clustering_distance_cols=sampleDists,col=colors)

# save plot
pdf(file=paste0("heatmap.pdf"));
pheatmap(sampleDistMatrix,clustering_distance_rows=sampleDists,clustering_distance_cols=sampleDists,col=colors)
dev.off()


# check sample differences II

pairs(~tpm[,1]+tpm[,2]+tpm[,3]+tpm[,4]+tpm[,5]+tpm[,6]+tpm[,7]+tpm[,8],labels=colnames(features$counts))

# correlation
cor(tpm[,1],tpm[,2])

panel.cor <- function(x, y, digits=2, prefix="", cex.cor){
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r <- abs(cor(x, y))
txt <- format(c(r, 0.123456789), digits=digits)[1]
txt <- paste(prefix, txt, sep="")
if(missing(cex.cor)) cex <- 0.6/strwidth(txt)
text(0.5, 0.5, txt, cex = cex)
}
pairs(~tpm[,1]+tpm[,2]+tpm[,3]+tpm[,4]+tpm[,5]+tpm[,6]+tpm[,7]+tpm[,8],lower.panel=panel.smooth, upper.panel=panel.cor,labels=colnames(features$counts))


####################################################
####################################################

## Differential Expression Analysis

# read in the reads
countFile <- paste0(saveDir,"counts.txt")
rawcountdata <- read.table(countFile, header=T, row.names=1)

# take only the good samples
countData<-rawcountdata[,c(1:3,5:7)]


####################################################
# bash
# set up the csv file (overview over the experiments)


####################################################

csvFile<-paste0(saveDir,"Tni_experiments.csv")
(sampleTable <- read.csv(csvFile,row.names=1))

names(countData)<-rownames(sampleTable)
(group<-factor(sampleTable[,2]))

# create edgeR object

countsDGE<-DGEList(counts=as.matrix(countData),group=group)
countsDGE$samples$group <- relevel(countsDGE$samples$group,ref="noninf")
countsDGE
colSums(countsDGE$counts)


#normalizing by library size
countsDGE <- calcNormFactors(countsDGE)
countsDGE$samples

plotMDS(countsDGE, method="bcv", col=as.numeric(countsDGE$samples$group));
legend("right", as.character(unique(countsDGE$samples$group)), col=1:3, pch=20)

# estimate dispersion
countsDGE_1 <- estimateCommonDisp(countsDGE, verbose=T)
# For routine differential expresion analysis, we use empirical Bayes tagwise dispersions. Note that common dispersion needs to be estimated before estimating tagwise disp
countsDGE_1 <- estimateTagwiseDisp(countsDGE_1)
# plotBCV() plots the tagwise biological coefficient of variation (square root of dispersions) against log2-CPM.
plotBCV(countsDGE_1)
plotMeanVar(countsDGE_1, show.tagwise.vars=TRUE, NBline=TRUE,main="common")

# set p-value parameter
ALPHA=0.05

# test
countsDGE_1ET <- exactTest(countsDGE_1, pair=c(1,2), dispersion="tagwise")
countsDGE_1ET

countsDGE_1ET_Dec <- decideTestsDGE(countsDGE_1ET, adjust.method="BH", p.value=ALPHA)
summary(countsDGE_1ET_Dec)

topTags(countsDGE_1ET, n=10)
dim(topTags(countsDGE_1ET, n=Inf,p.value=ALPHA))


countsDGE_1tags <- rownames(countsDGE_1)[as.logical(countsDGE_1ET_Dec)] 
plotSmear(countsDGE_1ET, de.tags=countsDGE_1tags); abline(h = c(-2, 2), col = "blue")


# and fit the model
design <- model.matrix(~ 0 + countsDGE$samples$group)
colnames(design) <- levels(countsDGE$samples$group)
rownames(design) <- colnames(countsDGE)
countsDGE_2 <- estimateGLMCommonDisp(countsDGE,design)
countsDGE_2 <- estimateGLMTrendedDisp(countsDGE_2,design) #, method="power")
# You can change method to "auto", "bin.spline", "power", "spline", "bin.loess".# The default is "auto" which chooses "bin.spline" when > 200 tags and "power" otherwise.
countsDGE_2 <- estimateGLMTagwiseDisp(countsDGE_2,design)
plotBCV(countsDGE_2)
plotMeanVar(countsDGE_2, show.tagwise.vars=TRUE, NBline=TRUE,main="GLM")


#Differential Expression
# GLM
countsDGE_2FIT <- glmFit(countsDGE_2, design)
# compare (group 1 - group 2) to 0:
# this is equivalent to comparing group 1 to group 2
countsDGE_2FITLRT <- glmLRT(countsDGE_2FIT,contrast=c(-1,1))
topTags(countsDGE_2FITLRT, n=10)
dim(topTags(countsDGE_2FITLRT, n=Inf,p.value=ALPHA))
countsDGE_2FITLRT_Dec <- decideTestsDGE(countsDGE_2FITLRT, adjust.method="BH", p.value = ALPHA)
summary(countsDGE_2FITLRT_Dec)

countsDGE_2FITLRTtags <- rownames(countsDGE_2)[as.logical(countsDGE_2FITLRT_Dec)]
plotSmear(countsDGE_2FITLRT, de.tags=countsDGE_2FITLRTtags); abline(h = c(-2, 2), col = "blue")


# DESeq2

# put the countdata (with infos from above!) into a matrix and collect the column data
countData_HostNoninf_vs_HostPACE<-as.matrix(countdata)
(colData_HostNoninf_vs_HostPACE <- DataFrame(sampleTable))

# create DESeq object
dds_HostNoninf_vs_HostPACE <- DESeqDataSetFromMatrix(countData=countData_HostNoninf_vs_HostPACE,colData=colData_HostNoninf_vs_HostPACE,design= ~ treat)
# set reference level
dds_HostNoninf_vs_HostPACE$treat<-relevel(dds_HostNoninf_vs_HostPACE$treat, ref=reffi)
dds_HostNoninf_vs_HostPACE
# make DESeq analysis
dds_HostNoninf_vs_HostPACE_analysed <- DESeq(dds_HostNoninf_vs_HostPACE)
# get the results
res_dds_HostNoninf_vs_HostPACE_analysed <- results(dds_HostNoninf_vs_HostPACE_analysed)

resOrdered_dds_HostNoninf_vs_HostPACE_analysed <- res_dds_HostNoninf_vs_HostPACE_analysed[order(res_dds_HostNoninf_vs_HostPACE_analysed$padj),]
# get significant and non significant genes and write them to files
sig_resOrdered_dds_HostNoninf_vs_HostPACE_analysed <- subset(resOrdered_dds_HostNoninf_vs_HostPACE_analysed, padj<ALPHA)

sig_resOrdered_dds_HostNoninf_vs_HostPACE_analysed

# print summary and plotMA the results
summary(resOrdered_dds_HostNoninf_vs_HostPACE_analysed,alpha=ALPHA)
#pdf(file='/project/explusi/phpeters/GEA/LFCRegulatedTni_24.pdf');
#png(filename='/project/explusi/phpeters/GEA/LFCRegulatedTni.png');
plotMA(resOrdered_dds_HostNoninf_vs_HostPACE_analysed,main="Regulated genes of Trichoplusia ni\n24h vs. 24h",alpha=ALPHA,ylim=c(-2,2))
#dev.off()


#display the changes

## ----heatmap, dev="pdf", fig.width=5, fig.height=7-----------------------
library("pheatmap")
select <- order(rowMeans(counts(dds_HostNoninf_vs_HostPACE_analysed,normalized=TRUE)),decreasing=TRUE)[1:20]

nt <- normTransform(dds_HostNoninf_vs_HostPACE_analysed) # defaults to log2(x+1)
log2.norm.counts <- assay(nt)[select,]
df <- as.data.frame(colData(dds_HostNoninf_vs_HostPACE_analysed)[,c("condition","type")])
pheatmap(log2.norm.counts, cluster_rows=FALSE, show_rownames=FALSE,cluster_cols=FALSE, annotation_col=df)

rld <- rlog(dds_HostNoninf_vs_HostPACE_analysed, blind=FALSE)
pheatmap(assay(rld)[select,], cluster_rows=FALSE, show_rownames=FALSE,cluster_cols=FALSE, annotation_col=df)


### testing for specific logFC

## ----ddsNoPrior----------------------------------------------------------
ddsNoPrior <- DESeq(dds_HostNoninf_vs_HostPACE, betaPrior=FALSE)

## ----lfcThresh, fig.show="asis", fig.cap='MA-plots of tests of log2 fold change with respect to a threshold value.  Going left to right across rows, the tests are for \\Robject{altHypothesis = "greaterAbs"}, \\Robject{"lessAbs"}, \\Robject{"greater"}, and \\Robject{"less"}.\\label{figure/lfcThresh-1}'----
par(mfrow=c(2,2),mar=c(2,2,1,1))
yl <- c(-2.5,2.5)

resGA <- results(ddsNoPrior, lfcThreshold=.5, altHypothesis="greaterAbs")
resLA <- results(ddsNoPrior, lfcThreshold=.5, altHypothesis="lessAbs")
resG <- results(ddsNoPrior, lfcThreshold=.5, altHypothesis="greater")
resL <- results(ddsNoPrior, lfcThreshold=.5, altHypothesis="less")

plotMA(resGA, ylim=yl)
abline(h=c(-.5,.5),col="dodgerblue",lwd=2)
plotMA(resLA, ylim=yl)
abline(h=c(-.5,.5),col="dodgerblue",lwd=2)
plotMA(resG, ylim=yl)
abline(h=.5,col="dodgerblue",lwd=2)
plotMA(resL, ylim=yl)
abline(h=-.5,col="dodgerblue",lwd=2)

## ----mcols---------------------------------------------------------------
mcols(dds,use.names=TRUE)[1:4,1:4]
# here using substr() only for display purposes
substr(names(mcols(dds)),1,10) 
mcols(mcols(dds), use.names=TRUE)[1:4,]


# some filtering fun

CPM_THRESHOLD<-0;
SAMPLE_THRESHOLD <-0;


################################################################################################################################################################
################################################################################################################################################################
################################################################################################################################################################
################################################################################################################################################################
################################################################################################################################################################
################################################################################################################################################################


print(countFile)
file.exists(countFile)
countdata <- read.table(countFile, header=F, row.names=1)
(sampleTable <- read.csv(csvfile,row.names=1))

names(countdata)<-rownames(sampleTable)
group<-factor(sampleTable[,2])

#names(countdata)<-c(seq(s1,s1+40,20),seq(s1+pluss1,s1+pluss1+40,20))
#group<-factor(c("non_inf","non_inf","non_inf","pACE1","pACE1","pACE1"))
#group<-factor(c(1,1,1,2,2,2))


################################################################################################################
################################################################################################################
################################################################################################################

# edgeR

# create edger object and relevel the groups
countsDGE<-DGEList(counts=as.matrix(countdata),group=group)
countsDGE$samples$group <- relevel(countsDGE$samples$group, ref=reffi)

countsDGE.full<-countsDGE

# filter out low-expressed genes (if set to 0 -> keep all)
keep <- rowSums(cpm(countsDGE)>=CPM_THRESHOLD) >= SAMPLE_THRESHOLD
countsDGE <- countsDGE[keep,]
dim(countsDGE)

#reset library sizes after filtering
#realVec<-c(10193886,10780609,8567800,5904961,5608920,4050367)
# realVec<-c(10341831,8426358,9010495,3730722,3571162,3051243)
# plusVec<-c(0,0,0,12800520,10733629,7812879) #24 virus reads chen paper
#plusVec<-c(0,0,0,9366234,9394251,21587056) #48 virus reads chen paper
#countsDGE$samples$lib.size <- realVec
countsDGE$samples$lib.size <- colSums(countsDGE$counts)
#countsDGE$samples$lib.size <- colSums(countsDGE$counts)+plusVec
colSums(countsDGE$counts)
colSums(countsDGE.full$counts)


plotMDS(countsDGE, method="bcv", col=as.numeric(countsDGE$samples$group));
legend("bottomleft", as.character(unique(countsDGE$samples$group)), col=1:3, pch=20)

# estimate dispersion
countsDGE_1 <- estimateCommonDisp(countsDGE, verbose=T)
# For routine differential expresion analysis, we use empirical Bayes tagwise dispersions. Note that common dispersion needs to be estimated before estimating tagwise disp
countsDGE_1 <- estimateTagwiseDisp(countsDGE_1)
# plotBCV() plots the tagwise biological coefficient of variation (square root of dispersions) against log2-CPM.
plotBCV(countsDGE_1)
plotMeanVar(countsDGE_1, show.tagwise.vars=TRUE, NBline=TRUE,main="common")

countsDGE_1ET <- exactTest(countsDGE_1, pair=c(1,2), dispersion="tagwise")
topTags(countsDGE_1ET, n=10)
dim(topTags(countsDGE_1ET, n=Inf,p.value=ALPHA))

countsDGE_1ET_Dec <- decideTestsDGE(countsDGE_1ET, adjust.method="BH", p.value=ALPHA)
summary(countsDGE_1ET_Dec)

countsDGE_1tags <- rownames(countsDGE_1)[as.logical(countsDGE_1ET_Dec)] 
plotSmear(countsDGE_1ET, de.tags=countsDGE_1tags); abline(h = c(-2, 2), col = "blue")

out <- topTags(countsDGE_1ET, n=Inf,p.value=ALPHA)
write.table(out$table, file=outFile1, row.names = TRUE, col.names = NA, quote=FALSE, sep = "\t")
out2<-out$table["logFC"]>0
outup<-out[out2,]
out2<-out$table["logFC"]<0
outdown<-out[out2,]
write.table(outup$table, file=outFile1u, row.names = TRUE, col.names = NA, quote=FALSE, sep = "\t")
write.table(outdown$table, file=outFile1d, row.names = TRUE, col.names = NA, quote=FALSE, sep = "\t")


# and fit the model
design <- model.matrix(~ 0 + countsDGE$samples$group)
colnames(design) <- levels(countsDGE$samples$group)
rownames(design) <- colnames(countsDGE)
countsDGE_2 <- estimateGLMCommonDisp(countsDGE,design)
countsDGE_2 <- estimateGLMTrendedDisp(countsDGE_2,design) #, method="power")
# You can change method to "auto", "bin.spline", "power", "spline", "bin.loess".# The default is "auto" which chooses "bin.spline" when > 200 tags and "power" otherwise.
countsDGE_2 <- estimateGLMTagwiseDisp(countsDGE_2,design)
plotBCV(countsDGE_2)
plotMeanVar(countsDGE_2, show.tagwise.vars=TRUE, NBline=TRUE,main="GLM")


#Differential Expression
# GLM
countsDGE_2FIT <- glmFit(countsDGE_2, design)
# compare (group 1 - group 2) to 0:
# this is equivalent to comparing group 1 to group 2
countsDGE_2FITLRT <- glmLRT(countsDGE_2FIT,contrast=c(-1,1))
topTags(countsDGE_2FITLRT, n=10)
dim(topTags(countsDGE_2FITLRT, n=Inf,p.value=ALPHA))
countsDGE_2FITLRT_Dec <- decideTestsDGE(countsDGE_2FITLRT, adjust.method="BH", p.value = ALPHA)
summary(countsDGE_2FITLRT_Dec)

countsDGE_2FITLRTtags <- rownames(countsDGE_2)[as.logical(countsDGE_2FITLRT_Dec)]
plotSmear(countsDGE_2FITLRT, de.tags=countsDGE_2FITLRTtags); abline(h = c(-2, 2), col = "blue")

out <- topTags(countsDGE_2FITLRT, n=Inf,p.value=ALPHA)
write.table(out$table, file=outFile2, row.names = TRUE, col.names = NA, quote=FALSE, sep = "\t")
out2<-out$table["logFC"]>0
outup<-out[out2,]
out2<-out$table["logFC"]<0
outdown<-out[out2,]
write.table(outup$table, file=outFile2u, row.names = TRUE, col.names = NA, quote=FALSE, sep = "\t")
write.table(outdown$table, file=outFile2d, row.names = TRUE, col.names = NA, quote=FALSE, sep = "\t")


################################################################################################################
################################################################################################################
################################################################################################################

# DESeq2

# put the countdata (with infos from above!) into a matrix and collect the column data
countData_HostNoninf_vs_HostPACE<-as.matrix(countdata)
(colData_HostNoninf_vs_HostPACE <- DataFrame(sampleTable))

# create DESeq object
dds_HostNoninf_vs_HostPACE <- DESeqDataSetFromMatrix(countData=countData_HostNoninf_vs_HostPACE,colData=colData_HostNoninf_vs_HostPACE,design= ~ treat)
# set reference level
dds_HostNoninf_vs_HostPACE$treat<-relevel(dds_HostNoninf_vs_HostPACE$treat, ref=reffi)
dds_HostNoninf_vs_HostPACE
# make DESeq analysis
dds_HostNoninf_vs_HostPACE_analysed <- DESeq(dds_HostNoninf_vs_HostPACE)
# get the results
res_dds_HostNoninf_vs_HostPACE_analysed <- results(dds_HostNoninf_vs_HostPACE_analysed)

resOrdered_dds_HostNoninf_vs_HostPACE_analysed <- res_dds_HostNoninf_vs_HostPACE_analysed[order(res_dds_HostNoninf_vs_HostPACE_analysed$padj),]
# get significant and non significant genes and write them to files
sig_resOrdered_dds_HostNoninf_vs_HostPACE_analysed <- subset(resOrdered_dds_HostNoninf_vs_HostPACE_analysed, padj<ALPHA)
down_sig_resOrdered_dds_HostNoninf_vs_HostPACE_analysed <- subset(sig_resOrdered_dds_HostNoninf_vs_HostPACE_analysed, sig_resOrdered_dds_HostNoninf_vs_HostPACE_analysed$log2FoldChange<0)
up_sig_resOrdered_dds_HostNoninf_vs_HostPACE_analysed <- subset(sig_resOrdered_dds_HostNoninf_vs_HostPACE_analysed, sig_resOrdered_dds_HostNoninf_vs_HostPACE_analysed$log2FoldChange>0)
nonsig_resOrdered_dds_HostNoninf_vs_HostPACE_analysed <- subset(resOrdered_dds_HostNoninf_vs_HostPACE_analysed, padj>=ALPHA)
write.table(as.data.frame(sig_resOrdered_dds_HostNoninf_vs_HostPACE_analysed),file=outFile3,sep="\t",quote=F)
write.table(as.data.frame(nonsig_resOrdered_dds_HostNoninf_vs_HostPACE_analysed),file=outFile4,sep="\t",quote=F)
write.table(as.data.frame(up_sig_resOrdered_dds_HostNoninf_vs_HostPACE_analysed),file=outFile3u,sep="\t",quote=F)
write.table(as.data.frame(down_sig_resOrdered_dds_HostNoninf_vs_HostPACE_analysed),file=outFile3d,sep="\t",quote=F)
sig_resOrdered_dds_HostNoninf_vs_HostPACE_analysed
nonsig_resOrdered_dds_HostNoninf_vs_HostPACE_analysed
# print summary and plotMA the results
summary(resOrdered_dds_HostNoninf_vs_HostPACE_analysed,alpha=ALPHA)
#pdf(file='/project/explusi/phpeters/GEA/LFCRegulatedTni_24.pdf');
#png(filename='/project/explusi/phpeters/GEA/LFCRegulatedTni.png');
plotMA(resOrdered_dds_HostNoninf_vs_HostPACE_analysed,main="Regulated genes of Trichoplusia ni\n24h vs. 24h",alpha=ALPHA,ylim=c(-2,2))
#dev.off()


#display the changes

## ----heatmap, dev="pdf", fig.width=5, fig.height=7-----------------------
library("pheatmap")
select <- order(rowMeans(counts(dds,normalized=TRUE)),
                decreasing=TRUE)[1:20]

nt <- normTransform(dds) # defaults to log2(x+1)
log2.norm.counts <- assay(nt)[select,]
df <- as.data.frame(colData(dds)[,c("condition","type")])
pheatmap(log2.norm.counts, cluster_rows=FALSE, show_rownames=FALSE,
         cluster_cols=FALSE, annotation_col=df)

pheatmap(assay(rld)[select,], cluster_rows=FALSE, show_rownames=FALSE,
         cluster_cols=FALSE, annotation_col=df)

pheatmap(assay(vsd)[select,], cluster_rows=FALSE, show_rownames=FALSE,
         cluster_cols=FALSE, annotation_col=df)


### testing for specific logFC

## ----ddsNoPrior----------------------------------------------------------
ddsNoPrior <- DESeq(dds, betaPrior=FALSE)

## ----lfcThresh, fig.show="asis", fig.cap='MA-plots of tests of log2 fold change with respect to a threshold value.  Going left to right across rows, the tests are for \\Robject{altHypothesis = "greaterAbs"}, \\Robject{"lessAbs"}, \\Robject{"greater"}, and \\Robject{"less"}.\\label{figure/lfcThresh-1}'----
par(mfrow=c(2,2),mar=c(2,2,1,1))
yl <- c(-2.5,2.5)

resGA <- results(dds, lfcThreshold=.5, altHypothesis="greaterAbs")
resLA <- results(ddsNoPrior, lfcThreshold=.5, altHypothesis="lessAbs")
resG <- results(dds, lfcThreshold=.5, altHypothesis="greater")
resL <- results(dds, lfcThreshold=.5, altHypothesis="less")

plotMA(resGA, ylim=yl)
abline(h=c(-.5,.5),col="dodgerblue",lwd=2)
plotMA(resLA, ylim=yl)
abline(h=c(-.5,.5),col="dodgerblue",lwd=2)
plotMA(resG, ylim=yl)
abline(h=.5,col="dodgerblue",lwd=2)
plotMA(resL, ylim=yl)
abline(h=-.5,col="dodgerblue",lwd=2)

## ----mcols---------------------------------------------------------------
mcols(dds,use.names=TRUE)[1:4,1:4]
# here using substr() only for display purposes
substr(names(mcols(dds)),1,10) 
mcols(mcols(dds), use.names=TRUE)[1:4,]