I have no experience with doing RNAseq analysis and I am having trouble finding a tutorial on how to figure out what commands to use and why. I have the following code that was used by a former postdoc in my lab, but I don't even know how I'm supposed to set up the sample info text file so that I can actually start doing the analysis. I'm also not sure if this is the complete code or if it's just guidelines. I literally have only been able to make it through importing the data and assigning it to a variable.
My experimental design is a 3 bacteria system vs. 2 WT bacteria and 1 mutant, so I am trying to compare the expression of each gene in each bacterium versus the same bacterium in the mutant condition.
K <- read.csv("/Users/Amanda/Desktop/ExtraRNASeqfiles/K_keccounts.csv", header = T, row.names = 1) names(K) #here I used K = read.table("/Users/JN/Desktop/htseq_all.csv", header=T, row.names=1, com='') instead of the code provided above* K.ln1 <- log(K + 1) pairs.panels(F.ln1, gap = 0) sampleInfo <- read.csv("/Users/Amanda/Desktop/ExtraRNASeqfiles/Kkecalldesign.csv", header = T) # make a dgeList matrix dgeK <- DGEList(K, group=sampleInfo$condition) dgeK$samples sampleInfo$file # use above to check levels of group identification # visualize cpm normCounts <- cpm(dgeK) write.csv(normCounts, file="/Users/Amanda/Desktop/ExtraRNASeqfiles/NormCountsTHOR_K_kec.csv") pseudoNormCounts <- log2(normCounts + 1) boxplot(pseudoNormCounts, col="plum", las=3, notch = T, main = "cpm per Sample", xlab = "Sample", ylab = "Log2 cpm") # filter so at least 10 reads per gene apply(dgeK$counts, 2, sum) keep <- rowSums(cpm(dgeK)>10) >= 2 d <- dgeK[keep,] dim(dgeK) dim(d) # normalize TMM d <- calcNormFactors(d, method="TMM") d$samples plotMDS(d) plotMDS(d, col=as.numeric(sampleInfo$condition)) # estimate distribution d1 <- estimateDisp(d) plotBCV(d1) d1$samples$group # exactTest etKkec <- exactTest(d1, pair = c(1,2)) etKkec #check total number of regulated genes del1 <- decideTestsDGE(etKkec, adjust.method = "BH", p.value = 0.05, lfc = 1) summary(del1) del2 <- decideTestsDGE(etKkec, adjust.method = "BH", p.value = 0.05) summary(del2) topTags(etKkec) etKkectop <- topTags(etKkec, n = nrow(etKkec$table), adjust.method = "BH", p.value = 0.05)$table write.csv(etKkectop, file="/Users/Amanda/Desktop/etKkectop.csv")
> sessionInfo() R version 4.1.1 (2021-08-10) Platform: x86_64-w64-mingw32/x64 (64-bit) Running under: Windows 10 x64 (build 19042) Matrix products: default locale:  LC_COLLATE=English_United States.1252  LC_CTYPE=English_United States.1252  LC_MONETARY=English_United States.1252  LC_NUMERIC=C  LC_TIME=English_United States.1252 attached base packages:  stats graphics grDevices utils datasets  methods base other attached packages:  edgeR_3.36.0 limma_3.50.0 loaded via a namespace (and not attached):  BiocManager_1.30.16 compiler_4.1.1  tools_4.1.1 Rcpp_1.0.7  grid_4.1.1 locfit_1.5-9.4  lattice_0.20-45