Entering edit mode
mictadlo
▴
10
@mictadlo-10885
Last seen 4.8 years ago
Hi,
I got Length of 'group' must equal number of columns in 'counts'
with the below code.
> targets <- read.delim("phenoSp.txt", stringsAsFactors=FALSE) > targets
ids CellType Status 1 SpRT_15-1_ACAGTG_L003_R1_001 Root_tip 1.5 2 SpRT_15-2_GCCAAT_L003_R1_001 Root_tip 1.5 3 SpRT_15-3_CTTGTA_L003_R1_001 Root_tip 1.5 4 SpRT_15-4_GTGAAA_L003_R1_001 Root_tip 1.5 5 1916-MJO-0001_1 Leaves 1.5 6 1916-MJO-0002_1 Leaves 1.5 7 1916-MJO-0003_1 Leaves 1.5 8 1916-MJO-0004_1 Leaves 1.5 9 SpRT_2-1_ACAGTG_L002_R1_001 Root_tip 2.0 10 SpRT_2-2_GCCAAT_L002_R1_001 Root_tip 2.0 11 SpRT_2-3_CTTGTA_L002_R1_001 Root_tip 2.0 12 SpRT_2-4_GTGAAA_L002_R1_001 Root_tip 2.0 13 1916-MJO-0005_1 Leaves 2.0 14 1916-MJO-0006_1 Leaves 2.0 15 1916-MJO-0007_1 Leaves 2.0 16 1916-MJO-0008_1 Leaves 2.0 17 SpRT_3-1_ACAGTG_L001_R1_001 Root_tip 3.0 18 SpRT_3-2_GCCAAT_L001_R1_001 Root_tip 3.0 19 SpRT_3-3_CTTGTA_L001_R1_001 Root_tip 3.0 20 SpRT_3-4_GTGAAA_L001_R1_001 Root_tip 3.0 21 1916-MJO-0009_1 Leaves 3.0 22 1916-MJO-0010_1 Leaves 3.0 23 1916-MJO-0011_1 Leaves 3.0 24 1916-MJO-0012_1 Leaves 3.0> > ## ----group--------------------------------------------------------------- > group <- paste(targets$CellType, targets$Status, sep=".") > group <- factor(group) > table(group) group Leaves.1.5 Leaves.2 Leaves.3 Root_tip.1.5 Root_tip.2 Root_tip.3 4 4 4 4 4 4 > > > ## ----readcounds table------------------------------------------------ > cntdir <- paste(basedir, "Htseq_read_counts", sep="/") > pat <- "_001.sorted.out" > alignment.all <- list.files(path = cntdir, + pattern = pat, + all.files = TRUE, + recursive = FALSE, + ignore.case = FALSE, + include.dirs = FALSE) > > # we choose the 'all' series > myfiles <- alignment.all > DT <- list() > > # read each file as array element of DT and rename the last 2 cols > # we created a list of single sample tables > for (i in 1:length(myfiles) ) { + infile = paste(cntdir, myfiles[i], sep = "/") + DT[[myfiles[i]]] <- read.table(infile, header = F, stringsAsFactors = FALSE) + cnts <- gsub("(.*)_all_counts.txt", "\\1", myfiles[i]) + colnames(DT[[myfiles[i]]]) <- c("ID", cnts) + } > > # merge all elements based on first ID columns > data <- DT[[myfiles[1]]] > > # inspect > print("data <- DT[[myfiles[1]]]") [1] "data <- DT[[myfiles[1]]]" > head(data) ID SpRT_15-1_ACAGTG_L003_R1_001.sorted.out 1 augustus_masked-lcl_ScwjSwM_1-processed-gene-0.2 3 2 augustus_masked-lcl_ScwjSwM_100-processed-gene-0.0 8 3 augustus_masked-lcl_ScwjSwM_1000-processed-gene-0.1 152 4 augustus_masked-lcl_ScwjSwM_1000-processed-gene-0.3 7 5 augustus_masked-lcl_ScwjSwM_1000-processed-gene-1.13 92 6 augustus_masked-lcl_ScwjSwM_1000-processed-gene-2.0 78 > > # we now add each other table with the ID column as key > for (i in 2:length(myfiles)) { + y <- DT[[myfiles[i]]] + z <- merge(data, y, by = c("ID")) + data <- z + } > > # ID column becomes rownames > rownames(data) <- data$ID > data <- data[,-1] > > ## add total counts per sample > data <- rbind(data, tot.counts=colSums(data)) > > # inspect and look at the top row names! > print("!!! data <- rbind(data, tot.counts=colSums(data))") [1] "!!! data <- rbind(data, tot.counts=colSums(data))" > head(data) SpRT_15-1_ACAGTG_L003_R1_001.sorted.out __alignment_not_unique 23080187 __ambiguous 1268062 __no_feature 3375106 __not_aligned 2626074 __too_low_aQual 0 augustus_masked-lcl_ScwjSwM_1-processed-gene-0.2 3 SpRT_15-2_GCCAAT_L003_R1_001.sorted.out __alignment_not_unique 20726254 __ambiguous 1073940 __no_feature 2753049 __not_aligned 1972160 __too_low_aQual 0 augustus_masked-lcl_ScwjSwM_1-processed-gene-0.2 3 > tail(data) SpRT_15-1_ACAGTG_L003_R1_001.sorted.out snap_masked-lcl_ScwjSwM_998-processed-gene-0.5 0 snap_masked-lcl_ScwjSwM_998-processed-gene-1.32 0 snap_masked-lcl_ScwjSwM_998-processed-gene-2.27 6 snap_masked-lcl_ScwjSwM_999-processed-gene-0.14 122 snap_masked-lcl_ScwjSwM_999-processed-gene-3.28 0 tot.counts 65310548 SpRT_15-2_GCCAAT_L003_R1_001.sorted.out snap_masked-lcl_ScwjSwM_998-processed-gene-0.5 0 snap_masked-lcl_ScwjSwM_998-processed-gene-1.32 0 snap_masked-lcl_ScwjSwM_998-processed-gene-2.27 9 snap_masked-lcl_ScwjSwM_999-processed-gene-0.14 60 snap_masked-lcl_ScwjSwM_999-processed-gene-3.28 0 tot.counts 56198827 > > # transpose table for readability > data.all.summary <- data[grep("^ENS", rownames(data), perl=TRUE, invert=TRUE), ] > > # review > print('data.all.summary <- data[grep("^ENS", rownames(data), perl=TRUE, invert=TRUE), ]') [1] "data.all.summary <- data[grep(\"^ENS\", rownames(data), perl=TRUE, invert=TRUE), ]" > head(data.all.summary) SpRT_15-1_ACAGTG_L003_R1_001.sorted.out __alignment_not_unique 23080187 __ambiguous 1268062 __no_feature 3375106 __not_aligned 2626074 __too_low_aQual 0 augustus_masked-lcl_ScwjSwM_1-processed-gene-0.2 3 SpRT_15-2_GCCAAT_L003_R1_001.sorted.out __alignment_not_unique 20726254 __ambiguous 1073940 __no_feature 2753049 __not_aligned 1972160 __too_low_aQual 0 augustus_masked-lcl_ScwjSwM_1-processed-gene-0.2 3 > > # transpose table > #print("t(data.all.summary)") > #head(t(data.all.summary)) > > # write summary to file > write.csv(data.all.summary, file = "Htseq_read_counts/htseq_counts_all-summary.csv") > > > ## ----DGEList, message=FALSE---------------------------------------------- > library(edgeR) > y <- DGEList(data.all.summary[,-1], group=group, + genes=data.all.summary[,1,drop=FALSE]) Error in DGEList(data.all.summary[, -1], group = group, genes = data.all.summary[, : Length of 'group' must equal number of columns in 'counts'
> sessionInfo()
R version 3.3.3 (2017-03-06)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Kali GNU/Linux Rolling
locale:
[1] LC_CTYPE=en_AU.UTF-8 LC_NUMERIC=C LC_TIME=en_AU.UTF-8
[4] LC_COLLATE=en_AU.UTF-8 LC_MONETARY=en_AU.UTF-8 LC_MESSAGES=en_AU.UTF-8
[7] LC_PAPER=en_AU.UTF-8 LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] edgeR_3.16.5 limma_3.30.13 BiocInstaller_1.24.0
loaded via a namespace (and not attached):
[1] tools_3.3.3 grid_3.3.3 locfit_1.5-9.1 lattice_0.20-35
What did I miss?
Thank you in advance.
I know this is over 5 years old, but I just had the same problem and found an odd fix for it. I was running
glimmaMA
with a DESeq2 object and getting the same error, but in my design for theDESeqDataSetFromMatrix
my group variable was calledGROUP
and for whatever stupid reason, renaming it togroup
made it work.