Question

Length of 'group' must equal number of columns in 'counts'

0

Entering edit mode

mictadlo ▴ 10

@mictadlo-10885

Last seen 5.5 years ago

Hi,

I got Length of 'group' must equal number of columns in 'counts' with the below code.

> targets <- read.delim("phenoSp.txt", stringsAsFactors=FALSE)
> targets
                            ids CellType Status
1  SpRT_15-1_ACAGTG_L003_R1_001 Root_tip    1.5
2  SpRT_15-2_GCCAAT_L003_R1_001 Root_tip    1.5
3  SpRT_15-3_CTTGTA_L003_R1_001 Root_tip    1.5
4  SpRT_15-4_GTGAAA_L003_R1_001 Root_tip    1.5
5               1916-MJO-0001_1   Leaves    1.5
6               1916-MJO-0002_1   Leaves    1.5
7               1916-MJO-0003_1   Leaves    1.5
8               1916-MJO-0004_1   Leaves    1.5
9   SpRT_2-1_ACAGTG_L002_R1_001 Root_tip    2.0
10  SpRT_2-2_GCCAAT_L002_R1_001 Root_tip    2.0
11  SpRT_2-3_CTTGTA_L002_R1_001 Root_tip    2.0
12  SpRT_2-4_GTGAAA_L002_R1_001 Root_tip    2.0
13              1916-MJO-0005_1   Leaves    2.0
14              1916-MJO-0006_1   Leaves    2.0
15              1916-MJO-0007_1   Leaves    2.0
16              1916-MJO-0008_1   Leaves    2.0
17  SpRT_3-1_ACAGTG_L001_R1_001 Root_tip    3.0
18  SpRT_3-2_GCCAAT_L001_R1_001 Root_tip    3.0
19  SpRT_3-3_CTTGTA_L001_R1_001 Root_tip    3.0
20  SpRT_3-4_GTGAAA_L001_R1_001 Root_tip    3.0
21              1916-MJO-0009_1   Leaves    3.0
22              1916-MJO-0010_1   Leaves    3.0
23              1916-MJO-0011_1   Leaves    3.0
24              1916-MJO-0012_1   Leaves    3.0
> 
> ## ----group---------------------------------------------------------------
> group <- paste(targets$CellType, targets$Status, sep=".")
> group <- factor(group)
> table(group)
group
  Leaves.1.5     Leaves.2     Leaves.3 Root_tip.1.5   Root_tip.2   Root_tip.3 
           4            4            4            4            4            4 
> 
> 
> ## ----readcounds table------------------------------------------------
> cntdir <- paste(basedir, "Htseq_read_counts", sep="/")
> pat <- "_001.sorted.out"
> alignment.all <- list.files(path = cntdir,
+                          pattern = pat,
+                          all.files = TRUE,
+                          recursive = FALSE,
+                          ignore.case = FALSE,
+                          include.dirs = FALSE)
> 
> # we choose the 'all' series
> myfiles <- alignment.all
> DT <- list()
> 
> # read each file as array element of DT and rename the last 2 cols
> # we created a list of single sample tables
> for (i in 1:length(myfiles) ) {
+   infile = paste(cntdir, myfiles[i], sep = "/")
+   DT[[myfiles[i]]] <- read.table(infile, header = F, stringsAsFactors = FALSE)
+   cnts <- gsub("(.*)_all_counts.txt", "\\1", myfiles[i])
+   colnames(DT[[myfiles[i]]]) <- c("ID", cnts)
+ }
> 
> # merge all elements based on first ID columns
> data <- DT[[myfiles[1]]]
> 
> # inspect
> print("data <- DT[[myfiles[1]]]")
[1] "data <- DT[[myfiles[1]]]"
> head(data)
                                                    ID SpRT_15-1_ACAGTG_L003_R1_001.sorted.out
1     augustus_masked-lcl_ScwjSwM_1-processed-gene-0.2                                       3
2   augustus_masked-lcl_ScwjSwM_100-processed-gene-0.0                                       8
3  augustus_masked-lcl_ScwjSwM_1000-processed-gene-0.1                                     152
4  augustus_masked-lcl_ScwjSwM_1000-processed-gene-0.3                                       7
5 augustus_masked-lcl_ScwjSwM_1000-processed-gene-1.13                                      92
6  augustus_masked-lcl_ScwjSwM_1000-processed-gene-2.0                                      78
> 
> # we now add each other table with the ID column as key
> for (i in 2:length(myfiles)) {
+   y <- DT[[myfiles[i]]]
+   z <- merge(data, y, by = c("ID"))
+   data <- z
+ }
> 
> # ID column becomes rownames
> rownames(data) <- data$ID
> data <- data[,-1]
> 
> ## add total counts per sample
> data <- rbind(data, tot.counts=colSums(data))
> 
> # inspect and look at the top row names!
> print("!!! data <- rbind(data, tot.counts=colSums(data))")
[1] "!!! data <- rbind(data, tot.counts=colSums(data))"
> head(data)
                                                 SpRT_15-1_ACAGTG_L003_R1_001.sorted.out
__alignment_not_unique                                                          23080187
__ambiguous                                                                      1268062
__no_feature                                                                     3375106
__not_aligned                                                                    2626074
__too_low_aQual                                                                        0
augustus_masked-lcl_ScwjSwM_1-processed-gene-0.2                                       3
                                                 SpRT_15-2_GCCAAT_L003_R1_001.sorted.out
__alignment_not_unique                                                          20726254
__ambiguous                                                                      1073940
__no_feature                                                                     2753049
__not_aligned                                                                    1972160
__too_low_aQual                                                                        0
augustus_masked-lcl_ScwjSwM_1-processed-gene-0.2                                       3
                                
> tail(data)
                                                SpRT_15-1_ACAGTG_L003_R1_001.sorted.out
snap_masked-lcl_ScwjSwM_998-processed-gene-0.5                                        0
snap_masked-lcl_ScwjSwM_998-processed-gene-1.32                                       0
snap_masked-lcl_ScwjSwM_998-processed-gene-2.27                                       6
snap_masked-lcl_ScwjSwM_999-processed-gene-0.14                                     122
snap_masked-lcl_ScwjSwM_999-processed-gene-3.28                                       0
tot.counts                                                                     65310548
                                                SpRT_15-2_GCCAAT_L003_R1_001.sorted.out
snap_masked-lcl_ScwjSwM_998-processed-gene-0.5                                        0
snap_masked-lcl_ScwjSwM_998-processed-gene-1.32                                       0
snap_masked-lcl_ScwjSwM_998-processed-gene-2.27                                       9
snap_masked-lcl_ScwjSwM_999-processed-gene-0.14                                      60
snap_masked-lcl_ScwjSwM_999-processed-gene-3.28                                       0
tot.counts                                                                     56198827
                                                
> 
> # transpose table for readability
> data.all.summary <- data[grep("^ENS", rownames(data), perl=TRUE, invert=TRUE), ]
> 
> # review
> print('data.all.summary <- data[grep("^ENS", rownames(data), perl=TRUE, invert=TRUE), ]')
[1] "data.all.summary <- data[grep(\"^ENS\", rownames(data), perl=TRUE, invert=TRUE), ]"
> head(data.all.summary)
                                                 SpRT_15-1_ACAGTG_L003_R1_001.sorted.out
__alignment_not_unique                                                          23080187
__ambiguous                                                                      1268062
__no_feature                                                                     3375106
__not_aligned                                                                    2626074
__too_low_aQual                                                                        0
augustus_masked-lcl_ScwjSwM_1-processed-gene-0.2                                       3
                                                 SpRT_15-2_GCCAAT_L003_R1_001.sorted.out
__alignment_not_unique                                                          20726254
__ambiguous                                                                      1073940
__no_feature                                                                     2753049
__not_aligned                                                                    1972160
__too_low_aQual                                                                        0
augustus_masked-lcl_ScwjSwM_1-processed-gene-0.2                                       3                                             
> 
> # transpose table
> #print("t(data.all.summary)")
> #head(t(data.all.summary))
> 
> # write summary to file
> write.csv(data.all.summary, file = "Htseq_read_counts/htseq_counts_all-summary.csv")
> 
> 
> ## ----DGEList, message=FALSE----------------------------------------------
> library(edgeR)
> y <- DGEList(data.all.summary[,-1], group=group,
+              genes=data.all.summary[,1,drop=FALSE])
Error in DGEList(data.all.summary[, -1], group = group, genes = data.all.summary[,  : 
  Length of 'group' must equal number of columns in 'counts'

> sessionInfo() 
R version 3.3.3 (2017-03-06)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Kali GNU/Linux Rolling

locale:
 [1] LC_CTYPE=en_AU.UTF-8       LC_NUMERIC=C               LC_TIME=en_AU.UTF-8       
 [4] LC_COLLATE=en_AU.UTF-8     LC_MONETARY=en_AU.UTF-8    LC_MESSAGES=en_AU.UTF-8   
 [7] LC_PAPER=en_AU.UTF-8       LC_NAME=C                  LC_ADDRESS=C              
[10] LC_TELEPHONE=C             LC_MEASUREMENT=en_AU.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] edgeR_3.16.5         limma_3.30.13        BiocInstaller_1.24.0

loaded via a namespace (and not attached):
[1] tools_3.3.3     grid_3.3.3      locfit_1.5-9.1  lattice_0.20-35

What did I miss?

Thank you in advance.

htseqcounts edger • 5.0k views

ADD COMMENT • link updated 2.8 years ago by Randy • 0 • written 7.8 years ago by mictadlo ▴ 10

0

Entering edit mode

I know this is over 5 years old, but I just had the same problem and found an odd fix for it. I was running glimmaMA with a DESeq2 object and getting the same error, but in my design for the DESeqDataSetFromMatrix my group variable was called GROUP and for whatever stupid reason, renaming it to group made it work.

ADD REPLY • link 2.8 years ago Randy • 0

score 1 · Answer 1 · 2017-09-04

1

Entering edit mode

Gordon Smyth 52k

@gordon-smyth

Last seen 10 minutes ago

WEHI, Melbourne, Australia

Have you tried the solution I suggested previously?

read counts for each gene for edgeR

ADD COMMENT • link 7.8 years ago Gordon Smyth 52k