Hello,
I need to remove duplicates in the animal_id column. I tried using tidySummarizedExperiment to make the code more reproducible. However, distinct() returns only a tibble with the single column animal_id, so I cannot assign it back to colData because the number of rows no longer matches.
library(TreeSummarizedExperiment)
library(tidySummarizedExperiment)
library(dplyr)
# Simulated assay: 200 Poisson(lambda = 10) draws laid out as a 20 x 10
# count matrix; the seed is fixed so the example is repeatable
set.seed(42)
assay_data <- rpois(200, lambda = 10) |>
  matrix(nrow = 20, ncol = 10)
# Per-column annotation: five animal ids, each listed twice, with the
# treated / disease labels alternating row by row
sample_data <- data.frame(
  animal_id = factor(rep(seq_len(5), each = 2)),
  treated = factor(rep_len(c("yes", "no"), 10)),
  disease = factor(rep_len(c("disease1", "disease2"), 10))
)
# Bundle the count matrix and the sample annotation into a
# TreeSummarizedExperiment object
tse <- TreeSummarizedExperiment(
  colData = sample_data,
  assays = list(counts = assay_data)
)
# Display the column (sample) annotation stored in the object
tse |>
  colData() |>
  print()
# Need to remove animal_id duplicates
#> DataFrame with 10 rows and 3 columns
#>    animal_id  treated  disease
#>     <factor> <factor> <factor>
#> 1          1      yes disease1
#> 2          1      no  disease2
#> 3          2      yes disease1
#> 4          2      no  disease2
#> 5          3      yes disease1
#> 6          3      no  disease2
#> 7          4      yes disease1
#> 8          4      no  disease2
#> 9          5      yes disease1
#> 10         5      no  disease2
# Attempt to drop duplicated animal_id entries with the tidy interface.
# The pipeline starts from `tse`, the object built earlier in this script
# (`distinct_tse` was never defined anywhere in it).
# As the output below shows, distinct() returns a one-column tibble here
# rather than a TreeSummarizedExperiment.
# NOTE(review): a possible workaround that keeps the object class is plain
# column subsetting, e.g. tse[, !duplicated(colData(tse)$animal_id)] -
# untested in this reprex, confirm before relying on it.
tse |> distinct(animal_id, .keep_all = TRUE)
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> # A tibble: 5 × 1
#> animal_id
#> <fct>    
#> 1 1        
#> 2 2        
#> 3 3        
#> 4 4        
#> 5 5
# Record the R version, platform and package versions used for this reprex
sessionInfo()
#> R version 4.3.1 (2023-06-16)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Linux Mint 21.2
#> 
#> Matrix products: default
#> BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so;  LAPACK version 3.10.0
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: Europe/Helsinki
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> loaded via a namespace (and not attached):
#>  [1] digest_0.6.33     fastmap_1.1.1     xfun_0.40         glue_1.6.2       
#>  [5] knitr_1.44        htmltools_0.5.6   rmarkdown_2.25    lifecycle_1.0.3  
#>  [9] cli_3.6.1         reprex_2.0.2      withr_2.5.0       compiler_4.3.1   
#> [13] rstudioapi_0.15.0 tools_4.3.1       evaluate_0.21     yaml_2.3.7       
#> [17] rlang_1.1.1       fs_1.6.3
                    
                
                