KM Plot for gene of interest (e.g. TP53) using TCGA-PAAD dataset
0
0
Entering edit mode
Manav • 0
@cb585415
Last seen 13 months ago
United States

Hello, I am new to bioinformatic analyses and I am trying to analyse the TCGA dataset to plot a survival curve based on the expression of a gene of interest (say TP53). I have written the following code to analyse the TCGA data, but I am unable to proceed further in my attempt to plot a KM plot for survival for my gene, in this case TP53. Following is the code I have written so far. I would be grateful if someone can help me proceed with my analysis.

Thank you for your valuable time and advice. Manav

Code should be placed in three backticks as shown below

# Load Packages
library(TCGAbiolinks)
library(SummarizedExperiment)
library(EDASeq)
library(edgeR)
library(survminer)

# setwd()

# Build a query to retrieve gene expression data of required samples
query <- GDCquery(project = "TCGA-PAAD",
                  data.category = "Transcriptome Profiling",
                  data.type = "Gene Expression Quantification",
                  workflow.type = "STAR - Counts")

samplesDown <- getResults(query,cols=c("cases"))


# Selection of tumor samples "TP"
dataTP <- TCGAquery_SampleTypes(barcode = samplesDown,typesample = "TP")
# Selection of normal samples "NT"
dataNT <- TCGAquery_SampleTypes(barcode = samplesDown,typesample = "NT")


query.selected.samples <- GDCquery(project = "TCGA-PAAD",
                                   data.category = "Transcriptome Profiling",
                                   data.type = "Gene Expression Quantification",
                                   workflow.type = "STAR - Counts",
                                   barcode = c(dataTP, dataNT))

# Download data
GDCdownload(query = query.selected.samples)

# Prepare expression matrix with geneID in the rows and samples (barcode) in the columns
dataPrep <- GDCprepare(query = query.selected.samples,
                       save = TRUE,
                       save.filename = "TCGA_PAAD.rda")

paad_matrix <- assay(dataPrep, 'fpkm_unstrand')

# For gene expression if you need to see a boxplot correlation and AAIC plot to define outliers you can run
dataPrep <- TCGAanalyze_Preprocessing(object = dataPrep,
                                      cor.cut = 0.6)                      

# Normalization of genes
dataNorm <- TCGAanalyze_Normalization(tabDF = dataPrep,
                                      geneInfo = geneInfoHT,
                                      method = "gcContent")


boxplot(dataPrep, outline = FALSE)
boxplot(dataNorm, outline = FALSE)

# Quantile filter of genes
dataFilt <- TCGAanalyze_Filtering(tabDF = dataNorm,
                                  method = "quantile",
                                  qnt.cut =  0.25)   

# Differential Expression Analysis (DEA)
dataDEGs_0.01 <- TCGAanalyze_DEA(mat1 = dataFilt[,dataTP],
                            mat2 = dataFilt[,dataNT],
                            Cond1type = "Normal",
                            Cond2type = "Tumor",
                            fdr.cut = 0.01,
                            logFC.cut = 1,
                            method = "glmLRT")
dataDEGs_0.05 <- TCGAanalyze_DEA(mat1 = dataFilt[,dataTP],
                                 mat2 = dataFilt[,dataNT],
                                 Cond1type = "Normal",
                                 Cond2type = "Tumor",
                                 fdr.cut = 0.05,
                                 logFC.cut = 1,
                                 method = "glmLRT")

# DEGs table with expression values in normal and tumor samples
dataDEGsFiltLevel_0.01 <- TCGAanalyze_LevelTab(FC_FDR_table_mRNA = dataDEGs_0.01,
                                          typeCond1 = "Tumor",
                                          typeCond2 = "Normal",
                                          TableCond1 = dataFilt[,dataNT],
                                          TableCond2 = dataFilt[,dataTP])
dataDEGsFiltLevel_0.05 <- TCGAanalyze_LevelTab(FC_FDR_table_mRNA = dataDEGs_0.05,
                                          typeCond1 = "Tumor",
                                          typeCond2 = "Normal",
                                          TableCond1 = dataFilt[,dataNT],
                                          TableCond2 = dataFilt[,dataTP])


sessionInfo( )
R version 4.2.1 (2022-06-23 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19045)

Matrix products: default

locale:
[1] LC_COLLATE=English_United States.utf8  LC_CTYPE=English_United States.utf8   
[3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C                          
[5] LC_TIME=English_United States.utf8    

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] edgeR_3.38.4                limma_3.52.4                EDASeq_2.30.0              
 [4] ShortRead_1.54.0            GenomicAlignments_1.32.1    Rsamtools_2.12.0           
 [7] Biostrings_2.64.1           XVector_0.36.0              BiocParallel_1.30.4        
[10] SummarizedExperiment_1.26.1 Biobase_2.56.0              GenomicRanges_1.48.0       
[13] GenomeInfoDb_1.32.4         IRanges_2.30.1              S4Vectors_0.34.0           
[16] BiocGenerics_0.42.0         MatrixGenerics_1.8.1        matrixStats_0.63.0         
[19] survminer_0.4.9             ggpubr_0.6.0                ggplot2_3.4.1              
[22] survival_3.3-1              TCGAbiolinks_2.24.3        

loaded via a namespace (and not attached):
  [1] colorspace_2.1-0            rjson_0.2.21                ggsignif_0.6.4             
  [4] deldir_1.0-6                hwriter_1.3.2.1             ellipsis_0.3.2             
  [7] rstudioapi_0.14             bit64_4.0.5                 AnnotationDbi_1.58.0       
 [10] fansi_1.0.4                 xml2_1.3.3                  codetools_0.2-18           
 [13] splines_4.2.1               R.methodsS3_1.8.2           cachem_1.0.6               
 [16] knitr_1.42                  jsonlite_1.8.4              broom_1.0.3                
 [19] km.ci_0.5-6                 dbplyr_2.3.1                png_0.1-8                  
 [22] R.oo_1.25.0                 readr_2.1.4                 compiler_4.2.1             
 [25] httr_1.4.5                  backports_1.4.1             Matrix_1.5-3               
 [28] fastmap_1.1.0               cli_3.6.0                   prettyunits_1.1.1          
 [31] tools_4.2.1                 gtable_0.3.1                glue_1.6.2                 
 [34] GenomeInfoDbData_1.2.8      dplyr_1.1.0                 rappdirs_0.3.3             
 [37] Rcpp_1.0.10                 carData_3.0-5               vctrs_0.5.2                
 [40] rtracklayer_1.56.1          xfun_0.37                   stringr_1.5.0              
 [43] rvest_1.0.3                 lifecycle_1.0.3             restfulr_0.0.15            
 [46] rstatix_0.7.2               XML_3.99-0.13               zlibbioc_1.42.0            
 [49] zoo_1.8-11                  scales_1.2.1                aroma.light_3.26.0         
 [52] hms_1.1.2                   parallel_4.2.1              RColorBrewer_1.1-3         
 [55] yaml_2.3.7                  curl_5.0.0                  memoise_2.0.1              
 [58] gridExtra_2.3               KMsurv_0.1-5                downloader_0.4             
 [61] biomaRt_2.52.0              latticeExtra_0.6-30         stringi_1.7.12             
 [64] RSQLite_2.3.0               BiocIO_1.6.0                GenomicFeatures_1.48.4     
 [67] filelock_1.0.2              rlang_1.0.6                 pkgconfig_2.0.3            
 [70] bitops_1.0-7                TCGAbiolinksGUI.data_1.16.0 lattice_0.20-45            
 [73] purrr_1.0.1                 bit_4.0.5                   tidyselect_1.2.0           
 [76] plyr_1.8.8                  magrittr_2.0.3              R6_2.5.1                   
 [79] generics_0.1.3              DelayedArray_0.22.0         DBI_1.1.3                  
 [82] pillar_1.8.1                withr_2.5.0                 KEGGREST_1.36.3            
 [85] abind_1.4-5                 RCurl_1.98-1.10             tibble_3.1.8               
 [88] crayon_1.5.2                car_3.1-1                   survMisc_0.5.6             
 [91] interp_1.1-3                utf8_1.2.3                  BiocFileCache_2.4.0        
 [94] tzdb_0.3.0                  jpeg_0.1-10                 progress_1.2.2             
 [97] locfit_1.5-9.7              grid_4.2.1                  data.table_1.14.8          
[100] blob_1.2.3                  digest_0.6.31               xtable_1.8-4               
[103] tidyr_1.3.0                 R.utils_2.12.2              munsell_0.5.0
Survival TCGAbiolinks R PAAD • 533 views
ADD COMMENT

Login before adding your answer.

Traffic: 900 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6