Question

Deleted:Problem with finding specific columns in dataset downloaded form GDC.

0

Entering edit mode

Mikołaj Mierzejewski • 0

@d312723d

Last seen 2.3 years ago

Poland

Hello everyone,

Im trying to find columns called 'eventTime' and 'eventObserved' in my TCGA-GBM dataset with RNA-Seq data and clinical data. already called function colnames(colData(tcga_data)) but unfortunately no luck with that. Does anyone know how to do it?

library(pathwayPCA)
library(TCGAbiolinks)
library(tidyverse)
library(SummarizedExperiment)
#donwload data
TCGAbiolinks:::getProjectSummary('TCGA-GBM')

#redo our query
query_TCGA = GDCquery(
  project = "TCGA-GBM",
  data.category = "Transcriptome Profiling", # parameter enforced by GDCquery
  experimental.strategy = "RNA-Seq",
  data.type = 'Gene Expression Quantification',
  workflow.type = "STAR - Counts",
  sample.type = c("Primary Tumor",'Recurrent Tumor', 'Solid Tissue Normal'))

prad_res_TCGA = getResults(query_TCGA)

GDCdownload(query = query_TCGA)
tcga_data = GDCprepare(query_TCGA)

dim(tcga_data)
colnames(colData(tcga_data))

head(assay(tcga_data)[,1:10])

table(tcga_data@colData$race)

new_data = assay(tcga_data)
#transpose new_data
new_data2 =t(new_data)

#lets choose caterogical variable 
y = data.frame(tcga_data$gender, tcga_data$race,tcga_data$sample_id, tcga_data$days_to_last_follow_up, tcga_data$vital_status, tcga_data$age_at_index)

new_frame = cbind(new_data2, y)

#Changing male to 0 and female to 1 
new_frame$tcga_data.gender <-ifelse(new_frame$tcga_data.gender=="female",1,0)
head(new_frame$tcga_data.gender)

#Changing Dead to 0 and Alive to 1 
new_frame$tcga_data.vital_status <-ifelse(new_frame$tcga_data.vital_status=="Alive",1,0)

#Converting race from character to factor
new_frame$tcga_data.race <- as.factor(new_frame$tcga_data.race)  
new_frame$tcga_data.gender <- as.factor(new_frame$tcga_data.gender)  
new_frame$tcga_data.vital_status <- as.factor(new_frame$tcga_data.vital_status)  
#Removing all NA values in our data frame 
frame2 = new_frame[complete.cases(new_frame), ]

write.csv(frame2, '/Users/mikolajmierzejewski/Library/Mobile Documents/com~apple~CloudDocs/Magisterka_studia/Magisterka_3_semestr/Statistics_and_bioinformatisc_with_R/Report2/GBM.csv')




#pathway PCA
cp_pathwayCollection <- read_gmt('c2.all.v2022.1.Hs.entrez.gmt', description = TRUE)
head(cp_pathwayCollection)

assay_df = read.csv('GBM.csv')
assay_df
#remove NA values and rows that include them 
assay_df = assay_df[complete.cases(assay_df), ]
#remove duplicates 
assay_df = assay_df[!duplicated(assay_df$tcga_data.sample_id), ]

GBM_OmicsSurv = CreateOmics(
  assayData_df = assay_df[,(1:60660)],
  pathwayCollection_ls = cp_pathwayCollection,
  response = assay_df[,60663:60665],
  respType = 'surv'
)

TCGAbiolinks TCGAbiolinksGUI.data • 621 views

ADD COMMENT • link 2.4 years ago Mikołaj Mierzejewski • 0