Deleted:Problem with finding specific columns in dataset downloaded form GDC.
0
0
Entering edit mode
@d312723d
Last seen 2.3 years ago
Poland

Hello everyone,

Im trying to find columns called 'eventTime' and 'eventObserved' in my TCGA-GBM dataset with RNA-Seq data and clinical data. already called function colnames(colData(tcga_data)) but unfortunately no luck with that. Does anyone know how to do it?

library(pathwayPCA)
library(TCGAbiolinks)
library(tidyverse)
library(SummarizedExperiment)
#donwload data
TCGAbiolinks:::getProjectSummary('TCGA-GBM')

#redo our query
query_TCGA = GDCquery(
  project = "TCGA-GBM",
  data.category = "Transcriptome Profiling", # parameter enforced by GDCquery
  experimental.strategy = "RNA-Seq",
  data.type = 'Gene Expression Quantification',
  workflow.type = "STAR - Counts",
  sample.type = c("Primary Tumor",'Recurrent Tumor', 'Solid Tissue Normal'))

prad_res_TCGA = getResults(query_TCGA)

GDCdownload(query = query_TCGA)
tcga_data = GDCprepare(query_TCGA)

dim(tcga_data)
colnames(colData(tcga_data))

head(assay(tcga_data)[,1:10])

table(tcga_data@colData$race)

new_data = assay(tcga_data)
#transpose new_data
new_data2 =t(new_data)

#lets choose caterogical variable 
y = data.frame(tcga_data$gender, tcga_data$race,tcga_data$sample_id, tcga_data$days_to_last_follow_up, tcga_data$vital_status, tcga_data$age_at_index)

new_frame = cbind(new_data2, y)

#Changing male to 0 and female to 1 
new_frame$tcga_data.gender <-ifelse(new_frame$tcga_data.gender=="female",1,0)
head(new_frame$tcga_data.gender)

#Changing Dead to 0 and Alive to 1 
new_frame$tcga_data.vital_status <-ifelse(new_frame$tcga_data.vital_status=="Alive",1,0)

#Converting race from character to factor
new_frame$tcga_data.race <- as.factor(new_frame$tcga_data.race)  
new_frame$tcga_data.gender <- as.factor(new_frame$tcga_data.gender)  
new_frame$tcga_data.vital_status <- as.factor(new_frame$tcga_data.vital_status)  
#Removing all NA values in our data frame 
frame2 = new_frame[complete.cases(new_frame), ]

write.csv(frame2, '/Users/mikolajmierzejewski/Library/Mobile Documents/com~apple~CloudDocs/Magisterka_studia/Magisterka_3_semestr/Statistics_and_bioinformatisc_with_R/Report2/GBM.csv')




#pathway PCA
cp_pathwayCollection <- read_gmt('c2.all.v2022.1.Hs.entrez.gmt', description = TRUE)
head(cp_pathwayCollection)

assay_df = read.csv('GBM.csv')
assay_df
#remove NA values and rows that include them 
assay_df = assay_df[complete.cases(assay_df), ]
#remove duplicates 
assay_df = assay_df[!duplicated(assay_df$tcga_data.sample_id), ]

GBM_OmicsSurv = CreateOmics(
  assayData_df = assay_df[,(1:60660)],
  pathwayCollection_ls = cp_pathwayCollection,
  response = assay_df[,60663:60665],
  respType = 'surv'
)
TCGAbiolinks TCGAbiolinksGUI.data • 621 views
ADD COMMENT
This thread is not open. No new answers may be added
Traffic: 883 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6