Hello everyone,
I have a problem with creating OMICS object in my analysis. After downloading a TCGA data for glioblastoma I have tried crating OMICS object but unfortunately I stumbled upon a problem. Downloaded data from TCGA includes access number and not symbol of the gene.
library(pathwayPCA)
library(TCGAbiolinks)
library(tidyverse)
library(SummarizedExperiment)
query_TCGA = GDCquery(
project = "TCGA-GBM",
data.category = "Transcriptome Profiling", # parameter enforced by GDCquery
experimental.strategy = "RNA-Seq",
data.type = 'Gene Expression Quantification',
workflow.type = "STAR - Counts",
sample.type = c("Primary Tumor",'Recurrent Tumor', 'Solid Tissue Normal'))
prad_res_TCGA = getResults(query_TCGA)
GDCdownload(query = query_TCGA)
tcga_data = GDCprepare(query_TCGA)
dim(tcga_data)
colnames(colData(tcga_data))
new_data = assay(tcga_data)
#transpose new_data
new_data2 =t(new_data)
#lets choose caterogical variable
y = data.frame(tcga_data$gender, tcga_data$race,tcga_data$sample_id, tcga_data$days_to_last_follow_up, tcga_data$vital_status, tcga_data$age_at_index)
new_frame = cbind(new_data2, y)
#Changing male to 0 and female to 1
new_frame$tcga_data.gender <-ifelse(new_frame$tcga_data.gender=="female",1,0)
head(new_frame$tcga_data.gender)
#Changing Dead to 0 and Alive to 1
new_frame$tcga_data.vital_status <-ifelse(new_frame$tcga_data.vital_status=="Alive",1,0)
#Converting to factor
new_frame$tcga_data.gender <- as.factor(new_frame$tcga_data.gender)
new_frame$tcga_data.vital_status <- as.factor(new_frame$tcga_data.vital_status)
#Removing all NA values in our data frame
frame2 = new_frame[complete.cases(new_frame), ]
write.csv(frame2, '/Users/mikolajmierzejewski/Library/Mobile Documents/com~apple~CloudDocs/Magisterka_studia/Magisterka_3_semestr/Statistics_and_bioinformatisc_with_R/Report2/GBM.csv')
#pathway PCA
cp_pathwayCollection <- read_gmt('c2.all.v2022.1.Hs.entrez.gmt', description = TRUE)
head(cp_pathwayCollection)
assay_df = read.csv('GBM.csv')
assay_df
#remove NA values and rows that include them
assay_df = assay_df[complete.cases(assay_df), ]
#remove duplicates
assay_df = assay_df[!duplicated(assay_df$tcga_data.sample_id), ]
GBM_OmicsSurv = CreateOmics(
assayData_df = assay_df[,(1:60660)],
pathwayCollection_ls = cp_pathwayCollection,
response = assay_df[,60663:60665],
respType = 'surv'
)
Error that has been occurring after creating OMICS object:
These genes may be excluded from analysis. Proper gene names contain alphanumeric characters only, and start with a letter. Warning in 'CheckSampleIDs (assayData_df)' command: Row names will be ignored. Sample IDs must be in the first column of the data frame.
Warning in the command 'CheckSampleIDs (respClean_df)': Row names will be ignored. Sample IDs must be in the first column of the data frame.
Error in command 'CheckSampleIDs (respClean_df)': Sample IDs must be unique. Additionally: Warning message: In the '.convertPhenoDF (response, type = respType)' command: NA values appeared due to the transformation