hichip fragments extraction chip seq
0
0
Entering edit mode
@9ae4560e
Last seen 5 weeks ago
France

hello, I'm trying to extract the hichip fragments that interacts with my chip peak, So I did an overlapping between my chip peak and hichip(annotated hichip). The hichip are composed from 2 fragments as they identify the interactions between 2 long genomic ranges. Now the overlapping gives me in subjectHits the fragments that overlaps with my chip peak, that could be either frg1 or frg2, and so I want to see if its frg1 what's the frg2 that it interacts with it and if its frg2 what is the frg1 that it interacts with it , this is what I did so far, but im having difficulties to get these interactions. Does anyone have any idea? I thought about getting the Ids of fragments and find it in frg1 and frg2 object but it didn't help.

load("genomic_objects_hg38 (1).RData")
# Load HiChIP data
hichip_file <- ".../GSE222206_HiChIP_Mock.bedpe.gz"
hichip <- read.delim(hichip_file, header = FALSE)
colnames(hichip)[1:6] <- c("frg1", "start1", "end1", "frg2", "start2", "end2")

# Convert to GRanges
frg1 <- GRanges(seqnames = Rle(hichip$frg1), ranges = IRanges(start = hichip$start1, end = hichip$end1))
frg2 <- GRanges(seqnames = Rle(hichip$frg2), ranges = IRanges(start = hichip$start2, end = hichip$end2))
hichip <- c(frg1, frg2)
hichip <- unique(hichip)

# hichip annotation 
# Initialize columns for multiple annotations
mcols(hichip)$all_genes <- character(length(hichip))
mcols(hichip)$TSS <- character(length(hichip))
mcols(hichip)$promoter <- character(length(hichip))
mcols(hichip)$all_genes_status <- character(length(hichip))
mcols(hichip)$TSS_status <- character(length(hichip))
mcols(hichip)$promoter_status <- character(length(hichip))
mcols(hichip)$genomic_location <- character(length(hichip))

# Overlap with all genes
ovl1 <- findOverlaps(hichip, all.genes)
ovl1_df <- as.data.frame(ovl1)
all_genes_by_fragment <- tapply(all.genes$SYMBOL[ovl1_df$subjectHits], ovl1_df$queryHits, paste, collapse = ", ")
all_genes_status_by_fragment <- tapply(all.genes$in.microarray[ovl1_df$subjectHits], ovl1_df$queryHits, paste, collapse = ", ")
mcols(hichip)$all_genes[as.numeric(names(all_genes_by_fragment))] <- all_genes_by_fragment # ou  ovl_1_df$queryhits 
mcols(hichip)$all_genes_status[as.numeric(names(all_genes_status_by_fragment))] <- all_genes_status_by_fragment

# Overlap with TSS
ovl2 <- findOverlaps(hichip, TSS)
ovl2_df <- as.data.frame(ovl2)
tss_by_fragment <- tapply(TSS$SYMBOL[ovl2_df$subjectHits], ovl2_df$queryHits, paste, collapse = ", ")
tss_status_by_fragment <- tapply(TSS$in.microarray[ovl2_df$subjectHits], ovl2_df$queryHits, paste, collapse = ", ")
mcols(hichip)$TSS[as.numeric(names(tss_by_fragment))] <- tss_by_fragment
mcols(hichip)$TSS_status[as.numeric(names(tss_status_by_fragment))] <- tss_status_by_fragment

# Overlap with promoters
ovl3 <- findOverlaps(hichip, prom)
ovl3_df <- as.data.frame(ovl3)
promoters_by_fragment <- tapply(prom$SYMBOL[ovl3_df$subjectHits], ovl3_df$queryHits, paste, collapse = ", ")
promoters_status_by_fragment <- tapply(prom$in.microarray[ovl3_df$subjectHits], ovl3_df$queryHits, paste, collapse = ", ")
mcols(hichip)$promoter[as.numeric(names(promoters_by_fragment))] <- promoters_by_fragment
mcols(hichip)$promoter_status[as.numeric(names(promoters_status_by_fragment))] <- promoters_status_by_fragment

# Summary column
mcols(hichip)$summary <- ifelse(
  mcols(hichip)$TSS != "", 
  mcols(hichip)$TSS,  
  ifelse(
    mcols(hichip)$promoter != "", 
    mcols(hichip)$promoter,  
    ifelse(
      mcols(hichip)$all_genes != "",
      mcols(hichip)$all_genes, 
      "intergenic" 
    )
  )
)

# Summary status column
mcols(hichip)$summary_status <- ifelse(
  mcols(hichip)$TSS != "", 
  mcols(hichip)$TSS_status,  
  ifelse(
    mcols(hichip)$promoter != "", 
    mcols(hichip)$promoter_status,  
    ifelse(
      mcols(hichip)$all_genes != "",
      mcols(hichip)$all_genes_status,  
      NA  
    )
  )
)

mcols(hichip)$genomic_location <- ifelse(
  !is.na(mcols(hichip)$TSS) & mcols(hichip)$TSS != "", 
  "TSS",
  ifelse(
    !is.na(mcols(hichip)$promoter) & mcols(hichip)$promoter != "", 
    "promoter",
    ifelse(
      !is.na(mcols(hichip)$all_genes) & mcols(hichip)$all_genes != "",
      "gene_body",
      "intergenic"
    )
  )
)

chipseq_file <- ".../S1_Invi_fdr.bed"
S1 <- read.delim(chipseq_file, header = FALSE)
colnames(S1)[1:3] <- c("seqnames", "start", "end")
S1 <- GRanges(seqnames = Rle(S1$seqnames), ranges = IRanges(start = S1$start, end = S1$end))
ovl_s1 <- findOverlaps(S1, hichip)
ovl_s1_df <- as.data.frame(ovl_s1)

#IDs from overlaps
query_hits <- ovl_s1_df$queryHits
subject_hits <- ovl_s1_df$subjectHits

fragments <- hichip[subject_hits]
peaks <- hichip[query_hits]
sessionInfo()
R version 4.1.1 (2021-08-10)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS

Matrix products: default
BLAS/LAPACK: /shared/ifbstor1/software/miniconda/envs/r-4.1.1/lib/libopenblasp-r0.3.18.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=en_US.UTF-8   
 [6] LC_MESSAGES=en_US.UTF-8    LC_PAPER=en_US.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] grid      stats4    stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] data.table_1.14.2                        reshape2_1.4.4                           TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2 
 [4] ChIPseeker_1.30.3                        ChIPpeakAnno_3.28.0                      msigdbr_7.4.1                           
 [7] ggupset_0.3.0                            DOSE_3.20.1                              RColorBrewer_1.1-3                      
[10] fgsea_1.20.0                             org.Hs.eg.db_3.14.0                      enrichplot_1.14.2                       
[13] clusterProfiler_4.2.2                    pheatmap_1.0.12                          VennDiagram_1.7.3                       
[16] futile.logger_1.4.3                      enrichR_3.2                              rtracklayer_1.54.0                      
[19] TxDb.Hsapiens.UCSC.hg38.knownGene_3.14.0 GenomicFeatures_1.46.1                   AnnotationDbi_1.56.1                    
[22] DiffBind_3.4.11                          BiocParallel_1.28.3                      BiocManager_1.30.23                     
[25] openxlsx_4.2.6.1                         readxl_1.4.3                             EnhancedVolcano_1.13.2                  
[28] ggrepel_0.9.5                            magrittr_2.0.3                           lubridate_1.9.3                         
[31] forcats_1.0.0                            stringr_1.5.1                            dplyr_1.1.4                             
[34] purrr_1.0.2                              readr_2.1.5                              tidyr_1.3.1                             
[37] tibble_3.2.1                             tidyverse_2.0.0                          DESeq2_1.34.0                           
[40] SummarizedExperiment_1.24.0              MatrixGenerics_1.6.0                     matrixStats_0.62.0                      
[43] GenomicRanges_1.46.1                     GenomeInfoDb_1.30.1                      IRanges_2.28.0                          
[46] S4Vectors_0.32.4                         Biobase_2.54.0                           BiocGenerics_0.40.0                     
[49] limma_3.50.3                             ggplot2_3.5.1                           

loaded via a namespace (and not attached):
  [1] utf8_1.2.2               tidyselect_1.2.1         RSQLite_2.2.8            htmlwidgets_1.5.4        scatterpie_0.1.7        
  [6] munsell_0.5.0            systemPipeR_2.0.4        withr_2.5.0              colorspace_2.0-3         GOSemSim_2.20.0         
 [11] filelock_1.0.2           rstudioapi_0.16.0        bbmle_1.0.24             GenomeInfoDbData_1.2.7   mixsqp_0.3-43           
 [16] hwriter_1.3.2            polyclip_1.10-0          bit64_4.0.5              farver_2.1.1             downloader_0.4          
 [21] treeio_1.18.1            coda_0.19-4              vctrs_0.6.5              generics_0.1.3           lambda.r_1.2.4          
 [26] timechange_0.3.0         BiocFileCache_2.2.0      regioneR_1.26.0          R6_2.5.1                 apeglm_1.16.0           
 [31] graphlayouts_0.8.0       invgamma_1.1             locfit_1.5-9.4           AnnotationFilter_1.18.0  bitops_1.0-7            
 [36] cachem_1.0.6             gridGraphics_0.5-1       DelayedArray_0.20.0      assertthat_0.2.1         BiocIO_1.4.0            
 [41] scales_1.3.0             ggraph_2.0.5             gtable_0.3.1             ensembldb_2.18.2         tidygraph_1.2.0         
 [46] rlang_1.1.4              genefilter_1.76.0        splines_4.1.1            lazyeval_0.2.2           yaml_2.3.6              
 [51] qvalue_2.26.0            RBGL_1.70.0              tools_4.1.1              ggplotify_0.1.0          gplots_3.1.3.1          
 [56] Rcpp_1.0.9               plyr_1.8.7               progress_1.2.2           zlibbioc_1.40.0          RCurl_1.98-1.9          
 [61] prettyunits_1.1.1        viridis_0.6.2            ashr_2.2-47              futile.options_1.0.1     DO.db_2.9               
 [66] truncnorm_1.0-8          mvtnorm_1.1-3            SQUAREM_2021.1           amap_0.8-19              ProtGenerics_1.26.0     
 [71] patchwork_1.1.1          hms_1.1.3                xtable_1.8-4             XML_3.99-0.9             emdbook_1.3.12          
 [76] jpeg_0.1-9               gridExtra_2.3            compiler_4.1.1           biomaRt_2.50.0           bdsmatrix_1.3-7         
 [81] shadowtext_0.0.9         KernSmooth_2.23-20       crayon_1.5.2             htmltools_0.5.2          ggfun_0.0.4             
 [86] tzdb_0.2.0               geneplotter_1.72.0       aplot_0.1.1              DBI_1.1.2                tweenr_1.0.2            
 [91] formatR_1.11             WriteXLS_6.7.0           dbplyr_2.1.1             MASS_7.3-54              rappdirs_0.3.3          
 [96] boot_1.3-28              babelgene_21.4           ShortRead_1.52.0         Matrix_1.3-4             cli_3.6.3               
[101] parallel_4.1.1           igraph_2.0.3             pkgconfig_2.0.3          GenomicAlignments_1.30.0 numDeriv_2016.8-1.1     
[106] InteractionSet_1.22.0    xml2_1.3.3               ggtree_3.2.1             annotate_1.72.0          multtest_2.50.0         
[111] XVector_0.34.0           yulab.utils_0.0.4        digest_0.6.30            graph_1.72.0             Biostrings_2.62.0       
[116] cellranger_1.1.0         fastmatch_1.1-3          tidytree_0.3.6           restfulr_0.0.13          GreyListChIP_1.26.0     
[121] curl_4.3.3               Rsamtools_2.10.0         gtools_3.9.3             rjson_0.2.20             jsonlite_1.8.8          
[126] nlme_3.1-153             lifecycle_1.0.3          viridisLite_0.4.1        BSgenome_1.62.0          fansi_1.0.3             
[131] pillar_1.9.0             lattice_0.20-45          plotrix_3.8-2            KEGGREST_1.34.0          fastmap_1.1.0           
[136] httr_1.4.4               survival_3.2-13          GO.db_3.14.0             glue_1.6.2               zip_2.2.0               
[141] png_0.1-7                bit_4.0.4                ggforce_0.3.3            stringi_1.7.8            blob_1.2.2              
[146] latticeExtra_0.6-29      caTools_1.18.2           memoise_2.0.1            ape_5.5                  irlba_2.3.5.1
ChIPSeqData interact GenomicRanges HiC • 271 views
ADD COMMENT

Login before adding your answer.

Traffic: 620 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6