Error in alpine::fitBiasModels shows as "offset needs to be finite"
1
0
Entering edit mode
bioinagesh • 0
@bioinagesh-21741
Last seen 2.8 years ago
India

Hi

While implementing the fitbiasmodel, I end up with the following error. Trying to figure out the reason by running the code step by step but still unable to come to a conclusion.

Error in alpine::fitBiasModels(genes = ebtFit, bam.file = bf, fragtypes = fragtypes,  :
  offset needs to be finite

Code: This R script is part of the JCC tool https://github.com/csoneson/jcc/blob/master/R/fitAlpineBiasModel.R

 fitAlpineBiasModel <- function(gtf, bam, organism, genome, genomeVersion,
                               version, minLength = 600, maxLength = 7000,
                               minCount = 500, maxCount = 10000,
                               subsample = TRUE, nbrSubsample = 200,
                               seed = 1, minSize = NULL,
                               maxSize = NULL, verbose = FALSE) {

  ## Define the temporary directory where the ensDb object will be saved
  tmpdir <- tempdir()

  ## Create TxDb
  if (verbose) message("Creating TxDb object...")
  txdb <-
    tryCatch({
      ensembldb::ensDbFromGtf(gtf, path = tmpdir, organism = organism,
                              genomeVersion = genomeVersion, version = version)
      ensembldb::EnsDb(paste0(tmpdir, "/", gsub("gtf", "sqlite",
                                                basename(gtf))))
    }, error = function(e) {
      GenomicFeatures::makeTxDbFromGFF(gtf, format = "gtf",
                                       organism = gsub("_", " ", organism))
    })

  ## Get list of transcripts
  if (verbose) message("Getting list of transcripts...")
  if (class(txdb) == "EnsDb") {
    ## data frame format
    txdf <- GenomicFeatures::transcripts(txdb, return.type = "DataFrame")
    ## GRanges format
    txps <- GenomicFeatures::transcripts(txdb)
  } else {
    txps <- GenomicFeatures::transcripts(txdb,
                                         columns = c("tx_name", "gene_id"),
                                         use.names = TRUE)
    txps$gene_id <- unlist(txps$gene_id)
    txdf <- S4Vectors::DataFrame(tx_id = as.character(txps$tx_name),
                                 gene_id = as.character(unlist(txps$gene_id)),
                                 tx_seq_start = as.integer(start(txps)),
                                 tx_seq_end = as.integer(end(txps)),
                                 tx_name = as.character(txps$tx_name))
  }

  ## Get list of exons by transcript
  if (verbose) message("Generating list of exons by transcript...")
  if (class(txdb) == "EnsDb") {
    ebt0 <- ensembldb::exonsBy(txdb, by = "tx")
  } else {
    ebt0 <- GenomicFeatures::exonsBy(txdb, by = "tx", use.names = TRUE)
  }

  ## Select genes with a single isoform
  if (verbose) message("Selecting genes with a single isoform...")
  tab <- table(txdf$gene_id)
  oneIsoGenes <- names(tab)[tab == 1]
  if (verbose) message(paste0(length(oneIsoGenes),
                              " single-isoform genes found."))

  ## Get transcript names for genes with a single isoform
  oneIsoTxs <- txdf$tx_id[txdf$gene_id %in% oneIsoGenes]

  ## Extract transcripts from one-isoform genes for use in fitting bias model
  ebtFit <- ebt0[oneIsoTxs]
  ebtFit <- GenomeInfoDb::keepStandardChromosomes(ebtFit,
                                                  pruning.mode = "coarse")

  ## Filter short genes and long genes
  if (verbose) message("Filtering out too short and too long transcripts...")
  minBp <- minLength
  maxBp <- maxLength
  geneLengths <- sum(width(ebtFit))
  ebtFit <- ebtFit[geneLengths > minBp & geneLengths < maxBp]
  if (verbose) message(paste0(length(ebtFit), " transcripts remaining."))

  ## Read bam file
  if (verbose) message("Reading bam file...")
  bamFiles <- bam
  names(bamFiles) <- "1"

  ## Subset to medium-to-high expressed genes
  if (verbose) message("Subsetting to medium-to-highly expressed genes...")
  txpsFit <- sort(txps[names(ebtFit)])
  cts <- Rsamtools::countBam(Rsamtools::BamFile(bamFiles),
                             param = Rsamtools::ScanBamParam(which = txpsFit))
  S4Vectors::mcols(txpsFit)$cts <- cts$records
  txpsFit <- txpsFit[names(ebtFit)]
  ebtFit <- ebtFit[txpsFit$cts > minCount & txpsFit$cts < maxCount]
  if (verbose) message(paste0(length(ebtFit), " genes retained."))

  ## Subsample genes to use for the fitting of the bias model
  if (subsample) {
    if (verbose) message("Subsampling genes for fitting bias model...")
    set.seed(seed)
    ebtFit <- ebtFit[sample(length(ebtFit), min(nbrSubsample, length(ebtFit)))]
    if (verbose) message(paste0(length(ebtFit), " genes retained."))
  }

  ## Get fragment width and read length
  message("Getting fragment widths and read lengths...")
  m <- sort(which(vapply(ebtFit, length, numeric(1)) > 1))
  m0 <- 0
  w <- NULL
  while(is.null(w)) {
    w <- tryCatch({
      m0 <- m0 + 1
      alpine::getFragmentWidths(bamFiles, ebtFit[[m[m0]]])
    }, error = function(e) {
      NULL
    })
  }
  quantiles <- stats::quantile(w, c(.025, .975))
  if (is.null(minSize)) minSize <- round(quantiles[1])
  if (is.null(maxSize)) maxSize <- round(quantiles[2])
  readLength <- stats::median(alpine::getReadLength(bamFiles))

  ## Names of genes to retain
  geneNames <-  
  names(geneNames) <- geneNames

  ## Build fragment types for selected genes
  if (verbose) message("Building fragment types...")
  fragtypes <- lapply(geneNames, function(geneName) {
    alpine::buildFragtypes(exons = ebtFit[[geneName]],
                           genome = Oindica,
                           readlength = readLength,
                           minsize = minSize,
                           maxsize = 220,
                           gc.str = FALSE)
  })

  ## Define models to fit
  if (verbose) message("Fitting bias model...")
  models <- list(
    "all" = list(
      formula = "count ~ ns(gc,knots=gc.knots,Boundary.knots=gc.bk) +
      ns(relpos,knots=relpos.knots,Boundary.knots=relpos.bk) + gene",
      offset = c("fraglen", "vlmm")
    )
  )

  ## Fit bias model
  fitpar <- lapply(bamFiles, function(bf) {
    alpine::fitBiasModels(genes = ebtFit,
                          bam.file = bf,
                          fragtypes = fragtypes,
                          genome = Oindica,
                          models = models,
                          readlength = readLength,
                          minsize = minSize,
                          maxsize = 220)
  })

  list(biasModel = fitpar, exonsByTx = ebt0, transcripts = txps)
}

Thanks Nagesh

Alpine jcc • 761 views
ADD COMMENT
0
Entering edit mode
@mikelove
Last seen 1 day ago
United States

Can you provide all of your code?

ADD COMMENT
0
Entering edit mode

Hi Michael, Please check the code which I have included with in the question.

ADD REPLY
0
Entering edit mode

So the offsets are VLMM and fragment length, you can inspect fragtypes to see if there are values that would cause a problem in the relevant columns of that data.frame.

ADD REPLY

Login before adding your answer.

Traffic: 947 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6