Hello Community!
I am trying to understand the underlying code that scran::quickCluster() uses to define single-cell clusters.
Below is a summary of the code trail I have traced so far (followed by the links and the corresponding code chunks). At the 4th nested level, BiocNeighbors::findKNN() calls generic_find_knn(), and I am unable to find the source code for that function.
Can anyone help me find the code for this function, or suggest an alternative R function for it?
Many thanks in advance for your help and advice.
Code trail [See the code chunks below for details]
L1. scran::quickCluster() uses >
L2. bluster::clusterRows() uses >
L3. bluster::makeKNNGraph() uses >
L4. BiocNeighbors::findKNN() uses >
L5. generic_find_knn() [unable to find the code for this function]
Code links:
L1. scran::quickCluster() [https://github.com/MarioniLab/scran/blob/master/R/quickCluster.R]
L2. bluster::clusterRows() [https://github.com/LTLA/bluster/blob/master/R/NNGraphParam.R]
L3. bluster::makeKNNGraph() [https://github.com/LTLA/bluster/blob/master/R/NNGraphParam.R]
L4. BiocNeighbors::findKNN() [https://github.com/knncolle/BiocNeighbors/blob/master/R/findKNN.R]
Code chunks
# L1. scran::quickCluster() [https://github.com/MarioniLab/scran/blob/master/R/quickCluster.R] > bluster::clusterRows()
# clusterRows() #' @importFrom bluster clusterRows NNGraphParam HclustParam
# Excerpt from the body of quickCluster(): cluster `y` with the approach
# selected by `method`. `clusters` is left as the final expression.
if (method=="igraph") {
# Graph-based route: NNGraphParam() receives the forwarded `...` plus
# `graph.fun` as its cluster.fun. full=TRUE is requested because both
# out$objects$graph and out$clusters are read on the next line.
out <- clusterRows(y, NNGraphParam(..., cluster.fun=graph.fun), full=TRUE)
# NOTE(review): .merge_closest_graph() is defined elsewhere; presumably it
# merges clusters smaller than `min.size` into a neighbouring cluster -- confirm.
clusters <- .merge_closest_graph(out$objects$graph, as.integer(out$clusters), min.size=min.size)
clusters <- factor(clusters)
} else {
# Any other `method`: hierarchical clustering via HclustParam() with
# method="ward.D2" and cut.dynamic=TRUE; `min.size` is passed as
# `minClusterSize` inside cut.params, together with the forwarded `...`.
clusters <- clusterRows(y, HclustParam(method="ward.D2", cut.dynamic=TRUE, cut.params=list(minClusterSize=min.size, ...)))
# The label "0" is treated here as "not assigned to any cluster".
unassigned <- clusters=="0"
if (any(unassigned)) {
# Warn with the number of unassigned cells rather than stopping.
warning(paste(sum(unassigned), "cells were not assigned to any cluster"))
}
}
clusters
# L2. bluster::clusterRows() [https://github.com/LTLA/bluster/blob/master/R/NNGraphParam.R] > bluster::makeKNNGraph()
#' @export
#' @rdname NNGraphParam-class
setMethod("clusterRows", c("ANY", "KNNGraphParam"), function(x, BLUSPARAM, full=FALSE) {
    # Pull the graph-construction settings out of the parameter object.
    n.neighbors <- BLUSPARAM[["k"]]
    keep.direction <- BLUSPARAM[["directed"]]
    nn.backend <- BLUSPARAM[["BNPARAM"]]
    n.threads <- BLUSPARAM[["num.threads"]]

    # Build the k-nearest-neighbor graph from 'x' using those settings.
    knn.graph <- makeKNNGraph(
        x,
        k=n.neighbors,
        directed=keep.direction,
        BNPARAM=nn.backend,
        num.threads=n.threads
    )

    # Cluster on the graph; 'full' is passed straight through.
    .cluster_igraph(knn.graph, BLUSPARAM, full=full)
})
# L3. bluster::makeKNNGraph() [https://github.com/LTLA/bluster/blob/master/R/NNGraphParam.R] > BiocNeighbors::findKNN()
# Build a k-nearest-neighbor graph from 'x': findKNN() locates the 'k' nearest
# neighbors (indices only, no distances) and neighborsToKNNGraph() converts the
# resulting index matrix into a graph.
#
# 'num.threads' is accepted so that callers passing 'num.threads=' (as the
# clusterRows() method for KNNGraphParam does) no longer fail with an
# "unused argument" error. It is appended after BPPARAM so that existing
# positional calls keep their meaning.
#' @export
#' @rdname makeSNNGraph
#' @param num.threads Integer scalar specifying the number of threads to use
#' for the neighbor search. Only takes effect when \code{BPPARAM} is not
#' explicitly supplied (or is \code{NULL}).
#' @importFrom BiocParallel SerialParam
#' @importFrom BiocNeighbors KmknnParam findKNN
makeKNNGraph <- function(x, k=10, directed=FALSE, BNPARAM=KmknnParam(), BPPARAM=SerialParam(), num.threads=1) {
    # findKNN() replaces 'num.threads' with the worker count of any non-NULL
    # BPPARAM. A defaulted BPPARAM is therefore forwarded as NULL so the
    # requested thread count survives. With the default num.threads=1 this is
    # the same single-threaded search as before.
    if (missing(BPPARAM)) {
        BPPARAM <- NULL
    }

    nn.out <- findKNN(
        x,
        k=k,
        BNPARAM=BNPARAM,
        BPPARAM=BPPARAM,
        num.threads=num.threads,
        get.distance=FALSE
    )
    neighborsToKNNGraph(nn.out$index, directed=directed)
}
# L4. BiocNeighbors::findKNN() [https://github.com/knncolle/BiocNeighbors/blob/master/R/findKNN.R] > generic_find_knn() [???]
#' @export
setMethod("findKNN", c("matrix", "ANY"), function(X, k, get.index=TRUE, get.distance=TRUE, num.threads=1, subset=NULL, ..., BPPARAM=NULL, BNPARAM=NULL) {
    # Matrix input: build a search index first (algorithm chosen via BNPARAM),
    # then re-dispatch the generic on that index with the same search options.
    prebuilt <- buildIndex(X, ..., BNPARAM=BNPARAM)
    callGeneric(
        prebuilt,
        k=k,
        get.index=get.index,
        get.distance=get.distance,
        num.threads=num.threads,
        subset=subset,
        ...,
        BPPARAM=BPPARAM
    )
})
#' @export
setMethod("findKNN", c("BiocNeighborGenericIndex", "ANY"), function(X, k, get.index=TRUE, get.distance=TRUE, num.threads=1, subset=NULL, ..., BPPARAM=NULL, BNPARAM=NULL) {
    # A supplied BPPARAM dictates the thread count, replacing 'num.threads'.
    n.threads <- if (is.null(BPPARAM)) num.threads else BiocParallel::bpnworkers(BPPARAM)

    # Translate the R-level arguments into what generic_find_knn() expects.
    n.neighbors <- as.integer(k)
    variable.k <- is(k, "AsIs")
    chosen.obs <- .integerize_subset(X, subset)
    want.index <- !isFALSE(get.index)
    want.distance <- !isFALSE(get.distance)

    # Run the search on the prebuilt index held in the 'ptr' slot.
    result <- generic_find_knn(
        X@ptr,
        num_neighbors=n.neighbors,
        force_variable_neighbors=variable.k,
        chosen=chosen.obs,
        num_threads=n.threads,
        last_distance_only=FALSE,
        report_index=want.index,
        report_distance=want.distance
    )

    # Post-process the "index" and then the "distance" component of the result.
    result <- .format_output(result, "index", get.index)
    result <- .format_output(result, "distance", get.distance)
    result
})
#' @export
setMethod("findKNN", c("missing", "ANY"), function(X, k, get.index=TRUE, get.distance=TRUE, num.threads=1, subset=NULL, ..., BNINDEX=NULL, BNPARAM=NULL) {
    # No 'X' supplied: re-dispatch the generic with BNINDEX as the first
    # argument, forwarding every search option unchanged.
    callGeneric(
        BNINDEX,
        k=k,
        get.index=get.index,
        get.distance=get.distance,
        num.threads=num.threads,
        subset=subset,
        ...,
        BNPARAM=BNPARAM
    )
})
Many thanks for the link to the C++ code. I will see if I can find someone with C++ experience to help me with the code breakdown from here.