Skip to content
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand

export(bambu)
export(bambu.singlecell)
export(plotBambu)
export(prepareAnnotations)
export(readFromGTF)
Expand Down
6 changes: 3 additions & 3 deletions R/bambu-assignDist.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
#' @inheritParams bambu
#' @import data.table
#' @noRd
assignReadClasstoTranscripts <- function(readClassList, annotations,
rcAssignmentParameters, verbose, sampleMetadata, demultiplexed,
assignReadClasstoTranscripts <- function(readClassList, annotations, rcAssignmentParameters,
verbose, sampleMetadata, extractBarcodeUMI,
returnDistTable = FALSE, trackReads = TRUE) {
if (is.character(readClassList)) readClassList <- readRDS(file = readClassList)
metadata(readClassList)$readClassDist <- calculateDistTable(readClassList, annotations, rcAssignmentParameters, verbose, returnDistTable)
Expand All @@ -17,7 +17,7 @@ assignReadClasstoTranscripts <- function(readClassList, annotations,
mutate(aval = 1) %>%
data.table()
#return non-em counts
ColData <- generateColData(readClassList, sampleMetadata, demultiplexed)
ColData <- generateColData(readClassList, sampleMetadata, extractBarcodeUMI)
quantData <- SummarizedExperiment(assays = SimpleList(
counts = generateUniqueCounts(readClassDt, metadata(readClassList)$countMatrix, annotations)),
rowRanges = annotations,
Expand Down
2 changes: 1 addition & 1 deletion R/bambu-extendAnnotations-utilityExtend.R
Original file line number Diff line number Diff line change
Expand Up @@ -890,7 +890,7 @@ isore.extendAnnotations.clusters <- function(readClassList, annotations, cluster
for(i in seq_along(clusters)){
print(names(clusters)[i])
###TODO need to account for the sample name here which is added to the barcode
index <- match(clusters[[i]],gsub('demultiplexed','',metadata(readClassList[[1]])$samples))
index <- match(clusters[[i]],gsub('demultiplexed','',metadata(readClassList[[1]])$samples))
Comment thread
ch99l marked this conversation as resolved.
index <- index[!is.na(index)]
print(length(index))
if(length(index)<20) next
Expand Down
61 changes: 24 additions & 37 deletions R/bambu-processReads.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ bambu.processReads <- function(reads, annotations, genomeSequence,
readClass.outputDir=NULL, yieldSize=1000000, bpParameters,
stranded=FALSE, verbose=FALSE, discoveryParameters = setDiscoveryParameters(NULL),
processByChromosome = FALSE, processByBam = TRUE, trackReads = trackReads, fusionMode = fusionMode,
demultiplexed = FALSE, cleanReads = FALSE, dedupUMI = FALSE, sampleNames = NULL, barcodesToFilter = NULL) {
extractBarcodeUMI = FALSE, dedupUMI = FALSE) {
genomeSequence <- checkInputSequence(genomeSequence)
# ===# create BamFileList object from character #===#
if (is(reads, "BamFile")) {
Expand All @@ -40,14 +40,6 @@ bambu.processReads <- function(reads, annotations, genomeSequence,
reads <- BamFileList(reads, yieldSize = yieldSize)
names(reads) <- tools::file_path_sans_ext(BiocGenerics::basename(reads))
}
if(!is.null(sampleNames)){
if(length(sampleNames==length(reads))){
names(reads) <- sampleNames
} else{
message("Not enough provided sample names. Using them in order of inputted files and the remaining files will use the file names")
names(reads)[seq_along(sampleNames)] <- sampleNames
}
}
min.readCount <- discoveryParameters[["min.readCount"]]
fitReadClassModel <- discoveryParameters[["fitReadClassModel"]]
defaultModels <- discoveryParameters[["defaultModels"]]
Expand All @@ -62,44 +54,43 @@ bambu.processReads <- function(reads, annotations, genomeSequence,
fitReadClassModel = fitReadClassModel, min.exonOverlap = min.exonOverlap,
defaultModels = defaultModels, returnModel = returnModel, verbose = verbose,
processByChromosome = processByChromosome, trackReads = trackReads, fusionMode = fusionMode,
demultiplexed = demultiplexed, cleanReads = cleanReads, dedupUMI = dedupUMI, index = 1, barcodesToFilter = barcodesToFilter)},
extractBarcodeUMI = extractBarcodeUMI, dedupUMI = dedupUMI, index = 1)},
BPPARAM = bpParameters)
} else {
readGrgList <- bplapply(seq_along(reads), function(i) {
bambu.readsByFile(bam.file = reads[i],
genomeSequence = genomeSequence,annotations = annotations,
stranded = stranded, min.readCount = min.readCount,
fitReadClassModel = fitReadClassModel, min.exonOverlap = min.exonOverlap,
defaultModels = defaultModels, returnModel = returnModel, verbose = verbose,
trackReads = trackReads, fusionMode = fusionMode,
demultiplexed = demultiplexed, cleanReads = cleanReads, dedupUMI = dedupUMI, index = i, barcodesToFilter = barcodesToFilter)},
stranded = stranded, min.readCount = min.readCount,
fitReadClassModel = fitReadClassModel, min.exonOverlap = min.exonOverlap,
defaultModels = defaultModels, returnModel = returnModel, verbose = verbose,
trackReads = trackReads, fusionMode = fusionMode,
extractBarcodeUMI = extractBarcodeUMI, dedupUMI = dedupUMI, index = i)},
BPPARAM = bpParameters)
sampleNames <- as.numeric(as.factor(sampleNames))
for(i in seq_along(readGrgList)){
if(!isFALSE(demultiplexed)){
if(extractBarcodeUMI){
mcols(readGrgList[[i]])$CB <- paste0(names(reads)[i], '_', mcols(readGrgList[[i]])$CB)
} else{
mcols(readGrgList[[i]])$CB <- sampleNames[i]
mcols(readGrgList[[i]])$CB <- names(reads)[i]
}

mcols(readGrgList[[i]])$CB <- as.factor(mcols(readGrgList[[i]])$CB)

}
readGrgList <- do.call(c, readGrgList)
mcols(readGrgList)$id <- seq_along(readGrgList)
if(!isFALSE(demultiplexed)){
if(extractBarcodeUMI){
mcols(readGrgList)$sampleID <- as.numeric(mcols(readGrgList)$CB)
} else {
mcols(readGrgList)$sampleID <- i
Comment thread
ch99l marked this conversation as resolved.
}
Comment thread
ch99l marked this conversation as resolved.
readClassList <- constructReadClasses(readGrgList, genomeSequence = genomeSequence,annotations = annotations,
stranded = stranded, min.readCount = min.readCount,
fitReadClassModel = fitReadClassModel, min.exonOverlap = min.exonOverlap,
defaultModels = defaultModels, returnModel = returnModel, verbose = verbose,
stranded = stranded, min.readCount = min.readCount,
fitReadClassModel = fitReadClassModel, min.exonOverlap = min.exonOverlap,
defaultModels = defaultModels, returnModel = returnModel, verbose = verbose,
processByChromosome = processByChromosome, trackReads = trackReads, fusionMode = fusionMode)
metadata(readClassList)$samples <- names(reads)
metadata(readClassList)$sampleNames <- names(reads)
if(!isFALSE(demultiplexed)) metadata(readClassList)$samples <- levels(mcols(readGrgList)$CB)
if(extractBarcodeUMI) metadata(readClassList)$samples <- levels(mcols(readGrgList)$CB)
readClassList <- list(readClassList)
}

Expand All @@ -124,14 +115,12 @@ bambu.processReads <- function(reads, annotations, genomeSequence,
bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations,
yieldSize = NULL, stranded = FALSE, min.readCount = 2,
fitReadClassModel = TRUE, min.exonOverlap = 10, defaultModels = NULL, returnModel = FALSE,
verbose = FALSE, processByChromosome = FALSE, trackReads = FALSE, fusionMode = FALSE, demultiplexed = FALSE,
cleanReads = FALSE, dedupUMI = FALSE, index = 0, barcodesToFilter = NULL) {
verbose = FALSE, processByChromosome = FALSE, trackReads = FALSE, fusionMode = FALSE,
extractBarcodeUMI = FALSE, dedupUMI = FALSE, index = 0) {
if(verbose) message(names(bam.file)[1])
readGrgList <- prepareDataFromBam(bam.file[[1]], verbose = verbose, yieldSize = yieldSize, use.names = trackReads, demultiplexed = demultiplexed, cleanReads = cleanReads, dedupUMI = dedupUMI)
readGrgList <- prepareDataFromBam(bam.file[[1]], verbose = verbose, yieldSize = yieldSize, use.names = trackReads, extractBarcodeUMI = extractBarcodeUMI, dedupUMI = dedupUMI)
if(verbose) message(paste0("Number of alignments/reads: ",length(readGrgList)))
warnings <- c()
if(!is.null(barcodesToFilter) & !isFALSE(demultiplexed))
readGrgList <- readGrgList[!(mcols(readGrgList)$CB %in% barcodesToFilter)]
warnings <- seqlevelCheckReadsAnnotation(readGrgList, annotations)
if(verbose & length(warnings) > 0) warning(paste(warnings,collapse = "\n"))
#check seqlevels for consistency, drop ranges not present in genomeSequence
Expand Down Expand Up @@ -173,7 +162,7 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations,

mcols(readGrgList)$id <- seq_along(readGrgList)

if(!isFALSE(demultiplexed)){
if(extractBarcodeUMI){
mcols(readGrgList)$sampleID <- as.numeric(mcols(readGrgList)$CB)
} else {
mcols(readGrgList)$sampleID <- index
Expand Down Expand Up @@ -210,7 +199,7 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations,
fusionMode = fusionMode,
verbose = verbose)

if (demultiplexed) {
if (extractBarcodeUMI) {
barcodes <- levels(mcols(readGrgList)$CB)
metadata(se)$sampleData <- tibble(
id = paste(names(bam.file)[1], barcodes, sep = '_'),
Expand All @@ -234,12 +223,10 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations,
bambu.readsByFile <- function(bam.file, genomeSequence, annotations,
yieldSize = NULL, stranded = FALSE, min.readCount = 2,
fitReadClassModel = TRUE, min.exonOverlap = 10, defaultModels = NULL, returnModel = FALSE,
verbose = FALSE, trackReads = FALSE, fusionMode = FALSE, demultiplexed = FALSE,
cleanReads = TRUE, dedupUMI = FALSE, index = 0, barcodesToFilter = NULL) {
readGrgList <- prepareDataFromBam(bam.file[[1]], verbose = verbose, yieldSize = yieldSize, use.names = trackReads, demultiplexed = demultiplexed, cleanReads = cleanReads, dedupUMI = dedupUMI)

if(!is.null(barcodesToFilter) & !isFALSE(demultiplexed)) readGrgList <- readGrgList[!mcols(readGrgList)$CB %in% barcodesToFilter]

verbose = FALSE, trackReads = FALSE, fusionMode = FALSE,
extractBarcodeUMI = FALSE, dedupUMI = FALSE, index = 0) {
readGrgList <- prepareDataFromBam(bam.file[[1]], verbose = verbose, yieldSize = yieldSize, use.names = trackReads, extractBarcodeUMI = extractBarcodeUMI, dedupUMI = dedupUMI)

if(verbose) message("Number of alignments/reads: ",length(readGrgList))

warnings <- c()
Expand Down Expand Up @@ -290,7 +277,7 @@ bambu.readsByFile <- function(bam.file, genomeSequence, annotations,
stop("No reads left after filtering.")

## add ###
#if (isTRUE(demultiplexed)){
#if (extractBarcodeUMI){
Comment thread
ch99l marked this conversation as resolved.
# cellBarcodeAssign <- tibble(index = mcols(readGrgList)$id, CB = mcols(readGrgList)$CB) %>% nest(.by = "CB")

# if (!dir.exists("CB")){
Expand Down
Loading
Loading