Skip to content

Commit 6dcb1d5

Browse files
authored
Merge pull request #561 from GoekeLab/sc_spatial_wrapper
Single cell wrapper and removal of barcodesToFilter, sampleName, cleanReads
2 parents d2db984 + 462dd28 commit 6dcb1d5

File tree

9 files changed

+147
-189
lines changed

9 files changed

+147
-189
lines changed

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Generated by roxygen2: do not edit by hand
22

33
export(bambu)
4+
export(bambu.singlecell)
45
export(plotBambu)
56
export(prepareAnnotations)
67
export(readFromGTF)

R/bambu-assignDist.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
#' @inheritParams bambu
33
#' @import data.table
44
#' @noRd
5-
assignReadClasstoTranscripts <- function(readClassList, annotations,
6-
rcAssignmentParameters, verbose, sampleMetadata, demultiplexed,
5+
assignReadClasstoTranscripts <- function(readClassList, annotations, rcAssignmentParameters,
6+
verbose, sampleMetadata, extractBarcodeUMI,
77
returnDistTable = FALSE, trackReads = TRUE) {
88
if (is.character(readClassList)) readClassList <- readRDS(file = readClassList)
99
metadata(readClassList)$readClassDist <- calculateDistTable(readClassList, annotations, rcAssignmentParameters, verbose, returnDistTable)
@@ -17,7 +17,7 @@ assignReadClasstoTranscripts <- function(readClassList, annotations,
1717
mutate(aval = 1) %>%
1818
data.table()
1919
#return non-em counts
20-
ColData <- generateColData(readClassList, sampleMetadata, demultiplexed)
20+
ColData <- generateColData(readClassList, sampleMetadata, extractBarcodeUMI)
2121
quantData <- SummarizedExperiment(assays = SimpleList(
2222
counts = generateUniqueCounts(readClassDt, metadata(readClassList)$countMatrix, annotations)),
2323
rowRanges = annotations,

R/bambu-extendAnnotations-utilityExtend.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -890,7 +890,7 @@ isore.extendAnnotations.clusters <- function(readClassList, annotations, cluster
890890
for(i in seq_along(clusters)){
891891
print(names(clusters)[i])
892892
###TODO need to account for the sample name here which is added to the barcode
893-
index <- match(clusters[[i]],gsub('demultiplexed','',metadata(readClassList[[1]])$samples))
893+
index <- match(clusters[[i]],gsub('demultiplexed','',metadata(readClassList[[1]])$samples))
894894
index <- index[!is.na(index)]
895895
print(length(index))
896896
if(length(index)<20) next

R/bambu-processReads.R

Lines changed: 24 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ bambu.processReads <- function(reads, annotations, genomeSequence,
1616
readClass.outputDir=NULL, yieldSize=1000000, bpParameters,
1717
stranded=FALSE, verbose=FALSE, discoveryParameters = setDiscoveryParameters(NULL),
1818
processByChromosome = FALSE, processByBam = TRUE, trackReads = trackReads, fusionMode = fusionMode,
19-
demultiplexed = FALSE, cleanReads = FALSE, dedupUMI = FALSE, sampleNames = NULL, barcodesToFilter = NULL) {
19+
extractBarcodeUMI = FALSE, dedupUMI = FALSE) {
2020
genomeSequence <- checkInputSequence(genomeSequence)
2121
# ===# create BamFileList object from character #===#
2222
if (is(reads, "BamFile")) {
@@ -40,14 +40,6 @@ bambu.processReads <- function(reads, annotations, genomeSequence,
4040
reads <- BamFileList(reads, yieldSize = yieldSize)
4141
names(reads) <- tools::file_path_sans_ext(BiocGenerics::basename(reads))
4242
}
43-
if(!is.null(sampleNames)){
44-
if(length(sampleNames==length(reads))){
45-
names(reads) <- sampleNames
46-
} else{
47-
message("Not enough provided sample names. Using them in order of inputted files and the remaining files will use the file names")
48-
names(reads)[seq_along(sampleNames)] <- sampleNames
49-
}
50-
}
5143
min.readCount <- discoveryParameters[["min.readCount"]]
5244
fitReadClassModel <- discoveryParameters[["fitReadClassModel"]]
5345
defaultModels <- discoveryParameters[["defaultModels"]]
@@ -62,44 +54,43 @@ bambu.processReads <- function(reads, annotations, genomeSequence,
6254
fitReadClassModel = fitReadClassModel, min.exonOverlap = min.exonOverlap,
6355
defaultModels = defaultModels, returnModel = returnModel, verbose = verbose,
6456
processByChromosome = processByChromosome, trackReads = trackReads, fusionMode = fusionMode,
65-
demultiplexed = demultiplexed, cleanReads = cleanReads, dedupUMI = dedupUMI, index = 1, barcodesToFilter = barcodesToFilter)},
57+
extractBarcodeUMI = extractBarcodeUMI, dedupUMI = dedupUMI, index = 1)},
6658
BPPARAM = bpParameters)
6759
} else {
6860
readGrgList <- bplapply(seq_along(reads), function(i) {
6961
bambu.readsByFile(bam.file = reads[i],
7062
genomeSequence = genomeSequence,annotations = annotations,
71-
stranded = stranded, min.readCount = min.readCount,
72-
fitReadClassModel = fitReadClassModel, min.exonOverlap = min.exonOverlap,
73-
defaultModels = defaultModels, returnModel = returnModel, verbose = verbose,
74-
trackReads = trackReads, fusionMode = fusionMode,
75-
demultiplexed = demultiplexed, cleanReads = cleanReads, dedupUMI = dedupUMI, index = i, barcodesToFilter = barcodesToFilter)},
63+
stranded = stranded, min.readCount = min.readCount,
64+
fitReadClassModel = fitReadClassModel, min.exonOverlap = min.exonOverlap,
65+
defaultModels = defaultModels, returnModel = returnModel, verbose = verbose,
66+
trackReads = trackReads, fusionMode = fusionMode,
67+
extractBarcodeUMI = extractBarcodeUMI, dedupUMI = dedupUMI, index = i)},
7668
BPPARAM = bpParameters)
77-
sampleNames <- as.numeric(as.factor(sampleNames))
7869
for(i in seq_along(readGrgList)){
79-
if(!isFALSE(demultiplexed)){
70+
if(extractBarcodeUMI){
8071
mcols(readGrgList[[i]])$CB <- paste0(names(reads)[i], '_', mcols(readGrgList[[i]])$CB)
8172
} else{
82-
mcols(readGrgList[[i]])$CB <- sampleNames[i]
73+
mcols(readGrgList[[i]])$CB <- names(reads)[i]
8374
}
8475

8576
mcols(readGrgList[[i]])$CB <- as.factor(mcols(readGrgList[[i]])$CB)
8677

8778
}
8879
readGrgList <- do.call(c, readGrgList)
8980
mcols(readGrgList)$id <- seq_along(readGrgList)
90-
if(!isFALSE(demultiplexed)){
81+
if(extractBarcodeUMI){
9182
mcols(readGrgList)$sampleID <- as.numeric(mcols(readGrgList)$CB)
9283
} else {
9384
mcols(readGrgList)$sampleID <- i
9485
}
9586
readClassList <- constructReadClasses(readGrgList, genomeSequence = genomeSequence,annotations = annotations,
96-
stranded = stranded, min.readCount = min.readCount,
97-
fitReadClassModel = fitReadClassModel, min.exonOverlap = min.exonOverlap,
98-
defaultModels = defaultModels, returnModel = returnModel, verbose = verbose,
87+
stranded = stranded, min.readCount = min.readCount,
88+
fitReadClassModel = fitReadClassModel, min.exonOverlap = min.exonOverlap,
89+
defaultModels = defaultModels, returnModel = returnModel, verbose = verbose,
9990
processByChromosome = processByChromosome, trackReads = trackReads, fusionMode = fusionMode)
10091
metadata(readClassList)$samples <- names(reads)
10192
metadata(readClassList)$sampleNames <- names(reads)
102-
if(!isFALSE(demultiplexed)) metadata(readClassList)$samples <- levels(mcols(readGrgList)$CB)
93+
if(extractBarcodeUMI) metadata(readClassList)$samples <- levels(mcols(readGrgList)$CB)
10394
readClassList <- list(readClassList)
10495
}
10596

@@ -124,14 +115,12 @@ bambu.processReads <- function(reads, annotations, genomeSequence,
124115
bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations,
125116
yieldSize = NULL, stranded = FALSE, min.readCount = 2,
126117
fitReadClassModel = TRUE, min.exonOverlap = 10, defaultModels = NULL, returnModel = FALSE,
127-
verbose = FALSE, processByChromosome = FALSE, trackReads = FALSE, fusionMode = FALSE, demultiplexed = FALSE,
128-
cleanReads = FALSE, dedupUMI = FALSE, index = 0, barcodesToFilter = NULL) {
118+
verbose = FALSE, processByChromosome = FALSE, trackReads = FALSE, fusionMode = FALSE,
119+
extractBarcodeUMI = FALSE, dedupUMI = FALSE, index = 0) {
129120
if(verbose) message(names(bam.file)[1])
130-
readGrgList <- prepareDataFromBam(bam.file[[1]], verbose = verbose, yieldSize = yieldSize, use.names = trackReads, demultiplexed = demultiplexed, cleanReads = cleanReads, dedupUMI = dedupUMI)
121+
readGrgList <- prepareDataFromBam(bam.file[[1]], verbose = verbose, yieldSize = yieldSize, use.names = trackReads, extractBarcodeUMI = extractBarcodeUMI, dedupUMI = dedupUMI)
131122
if(verbose) message(paste0("Number of alignments/reads: ",length(readGrgList)))
132123
warnings <- c()
133-
if(!is.null(barcodesToFilter) & !isFALSE(demultiplexed))
134-
readGrgList <- readGrgList[!(mcols(readGrgList)$CB %in% barcodesToFilter)]
135124
warnings <- seqlevelCheckReadsAnnotation(readGrgList, annotations)
136125
if(verbose & length(warnings) > 0) warning(paste(warnings,collapse = "\n"))
137126
#check seqlevels for consistency, drop ranges not present in genomeSequence
@@ -173,7 +162,7 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations,
173162

174163
mcols(readGrgList)$id <- seq_along(readGrgList)
175164

176-
if(!isFALSE(demultiplexed)){
165+
if(extractBarcodeUMI){
177166
mcols(readGrgList)$sampleID <- as.numeric(mcols(readGrgList)$CB)
178167
} else {
179168
mcols(readGrgList)$sampleID <- index
@@ -210,7 +199,7 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations,
210199
fusionMode = fusionMode,
211200
verbose = verbose)
212201

213-
if (demultiplexed) {
202+
if (extractBarcodeUMI) {
214203
barcodes <- levels(mcols(readGrgList)$CB)
215204
metadata(se)$sampleData <- tibble(
216205
id = paste(names(bam.file)[1], barcodes, sep = '_'),
@@ -234,12 +223,10 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations,
234223
bambu.readsByFile <- function(bam.file, genomeSequence, annotations,
235224
yieldSize = NULL, stranded = FALSE, min.readCount = 2,
236225
fitReadClassModel = TRUE, min.exonOverlap = 10, defaultModels = NULL, returnModel = FALSE,
237-
verbose = FALSE, trackReads = FALSE, fusionMode = FALSE, demultiplexed = FALSE,
238-
cleanReads = TRUE, dedupUMI = FALSE, index = 0, barcodesToFilter = NULL) {
239-
readGrgList <- prepareDataFromBam(bam.file[[1]], verbose = verbose, yieldSize = yieldSize, use.names = trackReads, demultiplexed = demultiplexed, cleanReads = cleanReads, dedupUMI = dedupUMI)
240-
241-
if(!is.null(barcodesToFilter) & !isFALSE(demultiplexed)) readGrgList <- readGrgList[!mcols(readGrgList)$CB %in% barcodesToFilter]
242-
226+
verbose = FALSE, trackReads = FALSE, fusionMode = FALSE,
227+
extractBarcodeUMI = FALSE, dedupUMI = FALSE, index = 0) {
228+
readGrgList <- prepareDataFromBam(bam.file[[1]], verbose = verbose, yieldSize = yieldSize, use.names = trackReads, extractBarcodeUMI = extractBarcodeUMI, dedupUMI = dedupUMI)
229+
243230
if(verbose) message("Number of alignments/reads: ",length(readGrgList))
244231

245232
warnings <- c()
@@ -290,7 +277,7 @@ bambu.readsByFile <- function(bam.file, genomeSequence, annotations,
290277
stop("No reads left after filtering.")
291278

292279
## add ###
293-
#if (isTRUE(demultiplexed)){
280+
#if (extractBarcodeUMI){
294281
# cellBarcodeAssign <- tibble(index = mcols(readGrgList)$id, CB = mcols(readGrgList)$CB) %>% nest(.by = "CB")
295282

296283
# if (!dir.exists("CB")){

0 commit comments

Comments
 (0)