@@ -16,7 +16,7 @@ bambu.processReads <- function(reads, annotations, genomeSequence,
1616 readClass.outputDir = NULL , yieldSize = 1000000 , bpParameters ,
1717 stranded = FALSE , verbose = FALSE , discoveryParameters = setDiscoveryParameters(NULL ),
1818 processByChromosome = FALSE , processByBam = TRUE , trackReads = trackReads , fusionMode = fusionMode ,
19- demultiplexed = FALSE , cleanReads = FALSE , dedupUMI = FALSE , sampleNames = NULL , barcodesToFilter = NULL ) {
19+ extractBarcodeUMI = FALSE , dedupUMI = FALSE ) {
2020 genomeSequence <- checkInputSequence(genomeSequence )
2121 # ===# create BamFileList object from character #===#
2222 if (is(reads , " BamFile" )) {
@@ -40,14 +40,6 @@ bambu.processReads <- function(reads, annotations, genomeSequence,
4040 reads <- BamFileList(reads , yieldSize = yieldSize )
4141 names(reads ) <- tools :: file_path_sans_ext(BiocGenerics :: basename(reads ))
4242 }
43- if (! is.null(sampleNames )){
44- if (length(sampleNames == length(reads ))){
45- names(reads ) <- sampleNames
46- } else {
47- message(" Not enough provided sample names. Using them in order of inputted files and the remaining files will use the file names" )
48- names(reads )[seq_along(sampleNames )] <- sampleNames
49- }
50- }
5143 min.readCount <- discoveryParameters [[" min.readCount" ]]
5244 fitReadClassModel <- discoveryParameters [[" fitReadClassModel" ]]
5345 defaultModels <- discoveryParameters [[" defaultModels" ]]
@@ -62,44 +54,43 @@ bambu.processReads <- function(reads, annotations, genomeSequence,
6254 fitReadClassModel = fitReadClassModel , min.exonOverlap = min.exonOverlap ,
6355 defaultModels = defaultModels , returnModel = returnModel , verbose = verbose ,
6456 processByChromosome = processByChromosome , trackReads = trackReads , fusionMode = fusionMode ,
65- demultiplexed = demultiplexed , cleanReads = cleanReads , dedupUMI = dedupUMI , index = 1 , barcodesToFilter = barcodesToFilter )},
57+ extractBarcodeUMI = extractBarcodeUMI , dedupUMI = dedupUMI , index = 1 )},
6658 BPPARAM = bpParameters )
6759 } else {
6860 readGrgList <- bplapply(seq_along(reads ), function (i ) {
6961 bambu.readsByFile(bam.file = reads [i ],
7062 genomeSequence = genomeSequence ,annotations = annotations ,
71- stranded = stranded , min.readCount = min.readCount ,
72- fitReadClassModel = fitReadClassModel , min.exonOverlap = min.exonOverlap ,
73- defaultModels = defaultModels , returnModel = returnModel , verbose = verbose ,
74- trackReads = trackReads , fusionMode = fusionMode ,
75- demultiplexed = demultiplexed , cleanReads = cleanReads , dedupUMI = dedupUMI , index = i , barcodesToFilter = barcodesToFilter )},
63+ stranded = stranded , min.readCount = min.readCount ,
64+ fitReadClassModel = fitReadClassModel , min.exonOverlap = min.exonOverlap ,
65+ defaultModels = defaultModels , returnModel = returnModel , verbose = verbose ,
66+ trackReads = trackReads , fusionMode = fusionMode ,
67+ extractBarcodeUMI = extractBarcodeUMI , dedupUMI = dedupUMI , index = i )},
7668 BPPARAM = bpParameters )
77- sampleNames <- as.numeric(as.factor(sampleNames ))
7869 for (i in seq_along(readGrgList )){
79- if (! isFALSE( demultiplexed ) ){
70+ if (extractBarcodeUMI ){
8071 mcols(readGrgList [[i ]])$ CB <- paste0(names(reads )[i ], ' _' , mcols(readGrgList [[i ]])$ CB )
8172 } else {
82- mcols(readGrgList [[i ]])$ CB <- sampleNames [i ]
73+ mcols(readGrgList [[i ]])$ CB <- names( reads ) [i ]
8374 }
8475
8576 mcols(readGrgList [[i ]])$ CB <- as.factor(mcols(readGrgList [[i ]])$ CB )
8677
8778 }
8879 readGrgList <- do.call(c , readGrgList )
8980 mcols(readGrgList )$ id <- seq_along(readGrgList )
90- if (! isFALSE( demultiplexed )){
81+ if (extractBarcodeUMI ){
9182 mcols(readGrgList )$ sampleID <- as.numeric(mcols(readGrgList )$ CB )
9283 } else {
9384 mcols(readGrgList )$ sampleID <- i
9485 }
9586 readClassList <- constructReadClasses(readGrgList , genomeSequence = genomeSequence ,annotations = annotations ,
96- stranded = stranded , min.readCount = min.readCount ,
97- fitReadClassModel = fitReadClassModel , min.exonOverlap = min.exonOverlap ,
98- defaultModels = defaultModels , returnModel = returnModel , verbose = verbose ,
87+ stranded = stranded , min.readCount = min.readCount ,
88+ fitReadClassModel = fitReadClassModel , min.exonOverlap = min.exonOverlap ,
89+ defaultModels = defaultModels , returnModel = returnModel , verbose = verbose ,
9990 processByChromosome = processByChromosome , trackReads = trackReads , fusionMode = fusionMode )
10091 metadata(readClassList )$ samples <- names(reads )
10192 metadata(readClassList )$ sampleNames <- names(reads )
102- if (! isFALSE( demultiplexed ) ) metadata(readClassList )$ samples <- levels(mcols(readGrgList )$ CB )
93+ if (extractBarcodeUMI ) metadata(readClassList )$ samples <- levels(mcols(readGrgList )$ CB )
10394 readClassList <- list (readClassList )
10495 }
10596
@@ -124,14 +115,12 @@ bambu.processReads <- function(reads, annotations, genomeSequence,
124115bambu.processReadsByFile <- function (bam.file , genomeSequence , annotations ,
125116 yieldSize = NULL , stranded = FALSE , min.readCount = 2 ,
126117 fitReadClassModel = TRUE , min.exonOverlap = 10 , defaultModels = NULL , returnModel = FALSE ,
127- verbose = FALSE , processByChromosome = FALSE , trackReads = FALSE , fusionMode = FALSE , demultiplexed = FALSE ,
128- cleanReads = FALSE , dedupUMI = FALSE , index = 0 , barcodesToFilter = NULL ) {
118+ verbose = FALSE , processByChromosome = FALSE , trackReads = FALSE , fusionMode = FALSE ,
119+ extractBarcodeUMI = FALSE , dedupUMI = FALSE , index = 0 ) {
129120 if (verbose ) message(names(bam.file )[1 ])
130- readGrgList <- prepareDataFromBam(bam.file [[1 ]], verbose = verbose , yieldSize = yieldSize , use.names = trackReads , demultiplexed = demultiplexed , cleanReads = cleanReads , dedupUMI = dedupUMI )
121+ readGrgList <- prepareDataFromBam(bam.file [[1 ]], verbose = verbose , yieldSize = yieldSize , use.names = trackReads , extractBarcodeUMI = extractBarcodeUMI , dedupUMI = dedupUMI )
131122 if (verbose ) message(paste0(" Number of alignments/reads: " ,length(readGrgList )))
132123 warnings <- c()
133- if (! is.null(barcodesToFilter ) & ! isFALSE(demultiplexed ))
134- readGrgList <- readGrgList [! (mcols(readGrgList )$ CB %in% barcodesToFilter )]
135124 warnings <- seqlevelCheckReadsAnnotation(readGrgList , annotations )
136125 if (verbose & length(warnings ) > 0 ) warning(paste(warnings ,collapse = " \n " ))
137126 # check seqlevels for consistency, drop ranges not present in genomeSequence
@@ -173,7 +162,7 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations,
173162
174163 mcols(readGrgList )$ id <- seq_along(readGrgList )
175164
176- if (! isFALSE( demultiplexed )){
165+ if (extractBarcodeUMI ){
177166 mcols(readGrgList )$ sampleID <- as.numeric(mcols(readGrgList )$ CB )
178167 } else {
179168 mcols(readGrgList )$ sampleID <- index
@@ -210,7 +199,7 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations,
210199 fusionMode = fusionMode ,
211200 verbose = verbose )
212201
213- if (demultiplexed ) {
202+ if (extractBarcodeUMI ) {
214203 barcodes <- levels(mcols(readGrgList )$ CB )
215204 metadata(se )$ sampleData <- tibble(
216205 id = paste(names(bam.file )[1 ], barcodes , sep = ' _' ),
@@ -234,12 +223,10 @@ bambu.processReadsByFile <- function(bam.file, genomeSequence, annotations,
234223bambu.readsByFile <- function (bam.file , genomeSequence , annotations ,
235224 yieldSize = NULL , stranded = FALSE , min.readCount = 2 ,
236225 fitReadClassModel = TRUE , min.exonOverlap = 10 , defaultModels = NULL , returnModel = FALSE ,
237- verbose = FALSE , trackReads = FALSE , fusionMode = FALSE , demultiplexed = FALSE ,
238- cleanReads = TRUE , dedupUMI = FALSE , index = 0 , barcodesToFilter = NULL ) {
239- readGrgList <- prepareDataFromBam(bam.file [[1 ]], verbose = verbose , yieldSize = yieldSize , use.names = trackReads , demultiplexed = demultiplexed , cleanReads = cleanReads , dedupUMI = dedupUMI )
240-
241- if (! is.null(barcodesToFilter ) & ! isFALSE(demultiplexed )) readGrgList <- readGrgList [! mcols(readGrgList )$ CB %in% barcodesToFilter ]
242-
226+ verbose = FALSE , trackReads = FALSE , fusionMode = FALSE ,
227+ extractBarcodeUMI = FALSE , dedupUMI = FALSE , index = 0 ) {
228+ readGrgList <- prepareDataFromBam(bam.file [[1 ]], verbose = verbose , yieldSize = yieldSize , use.names = trackReads , extractBarcodeUMI = extractBarcodeUMI , dedupUMI = dedupUMI )
229+
243230 if (verbose ) message(" Number of alignments/reads: " ,length(readGrgList ))
244231
245232 warnings <- c()
@@ -290,7 +277,7 @@ bambu.readsByFile <- function(bam.file, genomeSequence, annotations,
290277 stop(" No reads left after filtering." )
291278
292279 # # add ###
293- # if (isTRUE(demultiplexed) ){
280+ # if (extractBarcodeUMI ){
294281 # cellBarcodeAssign <- tibble(index = mcols(readGrgList)$id, CB = mcols(readGrgList)$CB) %>% nest(.by = "CB")
295282
296283 # if (!dir.exists("CB")){
0 commit comments