-
Notifications
You must be signed in to change notification settings - Fork 28
fix: adjust NDR threshold behavior. Previously, NDR = 1 would overrid… #559
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -112,7 +112,7 @@ filterTranscriptsByAnnotation <- function(rowDataCombined, annotationGrangesList | |
| } else if(is.null(NDR)) { | ||
| NDR <- 0.5 | ||
| } | ||
| filterSet <- (rowDataCombined$NDR <= NDR | rowDataCombined$readClassType == "equal:compatible") | ||
| filterSet <- ((!is.na(rowDataCombined$NDR) & rowDataCombined$NDR <= NDR) | rowDataCombined$readClassType == "equal:compatible") | ||
| lowConfidenceTranscripts <- combindRowDataWithRanges( | ||
| rowDataCombined[!filterSet,], | ||
| exonRangesCombined[!filterSet]) | ||
|
|
@@ -224,7 +224,7 @@ calculateNDROnTranscripts <- function(combinedTranscripts, useTxScore = FALSE){ | |
| } else { | ||
| combinedTranscripts$NDR <- calculateNDR(combinedTranscripts$maxTxScore, equal) | ||
| } | ||
| combinedTranscripts$NDR[combinedTranscripts$maxTxScore==-1] <- 1 | ||
| combinedTranscripts$NDR[combinedTranscripts$maxTxScore==-1] <- NA | ||
|
||
| return(combinedTranscripts) | ||
| } | ||
|
|
||
|
|
@@ -827,15 +827,14 @@ addGeneIdsToReadClassTable <- function(readClassTable, distTable, | |
| #' @description This function train a model for use on other data | ||
| #' @param extendedAnnotations A GRangesList object produced from bambu(quant = FALSE) or rowRanges(se) | ||
| #' @param NDR The maximum NDR for novel transcripts to be in extendedAnnotations (0-1). If not provided a recommended NDR is calculated. | ||
| #' @param includeRef A boolean which if TRUE will also filter out reference annotations based on their NDR | ||
| #' @param prefix A string which determines which transcripts are considered novel by bambu and will be filtered (by default = 'Bambu') | ||
| #' @param baselineFDR a value between 0-1. Bambu uses this FDR on the trained model to recommend an equivilent NDR threshold to be used for the sample. By default, a baseline FDR of 0.1 is used. This does not impact the analysis if an NDR is set. | ||
| #' @param defaultModels a bambu trained model object that bambu will use when fitReadClassModel==FALSE or the data is not suitable for training, defaults to the pretrained model in the bambu package | ||
| #' Output - returns a similiar GRangesList object with entries swapped into or out of metadata(extendedAnnotations)$lowConfidenceTranscripts | ||
| #' @details | ||
| #' @return extendedAnnotations with a new NDR threshold | ||
| #' @export | ||
| setNDR <- function(extendedAnnotations, NDR = NULL, includeRef = FALSE, prefix = 'Bambu', baselineFDR = 0.1, defaultModels2 = defaultModels){ | ||
| setNDR <- function(extendedAnnotations, NDR = NULL, prefix = 'Bambu', baselineFDR = 0.1, defaultModels2 = defaultModels){ | ||
| #Check to see if the annotations/gtf are dervived from Bambu | ||
| if(is.null(mcols(extendedAnnotations)$NDR)){ | ||
| warning("Annotations were not extended by Bambu (or the wrong prefix was provided). NDR can not be set") | ||
|
|
@@ -852,17 +851,10 @@ setNDR <- function(extendedAnnotations, NDR = NULL, includeRef = FALSE, prefix = | |
| message("Recommending a novel discovery rate (NDR) of: ", NDR) | ||
| } | ||
|
|
||
| #If reference annotations should be filtered too (note that reference annotations with no read support arn't filtered) | ||
| if(includeRef){ | ||
| toRemove <- (!is.na(mcols(extendedAnnotations)$NDR) & mcols(extendedAnnotations)$NDR > NDR) | ||
| toAdd <- !is.na(mcols(metadata(extendedAnnotations)$lowConfidenceTranscripts)$NDR) & | ||
| mcols(metadata(extendedAnnotations)$lowConfidenceTranscripts)$NDR <= NDR | ||
| } else { | ||
| toRemove <- (mcols(extendedAnnotations)$NDR > NDR & | ||
| grepl(prefix, mcols(extendedAnnotations)$TXNAME)) | ||
| toAdd <- (mcols(metadata(extendedAnnotations)$lowConfidenceTranscripts)$NDR <= NDR & | ||
| grepl(prefix, mcols(metadata(extendedAnnotations)$lowConfidenceTranscripts)$TXNAME)) | ||
| } | ||
| toRemove <- (mcols(extendedAnnotations)$NDR > NDR & | ||
| grepl(prefix, mcols(extendedAnnotations)$TXNAME)) | ||
| toAdd <- (mcols(metadata(extendedAnnotations)$lowConfidenceTranscripts)$NDR <= NDR & | ||
| grepl(prefix, mcols(metadata(extendedAnnotations)$lowConfidenceTranscripts)$TXNAME)) | ||
|
|
||
| temp <- c(metadata(extendedAnnotations)$lowConfidenceTranscripts[!toAdd], extendedAnnotations[toRemove]) | ||
| extendedAnnotations <- c(extendedAnnotations[!toRemove], metadata(extendedAnnotations)$lowConfidenceTranscripts[toAdd]) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This change alters the edge-case semantics for
NDR = 1vsNDR = 0.999by excluding transcripts withNDR = NA(subset / min.sampleNumber-filtered). There isn’t currently a regression test covering this behavior (i.e., thatNDR = 1no longer re-includes subset/low-confidence transcripts, whileNDR = 0.999matches previous results). Consider adding a test that runsisore.extendAnnotations()withNDR = 1and asserts subset/low-confidence transcripts are still excluded (e.g., viametadata(... )$lowConfidenceTranscripts/ counts).