Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ License: Artistic-2.0
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
RoxygenNote: 7.3.3
biocViews: MassSpectrometry, Proteomics, Software, DataImport, QualityControl
Depends:
R (>= 4.0)
Expand Down
55 changes: 47 additions & 8 deletions R/clean_DIANN.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
#' Clean raw Diann files
#' @param msstats_object an object of class `MSstatsDIANNFiles`.
#' @param MBR `TRUE` if the analysis was done with match between runs (MBR).
#' @param quantificationColumn Use 'FragmentQuantCorrected'(default) column for quantified intensities for DIANN 1.8.x.
#' Use 'FragmentQuantRaw' for quantified intensities for DIANN 1.9.x.
#' Use 'auto' for quantified intensities for DIANN 2.x where each fragment intensity is a separate column, e.g. Fr0Quantity.
#' @inheritParams DIANNtoMSstatsFormat
#' @return data.table
#' @importFrom stats na.omit
#' @keywords internal
.cleanRawDIANN <- function(msstats_object, MBR = TRUE,
quantificationColumn = "FragmentQuantCorrected") {
quantificationColumn = "FragmentQuantCorrected",
global_qvalue_cutoff = 0.01,
qvalue_cutoff = 0.01,
pg_qvalue_cutoff = 0.01) {
dn_input <- getInputFile(msstats_object, "input")
dn_input <- data.table::as.data.table(dn_input)

Expand All @@ -28,7 +28,10 @@
dn_input <- .cleanDIANNProcessFragmentInfo(dn_input, quantificationColumn)

# Clean and filter data
dn_input <- .cleanDIANNCleanAndFilterData(dn_input, quantificationColumn)
dn_input <- .cleanDIANNCleanAndFilterData(dn_input, MBR, quantificationColumn,
global_qvalue_cutoff,
qvalue_cutoff,
pg_qvalue_cutoff)

# Rename columns
dn_input <- .cleanDIANNRenameColumns(dn_input, quantificationColumn)
Expand Down Expand Up @@ -145,14 +148,50 @@

#' Clean and filter data by removing unwanted fragments and NA values
#' @param dn_input data.table input
#' @param quantificationColumn quantification column name
#' @inheritParams DIANNtoMSstatsFormat
#' @return cleaned data.table
#' @noRd
.cleanDIANNCleanAndFilterData <- function(dn_input, quantificationColumn) {
.cleanDIANNCleanAndFilterData <- function(dn_input, MBR, quantificationColumn,
                                          global_qvalue_cutoff,
                                          qvalue_cutoff,
                                          pg_qvalue_cutoff) {
    # Emit one INFO line through the log handler and then the message handler
    # stored in the "MSstatsLog" / "MSstatsMsg" options.
    announce <- function(text) {
        getOption("MSstatsLog")("INFO", text)
        getOption("MSstatsMsg")("INFO", text)
    }

    # Drop NH3 / H2O loss fragments and rows with a missing quantified value.
    dn_input <- dn_input[!(grepl("NH3|H2O", FragmentIon) |
                               is.na(get(quantificationColumn)))]

    # Filter on QValue against the global cutoff.
    announce(paste0("** Filtering on Q.Value < ", global_qvalue_cutoff))
    dn_input <- dn_input[QValue < global_qvalue_cutoff]

    # The protein-group and precursor cutoffs apply to the Lib* columns when
    # MBR was used and to the Global* columns otherwise. Filtering happens
    # before the corresponding messages are emitted.
    if (MBR) {
        dn_input <- dn_input[LibPGQValue < pg_qvalue_cutoff]
        dn_input <- dn_input[LibQValue < qvalue_cutoff]
        announce("** MBR was used to analyze the data. Now setting names and filtering")
        announce(paste0("-- LibPGQValue < ", pg_qvalue_cutoff))
        announce(paste0("-- LibQValue < ", qvalue_cutoff))
    } else {
        dn_input <- dn_input[GlobalPGQValue < pg_qvalue_cutoff]
        dn_input <- dn_input[GlobalQValue < qvalue_cutoff]
        announce("** MBR was not used to analyze the data. Now setting names and filtering")
        announce(paste0("-- Filtering on GlobalPGQValue < ", pg_qvalue_cutoff))
        announce(paste0("-- Filtering on GlobalQValue < ", qvalue_cutoff))
    }

    dn_input
}

Expand Down
41 changes: 5 additions & 36 deletions R/converters_DIANNtoMSstatsFormat.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@
#' @param pg_qvalue_cutoff If MBR is false, the qvalue cutoff for the Global.PG.Q.Value
#' column, i.e. the global q-value for the protein group. If MBR is true, the
#' qvalue cutoff for the Lib.PG.Q.Value column, i.e. the protein group q-value for
#' the library created after the first MBR pass. Run should be the same as filename.
#' Default is 0.01.
#' the library created after the first MBR pass. Default is 0.01.
#' @param useUniquePeptide should unique peptides be removed
#' @param removeFewMeasurements should proteins with few measurements be removed
#' @param removeOxidationMpeptides should peptides with oxidation be removed
Expand Down Expand Up @@ -71,7 +70,10 @@ DIANNtoMSstatsFormat = function(input, annotation = NULL,
input = MSstatsConvert::MSstatsImport(list(input = input),
"MSstats", "DIANN")
input = MSstatsConvert::MSstatsClean(input, MBR = MBR,
quantificationColumn = quantificationColumn)
quantificationColumn = quantificationColumn,
global_qvalue_cutoff = global_qvalue_cutoff,
qvalue_cutoff = qvalue_cutoff,
pg_qvalue_cutoff = pg_qvalue_cutoff)
annotation = MSstatsConvert::MSstatsMakeAnnotation(input, annotation)

decoy_filter = list(col_name = "ProteinName",
Expand All @@ -83,39 +85,6 @@ DIANNtoMSstatsFormat = function(input, annotation = NULL,
filter = removeOxidationMpeptides,
drop_column = FALSE)

msg = paste0('** Filtering on Global Q Value < ', global_qvalue_cutoff)
getOption("MSstatsLog")("INFO", msg)
getOption("MSstatsMsg")("INFO", msg)

input = input[DetectionQValue < global_qvalue_cutoff, ]
if (MBR) {
msg = '** MBR was used to analyze the data. Now setting names and filtering'
msg_1_mbr = paste0('-- LibPGQValue < ', pg_qvalue_cutoff)
msg_2_mbr = paste0('-- LibQValue < ', qvalue_cutoff)
input = input[LibPGQValue < pg_qvalue_cutoff, ]
input = input[LibQValue < qvalue_cutoff, ]
getOption("MSstatsLog")("INFO", msg)
getOption("MSstatsMsg")("INFO", msg)
getOption("MSstatsLog")("INFO", msg_1_mbr)
getOption("MSstatsMsg")("INFO", msg_1_mbr)
getOption("MSstatsLog")("INFO", msg_2_mbr)
getOption("MSstatsMsg")("INFO", msg_2_mbr)
# getOption("MSstatsLog")("INFO", "\n")
} else{
msg = '** MBR was not used to analyze the data. Now setting names and filtering'
msg_1 = paste0('-- Filtering on GlobalPGQValue < ', pg_qvalue_cutoff)
msg_2 = paste0('-- Filtering on GlobalQValue < ', qvalue_cutoff)
input = input[GlobalPGQValue < pg_qvalue_cutoff, ]
input = input[GlobalQValue < qvalue_cutoff, ]
getOption("MSstatsLog")("INFO", msg)
getOption("MSstatsMsg")("INFO", msg)
getOption("MSstatsLog")("INFO", msg_1)
getOption("MSstatsMsg")("INFO", msg_1)
getOption("MSstatsLog")("INFO", msg_2)
getOption("MSstatsMsg")("INFO", msg_2)
# getOption("MSstatsLog")("INFO", "\n")
}

feature_columns = c("PeptideSequence", "PrecursorCharge",
"FragmentIon", "ProductCharge")
input = MSstatsConvert::MSstatsPreprocess(
Expand Down
11 changes: 11 additions & 0 deletions inst/tinytest/test_clean_DIANN.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,14 @@ output = MSstatsConvert:::.cleanRawDIANN(input)
output = MSstatsConvert:::.cleanRawDIANN(input, quantificationColumn = "FragmentQuantRaw")
.validateOutput(output)

# Q-value filtering
# Each check re-runs the cleaner with one cutoff tightened and asserts that
# every remaining row is below that cutoff (count of passing rows == nrow).
# `.cleanRawDIANN` defaults to `MBR = TRUE`, so the calls without an explicit
# `MBR` argument also take the MBR branch (Lib* columns).
# NOTE(review): no call here uses `MBR = FALSE`, so the GlobalQValue /
# GlobalPGQValue filters are not exercised by this block -- consider adding one.
output = MSstatsConvert:::.cleanRawDIANN(input, global_qvalue_cutoff = 0.005)
expect_equal(sum(output$DetectionQValue < 0.005), nrow(output))
output = MSstatsConvert:::.cleanRawDIANN(input, qvalue_cutoff = 0.00001)
expect_equal(sum(output$LibQValue < 0.00001), nrow(output))
output = MSstatsConvert:::.cleanRawDIANN(input, pg_qvalue_cutoff = 0.001)
expect_equal(sum(output$LibPGQValue < 0.001), nrow(output))
# Same Lib* checks with `MBR = TRUE` passed explicitly.
output = MSstatsConvert:::.cleanRawDIANN(input, MBR = TRUE, qvalue_cutoff = 0.005)
expect_equal(sum(output$LibQValue < 0.005), nrow(output))
output = MSstatsConvert:::.cleanRawDIANN(input, MBR = TRUE, pg_qvalue_cutoff = 0.001)
expect_equal(sum(output$LibPGQValue < 0.001), nrow(output))
3 changes: 1 addition & 2 deletions man/DIANNtoMSstatsFormat.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 16 additions & 2 deletions man/MSstatsClean.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 17 additions & 1 deletion man/dot-cleanRawDIANN.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.