diff --git a/DESCRIPTION b/DESCRIPTION index 9e70804..138f9bd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -39,4 +39,4 @@ Encoding: UTF-8 URL: http://msstats.org, https://vitek-lab.github.io/MSstatsBioNet/ BugReports: https://groups.google.com/forum/#!forum/msstats Config/testthat/edition: 3 -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 diff --git a/R/getSubnetworkFromIndra.R b/R/getSubnetworkFromIndra.R index d8a1896..d0d2f39 100644 --- a/R/getSubnetworkFromIndra.R +++ b/R/getSubnetworkFromIndra.R @@ -14,7 +14,7 @@ #' @param pvalueCutoff p-value cutoff for filtering. Default is NULL, i.e. no #' filtering #' @param statement_types list of interaction types to filter on. Equivalent to -#' statement type in INDRA. Default is c("IncreaseAmount", "DecreaseAmount"). +#' statement type in INDRA. Default is NULL. #' @param paper_count_cutoff number of papers to filter on. Default is 1. #' @param evidence_count_cutoff number of evidence to filter on for each #' paper. E.g. A paper may have 5 sentences describing the same interaction vs 1 @@ -27,9 +27,6 @@ #' @param logfc_cutoff absolute log fold change cutoff for filtering proteins. #' Only proteins with |logFC| greater than this value will be retained. Default #' is NULL, i.e. no logFC filtering. -#' @param force_include_proteins character vector of protein identifiers to exempt -#' from all filtering steps. These proteins will be retained regardless of p-value, -#' logFC, or other filtering criteria. Default is NULL, i.e. no exemptions. #' @param force_include_other character vector of identifiers to include in the #' network, regardless if those ids are in the input data. Should be formatted #' as "namespace:identifier", e.g. "HGNC:1234" or "CHEBI:4911". @@ -50,15 +47,14 @@ getSubnetworkFromIndra <- function(input, protein_level_data = NULL, pvalueCutoff = NULL, - statement_types = c("IncreaseAmount", "DecreaseAmount"), + statement_types = NULL, paper_count_cutoff = 1, evidence_count_cutoff = 1, correlation_cutoff = 0.3, sources_filter = NULL, logfc_cutoff = NULL, - force_include_proteins = NULL, force_include_other = NULL) { - input <- .filterGetSubnetworkFromIndraInput(input, pvalueCutoff, logfc_cutoff, force_include_proteins) + input <- .filterGetSubnetworkFromIndraInput(input, pvalueCutoff, logfc_cutoff, force_include_other) .validateGetSubnetworkFromIndraInput(input, protein_level_data, sources_filter, force_include_other) res <- .callIndraCogexApi(input$HgncId, force_include_other) res <- .filterIndraResponse(res, statement_types, evidence_count_cutoff, sources_filter) diff --git a/R/utils_getSubnetworkFromIndra.R b/R/utils_getSubnetworkFromIndra.R index eda9444..4d3071c 100644 --- a/R/utils_getSubnetworkFromIndra.R +++ b/R/utils_getSubnetworkFromIndra.R @@ -102,24 +102,25 @@ #' @param input groupComparison result #' @param pvalueCutoff p-value cutoff #' @param logfc_cutoff logFC cutoff -#' @param force_include_proteins list of proteins to exempt from filtering +#' @param force_include_other list of identifiers to exempt from filtering #' @return filtered groupComparison result #' @keywords internal #' @noRd -.filterGetSubnetworkFromIndraInput <- function(input, pvalueCutoff, logfc_cutoff, force_include_proteins) { +.filterGetSubnetworkFromIndraInput <- function(input, pvalueCutoff, logfc_cutoff, force_include_other) { input$Protein <- as.character(input$Protein) # Extract exempt proteins before any filtering exempt_proteins <- NULL - if (!is.null(force_include_proteins)) { - if (!is.character(force_include_proteins)) { - stop("force_include_proteins must be a character vector") + if (!is.null(force_include_other)) { + if (!is.character(force_include_other)) { + stop("force_include_other must be a character vector") } - missing_prots <- setdiff(force_include_proteins, input$Protein) - if (length(missing_prots) > 0) { - warning("force_include_proteins not found: ", paste(missing_prots, collapse = ", ")) + if ("HgncId" %in% colnames(input) && any(grepl("^HGNC:", force_include_other))) { + hgnc_ids_to_include <- gsub("^HGNC:", "", force_include_other[grepl("^HGNC:", force_include_other)]) + exempt_proteins <- input[input$HgncId %in% hgnc_ids_to_include, ] + } else { + exempt_proteins <- data.frame() } - exempt_proteins <- input[input$Protein %in% force_include_proteins,] } # Apply standard filtering @@ -293,6 +294,18 @@ colnames(nodes) = c("id", "hgncName", "Site", "logFC", "adj.pvalue") nodes = nodes[nodes$id %in% c(edges$source, edges$target), ] + extra_force_include_other <- setdiff(unique(c(edges$source, edges$target)), nodes$id) + if (length(extra_force_include_other) > 0) { + extra_nodes <- data.frame( + id = extra_force_include_other, + hgncName = NA, + Site = NA, + logFC = 0, + adj.pvalue = 1, + stringsAsFactors = FALSE + ) + nodes <- rbind(nodes, extra_nodes) + } nodes$hgncName = ifelse(is.na(nodes$hgncName), nodes$id, nodes$hgncName) return(nodes) diff --git a/man/getSubnetworkFromIndra.Rd b/man/getSubnetworkFromIndra.Rd index 2fc5e80..ac8fa6f 100644 --- a/man/getSubnetworkFromIndra.Rd +++ b/man/getSubnetworkFromIndra.Rd @@ -8,13 +8,12 @@ getSubnetworkFromIndra( input, protein_level_data = NULL, pvalueCutoff = NULL, - statement_types = c("IncreaseAmount", "DecreaseAmount"), + statement_types = NULL, paper_count_cutoff = 1, evidence_count_cutoff = 1, correlation_cutoff = 0.3, sources_filter = NULL, logfc_cutoff = NULL, - force_include_proteins = NULL, force_include_other = NULL ) } @@ -33,7 +32,7 @@ and applying correlation cutoffs.} filtering} \item{statement_types}{list of interaction types to filter on. Equivalent to -statement type in INDRA. Default is c("IncreaseAmount", "DecreaseAmount").} +statement type in INDRA. Default is NULL.} \item{paper_count_cutoff}{number of papers to filter on. Default is 1.} @@ -52,10 +51,6 @@ Otherwise, should be a list, e.g. c('reach', 'medscan').} Only proteins with |logFC| greater than this value will be retained. Default is NULL, i.e. no logFC filtering.} -\item{force_include_proteins}{character vector of protein identifiers to exempt -from all filtering steps. These proteins will be retained regardless of p-value, -logFC, or other filtering criteria. Default is NULL, i.e. no exemptions.} - \item{force_include_other}{character vector of identifiers to include in the network, regardless if those ids are in the input data. Should be formatted as "namespace:identifier", e.g. "HGNC:1234" or "CHEBI:4911".} diff --git a/vignettes/PTM-Analysis.Rmd b/vignettes/PTM-Analysis.Rmd index 67bd7cc..d86e23c 100644 --- a/vignettes/PTM-Analysis.Rmd +++ b/vignettes/PTM-Analysis.Rmd @@ -63,7 +63,7 @@ subnetwork of proteins from the INDRA database based on differential abundance analysis results. This function may help finding off target subnetworks. ```{r} -subnetwork <- getSubnetworkFromIndra(annotated_df, pvalueCutoff = 0.05, statement_types = c("Phosphorylation"), logfc_cutoff = 1, force_include_proteins = c("P00533_Y1110")) +subnetwork <- getSubnetworkFromIndra(annotated_df, pvalueCutoff = 0.05, statement_types = c("Phosphorylation"), logfc_cutoff = 1, force_include_other = c("HGNC:3236")) head(subnetwork$nodes) head(subnetwork$edges) ```