From 6a639b800e791d06c88997c9ffe8274f9d00bdf6 Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Tue, 4 Nov 2025 10:51:11 -0500 Subject: [PATCH 1/4] refactor(getSubnetworkFromIndra): deprecate force_include_proteins parameter in favor of force_include_other --- DESCRIPTION | 2 +- R/getSubnetworkFromIndra.R | 10 +++------- R/utils_getSubnetworkFromIndra.R | 31 ++++++++++++++++++++++--------- man/getSubnetworkFromIndra.Rd | 9 ++------- 4 files changed, 28 insertions(+), 24 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9e70804..138f9bd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -39,4 +39,4 @@ Encoding: UTF-8 URL: http://msstats.org, https://vitek-lab.github.io/MSstatsBioNet/ BugReports: https://groups.google.com/forum/#!forum/msstats Config/testthat/edition: 3 -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 diff --git a/R/getSubnetworkFromIndra.R b/R/getSubnetworkFromIndra.R index d8a1896..d0d2f39 100644 --- a/R/getSubnetworkFromIndra.R +++ b/R/getSubnetworkFromIndra.R @@ -14,7 +14,7 @@ #' @param pvalueCutoff p-value cutoff for filtering. Default is NULL, i.e. no #' filtering #' @param statement_types list of interaction types to filter on. Equivalent to -#' statement type in INDRA. Default is c("IncreaseAmount", "DecreaseAmount"). +#' statement type in INDRA. Default is NULL. #' @param paper_count_cutoff number of papers to filter on. Default is 1. #' @param evidence_count_cutoff number of evidence to filter on for each #' paper. E.g. A paper may have 5 sentences describing the same interaction vs 1 @@ -27,9 +27,6 @@ #' @param logfc_cutoff absolute log fold change cutoff for filtering proteins. #' Only proteins with |logFC| greater than this value will be retained. Default #' is NULL, i.e. no logFC filtering. -#' @param force_include_proteins character vector of protein identifiers to exempt -#' from all filtering steps. These proteins will be retained regardless of p-value, -#' logFC, or other filtering criteria. Default is NULL, i.e. no exemptions. #' @param force_include_other character vector of identifiers to include in the #' network, regardless if those ids are in the input data. Should be formatted #' as "namespace:identifier", e.g. "HGNC:1234" or "CHEBI:4911". @@ -50,15 +47,14 @@ getSubnetworkFromIndra <- function(input, protein_level_data = NULL, pvalueCutoff = NULL, - statement_types = c("IncreaseAmount", "DecreaseAmount"), + statement_types = NULL, paper_count_cutoff = 1, evidence_count_cutoff = 1, correlation_cutoff = 0.3, sources_filter = NULL, logfc_cutoff = NULL, - force_include_proteins = NULL, force_include_other = NULL) { - input <- .filterGetSubnetworkFromIndraInput(input, pvalueCutoff, logfc_cutoff, force_include_proteins) + input <- .filterGetSubnetworkFromIndraInput(input, pvalueCutoff, logfc_cutoff, force_include_other) .validateGetSubnetworkFromIndraInput(input, protein_level_data, sources_filter, force_include_other) res <- .callIndraCogexApi(input$HgncId, force_include_other) res <- .filterIndraResponse(res, statement_types, evidence_count_cutoff, sources_filter) diff --git a/R/utils_getSubnetworkFromIndra.R b/R/utils_getSubnetworkFromIndra.R index eda9444..b3083e9 100644 --- a/R/utils_getSubnetworkFromIndra.R +++ b/R/utils_getSubnetworkFromIndra.R @@ -102,24 +102,25 @@ #' @param input groupComparison result #' @param pvalueCutoff p-value cutoff #' @param logfc_cutoff logFC cutoff -#' @param force_include_proteins list of proteins to exempt from filtering +#' @param force_include_other list of proteins to exempt from filtering #' @return filtered groupComparison result #' @keywords internal #' @noRd -.filterGetSubnetworkFromIndraInput <- function(input, pvalueCutoff, logfc_cutoff, force_include_proteins) { +.filterGetSubnetworkFromIndraInput <- function(input, pvalueCutoff, logfc_cutoff, force_include_other) { input$Protein <- as.character(input$Protein) # Extract exempt proteins before any filtering exempt_proteins <- NULL - if (!is.null(force_include_proteins)) { - if (!is.character(force_include_proteins)) { - stop("force_include_proteins must be a character vector") + if (!is.null(force_include_other)) { + if (!is.character(force_include_other)) { + stop("force_include_other must be a character vector") } - missing_prots <- setdiff(force_include_proteins, input$Protein) - if (length(missing_prots) > 0) { - warning("force_include_proteins not found: ", paste(missing_prots, collapse = ", ")) + if ("HgncId" %in% colnames(input) && any(grepl("^HGNC:", force_include_other))) { + hgnc_ids_to_include <- gsub("^HGNC:", "", force_include_other[grepl("^HGNC:", force_include_other)]) + exempt_proteins <- input[input$HgncId %in% hgnc_ids_to_include, ] + } else { + exempt_proteins <- data.frame() } - exempt_proteins <- input[input$Protein %in% force_include_proteins,] } # Apply standard filtering @@ -293,6 +294,18 @@ colnames(nodes) = c("id", "hgncName", "Site", "logFC", "adj.pvalue") nodes = nodes[nodes$id %in% c(edges$source, edges$target), ] + extra_force_include_other <- setdiff(unique(c(edges$source, edges$target)), nodes$id) + if (length(extra_force_include_other) > 0) { + extra_nodes <- data.frame( + id = extra_force_include_other, + hgncName = NA, + Site = NA, + logFC = 0, + adj.pvalue = 1, + stringsAsFactors = FALSE + ) + nodes <- rbind(nodes, missing_nodes) + } nodes$hgncName = ifelse(is.na(nodes$hgncName), nodes$id, nodes$hgncName) return(nodes) diff --git a/man/getSubnetworkFromIndra.Rd b/man/getSubnetworkFromIndra.Rd index 2fc5e80..ac8fa6f 100644 --- a/man/getSubnetworkFromIndra.Rd +++ b/man/getSubnetworkFromIndra.Rd @@ -8,13 +8,12 @@ getSubnetworkFromIndra( input, protein_level_data = NULL, pvalueCutoff = NULL, - statement_types = c("IncreaseAmount", "DecreaseAmount"), + statement_types = NULL, paper_count_cutoff = 1, evidence_count_cutoff = 1, correlation_cutoff = 0.3, sources_filter = NULL, logfc_cutoff = NULL, - force_include_proteins = NULL, force_include_other = NULL ) } @@ -33,7 +32,7 @@ and applying correlation cutoffs.} filtering} \item{statement_types}{list of interaction types to filter on. Equivalent to -statement type in INDRA. Default is c("IncreaseAmount", "DecreaseAmount").} +statement type in INDRA. Default is NULL.} \item{paper_count_cutoff}{number of papers to filter on. Default is 1.} @@ -52,10 +51,6 @@ Otherwise, should be a list, e.g. c('reach', 'medscan').} Only proteins with |logFC| greater than this value will be retained. Default is NULL, i.e. no logFC filtering.} -\item{force_include_proteins}{character vector of protein identifiers to exempt -from all filtering steps. These proteins will be retained regardless of p-value, -logFC, or other filtering criteria. Default is NULL, i.e. no exemptions.} - \item{force_include_other}{character vector of identifiers to include in the network, regardless if those ids are in the input data. Should be formatted as "namespace:identifier", e.g. "HGNC:1234" or "CHEBI:4911".} From 6025d0d35b0f6a52c7898618a9cdb8277ba04d1f Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Tue, 4 Nov 2025 10:57:42 -0500 Subject: [PATCH 2/4] fix bug --- R/utils_getSubnetworkFromIndra.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utils_getSubnetworkFromIndra.R b/R/utils_getSubnetworkFromIndra.R index b3083e9..983d112 100644 --- a/R/utils_getSubnetworkFromIndra.R +++ b/R/utils_getSubnetworkFromIndra.R @@ -304,7 +304,7 @@ adj.pvalue = 1, stringsAsFactors = FALSE ) - nodes <- rbind(nodes, missing_nodes) + nodes <- rbind(nodes, extra_nodes) } nodes$hgncName = ifelse(is.na(nodes$hgncName), nodes$id, nodes$hgncName) From eb9edab54bc10057fc69b140dbb4e0eb625b21bd Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Tue, 4 Nov 2025 11:02:51 -0500 Subject: [PATCH 3/4] fix vignette --- vignettes/PTM-Analysis.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/PTM-Analysis.Rmd b/vignettes/PTM-Analysis.Rmd index 67bd7cc..d86e23c 100644 --- a/vignettes/PTM-Analysis.Rmd +++ b/vignettes/PTM-Analysis.Rmd @@ -63,7 +63,7 @@ subnetwork of proteins from the INDRA database based on differential abundance analysis results. This function may help finding off target subnetworks. ```{r} -subnetwork <- getSubnetworkFromIndra(annotated_df, pvalueCutoff = 0.05, statement_types = c("Phosphorylation"), logfc_cutoff = 1, force_include_proteins = c("P00533_Y1110")) +subnetwork <- getSubnetworkFromIndra(annotated_df, pvalueCutoff = 0.05, statement_types = c("Phosphorylation"), logfc_cutoff = 1, force_include_other = c("HGNC:3236")) head(subnetwork$nodes) head(subnetwork$edges) ``` From b4100655883d9308250d7d7b180e2faad631486d Mon Sep 17 00:00:00 2001 From: Tony Wu Date: Tue, 4 Nov 2025 11:12:42 -0500 Subject: [PATCH 4/4] adjust docs --- R/utils_getSubnetworkFromIndra.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utils_getSubnetworkFromIndra.R b/R/utils_getSubnetworkFromIndra.R index 983d112..4d3071c 100644 --- a/R/utils_getSubnetworkFromIndra.R +++ b/R/utils_getSubnetworkFromIndra.R @@ -102,7 +102,7 @@ #' @param input groupComparison result #' @param pvalueCutoff p-value cutoff #' @param logfc_cutoff logFC cutoff -#' @param force_include_other list of proteins to exempt from filtering +#' @param force_include_other list of identifiers to exempt from filtering #' @return filtered groupComparison result #' @keywords internal #' @noRd