From 6a639b800e791d06c88997c9ffe8274f9d00bdf6 Mon Sep 17 00:00:00 2001
From: Tony Wu <wu.anthon@northeastern.edu>
Date: Tue, 4 Nov 2025 10:51:11 -0500
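Subject: [PATCH 1/4] refactor(getSubnetworkFromIndra): remove
 force_include_proteins parameter in favor of force_include_other

force_include_proteins is removed outright (there is no deprecation
shim), so callers that still pass it will get an unused-argument error.

Rows exempted from input filtering are now selected by matching the
HgncId column against the "HGNC:"-prefixed entries of
force_include_other, instead of matching the Protein column.

The default of statement_types changes from
c("IncreaseAmount", "DecreaseAmount") to NULL.

Edge endpoints that are not present in the input data are now appended
to the node table with logFC = 0 and adj.pvalue = 1.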

---
 DESCRIPTION                      |  2 +-
 R/getSubnetworkFromIndra.R       | 10 +++-------
 R/utils_getSubnetworkFromIndra.R | 31 ++++++++++++++++++++++---------
 man/getSubnetworkFromIndra.Rd    |  9 ++-------
 4 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 9e70804..138f9bd 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -39,4 +39,4 @@ Encoding: UTF-8
 URL: http://msstats.org, https://vitek-lab.github.io/MSstatsBioNet/
 BugReports: https://groups.google.com/forum/#!forum/msstats
 Config/testthat/edition: 3
-RoxygenNote: 7.3.2
+RoxygenNote: 7.3.3
diff --git a/R/getSubnetworkFromIndra.R b/R/getSubnetworkFromIndra.R
index d8a1896..d0d2f39 100644
--- a/R/getSubnetworkFromIndra.R
+++ b/R/getSubnetworkFromIndra.R
@@ -14,7 +14,7 @@
 #' @param pvalueCutoff p-value cutoff for filtering. Default is NULL, i.e. no
 #' filtering
 #' @param statement_types list of interaction types to filter on.  Equivalent to
-#' statement type in INDRA.  Default is c("IncreaseAmount", "DecreaseAmount").
+#' statement type in INDRA.  Default is NULL.
 #' @param paper_count_cutoff number of papers to filter on. Default is 1.
 #' @param evidence_count_cutoff number of evidence to filter on for each
 #' paper. E.g. A paper may have 5 sentences describing the same interaction vs 1
@@ -27,9 +27,6 @@
 #' @param logfc_cutoff absolute log fold change cutoff for filtering proteins. 
 #' Only proteins with |logFC| greater than this value will be retained. Default 
 #' is NULL, i.e. no logFC filtering.
-#' @param force_include_proteins character vector of protein identifiers to exempt 
-#' from all filtering steps. These proteins will be retained regardless of p-value, 
-#' logFC, or other filtering criteria. Default is NULL, i.e. no exemptions.
 #' @param force_include_other character vector of identifiers to include in the
 #' network, regardless if those ids are in the input data. Should be formatted
 #' as "namespace:identifier", e.g. "HGNC:1234" or "CHEBI:4911".
@@ -50,15 +47,14 @@
 getSubnetworkFromIndra <- function(input, 
                                    protein_level_data = NULL,
                                    pvalueCutoff = NULL, 
-                                   statement_types = c("IncreaseAmount", "DecreaseAmount"),
+                                   statement_types = NULL,
                                    paper_count_cutoff = 1,
                                    evidence_count_cutoff = 1,
                                    correlation_cutoff = 0.3,
                                    sources_filter = NULL,
                                    logfc_cutoff = NULL,
-                                   force_include_proteins = NULL,
                                    force_include_other = NULL) {
-    input <- .filterGetSubnetworkFromIndraInput(input, pvalueCutoff, logfc_cutoff, force_include_proteins)
+    input <- .filterGetSubnetworkFromIndraInput(input, pvalueCutoff, logfc_cutoff, force_include_other)
     .validateGetSubnetworkFromIndraInput(input, protein_level_data, sources_filter, force_include_other)
     res <- .callIndraCogexApi(input$HgncId, force_include_other)
     res <- .filterIndraResponse(res, statement_types, evidence_count_cutoff, sources_filter)
diff --git a/R/utils_getSubnetworkFromIndra.R b/R/utils_getSubnetworkFromIndra.R
index eda9444..b3083e9 100644
--- a/R/utils_getSubnetworkFromIndra.R
+++ b/R/utils_getSubnetworkFromIndra.R
@@ -102,24 +102,25 @@
 #' @param input groupComparison result
 #' @param pvalueCutoff p-value cutoff
 #' @param logfc_cutoff logFC cutoff
-#' @param force_include_proteins list of proteins to exempt from filtering
+#' @param force_include_other list of proteins to exempt from filtering
 #' @return filtered groupComparison result
 #' @keywords internal
 #' @noRd
-.filterGetSubnetworkFromIndraInput <- function(input, pvalueCutoff, logfc_cutoff, force_include_proteins) {
+.filterGetSubnetworkFromIndraInput <- function(input, pvalueCutoff, logfc_cutoff, force_include_other) {
     input$Protein <- as.character(input$Protein)
     
     # Extract exempt proteins before any filtering
     exempt_proteins <- NULL
-    if (!is.null(force_include_proteins)) {
-        if (!is.character(force_include_proteins)) {
-            stop("force_include_proteins must be a character vector")
+    if (!is.null(force_include_other)) {
+        if (!is.character(force_include_other)) {
+            stop("force_include_other must be a character vector")
         }
-        missing_prots <- setdiff(force_include_proteins, input$Protein)
-        if (length(missing_prots) > 0) {
-            warning("force_include_proteins not found: ", paste(missing_prots, collapse = ", "))
+        if ("HgncId" %in% colnames(input) && any(grepl("^HGNC:", force_include_other))) {
+            hgnc_ids_to_include <- gsub("^HGNC:", "", force_include_other[grepl("^HGNC:", force_include_other)])
+            exempt_proteins <- input[input$HgncId %in% hgnc_ids_to_include, ]
+        } else {
+            exempt_proteins <- data.frame()
         }
-        exempt_proteins <- input[input$Protein %in% force_include_proteins,]
     }
     
     # Apply standard filtering
@@ -293,6 +294,18 @@
     colnames(nodes) = c("id", "hgncName", "Site", "logFC", "adj.pvalue")
     
     nodes = nodes[nodes$id %in% c(edges$source, edges$target), ]
+    extra_force_include_other <- setdiff(unique(c(edges$source, edges$target)), nodes$id)
+    if (length(extra_force_include_other) > 0) {
+        extra_nodes <- data.frame(
+            id = extra_force_include_other,
+            hgncName = NA,
+            Site = NA,
+            logFC = 0,
+            adj.pvalue = 1,
+            stringsAsFactors = FALSE
+        )
+        nodes <- rbind(nodes, missing_nodes)
+    }
     nodes$hgncName = ifelse(is.na(nodes$hgncName), nodes$id, nodes$hgncName)
     
     return(nodes)
diff --git a/man/getSubnetworkFromIndra.Rd b/man/getSubnetworkFromIndra.Rd
index 2fc5e80..ac8fa6f 100644
--- a/man/getSubnetworkFromIndra.Rd
+++ b/man/getSubnetworkFromIndra.Rd
@@ -8,13 +8,12 @@ getSubnetworkFromIndra(
   input,
   protein_level_data = NULL,
   pvalueCutoff = NULL,
-  statement_types = c("IncreaseAmount", "DecreaseAmount"),
+  statement_types = NULL,
   paper_count_cutoff = 1,
   evidence_count_cutoff = 1,
   correlation_cutoff = 0.3,
   sources_filter = NULL,
   logfc_cutoff = NULL,
-  force_include_proteins = NULL,
   force_include_other = NULL
 )
 }
@@ -33,7 +32,7 @@ and applying correlation cutoffs.}
 filtering}
 
 \item{statement_types}{list of interaction types to filter on.  Equivalent to
-statement type in INDRA.  Default is c("IncreaseAmount", "DecreaseAmount").}
+statement type in INDRA.  Default is NULL.}
 
 \item{paper_count_cutoff}{number of papers to filter on. Default is 1.}
 
@@ -52,10 +51,6 @@ Otherwise, should be a list, e.g. c('reach', 'medscan').}
 Only proteins with |logFC| greater than this value will be retained. Default 
 is NULL, i.e. no logFC filtering.}
 
-\item{force_include_proteins}{character vector of protein identifiers to exempt 
-from all filtering steps. These proteins will be retained regardless of p-value, 
-logFC, or other filtering criteria. Default is NULL, i.e. no exemptions.}
-
 \item{force_include_other}{character vector of identifiers to include in the
 network, regardless if those ids are in the input data. Should be formatted
 as "namespace:identifier", e.g. "HGNC:1234" or "CHEBI:4911".}

From 6025d0d35b0f6a52c7898618a9cdb8277ba04d1f Mon Sep 17 00:00:00 2001
From: Tony Wu <wu.anthon@northeastern.edu>
Date: Tue, 4 Nov 2025 10:57:42 -0500
Subject: [PATCH 2/4] fix: rbind extra_nodes instead of undefined missing_nodes

---
 R/utils_getSubnetworkFromIndra.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/utils_getSubnetworkFromIndra.R b/R/utils_getSubnetworkFromIndra.R
index b3083e9..983d112 100644
--- a/R/utils_getSubnetworkFromIndra.R
+++ b/R/utils_getSubnetworkFromIndra.R
@@ -304,7 +304,7 @@
             adj.pvalue = 1,
             stringsAsFactors = FALSE
         )
-        nodes <- rbind(nodes, missing_nodes)
+        nodes <- rbind(nodes, extra_nodes)
     }
     nodes$hgncName = ifelse(is.na(nodes$hgncName), nodes$id, nodes$hgncName)
     

From eb9edab54bc10057fc69b140dbb4e0eb625b21bd Mon Sep 17 00:00:00 2001
From: Tony Wu <wu.anthon@northeastern.edu>
Date: Tue, 4 Nov 2025 11:02:51 -0500
Subject: [PATCH 3/4] docs(vignette): use force_include_other in PTM-Analysis example

---
 vignettes/PTM-Analysis.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vignettes/PTM-Analysis.Rmd b/vignettes/PTM-Analysis.Rmd
index 67bd7cc..d86e23c 100644
--- a/vignettes/PTM-Analysis.Rmd
+++ b/vignettes/PTM-Analysis.Rmd
@@ -63,7 +63,7 @@ subnetwork of proteins from the INDRA database based on differential abundance
 analysis results.  This function may help finding off target subnetworks.  
 
 ```{r}
-subnetwork <- getSubnetworkFromIndra(annotated_df, pvalueCutoff = 0.05, statement_types = c("Phosphorylation"), logfc_cutoff = 1, force_include_proteins = c("P00533_Y1110"))
+subnetwork <- getSubnetworkFromIndra(annotated_df, pvalueCutoff = 0.05, statement_types = c("Phosphorylation"), logfc_cutoff = 1, force_include_other = c("HGNC:3236"))
 head(subnetwork$nodes)
 head(subnetwork$edges)
 ```

From b4100655883d9308250d7d7b180e2faad631486d Mon Sep 17 00:00:00 2001
From: Tony Wu <wu.anthon@northeastern.edu>
Date: Tue, 4 Nov 2025 11:12:42 -0500
Subject: [PATCH 4/4] docs: describe force_include_other as identifiers, not proteins

---
 R/utils_getSubnetworkFromIndra.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/utils_getSubnetworkFromIndra.R b/R/utils_getSubnetworkFromIndra.R
index 983d112..4d3071c 100644
--- a/R/utils_getSubnetworkFromIndra.R
+++ b/R/utils_getSubnetworkFromIndra.R
@@ -102,7 +102,7 @@
 #' @param input groupComparison result
 #' @param pvalueCutoff p-value cutoff
 #' @param logfc_cutoff logFC cutoff
-#' @param force_include_other list of proteins to exempt from filtering
+#' @param force_include_other list of identifiers to exempt from filtering
 #' @return filtered groupComparison result
 #' @keywords internal
 #' @noRd