Vitek-Lab · tonywu1999 · Nov 4, 2025 · Nov 4, 2025 · Nov 4, 2025 · Nov 4, 2025
diff --git a/NAMESPACE b/NAMESPACE
@@ -18,9 +18,11 @@ importFrom(RCy3,mapVisualProperty)
 importFrom(RCy3,setVisualStyle)
 importFrom(grDevices,colorRamp)
 importFrom(grDevices,rgb)
+importFrom(httr,GET)
 importFrom(httr,POST)
 importFrom(httr,add_headers)
 importFrom(httr,content)
+importFrom(httr,status_code)
 importFrom(jsonlite,fromJSON)
 importFrom(jsonlite,toJSON)
 importFrom(r2r,hashmap)

diff --git a/R/getSubnetworkFromIndra.R b/R/getSubnetworkFromIndra.R
@@ -30,6 +30,9 @@
 #' @param force_include_other character vector of identifiers to include in the
 #' network, regardless if those ids are in the input data. Should be formatted
 #' as "namespace:identifier", e.g. "HGNC:1234" or "CHEBI:4911".
+#' @param filter_by_curation logical, whether to discount evidence curated as
+#' incorrect in INDRA before applying evidence_count_cutoff. Default is FALSE.
+#' @param api_key string, INDRA API key sent with curation queries; only used when filter_by_curation is TRUE.
 #'
 #' @return list of 2 data.frames, nodes and edges
 #'
@@ -53,11 +56,13 @@ getSubnetworkFromIndra <- function(input,
                                    correlation_cutoff = 0.3,
                                    sources_filter = NULL,
                                    logfc_cutoff = NULL,
-                                   force_include_other = NULL) {
+                                   force_include_other = NULL, 
+                                   filter_by_curation = FALSE, 
+                                   api_key = "") {
     input <- .filterGetSubnetworkFromIndraInput(input, pvalueCutoff, logfc_cutoff, force_include_other)
     .validateGetSubnetworkFromIndraInput(input, protein_level_data, sources_filter, force_include_other)
     res <- .callIndraCogexApi(input$HgncId, force_include_other)
-    res <- .filterIndraResponse(res, statement_types, evidence_count_cutoff, sources_filter)
+    res <- .filterIndraResponse(res, statement_types, evidence_count_cutoff, sources_filter, filter_by_curation, api_key)
     edges <- .constructEdgesDataFrame(res, input, protein_level_data)
     edges <- .filterEdgesDataFrame(edges, paper_count_cutoff, correlation_cutoff)
     nodes <- .constructNodesDataFrame(input, edges)

diff --git a/R/utils_getSubnetworkFromIndra.R b/R/utils_getSubnetworkFromIndra.R
@@ -66,25 +66,53 @@
     return(res)
 }
 
+#' @importFrom httr GET status_code content
+#' @importFrom jsonlite fromJSON
+.get_incorrect_curation_count <- function(stmt_hash, api_key) {  # one GET per hash; returns a count, 0 on empty result, non-200 status, or error
+    stmt_hash_char <- as.character(stmt_hash)  # string form, reused in the URL and in both warnings
+    url <- paste0("https://db.indra.bio/curation/list/", stmt_hash_char, "?api_key=", api_key)
+    # NOTE(review): api_key is pasted into the query string unescaped; confirm keys never need URL-encoding.
+    tryCatch({  # any error raised inside the braces is turned into a warning plus a 0 result
+        response <- GET(url)
+        if (status_code(response) == 200) {
+            curations <- fromJSON(content(response, "text", encoding = "UTF-8"))
+            if (length(curations) == 0) {  # nothing was parsed from the response body
+                return(0)
+            }
+            incorrect_curations <- curations[curations$tag != "correct", ]  # NOTE(review): assumes a data.frame with a tag column; an NA tag would yield an NA row - confirm
+            unique_incorrect <- length(unique(incorrect_curations$source_hash))
+            # Distinct source_hash values were counted above, so rows sharing a source_hash count once.
+            return(unique_incorrect)
+        } else {
+            warning(paste("API request failed for hash", stmt_hash_char, 
+                          "with status code", status_code(response)))
+            return(0)  # a failed request is reported as zero incorrect curations
+        }
+    }, error = function(e) {
+        warning(paste("Error processing hash", stmt_hash_char, ":", e$message))
+        return(0)  # same fallback as the non-200 branch
+    })
+}
+
 #' Call INDRA Cogex API and return response
 #' @param res response from INDRA
 #' @param interaction_types interaction types to filter by
 #' @param evidence_count_cutoff number of evidence to filter on for each paper
 #' @param sources_filter list of sources to filter by. Default is NULL, i.e. no filter
+#' @param filter_by_curation logical, whether to subtract each statement's incorrect-curation
+#' count from its evidence_count before the evidence_count_cutoff filter runs. Default is FALSE.
+#' @param api_key string, INDRA API key sent with each curation query; only used when filter_by_curation is TRUE.
 #' @return filtered list of INDRA statements
 #' @importFrom jsonlite fromJSON
 #' @keywords internal
 #' @noRd
-.filterIndraResponse <- function(res, interaction_types, evidence_count_cutoff, sources_filter = NULL) {
+.filterIndraResponse <- function(res, interaction_types, evidence_count_cutoff, 
+                                 sources_filter = NULL, filter_by_curation = FALSE, api_key = "") {
     if (!is.null(interaction_types)) {
         res = Filter(
             function(statement) statement$data$stmt_type %in% interaction_types, 
             res)
     }
-    res = Filter(
-        function(statement) statement$data$evidence_count >= evidence_count_cutoff, 
-        res
-    )
     if (!is.null(sources_filter)) {
         res = Filter(
             function(statement) {
@@ -95,6 +123,19 @@
             res
         )
     }
+    if (filter_by_curation) {
+        for (i in seq_along(res)) {
+            stmt_hash <- res[[i]]$data$stmt_hash
+            incorrect_count <- .get_incorrect_curation_count(stmt_hash, api_key)
+            res[[i]]$data$evidence_count <- res[[i]]$data$evidence_count - incorrect_count
+            # Todo: Also subtract source_counts accordingly if requested
+            Sys.sleep(0.1)
+        }
+    }
+    res = Filter(
+        function(statement) statement$data$evidence_count >= evidence_count_cutoff, 
+        res
+    )
     return(res)
 }
 

diff --git a/man/getSubnetworkFromIndra.Rd b/man/getSubnetworkFromIndra.Rd