From 900d096946de77cfa3b54fc39056eebf5b2d0086 Mon Sep 17 00:00:00 2001 From: Ivan Glazunov Date: Fri, 20 Mar 2026 13:59:27 +0300 Subject: [PATCH 1/2] feat: add RPQMatrix evaluation --- Cargo.toml | 1 + build.rs | 4 + src/graph/mod.rs | 1 + src/lagraph_sys_generated.rs | 29 ++++ src/lib.rs | 3 +- src/rpq/mod.rs | 54 ++++++ src/rpq/rpqmatrix.rs | 214 +++++++++++++++++++++++ src/utils.rs | 20 +++ tests/rpqmatrix_tests.rs | 326 +++++++++++++++++++++++++++++++++++ 9 files changed, 651 insertions(+), 1 deletion(-) create mode 100644 src/rpq/mod.rs create mode 100644 src/rpq/rpqmatrix.rs create mode 100644 tests/rpqmatrix_tests.rs diff --git a/Cargo.toml b/Cargo.toml index 2420d28..9f24b40 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ edition = "2024" [dependencies] csv = "1.4.0" +egg = "0.10.0" libc = "0.2" oxrdf = "0.3.3" oxttl = "0.2.3" diff --git a/build.rs b/build.rs index 475046e..efec3c5 100644 --- a/build.rs +++ b/build.rs @@ -74,6 +74,8 @@ fn regenerate_bindings() { .allowlist_function("GrB_Vector_extractTuples_BOOL") .allowlist_function("GrB_vxm") .allowlist_item("LAGRAPH_MSG_LEN") + .allowlist_item("RPQMatrixOp") + .allowlist_type("RPQMatrixPlan") .allowlist_type("LAGraph_Graph") .allowlist_type("LAGraph_Kind") .allowlist_function("LAGraph_CheckGraph") @@ -83,6 +85,8 @@ fn regenerate_bindings() { .allowlist_function("LAGraph_Delete") .allowlist_function("LAGraph_Cached_AT") .allowlist_function("LAGraph_MMRead") + .allowlist_function("LAGraph_RPQMatrix") + .allowlist_function("LAGraph_DestroyRpqMatrixPlan") .default_enum_style(bindgen::EnumVariation::Rust { non_exhaustive: false, }) diff --git a/src/graph/mod.rs b/src/graph/mod.rs index d096b6f..05b0125 100644 --- a/src/graph/mod.rs +++ b/src/graph/mod.rs @@ -125,6 +125,7 @@ impl Drop for LagraphGraph { unsafe impl Send for LagraphGraph {} unsafe impl Sync for LagraphGraph {} +#[derive(Debug)] pub struct GraphblasVector { pub inner: GrB_Vector, } diff --git a/src/lagraph_sys_generated.rs 
b/src/lagraph_sys_generated.rs index 3201d28..6be0310 100644 --- a/src/lagraph_sys_generated.rs +++ b/src/lagraph_sys_generated.rs @@ -261,3 +261,32 @@ unsafe extern "C" { msg: *mut ::std::os::raw::c_char, ) -> ::std::os::raw::c_int; } +#[repr(u32)] +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +pub enum RPQMatrixOp { + RPQ_MATRIX_OP_LABEL = 0, + RPQ_MATRIX_OP_LOR = 1, + RPQ_MATRIX_OP_CONCAT = 2, + RPQ_MATRIX_OP_KLEENE = 3, + RPQ_MATRIX_OP_KLEENE_L = 4, + RPQ_MATRIX_OP_KLEENE_R = 5, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct RPQMatrixPlan { + pub op: RPQMatrixOp, + pub lhs: *mut RPQMatrixPlan, + pub rhs: *mut RPQMatrixPlan, + pub mat: GrB_Matrix, + pub res_mat: GrB_Matrix, +} +unsafe extern "C" { + pub fn LAGraph_RPQMatrix( + nnz: *mut GrB_Index, + plan: *mut RPQMatrixPlan, + msg: *mut ::std::os::raw::c_char, + ) -> GrB_Info; +} +unsafe extern "C" { + pub fn LAGraph_DestroyRpqMatrixPlan(plan: *mut RPQMatrixPlan) -> GrB_Info; +} diff --git a/src/lib.rs b/src/lib.rs index 0d11b1f..0f89008 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,8 @@ pub mod formats; pub mod graph; +pub mod rpq; pub mod sparql; #[allow(unused_unsafe, dead_code)] -pub(crate) mod utils; +pub mod utils; pub mod lagraph_sys; diff --git a/src/rpq/mod.rs b/src/rpq/mod.rs new file mode 100644 index 0000000..8180612 --- /dev/null +++ b/src/rpq/mod.rs @@ -0,0 +1,54 @@ +//! Regular Path Query (RPQ) evaluation over edge-labeled graphs. +//! ```rust,ignore +//! use pathrex::sparql::parse_rpq; +//! use pathrex::rpq::{RpqEvaluator, nfarpq::NfaRpqEvaluator}; +//! +//! let triple = parse_rpq("SELECT ?x ?y WHERE { ?x /* ?y . }")?; +//! let result = NfaRpqEvaluator.evaluate(&triple.subject, &triple.path, &triple.object, &graph)?; +//! 
``` + +pub mod rpqmatrix; + +use crate::graph::GraphDecomposition; +use crate::graph::GraphblasVector; +use crate::sparql::ExtractError; +use spargebra::SparqlSyntaxError; +use spargebra::algebra::PropertyPathExpression; +use spargebra::term::TermPattern; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum RpqError { + #[error("SPARQL syntax error: {0}")] + Parse(#[from] SparqlSyntaxError), + + #[error("query extraction error: {0}")] + Extract(#[from] ExtractError), + + #[error("unsupported path expression: {0}")] + UnsupportedPath(String), + + #[error("label not found in graph: '{0}'")] + LabelNotFound(String), + + #[error("vertex not found in graph: '{0}'")] + VertexNotFound(String), + + #[error("GraphBLAS/LAGraph error: {0}")] + GraphBlas(String), +} + +#[derive(Debug)] +pub struct RpqResult { + pub reachable: GraphblasVector, +} + +pub trait RpqEvaluator { + fn evaluate( + &self, + subject: &TermPattern, + path: &PropertyPathExpression, + object: &TermPattern, + graph: &G, + ) -> Result; +} diff --git a/src/rpq/rpqmatrix.rs b/src/rpq/rpqmatrix.rs new file mode 100644 index 0000000..e551e34 --- /dev/null +++ b/src/rpq/rpqmatrix.rs @@ -0,0 +1,214 @@ +//! Plan-based RPQ evaluation using `LAGraph_RPQMatrix`. + +use std::ptr::null_mut; + +use egg::{Id, RecExpr, define_language}; +use spargebra::algebra::PropertyPathExpression; +use spargebra::term::TermPattern; + +use crate::graph::{GraphDecomposition, GraphblasVector, ensure_grb_init}; +use crate::lagraph_sys::*; +use crate::rpq::{RpqError, RpqEvaluator, RpqResult}; +use crate::{grb_ok, la_ok}; + +unsafe impl Send for RPQMatrixPlan {} + +define_language! { + pub enum RpqPlan { + Label(String), + "/" = Seq([Id; 2]), + "|" = Alt([Id; 2]), + "*" = Star([Id; 1]), + } +} + +/// Compile a [`PropertyPathExpression`] into [`RecExpr`]. 
+pub fn to_expr(path: &PropertyPathExpression) -> Result, RpqError> { + let mut expr = RecExpr::default(); + to_expr_aux(path, &mut expr)?; + Ok(expr) +} + +fn to_expr_aux( + path: &PropertyPathExpression, + expr: &mut RecExpr, +) -> Result { + match path { + PropertyPathExpression::NamedNode(nn) => { + Ok(expr.add(RpqPlan::Label(nn.as_str().to_owned()))) + } + + PropertyPathExpression::Sequence(lhs, rhs) => { + let l = to_expr_aux(lhs, expr)?; + let r = to_expr_aux(rhs, expr)?; + Ok(expr.add(RpqPlan::Seq([l, r]))) + } + + PropertyPathExpression::Alternative(lhs, rhs) => { + let l = to_expr_aux(lhs, expr)?; + let r = to_expr_aux(rhs, expr)?; + Ok(expr.add(RpqPlan::Alt([l, r]))) + } + + PropertyPathExpression::ZeroOrMore(inner) => { + let i = to_expr_aux(inner, expr)?; + Ok(expr.add(RpqPlan::Star([i]))) + } + + PropertyPathExpression::OneOrMore(inner) => { + let e = to_expr_aux(inner, expr)?; + let s = expr.add(RpqPlan::Star([e])); + Ok(expr.add(RpqPlan::Seq([e, s]))) + } + + PropertyPathExpression::ZeroOrOne(_) => Err(RpqError::UnsupportedPath( + "ZeroOrOne (?) is not supported by RPQMatrix".into(), + )), + + PropertyPathExpression::Reverse(_) => Err(RpqError::UnsupportedPath( + "Reverse paths are not supported".into(), + )), + + PropertyPathExpression::NegatedPropertySet(_) => Err(RpqError::UnsupportedPath( + "NegatedPropertySet paths are not supported".into(), + )), + } +} + +/// Convert a [`RecExpr`] into the flat [`RPQMatrixPlan`] array that +/// `LAGraph_RPQMatrix` expects. 
+pub fn materialize( + expr: &RecExpr, + graph: &G, +) -> Result, RpqError> { + let null_plan = RPQMatrixPlan { + op: RPQMatrixOp::RPQ_MATRIX_OP_LABEL, + lhs: null_mut(), + rhs: null_mut(), + mat: null_mut(), + res_mat: null_mut(), + }; + let mut plans = vec![null_plan; expr.len()]; + + for (id, node) in expr.as_ref().iter().enumerate() { + plans[id] = match node { + RpqPlan::Label(label) => { + let lg = graph + .get_graph(label) + .map_err(|_| RpqError::LabelNotFound(label.clone()))?; + let mat = unsafe { (*lg.inner).A }; + RPQMatrixPlan { + op: RPQMatrixOp::RPQ_MATRIX_OP_LABEL, + lhs: null_mut(), + rhs: null_mut(), + mat, + res_mat: null_mut(), + } + } + + RpqPlan::Seq([l, r]) => RPQMatrixPlan { + op: RPQMatrixOp::RPQ_MATRIX_OP_CONCAT, + lhs: unsafe { plans.as_mut_ptr().add(usize::from(*l)) }, + rhs: unsafe { plans.as_mut_ptr().add(usize::from(*r)) }, + mat: null_mut(), + res_mat: null_mut(), + }, + + RpqPlan::Alt([l, r]) => RPQMatrixPlan { + op: RPQMatrixOp::RPQ_MATRIX_OP_LOR, + lhs: unsafe { plans.as_mut_ptr().add(usize::from(*l)) }, + rhs: unsafe { plans.as_mut_ptr().add(usize::from(*r)) }, + mat: null_mut(), + res_mat: null_mut(), + }, + + RpqPlan::Star([i]) => RPQMatrixPlan { + op: RPQMatrixOp::RPQ_MATRIX_OP_KLEENE, + lhs: null_mut(), + rhs: unsafe { plans.as_mut_ptr().add(usize::from(*i)) }, + mat: null_mut(), + res_mat: null_mut(), + }, + }; + } + + Ok(plans) +} + +/// RPQ evaluator backed by `LAGraph_RPQMatrix`. 
+pub struct RpqMatrixEvaluator; + +impl RpqEvaluator for RpqMatrixEvaluator { + fn evaluate( + &self, + subject: &TermPattern, + path: &PropertyPathExpression, + object: &TermPattern, + graph: &G, + ) -> Result { + if !matches!(object, TermPattern::Variable(_)) { + return Err(RpqError::UnsupportedPath( + "bound object term is not yet supported by RpqMatrixEvaluator".into(), + )); + } + + ensure_grb_init().map_err(|e| RpqError::GraphBlas(e.to_string()))?; + + let n = graph.num_nodes() as GrB_Index; + + let expr = to_expr(path)?; + + let mut plans = materialize(&expr, graph)?; + let root_ptr = unsafe { plans.as_mut_ptr().add(plans.len() - 1) }; + + let mut nnz: GrB_Index = 0; + la_ok!(LAGraph_RPQMatrix(&mut nnz, root_ptr)) + .map_err(|e| RpqError::GraphBlas(e.to_string()))?; + + let res_mat = unsafe { (*root_ptr).res_mat }; + + let src = unsafe { + GraphblasVector::new_bool(n).map_err(|e| RpqError::GraphBlas(e.to_string()))? + }; + match subject { + TermPattern::NamedNode(nn) => { + let id = graph + .get_node_id(nn.as_str()) + .ok_or_else(|| RpqError::VertexNotFound(nn.as_str().to_owned()))? + as GrB_Index; + grb_ok!(GrB_Vector_setElement_BOOL(src.inner, true, id)) + .map_err(|e| RpqError::GraphBlas(e.to_string()))?; + } + TermPattern::Variable(_) => { + for i in 0..n { + grb_ok!(GrB_Vector_setElement_BOOL(src.inner, true, i)) + .map_err(|e| RpqError::GraphBlas(e.to_string()))?; + } + } + _ => { + return Err(RpqError::UnsupportedPath( + "subject must be a variable or named node".into(), + )); + } + } + + let result = unsafe { + GraphblasVector::new_bool(n).map_err(|e| RpqError::GraphBlas(e.to_string()))? 
+ }; + grb_ok!(GrB_vxm( + result.inner, + null_mut(), + null_mut(), + GrB_LOR_LAND_SEMIRING_BOOL, + src.inner, + res_mat, + null_mut(), + )) + .map_err(|e| RpqError::GraphBlas(e.to_string()))?; + + grb_ok!(LAGraph_DestroyRpqMatrixPlan(root_ptr)) + .map_err(|e| RpqError::GraphBlas(e.to_string()))?; + + Ok(RpqResult { reachable: result }) + } +} diff --git a/src/utils.rs b/src/utils.rs index 7cb37d3..92846ca 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,6 +1,26 @@ use crate::{graph::*, lagraph_sys::*}; use std::{fmt::Display, sync::Arc}; +pub fn build_graph(edges: &[(&str, &str, &str)]) -> ::Graph { + let builder = InMemoryBuilder::new(); + let edges = edges + .iter() + .cloned() + .map(|(s, t, l)| { + Ok(Edge { + source: s.to_string(), + label: l.to_string(), + target: t.to_string(), + }) + }) + .collect::>>(); + builder + .with_stream(edges.into_iter()) + .expect("Should insert edges stream") + .build() + .expect("build must succeed") +} + pub struct CountOutput(pub usize, std::marker::PhantomData); impl CountOutput { diff --git a/tests/rpqmatrix_tests.rs b/tests/rpqmatrix_tests.rs new file mode 100644 index 0000000..ab4f766 --- /dev/null +++ b/tests/rpqmatrix_tests.rs @@ -0,0 +1,326 @@ +use pathrex::graph::GraphDecomposition; +use pathrex::lagraph_sys::{GrB_Index, GrB_Vector_extractTuples_BOOL, GrB_Vector_nvals}; +use pathrex::rpq::rpqmatrix::RpqMatrixEvaluator; +use pathrex::rpq::{RpqError, RpqEvaluator, RpqResult}; +use pathrex::utils::build_graph; +use spargebra::algebra::PropertyPathExpression; +use spargebra::term::{NamedNode, TermPattern, Variable}; + +fn named(iri: &str) -> PropertyPathExpression { + PropertyPathExpression::NamedNode(NamedNode::new_unchecked(iri)) +} + +fn var(name: &str) -> TermPattern { + TermPattern::Variable(Variable::new_unchecked(name)) +} + +fn named_term(iri: &str) -> TermPattern { + TermPattern::NamedNode(NamedNode::new_unchecked(iri)) +} + +fn reachable_indices(result: &RpqResult) -> Vec { + unsafe { + let mut nvals: 
GrB_Index = 0; + GrB_Vector_nvals(&mut nvals, result.reachable.inner); + if nvals == 0 { + return Vec::new(); + } + let mut indices = vec![0u64; nvals as usize]; + let mut values = vec![false; nvals as usize]; + GrB_Vector_extractTuples_BOOL( + indices.as_mut_ptr(), + values.as_mut_ptr(), + &mut nvals, + result.reachable.inner, + ); + indices.truncate(nvals as usize); + indices + } +} + +fn reachable_count(result: &RpqResult) -> u64 { + unsafe { + let mut nvals: GrB_Index = 0; + GrB_Vector_nvals(&mut nvals, result.reachable.inner); + nvals + } +} + +/// Graph: A --knows--> B --knows--> C +/// Query: ?x ?y +#[test] +fn test_single_label_variable_variable() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]); + let evaluator = RpqMatrixEvaluator; + + let result = evaluator + .evaluate(&var("x"), &named("knows"), &var("y"), &graph) + .expect("evaluate should succeed"); + + let count = reachable_count(&result); + assert_eq!(count, 2); +} + +/// Graph: A --knows--> B --knows--> C +/// Query: ?y +#[test] +fn test_single_label_named_source() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]); + let evaluator = RpqMatrixEvaluator; + + let result = evaluator + .evaluate(&named_term("A"), &named("knows"), &var("y"), &graph) + .expect("evaluate should succeed"); + + let indices = reachable_indices(&result); + let b_id = graph.get_node_id("B").expect("B should exist"); + assert!( + indices.contains(&(b_id as GrB_Index)), + "B (id={b_id}) should be reachable from A via 'knows', got indices: {indices:?}" + ); +} + +/// Graph: A --knows--> B --likes--> C +/// Query: ?x / ?y (two-hop sequence) +#[test] +fn test_sequence_path() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "likes")]); + let evaluator = RpqMatrixEvaluator; + + let path = PropertyPathExpression::Sequence(Box::new(named("knows")), Box::new(named("likes"))); + + let result = evaluator + .evaluate(&var("x"), &path, &var("y"), &graph) + .expect("evaluate should 
succeed"); + + let count = reachable_count(&result); + assert_eq!(count, 1); +} + +/// Graph: A --knows--> B --likes--> C +/// Query: / ?y +#[test] +fn test_sequence_path_named_source() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "likes")]); + let evaluator = RpqMatrixEvaluator; + + let path = PropertyPathExpression::Sequence(Box::new(named("knows")), Box::new(named("likes"))); + + let result = evaluator + .evaluate(&named_term("A"), &path, &var("y"), &graph) + .expect("evaluate should succeed"); + + let indices = reachable_indices(&result); + let c_id = graph.get_node_id("C").expect("C should exist"); + assert!( + indices.contains(&(c_id as GrB_Index)), + "C (id={c_id}) should be reachable from A via knows/likes, got indices: {indices:?}" + ); +} + +/// Graph: A --knows--> B, A --likes--> C +/// Query: | ?y +#[test] +fn test_alternative_path() { + let graph = build_graph(&[("A", "B", "knows"), ("A", "C", "likes")]); + let evaluator = RpqMatrixEvaluator; + + let path = + PropertyPathExpression::Alternative(Box::new(named("knows")), Box::new(named("likes"))); + + let result = evaluator + .evaluate(&named_term("A"), &path, &var("y"), &graph) + .expect("evaluate should succeed"); + + let indices = reachable_indices(&result); + let b_id = graph.get_node_id("B").expect("B should exist"); + let c_id = graph.get_node_id("C").expect("C should exist"); + assert!( + indices.contains(&(b_id as GrB_Index)), + "B should be reachable via knows|likes" + ); + assert!( + indices.contains(&(c_id as GrB_Index)), + "C should be reachable via knows|likes" + ); +} + +/// Graph: A --knows--> B --knows--> C +/// Query: * ?y +#[test] +fn test_zero_or_more_path() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]); + let evaluator = RpqMatrixEvaluator; + + let path = PropertyPathExpression::ZeroOrMore(Box::new(named("knows"))); + + let result = evaluator + .evaluate(&named_term("A"), &path, &var("y"), &graph) + .expect("evaluate should succeed"); + + 
let indices = reachable_indices(&result); + let a_id = graph.get_node_id("A").expect("A should exist"); + let b_id = graph.get_node_id("B").expect("B should exist"); + let c_id = graph.get_node_id("C").expect("C should exist"); + + assert!(indices.contains(&(a_id as GrB_Index)), "A should be reachable (zero hops)"); + assert!(indices.contains(&(b_id as GrB_Index)), "B should be reachable (one hop)"); + assert!(indices.contains(&(c_id as GrB_Index)), "C should be reachable (two hops)"); +} + +/// Graph: A --knows--> B --knows--> C +/// Query: + ?y +#[test] +fn test_one_or_more_path() { + let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]); + let evaluator = RpqMatrixEvaluator; + + let path = PropertyPathExpression::OneOrMore(Box::new(named("knows"))); + + let result = evaluator + .evaluate(&named_term("A"), &path, &var("y"), &graph) + .expect("evaluate should succeed"); + + let indices = reachable_indices(&result); + let a_id = graph.get_node_id("A").expect("A should exist"); + let b_id = graph.get_node_id("B").expect("B should exist"); + let c_id = graph.get_node_id("C").expect("C should exist"); + + assert!(!indices.contains(&(a_id as GrB_Index)), "A shouldn't be reachable"); + assert!(indices.contains(&(b_id as GrB_Index)), "B should be reachable (one hop)"); + assert!(indices.contains(&(c_id as GrB_Index)), "C should be reachable (two hops)"); +} + +#[test] +fn test_zero_or_one_unsupported() { + let graph = build_graph(&[("A", "B", "knows")]); + let evaluator = RpqMatrixEvaluator; + + let path = PropertyPathExpression::ZeroOrOne(Box::new(named("knows"))); + let result = evaluator.evaluate(&var("x"), &path, &var("y"), &graph); + + assert!( + matches!(result, Err(RpqError::UnsupportedPath(_))), + "expected UnsupportedPath for ZeroOrOne, got: {result:?}" + ); +} + +#[test] +fn test_label_not_found() { + let graph = build_graph(&[("A", "B", "knows")]); + let evaluator = RpqMatrixEvaluator; + + let result = evaluator.evaluate(&var("x"), 
&named("nonexistent"), &var("y"), &graph); + + assert!( + matches!(result, Err(RpqError::LabelNotFound(ref l)) if l == "nonexistent"), + "expected LabelNotFound error, got: {result:?}" + ); +} + +#[test] +fn test_vertex_not_found() { + let graph = build_graph(&[("A", "B", "knows")]); + let evaluator = RpqMatrixEvaluator; + + let result = evaluator.evaluate(&named_term("Z"), &named("knows"), &var("y"), &graph); + + assert!( + matches!(result, Err(RpqError::VertexNotFound(ref v)) if v == "Z"), + "expected VertexNotFound error, got: {result:?}" + ); +} + +#[test] +fn test_bound_object_unsupported() { + let graph = build_graph(&[("A", "B", "knows")]); + let evaluator = RpqMatrixEvaluator; + + let result = evaluator.evaluate(&var("x"), &named("knows"), &named_term("B"), &graph); + + assert!( + matches!(result, Err(RpqError::UnsupportedPath(_))), + "expected UnsupportedPath for bound object, got: {result:?}" + ); +} + +#[test] +fn test_reverse_path_unsupported() { + let graph = build_graph(&[("A", "B", "knows")]); + let evaluator = RpqMatrixEvaluator; + + let path = PropertyPathExpression::Reverse(Box::new(named("knows"))); + let result = evaluator.evaluate(&var("x"), &path, &var("y"), &graph); + + assert!( + matches!(result, Err(RpqError::UnsupportedPath(_))), + "expected UnsupportedPath error, got: {result:?}" + ); +} + +/// Graph: A --knows--> B --knows--> C --knows--> A (cycle) +/// Query: * ?y +#[test] +fn test_cycle_graph_star() { + let graph = build_graph(&[ + ("A", "B", "knows"), + ("B", "C", "knows"), + ("C", "A", "knows"), + ]); + let evaluator = RpqMatrixEvaluator; + + let path = PropertyPathExpression::ZeroOrMore(Box::new(named("knows"))); + + let result = evaluator + .evaluate(&named_term("A"), &path, &var("y"), &graph) + .expect("evaluate should succeed"); + + let count = reachable_count(&result); + assert_eq!(count, 3, "all 3 nodes should be reachable in a cycle"); +} + +/// Graph: A --knows--> B --likes--> C --knows--> D +/// Query: /*/ ?y +#[test] +fn 
test_complex_path() { + let graph = build_graph(&[ + ("A", "B", "knows"), + ("B", "C", "likes"), + ("C", "D", "knows"), + ]); + let evaluator = RpqMatrixEvaluator; + + let path = PropertyPathExpression::Sequence( + Box::new(PropertyPathExpression::Sequence( + Box::new(named("knows")), + Box::new(PropertyPathExpression::ZeroOrMore(Box::new(named("likes")))), + )), + Box::new(named("knows")), + ); + + let result = evaluator + .evaluate(&named_term("A"), &path, &var("y"), &graph) + .expect("evaluate should succeed"); + + let indices = reachable_indices(&result); + let d_id = graph.get_node_id("D").expect("D should exist"); + assert!( + indices.contains(&(d_id as GrB_Index)), + "D should be reachable via knows/likes*/knows, got indices: {indices:?}" + ); +} + +#[test] +fn test_no_matching_path() { + let graph = build_graph(&[("A", "B", "knows")]); + let evaluator = RpqMatrixEvaluator; + + let path = PropertyPathExpression::Sequence(Box::new(named("knows")), Box::new(named("likes"))); + + let result = evaluator.evaluate(&var("x"), &path, &var("y"), &graph); + + assert!( + matches!(result, Err(RpqError::LabelNotFound(ref l)) if l == "likes"), + "expected LabelNotFound for 'likes', got: {result:?}" + ); +} From 5220855f5b1edafa7155d61e9d3176242f522ec0 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Tue, 7 Apr 2026 16:55:22 +0300 Subject: [PATCH 2/2] feat: some rpqmatrix evaluator improvements This patch brings the following changes: 1) Add data catalogs with meta information about input matrices 2) Add plan optimizations via Egg library Signed-off-by: Rodion Suvorov --- Cargo.toml | 2 + build.rs | 1 + src/graph/inmemory.rs | 56 ++++++- src/graph/mod.rs | 20 ++- src/lagraph_sys_generated.rs | 7 + src/rpq/rpqmatrix.rs | 214 ------------------------ src/rpq/rpqmatrix/eval.rs | 92 +++++++++++ src/rpq/rpqmatrix/mod.rs | 3 + src/rpq/rpqmatrix/optimizer.rs | 287 +++++++++++++++++++++++++++++++++ src/rpq/rpqmatrix/plan.rs | 146 +++++++++++++++++ src/utils.rs | 4 + 
tests/rpqmatrix_tests.rs | 2 +- 12 files changed, 613 insertions(+), 221 deletions(-) delete mode 100644 src/rpq/rpqmatrix.rs create mode 100644 src/rpq/rpqmatrix/eval.rs create mode 100644 src/rpq/rpqmatrix/mod.rs create mode 100644 src/rpq/rpqmatrix/optimizer.rs create mode 100644 src/rpq/rpqmatrix/plan.rs diff --git a/Cargo.toml b/Cargo.toml index 9f24b40..346b901 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,8 @@ oxrdf = "0.3.3" oxttl = "0.2.3" spargebra = "0.4.6" thiserror = "1.0" +rand = "0.8" +expect-test = "1.5.1" [features] regenerate-bindings = ["bindgen"] diff --git a/build.rs b/build.rs index efec3c5..15cb4c7 100644 --- a/build.rs +++ b/build.rs @@ -87,6 +87,7 @@ fn regenerate_bindings() { .allowlist_function("LAGraph_MMRead") .allowlist_function("LAGraph_RPQMatrix") .allowlist_function("LAGraph_DestroyRpqMatrixPlan") + .allowlist_function("LAGraph_RPQMatrix_reduce") .default_enum_style(bindgen::EnumVariation::Rust { non_exhaustive: false, }) diff --git a/src/graph/inmemory.rs b/src/graph/inmemory.rs index 103e5fe..39c7743 100644 --- a/src/graph/inmemory.rs +++ b/src/graph/inmemory.rs @@ -3,13 +3,14 @@ use std::{collections::HashMap, io::Read}; use crate::formats::mm::{load_mm_file, parse_index_map}; use crate::formats::{Csv, MatrixMarket}; +use crate::graph::ReduceType; use crate::{ graph::GraphSource, lagraph_sys::{GrB_Index, GrB_Matrix, GrB_Matrix_free, LAGraph_Kind}, }; use super::{ - Backend, Edge, GraphBuilder, GraphDecomposition, GraphError, LagraphGraph, ensure_grb_init, + ensure_grb_init, Backend, Edge, GraphBuilder, GraphDecomposition, GraphError, LagraphGraph, }; /// Marker type for the in-memory GraphBLAS-backed backend. 
@@ -141,8 +142,20 @@ impl GraphBuilder for InMemoryBuilder { let mut graphs: HashMap> = HashMap::with_capacity(self.label_buffers.len() + self.prebuilt.len()); + let mut catalog: HashMap = + HashMap::with_capacity(self.label_buffers.len() + self.prebuilt.len()); for (label, lg) in self.prebuilt { + let rreduce = lg.reduces_nvals(ReduceType::ReduceByRows)? as usize; + let creduce = lg.reduces_nvals(ReduceType::ReduceByCols)? as usize; + catalog.insert( + label.clone(), + GraphMetadata { + // num_vertices: nrows, + row_projections: rreduce, + column_projections: creduce, + }, + ); graphs.insert(label, Arc::new(lg)); } @@ -159,6 +172,17 @@ impl GraphBuilder for InMemoryBuilder { LAGraph_Kind::LAGraph_ADJACENCY_DIRECTED, )?; + let rreduce = lg.reduces_nvals(ReduceType::ReduceByRows)? as usize; + let creduce = lg.reduces_nvals(ReduceType::ReduceByCols)? as usize; + catalog.insert( + label.clone(), + GraphMetadata { + // num_vertices: nrows, + row_projections: rreduce, + column_projections: creduce, + }, + ); + graphs.insert(label.clone(), Arc::new(lg)); } @@ -166,15 +190,25 @@ impl GraphBuilder for InMemoryBuilder { node_to_id: self.node_to_id, id_to_node: self.id_to_node, graphs, + catalog, }) } } +/// Metadata about a labeled graph in the decomposition. +#[derive(Debug, Clone, Default)] +pub struct GraphMetadata { + // pub num_vertices: usize, + pub row_projections: usize, + pub column_projections: usize, +} + /// Immutable, read-only Boolean-decomposed graph backed by LAGraph graphs. 
pub struct InMemoryGraph { node_to_id: HashMap, id_to_node: HashMap, graphs: HashMap>, + catalog: HashMap, } impl GraphDecomposition for InMemoryGraph { @@ -198,6 +232,20 @@ impl GraphDecomposition for InMemoryGraph { fn num_nodes(&self) -> usize { self.id_to_node.len() } + + fn get_meta(&self, label: &str) -> Option<&GraphMetadata> { + self.get_metadata(label) + } +} + +impl InMemoryGraph { + pub fn get_metadata(&self, label: &str) -> Option<&GraphMetadata> { + self.catalog.get(label) + } + + pub fn catalog(&self) -> &HashMap { + &self.catalog + } } impl GraphSource for Csv { @@ -216,10 +264,8 @@ impl GraphSource for MatrixMarket { fn apply_to(self, mut builder: InMemoryBuilder) -> Result { let vertices_path = self.dir.join("vertices.txt"); let (vert_by_idx, vert_by_name) = parse_index_map(&vertices_path)?; - let vert_by_idx = - vert_by_idx.into_iter().map(|(i, n)| (i - 1, n)).collect(); - let vert_by_name = - vert_by_name.into_iter().map(|(n, i)| (n, i - 1)).collect(); + let vert_by_idx = vert_by_idx.into_iter().map(|(i, n)| (i - 1, n)).collect(); + let vert_by_name = vert_by_name.into_iter().map(|(n, i)| (n, i - 1)).collect(); let (edge_by_idx, _) = parse_index_map(&self.dir.join("edges.txt"))?; diff --git a/src/graph/mod.rs b/src/graph/mod.rs index 05b0125..b9278f7 100644 --- a/src/graph/mod.rs +++ b/src/graph/mod.rs @@ -2,7 +2,7 @@ pub mod inmemory; -pub use inmemory::{InMemory, InMemoryBuilder, InMemoryGraph}; +pub use inmemory::{GraphMetadata, InMemory, InMemoryBuilder, InMemoryGraph}; use std::marker::PhantomData; use std::sync::{Arc, Once}; @@ -11,6 +11,10 @@ use crate::{grb_ok, la_ok, lagraph_sys::*}; use thiserror::Error; +pub enum ReduceType { + ReduceByRows, + ReduceByCols, +} #[derive(Debug, Error)] pub enum GraphError { /// A GraphBLAS C call returned a non-SUCCESS info code. 
@@ -112,6 +116,19 @@ impl LagraphGraph { pub fn check_graph(&self) -> Result<(), GraphError> { la_ok!(LAGraph_CheckGraph(self.inner)) } + + pub fn reduces_nvals(&self, reduce_type: ReduceType) -> Result { + let mut n: GrB_Index = 0; + let matrix = unsafe { &*self.inner }.A; + grb_ok!(LAGraph_RPQMatrix_reduce(&mut n, matrix, reduce_type as u8))?; + Ok(n) + } + pub fn nvals(&self) -> Result { + let mut n: GrB_Index = 0; + let matrix = unsafe { &*self.inner }.A; + grb_ok!(GrB_Matrix_nvals(&mut n, matrix))?; + Ok(n) + } } impl Drop for LagraphGraph { @@ -202,6 +219,7 @@ pub trait GraphDecomposition { /// Translates a matrix index back to a string ID. fn get_node_name(&self, mapped_id: usize) -> Option; fn num_nodes(&self) -> usize; + fn get_meta(&self, label: &str) -> Option<&GraphMetadata>; } /// Associates a backend marker type with a concrete [`GraphBuilder`] and diff --git a/src/lagraph_sys_generated.rs b/src/lagraph_sys_generated.rs index 6be0310..1127fc8 100644 --- a/src/lagraph_sys_generated.rs +++ b/src/lagraph_sys_generated.rs @@ -290,3 +290,10 @@ unsafe extern "C" { unsafe extern "C" { pub fn LAGraph_DestroyRpqMatrixPlan(plan: *mut RPQMatrixPlan) -> GrB_Info; } +unsafe extern "C" { + pub fn LAGraph_RPQMatrix_reduce( + res: *mut GrB_Index, + mat: GrB_Matrix, + reduce_type: u8, + ) -> GrB_Info; +} diff --git a/src/rpq/rpqmatrix.rs b/src/rpq/rpqmatrix.rs deleted file mode 100644 index e551e34..0000000 --- a/src/rpq/rpqmatrix.rs +++ /dev/null @@ -1,214 +0,0 @@ -//! Plan-based RPQ evaluation using `LAGraph_RPQMatrix`. - -use std::ptr::null_mut; - -use egg::{Id, RecExpr, define_language}; -use spargebra::algebra::PropertyPathExpression; -use spargebra::term::TermPattern; - -use crate::graph::{GraphDecomposition, GraphblasVector, ensure_grb_init}; -use crate::lagraph_sys::*; -use crate::rpq::{RpqError, RpqEvaluator, RpqResult}; -use crate::{grb_ok, la_ok}; - -unsafe impl Send for RPQMatrixPlan {} - -define_language! 
{ - pub enum RpqPlan { - Label(String), - "/" = Seq([Id; 2]), - "|" = Alt([Id; 2]), - "*" = Star([Id; 1]), - } -} - -/// Compile a [`PropertyPathExpression`] into [`RecExpr`]. -pub fn to_expr(path: &PropertyPathExpression) -> Result, RpqError> { - let mut expr = RecExpr::default(); - to_expr_aux(path, &mut expr)?; - Ok(expr) -} - -fn to_expr_aux( - path: &PropertyPathExpression, - expr: &mut RecExpr, -) -> Result { - match path { - PropertyPathExpression::NamedNode(nn) => { - Ok(expr.add(RpqPlan::Label(nn.as_str().to_owned()))) - } - - PropertyPathExpression::Sequence(lhs, rhs) => { - let l = to_expr_aux(lhs, expr)?; - let r = to_expr_aux(rhs, expr)?; - Ok(expr.add(RpqPlan::Seq([l, r]))) - } - - PropertyPathExpression::Alternative(lhs, rhs) => { - let l = to_expr_aux(lhs, expr)?; - let r = to_expr_aux(rhs, expr)?; - Ok(expr.add(RpqPlan::Alt([l, r]))) - } - - PropertyPathExpression::ZeroOrMore(inner) => { - let i = to_expr_aux(inner, expr)?; - Ok(expr.add(RpqPlan::Star([i]))) - } - - PropertyPathExpression::OneOrMore(inner) => { - let e = to_expr_aux(inner, expr)?; - let s = expr.add(RpqPlan::Star([e])); - Ok(expr.add(RpqPlan::Seq([e, s]))) - } - - PropertyPathExpression::ZeroOrOne(_) => Err(RpqError::UnsupportedPath( - "ZeroOrOne (?) is not supported by RPQMatrix".into(), - )), - - PropertyPathExpression::Reverse(_) => Err(RpqError::UnsupportedPath( - "Reverse paths are not supported".into(), - )), - - PropertyPathExpression::NegatedPropertySet(_) => Err(RpqError::UnsupportedPath( - "NegatedPropertySet paths are not supported".into(), - )), - } -} - -/// Convert a [`RecExpr`] into the flat [`RPQMatrixPlan`] array that -/// `LAGraph_RPQMatrix` expects. 
-pub fn materialize( - expr: &RecExpr, - graph: &G, -) -> Result, RpqError> { - let null_plan = RPQMatrixPlan { - op: RPQMatrixOp::RPQ_MATRIX_OP_LABEL, - lhs: null_mut(), - rhs: null_mut(), - mat: null_mut(), - res_mat: null_mut(), - }; - let mut plans = vec![null_plan; expr.len()]; - - for (id, node) in expr.as_ref().iter().enumerate() { - plans[id] = match node { - RpqPlan::Label(label) => { - let lg = graph - .get_graph(label) - .map_err(|_| RpqError::LabelNotFound(label.clone()))?; - let mat = unsafe { (*lg.inner).A }; - RPQMatrixPlan { - op: RPQMatrixOp::RPQ_MATRIX_OP_LABEL, - lhs: null_mut(), - rhs: null_mut(), - mat, - res_mat: null_mut(), - } - } - - RpqPlan::Seq([l, r]) => RPQMatrixPlan { - op: RPQMatrixOp::RPQ_MATRIX_OP_CONCAT, - lhs: unsafe { plans.as_mut_ptr().add(usize::from(*l)) }, - rhs: unsafe { plans.as_mut_ptr().add(usize::from(*r)) }, - mat: null_mut(), - res_mat: null_mut(), - }, - - RpqPlan::Alt([l, r]) => RPQMatrixPlan { - op: RPQMatrixOp::RPQ_MATRIX_OP_LOR, - lhs: unsafe { plans.as_mut_ptr().add(usize::from(*l)) }, - rhs: unsafe { plans.as_mut_ptr().add(usize::from(*r)) }, - mat: null_mut(), - res_mat: null_mut(), - }, - - RpqPlan::Star([i]) => RPQMatrixPlan { - op: RPQMatrixOp::RPQ_MATRIX_OP_KLEENE, - lhs: null_mut(), - rhs: unsafe { plans.as_mut_ptr().add(usize::from(*i)) }, - mat: null_mut(), - res_mat: null_mut(), - }, - }; - } - - Ok(plans) -} - -/// RPQ evaluator backed by `LAGraph_RPQMatrix`. 
-pub struct RpqMatrixEvaluator; - -impl RpqEvaluator for RpqMatrixEvaluator { - fn evaluate( - &self, - subject: &TermPattern, - path: &PropertyPathExpression, - object: &TermPattern, - graph: &G, - ) -> Result { - if !matches!(object, TermPattern::Variable(_)) { - return Err(RpqError::UnsupportedPath( - "bound object term is not yet supported by RpqMatrixEvaluator".into(), - )); - } - - ensure_grb_init().map_err(|e| RpqError::GraphBlas(e.to_string()))?; - - let n = graph.num_nodes() as GrB_Index; - - let expr = to_expr(path)?; - - let mut plans = materialize(&expr, graph)?; - let root_ptr = unsafe { plans.as_mut_ptr().add(plans.len() - 1) }; - - let mut nnz: GrB_Index = 0; - la_ok!(LAGraph_RPQMatrix(&mut nnz, root_ptr)) - .map_err(|e| RpqError::GraphBlas(e.to_string()))?; - - let res_mat = unsafe { (*root_ptr).res_mat }; - - let src = unsafe { - GraphblasVector::new_bool(n).map_err(|e| RpqError::GraphBlas(e.to_string()))? - }; - match subject { - TermPattern::NamedNode(nn) => { - let id = graph - .get_node_id(nn.as_str()) - .ok_or_else(|| RpqError::VertexNotFound(nn.as_str().to_owned()))? - as GrB_Index; - grb_ok!(GrB_Vector_setElement_BOOL(src.inner, true, id)) - .map_err(|e| RpqError::GraphBlas(e.to_string()))?; - } - TermPattern::Variable(_) => { - for i in 0..n { - grb_ok!(GrB_Vector_setElement_BOOL(src.inner, true, i)) - .map_err(|e| RpqError::GraphBlas(e.to_string()))?; - } - } - _ => { - return Err(RpqError::UnsupportedPath( - "subject must be a variable or named node".into(), - )); - } - } - - let result = unsafe { - GraphblasVector::new_bool(n).map_err(|e| RpqError::GraphBlas(e.to_string()))? 
- }; - grb_ok!(GrB_vxm( - result.inner, - null_mut(), - null_mut(), - GrB_LOR_LAND_SEMIRING_BOOL, - src.inner, - res_mat, - null_mut(), - )) - .map_err(|e| RpqError::GraphBlas(e.to_string()))?; - - grb_ok!(LAGraph_DestroyRpqMatrixPlan(root_ptr)) - .map_err(|e| RpqError::GraphBlas(e.to_string()))?; - - Ok(RpqResult { reachable: result }) - } -} diff --git a/src/rpq/rpqmatrix/eval.rs b/src/rpq/rpqmatrix/eval.rs new file mode 100644 index 0000000..8d5b548 --- /dev/null +++ b/src/rpq/rpqmatrix/eval.rs @@ -0,0 +1,92 @@ +//! Plan-based RPQ evaluation using `LAGraph_RPQMatrix`. + +use std::ptr::null_mut; + +use spargebra::algebra::PropertyPathExpression; +use spargebra::term::TermPattern; + +use super::plan::{materialize, to_expr}; +use crate::graph::{GraphDecomposition, GraphblasVector, ensure_grb_init}; +use crate::lagraph_sys::*; +use crate::rpq::{RpqError, RpqEvaluator, RpqResult}; +use crate::{grb_ok, la_ok}; + +unsafe impl Send for RPQMatrixPlan {} + +/// RPQ evaluator backed by `LAGraph_RPQMatrix`. +pub struct RpqMatrixEvaluator; + +impl RpqEvaluator for RpqMatrixEvaluator { + fn evaluate( + &self, + subject: &TermPattern, + path: &PropertyPathExpression, + object: &TermPattern, + graph: &G, + ) -> Result { + if !matches!(object, TermPattern::Variable(_)) { + return Err(RpqError::UnsupportedPath( + "bound object term is not yet supported by RpqMatrixEvaluator".into(), + )); + } + + ensure_grb_init().map_err(|e| RpqError::GraphBlas(e.to_string()))?; + + let n = graph.num_nodes() as GrB_Index; + + let expr = to_expr(graph, path)?; + + let mut plans = materialize(&expr, graph)?; + let root_ptr = unsafe { plans.as_mut_ptr().add(plans.len() - 1) }; + + let mut nnz: GrB_Index = 0; + la_ok!(LAGraph_RPQMatrix(&mut nnz, root_ptr)) + .map_err(|e| RpqError::GraphBlas(e.to_string()))?; + + let res_mat = unsafe { (*root_ptr).res_mat }; + + let src = unsafe { + GraphblasVector::new_bool(n).map_err(|e| RpqError::GraphBlas(e.to_string()))? 
+ }; + match subject { + TermPattern::NamedNode(nn) => { + let id = graph + .get_node_id(nn.as_str()) + .ok_or_else(|| RpqError::VertexNotFound(nn.as_str().to_owned()))? + as GrB_Index; + grb_ok!(GrB_Vector_setElement_BOOL(src.inner, true, id)) + .map_err(|e| RpqError::GraphBlas(e.to_string()))?; + } + TermPattern::Variable(_) => { + for i in 0..n { + grb_ok!(GrB_Vector_setElement_BOOL(src.inner, true, i)) + .map_err(|e| RpqError::GraphBlas(e.to_string()))?; + } + } + _ => { + return Err(RpqError::UnsupportedPath( + "subject must be a variable or named node".into(), + )); + } + } + + let result = unsafe { + GraphblasVector::new_bool(n).map_err(|e| RpqError::GraphBlas(e.to_string()))? + }; + grb_ok!(GrB_vxm( + result.inner, + null_mut(), + null_mut(), + GrB_LOR_LAND_SEMIRING_BOOL, + src.inner, + res_mat, + null_mut(), + )) + .map_err(|e| RpqError::GraphBlas(e.to_string()))?; + + grb_ok!(LAGraph_DestroyRpqMatrixPlan(root_ptr)) + .map_err(|e| RpqError::GraphBlas(e.to_string()))?; + + Ok(RpqResult { reachable: result }) + } +} diff --git a/src/rpq/rpqmatrix/mod.rs b/src/rpq/rpqmatrix/mod.rs new file mode 100644 index 0000000..0aaefd0 --- /dev/null +++ b/src/rpq/rpqmatrix/mod.rs @@ -0,0 +1,3 @@ +pub mod eval; +mod optimizer; +mod plan; \ No newline at end of file diff --git a/src/rpq/rpqmatrix/optimizer.rs b/src/rpq/rpqmatrix/optimizer.rs new file mode 100644 index 0000000..b5bc737 --- /dev/null +++ b/src/rpq/rpqmatrix/optimizer.rs @@ -0,0 +1,287 @@ +use std::{cmp::Ordering, fmt::Display, str::FromStr}; +use egg::{Id, define_language}; +use egg::*; + +#[derive(Clone, Hash, Ord, Eq, PartialEq, PartialOrd, Debug)] +pub struct LabelMeta { + pub name: String, + pub nvals: usize, + pub rreduce_nvals: usize, + pub creduce_nvals: usize, +} + +impl FromStr for LabelMeta { + type Err = ::Err; + // This is needed for the builtin egg parser. Only used in tests. 
+ fn from_str(s: &str) -> Result { + Ok(LabelMeta { + name: "-".to_string(), + nvals: s.parse()?, + rreduce_nvals: s.parse()?, + creduce_nvals: s.parse()?, + }) + } +} + +impl Display for LabelMeta { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "({}, {})", self.name, self.nvals) + } +} + +define_language! { +pub(super) enum RpqPlan { + Label(LabelMeta), + "/" = Seq([egg::Id; 2]), + "|" = Alt([egg::Id; 2]), + "*" = Star([egg::Id; 1]), + "l*" = LStar([egg::Id; 2]), + "*r" = RStar([egg::Id; 2]), +} } + +pub fn make_rules() -> Vec> { + vec![ + rewrite!("assoc-sec-1"; "(/ ?a (/ ?b ?c))" => "(/ (/ ?a ?b) ?c)"), + rewrite!("assoc-sec-2"; "(/ (/ ?a ?b) ?c)" => "(/ ?a (/ ?b ?c))"), + rewrite!("commute-alt"; "(| ?a ?b)" => "(| ?b ?a)"), + rewrite!("assoc-alt"; "(| ?a (| ?b ?c))" => "(| (| ?a ?b) ?c)"), + rewrite!("distribute-1"; "(/ ?a (| ?b ?c))" => "(| (/ ?a ?b) (/ ?a ?c))"), + rewrite!("distribute-2"; "(/ (| ?a ?b) ?c)" => "(| (/ ?a ?c) (/ ?b ?c))"), + rewrite!("distribute-3"; "(| (/ ?a ?b) (/ ?a ?c))" => "(/ ?a (| ?b ?c))"), + rewrite!("distribute-4"; "(| (/ ?a ?c) (/ ?b ?c))" => "(/ (| ?a ?b) ?c)"), + rewrite!("build-lstar"; "(/ (* ?a) ?b)" => "(l* ?a ?b)"), + rewrite!("build-rstar"; "(/ ?a (* ?b))" => "(*r ?a ?b)"), + ] +} + +// pub fn make_stupid_rules() -> Vec> { +// vec![ +// rewrite!("assoc-sec-1"; "(/ ?a (/ ?b ?c))" => "(/ (/ ?a ?b) ?c)"), +// rewrite!("assoc-sec-2"; "(/ (/ ?a ?b) ?c)" => "(/ ?a (/ ?b ?c))"), +// rewrite!("commute-alt"; "(| ?a ?b)" => "(| ?b ?a)"), +// rewrite!("assoc-alt"; "(| ?a (| ?b ?c))" => "(| (| ?a ?b) ?c)"), +// ] +// } + +pub struct RandomCostFn; +impl CostFunction for RandomCostFn { + type Cost = f64; + fn cost(&mut self, _enode: &RpqPlan, _costs: C) -> Self::Cost + where + C: FnMut(Id) -> Self::Cost, + { + rand::random() + } +} + +#[derive(Clone, Debug, PartialEq)] +pub struct CardCost { + pub score: f64, + pub nnz: f64, + pub nnz_r: f64, + pub nnz_c: f64, +} + +impl Eq for CardCost {} + +impl 
PartialOrd for CardCost { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for CardCost { + fn cmp(&self, other: &Self) -> Ordering { + match self.score.total_cmp(&other.score) { + Ordering::Equal => {} + ord => return ord, + } + match self.nnz.total_cmp(&other.nnz) { + Ordering::Equal => {} + ord => return ord, + } + match self.nnz_r.total_cmp(&other.nnz_r) { + Ordering::Equal => {} + ord => return ord, + } + self.nnz_c.total_cmp(&other.nnz_c) + } +} + +pub struct CardinalityCostFn { + pub n: f64, + pub star_penalty: f64, + pub lr_multiplier: f64, +} + +// TODO: check value intervals +impl CostFunction for CardinalityCostFn { + type Cost = CardCost; + + fn cost(&mut self, enode: &RpqPlan, mut costs: C) -> Self::Cost + where + C: FnMut(Id) -> Self::Cost, + { + match enode { + RpqPlan::Label(meta) => CardCost { + score: 0.0, + nnz: meta.nvals as f64, + nnz_r: meta.rreduce_nvals as f64, + nnz_c: meta.creduce_nvals as f64, + }, + + RpqPlan::Seq([a, b]) => { + let ca = costs(*a); + let cb = costs(*b); + + let denom = ca.nnz_r.max(cb.nnz_c).max(1.0); + let op_cost = (ca.nnz * cb.nnz) / denom; + let score = ca.score + cb.score + op_cost; + + let nnz_est = ca.nnz * cb.nnz / (self.n * self.n); + + CardCost { + score, + nnz: nnz_est, + nnz_r: ca.nnz_r.min(self.n), // TODO: better reduce estimators + nnz_c: cb.nnz_c.min(self.n), // TODO: better reduce estimators + } + } + + RpqPlan::Alt([a, b]) => { + let ca = costs(*a); + let cb = costs(*b); + + let overlap = (ca.nnz * cb.nnz) / (self.n * self.n); + let op_cost = ca.nnz + cb.nnz - overlap; + let score = ca.score + cb.score + op_cost; + + let nnz_est = (ca.nnz + cb.nnz - overlap).min(self.n * self.n).max(0.0); + + let nnz_r_est = (ca.nnz_r + cb.nnz_r - (ca.nnz_r * cb.nnz_r) / self.n) + .min(self.n) + .max(0.0); + + let nnz_c_est = (ca.nnz_c + cb.nnz_c - (ca.nnz_c * cb.nnz_c) / self.n) + .min(self.n) + .max(0.0); + + CardCost { + score, + nnz: nnz_est, + nnz_r: nnz_r_est, + nnz_c: 
nnz_c_est, + } + } + + RpqPlan::Star([a]) => { + let ca = costs(*a); + + let penalty = self.star_penalty * ca.nnz.max(1.0); + let score = ca.score + penalty; + + CardCost { + score, + nnz: self.n * self.n, + nnz_r: self.n, + nnz_c: self.n, + } + } + + RpqPlan::LStar([a, b]) => { + let ca = costs(*a); + let cb = costs(*b); + + let denom = ca.nnz_r.max(cb.nnz_c).max(1.0); + let base = (ca.nnz * cb.nnz) / denom; + let op_cost = self.lr_multiplier * base; + let score = ca.score + cb.score + op_cost; + + let nnz_est = self.lr_multiplier * ca.nnz * cb.nnz / (self.n * self.n); + + CardCost { + score, + nnz: nnz_est, + nnz_r: ca.nnz_r.min(self.n), // TODO: better reduce estimators + nnz_c: cb.nnz_c.min(self.n), // TODO: better reduce estimators + } + } + + RpqPlan::RStar([a, b]) => { + let ca = costs(*a); + let cb = costs(*b); + + let denom = ca.nnz_r.max(cb.nnz_c).max(1.0); + let base = (ca.nnz * cb.nnz) / denom; + + let op_cost = self.lr_multiplier * base; + let score = ca.score + cb.score + op_cost; + + let nnz_est = self.lr_multiplier * ca.nnz * cb.nnz / (self.n * self.n); + + CardCost { + score, + nnz: nnz_est, + nnz_r: ca.nnz_r.min(self.n), // TODO: better reduce estimators + nnz_c: cb.nnz_c.min(self.n), // TODO: better reduce estimators + } + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use expect_test::expect; + + pub struct CostFn; + impl CostFunction for CostFn { + type Cost = f64; + fn cost(&mut self, enode: &RpqPlan, mut costs: C) -> Self::Cost + where + C: FnMut(Id) -> Self::Cost, + { + match enode { + RpqPlan::Label(meta) => meta.nvals as f64, + RpqPlan::Seq(args) => costs(args[0]).min(costs(args[1])).powf(1.1), + RpqPlan::Alt(args) => costs(args[0]).min(costs(args[1])).powf(1.1), + RpqPlan::Star(args) => costs(args[0]).powi(3), + RpqPlan::LStar(args) => costs(args[0]) * costs(args[1]), + RpqPlan::RStar(args) => costs(args[0]) * costs(args[1]), + } + } + } + + fn test_simplify(s: String) -> String { + let expr = s.parse().unwrap(); + let runner 
= Runner::default().with_expr(&expr).run(&make_rules()); + let cost_func = CostFn; + let extractor = Extractor::new(&runner.egraph, cost_func); + extractor.find_best(runner.roots[0]).1.to_string() + } + + #[test] + fn test_basic_seq_1() { + expect![[r#"(/ "(-, 1)" (/ "(-, 2)" (/ "(-, 3)" "(-, 4)")))"#]] + .assert_eq(test_simplify("(/ (/ (/ 1 2) 3) 4)".to_string()).as_str()); + } + + #[test] + fn test_basic_seq_2() { + expect![[r#"(/ "(-, 4)" (/ "(-, 3)" (/ "(-, 2)" "(-, 1)")))"#]] + .assert_eq(test_simplify("(/ (/ (/ 4 3) 2) 1)".to_string()).as_str()); + } + + #[test] + fn test_basic_alt_1() { + expect![[r#"(| "(-, 2)" (| "(-, 4)" (| "(-, 1)" "(-, 3)")))"#]] + .assert_eq(test_simplify("(| (| (| 1 2) 3) 4)".to_string()).as_str()); + } + + #[test] + fn test_basic_alt_2() { + expect![[r#"(| "(-, 3)" (| "(-, 1)" (| "(-, 4)" "(-, 2)")))"#]] + .assert_eq(test_simplify("(| (| (| 4 3) 2) 1)".to_string()).as_str()); + } +} diff --git a/src/rpq/rpqmatrix/plan.rs b/src/rpq/rpqmatrix/plan.rs new file mode 100644 index 0000000..b133889 --- /dev/null +++ b/src/rpq/rpqmatrix/plan.rs @@ -0,0 +1,146 @@ +use std::ptr::null_mut; + +use egg::{Id, RecExpr}; +use spargebra::algebra::PropertyPathExpression; + +use super::optimizer::RpqPlan; +use crate::graph::{GraphDecomposition,GraphError}; +use crate::lagraph_sys::*; +use crate::rpq::rpqmatrix::optimizer::LabelMeta; +use crate::rpq::{RpqError,}; + +/// Compile a [`PropertyPathExpression`] into [`RecExpr`]. 
+pub(super) fn to_expr(graph: &G, path: &PropertyPathExpression) -> Result, RpqError> { + let mut expr = RecExpr::default(); + to_expr_aux(path, &mut expr,graph)?; + Ok(expr) +} + +pub(super) fn to_expr_aux( + // 2) в граф добавить каталоги + path: &PropertyPathExpression, + expr: &mut RecExpr, + graph: &G, +) -> Result { + match path { + PropertyPathExpression::NamedNode(nn) => { + let label = nn.clone().into_string(); + let meta = (graph.get_meta(&label)).ok_or_else(|| RpqError::LabelNotFound(label.to_owned()))?; + Ok(expr.add(RpqPlan::Label(LabelMeta{ + name: label, + nvals: graph.num_nodes(), + rreduce_nvals: meta.row_projections, + creduce_nvals: meta.column_projections, + }))) + } + + PropertyPathExpression::Sequence(lhs, rhs) => { + let l = to_expr_aux(lhs, expr, graph)?; + let r = to_expr_aux(rhs, expr, graph)?; + Ok(expr.add(RpqPlan::Seq([l, r]))) + } + + PropertyPathExpression::Alternative(lhs, rhs) => { + let l = to_expr_aux(lhs, expr, graph)?; + let r = to_expr_aux(rhs, expr, graph)?; + Ok(expr.add(RpqPlan::Alt([l, r]))) + } + + PropertyPathExpression::ZeroOrMore(inner) => { + let i = to_expr_aux(inner, expr, graph)?; + Ok(expr.add(RpqPlan::Star([i]))) + } + + PropertyPathExpression::OneOrMore(inner) => { + let e = to_expr_aux(inner, expr, graph)?; + let s = expr.add(RpqPlan::Star([e])); + Ok(expr.add(RpqPlan::Seq([e, s]))) + } + + PropertyPathExpression::ZeroOrOne(_) => Err(RpqError::UnsupportedPath( + "ZeroOrOne (?) is not supported by RPQMatrix".into(), + )), + + PropertyPathExpression::Reverse(_) => Err(RpqError::UnsupportedPath( + "Reverse paths are not supported".into(), + )), + + PropertyPathExpression::NegatedPropertySet(_) => Err(RpqError::UnsupportedPath( + "NegatedPropertySet paths are not supported".into(), + )), + } +} + +/// Convert a [`RecExpr`] into the flat [`RPQMatrixPlan`] array that +/// `LAGraph_RPQMatrix` expects. 
+pub fn materialize( + expr: &RecExpr, + graph: &G, +) -> Result, RpqError> { + let null_plan = RPQMatrixPlan { + op: RPQMatrixOp::RPQ_MATRIX_OP_LABEL, + lhs: null_mut(), + rhs: null_mut(), + mat: null_mut(), + res_mat: null_mut(), + }; + let mut plans = vec![null_plan; expr.len()]; + + for (id, node) in expr.as_ref().iter().enumerate() { + plans[id] = match node { + RpqPlan::Label(label) => { + let lg = graph + .get_graph(&label.name) + .map_err(|_| RpqError::LabelNotFound(label.name.clone()))?; + let mat = unsafe { (*lg.inner).A }; + RPQMatrixPlan { + op: RPQMatrixOp::RPQ_MATRIX_OP_LABEL, + lhs: null_mut(), + rhs: null_mut(), + mat, + res_mat: null_mut(), + } + } + + RpqPlan::Seq([l, r]) => RPQMatrixPlan { + op: RPQMatrixOp::RPQ_MATRIX_OP_CONCAT, + lhs: unsafe { plans.as_mut_ptr().add(usize::from(*l)) }, + rhs: unsafe { plans.as_mut_ptr().add(usize::from(*r)) }, + mat: null_mut(), + res_mat: null_mut(), + }, + + RpqPlan::Alt([l, r]) => RPQMatrixPlan { + op: RPQMatrixOp::RPQ_MATRIX_OP_LOR, + lhs: unsafe { plans.as_mut_ptr().add(usize::from(*l)) }, + rhs: unsafe { plans.as_mut_ptr().add(usize::from(*r)) }, + mat: null_mut(), + res_mat: null_mut(), + }, + + RpqPlan::Star([i]) => RPQMatrixPlan { + op: RPQMatrixOp::RPQ_MATRIX_OP_KLEENE, + lhs: null_mut(), + rhs: unsafe { plans.as_mut_ptr().add(usize::from(*i)) }, + mat: null_mut(), + res_mat: null_mut(), + }, + RpqPlan::RStar([l, r]) => RPQMatrixPlan { + op: RPQMatrixOp::RPQ_MATRIX_OP_KLEENE_R, + lhs: unsafe { plans.as_mut_ptr().add(usize::from(*l)) }, + rhs: unsafe { plans.as_mut_ptr().add(usize::from(*r)) }, + mat: null_mut(), + res_mat: null_mut(), + }, + RpqPlan::LStar([l, r]) => RPQMatrixPlan { + op: RPQMatrixOp::RPQ_MATRIX_OP_KLEENE_L, + lhs: unsafe { plans.as_mut_ptr().add(usize::from(*l)) }, + rhs: unsafe { plans.as_mut_ptr().add(usize::from(*r)) }, + mat: null_mut(), + res_mat: null_mut(), + }, + }; + } + + Ok(plans) +} diff --git a/src/utils.rs b/src/utils.rs index 92846ca..6dc84e5 100644 --- a/src/utils.rs 
+++ b/src/utils.rs @@ -48,6 +48,10 @@ impl GraphDecomposition for CountOutput { fn num_nodes(&self) -> usize { self.0 } + + fn get_meta(&self, _label: &str) -> Option<&GraphMetadata> { + None + } } /// A minimal [`GraphBuilder`] that counts pushed edges and produces a [`CountOutput`]. diff --git a/tests/rpqmatrix_tests.rs b/tests/rpqmatrix_tests.rs index ab4f766..45eb6af 100644 --- a/tests/rpqmatrix_tests.rs +++ b/tests/rpqmatrix_tests.rs @@ -1,6 +1,6 @@ use pathrex::graph::GraphDecomposition; use pathrex::lagraph_sys::{GrB_Index, GrB_Vector_extractTuples_BOOL, GrB_Vector_nvals}; -use pathrex::rpq::rpqmatrix::RpqMatrixEvaluator; +use pathrex::rpq::rpqmatrix::eval::RpqMatrixEvaluator; use pathrex::rpq::{RpqError, RpqEvaluator, RpqResult}; use pathrex::utils::build_graph; use spargebra::algebra::PropertyPathExpression;