From 925a2b0c201f341acd1c8ccffb9d45975ee415b8 Mon Sep 17 00:00:00 2001 From: Ivan Glazunov Date: Fri, 20 Mar 2026 13:32:33 +0300 Subject: [PATCH 1/2] feat: add parser of simplified sparql --- AGENTS.md | 82 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 23 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index b6088f6..4b15576 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -14,7 +14,7 @@ pathrex/ ├── Cargo.toml # Crate manifest (edition 2024) ├── build.rs # Links LAGraph + LAGraphX; optionally regenerates FFI bindings ├── src/ -│ ├── lib.rs # Public modules: formats, graph, sparql, lagraph_sys, utils +│ ├── lib.rs # Modules: formats, graph, sparql, utils (pub(crate)), lagraph_sys │ ├── main.rs # Binary entry point (placeholder) │ ├── lagraph_sys.rs # FFI module — includes generated bindings │ ├── lagraph_sys_generated.rs# Bindgen output (checked in, regenerated in CI) @@ -121,15 +121,15 @@ regenerates it with `--features regenerate-bindings`. **Do not hand-edit this fi ### Edge -[`Edge`](src/graph/mod.rs:154) is the universal currency between format parsers and graph +[`Edge`](src/graph/mod.rs:158) is the universal currency between format parsers and graph builders: `{ source: String, target: String, label: String }`. ### GraphSource trait -[`GraphSource`](src/graph/mod.rs:164) is implemented by any data source that knows how to +[`GraphSource`](src/graph/mod.rs:168) is implemented by any data source that knows how to feed itself into a specific [`GraphBuilder`]: -- [`apply_to(self, builder: B) -> Result`](src/graph/mod.rs:165) — consumes the +- [`apply_to(self, builder: B) -> Result`](src/graph/mod.rs:169) — consumes the source and returns the populated builder. [`Csv`](src/formats/csv.rs:52) implements `GraphSource` directly, so it @@ -137,24 +137,24 @@ can be passed to [`GraphBuilder::load`]. 
### GraphBuilder trait -[`GraphBuilder`](src/graph/mod.rs:169) accumulates edges and produces a -[`GraphDecomposition`](src/graph/mod.rs:188): +[`GraphBuilder`](src/graph/mod.rs:173) accumulates edges and produces a +[`GraphDecomposition`](src/graph/mod.rs:192): -- [`load>(self, source: S)`](src/graph/mod.rs:179) — primary entry point; +- [`load>(self, source: S)`](src/graph/mod.rs:183) — primary entry point; delegates to `GraphSource::apply_to`. -- [`build(self)`](src/graph/mod.rs:184) — finalise into an immutable graph. +- [`build(self)`](src/graph/mod.rs:188) — finalise into an immutable graph. `InMemoryBuilder` also exposes lower-level helpers outside the trait: -- [`push_edge(&mut self, edge: Edge)`](src/graph/inmemory.rs:62) — ingest one edge. -- [`with_stream(self, stream: I)`](src/graph/inmemory.rs:72) — consume an +- [`push_edge(&mut self, edge: Edge)`](src/graph/inmemory.rs:83) — ingest one edge. +- [`with_stream(self, stream: I)`](src/graph/inmemory.rs:93) — consume an `IntoIterator>`. -- [`push_grb_matrix(&mut self, label, matrix: GrB_Matrix)`](src/graph/inmemory.rs:85) — accept +- [`push_grb_matrix(&mut self, label, matrix: GrB_Matrix)`](src/graph/inmemory.rs:106) — accept a pre-built `GrB_Matrix` for a label, wrapping it in an `LAGraph_Graph` immediately. ### Backend trait & Graph\ handle -[`Backend`](src/graph/mod.rs:217) associates a marker type with a concrete builder/graph pair: +[`Backend`](src/graph/mod.rs:221) associates a marker type with a concrete builder/graph pair: ```rust pub trait Backend { @@ -163,28 +163,28 @@ pub trait Backend { } ``` -[`Graph`](src/graph/mod.rs:229) is a zero-sized handle parameterised by a `Backend`: +[`Graph`](src/graph/mod.rs:233) is a zero-sized handle parameterised by a `Backend`: -- [`Graph::::builder()`](src/graph/mod.rs:234) — returns a fresh `InMemoryBuilder`. 
-- [`Graph::::try_from(source)`](src/graph/mod.rs:238) — builds a graph from a single +- [`Graph::::builder()`](src/graph/mod.rs:238) — returns a fresh `InMemoryBuilder`. +- [`Graph::::try_from(source)`](src/graph/mod.rs:242) — builds a graph from a single source in one call. -[`InMemory`](src/graph/inmemory.rs:26) is the concrete backend marker type. +[`InMemory`](src/graph/inmemory.rs:27) is the concrete backend marker type. ### GraphDecomposition trait -[`GraphDecomposition`](src/graph/mod.rs:188) is the read-only query interface: +[`GraphDecomposition`](src/graph/mod.rs:192) is the read-only query interface: -- [`get_graph(label)`](src/graph/mod.rs:192) — returns `Arc` for a given edge label. -- [`get_node_id(string_id)`](src/graph/mod.rs:195) / [`get_node_name(mapped_id)`](src/graph/mod.rs:198) — bidirectional string ↔ integer dictionary. -- [`num_nodes()`](src/graph/mod.rs:199) — total unique nodes. +- [`get_graph(label)`](src/graph/mod.rs:196) — returns `Arc` for a given edge label. +- [`get_node_id(string_id)`](src/graph/mod.rs:199) / [`get_node_name(mapped_id)`](src/graph/mod.rs:202) — bidirectional string ↔ integer dictionary. +- [`num_nodes()`](src/graph/mod.rs:203) — total unique nodes. ### InMemoryBuilder / InMemoryGraph -[`InMemoryBuilder`](src/graph/inmemory.rs:35) is the primary `GraphBuilder` implementation. +[`InMemoryBuilder`](src/graph/inmemory.rs:36) is the primary `GraphBuilder` implementation. It collects edges in RAM, then [`build()`](src/graph/inmemory.rs:131) calls GraphBLAS to create one `GrB_Matrix` per label via COO format, wraps each in an -`LAGraph_Graph`, and returns an [`InMemoryGraph`](src/graph/inmemory.rs:173). +`LAGraph_Graph`, and returns an [`InMemoryGraph`](src/graph/inmemory.rs:174). Multiple CSV sources can be chained with repeated `.load()` calls; all edges are merged into a single graph. @@ -196,7 +196,7 @@ which is used by the MatrixMarket loader. 
### Format parsers -Two built-in parsers are available: +CSV and MatrixMarket edge loaders are available: #### CSV format @@ -274,6 +274,42 @@ and the parsed query contains full IRIs sharing a common prefix. The module handles spargebra's desugaring of sequence paths (`?x // ?y`) from a chain of BGP triples back into a single path expression. +### SPARQL parsing (`src/sparql/mod.rs`) + +The [`sparql`](src/sparql/mod.rs) module uses the [`spargebra`](https://crates.io/crates/spargebra) +crate to parse SPARQL 1.1 query strings and extract the single property-path +triple pattern that pathrex's RPQ evaluators operate on. + +**Supported query form:** `SELECT` queries with exactly one triple or property +path pattern in the `WHERE` clause, e.g.: + +```sparql +SELECT ?x ?y WHERE { ?x /* ?y . } +``` + +Key public items: + +- [`parse_query(sparql)`](src/sparql/mod.rs:45) — parses a SPARQL string into a + [`spargebra::Query`]. +- [`extract_path(query)`](src/sparql/mod.rs:67) — validates a parsed `Query` is a + `SELECT` with a single path pattern and returns a [`PathTriple`](src/sparql/mod.rs:56). +- [`parse_rpq(sparql)`](src/sparql/mod.rs:190) — convenience function combining + `parse_query` + `extract_path` in one call. +- [`PathTriple`](src/sparql/mod.rs:56) — holds the extracted `subject` + ([`TermPattern`]), `path` ([`PropertyPathExpression`]), and `object` + ([`TermPattern`]). +- [`ExtractError`](src/sparql/mod.rs:25) — error enum for extraction failures + (`NotSelect`, `NotSinglePath`, `UnsupportedSubject`, `UnsupportedObject`, + `VariablePredicate`). +- [`RpqParseError`](src/sparql/mod.rs:198) — combined error for [`parse_rpq`] + wrapping both [`SparqlSyntaxError`] and [`ExtractError`]. +- [`DEFAULT_BASE_IRI`](src/sparql/mod.rs:38) — `"http://example.org/"`, the + default base IRI constant. + +The module also handles spargebra's desugaring of sequence paths +(`?x // ?y`) from a chain of BGP triples back into a single +[`PropertyPathExpression::Sequence`]. 
+ ### FFI layer [`lagraph_sys`](src/lagraph_sys.rs) exposes raw C bindings for GraphBLAS and From 86d86f8df9588a10c0f0f6a13c97ec061f62ebff Mon Sep 17 00:00:00 2001 From: Ivan Glazunov Date: Fri, 20 Mar 2026 13:49:25 +0300 Subject: [PATCH 2/2] feat: add rpq evaluation with LAGraph_RegularPathQuery --- AGENTS.md | 132 ++++++++----- Cargo.toml | 1 + build.rs | 2 + src/graph/mod.rs | 1 + src/lagraph_sys_generated.rs | 45 +++++ src/rpq/mod.rs | 2 + src/rpq/nfarpq.rs | 345 +++++++++++++++++++++++++++++++++ tests/nfarpq_tests.rs | 361 +++++++++++++++++++++++++++++++++++ 8 files changed, 842 insertions(+), 47 deletions(-) create mode 100644 src/rpq/nfarpq.rs create mode 100644 tests/nfarpq_tests.rs diff --git a/AGENTS.md b/AGENTS.md index 4b15576..bc9c0b5 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -14,16 +14,20 @@ pathrex/ ├── Cargo.toml # Crate manifest (edition 2024) ├── build.rs # Links LAGraph + LAGraphX; optionally regenerates FFI bindings ├── src/ -│ ├── lib.rs # Modules: formats, graph, sparql, utils (pub(crate)), lagraph_sys +│ ├── lib.rs # Modules: formats, graph, rpq, sparql, utils, lagraph_sys │ ├── main.rs # Binary entry point (placeholder) │ ├── lagraph_sys.rs # FFI module — includes generated bindings │ ├── lagraph_sys_generated.rs# Bindgen output (checked in, regenerated in CI) -│ ├── utils.rs # Internal helpers: CountingBuilder, CountOutput, VecSource, -│ │ # grb_ok! and la_ok! macros +│ ├── utils.rs # Public helpers: CountingBuilder, CountOutput, VecSource, +│ │ # grb_ok! and la_ok! macros, build_graph │ ├── graph/ │ │ ├── mod.rs # Core traits (GraphBuilder, GraphDecomposition, GraphSource, │ │ │ # Backend, Graph), error types, RAII wrappers, GrB init │ │ └── inmemory.rs # InMemory marker, InMemoryBuilder, InMemoryGraph +│ ├── rpq/ +│ │ ├── mod.rs # RpqEvaluator (assoc. 
Result), RpqQuery, Endpoint, PathExpr, RpqError +│ │ ├── nfarpq.rs # NfaRpqEvaluator (LAGraph_RegularPathQuery) +│ │ └── rpqmatrix.rs # Matrix-plan RPQ evaluator │ ├── sparql/ │ │ └── mod.rs # parse_rpq / extract_rpq → RpqQuery (spargebra) │ └── formats/ @@ -32,7 +36,9 @@ pathrex/ │ └── mm.rs # MatrixMarket directory loader (vertices.txt, edges.txt, *.txt) ├── tests/ │ ├── inmemory_tests.rs # Integration tests for InMemoryBuilder / InMemoryGraph -│ └── mm_tests.rs # Integration tests for MatrixMarket format +│ ├── mm_tests.rs # Integration tests for MatrixMarket format +│ ├── nfarpq_tests.rs # Integration tests for NfaRpqEvaluator +│ └── rpqmatrix_tests.rs # Integration tests for matrix-plan RPQ evaluator ├── deps/ │ └── LAGraph/ # Git submodule (SparseLinearAlgebra/LAGraph) └── .github/workflows/ci.yml # CI: build GraphBLAS + LAGraph, cargo build & test @@ -138,7 +144,7 @@ can be passed to [`GraphBuilder::load`]. ### GraphBuilder trait [`GraphBuilder`](src/graph/mod.rs:173) accumulates edges and produces a -[`GraphDecomposition`](src/graph/mod.rs:192): +[`GraphDecomposition`](src/graph/mod.rs:193): - [`load>(self, source: S)`](src/graph/mod.rs:183) — primary entry point; delegates to `GraphSource::apply_to`. @@ -173,11 +179,11 @@ pub trait Backend { ### GraphDecomposition trait -[`GraphDecomposition`](src/graph/mod.rs:192) is the read-only query interface: +[`GraphDecomposition`](src/graph/mod.rs:193) is the read-only query interface: -- [`get_graph(label)`](src/graph/mod.rs:196) — returns `Arc` for a given edge label. -- [`get_node_id(string_id)`](src/graph/mod.rs:199) / [`get_node_name(mapped_id)`](src/graph/mod.rs:202) — bidirectional string ↔ integer dictionary. -- [`num_nodes()`](src/graph/mod.rs:203) — total unique nodes. +- [`get_graph(label)`](src/graph/mod.rs:197) — returns `Arc` for a given edge label. +- [`get_node_id(string_id)`](src/graph/mod.rs:200) / [`get_node_name(mapped_id)`](src/graph/mod.rs:203) — bidirectional string ↔ integer dictionary. 
+- [`num_nodes()`](src/graph/mod.rs:204) — total unique nodes. ### InMemoryBuilder / InMemoryGraph @@ -220,7 +226,7 @@ Name-based lookup requires `has_header: true`. [`MatrixMarket`](src/formats/mm.rs) loads an edge-labeled graph from a directory with: -- `vertices.txt` — one line per node: ` <1-based-index>` on disk; [`get_node_id`](src/graph/mod.rs:199) returns the matching **0-based** matrix index +- `vertices.txt` — one line per node: ` <1-based-index>` on disk; [`get_node_id`](src/graph/mod.rs:200) returns the matching **0-based** matrix index - `edges.txt` — one line per label: ` <1-based-index>` (selects `n.txt`) - `.txt` — MatrixMarket adjacency matrix for label with index `n` @@ -274,41 +280,63 @@ and the parsed query contains full IRIs sharing a common prefix. The module handles spargebra's desugaring of sequence paths (`?x // ?y`) from a chain of BGP triples back into a single path expression. -### SPARQL parsing (`src/sparql/mod.rs`) - -The [`sparql`](src/sparql/mod.rs) module uses the [`spargebra`](https://crates.io/crates/spargebra) -crate to parse SPARQL 1.1 query strings and extract the single property-path -triple pattern that pathrex's RPQ evaluators operate on. +### RPQ evaluation (`src/rpq/`) -**Supported query form:** `SELECT` queries with exactly one triple or property -path pattern in the `WHERE` clause, e.g.: - -```sparql -SELECT ?x ?y WHERE { ?x /* ?y . } -``` +The [`rpq`](src/rpq/mod.rs) module provides an abstraction for evaluating +Regular Path Queries (RPQs) over edge-labeled graphs using GraphBLAS/LAGraph. Key public items: -- [`parse_query(sparql)`](src/sparql/mod.rs:45) — parses a SPARQL string into a - [`spargebra::Query`]. -- [`extract_path(query)`](src/sparql/mod.rs:67) — validates a parsed `Query` is a - `SELECT` with a single path pattern and returns a [`PathTriple`](src/sparql/mod.rs:56). -- [`parse_rpq(sparql)`](src/sparql/mod.rs:190) — convenience function combining - `parse_query` + `extract_path` in one call. 
-- [`PathTriple`](src/sparql/mod.rs:56) — holds the extracted `subject` - ([`TermPattern`]), `path` ([`PropertyPathExpression`]), and `object` - ([`TermPattern`]). -- [`ExtractError`](src/sparql/mod.rs:25) — error enum for extraction failures - (`NotSelect`, `NotSinglePath`, `UnsupportedSubject`, `UnsupportedObject`, - `VariablePredicate`). -- [`RpqParseError`](src/sparql/mod.rs:198) — combined error for [`parse_rpq`] - wrapping both [`SparqlSyntaxError`] and [`ExtractError`]. -- [`DEFAULT_BASE_IRI`](src/sparql/mod.rs:38) — `"http://example.org/"`, the - default base IRI constant. - -The module also handles spargebra's desugaring of sequence paths -(`?x // ?y`) from a chain of BGP triples back into a single -[`PropertyPathExpression::Sequence`]. +- [`Endpoint`](src/rpq/mod.rs) — `Variable(String)` or `Named(String)` (IRI string). +- [`PathExpr`](src/rpq/mod.rs) — `Label`, `Sequence`, `Alternative`, `ZeroOrMore`, + `OneOrMore`, `ZeroOrOne`. +- [`RpqQuery`](src/rpq/mod.rs) — `{ subject, path, object }` using the types above; + [`strip_base(&mut self, base)`](src/rpq/mod.rs) removes a shared IRI prefix from + named endpoints and labels. +- [`RpqEvaluator`](src/rpq/mod.rs) — trait with associated type `Result` and + [`evaluate(query, graph)`](src/rpq/mod.rs) taking `&RpqQuery` and + [`GraphDecomposition`], returning `Result`. + Each concrete evaluator exposes its own output type (see below). +- [`RpqError`](src/rpq/mod.rs) — unified error type for RPQ parsing and evaluation: + `Parse` (SPARQL syntax), `Extract` (query extraction), `UnsupportedPath`, + `VertexNotFound`, and `Graph` (wraps [`GraphError`](src/graph/mod.rs) for + label-not-found and GraphBLAS/LAGraph failures). + +#### `NfaRpqEvaluator` (`src/rpq/nfarpq.rs`) + +[`NfaRpqEvaluator`](src/rpq/nfarpq.rs) implements [`RpqEvaluator`] by: + +1. Converting a [`PathExpr`] into an [`Nfa`](src/rpq/nfarpq.rs) via Thompson's + construction ([`Nfa::from_path_expr()`](src/rpq/nfarpq.rs)). +2. 
Eliminating ε-transitions with rustfst's `rm_epsilon`, called inside [`Nfa::from_path_expr()`](src/rpq/nfarpq.rs). +3. Building one `LAGraph_Graph` per NFA label transition + ([`Nfa::build_lagraph_matrices()`](src/rpq/nfarpq.rs)). +4. Calling [`LAGraph_RegularPathQuery`] with the NFA matrices, data-graph + matrices, start/final states, and source vertices. + +`type Result = NfaRpqResult` ([`GraphblasVector`] of reachable targets). + +Supported path operators match [`PathExpr`] variants above. `Reverse` and +`NegatedPropertySet` from SPARQL map to [`RpqError::UnsupportedPath`] when they +appear in extracted paths. + +Subject/object resolution: [`Endpoint::Variable`] means "all vertices"; +[`Endpoint::Named`] resolves to a single vertex via +[`GraphDecomposition::get_node_id()`](src/graph/mod.rs:200). + +[`NfaRpqResult`](src/rpq/nfarpq.rs) wraps a [`GraphblasVector`] of reachable **target** +vertices. When the subject is a variable, every vertex is used as a source and +`LAGraph_RegularPathQuery` returns the union of targets — individual `(source, target)` +pairs are not preserved. + +#### `RpqMatrixEvaluator` (`src/rpq/rpqmatrix.rs`) + +[`RpqMatrixEvaluator`](src/rpq/rpqmatrix.rs) compiles [`PathExpr`] into a Boolean matrix plan +over label adjacency matrices and runs [`LAGraph_RPQMatrix`]. It returns +[`RpqMatrixResult`](src/rpq/rpqmatrix.rs): the path-relation `nnz` plus a +[`GraphblasMatrix`] duplicate of the result matrix (full reachability relation for the path). +Subject/object do not filter the matrix; a named subject is only validated to exist. +Bound objects are not supported yet ([`RpqError::UnsupportedPath`]). ### FFI layer @@ -323,7 +351,7 @@ LAGraph. Safe Rust wrappers live in [`graph::mod`](src/graph/mod.rs): - [`GraphblasMatrix`](src/graph/mod.rs) — RAII wrapper around `GrB_Matrix` (`dup` + `free` on drop). - [`ensure_grb_init()`](src/graph/mod.rs:39) — one-time `LAGraph_Init` via `std::sync::Once`. 
-### Macros (`src/utils.rs`) +### Macros & helpers (`src/utils.rs`) Two `#[macro_export]` macros handle FFI error mapping: @@ -333,6 +361,12 @@ Two `#[macro_export]` macros handle FFI error mapping: appending the required `*mut i8` message buffer, and maps failure to `GraphError::LAGraph(info, msg)`. +A convenience function is also provided: + +- [`build_graph(edges)`](src/utils.rs:184) — builds an `InMemoryGraph` from a + slice of `(&str, &str, &str)` triples (source, target, label). Used by + integration tests. + ## Coding Conventions - **Rust edition 2024**. @@ -344,14 +378,13 @@ Two `#[macro_export]` macros handle FFI error mapping: - `GraphError` converts into `RpqError` via `#[from] GraphError` on the `RpqError::Graph` variant, enabling `?` propagation in evaluators. - Unsafe FFI calls are confined to `lagraph_sys`, `graph/mod.rs`, - `graph/inmemory.rs`, `rpq/nfarpq.rs`, and `rpq/rpqmatrix.rs`. All raw pointers are wrapped in + `graph/inmemory.rs`, `rpq/nfarpq.rs`. All raw pointers are wrapped in RAII types that free resources on drop. - `unsafe impl Send + Sync` is provided for `LagraphGraph`, `GraphblasVector`, and `GraphblasMatrix` because GraphBLAS handles are thread-safe after init. - Unit tests live in `#[cfg(test)] mod tests` blocks inside each module. Integration tests that need GraphBLAS live in [`tests/inmemory_tests.rs`](tests/inmemory_tests.rs), - [`tests/mm_tests.rs`](tests/mm_tests.rs), [`tests/nfarpq_tests.rs`](tests/nfarpq_tests.rs), - and [`tests/rpqmatrix_tests.rs`](tests/rpqmatrix_tests.rs). + [`tests/mm_tests.rs`](tests/mm_tests.rs), [`tests/nfarpq_tests.rs`](tests/nfarpq_tests.rs). ## Testing @@ -371,8 +404,13 @@ Tests in `src/formats/csv.rs` are pure Rust and need no native dependencies. Tests in `src/sparql/mod.rs` are pure Rust and need no native dependencies. -Tests in `src/graph/inmemory.rs` and [`tests/inmemory_tests.rs`](tests/inmemory_tests.rs) -call real GraphBLAS/LAGraph and require the native libraries to be present. 
+Tests in `src/rpq/nfarpq.rs` (NFA construction unit tests) are pure Rust and need no +native dependencies. + +Tests in `src/graph/inmemory.rs`, [`tests/inmemory_tests.rs`](tests/inmemory_tests.rs), +[`tests/mm_tests.rs`](tests/mm_tests.rs), [`tests/nfarpq_tests.rs`](tests/nfarpq_tests.rs), +and [`tests/rpqmatrix_tests.rs`](tests/rpqmatrix_tests.rs) call real GraphBLAS/LAGraph and +require the native libraries to be present. ## CI diff --git a/Cargo.toml b/Cargo.toml index 2420d28..c55aa3c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ csv = "1.4.0" libc = "0.2" oxrdf = "0.3.3" oxttl = "0.2.3" +rustfst = "1.2" spargebra = "0.4.6" thiserror = "1.0" diff --git a/build.rs b/build.rs index 475046e..bde7677 100644 --- a/build.rs +++ b/build.rs @@ -66,6 +66,7 @@ fn regenerate_bindings() { .allowlist_function("GrB_Matrix_new") .allowlist_function("GrB_Matrix_nvals") .allowlist_function("GrB_Matrix_free") + .allowlist_function("GrB_Matrix_extractElement_BOOL") .allowlist_function("GrB_Matrix_build_BOOL") .allowlist_function("GrB_Vector_new") .allowlist_function("GrB_Vector_free") @@ -83,6 +84,7 @@ fn regenerate_bindings() { .allowlist_function("LAGraph_Delete") .allowlist_function("LAGraph_Cached_AT") .allowlist_function("LAGraph_MMRead") + .allowlist_function("LAGraph_RegularPathQuery") .default_enum_style(bindgen::EnumVariation::Rust { non_exhaustive: false, }) diff --git a/src/graph/mod.rs b/src/graph/mod.rs index fda7928..514cbb0 100644 --- a/src/graph/mod.rs +++ b/src/graph/mod.rs @@ -125,6 +125,7 @@ impl Drop for LagraphGraph { unsafe impl Send for LagraphGraph {} unsafe impl Sync for LagraphGraph {} +#[derive(Debug)] pub struct GraphblasVector { pub inner: GrB_Vector, } diff --git a/src/lagraph_sys_generated.rs b/src/lagraph_sys_generated.rs index 3201d28..690c1ff 100644 --- a/src/lagraph_sys_generated.rs +++ b/src/lagraph_sys_generated.rs @@ -155,6 +155,9 @@ unsafe extern "C" { ncols: GrB_Index, ) -> GrB_Info; } +unsafe extern "C" { + pub fn 
GrB_Matrix_dup(C: *mut GrB_Matrix, A: GrB_Matrix) -> GrB_Info; +} unsafe extern "C" { pub fn GrB_Matrix_nvals(nvals: *mut GrB_Index, A: GrB_Matrix) -> GrB_Info; } @@ -168,6 +171,14 @@ unsafe extern "C" { dup: GrB_BinaryOp, ) -> GrB_Info; } +unsafe extern "C" { + pub fn GrB_Matrix_extractElement_BOOL( + x: *mut bool, + A: GrB_Matrix, + i: GrB_Index, + j: GrB_Index, + ) -> GrB_Info; +} unsafe extern "C" { pub fn GrB_vxm( w: GrB_Vector, @@ -261,3 +272,37 @@ unsafe extern "C" { msg: *mut ::std::os::raw::c_char, ) -> ::std::os::raw::c_int; } +unsafe extern "C" { + pub fn LAGraph_RegularPathQuery( + reachable: *mut GrB_Vector, + R: *mut LAGraph_Graph, + nl: usize, + QS: *const GrB_Index, + nqs: usize, + QF: *const GrB_Index, + nqf: usize, + G: *mut LAGraph_Graph, + S: *const GrB_Index, + ns: usize, + msg: *mut ::std::os::raw::c_char, + ) -> ::std::os::raw::c_int; +} +#[repr(u32)] +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +pub enum RPQMatrixOp { + RPQ_MATRIX_OP_LABEL = 0, + RPQ_MATRIX_OP_LOR = 1, + RPQ_MATRIX_OP_CONCAT = 2, + RPQ_MATRIX_OP_KLEENE = 3, + RPQ_MATRIX_OP_KLEENE_L = 4, + RPQ_MATRIX_OP_KLEENE_R = 5, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct RPQMatrixPlan { + pub op: RPQMatrixOp, + pub lhs: *mut RPQMatrixPlan, + pub rhs: *mut RPQMatrixPlan, + pub mat: GrB_Matrix, + pub res_mat: GrB_Matrix, +} diff --git a/src/rpq/mod.rs b/src/rpq/mod.rs index 6a8a546..bbf69f5 100644 --- a/src/rpq/mod.rs +++ b/src/rpq/mod.rs @@ -10,6 +10,8 @@ //! let result: NfaRpqResult = NfaRpqEvaluator.evaluate(&query, &graph)?; //! ``` +pub mod nfarpq; + use crate::graph::{GraphDecomposition, GraphError}; use crate::sparql::ExtractError; use spargebra::SparqlSyntaxError; diff --git a/src/rpq/nfarpq.rs b/src/rpq/nfarpq.rs new file mode 100644 index 0000000..61f9951 --- /dev/null +++ b/src/rpq/nfarpq.rs @@ -0,0 +1,345 @@ +//! NFA-based RPQ evaluation using `LAGraph_RegularPathQuery`. 
+ +use crate::graph::{GraphDecomposition, GraphblasVector, LagraphGraph, ensure_grb_init}; +use crate::grb_ok; +use crate::la_ok; +use crate::lagraph_sys::*; +use crate::lagraph_sys::{GrB_BOOL, GrB_LOR, GrB_Matrix_build_BOOL, GrB_Matrix_new, LAGraph_Kind}; +use crate::rpq::{Endpoint, PathExpr, RpqError, RpqEvaluator, RpqQuery}; +use rustfst::algorithms::closure::{ClosureType, closure}; +use rustfst::algorithms::concat::concat; +use rustfst::algorithms::rm_epsilon::rm_epsilon; +use rustfst::algorithms::union::union; +use rustfst::prelude::*; +use rustfst::semirings::TropicalWeight; +use rustfst::utils::{acceptor, epsilon_machine}; +use std::collections::HashMap; + +/// Transitions for a single edge label in the NFA. +/// +/// `rows[i]` and `cols[i]` form a parallel pair: there is a transition from +/// state `rows[i]` to state `cols[i]` on `label`. +#[derive(Debug, Clone)] +pub struct NfaLabelTransitions { + pub label: String, + pub rows: Vec, + pub cols: Vec, +} + +#[derive(Debug, Clone)] +pub struct Nfa { + pub num_states: usize, + pub start_states: Vec, + pub final_states: Vec, + pub transitions: Vec, +} + +struct SymbolTable { + label_to_id: HashMap, + id_to_label: HashMap, + next_id: Label, +} + +impl SymbolTable { + fn new() -> Self { + Self { + label_to_id: HashMap::new(), + id_to_label: HashMap::new(), + next_id: 1, + } + } + + fn get_or_insert(&mut self, label: &str) -> Label { + if let Some(&id) = self.label_to_id.get(label) { + id + } else { + let id = self.next_id; + self.next_id += 1; + self.label_to_id.insert(label.to_string(), id); + self.id_to_label.insert(id, label.to_string()); + id + } + } + + fn get_label(&self, id: Label) -> Option<&str> { + self.id_to_label.get(&id).map(|s| s.as_str()) + } +} + +fn map_fst_error(operation: &'static str, e: E) -> RpqError { + RpqError::UnsupportedPath(format!("{} failed: {}", operation, e)) +} + +impl Nfa { + /// Build an NFA from a path expression. 
+ pub fn from_path_expr(path: &PathExpr) -> Result { + let mut symbols = SymbolTable::new(); + + let mut fst = build_fst(path, &mut symbols)?; + + rm_epsilon(&mut fst).map_err(|e| map_fst_error("rm_epsilon", e))?; + + extract_nfa(&fst, &symbols) + } + + /// Convert NFA transitions to LAGraph matrices for RPQ evaluation. + pub fn build_lagraph_matrices(&self) -> Result, RpqError> { + ensure_grb_init()?; + let n = self.num_states as GrB_Index; + let mut result = Vec::with_capacity(self.transitions.len()); + + for trans in &self.transitions { + let mut mat: GrB_Matrix = std::ptr::null_mut(); + grb_ok!(GrB_Matrix_new(&mut mat, GrB_BOOL, n, n))?; + + if !trans.rows.is_empty() { + let vals: Vec = vec![true; trans.rows.len()]; + grb_ok!(GrB_Matrix_build_BOOL( + mat, + trans.rows.as_ptr(), + trans.cols.as_ptr(), + vals.as_ptr(), + trans.rows.len() as u64, + GrB_LOR, + ))?; + } + + let lg = LagraphGraph::new(mat, LAGraph_Kind::LAGraph_ADJACENCY_DIRECTED)?; + result.push((trans.label.clone(), lg)); + } + + Ok(result) + } +} + +/// Build a VectorFst from a PathExpr using Thompson-like construction. 
+fn build_fst( + path: &PathExpr, + symbols: &mut SymbolTable, +) -> Result, RpqError> { + match path { + PathExpr::Label(label) => { + let label_id = symbols.get_or_insert(label); + Ok(acceptor(&[label_id], TropicalWeight::one())) + } + + PathExpr::Sequence(lhs, rhs) => { + let mut fst_l = build_fst(lhs, symbols)?; + let fst_r = build_fst(rhs, symbols)?; + concat(&mut fst_l, &fst_r).map_err(|e| map_fst_error("concat", e))?; + Ok(fst_l) + } + + PathExpr::Alternative(lhs, rhs) => { + let mut fst_l = build_fst(lhs, symbols)?; + let fst_r = build_fst(rhs, symbols)?; + union(&mut fst_l, &fst_r).map_err(|e| map_fst_error("union", e))?; + Ok(fst_l) + } + + PathExpr::ZeroOrMore(inner) => { + let mut fst = build_fst(inner, symbols)?; + closure(&mut fst, ClosureType::ClosureStar); + Ok(fst) + } + + PathExpr::OneOrMore(inner) => { + let mut fst = build_fst(inner, symbols)?; + closure(&mut fst, ClosureType::ClosurePlus); + Ok(fst) + } + + PathExpr::ZeroOrOne(inner) => { + let mut fst_inner = build_fst(inner, symbols)?; + let fst_eps = epsilon_machine::>() + .map_err(|e| map_fst_error("epsilon_machine", e))?; + + union(&mut fst_inner, &fst_eps).map_err(|e| map_fst_error("union", e))?; + Ok(fst_inner) + } + } +} + +fn extract_nfa(fst: &VectorFst, symbols: &SymbolTable) -> Result { + let num_states = fst.num_states(); + + let mut label_transitions: HashMap> = HashMap::new(); + + for state in fst.states_iter() { + for tr in fst.get_trs(state).unwrap().trs() { + if tr.ilabel == EPS_LABEL { + continue; + } + + if let Some(label) = symbols.get_label(tr.ilabel) { + label_transitions + .entry(label.to_string()) + .or_default() + .push((state as usize, tr.nextstate as usize)); + } + } + } + + let start_states: Vec = fst + .start() + .map(|s| vec![s as GrB_Index]) + .unwrap_or_default(); + + let final_states: Vec = fst + .states_iter() + .filter(|&s| fst.is_final(s).unwrap_or(false)) + .map(|s| s as GrB_Index) + .collect(); + + let transitions: Vec = label_transitions + .into_iter() + 
.map(|(label, pairs)| { + let mut rows = Vec::with_capacity(pairs.len()); + let mut cols = Vec::with_capacity(pairs.len()); + for (r, c) in pairs { + rows.push(r as GrB_Index); + cols.push(c as GrB_Index); + } + NfaLabelTransitions { label, rows, cols } + }) + .collect(); + + Ok(Nfa { + num_states, + start_states, + final_states, + transitions, + }) +} + +#[derive(Debug)] +pub struct NfaRpqResult { + pub reachable: GraphblasVector, +} + +/// Evaluates RPQs using `LAGraph_RegularPathQuery`. +pub struct NfaRpqEvaluator; + +impl RpqEvaluator for NfaRpqEvaluator { + type Result = NfaRpqResult; + + fn evaluate( + &self, + query: &RpqQuery, + graph: &G, + ) -> Result { + let nfa = Nfa::from_path_expr(&query.path)?; + let nfa_matrices = nfa.build_lagraph_matrices()?; + + let src_id = resolve_endpoint(&query.subject, graph)?; + let _dst_id = resolve_endpoint(&query.object, graph)?; + + let n = graph.num_nodes(); + + let source_vertices: Vec = match src_id { + Some(id) => vec![id as GrB_Index], + None => (0..n as GrB_Index).collect(), + }; + + let mut nfa_graph_ptrs: Vec = + nfa_matrices.iter().map(|(_, lg)| lg.inner).collect(); + + let mut data_graph_ptrs: Vec = Vec::with_capacity(nfa_matrices.len()); + for (label, _) in &nfa_matrices { + let lg = graph.get_graph(label)?; + data_graph_ptrs.push(lg.inner); + } + + let mut reachable: GrB_Vector = std::ptr::null_mut(); + + la_ok!(LAGraph_RegularPathQuery( + &mut reachable, + nfa_graph_ptrs.as_mut_ptr(), + nfa_matrices.len(), + nfa.start_states.as_ptr(), + nfa.start_states.len(), + nfa.final_states.as_ptr(), + nfa.final_states.len(), + data_graph_ptrs.as_mut_ptr(), + source_vertices.as_ptr(), + source_vertices.len(), + ))?; + + let result_vec = GraphblasVector { inner: reachable }; + + Ok(NfaRpqResult { + reachable: result_vec, + }) + } +} + +fn resolve_endpoint( + term: &Endpoint, + graph: &G, +) -> Result, RpqError> { + match term { + Endpoint::Variable(_) => Ok(None), + Endpoint::Named(id) => graph + .get_node_id(id) + 
.map(Some)
+            .ok_or_else(|| RpqError::VertexNotFound(id.clone())),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Builds a single-label path expression for the tests below.
+    fn label(s: &str) -> PathExpr {
+        PathExpr::Label(s.to_owned())
+    }
+
+    /// A single label produces exactly one transition, labelled `knows`,
+    /// with non-empty `rows`, and at least two states.
+    #[test]
+    fn test_single_label() {
+        let nfa = Nfa::from_path_expr(&label("knows")).unwrap();
+        assert!(nfa.num_states >= 2, "NFA should have at least 2 states");
+        assert!(!nfa.start_states.is_empty(), "should have start states");
+        assert!(!nfa.final_states.is_empty(), "should have final states");
+        assert_eq!(nfa.transitions.len(), 1);
+        assert_eq!(nfa.transitions[0].label, "knows");
+        assert!(!nfa.transitions[0].rows.is_empty());
+    }
+
+    /// A sequence `a / b` keeps a transition for each of the two labels.
+    #[test]
+    fn test_sequence() {
+        let path = PathExpr::Sequence(Box::new(label("a")), Box::new(label("b")));
+        let nfa = Nfa::from_path_expr(&path).unwrap();
+        let labels: Vec<&str> = nfa.transitions.iter().map(|t| t.label.as_str()).collect();
+        assert!(labels.contains(&"a"));
+        assert!(labels.contains(&"b"));
+    }
+
+    /// An alternative `a | b` keeps a transition for each of the two labels.
+    #[test]
+    fn test_alternative() {
+        let path = PathExpr::Alternative(Box::new(label("a")), Box::new(label("b")));
+        let nfa = Nfa::from_path_expr(&path).unwrap();
+        let labels: Vec<&str> = nfa.transitions.iter().map(|t| t.label.as_str()).collect();
+        assert!(labels.contains(&"a"));
+        assert!(labels.contains(&"b"));
+    }
+
+    /// For `knows*` at least one state must be both a start and a final state.
+    #[test]
+    fn test_zero_or_more() {
+        let path = PathExpr::ZeroOrMore(Box::new(label("knows")));
+        let nfa = Nfa::from_path_expr(&path).unwrap();
+        // For zero-or-more, start state should be final (accepts empty string)
+        assert!(!nfa.start_states.is_empty());
+        assert!(!nfa.final_states.is_empty());
+        // After rm_epsilon, the start state should be in final states
+        // The element type is left to inference (`_`): it is whatever the state vectors hold.
+        let start_set: std::collections::HashSet<_> =
+            nfa.start_states.iter().copied().collect();
+        let final_set: std::collections::HashSet<_> =
+            nfa.final_states.iter().copied().collect();
+        assert!(
+            !start_set.is_disjoint(&final_set),
+            "start and final states should overlap for zero-or-more, start={:?}, final={:?}",
+            start_set,
+            final_set
+        );
+    }
+}
diff --git a/tests/nfarpq_tests.rs b/tests/nfarpq_tests.rs
new file mode 100644
index 0000000..ec54d91
--- /dev/null
+++ b/tests/nfarpq_tests.rs
@@ -0,0 +1,361 @@
+use pathrex::graph::{GraphDecomposition, GraphError};
+use pathrex::lagraph_sys::{GrB_Index, GrB_Vector_extractTuples_BOOL, GrB_Vector_nvals};
+use pathrex::rpq::nfarpq::{NfaRpqEvaluator, NfaRpqResult};
+use pathrex::rpq::{Endpoint, PathExpr, RpqError, RpqEvaluator, RpqQuery};
+use pathrex::utils::build_graph;
+
+/// Builds a single-label path expression.
+fn label(s: &str) -> PathExpr {
+    PathExpr::Label(s.to_string())
+}
+
+/// Builds a variable endpoint (e.g. `?x`) with the given name.
+fn var(name: &str) -> Endpoint {
+    Endpoint::Variable(name.to_string())
+}
+
+/// Builds a named endpoint referring to the vertex called `s`.
+fn named_ep(s: &str) -> Endpoint {
+    Endpoint::Named(s.to_string())
+}
+
+/// Assembles an [`RpqQuery`] from its subject, path and object.
+fn rq(subject: Endpoint, path: PathExpr, object: Endpoint) -> RpqQuery {
+    RpqQuery {
+        subject,
+        path,
+        object,
+    }
+}
+
+/// Returns the indices of the entries stored in `result.reachable`.
+///
+/// The tests compare these indices against `graph.get_node_id(..)`. An empty
+/// vector is returned when `GrB_Vector_nvals` reports zero entries.
+fn reachable_indices(result: &NfaRpqResult) -> Vec<GrB_Index> {
+    // SAFETY: assumes `result.reachable.inner` is a live GraphBLAS vector for as
+    // long as `result` is borrowed — TODO confirm against `NfaRpqResult`. Both
+    // output buffers are allocated with `nvals` elements before the extract call.
+    // NOTE(review): the values returned by the two FFI calls are discarded.
+    unsafe {
+        let mut nvals: GrB_Index = 0;
+        GrB_Vector_nvals(&mut nvals, result.reachable.inner);
+        if nvals == 0 {
+            return Vec::new();
+        }
+        let mut indices = vec![0u64; nvals as usize];
+        let mut values = vec![false; nvals as usize];
+        GrB_Vector_extractTuples_BOOL(
+            indices.as_mut_ptr(),
+            values.as_mut_ptr(),
+            &mut nvals,
+            result.reachable.inner,
+        );
+        // `nvals` is passed by mutable reference above, so trim to its updated value.
+        indices.truncate(nvals as usize);
+        indices
+    }
+}
+
+/// Returns the number of entries stored in `result.reachable`.
+fn reachable_count(result: &NfaRpqResult) -> u64 {
+    // SAFETY: assumes `result.reachable.inner` is a live GraphBLAS vector for as
+    // long as `result` is borrowed — TODO confirm against `NfaRpqResult`.
+    unsafe {
+        let mut nvals: GrB_Index = 0;
+        GrB_Vector_nvals(&mut nvals, result.reachable.inner);
+        nvals
+    }
+}
+
+/// Graph: A --knows--> B --knows--> C
+/// Query: ?x <knows> ?y
+#[test]
+fn test_single_label_variable_variable() {
+    let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]);
+    let evaluator = NfaRpqEvaluator;
+
+    let result = evaluator
+        .evaluate(&rq(var("x"), label("knows"), var("y")), &graph)
+        .expect("evaluate should succeed");
+
+    let count = reachable_count(&result);
+    assert_eq!(count, 2);
+}
+
+/// Graph: A --knows--> B --knows--> C
+/// Query: <A> <knows> ?y
+#[test]
+fn test_single_label_named_source() {
+    let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]);
+    let evaluator = NfaRpqEvaluator;
+
+    let result = evaluator
+        .evaluate(&rq(named_ep("A"), label("knows"), var("y")), &graph)
+        .expect("evaluate should succeed");
+
+    let indices = reachable_indices(&result);
+    let b_id = graph.get_node_id("B").expect("B should exist");
+    assert!(
+        indices.contains(&(b_id as GrB_Index)),
+        "B (id={b_id}) should be reachable from A via 'knows', got indices: {indices:?}"
+    );
+}
+
+/// Graph: A --knows--> B --likes--> C
+/// Query: ?x <knows>/<likes> ?y (two-hop sequence)
+#[test]
+fn test_sequence_path() {
+    let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "likes")]);
+    let evaluator = NfaRpqEvaluator;
+
+    let path = PathExpr::Sequence(Box::new(label("knows")), Box::new(label("likes")));
+
+    let result = evaluator
+        .evaluate(&rq(var("x"), path, var("y")), &graph)
+        .expect("evaluate should succeed");
+
+    let count = reachable_count(&result);
+    assert_eq!(count, 1);
+}
+
+/// Graph: A --knows--> B --likes--> C
+/// Query: <A> <knows>/<likes> ?y
+#[test]
+fn test_sequence_path_named_source() {
+    let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "likes")]);
+    let evaluator = NfaRpqEvaluator;
+
+    let path = PathExpr::Sequence(Box::new(label("knows")), Box::new(label("likes")));
+
+    let result = evaluator
+        .evaluate(&rq(named_ep("A"), path, var("y")), &graph)
+        .expect("evaluate should succeed");
+
+    let indices = reachable_indices(&result);
+    let c_id = graph.get_node_id("C").expect("C should exist");
+    assert!(
+        indices.contains(&(c_id as GrB_Index)),
+        "C (id={c_id}) should be reachable from A via knows/likes, got indices: {indices:?}"
+    );
+}
+
+/// Graph: A --knows--> B, A --likes--> C
+/// Query: <A> <knows>|<likes> ?y
+#[test]
+fn test_alternative_path() {
+    let graph = build_graph(&[("A", "B", "knows"), ("A", "C", "likes")]);
+    let evaluator = NfaRpqEvaluator;
+
+    let path = PathExpr::Alternative(Box::new(label("knows")), Box::new(label("likes")));
+
+    let result = evaluator
+        .evaluate(&rq(named_ep("A"), path, var("y")), &graph)
+        .expect("evaluate should succeed");
+
+    let indices = reachable_indices(&result);
+    let b_id = graph.get_node_id("B").expect("B should exist");
+    let c_id = graph.get_node_id("C").expect("C should exist");
+    assert!(
+        indices.contains(&(b_id as GrB_Index)),
+        "B should be reachable via knows|likes"
+    );
+    assert!(
+        indices.contains(&(c_id as GrB_Index)),
+        "C should be reachable via knows|likes"
+    );
+}
+
+/// Graph: A --knows--> B --knows--> C
+/// Query: <A> <knows>* ?y
+#[test]
+fn test_zero_or_more_path() {
+    let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]);
+    let evaluator = NfaRpqEvaluator;
+
+    let path = PathExpr::ZeroOrMore(Box::new(label("knows")));
+
+    let result = evaluator
+        .evaluate(&rq(named_ep("A"), path, var("y")), &graph)
+        .expect("evaluate should succeed");
+
+    let indices = reachable_indices(&result);
+    let a_id = graph.get_node_id("A").expect("A should exist");
+    let b_id = graph.get_node_id("B").expect("B should exist");
+    let c_id = graph.get_node_id("C").expect("C should exist");
+
+    assert!(
+        indices.contains(&(a_id as GrB_Index)),
+        "A should be reachable (zero hops)"
+    );
+    assert!(
+        indices.contains(&(b_id as GrB_Index)),
+        "B should be reachable (one hop)"
+    );
+    assert!(
+        indices.contains(&(c_id as GrB_Index)),
+        "C should be reachable (two hops)"
+    );
+}
+
+/// Graph: A --knows--> B --knows--> C
+/// Query: <A> <knows>+ ?y
+#[test]
+fn test_one_or_more_path() {
+    let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]);
+    let evaluator = NfaRpqEvaluator;
+
+    let path = PathExpr::OneOrMore(Box::new(label("knows")));
+
+    let result = evaluator
+        .evaluate(&rq(named_ep("A"), path, var("y")), &graph)
+        .expect("evaluate should succeed");
+
+    let indices = reachable_indices(&result);
+    let a_id = graph.get_node_id("A").expect("A should exist");
+    let b_id = graph.get_node_id("B").expect("B should exist");
+    let c_id = graph.get_node_id("C").expect("C should exist");
+
+    assert!(
+        !indices.contains(&(a_id as GrB_Index)),
+        "A shouldn't be reachable"
+    );
+    assert!(
+        indices.contains(&(b_id as GrB_Index)),
+        "B should be reachable (one hop)"
+    );
+    assert!(
+        indices.contains(&(c_id as GrB_Index)),
+        "C should be reachable (two hops)"
+    );
+}
+
+/// Graph: A --knows--> B --knows--> C
+/// Query: <A> <knows>? ?y
+#[test]
+fn test_zero_or_one_path() {
+    let graph = build_graph(&[("A", "B", "knows"), ("B", "C", "knows")]);
+    let evaluator = NfaRpqEvaluator;
+
+    let path = PathExpr::ZeroOrOne(Box::new(label("knows")));
+
+    let result = evaluator
+        .evaluate(&rq(named_ep("A"), path, var("y")), &graph)
+        .expect("evaluate should succeed");
+
+    let indices = reachable_indices(&result);
+    let a_id = graph.get_node_id("A").expect("A should exist");
+    let b_id = graph.get_node_id("B").expect("B should exist");
+    let c_id = graph.get_node_id("C").expect("C should exist");
+
+    assert!(
+        indices.contains(&(a_id as GrB_Index)),
+        "A should be reachable (zero hops)"
+    );
+    assert!(
+        indices.contains(&(b_id as GrB_Index)),
+        "B should be reachable (one hop)"
+    );
+    assert!(
+        !indices.contains(&(c_id as GrB_Index)),
+        "C should NOT be reachable (two hops, but path is ?)"
+    );
+}
+
+/// A path using a label that the graph does not contain must fail with
+/// `GraphError::LabelNotFound` carrying that label.
+#[test]
+fn test_label_not_found() {
+    let graph = build_graph(&[("A", "B", "knows")]);
+    let evaluator = NfaRpqEvaluator;
+
+    let result = evaluator.evaluate(&rq(var("x"), label("nonexistent"), var("y")), &graph);
+
+    assert!(
+        matches!(result, Err(RpqError::Graph(GraphError::LabelNotFound(ref l))) if l == "nonexistent"),
+        "expected LabelNotFound error, got: {result:?}"
+    );
+}
+
+/// A named subject that is not a vertex of the graph must fail with
+/// `RpqError::VertexNotFound` carrying that name.
+#[test]
+fn test_vertex_not_found() {
+    let graph = build_graph(&[("A", "B", "knows")]);
+    let evaluator = NfaRpqEvaluator;
+
+    let result = evaluator.evaluate(&rq(named_ep("Z"), label("knows"), var("y")), &graph);
+
+    assert!(
+        matches!(result, Err(RpqError::VertexNotFound(ref v)) if v == "Z"),
+        "expected VertexNotFound error, got: {result:?}"
+    );
+}
+
+/// A named object that is not a vertex of the graph must fail with
+/// `RpqError::VertexNotFound` carrying that name.
+#[test]
+fn test_object_vertex_not_found() {
+    let graph = build_graph(&[("A", "B", "knows")]);
+    let evaluator = NfaRpqEvaluator;
+
+    let result = evaluator.evaluate(&rq(var("x"), label("knows"), named_ep("Z")), &graph);
+
+    assert!(
+        matches!(result, Err(RpqError::VertexNotFound(ref v)) if v == "Z"),
+        "expected VertexNotFound error for object, got: {result:?}"
+    );
+}
+
+/// Parsing a query whose path is a negated property set must yield
+/// `RpqError::UnsupportedPath`.
+#[test]
+fn test_negated_property_set_rejected_by_sparql_conversion() {
+    // NOTE(review): `BASE` is not followed by an IRI and the `!()` set is empty;
+    // the `<...>` IRIs look like they were lost from this literal — confirm the
+    // intended query text before relying on this test.
+    let sparql = "BASE SELECT ?x ?y WHERE { ?x !() ?y . }";
+    let r = pathrex::sparql::parse_rpq(sparql);
+    assert!(matches!(r, Err(RpqError::UnsupportedPath(_))));
+}
+
+/// Graph: A --knows--> B --knows--> C --knows--> A (cycle)
+/// Query: <A> <knows>* ?y
+#[test]
+fn test_cycle_graph_star() {
+    let graph = build_graph(&[
+        ("A", "B", "knows"),
+        ("B", "C", "knows"),
+        ("C", "A", "knows"),
+    ]);
+    let evaluator = NfaRpqEvaluator;
+
+    let path = PathExpr::ZeroOrMore(Box::new(label("knows")));
+
+    let result = evaluator
+        .evaluate(&rq(named_ep("A"), path, var("y")), &graph)
+        .expect("evaluate should succeed");
+
+    let count = reachable_count(&result);
+    assert_eq!(count, 3, "all 3 nodes should be reachable in a cycle");
+}
+
+/// Graph: A --knows--> B --likes--> C --knows--> D
+/// Query: <A> <knows>/<likes>*/<knows> ?y
+#[test]
+fn test_complex_path() {
+    let graph = build_graph(&[
+        ("A", "B", "knows"),
+        ("B", "C", "likes"),
+        ("C", "D", "knows"),
+    ]);
+    let evaluator = NfaRpqEvaluator;
+
+    // knows / likes* / knows
+    let path = PathExpr::Sequence(
+        Box::new(PathExpr::Sequence(
+            Box::new(label("knows")),
+            Box::new(PathExpr::ZeroOrMore(Box::new(label("likes")))),
+        )),
+        Box::new(label("knows")),
+    );
+
+    let result = evaluator
+        .evaluate(&rq(named_ep("A"), path, var("y")), &graph)
+        .expect("evaluate should succeed");
+
+    let indices = reachable_indices(&result);
+    let d_id = graph.get_node_id("D").expect("D should exist");
+    assert!(
+        indices.contains(&(d_id as GrB_Index)),
+        "D should be reachable via knows/likes*/knows, got indices: {indices:?}"
+    );
+}
+
+/// A two-hop path whose second label is absent from the graph is rejected.
+///
+/// Graph: A --knows--> B
+/// Query: ?x <knows>/<likes> ?y
+///
+/// Despite the test name, the expected outcome is not an empty result: the
+/// assertion requires `GraphError::LabelNotFound` carrying `"likes"`.
+#[test]
+fn test_no_matching_path() {
+    let graph = build_graph(&[("A", "B", "knows")]);
+    let query = rq(
+        var("x"),
+        PathExpr::Sequence(Box::new(label("knows")), Box::new(label("likes"))),
+        var("y"),
+    );
+
+    let result = NfaRpqEvaluator.evaluate(&query, &graph);
+
+    let missing_likes = matches!(
+        &result,
+        Err(RpqError::Graph(GraphError::LabelNotFound(name))) if name == "likes"
+    );
+    assert!(
+        missing_likes,
+        "expected LabelNotFound for 'likes', got: {result:?}"
+    );
+}