From 4b85b3ebd3ce1549aa7527af6a1e0f7afae0a596 Mon Sep 17 00:00:00 2001 From: Guy Repta <50716988+gtrepta@users.noreply.github.com> Date: Tue, 28 Apr 2026 13:42:08 -0500 Subject: [PATCH 01/13] Add kllvm API as a source --- kframework_ffi/c/kllvm-c.h | 184 +++++++++++++++++++++++++++++++++++++ 1 file changed, 184 insertions(+) create mode 100644 kframework_ffi/c/kllvm-c.h diff --git a/kframework_ffi/c/kllvm-c.h b/kframework_ffi/c/kllvm-c.h new file mode 100644 index 0000000..1709635 --- /dev/null +++ b/kframework_ffi/c/kllvm-c.h @@ -0,0 +1,184 @@ +#ifndef KLLVM_C_H +#define KLLVM_C_H + +#ifndef __cplusplus +#include +#include +#include +#else +#include +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Error Handling + * ============== + * + * Some API functions in these bindings can bubble up internal errors from the + * LLVM backend to avoid crashing the host process. + * + * These functions take an initial parameter of type `kore_error *`; if that + * parameter is `NULL` then any C++ exceptions thrown by the backend will + * simply be rethrown and the host process will crash with the relevant error + * message. + * + * If the input is not NULL, then the object will have call-specific information + * filled in that can be accessed using the getter functions in this section. + */ + +typedef struct kore_error kore_error; + +/** + * Create an empty error object. Initially, the created object will report + * success (not failure), and will return `NULL` if its message is accessed. + */ +kore_error *kore_error_new(void); + +/** + * Return true if no error occurred when this object was passed to an API call. + */ +bool kore_error_is_success(kore_error const *); + +/** + * Return any error-specific message that has been added to this object. The + * returned string is a reference to the error's internal state and should not + * be freed separately. If no error has occurred (i.e. is_success returns true), + * then return NULL. + */ +char const *kore_error_message(kore_error const *); + +/** + * Deallocate an error and its associated message. + */ +void kore_error_free(kore_error *); + +/* + * Binary KORE Outputs + * =================== + * + * All API functions in these bindings that return binary KORE data do so with a + * pair of output parameters: + * + * char ** data_out + * size_t * size_out + * + * The returned binary data in *data_out has length *size_out, and should be + * freed with free(*data_out) when it is no longer required. + */ + +/* Opaque types */ + +typedef struct kore_pattern kore_pattern; +typedef struct kore_sort kore_sort; +typedef struct kore_symbol kore_symbol; +typedef struct block block; + +/* kore_pattern */ + +char *kore_pattern_dump(kore_pattern const *); + +char *kore_pattern_pretty_print(kore_pattern const *); + +void kore_pattern_serialize(kore_pattern const *, char **, size_t *); + +void kore_pattern_free(kore_pattern const *); + +kore_pattern *kore_pattern_parse(char const *); +kore_pattern *kore_pattern_parse_file(char const *); + +kore_pattern *kore_pattern_new_token(char const *, kore_sort const *); +kore_pattern * +kore_pattern_new_token_with_len(char const *, size_t, kore_sort const *); + +kore_pattern *kore_pattern_new_injection( + kore_pattern const *, kore_sort const *, kore_sort const *); + +kore_pattern * +kore_pattern_make_interpreter_input(kore_pattern const *, kore_sort const *); + +kore_pattern *kore_composite_pattern_new(char const *); +kore_pattern *kore_composite_pattern_from_symbol(kore_symbol *); +void kore_composite_pattern_add_argument(kore_pattern *, kore_pattern const *); + +kore_pattern *kore_pattern_desugar_associative(kore_pattern const *); + +kore_pattern *kore_string_pattern_new(char const *); +kore_pattern *kore_string_pattern_new_with_len(char const *, size_t); + +block *kore_pattern_construct(kore_pattern const *); +char *kore_block_dump(block *); + +kore_pattern *kore_pattern_from_block(block *); + +/* + * Expects the argument term to be of the form: + * sym{}(BOOL) + */ +bool kore_block_get_bool(block *); + +bool kore_simplify_bool(kore_error *, kore_pattern const *); + +void kore_simplify( + kore_error *err, kore_pattern const *pattern, kore_sort const *sort, + char **, size_t *); + +void kore_simplify_binary( + kore_error *, char *, size_t, kore_sort const *, char **, size_t *); + +block *take_steps(int64_t depth, block *term); + +void init_static_objects(void); + +/* kore_sort */ + +char *kore_sort_dump(kore_sort const *); + +void kore_sort_free(kore_sort const *); + +bool kore_sort_is_concrete(kore_sort const *); + +bool kore_sort_is_kitem(kore_sort const *); +bool kore_sort_is_k(kore_sort const *); + +kore_sort *kore_composite_sort_new(char const *); +void kore_composite_sort_add_argument(kore_sort const *, kore_sort const *); + +/* kore_symbol */ + +kore_symbol *kore_symbol_new(char const *); + +void kore_symbol_free(kore_symbol const *); + +void kore_symbol_add_formal_argument(kore_symbol *, kore_sort const *); + +/* Memory management */ + +void kllvm_init(void); +void kllvm_free_all_memory(void); +void kllvm_reset_munmap_all_arenas(void); + +/* Sort-specific functions */ + +bool kllvm_mutable_bytes_enabled(void); + +/* Definitions */ + +/** + * Parse the given KORE definition, then if any of its axioms have a `label` + * attribute that matches the supplied label, return the name of the function + * symbol that attempts matching a pattern against that axiom (and will + * therefore populate the backend's global matching log). + * + * If no such axiom exists, return `nullptr`. + */ +char *kore_match_function_name(char const *defn_path, char const *label); + +#ifdef __cplusplus +} +#endif + +#endif From ab0bced17b3f6f1be01f205539fcfdb9c361476b Mon Sep 17 00:00:00 2001 From: Guy Repta <50716988+gtrepta@users.noreply.github.com> Date: Tue, 28 Apr 2026 13:56:29 -0500 Subject: [PATCH 02/13] Use libc for free Change signatures to match what bindgen will produce --- kframework_ffi/Cargo.toml | 3 +++ kframework_ffi/src/kllvm/block.rs | 2 +- kframework_ffi/src/kllvm/ffi.rs | 13 +++++-------- kframework_ffi/src/kllvm/pattern.rs | 3 ++- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/kframework_ffi/Cargo.toml b/kframework_ffi/Cargo.toml index db14bf4..0f37de0 100644 --- a/kframework_ffi/Cargo.toml +++ b/kframework_ffi/Cargo.toml @@ -3,3 +3,6 @@ name = "kframework_ffi" version.workspace = true edition.workspace = true rust-version.workspace = true + +[dependencies] +libc = "0.2" diff --git a/kframework_ffi/src/kllvm/block.rs b/kframework_ffi/src/kllvm/block.rs index e2f2a4b..1f5ee1b 100644 --- a/kframework_ffi/src/kllvm/block.rs +++ b/kframework_ffi/src/kllvm/block.rs @@ -4,7 +4,7 @@ use super::Pattern; /// A safe wrapper around a foreign pointer to kllvm's interned representation. /// kllvm's garbage collector manages the allocation/freeing of this pointer. pub struct Block { - pub(crate) block: *const ffi::block, + pub(crate) block: *mut ffi::block, } impl Block { diff --git a/kframework_ffi/src/kllvm/ffi.rs b/kframework_ffi/src/kllvm/ffi.rs index 9e997cc..1bfaabe 100644 --- a/kframework_ffi/src/kllvm/ffi.rs +++ b/kframework_ffi/src/kllvm/ffi.rs @@ -12,18 +12,15 @@ pub struct kore_pattern { #[allow(dead_code)] unsafe extern "C" { - /// Some strings from the llvm-backend are malloced, and we need to free them ourselves. - pub fn free(ptr: *const c_void) -> c_void; - pub fn kllvm_init() -> c_void; pub fn kllvm_free_all_memory() -> c_void; - pub fn take_steps(steps: i64, subject: *const block) -> *const block; + pub fn take_steps(steps: i64, subject: *mut block) -> *mut block; - pub fn kore_pattern_parse(data: *const c_char) -> *const kore_pattern; - pub fn kore_pattern_dump(pattern: *const kore_pattern) -> *const c_char; + pub fn kore_pattern_parse(data: *const c_char) -> *mut kore_pattern; + pub fn kore_pattern_dump(pattern: *const kore_pattern) -> *mut c_char; pub fn kore_pattern_free(pattern: *const kore_pattern) -> c_void; - pub fn kore_pattern_construct(pattern: *const kore_pattern) -> *const block; - pub fn kore_pattern_from_block(subject: *const block) -> *const kore_pattern; + pub fn kore_pattern_construct(pattern: *const kore_pattern) -> *mut block; + pub fn kore_pattern_from_block(subject: *const block) -> *mut kore_pattern; } diff --git a/kframework_ffi/src/kllvm/pattern.rs b/kframework_ffi/src/kllvm/pattern.rs index b571ecb..e4f8629 100644 --- a/kframework_ffi/src/kllvm/pattern.rs +++ b/kframework_ffi/src/kllvm/pattern.rs @@ -1,3 +1,4 @@ +use libc; use super::ffi; use super::Block; use std::ffi::{c_void, CStr, CString}; @@ -40,7 +41,7 @@ impl fmt::Display for Pattern { .to_str() .expect("Failed to convert kllvm::Pattern to &str") .to_string(); - ffi::free(c_str as *const c_void); + libc::free(c_str as *mut c_void); result }; write!(f, "{}", result) From 8cb2ba5339027a793df38289b9d99dde6431cb6d Mon Sep 17 00:00:00 2001 From: Guy Repta <50716988+gtrepta@users.noreply.github.com> Date: Tue, 28 Apr 2026 14:20:41 -0500 Subject: [PATCH 03/13] Generate KLLVM bindings from C header --- kframework_ffi/Cargo.toml | 3 +++ kframework_ffi/build.rs | 17 +++++++++++++++++ kframework_ffi/src/kllvm/ffi.rs | 30 +++++------------------------- 3 files changed, 25 insertions(+), 25 deletions(-) create mode 100644 kframework_ffi/build.rs diff --git a/kframework_ffi/Cargo.toml b/kframework_ffi/Cargo.toml index 0f37de0..eb9cc65 100644 --- a/kframework_ffi/Cargo.toml +++ b/kframework_ffi/Cargo.toml @@ -6,3 +6,6 @@ rust-version.workspace = true [dependencies] libc = "0.2" + +[build-dependencies] +bindgen = "0.72.1" diff --git a/kframework_ffi/build.rs b/kframework_ffi/build.rs new file mode 100644 index 0000000..8f4445e --- /dev/null +++ b/kframework_ffi/build.rs @@ -0,0 +1,17 @@ +use std::env; +use std::path::PathBuf; + +fn main() { + // Generate bindings to KLLVM's C API + let bindings = bindgen::Builder::default() + .header("c/kllvm-c.h") + .allowlist_file("c/kllvm-c.h") + .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) + .generate() + .expect("Unable to generate kllvm-c bindings"); + + let out_path = PathBuf::from(env::var("OUT_DIR").unwrap()); + bindings + .write_to_file(out_path.join("kllvm-c.rs")) + .expect("Couldn't write kllvm-c bindings!"); +} diff --git a/kframework_ffi/src/kllvm/ffi.rs b/kframework_ffi/src/kllvm/ffi.rs index 1bfaabe..089d1b3 100644 --- a/kframework_ffi/src/kllvm/ffi.rs +++ b/kframework_ffi/src/kllvm/ffi.rs @@ -1,26 +1,6 @@ -use std::ffi::{c_char, c_void}; +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] +#![allow(unused)] -#[repr(C)] -pub struct block { - _private: [u8; 0], // Unused field. Makes rust happy about FFI safety. -} - -#[repr(C)] -pub struct kore_pattern { - _private: [u8; 0], // Unused field. Makes rust happy about FFI safety. -} - -#[allow(dead_code)] -unsafe extern "C" { - pub fn kllvm_init() -> c_void; - pub fn kllvm_free_all_memory() -> c_void; - - pub fn take_steps(steps: i64, subject: *mut block) -> *mut block; - - pub fn kore_pattern_parse(data: *const c_char) -> *mut kore_pattern; - pub fn kore_pattern_dump(pattern: *const kore_pattern) -> *mut c_char; - pub fn kore_pattern_free(pattern: *const kore_pattern) -> c_void; - - pub fn kore_pattern_construct(pattern: *const kore_pattern) -> *mut block; - pub fn kore_pattern_from_block(subject: *const block) -> *mut kore_pattern; -} +include!(concat!(env!("OUT_DIR"), "/kllvm-c.rs")); From 03e1f4252899c13b6d8466ddc44a6c0d51c89b56 Mon Sep 17 00:00:00 2001 From: Guy Repta <50716988+gtrepta@users.noreply.github.com> Date: Tue, 28 Apr 2026 14:34:24 -0500 Subject: [PATCH 04/13] Cargo.lock --- Cargo.lock | 223 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 199 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 50db7f5..92704d2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,11 +2,20 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + [[package]] name = "anstream" -version = "0.6.21" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" dependencies = [ "anstyle", "anstyle-parse", @@ -19,15 +28,15 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" [[package]] name = "anstyle-parse" -version = "0.2.7" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" dependencies = [ "utf8parse", ] @@ -52,11 +61,63 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "bindgen" +version = "0.72.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "itertools", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", +] + +[[package]] +name = "bitflags" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" + +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + [[package]] name = "clap" -version = "4.5.58" +version = "4.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63be97961acde393029492ce0be7a1af7e323e6bae9511ebfac33751be5e6806" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" dependencies = [ "clap_builder", "clap_derive", @@ -64,9 +125,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.58" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f13174bda5dfd69d7e947827e5af4b0f2f94a4a3ee92912fba07a66150f21e2" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" dependencies = [ "anstream", "anstyle", @@ -76,9 +137,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.55" +version = "4.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5" +checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" dependencies = [ "heck", "proc-macro2", @@ -88,15 +149,27 @@ dependencies = [ [[package]] name = "clap_lex" -version = "1.0.0" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" [[package]] name = "colorchoice" -version = "1.0.4" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "glob" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" [[package]] name = "heck" @@ -119,11 +192,20 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" [[package]] name = "kframework" @@ -138,6 +220,32 @@ dependencies = [ [[package]] name = "kframework_ffi" version = "0.1.0" +dependencies = [ + "bindgen", + "libc", +] + +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "memchr" @@ -145,12 +253,38 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "once_cell_polyfill" version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + [[package]] name = "proc-macro2" version = "1.0.106" @@ -162,13 +296,48 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.44" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "rustc-hash" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" + [[package]] name = "rustversion" version = "1.0.22" @@ -218,6 +387,12 @@ dependencies = [ "zmij", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "strsim" version = "0.11.1" @@ -226,9 +401,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "2.0.115" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e614ed320ac28113fa64972c4262d5dbc89deacdfd00c34a3e4cea073243c12" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -237,9 +412,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.23" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "537dd038a89878be9b64dd4bd1b260315c1bb94f4d784956b81e27a088d9a09e" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "utf8parse" From ea74112dd941983138a649fdb28dc69dbaa5eb1f Mon Sep 17 00:00:00 2001 From: Guy Repta <50716988+gtrepta@users.noreply.github.com> Date: Wed, 29 Apr 2026 10:19:03 -0500 Subject: [PATCH 05/13] feat: Draft `Marshaller` for moving rust kore patterns to the llvm-backend Assisted-by: Claude Code --- examples/fuzzer/src/main.rs | 4 + examples/fuzzer/src/marshal.rs | 186 ++++++++++++++++++++++++++++ kframework/src/kore/syntax.rs | 8 ++ kframework_ffi/src/kllvm.rs | 2 +- kframework_ffi/src/kllvm/pattern.rs | 4 + 5 files changed, 203 insertions(+), 1 deletion(-) create mode 100644 examples/fuzzer/src/marshal.rs diff --git a/examples/fuzzer/src/main.rs b/examples/fuzzer/src/main.rs index 9514343..e0c2be9 100644 --- a/examples/fuzzer/src/main.rs +++ b/examples/fuzzer/src/main.rs @@ -5,6 +5,10 @@ use honggfuzz::fuzz; use kframework::kore::{App, Parser, Pattern, SymbolId}; use kframework_ffi::kllvm; +mod marshal; +#[allow(unused_imports)] +use marshal::{MarshalError, Marshaller, VarHandler}; + #[derive(Clone, Copy)] struct FuzzInput { field1: u32, diff --git a/examples/fuzzer/src/marshal.rs b/examples/fuzzer/src/marshal.rs new file mode 100644 index 0000000..d175e63 --- /dev/null +++ b/examples/fuzzer/src/marshal.rs @@ -0,0 +1,186 @@ +#![allow(unused)] + +use kframework::kore::{App, Pattern, Sort, SymbolId}; +use kframework_ffi::kllvm; +use kframework_ffi::kllvm::ffi; +use std::collections::HashMap; +use std::ffi::CString; + +#[derive(Debug)] +pub enum MarshalError { + Unsupported(&'static str), + UnknownVar(String), + Cstring, +} + +pub trait VarHandler { + fn substitute(&mut self, name: &str) -> Result; +} + +struct OwnedSymbol(*mut ffi::kore_symbol); +impl Drop for OwnedSymbol { + fn drop(&mut self) { + unsafe { ffi::kore_symbol_free(self.0) } + } +} + +struct OwnedPattern(*mut ffi::kore_pattern); +impl Drop for OwnedPattern { + fn drop(&mut self) { + unsafe { ffi::kore_pattern_free(self.0) } + } +} + +struct OwnedSort(*mut ffi::kore_sort); +impl Drop for OwnedSort { + fn drop(&mut self) { + unsafe { ffi::kore_sort_free(self.0) } + } +} + +pub struct Marshaller { + symbols: HashMap, + subtrees: HashMap<*const Pattern, OwnedPattern>, + handler: H, +} + +impl Marshaller { + pub fn new(handler: H) -> Self { + Self { + symbols: HashMap::new(), + subtrees: HashMap::new(), + handler, + } + } + + pub fn marshal(&mut self, root: &Pattern) -> Result { + let (raw, _var_free) = self.marshal_node(root)?; + // If the root happened to be fully var-free it now lives in the + // cache; pop it so kllvm::Pattern's Drop owns the only reference + // and we don't double-free. + self.subtrees.remove(&(root as *const Pattern)); + Ok(kllvm::Pattern::from_raw(raw)) + } + + fn marshal_node( + &mut self, + p: &Pattern, + ) -> Result<(*mut ffi::kore_pattern, bool), MarshalError> { + if let Some(owned) = self.subtrees.get(&(p as *const Pattern)) { + return Ok((owned.0, true)); + } + + match p { + Pattern::Var(v) => { + let sub = self.handler.substitute(v.id.as_str())?; + let (ptr, _) = self.marshal_node(&sub)?; + Ok((ptr, false)) + } + + Pattern::Dv { sort, value } => { + let sort_owned = build_sort(sort)?; + let c_val = CString::new(value.0.as_str()).map_err(|_| MarshalError::Cstring)?; + let ptr = unsafe { ffi::kore_pattern_new_token(c_val.as_ptr(), sort_owned.0) }; + self.subtrees.insert(p as *const Pattern, OwnedPattern(ptr)); + Ok((ptr, true)) + } + + Pattern::App(app) => self.marshal_app(p, app), + + other => Err(MarshalError::Unsupported(variant_name(other))), + } + } + + fn marshal_app( + &mut self, + p: &Pattern, + app: &App, + ) -> Result<(*mut ffi::kore_pattern, bool), MarshalError> { + let sym = self.intern_symbol(&app.symbol, &app.sorts)?; + let pat = unsafe { ffi::kore_composite_pattern_from_symbol(sym) }; + + let mut var_free = true; + for arg in &app.args { + let (child, child_vf) = self.marshal_node(arg)?; + unsafe { ffi::kore_composite_pattern_add_argument(pat, child) }; + var_free &= child_vf; + // Do NOT free `child`. If borrowed (cache hit), the cache owns + // it. If freshly allocated under a Var-bearing subtree, the C++ + // side took a shared_ptr copy when add_argument was called, so + // `pat`'s eventual free reclaims the AST. The C wrapper struct + // around `child` in that latter case is intentionally leaked + // (~16 bytes/node, bounded by tree size, not iteration count). + } + + if var_free { + self.subtrees + .insert(p as *const Pattern, OwnedPattern(pat)); + } + Ok((pat, var_free)) + } + + fn intern_symbol( + &mut self, + id: &SymbolId, + sorts: &[Sort], + ) -> Result<*mut ffi::kore_symbol, MarshalError> { + let key = id.as_str().to_owned(); + if let Some(owned) = self.symbols.get(&key) { + return Ok(owned.0); + } + let c_name = CString::new(id.as_str()).map_err(|_| MarshalError::Cstring)?; + let sym = unsafe { ffi::kore_symbol_new(c_name.as_ptr()) }; + for sort in sorts { + let s = build_sort(sort)?; + unsafe { ffi::kore_symbol_add_formal_argument(sym, s.0) }; + // s drops -> kore_sort_free; the symbol kept a shared_ptr copy. + } + self.symbols.insert(key, OwnedSymbol(sym)); + Ok(sym) + } +} + +fn build_sort(s: &Sort) -> Result { + match s { + Sort::App { id, args } => { + let c_name = CString::new(id.as_str()).map_err(|_| MarshalError::Cstring)?; + let raw = unsafe { ffi::kore_composite_sort_new(c_name.as_ptr()) }; + for arg in args { + let arg_sort = build_sort(arg)?; + unsafe { ffi::kore_composite_sort_add_argument(raw, arg_sort.0) }; + } + Ok(OwnedSort(raw)) + } + Sort::Var(_) => Err(MarshalError::Unsupported("Sort::Var")), + } +} + +fn variant_name(p: &Pattern) -> &'static str { + match p { + Pattern::Var(_) => "Var", + Pattern::SVar(_) => "SVar", + Pattern::Str(_) => "Str", + Pattern::App(_) => "App", + Pattern::LeftAssoc(_) => "LeftAssoc", + Pattern::RightAssoc(_) => "RightAssoc", + Pattern::Top(_) => "Top", + Pattern::Bottom(_) => "Bottom", + Pattern::Dv { .. } => "Dv", + Pattern::Not { .. } => "Not", + Pattern::Implies { .. } => "Implies", + Pattern::Iff { .. } => "Iff", + Pattern::And { .. } => "And", + Pattern::Or { .. } => "Or", + Pattern::Exists { .. } => "Exists", + Pattern::Forall { .. } => "Forall", + Pattern::Mu { .. } => "Mu", + Pattern::Nu { .. } => "Nu", + Pattern::Ceil { .. } => "Ceil", + Pattern::Floor { .. } => "Floor", + Pattern::Equals { .. } => "Equals", + Pattern::In { .. } => "In", + Pattern::Next { .. } => "Next", + Pattern::Rewrites { .. } => "Rewrites", + } +} + diff --git a/kframework/src/kore/syntax.rs b/kframework/src/kore/syntax.rs index a1effe5..504d573 100644 --- a/kframework/src/kore/syntax.rs +++ b/kframework/src/kore/syntax.rs @@ -27,6 +27,10 @@ impl Id { pub fn value(self) -> String { self.0 } + + pub fn as_str(&self) -> &str { + &self.0 + } } impl TryFrom for Id { @@ -87,6 +91,10 @@ impl SymbolId { pub fn value(self) -> String { self.0 } + + pub fn as_str(&self) -> &str { + &self.0 + } } impl TryFrom for SymbolId { diff --git a/kframework_ffi/src/kllvm.rs b/kframework_ffi/src/kllvm.rs index 9328678..c492f3b 100644 --- a/kframework_ffi/src/kllvm.rs +++ b/kframework_ffi/src/kllvm.rs @@ -26,7 +26,7 @@ //! kllvm::free_all_memory(); //! ``` mod block; -mod ffi; +pub mod ffi; mod pattern; pub fn init() { diff --git a/kframework_ffi/src/kllvm/pattern.rs b/kframework_ffi/src/kllvm/pattern.rs index e4f8629..fdb64fb 100644 --- a/kframework_ffi/src/kllvm/pattern.rs +++ b/kframework_ffi/src/kllvm/pattern.rs @@ -17,6 +17,10 @@ impl Pattern { pub fn new(s: &str) -> Result::Err> { Pattern::from_str(s) } + + pub fn from_raw(p: *mut ffi::kore_pattern) -> Self { + Self { pattern: p } + } } impl FromStr for Pattern { From e5c0404325242b54f3dccac3b10816a5bd134156 Mon Sep 17 00:00:00 2001 From: Guy Repta <50716988+gtrepta@users.noreply.github.com> Date: Wed, 29 Apr 2026 12:03:32 -0500 Subject: [PATCH 06/13] fix: Use the marshaller in the fuzz target BUG: The fuzzer has a memory leak now --- examples/fuzzer/src/main.rs | 59 +++++++++++++++++++++++----------- examples/fuzzer/src/marshal.rs | 38 ++++++++++++++-------- 2 files changed, 64 insertions(+), 33 deletions(-) diff --git a/examples/fuzzer/src/main.rs b/examples/fuzzer/src/main.rs index e0c2be9..4624052 100644 --- a/examples/fuzzer/src/main.rs +++ b/examples/fuzzer/src/main.rs @@ -2,7 +2,7 @@ use std::panic; use arbitrary::{Arbitrary, Unstructured}; use honggfuzz::fuzz; -use kframework::kore::{App, Parser, Pattern, SymbolId}; +use kframework::kore::{App, Id, Parser, Pattern, Sort, SymbolId}; use kframework_ffi::kllvm; mod marshal; @@ -23,26 +23,43 @@ impl Arbitrary<'_> for FuzzInput { } } -/// Hardcoded kore strings for assembling the initial configuration -const PREFIX: &str = r#" +/// Hardcoded kore string for the initial configuration +const INIT_CONFIG: &str = r#" Lbl'-LT-'generatedTop'-GT-'{}( Lbl'-LT-'T'-GT-'{}( Lbl'-LT-'k'-GT-'{}( kseq{}(inj{SortPgm{}, SortKItem{}}( - Lblinit'Unds'fuzz{}(\dv{SortInt{}}(""#; -const MIDFIX: &str = r#""),\dv{SortInt{}}(""#; -const POSTFIX: &str = r#""))), dotk{}())), Lbl'-LT-'state'-GT-'{}(Lbl'Stop'Map{}())), Lbl'-LT-'generatedCounter'-GT-'{}(\dv{SortInt{}}("0")))"#; - -impl From for String { - /// Build the kore string to send off to kllvm's parser - fn from(input: FuzzInput) -> String { - let field1_str = input.field1.to_string(); - let field2_str = input.field2.to_string(); - - format!( - "{}{}{}{}{}", - PREFIX, field1_str, MIDFIX, field2_str, POSTFIX + Lblinit'Unds'fuzz{}( + FIELD1:SortInt, + FIELD2:SortInt ) + ), + dotk{}()) + ), + Lbl'-LT-'state'-GT-'{}(Lbl'Stop'Map{}())), + Lbl'-LT-'generatedCounter'-GT-'{}(\dv{SortInt{}}("0")) +)"#; + +impl VarHandler for FuzzInput { + fn substitute(&mut self, name: &str) -> Result { + let int_sort = Sort::App { + id: Id::new("SortInt".to_string()).unwrap(), + args: vec![], + }; + match name { + "FIELD1" => Ok(Pattern::Dv { + sort: int_sort, + value: self.field1.to_string().into(), + }), + "FIELD2" => Ok(Pattern::Dv { + sort: int_sort, + value: self.field2.to_string().into(), + }), + _ => Err(MarshalError::UnknownVar(format!( + "Unrecognized variable: {}", + name + ))), + } } } @@ -54,6 +71,10 @@ fn main() { kllvm::free_all_memory(); })); + let kore_pattern = Parser::new(INIT_CONFIG).unwrap().pattern().unwrap(); + + let mut marshaller: Marshaller = Marshaller::new(None); + loop { fuzz!(|seed: &[u8]| { let mut u = Unstructured::new(seed); @@ -61,10 +82,10 @@ fn main() { panic!("Failed to generate input from seed"); }; + marshaller.set_handler(input); + // Build the initial config, execute it, retrieve the final config kore string - let pattern_string: String = input.clone().into(); - let pattern: kllvm::Pattern = - pattern_string.parse().expect("Failed parsing kore string"); + let pattern = marshaller.marshal(&kore_pattern).unwrap(); let mut block: kllvm::Block = pattern.into(); block.take_steps(-1); diff --git a/examples/fuzzer/src/marshal.rs b/examples/fuzzer/src/marshal.rs index d175e63..210eece 100644 --- a/examples/fuzzer/src/marshal.rs +++ b/examples/fuzzer/src/marshal.rs @@ -41,11 +41,11 @@ impl Drop for OwnedSort { pub struct Marshaller { symbols: HashMap, subtrees: HashMap<*const Pattern, OwnedPattern>, - handler: H, + handler: Option, } impl Marshaller { - pub fn new(handler: H) -> Self { + pub fn new(handler: Option) -> Self { Self { symbols: HashMap::new(), subtrees: HashMap::new(), @@ -54,7 +54,7 @@ impl Marshaller { } pub fn marshal(&mut self, root: &Pattern) -> Result { - let (raw, _var_free) = self.marshal_node(root)?; + let (raw, _var_free) = self.marshal_node(root, false)?; // If the root happened to be fully var-free it now lives in the // cache; pop it so kllvm::Pattern's Drop owns the only reference // and we don't double-free. @@ -62,18 +62,27 @@ impl Marshaller { Ok(kllvm::Pattern::from_raw(raw)) } + pub fn set_handler(&mut self, h: H) { + self.handler = Some(h); + } + fn marshal_node( &mut self, p: &Pattern, + was_var: bool, ) -> Result<(*mut ffi::kore_pattern, bool), MarshalError> { if let Some(owned) = self.subtrees.get(&(p as *const Pattern)) { return Ok((owned.0, true)); } - match p { + let res = match p { Pattern::Var(v) => { - let sub = self.handler.substitute(v.id.as_str())?; - let (ptr, _) = self.marshal_node(&sub)?; + let handler = self + .handler + .as_mut() + .ok_or_else(|| MarshalError::UnknownVar(v.id.as_str().into()))?; + let sub = handler.substitute(v.id.as_str())?; + let (ptr, _) = self.marshal_node(&sub, true)?; Ok((ptr, false)) } @@ -81,14 +90,20 @@ impl Marshaller { let sort_owned = build_sort(sort)?; let c_val = CString::new(value.0.as_str()).map_err(|_| MarshalError::Cstring)?; let ptr = unsafe { ffi::kore_pattern_new_token(c_val.as_ptr(), sort_owned.0) }; - self.subtrees.insert(p as *const Pattern, OwnedPattern(ptr)); Ok((ptr, true)) } Pattern::App(app) => self.marshal_app(p, app), other => Err(MarshalError::Unsupported(variant_name(other))), - } + }; + + res.inspect(|(ptr, var_free)| { + if !was_var && *var_free { + self.subtrees + .insert(p as *const Pattern, OwnedPattern(*ptr)); + } + }) } fn marshal_app( @@ -101,7 +116,7 @@ impl Marshaller { let mut var_free = true; for arg in &app.args { - let (child, child_vf) = self.marshal_node(arg)?; + let (child, child_vf) = self.marshal_node(arg, false)?; unsafe { ffi::kore_composite_pattern_add_argument(pat, child) }; var_free &= child_vf; // Do NOT free `child`. If borrowed (cache hit), the cache owns @@ -112,10 +127,6 @@ impl Marshaller { // (~16 bytes/node, bounded by tree size, not iteration count). } - if var_free { - self.subtrees - .insert(p as *const Pattern, OwnedPattern(pat)); - } Ok((pat, var_free)) } @@ -183,4 +194,3 @@ fn variant_name(p: &Pattern) -> &'static str { Pattern::Rewrites { .. } => "Rewrites", } } - From b8bd1c1607edb2f42e6a812ba6aa47c421bd2df2 Mon Sep 17 00:00:00 2001 From: Guy Repta <50716988+gtrepta@users.noreply.github.com> Date: Wed, 29 Apr 2026 13:40:26 -0500 Subject: [PATCH 07/13] fix: Remove memory leak Assisted-by: Claude Code --- examples/fuzzer/src/marshal.rs | 67 +++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 26 deletions(-) diff --git a/examples/fuzzer/src/marshal.rs b/examples/fuzzer/src/marshal.rs index 210eece..43ac3d2 100644 --- a/examples/fuzzer/src/marshal.rs +++ b/examples/fuzzer/src/marshal.rs @@ -54,11 +54,16 @@ impl Marshaller { } pub fn marshal(&mut self, root: &Pattern) -> Result { - let (raw, _var_free) = self.marshal_node(root, false)?; + let (raw, _var_free, fresh) = self.marshal_node(root, false)?; // If the root happened to be fully var-free it now lives in the - // cache; pop it so kllvm::Pattern's Drop owns the only reference - // and we don't double-free. - self.subtrees.remove(&(root as *const Pattern)); + // cache; transfer ownership out so kllvm::Pattern's Drop is the + // sole freer. mem::forget the OwnedPattern we popped — the cache + // no longer owns it; kllvm::Pattern does. + if !fresh { + if let Some(owned) = self.subtrees.remove(&(root as *const Pattern)) { + std::mem::forget(owned); + } + } Ok(kllvm::Pattern::from_raw(raw)) } @@ -66,13 +71,18 @@ impl Marshaller { self.handler = Some(h); } + /// Returns `(ptr, var_free, fresh)`. + /// - `fresh = true` => caller owns the C wrapper and must free it + /// (typically right after handing it to `add_argument`, since the + /// parent already took a `shared_ptr` copy of the underlying AST). + /// - `fresh = false` => `ptr` is borrowed (lives in `self.subtrees`). fn marshal_node( &mut self, p: &Pattern, was_var: bool, - ) -> Result<(*mut ffi::kore_pattern, bool), MarshalError> { + ) -> Result<(*mut ffi::kore_pattern, bool, bool), MarshalError> { if let Some(owned) = self.subtrees.get(&(p as *const Pattern)) { - return Ok((owned.0, true)); + return Ok((owned.0, true, false)); } let res = match p { @@ -82,52 +92,57 @@ impl Marshaller { .as_mut() .ok_or_else(|| MarshalError::UnknownVar(v.id.as_str().into()))?; let sub = handler.substitute(v.id.as_str())?; - let (ptr, _) = self.marshal_node(&sub, true)?; - Ok((ptr, false)) + let (ptr, _, fresh) = self.marshal_node(&sub, true)?; + Ok((ptr, false, fresh)) } Pattern::Dv { sort, value } => { let sort_owned = build_sort(sort)?; let c_val = CString::new(value.0.as_str()).map_err(|_| MarshalError::Cstring)?; let ptr = unsafe { ffi::kore_pattern_new_token(c_val.as_ptr(), sort_owned.0) }; - Ok((ptr, true)) + Ok((ptr, true, true)) } - Pattern::App(app) => self.marshal_app(p, app), + Pattern::App(app) => self.marshal_app(app), other => Err(MarshalError::Unsupported(variant_name(other))), }; - res.inspect(|(ptr, var_free)| { - if !was_var && *var_free { - self.subtrees - .insert(p as *const Pattern, OwnedPattern(*ptr)); - } - }) + let (ptr, var_free, mut fresh) = res?; + if !was_var && var_free && fresh { + self.subtrees + .insert(p as *const Pattern, OwnedPattern(ptr)); + fresh = false; + } + Ok((ptr, var_free, fresh)) } fn marshal_app( &mut self, - p: &Pattern, app: &App, - ) -> Result<(*mut ffi::kore_pattern, bool), MarshalError> { + ) -> Result<(*mut ffi::kore_pattern, bool, bool), MarshalError> { let sym = self.intern_symbol(&app.symbol, &app.sorts)?; let pat = unsafe { ffi::kore_composite_pattern_from_symbol(sym) }; let mut var_free = true; for arg in &app.args { - let (child, child_vf) = self.marshal_node(arg, false)?; + let (child, child_vf, child_fresh) = self.marshal_node(arg, false)?; unsafe { ffi::kore_composite_pattern_add_argument(pat, child) }; + // add_argument copied the inner shared_ptr; the underlying AST + // is now retained by `pat`. A fresh child wrapper is no longer + // needed and must be freed here, otherwise every var-bearing + // intermediate node leaks its C wrapper struct each iteration + // (which was the dominant slow drip in the earlier MVP). + // Borrowed (cached) children must NOT be freed. + if child_fresh { + unsafe { ffi::kore_pattern_free(child) }; + } var_free &= child_vf; - // Do NOT free `child`. If borrowed (cache hit), the cache owns - // it. If freshly allocated under a Var-bearing subtree, the C++ - // side took a shared_ptr copy when add_argument was called, so - // `pat`'s eventual free reclaims the AST. The C wrapper struct - // around `child` in that latter case is intentionally leaked - // (~16 bytes/node, bounded by tree size, not iteration count). } - Ok((pat, var_free)) + // `pat` is freshly allocated in this call. Caller (or + // marshal_node's caching step) decides what to do with it. + Ok((pat, var_free, true)) } fn intern_symbol( From 342023d226bd6deb663a7d84a531f1b87ab7c4a9 Mon Sep 17 00:00:00 2001 From: Guy Repta <50716988+gtrepta@users.noreply.github.com> Date: Wed, 29 Apr 2026 14:09:28 -0500 Subject: [PATCH 08/13] cleanup: Move Marshaller into kframework_ffi Assisted-by: Claude Code --- Cargo.lock | 1 + examples/fuzzer/src/main.rs | 5 +- examples/fuzzer/src/marshal.rs | 211 ---------------------------- kframework_ffi/Cargo.toml | 1 + kframework_ffi/src/kllvm.rs | 6 + kframework_ffi/src/kllvm/marshal.rs | 193 +++++++++++++++++++++++++ kframework_ffi/src/kllvm/pattern.rs | 27 +++- kframework_ffi/src/kllvm/sort.rs | 33 +++++ kframework_ffi/src/kllvm/symbol.rs | 33 +++++ 9 files changed, 293 insertions(+), 217 deletions(-) delete mode 100644 examples/fuzzer/src/marshal.rs create mode 100644 kframework_ffi/src/kllvm/marshal.rs create mode 100644 kframework_ffi/src/kllvm/sort.rs create mode 100644 kframework_ffi/src/kllvm/symbol.rs diff --git a/Cargo.lock b/Cargo.lock index 92704d2..08f94fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -222,6 +222,7 @@ name = "kframework_ffi" version = "0.1.0" dependencies = [ "bindgen", + "kframework", "libc", ] diff --git a/examples/fuzzer/src/main.rs b/examples/fuzzer/src/main.rs index 4624052..7674b65 100644 --- a/examples/fuzzer/src/main.rs +++ b/examples/fuzzer/src/main.rs @@ -4,10 +4,7 @@ use arbitrary::{Arbitrary, Unstructured}; use honggfuzz::fuzz; use kframework::kore::{App, Id, Parser, Pattern, Sort, SymbolId}; use kframework_ffi::kllvm; - -mod marshal; -#[allow(unused_imports)] -use marshal::{MarshalError, Marshaller, VarHandler}; +use kframework_ffi::kllvm::{MarshalError, Marshaller, VarHandler}; #[derive(Clone, Copy)] struct FuzzInput { diff --git a/examples/fuzzer/src/marshal.rs b/examples/fuzzer/src/marshal.rs deleted file mode 100644 index 43ac3d2..0000000 --- a/examples/fuzzer/src/marshal.rs +++ /dev/null @@ -1,211 +0,0 @@ -#![allow(unused)] - -use kframework::kore::{App, Pattern, Sort, SymbolId}; -use kframework_ffi::kllvm; -use kframework_ffi::kllvm::ffi; -use std::collections::HashMap; -use std::ffi::CString; - -#[derive(Debug)] -pub enum MarshalError { - Unsupported(&'static str), - UnknownVar(String), - Cstring, -} - -pub trait VarHandler { - fn substitute(&mut self, name: &str) -> Result; -} - -struct OwnedSymbol(*mut ffi::kore_symbol); -impl Drop for OwnedSymbol { - fn drop(&mut self) { - unsafe { ffi::kore_symbol_free(self.0) } - } -} - -struct OwnedPattern(*mut ffi::kore_pattern); -impl Drop for OwnedPattern { - fn drop(&mut self) { - unsafe { ffi::kore_pattern_free(self.0) } - } -} - -struct OwnedSort(*mut ffi::kore_sort); -impl Drop for OwnedSort { - fn drop(&mut self) { - unsafe { ffi::kore_sort_free(self.0) } - } -} - -pub struct Marshaller { - symbols: HashMap, - subtrees: HashMap<*const Pattern, OwnedPattern>, - handler: Option, -} - -impl Marshaller { - pub fn new(handler: Option) -> Self { - Self { - symbols: HashMap::new(), - subtrees: HashMap::new(), - handler, - } - } - - pub fn marshal(&mut self, root: &Pattern) -> Result { - let (raw, _var_free, fresh) = self.marshal_node(root, false)?; - // If the root happened to be fully var-free it now lives in the - // cache; transfer ownership out so kllvm::Pattern's Drop is the - // sole freer. mem::forget the OwnedPattern we popped — the cache - // no longer owns it; kllvm::Pattern does. - if !fresh { - if let Some(owned) = self.subtrees.remove(&(root as *const Pattern)) { - std::mem::forget(owned); - } - } - Ok(kllvm::Pattern::from_raw(raw)) - } - - pub fn set_handler(&mut self, h: H) { - self.handler = Some(h); - } - - /// Returns `(ptr, var_free, fresh)`. - /// - `fresh = true` => caller owns the C wrapper and must free it - /// (typically right after handing it to `add_argument`, since the - /// parent already took a `shared_ptr` copy of the underlying AST). - /// - `fresh = false` => `ptr` is borrowed (lives in `self.subtrees`). - fn marshal_node( - &mut self, - p: &Pattern, - was_var: bool, - ) -> Result<(*mut ffi::kore_pattern, bool, bool), MarshalError> { - if let Some(owned) = self.subtrees.get(&(p as *const Pattern)) { - return Ok((owned.0, true, false)); - } - - let res = match p { - Pattern::Var(v) => { - let handler = self - .handler - .as_mut() - .ok_or_else(|| MarshalError::UnknownVar(v.id.as_str().into()))?; - let sub = handler.substitute(v.id.as_str())?; - let (ptr, _, fresh) = self.marshal_node(&sub, true)?; - Ok((ptr, false, fresh)) - } - - Pattern::Dv { sort, value } => { - let sort_owned = build_sort(sort)?; - let c_val = CString::new(value.0.as_str()).map_err(|_| MarshalError::Cstring)?; - let ptr = unsafe { ffi::kore_pattern_new_token(c_val.as_ptr(), sort_owned.0) }; - Ok((ptr, true, true)) - } - - Pattern::App(app) => self.marshal_app(app), - - other => Err(MarshalError::Unsupported(variant_name(other))), - }; - - let (ptr, var_free, mut fresh) = res?; - if !was_var && var_free && fresh { - self.subtrees - .insert(p as *const Pattern, OwnedPattern(ptr)); - fresh = false; - } - Ok((ptr, var_free, fresh)) - } - - fn marshal_app( - &mut self, - app: &App, - ) -> Result<(*mut ffi::kore_pattern, bool, bool), MarshalError> { - let sym = self.intern_symbol(&app.symbol, &app.sorts)?; - let pat = unsafe { ffi::kore_composite_pattern_from_symbol(sym) }; - - let mut var_free = true; - for arg in &app.args { - let (child, child_vf, child_fresh) = self.marshal_node(arg, false)?; - unsafe { ffi::kore_composite_pattern_add_argument(pat, child) }; - // add_argument copied the inner shared_ptr; the underlying AST - // is now retained by `pat`. A fresh child wrapper is no longer - // needed and must be freed here, otherwise every var-bearing - // intermediate node leaks its C wrapper struct each iteration - // (which was the dominant slow drip in the earlier MVP). - // Borrowed (cached) children must NOT be freed. - if child_fresh { - unsafe { ffi::kore_pattern_free(child) }; - } - var_free &= child_vf; - } - - // `pat` is freshly allocated in this call. Caller (or - // marshal_node's caching step) decides what to do with it. - Ok((pat, var_free, true)) - } - - fn intern_symbol( - &mut self, - id: &SymbolId, - sorts: &[Sort], - ) -> Result<*mut ffi::kore_symbol, MarshalError> { - let key = id.as_str().to_owned(); - if let Some(owned) = self.symbols.get(&key) { - return Ok(owned.0); - } - let c_name = CString::new(id.as_str()).map_err(|_| MarshalError::Cstring)?; - let sym = unsafe { ffi::kore_symbol_new(c_name.as_ptr()) }; - for sort in sorts { - let s = build_sort(sort)?; - unsafe { ffi::kore_symbol_add_formal_argument(sym, s.0) }; - // s drops -> kore_sort_free; the symbol kept a shared_ptr copy. - } - self.symbols.insert(key, OwnedSymbol(sym)); - Ok(sym) - } -} - -fn build_sort(s: &Sort) -> Result { - match s { - Sort::App { id, args } => { - let c_name = CString::new(id.as_str()).map_err(|_| MarshalError::Cstring)?; - let raw = unsafe { ffi::kore_composite_sort_new(c_name.as_ptr()) }; - for arg in args { - let arg_sort = build_sort(arg)?; - unsafe { ffi::kore_composite_sort_add_argument(raw, arg_sort.0) }; - } - Ok(OwnedSort(raw)) - } - Sort::Var(_) => Err(MarshalError::Unsupported("Sort::Var")), - } -} - -fn variant_name(p: &Pattern) -> &'static str { - match p { - Pattern::Var(_) => "Var", - Pattern::SVar(_) => "SVar", - Pattern::Str(_) => "Str", - Pattern::App(_) => "App", - Pattern::LeftAssoc(_) => "LeftAssoc", - Pattern::RightAssoc(_) => "RightAssoc", - Pattern::Top(_) => "Top", - Pattern::Bottom(_) => "Bottom", - Pattern::Dv { .. } => "Dv", - Pattern::Not { .. } => "Not", - Pattern::Implies { .. } => "Implies", - Pattern::Iff { .. } => "Iff", - Pattern::And { .. } => "And", - Pattern::Or { .. } => "Or", - Pattern::Exists { .. } => "Exists", - Pattern::Forall { .. } => "Forall", - Pattern::Mu { .. } => "Mu", - Pattern::Nu { .. } => "Nu", - Pattern::Ceil { .. } => "Ceil", - Pattern::Floor { .. } => "Floor", - Pattern::Equals { .. } => "Equals", - Pattern::In { .. } => "In", - Pattern::Next { .. } => "Next", - Pattern::Rewrites { .. } => "Rewrites", - } -} diff --git a/kframework_ffi/Cargo.toml b/kframework_ffi/Cargo.toml index eb9cc65..9dc4d38 100644 --- a/kframework_ffi/Cargo.toml +++ b/kframework_ffi/Cargo.toml @@ -5,6 +5,7 @@ edition.workspace = true rust-version.workspace = true [dependencies] +kframework = { path = "../kframework" } libc = "0.2" [build-dependencies] diff --git a/kframework_ffi/src/kllvm.rs b/kframework_ffi/src/kllvm.rs index c492f3b..e7a4c42 100644 --- a/kframework_ffi/src/kllvm.rs +++ b/kframework_ffi/src/kllvm.rs @@ -27,7 +27,10 @@ //! ``` mod block; pub mod ffi; +mod marshal; mod pattern; +mod sort; +mod symbol; pub fn init() { unsafe { @@ -41,4 +44,7 @@ pub fn free_all_memory() { } pub use self::block::Block; +pub use self::marshal::{MarshalError, Marshaller, VarHandler}; pub use self::pattern::Pattern; +pub use self::sort::Sort; +pub use self::symbol::Symbol; diff --git a/kframework_ffi/src/kllvm/marshal.rs b/kframework_ffi/src/kllvm/marshal.rs new file mode 100644 index 0000000..d3dd19c --- /dev/null +++ b/kframework_ffi/src/kllvm/marshal.rs @@ -0,0 +1,193 @@ +use super::ffi; +use super::{Pattern, Sort, Symbol}; +use kframework::kore; +use std::collections::HashMap; + +#[derive(Debug)] +pub enum MarshalError { + Unsupported(&'static str), + UnknownVar(String), + Cstring, +} + +pub trait VarHandler { + fn substitute(&mut self, name: &str) -> Result; +} + +pub struct Marshaller { + /// SymbolId string -> Symbol. Each Symbol's underlying C++ AST is held + /// alive via `shared_ptr` by every pattern that referenced it during + /// construction; dropping the Symbol wrapper here only frees the C + /// struct. + symbols: HashMap, + /// `*const kore::Pattern` (pointer identity into the caller-owned + /// source template) -> Pattern. Only var-free subtrees of the source + /// template are cached; substituted-Var subtrees live on the stack + /// and must not be cached. + subtrees: HashMap<*const kore::Pattern, Pattern>, + handler: Option, +} + +impl Marshaller { + pub fn new(handler: Option) -> Self { + Self { + symbols: HashMap::new(), + subtrees: HashMap::new(), + handler, + } + } + + pub fn set_handler(&mut self, h: H) { + self.handler = Some(h); + } + + pub fn marshal(&mut self, root: &kore::Pattern) -> Result { + let (raw, _var_free, fresh) = self.marshal_node(root, false)?; + // If the root resolved to a cached pointer (rare for fuzzer + // templates, since they have Vars), transfer ownership out of the + // cache so Pattern's Drop is the sole freer. + if !fresh { + if let Some(owned) = self.subtrees.remove(&(root as *const kore::Pattern)) { + std::mem::forget(owned); + } + } + Ok(Pattern::from_raw(raw)) + } + + /// Returns `(ptr, var_free, fresh)`. + /// - `fresh = true` => caller owns the C wrapper and must free it + /// (typically right after handing it to `add_argument`, since the + /// parent already took a `shared_ptr` copy of the underlying AST). + /// - `fresh = false` => `ptr` is borrowed (lives in `self.subtrees`). + fn marshal_node( + &mut self, + p: &kore::Pattern, + was_var: bool, + ) -> Result<(*mut ffi::kore_pattern, bool, bool), MarshalError> { + if let Some(cached) = self.subtrees.get(&(p as *const kore::Pattern)) { + return Ok((cached.pattern as *mut _, true, false)); + } + + let res = match p { + kore::Pattern::Var(v) => { + let handler = self + .handler + .as_mut() + .ok_or_else(|| MarshalError::UnknownVar(v.id.as_str().into()))?; + let sub = handler.substitute(v.id.as_str())?; + let (ptr, _, fresh) = self.marshal_node(&sub, true)?; + Ok((ptr, false, fresh)) + } + + kore::Pattern::Dv { sort, value } => { + let s = build_sort(sort)?; + let pat = + Pattern::new_token(value.0.as_str(), &s).map_err(|_| MarshalError::Cstring)?; + let raw = pat.pattern as *mut _; + // Keep tracking the raw pointer manually until the + // post-match step decides whether to cache or hand back. + std::mem::forget(pat); + Ok((raw, true, true)) + } + + kore::Pattern::App(app) => self.marshal_app(app), + + other => Err(MarshalError::Unsupported(variant_name(other))), + }; + + let (ptr, var_free, mut fresh) = res?; + if !was_var && var_free && fresh { + self.subtrees + .insert(p as *const kore::Pattern, Pattern::from_raw(ptr)); + fresh = false; + } + Ok((ptr, var_free, fresh)) + } + + fn marshal_app( + &mut self, + app: &kore::App, + ) -> Result<(*mut ffi::kore_pattern, bool, bool), MarshalError> { + let sym_ptr = self.intern_symbol(&app.symbol, &app.sorts)?; + let pat = unsafe { ffi::kore_composite_pattern_from_symbol(sym_ptr) }; + + let mut var_free = true; + for arg in &app.args { + let (child, child_vf, child_fresh) = self.marshal_node(arg, false)?; + unsafe { ffi::kore_composite_pattern_add_argument(pat, child) }; + // add_argument copied the inner shared_ptr; the underlying AST + // is now retained by `pat`. A fresh child wrapper is no longer + // needed and must be freed here, otherwise every var-bearing + // intermediate node leaks its C wrapper struct each iteration. + // Borrowed (cached) children must NOT be freed. + if child_fresh { + unsafe { ffi::kore_pattern_free(child) }; + } + var_free &= child_vf; + } + + Ok((pat, var_free, true)) + } + + fn intern_symbol( + &mut self, + id: &kore::SymbolId, + sorts: &[kore::Sort], + ) -> Result<*mut ffi::kore_symbol, MarshalError> { + let key = id.as_str().to_owned(); + if let Some(sym) = self.symbols.get(&key) { + return Ok(sym.symbol); + } + let mut sym = Symbol::new(id.as_str()).map_err(|_| MarshalError::Cstring)?; + for sort in sorts { + let s = build_sort(sort)?; + sym.add_formal_argument(&s); + } + let raw = sym.symbol; + self.symbols.insert(key, sym); + Ok(raw) + } +} + +fn build_sort(s: &kore::Sort) -> Result { + match s { + kore::Sort::App { id, args } => { + let mut sort = Sort::new_composite(id.as_str()).map_err(|_| MarshalError::Cstring)?; + for arg in args { + let arg_sort = build_sort(arg)?; + sort.add_argument(&arg_sort); + } + Ok(sort) + } + kore::Sort::Var(_) => Err(MarshalError::Unsupported("Sort::Var")), + } +} + +fn variant_name(p: &kore::Pattern) -> &'static str { + match p { + kore::Pattern::Var(_) => "Var", + kore::Pattern::SVar(_) => "SVar", + kore::Pattern::Str(_) => "Str", + kore::Pattern::App(_) => "App", + kore::Pattern::LeftAssoc(_) => "LeftAssoc", + kore::Pattern::RightAssoc(_) => "RightAssoc", + kore::Pattern::Top(_) => "Top", + kore::Pattern::Bottom(_) => "Bottom", + kore::Pattern::Dv { .. } => "Dv", + kore::Pattern::Not { .. } => "Not", + kore::Pattern::Implies { .. } => "Implies", + kore::Pattern::Iff { .. } => "Iff", + kore::Pattern::And { .. } => "And", + kore::Pattern::Or { .. } => "Or", + kore::Pattern::Exists { .. } => "Exists", + kore::Pattern::Forall { .. } => "Forall", + kore::Pattern::Mu { .. } => "Mu", + kore::Pattern::Nu { .. } => "Nu", + kore::Pattern::Ceil { .. } => "Ceil", + kore::Pattern::Floor { .. } => "Floor", + kore::Pattern::Equals { .. } => "Equals", + kore::Pattern::In { .. } => "In", + kore::Pattern::Next { .. } => "Next", + kore::Pattern::Rewrites { .. } => "Rewrites", + } +} diff --git a/kframework_ffi/src/kllvm/pattern.rs b/kframework_ffi/src/kllvm/pattern.rs index fdb64fb..be565e6 100644 --- a/kframework_ffi/src/kllvm/pattern.rs +++ b/kframework_ffi/src/kllvm/pattern.rs @@ -1,7 +1,7 @@ use libc; use super::ffi; -use super::Block; -use std::ffi::{c_void, CStr, CString}; +use super::{Block, Sort, Symbol}; +use std::ffi::{c_void, CStr, CString, NulError}; use std::fmt; use std::str::FromStr; @@ -21,6 +21,29 @@ impl Pattern { pub fn from_raw(p: *mut ffi::kore_pattern) -> Self { Self { pattern: p } } + + /// Build a fresh composite pattern from a symbol. The symbol is borrowed; + /// the C++ side keeps a `shared_ptr` to its underlying AST. + pub fn from_symbol(sym: &Symbol) -> Self { + let raw = unsafe { ffi::kore_composite_pattern_from_symbol(sym.symbol) }; + Self { pattern: raw } + } + + /// Build a fresh token (Dv) pattern of the given sort. + pub fn new_token(value: &str, sort: &Sort) -> Result { + let c_val = CString::new(value)?; + let raw = unsafe { ffi::kore_pattern_new_token(c_val.as_ptr(), sort.sort) }; + Ok(Self { pattern: raw }) + } + + /// Append `child` as an argument to this composite pattern. The child + /// is borrowed; the C++ side takes a `shared_ptr` copy of its AST, so + /// the caller may continue to use or drop `child` afterwards. + pub fn add_argument(&mut self, child: &Pattern) { + unsafe { + ffi::kore_composite_pattern_add_argument(self.pattern as *mut _, child.pattern) + }; + } } impl FromStr for Pattern { diff --git a/kframework_ffi/src/kllvm/sort.rs b/kframework_ffi/src/kllvm/sort.rs new file mode 100644 index 0000000..64e70f0 --- /dev/null +++ b/kframework_ffi/src/kllvm/sort.rs @@ -0,0 +1,33 @@ +use super::ffi; +use std::ffi::{CString, NulError}; + +/// Safe wrapper around a `kore_sort *`. Drops via `kore_sort_free`. +/// +/// The underlying C++ AST node is held via a `shared_ptr` on the C++ side; +/// dropping this wrapper only releases the C struct that points to it, so +/// any patterns or symbols that referenced this sort during construction +/// remain valid. +pub struct Sort { + pub(crate) sort: *mut ffi::kore_sort, +} + +impl Sort { + /// Build a fresh composite sort by name (e.g. "SortInt"). Add any sort + /// arguments with [`add_argument`]. + pub fn new_composite(name: &str) -> Result { + let c_name = CString::new(name)?; + let raw = unsafe { ffi::kore_composite_sort_new(c_name.as_ptr()) }; + Ok(Self { sort: raw }) + } + + /// Append a sort argument (for parameterised sorts). + pub fn add_argument(&mut self, arg: &Sort) { + unsafe { ffi::kore_composite_sort_add_argument(self.sort, arg.sort) }; + } +} + +impl Drop for Sort { + fn drop(&mut self) { + unsafe { ffi::kore_sort_free(self.sort) }; + } +} diff --git a/kframework_ffi/src/kllvm/symbol.rs b/kframework_ffi/src/kllvm/symbol.rs new file mode 100644 index 0000000..1255ab0 --- /dev/null +++ b/kframework_ffi/src/kllvm/symbol.rs @@ -0,0 +1,33 @@ +use super::ffi; +use super::Sort; +use std::ffi::{CString, NulError}; + +/// Safe wrapper around a `kore_symbol *`. Drops via `kore_symbol_free`. +/// +/// The underlying C++ symbol is held via a `shared_ptr`; dropping this +/// wrapper only releases the C struct, so patterns built from this symbol +/// stay valid for the rest of their lifetime. +pub struct Symbol { + pub(crate) symbol: *mut ffi::kore_symbol, +} + +impl Symbol { + /// Build a fresh symbol by name (e.g. `Lblfoo`). Add any formal sort + /// arguments with [`add_formal_argument`]. + pub fn new(name: &str) -> Result { + let c_name = CString::new(name)?; + let sym = unsafe { ffi::kore_symbol_new(c_name.as_ptr()) }; + Ok(Self { symbol: sym }) + } + + /// Append one formal sort argument to the symbol's signature. + pub fn add_formal_argument(&mut self, sort: &Sort) { + unsafe { ffi::kore_symbol_add_formal_argument(self.symbol, sort.sort) }; + } +} + +impl Drop for Symbol { + fn drop(&mut self) { + unsafe { ffi::kore_symbol_free(self.symbol) }; + } +} From 318b1ff61fb843a4fb4bfd47fa02d45c7dabfbca Mon Sep 17 00:00:00 2001 From: Guy Repta <50716988+gtrepta@users.noreply.github.com> Date: Wed, 29 Apr 2026 20:28:47 -0500 Subject: [PATCH 09/13] cleanup: Add named semantics for bool flags in function paramaters/return values. Remove unsafe blocks and rely on the safe datatypes. Assisted-by: Claude Code --- kframework_ffi/src/kllvm/marshal.rs | 203 +++++++++++++++++----------- kframework_ffi/src/kllvm/pattern.rs | 6 +- 2 files changed, 124 insertions(+), 85 deletions(-) diff --git a/kframework_ffi/src/kllvm/marshal.rs b/kframework_ffi/src/kllvm/marshal.rs index d3dd19c..f1e7dcf 100644 --- a/kframework_ffi/src/kllvm/marshal.rs +++ b/kframework_ffi/src/kllvm/marshal.rs @@ -1,6 +1,6 @@ -use super::ffi; use super::{Pattern, Sort, Symbol}; use kframework::kore; +use std::collections::hash_map::Entry; use std::collections::HashMap; #[derive(Debug)] @@ -14,11 +14,34 @@ pub trait VarHandler { fn substitute(&mut self, name: &str) -> Result; } +/// Whether this marshalling is allowed to insert into the subtree cache. +/// `Disabled` propagates downward through Var-substituted subtrees, whose +/// Patterns live on the caller's stack and would leave dangling pointer +/// keys if cached. +#[derive(Clone, Copy)] +enum Caching { + Allowed, + Disabled, +} + +/// Result of marshalling one node. Encodes ownership and var-freedom so +/// callers can determine whether or not to cache it. The lifetime ties +/// `Cached` to the `&mut self` borrow of the marshaller. +enum Marshalled<'a> { + /// Already in `self.subtrees` (cache hit, or just inserted). Caller + /// must NOT free; by construction the source subtree was var-free. + Cached(&'a Pattern), + /// Freshly built. RAII frees the C wrapper when the `Pattern` drops, + /// after the parent's `add_argument` has copied the inner shared_ptr. + /// `var_free` reports whether the *source* subtree was Var-free, so + /// an ancestor App can decide its own cacheability. + Fresh { pattern: Pattern, var_free: bool }, +} + pub struct Marshaller { - /// SymbolId string -> Symbol. Each Symbol's underlying C++ AST is held - /// alive via `shared_ptr` by every pattern that referenced it during - /// construction; dropping the Symbol wrapper here only frees the C - /// struct. + /// SymbolId string -> Symbol. The underlying C++ symbol is held alive + /// via `shared_ptr` by every pattern that referenced it; dropping the + /// Symbol wrapper here only frees the C struct. symbols: HashMap, /// `*const kore::Pattern` (pointer identity into the caller-owned /// source template) -> Pattern. Only var-free subtrees of the source @@ -42,110 +65,128 @@ impl Marshaller { } pub fn marshal(&mut self, root: &kore::Pattern) -> Result { - let (raw, _var_free, fresh) = self.marshal_node(root, false)?; - // If the root resolved to a cached pointer (rare for fuzzer - // templates, since they have Vars), transfer ownership out of the - // cache so Pattern's Drop is the sole freer. - if !fresh { - if let Some(owned) = self.subtrees.remove(&(root as *const kore::Pattern)) { - std::mem::forget(owned); - } + // Drop the Marshalled<'_> (and any borrow it holds on self.subtrees) + // before mutating the cache. + match self.marshal_node(root, Caching::Allowed)? { + Marshalled::Fresh { pattern, .. } => return Ok(pattern), + Marshalled::Cached(_) => Ok(self + .subtrees + .remove(&(root as *const kore::Pattern)) + .expect("Cached root must be present in the cache")), } - Ok(Pattern::from_raw(raw)) } - /// Returns `(ptr, var_free, fresh)`. - /// - `fresh = true` => caller owns the C wrapper and must free it - /// (typically right after handing it to `add_argument`, since the - /// parent already took a `shared_ptr` copy of the underlying AST). - /// - `fresh = false` => `ptr` is borrowed (lives in `self.subtrees`). fn marshal_node( &mut self, p: &kore::Pattern, - was_var: bool, - ) -> Result<(*mut ffi::kore_pattern, bool, bool), MarshalError> { - if let Some(cached) = self.subtrees.get(&(p as *const kore::Pattern)) { - return Ok((cached.pattern as *mut _, true, false)); + caching: Caching, + ) -> Result, MarshalError> { + // Probe with contains_key (short borrow) so the cache-miss path + // doesn't carry an immutable borrow on self.subtrees with the + // function's return lifetime — current NLL can't see the + // conditional control flow without that. + let key = p as *const kore::Pattern; + if matches!(caching, Caching::Allowed) && self.subtrees.contains_key(&key) { + return Ok(Marshalled::Cached(self.subtrees.get(&key).unwrap())); } + let (pattern, var_free) = match p { + kore::Pattern::Var(v) => self.marshal_var(v)?, + kore::Pattern::Dv { sort, value } => self.marshal_dv(sort, value)?, + kore::Pattern::App(app) => self.marshal_app(app, caching)?, + other => return Err(MarshalError::Unsupported(variant_name(other))), + }; + Ok(self.maybe_cache(p, pattern, var_free, caching)) + } - let res = match p { - kore::Pattern::Var(v) => { - let handler = self - .handler - .as_mut() - .ok_or_else(|| MarshalError::UnknownVar(v.id.as_str().into()))?; - let sub = handler.substitute(v.id.as_str())?; - let (ptr, _, fresh) = self.marshal_node(&sub, true)?; - Ok((ptr, false, fresh)) - } - - kore::Pattern::Dv { sort, value } => { - let s = build_sort(sort)?; - let pat = - Pattern::new_token(value.0.as_str(), &s).map_err(|_| MarshalError::Cstring)?; - let raw = pat.pattern as *mut _; - // Keep tracking the raw pointer manually until the - // post-match step decides whether to cache or hand back. - std::mem::forget(pat); - Ok((raw, true, true)) - } - - kore::Pattern::App(app) => self.marshal_app(app), - - other => Err(MarshalError::Unsupported(variant_name(other))), + fn marshal_var(&mut self, v: &kore::Var) -> Result<(Pattern, bool), MarshalError> { + let handler = self + .handler + .as_mut() + .ok_or_else(|| MarshalError::UnknownVar(v.id.as_str().into()))?; + let sub = handler.substitute(v.id.as_str())?; + // Caching::Disabled means the recursive call cannot return Cached. + let Marshalled::Fresh { pattern, .. } = self.marshal_node(&sub, Caching::Disabled)? else { + unreachable!("Caching::Disabled cannot return Cached") }; + Ok((pattern, false)) + } - let (ptr, var_free, mut fresh) = res?; - if !was_var && var_free && fresh { - self.subtrees - .insert(p as *const kore::Pattern, Pattern::from_raw(ptr)); - fresh = false; - } - Ok((ptr, var_free, fresh)) + fn marshal_dv( + &mut self, + sort: &kore::Sort, + value: &kore::Str, + ) -> Result<(Pattern, bool), MarshalError> { + let s = build_sort(sort)?; + let pattern = + Pattern::new_token(value.0.as_str(), &s).map_err(|_| MarshalError::Cstring)?; + Ok((pattern, true)) } fn marshal_app( &mut self, app: &kore::App, - ) -> Result<(*mut ffi::kore_pattern, bool, bool), MarshalError> { - let sym_ptr = self.intern_symbol(&app.symbol, &app.sorts)?; - let pat = unsafe { ffi::kore_composite_pattern_from_symbol(sym_ptr) }; + caching: Caching, + ) -> Result<(Pattern, bool), MarshalError> { + // Borrow of `sym` ends after Pattern::from_symbol consumes it, + // freeing self for the recursive marshal_node calls below. + let mut pattern = { + let sym = self.intern_symbol(&app.symbol, &app.sorts)?; + Pattern::from_symbol(sym) + }; let mut var_free = true; for arg in &app.args { - let (child, child_vf, child_fresh) = self.marshal_node(arg, false)?; - unsafe { ffi::kore_composite_pattern_add_argument(pat, child) }; - // add_argument copied the inner shared_ptr; the underlying AST - // is now retained by `pat`. A fresh child wrapper is no longer - // needed and must be freed here, otherwise every var-bearing - // intermediate node leaks its C wrapper struct each iteration. - // Borrowed (cached) children must NOT be freed. - if child_fresh { - unsafe { ffi::kore_pattern_free(child) }; + match self.marshal_node(arg, caching)? { + Marshalled::Cached(child) => pattern.add_argument(child), + Marshalled::Fresh { + pattern: child, + var_free: cvf, + } => { + pattern.add_argument(&child); + // child drops here -> kore_pattern_free + var_free &= cvf; + } } - var_free &= child_vf; } - Ok((pat, var_free, true)) + Ok((pattern, var_free)) + } + + fn maybe_cache( + &mut self, + p: &kore::Pattern, + pattern: Pattern, + var_free: bool, + caching: Caching, + ) -> Marshalled<'_> { + if matches!(caching, Caching::Allowed) && var_free { + Marshalled::Cached( + self.subtrees + .entry(p as *const kore::Pattern) + .or_insert(pattern), + ) + } else { + Marshalled::Fresh { pattern, var_free } + } } fn intern_symbol( &mut self, id: &kore::SymbolId, sorts: &[kore::Sort], - ) -> Result<*mut ffi::kore_symbol, MarshalError> { + ) -> Result<&Symbol, MarshalError> { let key = id.as_str().to_owned(); - if let Some(sym) = self.symbols.get(&key) { - return Ok(sym.symbol); - } - let mut sym = Symbol::new(id.as_str()).map_err(|_| MarshalError::Cstring)?; - for sort in sorts { - let s = build_sort(sort)?; - sym.add_formal_argument(&s); - } - let raw = sym.symbol; - self.symbols.insert(key, sym); - Ok(raw) + Ok(match self.symbols.entry(key) { + Entry::Occupied(o) => o.into_mut(), + Entry::Vacant(v) => { + let mut sym = Symbol::new(id.as_str()).map_err(|_| MarshalError::Cstring)?; + for sort in sorts { + let s = build_sort(sort)?; + sym.add_formal_argument(&s); + } + v.insert(sym) + } + }) } } diff --git a/kframework_ffi/src/kllvm/pattern.rs b/kframework_ffi/src/kllvm/pattern.rs index be565e6..6d1ddad 100644 --- a/kframework_ffi/src/kllvm/pattern.rs +++ b/kframework_ffi/src/kllvm/pattern.rs @@ -1,6 +1,6 @@ -use libc; use super::ffi; use super::{Block, Sort, Symbol}; +use libc; use std::ffi::{c_void, CStr, CString, NulError}; use std::fmt; use std::str::FromStr; @@ -40,9 +40,7 @@ impl Pattern { /// is borrowed; the C++ side takes a `shared_ptr` copy of its AST, so /// the caller may continue to use or drop `child` afterwards. pub fn add_argument(&mut self, child: &Pattern) { - unsafe { - ffi::kore_composite_pattern_add_argument(self.pattern as *mut _, child.pattern) - }; + unsafe { ffi::kore_composite_pattern_add_argument(self.pattern as *mut _, child.pattern) }; } } From 9860b894e4e47f4a63ac39533863da377cb47224 Mon Sep 17 00:00:00 2001 From: Guy Repta <50716988+gtrepta@users.noreply.github.com> Date: Thu, 30 Apr 2026 19:03:23 -0500 Subject: [PATCH 10/13] cleanup: Add documentation and cleanup verbose LLM comments --- kframework_ffi/src/kllvm/marshal.rs | 49 ++++++++++++++++------------- kframework_ffi/src/kllvm/sort.rs | 3 +- kframework_ffi/src/kllvm/symbol.rs | 4 --- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/kframework_ffi/src/kllvm/marshal.rs b/kframework_ffi/src/kllvm/marshal.rs index f1e7dcf..c0f355d 100644 --- a/kframework_ffi/src/kllvm/marshal.rs +++ b/kframework_ffi/src/kllvm/marshal.rs @@ -28,25 +28,31 @@ enum Caching { /// callers can determine whether or not to cache it. The lifetime ties /// `Cached` to the `&mut self` borrow of the marshaller. enum Marshalled<'a> { - /// Already in `self.subtrees` (cache hit, or just inserted). Caller - /// must NOT free; by construction the source subtree was var-free. Cached(&'a Pattern), - /// Freshly built. RAII frees the C wrapper when the `Pattern` drops, - /// after the parent's `add_argument` has copied the inner shared_ptr. - /// `var_free` reports whether the *source* subtree was Var-free, so - /// an ancestor App can decide its own cacheability. Fresh { pattern: Pattern, var_free: bool }, } +/// A marshalling utility for moving a [`kore::Pattern`] over +/// to an llvm-backend's [`Pattern`] +/// +/// Optionally uses a [`VarHandler`] to substitute variable terms +/// in the tree with concrete ones. +/// +/// It caches any variable-free trees, keyed by the pointer to the +/// source tree. +/// +/// This marshaller is good if: +/// - You have one tree that you want to marshal over once. +/// - You have a tree with variables that you want to marshal over multiple +/// times, but with different substitutions for the variables each time +/// (ie. you're running a fuzzer) +/// +/// This marshaller is NOT good if: +/// - You are creating many different trees and want to marshal +/// over every one of them, and they contain few/no common +/// subtrees. pub struct Marshaller { - /// SymbolId string -> Symbol. The underlying C++ symbol is held alive - /// via `shared_ptr` by every pattern that referenced it; dropping the - /// Symbol wrapper here only frees the C struct. symbols: HashMap, - /// `*const kore::Pattern` (pointer identity into the caller-owned - /// source template) -> Pattern. Only var-free subtrees of the source - /// template are cached; substituted-Var subtrees live on the stack - /// and must not be cached. subtrees: HashMap<*const kore::Pattern, Pattern>, handler: Option, } @@ -60,10 +66,17 @@ impl Marshaller { } } + /// Set the handler for any variable substitutions that need to be + /// made. Replaces any pre-existing handler. pub fn set_handler(&mut self, h: H) { self.handler = Some(h); } + /// Marshal over a [`kore::Pattern`] to the llvm-backend. + /// + /// Caches variable-free subtrees for repeated uses. The cache is + /// keyed by `*const kore::Pattern`, so it is expected that + /// structurally equivalent trees are actually the same tree. pub fn marshal(&mut self, root: &kore::Pattern) -> Result { // Drop the Marshalled<'_> (and any borrow it holds on self.subtrees) // before mutating the cache. @@ -81,10 +94,6 @@ impl Marshaller { p: &kore::Pattern, caching: Caching, ) -> Result, MarshalError> { - // Probe with contains_key (short borrow) so the cache-miss path - // doesn't carry an immutable borrow on self.subtrees with the - // function's return lifetime — current NLL can't see the - // conditional control flow without that. let key = p as *const kore::Pattern; if matches!(caching, Caching::Allowed) && self.subtrees.contains_key(&key) { return Ok(Marshalled::Cached(self.subtrees.get(&key).unwrap())); @@ -104,9 +113,8 @@ impl Marshaller { .as_mut() .ok_or_else(|| MarshalError::UnknownVar(v.id.as_str().into()))?; let sub = handler.substitute(v.id.as_str())?; - // Caching::Disabled means the recursive call cannot return Cached. let Marshalled::Fresh { pattern, .. } = self.marshal_node(&sub, Caching::Disabled)? else { - unreachable!("Caching::Disabled cannot return Cached") + unreachable!("Caching::Disabled should return Marshalled::Fresh") }; Ok((pattern, false)) } @@ -127,8 +135,6 @@ impl Marshaller { app: &kore::App, caching: Caching, ) -> Result<(Pattern, bool), MarshalError> { - // Borrow of `sym` ends after Pattern::from_symbol consumes it, - // freeing self for the recursive marshal_node calls below. let mut pattern = { let sym = self.intern_symbol(&app.symbol, &app.sorts)?; Pattern::from_symbol(sym) @@ -143,7 +149,6 @@ impl Marshaller { var_free: cvf, } => { pattern.add_argument(&child); - // child drops here -> kore_pattern_free var_free &= cvf; } } diff --git a/kframework_ffi/src/kllvm/sort.rs b/kframework_ffi/src/kllvm/sort.rs index 64e70f0..adb270f 100644 --- a/kframework_ffi/src/kllvm/sort.rs +++ b/kframework_ffi/src/kllvm/sort.rs @@ -5,8 +5,7 @@ use std::ffi::{CString, NulError}; /// /// The underlying C++ AST node is held via a `shared_ptr` on the C++ side; /// dropping this wrapper only releases the C struct that points to it, so -/// any patterns or symbols that referenced this sort during construction -/// remain valid. +/// any patterns or symbols that reference this sort remain valid. pub struct Sort { pub(crate) sort: *mut ffi::kore_sort, } diff --git a/kframework_ffi/src/kllvm/symbol.rs b/kframework_ffi/src/kllvm/symbol.rs index 1255ab0..9713de1 100644 --- a/kframework_ffi/src/kllvm/symbol.rs +++ b/kframework_ffi/src/kllvm/symbol.rs @@ -3,10 +3,6 @@ use super::Sort; use std::ffi::{CString, NulError}; /// Safe wrapper around a `kore_symbol *`. Drops via `kore_symbol_free`. -/// -/// The underlying C++ symbol is held via a `shared_ptr`; dropping this -/// wrapper only releases the C struct, so patterns built from this symbol -/// stay valid for the rest of their lifetime. pub struct Symbol { pub(crate) symbol: *mut ffi::kore_symbol, } From 7d2fa8b62641b31fbd46426e2cf18de15be3caed Mon Sep 17 00:00:00 2001 From: Guy Repta <50716988+gtrepta@users.noreply.github.com> Date: Fri, 1 May 2026 08:45:08 -0500 Subject: [PATCH 11/13] feat: Allow VarHandler to take the variable's Sort. --- examples/fuzzer/src/main.rs | 2 +- kframework_ffi/src/kllvm/marshal.rs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/fuzzer/src/main.rs b/examples/fuzzer/src/main.rs index 7674b65..bd1e1f2 100644 --- a/examples/fuzzer/src/main.rs +++ b/examples/fuzzer/src/main.rs @@ -38,7 +38,7 @@ Lbl'-LT-'generatedTop'-GT-'{}( )"#; impl VarHandler for FuzzInput { - fn substitute(&mut self, name: &str) -> Result { + fn substitute(&mut self, name: &str, _sort: &Sort) -> Result { let int_sort = Sort::App { id: Id::new("SortInt".to_string()).unwrap(), args: vec![], diff --git a/kframework_ffi/src/kllvm/marshal.rs b/kframework_ffi/src/kllvm/marshal.rs index c0f355d..012766e 100644 --- a/kframework_ffi/src/kllvm/marshal.rs +++ b/kframework_ffi/src/kllvm/marshal.rs @@ -11,7 +11,7 @@ pub enum MarshalError { } pub trait VarHandler { - fn substitute(&mut self, name: &str) -> Result; + fn substitute(&mut self, name: &str, sort: &kore::Sort) -> Result; } /// Whether this marshalling is allowed to insert into the subtree cache. @@ -26,7 +26,7 @@ enum Caching { /// Result of marshalling one node. Encodes ownership and var-freedom so /// callers can determine whether or not to cache it. The lifetime ties -/// `Cached` to the `&mut self` borrow of the marshaller. +/// `Cached` to the marshaller. enum Marshalled<'a> { Cached(&'a Pattern), Fresh { pattern: Pattern, var_free: bool }, @@ -112,7 +112,7 @@ impl Marshaller { .handler .as_mut() .ok_or_else(|| MarshalError::UnknownVar(v.id.as_str().into()))?; - let sub = handler.substitute(v.id.as_str())?; + let sub = handler.substitute(v.id.as_str(), &v.sort)?; let Marshalled::Fresh { pattern, .. } = self.marshal_node(&sub, Caching::Disabled)? else { unreachable!("Caching::Disabled should return Marshalled::Fresh") }; From f61bee545f1dab645542acf66de7ad234ef5c2a6 Mon Sep 17 00:00:00 2001 From: Guy Repta <50716988+gtrepta@users.noreply.github.com> Date: Fri, 1 May 2026 09:06:35 -0500 Subject: [PATCH 12/13] fix: Cargo clippy --- examples/fuzzer/build.rs | 5 +---- examples/fuzzer/src/main.rs | 2 +- kframework_ffi/src/kllvm/marshal.rs | 2 +- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/examples/fuzzer/build.rs b/examples/fuzzer/build.rs index 9a67992..45ff2dd 100644 --- a/examples/fuzzer/build.rs +++ b/examples/fuzzer/build.rs @@ -6,12 +6,9 @@ fn main() { // Add the location of interpreter.so to your LD_LIBRARY_PATH variable // You will also need this variable set when you run the program - match env::var_os("LD_LIBRARY_PATH") { - Some(paths) => { + if let Some(paths) = env::var_os("LD_LIBRARY_PATH") { for lib_path in env::split_paths(&paths) { println!("cargo:rustc-link-search={}", lib_path.display()); } - } - None => (), }; } diff --git a/examples/fuzzer/src/main.rs b/examples/fuzzer/src/main.rs index bd1e1f2..63cc889 100644 --- a/examples/fuzzer/src/main.rs +++ b/examples/fuzzer/src/main.rs @@ -99,7 +99,7 @@ fn main() { Pattern::App(App { symbol, args, .. }, ..) => { if symbol == SymbolId::new("Lbl'-LT-'generatedTop'-GT-'".to_string()).unwrap() { let expected = args - .get(0) + .first() .expect("Expected first argument of generatedTop to be present"); match expected { Pattern::App(App { symbol, .. }, ..) => { diff --git a/kframework_ffi/src/kllvm/marshal.rs b/kframework_ffi/src/kllvm/marshal.rs index 012766e..0ada28f 100644 --- a/kframework_ffi/src/kllvm/marshal.rs +++ b/kframework_ffi/src/kllvm/marshal.rs @@ -81,7 +81,7 @@ impl Marshaller { // Drop the Marshalled<'_> (and any borrow it holds on self.subtrees) // before mutating the cache. match self.marshal_node(root, Caching::Allowed)? { - Marshalled::Fresh { pattern, .. } => return Ok(pattern), + Marshalled::Fresh { pattern, .. } => Ok(pattern), Marshalled::Cached(_) => Ok(self .subtrees .remove(&(root as *const kore::Pattern)) From 9f5ed8d1b7b09d45567764d419f64cc0faa5559d Mon Sep 17 00:00:00 2001 From: Guy Repta <50716988+gtrepta@users.noreply.github.com> Date: Fri, 1 May 2026 09:12:27 -0500 Subject: [PATCH 13/13] fix: Install bindgen dependency in Dockerfiles --- .github/actions/with-docker/Dockerfile | 2 ++ .github/actions/with-docker/Dockerfile.fuzz | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/actions/with-docker/Dockerfile b/.github/actions/with-docker/Dockerfile index 56c192b..284ae09 100644 --- a/.github/actions/with-docker/Dockerfile +++ b/.github/actions/with-docker/Dockerfile @@ -2,6 +2,8 @@ FROM rust:1.85.0 RUN rustup component add clippy rustfmt +RUN apt-get update && apt-get install --yes libclang-dev + ARG USER=user ARG GROUP=$USER ARG USER_ID=1000 diff --git a/.github/actions/with-docker/Dockerfile.fuzz b/.github/actions/with-docker/Dockerfile.fuzz index 615a0a7..a5c34f0 100644 --- a/.github/actions/with-docker/Dockerfile.fuzz +++ b/.github/actions/with-docker/Dockerfile.fuzz @@ -10,7 +10,7 @@ RUN kompile --llvm-kompile-type c fuzz.k FROM rust:${RUST_TOOLCHAIN} COPY --from=semantics /opt/imp-semantics/fuzz-kompiled/interpreter.so /usr/lib -RUN apt-get update && apt-get install --yes build-essential binutils-dev libunwind-dev libblocksruntime-dev liblzma-dev +RUN apt-get update && apt-get install --yes build-essential binutils-dev libclang-dev libunwind-dev libblocksruntime-dev liblzma-dev RUN cargo install honggfuzz ARG USER=user