From 6a33f8e5490fcac8605535ed786d41a35e4aa953 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sat, 6 Jun 2026 18:46:32 +0000 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Bolt:=20[performance=20improvement]?= =?UTF-8?q?=20Optimize=20dynamic=20SQL=20generation=20in=20D1=20targets?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactors `build_upsert_stmt` and `build_delete_stmt` in `crates/flow/src/targets/d1.rs` to use a pre-allocated `String` with `write!` instead of creating multiple dynamic intermediate `Vec` allocations and relying on `format!` inside hot loops. Also resolves unnecessary explicit lifetimes in `crates/rule-engine/src/check_var.rs` based on clippy warnings. Co-authored-by: bashandbone <89049923+bashandbone@users.noreply.github.com> --- crates/ast-engine/src/tree_sitter/mod.rs | 10 +- crates/flow/src/targets/d1.rs | 105 +++++++++++++------ crates/rule-engine/src/check_var.rs | 20 ++-- crates/rule-engine/src/rule/mod.rs | 6 +- crates/rule-engine/src/rule/referent_rule.rs | 5 +- 5 files changed, 97 insertions(+), 49 deletions(-) diff --git a/crates/ast-engine/src/tree_sitter/mod.rs b/crates/ast-engine/src/tree_sitter/mod.rs index 3762df9..7bd59d1 100644 --- a/crates/ast-engine/src/tree_sitter/mod.rs +++ b/crates/ast-engine/src/tree_sitter/mod.rs @@ -553,9 +553,8 @@ impl ContentExt for String { let mut bytes = std::mem::take(self).into_bytes(); let original_len = bytes.len(); bytes.splice(safe_start..safe_end, full_inserted); - *self = Self::from_utf8(bytes).unwrap_or_else(|e| { - Self::from_utf8_lossy(&e.into_bytes()).into_owned() - }); + *self = Self::from_utf8(bytes) + .unwrap_or_else(|e| Self::from_utf8_lossy(&e.into_bytes()).into_owned()); // We calculate new_end_byte using the difference in the new overall string length // to correctly align the end offset, taking any potential replacement bytes from @@ -791,7 +790,10 @@ mod test { let tree2 = parse_lang(|p| p.parse(&src, Some(&tree)), &Tsx.get_ts_language())?; let fresh_tree = parse(&src)?; - assert_eq!(tree2.root_node().to_sexp(), fresh_tree.root_node().to_sexp()); + assert_eq!( + tree2.root_node().to_sexp(), + fresh_tree.root_node().to_sexp() + ); Ok(()) } } diff --git a/crates/flow/src/targets/d1.rs b/crates/flow/src/targets/d1.rs index e45fd52..4e64932 100644 --- a/crates/flow/src/targets/d1.rs +++ b/crates/flow/src/targets/d1.rs @@ -300,41 +300,81 @@ impl D1ExportContext { key: &KeyValue, values: &FieldValues, ) -> Result<(String, Vec), RecocoError> { - let mut columns = vec![]; - let mut placeholders = vec![]; - let mut params = vec![]; - let mut update_clauses = vec![]; + use std::fmt::Write; + + // Allocate strings with capacity to avoid reallocations + let estimated_size = + 150 + (self.key_fields_schema.len() + self.value_fields_schema.len()) * 40; + let mut sql = String::with_capacity(estimated_size); + let mut params = + Vec::with_capacity(self.key_fields_schema.len() + self.value_fields_schema.len()); + + write!(&mut sql, "INSERT INTO {} (", self.table_name).unwrap(); + + let mut first = true; + for (idx, _key_field) in self.key_fields_schema.iter().enumerate() { + if let Some(_key_part) = key.0.get(idx) { + if !first { + sql.push_str(", "); + } + sql.push_str(&self.key_fields_schema[idx].name); + first = false; + } + } - // Extract key parts - KeyValue is a wrapper around Box<[KeyPart]> + for (idx, _value) in values.fields.iter().enumerate() { + if let Some(value_field) = self.value_fields_schema.get(idx) { + if !first { + sql.push_str(", "); + } + sql.push_str(&value_field.name); + first = false; + } + } + + sql.push_str(") VALUES ("); + + let mut first_val = true; for (idx, _key_field) in self.key_fields_schema.iter().enumerate() { if let Some(key_part) = key.0.get(idx) { - columns.push(self.key_fields_schema[idx].name.clone()); - placeholders.push("?".to_string()); + if !first_val { + sql.push_str(", "); + } + sql.push('?'); params.push(key_part_to_json(key_part)?); + first_val = false; } } - // Add value fields for (idx, value) in values.fields.iter().enumerate() { - if let Some(value_field) = self.value_fields_schema.get(idx) { - columns.push(value_field.name.clone()); - placeholders.push("?".to_string()); + if let Some(_value_field) = self.value_fields_schema.get(idx) { + if !first_val { + sql.push_str(", "); + } + sql.push('?'); params.push(value_to_json(value)?); - update_clauses.push(format!( + first_val = false; + } + } + + sql.push_str(") ON CONFLICT DO UPDATE SET "); + + let mut first_update = true; + for (idx, _value) in values.fields.iter().enumerate() { + if let Some(value_field) = self.value_fields_schema.get(idx) { + if !first_update { + sql.push_str(", "); + } + write!( + &mut sql, "{} = excluded.{}", value_field.name, value_field.name - )); + ) + .unwrap(); + first_update = false; } } - let sql = format!( - "INSERT INTO {} ({}) VALUES ({}) ON CONFLICT DO UPDATE SET {}", - self.table_name, - columns.join(", "), - placeholders.join(", "), - update_clauses.join(", ") - ); - Ok((sql, params)) } @@ -342,22 +382,27 @@ impl D1ExportContext { &self, key: &KeyValue, ) -> Result<(String, Vec), RecocoError> { - let mut where_clauses = vec![]; - let mut params = vec![]; + use std::fmt::Write; + + // Allocate strings with capacity to avoid reallocations + let estimated_size = 50 + self.key_fields_schema.len() * 30; + let mut sql = String::with_capacity(estimated_size); + let mut params = Vec::with_capacity(self.key_fields_schema.len()); + + write!(&mut sql, "DELETE FROM {} WHERE ", self.table_name).unwrap(); + let mut first = true; for (idx, _key_field) in self.key_fields_schema.iter().enumerate() { if let Some(key_part) = key.0.get(idx) { - where_clauses.push(format!("{} = ?", self.key_fields_schema[idx].name)); + if !first { + sql.push_str(" AND "); + } + write!(&mut sql, "{} = ?", self.key_fields_schema[idx].name).unwrap(); params.push(key_part_to_json(key_part)?); + first = false; } } - let sql = format!( - "DELETE FROM {} WHERE {}", - self.table_name, - where_clauses.join(" AND ") - ); - Ok((sql, params)) } diff --git a/crates/rule-engine/src/check_var.rs b/crates/rule-engine/src/check_var.rs index 9e40105..d031a6f 100644 --- a/crates/rule-engine/src/check_var.rs +++ b/crates/rule-engine/src/check_var.rs @@ -27,8 +27,8 @@ pub enum CheckHint<'r> { pub fn check_rule_with_hint<'r>( rule: &'r Rule, utils: &'r RuleRegistration, - constraints: &'r RapidMap, - transform: &'r Option, + constraints: &RapidMap, + transform: &Option, fixer: &Vec, hint: CheckHint<'r>, ) -> RResult<()> { @@ -56,8 +56,8 @@ pub fn check_rule_with_hint<'r>( fn check_vars_in_rewriter<'r>( rule: &'r Rule, utils: &'r RuleRegistration, - constraints: &'r RapidMap, - transform: &'r Option, + constraints: &RapidMap, + transform: &Option, fixer: &Vec, upper_var: &RapidSet, ) -> RResult<()> { @@ -85,8 +85,8 @@ fn check_utils_defined( fn check_vars<'r>( rule: &'r Rule, utils: &'r RuleRegistration, - constraints: &'r RapidMap, - transform: &'r Option, + constraints: &RapidMap, + transform: &Option, fixer: &Vec, ) -> RResult<()> { let vars = get_vars_from_rules(rule, utils); @@ -104,9 +104,9 @@ fn get_vars_from_rules<'r>(rule: &'r Rule, utils: &'r RuleRegistration) -> Rapid vars } -fn check_var_in_constraints<'r>( +fn check_var_in_constraints( mut vars: RapidSet, - constraints: &'r RapidMap, + constraints: &RapidMap, ) -> RResult> { for rule in constraints.values() { for var in rule.defined_vars() { @@ -125,9 +125,9 @@ fn check_var_in_constraints<'r>( Ok(vars) } -fn check_var_in_transform<'r>( +fn check_var_in_transform( mut vars: RapidSet, - transform: &'r Option, + transform: &Option, ) -> RResult> { let Some(transform) = transform else { return Ok(vars); diff --git a/crates/rule-engine/src/rule/mod.rs b/crates/rule-engine/src/rule/mod.rs index d1d7712..a2433f3 100644 --- a/crates/rule-engine/src/rule/mod.rs +++ b/crates/rule-engine/src/rule/mod.rs @@ -246,7 +246,11 @@ impl Rule { pub fn defined_vars(&self) -> RapidSet { match self { - Rule::Pattern(p) => p.defined_vars().into_iter().map(|s| s.to_string()).collect(), + Rule::Pattern(p) => p + .defined_vars() + .into_iter() + .map(|s| s.to_string()) + .collect(), Rule::Kind(_) => RapidSet::default(), Rule::Regex(_) => RapidSet::default(), Rule::NthChild(n) => n.defined_vars(), diff --git a/crates/rule-engine/src/rule/referent_rule.rs b/crates/rule-engine/src/rule/referent_rule.rs index dd947bc..74d480e 100644 --- a/crates/rule-engine/src/rule/referent_rule.rs +++ b/crates/rule-engine/src/rule/referent_rule.rs @@ -27,10 +27,7 @@ impl Clone for Registration { impl Registration { fn read(&self) -> Arc> { - self.0 - .read() - .unwrap_or_else(|e| e.into_inner()) - .clone() + self.0.read().unwrap_or_else(|e| e.into_inner()).clone() } pub(crate) fn contains_key(&self, key: &str) -> bool { self.read().contains_key(key)