Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/mysql-on-sqlite/src/load.php
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ class_exists( 'WP_MySQL_Native_Lexer', false )
require_once __DIR__ . '/mysql/native/class-wp-mysql-lexer.php';
require_once __DIR__ . '/mysql/native/mysql-rust-bridge.php';
require_once __DIR__ . '/mysql/native/trait-wp-mysql-native-parser-impl.php';
require_once __DIR__ . '/mysql/native/class-wp-mysql-native-parser-node.php';
require_once __DIR__ . '/mysql/native/class-wp-mysql-parser.php';
} else {
require_once __DIR__ . '/mysql/class-wp-mysql-lexer.php';
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
<?php

/**
* Parser node backed by a native (Rust) AST.
*
* Constructed by the native MySQL parser extension. Read methods delegate
* into the Rust-owned AST so children are never copied into PHP unless a
* caller actually walks the tree. On the first append_child() mutation, the
* node materializes its children into the inherited `$children` array and
* behaves like a plain WP_Parser_Node from then on.
*/
class WP_MySQL_Native_Parser_Node extends WP_Parser_Node {
	/**
	 * Set to true by the first append_child() call. While false, every read
	 * method is answered by the native extension; once true, reads are served
	 * by the inherited WP_Parser_Node implementation.
	 *
	 * @var bool
	 */
	private $was_mutated = false;

	/**
	 * Forwards the rule identity to the base node unchanged.
	 *
	 * @param mixed $rule_id   Rule ID, passed through to WP_Parser_Node.
	 * @param mixed $rule_name Rule name, passed through to WP_Parser_Node.
	 */
	public function __construct( $rule_id, $rule_name ) {
		parent::__construct( $rule_id, $rule_name );
	}

	/**
	 * Notifies the native extension that this wrapper object is going away.
	 *
	 * The hook is optional: when the release function is not defined, the
	 * destructor does nothing.
	 */
	public function __destruct() {
		if ( ! function_exists( 'wp_sqlite_mysql_native_ast_release_wrapper' ) ) {
			return;
		}
		wp_sqlite_mysql_native_ast_release_wrapper( $this );
	}

	/**
	 * Appends a child, first copying the native children into PHP so the
	 * parent implementation appends to the complete child list.
	 *
	 * @param mixed $node Child to append; passed through to WP_Parser_Node.
	 */
	public function append_child( $node ) {
		$this->materialize_native_children();
		parent::append_child( $node );
	}

	/** Whether the node has any child. */
	public function has_child(): bool {
		return $this->was_mutated()
			? parent::has_child()
			: wp_sqlite_mysql_native_ast_has_child( $this );
	}

	/** Whether the node has a child node, optionally filtered by rule name. */
	public function has_child_node( ?string $rule_name = null ): bool {
		return $this->was_mutated()
			? parent::has_child_node( $rule_name )
			: wp_sqlite_mysql_native_ast_has_child_node( $this, $rule_name );
	}

	/** Whether the node has a child token, optionally filtered by token ID. */
	public function has_child_token( ?int $token_id = null ): bool {
		return $this->was_mutated()
			? parent::has_child_token( $token_id )
			: wp_sqlite_mysql_native_ast_has_child_token( $this, $token_id );
	}

	/** First child of the node, as reported by the active backend. */
	public function get_first_child() {
		return $this->was_mutated()
			? parent::get_first_child()
			: wp_sqlite_mysql_native_ast_get_first_child( $this );
	}

	/** First child node, optionally filtered by rule name. */
	public function get_first_child_node( ?string $rule_name = null ): ?WP_Parser_Node {
		return $this->was_mutated()
			? parent::get_first_child_node( $rule_name )
			: wp_sqlite_mysql_native_ast_get_first_child_node( $this, $rule_name );
	}

	/** First child token, optionally filtered by token ID. */
	public function get_first_child_token( ?int $token_id = null ): ?WP_Parser_Token {
		return $this->was_mutated()
			? parent::get_first_child_token( $token_id )
			: wp_sqlite_mysql_native_ast_get_first_child_token( $this, $token_id );
	}

	/** First descendant node, optionally filtered by rule name. */
	public function get_first_descendant_node( ?string $rule_name = null ): ?WP_Parser_Node {
		return $this->was_mutated()
			? parent::get_first_descendant_node( $rule_name )
			: wp_sqlite_mysql_native_ast_get_first_descendant_node( $this, $rule_name );
	}

	/** First descendant token, optionally filtered by token ID. */
	public function get_first_descendant_token( ?int $token_id = null ): ?WP_Parser_Token {
		return $this->was_mutated()
			? parent::get_first_descendant_token( $token_id )
			: wp_sqlite_mysql_native_ast_get_first_descendant_token( $this, $token_id );
	}

	/** All direct children of the node. */
	public function get_children(): array {
		return $this->was_mutated()
			? parent::get_children()
			: wp_sqlite_mysql_native_ast_get_children( $this );
	}

	/** Direct child nodes, optionally filtered by rule name. */
	public function get_child_nodes( ?string $rule_name = null ): array {
		return $this->was_mutated()
			? parent::get_child_nodes( $rule_name )
			: wp_sqlite_mysql_native_ast_get_child_nodes( $this, $rule_name );
	}

	/** Direct child tokens, optionally filtered by token ID. */
	public function get_child_tokens( ?int $token_id = null ): array {
		return $this->was_mutated()
			? parent::get_child_tokens( $token_id )
			: wp_sqlite_mysql_native_ast_get_child_tokens( $this, $token_id );
	}

	/** All descendants of the node. */
	public function get_descendants(): array {
		return $this->was_mutated()
			? parent::get_descendants()
			: wp_sqlite_mysql_native_ast_get_descendants( $this );
	}

	/** Descendant nodes, optionally filtered by rule name. */
	public function get_descendant_nodes( ?string $rule_name = null ): array {
		return $this->was_mutated()
			? parent::get_descendant_nodes( $rule_name )
			: wp_sqlite_mysql_native_ast_get_descendant_nodes( $this, $rule_name );
	}

	/** Descendant tokens, optionally filtered by token ID. */
	public function get_descendant_tokens( ?int $token_id = null ): array {
		return $this->was_mutated()
			? parent::get_descendant_tokens( $token_id )
			: wp_sqlite_mysql_native_ast_get_descendant_tokens( $this, $token_id );
	}

	/** Start offset of the node, as reported by the active backend. */
	public function get_start(): int {
		return $this->was_mutated()
			? parent::get_start()
			: wp_sqlite_mysql_native_ast_get_start( $this );
	}

	/** Length of the node, as reported by the active backend. */
	public function get_length(): int {
		return $this->was_mutated()
			? parent::get_length()
			: wp_sqlite_mysql_native_ast_get_length( $this );
	}

	/**
	 * Reports whether this node has switched to PHP-side children.
	 */
	private function was_mutated(): bool {
		return $this->was_mutated;
	}

	/**
	 * Copies the native children into the inherited `$children` array, once.
	 *
	 * The children are fetched before the flag flips, so the fetch itself still
	 * goes to the native extension. The optional materialize hook is invoked
	 * last, only when the extension defines it.
	 */
	private function materialize_native_children(): void {
		if ( ! $this->was_mutated ) {
			$this->children    = wp_sqlite_mysql_native_ast_get_children( $this );
			$this->was_mutated = true;

			if ( function_exists( 'wp_sqlite_mysql_native_ast_materialize_wrapper' ) ) {
				wp_sqlite_mysql_native_ast_materialize_wrapper( $this );
			}
		}
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
* In this way, a parser node constitutes a recursive structure that represents
* a parse (sub)tree at each level of the full grammar tree.
*/
final class WP_Parser_Node {
class WP_Parser_Node {
/**
* @TODO: Review and document these properties and their visibility.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,28 @@ public function test_parser_returns_an_ast(): void {
$this->assertNotNull( $ast );
$this->assertInstanceOf( WP_Parser_Node::class, $ast );
}

/**
 * A child fetched from a native AST must stay the same PHP object across
 * repeated lookups, including after that child has been mutated with
 * append_child(), and the appended node must be visible through it.
 *
 * Skipped when the native parser node class is not loaded.
 */
public function test_native_ast_node_identity_survives_mutation(): void {
	if ( ! class_exists( 'WP_MySQL_Native_Parser_Node', false ) ) {
		$this->markTestSkipped( 'Native parser extension is not active.' );
	}

	$grammar = new WP_Parser_Grammar( include __DIR__ . '/../../../src/mysql/mysql-grammar.php' );
	$lexer   = new WP_MySQL_Lexer( 'SELECT 1' );
	$parser  = new WP_MySQL_Parser( $grammar, $lexer->native_token_stream() );

	$root = $parser->parse();
	$this->assertInstanceOf( WP_MySQL_Native_Parser_Node::class, $root );

	// Two lookups before any mutation must yield the very same object.
	$child_before = $root->get_first_child_node();
	$this->assertInstanceOf( WP_Parser_Node::class, $child_before );
	$this->assertSame( $child_before, $root->get_first_child_node() );

	// Mutate the child by appending a node that did not come from the parser.
	$appended = new WP_Parser_Node( 0, 'synthetic' );
	$child_before->append_child( $appended );

	// The parent must still hand back the same, now mutated, child object.
	$child_after = $root->get_first_child_node();
	$this->assertSame( $child_before, $child_after );

	$children_after = $child_after->get_children();
	$this->assertTrue( in_array( $appended, $children_after, true ) );
}
}
32 changes: 27 additions & 5 deletions packages/mysql-on-sqlite/tests/tools/run-parser-benchmark.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@
* Options:
* --json Print machine-readable benchmark output.
* --limit=N Only benchmark the first N queries.
* --consume=MODE
* How much AST data to consume after parsing:
* none Only require parse() to return an AST (default).
* descendants Walk all descendants with get_descendants().
*/

// Throw exception if anything fails.
Expand All @@ -17,12 +21,20 @@
}
);

$json = in_array( '--json', $argv, true );
$limit = null;
$json = in_array( '--json', $argv, true );
$limit = null;
$consume = 'none';
foreach ( $argv as $arg ) {
if ( 0 === strpos( $arg, '--limit=' ) ) {
$limit = max( 1, (int) substr( $arg, strlen( '--limit=' ) ) );
}
if ( 0 === strpos( $arg, '--consume=' ) ) {
$consume = substr( $arg, strlen( '--consume=' ) );
}
}

if ( ! in_array( $consume, array( 'none', 'descendants' ), true ) ) {
throw new InvalidArgumentException( sprintf( 'Unsupported --consume mode: %s', $consume ) );
}

// Use the integration loader so an already-loaded native extension selects
Expand Down Expand Up @@ -61,9 +73,10 @@
}

// Run the parser.
$failures = array();
$exceptions = array();
$processed = 0;
$failures = array();
$exceptions = array();
$processed = 0;
$descendants = 0;
// Reuse a single parser across queries, mirroring the driver
// (WP_PDO_MySQL_On_SQLite::reset_or_create_parser), which resets tokens on the
// same instance rather than constructing a fresh parser per query.
Expand All @@ -87,6 +100,8 @@
$ast = $parser->parse();
if ( null === $ast ) {
$failures[] = $query;
} elseif ( 'descendants' === $consume ) {
$descendants += count( $ast->get_descendants() );
}
} catch ( Exception $e ) {
$exceptions[] = $query;
Expand All @@ -107,6 +122,8 @@
'implementation' => class_exists( 'WP_MySQL_Native_Parser', false ) ? 'native-extension' : 'php',
'extension_loaded' => extension_loaded( 'wp_mysql_parser' ),
'queries' => $processed,
'consume' => $consume,
'descendants' => $descendants,
'duration' => $duration,
'qps' => $qps,
'failures' => count( $failures ),
Expand All @@ -119,6 +136,11 @@
}

echo get_stats( $processed, count( $failures ), count( $exceptions ) ), "\n";
// Report which --consume mode was benchmarked (no trailing newline yet).
printf( 'AST consumption: %s', $consume );

Check failure on line 139 in packages/mysql-on-sqlite/tests/tools/run-parser-benchmark.php

View workflow job for this annotation

GitHub Actions / Check code style

String "AST consumption: %s" does not require double quotes; use single quotes instead

Check failure on line 139 in packages/mysql-on-sqlite/tests/tools/run-parser-benchmark.php

View workflow job for this annotation

GitHub Actions / Check code style

String "AST consumption: %s" does not require double quotes; use single quotes instead
if ( 'descendants' === $consume ) {
// Append the total number of descendants walked across all parsed queries.
printf( ' (%d descendants)', $descendants );

Check failure on line 141 in packages/mysql-on-sqlite/tests/tools/run-parser-benchmark.php

View workflow job for this annotation

GitHub Actions / Check code style

String " (%d descendants)" does not require double quotes; use single quotes instead

Check failure on line 141 in packages/mysql-on-sqlite/tests/tools/run-parser-benchmark.php

View workflow job for this annotation

GitHub Actions / Check code style

String " (%d descendants)" does not require double quotes; use single quotes instead
}
echo "\n";

// Print the results.
printf( "\nParsed %d queries in %.5fs @ %d QPS.\n", $processed, $duration, $qps );
27 changes: 18 additions & 9 deletions packages/php-ext-wp-mysql-parser/src/lexer_constants.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#![allow(dead_code)]

use std::collections::HashMap;
use std::mem;
use std::ptr;
use std::sync::OnceLock;

use ext_php_rs::boxed::ZBox;
use ext_php_rs::builders::ClassBuilder;
Expand Down Expand Up @@ -3944,6 +3946,10 @@ pub const TOKEN_SYNONYMS: &[(i64, i64)] = &[
(630i64, 629i64),
];

/// Lazily built index over `KEYWORD_TOKENS`; populated on the first `keyword_token` call.
static KEYWORD_TOKEN_MAP: OnceLock<HashMap<&'static str, i64>> = OnceLock::new();
/// Lazily built index over `VERSION_RULES`; populated on the first `version_rule` call.
static VERSION_RULE_MAP: OnceLock<HashMap<i64, i64>> = OnceLock::new();
/// Lazily built index over `TOKEN_SYNONYMS`; populated on the first `token_synonym` call.
static TOKEN_SYNONYM_MAP: OnceLock<HashMap<i64, i64>> = OnceLock::new();

pub const UNDERSCORE_CHARSET_NAMES: &[&str] = &[
"_armscii8",
"_ascii",
Expand Down Expand Up @@ -4003,25 +4009,28 @@ pub fn token_name(id: i64) -> Option<&'static str> {
}

pub fn keyword_token(keyword: &str) -> Option<i64> {
KEYWORD_TOKENS
.iter()
.find_map(|(candidate, id)| (*candidate == keyword).then_some(*id))
KEYWORD_TOKEN_MAP
.get_or_init(|| KEYWORD_TOKENS.iter().copied().collect())
.get(keyword)
.copied()
}

pub fn version_rule(token_id: i64) -> Option<i64> {
VERSION_RULES
.iter()
.find_map(|(candidate, version)| (*candidate == token_id).then_some(*version))
VERSION_RULE_MAP
.get_or_init(|| VERSION_RULES.iter().copied().collect())
.get(&token_id)
.copied()
}

/// Returns `true` when `token_id` is listed in `FUNCTION_TOKENS`.
pub fn is_function_token(token_id: i64) -> bool {
FUNCTION_TOKENS.contains(&token_id)
}

pub fn token_synonym(token_id: i64) -> Option<i64> {
TOKEN_SYNONYMS
.iter()
.find_map(|(candidate, synonym)| (*candidate == token_id).then_some(*synonym))
TOKEN_SYNONYM_MAP
.get_or_init(|| TOKEN_SYNONYMS.iter().copied().collect())
.get(&token_id)
.copied()
}

pub fn is_underscore_charset(name: &str) -> bool {
Expand Down
Loading
Loading