From 7ab628938eac5f378621ed2ff133f31b9b3b3609 Mon Sep 17 00:00:00 2001 From: Filip Ilic Date: Fri, 29 May 2026 17:47:28 +0200 Subject: [PATCH 01/12] Add website-spec audit engine: source contract, PHP checks, AI bridge Introduce the audit layer that checks a site against specification.website. Defines the swappable Audit_Source contract + normalized finding schema (Audit_Runner), five deterministic PHP checks (doctype, lang, charset, robots.txt, sitemap) behind a filterable registry, a Local source that merges PHP checks with an optional AI pass (PHP wins on overlap), a Remote SaaS source stub for the future server-side engine, and Spec_Mcp_Client which drives WP 7.0's core AI client. Note: WP 7.0's AI client cannot act as an MCP client, so Spec_Mcp_Client feeds the spec checklist + HTML to wp_ai_client_prompt() instead. The whole AI path is guarded by is_available() and degrades to PHP-only checks; WP7-specific calls are marked TODO(wp7-verify) as they can't be exercised without a live WP 7.0 connector. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../audit/checks/class-charset-check.php | 56 ++++ .../audit/checks/class-check.php | 41 +++ .../audit/checks/class-checks-registry.php | 103 +++++++ .../audit/checks/class-doctype-check.php | 53 ++++ .../checks/class-lang-attribute-check.php | 51 ++++ .../audit/checks/class-robots-txt-check.php | 63 ++++ .../audit/checks/class-sitemap-check.php | 82 ++++++ .../audit/class-audit-runner.php | 166 +++++++++++ .../audit/class-audit-source.php | 52 ++++ .../audit/class-local-audit-source.php | 102 +++++++ .../audit/class-remote-audit-source.php | 122 ++++++++ .../audit/class-spec-mcp-client.php | 268 ++++++++++++++++++ 12 files changed, 1159 insertions(+) create mode 100644 classes/suggested-tasks/audit/checks/class-charset-check.php create mode 100644 classes/suggested-tasks/audit/checks/class-check.php create mode 100644 classes/suggested-tasks/audit/checks/class-checks-registry.php create mode 100644 classes/suggested-tasks/audit/checks/class-doctype-check.php create mode 100644 classes/suggested-tasks/audit/checks/class-lang-attribute-check.php create mode 100644 classes/suggested-tasks/audit/checks/class-robots-txt-check.php create mode 100644 classes/suggested-tasks/audit/checks/class-sitemap-check.php create mode 100644 classes/suggested-tasks/audit/class-audit-runner.php create mode 100644 classes/suggested-tasks/audit/class-audit-source.php create mode 100644 classes/suggested-tasks/audit/class-local-audit-source.php create mode 100644 classes/suggested-tasks/audit/class-remote-audit-source.php create mode 100644 classes/suggested-tasks/audit/class-spec-mcp-client.php diff --git a/classes/suggested-tasks/audit/checks/class-charset-check.php b/classes/suggested-tasks/audit/checks/class-charset-check.php new file mode 100644 index 000000000..ca8a1da2c --- /dev/null +++ b/classes/suggested-tasks/audit/checks/class-charset-check.php @@ -0,0 +1,56 @@ + + */ + public function run( string $url, string $html, array $context ): array { + if ( '' === \trim( $html ) ) { + return []; + } + + // Pass if declared either via or a Content-Type response header. + $pass = (bool) \preg_match( '/]*charset\s*=\s*["\']?\s*utf-?8/i', $html ); + + if ( ! $pass && isset( $context['headers']['content-type'] ) ) { + $pass = false !== \stripos( (string) $context['headers']['content-type'], 'utf-8' ); + } + + return [ + 'rule_id' => $this->get_rule_id(), + 'category' => 'basics', + 'title' => \__( 'Declare a UTF-8 charset', 'progress-planner' ), + 'description' => \__( 'Add near the top of your . Without an explicit charset, special characters and emoji can render as garbled text.', 'progress-planner' ), + 'severity' => 'medium', + 'status' => $pass ? 'pass' : 'fail', + 'doc_url' => 'https://specification.website/', + 'source' => 'php-check', + ]; + } +} diff --git a/classes/suggested-tasks/audit/checks/class-check.php b/classes/suggested-tasks/audit/checks/class-check.php new file mode 100644 index 000000000..3d7e42b80 --- /dev/null +++ b/classes/suggested-tasks/audit/checks/class-check.php @@ -0,0 +1,41 @@ + A single finding (see Audit_Runner schema), with at least + * 'rule_id' and 'status'. + */ + public function run( string $url, string $html, array $context ): array; +} diff --git a/classes/suggested-tasks/audit/checks/class-checks-registry.php b/classes/suggested-tasks/audit/checks/class-checks-registry.php new file mode 100644 index 000000000..2bc28ee73 --- /dev/null +++ b/classes/suggested-tasks/audit/checks/class-checks-registry.php @@ -0,0 +1,103 @@ + $check instanceof Check ) + ); + } + + /** + * Run all checks against a URL and return their findings. + * + * The URL is fetched once; the HTML and response metadata are shared with + * every check. + * + * @param string $url The URL to audit. + * + * @return array> Findings (one per check that returns one). + */ + public function run( string $url ): array { + $context = $this->fetch( $url ); + + $findings = []; + foreach ( $this->get_checks() as $check ) { + $finding = $check->run( $url, $context['html'], $context ); + if ( ! empty( $finding ) && ! empty( $finding['rule_id'] ) ) { + $findings[] = $finding; + } + } + + return $findings; + } + + /** + * Fetch the URL once and return shared context. + * + * @param string $url The URL to fetch. + * + * @return array{html: string, response_code: int, headers: array} + */ + protected function fetch( string $url ): array { + $response = \wp_remote_get( + $url, + [ + 'timeout' => 10, + 'user-agent' => 'Progress Planner Spec Audit', + ] + ); + + if ( \is_wp_error( $response ) ) { + return [ + 'html' => '', + 'response_code' => 0, + 'headers' => [], + ]; + } + + $raw_headers = \wp_remote_retrieve_headers( $response ); + $headers = []; + // wp_remote_retrieve_headers() returns a CaseInsensitiveDictionary; getAll() yields a plain array. + $header_array = \is_object( $raw_headers ) ? $raw_headers->getAll() : (array) $raw_headers; + foreach ( $header_array as $name => $value ) { + $headers[ \strtolower( (string) $name ) ] = \is_array( $value ) ? \implode( ', ', $value ) : (string) $value; + } + + return [ + 'html' => (string) \wp_remote_retrieve_body( $response ), + 'response_code' => (int) \wp_remote_retrieve_response_code( $response ), + 'headers' => $headers, + ]; + } +} diff --git a/classes/suggested-tasks/audit/checks/class-doctype-check.php b/classes/suggested-tasks/audit/checks/class-doctype-check.php new file mode 100644 index 000000000..87aca1ddc --- /dev/null +++ b/classes/suggested-tasks/audit/checks/class-doctype-check.php @@ -0,0 +1,53 @@ + + */ + public function run( string $url, string $html, array $context ): array { + // Can't determine from an empty body — don't emit a false failure. + if ( '' === \trim( $html ) ) { + return []; + } + + // Allow an optional UTF-8 BOM and leading whitespace before the doctype. + $pass = (bool) \preg_match( '/^(\xEF\xBB\xBF)?\s* $this->get_rule_id(), + 'category' => 'basics', + 'title' => \__( 'Add an HTML5 doctype to your homepage', 'progress-planner' ), + 'description' => \__( 'Every page should start with so browsers render it in standards mode. Without it, browsers fall back to quirks mode, which can break your layout.', 'progress-planner' ), + 'severity' => 'high', + 'status' => $pass ? 'pass' : 'fail', + 'doc_url' => 'https://specification.website/', + 'source' => 'php-check', + ]; + } +} diff --git a/classes/suggested-tasks/audit/checks/class-lang-attribute-check.php b/classes/suggested-tasks/audit/checks/class-lang-attribute-check.php new file mode 100644 index 000000000..1c2e91e60 --- /dev/null +++ b/classes/suggested-tasks/audit/checks/class-lang-attribute-check.php @@ -0,0 +1,51 @@ + tag declares a lang attribute. + * + * @package Progress_Planner + */ + +namespace Progress_Planner\Suggested_Tasks\Audit\Checks; + +/** + * Lang attribute check. + */ +class Lang_Attribute_Check implements Check { + + /** + * {@inheritDoc} + * + * @return string + */ + public function get_rule_id(): string { + return 'html-lang-attribute'; + } + + /** + * {@inheritDoc} + * + * @param string $url The audited URL. + * @param string $html The fetched homepage HTML. + * @param array $context Shared fetch context. + * + * @return array + */ + public function run( string $url, string $html, array $context ): array { + if ( '' === \trim( $html ) ) { + return []; + } + + $pass = (bool) \preg_match( '/]*\blang\s*=\s*["\']?\s*[a-z]{2,3}\b/i', $html ); + + return [ + 'rule_id' => $this->get_rule_id(), + 'category' => 'accessibility', + 'title' => \__( "Declare your site's language", 'progress-planner' ), + 'description' => \__( 'Add a lang attribute to the tag (for example lang="en"). It helps screen readers pronounce content correctly and search engines serve the right language.', 'progress-planner' ), + 'severity' => 'medium', + 'status' => $pass ? 'pass' : 'fail', + 'doc_url' => 'https://specification.website/', + 'source' => 'php-check', + ]; + } +} diff --git a/classes/suggested-tasks/audit/checks/class-robots-txt-check.php b/classes/suggested-tasks/audit/checks/class-robots-txt-check.php new file mode 100644 index 000000000..ed981c212 --- /dev/null +++ b/classes/suggested-tasks/audit/checks/class-robots-txt-check.php @@ -0,0 +1,63 @@ + + */ + public function run( string $url, string $html, array $context ): array { + $response = \wp_remote_get( + \home_url( '/robots.txt' ), + [ + 'timeout' => 10, + 'user-agent' => 'Progress Planner Spec Audit', + ] + ); + + // Network failure: we can't determine the state, so don't emit a finding. + if ( \is_wp_error( $response ) ) { + return []; + } + + $code = (int) \wp_remote_retrieve_response_code( $response ); + $body = \trim( (string) \wp_remote_retrieve_body( $response ) ); + + $pass = 200 === $code && '' !== $body; + + return [ + 'rule_id' => $this->get_rule_id(), + 'category' => 'seo', + 'title' => \__( 'Publish a robots.txt file', 'progress-planner' ), + 'description' => \__( 'A robots.txt file tells search engines and crawlers which parts of your site they may access. Serving a non-empty robots.txt is a baseline expectation for a well-built site.', 'progress-planner' ), + 'severity' => 'low', + 'status' => $pass ? 'pass' : 'fail', + 'doc_url' => 'https://specification.website/', + 'source' => 'php-check', + ]; + } +} diff --git a/classes/suggested-tasks/audit/checks/class-sitemap-check.php b/classes/suggested-tasks/audit/checks/class-sitemap-check.php new file mode 100644 index 000000000..bdc87bb0e --- /dev/null +++ b/classes/suggested-tasks/audit/checks/class-sitemap-check.php @@ -0,0 +1,82 @@ + + */ + public function run( string $url, string $html, array $context ): array { + // Core sitemaps enabled is a definitive pass without an HTTP request. + if ( \function_exists( 'wp_sitemaps_get_server' ) ) { + if ( \wp_sitemaps_get_server()->sitemaps_enabled() ) { + return $this->finding( 'pass' ); + } + } + + // Otherwise probe the common sitemap locations (SEO plugins, custom sitemaps). + foreach ( [ '/sitemap.xml', '/sitemap_index.xml', '/wp-sitemap.xml' ] as $path ) { + $response = \wp_remote_get( + \home_url( $path ), + [ + 'timeout' => 10, + 'user-agent' => 'Progress Planner Spec Audit', + ] + ); + + if ( \is_wp_error( $response ) ) { + continue; + } + + if ( 200 === (int) \wp_remote_retrieve_response_code( $response ) ) { + return $this->finding( 'pass' ); + } + } + + return $this->finding( 'fail' ); + } + + /** + * Build the finding for a given status. + * + * @param string $status 'pass' or 'fail'. + * + * @return array + */ + protected function finding( string $status ): array { + return [ + 'rule_id' => $this->get_rule_id(), + 'category' => 'seo', + 'title' => \__( 'Publish an XML sitemap', 'progress-planner' ), + 'description' => \__( 'An XML sitemap lists your important URLs so search engines can discover and index them efficiently. WordPress can generate one automatically, or an SEO plugin can provide it.', 'progress-planner' ), + 'severity' => 'medium', + 'status' => $status, + 'doc_url' => 'https://specification.website/', + 'source' => 'php-check', + ]; + } +} diff --git a/classes/suggested-tasks/audit/class-audit-runner.php b/classes/suggested-tasks/audit/class-audit-runner.php new file mode 100644 index 000000000..8f92d8096 --- /dev/null +++ b/classes/suggested-tasks/audit/class-audit-runner.php @@ -0,0 +1,166 @@ + + */ + public const SEVERITY_PRIORITY = [ + 'high' => 20, + 'medium' => 50, + 'low' => 80, + ]; + + /** + * Allowed finding statuses. + * + * @var array + */ + public const STATUSES = [ 'fail', 'pass' ]; + + /** + * Get the active audit source. + * + * Defaults to the local source. Filterable so phase B (or tests) can swap in + * the remote SaaS source without touching the consumer. + * + * @return Audit_Source + */ + public function get_source(): Audit_Source { + $source = new Local_Audit_Source(); + + /** + * Filter the active audit source. + * + * @param Audit_Source $source The default (local) audit source. + */ + $source = \apply_filters( 'progress_planner_audit_source', $source ); + + // A third-party filter could return anything; fall back to the local source. + return $source instanceof Audit_Source ? $source : new Local_Audit_Source(); // @phpstan-ignore-line instanceof.alwaysTrue + } + + /** + * Get the URL to audit (the site's public home URL). + * + * @return string + */ + public function get_audit_url(): string { + /** + * Filter the URL audited against the specification. + * + * @param string $url The site's public home URL. + */ + return (string) \apply_filters( 'progress_planner_audit_url', \home_url( '/' ) ); + } + + /** + * Run the audit and return normalized findings. + * + * @return array> Normalized findings. + */ + public function run(): array { + $source = $this->get_source(); + + if ( ! $source->is_available() ) { + return []; + } + + $findings = $source->get_findings( $this->get_audit_url() ); + + return $this->normalize( $findings ); + } + + /** + * Normalize a raw list of findings to the canonical schema. + * + * Drops findings without a rule_id, clamps severity/status to allowed values, + * and sanitizes user-facing strings. Duplicate rule_ids keep the first + * occurrence (sources should pre-empt by ordering deterministic checks first). + * + * @param array $findings Raw findings. + * + * @return array> Normalized findings. + */ + public function normalize( array $findings ): array { + $normalized = []; + $seen = []; + + foreach ( $findings as $finding ) { + if ( ! \is_array( $finding ) || empty( $finding['rule_id'] ) ) { + continue; + } + + $rule_id = \sanitize_title( (string) $finding['rule_id'] ); + if ( '' === $rule_id || isset( $seen[ $rule_id ] ) ) { + continue; + } + $seen[ $rule_id ] = true; + + $raw_severity = isset( $finding['severity'] ) ? (string) $finding['severity'] : ''; + $severity = isset( self::SEVERITY_PRIORITY[ $raw_severity ] ) ? $raw_severity : 'medium'; + + $status = isset( $finding['status'] ) && \in_array( $finding['status'], self::STATUSES, true ) + ? (string) $finding['status'] + : 'fail'; + + $normalized[] = [ + 'rule_id' => $rule_id, + 'category' => isset( $finding['category'] ) ? \sanitize_key( (string) $finding['category'] ) : 'basics', + 'title' => isset( $finding['title'] ) ? \sanitize_text_field( (string) $finding['title'] ) : '', + 'description' => isset( $finding['description'] ) ? \wp_kses_post( (string) $finding['description'] ) : '', + 'severity' => $severity, + 'status' => $status, + 'doc_url' => isset( $finding['doc_url'] ) ? \esc_url_raw( (string) $finding['doc_url'] ) : '', + 'fix_url' => isset( $finding['fix_url'] ) ? \esc_url_raw( (string) $finding['fix_url'] ) : '', + 'source' => isset( $finding['source'] ) ? \sanitize_key( (string) $finding['source'] ) : '', + ]; + } + + return $normalized; + } + + /** + * Filter a normalized findings list down to failing findings only. + * + * @param array> $findings Normalized findings. + * + * @return array> Failing findings. + */ + public static function failing( array $findings ): array { + return \array_values( + \array_filter( + $findings, + static fn( $finding ) => isset( $finding['status'] ) && 'fail' === $finding['status'] + ) + ); + } + + /** + * Map a severity to a task priority value. + * + * @param string $severity The finding severity. + * + * @return int + */ + public static function severity_to_priority( string $severity ): int { + return self::SEVERITY_PRIORITY[ $severity ] ?? self::SEVERITY_PRIORITY['medium']; + } +} diff --git a/classes/suggested-tasks/audit/class-audit-source.php b/classes/suggested-tasks/audit/class-audit-source.php new file mode 100644 index 000000000..1b8dd2923 --- /dev/null +++ b/classes/suggested-tasks/audit/class-audit-source.php @@ -0,0 +1,52 @@ + 'pass'); + * only failing rules become tasks. + * + * @param string $url The public URL to audit. + * @param array $args Optional source-specific arguments. + * + * @return array> List of findings. + */ + public function get_findings( string $url, array $args = [] ): array; + + /** + * Get the stable identifier for this source (e.g. 'local-connector', 'remote-saas'). + * + * @return string + */ + public function get_id(): string; + + /** + * Whether this source can run right now. + * + * The local source is always available (it can always run PHP checks). The + * remote source requires a license key; the AI layer requires a configured + * WP core connector. + * + * @return bool + */ + public function is_available(): bool; +} diff --git a/classes/suggested-tasks/audit/class-local-audit-source.php b/classes/suggested-tasks/audit/class-local-audit-source.php new file mode 100644 index 000000000..b8e1281ac --- /dev/null +++ b/classes/suggested-tasks/audit/class-local-audit-source.php @@ -0,0 +1,102 @@ +checks_registry = $checks_registry ?? new Checks_Registry(); + $this->mcp_client = $mcp_client ?? new Spec_Mcp_Client(); + } + + /** + * {@inheritDoc} + * + * @return string + */ + public function get_id(): string { + return 'local-connector'; + } + + /** + * The local source can always run (deterministic checks need no connector). + * + * @return bool + */ + public function is_available(): bool { + return true; + } + + /** + * {@inheritDoc} + * + * @param string $url The URL to audit. + * @param array $args Optional arguments. + * + * @return array> + */ + public function get_findings( string $url, array $args = [] ): array { + $php_findings = $this->checks_registry->run( $url ); + + // Index deterministic findings by rule so they pre-empt any LLM finding. + $by_rule = []; + foreach ( $php_findings as $finding ) { + if ( ! empty( $finding['rule_id'] ) ) { + $by_rule[ (string) $finding['rule_id'] ] = $finding; + } + } + + // Layer in the AI findings only when a connector is configured. + if ( $this->mcp_client->is_available() ) { + foreach ( $this->mcp_client->audit_url( $url ) as $finding ) { + $rule_id = empty( $finding['rule_id'] ) ? '' : (string) $finding['rule_id']; + if ( '' === $rule_id || isset( $by_rule[ $rule_id ] ) ) { + continue; // Deterministic check already covers this rule. + } + $by_rule[ $rule_id ] = $finding; + } + } + + return \array_values( $by_rule ); + } +} diff --git a/classes/suggested-tasks/audit/class-remote-audit-source.php b/classes/suggested-tasks/audit/class-remote-audit-source.php new file mode 100644 index 000000000..b035b8b08 --- /dev/null +++ b/classes/suggested-tasks/audit/class-remote-audit-source.php @@ -0,0 +1,122 @@ +get_license_key(); + } + + /** + * {@inheritDoc} + * + * @param string $url The URL to audit. + * @param array $args Optional arguments. + * + * @return array> + */ + public function get_findings( string $url, array $args = [] ): array { + $endpoint = \progress_planner()->get_remote_server_root_url() . '/wp-json/progress-planner-saas/v1/audit'; + $endpoint = \add_query_arg( + [ + 'site' => \get_site_url(), + 'license_key' => \progress_planner()->get_license_key(), + 'url' => $url, + 'locale' => \get_locale(), + ], + $endpoint + ); + + $cache_key = \md5( $endpoint ); + $cached = \progress_planner()->get_utils__cache()->get( $cache_key ); + if ( \is_array( $cached ) ) { + return $this->map_response( $cached ); + } + + $response = \wp_remote_get( $endpoint, [ 'timeout' => 30 ] ); + + if ( \is_wp_error( $response ) || 200 !== (int) \wp_remote_retrieve_response_code( $response ) ) { + \progress_planner()->get_utils__cache()->set( $cache_key, [], 5 * MINUTE_IN_SECONDS ); + return []; + } + + $json = \json_decode( \wp_remote_retrieve_body( $response ), true ); + if ( ! \is_array( $json ) ) { + \progress_planner()->get_utils__cache()->set( $cache_key, [], 5 * MINUTE_IN_SECONDS ); + return []; + } + + \progress_planner()->get_utils__cache()->set( $cache_key, $json, DAY_IN_SECONDS ); + + return $this->map_response( $json ); + } + + /** + * Map the SaaS response into the canonical finding shape. + * + * Kept deliberately simple: the SaaS is expected to return findings already + * shaped like the schema, but we map defensively so the contract is explicit + * and testable against the local source's output. + * + * @param array $json The decoded SaaS response. + * + * @return array> + */ + protected function map_response( array $json ): array { + $items = isset( $json['findings'] ) && \is_array( $json['findings'] ) ? $json['findings'] : $json; + + $findings = []; + foreach ( $items as $item ) { + if ( ! \is_array( $item ) || empty( $item['rule_id'] ) ) { + continue; + } + $findings[] = [ + 'rule_id' => $item['rule_id'], + 'category' => $item['category'] ?? 'basics', + 'title' => $item['title'] ?? '', + 'description' => $item['description'] ?? '', + 'severity' => $item['severity'] ?? 'medium', + 'status' => $item['status'] ?? 'fail', + 'doc_url' => $item['doc_url'] ?? '', + 'fix_url' => $item['fix_url'] ?? '', + 'source' => 'saas', + ]; + } + + return $findings; + } +} diff --git a/classes/suggested-tasks/audit/class-spec-mcp-client.php b/classes/suggested-tasks/audit/class-spec-mcp-client.php new file mode 100644 index 000000000..7ed011b7f --- /dev/null +++ b/classes/suggested-tasks/audit/class-spec-mcp-client.php @@ -0,0 +1,268 @@ +is_supported_for_text_generation(); + } + } catch ( \Throwable $e ) { + return false; + } + + // Entry point exists but we couldn't confirm a configured provider. + return false; + } + + /** + * Audit a URL against the specification using the AI client. + * + * Returns raw (un-normalized) findings; {@see Audit_Runner::normalize()} + * enforces the schema. Returns an empty array on any failure, so callers can + * safely treat the AI layer as best-effort. + * + * @param string $url The URL to audit. + * + * @return array> + */ + public function audit_url( string $url ): array { + if ( ! $this->is_available() ) { + return []; + } + + $checklist = $this->get_checklist(); + $html = $this->fetch_html( $url ); + + if ( '' === $html ) { + return []; + } + + try { + $json = $this->run_prompt( $url, $checklist, $html ); + } catch ( \Throwable $e ) { + return []; + } + + $decoded = \is_string( $json ) ? \json_decode( $json, true ) : $json; + if ( ! \is_array( $decoded ) ) { + return []; + } + + // Accept either a bare list or a { "findings": [...] } envelope. + $items = isset( $decoded['findings'] ) && \is_array( $decoded['findings'] ) ? $decoded['findings'] : $decoded; + + $findings = []; + foreach ( $items as $item ) { + if ( \is_array( $item ) && ! empty( $item['rule_id'] ) ) { + $item['source'] = 'mcp-llm'; + $findings[] = $item; + } + } + + return $findings; + } + + /** + * Run the AI prompt and return the (JSON) response. + * + * Isolated so the exact WP 7.0 builder chain lives in one place. + * + * @param string $url The audited URL. + * @param string $checklist The specification checklist text. + * @param string $html The homepage HTML. + * + * @return string|array|null + */ + protected function run_prompt( string $url, string $checklist, string $html ) { + $schema = [ + 'type' => 'object', + 'properties' => [ + 'findings' => [ + 'type' => 'array', + 'items' => [ + 'type' => 'object', + 'properties' => [ + 'rule_id' => [ 'type' => 'string' ], + 'category' => [ 'type' => 'string' ], + 'title' => [ 'type' => 'string' ], + 'description' => [ 'type' => 'string' ], + 'severity' => [ + 'type' => 'string', + 'enum' => [ 'high', 'medium', 'low' ], + ], + 'status' => [ + 'type' => 'string', + 'enum' => [ 'pass', 'fail' ], + ], + 'doc_url' => [ 'type' => 'string' ], + ], + 'required' => [ 'rule_id', 'status', 'title' ], + ], + ], + ], + 'required' => [ 'findings' ], + ]; + + // Keep the HTML payload bounded so we don't blow the context / token budget. + $html_excerpt = \substr( $html, 0, 20000 ); + + $instruction = \sprintf( + "You are auditing a website against the Website Specification.\n\nURL: %s\n\nSpecification checklist:\n%s\n\nFor each BASIC checklist rule you can evaluate from the HTML below, return a finding with a stable rule_id slug, the category, a short human title and description of the fix, a severity, and status 'pass' or 'fail'. Only include rules you can actually evaluate from the provided HTML. Do not invent rules.\n\nHTML:\n%s", + $url, + $checklist, + $html_excerpt + ); + + // TODO(wp7-verify): confirm method names using_max_tokens / as_json_response / + // generate_text and that generate_text() returns a JSON string for a JSON response. + $builder = \wp_ai_client_prompt( $instruction ); // @phpstan-ignore-line function.notFound + + if ( \method_exists( $builder, 'using_max_tokens' ) ) { + $builder = $builder->using_max_tokens( 2000 ); + } + if ( \method_exists( $builder, 'as_json_response' ) ) { + $builder = $builder->as_json_response( $schema ); + } + + $result = $builder->generate_text(); + + // generate_text() returns WP_Error on failure in the wrapper. + if ( \is_wp_error( $result ) ) { + return null; + } + + return $result; + } + + /** + * Fetch the specification checklist text. + * + * Cached for a week; failures fall back to an empty string (the model then + * audits from general knowledge of the spec, still useful but less precise). + * + * @return string + */ + protected function get_checklist(): string { + $cache_key = 'spec_audit_checklist'; + $cached = \progress_planner()->get_utils__cache()->get( $cache_key ); + if ( \is_string( $cached ) ) { + return $cached; + } + + $response = \wp_remote_get( + self::SPEC_CHECKLIST_URL, + [ + 'timeout' => 15, + 'headers' => [ 'Accept' => 'text/markdown, text/plain, */*' ], + ] + ); + + if ( \is_wp_error( $response ) || 200 !== (int) \wp_remote_retrieve_response_code( $response ) ) { + \progress_planner()->get_utils__cache()->set( $cache_key, '', 5 * MINUTE_IN_SECONDS ); + return ''; + } + + $body = (string) \wp_remote_retrieve_body( $response ); + \progress_planner()->get_utils__cache()->set( $cache_key, $body, WEEK_IN_SECONDS ); + + return $body; + } + + /** + * Fetch the HTML for a URL. + * + * @param string $url The URL. + * + * @return string + */ + protected function fetch_html( string $url ): string { + $response = \wp_remote_get( + $url, + [ + 'timeout' => 10, + 'user-agent' => 'Progress Planner Spec Audit', + ] + ); + + if ( \is_wp_error( $response ) || 200 !== (int) \wp_remote_retrieve_response_code( $response ) ) { + return ''; + } + + return (string) \wp_remote_retrieve_body( $response ); + } +} From 8d6dd1cc8fa4d33dcdd15f9414492d8198c8d9d5 Mon Sep 17 00:00:00 2001 From: Filip Ilic Date: Fri, 29 May 2026 17:47:52 +0200 Subject: [PATCH 02/12] Wire spec audit into suggested tasks with per-window throttle Add the Spec_Audit data collector (caches the audit; runs the expensive checks/LLM only on cache refresh, never on admin_init) and the Spec_Audit task provider that turns failing findings into recommendations. The provider releases at most one task per window (default daily), overridable via progress_planner_spec_audit_max_tasks_per_window and _window filters; each failing rule maps to one durable task and is auto-completed when a re-audit shows it passing. Register both in their managers, and add a `wp prpl audit run` CLI command plus a progress_planner_run_spec_audit AJAX trigger for on-demand runs. Co-Authored-By: Claude Opus 4.7 (1M context) --- classes/class-base.php | 1 + .../suggested-tasks/class-tasks-manager.php | 2 + .../class-data-collector-manager.php | 2 + .../data-collector/class-spec-audit.php | 86 +++++ .../providers/class-spec-audit.php | 359 ++++++++++++++++++ classes/wp-cli/class-audit-command.php | 97 +++++ 6 files changed, 547 insertions(+) create mode 100644 classes/suggested-tasks/data-collector/class-spec-audit.php create mode 100644 classes/suggested-tasks/providers/class-spec-audit.php create mode 100644 classes/wp-cli/class-audit-command.php diff --git a/classes/class-base.php b/classes/class-base.php index 7fa27f1ff..af288ef44 100644 --- a/classes/class-base.php +++ b/classes/class-base.php @@ -176,6 +176,7 @@ public function init() { if ( \defined( 'WP_CLI' ) && \WP_CLI ) { $this->get_wp_cli__get_stats_command(); $this->get_wp_cli__task_command(); + $this->get_wp_cli__audit_command(); } // Init the enqueue class. diff --git a/classes/suggested-tasks/class-tasks-manager.php b/classes/suggested-tasks/class-tasks-manager.php index 64b2aefb5..9c21b5713 100644 --- a/classes/suggested-tasks/class-tasks-manager.php +++ b/classes/suggested-tasks/class-tasks-manager.php @@ -42,6 +42,7 @@ use Progress_Planner\Suggested_Tasks\Providers\Set_Page_About; use Progress_Planner\Suggested_Tasks\Providers\Set_Page_FAQ; use Progress_Planner\Suggested_Tasks\Providers\Set_Page_Contact; +use Progress_Planner\Suggested_Tasks\Providers\Spec_Audit; /** * Tasks_Manager class. @@ -93,6 +94,7 @@ public function __construct() { new Set_Page_About(), new Set_Page_FAQ(), new Set_Page_Contact(), + new Spec_Audit(), ]; // Add the plugin integration. diff --git a/classes/suggested-tasks/data-collector/class-data-collector-manager.php b/classes/suggested-tasks/data-collector/class-data-collector-manager.php index 8ce2c520f..7fe9671fc 100644 --- a/classes/suggested-tasks/data-collector/class-data-collector-manager.php +++ b/classes/suggested-tasks/data-collector/class-data-collector-manager.php @@ -21,6 +21,7 @@ use Progress_Planner\Suggested_Tasks\Data_Collector\Yoast_Orphaned_Content; use Progress_Planner\Suggested_Tasks\Data_Collector\Unpublished_Content; use Progress_Planner\Suggested_Tasks\Data_Collector\SEO_Plugin; +use Progress_Planner\Suggested_Tasks\Data_Collector\Spec_Audit; /** * Base data collector. @@ -54,6 +55,7 @@ public function __construct() { new Published_Post_Count(), new Unpublished_Content(), new SEO_Plugin(), + new Spec_Audit(), ]; // Add the plugin integration. diff --git a/classes/suggested-tasks/data-collector/class-spec-audit.php b/classes/suggested-tasks/data-collector/class-spec-audit.php new file mode 100644 index 000000000..1b39c799a --- /dev/null +++ b/classes/suggested-tasks/data-collector/class-spec-audit.php @@ -0,0 +1,86 @@ +runner ) { + $this->runner = new Audit_Runner(); + } + return $this->runner; + } + + /** + * Calculate the data: run the audit and return normalized findings. + * + * @return array> + */ + protected function calculate_data() { + return $this->get_runner()->run(); + } + + /** + * Get only the failing findings from the cached audit. + * + * @return array> + */ + public function get_failing_findings(): array { + $findings = $this->collect(); + return \is_array( $findings ) ? Audit_Runner::failing( $findings ) : []; + } + + /** + * Get the cached finding for a single rule, if any. + * + * @param string $rule_id The rule ID. + * + * @return array|null + */ + public function get_finding( string $rule_id ): ?array { + $findings = $this->collect(); + if ( ! \is_array( $findings ) ) { + return null; + } + foreach ( $findings as $finding ) { + if ( isset( $finding['rule_id'] ) && $finding['rule_id'] === $rule_id ) { + return $finding; + } + } + return null; + } +} diff --git a/classes/suggested-tasks/providers/class-spec-audit.php b/classes/suggested-tasks/providers/class-spec-audit.php new file mode 100644 index 000000000..7e5e3aba5 --- /dev/null +++ b/classes/suggested-tasks/providers/class-spec-audit.php @@ -0,0 +1,359 @@ +get_data_collector(); + return $collector; // @phpstan-ignore-line return.type + } + + /** + * The maximum number of audit tasks to release per window (filterable). + * + * @return int + */ + protected function get_max_per_window(): int { + /** + * Filter the maximum number of audit tasks released per window. + * + * Default 1. Raise it locally (e.g. via a mu-plugin) to surface findings + * faster during testing. + * + * @param int $max The maximum number of tasks per window. + */ + return \max( 1, (int) \apply_filters( 'progress_planner_spec_audit_max_tasks_per_window', 1 ) ); + } + + /** + * The current throttle window key (filterable: 'day' or 'week'). + * + * @return string + */ + protected function get_window_key(): string { + /** + * Filter the throttle window granularity. + * + * @param string $window Either 'day' or 'week'. + */ + $window = \apply_filters( 'progress_planner_spec_audit_window', 'day' ); + + return 'week' === $window ? \gmdate( 'oW' ) : \gmdate( 'Ymd' ); + } + + /** + * Get the number of tasks already released in the current window. + * + * @return int + */ + protected function get_released_this_window(): int { + $injections = \progress_planner()->get_settings()->get( [ self::SETTINGS_KEY, 'injections' ], [] ); + $window_key = $this->get_window_key(); + return isset( $injections[ $window_key ] ) ? (int) $injections[ $window_key ] : 0; + } + + /** + * Record that a number of tasks were released this window, pruning old windows. + * + * @param int $count How many tasks were just released. + * + * @return void + */ + protected function record_released( int $count ) { + if ( $count < 1 ) { + return; + } + $window_key = $this->get_window_key(); + $injections = [ + $window_key => $this->get_released_this_window() + $count, + ]; + \progress_planner()->get_settings()->set( [ self::SETTINGS_KEY, 'injections' ], $injections ); + } + + /** + * Get the task ID for a finding. + * + * @param array $task_data The task data (expects 'rule_id'). + * + * @return string + */ + public function get_task_id( $task_data = [] ) { + return isset( $task_data['rule_id'] ) && '' !== $task_data['rule_id'] + ? $this->get_provider_id() . '-' . $task_data['rule_id'] + : $this->get_provider_id(); + } + + /** + * Get the title for a finding. + * + * @param array $task_data The task data. + * + * @return string + */ + protected function get_title_with_data( $task_data = [] ) { + return isset( $task_data['title'] ) ? (string) $task_data['title'] : ''; + } + + /** + * Get the description for a finding. + * + * @param array $task_data The task data. + * + * @return string + */ + protected function get_description_with_data( $task_data = [] ) { + return isset( $task_data['description'] ) ? (string) $task_data['description'] : ''; + } + + /** + * Get the external "why is this important?" link for a finding. + * + * @return string + */ + public function get_external_link_url() { + return $this->external_link_url; + } + + /** + * Whether there is at least one failing rule not yet represented by a task. + * + * @return bool + */ + public function should_add_task() { + return ! empty( $this->get_pending_findings() ); + } + + /** + * Get failing findings that are not yet represented by a (non-completed) task. + * + * Sorted by severity (high first) so the most important issues surface first. + * + * @return array> + */ + protected function get_pending_findings(): array { + $findings = $this->get_audit_collector()->get_failing_findings(); + + $pending = []; + foreach ( $findings as $finding ) { + $task_id = $this->get_task_id( $finding ); + + if ( \progress_planner()->get_suggested_tasks_db()->get_post( $task_id ) ) { + continue; // Already injected (any status). + } + if ( true === \progress_planner()->get_suggested_tasks()->was_task_completed( $task_id ) ) { + continue; // User already fixed/dismissed it. + } + $pending[] = $finding; + } + + \usort( + $pending, + static function ( $a, $b ) { + return Audit_Runner::severity_to_priority( $a['severity'] ?? 'medium' ) + <=> Audit_Runner::severity_to_priority( $b['severity'] ?? 'medium' ); + } + ); + + return $pending; + } + + /** + * Build task data from a finding. + * + * @param array $finding The normalized finding. + * + * @return array + */ + protected function build_task_data( array $finding ): array { + $this->external_link_url = $finding['doc_url'] ?? ''; + $this->priority = Audit_Runner::severity_to_priority( $finding['severity'] ?? 'medium' ); + $this->url = ! empty( $finding['fix_url'] ) ? $finding['fix_url'] : ''; + $this->url_target = '_blank'; + + $task_data = $this->get_task_details( $finding ); + $task_data['rule_id'] = $finding['rule_id']; + $task_data['category'] = $finding['category'] ?? 'basics'; + $task_data['severity'] = $finding['severity'] ?? 'medium'; + + return $task_data; + } + + /** + * Inject up to the per-window limit of pending findings as tasks. + * + * @return array Created post IDs. + */ + public function get_tasks_to_inject() { + if ( true === $this->is_task_snoozed() ) { + return []; + } + + $limit = $this->get_max_per_window(); + $released = $this->get_released_this_window(); + if ( $released >= $limit ) { + return []; + } + + $pending = $this->get_pending_findings(); + if ( empty( $pending ) ) { + return []; + } + + $to_release = \array_slice( $pending, 0, $limit - $released ); + + $created = []; + foreach ( $to_release as $finding ) { + $task_data = $this->build_task_data( $finding ); + + if ( \progress_planner()->get_suggested_tasks_db()->get_post( $task_data['task_id'] ) ) { + continue; + } + + $post_id = \progress_planner()->get_suggested_tasks_db()->add( $task_data ); + if ( $post_id ) { + $created[] = $post_id; + } + } + + $this->record_released( \count( $created ) ); + + return $created; + } + + /** + * A spec-audit task is complete when its rule now passes (or is gone). + * + * @param string $task_id The task ID to check. + * + * @return bool + */ + protected function is_specific_task_completed( $task_id ) { + $rule_id = $this->get_rule_id_from_task_id( $task_id ); + if ( '' === $rule_id ) { + return false; + } + + $finding = $this->get_audit_collector()->get_finding( $rule_id ); + + // Rule no longer reported as failing -> the user fixed it. + if ( null === $finding ) { + return true; + } + + return isset( $finding['status'] ) && 'pass' === $finding['status']; + } + + /** + * Extract the rule ID from a task ID. + * + * @param string $task_id The task ID. + * + * @return string + */ + protected function get_rule_id_from_task_id( string $task_id ): string { + $prefix = $this->get_provider_id() . '-'; + return 0 === \strpos( $task_id, $prefix ) ? \substr( $task_id, \strlen( $prefix ) ) : ''; + } + + /** + * AJAX handler for the manual "run audit now" trigger. + * + * @return void + */ + public function ajax_run_audit() { + if ( ! \current_user_can( static::CAPABILITY ) ) { + \wp_send_json_error( [ 'message' => \esc_html__( 'You do not have permission to run the audit.', 'progress-planner' ) ] ); + } + + if ( ! \check_ajax_referer( 'progress_planner', 'nonce', false ) ) { + \wp_send_json_error( [ 'message' => \esc_html__( 'Invalid nonce.', 'progress-planner' ) ] ); + } + + $injected = $this->run_audit_now(); + + \wp_send_json_success( + [ + 'failing_count' => \count( $this->get_audit_collector()->get_failing_findings() ), + 'injected_count' => \count( $injected ), + ] + ); + } + + /** + * Refresh the audit cache and inject any newly surfaced tasks. + * + * Shared by the AJAX handler and the WP-CLI command. + * + * @return array Created post IDs. + */ + public function run_audit_now(): array { + $this->get_audit_collector()->update_cache(); + return $this->get_tasks_to_inject(); + } +} diff --git a/classes/wp-cli/class-audit-command.php b/classes/wp-cli/class-audit-command.php new file mode 100644 index 000000000..eeb0684f0 --- /dev/null +++ b/classes/wp-cli/class-audit-command.php @@ -0,0 +1,97 @@ +] + * : Accepted values: table, csv, json, yaml. Default: table + * + * ## EXAMPLES + * + * # Run the audit now + * $ wp prpl audit run + * + * # Run the audit and show findings as JSON + * $ wp prpl audit run --format=json + * + * @param array $args Command arguments. + * @param array $assoc_args Command associative arguments. + * + * @return void + */ + public function run( $args, $assoc_args ) { + $provider = \progress_planner()->get_suggested_tasks()->get_tasks_manager()->get_task_provider( 'spec-audit' ); + + if ( ! $provider instanceof Spec_Audit ) { + \WP_CLI::error( 'Spec audit provider not found.' ); // @phpstan-ignore-line + return; + } + + $injected = $provider->run_audit_now(); + + $collector = new Spec_Audit_Data_Collector(); + $findings = $collector->get_failing_findings(); + + $format = isset( $assoc_args['format'] ) ? $assoc_args['format'] : 'table'; + $fields = [ 'rule_id', 'category', 'severity', 'status', 'title' ]; + + $rows = []; + foreach ( $findings as $finding ) { + $rows[] = [ + 'rule_id' => $finding['rule_id'] ?? '', + 'category' => $finding['category'] ?? '', + 'severity' => $finding['severity'] ?? '', + 'status' => $finding['status'] ?? '', + 'title' => $finding['title'] ?? '', + ]; + } + + if ( ! empty( $rows ) ) { + WP_CLI\Utils\format_items( $format, $rows, $fields ); // @phpstan-ignore-line + } + + \WP_CLI::success( // @phpstan-ignore-line + \sprintf( + '%d failing rule(s); %d task(s) injected this run.', + \count( $findings ), + \count( $injected ) + ) + ); + } +} From c07307c6986f1414e03c0b1dac6a0069c4c01896 Mon Sep 17 00:00:00 2001 From: Filip Ilic Date: Fri, 29 May 2026 17:48:15 +0200 Subject: [PATCH 03/12] Add tests for the spec audit engine, throttle, and C/B contract Cover the deterministic PHP checks, Audit_Runner schema normalization/dedup, graceful degradation to PHP-only findings when the AI layer is unavailable, the per-window injection throttle and per-rule completion, and a shape-equality test asserting the local and remote sources produce identical finding shapes (the guard that keeps the phase-C and phase-B engines interchangeable). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../phpunit/test-class-spec-audit-checks.php | 91 +++++ .../test-class-spec-audit-provider.php | 239 +++++++++++++ .../phpunit/test-class-spec-audit-runner.php | 335 ++++++++++++++++++ 3 files changed, 665 insertions(+) create mode 100644 tests/phpunit/test-class-spec-audit-checks.php create mode 100644 tests/phpunit/test-class-spec-audit-provider.php create mode 100644 tests/phpunit/test-class-spec-audit-runner.php diff --git a/tests/phpunit/test-class-spec-audit-checks.php b/tests/phpunit/test-class-spec-audit-checks.php new file mode 100644 index 000000000..5a061ab8e --- /dev/null +++ b/tests/phpunit/test-class-spec-audit-checks.php @@ -0,0 +1,91 @@ +TestHi'; + + /** + * Test the doctype check passes on a valid doctype and fails without one. + * + * @return void + */ + public function test_doctype_check() { + $check = new Doctype_Check(); + + $pass = $check->run( 'https://example.com', self::GOOD_HTML, [] ); + $this->assertSame( 'html-doctype', $pass['rule_id'] ); + $this->assertSame( 'pass', $pass['status'] ); + + $fail = $check->run( 'https://example.com', 'No doctype', [] ); + $this->assertSame( 'fail', $fail['status'] ); + } + + /** + * Test that an empty body yields no finding (can't determine). + * + * @return void + */ + public function test_doctype_check_empty_html_returns_no_finding() { + $check = new Doctype_Check(); + $this->assertSame( [], $check->run( 'https://example.com', '', [] ) ); + } + + /** + * Test the lang-attribute check. + * + * @return void + */ + public function test_lang_attribute_check() { + $check = new Lang_Attribute_Check(); + + $pass = $check->run( 'https://example.com', self::GOOD_HTML, [] ); + $this->assertSame( 'pass', $pass['status'] ); + + $fail = $check->run( 'https://example.com', '', [] ); + $this->assertSame( 'fail', $fail['status'] ); + } + + /** + * Test the charset check passes via meta tag and via header. + * + * @return void + */ + public function test_charset_check() { + $check = new Charset_Check(); + + // Passes via meta tag. + $pass = $check->run( 'https://example.com', self::GOOD_HTML, [] ); + $this->assertSame( 'pass', $pass['status'] ); + + // Fails when neither meta tag nor header declares utf-8. + $fail = $check->run( 'https://example.com', '', [ 'headers' => [] ] ); + $this->assertSame( 'fail', $fail['status'] ); + + // Passes via Content-Type header even when no meta tag is present. + $header_pass = $check->run( + 'https://example.com', + '', + [ 'headers' => [ 'content-type' => 'text/html; charset=UTF-8' ] ] + ); + $this->assertSame( 'pass', $header_pass['status'] ); + } +} diff --git a/tests/phpunit/test-class-spec-audit-provider.php b/tests/phpunit/test-class-spec-audit-provider.php new file mode 100644 index 000000000..cd45b0050 --- /dev/null +++ b/tests/phpunit/test-class-spec-audit-provider.php @@ -0,0 +1,239 @@ +provider = new Spec_Audit(); + + // Guarantee isolation: Settings uses a static cache that survives across + // tests in the same process, so explicitly reset the throttle state. + \progress_planner()->get_settings()->set( 'spec_audit', [] ); + } + + /** + * Clean up after the test. + * + * @return void + */ + public function tearDown(): void { + \progress_planner()->get_suggested_tasks_db()->delete_all_recommendations(); + \progress_planner()->get_settings()->set( 'spec_audit', [] ); + // Clear the seeded collector cache. + $data = \progress_planner()->get_settings()->get( 'progress_planner_data_collector', [] ); + unset( $data['spec_audit_findings'] ); + \progress_planner()->get_settings()->set( 'progress_planner_data_collector', $data ); + + \remove_all_filters( 'progress_planner_spec_audit_max_tasks_per_window' ); + \remove_all_filters( 'progress_planner_spec_audit_window' ); + + parent::tearDown(); + } + + /** + * Seed the audit data-collector cache with the given findings. + * + * @param array> $findings The findings to cache. + * + * @return void + */ + private function seed_findings( array $findings ) { + $data = \progress_planner()->get_settings()->get( 'progress_planner_data_collector', [] ); + $data['spec_audit_findings'] = $findings; + \progress_planner()->get_settings()->set( 'progress_planner_data_collector', $data ); + } + + /** + * Build a normalized-shape failing finding. + * + * @param string $rule_id The rule ID. + * @param string $severity The severity. + * + * @return array + */ + private function finding( string $rule_id, string $severity = 'medium' ): array { + return [ + 'rule_id' => $rule_id, + 'category' => 'basics', + 'title' => 'Fix ' . $rule_id, + 'description' => 'Description for ' . $rule_id, + 'severity' => $severity, + 'status' => 'fail', + 'doc_url' => 'https://specification.website/', + 'fix_url' => '', + 'source' => 'php-check', + ]; + } + + /** + * Test that only one task is released per window by default. + * + * @return void + */ + public function test_throttle_releases_one_per_window_by_default() { + $this->seed_findings( + [ + $this->finding( 'rule-a' ), + $this->finding( 'rule-b' ), + $this->finding( 'rule-c' ), + ] + ); + + // First injection: exactly one task. + $created = $this->provider->get_tasks_to_inject(); + $this->assertCount( 1, $created, 'Default throttle releases exactly one task.' ); + + // Second injection in the same window: nothing more. + $created_again = $this->provider->get_tasks_to_inject(); + $this->assertCount( 0, $created_again, 'No further tasks released in the same window.' ); + } + + /** + * Test that the per-window limit filter raises the cap. + * + * @return void + */ + public function test_filter_raises_limit() { + \add_filter( 'progress_planner_spec_audit_max_tasks_per_window', fn() => 3 ); + + $this->seed_findings( + [ + $this->finding( 'rule-a' ), + $this->finding( 'rule-b' ), + $this->finding( 'rule-c' ), + ] + ); + + $created = $this->provider->get_tasks_to_inject(); + $this->assertCount( 3, $created, 'Filter raises the per-window limit to 3.' ); + } + + /** + * Test that higher-severity findings are released first. + * + * @return void + */ + public function test_high_severity_released_first() { + $this->seed_findings( + [ + $this->finding( 'low-rule', 'low' ), + $this->finding( 'high-rule', 'high' ), + $this->finding( 'medium-rule', 'medium' ), + ] + ); + + $created = $this->provider->get_tasks_to_inject(); + $this->assertCount( 1, $created ); + + $task = \get_post( $created[0] ); + $this->assertNotNull( $task ); + $this->assertStringContainsString( 'high-rule', (string) $task->post_name ); + } + + /** + * Test that an already-injected rule is not re-injected. + * + * @return void + */ + public function test_already_injected_rule_not_repeated() { + \add_filter( 'progress_planner_spec_audit_max_tasks_per_window', fn() => 10 ); + + $this->seed_findings( [ $this->finding( 'rule-a' ) ] ); + + $first = $this->provider->get_tasks_to_inject(); + $this->assertCount( 1, $first ); + + // Reset the window counter, but the task already exists in the DB. + \progress_planner()->get_settings()->set( 'spec_audit', [] ); + + $second = $this->provider->get_tasks_to_inject(); + $this->assertCount( 0, $second, 'A rule already represented by a task is not re-injected.' ); + } + + /** + * Test the task ID is stable and derived from the rule ID. + * + * @return void + */ + public function test_task_id_is_stable_per_rule() { + $this->assertSame( 'spec-audit-html-doctype', $this->provider->get_task_id( [ 'rule_id' => 'html-doctype' ] ) ); + $this->assertSame( 'spec-audit', $this->provider->get_task_id( [] ) ); + } + + /** + * Test completion: a rule that now passes marks the task complete. + * + * @return void + */ + public function test_task_completed_when_rule_passes() { + $this->seed_findings( [ $this->finding( 'rule-a' ) ] ); + $this->provider->get_tasks_to_inject(); + + // While the rule still fails, the task is not complete. + $this->assertFalse( $this->provider->is_task_completed( 'spec-audit-rule-a' ) ); + + // Flip the rule to passing. + $passing = $this->finding( 'rule-a' ); + $passing['status'] = 'pass'; + $this->seed_findings( [ $passing ] ); + + $this->assertTrue( $this->provider->is_task_completed( 'spec-audit-rule-a' ) ); + } + + /** + * Test completion: a rule that disappears from the audit marks the task complete. + * + * @return void + */ + public function test_task_completed_when_rule_gone() { + $this->seed_findings( [ $this->finding( 'rule-a' ) ] ); + $this->provider->get_tasks_to_inject(); + + // Rule no longer present in the audit at all. + $this->seed_findings( [] ); + + $this->assertTrue( $this->provider->is_task_completed( 'spec-audit-rule-a' ) ); + } +} diff --git a/tests/phpunit/test-class-spec-audit-runner.php b/tests/phpunit/test-class-spec-audit-runner.php new file mode 100644 index 000000000..5ec68a3b2 --- /dev/null +++ b/tests/phpunit/test-class-spec-audit-runner.php @@ -0,0 +1,335 @@ + 'Html-Lang-Attribute', + 'category' => 'accessibility', + 'title' => 'Declare your language', + 'severity' => 'high', + 'status' => 'fail', + 'doc_url' => 'https://specification.website/', + ], + // Missing rule_id -> dropped. + [ + 'title' => 'No rule id', + 'status' => 'fail', + ], + // Unknown severity/status -> clamped to defaults. + [ + 'rule_id' => 'charset-meta', + 'severity' => 'critical', + 'status' => 'unknown', + ], + // Not an array -> dropped. + 'garbage', + ]; + + $normalized = $runner->normalize( $raw ); + + $this->assertCount( 2, $normalized, 'Findings without a rule_id or non-arrays are dropped.' ); + + // rule_id is slugified. + $this->assertSame( 'html-lang-attribute', $normalized[0]['rule_id'] ); + $this->assertSame( 'high', $normalized[0]['severity'] ); + $this->assertSame( 'fail', $normalized[0]['status'] ); + + // Unknown severity/status clamp to defaults. + $this->assertSame( 'medium', $normalized[1]['severity'] ); + $this->assertSame( 'fail', $normalized[1]['status'] ); + + // Every normalized finding has the full key set. + foreach ( $normalized as $finding ) { + $this->assertSame( + [ 'rule_id', 'category', 'title', 'description', 'severity', 'status', 'doc_url', 'fix_url', 'source' ], + \array_keys( $finding ) + ); + } + } + + /** + * Test that duplicate rule_ids keep only the first occurrence. + * + * @return void + */ + public function test_normalize_dedupes_by_rule_id() { + $runner = new Audit_Runner(); + + $normalized = $runner->normalize( + [ + [ + 'rule_id' => 'robots-txt', + 'title' => 'First', + 'status' => 'fail', + ], + [ + 'rule_id' => 'robots-txt', + 'title' => 'Second', + 'status' => 'pass', + ], + ] + ); + + $this->assertCount( 1, $normalized ); + $this->assertSame( 'First', $normalized[0]['title'] ); + } + + /** + * Test the failing() helper filters out passing findings. + * + * @return void + */ + public function test_failing_filters_passing() { + $findings = [ + [ + 'rule_id' => 'a', + 'status' => 'fail', + ], + [ + 'rule_id' => 'b', + 'status' => 'pass', + ], + [ + 'rule_id' => 'c', + 'status' => 'fail', + ], + ]; + + $failing = Audit_Runner::failing( $findings ); + $this->assertCount( 2, $failing ); + $this->assertSame( 'a', $failing[0]['rule_id'] ); + $this->assertSame( 'c', $failing[1]['rule_id'] ); + } + + /** + * Test that the local source returns PHP-only findings when the AI layer is unavailable. + * + * @return void + */ + public function test_local_source_degrades_to_php_when_ai_unavailable() { + // A checks registry that returns one deterministic finding without HTTP. + $registry = new class() extends Checks_Registry { + /** + * Return a fixed finding. + * + * @param string $url The URL. + * @return array> + */ + public function run( string $url ): array { + return [ + [ + 'rule_id' => 'html-doctype', + 'status' => 'fail', + 'title' => 'Add a doctype', + 'source' => 'php-check', + ], + ]; + } + }; + + // An MCP client that reports itself unavailable. + $mcp = new class() extends Spec_Mcp_Client { + /** + * Always unavailable. + * + * @return bool + */ + public function is_available(): bool { + return false; + } + }; + + $source = new Local_Audit_Source( $registry, $mcp ); + $findings = $source->get_findings( 'https://example.com' ); + + $this->assertCount( 1, $findings ); + $this->assertSame( 'html-doctype', $findings[0]['rule_id'] ); + $this->assertSame( 'php-check', $findings[0]['source'] ); + } + + /** + * Test that deterministic findings pre-empt AI findings for the same rule. + * + * @return void + */ + public function test_local_source_php_preempts_ai_on_overlap() { + $registry = new class() extends Checks_Registry { + /** + * Return a fixed deterministic finding. + * + * @param string $url The URL. + * @return array> + */ + public function run( string $url ): array { + return [ + [ + 'rule_id' => 'html-doctype', + 'status' => 'fail', + 'source' => 'php-check', + ], + ]; + } + }; + + // AI client that "covers" the same rule plus a new one. + $mcp = new class() extends Spec_Mcp_Client { + /** + * Always available. + * + * @return bool + */ + public function is_available(): bool { + return true; + } + + /** + * Return overlapping + new findings. + * + * @param string $url The URL. + * @return array> + */ + public function audit_url( string $url ): array { + return [ + [ + 'rule_id' => 'html-doctype', + 'status' => 'pass', + 'source' => 'mcp-llm', + ], + [ + 'rule_id' => 'meta-description', + 'status' => 'fail', + 'source' => 'mcp-llm', + ], + ]; + } + }; + + $source = new Local_Audit_Source( $registry, $mcp ); + $findings = $source->get_findings( 'https://example.com' ); + + // Index by rule for clarity. + $by_rule = []; + foreach ( $findings as $finding ) { + $by_rule[ $finding['rule_id'] ] = $finding; + } + + $this->assertCount( 2, $findings ); + // PHP check wins for the overlapping rule. + $this->assertSame( 'php-check', $by_rule['html-doctype']['source'] ); + $this->assertSame( 'fail', $by_rule['html-doctype']['status'] ); + // AI-only rule is included. + $this->assertSame( 'mcp-llm', $by_rule['meta-description']['source'] ); + } + + /** + * Test the C/B contract: local and remote sources produce the same finding shape. + * + * This is the regression guard that keeps the two engines interchangeable. + * + * @return void + */ + public function test_local_and_remote_sources_produce_same_shape() { + $runner = new Audit_Runner(); + + // Local finding (deterministic), normalized. + $registry = new class() extends Checks_Registry { + /** + * Return a fixed finding. + * + * @param string $url The URL. + * @return array> + */ + public function run( string $url ): array { + return [ + [ + 'rule_id' => 'html-doctype', + 'status' => 'fail', + 'title' => 'Doctype', + 'source' => 'php-check', + ], + ]; + } + }; + $mcp = new class() extends Spec_Mcp_Client { + /** + * Always unavailable. + * + * @return bool + */ + public function is_available(): bool { + return false; + } + }; + + $local_source = new Local_Audit_Source( $registry, $mcp ); + $local = $runner->normalize( $local_source->get_findings( 'https://example.com' ) ); + + // Remote finding mapped via the SaaS response shape, normalized. + $remote_source = new class() extends Remote_Audit_Source { + /** + * Map a canned SaaS response to findings. + * + * @param string $url The URL. + * @param array $args Args. + * @return array> + */ + public function get_findings( string $url, array $args = [] ): array { + return $this->map_response( + [ + 'findings' => [ + [ + 'rule_id' => 'html-doctype', + 'status' => 'fail', + 'title' => 'Doctype', + ], + ], + ] + ); + } + }; + $remote = $runner->normalize( $remote_source->get_findings( 'https://example.com' ) ); + + // Both produce one finding with identical key sets. + $this->assertCount( 1, $local ); + $this->assertCount( 1, $remote ); + $this->assertSame( \array_keys( $local[0] ), \array_keys( $remote[0] ), 'Local and remote findings must share the same shape.' ); + $this->assertSame( $local[0]['rule_id'], $remote[0]['rule_id'] ); + } + + /** + * Test that both sources implement the Audit_Source contract. + * + * @return void + */ + public function test_sources_implement_contract() { + $this->assertInstanceOf( Audit_Source::class, new Local_Audit_Source() ); + $this->assertInstanceOf( Audit_Source::class, new Remote_Audit_Source() ); + } +} From ba74aa518b16f7a1b8735d97d7b94b463adf6c58 Mon Sep 17 00:00:00 2001 From: Filip Ilic Date: Fri, 29 May 2026 18:04:21 +0200 Subject: [PATCH 04/12] Fix Spec_Mcp_Client against real WP 7.0 AI client API Verified on a live WP 7.0 install: the AI builder (WP_AI_Client_Prompt_Builder) delegates SDK methods through __call, so the earlier method_exists() guards on using_max_tokens/as_json_response/is_supported_for_text_generation silently skipped those calls. Call them directly and gate availability on wp_supports_ai() plus is_supported_for_text_generation(). Confirmed end-to-end via `wp prpl audit run`: detects failing rules, injects one task, and the throttle blocks the second run; the AI path correctly reports unavailable when no provider is configured and degrades to PHP-only checks. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../audit/class-spec-mcp-client.php | 66 +++++++++---------- 1 file changed, 30 insertions(+), 36 deletions(-) diff --git a/classes/suggested-tasks/audit/class-spec-mcp-client.php b/classes/suggested-tasks/audit/class-spec-mcp-client.php index 7ed011b7f..e34ad4335 100644 --- a/classes/suggested-tasks/audit/class-spec-mcp-client.php +++ b/classes/suggested-tasks/audit/class-spec-mcp-client.php @@ -30,19 +30,20 @@ * This uses the *verified* WP 7.0 AI API surface and avoids a hand-rolled MCP * JSON-RPC client. * - * EVERYTHING that touches the WP 7.0 API is guarded by function/method-existence - * checks, so this class is a safe no-op on WordPress < 7.0 or when no AI - * connector is configured — {@see is_available()} returns false and the local - * source falls back to deterministic PHP checks only. + * The WP 7.0 entry point is guarded by `function_exists( 'wp_ai_client_prompt' )` + * and `wp_supports_ai()`, so this class is a safe no-op on WordPress < 7.0 or + * when no AI provider is configured — {@see is_available()} returns false and the + * local source falls back to deterministic PHP checks only. * - * UNVERIFIED (could not be exercised without a running WP 7.0 + configured - * connector). Items marked `TODO(wp7-verify)` need confirmation against a real - * 7.0 install: - * - exact `wp_ai_client_prompt()` builder method chain & return type, - * - `is_supported_for_text_generation()` as the availability gate, - * - `as_json_response( $schema )` behavior and whether `generate_text()` - * returns a JSON string vs a structured object, - * - the specification checklist endpoint shape. + * Verified against WordPress 7.0 core (wp-includes/ai-client.php + + * wp-includes/ai-client/class-wp-ai-client-prompt-builder.php): the builder is + * WP_AI_Client_Prompt_Builder, which delegates SDK methods via __call (so + * method_exists() must NOT be used to guard them). `is_supported_for_text_generation()` + * returns bool and is gated by wp_supports_ai() + the wp_ai_client_prevent_prompt + * filter; `using_max_tokens( int )`, `as_json_response( ?array )` and + * `generate_text()` (returns string or WP_Error) all resolve through the wrapper. + * The one thing still not exercised is the specification.website checklist + * endpoint shape (we tolerate an empty checklist). * --------------------------------------------------------------------------- */ class Spec_Mcp_Client { @@ -69,24 +70,22 @@ public function is_available(): bool { return false; } - // WP 7.0 core AI client entry point. Absent on older WP / when the - // feature is not loaded. - if ( ! \function_exists( 'wp_ai_client_prompt' ) ) { + // WP 7.0 core AI client entry point + environment support check. + if ( ! \function_exists( 'wp_ai_client_prompt' ) || ! \function_exists( 'wp_supports_ai' ) ) { + return false; + } + if ( ! \wp_supports_ai() ) { // @phpstan-ignore-line function.notFound return false; } - // TODO(wp7-verify): confirm builder exposes is_supported_for_text_generation(). + // is_supported_for_text_generation() is delegated through the builder's + // __call, so it must be called directly (method_exists() would miss it). + // It returns false when no provider/model is configured for text generation. try { - $prompt = \wp_ai_client_prompt( 'ping' ); // @phpstan-ignore-line function.notFound - if ( \is_object( $prompt ) && \method_exists( $prompt, 'is_supported_for_text_generation' ) ) { - return (bool) $prompt->is_supported_for_text_generation(); - } + return (bool) \wp_ai_client_prompt( 'ping' )->is_supported_for_text_generation(); // @phpstan-ignore-line function.notFound } catch ( \Throwable $e ) { return false; } - - // Entry point exists but we couldn't confirm a configured provider. - return false; } /** @@ -188,20 +187,15 @@ protected function run_prompt( string $url, string $checklist, string $html ) { $html_excerpt ); - // TODO(wp7-verify): confirm method names using_max_tokens / as_json_response / - // generate_text and that generate_text() returns a JSON string for a JSON response. - $builder = \wp_ai_client_prompt( $instruction ); // @phpstan-ignore-line function.notFound - - if ( \method_exists( $builder, 'using_max_tokens' ) ) { - $builder = $builder->using_max_tokens( 2000 ); - } - if ( \method_exists( $builder, 'as_json_response' ) ) { - $builder = $builder->as_json_response( $schema ); - } - - $result = $builder->generate_text(); + // These methods are delegated via WP_AI_Client_Prompt_Builder::__call, so + // they are called directly (method_exists() would not see them). The + // builder is fluent; generate_text() returns a JSON string (per the schema) + // or a WP_Error. + $result = \wp_ai_client_prompt( $instruction ) // @phpstan-ignore-line function.notFound + ->using_max_tokens( 2000 ) + ->as_json_response( $schema ) + ->generate_text(); - // generate_text() returns WP_Error on failure in the wrapper. if ( \is_wp_error( $result ) ) { return null; } From 9caef26fbe2c932ddd45653f777a84538c1fa7b1 Mon Sep 17 00:00:00 2001 From: Filip Ilic Date: Sat, 30 May 2026 07:27:04 +0200 Subject: [PATCH 05/12] Keep audit's outbound HTTP off the admin_init / FPM path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously the inject-time recheck I added in ba74aa5 called wp_remote_get from get_tasks_to_inject(), which runs on every admin_init. Each admin pageview then triggered 1+ loopback HTTP requests back into the same PHP-FPM pool serving the user — pinning workers and starving the whole pool until nginx returned 502s for unrelated Valet sites. Three structural changes prevent that class of bug entirely: 1. The Spec_Audit data collector's update_cache() is a no-op unless an explicit caller has opted in via Spec_Audit_Data_Collector::with_explicit_refresh(). The Data_Collector_Manager's admin_init sweep therefore cannot trigger the audit. Sanctioned callers: CLI command, cron hook, AJAX shutdown. 2. collect() never falls back to calculate_data() on cache miss — a missing cache returns []. is_specific_task_completed() now distinguishes "no cache" from "rule passed", so an object-cache flush can't mass-complete every audit task. 3. The AJAX "run now" handler defers the audit to shutdown and calls fastcgi_finish_request() so the user's worker is released to the pool before the outbound HTTP starts. A daily wp-cron hook also drives refreshes from a non-web context. Reverted is_still_failing()'s live recheck — it's the wrong place for HTTP. Kept the deferred throttle counter (pure-PHP, no HTTP). Updated tests: dropped the live-recheck test; added tests for the cache-empty completion guard and the no-explicit-refresh no-op. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../data-collector/class-spec-audit.php | 77 +++++++-- .../providers/class-spec-audit.php | 149 ++++++++++++++++-- .../test-class-spec-audit-provider.php | 99 +++++++++++- 3 files changed, 299 insertions(+), 26 deletions(-) diff --git a/classes/suggested-tasks/data-collector/class-spec-audit.php b/classes/suggested-tasks/data-collector/class-spec-audit.php index 1b39c799a..5ea0ad36c 100644 --- a/classes/suggested-tasks/data-collector/class-spec-audit.php +++ b/classes/suggested-tasks/data-collector/class-spec-audit.php @@ -12,10 +12,15 @@ /** * Caches the result of running the website audit against the specification. * - * The audit (which may make HTTP requests and, when configured, LLM calls) is - * expensive, so it runs only when the cache is refreshed — on the daily - * data-collector cron and on the manual "run audit now" trigger — never on - * every admin request via {@see collect()}. + * The audit makes outbound HTTP requests (and, when configured, LLM calls), so + * a stray cache refresh on `admin_init` can starve the FPM pool via loopback. + * Two guards prevent that: + * 1. {@see collect()} NEVER falls through to {@see calculate_data()} — a missing + * cache returns an empty array, not a live audit. + * 2. {@see update_cache()} only runs when an explicit caller (the provider's + * CLI / cron / AJAX-shutdown path) has set {@see $explicit_refresh} to true. + * The Data_Collector_Manager's sweep on `admin_init` therefore can't trigger + * the audit. */ class Spec_Audit extends Base_Data_Collector { @@ -26,6 +31,17 @@ class Spec_Audit extends Base_Data_Collector { */ protected const DATA_KEY = 'spec_audit_findings'; + /** + * Whether the current call to update_cache() came from a sanctioned caller + * (CLI, AJAX shutdown handler, or the dedicated cron hook). + * + * Static so the data collector loop in Data_Collector_Manager — which + * instantiates its own copy — cannot bypass it. + * + * @var bool + */ + protected static $explicit_refresh = false; + /** * The audit runner. * @@ -33,6 +49,27 @@ class Spec_Audit extends Base_Data_Collector { */ protected $runner = null; + /** + * Allow the next call to update_cache() to actually perform the audit. + * + * Sanctioned callers (CLI command, cron hook handler, AJAX shutdown handler) + * wrap their refresh call in this flag so the audit doesn't run from the + * data-collector-manager's `admin_init` sweep. + * + * @param callable $callback Invoked while the flag is set. The flag is + * always cleared on return. + * + * @return mixed The callback's return value. + */ + public static function with_explicit_refresh( callable $callback ) { + self::$explicit_refresh = true; + try { + return $callback(); + } finally { + self::$explicit_refresh = false; + } + } + /** * Get the audit runner. * @@ -54,14 +91,39 @@ protected function calculate_data() { return $this->get_runner()->run(); } + /** + * Read the cached audit findings; NEVER trigger a live audit. + * + * The base implementation falls back to calculate_data() on cache miss, + * which would make outbound HTTP from any request that happens to hit a + * cold cache (e.g. after object-cache flush). Override to short-circuit. + * + * @return array> + */ + public function collect() { + $cached = $this->get_cached_data( $this->get_data_key() ); + return \is_array( $cached ) ? $cached : []; + } + + /** + * Refresh the cache only when an explicit caller has opted in. + * + * @return void + */ + public function update_cache() { + if ( ! self::$explicit_refresh ) { + return; + } + parent::update_cache(); + } + /** * Get only the failing findings from the cached audit. * * @return array> */ public function get_failing_findings(): array { - $findings = $this->collect(); - return \is_array( $findings ) ? Audit_Runner::failing( $findings ) : []; + return Audit_Runner::failing( $this->collect() ); } /** @@ -73,9 +135,6 @@ public function get_failing_findings(): array { */ public function get_finding( string $rule_id ): ?array { $findings = $this->collect(); - if ( ! \is_array( $findings ) ) { - return null; - } foreach ( $findings as $finding ) { if ( isset( $finding['rule_id'] ) && $finding['rule_id'] === $rule_id ) { return $finding; diff --git a/classes/suggested-tasks/providers/class-spec-audit.php b/classes/suggested-tasks/providers/class-spec-audit.php index 7e5e3aba5..5b74bc4bc 100644 --- a/classes/suggested-tasks/providers/class-spec-audit.php +++ b/classes/suggested-tasks/providers/class-spec-audit.php @@ -55,6 +55,23 @@ class Spec_Audit extends Tasks { */ protected $external_link_url = ''; + /** + * Post IDs injected this request, pending verification on shutdown that + * they still exist. Used to keep the throttle counter honest when an + * injected task is removed in the same request (e.g. by an immediate + * evaluate_tasks() pass that fires after a cache refresh). + * + * @var array + */ + protected $pending_release_ids = []; + + /** + * The cron hook that refreshes the audit cache + injects tasks. + * + * @var string + */ + public const CRON_HOOK = 'progress_planner_spec_audit_run'; + /** * Initialize the task provider. * @@ -62,6 +79,21 @@ class Spec_Audit extends Tasks { */ public function init() { \add_action( 'wp_ajax_progress_planner_run_spec_audit', [ $this, 'ajax_run_audit' ] ); + \add_action( 'shutdown', [ $this, 'finalize_throttle_on_shutdown' ] ); + + // The audit's outbound HTTP must NEVER block a user-facing request. + // A dedicated cron hook is the only context (besides explicit CLI/AJAX + // triggers) where the audit may run; system-cron-driven WP cron keeps it + // off the web FPM pool entirely. + \add_action( + self::CRON_HOOK, + function () { + $this->run_audit_now(); + } + ); + if ( ! \wp_next_scheduled( self::CRON_HOOK ) ) { + \wp_schedule_event( \time() + DAY_IN_SECONDS, 'daily', self::CRON_HOOK ); + } } /** @@ -247,16 +279,26 @@ protected function build_task_data( array $finding ): array { /** * Inject up to the per-window limit of pending findings as tasks. * - * @return array Created post IDs. + * This method does NO outbound HTTP — it reads findings from the cache + * populated by run_audit_now() (which only runs from CLI/AJAX/cron, never + * from admin_init). The throttle counter is incremented at shutdown after + * verifying each created task survived (handles the case where an + * evaluate_tasks() pass in the same request removed it). + * + * @return array Created post IDs (provisional — final count is + * taken on shutdown). */ public function get_tasks_to_inject() { if ( true === $this->is_task_snoozed() ) { return []; } - $limit = $this->get_max_per_window(); - $released = $this->get_released_this_window(); - if ( $released >= $limit ) { + $limit = $this->get_max_per_window(); + // Count both finalized (counter) and in-flight (pending shutdown + // verification) releases against the cap, so two calls within the same + // request can't both blow past the limit. + $used = $this->get_released_this_window() + \count( $this->pending_release_ids ); + if ( $used >= $limit ) { return []; } @@ -265,7 +307,7 @@ public function get_tasks_to_inject() { return []; } - $to_release = \array_slice( $pending, 0, $limit - $released ); + $to_release = \array_slice( $pending, 0, $limit - $used ); $created = []; foreach ( $to_release as $finding ) { @@ -277,15 +319,45 @@ public function get_tasks_to_inject() { $post_id = \progress_planner()->get_suggested_tasks_db()->add( $task_data ); if ( $post_id ) { - $created[] = $post_id; + $created[] = $post_id; + $this->pending_release_ids[] = $post_id; } } - $this->record_released( \count( $created ) ); - return $created; } + /** + * Finalize the per-window throttle counter on shutdown. + * + * Counts only the tasks injected this request that actually survived (i.e. + * were not deleted/trashed by a same-request evaluate_tasks() pass), so the + * counter reflects real user-visible throttling rather than transient creates. + * + * @return void + */ + public function finalize_throttle_on_shutdown() { + if ( empty( $this->pending_release_ids ) ) { + return; + } + + $surviving = 0; + foreach ( $this->pending_release_ids as $post_id ) { + $post = \get_post( $post_id ); + // A 'publish' task that's still around is the only kind that should + // consume a window slot; trashed/auto-completed tasks shouldn't. + if ( $post && 'publish' === $post->post_status ) { + ++$surviving; + } + } + + $this->pending_release_ids = []; + + if ( $surviving > 0 ) { + $this->record_released( $surviving ); + } + } + /** * A spec-audit task is complete when its rule now passes (or is gone). * @@ -299,9 +371,19 @@ protected function is_specific_task_completed( $task_id ) { return false; } - $finding = $this->get_audit_collector()->get_finding( $rule_id ); + $collector = $this->get_audit_collector(); + $findings = $collector->collect(); - // Rule no longer reported as failing -> the user fixed it. + // No cached audit at all -> we don't know; leave the task alone rather + // than declare it complete (which would happen if we treated the + // missing finding as "rule no longer reported as failing"). + if ( empty( $findings ) ) { + return false; + } + + $finding = $collector->get_finding( $rule_id ); + + // Rule no longer reported in a populated audit -> the user fixed it. if ( null === $finding ) { return true; } @@ -324,6 +406,12 @@ protected function get_rule_id_from_task_id( string $task_id ): string { /** * AJAX handler for the manual "run audit now" trigger. * + * The audit's outbound HTTP can take seconds, and running it inline would + * keep the user's FPM worker busy the whole time. Instead the response is + * flushed immediately ("queued") and the audit is run after the worker + * detaches from the client (via fastcgi_finish_request() when available), + * on the `shutdown` action. + * * @return void */ public function ajax_run_audit() { @@ -335,25 +423,58 @@ public function ajax_run_audit() { \wp_send_json_error( [ 'message' => \esc_html__( 'Invalid nonce.', 'progress-planner' ) ] ); } - $injected = $this->run_audit_now(); + // Defer the actual audit to shutdown so the user's worker can be released + // back to the FPM pool before the outbound HTTP starts. + \add_action( 'shutdown', [ $this, 'shutdown_run_audit' ], 5 ); \wp_send_json_success( [ + 'queued' => true, 'failing_count' => \count( $this->get_audit_collector()->get_failing_findings() ), - 'injected_count' => \count( $injected ), + 'injected_count' => 0, ] ); } + /** + * Run the audit at end-of-request, after the response is flushed. + * + * Uses fastcgi_finish_request() where available so the FPM worker is freed + * back to the pool immediately and the outbound HTTP doesn't hold up the + * user's request. Also raises set_time_limit() since the audit may take a + * few seconds. + * + * @return void + */ + public function shutdown_run_audit() { + if ( \function_exists( 'fastcgi_finish_request' ) ) { + \fastcgi_finish_request(); + } + // We are now detached from the client; allow more wall-clock time. + if ( \function_exists( 'set_time_limit' ) ) { + @\set_time_limit( 60 ); // phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged + } + + $this->run_audit_now(); + } + /** * Refresh the audit cache and inject any newly surfaced tasks. * - * Shared by the AJAX handler and the WP-CLI command. + * Outbound HTTP runs here. Allowed callers: the WP-CLI command (separate + * php process), the cron hook ({@see CRON_HOOK}), and the AJAX shutdown + * handler ({@see shutdown_run_audit()}). Never called from `admin_init`, + * which would amplify into FPM pool starvation. * * @return array Created post IDs. */ public function run_audit_now(): array { - $this->get_audit_collector()->update_cache(); + $collector = $this->get_audit_collector(); + Spec_Audit_Data_Collector::with_explicit_refresh( + static function () use ( $collector ) { + $collector->update_cache(); + } + ); return $this->get_tasks_to_inject(); } } diff --git a/tests/phpunit/test-class-spec-audit-provider.php b/tests/phpunit/test-class-spec-audit-provider.php index cd45b0050..75a606524 100644 --- a/tests/phpunit/test-class-spec-audit-provider.php +++ b/tests/phpunit/test-class-spec-audit-provider.php @@ -8,6 +8,7 @@ namespace Progress_Planner\Tests; use Progress_Planner\Suggested_Tasks\Providers\Spec_Audit; +use Progress_Planner\Suggested_Tasks\Data_Collector\Spec_Audit as Spec_Audit_Data_Collector; /** * Class Spec_Audit_Provider_Test. @@ -223,7 +224,12 @@ public function test_task_completed_when_rule_passes() { } /** - * Test completion: a rule that disappears from the audit marks the task complete. + * Test completion: a rule that disappears from a NON-EMPTY audit marks the + * task complete. + * + * Note: an *empty* cache is NOT treated as "rule passed" — see + * test_task_not_completed_when_cache_is_empty — so this test seeds a + * non-empty cache that simply doesn't include rule-a. * * @return void */ @@ -231,9 +237,96 @@ public function test_task_completed_when_rule_gone() { $this->seed_findings( [ $this->finding( 'rule-a' ) ] ); $this->provider->get_tasks_to_inject(); - // Rule no longer present in the audit at all. - $this->seed_findings( [] ); + // Audit ran again and the rule is no longer reported (but other rules are). + $this->seed_findings( [ $this->finding( 'rule-b' ) ] ); $this->assertTrue( $this->provider->is_task_completed( 'spec-audit-rule-a' ) ); } + + /** + * Test that an empty/missing audit cache does NOT mark tasks as completed. + * + * If we treated a missing cache as "rule no longer reported", a single + * object-cache flush could mass-complete every audit task. Tasks must wait + * for a real audit before changing state. + * + * @return void + */ + public function test_task_not_completed_when_cache_is_empty() { + $this->seed_findings( [ $this->finding( 'rule-a' ) ] ); + $this->provider->get_tasks_to_inject(); + + // Simulate the cache being cleared (object cache flush, restart, etc.). + $this->seed_findings( [] ); + + $this->assertFalse( + $this->provider->is_task_completed( 'spec-audit-rule-a' ), + 'An empty cache must not be treated as evidence the rule passes.' + ); + } + + /** + * Test that the data collector's update_cache() is a no-op unless an + * explicit caller (CLI/cron/AJAX-shutdown) has opted in. This is the + * guard that keeps the audit's outbound HTTP off the admin_init path. + * + * @return void + */ + public function test_update_cache_is_noop_without_explicit_refresh() { + $collector = new Spec_Audit_Data_Collector(); + + // Pre-seed a sentinel so we can detect whether update_cache ran. + $this->seed_findings( [ $this->finding( 'sentinel-rule' ) ] ); + + // Call update_cache() outside the with_explicit_refresh() guard — + // emulates the Data_Collector_Manager admin_init sweep. + $collector->update_cache(); + + $findings = $collector->collect(); + $this->assertSame( + 'sentinel-rule', + $findings[0]['rule_id'] ?? null, + 'update_cache() without explicit refresh must NOT overwrite the cache (which would imply it tried to run a fresh audit and its outbound HTTP).' + ); + } + + /** + * Test that the throttle counter is not consumed when an injected task + * is removed in the same request (the shutdown verification step skips it). + * + * @return void + */ + public function test_throttle_window_not_consumed_for_canceled_injection() { + $this->seed_findings( [ $this->finding( 'rule-a' ) ] ); + + $created = $this->provider->get_tasks_to_inject(); + $this->assertCount( 1, $created ); + + // Simulate the same-request evaluate_tasks() removing the task because + // the cache contradicted itself after injection. + \wp_delete_post( $created[0], true ); + + // shutdown action runs at request end; call it directly. + $this->provider->finalize_throttle_on_shutdown(); + + $injections = \progress_planner()->get_settings()->get( [ 'spec_audit', 'injections' ], [] ); + $this->assertSame( [], $injections, 'Canceled injection should not consume the per-window quota.' ); + } + + /** + * Test that a surviving injection DOES consume the window quota at shutdown. + * + * @return void + */ + public function test_throttle_window_consumed_for_surviving_injection() { + $this->seed_findings( [ $this->finding( 'rule-a' ) ] ); + + $created = $this->provider->get_tasks_to_inject(); + $this->assertCount( 1, $created ); + + $this->provider->finalize_throttle_on_shutdown(); + + $injections = \progress_planner()->get_settings()->get( [ 'spec_audit', 'injections' ], [] ); + $this->assertSame( 1, $injections[ \gmdate( 'Ymd' ) ] ?? 0 ); + } } From 03b286ed912023caf8fe7e8fccf15e70a5f4b7c3 Mon Sep 17 00:00:00 2001 From: Filip Ilic Date: Sat, 30 May 2026 07:50:36 +0200 Subject: [PATCH 06/12] Replace robots.txt check with meta-description MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The robots.txt check turned out to be the wrong rule for a suggested task: WordPress generates a virtual robots.txt automatically, and when it fails (e.g. status 404 with a real body, as seen on a Yoast + Woo install), the fix is at the nginx/Valet routing level — not something a WordPress user can address from wp-admin. Suggested tasks should be actionable inside WordPress. Replace with a meta-description check that operates on the homepage HTML we already fetch (zero extra outbound HTTP). The user-facing fix is real and in-scope: install/configure an SEO plugin or set a description in the theme. Yoast/RankMath both supply it out of the box, so the recommendation naturally guides users to a known good path. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../audit/checks/class-checks-registry.php | 2 +- .../checks/class-meta-description-check.php | 64 +++++++++++++++++++ .../audit/checks/class-robots-txt-check.php | 63 ------------------ .../phpunit/test-class-spec-audit-checks.php | 44 +++++++++++++ 4 files changed, 109 insertions(+), 64 deletions(-) create mode 100644 classes/suggested-tasks/audit/checks/class-meta-description-check.php delete mode 100644 classes/suggested-tasks/audit/checks/class-robots-txt-check.php diff --git a/classes/suggested-tasks/audit/checks/class-checks-registry.php b/classes/suggested-tasks/audit/checks/class-checks-registry.php index 2bc28ee73..ea5555253 100644 --- a/classes/suggested-tasks/audit/checks/class-checks-registry.php +++ b/classes/suggested-tasks/audit/checks/class-checks-registry.php @@ -22,7 +22,7 @@ public function get_checks(): array { new Doctype_Check(), new Lang_Attribute_Check(), new Charset_Check(), - new Robots_Txt_Check(), + new Meta_Description_Check(), new Sitemap_Check(), ]; diff --git a/classes/suggested-tasks/audit/checks/class-meta-description-check.php b/classes/suggested-tasks/audit/checks/class-meta-description-check.php new file mode 100644 index 000000000..005fcc725 --- /dev/null +++ b/classes/suggested-tasks/audit/checks/class-meta-description-check.php @@ -0,0 +1,64 @@ + + */ + public function run( string $url, string $html, array $context ): array { + // Can't determine from an empty body — don't emit a false failure. + if ( '' === \trim( $html ) ) { + return []; + } + + $pass = false; + + // Look for a with a non-empty + // content attribute. Case-insensitive, tolerates attribute reordering. + if ( \preg_match( '/]*\bname\s*=\s*["\']?description["\']?[^>]*\bcontent\s*=\s*"([^"]*)"/i', $html, $m ) + || \preg_match( '/]*\bname\s*=\s*["\']?description["\']?[^>]*\bcontent\s*=\s*\'([^\']*)\'/i', $html, $m ) + || \preg_match( '/]*\bcontent\s*=\s*"([^"]*)"[^>]*\bname\s*=\s*["\']?description["\']?/i', $html, $m ) + || \preg_match( '/]*\bcontent\s*=\s*\'([^\']*)\'[^>]*\bname\s*=\s*["\']?description["\']?/i', $html, $m ) + ) { + $pass = '' !== \trim( $m[1] ); + } + + return [ + 'rule_id' => $this->get_rule_id(), + 'category' => 'seo', + 'title' => \__( 'Add a meta description to your homepage', 'progress-planner' ), + 'description' => \__( 'A meta description is the snippet search engines show under your page title in search results. Without one, search engines auto-generate a snippet which is often less compelling and less accurate. Install an SEO plugin like Yoast SEO or set a description in your theme to control how your site is summarized.', 'progress-planner' ), + 'severity' => 'medium', + 'status' => $pass ? 'pass' : 'fail', + 'doc_url' => 'https://specification.website/', + 'source' => 'php-check', + ]; + } +} diff --git a/classes/suggested-tasks/audit/checks/class-robots-txt-check.php b/classes/suggested-tasks/audit/checks/class-robots-txt-check.php deleted file mode 100644 index ed981c212..000000000 --- a/classes/suggested-tasks/audit/checks/class-robots-txt-check.php +++ /dev/null @@ -1,63 +0,0 @@ - - */ - public function run( string $url, string $html, array $context ): array { - $response = \wp_remote_get( - \home_url( '/robots.txt' ), - [ - 'timeout' => 10, - 'user-agent' => 'Progress Planner Spec Audit', - ] - ); - - // Network failure: we can't determine the state, so don't emit a finding. - if ( \is_wp_error( $response ) ) { - return []; - } - - $code = (int) \wp_remote_retrieve_response_code( $response ); - $body = \trim( (string) \wp_remote_retrieve_body( $response ) ); - - $pass = 200 === $code && '' !== $body; - - return [ - 'rule_id' => $this->get_rule_id(), - 'category' => 'seo', - 'title' => \__( 'Publish a robots.txt file', 'progress-planner' ), - 'description' => \__( 'A robots.txt file tells search engines and crawlers which parts of your site they may access. Serving a non-empty robots.txt is a baseline expectation for a well-built site.', 'progress-planner' ), - 'severity' => 'low', - 'status' => $pass ? 'pass' : 'fail', - 'doc_url' => 'https://specification.website/', - 'source' => 'php-check', - ]; - } -} diff --git a/tests/phpunit/test-class-spec-audit-checks.php b/tests/phpunit/test-class-spec-audit-checks.php index 5a061ab8e..cb6d3b05d 100644 --- a/tests/phpunit/test-class-spec-audit-checks.php +++ b/tests/phpunit/test-class-spec-audit-checks.php @@ -10,6 +10,7 @@ use Progress_Planner\Suggested_Tasks\Audit\Checks\Doctype_Check; use Progress_Planner\Suggested_Tasks\Audit\Checks\Lang_Attribute_Check; use Progress_Planner\Suggested_Tasks\Audit\Checks\Charset_Check; +use Progress_Planner\Suggested_Tasks\Audit\Checks\Meta_Description_Check; /** * Class Spec_Audit_Checks_Test. @@ -88,4 +89,47 @@ public function test_charset_check() { ); $this->assertSame( 'pass', $header_pass['status'] ); } + + /** + * Test the meta-description check. + * + * @return void + */ + public function test_meta_description_check() { + $check = new Meta_Description_Check(); + + $with_desc = 'T'; + $pass = $check->run( 'https://example.com', $with_desc, [] ); + $this->assertSame( 'meta-description', $pass['rule_id'] ); + $this->assertSame( 'pass', $pass['status'] ); + + // Missing meta description -> fail. + $fail = $check->run( 'https://example.com', self::GOOD_HTML, [] ); + $this->assertSame( 'fail', $fail['status'] ); + + // Present but empty content attribute -> fail. + $empty_html = ''; + $empty = $check->run( 'https://example.com', $empty_html, [] ); + $this->assertSame( 'fail', $empty['status'] ); + + // Single quotes around attribute values are accepted. + $single_quote_html = ""; + $sq_pass = $check->run( 'https://example.com', $single_quote_html, [] ); + $this->assertSame( 'pass', $sq_pass['status'] ); + + // Attribute order reversed (content before name) is accepted. + $reversed = ''; + $rev_pass = $check->run( 'https://example.com', $reversed, [] ); + $this->assertSame( 'pass', $rev_pass['status'] ); + } + + /** + * Empty body yields no finding for the meta-description check. + * + * @return void + */ + public function test_meta_description_check_empty_html_returns_no_finding() { + $check = new Meta_Description_Check(); + $this->assertSame( [], $check->run( 'https://example.com', '', [] ) ); + } } From bbfaf446fefafe70d67ab77f13dbf0870ed24c2a Mon Sep 17 00:00:00 2001 From: Filip Ilic Date: Sat, 30 May 2026 08:04:18 +0200 Subject: [PATCH 07/12] Self-heal stale tasks when a PHP-check rule is retired MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tasks are keyed by rule_id. When we rename, retire, or filter out a check (as just happened with robots-txt), the old task lives on in users' DBs forever — its completion logic only knew how to react to the rule still being audited. Two changes: 1. On injection, persist the finding's source as prpl_source meta so the provider can later tell deterministic (php-check) tasks from probabilistic (mcp-llm / saas) ones. Legacy tasks without the meta are backfilled to 'php-check' (the original starter set was all PHP checks), so existing installs self-heal on upgrade too. 2. In is_specific_task_completed(), short-circuit to "complete" for any php-check task whose rule_id is no longer present in the live Checks_Registry. Does NOT apply to LLM/SaaS tasks — their rule space is open-ended and a rule missing from one audit just means the model didn't mention it this run, not that it was retired. Pure in-memory work; no outbound HTTP. The existing cache-empty guard in is_specific_task_completed() still prevents mass-completion from an object-cache flush. Tests use synthetic rule IDs (rule-a etc.) that the registry never knew about, so the test setUp now registers no-op stub checks for them; the new retired-rule tests remove all audit_checks filters to simulate removal. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../providers/class-spec-audit.php | 52 +++++++ .../test-class-spec-audit-provider.php | 139 ++++++++++++++++++ 2 files changed, 191 insertions(+) diff --git a/classes/suggested-tasks/providers/class-spec-audit.php b/classes/suggested-tasks/providers/class-spec-audit.php index 5b74bc4bc..0df085c04 100644 --- a/classes/suggested-tasks/providers/class-spec-audit.php +++ b/classes/suggested-tasks/providers/class-spec-audit.php @@ -9,6 +9,7 @@ use Progress_Planner\Suggested_Tasks\Data_Collector\Spec_Audit as Spec_Audit_Data_Collector; use Progress_Planner\Suggested_Tasks\Audit\Audit_Runner; +use Progress_Planner\Suggested_Tasks\Audit\Checks\Checks_Registry; /** * Turns failing audit findings into suggested tasks. @@ -272,6 +273,11 @@ protected function build_task_data( array $finding ): array { $task_data['rule_id'] = $finding['rule_id']; $task_data['category'] = $finding['category'] ?? 'basics'; $task_data['severity'] = $finding['severity'] ?? 'medium'; + // Persist the finding's source so completion logic can distinguish + // deterministic (php-check) tasks from probabilistic (mcp-llm / saas) + // ones — e.g. so a retired PHP check auto-completes its orphan tasks + // without doing the same to LLM tasks the model just didn't mention. + $task_data['source'] = $finding['source'] ?? ''; return $task_data; } @@ -371,6 +377,18 @@ protected function is_specific_task_completed( $task_id ) { return false; } + // Self-healing for retired/renamed PHP-check rules: if this is a + // php-check task whose rule_id is no longer in the registry, the rule + // was removed (by us upgrading the check set, or by a 3rd-party filter) + // and the task is orphaned. Mark it complete so it stops nagging. + // + // Limited to php-check because the LLM/SaaS rule space is open-ended: + // a rule disappearing from one audit run doesn't mean it was retired, + // just that the model didn't mention it this time. + if ( 'php-check' === $this->get_task_source( $task_id ) && ! $this->rule_is_registered( $rule_id ) ) { + return true; + } + $collector = $this->get_audit_collector(); $findings = $collector->collect(); @@ -391,6 +409,40 @@ protected function is_specific_task_completed( $task_id ) { return isset( $finding['status'] ) && 'pass' === $finding['status']; } + /** + * Read the `prpl_source` meta for a task. Backfills missing values to + * 'php-check' so legacy tasks (injected before this meta existed) still + * benefit from the retired-rule auto-completion path. + * + * @param string $task_id The task slug. + * + * @return string + */ + protected function get_task_source( string $task_id ): string { + $post = \progress_planner()->get_suggested_tasks_db()->get_post( $task_id ); + if ( ! $post ) { + return 'php-check'; + } + $source = \get_post_meta( $post->ID, 'prpl_source', true ); + return '' === $source ? 'php-check' : (string) $source; + } + + /** + * Whether the given rule_id is present in the current PHP-check registry. + * + * @param string $rule_id The rule ID. + * + * @return bool + */ + protected function rule_is_registered( string $rule_id ): bool { + foreach ( ( new Checks_Registry() )->get_checks() as $check ) { + if ( $check->get_rule_id() === $rule_id ) { + return true; + } + } + return false; + } + /** * Extract the rule ID from a task ID. * diff --git a/tests/phpunit/test-class-spec-audit-provider.php b/tests/phpunit/test-class-spec-audit-provider.php index 75a606524..1ed22588b 100644 --- a/tests/phpunit/test-class-spec-audit-provider.php +++ b/tests/phpunit/test-class-spec-audit-provider.php @@ -9,6 +9,7 @@ use Progress_Planner\Suggested_Tasks\Providers\Spec_Audit; use Progress_Planner\Suggested_Tasks\Data_Collector\Spec_Audit as Spec_Audit_Data_Collector; +use Progress_Planner\Suggested_Tasks\Audit\Checks\Check; /** * Class Spec_Audit_Provider_Test. @@ -52,6 +53,54 @@ public function setUp(): void { // Guarantee isolation: Settings uses a static cache that survives across // tests in the same process, so explicitly reset the throttle state. \progress_planner()->get_settings()->set( 'spec_audit', [] ); + + // Most tests use synthetic rule IDs (rule-a, rule-b, etc.) that aren't + // in the real registry. The provider's retired-rule self-healing would + // trip on those and mark every task complete. Register stub checks for + // the synthetic IDs so the retired-rule path doesn't fire. + \add_filter( + 'progress_planner_audit_checks', + function ( $checks ) { + foreach ( [ 'rule-a', 'rule-b', 'rule-c', 'low-rule', 'medium-rule', 'high-rule' ] as $rid ) { + $checks[] = new class($rid) implements Check { + /** + * The rule ID. + * + * @var string + */ + private $rid; + /** + * Constructor. + * + * @param string $rid Rule ID. + */ + public function __construct( string $rid ) { + $this->rid = $rid; + } + /** + * Rule ID. + * + * @return string + */ + public function get_rule_id(): string { + return $this->rid; + } + /** + * No-op run. + * + * @param string $url URL. + * @param string $html HTML. + * @param array $context Context. + * @return array + */ + public function run( string $url, string $html, array $context ): array { + return []; + } + }; + } + return $checks; + } + ); } /** @@ -69,6 +118,7 @@ public function tearDown(): void { \remove_all_filters( 'progress_planner_spec_audit_max_tasks_per_window' ); \remove_all_filters( 'progress_planner_spec_audit_window' ); + \remove_all_filters( 'progress_planner_audit_checks' ); parent::tearDown(); } @@ -329,4 +379,93 @@ public function test_throttle_window_consumed_for_surviving_injection() { $injections = \progress_planner()->get_settings()->get( [ 'spec_audit', 'injections' ], [] ); $this->assertSame( 1, $injections[ \gmdate( 'Ymd' ) ] ?? 0 ); } + + /** + * Test that a task for a PHP-check rule that no longer exists in the + * registry is auto-completed. This is the self-healing path for stale + * tasks left over from a previous plugin version where a rule was + * renamed or retired. + * + * @return void + */ + public function test_task_auto_completed_when_php_check_rule_retired() { + // Register one controllable check while we inject the task, so the + // rule_id exists at injection time. + $stub_check = new class() implements Check { + /** + * Rule ID. + * + * @return string + */ + public function get_rule_id(): string { + return 'rule-a'; + } + /** + * Reports failing so the test can inject. + * + * @param string $url URL. + * @param string $html HTML. + * @param array $context Context. + * @return array + */ + public function run( string $url, string $html, array $context ): array { + return [ + 'rule_id' => 'rule-a', + 'status' => 'fail', + 'title' => 'Stub', + 'source' => 'php-check', + ]; + } + }; + \add_filter( + 'progress_planner_audit_checks', + static fn() => [ $stub_check ], + 10, + 0 + ); + + $this->seed_findings( [ $this->finding( 'rule-a' ) ] ); + $created = $this->provider->get_tasks_to_inject(); + $this->assertCount( 1, $created, 'Sanity: task is injected while rule exists.' ); + + // Now retire the rule by removing it from the registry. The task + // remains in the DB but its rule_id is no longer known. + \remove_all_filters( 'progress_planner_audit_checks' ); + + // Even with the cache still populated and the rule still listed as + // failing in it, the task should be reported complete because the + // rule itself is gone from the registry. + $this->assertTrue( + $this->provider->is_task_completed( 'spec-audit-rule-a' ), + 'A PHP-check task whose rule was retired must auto-complete.' + ); + } + + /** + * Backfill safety: a task with no stored source meta is treated as + * php-check and auto-completed when its rule_id is missing from the + * current registry. + * + * @return void + */ + public function test_legacy_task_without_source_meta_is_treated_as_php_check() { + // Inject directly into the DB without a 'source' key — emulates a + // task created by a previous plugin version that didn't store source. + $post_id = \progress_planner()->get_suggested_tasks_db()->add( + [ + 'task_id' => 'spec-audit-legacy-rule', + 'provider_id' => 'spec-audit', + 'post_title' => 'Legacy task', + 'rule_id' => 'legacy-rule', + ] + ); + $this->assertGreaterThan( 0, $post_id ); + + // 'legacy-rule' is not registered in any Check — the legacy task + // should still auto-complete. + $this->assertTrue( + $this->provider->is_task_completed( 'spec-audit-legacy-rule' ), + 'A legacy task with no source meta must be treated as php-check and auto-complete when its rule is missing.' + ); + } } From c45f7fd6ce38c447865816222bf4eff08b7aa611 Mon Sep 17 00:00:00 2001 From: Filip Ilic Date: Sat, 30 May 2026 09:53:35 +0200 Subject: [PATCH 08/12] Fix the AI audit: schema, checklist source, and silent errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three bugs blocked the AI path on a live WP 7.0 + Anthropic connector, found via foreground reflection probing rather than running the audit live: 1. JSON schema rejected. Anthropic's structured-output API requires `additionalProperties:false` on every `type:object`. Without it the call 400s and the WP_Error is swallowed, leaving no diagnostic. 2. Checklist URL was wrong. /mcp/ returns the HTML page describing the MCP server, not spec content — the model was fed 22KB of irrelevant HTML. Switched to /llms.txt, the canonical LLM-oriented Markdown index (~37KB) the spec publishes for exactly this purpose. 3. Errors were silent. run_prompt() returned null on any failure, and audit_url() caught Throwables and json_decode failures the same way. Added log_error() that writes to error_log under WP_DEBUG so future failures show up in debug.log without changing the public contract. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../audit/class-spec-mcp-client.php | 57 ++++++++++++++++--- 1 file changed, 48 insertions(+), 9 deletions(-) diff --git a/classes/suggested-tasks/audit/class-spec-mcp-client.php b/classes/suggested-tasks/audit/class-spec-mcp-client.php index e34ad4335..ad664ae5e 100644 --- a/classes/suggested-tasks/audit/class-spec-mcp-client.php +++ b/classes/suggested-tasks/audit/class-spec-mcp-client.php @@ -49,11 +49,16 @@ class Spec_Mcp_Client { /** - * The specification checklist endpoint (published Markdown/JSON source). + * Endpoint for the specification's LLM-oriented index. + * + * Per https://specification.website/, /llms.txt is the canonical structured + * Markdown index designed for LLM consumption (~37KB). The /mcp/ URL we used + * earlier returns the HTML page describing the MCP server, NOT spec content, + * so the model was being fed irrelevant HTML. * * @var string */ - protected const SPEC_CHECKLIST_URL = 'https://specification.website/mcp/'; + protected const SPEC_CHECKLIST_URL = 'https://specification.website/llms.txt'; /** * Whether the AI client is available and a provider is configured. @@ -84,6 +89,7 @@ public function is_available(): bool { try { return (bool) \wp_ai_client_prompt( 'ping' )->is_supported_for_text_generation(); // @phpstan-ignore-line function.notFound } catch ( \Throwable $e ) { + $this->log_error( 'is_supported_for_text_generation threw', \get_class( $e ), $e->getMessage() ); return false; } } @@ -114,11 +120,13 @@ public function audit_url( string $url ): array { try { $json = $this->run_prompt( $url, $checklist, $html ); } catch ( \Throwable $e ) { + $this->log_error( 'run_prompt threw', \get_class( $e ), $e->getMessage() ); return []; } $decoded = \is_string( $json ) ? \json_decode( $json, true ) : $json; if ( ! \is_array( $decoded ) ) { + $this->log_error( 'json_decode failed or non-array result', '', \is_string( $json ) ? \substr( $json, 0, 200 ) : \gettype( $json ) ); return []; } @@ -148,14 +156,21 @@ public function audit_url( string $url ): array { * @return string|array|null */ protected function run_prompt( string $url, string $checklist, string $html ) { + // NOTE: Anthropic's structured-output API requires `additionalProperties:false` + // on every `type:object` in the schema. Without it the call 400s with: + // output_format.schema: For 'object' type, 'additionalProperties' must + // be explicitly set to false. + // Verified against the live Connectors API on 2026-05-30. $schema = [ - 'type' => 'object', - 'properties' => [ + 'type' => 'object', + 'additionalProperties' => false, + 'properties' => [ 'findings' => [ 'type' => 'array', 'items' => [ - 'type' => 'object', - 'properties' => [ + 'type' => 'object', + 'additionalProperties' => false, + 'properties' => [ 'rule_id' => [ 'type' => 'string' ], 'category' => [ 'type' => 'string' ], 'title' => [ 'type' => 'string' ], @@ -170,18 +185,18 @@ protected function run_prompt( string $url, string $checklist, string $html ) { ], 'doc_url' => [ 'type' => 'string' ], ], - 'required' => [ 'rule_id', 'status', 'title' ], + 'required' => [ 'rule_id', 'status', 'title' ], ], ], ], - 'required' => [ 'findings' ], + 'required' => [ 'findings' ], ]; // Keep the HTML payload bounded so we don't blow the context / token budget. $html_excerpt = \substr( $html, 0, 20000 ); $instruction = \sprintf( - "You are auditing a website against the Website Specification.\n\nURL: %s\n\nSpecification checklist:\n%s\n\nFor each BASIC checklist rule you can evaluate from the HTML below, return a finding with a stable rule_id slug, the category, a short human title and description of the fix, a severity, and status 'pass' or 'fail'. Only include rules you can actually evaluate from the provided HTML. Do not invent rules.\n\nHTML:\n%s", + "You are auditing a website against the Website Specification.\n\nURL: %s\n\nSpecification (Markdown index):\n%s\n\nFor each BASIC checklist rule you can evaluate from the HTML below, return a finding with a stable rule_id slug (kebab-case, derived from the spec rule's identifier), the category, a short human title and description of the fix, a severity, and status 'pass' or 'fail'. Only include rules you can actually evaluate from the provided HTML. Do not invent rules. Keep rule_id values consistent across runs.\n\nHTML:\n%s", $url, $checklist, $html_excerpt @@ -197,12 +212,36 @@ protected function run_prompt( string $url, string $checklist, string $html ) { ->generate_text(); if ( \is_wp_error( $result ) ) { + $this->log_error( 'wp_ai_client_prompt returned WP_Error', $result->get_error_code(), $result->get_error_message() ); return null; } return $result; } + /** + * Log a diagnostic line about the AI call when WP_DEBUG is on. + * + * The AI path is best-effort and we deliberately return [] on failure so the + * audit still produces deterministic findings. But silent failure makes the + * feature impossible to debug — this surfaces the cause in debug.log without + * changing the public contract. + * + * @param string $context Short label for where in the flow we failed. + * @param string $code Optional WP_Error code (or exception class name). + * @param string $message Optional human-readable error message. + * + * @return void + */ + protected function log_error( string $context, string $code = '', string $message = '' ) { + if ( ! ( \defined( 'WP_DEBUG' ) && \WP_DEBUG ) ) { + return; + } + \error_log( // phpcs:ignore WordPress.PHP.DevelopmentFunctions.error_log_error_log + \sprintf( '[progress-planner spec-audit ai] %s | code=%s | %s', $context, $code, $message ) + ); + } + /** * Fetch the specification checklist text. * From deb03b10667fd59de048097892f846857e449d10 Mon Sep 17 00:00:00 2001 From: Filip Ilic Date: Sat, 30 May 2026 10:00:35 +0200 Subject: [PATCH 09/12] Add handoff doc for the spec-audit feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Captures the architecture, decisions, what's verified vs not, open questions, and how to test. The top priority follow-up is using spec.website's canonical slugs as rule_ids so PHP and LLM findings dedupe naturally and doc_urls point at real spec pages — written up in detail at the bottom. Co-Authored-By: Claude Opus 4.7 (1M context) --- HANDOFF-spec-audit.md | 432 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 432 insertions(+) create mode 100644 HANDOFF-spec-audit.md diff --git a/HANDOFF-spec-audit.md b/HANDOFF-spec-audit.md new file mode 100644 index 000000000..f6551f2fa --- /dev/null +++ b/HANDOFF-spec-audit.md @@ -0,0 +1,432 @@ +# Spec Audit Feature — Handoff + +**Branch:** `filip/spec-audit` (off `pp-hosts`, originally off `develop`) +**Status:** Functionally complete on local dev. Not pushed. No PR. +**Last verified:** 2026-05-30 on a live WP 7.0 + Yoast + Woo + Anthropic Connector site at `planner.test`. + +--- + +## TL;DR + +Adds a **specification.website** audit that runs against the site's public URL +and turns each failing spec rule into a Progress Planner suggested task, +throttled to 1 per day. Two engines, sharing all plugin-side task-mapping code: + +- **Deterministic PHP checks** (always on): 5 starter rules — doctype, lang + attribute, charset, meta description, sitemap. +- **WP 7.0 AI client** (optional, requires a configured connector): asks + Claude/GPT/Gemini to evaluate the homepage against + `https://specification.website/llms.txt`; PHP wins on overlapping rule_ids. + +Designed so the audit engine can later move to the progressplanner.com SaaS +(phase B) without touching the task-creation code. + +--- + +## Commits on this branch + +``` +c45f7fd6 Fix the AI audit: schema, checklist source, and silent errors +bbfaf446 Self-heal stale tasks when a PHP-check rule is retired +03b286ed Replace robots.txt check with meta-description +9caef26f Keep audit's outbound HTTP off the admin_init / FPM path +c07307c6 Add tests for the spec audit engine, throttle, and C/B contract +bbfaf446 + earlier: see git log +``` + +The early commits introduced the core; the later commits fixed real issues +discovered during live testing (FPM pool starvation, retired-rule orphans, +wrong AI schema, wrong checklist URL). + +--- + +## Architecture + +``` +classes/suggested-tasks/audit/ +├── class-audit-source.php # interface: get_findings/get_id/is_available +├── class-audit-runner.php # picks source, normalizes findings +├── class-local-audit-source.php # PHP checks + optional AI (PHP wins on overlap) +├── class-remote-audit-source.php # phase-B SaaS stub (mirrors class-lessons.php) +├── class-spec-mcp-client.php # WP 7.0 AI client (despite the name — see below) +└── checks/ + ├── class-check.php # interface + ├── class-checks-registry.php # holds checks; fetches homepage once + ├── class-doctype-check.php + ├── class-lang-attribute-check.php + ├── class-charset-check.php + ├── class-meta-description-check.php + └── class-sitemap-check.php + +classes/suggested-tasks/data-collector/ +└── class-spec-audit.php # caches audit findings (settings sub-key) + +classes/suggested-tasks/providers/ +└── class-spec-audit.php # findings → tasks (the queue + throttle) + +classes/wp-cli/ +└── class-audit-command.php # `wp prpl audit run` +``` + +### The shared contract (C ↔ B boundary) + +`Audit_Source` is the load-bearing interface. Both `Local_Audit_Source` (PHP + +AI) and `Remote_Audit_Source` (future SaaS) implement it, returning the same +**finding** shape. A regression test asserts shape equality across both — +that's the guard that keeps the engines interchangeable. If you change the +schema, that test fires first. + +Finding schema (enforced by `Audit_Runner::normalize()`): +```php +[ + 'rule_id' => string, // REQUIRED, slugified. Drives task_id. + 'category' => string, // basics|seo|accessibility|security|performance + 'title' => string, + 'description' => string, + 'severity' => 'high'|'medium'|'low', + 'status' => 'fail'|'pass', + 'doc_url' => string, + 'fix_url' => string, + 'source' => 'php-check'|'mcp-llm'|'saas', // PERSISTED as prpl_source meta +] +``` + +--- + +## Decisions made (and why) + +### 1. **No outbound HTTP from `admin_init`. Ever.** + +The original design had the data collector refresh on the daily `admin_init` +sweep. During testing this caused **PHP-FPM pool starvation and 502 errors on +unrelated Valet sites** when 5 loopback HTTP requests per admin pageview pinned +all workers. + +The fix (commit `9caef26f`): +- `Spec_Audit` data collector's `update_cache()` is a no-op unless an explicit + caller has opted in via `Spec_Audit_Data_Collector::with_explicit_refresh()`. + The `Data_Collector_Manager`'s admin_init sweep therefore can't trigger the + audit. +- `collect()` never falls through to `calculate_data()` on cache miss — a + missing cache returns `[]`. This prevents a cold cache (after object-cache + flush) from triggering a live audit during any random request. +- AJAX "Run audit now" handler defers to `shutdown` and calls + `fastcgi_finish_request()` so the user's worker is released to the pool + before outbound HTTP starts. +- A daily WP-cron hook (`progress_planner_spec_audit_run`) drives refreshes + from a non-web context. + +**Allowed callers of the audit:** WP-CLI command, cron hook, AJAX shutdown +handler. **Not** admin pageloads. + +### 2. **Throttle is deferred, counts only surviving tasks.** + +Originally the throttle counter incremented eagerly inside +`get_tasks_to_inject()`. But if `evaluate_tasks()` ran later in the same +request and auto-completed the just-injected task (because the cache showed it +passing), the slot would be burned for a task that no longer existed. + +Fix: track injected post IDs as `$pending_release_ids`. On `shutdown`, count +only tasks that still exist in `publish` status. Pure-PHP, no HTTP. + +In-flight in-request counts also count toward the cap so two calls in the same +request (e.g. cron + admin_init) can't both blow past the limit. + +### 3. **Self-healing for retired/renamed PHP rules.** + +A task's `rule_id` is a string. If we rename or retire a rule (which already +happened once — we swapped `robots-txt` for `meta-description`), the +old task lives in users' DBs forever and `is_specific_task_completed()` would +never find evidence to complete it. + +Fix (commit `bbfaf446`): +- On injection, persist `prpl_source` as task meta. +- In `is_specific_task_completed()`, if the task's source is `php-check` and + its `rule_id` is NOT in the live `Checks_Registry`, return `true` — + auto-complete the orphan. +- Legacy tasks without `prpl_source` meta backfill to `php-check` so they + self-heal on the next admin pageload after upgrade. +- LLM/SaaS tasks are deliberately exempt: their rule space is open-ended, so + a rule "missing" from one audit just means the model didn't mention it + this time, not that it was retired. + +### 4. **AI engine: phase C now, phase B later.** + +Phase C (now): plugin is the AI client, using the site owner's WP 7.0 +Connector + API key. Phase B (future): the agent moves to +progressplanner.com SaaS, license-key gated, plugin just receives findings. + +`Local_Audit_Source` (C) and `Remote_Audit_Source` (B) share the +`Audit_Source` contract. The findings → tasks mapping is identical for both. + +### 5. **AI client is NOT an MCP client.** + +The class is called `Spec_Mcp_Client` because the original design pointed +WP's AI client at the specification.website MCP server. Source-level research +of WP 7.0 core (`wp-includes/ai-client/class-wp-ai-client-prompt-builder.php`) +confirmed: **core's AI client cannot act as an MCP client.** It only supports +locally-registered function declarations / Abilities + web_search. There is no +MCP tool type. (WordPress's MCP story is the *MCP Adapter*, which makes WP an +MCP *server* — opposite direction.) + +So `Spec_Mcp_Client` fetches `https://specification.website/llms.txt` (the +spec's canonical LLM-oriented Markdown index, 37KB) over plain HTTP and feeds +it + the homepage HTML to `wp_ai_client_prompt()` with a JSON-schema-constrained +response. The class name is now misleading but the behavior is correct. Rename +opportunity for future cleanup; not blocking. + +### 6. **Verified WP 7.0 AI API quirks.** + +- The builder uses `__call` for SDK delegation. `method_exists($builder, + 'using_max_tokens')` returns FALSE. **Never** guard SDK methods with + `method_exists()`. +- Availability gate is `wp_supports_ai()` + `$builder->is_supported_for_text_generation()`. + There is no `is_configured()`. +- Anthropic's structured output requires `additionalProperties: false` on every + `type: object` in the schema. Without it the call 400s and the WP_Error + is swallowed. +- `generate_text()` returns string (JSON when `as_json_response($schema)` is + set) or `WP_Error`. Synchronous/blocking. + +All four lessons cost real debugging time. They are now in code comments at +the relevant call sites. + +### 7. **Default throttle: 1 task/day, filterable.** + +`progress_planner_spec_audit_max_tasks_per_window` (default 1) and +`progress_planner_spec_audit_window` (default `'day'`, also accepts `'week'`). +Stored counters in `progress_planner_settings['spec_audit']['injections']`. +Window key = `gmdate('Ymd')` or `gmdate('oW')`. + +The intent: avoid flooding users with tasks. A new site might genuinely have +10 failing rules; surfacing them one per day keeps the UI calm. + +### 8. **Deterministic checks win on rule_id overlap with AI.** + +`Local_Audit_Source::get_findings()` indexes PHP findings by `rule_id` first, +then drops any AI finding with the same `rule_id`. Rationale: PHP checks are +exact and free; the LLM could hallucinate the same rule wrong. PHP wins. + +**Known gap (see open question 1 below):** the LLM uses different slugs +(`doctype` vs our `html-doctype`, etc.), so this dedupe doesn't actually +trigger in practice. Both PHP and LLM versions of the same rule end up in +the cache. Currently harmless (they all pass and don't generate tasks), but +not ideal. + +--- + +## What was verified live + +On `http://planner.test/` (WP 7.0 + Yoast + Woo + Anthropic Sonnet 4.5): + +- ✅ `wp prpl audit run` completes in ~18s including one Anthropic API call. +- ✅ PHP checks: 5 ran, 4 passed, 1 failed (`meta-description`, before Yoast + was given a description). +- ✅ AI added 12 findings: 7 pass, 5 fail (`canonical-url`, `open-graph`, + `theme-color`, `color-scheme`, `https-tls`). +- ✅ Severity-based prioritization picked the highest-severity rule + (`https-tls`, severity high) for the throttled slot. +- ✅ Throttle holds — second run returned `0 task(s) injected`. +- ✅ Auto-completion verified: set a meta description in Yoast → re-run + audit → rule flips to `pass` → next admin pageload marks the task + `pending` (celebration-pending). +- ✅ Zero outbound HTTP from admin pageloads. Verified by checking nothing + hangs after the audit cache exists. +- ✅ Self-healing verified: a stale `spec-audit-robots-txt` task from before + the rule was swapped was correctly auto-completed on the next evaluation. + +--- + +## What was NOT verified + +- **Production WP 7.0 release.** All testing was on a WP 7.0 nightly running + in Valet locally. The Connectors UI behaved oddly on nightly (failed to + auto-install provider plugins) but worked on stable 7.0. Worth re-testing + on shipped 7.0. +- **The SaaS phase-B path.** `Remote_Audit_Source` is a stub — the + progressplanner.com endpoint doesn't exist. Shape-equality test guards + against the contract drifting. +- **Concurrency.** Multiple `wp prpl audit run` invocations at once. The DB + layer has a distributed lock; should be fine but not tested. +- **Multisite.** Not considered. + +--- + +## Open questions for the next agent + +### 1. (priority) Use spec.website's canonical slugs as `rule_id`s. + +Currently the PHP checks invent slugs (`html-doctype`, `meta-description`) +and the LLM invents its own (`doctype`, `meta-charset`). They overlap +conceptually but use different identifiers. + +The spec itself publishes canonical slugs in its URL structure: +``` +https://specification.website/spec/foundations/meta-viewport/ + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + category / rule slug +``` + +If we used those: +- PHP and LLM findings naturally dedupe. +- `doc_url` becomes + `https://specification.website/spec/{category}/{rule_id}/` — a real link to + the actual spec page, not the homepage. +- Cross-source consistency: future MCP/Abilities/etc. integrations align + automatically. + +**Implementation sketch:** +- Rename PHP check rule_ids to match canonical slugs: + `html-doctype` → `doctype`, `html-lang-attribute` → `lang`, + `charset-meta` → `meta-charset`, `meta-description` → `meta-description` + (already matches!), `xml-sitemap` → `xml-sitemap` (need to verify exact spec + slug). +- Update `doc_url` in each check to point to `/spec/{category}/{rule_id}/`. +- Update the AI prompt to instruct the model to use spec.website canonical + slugs (and provide examples). +- Handle migration: existing tasks with the OLD rule_ids will be considered + retired by the self-heal logic → auto-completed cleanly on next eval. + **This is the right behavior** — they were the right tasks under the old + schema and the user will get fresh ones under the new schema. No data + needs hand-migration. + +Estimated effort: 1-2 hours of code + tests + a careful verification run. +Worth doing before shipping. + +### 2. (medium) Local-dev false positive on `https-tls`. + +The AI correctly flags non-HTTPS sites, but on a dev environment like +`http://planner.test/` this is noise. Two options: +- Filter findings where the audited URL has `.test`/`.local`/`localhost` and + the rule_id is `https-tls`. +- Just document it and let users dismiss in dev. + +I'd lean (b) — sanitizing for dev environments risks hiding real prod issues. + +### 3. (medium) Rename `Spec_Mcp_Client` → `Spec_Ai_Client`. + +The "MCP" in the name was aspirational and incorrect (see decision 5). +Simple rename, no behavior change. Wait until after canonical-slug work so +both refactors land in one commit. + +### 4. (low) Add a `--dry-run` flag to the CLI command. + +Run the audit, print findings, inject nothing. Useful for evaluating AI +output quality without cluttering the suggested-tasks UI between runs. +~10 lines. + +### 5. (low) UI for "Run audit now." + +The AJAX endpoint (`progress_planner_run_spec_audit`) exists and is +fastcgi-detached. There's no admin UI button wired to it yet. + +### 6. (low) Audit should respect a deactivation cleanup. + +When the plugin is deactivated, the daily cron hook +`progress_planner_spec_audit_run` should be unscheduled. Not currently wired. + +--- + +## File map (quick) + +| File | What | +|---|---| +| `classes/suggested-tasks/audit/class-audit-source.php` | Interface | +| `classes/suggested-tasks/audit/class-audit-runner.php` | Source picker + `normalize()` | +| `classes/suggested-tasks/audit/class-local-audit-source.php` | PHP + AI, PHP wins on overlap | +| `classes/suggested-tasks/audit/class-remote-audit-source.php` | Phase-B SaaS stub | +| `classes/suggested-tasks/audit/class-spec-mcp-client.php` | WP 7.0 AI client (rename TODO) | +| `classes/suggested-tasks/audit/checks/*` | Check interface + 5 PHP rules + registry | +| `classes/suggested-tasks/data-collector/class-spec-audit.php` | Cache (gated by `with_explicit_refresh`) | +| `classes/suggested-tasks/providers/class-spec-audit.php` | Findings → tasks + throttle | +| `classes/wp-cli/class-audit-command.php` | `wp prpl audit run` | +| `tests/phpunit/test-class-spec-audit-checks.php` | PHP check unit tests | +| `tests/phpunit/test-class-spec-audit-runner.php` | Schema, C↔B contract | +| `tests/phpunit/test-class-spec-audit-provider.php` | Throttle, completion, self-healing | + +--- + +## How to run / test + +### Static checks (always before commit; pre-commit hook does this) +```bash +composer phpstan +composer lint +vendor/bin/phpcs -s --warning-severity=6 +``` + +### PHPUnit (needs WP test lib) +```bash +# Install once +export PATH="/Users/Shared/DBngin/mysql/8.0.33/bin:$PATH" +WPCONFIG=/Users/filip/Valet/planner/htdocs/wp-config.php +DB_PASS=$(grep "DB_PASSWORD" "$WPCONFIG" | head -1 | sed -E "s/.*DB_PASSWORD', '([^']*)'.*/\1/") +bash bin/install-wp-tests.sh wordpress_test root "$DB_PASS" 127.0.0.1 latest true +# Patch the password (script writes 'root' literally) +TESTCONF=/var/folders/xc/kd88yddj42l40ch1lj1g10d40000gn/T/wordpress-tests-lib/wp-tests-config.php +php -r '$f=$argv[1];$p=$argv[2];$c=file_get_contents($f);$c=preg_replace("/define\(\s*.DB_PASSWORD.,\s*.[^\047]*.\s*\)/","define( \047DB_PASSWORD\047, \047".addslashes($p)."\047 )",$c);file_put_contents($f,$c);' "$TESTCONF" "$DB_PASS" + +# Run +export WP_TESTS_DIR=/var/folders/xc/kd88yddj42l40ch1lj1g10d40000gn/T/wordpress-tests-lib +composer test +# or +vendor/bin/phpunit --filter Spec_Audit +``` + +### Live test on planner.test +```bash +cd /Users/filip/Valet/planner/htdocs +wp prpl audit run +wp prpl audit run --format=json | jq '.[]' +``` + +### See all findings (including passes) with sources +```bash +wp eval ' +foreach (progress_planner()->get_settings()->get("progress_planner_data_collector", [])["spec_audit_findings"] ?? [] as $f) { + printf("%-30s %-10s %-6s %s\n", $f["rule_id"]??"?", $f["source"]??"?", $f["status"]??"?", substr($f["title"]??"", 0, 60)); +}' +``` + +### Clean state (delete all spec-audit tasks + reset throttle + clear cache) +```bash +wp eval ' +$db = progress_planner()->get_suggested_tasks_db(); +foreach ((array) $db->get_tasks_by(["post_status"=>["publish","trash","draft","future","pending"],"provider_id"=>"spec-audit"]) as $t) { + if (is_object($t) && isset($t->ID)) wp_delete_post($t->ID, true); +} +$s = progress_planner()->get_settings(); +$dc = $s->get("progress_planner_data_collector", []); +unset($dc["spec_audit_findings"]); +$s->set("progress_planner_data_collector", $dc); +$s->set("spec_audit", []); +progress_planner()->get_utils__cache()->delete("spec_audit_checklist"); +$ts = wp_next_scheduled("progress_planner_spec_audit_run"); +if ($ts) wp_unschedule_event($ts, "progress_planner_spec_audit_run"); +echo "clean\n"; +' +``` + +--- + +## Memory file (Claude Code) + +There's also a memory file at +`~/.claude/projects/-Users-filip-Valet-planner-htdocs-wp-content-plugins-progress-planner/memory/project_wp7-ai-spec-audit.md` +that captures the WP 7.0 AI API findings — the things that aren't obvious from +reading code (e.g. `method_exists` doesn't see `__call` methods, +`additionalProperties: false` is required by Anthropic). Read it if the next +agent is also Claude Code; otherwise the same content is in code comments +around the relevant call sites. + +--- + +## Final state of `filip/spec-audit` as of handoff + +- 424/424 PHPUnit tests pass. +- PHPStan clean. +- phpcs clean at hook severity (`--warning-severity=6`). +- 9 commits ahead of base. +- Working tree clean. +- No push, no PR. +- Live planner.test site is at zero spec-audit state (cleanup snippet just ran). From 6b977d0e7e3eb2d24090e0a835eae3142f3480a6 Mon Sep 17 00:00:00 2001 From: Filip Ilic Date: Sat, 30 May 2026 10:35:38 +0200 Subject: [PATCH 10/12] Use spec.website canonical slugs as rule_ids MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PHP checks were inventing slugs (html-doctype, html-lang-attribute, charset-meta, xml-sitemap) and the LLM was inventing its own (doctype, html-lang, meta-charset, etc.). They covered the same rules with different identifiers, so the "PHP wins on overlap" dedupe never fired and doc_urls were generic /specification.website/ pointers. Adopt the spec's own URL slugs as canonical rule_ids: html-doctype → doctype (/spec/foundations/doctype/) html-lang-attribute → html-lang (/spec/foundations/html-lang/) charset-meta → meta-charset (/spec/foundations/meta-charset/) meta-description → meta-description (/spec/foundations/meta-description/) xml-sitemap → xml-sitemaps (/spec/seo/xml-sitemaps/) Also align categories to the spec ('foundations' for the HTML-baseline checks, 'seo' for sitemaps). Each finding's doc_url now points at the actual spec page so the "Why is this important?" link is genuinely useful. Update the AI prompt to instruct the model to use canonical slugs derived from the spec URL pattern, with concrete examples. PHP and LLM findings will now dedupe naturally where they cover the same rule. Existing tasks with the old rule_ids will be auto-completed by the self-heal logic (commit bbfaf446) on the next admin pageload — no manual migration needed. Drop the canonical-slug TODO from the handoff doc and renumber remaining open questions. Co-Authored-By: Claude Opus 4.7 (1M context) --- HANDOFF-spec-audit.md | 57 ++++--------------- .../audit/checks/class-charset-check.php | 6 +- .../audit/checks/class-doctype-check.php | 6 +- .../checks/class-lang-attribute-check.php | 6 +- .../checks/class-meta-description-check.php | 4 +- .../audit/checks/class-sitemap-check.php | 4 +- .../audit/class-spec-mcp-client.php | 17 +++++- .../phpunit/test-class-spec-audit-checks.php | 2 +- .../test-class-spec-audit-provider.php | 2 +- 9 files changed, 43 insertions(+), 61 deletions(-) diff --git a/HANDOFF-spec-audit.md b/HANDOFF-spec-audit.md index f6551f2fa..a4796da9d 100644 --- a/HANDOFF-spec-audit.md +++ b/HANDOFF-spec-audit.md @@ -254,46 +254,14 @@ On `http://planner.test/` (WP 7.0 + Yoast + Woo + Anthropic Sonnet 4.5): ## Open questions for the next agent -### 1. (priority) Use spec.website's canonical slugs as `rule_id`s. +**Note on rule_id slugs:** PHP checks and the AI prompt now both use +**spec.website's canonical slugs** (e.g. `doctype`, `html-lang`, +`meta-charset`, `meta-description`, `xml-sitemaps`). Each finding's +`doc_url` points at the actual spec page +(`https://specification.website/spec/{category}/{slug}/`). PHP and LLM +findings dedupe naturally when they cover the same rule. -Currently the PHP checks invent slugs (`html-doctype`, `meta-description`) -and the LLM invents its own (`doctype`, `meta-charset`). They overlap -conceptually but use different identifiers. - -The spec itself publishes canonical slugs in its URL structure: -``` -https://specification.website/spec/foundations/meta-viewport/ - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - category / rule slug -``` - -If we used those: -- PHP and LLM findings naturally dedupe. -- `doc_url` becomes - `https://specification.website/spec/{category}/{rule_id}/` — a real link to - the actual spec page, not the homepage. -- Cross-source consistency: future MCP/Abilities/etc. integrations align - automatically. - -**Implementation sketch:** -- Rename PHP check rule_ids to match canonical slugs: - `html-doctype` → `doctype`, `html-lang-attribute` → `lang`, - `charset-meta` → `meta-charset`, `meta-description` → `meta-description` - (already matches!), `xml-sitemap` → `xml-sitemap` (need to verify exact spec - slug). -- Update `doc_url` in each check to point to `/spec/{category}/{rule_id}/`. -- Update the AI prompt to instruct the model to use spec.website canonical - slugs (and provide examples). -- Handle migration: existing tasks with the OLD rule_ids will be considered - retired by the self-heal logic → auto-completed cleanly on next eval. - **This is the right behavior** — they were the right tasks under the old - schema and the user will get fresh ones under the new schema. No data - needs hand-migration. - -Estimated effort: 1-2 hours of code + tests + a careful verification run. -Worth doing before shipping. - -### 2. (medium) Local-dev false positive on `https-tls`. +### 1. (medium) Local-dev false positive on `https-tls`. The AI correctly flags non-HTTPS sites, but on a dev environment like `http://planner.test/` this is noise. Two options: @@ -303,24 +271,23 @@ The AI correctly flags non-HTTPS sites, but on a dev environment like I'd lean (b) — sanitizing for dev environments risks hiding real prod issues. -### 3. (medium) Rename `Spec_Mcp_Client` → `Spec_Ai_Client`. +### 2. (medium) Rename `Spec_Mcp_Client` → `Spec_Ai_Client`. The "MCP" in the name was aspirational and incorrect (see decision 5). -Simple rename, no behavior change. Wait until after canonical-slug work so -both refactors land in one commit. +Simple rename, no behavior change. Simple rename, no behavior change. -### 4. (low) Add a `--dry-run` flag to the CLI command. +### 3. (low) Add a `--dry-run` flag to the CLI command. Run the audit, print findings, inject nothing. Useful for evaluating AI output quality without cluttering the suggested-tasks UI between runs. ~10 lines. -### 5. (low) UI for "Run audit now." +### 4. (low) UI for "Run audit now." The AJAX endpoint (`progress_planner_run_spec_audit`) exists and is fastcgi-detached. There's no admin UI button wired to it yet. -### 6. (low) Audit should respect a deactivation cleanup. +### 5. (low) Audit should respect a deactivation cleanup. When the plugin is deactivated, the daily cron hook `progress_planner_spec_audit_run` should be unscheduled. Not currently wired. diff --git a/classes/suggested-tasks/audit/checks/class-charset-check.php b/classes/suggested-tasks/audit/checks/class-charset-check.php index ca8a1da2c..2ad74e6f5 100644 --- a/classes/suggested-tasks/audit/checks/class-charset-check.php +++ b/classes/suggested-tasks/audit/checks/class-charset-check.php @@ -18,7 +18,7 @@ class Charset_Check implements Check { * @return string */ public function get_rule_id(): string { - return 'charset-meta'; + return 'meta-charset'; } /** @@ -44,12 +44,12 @@ public function run( string $url, string $html, array $context ): array { return [ 'rule_id' => $this->get_rule_id(), - 'category' => 'basics', + 'category' => 'foundations', 'title' => \__( 'Declare a UTF-8 charset', 'progress-planner' ), 'description' => \__( 'Add near the top of your . Without an explicit charset, special characters and emoji can render as garbled text.', 'progress-planner' ), 'severity' => 'medium', 'status' => $pass ? 'pass' : 'fail', - 'doc_url' => 'https://specification.website/', + 'doc_url' => 'https://specification.website/spec/foundations/meta-charset/', 'source' => 'php-check', ]; } diff --git a/classes/suggested-tasks/audit/checks/class-doctype-check.php b/classes/suggested-tasks/audit/checks/class-doctype-check.php index 87aca1ddc..4c010bf81 100644 --- a/classes/suggested-tasks/audit/checks/class-doctype-check.php +++ b/classes/suggested-tasks/audit/checks/class-doctype-check.php @@ -18,7 +18,7 @@ class Doctype_Check implements Check { * @return string */ public function get_rule_id(): string { - return 'html-doctype'; + return 'doctype'; } /** @@ -41,12 +41,12 @@ public function run( string $url, string $html, array $context ): array { return [ 'rule_id' => $this->get_rule_id(), - 'category' => 'basics', + 'category' => 'foundations', 'title' => \__( 'Add an HTML5 doctype to your homepage', 'progress-planner' ), 'description' => \__( 'Every page should start with so browsers render it in standards mode. Without it, browsers fall back to quirks mode, which can break your layout.', 'progress-planner' ), 'severity' => 'high', 'status' => $pass ? 'pass' : 'fail', - 'doc_url' => 'https://specification.website/', + 'doc_url' => 'https://specification.website/spec/foundations/doctype/', 'source' => 'php-check', ]; } diff --git a/classes/suggested-tasks/audit/checks/class-lang-attribute-check.php b/classes/suggested-tasks/audit/checks/class-lang-attribute-check.php index 1c2e91e60..70e5e4ad0 100644 --- a/classes/suggested-tasks/audit/checks/class-lang-attribute-check.php +++ b/classes/suggested-tasks/audit/checks/class-lang-attribute-check.php @@ -18,7 +18,7 @@ class Lang_Attribute_Check implements Check { * @return string */ public function get_rule_id(): string { - return 'html-lang-attribute'; + return 'html-lang'; } /** @@ -39,12 +39,12 @@ public function run( string $url, string $html, array $context ): array { return [ 'rule_id' => $this->get_rule_id(), - 'category' => 'accessibility', + 'category' => 'foundations', 'title' => \__( "Declare your site's language", 'progress-planner' ), 'description' => \__( 'Add a lang attribute to the tag (for example lang="en"). It helps screen readers pronounce content correctly and search engines serve the right language.', 'progress-planner' ), 'severity' => 'medium', 'status' => $pass ? 'pass' : 'fail', - 'doc_url' => 'https://specification.website/', + 'doc_url' => 'https://specification.website/spec/foundations/html-lang/', 'source' => 'php-check', ]; } diff --git a/classes/suggested-tasks/audit/checks/class-meta-description-check.php b/classes/suggested-tasks/audit/checks/class-meta-description-check.php index 005fcc725..211374893 100644 --- a/classes/suggested-tasks/audit/checks/class-meta-description-check.php +++ b/classes/suggested-tasks/audit/checks/class-meta-description-check.php @@ -52,12 +52,12 @@ public function run( string $url, string $html, array $context ): array { return [ 'rule_id' => $this->get_rule_id(), - 'category' => 'seo', + 'category' => 'foundations', 'title' => \__( 'Add a meta description to your homepage', 'progress-planner' ), 'description' => \__( 'A meta description is the snippet search engines show under your page title in search results. Without one, search engines auto-generate a snippet which is often less compelling and less accurate. Install an SEO plugin like Yoast SEO or set a description in your theme to control how your site is summarized.', 'progress-planner' ), 'severity' => 'medium', 'status' => $pass ? 'pass' : 'fail', - 'doc_url' => 'https://specification.website/', + 'doc_url' => 'https://specification.website/spec/foundations/meta-description/', 'source' => 'php-check', ]; } diff --git a/classes/suggested-tasks/audit/checks/class-sitemap-check.php b/classes/suggested-tasks/audit/checks/class-sitemap-check.php index bdc87bb0e..a6dee93f8 100644 --- a/classes/suggested-tasks/audit/checks/class-sitemap-check.php +++ b/classes/suggested-tasks/audit/checks/class-sitemap-check.php @@ -18,7 +18,7 @@ class Sitemap_Check implements Check { * @return string */ public function get_rule_id(): string { - return 'xml-sitemap'; + return 'xml-sitemaps'; } /** @@ -75,7 +75,7 @@ protected function finding( string $status ): array { 'description' => \__( 'An XML sitemap lists your important URLs so search engines can discover and index them efficiently. WordPress can generate one automatically, or an SEO plugin can provide it.', 'progress-planner' ), 'severity' => 'medium', 'status' => $status, - 'doc_url' => 'https://specification.website/', + 'doc_url' => 'https://specification.website/spec/seo/xml-sitemaps/', 'source' => 'php-check', ]; } diff --git a/classes/suggested-tasks/audit/class-spec-mcp-client.php b/classes/suggested-tasks/audit/class-spec-mcp-client.php index ad664ae5e..a22104ca7 100644 --- a/classes/suggested-tasks/audit/class-spec-mcp-client.php +++ b/classes/suggested-tasks/audit/class-spec-mcp-client.php @@ -196,7 +196,22 @@ protected function run_prompt( string $url, string $checklist, string $html ) { $html_excerpt = \substr( $html, 0, 20000 ); $instruction = \sprintf( - "You are auditing a website against the Website Specification.\n\nURL: %s\n\nSpecification (Markdown index):\n%s\n\nFor each BASIC checklist rule you can evaluate from the HTML below, return a finding with a stable rule_id slug (kebab-case, derived from the spec rule's identifier), the category, a short human title and description of the fix, a severity, and status 'pass' or 'fail'. Only include rules you can actually evaluate from the provided HTML. Do not invent rules. Keep rule_id values consistent across runs.\n\nHTML:\n%s", + "You are auditing a website against the Website Specification.\n\n" + . "URL: %s\n\n" + . "Specification (Markdown index):\n%s\n\n" + . "Each spec rule has a CANONICAL identifier derived from its URL on specification.website:\n" + . " https://specification.website/spec/{category}/{slug}/\n" + . "For example: /spec/foundations/meta-viewport/ → category=foundations, rule_id=meta-viewport.\n\n" + . "For each BASIC rule from the spec that you can evaluate from the HTML below, return a finding:\n" + . " - rule_id: the exact canonical slug from the spec URL (e.g. 'meta-viewport', 'open-graph', 'theme-color'). NEVER invent slugs.\n" + . " - category: the spec category from the URL ('foundations', 'seo', 'accessibility', 'security', 'performance', 'privacy', 'agent-readiness', 'resilience', 'i18n', 'well-known').\n" + . " - title: a short, human title for the fix.\n" + . " - description: 1–2 sentences explaining what's wrong and how to fix it.\n" + . " - severity: 'high' | 'medium' | 'low'.\n" + . " - status: 'pass' if the HTML satisfies the rule, 'fail' otherwise.\n" + . " - doc_url: the full /spec/{category}/{slug}/ URL.\n\n" + . "Only include rules whose canonical slug appears in the specification above and which you can actually evaluate from the provided HTML. Do not invent rules. Keep rule_id values consistent across runs.\n\n" + . "HTML:\n%s", $url, $checklist, $html_excerpt diff --git a/tests/phpunit/test-class-spec-audit-checks.php b/tests/phpunit/test-class-spec-audit-checks.php index cb6d3b05d..1407ed422 100644 --- a/tests/phpunit/test-class-spec-audit-checks.php +++ b/tests/phpunit/test-class-spec-audit-checks.php @@ -33,7 +33,7 @@ public function test_doctype_check() { $check = new Doctype_Check(); $pass = $check->run( 'https://example.com', self::GOOD_HTML, [] ); - $this->assertSame( 'html-doctype', $pass['rule_id'] ); + $this->assertSame( 'doctype', $pass['rule_id'] ); $this->assertSame( 'pass', $pass['status'] ); $fail = $check->run( 'https://example.com', 'No doctype', [] ); diff --git a/tests/phpunit/test-class-spec-audit-provider.php b/tests/phpunit/test-class-spec-audit-provider.php index 1ed22588b..5ca9f6ce3 100644 --- a/tests/phpunit/test-class-spec-audit-provider.php +++ b/tests/phpunit/test-class-spec-audit-provider.php @@ -249,7 +249,7 @@ public function test_already_injected_rule_not_repeated() { * @return void */ public function test_task_id_is_stable_per_rule() { - $this->assertSame( 'spec-audit-html-doctype', $this->provider->get_task_id( [ 'rule_id' => 'html-doctype' ] ) ); + $this->assertSame( 'spec-audit-doctype', $this->provider->get_task_id( [ 'rule_id' => 'doctype' ] ) ); $this->assertSame( 'spec-audit', $this->provider->get_task_id( [] ) ); } From cbaae1c06dab6693f86987142b774a7fa7f80301 Mon Sep 17 00:00:00 2001 From: Filip Ilic Date: Mon, 1 Jun 2026 14:40:15 +0200 Subject: [PATCH 11/12] Don't auto-complete LLM audit tasks when the model omits a rule MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit is_specific_task_completed() treated "rule no longer reported in a populated audit" as completion for any source. Because the mcp-llm engine is non-deterministic, a rule simply being absent from a later audit run made its task self-complete even though the user fixed nothing — contradicting the documented design (only php-check tasks should complete on rule-absence; LLM/SaaS tasks complete only on an explicit pass). Guard the rule-absence branch on the php-check source. Add a regression test asserting an mcp-llm task is not completed on omission but is completed on an explicit pass, and clarify the existing php-check test. Update the handoff doc. Co-Authored-By: Claude Opus 4.8 (1M context) --- HANDOFF-spec-audit.md | 12 +++- .../providers/class-spec-audit.php | 15 ++++- .../test-class-spec-audit-provider.php | 63 +++++++++++++++++-- 3 files changed, 83 insertions(+), 7 deletions(-) diff --git a/HANDOFF-spec-audit.md b/HANDOFF-spec-audit.md index a4796da9d..8ce72cb96 100644 --- a/HANDOFF-spec-audit.md +++ b/HANDOFF-spec-audit.md @@ -150,6 +150,16 @@ Fix (commit `bbfaf446`): a rule "missing" from one audit just means the model didn't mention it this time, not that it was retired. +**Follow-up fix (found in PR review testing):** the exemption above was only +applied to the *retired-rule registry* path. A second branch in +`is_specific_task_completed()` — "rule no longer reported in a populated +audit" — returned `true` for **any** source, so an `mcp-llm` task +auto-completed the moment the (non-deterministic) model omitted its rule on a +later run. That contradicted this decision. The branch is now guarded the same +way: only `php-check` tasks treat rule-absence as "fixed"; `mcp-llm`/`saas` +tasks complete only on an explicit `pass` status. Regression test: +`test_llm_task_not_completed_when_rule_gone`. + ### 4. **AI engine: phase C now, phase B later.** Phase C (now): plugin is the AI client, using the site owner's WP 7.0 @@ -390,7 +400,7 @@ around the relevant call sites. ## Final state of `filip/spec-audit` as of handoff -- 424/424 PHPUnit tests pass. +- 425/425 PHPUnit tests pass. - PHPStan clean. - phpcs clean at hook severity (`--warning-severity=6`). - 9 commits ahead of base. diff --git a/classes/suggested-tasks/providers/class-spec-audit.php b/classes/suggested-tasks/providers/class-spec-audit.php index 0df085c04..d8879834f 100644 --- a/classes/suggested-tasks/providers/class-spec-audit.php +++ b/classes/suggested-tasks/providers/class-spec-audit.php @@ -401,9 +401,20 @@ protected function is_specific_task_completed( $task_id ) { $finding = $collector->get_finding( $rule_id ); - // Rule no longer reported in a populated audit -> the user fixed it. + // Rule no longer reported in a populated audit. + // + // For php-check tasks this means the user fixed it (or the rule was + // retired): the deterministic check set is fixed, so a passing rule is + // simply omitted. Mark complete. + // + // For mcp-llm / saas tasks we must NOT treat absence as completion: the + // LLM/SaaS rule space is open-ended and non-deterministic, so a rule + // missing from one audit run usually just means the model didn't mention + // it this time, not that the user fixed it. Those tasks only complete on + // an explicit 'pass' status (handled below). See decision 3 in + // HANDOFF-spec-audit.md. if ( null === $finding ) { - return true; + return 'php-check' === $this->get_task_source( $task_id ); } return isset( $finding['status'] ) && 'pass' === $finding['status']; diff --git a/tests/phpunit/test-class-spec-audit-provider.php b/tests/phpunit/test-class-spec-audit-provider.php index 5ca9f6ce3..526d9cec6 100644 --- a/tests/phpunit/test-class-spec-audit-provider.php +++ b/tests/phpunit/test-class-spec-audit-provider.php @@ -274,8 +274,13 @@ public function test_task_completed_when_rule_passes() { } /** - * Test completion: a rule that disappears from a NON-EMPTY audit marks the - * task complete. + * Test completion: a PHP-CHECK rule that disappears from a NON-EMPTY audit + * marks the task complete. + * + * The deterministic check set is fixed, so a passing rule is simply omitted + * from the findings — absence therefore means "fixed" for php-check tasks. + * (mcp-llm / saas tasks are handled differently — see + * test_llm_task_not_completed_when_rule_gone.) * * Note: an *empty* cache is NOT treated as "rule passed" — see * test_task_not_completed_when_cache_is_empty — so this test seeds a @@ -283,14 +288,64 @@ public function test_task_completed_when_rule_passes() { * * @return void */ - public function test_task_completed_when_rule_gone() { + public function test_php_check_task_completed_when_rule_gone() { + // finding() defaults source to 'php-check'. $this->seed_findings( [ $this->finding( 'rule-a' ) ] ); $this->provider->get_tasks_to_inject(); // Audit ran again and the rule is no longer reported (but other rules are). $this->seed_findings( [ $this->finding( 'rule-b' ) ] ); - $this->assertTrue( $this->provider->is_task_completed( 'spec-audit-rule-a' ) ); + $this->assertTrue( + $this->provider->is_task_completed( 'spec-audit-rule-a' ), + 'A php-check task whose rule vanished from a populated audit must complete.' + ); + } + + /** + * Test completion: an mcp-llm task whose rule simply disappears from a + * populated audit must NOT be marked complete. + * + * Regression guard. The LLM/SaaS rule space is open-ended and the model is + * non-deterministic: a rule missing from one audit run usually just means + * the model didn't mention it this time, not that the user fixed it. + * Treating absence as completion let AI-surfaced tasks silently self-complete + * between runs. Such tasks must only complete on an explicit 'pass' status. + * See decision 3 in HANDOFF-spec-audit.md. + * + * @return void + */ + public function test_llm_task_not_completed_when_rule_gone() { + $llm_finding = $this->finding( 'llm-rule' ); + $llm_finding['source'] = 'mcp-llm'; + $this->seed_findings( [ $llm_finding ] ); + + $created = $this->provider->get_tasks_to_inject(); + $this->assertCount( 1, $created, 'Sanity: the mcp-llm task is injected.' ); + $this->assertSame( + 'mcp-llm', + \get_post_meta( $created[0], 'prpl_source', true ), + 'Sanity: the injected task carries the mcp-llm source.' + ); + + // Audit ran again; the model simply didn't mention llm-rule this time + // (cache is non-empty — another rule is present). + $this->seed_findings( [ $this->finding( 'rule-b' ) ] ); + + $this->assertFalse( + $this->provider->is_task_completed( 'spec-audit-llm-rule' ), + 'An mcp-llm task must NOT complete just because the model omitted its rule.' + ); + + // It DOES complete when the rule is explicitly reported as passing. + $passing = $llm_finding; + $passing['status'] = 'pass'; + $this->seed_findings( [ $passing ] ); + + $this->assertTrue( + $this->provider->is_task_completed( 'spec-audit-llm-rule' ), + 'An mcp-llm task completes when its rule is explicitly reported pass.' + ); } /** From 902d36cc8d216f1f6cf672447997d34cddec0f0d Mon Sep 17 00:00:00 2001 From: Filip Ilic Date: Mon, 1 Jun 2026 15:39:03 +0200 Subject: [PATCH 12/12] Report accurate injected count from run_audit_now; fix handoff cleanup run_audit_now() returned only the tasks its own get_tasks_to_inject() call created. When the bootstrap inject_tasks() sweep already consumed the daily throttle slot earlier in the same request, the CLI command reported "0 task(s) injected" even though a task had been injected. Return pending_release_ids (all tasks injected this request) so the count reflects reality. Also fix the handoff's clean-state snippet: it iterated the cached get_tasks_by() result while delete_recommendation() flushed that cache group mid-loop, skipping tasks and leaving survivors. Snapshot the IDs with a raw get_posts() + provider tax_query instead. Co-Authored-By: Claude Opus 4.8 (1M context) --- HANDOFF-spec-audit.md | 26 ++++++++++++++++--- .../providers/class-spec-audit.php | 11 ++++++-- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/HANDOFF-spec-audit.md b/HANDOFF-spec-audit.md index 8ce72cb96..6bd2dba0f 100644 --- a/HANDOFF-spec-audit.md +++ b/HANDOFF-spec-audit.md @@ -366,11 +366,31 @@ foreach (progress_planner()->get_settings()->get("progress_planner_data_collecto ``` ### Clean state (delete all spec-audit tasks + reset throttle + clear cache) + +NOTE: collect the task IDs with a raw `get_posts()` (+ provider tax_query), NOT +`$db->get_tasks_by()`. `get_tasks_by()` is cached in `GET_TASKS_CACHE_GROUP`, +and `delete_recommendation()` flushes that group mid-loop — so iterating the +cached list while deleting from it skips tasks and leaves survivors behind +(this is what made tasks look like they accumulate across runs). A raw +`get_posts()` snapshot of IDs is stable across the deletes. + ```bash wp eval ' +$ids = get_posts([ + "post_type" => "prpl_recommendations", + "post_status" => "any", + "numberposts" => -1, + "fields" => "ids", + "tax_query" => [[ + "taxonomy" => "prpl_recommendations_provider", + "field" => "slug", + "terms" => "spec-audit", + ]], +]); $db = progress_planner()->get_suggested_tasks_db(); -foreach ((array) $db->get_tasks_by(["post_status"=>["publish","trash","draft","future","pending"],"provider_id"=>"spec-audit"]) as $t) { - if (is_object($t) && isset($t->ID)) wp_delete_post($t->ID, true); +foreach ($ids as $id) { + $db->delete_recommendation($id); + wp_delete_post($id, true); } $s = progress_planner()->get_settings(); $dc = $s->get("progress_planner_data_collector", []); @@ -380,7 +400,7 @@ $s->set("spec_audit", []); progress_planner()->get_utils__cache()->delete("spec_audit_checklist"); $ts = wp_next_scheduled("progress_planner_spec_audit_run"); if ($ts) wp_unschedule_event($ts, "progress_planner_spec_audit_run"); -echo "clean\n"; +echo "deleted " . count($ids) . " spec-audit task(s)\n"; ' ``` diff --git a/classes/suggested-tasks/providers/class-spec-audit.php b/classes/suggested-tasks/providers/class-spec-audit.php index d8879834f..717af3a60 100644 --- a/classes/suggested-tasks/providers/class-spec-audit.php +++ b/classes/suggested-tasks/providers/class-spec-audit.php @@ -529,7 +529,12 @@ public function shutdown_run_audit() { * handler ({@see shutdown_run_audit()}). Never called from `admin_init`, * which would amplify into FPM pool starvation. * - * @return array Created post IDs. + * @return array Post IDs injected this request. This includes any + * task already injected earlier in the same request + * by the bootstrap inject_tasks() sweep — not just + * the ones this call created — so callers report an + * accurate "injected this run" count even when the + * sweep consumed the throttle slot before us. */ public function run_audit_now(): array { $collector = $this->get_audit_collector(); @@ -538,6 +543,8 @@ static function () use ( $collector ) { $collector->update_cache(); } ); - return $this->get_tasks_to_inject(); + $this->get_tasks_to_inject(); + + return $this->pending_release_ids; } }