From 4dceec5fd1bd2696e8a7efc3fed68703c54a5066 Mon Sep 17 00:00:00 2001 From: Yvette Carlisle Date: Tue, 30 Jun 2026 10:05:38 -0400 Subject: [PATCH 1/6] {"schema":"decodex/commit/1","summary":"Split progressive search type modules","authority":"manual"} --- .../src/progressive_search/details/access.rs | 2 +- .../src/progressive_search/details/results.rs | 4 +- .../progressive_search/details/timeline.rs | 3 +- .../followup/details_endpoint.rs | 2 +- .../progressive_search/followup/timeline.rs | 4 +- .../progressive_search/service/entrypoints.rs | 2 +- .../progressive_search/service/sessionized.rs | 8 +- .../src/progressive_search/storage/hits.rs | 2 +- .../src/progressive_search/storage/session.rs | 7 +- .../src/progressive_search/types.rs | 263 ++---------------- .../src/progressive_search/types/details.rs | 57 ++++ .../src/progressive_search/types/index.rs | 108 +++++++ .../progressive_search/types/session_mode.rs | 44 +++ .../src/progressive_search/types/timeline.rs | 43 +++ 14 files changed, 289 insertions(+), 260 deletions(-) create mode 100644 packages/elf-service/src/progressive_search/types/details.rs create mode 100644 packages/elf-service/src/progressive_search/types/index.rs create mode 100644 packages/elf-service/src/progressive_search/types/session_mode.rs create mode 100644 packages/elf-service/src/progressive_search/types/timeline.rs diff --git a/packages/elf-service/src/progressive_search/details/access.rs b/packages/elf-service/src/progressive_search/details/access.rs index bdc77e6d..8cb35a85 100644 --- a/packages/elf-service/src/progressive_search/details/access.rs +++ b/packages/elf-service/src/progressive_search/details/access.rs @@ -5,7 +5,7 @@ use time::OffsetDateTime; use crate::{ Error, Result, access::{self, SharedSpaceGrantKey}, - progressive_search::types::{SearchDetailsError, SearchSession}, + progressive_search::types::{SearchDetailsError, session::SearchSession}, }; use elf_config::Config; use elf_storage::models::MemoryNote; diff --git a/packages/elf-service/src/progressive_search/details/results.rs b/packages/elf-service/src/progressive_search/details/results.rs index 192bc15b..ea99289f 100644 --- a/packages/elf-service/src/progressive_search/details/results.rs +++ b/packages/elf-service/src/progressive_search/details/results.rs @@ -10,8 +10,8 @@ use crate::{ progressive_search::{ details::{access, text}, types::{ - HitItem, SearchDetailsError, SearchDetailsResult, SearchSession, - SearchSessionItemRecord, + SearchDetailsError, SearchDetailsResult, + session::{HitItem, SearchSession, SearchSessionItemRecord}, }, }, structured_fields::StructuredFields, diff --git a/packages/elf-service/src/progressive_search/details/timeline.rs b/packages/elf-service/src/progressive_search/details/timeline.rs index c4f4b07e..28593cba 100644 --- a/packages/elf-service/src/progressive_search/details/timeline.rs +++ b/packages/elf-service/src/progressive_search/details/timeline.rs @@ -6,7 +6,8 @@ use uuid::Uuid; use crate::{ Result, progressive_search::types::{ - SearchIndexItem, SearchSessionItemRecord, SearchTimelineGroup, SearchTimelineResponse, + SearchIndexItem, SearchTimelineGroup, SearchTimelineResponse, + session::SearchSessionItemRecord, }, }; diff --git a/packages/elf-service/src/progressive_search/followup/details_endpoint.rs b/packages/elf-service/src/progressive_search/followup/details_endpoint.rs index 6502248f..7807e92a 100644 --- a/packages/elf-service/src/progressive_search/followup/details_endpoint.rs +++ b/packages/elf-service/src/progressive_search/followup/details_endpoint.rs @@ -9,7 +9,7 @@ use crate::{ progressive_search::{ details::{self, SearchDetailsBuildArgs}, storage, - types::{SearchDetailsRequest, SearchDetailsResponse, SearchSessionItemRecord}, + types::{SearchDetailsRequest, SearchDetailsResponse, session::SearchSessionItemRecord}, }, structured_fields, }; diff --git a/packages/elf-service/src/progressive_search/followup/timeline.rs b/packages/elf-service/src/progressive_search/followup/timeline.rs index b27d32b5..8a4a5e9c 100644 --- a/packages/elf-service/src/progressive_search/followup/timeline.rs +++ b/packages/elf-service/src/progressive_search/followup/timeline.rs @@ -5,8 +5,8 @@ use crate::{ progressive_search::{ details, storage, types::{ - SearchSessionItemRecord, SearchTimelineGroup, SearchTimelineRequest, - SearchTimelineResponse, + SearchTimelineGroup, SearchTimelineRequest, SearchTimelineResponse, + session::SearchSessionItemRecord, }, }, }; diff --git a/packages/elf-service/src/progressive_search/service/entrypoints.rs b/packages/elf-service/src/progressive_search/service/entrypoints.rs index 752cc745..6e9cd7ac 100644 --- a/packages/elf-service/src/progressive_search/service/entrypoints.rs +++ b/packages/elf-service/src/progressive_search/service/entrypoints.rs @@ -1,7 +1,7 @@ use crate::{ ElfService, Error, Result, SearchRequest, progressive_search::types::{ - SearchIndexPlannedResponse, SearchIndexResponse, SearchSessionizePath, + SearchIndexPlannedResponse, SearchIndexResponse, session::SearchSessionizePath, }, }; diff --git a/packages/elf-service/src/progressive_search/service/sessionized.rs b/packages/elf-service/src/progressive_search/service/sessionized.rs index 440ef07a..3d600123 100644 --- a/packages/elf-service/src/progressive_search/service/sessionized.rs +++ b/packages/elf-service/src/progressive_search/service/sessionized.rs @@ -6,9 +6,11 @@ use crate::{ progressive_search::{ details, storage, types::{ - NewSearchSession, SESSION_SLIDING_TTL_HOURS, SearchIndexItem, SearchIndexResponse, - SearchSessionItemRecord, SearchSessionMode, SearchSessionizePath, - SearchSessionizedOutput, + SearchIndexItem, SearchIndexResponse, SearchSessionMode, + session::{ + NewSearchSession, SESSION_SLIDING_TTL_HOURS, SearchSessionItemRecord, + SearchSessionizePath, SearchSessionizedOutput, + }, }, }, structured_fields, diff --git a/packages/elf-service/src/progressive_search/storage/hits.rs b/packages/elf-service/src/progressive_search/storage/hits.rs index 91f57acf..35f4cbe1 100644 --- a/packages/elf-service/src/progressive_search/storage/hits.rs +++ b/packages/elf-service/src/progressive_search/storage/hits.rs @@ -4,7 +4,7 @@ use uuid::Uuid; use crate::{ Error, Result, - progressive_search::{storage::hash, types::HitItem}, + progressive_search::{storage::hash, types::session::HitItem}, }; use elf_domain::english_gate; diff --git a/packages/elf-service/src/progressive_search/storage/session.rs b/packages/elf-service/src/progressive_search/storage/session.rs index 4e26f524..d14cfa5e 100644 --- a/packages/elf-service/src/progressive_search/storage/session.rs +++ b/packages/elf-service/src/progressive_search/storage/session.rs @@ -8,8 +8,11 @@ use uuid::Uuid; use crate::{ Error, Result, progressive_search::types::{ - NewSearchSession, SESSION_ABSOLUTE_TTL_HOURS, SESSION_SLIDING_TTL_HOURS, SearchSession, - SearchSessionItemRecord, SearchSessionMode, SearchSessionRow, + SearchSessionMode, + session::{ + NewSearchSession, SESSION_ABSOLUTE_TTL_HOURS, SESSION_SLIDING_TTL_HOURS, SearchSession, + SearchSessionItemRecord, SearchSessionRow, + }, }, }; diff --git a/packages/elf-service/src/progressive_search/types.rs b/packages/elf-service/src/progressive_search/types.rs index 1678a874..b066133c 100644 --- a/packages/elf-service/src/progressive_search/types.rs +++ b/packages/elf-service/src/progressive_search/types.rs @@ -1,247 +1,18 @@ -mod session; - -pub(super) use session::{ - HitItem, NewSearchSession, SESSION_ABSOLUTE_TTL_HOURS, SESSION_SLIDING_TTL_HOURS, - SearchSession, SearchSessionItemRecord, SearchSessionRow, SearchSessionizePath, - SearchSessionizedOutput, +pub(in crate::progressive_search) mod session; + +mod details; +mod index; +mod session_mode; +mod timeline; + +pub use self::{ + details::{ + SearchDetailsError, SearchDetailsRequest, SearchDetailsResponse, SearchDetailsResult, + }, + index::{ + SearchIndexItem, SearchIndexPlannedResponse, SearchIndexResponse, SearchSessionGetRequest, + SearchSessionGetResponse, + }, + session_mode::SearchSessionMode, + timeline::{SearchTimelineGroup, SearchTimelineRequest, SearchTimelineResponse}, }; - -use std::str::FromStr; - -use serde::{Deserialize, Serialize}; -use time::OffsetDateTime; -use uuid::Uuid; - -use crate::{NoteFetchResponse, PayloadLevel, QueryPlan, SearchTrajectorySummary}; - -/// Lightweight session-storable search hit used by progressive-search APIs. -#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct SearchIndexItem { - /// Note identifier. - pub note_id: Uuid, - /// Note type discriminator. - pub r#type: String, - /// Optional application-defined key. - pub key: Option, - /// Scope key for the note. - pub scope: String, - /// Importance score. - pub importance: f32, - /// Confidence score. - pub confidence: f32, - #[serde(with = "crate::time_serde")] - /// Last update timestamp. - pub updated_at: OffsetDateTime, - #[serde(with = "crate::time_serde::option")] - /// Optional expiry timestamp. - pub expires_at: Option, - /// Final ranked score. - pub final_score: f32, - /// Short display summary. - pub summary: String, -} - -/// Response payload for initial indexed search results. -#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct SearchIndexResponse { - /// Search trace identifier. - pub trace_id: Uuid, - /// Search session identifier used for follow-up requests. - pub search_session_id: Uuid, - #[serde(with = "crate::time_serde")] - /// Session expiry timestamp. - pub expires_at: OffsetDateTime, - /// Stored search hits. - pub items: Vec, - /// Optional condensed explain output. - pub trajectory_summary: Option, -} - -/// Search-session mode used by progressive-search APIs. -#[derive(Clone, Copy, Debug, Eq, PartialEq, Deserialize, Serialize)] -#[serde(rename_all = "snake_case")] -pub enum SearchSessionMode { - /// Quick-find session without a stored query plan. - QuickFind, - /// Planned-search session with a stored query plan. - PlannedSearch, -} -impl SearchSessionMode { - pub(super) fn as_str(self) -> &'static str { - match self { - Self::QuickFind => "quick_find", - Self::PlannedSearch => "planned_search", - } - } -} - -impl FromStr for SearchSessionMode { - type Err = crate::Error; - - fn from_str(value: &str) -> std::result::Result { - match value { - "quick_find" => Ok(Self::QuickFind), - "planned_search" => Ok(Self::PlannedSearch), - _ => Err(crate::Error::Storage { - message: format!("Unknown search session mode: {value}"), - }), - } - } -} - -impl From for SearchSessionMode { - fn from(path: SearchSessionizePath) -> Self { - match path { - SearchSessionizePath::Quick => Self::QuickFind, - SearchSessionizePath::Planned => Self::PlannedSearch, - } - } -} - -/// Response payload for reloading a stored search session. -#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct SearchSessionGetResponse { - /// Search trace identifier. - pub trace_id: Uuid, - /// Search session identifier. - pub search_session_id: Uuid, - #[serde(with = "crate::time_serde")] - /// Session expiry timestamp. - pub expires_at: OffsetDateTime, - /// Stored hits after trimming to the requested limit. - pub items: Vec, - /// Session mode. - pub mode: SearchSessionMode, - /// Stored query plan for planned-search sessions. - pub query_plan: Option, - /// Optional condensed explain output. - pub trajectory_summary: Option, -} - -/// Planned-search variant of the indexed search response. -#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct SearchIndexPlannedResponse { - /// Search trace identifier. - pub trace_id: Uuid, - /// Search session identifier. - pub search_session_id: Uuid, - #[serde(with = "crate::time_serde")] - /// Session expiry timestamp. - pub expires_at: OffsetDateTime, - /// Stored hits. - pub items: Vec, - /// Optional condensed explain output. - pub trajectory_summary: Option, - /// Stored query plan for the session. - pub query_plan: QueryPlan, -} - -/// Request payload for reloading a search session. -#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct SearchSessionGetRequest { - /// Tenant that owns the session. - pub tenant_id: String, - /// Project that owns the session. - pub project_id: String, - /// Agent requesting the read. - pub agent_id: String, - /// Search session identifier. - pub search_session_id: Uuid, - #[serde(default)] - /// Desired payload-detail level. - pub payload_level: PayloadLevel, - /// Optional limit on returned items. - pub top_k: Option, - /// When true, extends the sliding session TTL. - pub touch: Option, -} - -/// Request payload for timeline projection of a search session. -#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct SearchTimelineRequest { - /// Tenant that owns the session. - pub tenant_id: String, - /// Project that owns the session. - pub project_id: String, - /// Agent requesting the read. - pub agent_id: String, - /// Search session identifier. - pub search_session_id: Uuid, - /// Desired payload-detail level. - pub payload_level: PayloadLevel, - /// Optional timeline grouping mode. - pub group_by: Option, -} - -/// One timeline bucket for a search session. -#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct SearchTimelineGroup { - /// Group key, usually a day string. - pub date: String, - /// Items that belong to the group. - pub items: Vec, -} - -/// Response payload for timeline projection. -#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct SearchTimelineResponse { - /// Search session identifier. - pub search_session_id: Uuid, - #[serde(with = "crate::time_serde")] - /// Session expiry timestamp. - pub expires_at: OffsetDateTime, - /// Timeline groups. - pub groups: Vec, -} - -/// Request payload for materializing details from a search session. -#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct SearchDetailsRequest { - /// Tenant that owns the session. - pub tenant_id: String, - /// Project that owns the session. - pub project_id: String, - /// Agent requesting the read. - pub agent_id: String, - /// Search session identifier. - pub search_session_id: Uuid, - #[serde(default)] - /// Desired payload-detail level. - pub payload_level: PayloadLevel, - /// Requested subset of note identifiers. - pub note_ids: Vec, - /// When true, records note-hit metrics for returned details. - pub record_hits: Option, -} - -/// Per-note error payload for detail materialization. -#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct SearchDetailsError { - /// Machine-readable error code. - pub code: String, - /// Human-readable error message. - pub message: String, -} - -/// Per-note detail result for a search session. -#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct SearchDetailsResult { - /// Requested note identifier. - pub note_id: Uuid, - /// Materialized note payload, when loading succeeded. - pub note: Option, - /// Per-note failure, when loading failed. - pub error: Option, -} - -/// Response payload for detail materialization. -#[derive(Clone, Debug, Deserialize, Serialize)] -pub struct SearchDetailsResponse { - /// Search session identifier. - pub search_session_id: Uuid, - #[serde(with = "crate::time_serde")] - /// Session expiry timestamp. - pub expires_at: OffsetDateTime, - /// Per-note results. - pub results: Vec, -} diff --git a/packages/elf-service/src/progressive_search/types/details.rs b/packages/elf-service/src/progressive_search/types/details.rs new file mode 100644 index 00000000..ce2a2380 --- /dev/null +++ b/packages/elf-service/src/progressive_search/types/details.rs @@ -0,0 +1,57 @@ +use serde::{Deserialize, Serialize}; +use time::OffsetDateTime; +use uuid::Uuid; + +use crate::{NoteFetchResponse, PayloadLevel}; + +/// Request payload for materializing details from a search session. +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct SearchDetailsRequest { + /// Tenant that owns the session. + pub tenant_id: String, + /// Project that owns the session. + pub project_id: String, + /// Agent requesting the read. + pub agent_id: String, + /// Search session identifier. + pub search_session_id: Uuid, + #[serde(default)] + /// Desired payload-detail level. + pub payload_level: PayloadLevel, + /// Requested subset of note identifiers. + pub note_ids: Vec, + /// When true, records note-hit metrics for returned details. + pub record_hits: Option, +} + +/// Per-note error payload for detail materialization. +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct SearchDetailsError { + /// Machine-readable error code. + pub code: String, + /// Human-readable error message. + pub message: String, +} + +/// Per-note detail result for a search session. +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct SearchDetailsResult { + /// Requested note identifier. + pub note_id: Uuid, + /// Materialized note payload, when loading succeeded. + pub note: Option, + /// Per-note failure, when loading failed. + pub error: Option, +} + +/// Response payload for detail materialization. +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct SearchDetailsResponse { + /// Search session identifier. + pub search_session_id: Uuid, + #[serde(with = "crate::time_serde")] + /// Session expiry timestamp. + pub expires_at: OffsetDateTime, + /// Per-note results. + pub results: Vec, +} diff --git a/packages/elf-service/src/progressive_search/types/index.rs b/packages/elf-service/src/progressive_search/types/index.rs new file mode 100644 index 00000000..430adb70 --- /dev/null +++ b/packages/elf-service/src/progressive_search/types/index.rs @@ -0,0 +1,108 @@ +use serde::{Deserialize, Serialize}; +use time::OffsetDateTime; +use uuid::Uuid; + +use crate::{ + PayloadLevel, QueryPlan, SearchTrajectorySummary, progressive_search::types::SearchSessionMode, +}; + +/// Lightweight session-storable search hit used by progressive-search APIs. +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct SearchIndexItem { + /// Note identifier. + pub note_id: Uuid, + /// Note type discriminator. + pub r#type: String, + /// Optional application-defined key. + pub key: Option, + /// Scope key for the note. + pub scope: String, + /// Importance score. + pub importance: f32, + /// Confidence score. + pub confidence: f32, + #[serde(with = "crate::time_serde")] + /// Last update timestamp. + pub updated_at: OffsetDateTime, + #[serde(with = "crate::time_serde::option")] + /// Optional expiry timestamp. + pub expires_at: Option, + /// Final ranked score. + pub final_score: f32, + /// Short display summary. + pub summary: String, +} + +/// Response payload for initial indexed search results. +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct SearchIndexResponse { + /// Search trace identifier. + pub trace_id: Uuid, + /// Search session identifier used for follow-up requests. + pub search_session_id: Uuid, + #[serde(with = "crate::time_serde")] + /// Session expiry timestamp. + pub expires_at: OffsetDateTime, + /// Stored search hits. + pub items: Vec, + /// Optional condensed explain output. + pub trajectory_summary: Option, +} + +/// Response payload for reloading a stored search session. +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct SearchSessionGetResponse { + /// Search trace identifier. + pub trace_id: Uuid, + /// Search session identifier. + pub search_session_id: Uuid, + #[serde(with = "crate::time_serde")] + /// Session expiry timestamp. + pub expires_at: OffsetDateTime, + /// Stored hits after trimming to the requested limit. + pub items: Vec, + /// Session mode. + pub mode: SearchSessionMode, + /// Stored query plan for planned-search sessions. + pub query_plan: Option, + /// Optional condensed explain output. + pub trajectory_summary: Option, +} + +/// Planned-search variant of the indexed search response. +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct SearchIndexPlannedResponse { + /// Search trace identifier. + pub trace_id: Uuid, + /// Search session identifier. + pub search_session_id: Uuid, + #[serde(with = "crate::time_serde")] + /// Session expiry timestamp. + pub expires_at: OffsetDateTime, + /// Stored hits. + pub items: Vec, + /// Optional condensed explain output. + pub trajectory_summary: Option, + /// Stored query plan for the session. + pub query_plan: QueryPlan, +} + +/// Request payload for reloading a search session. +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct SearchSessionGetRequest { + /// Tenant that owns the session. + pub tenant_id: String, + /// Project that owns the session. + pub project_id: String, + /// Agent requesting the read. + pub agent_id: String, + /// Search session identifier. + pub search_session_id: Uuid, + #[serde(default)] + /// Desired payload-detail level. + pub payload_level: PayloadLevel, + /// Optional limit on returned items. + pub top_k: Option, + /// When true, extends the sliding session TTL. + pub touch: Option, +} diff --git a/packages/elf-service/src/progressive_search/types/session_mode.rs b/packages/elf-service/src/progressive_search/types/session_mode.rs new file mode 100644 index 00000000..9a929f6c --- /dev/null +++ b/packages/elf-service/src/progressive_search/types/session_mode.rs @@ -0,0 +1,44 @@ +use std::str::FromStr; + +use serde::{Deserialize, Serialize}; + +use crate::{Error, progressive_search::types::session::SearchSessionizePath}; + +/// Search-session mode used by progressive-search APIs. +#[derive(Clone, Copy, Debug, Eq, PartialEq, Deserialize, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum SearchSessionMode { + /// Quick-find session without a stored query plan. + QuickFind, + /// Planned-search session with a stored query plan. + PlannedSearch, +} +impl SearchSessionMode { + pub(in crate::progressive_search) fn as_str(self) -> &'static str { + match self { + Self::QuickFind => "quick_find", + Self::PlannedSearch => "planned_search", + } + } +} + +impl FromStr for SearchSessionMode { + type Err = Error; + + fn from_str(value: &str) -> std::result::Result { + match value { + "quick_find" => Ok(Self::QuickFind), + "planned_search" => Ok(Self::PlannedSearch), + _ => Err(Error::Storage { message: format!("Unknown search session mode: {value}") }), + } + } +} + +impl From for SearchSessionMode { + fn from(path: SearchSessionizePath) -> Self { + match path { + SearchSessionizePath::Quick => Self::QuickFind, + SearchSessionizePath::Planned => Self::PlannedSearch, + } + } +} diff --git a/packages/elf-service/src/progressive_search/types/timeline.rs b/packages/elf-service/src/progressive_search/types/timeline.rs new file mode 100644 index 00000000..0a149f13 --- /dev/null +++ b/packages/elf-service/src/progressive_search/types/timeline.rs @@ -0,0 +1,43 @@ +use serde::{Deserialize, Serialize}; +use time::OffsetDateTime; +use uuid::Uuid; + +use crate::{PayloadLevel, progressive_search::types::SearchIndexItem}; + +/// Request payload for timeline projection of a search session. +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct SearchTimelineRequest { + /// Tenant that owns the session. + pub tenant_id: String, + /// Project that owns the session. + pub project_id: String, + /// Agent requesting the read. + pub agent_id: String, + /// Search session identifier. + pub search_session_id: Uuid, + /// Desired payload-detail level. + pub payload_level: PayloadLevel, + /// Optional timeline grouping mode. + pub group_by: Option, +} + +/// One timeline bucket for a search session. +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct SearchTimelineGroup { + /// Group key, usually a day string. + pub date: String, + /// Items that belong to the group. + pub items: Vec, +} + +/// Response payload for timeline projection. +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct SearchTimelineResponse { + /// Search session identifier. + pub search_session_id: Uuid, + #[serde(with = "crate::time_serde")] + /// Session expiry timestamp. + pub expires_at: OffsetDateTime, + /// Timeline groups. + pub groups: Vec, +} From 71942d43e701313e3a84c1d9eb07892290d7f2cf Mon Sep 17 00:00:00 2001 From: Yvette Carlisle Date: Tue, 30 Jun 2026 10:10:13 -0400 Subject: [PATCH 2/6] {"schema":"decodex/commit/1","summary":"Split diversity selection modules","authority":"manual"} --- .../src/search/ranking/diversity/selection.rs | 249 +----------------- .../ranking/diversity/selection/disabled.rs | 49 ++++ .../ranking/diversity/selection/enabled.rs | 188 +++++++++++++ .../ranking/diversity/selection/pick.rs | 17 ++ 4 files changed, 261 insertions(+), 242 deletions(-) create mode 100644 packages/elf-service/src/search/ranking/diversity/selection/disabled.rs create mode 100644 packages/elf-service/src/search/ranking/diversity/selection/enabled.rs create mode 100644 packages/elf-service/src/search/ranking/diversity/selection/pick.rs diff --git a/packages/elf-service/src/search/ranking/diversity/selection.rs b/packages/elf-service/src/search/ranking/diversity/selection.rs index acfe28ea..3b4b1d16 100644 --- a/packages/elf-service/src/search/ranking/diversity/selection.rs +++ b/packages/elf-service/src/search/ranking/diversity/selection.rs @@ -1,27 +1,12 @@ +mod disabled; +mod enabled; +mod pick; + use std::collections::HashMap; use uuid::Uuid; -use crate::search::{ - DiversityDecision, ScoredChunk, - ranking::{diversity::similarity, policy::ResolvedDiversityPolicy, retrieval}, -}; - -#[derive(Clone, Copy)] -struct DiversityPick { - remaining_pos: usize, - mmr_score: f32, - nearest_note_id: Option, - similarity: Option, - missing_embedding: bool, - retrieval_rank: u32, -} -impl DiversityPick { - fn better_than(self, other: &Self) -> bool { - self.mmr_score > other.mmr_score - || (self.mmr_score == other.mmr_score && self.retrieval_rank < other.retrieval_rank) - } -} +use crate::search::{DiversityDecision, ScoredChunk, ranking::policy::ResolvedDiversityPolicy}; pub fn select_diverse_results( candidates: Vec, @@ -33,228 +18,8 @@ pub fn select_diverse_results( return (Vec::new(), HashMap::new()); } if !policy.enabled { - return select_diverse_results_disabled(candidates, top_k, note_vectors); - } - - select_diverse_results_enabled(candidates, top_k, policy, note_vectors) -} - -fn select_diverse_results_disabled( - candidates: Vec, - top_k: u32, - note_vectors: &HashMap>, -) -> (Vec, HashMap) { - let mut decisions = HashMap::new(); - let mut selected = Vec::new(); - - for (idx, candidate) in candidates.into_iter().enumerate() { - let selected_rank = (idx < top_k as usize).then_some(idx as u32 + 1); - let is_selected = selected_rank.is_some(); - let note_id = candidate.item.note.note_id; - let missing_embedding = !note_vectors.contains_key(¬e_id); - - decisions.insert( - note_id, - DiversityDecision { - selected: is_selected, - selected_rank, - selected_reason: if is_selected { - "disabled_passthrough".to_string() - } else { - "disabled_truncate".to_string() - }, - skipped_reason: if is_selected { - None - } else { - Some("disabled_truncate".to_string()) - }, - nearest_selected_note_id: None, - similarity: None, - mmr_score: None, - missing_embedding, - }, - ); - - if is_selected { - selected.push(candidate); - } - } - - (selected, decisions) -} - -fn select_diverse_results_enabled( - candidates: Vec, - top_k: u32, - policy: &ResolvedDiversityPolicy, - note_vectors: &HashMap>, -) -> (Vec, HashMap) { - let total = u32::try_from(candidates.len()).unwrap_or(1).max(1); - let relevance_by_idx: Vec = - (0..candidates.len()).map(|idx| retrieval::rank_normalize(idx as u32 + 1, total)).collect(); - let mut remaining_indices: Vec = (0..candidates.len()).collect(); - let mut selected_indices: Vec = Vec::new(); - let mut decisions: HashMap = HashMap::new(); - let first_idx = remaining_indices.remove(0); - let first_note_id = candidates[first_idx].item.note.note_id; - let first_missing_embedding = !note_vectors.contains_key(&first_note_id); - - selected_indices.push(first_idx); - decisions.insert( - first_note_id, - DiversityDecision { - selected: true, - selected_rank: Some(1), - selected_reason: "top_relevance".to_string(), - skipped_reason: None, - nearest_selected_note_id: None, - similarity: None, - mmr_score: Some(relevance_by_idx[first_idx]), - missing_embedding: first_missing_embedding, - }, - ); - - while selected_indices.len() < top_k as usize && !remaining_indices.is_empty() { - let Some((selected_pick, selected_reason)) = pick_next_candidate( - &remaining_indices, - &candidates, - &selected_indices, - note_vectors, - &relevance_by_idx, - policy, - ) else { - break; - }; - let picked_idx = remaining_indices.remove(selected_pick.remaining_pos); - - selected_indices.push(picked_idx); - - let selected_note_id = candidates[picked_idx].item.note.note_id; - - decisions.insert( - selected_note_id, - DiversityDecision { - selected: true, - selected_rank: Some(selected_indices.len() as u32), - selected_reason: selected_reason.to_string(), - skipped_reason: None, - nearest_selected_note_id: selected_pick.nearest_note_id, - similarity: selected_pick.similarity, - mmr_score: Some(selected_pick.mmr_score), - missing_embedding: selected_pick.missing_embedding, - }, - ); - } - - for candidate_idx in remaining_indices { - let note_id = candidates[candidate_idx].item.note.note_id; - let (similarity, nearest_note_id, missing_embedding) = - similarity::nearest_selected_similarity( - note_id, - &candidates, - &selected_indices, - note_vectors, - ); - let skipped_reason = - if similarity.map(|value| value > policy.sim_threshold).unwrap_or(false) { - "similarity_threshold" - } else { - "lower_mmr" - }; - let redundancy = similarity.unwrap_or(0.0); - let mmr_score = policy.mmr_lambda * relevance_by_idx[candidate_idx] - - (1.0 - policy.mmr_lambda) * redundancy; - - decisions.insert( - note_id, - DiversityDecision { - selected: false, - selected_rank: None, - selected_reason: "not_selected".to_string(), - skipped_reason: Some(skipped_reason.to_string()), - nearest_selected_note_id: nearest_note_id, - similarity, - mmr_score: Some(mmr_score), - missing_embedding, - }, - ); - } - - let selected = selected_indices.into_iter().map(|idx| candidates[idx].clone()).collect(); - - (selected, decisions) -} - -fn pick_next_candidate( - remaining_indices: &[usize], - candidates: &[ScoredChunk], - selected_indices: &[usize], - note_vectors: &HashMap>, - relevance_by_idx: &[f32], - policy: &ResolvedDiversityPolicy, -) -> Option<(DiversityPick, &'static str)> { - let mut best_non_filtered: Option = None; - let mut best_filtered: Option = None; - let mut best_any: Option = None; - let mut filtered_count = 0_u32; - - for (remaining_pos, candidate_idx) in remaining_indices.iter().copied().enumerate() { - let note_id = candidates[candidate_idx].item.note.note_id; - let (similarity, nearest_note_id, missing_embedding) = - similarity::nearest_selected_similarity( - note_id, - candidates, - selected_indices, - note_vectors, - ); - let redundancy = similarity.unwrap_or(0.0); - let mmr_score = policy.mmr_lambda * relevance_by_idx[candidate_idx] - - (1.0 - policy.mmr_lambda) * redundancy; - let high_similarity = similarity.map(|value| value > policy.sim_threshold).unwrap_or(false); - - if high_similarity { - filtered_count += 1; - } - - let candidate_pick = DiversityPick { - remaining_pos, - mmr_score, - nearest_note_id, - similarity, - missing_embedding, - retrieval_rank: candidates[candidate_idx].item.retrieval_rank, - }; - - if best_any.as_ref().map(|current| candidate_pick.better_than(current)).unwrap_or(true) { - best_any = Some(candidate_pick); - } - if high_similarity { - if best_filtered - .as_ref() - .map(|current| candidate_pick.better_than(current)) - .unwrap_or(true) - { - best_filtered = Some(candidate_pick); - } - - continue; - } - if best_non_filtered - .as_ref() - .map(|current| candidate_pick.better_than(current)) - .unwrap_or(true) - { - best_non_filtered = Some(candidate_pick); - } - } - - if let Some(best) = best_non_filtered { - return Some((best, "mmr")); - } - - if filtered_count >= policy.max_skips { - return best_any.map(|best| (best, "max_skips_backfill")); + return disabled::select_diverse_results_disabled(candidates, top_k, note_vectors); } - best_filtered.map(|best| (best, "threshold_backfill")) + enabled::select_diverse_results_enabled(candidates, top_k, policy, note_vectors) } diff --git a/packages/elf-service/src/search/ranking/diversity/selection/disabled.rs b/packages/elf-service/src/search/ranking/diversity/selection/disabled.rs new file mode 100644 index 00000000..b1c48bb0 --- /dev/null +++ b/packages/elf-service/src/search/ranking/diversity/selection/disabled.rs @@ -0,0 +1,49 @@ +use std::collections::HashMap; + +use uuid::Uuid; + +use crate::search::{DiversityDecision, ScoredChunk}; + +pub(super) fn select_diverse_results_disabled( + candidates: Vec, + top_k: u32, + note_vectors: &HashMap>, +) -> (Vec, HashMap) { + let mut decisions = HashMap::new(); + let mut selected = Vec::new(); + + for (idx, candidate) in candidates.into_iter().enumerate() { + let selected_rank = (idx < top_k as usize).then_some(idx as u32 + 1); + let is_selected = selected_rank.is_some(); + let note_id = candidate.item.note.note_id; + let missing_embedding = !note_vectors.contains_key(¬e_id); + + decisions.insert( + note_id, + DiversityDecision { + selected: is_selected, + selected_rank, + selected_reason: if is_selected { + "disabled_passthrough".to_string() + } else { + "disabled_truncate".to_string() + }, + skipped_reason: if is_selected { + None + } else { + Some("disabled_truncate".to_string()) + }, + nearest_selected_note_id: None, + similarity: None, + mmr_score: None, + missing_embedding, + }, + ); + + if is_selected { + selected.push(candidate); + } + } + + (selected, decisions) +} diff --git a/packages/elf-service/src/search/ranking/diversity/selection/enabled.rs b/packages/elf-service/src/search/ranking/diversity/selection/enabled.rs new file mode 100644 index 00000000..da76c11e --- /dev/null +++ b/packages/elf-service/src/search/ranking/diversity/selection/enabled.rs @@ -0,0 +1,188 @@ +use std::collections::HashMap; + +use uuid::Uuid; + +use crate::search::{ + DiversityDecision, ScoredChunk, + ranking::{ + diversity::{selection::pick::DiversityPick, similarity}, + policy::ResolvedDiversityPolicy, + retrieval, + }, +}; + +pub(super) fn select_diverse_results_enabled( + candidates: Vec, + top_k: u32, + policy: &ResolvedDiversityPolicy, + note_vectors: &HashMap>, +) -> (Vec, HashMap) { + let total = u32::try_from(candidates.len()).unwrap_or(1).max(1); + let relevance_by_idx: Vec = + (0..candidates.len()).map(|idx| retrieval::rank_normalize(idx as u32 + 1, total)).collect(); + let mut remaining_indices: Vec = (0..candidates.len()).collect(); + let mut selected_indices: Vec = Vec::new(); + let mut decisions: HashMap = HashMap::new(); + let first_idx = remaining_indices.remove(0); + let first_note_id = candidates[first_idx].item.note.note_id; + let first_missing_embedding = !note_vectors.contains_key(&first_note_id); + + selected_indices.push(first_idx); + decisions.insert( + first_note_id, + DiversityDecision { + selected: true, + selected_rank: Some(1), + selected_reason: "top_relevance".to_string(), + skipped_reason: None, + nearest_selected_note_id: None, + similarity: None, + mmr_score: Some(relevance_by_idx[first_idx]), + missing_embedding: first_missing_embedding, + }, + ); + + while selected_indices.len() < top_k as usize && !remaining_indices.is_empty() { + let Some((selected_pick, selected_reason)) = pick_next_candidate( + &remaining_indices, + &candidates, + &selected_indices, + note_vectors, + &relevance_by_idx, + policy, + ) else { + break; + }; + let picked_idx = remaining_indices.remove(selected_pick.remaining_pos); + + selected_indices.push(picked_idx); + + let selected_note_id = candidates[picked_idx].item.note.note_id; + + decisions.insert( + selected_note_id, + DiversityDecision { + selected: true, + selected_rank: Some(selected_indices.len() as u32), + selected_reason: selected_reason.to_string(), + skipped_reason: None, + nearest_selected_note_id: selected_pick.nearest_note_id, + similarity: selected_pick.similarity, + mmr_score: Some(selected_pick.mmr_score), + missing_embedding: selected_pick.missing_embedding, + }, + ); + } + + for candidate_idx in remaining_indices { + let note_id = candidates[candidate_idx].item.note.note_id; + let (similarity, nearest_note_id, missing_embedding) = + similarity::nearest_selected_similarity( + note_id, + &candidates, + &selected_indices, + note_vectors, + ); + let skipped_reason = + if similarity.map(|value| value > policy.sim_threshold).unwrap_or(false) { + "similarity_threshold" + } else { + "lower_mmr" + }; + let redundancy = similarity.unwrap_or(0.0); + let mmr_score = policy.mmr_lambda * relevance_by_idx[candidate_idx] + - (1.0 - policy.mmr_lambda) * redundancy; + + decisions.insert( + note_id, + DiversityDecision { + selected: false, + selected_rank: None, + selected_reason: "not_selected".to_string(), + skipped_reason: Some(skipped_reason.to_string()), + nearest_selected_note_id: nearest_note_id, + similarity, + mmr_score: Some(mmr_score), + missing_embedding, + }, + ); + } + + let selected = selected_indices.into_iter().map(|idx| candidates[idx].clone()).collect(); + + (selected, decisions) +} + +fn pick_next_candidate( + remaining_indices: &[usize], + candidates: &[ScoredChunk], + selected_indices: &[usize], + note_vectors: &HashMap>, + relevance_by_idx: &[f32], + policy: &ResolvedDiversityPolicy, +) -> Option<(DiversityPick, &'static str)> { + let mut best_non_filtered: Option = None; + let mut best_filtered: Option = None; + let mut best_any: Option = None; + let mut filtered_count = 0_u32; + + for (remaining_pos, candidate_idx) in remaining_indices.iter().copied().enumerate() { + let note_id = candidates[candidate_idx].item.note.note_id; + let (similarity, nearest_note_id, missing_embedding) = + similarity::nearest_selected_similarity( + note_id, + candidates, + selected_indices, + note_vectors, + ); + let redundancy = similarity.unwrap_or(0.0); + let mmr_score = policy.mmr_lambda * relevance_by_idx[candidate_idx] + - (1.0 - policy.mmr_lambda) * redundancy; + let high_similarity = similarity.map(|value| value > policy.sim_threshold).unwrap_or(false); + + if high_similarity { + filtered_count += 1; + } + + let candidate_pick = DiversityPick { + remaining_pos, + mmr_score, + nearest_note_id, + similarity, + missing_embedding, + retrieval_rank: candidates[candidate_idx].item.retrieval_rank, + }; + + if best_any.as_ref().map(|current| candidate_pick.better_than(current)).unwrap_or(true) { + best_any = Some(candidate_pick); + } + if high_similarity { + if best_filtered + .as_ref() + .map(|current| candidate_pick.better_than(current)) + .unwrap_or(true) + { + best_filtered = Some(candidate_pick); + } + + continue; + } + if best_non_filtered + .as_ref() + .map(|current| candidate_pick.better_than(current)) + .unwrap_or(true) + { + best_non_filtered = Some(candidate_pick); + } + } + + if let Some(best) = best_non_filtered { + return Some((best, "mmr")); + } + + if filtered_count >= policy.max_skips { + return best_any.map(|best| (best, "max_skips_backfill")); + } + + best_filtered.map(|best| (best, "threshold_backfill")) +} diff --git a/packages/elf-service/src/search/ranking/diversity/selection/pick.rs b/packages/elf-service/src/search/ranking/diversity/selection/pick.rs new file mode 100644 index 00000000..58a95e04 --- /dev/null +++ b/packages/elf-service/src/search/ranking/diversity/selection/pick.rs @@ -0,0 +1,17 @@ +use uuid::Uuid; + +#[derive(Clone, Copy)] +pub(super) struct DiversityPick { + pub(super) remaining_pos: usize, + pub(super) mmr_score: f32, + pub(super) nearest_note_id: Option, + pub(super) similarity: Option, + pub(super) missing_embedding: bool, + pub(super) retrieval_rank: u32, +} +impl DiversityPick { + pub(super) fn better_than(self, other: &Self) -> bool { + self.mmr_score > other.mmr_score + || (self.mmr_score == other.mmr_score && self.retrieval_rank < other.retrieval_rank) + } +} From a409123837103583dc28fcd14e9027109806318f Mon Sep 17 00:00:00 2001 From: Yvette Carlisle Date: Tue, 30 Jun 2026 10:22:14 -0400 Subject: [PATCH 3/6] {"schema":"decodex/commit/1","summary":"Split search filter parser modules","authority":"manual"} --- packages/elf-service/src/search/filter.rs | 2 +- .../elf-service/src/search/filter/impact.rs | 4 +- .../elf-service/src/search/filter/parser.rs | 260 ++---------------- .../src/search/filter/parser/constants.rs | 5 + .../src/search/filter/parser/error.rs | 10 + .../src/search/filter/parser/expr.rs | 63 +++++ .../src/search/filter/parser/search_filter.rs | 100 +++++++ .../src/search/filter/parser/state.rs | 5 + .../src/search/filter/parser/value.rs | 70 +++++ 9 files changed, 271 insertions(+), 248 deletions(-) create mode 100644 packages/elf-service/src/search/filter/parser/constants.rs create mode 100644 packages/elf-service/src/search/filter/parser/error.rs create mode 100644 packages/elf-service/src/search/filter/parser/expr.rs create mode 100644 packages/elf-service/src/search/filter/parser/search_filter.rs create mode 100644 packages/elf-service/src/search/filter/parser/state.rs create mode 100644 packages/elf-service/src/search/filter/parser/value.rs diff --git a/packages/elf-service/src/search/filter.rs b/packages/elf-service/src/search/filter.rs index e8604481..a7eba6c1 100644 --- a/packages/elf-service/src/search/filter.rs +++ b/packages/elf-service/src/search/filter.rs @@ -3,6 +3,6 @@ mod impact; mod parser; mod value; -pub(crate) use self::{impact::SearchFilterImpact, parser::SearchFilter}; +pub(crate) use self::{impact::SearchFilterImpact, parser::search_filter::SearchFilter}; #[cfg(test)] mod tests; diff --git a/packages/elf-service/src/search/filter/impact.rs b/packages/elf-service/src/search/filter/impact.rs index 481e566a..e39fa4c9 100644 --- a/packages/elf-service/src/search/filter/impact.rs +++ b/packages/elf-service/src/search/filter/impact.rs @@ -4,9 +4,7 @@ use serde::Serialize; use serde_json::Value; use uuid::Uuid; -use crate::search::{ - ChunkCandidate, NoteMeta, SEARCH_FILTER_IMPACT_SCHEMA_V1, filter::parser::SearchFilter, -}; +use crate::search::{ChunkCandidate, NoteMeta, SEARCH_FILTER_IMPACT_SCHEMA_V1, SearchFilter}; #[derive(Clone, Debug, Serialize)] pub(crate) struct SearchFilterImpact { diff --git a/packages/elf-service/src/search/filter/parser.rs b/packages/elf-service/src/search/filter/parser.rs index da46afd4..5a7c291f 100644 --- a/packages/elf-service/src/search/filter/parser.rs +++ b/packages/elf-service/src/search/filter/parser.rs @@ -1,246 +1,18 @@ -use std::{ - collections::HashMap, - fmt::{Display, Formatter}, -}; - -use serde_json::Value; -use time::{OffsetDateTime, format_description::well_known::Rfc3339}; -use uuid::Uuid; - -use crate::search::{ - ChunkCandidate, NoteMeta, - filter::{ - expr::{FilterExpr, FilterField}, - impact::SearchFilterImpact, - value::FilterValue, +pub(in crate::search::filter) mod search_filter; + +mod constants; +mod error; +mod expr; +mod state; +mod value; + +pub(super) use self::{ + constants::{ + MAX_FILTER_DEPTH, MAX_FILTER_NODES, MAX_IN_LIST_ITEMS, MAX_STRING_BYTES, + SEARCH_FILTER_EXPR_SCHEMA_V1, }, + error::FilterParseError, + expr::parse_expr, + state::FilterParseState, + value::parse_value, }; - -pub(super) const SEARCH_FILTER_EXPR_SCHEMA_V1: &str = "search_filter_expr/v1"; -pub(super) const MAX_FILTER_DEPTH: usize = 8; -pub(super) const MAX_FILTER_NODES: usize = 128; -pub(super) const MAX_IN_LIST_ITEMS: usize = 128; -pub(super) const MAX_STRING_BYTES: usize = 512; - -#[derive(Clone, Debug)] -pub(crate) struct FilterParseError { - pub(super) path: String, - pub(super) message: String, -} -impl Display for FilterParseError { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "{}: {}", self.path, self.message) - } -} - -#[derive(Clone, Debug)] -pub(crate) struct SearchFilter { - expr: FilterExpr, - json: Value, -} -impl SearchFilter { - pub(super) fn as_value(&self) -> Value { - self.json.clone() - } - - pub(super) fn evaluate(&self, note: &NoteMeta) -> (bool, Option) { - self.expr.evaluate(note) - } - - pub(crate) fn parse(raw: &Value) -> Result { - let path = "$.filter"; - let obj = raw.as_object().ok_or_else(|| FilterParseError { - path: path.to_string(), - message: "filter must be an object.".to_string(), - })?; - let schema = obj.get("schema").and_then(Value::as_str).ok_or_else(|| FilterParseError { - path: format!("{path}.schema"), - message: "filter.schema is required.".to_string(), - })?; - - if schema != SEARCH_FILTER_EXPR_SCHEMA_V1 { - return Err(FilterParseError { - path: format!("{path}.schema"), - message: format!( - "unsupported filter schema '{schema}', expected '{SEARCH_FILTER_EXPR_SCHEMA_V1}'." - ), - }); - } - - let expr = obj.get("expr").ok_or_else(|| FilterParseError { - path: format!("{path}.expr"), - message: "filter.expr is required.".to_string(), - })?; - let mut state = FilterParseState::default(); - let parsed = parse_expr(expr, "$.filter.expr", 1, &mut state)?; - - Ok(Self { - expr: parsed.clone(), - json: serde_json::json!({"schema": SEARCH_FILTER_EXPR_SCHEMA_V1, "expr": parsed.to_value()}), - }) - } - - pub(crate) fn eval( - &self, - candidates: Vec, - note_meta: &HashMap, - requested_candidate_k: u32, - effective_candidate_k: u32, - ) -> (Vec, SearchFilterImpact) { - let impact = SearchFilterImpact::from_eval( - self, - candidates.as_slice(), - note_meta, - requested_candidate_k, - effective_candidate_k, - ); - let pre = candidates.len(); - let mut kept = Vec::with_capacity(impact.candidate_count_post); - - for candidate in candidates { - let Some(note) = note_meta.get(&candidate.note_id) else { - continue; - }; - - if self.expr.evaluate(note).0 { - kept.push(candidate); - } - } - - let post = kept.len(); - - ( - kept, - SearchFilterImpact { - candidate_count_post: post, - dropped_total: pre.saturating_sub(post), - ..impact - }, - ) - } -} - -#[derive(Default)] -pub(super) struct FilterParseState { - pub(super) nodes: usize, - pub(super) max_depth: usize, -} - -pub(super) fn parse_expr( - value: &Value, - path: &str, - depth: usize, - state: &mut FilterParseState, -) -> Result { - FilterExpr::validate_metrics(path, depth, state)?; - - let Some(map) = value.as_object() else { - return Err(FilterParseError { - path: path.to_string(), - message: "filter node must be an object.".to_string(), - }); - }; - let op = map.get("op").and_then(Value::as_str).ok_or_else(|| FilterParseError { - path: path.to_string(), - message: "filter node is missing required string op.".to_string(), - })?; - - match op { - "and" => { - let args = map.get("args").ok_or_else(|| FilterParseError { - path: format!("{path}.args"), - message: "and node requires args.".to_string(), - })?; - let args = FilterExpr::parse_args(args, &format!("{path}.args"), depth, state)?; - - Ok(FilterExpr::And(args)) - }, - "or" => { - let args = map.get("args").ok_or_else(|| FilterParseError { - path: format!("{path}.args"), - message: "or node requires args.".to_string(), - })?; - let args = FilterExpr::parse_args(args, &format!("{path}.args"), depth, state)?; - - Ok(FilterExpr::Or(args)) - }, - "not" => { - let expr = map.get("expr").ok_or_else(|| FilterParseError { - path: format!("{path}.expr"), - message: "not node requires expr.".to_string(), - })?; - let child = parse_expr(expr, &format!("{path}.expr"), depth.saturating_add(1), state)?; - - Ok(FilterExpr::Not(Box::new(child))) - }, - "in" => FilterExpr::parse_leaf(map, op, path), - "eq" | "neq" | "gt" | "gte" | "lt" | "lte" | "contains" => - FilterExpr::parse_leaf(map, op, path), - _ => Err(FilterParseError { - path: path.to_string(), - message: format!("unsupported filter op '{op}'."), - }), - } -} - -pub(super) fn parse_value( - field: &FilterField, - raw: &Value, - path: &str, -) -> Result { - match field { - FilterField::Type | FilterField::Key | FilterField::Scope | FilterField::AgentId => - match raw { - Value::String(_) | Value::Null if matches!(field, FilterField::Key) => { - if raw.is_null() { - Ok(FilterValue::Null) - } else { - parse_string(path, raw).map(FilterValue::String) - } - }, - _ => parse_string(path, raw).map(FilterValue::String), - }, - FilterField::Importance | FilterField::Confidence | FilterField::HitCount => { - let value = raw.as_f64().ok_or_else(|| FilterParseError { - path: path.to_string(), - message: "numeric value expected.".to_string(), - })?; - - Ok(FilterValue::Number(value)) - }, - FilterField::UpdatedAt => - OffsetDateTime::parse(parse_string(path, raw)?.as_str(), &Rfc3339) - .map(FilterValue::DateTime) - .map_err(|_| FilterParseError { - path: path.to_string(), - message: "datetime value must be RFC3339.".to_string(), - }), - FilterField::ExpiresAt | FilterField::LastHitAt => - if raw.is_null() { - Ok(FilterValue::Null) - } else { - OffsetDateTime::parse(parse_string(path, raw)?.as_str(), &Rfc3339) - .map(FilterValue::DateTime) - .map_err(|_| FilterParseError { - path: path.to_string(), - message: "datetime value must be RFC3339.".to_string(), - }) - }, - } -} - -fn parse_string(path: &str, raw: &Value) -> Result { - let value = raw.as_str().ok_or_else(|| FilterParseError { - path: path.to_string(), - message: "string value expected.".to_string(), - })?; - - if value.len() > MAX_STRING_BYTES { - return Err(FilterParseError { - path: path.to_string(), - message: format!("string value exceeds maximum bytes ({}).", MAX_STRING_BYTES), - }); - } - - Ok(value.to_string()) -} diff --git a/packages/elf-service/src/search/filter/parser/constants.rs b/packages/elf-service/src/search/filter/parser/constants.rs new file mode 100644 index 00000000..c6262b61 --- /dev/null +++ b/packages/elf-service/src/search/filter/parser/constants.rs @@ -0,0 +1,5 @@ +pub(in crate::search::filter) const SEARCH_FILTER_EXPR_SCHEMA_V1: &str = "search_filter_expr/v1"; +pub(in crate::search::filter) const MAX_FILTER_DEPTH: usize = 8; +pub(in crate::search::filter) const MAX_FILTER_NODES: usize = 128; +pub(in crate::search::filter) const MAX_IN_LIST_ITEMS: usize = 128; +pub(in crate::search::filter) const MAX_STRING_BYTES: usize = 512; diff --git a/packages/elf-service/src/search/filter/parser/error.rs b/packages/elf-service/src/search/filter/parser/error.rs new file mode 100644 index 00000000..3b95052e --- /dev/null +++ b/packages/elf-service/src/search/filter/parser/error.rs @@ -0,0 +1,10 @@ +#[derive(Clone, Debug)] +pub(crate) struct FilterParseError { + pub(in crate::search::filter) path: String, + pub(in crate::search::filter) message: String, +} +impl std::fmt::Display for FilterParseError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}: {}", self.path, self.message) + } +} diff --git a/packages/elf-service/src/search/filter/parser/expr.rs b/packages/elf-service/src/search/filter/parser/expr.rs new file mode 100644 index 00000000..2e99b68a --- /dev/null +++ b/packages/elf-service/src/search/filter/parser/expr.rs @@ -0,0 +1,63 @@ +use serde_json::Value; + +use crate::search::filter::{ + expr::FilterExpr, + parser::{FilterParseError, FilterParseState}, +}; + +pub(in crate::search::filter) fn parse_expr( + value: &Value, + path: &str, + depth: usize, + state: &mut FilterParseState, +) -> Result { + FilterExpr::validate_metrics(path, depth, state)?; + + let Some(map) = value.as_object() else { + return Err(FilterParseError { + path: path.to_string(), + message: "filter node must be an object.".to_string(), + }); + }; + let op = map.get("op").and_then(Value::as_str).ok_or_else(|| FilterParseError { + path: path.to_string(), + message: "filter node is missing required string op.".to_string(), + })?; + + match op { + "and" => { + let args = map.get("args").ok_or_else(|| FilterParseError { + path: format!("{path}.args"), + message: "and node requires args.".to_string(), + })?; + let args = FilterExpr::parse_args(args, &format!("{path}.args"), depth, state)?; + + Ok(FilterExpr::And(args)) + }, + "or" => { + let args = map.get("args").ok_or_else(|| FilterParseError { + path: format!("{path}.args"), + message: "or node requires args.".to_string(), + })?; + let args = FilterExpr::parse_args(args, &format!("{path}.args"), depth, state)?; + + Ok(FilterExpr::Or(args)) + }, + "not" => { + let expr = map.get("expr").ok_or_else(|| FilterParseError { + path: format!("{path}.expr"), + message: "not node requires expr.".to_string(), + })?; + let child = parse_expr(expr, &format!("{path}.expr"), depth.saturating_add(1), state)?; + + Ok(FilterExpr::Not(Box::new(child))) + }, + "in" => FilterExpr::parse_leaf(map, op, path), + "eq" | "neq" | "gt" | "gte" | "lt" | "lte" | "contains" => + FilterExpr::parse_leaf(map, op, path), + _ => Err(FilterParseError { + path: path.to_string(), + message: format!("unsupported filter op '{op}'."), + }), + } +} diff --git a/packages/elf-service/src/search/filter/parser/search_filter.rs b/packages/elf-service/src/search/filter/parser/search_filter.rs new file mode 100644 index 00000000..3fdb9b9d --- /dev/null +++ b/packages/elf-service/src/search/filter/parser/search_filter.rs @@ -0,0 +1,100 @@ +use std::collections::HashMap; + +use serde_json::Value; +use uuid::Uuid; + +use crate::search::{ + ChunkCandidate, NoteMeta, + filter::{ + expr::FilterExpr, + impact::SearchFilterImpact, + parser::{self, FilterParseError, FilterParseState, SEARCH_FILTER_EXPR_SCHEMA_V1}, + }, +}; + +#[derive(Clone, Debug)] +pub(crate) struct SearchFilter { + expr: FilterExpr, + json: Value, +} +impl SearchFilter { + pub(in crate::search::filter) fn as_value(&self) -> Value { + self.json.clone() + } + + pub(in crate::search::filter) fn evaluate(&self, note: &NoteMeta) -> (bool, Option) { + self.expr.evaluate(note) + } + + pub(crate) fn parse(raw: &Value) -> Result { + let path = "$.filter"; + let obj = raw.as_object().ok_or_else(|| FilterParseError { + path: path.to_string(), + message: "filter must be an object.".to_string(), + })?; + let schema = obj.get("schema").and_then(Value::as_str).ok_or_else(|| FilterParseError { + path: format!("{path}.schema"), + message: "filter.schema is required.".to_string(), + })?; + + if schema != SEARCH_FILTER_EXPR_SCHEMA_V1 { + return Err(FilterParseError { + path: format!("{path}.schema"), + message: format!( + "unsupported filter schema '{schema}', expected '{SEARCH_FILTER_EXPR_SCHEMA_V1}'." + ), + }); + } + + let expr = obj.get("expr").ok_or_else(|| FilterParseError { + path: format!("{path}.expr"), + message: "filter.expr is required.".to_string(), + })?; + let mut state = FilterParseState::default(); + let parsed = parser::parse_expr(expr, "$.filter.expr", 1, &mut state)?; + + Ok(Self { + expr: parsed.clone(), + json: serde_json::json!({"schema": SEARCH_FILTER_EXPR_SCHEMA_V1, "expr": parsed.to_value()}), + }) + } + + pub(crate) fn eval( + &self, + candidates: Vec, + note_meta: &HashMap, + requested_candidate_k: u32, + effective_candidate_k: u32, + ) -> (Vec, SearchFilterImpact) { + let impact = SearchFilterImpact::from_eval( + self, + candidates.as_slice(), + note_meta, + requested_candidate_k, + effective_candidate_k, + ); + let pre = candidates.len(); + let mut kept = Vec::with_capacity(impact.candidate_count_post); + + for candidate in candidates { + let Some(note) = note_meta.get(&candidate.note_id) else { + continue; + }; + + if self.expr.evaluate(note).0 { + kept.push(candidate); + } + } + + let post = kept.len(); + + ( + kept, + SearchFilterImpact { + candidate_count_post: post, + dropped_total: pre.saturating_sub(post), + ..impact + }, + ) + } +} diff --git a/packages/elf-service/src/search/filter/parser/state.rs b/packages/elf-service/src/search/filter/parser/state.rs new file mode 100644 index 00000000..5b9e31ec --- /dev/null +++ b/packages/elf-service/src/search/filter/parser/state.rs @@ -0,0 +1,5 @@ +#[derive(Default)] +pub(in crate::search::filter) struct FilterParseState { + pub(in crate::search::filter) nodes: usize, + pub(in crate::search::filter) max_depth: usize, +} diff --git a/packages/elf-service/src/search/filter/parser/value.rs b/packages/elf-service/src/search/filter/parser/value.rs new file mode 100644 index 00000000..27faf120 --- /dev/null +++ b/packages/elf-service/src/search/filter/parser/value.rs @@ -0,0 +1,70 @@ +use serde_json::Value; +use time::{OffsetDateTime, format_description::well_known::Rfc3339}; + +use crate::search::filter::{ + expr::FilterField, + parser::{FilterParseError, MAX_STRING_BYTES}, + value::FilterValue, +}; + +pub(in crate::search::filter) fn parse_value( + field: &FilterField, + raw: &Value, + path: &str, +) -> Result { + match field { + FilterField::Type | FilterField::Key | FilterField::Scope | FilterField::AgentId => + match raw { + Value::String(_) | Value::Null if matches!(field, FilterField::Key) => { + if raw.is_null() { + Ok(FilterValue::Null) + } else { + parse_string(path, raw).map(FilterValue::String) + } + }, + _ => parse_string(path, raw).map(FilterValue::String), + }, + FilterField::Importance | FilterField::Confidence | FilterField::HitCount => { + let value = raw.as_f64().ok_or_else(|| FilterParseError { + path: path.to_string(), + message: "numeric value expected.".to_string(), + })?; + + Ok(FilterValue::Number(value)) + }, + FilterField::UpdatedAt => + OffsetDateTime::parse(parse_string(path, raw)?.as_str(), &Rfc3339) + .map(FilterValue::DateTime) + .map_err(|_| FilterParseError { + path: path.to_string(), + message: "datetime value must be RFC3339.".to_string(), + }), + FilterField::ExpiresAt | FilterField::LastHitAt => + if raw.is_null() { + Ok(FilterValue::Null) + } else { + OffsetDateTime::parse(parse_string(path, raw)?.as_str(), &Rfc3339) + .map(FilterValue::DateTime) + .map_err(|_| FilterParseError { + path: path.to_string(), + message: "datetime value must be RFC3339.".to_string(), + }) + }, + } +} + +fn parse_string(path: &str, raw: &Value) -> Result { + let value = raw.as_str().ok_or_else(|| FilterParseError { + path: path.to_string(), + message: "string value expected.".to_string(), + })?; + + if value.len() > MAX_STRING_BYTES { + return Err(FilterParseError { + path: path.to_string(), + message: format!("string value exceeds maximum bytes ({}).", MAX_STRING_BYTES), + }); + } + + Ok(value.to_string()) +} From 893ee7326bfbec756de4b20c9f9c15a9d4d6d5d5 Mon Sep 17 00:00:00 2001 From: Yvette Carlisle Date: Tue, 30 Jun 2026 10:28:13 -0400 Subject: [PATCH 4/6] {"schema":"decodex/commit/1","summary":"Split ranking text modules","authority":"manual"} --- .../elf-service/src/search/ranking/text.rs | 260 +----------------- .../src/search/ranking/text/deterministic.rs | 82 ++++++ .../src/search/ranking/text/embedding.rs | 13 + .../src/search/ranking/text/matching.rs | 65 +++++ .../src/search/ranking/text/scope.rs | 2 +- .../src/search/ranking/text/tokenization.rs | 88 ++++++ 6 files changed, 262 insertions(+), 248 deletions(-) create mode 100644 packages/elf-service/src/search/ranking/text/deterministic.rs create mode 100644 packages/elf-service/src/search/ranking/text/embedding.rs create mode 100644 packages/elf-service/src/search/ranking/text/matching.rs create mode 100644 packages/elf-service/src/search/ranking/text/tokenization.rs diff --git a/packages/elf-service/src/search/ranking/text.rs b/packages/elf-service/src/search/ranking/text.rs index 4bc6101b..629fd0c5 100644 --- a/packages/elf-service/src/search/ranking/text.rs +++ b/packages/elf-service/src/search/ranking/text.rs @@ -1,248 +1,14 @@ +mod deterministic; +mod embedding; +mod matching; mod scope; - -use std::collections::{HashMap, HashSet}; - -use time::OffsetDateTime; - -use crate::search::DeterministicRankingTerms; -use elf_config::{Config, Context}; - -pub fn build_dense_embedding_input( - query: &str, - project_context_description: Option<&str>, -) -> String { - let Some(description) = project_context_description else { return query.to_string() }; - let trimmed = description.trim(); - - if trimmed.is_empty() { - return query.to_string(); - } - - format!("{query}\n\nProject context:\n{trimmed}") -} - -pub fn build_scope_context_boost_by_scope<'a>( - tokens: &[String], - context: Option<&'a Context>, -) -> HashMap<&'a str, f32> { - scope::build_scope_context_boost_by_scope(tokens, context) -} - -pub fn tokenize_query(query: &str, max_terms: usize) -> Vec { - let mut normalized = String::with_capacity(query.len()); - - for ch in query.chars() { - if ch.is_ascii_alphanumeric() { - normalized.push(ch.to_ascii_lowercase()); - } else { - normalized.push(' '); - } - } - - let mut out = Vec::new(); - let mut seen = HashSet::new(); - - for token in normalized.split_whitespace() { - if token.len() < 2 { - continue; - } - if seen.insert(token) { - out.push(token.to_string()); - } - if out.len() >= max_terms { - break; - } - } - - out -} - -pub fn tokenize_text_terms(text: &str, max_terms: usize) -> HashSet { - if max_terms == 0 { - return HashSet::new(); - } - - let mut normalized = String::with_capacity(text.len()); - - for ch in text.chars() { - if ch.is_ascii_alphanumeric() { - normalized.push(ch.to_ascii_lowercase()); - } else { - normalized.push(' '); - } - } - - let mut out = HashSet::new(); - - for token in normalized.split_whitespace() { - if token.len() < 2 { - continue; - } - - out.insert(token.to_string()); - - if out.len() >= max_terms { - break; - } - } - - out -} - -pub fn lexical_overlap_ratio(query_tokens: &[String], text: &str, max_text_terms: usize) -> f32 { - if query_tokens.is_empty() { - return 0.0; - } - - let text_terms = tokenize_text_terms(text, max_text_terms); - - if text_terms.is_empty() { - return 0.0; - } - - let mut matched = 0_usize; - - for token in query_tokens { - if text_terms.contains(token.as_str()) { - matched += 1; - } - } - - matched as f32 / query_tokens.len() as f32 -} - -pub fn compute_deterministic_ranking_terms( - cfg: &Config, - query_tokens: &[String], - snippet: &str, - note_hit_count: i64, - note_last_hit_at: Option, - age_days: f32, - now: OffsetDateTime, -) -> DeterministicRankingTerms { - let det = &cfg.ranking.deterministic; - - if !det.enabled { - return DeterministicRankingTerms::default(); - } - - let mut out = DeterministicRankingTerms::default(); - - if det.lexical.enabled && det.lexical.weight > 0.0 && !query_tokens.is_empty() { - let ratio = - lexical_overlap_ratio(query_tokens, snippet, det.lexical.max_text_terms as usize); - - out.lexical_overlap_ratio = ratio; - - let min_ratio = det.lexical.min_ratio.clamp(0.0, 1.0); - let scaled = if ratio >= min_ratio && min_ratio < 1.0 { - ((ratio - min_ratio) / (1.0 - min_ratio)).clamp(0.0, 1.0) - } else if ratio >= 1.0 && min_ratio >= 1.0 { - 1.0 - } else { - 0.0 - }; - - out.lexical_bonus = det.lexical.weight * scaled; - } - if det.hits.enabled && det.hits.weight > 0.0 { - let hit_count = note_hit_count.max(0); - - out.hit_count = hit_count; - - let half = det.hits.half_saturation; - let hit_saturation = if half > 0.0 && hit_count > 0 { - let hc = hit_count as f32; - - (hc / (hc + half)).clamp(0.0, 1.0) - } else { - 0.0 - }; - let last_hit_age_days = - note_last_hit_at.map(|ts| ((now - ts).as_seconds_f32() / 86_400.0).max(0.0)); - - out.last_hit_age_days = last_hit_age_days; - - let tau = det.hits.last_hit_tau_days; - let recency = if tau > 0.0 { - match last_hit_age_days { - Some(days) => (-days / tau).exp(), - None => 1.0, - } - } else { - 1.0 - }; - - out.hit_boost = det.hits.weight * hit_saturation * recency; - } - if det.decay.enabled && det.decay.weight > 0.0 { - let age_days = age_days.max(0.0); - let tau = det.decay.tau_days; - let staleness = if tau > 0.0 { 1.0 - (-age_days / tau).exp() } else { 0.0 }; - - out.decay_penalty = -det.decay.weight * staleness.clamp(0.0, 1.0); - } - - out -} - -pub fn match_terms_in_text( - tokens: &[String], - text: &str, - key: Option<&str>, - max_terms: usize, -) -> (Vec, Vec) { - if tokens.is_empty() { - return (Vec::new(), Vec::new()); - } - - let text = text.to_lowercase(); - let key = key.map(|value| value.to_lowercase()); - let mut matched_terms = Vec::new(); - let mut matched_fields = HashSet::new(); - - for token in tokens { - let mut matched = false; - - if text.contains(token) { - matched_fields.insert("text"); - - matched = true; - } - - if let Some(key) = key.as_ref() - && key.contains(token) - { - matched_fields.insert("key"); - - matched = true; - } - - if matched { - matched_terms.push(token.clone()); - } - if matched_terms.len() >= max_terms { - break; - } - } - - let mut fields: Vec = - matched_fields.into_iter().map(|field| field.to_string()).collect(); - - fields.sort(); - - (matched_terms, fields) -} - -pub fn merge_matched_fields(mut base: Vec, extra: Option<&Vec>) -> Vec { - if let Some(extra) = extra { - for field in extra { - base.push(field.clone()); - } - - base.sort(); - base.dedup(); - } - - base -} +mod tokenization; + +#[cfg(test)] pub(in crate::search) use self::tokenization::lexical_overlap_ratio; +pub(in crate::search) use self::{ + deterministic::compute_deterministic_ranking_terms, + embedding::build_dense_embedding_input, + matching::{match_terms_in_text, merge_matched_fields}, + scope::build_scope_context_boost_by_scope, + tokenization::tokenize_query, +}; diff --git a/packages/elf-service/src/search/ranking/text/deterministic.rs b/packages/elf-service/src/search/ranking/text/deterministic.rs new file mode 100644 index 00000000..f62bd5bd --- /dev/null +++ b/packages/elf-service/src/search/ranking/text/deterministic.rs @@ -0,0 +1,82 @@ +use time::OffsetDateTime; + +use crate::search::{DeterministicRankingTerms, ranking::text::tokenization}; +use elf_config::Config; + +pub(crate) fn compute_deterministic_ranking_terms( + cfg: &Config, + query_tokens: &[String], + snippet: &str, + note_hit_count: i64, + note_last_hit_at: Option, + age_days: f32, + now: OffsetDateTime, +) -> DeterministicRankingTerms { + let det = &cfg.ranking.deterministic; + + if !det.enabled { + return DeterministicRankingTerms::default(); + } + + let mut out = DeterministicRankingTerms::default(); + + if det.lexical.enabled && det.lexical.weight > 0.0 && !query_tokens.is_empty() { + let ratio = tokenization::lexical_overlap_ratio( + query_tokens, + snippet, + det.lexical.max_text_terms as usize, + ); + + out.lexical_overlap_ratio = ratio; + + let min_ratio = det.lexical.min_ratio.clamp(0.0, 1.0); + let scaled = if ratio >= min_ratio && min_ratio < 1.0 { + ((ratio - min_ratio) / (1.0 - min_ratio)).clamp(0.0, 1.0) + } else if ratio >= 1.0 && min_ratio >= 1.0 { + 1.0 + } else { + 0.0 + }; + + out.lexical_bonus = det.lexical.weight * scaled; + } + if det.hits.enabled && det.hits.weight > 0.0 { + let hit_count = note_hit_count.max(0); + + out.hit_count = hit_count; + + let half = det.hits.half_saturation; + let hit_saturation = if half > 0.0 && hit_count > 0 { + let hc = hit_count as f32; + + (hc / (hc + half)).clamp(0.0, 1.0) + } else { + 0.0 + }; + let last_hit_age_days = + note_last_hit_at.map(|ts| ((now - ts).as_seconds_f32() / 86_400.0).max(0.0)); + + out.last_hit_age_days = last_hit_age_days; + + let tau = det.hits.last_hit_tau_days; + let recency = if tau > 0.0 { + match last_hit_age_days { + Some(days) => (-days / tau).exp(), + None => 1.0, + } + } else { + 1.0 + }; + + out.hit_boost = det.hits.weight * hit_saturation * recency; + } + if det.decay.enabled && det.decay.weight > 0.0 { + let age_days = age_days.max(0.0); + let tau = det.decay.tau_days; + let staleness = if tau > 0.0 { 1.0 - (-age_days / tau).exp() } else { 0.0 }; + + out.decay_penalty = -det.decay.weight * staleness.clamp(0.0, 1.0); + } + + out +} diff --git a/packages/elf-service/src/search/ranking/text/embedding.rs b/packages/elf-service/src/search/ranking/text/embedding.rs new file mode 100644 index 00000000..9f9c56c5 --- /dev/null +++ b/packages/elf-service/src/search/ranking/text/embedding.rs @@ -0,0 +1,13 @@ +pub(crate) fn build_dense_embedding_input( + query: &str, + project_context_description: Option<&str>, +) -> String { + let Some(description) = project_context_description else { return query.to_string() }; + let trimmed = description.trim(); + + if trimmed.is_empty() { + return query.to_string(); + } + + format!("{query}\n\nProject context:\n{trimmed}") +} diff --git a/packages/elf-service/src/search/ranking/text/matching.rs b/packages/elf-service/src/search/ranking/text/matching.rs new file mode 100644 index 00000000..7d19cab3 --- /dev/null +++ b/packages/elf-service/src/search/ranking/text/matching.rs @@ -0,0 +1,65 @@ +use std::collections::HashSet; + +pub(crate) fn match_terms_in_text( + tokens: &[String], + text: &str, + key: Option<&str>, + max_terms: usize, +) -> (Vec, Vec) { + if tokens.is_empty() { + return (Vec::new(), Vec::new()); + } + + let text = text.to_lowercase(); + let key = key.map(|value| value.to_lowercase()); + let mut matched_terms = Vec::new(); + let mut matched_fields = HashSet::new(); + + for token in tokens { + let mut matched = false; + + if text.contains(token) { + matched_fields.insert("text"); + + matched = true; + } + + if let Some(key) = key.as_ref() + && key.contains(token) + { + matched_fields.insert("key"); + + matched = true; + } + + if matched { + matched_terms.push(token.clone()); + } + if matched_terms.len() >= max_terms { + break; + } + } + + let mut fields: Vec = + matched_fields.into_iter().map(|field| field.to_string()).collect(); + + fields.sort(); + + (matched_terms, fields) +} + +pub(crate) fn merge_matched_fields( + mut base: Vec, + extra: Option<&Vec>, +) -> Vec { + if let Some(extra) = extra { + for field in extra { + base.push(field.clone()); + } + + base.sort(); + base.dedup(); + } + + base +} diff --git a/packages/elf-service/src/search/ranking/text/scope.rs b/packages/elf-service/src/search/ranking/text/scope.rs index 1d4bdbb5..52a56130 100644 --- a/packages/elf-service/src/search/ranking/text/scope.rs +++ b/packages/elf-service/src/search/ranking/text/scope.rs @@ -3,7 +3,7 @@ use std::collections::{HashMap, HashSet}; use elf_config::Context; use elf_domain::english_gate; -pub(super) fn build_scope_context_boost_by_scope<'a>( +pub(crate) fn build_scope_context_boost_by_scope<'a>( tokens: &[String], context: Option<&'a Context>, ) -> HashMap<&'a str, f32> { diff --git a/packages/elf-service/src/search/ranking/text/tokenization.rs b/packages/elf-service/src/search/ranking/text/tokenization.rs new file mode 100644 index 00000000..3b82f2cc --- /dev/null +++ b/packages/elf-service/src/search/ranking/text/tokenization.rs @@ -0,0 +1,88 @@ +use std::collections::HashSet; + +pub(crate) fn tokenize_query(query: &str, max_terms: usize) -> Vec { + let mut normalized = String::with_capacity(query.len()); + + for ch in query.chars() { + if ch.is_ascii_alphanumeric() { + normalized.push(ch.to_ascii_lowercase()); + } else { + normalized.push(' '); + } + } + + let mut out = Vec::new(); + let mut seen = HashSet::new(); + + for token in normalized.split_whitespace() { + if token.len() < 2 { + continue; + } + if seen.insert(token) { + out.push(token.to_string()); + } + if out.len() >= max_terms { + break; + } + } + + out +} + +pub(crate) fn lexical_overlap_ratio( + query_tokens: &[String], + text: &str, + max_text_terms: usize, +) -> f32 { + if query_tokens.is_empty() { + return 0.0; + } + + let text_terms = tokenize_text_terms(text, max_text_terms); + + if text_terms.is_empty() { + return 0.0; + } + + let mut matched = 0_usize; + + for token in query_tokens { + if text_terms.contains(token.as_str()) { + matched += 1; + } + } + + matched as f32 / query_tokens.len() as f32 +} + +fn tokenize_text_terms(text: &str, max_terms: usize) -> HashSet { + if max_terms == 0 { + return HashSet::new(); + } + + let mut normalized = String::with_capacity(text.len()); + + for ch in text.chars() { + if ch.is_ascii_alphanumeric() { + normalized.push(ch.to_ascii_lowercase()); + } else { + normalized.push(' '); + } + } + + let mut out = HashSet::new(); + + for token in normalized.split_whitespace() { + if token.len() < 2 { + continue; + } + + out.insert(token.to_string()); + + if out.len() >= max_terms { + break; + } + } + + out +} From fabb2c3d87877885840ee51f6839d867b67a5ed3 Mon Sep 17 00:00:00 2001 From: Yvette Carlisle Date: Tue, 30 Jun 2026 10:31:21 -0400 Subject: [PATCH 5/6] {"schema":"decodex/commit/1","summary":"Split provenance history modules","authority":"manual"} --- .../elf-service/src/provenance/history.rs | 263 +----------------- .../src/provenance/history/builders.rs | 195 +++++++++++++ .../src/provenance/history/classify.rs | 45 +++ .../src/provenance/history/summaries.rs | 30 ++ 4 files changed, 276 insertions(+), 257 deletions(-) create mode 100644 packages/elf-service/src/provenance/history/builders.rs create mode 100644 packages/elf-service/src/provenance/history/classify.rs create mode 100644 packages/elf-service/src/provenance/history/summaries.rs diff --git a/packages/elf-service/src/provenance/history.rs b/packages/elf-service/src/provenance/history.rs index 71b2755f..fbfb7f62 100644 --- a/packages/elf-service/src/provenance/history.rs +++ b/packages/elf-service/src/provenance/history.rs @@ -1,259 +1,8 @@ -use serde_json; -use time::OffsetDateTime; -use uuid::Uuid; +mod builders; +mod classify; +mod summaries; -use crate::provenance::types::{ - MemoryHistoryEvent, NoteProvenanceIngestDecision, NoteProvenanceNoteVersion, - rows::{NoteDerivedProposalRow, NoteProposalReviewRow}, +pub(super) use self::builders::{ + decision_history_event, derived_proposal_history_event, expire_history_event, + proposal_review_history_event, should_emit_decision_event, version_history_event, }; -use elf_storage::models::MemoryNote; - -pub(super) fn version_history_event( - version: &NoteProvenanceNoteVersion, - decision: Option<&&NoteProvenanceIngestDecision>, -) -> MemoryHistoryEvent { - let event_type = version_event_type(version.op.as_str(), version.reason.as_str()); - let related_decision_id = decision.map(|decision| decision.decision_id); - let details = serde_json::json!({ - "reason": version.reason, - "prev_snapshot": version.prev_snapshot, - "new_snapshot": version.new_snapshot, - "ingest_decision": decision.map(|decision| serde_json::json!({ - "decision_id": decision.decision_id, - "pipeline": decision.pipeline, - "base_decision": decision.base_decision, - "policy_decision": decision.policy_decision, - "note_op": decision.note_op, - "reason_code": decision.reason_code, - })), - }); - - MemoryHistoryEvent { - event_id: format!("memory_note_versions:{}", version.version_id), - event_type: event_type.to_string(), - subject_type: "note".to_string(), - note_id: version.note_id, - source_table: "memory_note_versions".to_string(), - source_id: Some(version.version_id), - related_note_version_id: Some(version.version_id), - related_decision_id, - related_proposal_id: None, - actor: Some(version.actor.clone()), - op: Some(version.op.clone()), - reason_code: None, - summary: version_summary(event_type, version.reason.as_str()), - details, - ts: version.ts, - } -} - -pub(super) fn decision_history_event( - note_id: Uuid, - decision: &NoteProvenanceIngestDecision, -) -> MemoryHistoryEvent { - let event_type = decision_event_type(decision); - let details = serde_json::json!({ - "pipeline": decision.pipeline, - "note_type": decision.note_type, - "note_key": decision.note_key, - "base_decision": decision.base_decision, - "policy_decision": decision.policy_decision, - "note_op": decision.note_op, - "details": decision.details, - }); - - MemoryHistoryEvent { - event_id: format!("memory_ingest_decisions:{}", decision.decision_id), - event_type: event_type.to_string(), - subject_type: "note".to_string(), - note_id, - source_table: "memory_ingest_decisions".to_string(), - source_id: Some(decision.decision_id), - related_note_version_id: decision.note_version_id, - related_decision_id: Some(decision.decision_id), - related_proposal_id: None, - actor: Some(decision.agent_id.clone()), - op: Some(decision.note_op.clone()), - reason_code: decision.reason_code.clone(), - summary: decision_summary(event_type, decision), - details, - ts: decision.ts, - } -} - -pub(super) fn expire_history_event( - note: &MemoryNote, - expires_at: OffsetDateTime, -) -> MemoryHistoryEvent { - MemoryHistoryEvent { - event_id: format!("memory_notes:{}:expire:{expires_at}", note.note_id), - event_type: "expire".to_string(), - subject_type: "note".to_string(), - note_id: note.note_id, - source_table: "memory_notes".to_string(), - source_id: Some(note.note_id), - related_note_version_id: None, - related_decision_id: None, - related_proposal_id: None, - actor: Some(note.agent_id.clone()), - op: Some("EXPIRE".to_string()), - reason_code: None, - summary: "Note reached its persisted expires_at timestamp.".to_string(), - details: serde_json::json!({ - "status": note.status, - "expires_at": expires_at, - }), - ts: expires_at, - } -} - -pub(super) fn derived_proposal_history_event( - note_id: Uuid, - proposal: NoteDerivedProposalRow, -) -> MemoryHistoryEvent { - MemoryHistoryEvent { - event_id: format!("consolidation_proposals:{}", proposal.proposal_id), - event_type: "derived".to_string(), - subject_type: "note".to_string(), - note_id, - source_table: "consolidation_proposals".to_string(), - source_id: Some(proposal.proposal_id), - related_note_version_id: None, - related_decision_id: None, - related_proposal_id: Some(proposal.proposal_id), - actor: Some(proposal.agent_id), - op: Some(proposal.apply_intent.clone()), - reason_code: None, - summary: format!( - "Derived proposal '{}' was created with review_state '{}'.", - proposal.proposal_kind, proposal.review_state - ), - details: serde_json::json!({ - "run_id": proposal.run_id, - "proposal_kind": proposal.proposal_kind, - "apply_intent": proposal.apply_intent, - "review_state": proposal.review_state, - "source_refs": proposal.source_refs, - "source_snapshot": proposal.source_snapshot, - "lineage": proposal.lineage, - "diff": proposal.diff, - "confidence": proposal.confidence, - "target_ref": proposal.target_ref, - "proposed_payload": proposal.proposed_payload, - }), - ts: proposal.created_at, - } -} - -pub(super) fn proposal_review_history_event( - note_id: Uuid, - review: NoteProposalReviewRow, -) -> MemoryHistoryEvent { - let event_type = proposal_review_event_type(review.action.as_str()); - - MemoryHistoryEvent { - event_id: format!("consolidation_proposal_reviews:{}", review.review_id), - event_type: event_type.to_string(), - subject_type: "note".to_string(), - note_id, - source_table: "consolidation_proposal_reviews".to_string(), - source_id: Some(review.review_id), - related_note_version_id: None, - related_decision_id: None, - related_proposal_id: Some(review.proposal_id), - actor: Some(review.reviewer_agent_id), - op: Some(review.action.clone()), - reason_code: None, - summary: format!( - "Proposal review action '{}' moved '{}' from '{}' to '{}'.", - review.action, review.proposal_kind, review.from_review_state, review.to_review_state - ), - details: serde_json::json!({ - "proposal_id": review.proposal_id, - "run_id": review.run_id, - "proposal_kind": review.proposal_kind, - "apply_intent": review.apply_intent, - "from_review_state": review.from_review_state, - "to_review_state": review.to_review_state, - "review_comment": review.review_comment, - "diff": review.diff, - }), - ts: review.created_at, - } -} - -pub(super) fn should_emit_decision_event(decision: &NoteProvenanceIngestDecision) -> bool { - if matches!(decision.note_op.as_str(), "NONE" | "REJECTED") { - return true; - } - - decision.note_version_id.is_none() -} - -fn version_event_type(op: &str, reason: &str) -> &'static str { - let reason = reason.to_ascii_lowercase(); - - match op { - "ADD" => "add", - "UPDATE" => "update", - "DELETE" if reason.contains("expire") => "expire", - "DELETE" => "delete", - "PUBLISH" | "UNPUBLISH" => "related", - "DEPRECATE" => "superseded", - "RESTORE" => "restored", - "INVALIDATE" => "invalidated", - _ => "related", - } -} - -fn decision_event_type(decision: &NoteProvenanceIngestDecision) -> &'static str { - if decision.policy_decision == "reject" || decision.note_op == "REJECTED" { - return "reject"; - } - if decision.policy_decision == "ignore" || decision.note_op == "NONE" { - return "ignore"; - } - - match decision.note_op.as_str() { - "ADD" => "add", - "UPDATE" => "update", - "DELETE" => "delete", - _ => "related", - } -} - -fn proposal_review_event_type(action: &str) -> &'static str { - match action { - "apply" => "applied", - "discard" => "reject", - "defer" => "defer", - "approve" => "related", - _ => "related", - } -} - -fn version_summary(event_type: &str, reason: &str) -> String { - match event_type { - "add" => format!("Note was added by {reason}."), - "update" => format!("Note was updated by {reason}."), - "delete" => format!("Note was deleted by {reason}."), - "expire" => format!("Note expired through {reason}."), - "superseded" => format!("Note was superseded by {reason}."), - "restored" => format!("Note was restored by {reason}."), - "invalidated" => format!("Note was invalidated by {reason}."), - _ => format!("Note recorded related transition {reason}."), - } -} - -fn decision_summary(event_type: &str, decision: &NoteProvenanceIngestDecision) -> String { - let reason = decision.reason_code.as_deref().unwrap_or("no_reason_code"); - - match event_type { - "ignore" => format!("Ingestion ignored candidate memory with {reason}."), - "reject" => format!("Ingestion rejected candidate memory with {reason}."), - _ => format!( - "Ingestion recorded {} decision for operation {}.", - decision.policy_decision, decision.note_op - ), - } -} diff --git a/packages/elf-service/src/provenance/history/builders.rs b/packages/elf-service/src/provenance/history/builders.rs new file mode 100644 index 00000000..51590a35 --- /dev/null +++ b/packages/elf-service/src/provenance/history/builders.rs @@ -0,0 +1,195 @@ +use time::OffsetDateTime; +use uuid::Uuid; + +use crate::provenance::{ + history::{classify, summaries}, + types::{ + MemoryHistoryEvent, NoteProvenanceIngestDecision, NoteProvenanceNoteVersion, + rows::{NoteDerivedProposalRow, NoteProposalReviewRow}, + }, +}; +use elf_storage::models::MemoryNote; + +pub(in crate::provenance) fn version_history_event( + version: &NoteProvenanceNoteVersion, + decision: Option<&&NoteProvenanceIngestDecision>, +) -> MemoryHistoryEvent { + let event_type = classify::version_event_type(version.op.as_str(), version.reason.as_str()); + let related_decision_id = decision.map(|decision| decision.decision_id); + let details = serde_json::json!({ + "reason": version.reason, + "prev_snapshot": version.prev_snapshot, + "new_snapshot": version.new_snapshot, + "ingest_decision": decision.map(|decision| serde_json::json!({ + "decision_id": decision.decision_id, + "pipeline": decision.pipeline, + "base_decision": decision.base_decision, + "policy_decision": decision.policy_decision, + "note_op": decision.note_op, + "reason_code": decision.reason_code, + })), + }); + + MemoryHistoryEvent { + event_id: format!("memory_note_versions:{}", version.version_id), + event_type: event_type.to_string(), + subject_type: "note".to_string(), + note_id: version.note_id, + source_table: "memory_note_versions".to_string(), + source_id: Some(version.version_id), + related_note_version_id: Some(version.version_id), + related_decision_id, + related_proposal_id: None, + actor: Some(version.actor.clone()), + op: Some(version.op.clone()), + reason_code: None, + summary: summaries::version_summary(event_type, version.reason.as_str()), + details, + ts: version.ts, + } +} + +pub(in crate::provenance) fn decision_history_event( + note_id: Uuid, + decision: &NoteProvenanceIngestDecision, +) -> MemoryHistoryEvent { + let event_type = classify::decision_event_type(decision); + let details = serde_json::json!({ + "pipeline": decision.pipeline, + "note_type": decision.note_type, + "note_key": decision.note_key, + "base_decision": decision.base_decision, + "policy_decision": decision.policy_decision, + "note_op": decision.note_op, + "details": decision.details, + }); + + MemoryHistoryEvent { + event_id: format!("memory_ingest_decisions:{}", decision.decision_id), + event_type: event_type.to_string(), + subject_type: "note".to_string(), + note_id, + source_table: "memory_ingest_decisions".to_string(), + source_id: Some(decision.decision_id), + related_note_version_id: decision.note_version_id, + related_decision_id: Some(decision.decision_id), + related_proposal_id: None, + actor: Some(decision.agent_id.clone()), + op: Some(decision.note_op.clone()), + reason_code: decision.reason_code.clone(), + summary: summaries::decision_summary(event_type, decision), + details, + ts: decision.ts, + } +} + +pub(in crate::provenance) fn expire_history_event( + note: &MemoryNote, + expires_at: OffsetDateTime, +) -> MemoryHistoryEvent { + MemoryHistoryEvent { + event_id: format!("memory_notes:{}:expire:{expires_at}", note.note_id), + event_type: "expire".to_string(), + subject_type: "note".to_string(), + note_id: note.note_id, + source_table: "memory_notes".to_string(), + source_id: Some(note.note_id), + related_note_version_id: None, + related_decision_id: None, + related_proposal_id: None, + actor: Some(note.agent_id.clone()), + op: Some("EXPIRE".to_string()), + reason_code: None, + summary: "Note reached its persisted expires_at timestamp.".to_string(), + details: serde_json::json!({ + "status": note.status, + "expires_at": expires_at, + }), + ts: expires_at, + } +} + +pub(in crate::provenance) fn derived_proposal_history_event( + note_id: Uuid, + proposal: NoteDerivedProposalRow, +) -> MemoryHistoryEvent { + MemoryHistoryEvent { + event_id: format!("consolidation_proposals:{}", proposal.proposal_id), + event_type: "derived".to_string(), + subject_type: "note".to_string(), + note_id, + source_table: "consolidation_proposals".to_string(), + source_id: Some(proposal.proposal_id), + related_note_version_id: None, + related_decision_id: None, + related_proposal_id: Some(proposal.proposal_id), + actor: Some(proposal.agent_id), + op: Some(proposal.apply_intent.clone()), + reason_code: None, + summary: format!( + "Derived proposal '{}' was created with review_state '{}'.", + proposal.proposal_kind, proposal.review_state + ), + details: serde_json::json!({ + "run_id": proposal.run_id, + "proposal_kind": proposal.proposal_kind, + "apply_intent": proposal.apply_intent, + "review_state": proposal.review_state, + "source_refs": proposal.source_refs, + "source_snapshot": proposal.source_snapshot, + "lineage": proposal.lineage, + "diff": proposal.diff, + "confidence": proposal.confidence, + "target_ref": proposal.target_ref, + "proposed_payload": proposal.proposed_payload, + }), + ts: proposal.created_at, + } +} + +pub(in crate::provenance) fn proposal_review_history_event( + note_id: Uuid, + review: NoteProposalReviewRow, +) -> MemoryHistoryEvent { + let event_type = classify::proposal_review_event_type(review.action.as_str()); + + MemoryHistoryEvent { + event_id: format!("consolidation_proposal_reviews:{}", review.review_id), + event_type: event_type.to_string(), + subject_type: "note".to_string(), + note_id, + source_table: "consolidation_proposal_reviews".to_string(), + source_id: Some(review.review_id), + related_note_version_id: None, + related_decision_id: None, + related_proposal_id: Some(review.proposal_id), + actor: Some(review.reviewer_agent_id), + op: Some(review.action.clone()), + reason_code: None, + summary: format!( + "Proposal review action '{}' moved '{}' from '{}' to '{}'.", + review.action, review.proposal_kind, review.from_review_state, review.to_review_state + ), + details: serde_json::json!({ + "proposal_id": review.proposal_id, + "run_id": review.run_id, + "proposal_kind": review.proposal_kind, + "apply_intent": review.apply_intent, + "from_review_state": review.from_review_state, + "to_review_state": review.to_review_state, + "review_comment": review.review_comment, + "diff": review.diff, + }), + ts: review.created_at, + } +} + +pub(in crate::provenance) fn should_emit_decision_event( + decision: &NoteProvenanceIngestDecision, +) -> bool { + if matches!(decision.note_op.as_str(), "NONE" | "REJECTED") { + return true; + } + + decision.note_version_id.is_none() +} diff --git a/packages/elf-service/src/provenance/history/classify.rs b/packages/elf-service/src/provenance/history/classify.rs new file mode 100644 index 00000000..19ab78d3 --- /dev/null +++ b/packages/elf-service/src/provenance/history/classify.rs @@ -0,0 +1,45 @@ +use crate::provenance::types::NoteProvenanceIngestDecision; + +pub(in crate::provenance::history) fn version_event_type(op: &str, reason: &str) -> &'static str { + let reason = reason.to_ascii_lowercase(); + + match op { + "ADD" => "add", + "UPDATE" => "update", + "DELETE" if reason.contains("expire") => "expire", + "DELETE" => "delete", + "PUBLISH" | "UNPUBLISH" => "related", + "DEPRECATE" => "superseded", + "RESTORE" => "restored", + "INVALIDATE" => "invalidated", + _ => "related", + } +} + +pub(in crate::provenance::history) fn decision_event_type( + decision: &NoteProvenanceIngestDecision, +) -> &'static str { + if decision.policy_decision == "reject" || decision.note_op == "REJECTED" { + return "reject"; + } + if decision.policy_decision == "ignore" || decision.note_op == "NONE" { + return "ignore"; + } + + match decision.note_op.as_str() { + "ADD" => "add", + "UPDATE" => "update", + "DELETE" => "delete", + _ => "related", + } +} + +pub(in crate::provenance::history) fn proposal_review_event_type(action: &str) -> &'static str { + match action { + "apply" => "applied", + "discard" => "reject", + "defer" => "defer", + "approve" => "related", + _ => "related", + } +} diff --git a/packages/elf-service/src/provenance/history/summaries.rs b/packages/elf-service/src/provenance/history/summaries.rs new file mode 100644 index 00000000..ea7d48ae --- /dev/null +++ b/packages/elf-service/src/provenance/history/summaries.rs @@ -0,0 +1,30 @@ +use crate::provenance::types::NoteProvenanceIngestDecision; + +pub(in crate::provenance::history) fn version_summary(event_type: &str, reason: &str) -> String { + match event_type { + "add" => format!("Note was added by {reason}."), + "update" => format!("Note was updated by {reason}."), + "delete" => format!("Note was deleted by {reason}."), + "expire" => format!("Note expired through {reason}."), + "superseded" => format!("Note was superseded by {reason}."), + "restored" => format!("Note was restored by {reason}."), + "invalidated" => format!("Note was invalidated by {reason}."), + _ => format!("Note recorded related transition {reason}."), + } +} + +pub(in crate::provenance::history) fn decision_summary( + event_type: &str, + decision: &NoteProvenanceIngestDecision, +) -> String { + let reason = decision.reason_code.as_deref().unwrap_or("no_reason_code"); + + match event_type { + "ignore" => format!("Ingestion ignored candidate memory with {reason}."), + "reject" => format!("Ingestion rejected candidate memory with {reason}."), + _ => format!( + "Ingestion recorded {} decision for operation {}.", + decision.policy_decision, decision.note_op + ), + } +} From 1fa49a8ee71fa98069032bc7015e178d166f45cd Mon Sep 17 00:00:00 2001 From: Yvette Carlisle Date: Tue, 30 Jun 2026 10:34:44 -0400 Subject: [PATCH 6/6] {"schema":"decodex/commit/1","summary":"Split knowledge write modules","authority":"manual"} --- packages/elf-storage/src/knowledge/writes.rs | 261 +----------------- .../src/knowledge/writes/children.rs | 42 +++ .../src/knowledge/writes/inserts.rs | 133 +++++++++ .../src/knowledge/writes/upsert.rs | 82 ++++++ 4 files changed, 267 insertions(+), 251 deletions(-) create mode 100644 packages/elf-storage/src/knowledge/writes/children.rs create mode 100644 packages/elf-storage/src/knowledge/writes/inserts.rs create mode 100644 packages/elf-storage/src/knowledge/writes/upsert.rs diff --git a/packages/elf-storage/src/knowledge/writes.rs b/packages/elf-storage/src/knowledge/writes.rs index 80f62d03..2f352073 100644 --- a/packages/elf-storage/src/knowledge/writes.rs +++ b/packages/elf-storage/src/knowledge/writes.rs @@ -1,253 +1,12 @@ -use sqlx::PgExecutor; -use uuid::Uuid; - -use crate::{ - Result, - knowledge::types::{ - KnowledgePageLintFindingInsert, KnowledgePageSectionInsert, KnowledgePageSourceRefInsert, - KnowledgePageUpsert, +mod children; +mod inserts; +mod upsert; + +pub use self::{ + children::{delete_knowledge_page_children, delete_knowledge_page_lint_findings}, + inserts::{ + insert_knowledge_page_lint_finding, insert_knowledge_page_section, + insert_knowledge_page_source_ref, }, - models::KnowledgePage, + upsert::upsert_knowledge_page, }; - -/// Upserts one derived knowledge page and returns the persisted row. -pub async fn upsert_knowledge_page<'e, E>( - executor: E, - args: KnowledgePageUpsert<'_>, -) -> Result -where - E: PgExecutor<'e>, -{ - let row = sqlx::query_as::<_, KnowledgePage>( - "\ -INSERT INTO knowledge_pages ( - page_id, - tenant_id, - project_id, - page_kind, - page_key, - title, - contract_schema, - status, - rebuild_source_hash, - content_hash, - source_coverage, - source_snapshot, - rebuild_metadata, - created_at, - updated_at, - rebuilt_at -) -VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$14,$14) -ON CONFLICT (tenant_id, project_id, page_kind, page_key) DO UPDATE -SET - title = EXCLUDED.title, - contract_schema = EXCLUDED.contract_schema, - status = EXCLUDED.status, - rebuild_source_hash = EXCLUDED.rebuild_source_hash, - content_hash = EXCLUDED.content_hash, - source_coverage = EXCLUDED.source_coverage, - source_snapshot = EXCLUDED.source_snapshot, - rebuild_metadata = EXCLUDED.rebuild_metadata, - updated_at = EXCLUDED.updated_at, - rebuilt_at = EXCLUDED.rebuilt_at -RETURNING - page_id, - tenant_id, - project_id, - page_kind, - page_key, - title, - contract_schema, - status, - rebuild_source_hash, - content_hash, - source_coverage, - source_snapshot, - rebuild_metadata, - created_at, - updated_at, - rebuilt_at", - ) - .bind(args.page_id) - .bind(args.tenant_id) - .bind(args.project_id) - .bind(args.page_kind) - .bind(args.page_key) - .bind(args.title) - .bind(args.contract_schema) - .bind(args.status) - .bind(args.rebuild_source_hash) - .bind(args.content_hash) - .bind(args.source_coverage) - .bind(args.source_snapshot) - .bind(args.rebuild_metadata) - .bind(args.now) - .fetch_one(executor) - .await?; - - Ok(row) -} - -/// Deletes all section, citation, and lint child rows for a page before rebuild. -pub async fn delete_knowledge_page_children<'e, E>(executor: E, page_id: Uuid) -> Result<()> -where - E: PgExecutor<'e>, -{ - sqlx::query( - "\ - WITH deleted_lint AS ( - DELETE FROM knowledge_page_lint_findings - WHERE page_id = $1 - ), - deleted_source_refs AS ( - DELETE FROM knowledge_page_source_refs - WHERE page_id = $1 - ) - DELETE FROM knowledge_page_sections - WHERE page_id = $1", - ) - .bind(page_id) - .execute(executor) - .await?; - - Ok(()) -} - -/// Inserts one derived knowledge page section. -pub async fn insert_knowledge_page_section<'e, E>( - executor: E, - args: KnowledgePageSectionInsert<'_>, -) -> Result<()> -where - E: PgExecutor<'e>, -{ - sqlx::query( - "\ -INSERT INTO knowledge_page_sections ( - section_id, - page_id, - section_key, - heading, - role, - content, - ordinal, - citations, - unsupported_reason, - content_hash, - created_at, - updated_at -) -VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$11)", - ) - .bind(args.section_id) - .bind(args.page_id) - .bind(args.section_key) - .bind(args.heading) - .bind(args.role) - .bind(args.content) - .bind(args.ordinal) - .bind(args.citations) - .bind(args.unsupported_reason) - .bind(args.content_hash) - .bind(args.now) - .execute(executor) - .await?; - - Ok(()) -} - -/// Inserts one normalized knowledge page citation/source reference. -pub async fn insert_knowledge_page_source_ref<'e, E>( - executor: E, - args: KnowledgePageSourceRefInsert<'_>, -) -> Result<()> -where - E: PgExecutor<'e>, -{ - sqlx::query( - "\ -INSERT INTO knowledge_page_source_refs ( - ref_id, - page_id, - section_id, - source_kind, - source_id, - source_status, - source_updated_at, - source_content_hash, - source_snapshot, - citation_metadata, - created_at -) -VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11)", - ) - .bind(args.ref_id) - .bind(args.page_id) - .bind(args.section_id) - .bind(args.source_kind) - .bind(args.source_id) - .bind(args.source_status) - .bind(args.source_updated_at) - .bind(args.source_content_hash) - .bind(args.source_snapshot) - .bind(args.citation_metadata) - .bind(args.now) - .execute(executor) - .await?; - - Ok(()) -} - -/// Inserts one knowledge page lint finding. -pub async fn insert_knowledge_page_lint_finding<'e, E>( - executor: E, - args: KnowledgePageLintFindingInsert<'_>, -) -> Result<()> -where - E: PgExecutor<'e>, -{ - sqlx::query( - "\ -INSERT INTO knowledge_page_lint_findings ( - finding_id, - page_id, - section_id, - finding_type, - severity, - source_kind, - source_id, - message, - details, - created_at -) -VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)", - ) - .bind(args.finding_id) - .bind(args.page_id) - .bind(args.section_id) - .bind(args.finding_type) - .bind(args.severity) - .bind(args.source_kind) - .bind(args.source_id) - .bind(args.message) - .bind(args.details) - .bind(args.now) - .execute(executor) - .await?; - - Ok(()) -} - -/// Deletes persisted lint findings for one page. -pub async fn delete_knowledge_page_lint_findings<'e, E>(executor: E, page_id: Uuid) -> Result<()> -where - E: PgExecutor<'e>, -{ - sqlx::query("DELETE FROM knowledge_page_lint_findings WHERE page_id = $1") - .bind(page_id) - .execute(executor) - .await?; - - Ok(()) -} diff --git a/packages/elf-storage/src/knowledge/writes/children.rs b/packages/elf-storage/src/knowledge/writes/children.rs new file mode 100644 index 00000000..92865a5b --- /dev/null +++ b/packages/elf-storage/src/knowledge/writes/children.rs @@ -0,0 +1,42 @@ +use sqlx::PgExecutor; +use uuid::Uuid; + +use crate::Result; + +/// Deletes all section, citation, and lint child rows for a page before rebuild. +pub async fn delete_knowledge_page_children<'e, E>(executor: E, page_id: Uuid) -> Result<()> +where + E: PgExecutor<'e>, +{ + sqlx::query( + "\ + WITH deleted_lint AS ( + DELETE FROM knowledge_page_lint_findings + WHERE page_id = $1 + ), + deleted_source_refs AS ( + DELETE FROM knowledge_page_source_refs + WHERE page_id = $1 + ) + DELETE FROM knowledge_page_sections + WHERE page_id = $1", + ) + .bind(page_id) + .execute(executor) + .await?; + + Ok(()) +} + +/// Deletes persisted lint findings for one page. +pub async fn delete_knowledge_page_lint_findings<'e, E>(executor: E, page_id: Uuid) -> Result<()> +where + E: PgExecutor<'e>, +{ + sqlx::query("DELETE FROM knowledge_page_lint_findings WHERE page_id = $1") + .bind(page_id) + .execute(executor) + .await?; + + Ok(()) +} diff --git a/packages/elf-storage/src/knowledge/writes/inserts.rs b/packages/elf-storage/src/knowledge/writes/inserts.rs new file mode 100644 index 00000000..a10de403 --- /dev/null +++ b/packages/elf-storage/src/knowledge/writes/inserts.rs @@ -0,0 +1,133 @@ +use sqlx::PgExecutor; + +use crate::{ + Result, + knowledge::types::{ + KnowledgePageLintFindingInsert, KnowledgePageSectionInsert, KnowledgePageSourceRefInsert, + }, +}; + +/// Inserts one derived knowledge page section. +pub async fn insert_knowledge_page_section<'e, E>( + executor: E, + args: KnowledgePageSectionInsert<'_>, +) -> Result<()> +where + E: PgExecutor<'e>, +{ + sqlx::query( + "\ +INSERT INTO knowledge_page_sections ( + section_id, + page_id, + section_key, + heading, + role, + content, + ordinal, + citations, + unsupported_reason, + content_hash, + created_at, + updated_at +) +VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$11)", + ) + .bind(args.section_id) + .bind(args.page_id) + .bind(args.section_key) + .bind(args.heading) + .bind(args.role) + .bind(args.content) + .bind(args.ordinal) + .bind(args.citations) + .bind(args.unsupported_reason) + .bind(args.content_hash) + .bind(args.now) + .execute(executor) + .await?; + + Ok(()) +} + +/// Inserts one normalized knowledge page citation/source reference. +pub async fn insert_knowledge_page_source_ref<'e, E>( + executor: E, + args: KnowledgePageSourceRefInsert<'_>, +) -> Result<()> +where + E: PgExecutor<'e>, +{ + sqlx::query( + "\ +INSERT INTO knowledge_page_source_refs ( + ref_id, + page_id, + section_id, + source_kind, + source_id, + source_status, + source_updated_at, + source_content_hash, + source_snapshot, + citation_metadata, + created_at +) +VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11)", + ) + .bind(args.ref_id) + .bind(args.page_id) + .bind(args.section_id) + .bind(args.source_kind) + .bind(args.source_id) + .bind(args.source_status) + .bind(args.source_updated_at) + .bind(args.source_content_hash) + .bind(args.source_snapshot) + .bind(args.citation_metadata) + .bind(args.now) + .execute(executor) + .await?; + + Ok(()) +} + +/// Inserts one knowledge page lint finding. +pub async fn insert_knowledge_page_lint_finding<'e, E>( + executor: E, + args: KnowledgePageLintFindingInsert<'_>, +) -> Result<()> +where + E: PgExecutor<'e>, +{ + sqlx::query( + "\ +INSERT INTO knowledge_page_lint_findings ( + finding_id, + page_id, + section_id, + finding_type, + severity, + source_kind, + source_id, + message, + details, + created_at +) +VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)", + ) + .bind(args.finding_id) + .bind(args.page_id) + .bind(args.section_id) + .bind(args.finding_type) + .bind(args.severity) + .bind(args.source_kind) + .bind(args.source_id) + .bind(args.message) + .bind(args.details) + .bind(args.now) + .execute(executor) + .await?; + + Ok(()) +} diff --git a/packages/elf-storage/src/knowledge/writes/upsert.rs b/packages/elf-storage/src/knowledge/writes/upsert.rs new file mode 100644 index 00000000..75a6420f --- /dev/null +++ b/packages/elf-storage/src/knowledge/writes/upsert.rs @@ -0,0 +1,82 @@ +use sqlx::PgExecutor; + +use crate::{Result, knowledge::types::KnowledgePageUpsert, models::KnowledgePage}; + +/// Upserts one derived knowledge page and returns the persisted row. +pub async fn upsert_knowledge_page<'e, E>( + executor: E, + args: KnowledgePageUpsert<'_>, +) -> Result +where + E: PgExecutor<'e>, +{ + let row = sqlx::query_as::<_, KnowledgePage>( + "\ +INSERT INTO knowledge_pages ( + page_id, + tenant_id, + project_id, + page_kind, + page_key, + title, + contract_schema, + status, + rebuild_source_hash, + content_hash, + source_coverage, + source_snapshot, + rebuild_metadata, + created_at, + updated_at, + rebuilt_at +) +VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$14,$14) +ON CONFLICT (tenant_id, project_id, page_kind, page_key) DO UPDATE +SET + title = EXCLUDED.title, + contract_schema = EXCLUDED.contract_schema, + status = EXCLUDED.status, + rebuild_source_hash = EXCLUDED.rebuild_source_hash, + content_hash = EXCLUDED.content_hash, + source_coverage = EXCLUDED.source_coverage, + source_snapshot = EXCLUDED.source_snapshot, + rebuild_metadata = EXCLUDED.rebuild_metadata, + updated_at = EXCLUDED.updated_at, + rebuilt_at = EXCLUDED.rebuilt_at +RETURNING + page_id, + tenant_id, + project_id, + page_kind, + page_key, + title, + contract_schema, + status, + rebuild_source_hash, + content_hash, + source_coverage, + source_snapshot, + rebuild_metadata, + created_at, + updated_at, + rebuilt_at", + ) + .bind(args.page_id) + .bind(args.tenant_id) + .bind(args.project_id) + .bind(args.page_kind) + .bind(args.page_key) + .bind(args.title) + .bind(args.contract_schema) + .bind(args.status) + .bind(args.rebuild_source_hash) + .bind(args.content_hash) + .bind(args.source_coverage) + .bind(args.source_snapshot) + .bind(args.rebuild_metadata) + .bind(args.now) + .fetch_one(executor) + .await?; + + Ok(row) +}