diff --git a/AGENTS.md b/AGENTS.md index 085cee3..a200d91 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -114,6 +114,30 @@ git commit -m "feat: implement prefixEncoding and itemEncoding for OpenAPI 3.2 3. **Searchability**: Easier to search and filter commits 4. **Tool Compatibility**: Works better with automated tools and scripts +## Multi-Module Dependency Management + +This repository uses Go workspaces (`go.work`) with multiple modules. The `cmd/openapi` module depends on the root `github.com/speakeasy-api/openapi` module. + +### How Local Development Works + +The `go.work` file lists all modules, so during local development the workspace resolves cross-module imports automatically. You do **not** need a `replace` directive in `cmd/openapi/go.mod`. + +### When Adding New Packages to the Root Module + +If you add new packages to the root module (e.g., `oq/`, `graph/`) that `cmd/openapi` imports, the published module version won't contain them yet. The workspace handles this locally, but `cmd/openapi/go.mod` must reference a version that includes the new packages for CI to pass `mod-check`. + +**Do NOT use `replace` directives.** Instead: + +1. Push your branch with the new root module packages. +2. From the repo root, update `cmd/openapi` to reference your branch commit: + ```bash + GOWORK=off go get -C cmd/openapi github.com/speakeasy-api/openapi@<commit-sha> + GOWORK=off go mod tidy -C cmd/openapi + ``` +3. Verify with `mise run mod-check`. + +This gives `cmd/openapi/go.mod` a pseudo-version (e.g., `v1.19.6-0.20260312183335-395c19cd8edd`) that resolves correctly both locally and in CI. Each subsequent push that changes the root module requires repeating step 2 with the new commit SHA. + ## Linter Rules This project uses `golangci-lint` with strict rules. Run `mise lint` to check. The most common violations are listed below. 
**When you encounter a new common lint pattern not documented here, add it to this section so future sessions avoid the same mistakes.** diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go new file mode 100644 index 0000000..166bfa3 --- /dev/null +++ b/cmd/openapi/commands/openapi/query.go @@ -0,0 +1,182 @@ +package openapi + +import ( + "context" + "errors" + "fmt" + "os" + + "github.com/speakeasy-api/openapi/graph" + "github.com/speakeasy-api/openapi/openapi" + "github.com/speakeasy-api/openapi/oq" + "github.com/speakeasy-api/openapi/references" + "github.com/spf13/cobra" +) + +var queryCmd = &cobra.Command{ + Use: "query [input-file]", + Short: "Query an OpenAPI specification using the oq pipeline language", + Long: `Query an OpenAPI specification using the oq pipeline language to answer +structural and semantic questions about schemas and operations. + +The query argument comes first, followed by an optional input file. If no file +is given, reads from stdin. 
+ +Examples: + # Deeply nested components (jq-style syntax) + openapi spec query 'schemas.components | sort_by(depth; desc) | first(10) | pick name, depth' petstore.yaml + + # Pipe from stdin + cat spec.yaml | openapi spec query 'schemas | count' + + # Explicit stdin + openapi spec query 'schemas | count' - + + # Filter with select() + openapi spec query 'schemas | select(union_width > 0) | sort_by(union_width; desc) | first(10)' petstore.yaml + + # Dead components (no incoming references) + openapi spec query 'schemas.components | select(in_degree == 0) | pick name' petstore.yaml + + # Variable binding — exclude seed from reachable results + openapi spec query 'schemas | select(name == "Pet") | let $pet = name | reachable | select(name != $pet)' petstore.yaml + + # User-defined functions + openapi spec query 'def hot: select(in_degree > 5); schemas.components | hot | pick name' petstore.yaml + + # Alternative operator — fallback for null/falsy values + openapi spec query 'schemas | select(name // "none" != "none")' petstore.yaml + + # If-then-else conditional + openapi spec query 'schemas | select(if is_component then depth > 3 else true end)' petstore.yaml + + # Blast radius + openapi spec query 'schemas.components | select(name == "Error") | blast-radius | length' petstore.yaml + + # Explain a query plan + openapi spec query 'schemas.components | select(depth > 5) | sort_by(depth; desc) | explain' petstore.yaml + +Pipeline stages (jq-style): + Source: schemas, schemas.components, schemas.inline, operations + Traversal: refs-out, refs-in, reachable, ancestors, properties, union-members, items, + ops, schemas, path(A; B), connected, blast-radius, neighbors(N) + Analysis: orphans, leaves, cycles, clusters, tag-boundary, shared-refs + Filter: select(expr), pick , sort_by(field; desc), first(N), last(N), + sample(N), top(N; field), bottom(N; field), unique, group_by(field), length + Variables: let $var = expr + Functions: def name: body; def name($p): body; include 
"file.oq"; + Meta: explain, fields, format(table|json|markdown|toon) + + Legacy syntax (where, sort, take, head, select fields, group-by, count) is still supported. + +Expression operators: ==, !=, >, <, >=, <=, and, or, not, //, has(), matches, + if-then-else-end, string interpolation \(expr)`, + Args: queryArgs(), + Run: runQuery, +} + +var queryOutputFormat string +var queryFromFile string + +func init() { + queryCmd.Flags().StringVar(&queryOutputFormat, "format", "table", "output format: table, json, markdown, or toon") + queryCmd.Flags().StringVarP(&queryFromFile, "file", "f", "", "read query from file instead of argument") +} + +func runQuery(cmd *cobra.Command, args []string) { + ctx := cmd.Context() + + // args[0] = query (or input file if using -f), args[1] = input file (optional) + queryStr := "" + inputFile := "-" // default to stdin + + if queryFromFile != "" { + data, err := os.ReadFile(queryFromFile) + if err != nil { + fmt.Fprintf(os.Stderr, "Error reading query file: %v\n", err) + os.Exit(1) + } + queryStr = string(data) + // When using -f, all positional args are input files + if len(args) > 0 { + inputFile = args[0] + } + } else if len(args) >= 1 { + queryStr = args[0] + if len(args) >= 2 { + inputFile = args[1] + } + } + + if queryStr == "" { + fmt.Fprintf(os.Stderr, "Error: no query provided\n") + os.Exit(1) + } + + processor, err := NewOpenAPIProcessor(inputFile, "", false) + if err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } + + if err := queryOpenAPI(ctx, processor, queryStr); err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } +} + +func queryOpenAPI(ctx context.Context, processor *OpenAPIProcessor, queryStr string) error { + doc, _, err := processor.LoadDocument(ctx) + if err != nil { + return err + } + if doc == nil { + return errors.New("failed to parse OpenAPI document: document is nil") + } + + // Build index + idx := buildIndex(ctx, doc) + + // Build graph + g := graph.Build(ctx, idx) + 
+ // Execute query + result, err := oq.Execute(queryStr, g) + if err != nil { + return fmt.Errorf("query error: %w", err) + } + + // Format and output — inline format stage overrides CLI flag + format := queryOutputFormat + if result.FormatHint != "" { + format = result.FormatHint + } + + var output string + switch format { + case "json": + output = oq.FormatJSON(result, g) + case "markdown": + output = oq.FormatMarkdown(result, g) + case "toon": + output = oq.FormatToon(result, g) + default: + output = oq.FormatTable(result, g) + } + + fmt.Fprint(processor.stdout(), output) + if result.IsCount { + fmt.Fprintln(processor.stdout()) + } + + return nil +} + +func buildIndex(ctx context.Context, doc *openapi.OpenAPI) *openapi.Index { + resolveOpts := references.ResolveOptions{ + RootDocument: doc, + TargetDocument: doc, + TargetLocation: ".", + } + return openapi.BuildIndex(ctx, doc, resolveOpts) +} diff --git a/cmd/openapi/commands/openapi/query_reference.go b/cmd/openapi/commands/openapi/query_reference.go new file mode 100644 index 0000000..c68509f --- /dev/null +++ b/cmd/openapi/commands/openapi/query_reference.go @@ -0,0 +1,275 @@ +package openapi + +import ( + "fmt" + + "github.com/spf13/cobra" +) + +var queryReferenceCmd = &cobra.Command{ + Use: "query-reference", + Short: "Print the oq query language reference", + Long: "Print the complete reference for the oq pipeline query language, including all stages, fields, operators, and examples.", + Run: func(_ *cobra.Command, _ []string) { + fmt.Print(queryReference) + }, +} + +const queryReference = `oq — OpenAPI Query Language Reference +===================================== + +oq is a pipeline query language for exploring OpenAPI schema graphs. +Queries are composed as left-to-right pipelines: + + source | stage | stage | ... | terminal + +SOURCES +------- +The first element of every pipeline is a source that selects the initial +result set. 
+ + schemas All schemas (component + inline) + schemas.components Only component schemas (in #/components/schemas) + schemas.inline Only inline schemas + operations All operations + +TRAVERSAL STAGES +---------------- +Graph navigation stages replace the current result set by following edges +in the schema reference graph. + + refs-out Direct outgoing references (1 hop, with edge annotations) + refs-in Direct incoming references (1 hop, with edge annotations) + reachable Transitive closure of outgoing references + ancestors Transitive closure of incoming references + properties Expand to property sub-schemas (with edge annotations) + union-members Expand allOf/oneOf/anyOf children (with edge annotations) + items Expand to array items schema (with edge annotations) + ops Schemas → operations that use them + schemas Operations → schemas they touch + path(A; B) Shortest path between two named schemas + connected Full connected component (schemas + operations) + blast-radius Ancestors + all affected operations (change impact) + neighbors(N) Bidirectional neighborhood within N hops + +ANALYSIS STAGES +--------------- + + orphans Schemas with no incoming refs and no operation usage + leaves Schemas with no outgoing refs (leaf/terminal nodes) + cycles Strongly connected components (actual reference cycles) + clusters Weakly connected component grouping + tag-boundary Schemas used by operations across multiple tags + shared-refs Schemas shared by ALL operations in result set + +FILTER & TRANSFORM STAGES +-------------------------- + + select(expr) Filter rows by predicate expression (jq-style) + pick Project specific fields (comma-separated) + sort_by(field) Sort ascending by field + sort_by(field; desc) Sort descending by field + first(N) Limit to first N results + last(N) Limit to last N results + sample(N) Deterministic pseudo-random sample of N rows + top(N; field) Sort descending by field and take N (shorthand) + bottom(N; field) Sort ascending by field and take N 
(shorthand) + unique Deduplicate rows by identity + group_by(field) Group rows and aggregate counts + length Count rows (terminal — returns a single number) + let $var = expr Bind expression result to a variable for later stages + + Legacy syntax is still supported: + where , select , sort [desc], take/head , + group-by , count + +FUNCTION DEFINITIONS & MODULES +------------------------------- +Define reusable pipeline fragments: + + def hot: select(in_degree > 10); + def impact($name): select(name == $name) | blast-radius; + + Syntax: def name: body; + def name($p1; $p2): body; + +Load definitions from .oq files: + + include "stdlib.oq"; + + Search paths: current directory, then ~/.config/oq/ + +META STAGES +----------- + + explain Print the query execution plan instead of running it + fields List available fields for the current result kind + format(fmt) Set output format: table, json, markdown, or toon + +SCHEMA FIELDS +------------- + + Field Type Description + ───── ──── ─────────── + name string Component name or JSON pointer + type string Schema type (object, array, string, ...) + depth int Max nesting depth + in_degree int Number of schemas referencing this one + out_degree int Number of schemas this references + union_width int oneOf + anyOf + allOf member count + property_count int Number of properties + is_component bool In #/components/schemas + is_inline bool Defined inline + is_circular bool Part of a circular reference chain + has_ref bool Has a $ref + hash string Content hash + path string JSON pointer in document + op_count int Number of operations using this schema + tag_count int Number of distinct tags across operations + +OPERATION FIELDS +---------------- + + Field Type Description + ───── ──── ─────────── + name string operationId or "METHOD /path" + method string HTTP method (GET, POST, ...) 
+ path string URL path + operation_id string operationId + schema_count int Total reachable schema count + component_count int Reachable component schema count + tag string First tag + parameter_count int Number of parameters + deprecated bool Whether the operation is deprecated + description string Operation description + summary string Operation summary + +EDGE ANNOTATION FIELDS +---------------------- +Available on rows produced by 1-hop traversal stages (refs-out, refs-in, +properties, union-members, items): + + Field Type Description + ───── ──── ─────────── + edge_kind string Edge type: property, items, allOf, oneOf, ref, ... + edge_label string Edge label: property name, array index, etc. + edge_from string Source node name + +EXPRESSIONS +----------- +The expression language is used in select(), let, and if-then-else: + + Comparison: == != > < >= <= + Logical: and or not + Alternative: // (returns left if truthy, else right) + Functions: has(<field>) — true if field is non-null/non-zero + matches(<field>; "<regex>") — regex match + Infix: <field> matches "<regex>" + Conditional: if <cond> then <expr> else <expr> end + if <cond> then <expr> elif <cond> then <expr> else <expr> end + Interpolation: "\(<expr>)" inside string literals + Grouping: ( ... 
) + Literals: "string" 42 true false + Variables: $var (bound by let) + +OUTPUT FORMATS +-------------- + + table Aligned columns with header (default) + json JSON array of objects + markdown Markdown table + toon TOON (Token-Oriented Object Notation) tabular format + +Set via --format flag or inline format stage: + schemas | length | format(json) + +EXAMPLES +-------- + + # Deeply nested components (jq-style) + schemas.components | sort_by(depth; desc) | first(10) | pick name, depth + + # Wide union trees + schemas | select(union_width > 0) | sort_by(union_width; desc) | first(10) + + # Most referenced schemas + schemas.components | sort_by(in_degree; desc) | first(10) | pick name, in_degree + + # Dead components (no incoming references) + schemas.components | select(in_degree == 0) | pick name + + # Operation sprawl + operations | sort_by(schema_count; desc) | first(10) | pick name, schema_count + + # Circular references + schemas | select(is_circular) | pick name, path + + # Schema count + schemas | length + + # Shortest path between schemas + schemas | path(Pet; Address) | pick name + + # Top 5 by in-degree + schemas.components | top(5; in_degree) | pick name, in_degree + + # Walk an operation to find all connected schemas + operations | select(name == "GET /users") | schemas | pick name, type + + # Explain a query plan + schemas.components | select(depth > 5) | sort_by(depth; desc) | explain + + # List available fields + schemas | fields + + # Regex filter + schemas | select(name matches "Error.*") | pick name, path + + # Complex filter + schemas | select(property_count > 3 and not is_component) | pick name, property_count, path + + # Edge annotations — see how Pet references other schemas + schemas.components | select(name == "Pet") | refs-out | pick name, edge_kind, edge_label, edge_from + + # Blast radius — what breaks if I change the Error schema? 
+ schemas.components | select(name == "Error") | blast-radius | length + + # Neighborhood — schemas within 2 hops of Pet + schemas.components | select(name == "Pet") | neighbors(2) | pick name + + # Orphaned schemas — unreferenced by anything + schemas.components | orphans | pick name + + # Leaf schemas — terminal nodes with no outgoing refs + schemas.components | leaves | pick name, in_degree + + # Detect reference cycles + schemas | cycles + + # Discover schema clusters + schemas.components | clusters + + # Cross-tag schemas — shared across team boundaries + schemas | tag-boundary | pick name, tag_count + + # Schemas shared by all operations + operations | shared-refs | pick name, op_count + + # Variable binding — find Pet's reachable schemas (excluding Pet itself) + schemas | select(name == "Pet") | let $pet = name | reachable | select(name != $pet) | pick name + + # Alternative operator — fallback for missing values + schemas | select(name // "unnamed" != "unnamed") + + # If-then-else — conditional filtering + schemas | select(if is_component then depth > 3 else true end) + + # User-defined functions + def hot: select(in_degree > 10); + def impact($name): select(name == $name) | blast-radius; + schemas.components | hot | pick name, in_degree + + # Load functions from a module file + include "stdlib.oq"; + schemas.components | hot | pick name, in_degree +` diff --git a/cmd/openapi/commands/openapi/root.go b/cmd/openapi/commands/openapi/root.go index 5f4c614..72562b0 100644 --- a/cmd/openapi/commands/openapi/root.go +++ b/cmd/openapi/commands/openapi/root.go @@ -18,4 +18,6 @@ func Apply(rootCmd *cobra.Command) { rootCmd.AddCommand(localizeCmd) rootCmd.AddCommand(exploreCmd) rootCmd.AddCommand(snipCmd) + rootCmd.AddCommand(queryCmd) + rootCmd.AddCommand(queryReferenceCmd) } diff --git a/cmd/openapi/commands/openapi/shared.go b/cmd/openapi/commands/openapi/shared.go index b79a77b..f471aec 100644 --- a/cmd/openapi/commands/openapi/shared.go +++ 
b/cmd/openapi/commands/openapi/shared.go @@ -31,6 +31,23 @@ func stdinOrFileArgs(minArgs, maxArgs int) cobra.PositionalArgs { return cmdutil.StdinOrFileArgs(minArgs, maxArgs) } +// queryArgs returns a PositionalArgs validator for the query command. +// When -f/--file is provided, 0 positional args are allowed (spec from stdin). +// Otherwise requires 1–2 positional args (query + optional spec file). +func queryArgs() cobra.PositionalArgs { + return func(cmd *cobra.Command, args []string) error { + fromFile, _ := cmd.Flags().GetString("file") + if fromFile != "" { + // -f flag present: 0 or 1 positional arg (optional spec file) + if len(args) > 1 { + return fmt.Errorf("accepts at most 1 arg when using --file, received %d", len(args)) + } + return nil + } + return cmdutil.StdinOrFileArgs(1, 2)(cmd, args) + } +} + // OpenAPIProcessor handles common OpenAPI document processing operations type OpenAPIProcessor struct { InputFile string diff --git a/cmd/openapi/go.mod b/cmd/openapi/go.mod index d5ea064..2548055 100644 --- a/cmd/openapi/go.mod +++ b/cmd/openapi/go.mod @@ -1,12 +1,12 @@ module github.com/speakeasy-api/openapi/cmd/openapi -go 1.24.3 +go 1.25.0 require ( github.com/charmbracelet/bubbles v0.21.0 github.com/charmbracelet/bubbletea v1.3.10 github.com/charmbracelet/lipgloss v1.1.0 - github.com/speakeasy-api/openapi v1.19.5-0.20260309010446-7ff6a9590a7f + github.com/speakeasy-api/openapi v1.20.1-0.20260313143718-19d9a40d4c09 github.com/speakeasy-api/openapi/openapi/linter/customrules v0.0.0-20260309010446-7ff6a9590a7f github.com/spf13/cobra v1.10.1 github.com/stretchr/testify v1.11.1 @@ -43,7 +43,7 @@ require ( github.com/spf13/pflag v1.0.9 // indirect github.com/vmware-labs/yaml-jsonpath v0.3.2 // indirect github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect - golang.org/x/sync v0.19.0 // indirect + golang.org/x/sync v0.20.0 // indirect golang.org/x/sys v0.36.0 // indirect golang.org/x/text v0.34.0 // indirect ) diff --git a/cmd/openapi/go.sum 
b/cmd/openapi/go.sum index ca0478f..9544910 100644 --- a/cmd/openapi/go.sum +++ b/cmd/openapi/go.sum @@ -84,8 +84,8 @@ github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/speakeasy-api/jsonpath v0.6.3 h1:c+QPwzAOdrWvzycuc9HFsIZcxKIaWcNpC+xhOW9rJxU= github.com/speakeasy-api/jsonpath v0.6.3/go.mod h1:2cXloNuQ+RSXi5HTRaeBh7JEmjRXTiaKpFTdZiL7URI= -github.com/speakeasy-api/openapi v1.19.5-0.20260309010446-7ff6a9590a7f h1:UjpoKOKoNqok2lxBTTQMq3Pv8metgqwRh6+ZeTxPFJw= -github.com/speakeasy-api/openapi v1.19.5-0.20260309010446-7ff6a9590a7f/go.mod h1:UfKa7FqE4jgexJZuj51MmdHAFGmDv0Zaw3+yOd81YKU= +github.com/speakeasy-api/openapi v1.20.1-0.20260313143718-19d9a40d4c09 h1:aWkreRg/x9FEle9bDBidEB6P6JBCdzZrQRGgCd6E+Ys= +github.com/speakeasy-api/openapi v1.20.1-0.20260313143718-19d9a40d4c09/go.mod h1:5gOzfAL1nSm57JswBgbpLqoBMGFlabSlTbxTNgHHO/0= github.com/speakeasy-api/openapi/openapi/linter/customrules v0.0.0-20260309010446-7ff6a9590a7f h1:kwiHeGSILCUVEM9iSAUtifl1TLlvyDXmMjyW26/iX2k= github.com/speakeasy-api/openapi/openapi/linter/customrules v0.0.0-20260309010446-7ff6a9590a7f/go.mod h1:ALDg9E6LRTL5tMFlddVrLhc4JaarCHL65x2YkwL7xdg= github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s= @@ -107,8 +107,8 @@ golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs= golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= -golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= +golang.org/x/sync v0.20.0/go.mod 
h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/graph/graph.go b/graph/graph.go new file mode 100644 index 0000000..7da6876 --- /dev/null +++ b/graph/graph.go @@ -0,0 +1,917 @@ +// Package graph provides a pre-computed directed graph over OpenAPI schemas and operations, +// materialized from an openapi.Index for efficient structural queries. +package graph + +import ( + "context" + "sort" + "strconv" + "strings" + + "github.com/speakeasy-api/openapi/hashing" + "github.com/speakeasy-api/openapi/jsonschema/oas3" + "github.com/speakeasy-api/openapi/openapi" +) + +// NodeID is a unique identifier for a node in the graph. +type NodeID int + +// EdgeKind represents the type of relationship between two schema nodes. +type EdgeKind int + +const ( + EdgeProperty EdgeKind = iota // properties/X + EdgeItems // items + EdgeAllOf // allOf[i] + EdgeOneOf // oneOf[i] + EdgeAnyOf // anyOf[i] + EdgeAdditionalProps // additionalProperties + EdgeNot // not + EdgeIf // if + EdgeThen // then + EdgeElse // else + EdgeContains // contains + EdgePrefixItems // prefixItems[i] + EdgeDependentSchema // dependentSchemas/X + EdgePatternProperty // patternProperties/X + EdgePropertyNames // propertyNames + EdgeUnevaluatedItems // unevaluatedItems + EdgeUnevaluatedProps // unevaluatedProperties + EdgeRef // resolved $ref +) + +// Edge represents a directed edge between two schema nodes. +type Edge struct { + From NodeID + To NodeID + Kind EdgeKind + Label string // property name, pattern key, or index +} + +// SchemaNode represents a schema in the graph. 
+type SchemaNode struct { + ID NodeID + Name string // component name or JSON pointer + Path string // JSON pointer in document + Schema *oas3.JSONSchemaReferenceable + Location openapi.Locations + IsComponent bool + IsInline bool + IsExternal bool + IsBoolean bool + IsCircular bool + HasRef bool + Type string // primary schema type + Depth int + InDegree int + OutDegree int + UnionWidth int + PropertyCount int + Hash string +} + +// OperationNode represents an operation in the graph. +type OperationNode struct { + ID NodeID + Name string // operationId or "METHOD /path" + Method string + Path string + OperationID string + Operation *openapi.Operation + Location openapi.Locations + SchemaCount int + ComponentCount int +} + +// SchemaGraph is a pre-computed directed graph over OpenAPI schemas and operations. +type SchemaGraph struct { + Schemas []SchemaNode + Operations []OperationNode + + outEdges map[NodeID][]Edge + inEdges map[NodeID][]Edge + + // Lookup maps + ptrToNode map[*oas3.JSONSchemaReferenceable]NodeID + nameToNode map[string]NodeID + + // Operation-schema relationships + opSchemas map[NodeID]map[NodeID]bool // operation -> set of schema NodeIDs + schemaOps map[NodeID]map[NodeID]bool // schema -> set of operation NodeIDs +} + +// Build constructs a SchemaGraph from an openapi.Index. +func Build(_ context.Context, idx *openapi.Index) *SchemaGraph { + g := &SchemaGraph{ + outEdges: make(map[NodeID][]Edge), + inEdges: make(map[NodeID][]Edge), + ptrToNode: make(map[*oas3.JSONSchemaReferenceable]NodeID), + nameToNode: make(map[string]NodeID), + opSchemas: make(map[NodeID]map[NodeID]bool), + schemaOps: make(map[NodeID]map[NodeID]bool), + } + + // Phase 1: Register nodes + g.registerNodes(idx) + + // Phase 2: Build edges + g.buildEdges() + + // Phase 3: Operation edges + g.buildOperationEdges(idx) + + // Phase 4: Compute metrics + g.computeMetrics() + + return g +} + +// OutEdges returns the outgoing edges from the given node. 
+func (g *SchemaGraph) OutEdges(id NodeID) []Edge { + return g.outEdges[id] +} + +// InEdges returns the incoming edges to the given node. +func (g *SchemaGraph) InEdges(id NodeID) []Edge { + return g.inEdges[id] +} + +// SchemaByName returns the schema node with the given component name, if any. +func (g *SchemaGraph) SchemaByName(name string) (SchemaNode, bool) { + if id, ok := g.nameToNode[name]; ok && int(id) < len(g.Schemas) { + return g.Schemas[id], true + } + return SchemaNode{}, false +} + +// OperationSchemas returns the schema NodeIDs reachable from the given operation. +// Results are sorted by NodeID for deterministic output. +func (g *SchemaGraph) OperationSchemas(opID NodeID) []NodeID { + set := g.opSchemas[opID] + ids := make([]NodeID, 0, len(set)) + for id := range set { + ids = append(ids, id) + } + sort.Slice(ids, func(i, j int) bool { return ids[i] < ids[j] }) + return ids +} + +// SchemaOperations returns the operation NodeIDs that reference the given schema. +// Results are sorted by NodeID for deterministic output. +func (g *SchemaGraph) SchemaOperations(schemaID NodeID) []NodeID { + set := g.schemaOps[schemaID] + ids := make([]NodeID, 0, len(set)) + for id := range set { + ids = append(ids, id) + } + sort.Slice(ids, func(i, j int) bool { return ids[i] < ids[j] }) + return ids +} + +// Phase 1: Register all schema nodes from the index. 
+func (g *SchemaGraph) registerNodes(idx *openapi.Index) { + addSchema := func(node *openapi.IndexNode[*oas3.JSONSchemaReferenceable], isComponent, isInline, isExternal, isBoolean bool) { + if node == nil || node.Node == nil { + return + } + // Avoid duplicates + if _, exists := g.ptrToNode[node.Node]; exists { + return + } + + id := NodeID(len(g.Schemas)) + jp := string(node.Location.ToJSONPointer()) + + name := jp + if isComponent { + // Extract component name from the JSON pointer: /components/schemas/Name + parts := strings.Split(jp, "/") + if len(parts) >= 4 { + name = parts[len(parts)-1] + } + } + + hasRef := false + schemaType := "" + if schema := node.Node.GetSchema(); schema != nil { + hasRef = schema.Ref != nil + types := schema.GetType() + if len(types) > 0 { + schemaType = string(types[0]) + } + } + + sn := SchemaNode{ + ID: id, + Name: name, + Path: jp, + Schema: node.Node, + Location: node.Location, + IsComponent: isComponent, + IsInline: isInline, + IsExternal: isExternal, + IsBoolean: isBoolean, + HasRef: hasRef, + Type: schemaType, + } + + g.Schemas = append(g.Schemas, sn) + g.ptrToNode[node.Node] = id + if isComponent { + g.nameToNode[name] = id + } + } + + for _, n := range idx.ComponentSchemas { + addSchema(n, true, false, false, false) + } + for _, n := range idx.InlineSchemas { + addSchema(n, false, true, false, false) + } + for _, n := range idx.ExternalSchemas { + addSchema(n, false, false, true, false) + } + for _, n := range idx.BooleanSchemas { + addSchema(n, false, false, false, true) + } + + // Also register schema references (nodes that are $refs to other schemas) + for _, n := range idx.SchemaReferences { + addSchema(n, false, false, false, false) + } +} + +// Phase 2: Build edges by inspecting child-bearing fields of each schema. 
+func (g *SchemaGraph) buildEdges() { + for i := range g.Schemas { + sn := &g.Schemas[i] + schema := sn.Schema.GetSchema() + if schema == nil { + continue + } + + // If this is a $ref node, add an edge to the resolved target + if schema.Ref != nil { + if targetID, ok := g.resolveRef(string(*schema.Ref)); ok { + g.addEdge(sn.ID, targetID, EdgeRef, string(*schema.Ref)) + } + } + + // Properties + if schema.Properties != nil { + for key, child := range schema.Properties.All() { + if childID, ok := g.resolveChild(child); ok { + g.addEdge(sn.ID, childID, EdgeProperty, key) + } + } + } + + // Items + if schema.Items != nil { + if childID, ok := g.resolveChild(schema.Items); ok { + g.addEdge(sn.ID, childID, EdgeItems, "items") + } + } + + // AllOf + for j, child := range schema.AllOf { + if childID, ok := g.resolveChild(child); ok { + g.addEdge(sn.ID, childID, EdgeAllOf, "allOf/"+intStr(j)) + } + } + + // OneOf + for j, child := range schema.OneOf { + if childID, ok := g.resolveChild(child); ok { + g.addEdge(sn.ID, childID, EdgeOneOf, "oneOf/"+intStr(j)) + } + } + + // AnyOf + for j, child := range schema.AnyOf { + if childID, ok := g.resolveChild(child); ok { + g.addEdge(sn.ID, childID, EdgeAnyOf, "anyOf/"+intStr(j)) + } + } + + // AdditionalProperties + if schema.AdditionalProperties != nil { + if childID, ok := g.resolveChild(schema.AdditionalProperties); ok { + g.addEdge(sn.ID, childID, EdgeAdditionalProps, "additionalProperties") + } + } + + // Not + if schema.Not != nil { + if childID, ok := g.resolveChild(schema.Not); ok { + g.addEdge(sn.ID, childID, EdgeNot, "not") + } + } + + // If / Then / Else + if schema.If != nil { + if childID, ok := g.resolveChild(schema.If); ok { + g.addEdge(sn.ID, childID, EdgeIf, "if") + } + } + if schema.Then != nil { + if childID, ok := g.resolveChild(schema.Then); ok { + g.addEdge(sn.ID, childID, EdgeThen, "then") + } + } + if schema.Else != nil { + if childID, ok := g.resolveChild(schema.Else); ok { + g.addEdge(sn.ID, childID, 
EdgeElse, "else") + } + } + + // Contains + if schema.Contains != nil { + if childID, ok := g.resolveChild(schema.Contains); ok { + g.addEdge(sn.ID, childID, EdgeContains, "contains") + } + } + + // PrefixItems + for j, child := range schema.PrefixItems { + if childID, ok := g.resolveChild(child); ok { + g.addEdge(sn.ID, childID, EdgePrefixItems, "prefixItems/"+intStr(j)) + } + } + + // DependentSchemas + if schema.DependentSchemas != nil { + for key, child := range schema.DependentSchemas.All() { + if childID, ok := g.resolveChild(child); ok { + g.addEdge(sn.ID, childID, EdgeDependentSchema, key) + } + } + } + + // PatternProperties + if schema.PatternProperties != nil { + for key, child := range schema.PatternProperties.All() { + if childID, ok := g.resolveChild(child); ok { + g.addEdge(sn.ID, childID, EdgePatternProperty, key) + } + } + } + + // PropertyNames + if schema.PropertyNames != nil { + if childID, ok := g.resolveChild(schema.PropertyNames); ok { + g.addEdge(sn.ID, childID, EdgePropertyNames, "propertyNames") + } + } + + // UnevaluatedItems + if schema.UnevaluatedItems != nil { + if childID, ok := g.resolveChild(schema.UnevaluatedItems); ok { + g.addEdge(sn.ID, childID, EdgeUnevaluatedItems, "unevaluatedItems") + } + } + + // UnevaluatedProperties + if schema.UnevaluatedProperties != nil { + if childID, ok := g.resolveChild(schema.UnevaluatedProperties); ok { + g.addEdge(sn.ID, childID, EdgeUnevaluatedProps, "unevaluatedProperties") + } + } + } +} + +// resolveChild finds the node ID for a child schema pointer. +// If the pointer is directly registered, returns it. +// If not, checks if it's a $ref and resolves via the component name lookup. 
+func (g *SchemaGraph) resolveChild(child *oas3.JSONSchemaReferenceable) (NodeID, bool) { + if child == nil { + return 0, false + } + // Direct pointer match + if id, ok := g.ptrToNode[child]; ok { + return id, true + } + // Try to resolve via $ref + if s := child.GetSchema(); s != nil && s.Ref != nil { + return g.resolveRef(string(*s.Ref)) + } + return 0, false +} + +// resolveRef resolves a $ref string (e.g., "#/components/schemas/Owner") to a node ID. +func (g *SchemaGraph) resolveRef(ref string) (NodeID, bool) { + const prefix = "#/components/schemas/" + if strings.HasPrefix(ref, prefix) { + name := ref[len(prefix):] + if id, ok := g.nameToNode[name]; ok { + return id, true + } + } + return 0, false +} + +func (g *SchemaGraph) addEdge(from, to NodeID, kind EdgeKind, label string) { + e := Edge{From: from, To: to, Kind: kind, Label: label} + g.outEdges[from] = append(g.outEdges[from], e) + g.inEdges[to] = append(g.inEdges[to], e) +} + +// Phase 3: Build operation nodes and operation-schema relationships. 
+func (g *SchemaGraph) buildOperationEdges(idx *openapi.Index) { + for _, opNode := range idx.Operations { + if opNode == nil || opNode.Node == nil { + continue + } + + method, path := openapi.ExtractMethodAndPath(opNode.Location) + opID := opNode.Node.GetOperationID() + + name := opID + if name == "" { + name = strings.ToUpper(method) + " " + path + } + + opNodeID := NodeID(len(g.Operations)) + on := OperationNode{ + ID: opNodeID, + Name: name, + Method: method, + Path: path, + OperationID: opID, + Operation: opNode.Node, + Location: opNode.Location, + } + + // Find schemas reachable from this operation by walking its structure + directSchemas := g.findOperationSchemas(opNode.Node) + + // Build transitive closure from direct schemas + reachable := make(map[NodeID]bool) + for _, sid := range directSchemas { + g.reachableBFS(sid, reachable) + } + + g.opSchemas[opNodeID] = reachable + + componentCount := 0 + for sid := range reachable { + if int(sid) < len(g.Schemas) && g.Schemas[sid].IsComponent { + componentCount++ + } + // Build reverse mapping + if g.schemaOps[sid] == nil { + g.schemaOps[sid] = make(map[NodeID]bool) + } + g.schemaOps[sid][opNodeID] = true + } + + on.SchemaCount = len(reachable) + on.ComponentCount = componentCount + + g.Operations = append(g.Operations, on) + } +} + +// findOperationSchemas finds schema NodeIDs directly referenced by an operation's +// parameters, request body, and responses. 
+func (g *SchemaGraph) findOperationSchemas(op *openapi.Operation) []NodeID { + var result []NodeID + seen := make(map[NodeID]bool) + + addIfKnown := func(js *oas3.JSONSchemaReferenceable) { + if js == nil { + return + } + if id, ok := g.ptrToNode[js]; ok && !seen[id] { + seen[id] = true + result = append(result, id) + } + } + + // Walk parameters + for _, param := range op.Parameters { + if param == nil { + continue + } + p := param.GetObject() + if p == nil { + continue + } + if p.Schema != nil { + addIfKnown(p.Schema) + } + } + + // Walk request body + if op.RequestBody != nil { + rb := op.RequestBody.GetObject() + if rb != nil && rb.Content != nil { + for _, mt := range rb.Content.All() { + if mt != nil && mt.Schema != nil { + addIfKnown(mt.Schema) + } + } + } + } + + // Walk responses + for _, resp := range op.Responses.All() { + if resp == nil { + continue + } + r := resp.GetObject() + if r == nil || r.Content == nil { + continue + } + for _, mt := range r.Content.All() { + if mt != nil && mt.Schema != nil { + addIfKnown(mt.Schema) + } + } + } + // Also check default response + if op.Responses.Default != nil { + r := op.Responses.Default.GetObject() + if r != nil && r.Content != nil { + for _, mt := range r.Content.All() { + if mt != nil && mt.Schema != nil { + addIfKnown(mt.Schema) + } + } + } + } + + return result +} + +// reachableBFS performs BFS from a schema node and adds all reachable nodes to the set. +func (g *SchemaGraph) reachableBFS(start NodeID, visited map[NodeID]bool) { + if visited[start] { + return + } + queue := []NodeID{start} + visited[start] = true + + for len(queue) > 0 { + current := queue[0] + queue = queue[1:] + + for _, edge := range g.outEdges[current] { + if !visited[edge.To] { + visited[edge.To] = true + queue = append(queue, edge.To) + } + } + } +} + +// Phase 4: Compute metrics for each schema node. 
+func (g *SchemaGraph) computeMetrics() { + // Detect circular nodes with a single shared DFS (O(V+E)) + circularNodes := make(map[NodeID]bool) + visited := make(map[NodeID]bool) + inStack := make(map[NodeID]bool) + for i := range g.Schemas { + nid := NodeID(i) + if !visited[nid] { + g.detectCycle(nid, visited, inStack, circularNodes) + } + } + + for i := range g.Schemas { + sn := &g.Schemas[i] + id := NodeID(i) + + sn.OutDegree = len(g.outEdges[id]) + sn.InDegree = len(g.inEdges[id]) + sn.IsCircular = circularNodes[id] + + schema := sn.Schema.GetSchema() + if schema != nil { + sn.UnionWidth = len(schema.AllOf) + len(schema.OneOf) + len(schema.AnyOf) + if schema.Properties != nil { + sn.PropertyCount = schema.Properties.Len() + } + sn.Hash = hashing.Hash(schema) + } + + // Compute depth via DFS with cycle detection + depthVisited := make(map[NodeID]bool) + sn.Depth = g.computeDepth(id, depthVisited) + } +} + +func (g *SchemaGraph) computeDepth(id NodeID, visited map[NodeID]bool) int { + if visited[id] { + return 0 // cycle + } + visited[id] = true + + maxChild := 0 + for _, edge := range g.outEdges[id] { + d := g.computeDepth(edge.To, visited) + if d+1 > maxChild { + maxChild = d + 1 + } + } + visited[id] = false + return maxChild +} + +// detectCycle performs a DFS from id, marking nodes that participate in cycles. +// It returns the NodeID of the cycle entry point that still needs to be "closed" +// by an ancestor frame, or -1 if no open cycle passes through this node. 
+func (g *SchemaGraph) detectCycle(id NodeID, visited, inStack map[NodeID]bool, circular map[NodeID]bool) NodeID { + if inStack[id] { + circular[id] = true + return id // back-edge found; id is the cycle entry point + } + if visited[id] { + return -1 + } + visited[id] = true + inStack[id] = true + + var outerEntry NodeID = -1 + for _, edge := range g.outEdges[id] { + entry := g.detectCycle(edge.To, visited, inStack, circular) + if entry != -1 { + circular[id] = true + // If the cycle entry is this node, the cycle is closed — don't propagate. + // Otherwise, remember the outermost open entry to propagate upward. + if entry != id { + outerEntry = entry + } + } + } + + inStack[id] = false + return outerEntry +} + +// Reachable returns all schema NodeIDs transitively reachable from the given node via out-edges. +func (g *SchemaGraph) Reachable(id NodeID) []NodeID { + visited := make(map[NodeID]bool) + g.reachableBFS(id, visited) + delete(visited, id) // exclude self + result := make([]NodeID, 0, len(visited)) + for nid := range visited { + result = append(result, nid) + } + return result +} + +// Ancestors returns all schema NodeIDs that can transitively reach the given node via in-edges. +func (g *SchemaGraph) Ancestors(id NodeID) []NodeID { + visited := make(map[NodeID]bool) + visited[id] = true + queue := []NodeID{id} + + for len(queue) > 0 { + current := queue[0] + queue = queue[1:] + + for _, edge := range g.inEdges[current] { + if !visited[edge.From] { + visited[edge.From] = true + queue = append(queue, edge.From) + } + } + } + + delete(visited, id) // exclude self + result := make([]NodeID, 0, len(visited)) + for nid := range visited { + result = append(result, nid) + } + return result +} + +// ShortestPath returns the shortest path from `from` to `to` using out-edges (BFS). +// Returns nil if no path exists. The returned slice includes both endpoints. 
+func (g *SchemaGraph) ShortestPath(from, to NodeID) []NodeID { + if from == to { + return []NodeID{from} + } + + parent := make(map[NodeID]NodeID) + visited := make(map[NodeID]bool) + visited[from] = true + queue := []NodeID{from} + + for len(queue) > 0 { + current := queue[0] + queue = queue[1:] + + for _, edge := range g.outEdges[current] { + if visited[edge.To] { + continue + } + visited[edge.To] = true + parent[edge.To] = current + + if edge.To == to { + // Reconstruct path + var path []NodeID + for n := to; n != from; n = parent[n] { + path = append(path, n) + } + path = append(path, from) + // Reverse + for i, j := 0, len(path)-1; i < j; i, j = i+1, j-1 { + path[i], path[j] = path[j], path[i] + } + return path + } + + queue = append(queue, edge.To) + } + } + + return nil +} + +// SchemaOpCount returns the number of operations that reference the given schema. +func (g *SchemaGraph) SchemaOpCount(id NodeID) int { + return len(g.schemaOps[id]) +} + +// Neighbors returns schema NodeIDs within maxDepth hops of the given node, +// following both out-edges and in-edges (bidirectional BFS). +// The result excludes the seed node itself. +func (g *SchemaGraph) Neighbors(id NodeID, maxDepth int) []NodeID { + visited := map[NodeID]bool{id: true} + current := []NodeID{id} + + for depth := 0; depth < maxDepth && len(current) > 0; depth++ { + var next []NodeID + for _, nid := range current { + for _, edge := range g.outEdges[nid] { + if !visited[edge.To] { + visited[edge.To] = true + next = append(next, edge.To) + } + } + for _, edge := range g.inEdges[nid] { + if !visited[edge.From] { + visited[edge.From] = true + next = append(next, edge.From) + } + } + } + current = next + } + + delete(visited, id) + result := make([]NodeID, 0, len(visited)) + for nid := range visited { + result = append(result, nid) + } + return result +} + +// StronglyConnectedComponents returns the SCCs of the schema graph using +// Tarjan's algorithm. 
Only returns components with more than one node +// (i.e., actual cycles, not singleton nodes). +func (g *SchemaGraph) StronglyConnectedComponents() [][]NodeID { + idx := 0 + var stack []NodeID + onStack := make(map[NodeID]bool) + indices := make(map[NodeID]int) + lowlinks := make(map[NodeID]int) + defined := make(map[NodeID]bool) + var sccs [][]NodeID + + var strongConnect func(v NodeID) + strongConnect = func(v NodeID) { + indices[v] = idx + lowlinks[v] = idx + defined[v] = true + idx++ + stack = append(stack, v) + onStack[v] = true + + for _, edge := range g.outEdges[v] { + w := edge.To + if !defined[w] { + strongConnect(w) + if lowlinks[w] < lowlinks[v] { + lowlinks[v] = lowlinks[w] + } + } else if onStack[w] { + if indices[w] < lowlinks[v] { + lowlinks[v] = indices[w] + } + } + } + + if lowlinks[v] == indices[v] { + var scc []NodeID + for { + w := stack[len(stack)-1] + stack = stack[:len(stack)-1] + onStack[w] = false + scc = append(scc, w) + if w == v { + break + } + } + if len(scc) > 1 { + sccs = append(sccs, scc) + } + } + } + + for i := range g.Schemas { + nid := NodeID(i) + if !defined[nid] { + strongConnect(nid) + } + } + + return sccs +} + +// ConnectedComponent computes the full connected component reachable from the +// given seed schema and operation nodes. It treats schema edges as undirected +// (follows both out-edges and in-edges) and crosses schema↔operation links. +// Returns the sets of reachable schema and operation NodeIDs (including seeds). 
+func (g *SchemaGraph) ConnectedComponent(schemaSeeds, opSeeds []NodeID) (schemas []NodeID, ops []NodeID) { + visitedSchemas := make(map[NodeID]bool) + visitedOps := make(map[NodeID]bool) + + // Queues for BFS across both node types + schemaQueue := make([]NodeID, 0, len(schemaSeeds)) + opQueue := make([]NodeID, 0, len(opSeeds)) + + for _, id := range schemaSeeds { + if !visitedSchemas[id] { + visitedSchemas[id] = true + schemaQueue = append(schemaQueue, id) + } + } + for _, id := range opSeeds { + if !visitedOps[id] { + visitedOps[id] = true + opQueue = append(opQueue, id) + } + } + + for len(schemaQueue) > 0 || len(opQueue) > 0 { + // Process schema nodes + for len(schemaQueue) > 0 { + current := schemaQueue[0] + schemaQueue = schemaQueue[1:] + + // Follow out-edges (undirected: treat as bidirectional) + for _, edge := range g.outEdges[current] { + if !visitedSchemas[edge.To] { + visitedSchemas[edge.To] = true + schemaQueue = append(schemaQueue, edge.To) + } + } + // Follow in-edges + for _, edge := range g.inEdges[current] { + if !visitedSchemas[edge.From] { + visitedSchemas[edge.From] = true + schemaQueue = append(schemaQueue, edge.From) + } + } + // Cross to operations + for opID := range g.schemaOps[current] { + if !visitedOps[opID] { + visitedOps[opID] = true + opQueue = append(opQueue, opID) + } + } + } + + // Process operation nodes + for len(opQueue) > 0 { + current := opQueue[0] + opQueue = opQueue[1:] + + // Cross to schemas + for sid := range g.opSchemas[current] { + if !visitedSchemas[sid] { + visitedSchemas[sid] = true + schemaQueue = append(schemaQueue, sid) + } + } + } + } + + schemas = make([]NodeID, 0, len(visitedSchemas)) + for id := range visitedSchemas { + schemas = append(schemas, id) + } + ops = make([]NodeID, 0, len(visitedOps)) + for id := range visitedOps { + ops = append(ops, id) + } + return schemas, ops +} + +func intStr(i int) string { + return strconv.Itoa(i) +} diff --git a/graph/graph_test.go b/graph/graph_test.go new file mode 
100644 index 0000000..8833685 --- /dev/null +++ b/graph/graph_test.go @@ -0,0 +1,304 @@ +package graph_test + +import ( + "os" + "testing" + + "github.com/speakeasy-api/openapi/graph" + "github.com/speakeasy-api/openapi/openapi" + "github.com/speakeasy-api/openapi/references" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func loadTestGraph(t *testing.T) *graph.SchemaGraph { + t.Helper() + + f, err := os.Open("../oq/testdata/petstore.yaml") + require.NoError(t, err) + defer f.Close() + + ctx := t.Context() + doc, _, err := openapi.Unmarshal(ctx, f, openapi.WithSkipValidation()) + require.NoError(t, err) + require.NotNil(t, doc) + + idx := openapi.BuildIndex(ctx, doc, references.ResolveOptions{ + RootDocument: doc, + TargetDocument: doc, + TargetLocation: "../oq/testdata/petstore.yaml", + }) + + return graph.Build(ctx, idx) +} + +func TestBuild_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + assert.NotEmpty(t, g.Schemas, "should have schema nodes") + assert.NotEmpty(t, g.Operations, "should have operation nodes") +} + +func TestBuild_ComponentSchemas_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + componentNames := make(map[string]bool) + for _, s := range g.Schemas { + if s.IsComponent { + componentNames[s.Name] = true + } + } + + assert.True(t, componentNames["Pet"]) + assert.True(t, componentNames["Owner"]) + assert.True(t, componentNames["Address"]) + assert.True(t, componentNames["Error"]) + assert.True(t, componentNames["Shape"]) + assert.True(t, componentNames["Circle"]) + assert.True(t, componentNames["Square"]) + assert.True(t, componentNames["Unused"]) +} + +func TestBuild_SchemaByName_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, ok := g.SchemaByName("Pet") + assert.True(t, ok) + assert.Equal(t, "Pet", pet.Name) + assert.Equal(t, "object", pet.Type) + assert.True(t, pet.IsComponent) + + _, ok = g.SchemaByName("NonExistent") + assert.False(t, ok) +} + 
+func TestBuild_Edges_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + edges := g.OutEdges(pet.ID) + + // Pet has properties: id, name, tag, owner + assert.Len(t, edges, 4, "Pet should have 4 out-edges") + + edgeLabels := make(map[string]graph.EdgeKind) + for _, e := range edges { + edgeLabels[e.Label] = e.Kind + } + assert.Equal(t, graph.EdgeProperty, edgeLabels["id"]) + assert.Equal(t, graph.EdgeProperty, edgeLabels["name"]) + assert.Equal(t, graph.EdgeProperty, edgeLabels["tag"]) + assert.Equal(t, graph.EdgeProperty, edgeLabels["owner"]) +} + +func TestBuild_Reachable_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + reachable := g.Reachable(pet.ID) + assert.NotEmpty(t, reachable, "Pet should have reachable schemas") + + reachableNames := make(map[string]bool) + for _, id := range reachable { + reachableNames[g.Schemas[id].Name] = true + } + + // Pet -> owner -> Owner -> address -> Address + assert.True(t, reachableNames["Owner"], "Owner should be reachable from Pet") + assert.True(t, reachableNames["Address"], "Address should be reachable from Pet") +} + +func TestBuild_Ancestors_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + addr, _ := g.SchemaByName("Address") + ancestors := g.Ancestors(addr.ID) + assert.NotEmpty(t, ancestors, "Address should have ancestors") + + ancestorNames := make(map[string]bool) + for _, id := range ancestors { + ancestorNames[g.Schemas[id].Name] = true + } + + assert.True(t, ancestorNames["Owner"], "Owner should be an ancestor of Address") +} + +func TestBuild_Operations_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + opNames := make(map[string]bool) + for _, op := range g.Operations { + opNames[op.Name] = true + } + + assert.True(t, opNames["listPets"]) + assert.True(t, opNames["createPet"]) + assert.True(t, opNames["showPetById"]) + assert.True(t, opNames["listOwners"]) +} + +func 
TestBuild_OperationSchemas_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + for _, op := range g.Operations { + if op.OperationID == "listPets" { + schemas := g.OperationSchemas(op.ID) + assert.NotEmpty(t, schemas, "listPets should reference schemas") + assert.Positive(t, op.SchemaCount) + return + } + } + t.Fatal("listPets operation not found") +} + +func TestBuild_ShortestPath_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + addr, _ := g.SchemaByName("Address") + path := g.ShortestPath(pet.ID, addr.ID) + assert.NotEmpty(t, path, "should find path from Pet to Address") + assert.Equal(t, pet.ID, path[0]) + assert.Equal(t, addr.ID, path[len(path)-1]) +} + +func TestBuild_ShortestPath_NoPath_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + unused, _ := g.SchemaByName("Unused") + pet, _ := g.SchemaByName("Pet") + path := g.ShortestPath(unused.ID, pet.ID) + assert.Empty(t, path, "Unused should not reach Pet") +} + +func TestBuild_Metrics_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + assert.Equal(t, 4, pet.PropertyCount, "Pet should have 4 properties") + assert.Equal(t, 4, pet.OutDegree, "Pet should have 4 out-edges") + assert.Positive(t, pet.InDegree, "Pet should be referenced") + assert.NotEmpty(t, pet.Hash, "Pet should have a hash") + + shape, _ := g.SchemaByName("Shape") + assert.Equal(t, 2, shape.UnionWidth, "Shape should have union_width 2 (oneOf)") + + unused, _ := g.SchemaByName("Unused") + assert.Equal(t, 0, unused.InDegree, "Unused should have no incoming edges from other schemas") +} + +func TestBuild_InEdges_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Owner is referenced by Pet via the "owner" property (possibly through a $ref node) + owner, _ := g.SchemaByName("Owner") + inEdges := g.InEdges(owner.ID) + assert.NotEmpty(t, inEdges, "Owner should have incoming edges") + + // Verify the InEdges 
returns edges with correct To field + for _, e := range inEdges { + assert.Equal(t, owner.ID, e.To, "InEdge To should match the queried node") + } +} + +func TestBuild_SchemaOperations_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + ops := g.SchemaOperations(pet.ID) + assert.NotEmpty(t, ops, "Pet should be referenced by operations") +} + +func TestBuild_SchemaOpCount_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + count := g.SchemaOpCount(pet.ID) + assert.Positive(t, count, "Pet should have operations referencing it") + + unused, _ := g.SchemaByName("Unused") + count = g.SchemaOpCount(unused.ID) + assert.Equal(t, 0, count, "Unused should have no operations") +} + +func TestBuild_Neighbors_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + + // Depth 1: direct out-edges and in-edges + n1 := g.Neighbors(pet.ID, 1) + assert.NotEmpty(t, n1, "Pet should have depth-1 neighbors") + + // Depth 0: should return nothing (no hops) + n0 := g.Neighbors(pet.ID, 0) + assert.Empty(t, n0, "depth-0 neighbors should be empty") + + // Depth 2: should be >= depth 1 + n2 := g.Neighbors(pet.ID, 2) + assert.GreaterOrEqual(t, len(n2), len(n1), "depth-2 should include at least depth-1 neighbors") +} + +func TestBuild_StronglyConnectedComponents_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + sccs := g.StronglyConnectedComponents() + // Petstore shouldn't have cycles, so SCCs should be empty (no multi-node components) + assert.Empty(t, sccs, "petstore should have no strongly connected components") +} + +func TestBuild_ConnectedComponent_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + schemas, ops := g.ConnectedComponent([]graph.NodeID{pet.ID}, nil) + assert.NotEmpty(t, schemas, "connected component from Pet should include schemas") + assert.NotEmpty(t, ops, "connected 
component from Pet should include operations") + + // Should include Pet itself + hasPet := false + for _, id := range schemas { + if id == pet.ID { + hasPet = true + } + } + assert.True(t, hasPet, "connected component should include the seed") +} + +func TestBuild_ConnectedComponent_FromOp_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Start from first operation + require.NotEmpty(t, g.Operations) + schemas, ops := g.ConnectedComponent(nil, []graph.NodeID{g.Operations[0].ID}) + assert.NotEmpty(t, schemas, "connected component from operation should include schemas") + assert.NotEmpty(t, ops, "connected component from operation should include the seed operation") +} + +func TestBuild_ShortestPath_SameNode_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + path := g.ShortestPath(pet.ID, pet.ID) + assert.Len(t, path, 1, "path from node to itself should be length 1") + assert.Equal(t, pet.ID, path[0]) +} diff --git a/oq/README.md b/oq/README.md new file mode 100644 index 0000000..21e4d83 --- /dev/null +++ b/oq/README.md @@ -0,0 +1,301 @@ +# oq — OpenAPI Query Language + +`oq` is a pipeline query language for exploring OpenAPI schema reference graphs. It lets you ask structural and semantic questions about schemas and operations at the command line. + +## Quick Start + +```bash +# Count all schemas +openapi spec query 'schemas | count' petstore.yaml + +# Top 10 deepest component schemas (new jq-style) +openapi spec query 'schemas.components | sort_by(depth; desc) | first(10) | pick name, depth' petstore.yaml + +# Dead components (unreferenced) +openapi spec query 'schemas.components | select(in_degree == 0) | pick name' petstore.yaml +``` + +Stdin is supported: + +```bash +cat spec.yaml | openapi spec query 'schemas | count' +``` + +## Pipeline Syntax + +Queries are left-to-right pipelines separated by `|`: + +``` +source | stage | stage | ... 
| terminal +``` + +### Sources + +| Source | Description | +|--------|-------------| +| `schemas` | All schemas (component + inline) | +| `schemas.components` | Component schemas only | +| `schemas.inline` | Inline schemas only | +| `operations` | All operations | + +### Traversal Stages + +| Stage | Description | +|-------|-------------| +| `refs-out` | Direct outgoing references (with edge annotations) | +| `refs-in` | Direct incoming references (with edge annotations) | +| `reachable` | Transitive closure of outgoing refs | +| `ancestors` | Transitive closure of incoming refs | +| `properties` | Property sub-schemas (with edge annotations) | +| `union-members` | allOf/oneOf/anyOf children (with edge annotations) | +| `items` | Array items schema (with edge annotations) | +| `ops` | Schemas → operations | +| `schemas` | Operations → schemas | +| `path(A; B)` | Shortest path between two schemas | +| `connected` | Full connected component (schemas + operations) | +| `blast-radius` | Ancestors + all affected operations | +| `neighbors(N)` | Bidirectional neighborhood within N hops | + +### Analysis Stages + +| Stage | Description | +|-------|-------------| +| `orphans` | Schemas with no incoming refs and no operation usage | +| `leaves` | Schemas with no outgoing refs (terminal nodes) | +| `cycles` | Strongly connected components (actual cycles) | +| `clusters` | Weakly connected component grouping | +| `tag-boundary` | Schemas used by operations across multiple tags | +| `shared-refs` | Schemas shared by ALL operations in result set | + +### Filter & Transform Stages + +| Stage | Description | +|-------|-------------| +| `select(expr)` | Filter by predicate (jq-style) | +| `pick f1, f2` | Project fields | +| `sort_by(field)` / `sort_by(field; desc)` | Sort (ascending by default) | +| `first(N)` | Limit to first N results | +| `last(N)` | Limit to last N results | +| `sample(N)` | Deterministic random sample | +| `top(N; field)` | Sort desc + take | +| `bottom(N; 
field)` | Sort asc + take | +| `unique` | Deduplicate | +| `group_by(field)` | Group and count | +| `length` | Count rows | +| `let $var = expr` | Bind expression result to a variable | + +**Legacy syntax** (`where`, `sort`, `take`, `head`, `select fields`, `group-by`, `count`) is still supported. + +### Meta Stages + +| Stage | Description | +|-------|-------------| +| `explain` | Print query plan | +| `fields` | List available fields | +| `format(fmt)` | Set output format (table/json/markdown/toon) | + +### Function Definitions & Modules + +Define reusable functions with `def` and load them from `.oq` files with `include`: + +``` +# Inline definitions +def hot: select(in_degree > 10); +def impact($name): select(name == $name) | blast-radius; +schemas.components | hot | pick name, in_degree + +# Load from file +include "stdlib.oq"; +schemas.components | hot | pick name, in_degree +``` + +Def syntax: `def name: body;` or `def name($p1; $p2): body;` +Module search paths: current directory, then `~/.config/oq/` + +## Fields + +### Schema Fields + +| Field | Type | Description | +|-------|------|-------------| +| `name` | string | Component name or JSON pointer | +| `type` | string | Schema type | +| `depth` | int | Max nesting depth | +| `in_degree` | int | Incoming reference count | +| `out_degree` | int | Outgoing reference count | +| `union_width` | int | Union member count | +| `property_count` | int | Property count | +| `is_component` | bool | In components/schemas | +| `is_inline` | bool | Defined inline | +| `is_circular` | bool | Part of circular reference | +| `has_ref` | bool | Has $ref | +| `hash` | string | Content hash | +| `path` | string | JSON pointer | +| `op_count` | int | Operations using this schema | +| `tag_count` | int | Distinct tags across operations | + +### Operation Fields + +| Field | Type | Description | +|-------|------|-------------| +| `name` | string | operationId or METHOD /path | +| `method` | string | HTTP method | +| `path` | 
string | URL path | +| `operation_id` | string | operationId | +| `schema_count` | int | Reachable schema count | +| `component_count` | int | Reachable component count | +| `tag` | string | First tag | +| `parameter_count` | int | Parameter count | +| `deprecated` | bool | Deprecated flag | +| `description` | string | Description | +| `summary` | string | Summary | + +### Edge Annotation Fields + +Available on rows produced by 1-hop traversal stages (`refs-out`, `refs-in`, `properties`, `union-members`, `items`): + +| Field | Type | Description | +|-------|------|-------------| +| `edge_kind` | string | Edge type: property, items, allOf, oneOf, ref, ... | +| `edge_label` | string | Edge label: property name, array index, etc. | +| `edge_from` | string | Source node name | + +## Expressions + +oq supports a rich expression language used in `select()`, `let`, and `if-then-else`: + +``` +depth > 5 +type == "object" +name matches "Error.*" +property_count > 3 and not is_component +has(oneOf) and not has(discriminator) +(depth > 10 or union_width > 5) and is_component +name // "unnamed" # alternative: fallback if null/falsy +if is_component then depth > 3 else true end # conditional +"prefix_\(name)" # string interpolation +``` + +### Operators + +| Operator | Description | +|----------|-------------| +| `==`, `!=`, `>`, `<`, `>=`, `<=` | Comparison | +| `and`, `or`, `not` | Logical | +| `//` | Alternative (returns left if truthy, else right) | +| `has(field)` | True if field is non-null/non-zero | +| `matches "regex"` | Regex match | +| `if cond then a else b end` | Conditional (elif supported) | +| `\(expr)` | String interpolation inside `"..."` | + +### Variables + +Use `let` to bind values for use in later stages: + +``` +schemas | select(name == "Pet") | let $pet = name | reachable | select(name != $pet) +``` + +## Output Formats + +Use `--format` flag or inline `format` stage: + +```bash +openapi spec query 'schemas | count' spec.yaml --format json +openapi spec 
query 'schemas | first(5) | format(markdown)' spec.yaml +``` + +| Format | Description | +|--------|-------------| +| `table` | Aligned columns (default) | +| `json` | JSON array | +| `markdown` | Markdown table | +| `toon` | [TOON](https://github.com/toon-format/toon) tabular format | + +## Examples + +```bash +# Wide union trees +schemas | select(union_width > 0) | sort_by(union_width; desc) | first(10) + +# Central schemas (most referenced) +schemas.components | sort_by(in_degree; desc) | first(10) | pick name, in_degree + +# Operation sprawl +operations | sort_by(schema_count; desc) | first(10) | pick name, schema_count + +# Circular references +schemas | select(is_circular) | pick name, path + +# Shortest path between schemas +schemas | path(Pet; Address) | pick name + +# Walk an operation to connected schemas and back to operations +operations | select(name == "GET /users") | schemas | ops | pick name, method, path + +# Explain query plan +schemas.components | select(depth > 5) | sort_by(depth; desc) | explain + +# Regex filter +schemas | select(name matches "Error.*") | pick name, path + +# Group by type +schemas | group_by(type) + +# Edge annotations — how does Pet reference other schemas? +schemas.components | select(name == "Pet") | refs-out | pick name, edge_kind, edge_label, edge_from + +# Blast radius — what breaks if Error changes? 
+schemas.components | select(name == "Error") | blast-radius | length + +# 2-hop neighborhood +schemas.components | select(name == "Pet") | neighbors(2) | pick name + +# Orphaned schemas +schemas.components | orphans | pick name + +# Leaf nodes +schemas.components | leaves | pick name, in_degree + +# Detect cycles +schemas | cycles + +# Discover clusters +schemas.components | clusters + +# Cross-tag schemas +schemas | tag-boundary | pick name, tag_count + +# Schemas shared across all operations +operations | shared-refs | pick name, op_count + +# Variable binding — find Pet's reachable schemas (excluding Pet itself) +schemas | select(name == "Pet") | let $pet = name | reachable | select(name != $pet) | pick name + +# User-defined functions +def hot: select(in_degree > 10); +def impact($name): select(name == $name) | blast-radius; +schemas.components | hot | pick name, in_degree + +# Alternative operator — fallback for missing values +schemas | select(name // "unnamed" != "unnamed") | pick name +``` + +## CLI Reference + +```bash +# Run query-reference for the full language reference +openapi spec query-reference + +# Inline query +openapi spec query '' + +# Query from file +openapi spec query -f query.oq + +# With output format +openapi spec query '' --format json + +# From stdin +cat spec.yaml | openapi spec query '' +``` diff --git a/oq/exec.go b/oq/exec.go new file mode 100644 index 0000000..e0b6516 --- /dev/null +++ b/oq/exec.go @@ -0,0 +1,1074 @@ +package oq + +import ( + "errors" + "fmt" + "math/rand/v2" + "slices" + "sort" + "strconv" + "strings" + + "github.com/speakeasy-api/openapi/graph" + "github.com/speakeasy-api/openapi/oq/expr" +) + +func run(stages []Stage, g *graph.SchemaGraph) (*Result, error) { + if len(stages) == 0 { + return &Result{}, nil + } + + // Check if explain stage is present + for _, stage := range stages { + if stage.Kind == StageExplain { + return &Result{Explain: buildExplain(stages)}, nil + } + } + + // Execute source stage + 
result, err := execSource(stages[0], g) + if err != nil { + return nil, err + } + + // Thread env through stages for let bindings + env := map[string]expr.Value{} + + // Execute remaining stages + for _, stage := range stages[1:] { + result, env, err = execStageWithEnv(stage, result, g, env) + if err != nil { + return nil, err + } + } + + return result, nil +} + +func execSource(stage Stage, g *graph.SchemaGraph) (*Result, error) { + result := &Result{} + switch stage.Source { + case "schemas": + for i := range g.Schemas { + result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) + } + case "schemas.components": + for i, s := range g.Schemas { + if s.IsComponent { + result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) + } + } + case "schemas.inline": + for i, s := range g.Schemas { + if s.IsInline { + result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) + } + } + case "operations": + for i := range g.Operations { + result.Rows = append(result.Rows, Row{Kind: OperationResult, OpIdx: i}) + } + default: + return nil, fmt.Errorf("unknown source: %q", stage.Source) + } + return result, nil +} + +func execStageWithEnv(stage Stage, result *Result, g *graph.SchemaGraph, env map[string]expr.Value) (*Result, map[string]expr.Value, error) { + switch stage.Kind { + case StageLet: + r, newEnv, err := execLet(stage, result, g, env) + return r, newEnv, err + case StageWhere: + r, err := execWhere(stage, result, g, env) + return r, env, err + default: + r, err := execStage(stage, result, g) + return r, env, err + } +} + +func execStage(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + switch stage.Kind { + case StageWhere: + return execWhere(stage, result, g, nil) + case StageLast: + return execLast(stage, result) + case StageSelect: + result.Fields = stage.Fields + return result, nil + case StageSort: + return execSort(stage, result, g) + case StageTake: + return execTake(stage, result) + case 
StageUnique: + return execUnique(result) + case StageGroupBy: + return execGroupBy(stage, result, g) + case StageCount: + return &Result{IsCount: true, Count: len(result.Rows)}, nil + case StageRefsOut: + return execTraversal(result, g, traverseRefsOut) + case StageRefsIn: + return execTraversal(result, g, traverseRefsIn) + case StageReachable: + return execTraversal(result, g, traverseReachable) + case StageAncestors: + return execTraversal(result, g, traverseAncestors) + case StageProperties: + return execTraversal(result, g, traverseProperties) + case StageUnionMembers: + return execTraversal(result, g, traverseUnionMembers) + case StageItems: + return execTraversal(result, g, traverseItems) + case StageOps: + return execSchemasToOps(result, g) + case StageSchemas: + return execOpsToSchemas(result, g) + case StageFields: + return execFields(result) + case StageSample: + return execSample(stage, result) + case StagePath: + return execPath(stage, g) + case StageTop: + // Expand to sort desc + take + sorted, err := execSort(Stage{Kind: StageSort, SortField: stage.SortField, SortDesc: true}, result, g) + if err != nil { + return nil, err + } + return execTake(Stage{Kind: StageTake, Limit: stage.Limit}, sorted) + case StageBottom: + // Expand to sort asc + take + sorted, err := execSort(Stage{Kind: StageSort, SortField: stage.SortField, SortDesc: false}, result, g) + if err != nil { + return nil, err + } + return execTake(Stage{Kind: StageTake, Limit: stage.Limit}, sorted) + case StageFormat: + result.FormatHint = stage.Format + return result, nil + case StageConnected: + return execConnected(result, g) + case StageBlastRadius: + return execBlastRadius(result, g) + case StageNeighbors: + return execNeighbors(stage, result, g) + case StageOrphans: + return execOrphans(result, g) + case StageLeaves: + return execLeaves(result, g) + case StageCycles: + return execCycles(result, g) + case StageClusters: + return execClusters(result, g) + case StageTagBoundary: + return 
execTagBoundary(result, g) + case StageSharedRefs: + return execSharedRefs(result, g) + default: + return nil, fmt.Errorf("unimplemented stage kind: %d", stage.Kind) + } +} + +func execWhere(stage Stage, result *Result, g *graph.SchemaGraph, env map[string]expr.Value) (*Result, error) { + predicate, err := expr.Parse(stage.Expr) + if err != nil { + return nil, fmt.Errorf("where expression error: %w", err) + } + + filtered := &Result{Fields: result.Fields} + for _, row := range result.Rows { + r := rowAdapter{row: row, g: g, env: env} + val := predicate.Eval(r) + if val.Kind == expr.KindBool && val.Bool { + filtered.Rows = append(filtered.Rows, row) + } + } + return filtered, nil +} + +func execLast(stage Stage, result *Result) (*Result, error) { + rows := result.Rows + if stage.Limit < len(rows) { + rows = rows[len(rows)-stage.Limit:] + } + return &Result{ + Fields: result.Fields, + FormatHint: result.FormatHint, + Rows: slices.Clone(rows), + }, nil +} + +func execLet(stage Stage, result *Result, g *graph.SchemaGraph, env map[string]expr.Value) (*Result, map[string]expr.Value, error) { + predicate, err := expr.Parse(stage.Expr) + if err != nil { + return nil, env, fmt.Errorf("let expression error: %w", err) + } + + // Evaluate against first row + newEnv := make(map[string]expr.Value, len(env)+1) + for k, v := range env { + newEnv[k] = v + } + + if len(result.Rows) > 0 { + r := rowAdapter{row: result.Rows[0], g: g, env: env} + val := predicate.Eval(r) + newEnv[stage.VarName] = val + } else { + newEnv[stage.VarName] = expr.NullVal() + } + + return result, newEnv, nil +} + +func execSort(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + sorted := &Result{ + Fields: result.Fields, + FormatHint: result.FormatHint, + Rows: slices.Clone(result.Rows), + } + sort.SliceStable(sorted.Rows, func(i, j int) bool { + vi := fieldValue(sorted.Rows[i], stage.SortField, g) + vj := fieldValue(sorted.Rows[j], stage.SortField, g) + + cmp := compareValues(vi, vj) + 
if stage.SortDesc { + return cmp > 0 + } + return cmp < 0 + }) + return sorted, nil +} + +func execTake(stage Stage, result *Result) (*Result, error) { + rows := result.Rows + if stage.Limit < len(rows) { + rows = rows[:stage.Limit] + } + return &Result{ + Fields: result.Fields, + FormatHint: result.FormatHint, + Rows: slices.Clone(rows), + }, nil +} + +func execUnique(result *Result) (*Result, error) { + seen := make(map[string]bool) + filtered := &Result{Fields: result.Fields} + for _, row := range result.Rows { + key := rowKey(row) + if !seen[key] { + seen[key] = true + filtered.Rows = append(filtered.Rows, row) + } + } + return filtered, nil +} + +func execGroupBy(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + if len(stage.Fields) == 0 { + return nil, errors.New("group-by requires at least one field") + } + field := stage.Fields[0] + + type group struct { + count int + names []string + } + groups := make(map[string]*group) + var order []string + + for _, row := range result.Rows { + v := fieldValue(row, field, g) + key := valueToString(v) + grp, exists := groups[key] + if !exists { + grp = &group{} + groups[key] = grp + order = append(order, key) + } + grp.count++ + nameV := fieldValue(row, "name", g) + grp.names = append(grp.names, valueToString(nameV)) + } + + grouped := &Result{Fields: result.Fields} + for _, key := range order { + grp, ok := groups[key] + if !ok { + continue + } + grouped.Groups = append(grouped.Groups, GroupResult{ + Key: key, + Count: grp.count, + Names: grp.names, + }) + } + return grouped, nil +} + +// --- Traversal --- + +type traversalFunc func(row Row, g *graph.SchemaGraph) []Row + +func execTraversal(result *Result, g *graph.SchemaGraph, fn traversalFunc) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[string]bool) + for _, row := range result.Rows { + for _, newRow := range fn(row, g) { + key := edgeRowKey(newRow) + if !seen[key] { + seen[key] = true + out.Rows = 
append(out.Rows, newRow) + } + } + } + return out, nil +} + +func edgeRowKey(row Row) string { + base := rowKey(row) + if row.EdgeKind == "" { + return base + } + return base + "|" + row.EdgeFrom + "|" + row.EdgeKind + "|" + row.EdgeLabel +} + +func traverseRefsOut(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + fromName := schemaName(row.SchemaIdx, g) + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.To), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) + } + return result +} + +func traverseRefsIn(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + toName := schemaName(row.SchemaIdx, g) + var result []Row + for _, edge := range g.InEdges(graph.NodeID(row.SchemaIdx)) { + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.From), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: toName, + }) + } + return result +} + +func traverseReachable(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + ids := g.Reachable(graph.NodeID(row.SchemaIdx)) + result := make([]Row, len(ids)) + for i, id := range ids { + result[i] = Row{Kind: SchemaResult, SchemaIdx: int(id)} + } + return result +} + +func traverseAncestors(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + ids := g.Ancestors(graph.NodeID(row.SchemaIdx)) + result := make([]Row, len(ids)) + for i, id := range ids { + result[i] = Row{Kind: SchemaResult, SchemaIdx: int(id)} + } + return result +} + +func traverseProperties(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + fromName := schemaName(row.SchemaIdx, g) + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + if edge.Kind == graph.EdgeProperty { + 
result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.To), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) + } + } + return result +} + +func traverseUnionMembers(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + fromName := schemaName(row.SchemaIdx, g) + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + if edge.Kind == graph.EdgeAllOf || edge.Kind == graph.EdgeOneOf || edge.Kind == graph.EdgeAnyOf { + // Follow through $ref nodes transparently + target := resolveRefTarget(int(edge.To), g) + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: target, + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) + } + } + return result +} + +func traverseItems(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + fromName := schemaName(row.SchemaIdx, g) + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + if edge.Kind == graph.EdgeItems { + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.To), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) + } + } + return result +} + +// resolveRefTarget follows EdgeRef edges to get the actual target node. +// If the node at idx is a $ref wrapper, returns the target component's index. +// Otherwise returns idx unchanged. 
+func resolveRefTarget(idx int, g *graph.SchemaGraph) int { + if idx < 0 || idx >= len(g.Schemas) { + return idx + } + node := &g.Schemas[idx] + if !node.HasRef { + return idx + } + // Follow EdgeRef edges + for _, edge := range g.OutEdges(graph.NodeID(idx)) { + if edge.Kind == graph.EdgeRef { + return int(edge.To) + } + } + return idx +} + +func execSchemasToOps(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + opIDs := g.SchemaOperations(graph.NodeID(row.SchemaIdx)) + for _, opID := range opIDs { + idx := int(opID) + if !seen[idx] { + seen[idx] = true + out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: idx}) + } + } + } + return out, nil +} + +func execOpsToSchemas(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind != OperationResult { + continue + } + schemaIDs := g.OperationSchemas(graph.NodeID(row.OpIdx)) + for _, sid := range schemaIDs { + idx := int(sid) + if !seen[idx] { + seen[idx] = true + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: idx}) + } + } + } + return out, nil +} + +func execConnected(result *Result, g *graph.SchemaGraph) (*Result, error) { + var schemaSeeds, opSeeds []graph.NodeID + for _, row := range result.Rows { + switch row.Kind { + case SchemaResult: + schemaSeeds = append(schemaSeeds, graph.NodeID(row.SchemaIdx)) + case OperationResult: + opSeeds = append(opSeeds, graph.NodeID(row.OpIdx)) + } + } + + schemas, ops := g.ConnectedComponent(schemaSeeds, opSeeds) + + out := &Result{Fields: result.Fields} + for _, id := range schemas { + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) + } + for _, id := range ops { + out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: int(id)}) + } + return out, nil 
+} + +func execBlastRadius(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seenSchemas := make(map[int]bool) + seenOps := make(map[int]bool) + + // Collect seed schemas + var seeds []graph.NodeID + for _, row := range result.Rows { + if row.Kind == SchemaResult { + seeds = append(seeds, graph.NodeID(row.SchemaIdx)) + seenSchemas[row.SchemaIdx] = true + } + } + + // Find all ancestors (schemas that depend on the seeds) + for _, seed := range seeds { + for _, aid := range g.Ancestors(seed) { + seenSchemas[int(aid)] = true + } + } + + // Collect and sort schema indices for deterministic output + schemaIndices := make([]int, 0, len(seenSchemas)) + for idx := range seenSchemas { + schemaIndices = append(schemaIndices, idx) + } + sort.Ints(schemaIndices) + + // Add schema rows + for _, idx := range schemaIndices { + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: idx}) + } + + // Find all operations that reference any affected schema + for _, idx := range schemaIndices { + for _, opID := range g.SchemaOperations(graph.NodeID(idx)) { + if !seenOps[int(opID)] { + seenOps[int(opID)] = true + out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: int(opID)}) + } + } + } + + return out, nil +} + +func execNeighbors(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[int]bool) + + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + // Include seed + if !seen[row.SchemaIdx] { + seen[row.SchemaIdx] = true + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: row.SchemaIdx}) + } + for _, id := range g.Neighbors(graph.NodeID(row.SchemaIdx), stage.Limit) { + if !seen[int(id)] { + seen[int(id)] = true + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) + } + } + } + + return out, nil +} + +func execOrphans(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := 
&Result{Fields: result.Fields} + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + s := &g.Schemas[row.SchemaIdx] + if s.InDegree == 0 && g.SchemaOpCount(graph.NodeID(row.SchemaIdx)) == 0 { + out.Rows = append(out.Rows, row) + } + } + return out, nil +} + +func execLeaves(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + if g.Schemas[row.SchemaIdx].OutDegree == 0 { + out.Rows = append(out.Rows, row) + } + } + return out, nil +} + +func execCycles(result *Result, g *graph.SchemaGraph) (*Result, error) { + sccs := g.StronglyConnectedComponents() + + // Filter SCCs to only include nodes present in the current result + resultNodes := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind == SchemaResult { + resultNodes[row.SchemaIdx] = true + } + } + + out := &Result{Fields: result.Fields} + for i, scc := range sccs { + hasMatch := false + for _, id := range scc { + if resultNodes[int(id)] { + hasMatch = true + break + } + } + if !hasMatch { + continue + } + var names []string + for _, id := range scc { + if int(id) < len(g.Schemas) { + names = append(names, g.Schemas[id].Name) + } + } + out.Groups = append(out.Groups, GroupResult{ + Key: "cycle-" + strconv.Itoa(i+1), + Count: len(scc), + Names: names, + }) + } + + return out, nil +} + +func execClusters(result *Result, g *graph.SchemaGraph) (*Result, error) { + resultNodes := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind == SchemaResult { + resultNodes[row.SchemaIdx] = true + } + } + + // Sort node indices for deterministic iteration + sortedNodes := make([]int, 0, len(resultNodes)) + for idx := range resultNodes { + sortedNodes = append(sortedNodes, idx) + } + sort.Ints(sortedNodes) + + // BFS to find connected components. 
Follow ALL graph edges (including + // through intermediary nodes like $ref wrappers) but only collect + // nodes that are in the result set. + assigned := make(map[int]bool) // result nodes already assigned to a cluster + out := &Result{Fields: result.Fields} + clusterNum := 0 + + for _, idx := range sortedNodes { + if assigned[idx] { + continue + } + clusterNum++ + var component []int + + // BFS through the full graph + visited := make(map[int]bool) + queue := []int{idx} + visited[idx] = true + + for len(queue) > 0 { + cur := queue[0] + queue = queue[1:] + + if resultNodes[cur] && !assigned[cur] { + assigned[cur] = true + component = append(component, cur) + } + + for _, edge := range g.OutEdges(graph.NodeID(cur)) { + to := int(edge.To) + if !visited[to] { + visited[to] = true + queue = append(queue, to) + } + } + for _, edge := range g.InEdges(graph.NodeID(cur)) { + from := int(edge.From) + if !visited[from] { + visited[from] = true + queue = append(queue, from) + } + } + } + + var names []string + for _, id := range component { + if id < len(g.Schemas) { + names = append(names, g.Schemas[id].Name) + } + } + if len(component) > 0 { + out.Groups = append(out.Groups, GroupResult{ + Key: "cluster-" + strconv.Itoa(clusterNum), + Count: len(component), + Names: names, + }) + } + } + + return out, nil +} + +func execTagBoundary(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + if schemaTagCount(row.SchemaIdx, g) > 1 { + out.Rows = append(out.Rows, row) + } + } + return out, nil +} + +func schemaTagCount(schemaIdx int, g *graph.SchemaGraph) int { + tags := make(map[string]bool) + for _, opID := range g.SchemaOperations(graph.NodeID(schemaIdx)) { + if int(opID) < len(g.Operations) { + op := &g.Operations[opID] + if op.Operation != nil { + for _, tag := range op.Operation.Tags { + tags[tag] = true + } + } + } + } + return len(tags) +} + 
+func execSharedRefs(result *Result, g *graph.SchemaGraph) (*Result, error) { + var ops []graph.NodeID + for _, row := range result.Rows { + if row.Kind == OperationResult { + ops = append(ops, graph.NodeID(row.OpIdx)) + } + } + + if len(ops) == 0 { + return &Result{Fields: result.Fields}, nil + } + + // Start with first operation's schemas + intersection := make(map[graph.NodeID]bool) + for _, sid := range g.OperationSchemas(ops[0]) { + intersection[sid] = true + } + + // Intersect with each subsequent operation + for _, opID := range ops[1:] { + opSchemas := make(map[graph.NodeID]bool) + for _, sid := range g.OperationSchemas(opID) { + opSchemas[sid] = true + } + for sid := range intersection { + if !opSchemas[sid] { + delete(intersection, sid) + } + } + } + + // Sort for deterministic output + sortedIDs := make([]int, 0, len(intersection)) + for sid := range intersection { + sortedIDs = append(sortedIDs, int(sid)) + } + sort.Ints(sortedIDs) + + out := &Result{Fields: result.Fields} + for _, sid := range sortedIDs { + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: sid}) + } + return out, nil +} + +// --- Edge annotation helpers --- + +func schemaName(idx int, g *graph.SchemaGraph) string { + if idx >= 0 && idx < len(g.Schemas) { + return g.Schemas[idx].Name + } + return "" +} + +func edgeKindString(k graph.EdgeKind) string { + switch k { + case graph.EdgeProperty: + return "property" + case graph.EdgeItems: + return "items" + case graph.EdgeAllOf: + return "allOf" + case graph.EdgeOneOf: + return "oneOf" + case graph.EdgeAnyOf: + return "anyOf" + case graph.EdgeAdditionalProps: + return "additionalProperties" + case graph.EdgeNot: + return "not" + case graph.EdgeIf: + return "if" + case graph.EdgeThen: + return "then" + case graph.EdgeElse: + return "else" + case graph.EdgeContains: + return "contains" + case graph.EdgePrefixItems: + return "prefixItems" + case graph.EdgeDependentSchema: + return "dependentSchema" + case graph.EdgePatternProperty: 
+ return "patternProperty" + case graph.EdgePropertyNames: + return "propertyNames" + case graph.EdgeUnevaluatedItems: + return "unevaluatedItems" + case graph.EdgeUnevaluatedProps: + return "unevaluatedProperties" + case graph.EdgeRef: + return "ref" + default: + return "unknown" + } +} + +// --- Explain --- + +func buildExplain(stages []Stage) string { + var sb strings.Builder + for i, stage := range stages { + if stage.Kind == StageExplain { + continue + } + if i == 0 { + fmt.Fprintf(&sb, "Source: %s\n", stage.Source) + } else { + desc := describeStage(stage) + fmt.Fprintf(&sb, " → %s\n", desc) + } + } + return sb.String() +} + +func describeStage(stage Stage) string { + switch stage.Kind { + case StageWhere: + return "Filter: select(" + stage.Expr + ")" + case StageSelect: + return "Project: pick " + strings.Join(stage.Fields, ", ") + case StageSort: + dir := "asc" + if stage.SortDesc { + dir = "desc" + } + return "Sort: sort_by(" + stage.SortField + "; " + dir + ")" + case StageTake: + return "Limit: first(" + strconv.Itoa(stage.Limit) + ")" + case StageLast: + return "Limit: last(" + strconv.Itoa(stage.Limit) + ")" + case StageLet: + return "Bind: let " + stage.VarName + " = " + stage.Expr + case StageUnique: + return "Unique: deduplicate rows" + case StageGroupBy: + return "Group: group-by " + strings.Join(stage.Fields, ", ") + case StageCount: + return "Count: count rows" + case StageRefsOut: + return "Traverse: outgoing references" + case StageRefsIn: + return "Traverse: incoming references" + case StageReachable: + return "Traverse: all reachable nodes" + case StageAncestors: + return "Traverse: all ancestor nodes" + case StageProperties: + return "Traverse: property children" + case StageUnionMembers: + return "Traverse: union members" + case StageItems: + return "Traverse: array items" + case StageOps: + return "Navigate: schemas to operations" + case StageSchemas: + return "Navigate: operations to schemas" + case StageFields: + return "Terminal: list 
available fields" + case StageSample: + return "Sample: random " + strconv.Itoa(stage.Limit) + " rows" + case StagePath: + return "Path: shortest path from " + stage.PathFrom + " to " + stage.PathTo + case StageTop: + return "Top: " + strconv.Itoa(stage.Limit) + " by " + stage.SortField + " descending" + case StageBottom: + return "Bottom: " + strconv.Itoa(stage.Limit) + " by " + stage.SortField + " ascending" + case StageFormat: + return "Format: " + stage.Format + case StageConnected: + return "Traverse: full connected component (schemas + operations)" + case StageBlastRadius: + return "Traverse: blast radius (ancestors + affected operations)" + case StageNeighbors: + return "Traverse: bidirectional neighbors within " + strconv.Itoa(stage.Limit) + " hops" + case StageOrphans: + return "Filter: schemas with no incoming refs and no operation usage" + case StageLeaves: + return "Filter: schemas with no outgoing refs (leaf nodes)" + case StageCycles: + return "Analyze: strongly connected components (actual cycles)" + case StageClusters: + return "Analyze: weakly connected component clusters" + case StageTagBoundary: + return "Filter: schemas used by operations across multiple tags" + case StageSharedRefs: + return "Analyze: schemas shared by all operations in result" + default: + return "Unknown stage" + } +} + +// --- Fields --- + +func execFields(result *Result) (*Result, error) { + var sb strings.Builder + kind := SchemaResult + if len(result.Rows) > 0 { + kind = result.Rows[0].Kind + } + + if kind == SchemaResult { + sb.WriteString("Field Type\n") + sb.WriteString("----------- ------\n") + fields := []struct{ name, typ string }{ + {"name", "string"}, + {"type", "string"}, + {"depth", "int"}, + {"in_degree", "int"}, + {"out_degree", "int"}, + {"union_width", "int"}, + {"property_count", "int"}, + {"is_component", "bool"}, + {"is_inline", "bool"}, + {"is_circular", "bool"}, + {"has_ref", "bool"}, + {"hash", "string"}, + {"path", "string"}, + {"op_count", "int"}, + 
{"tag_count", "int"}, + {"edge_kind", "string"}, + {"edge_label", "string"}, + {"edge_from", "string"}, + } + for _, f := range fields { + fmt.Fprintf(&sb, "%-17s %s\n", f.name, f.typ) + } + } else { + sb.WriteString("Field Type\n") + sb.WriteString("----------- ------\n") + fields := []struct{ name, typ string }{ + {"name", "string"}, + {"method", "string"}, + {"path", "string"}, + {"operation_id", "string"}, + {"schema_count", "int"}, + {"component_count", "int"}, + {"tag", "string"}, + {"parameter_count", "int"}, + {"deprecated", "bool"}, + {"description", "string"}, + {"summary", "string"}, + {"edge_kind", "string"}, + {"edge_label", "string"}, + {"edge_from", "string"}, + } + for _, f := range fields { + fmt.Fprintf(&sb, "%-17s %s\n", f.name, f.typ) + } + } + + return &Result{Explain: sb.String()}, nil +} + +// --- Sample --- + +func execSample(stage Stage, result *Result) (*Result, error) { + if stage.Limit >= len(result.Rows) { + return result, nil + } + + // Deterministic shuffle using Fisher-Yates with a fixed seed derived from row count. + rows := append([]Row{}, result.Rows...) 
+ rng := rand.New(rand.NewPCG(uint64(len(rows)), 0)) //nolint:gosec // deterministic seed is intentional + rng.Shuffle(len(rows), func(i, j int) { + rows[i], rows[j] = rows[j], rows[i] + }) + + out := &Result{Fields: result.Fields} + out.Rows = rows[:stage.Limit] + return out, nil +} + +// --- Path --- + +func execPath(stage Stage, g *graph.SchemaGraph) (*Result, error) { + fromNode, ok := g.SchemaByName(stage.PathFrom) + if !ok { + return nil, fmt.Errorf("schema %q not found", stage.PathFrom) + } + toNode, ok := g.SchemaByName(stage.PathTo) + if !ok { + return nil, fmt.Errorf("schema %q not found", stage.PathTo) + } + + path := g.ShortestPath(fromNode.ID, toNode.ID) + out := &Result{} + for _, id := range path { + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) + } + return out, nil +} diff --git a/oq/expr/expr.go b/oq/expr/expr.go new file mode 100644 index 0000000..45af718 --- /dev/null +++ b/oq/expr/expr.go @@ -0,0 +1,621 @@ +// Package expr provides a predicate expression parser and evaluator for the oq query language. +package expr + +import ( + "errors" + "fmt" + "regexp" + "strconv" + "strings" +) + +// Value represents a typed value in the expression system. +type Value struct { + Kind ValueKind + Str string + Int int + Bool bool +} + +type ValueKind int + +const ( + KindString ValueKind = iota + KindInt + KindBool + KindNull +) + +// Row provides field access for predicate evaluation. +type Row interface { + Field(name string) Value +} + +// Expr is the interface for all expression nodes. 
+type Expr interface { + Eval(row Row) Value +} + +// --- Expression node types --- + +type binaryExpr struct { + op string + left Expr + right Expr +} + +type alternativeExpr struct { + left Expr + right Expr +} + +type ifExpr struct { + cond Expr + then_ Expr + else_ Expr // nil means return null +} + +type interpExpr struct { + parts []Expr +} + +type notExpr struct { + inner Expr +} + +type hasExpr struct { + field string +} + +type matchesExpr struct { + field string + pattern *regexp.Regexp +} + +type fieldExpr struct { + name string +} + +type literalExpr struct { + val Value +} + +func (e *binaryExpr) Eval(row Row) Value { + switch e.op { + case "and": + l := toBool(e.left.Eval(row)) + if !l { + return Value{Kind: KindBool, Bool: false} + } + return Value{Kind: KindBool, Bool: toBool(e.right.Eval(row))} + case "or": + l := toBool(e.left.Eval(row)) + if l { + return Value{Kind: KindBool, Bool: true} + } + return Value{Kind: KindBool, Bool: toBool(e.right.Eval(row))} + case "==": + return Value{Kind: KindBool, Bool: equal(e.left.Eval(row), e.right.Eval(row))} + case "!=": + return Value{Kind: KindBool, Bool: !equal(e.left.Eval(row), e.right.Eval(row))} + case ">": + return Value{Kind: KindBool, Bool: compare(e.left.Eval(row), e.right.Eval(row)) > 0} + case "<": + return Value{Kind: KindBool, Bool: compare(e.left.Eval(row), e.right.Eval(row)) < 0} + case ">=": + return Value{Kind: KindBool, Bool: compare(e.left.Eval(row), e.right.Eval(row)) >= 0} + case "<=": + return Value{Kind: KindBool, Bool: compare(e.left.Eval(row), e.right.Eval(row)) <= 0} + default: + return Value{Kind: KindNull} + } +} + +func (e *notExpr) Eval(row Row) Value { + return Value{Kind: KindBool, Bool: !toBool(e.inner.Eval(row))} +} + +func (e *hasExpr) Eval(row Row) Value { + v := row.Field(e.field) + return Value{Kind: KindBool, Bool: v.Kind != KindNull && (v.Kind != KindInt || v.Int != 0) && (v.Kind != KindBool || v.Bool) && (v.Kind != KindString || v.Str != "")} +} + +func (e 
*matchesExpr) Eval(row Row) Value { + v := row.Field(e.field) + return Value{Kind: KindBool, Bool: v.Kind == KindString && e.pattern.MatchString(v.Str)} +} + +func (e *fieldExpr) Eval(row Row) Value { + return row.Field(e.name) +} + +func (e *literalExpr) Eval(_ Row) Value { + return e.val +} + +func (e *alternativeExpr) Eval(row Row) Value { + l := e.left.Eval(row) + if l.Kind != KindNull && toBool(l) { + return l + } + return e.right.Eval(row) +} + +func (e *ifExpr) Eval(row Row) Value { + cond := e.cond.Eval(row) + if toBool(cond) { + return e.then_.Eval(row) + } + if e.else_ != nil { + return e.else_.Eval(row) + } + return Value{Kind: KindNull} +} + +func (e *interpExpr) Eval(row Row) Value { + var sb strings.Builder + for _, part := range e.parts { + v := part.Eval(row) + sb.WriteString(toString(v)) + } + return StringVal(sb.String()) +} + +// --- Helpers --- + +func toBool(v Value) bool { + switch v.Kind { + case KindBool: + return v.Bool + case KindInt: + return v.Int != 0 + case KindString: + return v.Str != "" + default: + return false + } +} + +func equal(a, b Value) bool { + if a.Kind == KindString || b.Kind == KindString { + return toString(a) == toString(b) + } + if a.Kind == KindInt && b.Kind == KindInt { + return a.Int == b.Int + } + if a.Kind == KindBool && b.Kind == KindBool { + return a.Bool == b.Bool + } + return false +} + +func compare(a, b Value) int { + ai := toInt(a) + bi := toInt(b) + if ai < bi { + return -1 + } + if ai > bi { + return 1 + } + return 0 +} + +func toInt(v Value) int { + switch v.Kind { + case KindInt: + return v.Int + case KindBool: + if v.Bool { + return 1 + } + return 0 + case KindString: + n, _ := strconv.Atoi(v.Str) + return n + default: + return 0 + } +} + +func toString(v Value) string { + switch v.Kind { + case KindString: + return v.Str + case KindInt: + return strconv.Itoa(v.Int) + case KindBool: + return strconv.FormatBool(v.Bool) + default: + return "" + } +} + +// StringVal creates a string Value. 
+func StringVal(s string) Value { + return Value{Kind: KindString, Str: s} +} + +// IntVal creates an int Value. +func IntVal(n int) Value { + return Value{Kind: KindInt, Int: n} +} + +// BoolVal creates a bool Value. +func BoolVal(b bool) Value { + return Value{Kind: KindBool, Bool: b} +} + +// NullVal creates a null Value. +func NullVal() Value { + return Value{Kind: KindNull} +} + +// --- Parser --- + +// Parse parses a predicate expression string into an Expr tree. +func Parse(input string) (Expr, error) { + p := &parser{tokens: tokenize(input)} + expr, err := p.parseOr() + if err != nil { + return nil, err + } + if p.pos < len(p.tokens) { + return nil, fmt.Errorf("unexpected token: %q", p.tokens[p.pos]) + } + return expr, nil +} + +type parser struct { + tokens []string + pos int +} + +func (p *parser) peek() string { + if p.pos >= len(p.tokens) { + return "" + } + return p.tokens[p.pos] +} + +func (p *parser) next() string { + t := p.peek() + p.pos++ + return t +} + +func (p *parser) expect(tok string) error { + got := p.next() + if got != tok { + return fmt.Errorf("expected %q, got %q", tok, got) + } + return nil +} + +func (p *parser) parseOr() (Expr, error) { + left, err := p.parseAnd() + if err != nil { + return nil, err + } + for p.peek() == "or" { + p.next() + right, err := p.parseAnd() + if err != nil { + return nil, err + } + left = &binaryExpr{op: "or", left: left, right: right} + } + return left, nil +} + +func (p *parser) parseAnd() (Expr, error) { + left, err := p.parseComparison() + if err != nil { + return nil, err + } + for p.peek() == "and" { + p.next() + right, err := p.parseComparison() + if err != nil { + return nil, err + } + left = &binaryExpr{op: "and", left: left, right: right} + } + return left, nil +} + +func (p *parser) parseComparison() (Expr, error) { + left, err := p.parseAlternative() + if err != nil { + return nil, err + } + switch p.peek() { + case "==", "!=", ">", "<", ">=", "<=": + op := p.next() + right, err := 
p.parseUnary() + if err != nil { + return nil, err + } + return &binaryExpr{op: op, left: left, right: right}, nil + case "matches": + p.next() + patternTok := p.next() + pattern := strings.Trim(patternTok, "\"") + re, compileErr := regexp.Compile(pattern) + if compileErr != nil { + return nil, fmt.Errorf("invalid regex %q: %w", pattern, compileErr) + } + // left must be a field reference + fieldRef, ok := left.(*fieldExpr) + if !ok { + return nil, errors.New("matches requires a field on the left side") + } + return &matchesExpr{field: fieldRef.name, pattern: re}, nil + } + return left, nil +} + +func (p *parser) parseAlternative() (Expr, error) { + left, err := p.parseUnary() + if err != nil { + return nil, err + } + for p.peek() == "//" { + p.next() + right, err := p.parseUnary() + if err != nil { + return nil, err + } + left = &alternativeExpr{left: left, right: right} + } + return left, nil +} + +func (p *parser) parseUnary() (Expr, error) { + if p.peek() == "not" { + p.next() + inner, err := p.parseUnary() + if err != nil { + return nil, err + } + return ¬Expr{inner: inner}, nil + } + return p.parsePrimary() +} + +func (p *parser) parsePrimary() (Expr, error) { + tok := p.peek() + + // if-then-else-end + if tok == "if" { + return p.parseIf() + } + + // Parenthesized expression + if tok == "(" { + p.next() + expr, err := p.parseOr() + if err != nil { + return nil, err + } + if err := p.expect(")"); err != nil { + return nil, err + } + return expr, nil + } + + // Function calls + if tok == "has" { + p.next() + if err := p.expect("("); err != nil { + return nil, err + } + field := p.next() + if err := p.expect(")"); err != nil { + return nil, err + } + return &hasExpr{field: field}, nil + } + + if tok == "matches" { + p.next() + if err := p.expect("("); err != nil { + return nil, err + } + field := p.next() + if err := p.expect(","); err != nil { + return nil, err + } + patternTok := p.next() + pattern := strings.Trim(patternTok, "\"") + re, err := 
regexp.Compile(pattern) + if err != nil { + return nil, fmt.Errorf("invalid regex %q: %w", pattern, err) + } + if err := p.expect(")"); err != nil { + return nil, err + } + return &matchesExpr{field: field, pattern: re}, nil + } + + // String literal (possibly with interpolation) + if strings.HasPrefix(tok, "\"") { + p.next() + inner := tok[1 : len(tok)-1] // strip quotes + if strings.Contains(inner, "\\(") { + return parseInterpolation(inner) + } + return &literalExpr{val: StringVal(inner)}, nil + } + + // Boolean literals + if tok == "true" { + p.next() + return &literalExpr{val: BoolVal(true)}, nil + } + if tok == "false" { + p.next() + return &literalExpr{val: BoolVal(false)}, nil + } + + // Integer literal + if n, err := strconv.Atoi(tok); err == nil { + p.next() + return &literalExpr{val: IntVal(n)}, nil + } + + // Field reference + if tok != "" && tok != ")" && tok != "," { + p.next() + return &fieldExpr{name: tok}, nil + } + + return nil, fmt.Errorf("unexpected token: %q", tok) +} + +func (p *parser) parseIf() (Expr, error) { + p.next() // consume "if" + cond, err := p.parseOr() + if err != nil { + return nil, err + } + if err := p.expect("then"); err != nil { + return nil, err + } + then_, err := p.parseOr() + if err != nil { + return nil, err + } + var else_ Expr + switch p.peek() { + case "elif": + // elif chains into a nested ifExpr + // Rewrite "elif" token as "if" for recursive parsing + p.tokens[p.pos] = "if" + else_, err = p.parseIf() + if err != nil { + return nil, err + } + case "else": + p.next() + else_, err = p.parseOr() + if err != nil { + return nil, err + } + if err := p.expect("end"); err != nil { + return nil, err + } + case "end": + p.next() + default: + return nil, fmt.Errorf("expected \"else\", \"elif\", or \"end\", got %q", p.peek()) + } + return &ifExpr{cond: cond, then_: then_, else_: else_}, nil +} + +func parseInterpolation(s string) (Expr, error) { + var parts []Expr + for len(s) > 0 { + idx := strings.Index(s, "\\(") + if idx < 0 
{ + parts = append(parts, &literalExpr{val: StringVal(s)}) + break + } + if idx > 0 { + parts = append(parts, &literalExpr{val: StringVal(s[:idx])}) + } + s = s[idx+2:] + // Find matching closing paren + depth := 1 + end := 0 + for end < len(s) { + if s[end] == '(' { + depth++ + } else if s[end] == ')' { + depth-- + if depth == 0 { + break + } + } + end++ + } + if depth != 0 { + return nil, errors.New("unterminated interpolation \\(") + } + inner := s[:end] + e, err := Parse(inner) + if err != nil { + return nil, fmt.Errorf("interpolation error: %w", err) + } + parts = append(parts, e) + s = s[end+1:] + } + if len(parts) == 1 { + return parts[0], nil + } + return &interpExpr{parts: parts}, nil +} + +// tokenize splits an expression into tokens. +func tokenize(input string) []string { + var tokens []string + i := 0 + for i < len(input) { + ch := input[i] + + // Skip whitespace + if ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' { + i++ + continue + } + + // Two-character operators + if i+1 < len(input) { + two := input[i : i+2] + if two == "==" || two == "!=" || two == ">=" || two == "<=" || two == "//" { + tokens = append(tokens, two) + i += 2 + continue + } + } + + // Single-character tokens + if ch == '(' || ch == ')' || ch == ',' || ch == '>' || ch == '<' { + tokens = append(tokens, string(ch)) + i++ + continue + } + + // Quoted string + if ch == '"' { + j := i + 1 + for j < len(input) && input[j] != '"' { + if input[j] == '\\' && j+1 < len(input) { + j++ + } + j++ + } + if j < len(input) { + j++ + } + tokens = append(tokens, input[i:j]) + i = j + continue + } + + // Word (identifier, keyword, or number) + j := i + for j < len(input) && input[j] != ' ' && input[j] != '\t' && input[j] != '\n' && + input[j] != '(' && input[j] != ')' && input[j] != ',' && + input[j] != '>' && input[j] != '<' && input[j] != '=' && input[j] != '!' 
&& input[j] != '/' { + j++ + } + if j > i { + tokens = append(tokens, input[i:j]) + i = j + } else { + i++ + } + } + return tokens +} diff --git a/oq/expr/expr_test.go b/oq/expr/expr_test.go new file mode 100644 index 0000000..3c3924d --- /dev/null +++ b/oq/expr/expr_test.go @@ -0,0 +1,546 @@ +package expr_test + +import ( + "testing" + + "github.com/speakeasy-api/openapi/oq/expr" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type testRow map[string]expr.Value + +func (r testRow) Field(name string) expr.Value { + if v, ok := r[name]; ok { + return v + } + return expr.NullVal() +} + +func TestParse_Comparison_Success(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + expr string + row testRow + expected bool + }{ + { + name: "integer equality", + expr: `depth == 5`, + row: testRow{"depth": expr.IntVal(5)}, + expected: true, + }, + { + name: "integer inequality", + expr: `depth != 5`, + row: testRow{"depth": expr.IntVal(3)}, + expected: true, + }, + { + name: "greater than", + expr: `depth > 3`, + row: testRow{"depth": expr.IntVal(5)}, + expected: true, + }, + { + name: "less than false", + expr: `depth < 3`, + row: testRow{"depth": expr.IntVal(5)}, + expected: false, + }, + { + name: "string equality", + expr: `type == "object"`, + row: testRow{"type": expr.StringVal("object")}, + expected: true, + }, + { + name: "boolean field", + expr: `is_component`, + row: testRow{"is_component": expr.BoolVal(true)}, + expected: true, + }, + { + name: "and operator", + expr: `depth > 3 and is_component`, + row: testRow{"depth": expr.IntVal(5), "is_component": expr.BoolVal(true)}, + expected: true, + }, + { + name: "or operator", + expr: `depth > 10 or is_component`, + row: testRow{"depth": expr.IntVal(2), "is_component": expr.BoolVal(true)}, + expected: true, + }, + { + name: "not operator", + expr: `not is_inline`, + row: testRow{"is_inline": expr.BoolVal(false)}, + expected: true, + }, + { + name: "has function", + 
expr: `has(oneOf)`, + row: testRow{"oneOf": expr.IntVal(2)}, + expected: true, + }, + { + name: "has function false", + expr: `has(oneOf)`, + row: testRow{"oneOf": expr.IntVal(0)}, + expected: false, + }, + { + name: "matches operator", + expr: `name matches "Error.*"`, + row: testRow{"name": expr.StringVal("ErrorResponse")}, + expected: true, + }, + { + name: "matches operator no match", + expr: `name matches "Error.*"`, + row: testRow{"name": expr.StringVal("Pet")}, + expected: false, + }, + { + name: "complex expression", + expr: `property_count > 0 and in_degree == 0`, + row: testRow{"property_count": expr.IntVal(3), "in_degree": expr.IntVal(0)}, + expected: true, + }, + { + name: "parenthesized expression", + expr: `(depth > 3 or depth < 1) and is_component`, + row: testRow{"depth": expr.IntVal(5), "is_component": expr.BoolVal(true)}, + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + parsed, err := expr.Parse(tt.expr) + require.NoError(t, err) + + result := parsed.Eval(tt.row) + assert.Equal(t, expr.KindBool, result.Kind) + assert.Equal(t, tt.expected, result.Bool) + }) + } +} + +func TestParse_Error(t *testing.T) { + t.Parallel() + + _, err := expr.Parse("") + require.Error(t, err) + + _, err = expr.Parse("name matches \"[invalid\"") + require.Error(t, err) +} + +func TestParse_UnterminatedBackslashString(t *testing.T) { + t.Parallel() + + // Should not panic on unterminated string ending with backslash + assert.NotPanics(t, func() { + expr.Parse(`name == "x\`) //nolint:errcheck + }) +} + +func TestParse_UnterminatedFunction(t *testing.T) { + t.Parallel() + + // Should not panic when tokens are exhausted inside a function call + assert.NotPanics(t, func() { + _, err := expr.Parse(`has(field`) + require.Error(t, err) + }) + assert.NotPanics(t, func() { + _, err := expr.Parse(`matches(field,`) + require.Error(t, err) + }) +} + +func TestEval_Operators_Coverage(t *testing.T) { + t.Parallel() + + 
tests := []struct { + name string + exprStr string + row testRow + expected bool + }{ + { + name: "greater or equal true", + exprStr: `depth >= 5`, + row: testRow{"depth": expr.IntVal(5)}, + expected: true, + }, + { + name: "less or equal true", + exprStr: `depth <= 5`, + row: testRow{"depth": expr.IntVal(3)}, + expected: true, + }, + { + name: "less than true", + exprStr: `depth < 10`, + row: testRow{"depth": expr.IntVal(3)}, + expected: true, + }, + { + name: "and short-circuit false", + exprStr: `depth > 100 and is_component`, + row: testRow{"depth": expr.IntVal(1), "is_component": expr.BoolVal(true)}, + expected: false, + }, + { + name: "or short-circuit true", + exprStr: `is_component or depth > 100`, + row: testRow{"depth": expr.IntVal(1), "is_component": expr.BoolVal(true)}, + expected: true, + }, + { + name: "not true value", + exprStr: `not is_component`, + row: testRow{"is_component": expr.BoolVal(true)}, + expected: false, + }, + { + name: "has null field", + exprStr: `has(missing)`, + row: testRow{}, + expected: false, + }, + { + name: "has empty string", + exprStr: `has(name)`, + row: testRow{"name": expr.StringVal("")}, + expected: false, + }, + { + name: "has non-empty string", + exprStr: `has(name)`, + row: testRow{"name": expr.StringVal("Pet")}, + expected: true, + }, + { + name: "has false bool", + exprStr: `has(flag)`, + row: testRow{"flag": expr.BoolVal(false)}, + expected: false, + }, + { + name: "matches non-string field", + exprStr: `name matches ".*"`, + row: testRow{"name": expr.IntVal(42)}, + expected: false, + }, + { + name: "integer equality both sides", + exprStr: `depth == 0`, + row: testRow{"depth": expr.IntVal(0)}, + expected: true, + }, + { + name: "boolean equality", + exprStr: `is_component == is_inline`, + row: testRow{"is_component": expr.BoolVal(true), "is_inline": expr.BoolVal(true)}, + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + parsed, err := 
expr.Parse(tt.exprStr) + require.NoError(t, err) + result := parsed.Eval(tt.row) + assert.Equal(t, tt.expected, result.Bool) + }) + } +} + +func TestEval_TypeConversion_Coverage(t *testing.T) { + t.Parallel() + + // Test toBool with int + e, err := expr.Parse(`depth`) + require.NoError(t, err) + row := testRow{"depth": expr.IntVal(5)} + result := e.Eval(row) + assert.Equal(t, expr.KindInt, result.Kind) + + // Test toBool with string (non-empty = truthy in boolean context) + e, err = expr.Parse(`name and depth > 0`) + require.NoError(t, err) + row = testRow{"name": expr.StringVal("Pet"), "depth": expr.IntVal(1)} + result = e.Eval(row) + assert.True(t, result.Bool) + + // Test toBool with empty string (falsy) + e, err = expr.Parse(`name and depth > 0`) + require.NoError(t, err) + row = testRow{"name": expr.StringVal(""), "depth": expr.IntVal(1)} + result = e.Eval(row) + assert.False(t, result.Bool) + + // Test comparison with string-to-int coercion + e, err = expr.Parse(`depth > 0`) + require.NoError(t, err) + row = testRow{"depth": expr.BoolVal(true)} // bool true -> 1 in comparison + result = e.Eval(row) + assert.True(t, result.Bool) + + // Test string equality with int (cross-type via toString) + e, err = expr.Parse(`name == "5"`) + require.NoError(t, err) + row = testRow{"name": expr.IntVal(5)} + result = e.Eval(row) + assert.True(t, result.Bool) +} + +func TestParse_NullVal(t *testing.T) { + t.Parallel() + + v := expr.NullVal() + assert.Equal(t, expr.KindNull, v.Kind) +} + +func TestParse_LiteralValues(t *testing.T) { + t.Parallel() + + // true literal + e, err := expr.Parse(`true`) + require.NoError(t, err) + result := e.Eval(testRow{}) + assert.Equal(t, expr.KindBool, result.Kind) + assert.True(t, result.Bool) + + // false literal + e, err = expr.Parse(`false`) + require.NoError(t, err) + result = e.Eval(testRow{}) + assert.Equal(t, expr.KindBool, result.Kind) + assert.False(t, result.Bool) + + // numeric literal + e, err = expr.Parse(`depth > 0`) + 
require.NoError(t, err) + result = e.Eval(testRow{"depth": expr.IntVal(5)}) + assert.True(t, result.Bool) +} + +func TestParse_AlternativeOperator(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + exprStr string + row testRow + expected expr.Value + }{ + { + name: "left is truthy", + exprStr: `name // "default"`, + row: testRow{"name": expr.StringVal("Pet")}, + expected: expr.StringVal("Pet"), + }, + { + name: "left is null", + exprStr: `missing // "default"`, + row: testRow{}, + expected: expr.StringVal("default"), + }, + { + name: "left is empty string (falsy)", + exprStr: `name // "default"`, + row: testRow{"name": expr.StringVal("")}, + expected: expr.StringVal("default"), + }, + { + name: "left is false", + exprStr: `flag // true`, + row: testRow{"flag": expr.BoolVal(false)}, + expected: expr.BoolVal(true), + }, + { + name: "left is zero (falsy int)", + exprStr: `count // 42`, + row: testRow{"count": expr.IntVal(0)}, + expected: expr.IntVal(42), + }, + { + name: "left is nonzero int (truthy)", + exprStr: `count // 42`, + row: testRow{"count": expr.IntVal(5)}, + expected: expr.IntVal(5), + }, + { + name: "chained alternative", + exprStr: `a // b // "fallback"`, + row: testRow{"a": expr.NullVal(), "b": expr.StringVal("")}, + expected: expr.StringVal("fallback"), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + parsed, err := expr.Parse(tt.exprStr) + require.NoError(t, err) + result := parsed.Eval(tt.row) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestParse_IfThenElse(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + exprStr string + row testRow + expected expr.Value + }{ + { + name: "if true then value", + exprStr: `if is_component then depth else 0 end`, + row: testRow{"is_component": expr.BoolVal(true), "depth": expr.IntVal(5)}, + expected: expr.IntVal(5), + }, + { + name: "if false else value", + exprStr: `if is_component then depth else 0 end`, + row: 
testRow{"is_component": expr.BoolVal(false), "depth": expr.IntVal(5)}, + expected: expr.IntVal(0), + }, + { + name: "if without else returns null", + exprStr: `if is_component then depth end`, + row: testRow{"is_component": expr.BoolVal(false), "depth": expr.IntVal(5)}, + expected: expr.NullVal(), + }, + { + name: "nested if-then-else", + exprStr: `if depth > 10 then "deep" elif depth > 5 then "medium" else "shallow" end`, + row: testRow{"depth": expr.IntVal(7)}, + expected: expr.StringVal("medium"), + }, + { + name: "if in boolean context", + exprStr: `if is_component then depth > 3 else depth > 5 end`, + row: testRow{"is_component": expr.BoolVal(true), "depth": expr.IntVal(4)}, + expected: expr.BoolVal(true), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + parsed, err := expr.Parse(tt.exprStr) + require.NoError(t, err) + result := parsed.Eval(tt.row) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestParse_StringInterpolation(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + exprStr string + row testRow + expected string + }{ + { + name: "simple interpolation", + exprStr: `"hello \(name)"`, + row: testRow{"name": expr.StringVal("world")}, + expected: "hello world", + }, + { + name: "interpolation with expr", + exprStr: `"\(name) has depth \(depth)"`, + row: testRow{"name": expr.StringVal("Pet"), "depth": expr.IntVal(3)}, + expected: "Pet has depth 3", + }, + { + name: "no interpolation", + exprStr: `"plain string"`, + row: testRow{}, + expected: "plain string", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + parsed, err := expr.Parse(tt.exprStr) + require.NoError(t, err) + result := parsed.Eval(tt.row) + assert.Equal(t, expr.KindString, result.Kind) + assert.Equal(t, tt.expected, result.Str) + }) + } +} + +func TestParse_IfThenElse_Error(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + exprStr string + }{ + {"missing 
then", `if true depth end`}, + {"missing end", `if true then depth`}, + {"missing end after else", `if true then depth else 0`}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + _, err := expr.Parse(tt.exprStr) + assert.Error(t, err) + }) + } +} + +func TestParse_InterpolationError(t *testing.T) { + t.Parallel() + + // Unterminated interpolation + _, err := expr.Parse(`"hello \(name"`) + require.Error(t, err) +} + +func TestParse_ComplexPrecedence(t *testing.T) { + t.Parallel() + + // a and b or c and d — "and" binds tighter, so this is (a and b) or (c and d) + e, err := expr.Parse(`depth > 0 and is_component or depth < 0 and is_inline`) + require.NoError(t, err) + + // Both "and" groups are false -> false + result := e.Eval(testRow{ + "depth": expr.IntVal(0), + "is_component": expr.BoolVal(true), + "is_inline": expr.BoolVal(true), + }) + assert.False(t, result.Bool) + + // First "and" group is true -> true + result = e.Eval(testRow{ + "depth": expr.IntVal(5), + "is_component": expr.BoolVal(true), + "is_inline": expr.BoolVal(false), + }) + assert.True(t, result.Bool) +} diff --git a/oq/field.go b/oq/field.go new file mode 100644 index 0000000..fbca276 --- /dev/null +++ b/oq/field.go @@ -0,0 +1,173 @@ +package oq + +import ( + "strconv" + "strings" + + "github.com/speakeasy-api/openapi/graph" + "github.com/speakeasy-api/openapi/oq/expr" +) + +// --- Field access --- + +type rowAdapter struct { + row Row + g *graph.SchemaGraph + env map[string]expr.Value +} + +func (r rowAdapter) Field(name string) expr.Value { + if strings.HasPrefix(name, "$") && r.env != nil { + if v, ok := r.env[name]; ok { + return v + } + return expr.NullVal() + } + return fieldValue(r.row, name, r.g) +} + +// FieldValuePublic returns the value of a named field for the given row. +// Exported for testing and external consumers. 
+func FieldValuePublic(row Row, name string, g *graph.SchemaGraph) expr.Value { + return fieldValue(row, name, g) +} + +func fieldValue(row Row, name string, g *graph.SchemaGraph) expr.Value { + switch row.Kind { + case SchemaResult: + if row.SchemaIdx < 0 || row.SchemaIdx >= len(g.Schemas) { + return expr.NullVal() + } + s := &g.Schemas[row.SchemaIdx] + switch name { + case "name": + return expr.StringVal(s.Name) + case "type": + return expr.StringVal(s.Type) + case "depth": + return expr.IntVal(s.Depth) + case "in_degree": + return expr.IntVal(s.InDegree) + case "out_degree": + return expr.IntVal(s.OutDegree) + case "union_width": + return expr.IntVal(s.UnionWidth) + case "property_count": + return expr.IntVal(s.PropertyCount) + case "is_component": + return expr.BoolVal(s.IsComponent) + case "is_inline": + return expr.BoolVal(s.IsInline) + case "is_circular": + return expr.BoolVal(s.IsCircular) + case "has_ref": + return expr.BoolVal(s.HasRef) + case "hash": + return expr.StringVal(s.Hash) + case "path": + return expr.StringVal(s.Path) + case "op_count": + return expr.IntVal(g.SchemaOpCount(graph.NodeID(row.SchemaIdx))) + case "tag_count": + return expr.IntVal(schemaTagCount(row.SchemaIdx, g)) + case "edge_kind": + return expr.StringVal(row.EdgeKind) + case "edge_label": + return expr.StringVal(row.EdgeLabel) + case "edge_from": + return expr.StringVal(row.EdgeFrom) + } + case OperationResult: + if row.OpIdx < 0 || row.OpIdx >= len(g.Operations) { + return expr.NullVal() + } + o := &g.Operations[row.OpIdx] + switch name { + case "name": + return expr.StringVal(o.Name) + case "method": + return expr.StringVal(o.Method) + case "path": + return expr.StringVal(o.Path) + case "operation_id": + return expr.StringVal(o.OperationID) + case "schema_count": + return expr.IntVal(o.SchemaCount) + case "component_count": + return expr.IntVal(o.ComponentCount) + case "tag": + if o.Operation != nil && len(o.Operation.Tags) > 0 { + return expr.StringVal(o.Operation.Tags[0]) + } 
+ return expr.StringVal("") + case "parameter_count": + if o.Operation != nil { + return expr.IntVal(len(o.Operation.Parameters)) + } + return expr.IntVal(0) + case "deprecated": + if o.Operation != nil { + return expr.BoolVal(o.Operation.Deprecated != nil && *o.Operation.Deprecated) + } + return expr.BoolVal(false) + case "description": + if o.Operation != nil { + return expr.StringVal(o.Operation.GetDescription()) + } + return expr.StringVal("") + case "summary": + if o.Operation != nil { + return expr.StringVal(o.Operation.GetSummary()) + } + return expr.StringVal("") + case "edge_kind": + return expr.StringVal(row.EdgeKind) + case "edge_label": + return expr.StringVal(row.EdgeLabel) + case "edge_from": + return expr.StringVal(row.EdgeFrom) + } + } + return expr.NullVal() +} + +func compareValues(a, b expr.Value) int { + if a.Kind == expr.KindInt && b.Kind == expr.KindInt { + if a.Int < b.Int { + return -1 + } + if a.Int > b.Int { + return 1 + } + return 0 + } + sa := valueToString(a) + sb := valueToString(b) + if sa < sb { + return -1 + } + if sa > sb { + return 1 + } + return 0 +} + +func valueToString(v expr.Value) string { + switch v.Kind { + case expr.KindString: + return v.Str + case expr.KindInt: + return strconv.Itoa(v.Int) + case expr.KindBool: + return strconv.FormatBool(v.Bool) + default: + return "" + } +} + +func rowKey(row Row) string { + if row.Kind == SchemaResult { + return "s:" + strconv.Itoa(row.SchemaIdx) + } + return "o:" + strconv.Itoa(row.OpIdx) +} diff --git a/oq/format.go b/oq/format.go new file mode 100644 index 0000000..aa689b3 --- /dev/null +++ b/oq/format.go @@ -0,0 +1,384 @@ +package oq + +import ( + "fmt" + "slices" + "strconv" + "strings" + + "github.com/speakeasy-api/openapi/graph" + "github.com/speakeasy-api/openapi/oq/expr" +) + +// FormatTable formats a result as a simple table string. 
+func FormatTable(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + + if result.IsCount { + return strconv.Itoa(result.Count) + } + + if len(result.Groups) > 0 { + return formatGroups(result) + } + + if len(result.Rows) == 0 { + return "(empty)" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + // Build header + widths := make([]int, len(fields)) + for i, f := range fields { + widths[i] = len(f) + } + + // Collect rows + var tableRows [][]string + for _, row := range result.Rows { + var cols []string + for i, f := range fields { + v := valueToString(fieldValue(row, f, g)) + cols = append(cols, v) + if len(v) > widths[i] { + widths[i] = len(v) + } + } + tableRows = append(tableRows, cols) + } + + // Format + var sb strings.Builder + // Header + for i, f := range fields { + if i > 0 { + sb.WriteString(" ") + } + sb.WriteString(padRight(f, widths[i])) + } + sb.WriteString("\n") + // Separator + for i, w := range widths { + if i > 0 { + sb.WriteString(" ") + } + sb.WriteString(strings.Repeat("-", w)) + } + sb.WriteString("\n") + // Data + for _, row := range tableRows { + for i, col := range row { + if i > 0 { + sb.WriteString(" ") + } + sb.WriteString(padRight(col, widths[i])) + } + sb.WriteString("\n") + } + + return sb.String() +} + +// FormatJSON formats a result as JSON. 
// FormatJSON formats a result as JSON. Explain, count, and group results
// short-circuit to their own representations; otherwise rows are emitted as
// an array of flat objects over the projected (or default) fields.
func FormatJSON(result *Result, g *graph.SchemaGraph) string {
	if result.Explain != "" {
		return result.Explain
	}

	if result.IsCount {
		return strconv.Itoa(result.Count)
	}

	if len(result.Groups) > 0 {
		return formatGroupsJSON(result)
	}

	if len(result.Rows) == 0 {
		return "[]"
	}

	fields := result.Fields
	if len(fields) == 0 {
		// No explicit projection: pick defaults based on the row kind.
		if result.Rows[0].Kind == SchemaResult {
			fields = []string{"name", "type", "depth", "in_degree", "out_degree"}
		} else {
			fields = []string{"name", "method", "path", "schema_count"}
		}
	}

	// JSON is assembled by hand: keys are escaped via %q and values via
	// jsonValue, which quotes strings and passes ints/bools/null through.
	var sb strings.Builder
	sb.WriteString("[\n")
	for i, row := range result.Rows {
		if i > 0 {
			sb.WriteString(",\n")
		}
		sb.WriteString("  {")
		for j, f := range fields {
			if j > 0 {
				sb.WriteString(", ")
			}
			v := fieldValue(row, f, g)
			fmt.Fprintf(&sb, "%q: %s", f, jsonValue(v))
		}
		sb.WriteString("}")
	}
	sb.WriteString("\n]")
	return sb.String()
}

// FormatMarkdown formats a result as a markdown table. Explain, count, and
// group results short-circuit; group results use a fixed Key/Count table.
func FormatMarkdown(result *Result, g *graph.SchemaGraph) string {
	if result.Explain != "" {
		return result.Explain
	}

	if result.IsCount {
		return strconv.Itoa(result.Count)
	}

	if len(result.Groups) > 0 {
		var sb strings.Builder
		sb.WriteString("| Key | Count |\n")
		sb.WriteString("| --- | --- |\n")
		for _, grp := range result.Groups {
			fmt.Fprintf(&sb, "| %s | %d |\n", grp.Key, grp.Count)
		}
		return sb.String()
	}

	if len(result.Rows) == 0 {
		return "(empty)"
	}

	fields := result.Fields
	if len(fields) == 0 {
		// No explicit projection: pick defaults based on the row kind.
		if result.Rows[0].Kind == SchemaResult {
			fields = []string{"name", "type", "depth", "in_degree", "out_degree"}
		} else {
			fields = []string{"name", "method", "path", "schema_count"}
		}
	}

	var sb strings.Builder
	// Header
	sb.WriteString("| ")
	sb.WriteString(strings.Join(fields, " | "))
	sb.WriteString(" |\n")
	// Separator
	sb.WriteString("|")
	for range fields {
		sb.WriteString(" --- |")
	}
	sb.WriteString("\n")
	// Rows
	// NOTE(review): cell values are written verbatim — a value containing "|"
	// would break the table layout; confirm whether escaping is needed.
	for _, row := range result.Rows {
		sb.WriteString("| ")
		for i, f := range fields {
			if i > 0 {
				sb.WriteString(" | ")
			}
			v := valueToString(fieldValue(row, f, g))
			sb.WriteString(v)
		}
		sb.WriteString(" |\n")
	}

	return sb.String()
}

// FormatToon formats a result in the TOON (Token-Oriented Object Notation) format.
// TOON uses tabular array syntax for uniform rows: header[N]{field1,field2,...}:
// followed by comma-delimited data rows. See https://github.com/toon-format/toon
func FormatToon(result *Result, g *graph.SchemaGraph) string {
	if result.Explain != "" {
		return result.Explain
	}

	if result.IsCount {
		return "count: " + strconv.Itoa(result.Count)
	}

	if len(result.Groups) > 0 {
		return formatGroupsToon(result)
	}

	if len(result.Rows) == 0 {
		// Empty tabular array: header with zero count and no data rows.
		return "results[0]:\n"
	}

	fields := result.Fields
	if len(fields) == 0 {
		// No explicit projection: pick defaults based on the row kind.
		if result.Rows[0].Kind == SchemaResult {
			fields = []string{"name", "type", "depth", "in_degree", "out_degree"}
		} else {
			fields = []string{"name", "method", "path", "schema_count"}
		}
	}

	var sb strings.Builder

	// Header: results[N]{field1,field2,...}:
	fmt.Fprintf(&sb, "results[%d]{%s}:\n", len(result.Rows), strings.Join(fields, ","))

	// Data rows: comma-separated values, indented by one space
	for _, row := range result.Rows {
		sb.WriteByte(' ')
		for i, f := range fields {
			if i > 0 {
				sb.WriteByte(',')
			}
			v := fieldValue(row, f, g)
			sb.WriteString(toonValue(v))
		}
		sb.WriteByte('\n')
	}

	return sb.String()
}

// formatGroupsToon renders group-by results as a TOON tabular array with
// fixed key/count/names columns. Member names are joined with ";" so the
// whole list fits in a single TOON cell.
func formatGroupsToon(result *Result) string {
	var sb strings.Builder

	// Groups as tabular array
	fmt.Fprintf(&sb, "groups[%d]{key,count,names}:\n", len(result.Groups))
	for _, grp := range result.Groups {
		names := strings.Join(grp.Names, ";")
		fmt.Fprintf(&sb, " %s,%d,%s\n", toonEscape(grp.Key), grp.Count, toonEscape(names))
	}
	return sb.String()
}

// toonValue encodes an expr.Value for TOON format.
+func toonValue(v expr.Value) string { + switch v.Kind { + case expr.KindString: + return toonEscape(v.Str) + case expr.KindInt: + return strconv.Itoa(v.Int) + case expr.KindBool: + return strconv.FormatBool(v.Bool) + default: + return "null" + } +} + +// toonEscape quotes a string if it needs escaping for TOON format. +// A string must be quoted if it: is empty, contains comma/colon/quote/backslash/ +// brackets/braces/control chars, has leading/trailing whitespace, or matches +// true/false/null or a numeric pattern. +func toonEscape(s string) string { + if s == "" { + return `""` + } + if s == "true" || s == "false" || s == "null" { + return `"` + s + `"` + } + // Check if it looks numeric + if _, err := strconv.ParseFloat(s, 64); err == nil { + return `"` + s + `"` + } + needsQuote := false + for _, ch := range s { + if ch == ',' || ch == ':' || ch == '"' || ch == '\\' || + ch == '[' || ch == ']' || ch == '{' || ch == '}' || + ch == '\n' || ch == '\r' || ch == '\t' || + ch < 0x20 { + needsQuote = true + break + } + } + if s[0] == ' ' || s[len(s)-1] == ' ' { + needsQuote = true + } + if !needsQuote { + return s + } + // Quote with escaping + var sb strings.Builder + sb.WriteByte('"') + for _, ch := range s { + switch ch { + case '\\': + sb.WriteString(`\\`) + case '"': + sb.WriteString(`\"`) + case '\n': + sb.WriteString(`\n`) + case '\r': + sb.WriteString(`\r`) + case '\t': + sb.WriteString(`\t`) + default: + sb.WriteRune(ch) + } + } + sb.WriteByte('"') + return sb.String() +} + +func jsonValue(v expr.Value) string { + switch v.Kind { + case expr.KindString: + return fmt.Sprintf("%q", v.Str) + case expr.KindInt: + return strconv.Itoa(v.Int) + case expr.KindBool: + return strconv.FormatBool(v.Bool) + default: + return "null" + } +} + +func formatGroups(result *Result) string { + var sb strings.Builder + for _, g := range result.Groups { + fmt.Fprintf(&sb, "%s: count=%d", g.Key, g.Count) + if len(g.Names) > 0 { + names := slices.Clone(g.Names) + if len(names) > 5 
{ + names = names[:5] + names = append(names, "...") + } + fmt.Fprintf(&sb, " names=[%s]", strings.Join(names, ", ")) + } + sb.WriteString("\n") + } + return sb.String() +} + +func formatGroupsJSON(result *Result) string { + var sb strings.Builder + sb.WriteString("[\n") + for i, g := range result.Groups { + if i > 0 { + sb.WriteString(",\n") + } + fmt.Fprintf(&sb, ` {"key": %q, "count": %d, "names": [`, g.Key, g.Count) + for j, n := range g.Names { + if j > 0 { + sb.WriteString(", ") + } + fmt.Fprintf(&sb, "%q", n) + } + sb.WriteString("]}") + } + sb.WriteString("\n]") + return sb.String() +} + +func padRight(s string, width int) string { + if len(s) >= width { + return s + } + return s + strings.Repeat(" ", width-len(s)) +} diff --git a/oq/module.go b/oq/module.go new file mode 100644 index 0000000..a691e56 --- /dev/null +++ b/oq/module.go @@ -0,0 +1,113 @@ +package oq + +import ( + "fmt" + "os" + "path/filepath" + "strings" +) + +// LoadModule loads function definitions from a .oq module file. +func LoadModule(path string, searchPaths []string) ([]FuncDef, error) { + resolved, err := resolveModulePath(path, searchPaths) + if err != nil { + return nil, err + } + + data, err := os.ReadFile(resolved) //nolint:gosec // module paths are user-provided query inputs, not untrusted + if err != nil { + return nil, fmt.Errorf("reading module %q: %w", resolved, err) + } + + q, err := parseDeclarations(string(data)) + if err != nil { + return nil, fmt.Errorf("parsing module %q: %w", resolved, err) + } + + return q.Defs, nil +} + +func resolveModulePath(path string, searchPaths []string) (string, error) { + if !strings.HasSuffix(path, ".oq") { + path += ".oq" + } + + if filepath.IsAbs(path) { + if _, err := os.Stat(path); err == nil { + return path, nil + } + } + + allPaths := make([]string, 0, len(searchPaths)+2) + allPaths = append(allPaths, ".") + allPaths = append(allPaths, searchPaths...) 
+ if home, err := os.UserHomeDir(); err == nil { + allPaths = append(allPaths, filepath.Join(home, ".config", "oq")) + } + + for _, dir := range allPaths { + full := filepath.Join(dir, path) + if _, err := os.Stat(full); err == nil { + return full, nil + } + } + + return "", fmt.Errorf("module %q not found in search paths", path) +} + +// ExpandDefs performs text-level macro expansion on pipeline segments. +// Each segment that matches a def name gets replaced with the def's body +// (with params substituted). +func ExpandDefs(pipelineText string, defs []FuncDef) (string, error) { + if len(defs) == 0 { + return pipelineText, nil + } + + defMap := make(map[string]FuncDef, len(defs)) + for _, d := range defs { + defMap[d.Name] = d + } + + parts := splitPipeline(pipelineText) + var expanded []string + + for i, part := range parts { + part = strings.TrimSpace(part) + if part == "" { + continue + } + if i == 0 { + // Source — don't expand + expanded = append(expanded, part) + continue + } + + keyword, args, isCall := splitKeywordCall(part) + if !isCall { + keyword, _ = splitFirst(part) + } + + def, ok := defMap[strings.ToLower(keyword)] + if !ok { + expanded = append(expanded, part) + continue + } + + body := def.Body + if isCall && len(def.Params) > 0 { + callArgs := splitSemicolonArgs(args) + if len(callArgs) != len(def.Params) { + return "", fmt.Errorf("def %q expects %d params, got %d", def.Name, len(def.Params), len(callArgs)) + } + for j, param := range def.Params { + body = strings.ReplaceAll(body, param, strings.TrimSpace(callArgs[j])) + } + } else if !isCall && len(def.Params) > 0 { + return "", fmt.Errorf("def %q requires %d params", def.Name, len(def.Params)) + } + + expanded = append(expanded, body) + } + + return strings.Join(expanded, " | "), nil +} diff --git a/oq/oq.go b/oq/oq.go new file mode 100644 index 0000000..1973c38 --- /dev/null +++ b/oq/oq.go @@ -0,0 +1,163 @@ +// Package oq implements a pipeline query language for OpenAPI schema graphs. 
//
// Queries are written as pipeline expressions with jq-inspired syntax:
//
//	schemas.components | select(depth > 5) | sort_by(depth; desc) | first(10) | pick name, depth
//
// Legacy syntax (where, sort, take, select fields) is also supported.
package oq

import (
	"fmt"

	"github.com/speakeasy-api/openapi/graph"
)

// ResultKind distinguishes between schema and operation result rows.
type ResultKind int

const (
	// SchemaResult marks a row whose SchemaIdx points into SchemaGraph.Schemas.
	SchemaResult ResultKind = iota
	// OperationResult marks a row whose OpIdx points into SchemaGraph.Operations.
	OperationResult
)

// Row represents a single result in the pipeline.
type Row struct {
	Kind      ResultKind
	SchemaIdx int // index into SchemaGraph.Schemas
	OpIdx     int // index into SchemaGraph.Operations

	// Edge annotations (populated by 1-hop traversal stages)
	EdgeKind  string // edge type: "property", "items", "allOf", "oneOf", "ref", etc.
	EdgeLabel string // edge label: property name, array index, etc.
	EdgeFrom  string // source node name
}

// Result is the output of a query execution.
type Result struct {
	Rows       []Row
	Fields     []string // projected fields (empty = all)
	IsCount    bool
	Count      int
	Groups     []GroupResult
	Explain    string // human-readable pipeline explanation
	FormatHint string // format preference from format stage (table, json, markdown)
}

// GroupResult represents a group-by aggregation result.
type GroupResult struct {
	Key   string
	Count int
	Names []string
}

// Execute parses and executes a query against the given graph.
// It is shorthand for ExecuteWithSearchPaths with no module search paths.
func Execute(query string, g *graph.SchemaGraph) (*Result, error) {
	return ExecuteWithSearchPaths(query, g, nil)
}

// ExecuteWithSearchPaths parses and executes a query, searching for modules in the given paths.
func ExecuteWithSearchPaths(query string, g *graph.SchemaGraph, searchPaths []string) (*Result, error) {
	decls, err := parseDeclarations(query)
	if err != nil {
		return nil, fmt.Errorf("parse error: %w", err)
	}

	// Resolve includes: each included module contributes extra defs.
	for _, inc := range decls.Includes {
		defs, loadErr := LoadModule(inc, searchPaths)
		if loadErr != nil {
			return nil, fmt.Errorf("include %q: %w", inc, loadErr)
		}
		decls.Defs = append(decls.Defs, defs...)
	}

	// Text-level def expansion before parsing pipeline
	pipelineText, err := ExpandDefs(decls.PipelineText, decls.Defs)
	if err != nil {
		return nil, fmt.Errorf("def expansion: %w", err)
	}

	// A declarations-only query (no pipeline) yields an empty result.
	if pipelineText == "" {
		return &Result{}, nil
	}

	stages, err := parsePipeline(pipelineText)
	if err != nil {
		return nil, fmt.Errorf("parse error: %w", err)
	}

	return run(stages, g)
}

// --- AST ---

// StageKind represents the type of pipeline stage.
type StageKind int

// One constant per pipeline keyword; see the parser for the keyword
// spellings each kind corresponds to.
const (
	StageSource StageKind = iota
	StageWhere
	StageSelect
	StageSort
	StageTake
	StageUnique
	StageGroupBy
	StageCount
	StageRefsOut
	StageRefsIn
	StageReachable
	StageAncestors
	StageProperties
	StageUnionMembers
	StageItems
	StageOps
	StageSchemas
	StageExplain
	StageFields
	StageSample
	StagePath
	StageTop
	StageBottom
	StageFormat
	StageConnected
	StageBlastRadius
	StageNeighbors
	StageOrphans
	StageLeaves
	StageCycles
	StageClusters
	StageTagBoundary
	StageSharedRefs
	StageLast
	StageLet
)

// Stage represents a single stage in the query pipeline.
type Stage struct {
	Kind      StageKind
	Source    string   // for StageSource
	Expr      string   // for StageWhere, StageLet
	Fields    []string // for StageSelect, StageGroupBy
	SortField string   // for StageSort
	SortDesc  bool     // for StageSort
	Limit     int      // for StageTake, StageLast, StageSample, StageTop, StageBottom
	PathFrom  string   // for StagePath
	PathTo    string   // for StagePath
	Format    string   // for StageFormat
	VarName   string   // for StageLet
}

// Query represents a parsed query with optional includes, defs, and pipeline stages.
type Query struct {
	Includes []string
	Defs     []FuncDef
	Stages   []Stage
}

// FuncDef represents a user-defined function.
type FuncDef struct {
	Name   string
	Params []string // with $ prefix
	Body   string   // raw pipeline text
}
diff --git a/oq/oq_test.go b/oq/oq_test.go
new file mode 100644
index 0000000..7e50d04
--- /dev/null
+++ b/oq/oq_test.go
package oq_test

import (
	"os"
	"strings"
	"testing"

	"github.com/speakeasy-api/openapi/graph"
	"github.com/speakeasy-api/openapi/openapi"
	"github.com/speakeasy-api/openapi/oq"
	"github.com/speakeasy-api/openapi/oq/expr"
	"github.com/speakeasy-api/openapi/references"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// loadTestGraph builds a SchemaGraph from the petstore fixture, skipping
// validation so structurally interesting-but-invalid schemas still load.
func loadTestGraph(t *testing.T) *graph.SchemaGraph {
	t.Helper()

	f, err := os.Open("testdata/petstore.yaml")
	require.NoError(t, err)
	defer f.Close()

	ctx := t.Context()
	doc, _, err := openapi.Unmarshal(ctx, f, openapi.WithSkipValidation())
	require.NoError(t, err)
	require.NotNil(t, doc)

	idx := openapi.BuildIndex(ctx, doc, references.ResolveOptions{
		RootDocument:   doc,
		TargetDocument: doc,
		TargetLocation: "testdata/petstore.yaml",
	})

	return graph.Build(ctx, idx)
}

// TestParse_Success checks that every supported stage keyword parses.
func TestParse_Success(t *testing.T) {
	t.Parallel()

	tests := []struct {
		name  string
		query string
	}{
		{"simple source", "schemas"},
		{"components source", "schemas.components"},
		{"inline
source", "schemas.inline"}, + {"operations source", "operations"}, + {"sort", "schemas | sort depth desc"}, + {"take", "schemas | take 5"}, + {"where", "schemas | where depth > 3"}, + {"select", "schemas | select name, depth"}, + {"count", "schemas | count"}, + {"unique", "schemas | unique"}, + {"group-by", "schemas | group-by hash"}, + {"refs-out", "schemas | refs-out"}, + {"refs-in", "schemas | refs-in"}, + {"reachable", "schemas | reachable"}, + {"ancestors", "schemas | ancestors"}, + {"properties", "schemas | properties"}, + {"union-members", "schemas | union-members"}, + {"items", "schemas | items"}, + {"ops", "schemas | ops"}, + {"schemas from ops", "operations | schemas"}, + {"connected", "schemas.components | where name == \"Pet\" | connected"}, + {"blast-radius", "schemas.components | where name == \"Pet\" | blast-radius"}, + {"neighbors", "schemas.components | where name == \"Pet\" | neighbors 2"}, + {"orphans", "schemas.components | orphans"}, + {"leaves", "schemas.components | leaves"}, + {"cycles", "schemas | cycles"}, + {"clusters", "schemas.components | clusters"}, + {"tag-boundary", "schemas | tag-boundary"}, + {"shared-refs", "operations | take 2 | shared-refs"}, + {"full pipeline", "schemas.components | where depth > 0 | sort depth desc | take 5 | select name, depth"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + stages, err := oq.Parse(tt.query) + require.NoError(t, err) + assert.NotEmpty(t, stages) + }) + } +} + +func TestParse_Error(t *testing.T) { + t.Parallel() + + _, err := oq.Parse("") + require.Error(t, err) + + _, err = oq.Parse("schemas | unknown_stage") + require.Error(t, err) + + _, err = oq.Parse("schemas | take abc") + require.Error(t, err) +} + +func TestExecute_SchemasCount_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | count", g) + require.NoError(t, err) + assert.True(t, result.IsCount, "should be a count result") + 
assert.Positive(t, result.Count, "count should be positive") +} + +func TestExecute_ComponentSchemas_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | select name", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "should have component schema rows") + + // Check that we have the expected component schemas + names := collectNames(result, g) + assert.Contains(t, names, "Pet", "should include Pet schema") + assert.Contains(t, names, "Owner", "should include Owner schema") + assert.Contains(t, names, "Address", "should include Address schema") + assert.Contains(t, names, "Error", "should include Error schema") + assert.Contains(t, names, "Shape", "should include Shape schema") + assert.Contains(t, names, "Unused", "should include Unused schema") +} + +func TestExecute_Where_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where type == "object" | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + assert.Contains(t, names, "Pet", "should include Pet schema") + assert.Contains(t, names, "Owner", "should include Owner schema") +} + +func TestExecute_WhereInDegree_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Unused schema has no incoming references (from other schemas in components) + result, err := oq.Execute(`schemas.components | where in_degree == 0 | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + // Unused should have no references from other schemas + assert.Contains(t, names, "Unused", "should include Unused schema with in_degree 0") +} + +func TestExecute_Sort_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | sort property_count desc | take 3 | select name, property_count", g) + require.NoError(t, err) + assert.LessOrEqual(t, len(result.Rows), 3, "should return at most 
3 rows") +} + +func TestExecute_Reachable_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | reachable | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + // Pet references Owner, Owner references Address + assert.Contains(t, names, "Owner", "Pet should reach Owner") + assert.Contains(t, names, "Address", "Pet should reach Address") +} + +func TestExecute_Ancestors_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Address" | ancestors | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + // Address is referenced by Owner, which is referenced by Pet + assert.Contains(t, names, "Owner", "Address ancestors should include Owner") +} + +func TestExecute_Properties_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | properties | select name`, g) + require.NoError(t, err) + // Pet has 4 properties: id, name, tag, owner + assert.NotEmpty(t, result.Rows, "Pet should have properties") +} + +func TestExecute_UnionMembers_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Shape" | union-members | select name`, g) + require.NoError(t, err) + // Shape has oneOf with Circle and Square + names := collectNames(result, g) + assert.Contains(t, names, "Circle", "Shape union members should include Circle") + assert.Contains(t, names, "Square", "Shape union members should include Square") +} + +func TestExecute_Operations_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("operations | select name, method, path", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "should have operations") +} + +func TestExecute_OperationSchemas_Success(t *testing.T) 
{ + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`operations | where operation_id == "listPets" | schemas | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + assert.Contains(t, names, "Pet", "listPets operation should reference Pet schema") +} + +func TestExecute_GroupBy_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | group-by type`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Groups, "should have groups") +} + +func TestExecute_Unique_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | unique", g) + require.NoError(t, err) + + names := collectNames(result, g) + // Check no duplicates + seen := make(map[string]bool) + for _, n := range names { + assert.False(t, seen[n], "duplicate: %s", n) + seen[n] = true + } +} + +func TestExecute_SchemasToOps_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | ops | select name`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "should have operations using Pet schema") +} + +func TestFormatTable_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | take 3 | select name, type", g) + require.NoError(t, err) + + table := oq.FormatTable(result, g) + assert.Contains(t, table, "name", "table should include name column") + assert.Contains(t, table, "type", "table should include type column") + assert.NotEmpty(t, table, "table should not be empty") +} + +func TestFormatJSON_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | take 3 | select name, type", g) + require.NoError(t, err) + + json := oq.FormatJSON(result, g) + assert.True(t, strings.HasPrefix(json, "["), "JSON output should start with [") + assert.True(t, 
strings.HasSuffix(json, "]"), "JSON output should end with ]") +} + +func TestFormatTable_Count_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | count", g) + require.NoError(t, err) + + table := oq.FormatTable(result, g) + assert.NotEmpty(t, table, "count table should not be empty") +} + +func TestFormatTable_Empty_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "NonExistent"`, g) + require.NoError(t, err) + + table := oq.FormatTable(result, g) + assert.Equal(t, "(empty)", table, "empty result should format as (empty)") +} + +func TestExecute_MatchesExpression_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name matches ".*Error.*" | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + assert.Contains(t, names, "Error", "regex match should return Error schema") +} + +func TestExecute_SortAsc_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | sort name asc | select name", g) + require.NoError(t, err) + + names := collectNames(result, g) + for i := 1; i < len(names); i++ { + assert.LessOrEqual(t, names[i-1], names[i], "names should be in ascending order") + } +} + +func TestExecute_Explain_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | where depth > 5 | sort depth desc | take 10 | explain", g) + require.NoError(t, err) + assert.Contains(t, result.Explain, "Source: schemas.components", "explain should show source") + assert.Contains(t, result.Explain, "Filter: select(depth > 5)", "explain should show filter stage") + assert.Contains(t, result.Explain, "Sort: sort_by(depth; desc)", "explain should show sort stage") + assert.Contains(t, result.Explain, "Limit: first(10)", "explain should show limit stage") 
+} + +func TestExecute_Fields_Schemas_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | fields", g) + require.NoError(t, err) + assert.Contains(t, result.Explain, "name", "fields output should list name") + assert.Contains(t, result.Explain, "depth", "fields output should list depth") + assert.Contains(t, result.Explain, "property_count", "fields output should list property_count") + assert.Contains(t, result.Explain, "is_component", "fields output should list is_component") +} + +func TestExecute_Fields_Operations_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("operations | fields", g) + require.NoError(t, err) + assert.Contains(t, result.Explain, "method", "fields output should list method") + assert.Contains(t, result.Explain, "operation_id", "fields output should list operation_id") + assert.Contains(t, result.Explain, "schema_count", "fields output should list schema_count") + assert.Contains(t, result.Explain, "tag", "fields output should list tag") + assert.Contains(t, result.Explain, "deprecated", "fields output should list deprecated") +} + +func TestExecute_Head_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | head 3", g) + require.NoError(t, err) + assert.Len(t, result.Rows, 3, "head should return exactly 3 rows") +} + +func TestExecute_Sample_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | sample 3", g) + require.NoError(t, err) + assert.Len(t, result.Rows, 3, "sample should return exactly 3 rows") + + // Running sample again should produce the same result (deterministic) + result2, err := oq.Execute("schemas.components | sample 3", g) + require.NoError(t, err) + assert.Len(t, result2.Rows, len(result.Rows), "sample should be deterministic") +} + +func TestExecute_Path_Success(t *testing.T) { + t.Parallel() + g := 
loadTestGraph(t) + + result, err := oq.Execute(`schemas | path Pet Address | select name`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "path from Pet to Address should have results") + + names := collectNames(result, g) + // Path should include Pet, something in between, and Address + assert.Equal(t, "Pet", names[0], "path should start at Pet") + assert.Equal(t, "Address", names[len(names)-1], "path should end at Address") +} + +func TestExecute_Path_NotFound_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Unused has no outgoing edges to reach Pet + result, err := oq.Execute(`schemas | path Unused Pet | select name`, g) + require.NoError(t, err) + assert.Empty(t, result.Rows, "no path should exist from Unused to Pet") +} + +func TestExecute_Top_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | top 3 property_count | select name, property_count", g) + require.NoError(t, err) + assert.Len(t, result.Rows, 3, "top should return exactly 3 rows") + + // Verify descending order + for i := 1; i < len(result.Rows); i++ { + prev := oq.FieldValuePublic(result.Rows[i-1], "property_count", g) + curr := oq.FieldValuePublic(result.Rows[i], "property_count", g) + assert.GreaterOrEqual(t, prev.Int, curr.Int, "top should be in descending order") + } +} + +func TestExecute_Bottom_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | bottom 3 property_count | select name, property_count", g) + require.NoError(t, err) + assert.Len(t, result.Rows, 3, "bottom should return exactly 3 rows") + + // Verify ascending order + for i := 1; i < len(result.Rows); i++ { + prev := oq.FieldValuePublic(result.Rows[i-1], "property_count", g) + curr := oq.FieldValuePublic(result.Rows[i], "property_count", g) + assert.LessOrEqual(t, prev.Int, curr.Int, "bottom should be in ascending order") + } +} + +func TestExecute_Format_Success(t 
*testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | take 3 | format json", g) + require.NoError(t, err) + assert.Equal(t, "json", result.FormatHint, "format hint should be json") +} + +func TestFormatMarkdown_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | take 3 | select name, type", g) + require.NoError(t, err) + + md := oq.FormatMarkdown(result, g) + assert.Contains(t, md, "| name", "markdown should include name column header") + assert.Contains(t, md, "| --- |", "markdown should include separator row") +} + +func TestExecute_OperationTag_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("operations | select name, tag, parameter_count", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "should have operation rows") +} + +func TestParse_NewStages_Success(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + query string + }{ + {"explain", "schemas | explain"}, + {"fields", "schemas | fields"}, + {"head", "schemas | head 5"}, + {"sample", "schemas | sample 10"}, + {"path", `schemas | path "User" "Order"`}, + {"path unquoted", "schemas | path User Order"}, + {"top", "schemas | top 5 depth"}, + {"bottom", "schemas | bottom 5 depth"}, + {"format", "schemas | format json"}, + {"format markdown", "schemas | format markdown"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + stages, err := oq.Parse(tt.query) + require.NoError(t, err) + assert.NotEmpty(t, stages, "should parse into non-empty stages") + }) + } +} + +func TestExecute_RefsOut_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | refs-out | select name`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "Pet should have outgoing refs") +} + +func TestExecute_RefsIn_Success(t 
*testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Owner" | refs-in | select name`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "Owner should have incoming refs") +} + +func TestExecute_Items_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // listPets response includes an array with items + result, err := oq.Execute(`schemas | where type == "array" | items | select name`, g) + require.NoError(t, err) + // May or may not have results depending on spec, but should not error + assert.NotNil(t, result, "result should not be nil") +} + +func TestExecute_Connected_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Start from Pet, connected should return schemas and operations in the same component + result, err := oq.Execute(`schemas.components | where name == "Pet" | connected`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "connected should return rows") + + // Should have both schema and operation rows + hasSchema := false + hasOp := false + for _, row := range result.Rows { + if row.Kind == oq.SchemaResult { + hasSchema = true + } + if row.Kind == oq.OperationResult { + hasOp = true + } + } + assert.True(t, hasSchema, "connected should include schema nodes") + assert.True(t, hasOp, "connected should include operation nodes") +} + +func TestExecute_Connected_FromOps_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Start from an operation, connected should also find schemas + result, err := oq.Execute(`operations | take 1 | connected`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "connected from operation should return rows") + + hasSchema := false + for _, row := range result.Rows { + if row.Kind == oq.SchemaResult { + hasSchema = true + } + } + assert.True(t, hasSchema, "connected from operation should include schema nodes") +} + +func TestExecute_EdgeAnnotations_Success(t *testing.T) { + 
t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | refs-out | select name, edge_kind, edge_label, edge_from`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "refs-out from Pet should have results") + + // Every row should have edge annotations + for _, row := range result.Rows { + kind := oq.FieldValuePublic(row, "edge_kind", g) + assert.NotEmpty(t, kind.Str, "edge_kind should be set") + from := oq.FieldValuePublic(row, "edge_from", g) + assert.Equal(t, "Pet", from.Str, "edge_from should be Pet") + } +} + +func TestExecute_BlastRadius_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | blast-radius`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "blast-radius should return rows") + + // Should include both schemas and operations + hasSchema := false + hasOp := false + for _, row := range result.Rows { + if row.Kind == oq.SchemaResult { + hasSchema = true + } + if row.Kind == oq.OperationResult { + hasOp = true + } + } + assert.True(t, hasSchema, "blast-radius should include schemas") + assert.True(t, hasOp, "blast-radius should include operations") +} + +func TestExecute_Neighbors_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | neighbors 1`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "neighbors should return rows") + + // Depth-1 neighbors should include seed + direct refs in both directions + names := make(map[string]bool) + for _, row := range result.Rows { + n := oq.FieldValuePublic(row, "name", g) + names[n.Str] = true + } + assert.True(t, names["Pet"], "neighbors should include the seed node") +} + +func TestExecute_Orphans_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | orphans | select name`, g) + 
require.NoError(t, err) + // Result may be empty if all schemas are referenced, that's fine + assert.NotNil(t, result, "result should not be nil") +} + +func TestExecute_Leaves_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | leaves | select name, out_degree`, g) + require.NoError(t, err) + // All returned rows should have out_degree == 0 + for _, row := range result.Rows { + od := oq.FieldValuePublic(row, "out_degree", g) + assert.Equal(t, 0, od.Int, "leaf nodes should have out_degree 0") + } +} + +func TestExecute_Cycles_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas | cycles`, g) + require.NoError(t, err) + // Returns groups — may be empty if no cycles in petstore + assert.NotNil(t, result, "result should not be nil") +} + +func TestExecute_Clusters_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | clusters`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Groups, "should have clusters") + + // Total names across all clusters should equal component count + total := 0 + for _, grp := range result.Groups { + total += grp.Count + } + // Count component schemas + compCount, err := oq.Execute(`schemas.components | count`, g) + require.NoError(t, err) + assert.Equal(t, compCount.Count, total, "cluster totals should equal component count") +} + +func TestExecute_TagBoundary_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas | tag-boundary | select name, tag_count`, g) + require.NoError(t, err) + // All returned rows should have tag_count > 1 + for _, row := range result.Rows { + tc := oq.FieldValuePublic(row, "tag_count", g) + assert.Greater(t, tc.Int, 1, "tag-boundary schemas should have tag_count > 1") + } +} + +func TestExecute_SharedRefs_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := 
oq.Execute(`operations | shared-refs | select name`, g) + require.NoError(t, err) + // Schemas shared by ALL operations + assert.NotNil(t, result, "result should not be nil") +} + +func TestExecute_OpCount_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | sort op_count desc | take 3 | select name, op_count`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "should have schemas sorted by op_count") +} + +func TestFormatTable_Groups_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | group-by type", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Groups, "should have groups") + + table := oq.FormatTable(result, g) + assert.Contains(t, table, "count=", "group table should show count") +} + +func TestFormatJSON_Groups_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | group-by type", g) + require.NoError(t, err) + + json := oq.FormatJSON(result, g) + assert.Contains(t, json, "\"key\"", "group JSON should include key field") + assert.Contains(t, json, "\"count\"", "group JSON should include count field") +} + +func TestFormatMarkdown_Groups_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | group-by type", g) + require.NoError(t, err) + + md := oq.FormatMarkdown(result, g) + assert.Contains(t, md, "| Key |", "group markdown should include Key column") +} + +func TestExecute_InlineSource_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.inline | count", g) + require.NoError(t, err) + assert.True(t, result.IsCount, "should be a count result") +} + +func TestExecute_SchemaFields_Coverage(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Select all schema fields to cover fieldValue branches + result, err := oq.Execute("schemas.components 
| take 1 | select name, type, depth, in_degree, out_degree, union_width, property_count, is_component, is_inline, is_circular, has_ref, hash, path", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "should have schema rows") + + table := oq.FormatTable(result, g) + assert.NotEmpty(t, table, "table output should not be empty") + + json := oq.FormatJSON(result, g) + assert.Contains(t, json, "\"name\"", "JSON should include name field") +} + +func TestExecute_OperationFields_Coverage(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Select all operation fields to cover fieldValue branches + result, err := oq.Execute("operations | take 1 | select name, method, path, operation_id, schema_count, component_count, tag, parameter_count, deprecated, description, summary", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "should have operation rows") +} + +func TestFormatJSON_Empty_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "NonExistent"`, g) + require.NoError(t, err) + + json := oq.FormatJSON(result, g) + assert.Equal(t, "[]", json, "empty result JSON should be []") +} + +func TestFormatMarkdown_Empty_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "NonExistent"`, g) + require.NoError(t, err) + + md := oq.FormatMarkdown(result, g) + assert.Equal(t, "(empty)", md, "empty result markdown should be (empty)") +} + +func TestFormatJSON_Count_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | count", g) + require.NoError(t, err) + + json := oq.FormatJSON(result, g) + assert.NotEmpty(t, json, "count JSON should not be empty") +} + +func TestFormatToon_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | take 3 | select name, type", g) + require.NoError(t, err) + + 
toon := oq.FormatToon(result, g) + assert.Contains(t, toon, "results[3]{name,type}:", "toon should show result count and fields") + assert.Contains(t, toon, "object", "toon should include object type value") +} + +func TestFormatToon_Count_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | count", g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.Contains(t, toon, "count:", "toon should show count label") +} + +func TestFormatToon_Groups_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | group-by type", g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.Contains(t, toon, "groups[", "toon should show groups header") + assert.Contains(t, toon, "{key,count,names}:", "toon should show group fields") +} + +func TestFormatToon_Empty_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "NonExistent"`, g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.Equal(t, "results[0]:\n", toon, "empty toon should show results[0]") +} + +func TestFormatToon_Escaping_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Paths contain special chars like / that don't need escaping, + // but hash values and paths are good coverage + result, err := oq.Execute("schemas.components | take 1 | select name, hash, path", g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.Contains(t, toon, "results[1]{name,hash,path}:", "toon should show result count and selected fields") +} + +func TestFormatMarkdown_Count_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | count", g) + require.NoError(t, err) + + md := oq.FormatMarkdown(result, g) + assert.NotEmpty(t, md, "count markdown should not be empty") +} + +func TestExecute_Explain_AllStages_Success(t 
*testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Cover more stage descriptions in explain + tests := []struct { + name string + query string + expects []string + }{ + { + "explain with unique and count", + "schemas.components | unique | count | explain", + []string{"Unique:", "Count:"}, + }, + { + "explain with group-by", + "schemas.components | group-by type | explain", + []string{"Group: group-by"}, + }, + { + "explain with traversals", + "schemas.components | where name == \"Pet\" | refs-out | explain", + []string{"Traverse: outgoing references"}, + }, + { + "explain with refs-in", + "schemas.components | where name == \"Owner\" | refs-in | explain", + []string{"Traverse: incoming references"}, + }, + { + "explain with reachable", + "schemas.components | where name == \"Pet\" | reachable | explain", + []string{"Traverse: all reachable"}, + }, + { + "explain with ancestors", + "schemas.components | where name == \"Address\" | ancestors | explain", + []string{"Traverse: all ancestor"}, + }, + { + "explain with properties", + "schemas.components | where name == \"Pet\" | properties | explain", + []string{"Traverse: property children"}, + }, + { + "explain with union-members", + "schemas.components | where name == \"Shape\" | union-members | explain", + []string{"Traverse: union members"}, + }, + { + "explain with items", + "schemas | where type == \"array\" | items | explain", + []string{"Traverse: array items"}, + }, + { + "explain with ops", + "schemas.components | where name == \"Pet\" | ops | explain", + []string{"Navigate: schemas to operations"}, + }, + { + "explain with schemas from ops", + "operations | schemas | explain", + []string{"Navigate: operations to schemas"}, + }, + { + "explain with sample", + "schemas.components | sample 3 | explain", + []string{"Sample: random 3"}, + }, + { + "explain with path", + "schemas | path Pet Address | explain", + []string{"Path: shortest path from Pet to Address"}, + }, + { + "explain with top", + 
"schemas.components | top 3 depth | explain", + []string{"Top: 3 by depth"}, + }, + { + "explain with bottom", + "schemas.components | bottom 3 depth | explain", + []string{"Bottom: 3 by depth"}, + }, + { + "explain with format", + "schemas.components | format json | explain", + []string{"Format: json"}, + }, + { + "explain with connected", + "schemas.components | where name == \"Pet\" | connected | explain", + []string{"Traverse: full connected"}, + }, + { + "explain with blast-radius", + "schemas.components | where name == \"Pet\" | blast-radius | explain", + []string{"Traverse: blast radius"}, + }, + { + "explain with neighbors", + "schemas.components | where name == \"Pet\" | neighbors 2 | explain", + []string{"Traverse: bidirectional neighbors within 2"}, + }, + { + "explain with orphans", + "schemas.components | orphans | explain", + []string{"Filter: schemas with no incoming"}, + }, + { + "explain with leaves", + "schemas.components | leaves | explain", + []string{"Filter: schemas with no outgoing"}, + }, + { + "explain with cycles", + "schemas | cycles | explain", + []string{"Analyze: strongly connected"}, + }, + { + "explain with clusters", + "schemas.components | clusters | explain", + []string{"Analyze: weakly connected"}, + }, + { + "explain with tag-boundary", + "schemas | tag-boundary | explain", + []string{"Filter: schemas used by operations across multiple"}, + }, + { + "explain with shared-refs", + "operations | shared-refs | explain", + []string{"Analyze: schemas shared"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + result, err := oq.Execute(tt.query, g) + require.NoError(t, err) + for _, exp := range tt.expects { + assert.Contains(t, result.Explain, exp, "explain should contain: "+exp) + } + }) + } +} + +func TestExecute_FieldValue_EdgeCases(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Test operation fields that require nil checks + result, err := oq.Execute("operations | take 1 
| select name, tag, parameter_count, deprecated, description, summary", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "should have operation rows") + + // Test edge fields on non-traversal rows (should be empty strings) + result, err = oq.Execute("schemas.components | take 1 | select name, edge_kind, edge_label, edge_from", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "should have schema rows") + edgeKind := oq.FieldValuePublic(result.Rows[0], "edge_kind", g) + assert.Empty(t, edgeKind.Str, "edge_kind should be empty for non-traversal rows") + + // Test tag_count field + result, err = oq.Execute("schemas.components | take 1 | select name, tag_count", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "should have rows for tag_count test") + + // Test op_count field + result, err = oq.Execute("schemas.components | take 1 | select name, op_count", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "should have rows for op_count test") + + // Test unknown field returns null (KindNull == 0) + v := oq.FieldValuePublic(result.Rows[0], "nonexistent_field", g) + assert.Equal(t, expr.KindNull, v.Kind, "unknown field should return KindNull") +} + +func TestExecute_Cycles_NoCycles(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Petstore has no cycles, so cycles should return empty groups + result, err := oq.Execute("schemas | cycles", g) + require.NoError(t, err) + assert.Empty(t, result.Groups, "petstore should have no cycles") +} + +func TestExecute_SharedRefs_AllOps(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // shared-refs with all operations — returns schemas shared by all operations + result, err := oq.Execute("operations | shared-refs | select name", g) + require.NoError(t, err) + assert.NotNil(t, result, "result should not be nil") +} + +func TestFormatToon_SpecialChars(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Test TOON format with bool and int fields to cover 
toonValue branches + result, err := oq.Execute("schemas.components | take 1 | select name, depth, is_component", g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.NotEmpty(t, toon, "toon output should not be empty") + assert.Contains(t, toon, "results[1]", "toon should show one result") +} + +func TestFormatJSON_Operations(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("operations | take 2 | select name, method, path", g) + require.NoError(t, err) + + json := oq.FormatJSON(result, g) + assert.True(t, strings.HasPrefix(json, "["), "JSON output should start with [") + assert.Contains(t, json, "\"name\"", "JSON should include name field") + assert.Contains(t, json, "\"method\"", "JSON should include method field") +} + +func TestFormatMarkdown_Operations(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("operations | take 2 | select name, method", g) + require.NoError(t, err) + + md := oq.FormatMarkdown(result, g) + assert.Contains(t, md, "| name", "markdown should include name column") + assert.Contains(t, md, "| method", "markdown should include method column") +} + +func TestParse_Error_MoreCases(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + query string + }{ + {"empty query", ""}, + {"unknown stage", "schemas | bogus_stage"}, + {"take non-integer", "schemas | take abc"}, + {"sample non-integer", "schemas | sample xyz"}, + {"head non-integer", "schemas | head xyz"}, + {"neighbors non-integer", "schemas | neighbors abc"}, + {"top missing field", "schemas | top 5"}, + {"bottom missing field", "schemas | bottom 5"}, + {"path missing args", "schemas | path"}, + {"path one arg", "schemas | path Pet"}, + {"where empty expr", "schemas | where"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + _, err := oq.Parse(tt.query) + assert.Error(t, err) + }) + } +} + +func TestParse_MoreStages_Success(t *testing.T) { + 
t.Parallel() + + tests := []struct { + name string + query string + }{ + {"format table", "schemas | format table"}, + {"format toon", "schemas | format toon"}, + {"sort asc explicit", "schemas | sort name asc"}, + {"sort default asc", "schemas | sort name"}, + {"select single field", "schemas | select name"}, + {"select many fields", "schemas | select name, type, depth, in_degree"}, + {"where with string", `schemas | where name == "Pet"`}, + {"where with bool", "schemas | where is_component"}, + {"where with not", "schemas | where not is_inline"}, + {"where with has", "schemas | where has(hash)"}, + {"where with matches", `schemas | where name matches ".*Pet.*"`}, + {"path quoted", `schemas | path "Pet" "Address"`}, + {"shared-refs stage", "operations | take 2 | shared-refs"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + stages, err := oq.Parse(tt.query) + require.NoError(t, err) + assert.NotEmpty(t, stages, "should parse into non-empty stages") + }) + } +} + +func TestExecute_WhereAndOr_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Test compound where expressions + result, err := oq.Execute(`schemas.components | where depth > 0 and is_component`, g) + require.NoError(t, err) + assert.NotNil(t, result, "result should not be nil") + + result, err = oq.Execute(`schemas.components | where depth > 100 or is_component`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "or should match is_component=true schemas") +} + +func TestExecute_SortStringField_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Sort by string field + result, err := oq.Execute("schemas.components | sort type asc | select name, type", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "should have schemas sorted by type") +} + +func TestExecute_GroupBy_Type_Details(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | group-by type", 
g) + require.NoError(t, err) + assert.NotEmpty(t, result.Groups, "should have groups") + + // Each group should have Count and Names + for _, grp := range result.Groups { + assert.Positive(t, grp.Count, "group count should be positive") + assert.Len(t, grp.Names, grp.Count, "group names length should match count") + } +} + +func TestFormatMarkdown_Groups_Details(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | group-by type", g) + require.NoError(t, err) + + md := oq.FormatMarkdown(result, g) + assert.Contains(t, md, "| Key |", "group markdown should include Key column") + assert.Contains(t, md, "| Count |", "group markdown should include Count column") +} + +func TestFormatJSON_Explain(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | explain", g) + require.NoError(t, err) + + // All formats should handle explain + table := oq.FormatTable(result, g) + assert.Contains(t, table, "Source: schemas", "table should render explain output") + + json := oq.FormatJSON(result, g) + assert.Contains(t, json, "Source: schemas", "JSON should render explain output") + + md := oq.FormatMarkdown(result, g) + assert.Contains(t, md, "Source: schemas", "markdown should render explain output") + + toon := oq.FormatToon(result, g) + assert.Contains(t, toon, "Source: schemas", "toon should render explain output") +} + +func TestExecute_Leaves_AllZeroOutDegree(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | leaves | select name, out_degree", g) + require.NoError(t, err) + + // Verify leaves are leaf nodes + for _, row := range result.Rows { + od := oq.FieldValuePublic(row, "out_degree", g) + assert.Equal(t, 0, od.Int, "leaves should have 0 out_degree") + } +} + +func TestExecute_OperationsTraversals(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Operations going to schemas and back + result, err := 
oq.Execute("operations | take 1 | schemas | select name", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "operation schemas should have results") + + // Schema to operations roundtrip + result, err = oq.Execute("schemas.components | where name == \"Pet\" | ops | select name", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "Pet should be used by operations") +} + +func loadCyclicGraph(t *testing.T) *graph.SchemaGraph { + t.Helper() + + f, err := os.Open("testdata/cyclic.yaml") + require.NoError(t, err) + defer f.Close() + + ctx := t.Context() + doc, _, err := openapi.Unmarshal(ctx, f, openapi.WithSkipValidation()) + require.NoError(t, err) + require.NotNil(t, doc) + + idx := openapi.BuildIndex(ctx, doc, references.ResolveOptions{ + RootDocument: doc, + TargetDocument: doc, + TargetLocation: "testdata/cyclic.yaml", + }) + + return graph.Build(ctx, idx) +} + +func TestExecute_Cycles_WithCyclicSpec(t *testing.T) { + t.Parallel() + g := loadCyclicGraph(t) + + // NodeA -> NodeB -> NodeA is a cycle + result, err := oq.Execute("schemas | cycles", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Groups, "cyclic spec should have cycles") + + // Format the groups + table := oq.FormatTable(result, g) + assert.Contains(t, table, "cycle-") + + json := oq.FormatJSON(result, g) + assert.Contains(t, json, "cycle-") +} + +func TestExecute_CyclicSpec_EdgeAnnotations(t *testing.T) { + t.Parallel() + g := loadCyclicGraph(t) + + // Test refs-out to cover edgeKindString branches + result, err := oq.Execute(`schemas.components | where name == "NodeA" | refs-out | select name, edge_kind, edge_label`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "NodeA should have outgoing refs") + + // Collect edge kinds + edgeKinds := make(map[string]bool) + for _, row := range result.Rows { + k := oq.FieldValuePublic(row, "edge_kind", g) + edgeKinds[k.Str] = true + } + // NodeA has properties, allOf, anyOf, items etc. 
+ assert.True(t, edgeKinds["property"], "should have property edges") +} + +func TestExecute_CyclicSpec_IsCircular(t *testing.T) { + t.Parallel() + g := loadCyclicGraph(t) + + result, err := oq.Execute("schemas.components | where is_circular | select name", g) + require.NoError(t, err) + names := collectNames(result, g) + assert.Contains(t, names, "NodeA", "NodeA is in the A↔B cycle") + assert.Contains(t, names, "NodeB", "NodeB is in the A↔B cycle") + + // NodeC is NOT in the cycle — it's only referenced by NodeA via allOf + assert.NotContains(t, names, "NodeC", "NodeC should not be marked circular") +} + +func TestExecute_CyclicSpec_DeprecatedOp(t *testing.T) { + t.Parallel() + g := loadCyclicGraph(t) + + // The listNodes operation is deprecated with tags, summary, and description + result, err := oq.Execute("operations | select name, deprecated, summary, description, tag, parameter_count", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "should have operation rows") + + dep := oq.FieldValuePublic(result.Rows[0], "deprecated", g) + assert.True(t, dep.Bool, "listNodes should be deprecated") + + summary := oq.FieldValuePublic(result.Rows[0], "summary", g) + assert.Equal(t, "List all nodes", summary.Str, "summary should match spec") + + desc := oq.FieldValuePublic(result.Rows[0], "description", g) + assert.NotEmpty(t, desc.Str, "description should not be empty") + + tag := oq.FieldValuePublic(result.Rows[0], "tag", g) + assert.Equal(t, "nodes", tag.Str, "tag should be nodes") +} + +func TestExecute_ToonFormat_WithBoolAndInt(t *testing.T) { + t.Parallel() + g := loadCyclicGraph(t) + + // Select fields that cover all toonValue branches (string, int, bool) + result, err := oq.Execute("schemas.components | take 1 | select name, depth, is_circular", g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.NotEmpty(t, toon, "toon output should not be empty") +} + +func TestExecute_ToonEscape_SpecialChars(t *testing.T) { + t.Parallel() + g 
:= loadTestGraph(t) + + // path fields contain "/" which doesn't need quoting, but let's cover the formatter + result, err := oq.Execute("schemas | take 3 | select path", g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.NotEmpty(t, toon, "toon output should not be empty") +} + +func TestFormatToon_Explain(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | where depth > 0 | explain", g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.Contains(t, toon, "Source: schemas", "toon should render explain output") +} + +func TestFormatMarkdown_Explain(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | explain", g) + require.NoError(t, err) + + md := oq.FormatMarkdown(result, g) + assert.Contains(t, md, "Source: schemas", "markdown should render explain output") +} + +// --- New jq-style syntax tests --- + +func TestParse_NewSyntax_Success(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + query string + }{ + {"select filter", `schemas | select(depth > 3)`}, + {"pick fields", "schemas | pick name, depth"}, + {"sort_by asc", "schemas | sort_by(depth)"}, + {"sort_by desc", "schemas | sort_by(depth; desc)"}, + {"first", "schemas | first(5)"}, + {"last", "schemas | last(5)"}, + {"length", "schemas | length"}, + {"group_by", "schemas | group_by(type)"}, + {"sample call", "schemas | sample(3)"}, + {"neighbors call", "schemas | neighbors(2)"}, + {"path call", "schemas | path(Pet; Address)"}, + {"top call", "schemas | top(3; depth)"}, + {"bottom call", "schemas | bottom(3; depth)"}, + {"format call", "schemas | format(json)"}, + {"let binding", `schemas | select(name == "Pet") | let $pet = name`}, + {"full new pipeline", `schemas.components | select(depth > 5) | sort_by(depth; desc) | first(10) | pick name, depth`}, + {"def inline", `def hot: select(in_degree > 0); schemas.components | hot`}, + {"def with params", `def 
impact($name): select(name == $name); schemas.components | impact("Pet")`}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + stages, err := oq.Parse(tt.query) + require.NoError(t, err, "query: %s", tt.query) + assert.NotEmpty(t, stages, "should parse into non-empty stages") + }) + } +} + +func TestExecute_SelectFilter_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | select(type == "object") | pick name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + assert.Contains(t, names, "Pet", "select filter should match Pet") + assert.Contains(t, names, "Owner", "select filter should match Owner") +} + +func TestExecute_SortBy_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | sort_by(property_count; desc) | first(3) | pick name, property_count", g) + require.NoError(t, err) + assert.LessOrEqual(t, len(result.Rows), 3, "should return at most 3 rows") +} + +func TestExecute_First_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | first(3)", g) + require.NoError(t, err) + assert.Len(t, result.Rows, 3, "first should return exactly 3 rows") +} + +func TestExecute_Last_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | last(2)", g) + require.NoError(t, err) + assert.Len(t, result.Rows, 2, "last should return exactly 2 rows") +} + +func TestExecute_Length_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | length", g) + require.NoError(t, err) + assert.True(t, result.IsCount, "length should be a count result") + assert.Positive(t, result.Count, "count should be positive") +} + +func TestExecute_GroupBy_NewSyntax_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := 
oq.Execute("schemas.components | group_by(type)", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Groups, "should have groups") +} + +func TestExecute_LetBinding_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // let $pet = name, then use $pet in subsequent filter + result, err := oq.Execute(`schemas.components | select(name == "Pet") | let $pet = name | reachable | select(name != $pet) | pick name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + assert.NotContains(t, names, "Pet", "should not include the $pet variable value") + assert.Contains(t, names, "Owner", "should include reachable schemas") +} + +func TestExecute_DefExpansion_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`def hot: select(in_degree > 0); schemas.components | hot | pick name`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "def expansion should produce results") + + // All results should have in_degree > 0 + for _, row := range result.Rows { + v := oq.FieldValuePublic(row, "in_degree", g) + assert.Positive(t, v.Int, "hot filter should require in_degree > 0") + } +} + +func TestExecute_DefWithParams_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`def impact($name): select(name == $name) | blast-radius; schemas.components | impact("Pet")`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "parameterized def should produce results") +} + +func TestExecute_AlternativeOperator_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // name // "none" — name is always set, so should not be "none" + result, err := oq.Execute(`schemas.components | select(name // "none" != "none") | pick name`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "alternative operator should work") +} + +func TestExecute_IfThenElse_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := 
oq.Execute(`schemas.components | select(if is_component then depth >= 0 else true end) | pick name`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "if-then-else should work in select") +} + +func TestExecute_ExplainNewSyntax_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | select(depth > 5) | sort_by(depth; desc) | first(10) | pick name, depth | explain`, g) + require.NoError(t, err) + assert.Contains(t, result.Explain, "Filter: select(depth > 5)", "explain should show select filter") + assert.Contains(t, result.Explain, "Sort: sort_by(depth; desc)", "explain should show sort_by") + assert.Contains(t, result.Explain, "Limit: first(10)", "explain should show first") + assert.Contains(t, result.Explain, "Project: pick name, depth", "explain should show pick") +} + +func TestExecute_ExplainLast_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | last(3) | explain", g) + require.NoError(t, err) + assert.Contains(t, result.Explain, "Limit: last(3)", "explain should show last") +} + +func TestExecute_ExplainLet_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | select(name == "Pet") | let $pet = name | explain`, g) + require.NoError(t, err) + assert.Contains(t, result.Explain, "Bind: let $pet = name", "explain should show let binding") +} + +func TestParse_NewSyntax_Error(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + query string + }{ + {"select call empty", "schemas | select()"}, + {"sort_by no parens", "schemas | sort_by depth"}, + {"group_by no parens", "schemas | group_by type"}, + {"let no dollar", "schemas | let x = name"}, + {"let no equals", "schemas | let $x name"}, + {"let empty expr", "schemas | let $x ="}, + {"def missing colon", "def hot select(depth > 0); schemas | hot"}, + {"def missing semicolon", "def hot: select(depth > 0) 
schemas | hot"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + _, err := oq.Parse(tt.query) + assert.Error(t, err, "query should fail: %s", tt.query) + }) + } +} + +// collectNames extracts the "name" field from all rows in the result. +func collectNames(result *oq.Result, g *graph.SchemaGraph) []string { + var names []string + for _, row := range result.Rows { + v := oq.FieldValuePublic(row, "name", g) + names = append(names, v.Str) + } + return names +} diff --git a/oq/parse.go b/oq/parse.go new file mode 100644 index 0000000..72517df --- /dev/null +++ b/oq/parse.go @@ -0,0 +1,681 @@ +package oq + +import ( + "errors" + "fmt" + "strconv" + "strings" +) + +// declarations holds parsed includes, defs, and the raw remaining pipeline text. +type declarations struct { + Includes []string + Defs []FuncDef + PipelineText string +} + +// parseDeclarations scans for include/def declarations at the start of a query. +func parseDeclarations(query string) (*declarations, error) { + d := &declarations{} + remaining := strings.TrimSpace(query) + + for { + remaining = strings.TrimSpace(remaining) + if remaining == "" { + break + } + if strings.HasPrefix(remaining, "include ") { + rest := remaining[len("include "):] + semi := findUnquotedSemicolon(rest) + if semi < 0 { + return nil, errors.New("include missing terminating ;") + } + path := strings.TrimSpace(rest[:semi]) + path = strings.Trim(path, "\"") + if path == "" { + return nil, errors.New("include requires a path") + } + d.Includes = append(d.Includes, path) + remaining = rest[semi+1:] + continue + } + if strings.HasPrefix(remaining, "def ") { + rest := remaining[len("def "):] + colonIdx := strings.Index(rest, ":") + if colonIdx < 0 { + return nil, errors.New("def missing colon separator") + } + sig := strings.TrimSpace(rest[:colonIdx]) + body := rest[colonIdx+1:] + semi := findUnquotedSemicolon(body) + if semi < 0 { + return nil, errors.New("def missing terminating ;") + } + 
bodyStr := strings.TrimSpace(body[:semi]) + remaining = body[semi+1:] + + fd, err := parseFuncSig(sig) + if err != nil { + return nil, err + } + fd.Body = bodyStr + d.Defs = append(d.Defs, fd) + continue + } + break + } + + d.PipelineText = remaining + return d, nil +} + +// ParseQuery parses a full query string including optional includes, defs, and pipeline. +func ParseQuery(query string) (*Query, error) { + d, err := parseDeclarations(query) + if err != nil { + return nil, err + } + + q := &Query{ + Includes: d.Includes, + Defs: d.Defs, + } + + if d.PipelineText == "" { + if len(q.Defs) > 0 || len(q.Includes) > 0 { + return q, nil + } + return nil, errors.New("empty query") + } + + // Expand defs at text level before parsing + expanded, err := ExpandDefs(d.PipelineText, d.Defs) + if err != nil { + return nil, err + } + + stages, err := parsePipeline(expanded) + if err != nil { + return nil, err + } + q.Stages = stages + return q, nil +} + +// Parse splits a pipeline query string into stages (backward compat). +func Parse(query string) ([]Stage, error) { + q, err := ParseQuery(query) + if err != nil { + return nil, err + } + return q.Stages, nil +} + +func parsePipeline(query string) ([]Stage, error) { + parts := splitPipeline(query) + if len(parts) == 0 { + return nil, errors.New("empty query") + } + + var stages []Stage + + for i, part := range parts { + part = strings.TrimSpace(part) + if part == "" { + continue + } + + if i == 0 { + stages = append(stages, Stage{Kind: StageSource, Source: part}) + continue + } + + stage, err := parseStage(part) + if err != nil { + return nil, err + } + stages = append(stages, stage) + } + + return stages, nil +} + +func parseStage(s string) (Stage, error) { + // Try keyword-call syntax first: select(...), sort_by(...), etc. 
+ keyword, args, isCall := splitKeywordCall(s) + if !isCall { + keyword, args = splitFirst(s) + } + keyword = strings.ToLower(keyword) + + switch keyword { + // New jq-style: select(expr) replaces where + case "select": + if isCall { + // select(expr) → filter + if args == "" { + return Stage{}, errors.New("select() requires an expression") + } + return Stage{Kind: StageWhere, Expr: args}, nil + } + // select f1, f2 → old-style field projection — use pick instead + // But keep for backward compat during migration + if args == "" { + return Stage{}, errors.New("select requires field names") + } + fields := parseCSV(args) + return Stage{Kind: StageSelect, Fields: fields}, nil + + case "pick": + if args == "" { + return Stage{}, errors.New("pick requires field names") + } + fields := parseCSV(args) + return Stage{Kind: StageSelect, Fields: fields}, nil + + // where (legacy, still supported) + case "where": + if args == "" { + return Stage{}, errors.New("where requires an expression") + } + return Stage{Kind: StageWhere, Expr: args}, nil + + case "sort_by": + if isCall { + parts := splitSemicolonArgs(args) + if len(parts) == 0 || parts[0] == "" { + return Stage{}, errors.New("sort_by requires a field name") + } + desc := false + if len(parts) >= 2 && strings.TrimSpace(parts[1]) == "desc" { + desc = true + } + return Stage{Kind: StageSort, SortField: strings.TrimSpace(parts[0]), SortDesc: desc}, nil + } + return Stage{}, errors.New("sort_by requires parentheses: sort_by(field) or sort_by(field; desc)") + + // Legacy sort + case "sort": + parts := strings.Fields(args) + if len(parts) == 0 { + return Stage{}, errors.New("sort requires a field name") + } + desc := false + if len(parts) >= 2 && strings.ToLower(parts[1]) == "desc" { + desc = true + } + return Stage{Kind: StageSort, SortField: parts[0], SortDesc: desc}, nil + + case "first": + if isCall { + n, err := strconv.Atoi(strings.TrimSpace(args)) + if err != nil { + return Stage{}, fmt.Errorf("first requires a number: 
%w", err) + } + return Stage{Kind: StageTake, Limit: n}, nil + } + // bare "first" with space arg + n, err := strconv.Atoi(strings.TrimSpace(args)) + if err != nil { + return Stage{}, fmt.Errorf("first requires a number: %w", err) + } + return Stage{Kind: StageTake, Limit: n}, nil + + case "last": + if isCall { + n, err := strconv.Atoi(strings.TrimSpace(args)) + if err != nil { + return Stage{}, fmt.Errorf("last requires a number: %w", err) + } + return Stage{Kind: StageLast, Limit: n}, nil + } + n, err := strconv.Atoi(strings.TrimSpace(args)) + if err != nil { + return Stage{}, fmt.Errorf("last requires a number: %w", err) + } + return Stage{Kind: StageLast, Limit: n}, nil + + // Legacy take/head + case "take", "head": + n, err := strconv.Atoi(strings.TrimSpace(args)) + if err != nil { + return Stage{}, fmt.Errorf("take requires a number: %w", err) + } + return Stage{Kind: StageTake, Limit: n}, nil + + case "length": + return Stage{Kind: StageCount}, nil + + case "unique": + return Stage{Kind: StageUnique}, nil + + case "group_by": + if isCall { + if args == "" { + return Stage{}, errors.New("group_by requires a field name") + } + fields := parseCSV(args) + return Stage{Kind: StageGroupBy, Fields: fields}, nil + } + return Stage{}, errors.New("group_by requires parentheses: group_by(field)") + + // Legacy group-by + case "group-by": + if args == "" { + return Stage{}, errors.New("group-by requires a field name") + } + fields := parseCSV(args) + return Stage{Kind: StageGroupBy, Fields: fields}, nil + + case "count": + return Stage{Kind: StageCount}, nil + + case "refs-out": + return Stage{Kind: StageRefsOut}, nil + + case "refs-in": + return Stage{Kind: StageRefsIn}, nil + + case "reachable": + return Stage{Kind: StageReachable}, nil + + case "ancestors": + return Stage{Kind: StageAncestors}, nil + + case "properties": + return Stage{Kind: StageProperties}, nil + + case "union-members": + return Stage{Kind: StageUnionMembers}, nil + + case "items": + return 
Stage{Kind: StageItems}, nil + + case "ops": + return Stage{Kind: StageOps}, nil + + case "schemas": + return Stage{Kind: StageSchemas}, nil + + case "explain": + return Stage{Kind: StageExplain}, nil + + case "fields": + return Stage{Kind: StageFields}, nil + + case "sample": + if isCall { + n, err := strconv.Atoi(strings.TrimSpace(args)) + if err != nil { + return Stage{}, fmt.Errorf("sample requires a number: %w", err) + } + return Stage{Kind: StageSample, Limit: n}, nil + } + n, err := strconv.Atoi(strings.TrimSpace(args)) + if err != nil { + return Stage{}, fmt.Errorf("sample requires a number: %w", err) + } + return Stage{Kind: StageSample, Limit: n}, nil + + case "neighbors": + if isCall { + n, err := strconv.Atoi(strings.TrimSpace(args)) + if err != nil { + return Stage{}, fmt.Errorf("neighbors requires a depth number: %w", err) + } + return Stage{Kind: StageNeighbors, Limit: n}, nil + } + n, err := strconv.Atoi(strings.TrimSpace(args)) + if err != nil { + return Stage{}, fmt.Errorf("neighbors requires a depth number: %w", err) + } + return Stage{Kind: StageNeighbors, Limit: n}, nil + + case "path": + if isCall { + parts := splitSemicolonArgs(args) + if len(parts) < 2 || strings.TrimSpace(parts[0]) == "" || strings.TrimSpace(parts[1]) == "" { + return Stage{}, errors.New("path requires two schema names") + } + return Stage{Kind: StagePath, PathFrom: strings.TrimSpace(parts[0]), PathTo: strings.TrimSpace(parts[1])}, nil + } + from, to := parseTwoArgs(args) + if from == "" || to == "" { + return Stage{}, errors.New("path requires two schema names") + } + return Stage{Kind: StagePath, PathFrom: from, PathTo: to}, nil + + case "top": + if isCall { + parts := splitSemicolonArgs(args) + if len(parts) < 2 { + return Stage{}, errors.New("top requires a number and a field name") + } + n, err := strconv.Atoi(strings.TrimSpace(parts[0])) + if err != nil { + return Stage{}, fmt.Errorf("top requires a number: %w", err) + } + return Stage{Kind: StageTop, Limit: n, 
SortField: strings.TrimSpace(parts[1])}, nil + } + parts := strings.Fields(args) + if len(parts) < 2 { + return Stage{}, errors.New("top requires a number and a field name") + } + n, err := strconv.Atoi(parts[0]) + if err != nil { + return Stage{}, fmt.Errorf("top requires a number: %w", err) + } + return Stage{Kind: StageTop, Limit: n, SortField: parts[1]}, nil + + case "bottom": + if isCall { + parts := splitSemicolonArgs(args) + if len(parts) < 2 { + return Stage{}, errors.New("bottom requires a number and a field name") + } + n, err := strconv.Atoi(strings.TrimSpace(parts[0])) + if err != nil { + return Stage{}, fmt.Errorf("bottom requires a number: %w", err) + } + return Stage{Kind: StageBottom, Limit: n, SortField: strings.TrimSpace(parts[1])}, nil + } + parts := strings.Fields(args) + if len(parts) < 2 { + return Stage{}, errors.New("bottom requires a number and a field name") + } + n, err := strconv.Atoi(parts[0]) + if err != nil { + return Stage{}, fmt.Errorf("bottom requires a number: %w", err) + } + return Stage{Kind: StageBottom, Limit: n, SortField: parts[1]}, nil + + case "format": + f := strings.TrimSpace(args) + if isCall { + f = strings.TrimSpace(args) + } + if f != "table" && f != "json" && f != "markdown" && f != "toon" { + return Stage{}, fmt.Errorf("format must be table, json, markdown, or toon, got %q", f) + } + return Stage{Kind: StageFormat, Format: f}, nil + + case "connected": + return Stage{Kind: StageConnected}, nil + + case "blast-radius": + return Stage{Kind: StageBlastRadius}, nil + + case "orphans": + return Stage{Kind: StageOrphans}, nil + + case "leaves": + return Stage{Kind: StageLeaves}, nil + + case "cycles": + return Stage{Kind: StageCycles}, nil + + case "clusters": + return Stage{Kind: StageClusters}, nil + + case "tag-boundary": + return Stage{Kind: StageTagBoundary}, nil + + case "shared-refs": + return Stage{Kind: StageSharedRefs}, nil + + case "let": + return parseLet(args) + + default: + return Stage{}, 
fmt.Errorf("unknown stage: %q", keyword)
+	}
+}
+
+// parseLet parses a "let $var = expr" binding stage. The variable name must
+// start with "$" and be followed by "=" and a non-empty expression.
+func parseLet(args string) (Stage, error) {
+	// let $var = expr
+	if args == "" || !strings.HasPrefix(args, "$") {
+		return Stage{}, errors.New("let requires $variable = expression")
+	}
+	eqIdx := strings.Index(args, "=")
+	if eqIdx < 0 {
+		return Stage{}, errors.New("let requires $variable = expression")
+	}
+	varName := strings.TrimSpace(args[:eqIdx])
+	exprStr := strings.TrimSpace(args[eqIdx+1:])
+	if !strings.HasPrefix(varName, "$") || len(varName) < 2 {
+		return Stage{}, errors.New("let variable must start with $")
+	}
+	if exprStr == "" {
+		return Stage{}, errors.New("let requires an expression after =")
+	}
+	return Stage{Kind: StageLet, VarName: varName, Expr: exprStr}, nil
+}
+
+// parseFuncSig parses a function signature of the form "name" or
+// "name($a; $b)" into a FuncDef. Parameters are semicolon-separated and
+// must each start with "$".
+func parseFuncSig(sig string) (FuncDef, error) {
+	fd := FuncDef{}
+	parenIdx := strings.Index(sig, "(")
+	if parenIdx < 0 {
+		// No parameter list: the whole signature is the name.
+		fd.Name = strings.TrimSpace(sig)
+		if fd.Name == "" {
+			return fd, errors.New("def requires a name")
+		}
+		return fd, nil
+	}
+	fd.Name = strings.TrimSpace(sig[:parenIdx])
+	if fd.Name == "" {
+		return fd, errors.New("def requires a name")
+	}
+	closeIdx := strings.LastIndex(sig, ")")
+	// closeIdx < parenIdx also covers a ")" that appears before the "(";
+	// checking only closeIdx < 0 here allowed sig[parenIdx+1:closeIdx] to
+	// slice out of range and panic on input like `a)b(c`.
+	if closeIdx < parenIdx {
+		return fd, errors.New("def params missing closing )")
+	}
+	paramStr := sig[parenIdx+1 : closeIdx]
+	for _, p := range splitSemicolonArgs(paramStr) {
+		p = strings.TrimSpace(p)
+		if p != "" {
+			if !strings.HasPrefix(p, "$") {
+				return fd, fmt.Errorf("def param %q must start with $", p)
+			}
+			fd.Params = append(fd.Params, p)
+		}
+	}
+	return fd, nil
+}
+
+// findUnquotedSemicolon returns the index of the first ";" in s that is
+// outside double quotes and not nested inside parentheses, or -1 if none.
+func findUnquotedSemicolon(s string) int {
+	inQuote := false
+	depth := 0
+	for i := 0; i < len(s); i++ {
+		switch s[i] {
+		case '"':
+			inQuote = !inQuote
+		case '(':
+			if !inQuote {
+				depth++
+			}
+		case ')':
+			if !inQuote {
+				depth--
+			}
+		case ';':
+			if !inQuote && depth == 0 {
+				return i
+			}
+		}
+	}
+	return -1
+}
+
+// splitKeywordCall splits "select(expr)" into ("select", "expr", true).
+// Returns ("", "", false) if s doesn't match keyword(...) 
form.
+// The keyword must be a single word (no spaces before the opening paren).
+func splitKeywordCall(s string) (string, string, bool) {
+	s = strings.TrimSpace(s)
+	parenIdx := strings.Index(s, "(")
+	if parenIdx < 0 {
+		return "", "", false
+	}
+	keyword := s[:parenIdx]
+	// Keyword must not contain spaces (single word only)
+	if strings.ContainsAny(keyword, " \t") {
+		return "", "", false
+	}
+	if keyword == "" {
+		return "", "", false
+	}
+	// Find matching closing paren (not just the last one — handle nested parens)
+	rest := s[parenIdx+1:]
+	depth := 1
+	inQuote := false
+	end := -1
+	for i := 0; i < len(rest); i++ {
+		switch rest[i] {
+		case '"':
+			inQuote = !inQuote
+		case '(':
+			if !inQuote {
+				depth++
+			}
+		case ')':
+			if !inQuote {
+				depth--
+				if depth == 0 {
+					end = i
+				}
+			}
+		}
+		if end >= 0 {
+			break
+		}
+	}
+	if end < 0 {
+		return "", "", false
+	}
+	// Ensure nothing after the closing paren
+	trailing := strings.TrimSpace(rest[end+1:])
+	if trailing != "" {
+		return "", "", false
+	}
+	args := rest[:end]
+	return keyword, args, true
+}
+
+// splitSemicolonArgs splits s on semicolons that sit outside double quotes
+// and at parenthesis depth zero. Quotes and parentheses are preserved in the
+// returned parts; a trailing empty part is dropped.
+func splitSemicolonArgs(s string) []string {
+	var parts []string
+	var current strings.Builder
+	depth := 0
+	inQuote := false
+
+	for i := 0; i < len(s); i++ {
+		ch := s[i]
+		switch {
+		case ch == '"':
+			inQuote = !inQuote
+			current.WriteByte(ch)
+		case ch == '(' && !inQuote:
+			depth++
+			current.WriteByte(ch)
+		case ch == ')' && !inQuote:
+			depth--
+			current.WriteByte(ch)
+		case ch == ';' && !inQuote && depth == 0:
+			parts = append(parts, current.String())
+			current.Reset()
+		default:
+			current.WriteByte(ch)
+		}
+	}
+	if current.Len() > 0 {
+		parts = append(parts, current.String())
+	}
+	return parts
+}
+
+// parseTwoArgs extracts up to two whitespace-separated arguments from s.
+// Double-quoted arguments may contain spaces; an unterminated quote
+// consumes the rest of the string. Missing arguments are returned as "".
+func parseTwoArgs(s string) (string, string) {
+	s = strings.TrimSpace(s)
+	var args []string
+	for len(s) > 0 {
+		if s[0] == '"' {
+			// Quoted arg
+			end := strings.Index(s[1:], "\"")
+			if end < 0 {
+				args = append(args, s[1:])
+				break
+			}
+			args = append(args, s[1:end+1])
+			s = strings.TrimSpace(s[end+2:]) 
+		} else {
+			idx := strings.IndexAny(s, " \t")
+			if idx < 0 {
+				args = append(args, s)
+				break
+			}
+			args = append(args, s[:idx])
+			s = strings.TrimSpace(s[idx+1:])
+		}
+		if len(args) == 2 {
+			break
+		}
+	}
+	if len(args) < 2 {
+		if len(args) == 1 {
+			return args[0], ""
+		}
+		return "", ""
+	}
+	return args[0], args[1]
+}
+
+// --- Pipeline splitting ---
+
+// splitPipeline splits a query into stage strings on "|" characters that are
+// outside double quotes and parentheses. Quotes and parens are preserved in
+// the returned parts; a trailing empty stage is dropped.
+func splitPipeline(input string) []string {
+	var parts []string
+	var current strings.Builder
+	inQuote := false
+	depth := 0 // paren depth — don't split | inside parens
+
+	for i := 0; i < len(input); i++ {
+		ch := input[i]
+		switch {
+		case ch == '"':
+			inQuote = !inQuote
+			current.WriteByte(ch)
+		case ch == '(' && !inQuote:
+			depth++
+			current.WriteByte(ch)
+		case ch == ')' && !inQuote:
+			depth--
+			current.WriteByte(ch)
+		case ch == '|' && !inQuote && depth == 0:
+			parts = append(parts, current.String())
+			current.Reset()
+		default:
+			current.WriteByte(ch)
+		}
+	}
+	if current.Len() > 0 {
+		parts = append(parts, current.String())
+	}
+	return parts
+}
+
+// splitFirst splits trimmed s into its first whitespace-delimited word and
+// the trimmed remainder; the remainder is "" when s has no whitespace.
+func splitFirst(s string) (string, string) {
+	s = strings.TrimSpace(s)
+	idx := strings.IndexAny(s, " \t")
+	if idx < 0 {
+		return s, ""
+	}
+	return s[:idx], strings.TrimSpace(s[idx+1:])
+}
+
+// parseCSV splits a comma-separated list, trimming whitespace around each
+// entry and dropping empty entries.
+func parseCSV(s string) []string {
+	parts := strings.Split(s, ",")
+	result := make([]string, 0, len(parts))
+	for _, p := range parts {
+		p = strings.TrimSpace(p)
+		if p != "" {
+			result = append(result, p)
+		}
+	}
+	return result
+}
diff --git a/oq/testdata/cyclic.yaml b/oq/testdata/cyclic.yaml
new file mode 100644
index 0000000..bed5052
--- /dev/null
+++ b/oq/testdata/cyclic.yaml
@@ -0,0 +1,89 @@
+openapi: "3.1.0"
+info:
+  title: Cyclic Test
+  version: "1.0.0"
+paths:
+  /nodes:
+    get:
+      operationId: listNodes
+      tags:
+        - nodes
+        - admin
+      deprecated: true
+      summary: List all nodes
+      description: Returns a list of all node objects
+      responses:
+        "200":
+          description: A list of nodes
+          content:
+            application/json:
+              schema:
+                type: array
+                items:
+                  $ref: 
'#/components/schemas/NodeA' +components: + schemas: + NodeA: + type: object + properties: + name: + type: string + child: + $ref: '#/components/schemas/NodeB' + extra: + allOf: + - $ref: '#/components/schemas/NodeC' + variant: + anyOf: + - type: string + - type: integer + meta: + additionalProperties: + type: string + items: + type: array + items: + type: string + prefixItems: + - type: integer + guard: + if: + type: string + then: + type: string + else: + type: integer + negative: + not: + type: null + contained: + type: array + contains: + type: string + propNames: + type: object + propertyNames: + type: string + pattern: + type: object + patternProperties: + "^S_": + type: string + deps: + type: object + dependentSchemas: + bar: + type: object + properties: + barValue: + type: string + NodeB: + type: object + properties: + ref_back: + $ref: '#/components/schemas/NodeA' + NodeC: + type: object + properties: + value: + type: string diff --git a/oq/testdata/petstore.yaml b/oq/testdata/petstore.yaml new file mode 100644 index 0000000..82deb95 --- /dev/null +++ b/oq/testdata/petstore.yaml @@ -0,0 +1,131 @@ +openapi: "3.1.0" +info: + title: Petstore + version: "1.0.0" +paths: + /pets: + get: + operationId: listPets + parameters: + - name: limit + in: query + schema: + type: integer + responses: + "200": + description: A list of pets + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/Pet' + post: + operationId: createPet + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/Pet' + responses: + "201": + description: Created + content: + application/json: + schema: + $ref: '#/components/schemas/Pet' + /pets/{petId}: + get: + operationId: showPetById + parameters: + - name: petId + in: path + required: true + schema: + type: string + responses: + "200": + description: A pet + content: + application/json: + schema: + $ref: '#/components/schemas/Pet' + default: + description: unexpected error + 
content: + application/json: + schema: + $ref: '#/components/schemas/Error' + /owners: + get: + operationId: listOwners + responses: + "200": + description: A list of owners + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/Owner' +components: + schemas: + Pet: + type: object + properties: + id: + type: integer + name: + type: string + tag: + type: string + owner: + $ref: '#/components/schemas/Owner' + required: + - id + - name + Owner: + type: object + properties: + id: + type: integer + name: + type: string + address: + $ref: '#/components/schemas/Address' + Address: + type: object + properties: + street: + type: string + city: + type: string + Error: + type: object + properties: + code: + type: integer + message: + type: string + required: + - code + - message + Shape: + oneOf: + - $ref: '#/components/schemas/Circle' + - $ref: '#/components/schemas/Square' + Circle: + type: object + properties: + radius: + type: number + Square: + type: object + properties: + side: + type: number + Unused: + type: object + properties: + data: + type: string