From 340c0810b8832ac512e4a05929a44f2952de845a Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 00:14:19 +0000 Subject: [PATCH 01/27] feat: add oq pipeline query language for OpenAPI schema graphs Implement a domain-specific pipeline query language (oq) that enables agents and humans to construct ad-hoc structural queries over OpenAPI documents. The query engine operates over a pre-computed directed graph materialized from openapi.Index. New packages: - graph/: SchemaGraph type with node/edge types, Build() constructor, reachability/ancestor traversal, and pre-computed metrics - oq/expr/: Predicate expression parser and evaluator supporting ==, !=, >, <, >=, <=, and, or, not, has(), matches() - oq/: Pipeline parser, AST, executor with source/traversal/filter stages, and table/JSON formatters New CLI command: openapi spec query '' Example queries: schemas.components | sort depth desc | take 10 | select name, depth schemas | where union_width > 0 | sort union_width desc | take 10 schemas.components | where in_degree == 0 | select name operations | sort schema_count desc | take 10 Co-Authored-By: Claude Opus 4.6 --- cmd/openapi/commands/openapi/query.go | 146 +++++ cmd/openapi/commands/openapi/root.go | 1 + graph/graph.go | 678 +++++++++++++++++++ graph/graph_test.go | 178 +++++ oq/expr/expr.go | 469 ++++++++++++++ oq/expr/expr_test.go | 143 +++++ oq/oq.go | 893 ++++++++++++++++++++++++++ oq/oq_test.go | 333 ++++++++++ oq/testdata/petstore.yaml | 131 ++++ 9 files changed, 2972 insertions(+) create mode 100644 cmd/openapi/commands/openapi/query.go create mode 100644 graph/graph.go create mode 100644 graph/graph_test.go create mode 100644 oq/expr/expr.go create mode 100644 oq/expr/expr_test.go create mode 100644 oq/oq.go create mode 100644 oq/oq_test.go create mode 100644 oq/testdata/petstore.yaml diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go new file mode 100644 index 0000000..681552f --- /dev/null +++ 
b/cmd/openapi/commands/openapi/query.go @@ -0,0 +1,146 @@ +package openapi + +import ( + "context" + "errors" + "fmt" + "os" + + "github.com/speakeasy-api/openapi/graph" + "github.com/speakeasy-api/openapi/openapi" + "github.com/speakeasy-api/openapi/oq" + "github.com/speakeasy-api/openapi/references" + "github.com/spf13/cobra" +) + +var queryCmd = &cobra.Command{ + Use: "query ", + Short: "Query an OpenAPI specification using the oq pipeline language", + Long: `Query an OpenAPI specification using the oq pipeline language to answer +structural and semantic questions about schemas and operations. + +Examples: + # Deeply nested components + openapi spec query petstore.yaml 'schemas.components | sort depth desc | take 10 | select name, depth' + + # Wide union trees + openapi spec query petstore.yaml 'schemas | where union_width > 0 | sort union_width desc | take 10' + + # Central components (highest in-degree) + openapi spec query petstore.yaml 'schemas.components | sort in_degree desc | take 10 | select name, in_degree' + + # Dead components (no incoming references) + openapi spec query petstore.yaml 'schemas.components | where in_degree == 0 | select name' + + # Operation sprawl + openapi spec query petstore.yaml 'operations | sort schema_count desc | take 10 | select name, schema_count' + + # Circular references + openapi spec query petstore.yaml 'schemas | where is_circular | select name, path' + + # Schema count + openapi spec query petstore.yaml 'schemas | count' + +Stdin is supported — either pipe data directly or use '-' explicitly: + cat spec.yaml | openapi spec query - 'schemas | count' + +Pipeline stages: + Source: schemas, schemas.components, schemas.inline, operations + Traversal: refs-out, refs-in, reachable, ancestors, properties, union-members, items, ops, schemas + Filter: where , select , sort [asc|desc], take , unique, group-by , count + +Where expressions support: ==, !=, >, <, >=, <=, and, or, not, has(), matches`, + Args: stdinOrFileArgs(2, 2), 
+ Run: runQuery, +} + +var queryOutputFormat string +var queryFromFile string + +func init() { + queryCmd.Flags().StringVar(&queryOutputFormat, "format", "table", "output format: table or json") + queryCmd.Flags().StringVarP(&queryFromFile, "file", "f", "", "read query from file instead of argument") +} + +func runQuery(cmd *cobra.Command, args []string) { + ctx := cmd.Context() + inputFile := inputFileFromArgs(args) + + queryStr := "" + if queryFromFile != "" { + data, err := os.ReadFile(queryFromFile) + if err != nil { + fmt.Fprintf(os.Stderr, "Error reading query file: %v\n", err) + os.Exit(1) + } + queryStr = string(data) + } else if len(args) >= 2 { + queryStr = args[1] + } + + if queryStr == "" { + fmt.Fprintf(os.Stderr, "Error: no query provided\n") + os.Exit(1) + } + + processor, err := NewOpenAPIProcessor(inputFile, "", false) + if err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } + + if err := queryOpenAPI(ctx, processor, queryStr); err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } +} + +func queryOpenAPI(ctx context.Context, processor *OpenAPIProcessor, queryStr string) error { + doc, _, err := processor.LoadDocument(ctx) + if err != nil { + return err + } + if doc == nil { + return errors.New("failed to parse OpenAPI document: document is nil") + } + + // Build index + idx := buildIndex(ctx, doc) + + // Build graph + g := graph.Build(ctx, idx) + + // Execute query + result, err := oq.Execute(queryStr, g) + if err != nil { + return fmt.Errorf("query error: %w", err) + } + + // Format and output + var output string + switch queryOutputFormat { + case "json": + output = oq.FormatJSON(result, g) + default: + output = oq.FormatTable(result, g) + } + + fmt.Fprint(processor.stdout(), output) + if !result.IsCount || queryOutputFormat != "table" { + // FormatTable already includes newlines for non-count results + if result.IsCount { + fmt.Fprintln(processor.stdout()) + } + } + + return nil +} + +func 
buildIndex(ctx context.Context, doc *openapi.OpenAPI) *openapi.Index { + resolveOpts := references.ResolveOptions{ + RootDocument: doc, + TargetDocument: doc, + TargetLocation: ".", + } + return openapi.BuildIndex(ctx, doc, resolveOpts) +} diff --git a/cmd/openapi/commands/openapi/root.go b/cmd/openapi/commands/openapi/root.go index 5f4c614..976abc6 100644 --- a/cmd/openapi/commands/openapi/root.go +++ b/cmd/openapi/commands/openapi/root.go @@ -18,4 +18,5 @@ func Apply(rootCmd *cobra.Command) { rootCmd.AddCommand(localizeCmd) rootCmd.AddCommand(exploreCmd) rootCmd.AddCommand(snipCmd) + rootCmd.AddCommand(queryCmd) } diff --git a/graph/graph.go b/graph/graph.go new file mode 100644 index 0000000..1e87228 --- /dev/null +++ b/graph/graph.go @@ -0,0 +1,678 @@ +// Package graph provides a pre-computed directed graph over OpenAPI schemas and operations, +// materialized from an openapi.Index for efficient structural queries. +package graph + +import ( + "context" + "strconv" + "strings" + + "github.com/speakeasy-api/openapi/hashing" + "github.com/speakeasy-api/openapi/jsonschema/oas3" + "github.com/speakeasy-api/openapi/openapi" +) + +// NodeID is a unique identifier for a node in the graph. +type NodeID int + +// EdgeKind represents the type of relationship between two schema nodes. +type EdgeKind int + +const ( + EdgeProperty EdgeKind = iota // properties/X + EdgeItems // items + EdgeAllOf // allOf[i] + EdgeOneOf // oneOf[i] + EdgeAnyOf // anyOf[i] + EdgeAdditionalProps // additionalProperties + EdgeNot // not + EdgeIf // if + EdgeThen // then + EdgeElse // else + EdgeContains // contains + EdgePrefixItems // prefixItems[i] + EdgeDependentSchema // dependentSchemas/X + EdgePatternProperty // patternProperties/X + EdgePropertyNames // propertyNames + EdgeUnevaluatedItems // unevaluatedItems + EdgeUnevaluatedProps // unevaluatedProperties + EdgeRef // resolved $ref +) + +// Edge represents a directed edge between two schema nodes. 
type Edge struct {
	From  NodeID   // source schema node
	To    NodeID   // target schema node
	Kind  EdgeKind // which child-bearing keyword produced this edge
	Label string   // property name, pattern key, or index
}

// SchemaNode represents a schema in the graph.
type SchemaNode struct {
	ID            NodeID
	Name          string // component name or JSON pointer
	Path          string // JSON pointer in document
	Schema        *oas3.JSONSchemaReferenceable
	Location      openapi.Locations
	IsComponent   bool // registered from the index's component schemas
	IsInline      bool // registered from the index's inline schemas
	IsExternal    bool // registered from the index's external schemas
	IsBoolean     bool // registered from the index's boolean schemas
	IsCircular    bool // lies on, or can reach, a reference cycle (see detectCycle)
	HasRef        bool // underlying schema carries a $ref
	Type          string // primary schema type
	Depth         int    // longest acyclic chain of out-edges (see computeDepth)
	InDegree      int    // number of incoming edges
	OutDegree     int    // number of outgoing edges
	UnionWidth    int    // len(allOf) + len(oneOf) + len(anyOf)
	PropertyCount int    // number of declared properties
	Hash          string // structural hash of the schema (hashing.Hash)
}

// OperationNode represents an operation in the graph.
type OperationNode struct {
	ID             NodeID
	Name           string // operationId or "METHOD /path"
	Method         string
	Path           string
	OperationID    string
	Operation      *openapi.Operation
	Location       openapi.Locations
	SchemaCount    int // schemas transitively reachable from this operation
	ComponentCount int // subset of SchemaCount that are named components
}

// SchemaGraph is a pre-computed directed graph over OpenAPI schemas and operations.
// It is built once by Build and is read-only afterwards.
type SchemaGraph struct {
	Schemas    []SchemaNode
	Operations []OperationNode

	outEdges map[NodeID][]Edge
	inEdges  map[NodeID][]Edge

	// Lookup maps
	ptrToNode  map[*oas3.JSONSchemaReferenceable]NodeID
	nameToNode map[string]NodeID

	// Operation-schema relationships
	opSchemas map[NodeID]map[NodeID]bool // operation -> set of schema NodeIDs
	schemaOps map[NodeID]map[NodeID]bool // schema -> set of operation NodeIDs
}

// Build constructs a SchemaGraph from an openapi.Index.
+func Build(ctx context.Context, idx *openapi.Index) *SchemaGraph { + g := &SchemaGraph{ + outEdges: make(map[NodeID][]Edge), + inEdges: make(map[NodeID][]Edge), + ptrToNode: make(map[*oas3.JSONSchemaReferenceable]NodeID), + nameToNode: make(map[string]NodeID), + opSchemas: make(map[NodeID]map[NodeID]bool), + schemaOps: make(map[NodeID]map[NodeID]bool), + } + + // Phase 1: Register nodes + g.registerNodes(idx) + + // Phase 2: Build edges + g.buildEdges() + + // Phase 3: Operation edges + g.buildOperationEdges(idx) + + // Phase 4: Compute metrics + g.computeMetrics() + + return g +} + +// OutEdges returns the outgoing edges from the given node. +func (g *SchemaGraph) OutEdges(id NodeID) []Edge { + return g.outEdges[id] +} + +// InEdges returns the incoming edges to the given node. +func (g *SchemaGraph) InEdges(id NodeID) []Edge { + return g.inEdges[id] +} + +// SchemaByName returns the schema node with the given component name, if any. +func (g *SchemaGraph) SchemaByName(name string) (SchemaNode, bool) { + if id, ok := g.nameToNode[name]; ok && int(id) < len(g.Schemas) { + return g.Schemas[id], true + } + return SchemaNode{}, false +} + +// OperationSchemas returns the schema NodeIDs reachable from the given operation. +func (g *SchemaGraph) OperationSchemas(opID NodeID) []NodeID { + set := g.opSchemas[opID] + ids := make([]NodeID, 0, len(set)) + for id := range set { + ids = append(ids, id) + } + return ids +} + +// SchemaOperations returns the operation NodeIDs that reference the given schema. +func (g *SchemaGraph) SchemaOperations(schemaID NodeID) []NodeID { + set := g.schemaOps[schemaID] + ids := make([]NodeID, 0, len(set)) + for id := range set { + ids = append(ids, id) + } + return ids +} + +// Phase 1: Register all schema nodes from the index. 
+func (g *SchemaGraph) registerNodes(idx *openapi.Index) { + addSchema := func(node *openapi.IndexNode[*oas3.JSONSchemaReferenceable], isComponent, isInline, isExternal, isBoolean bool) { + if node == nil || node.Node == nil { + return + } + // Avoid duplicates + if _, exists := g.ptrToNode[node.Node]; exists { + return + } + + id := NodeID(len(g.Schemas)) + jp := string(node.Location.ToJSONPointer()) + + name := jp + if isComponent { + // Extract component name from the JSON pointer: /components/schemas/Name + parts := strings.Split(jp, "/") + if len(parts) >= 4 { + name = parts[len(parts)-1] + } + } + + hasRef := false + schemaType := "" + if schema := node.Node.GetSchema(); schema != nil { + hasRef = schema.Ref != nil + types := schema.GetType() + if len(types) > 0 { + schemaType = string(types[0]) + } + } + + sn := SchemaNode{ + ID: id, + Name: name, + Path: jp, + Schema: node.Node, + Location: node.Location, + IsComponent: isComponent, + IsInline: isInline, + IsExternal: isExternal, + IsBoolean: isBoolean, + HasRef: hasRef, + Type: schemaType, + } + + g.Schemas = append(g.Schemas, sn) + g.ptrToNode[node.Node] = id + if isComponent { + g.nameToNode[name] = id + } + } + + for _, n := range idx.ComponentSchemas { + addSchema(n, true, false, false, false) + } + for _, n := range idx.InlineSchemas { + addSchema(n, false, true, false, false) + } + for _, n := range idx.ExternalSchemas { + addSchema(n, false, false, true, false) + } + for _, n := range idx.BooleanSchemas { + addSchema(n, false, false, false, true) + } + + // Also register schema references (nodes that are $refs to other schemas) + for _, n := range idx.SchemaReferences { + addSchema(n, false, false, false, false) + } +} + +// Phase 2: Build edges by inspecting child-bearing fields of each schema. 
+func (g *SchemaGraph) buildEdges() { + for i := range g.Schemas { + sn := &g.Schemas[i] + schema := sn.Schema.GetSchema() + if schema == nil { + continue + } + + // If this is a $ref node, add an edge to the resolved target + if schema.Ref != nil { + if targetID, ok := g.resolveRef(string(*schema.Ref)); ok { + g.addEdge(sn.ID, targetID, EdgeRef, string(*schema.Ref)) + } + } + + // Properties + if schema.Properties != nil { + for key, child := range schema.Properties.All() { + if childID, ok := g.resolveChild(child); ok { + g.addEdge(sn.ID, childID, EdgeProperty, key) + } + } + } + + // Items + if schema.Items != nil { + if childID, ok := g.resolveChild(schema.Items); ok { + g.addEdge(sn.ID, childID, EdgeItems, "items") + } + } + + // AllOf + for i, child := range schema.AllOf { + if childID, ok := g.resolveChild(child); ok { + g.addEdge(sn.ID, childID, EdgeAllOf, "allOf/"+intStr(i)) + } + } + + // OneOf + for i, child := range schema.OneOf { + if childID, ok := g.resolveChild(child); ok { + g.addEdge(sn.ID, childID, EdgeOneOf, "oneOf/"+intStr(i)) + } + } + + // AnyOf + for i, child := range schema.AnyOf { + if childID, ok := g.resolveChild(child); ok { + g.addEdge(sn.ID, childID, EdgeAnyOf, "anyOf/"+intStr(i)) + } + } + + // AdditionalProperties + if schema.AdditionalProperties != nil { + if childID, ok := g.resolveChild(schema.AdditionalProperties); ok { + g.addEdge(sn.ID, childID, EdgeAdditionalProps, "additionalProperties") + } + } + + // Not + if schema.Not != nil { + if childID, ok := g.resolveChild(schema.Not); ok { + g.addEdge(sn.ID, childID, EdgeNot, "not") + } + } + + // If / Then / Else + if schema.If != nil { + if childID, ok := g.resolveChild(schema.If); ok { + g.addEdge(sn.ID, childID, EdgeIf, "if") + } + } + if schema.Then != nil { + if childID, ok := g.resolveChild(schema.Then); ok { + g.addEdge(sn.ID, childID, EdgeThen, "then") + } + } + if schema.Else != nil { + if childID, ok := g.resolveChild(schema.Else); ok { + g.addEdge(sn.ID, childID, 
EdgeElse, "else") + } + } + + // Contains + if schema.Contains != nil { + if childID, ok := g.resolveChild(schema.Contains); ok { + g.addEdge(sn.ID, childID, EdgeContains, "contains") + } + } + + // PrefixItems + for i, child := range schema.PrefixItems { + if childID, ok := g.resolveChild(child); ok { + g.addEdge(sn.ID, childID, EdgePrefixItems, "prefixItems/"+intStr(i)) + } + } + + // DependentSchemas + if schema.DependentSchemas != nil { + for key, child := range schema.DependentSchemas.All() { + if childID, ok := g.resolveChild(child); ok { + g.addEdge(sn.ID, childID, EdgeDependentSchema, key) + } + } + } + + // PatternProperties + if schema.PatternProperties != nil { + for key, child := range schema.PatternProperties.All() { + if childID, ok := g.resolveChild(child); ok { + g.addEdge(sn.ID, childID, EdgePatternProperty, key) + } + } + } + + // PropertyNames + if schema.PropertyNames != nil { + if childID, ok := g.resolveChild(schema.PropertyNames); ok { + g.addEdge(sn.ID, childID, EdgePropertyNames, "propertyNames") + } + } + + // UnevaluatedItems + if schema.UnevaluatedItems != nil { + if childID, ok := g.resolveChild(schema.UnevaluatedItems); ok { + g.addEdge(sn.ID, childID, EdgeUnevaluatedItems, "unevaluatedItems") + } + } + + // UnevaluatedProperties + if schema.UnevaluatedProperties != nil { + if childID, ok := g.resolveChild(schema.UnevaluatedProperties); ok { + g.addEdge(sn.ID, childID, EdgeUnevaluatedProps, "unevaluatedProperties") + } + } + } +} + +// resolveChild finds the node ID for a child schema pointer. +// If the pointer is directly registered, returns it. +// If not, checks if it's a $ref and resolves via the component name lookup. 
+func (g *SchemaGraph) resolveChild(child *oas3.JSONSchemaReferenceable) (NodeID, bool) { + if child == nil { + return 0, false + } + // Direct pointer match + if id, ok := g.ptrToNode[child]; ok { + return id, true + } + // Try to resolve via $ref + if s := child.GetSchema(); s != nil && s.Ref != nil { + return g.resolveRef(string(*s.Ref)) + } + return 0, false +} + +// resolveRef resolves a $ref string (e.g., "#/components/schemas/Owner") to a node ID. +func (g *SchemaGraph) resolveRef(ref string) (NodeID, bool) { + const prefix = "#/components/schemas/" + if strings.HasPrefix(ref, prefix) { + name := ref[len(prefix):] + if id, ok := g.nameToNode[name]; ok { + return id, true + } + } + return 0, false +} + +func (g *SchemaGraph) addEdge(from, to NodeID, kind EdgeKind, label string) { + e := Edge{From: from, To: to, Kind: kind, Label: label} + g.outEdges[from] = append(g.outEdges[from], e) + g.inEdges[to] = append(g.inEdges[to], e) +} + +// Phase 3: Build operation nodes and operation-schema relationships. 
+func (g *SchemaGraph) buildOperationEdges(idx *openapi.Index) { + for _, opNode := range idx.Operations { + if opNode == nil || opNode.Node == nil { + continue + } + + method, path := openapi.ExtractMethodAndPath(opNode.Location) + opID := opNode.Node.GetOperationID() + + name := opID + if name == "" { + name = strings.ToUpper(method) + " " + path + } + + opNodeID := NodeID(len(g.Operations)) + on := OperationNode{ + ID: opNodeID, + Name: name, + Method: method, + Path: path, + OperationID: opID, + Operation: opNode.Node, + Location: opNode.Location, + } + + // Find schemas reachable from this operation by walking its structure + directSchemas := g.findOperationSchemas(opNode.Node) + + // Build transitive closure from direct schemas + reachable := make(map[NodeID]bool) + for _, sid := range directSchemas { + g.reachableBFS(sid, reachable) + } + + g.opSchemas[opNodeID] = reachable + + componentCount := 0 + for sid := range reachable { + if int(sid) < len(g.Schemas) && g.Schemas[sid].IsComponent { + componentCount++ + } + // Build reverse mapping + if g.schemaOps[sid] == nil { + g.schemaOps[sid] = make(map[NodeID]bool) + } + g.schemaOps[sid][opNodeID] = true + } + + on.SchemaCount = len(reachable) + on.ComponentCount = componentCount + + g.Operations = append(g.Operations, on) + } +} + +// findOperationSchemas finds schema NodeIDs directly referenced by an operation's +// parameters, request body, and responses. 
+func (g *SchemaGraph) findOperationSchemas(op *openapi.Operation) []NodeID { + var result []NodeID + seen := make(map[NodeID]bool) + + addIfKnown := func(js *oas3.JSONSchemaReferenceable) { + if js == nil { + return + } + if id, ok := g.ptrToNode[js]; ok && !seen[id] { + seen[id] = true + result = append(result, id) + } + } + + // Walk parameters + for _, param := range op.Parameters { + if param == nil { + continue + } + p := param.GetObject() + if p == nil { + continue + } + if p.Schema != nil { + addIfKnown(p.Schema) + } + } + + // Walk request body + if op.RequestBody != nil { + rb := op.RequestBody.GetObject() + if rb != nil && rb.Content != nil { + for _, mt := range rb.Content.All() { + if mt != nil && mt.Schema != nil { + addIfKnown(mt.Schema) + } + } + } + } + + // Walk responses + for _, resp := range op.Responses.All() { + if resp == nil { + continue + } + r := resp.GetObject() + if r == nil || r.Content == nil { + continue + } + for _, mt := range r.Content.All() { + if mt != nil && mt.Schema != nil { + addIfKnown(mt.Schema) + } + } + } + // Also check default response + if op.Responses.Default != nil { + r := op.Responses.Default.GetObject() + if r != nil && r.Content != nil { + for _, mt := range r.Content.All() { + if mt != nil && mt.Schema != nil { + addIfKnown(mt.Schema) + } + } + } + } + + return result +} + +// reachableBFS performs BFS from a schema node and adds all reachable nodes to the set. +func (g *SchemaGraph) reachableBFS(start NodeID, visited map[NodeID]bool) { + if visited[start] { + return + } + queue := []NodeID{start} + visited[start] = true + + for len(queue) > 0 { + current := queue[0] + queue = queue[1:] + + for _, edge := range g.outEdges[current] { + if !visited[edge.To] { + visited[edge.To] = true + queue = append(queue, edge.To) + } + } + } +} + +// Phase 4: Compute metrics for each schema node. 
func (g *SchemaGraph) computeMetrics() {
	// Detect circular nodes.
	// NOTE: detectCycle marks not only nodes on a cycle but every node that
	// can reach one (it propagates circular[id] = true up the DFS stack), so
	// IsCircular means "participates in or depends on a reference cycle".
	circularNodes := make(map[NodeID]bool)
	for i := range g.Schemas {
		visited := make(map[NodeID]bool)
		inStack := make(map[NodeID]bool)
		if g.detectCycle(NodeID(i), visited, inStack, circularNodes) {
			circularNodes[NodeID(i)] = true
		}
	}

	for i := range g.Schemas {
		sn := &g.Schemas[i]
		id := NodeID(i)

		// Degree metrics come straight from the adjacency maps.
		sn.OutDegree = len(g.outEdges[id])
		sn.InDegree = len(g.inEdges[id])
		sn.IsCircular = circularNodes[id]

		schema := sn.Schema.GetSchema()
		if schema != nil {
			// union_width counts direct allOf/oneOf/anyOf members.
			sn.UnionWidth = len(schema.AllOf) + len(schema.OneOf) + len(schema.AnyOf)
			if schema.Properties != nil {
				sn.PropertyCount = schema.Properties.Len()
			}
			sn.Hash = hashing.Hash(schema)
		}

		// Compute depth via DFS with cycle detection
		depthVisited := make(map[NodeID]bool)
		sn.Depth = g.computeDepth(id, depthVisited)
	}
}

// computeDepth returns the length of the longest out-edge chain from id,
// counting each node at most once per DFS path (visited is un-set on the way
// back up, so the same node may be revisited along a different path).
// NOTE(review): there is no memoization, so this is exponential on graphs
// with heavily shared subtrees; presumably acceptable for typical spec sizes —
// worth confirming on large documents.
func (g *SchemaGraph) computeDepth(id NodeID, visited map[NodeID]bool) int {
	if visited[id] {
		return 0 // cycle
	}
	visited[id] = true

	maxChild := 0
	for _, edge := range g.outEdges[id] {
		d := g.computeDepth(edge.To, visited)
		if d+1 > maxChild {
			maxChild = d + 1
		}
	}
	visited[id] = false
	return maxChild
}

// detectCycle runs a coloring DFS from id. It returns true when id is on a
// cycle or any of its descendants is, and records every such node in circular.
// visited/inStack are the classic white/grey sets: inStack holds the current
// DFS path, visited holds everything explored from this root.
func (g *SchemaGraph) detectCycle(id NodeID, visited, inStack map[NodeID]bool, circular map[NodeID]bool) bool {
	if inStack[id] {
		// Back-edge onto the current path: id closes a cycle.
		circular[id] = true
		return true
	}
	if visited[id] {
		return false
	}
	visited[id] = true
	inStack[id] = true

	found := false
	for _, edge := range g.outEdges[id] {
		if g.detectCycle(edge.To, visited, inStack, circular) {
			// Ancestors of a cycle are flagged too (see computeMetrics note).
			circular[id] = true
			found = true
		}
	}

	inStack[id] = false
	return found
}

// Reachable returns all schema NodeIDs transitively reachable from the given node via out-edges.
+func (g *SchemaGraph) Reachable(id NodeID) []NodeID { + visited := make(map[NodeID]bool) + g.reachableBFS(id, visited) + delete(visited, id) // exclude self + result := make([]NodeID, 0, len(visited)) + for nid := range visited { + result = append(result, nid) + } + return result +} + +// Ancestors returns all schema NodeIDs that can transitively reach the given node via in-edges. +func (g *SchemaGraph) Ancestors(id NodeID) []NodeID { + visited := make(map[NodeID]bool) + visited[id] = true + queue := []NodeID{id} + + for len(queue) > 0 { + current := queue[0] + queue = queue[1:] + + for _, edge := range g.inEdges[current] { + if !visited[edge.From] { + visited[edge.From] = true + queue = append(queue, edge.From) + } + } + } + + delete(visited, id) // exclude self + result := make([]NodeID, 0, len(visited)) + for nid := range visited { + result = append(result, nid) + } + return result +} + +func intStr(i int) string { + return strconv.Itoa(i) +} diff --git a/graph/graph_test.go b/graph/graph_test.go new file mode 100644 index 0000000..52a06a6 --- /dev/null +++ b/graph/graph_test.go @@ -0,0 +1,178 @@ +package graph_test + +import ( + "context" + "os" + "testing" + + "github.com/speakeasy-api/openapi/graph" + "github.com/speakeasy-api/openapi/openapi" + "github.com/speakeasy-api/openapi/references" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func loadTestGraph(t *testing.T) *graph.SchemaGraph { + t.Helper() + + f, err := os.Open("../oq/testdata/petstore.yaml") + require.NoError(t, err) + defer f.Close() + + ctx := context.Background() + doc, _, err := openapi.Unmarshal(ctx, f, openapi.WithSkipValidation()) + require.NoError(t, err) + require.NotNil(t, doc) + + idx := openapi.BuildIndex(ctx, doc, references.ResolveOptions{ + RootDocument: doc, + TargetDocument: doc, + TargetLocation: "../oq/testdata/petstore.yaml", + }) + + return graph.Build(ctx, idx) +} + +func TestBuild_Success(t *testing.T) { + t.Parallel() + g := 
loadTestGraph(t) + + assert.NotEmpty(t, g.Schemas, "should have schema nodes") + assert.NotEmpty(t, g.Operations, "should have operation nodes") +} + +func TestBuild_ComponentSchemas_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + componentNames := make(map[string]bool) + for _, s := range g.Schemas { + if s.IsComponent { + componentNames[s.Name] = true + } + } + + assert.True(t, componentNames["Pet"]) + assert.True(t, componentNames["Owner"]) + assert.True(t, componentNames["Address"]) + assert.True(t, componentNames["Error"]) + assert.True(t, componentNames["Shape"]) + assert.True(t, componentNames["Circle"]) + assert.True(t, componentNames["Square"]) + assert.True(t, componentNames["Unused"]) +} + +func TestBuild_SchemaByName_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, ok := g.SchemaByName("Pet") + assert.True(t, ok) + assert.Equal(t, "Pet", pet.Name) + assert.Equal(t, "object", pet.Type) + assert.True(t, pet.IsComponent) + + _, ok = g.SchemaByName("NonExistent") + assert.False(t, ok) +} + +func TestBuild_Edges_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + edges := g.OutEdges(pet.ID) + + // Pet has properties: id, name, tag, owner + assert.Equal(t, 4, len(edges), "Pet should have 4 out-edges") + + edgeLabels := make(map[string]graph.EdgeKind) + for _, e := range edges { + edgeLabels[e.Label] = e.Kind + } + assert.Equal(t, graph.EdgeProperty, edgeLabels["id"]) + assert.Equal(t, graph.EdgeProperty, edgeLabels["name"]) + assert.Equal(t, graph.EdgeProperty, edgeLabels["tag"]) + assert.Equal(t, graph.EdgeProperty, edgeLabels["owner"]) +} + +func TestBuild_Reachable_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + reachable := g.Reachable(pet.ID) + assert.NotEmpty(t, reachable, "Pet should have reachable schemas") + + reachableNames := make(map[string]bool) + for _, id := range reachable { + 
reachableNames[g.Schemas[id].Name] = true + } + + // Pet -> owner -> Owner -> address -> Address + assert.True(t, reachableNames["Owner"], "Owner should be reachable from Pet") + assert.True(t, reachableNames["Address"], "Address should be reachable from Pet") +} + +func TestBuild_Ancestors_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + addr, _ := g.SchemaByName("Address") + ancestors := g.Ancestors(addr.ID) + assert.NotEmpty(t, ancestors, "Address should have ancestors") + + ancestorNames := make(map[string]bool) + for _, id := range ancestors { + ancestorNames[g.Schemas[id].Name] = true + } + + assert.True(t, ancestorNames["Owner"], "Owner should be an ancestor of Address") +} + +func TestBuild_Operations_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + opNames := make(map[string]bool) + for _, op := range g.Operations { + opNames[op.Name] = true + } + + assert.True(t, opNames["listPets"]) + assert.True(t, opNames["createPet"]) + assert.True(t, opNames["showPetById"]) + assert.True(t, opNames["listOwners"]) +} + +func TestBuild_OperationSchemas_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + for _, op := range g.Operations { + if op.OperationID == "listPets" { + schemas := g.OperationSchemas(op.ID) + assert.NotEmpty(t, schemas, "listPets should reference schemas") + assert.Greater(t, op.SchemaCount, 0) + return + } + } + t.Fatal("listPets operation not found") +} + +func TestBuild_Metrics_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + assert.Equal(t, 4, pet.PropertyCount, "Pet should have 4 properties") + assert.Equal(t, 4, pet.OutDegree, "Pet should have 4 out-edges") + assert.Greater(t, pet.InDegree, 0, "Pet should be referenced") + assert.NotEmpty(t, pet.Hash, "Pet should have a hash") + + shape, _ := g.SchemaByName("Shape") + assert.Equal(t, 2, shape.UnionWidth, "Shape should have union_width 2 (oneOf)") + + unused, _ := g.SchemaByName("Unused") + 
assert.Equal(t, 0, unused.InDegree, "Unused should have no incoming edges from other schemas") +} diff --git a/oq/expr/expr.go b/oq/expr/expr.go new file mode 100644 index 0000000..b511823 --- /dev/null +++ b/oq/expr/expr.go @@ -0,0 +1,469 @@ +// Package expr provides a predicate expression parser and evaluator for the oq query language. +package expr + +import ( + "fmt" + "regexp" + "strconv" + "strings" +) + +// Value represents a typed value in the expression system. +type Value struct { + Kind ValueKind + Str string + Int int + Bool bool + isNull bool +} + +type ValueKind int + +const ( + KindString ValueKind = iota + KindInt + KindBool + KindNull +) + +// Row provides field access for predicate evaluation. +type Row interface { + Field(name string) Value +} + +// Expr is the interface for all expression nodes. +type Expr interface { + Eval(row Row) Value +} + +// --- Expression node types --- + +type binaryExpr struct { + op string + left Expr + right Expr +} + +type notExpr struct { + inner Expr +} + +type hasExpr struct { + field string +} + +type matchesExpr struct { + field string + pattern *regexp.Regexp +} + +type fieldExpr struct { + name string +} + +type literalExpr struct { + val Value +} + +func (e *binaryExpr) Eval(row Row) Value { + switch e.op { + case "and": + l := toBool(e.left.Eval(row)) + if !l { + return Value{Kind: KindBool, Bool: false} + } + return Value{Kind: KindBool, Bool: toBool(e.right.Eval(row))} + case "or": + l := toBool(e.left.Eval(row)) + if l { + return Value{Kind: KindBool, Bool: true} + } + return Value{Kind: KindBool, Bool: toBool(e.right.Eval(row))} + case "==": + return Value{Kind: KindBool, Bool: equal(e.left.Eval(row), e.right.Eval(row))} + case "!=": + return Value{Kind: KindBool, Bool: !equal(e.left.Eval(row), e.right.Eval(row))} + case ">": + return Value{Kind: KindBool, Bool: compare(e.left.Eval(row), e.right.Eval(row)) > 0} + case "<": + return Value{Kind: KindBool, Bool: compare(e.left.Eval(row), e.right.Eval(row)) 
< 0} + case ">=": + return Value{Kind: KindBool, Bool: compare(e.left.Eval(row), e.right.Eval(row)) >= 0} + case "<=": + return Value{Kind: KindBool, Bool: compare(e.left.Eval(row), e.right.Eval(row)) <= 0} + default: + return Value{Kind: KindNull, isNull: true} + } +} + +func (e *notExpr) Eval(row Row) Value { + return Value{Kind: KindBool, Bool: !toBool(e.inner.Eval(row))} +} + +func (e *hasExpr) Eval(row Row) Value { + v := row.Field(e.field) + return Value{Kind: KindBool, Bool: !v.isNull && (v.Kind != KindInt || v.Int > 0) && (v.Kind != KindBool || v.Bool)} +} + +func (e *matchesExpr) Eval(row Row) Value { + v := row.Field(e.field) + return Value{Kind: KindBool, Bool: v.Kind == KindString && e.pattern.MatchString(v.Str)} +} + +func (e *fieldExpr) Eval(row Row) Value { + return row.Field(e.name) +} + +func (e *literalExpr) Eval(_ Row) Value { + return e.val +} + +// --- Helpers --- + +func toBool(v Value) bool { + switch v.Kind { + case KindBool: + return v.Bool + case KindInt: + return v.Int != 0 + case KindString: + return v.Str != "" + default: + return false + } +} + +func equal(a, b Value) bool { + if a.Kind == KindString || b.Kind == KindString { + return toString(a) == toString(b) + } + if a.Kind == KindInt && b.Kind == KindInt { + return a.Int == b.Int + } + if a.Kind == KindBool && b.Kind == KindBool { + return a.Bool == b.Bool + } + return false +} + +func compare(a, b Value) int { + ai := toInt(a) + bi := toInt(b) + if ai < bi { + return -1 + } + if ai > bi { + return 1 + } + return 0 +} + +func toInt(v Value) int { + switch v.Kind { + case KindInt: + return v.Int + case KindBool: + if v.Bool { + return 1 + } + return 0 + case KindString: + n, _ := strconv.Atoi(v.Str) + return n + default: + return 0 + } +} + +func toString(v Value) string { + switch v.Kind { + case KindString: + return v.Str + case KindInt: + return strconv.Itoa(v.Int) + case KindBool: + return strconv.FormatBool(v.Bool) + default: + return "" + } +} + +// StringVal creates a string 
Value. +func StringVal(s string) Value { + return Value{Kind: KindString, Str: s} +} + +// IntVal creates an int Value. +func IntVal(n int) Value { + return Value{Kind: KindInt, Int: n} +} + +// BoolVal creates a bool Value. +func BoolVal(b bool) Value { + return Value{Kind: KindBool, Bool: b} +} + +// NullVal creates a null Value. +func NullVal() Value { + return Value{Kind: KindNull, isNull: true} +} + +// --- Parser --- + +// Parse parses a predicate expression string into an Expr tree. +func Parse(input string) (Expr, error) { + p := &parser{tokens: tokenize(input)} + expr, err := p.parseOr() + if err != nil { + return nil, err + } + if p.pos < len(p.tokens) { + return nil, fmt.Errorf("unexpected token: %q", p.tokens[p.pos]) + } + return expr, nil +} + +type parser struct { + tokens []string + pos int +} + +func (p *parser) peek() string { + if p.pos >= len(p.tokens) { + return "" + } + return p.tokens[p.pos] +} + +func (p *parser) next() string { + t := p.peek() + p.pos++ + return t +} + +func (p *parser) expect(tok string) error { + if p.next() != tok { + return fmt.Errorf("expected %q, got %q", tok, p.tokens[p.pos-1]) + } + return nil +} + +func (p *parser) parseOr() (Expr, error) { + left, err := p.parseAnd() + if err != nil { + return nil, err + } + for p.peek() == "or" { + p.next() + right, err := p.parseAnd() + if err != nil { + return nil, err + } + left = &binaryExpr{op: "or", left: left, right: right} + } + return left, nil +} + +func (p *parser) parseAnd() (Expr, error) { + left, err := p.parseComparison() + if err != nil { + return nil, err + } + for p.peek() == "and" { + p.next() + right, err := p.parseComparison() + if err != nil { + return nil, err + } + left = &binaryExpr{op: "and", left: left, right: right} + } + return left, nil +} + +func (p *parser) parseComparison() (Expr, error) { + left, err := p.parseUnary() + if err != nil { + return nil, err + } + switch p.peek() { + case "==", "!=", ">", "<", ">=", "<=": + op := p.next() + right, err 
:= p.parseUnary() + if err != nil { + return nil, err + } + return &binaryExpr{op: op, left: left, right: right}, nil + case "matches": + p.next() + patternTok := p.next() + pattern := strings.Trim(patternTok, "\"") + re, compileErr := regexp.Compile(pattern) + if compileErr != nil { + return nil, fmt.Errorf("invalid regex %q: %w", pattern, compileErr) + } + // left must be a field reference + fieldRef, ok := left.(*fieldExpr) + if !ok { + return nil, fmt.Errorf("matches requires a field on the left side") + } + return &matchesExpr{field: fieldRef.name, pattern: re}, nil + } + return left, nil +} + +func (p *parser) parseUnary() (Expr, error) { + if p.peek() == "not" { + p.next() + inner, err := p.parseUnary() + if err != nil { + return nil, err + } + return ¬Expr{inner: inner}, nil + } + return p.parsePrimary() +} + +func (p *parser) parsePrimary() (Expr, error) { + tok := p.peek() + + // Parenthesized expression + if tok == "(" { + p.next() + expr, err := p.parseOr() + if err != nil { + return nil, err + } + if err := p.expect(")"); err != nil { + return nil, err + } + return expr, nil + } + + // Function calls + if tok == "has" { + p.next() + if err := p.expect("("); err != nil { + return nil, err + } + field := p.next() + if err := p.expect(")"); err != nil { + return nil, err + } + return &hasExpr{field: field}, nil + } + + if tok == "matches" { + p.next() + if err := p.expect("("); err != nil { + return nil, err + } + field := p.next() + if err := p.expect(","); err != nil { + return nil, err + } + patternTok := p.next() + pattern := strings.Trim(patternTok, "\"") + re, err := regexp.Compile(pattern) + if err != nil { + return nil, fmt.Errorf("invalid regex %q: %w", pattern, err) + } + if err := p.expect(")"); err != nil { + return nil, err + } + return &matchesExpr{field: field, pattern: re}, nil + } + + // String literal + if strings.HasPrefix(tok, "\"") { + p.next() + return &literalExpr{val: StringVal(strings.Trim(tok, "\""))}, nil + } + + // Boolean 
// tokenize splits an expression into tokens: two-character operators,
// single-character punctuation, quoted strings (with backslash escapes,
// quotes retained), and bare words (identifiers, keywords, numbers).
//
// Fixes: stray '=' and '!' were previously dropped silently — they are
// now emitted as tokens so the parser can report them as unexpected.
// '\r' now terminates a word, so CRLF input no longer leaks a carriage
// return into the final token (which broke integer-literal parsing).
func tokenize(input string) []string {
	var tokens []string
	i := 0
	for i < len(input) {
		ch := input[i]

		// Skip whitespace.
		if ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' {
			i++
			continue
		}

		// Two-character operators.
		if i+1 < len(input) {
			two := input[i : i+2]
			if two == "==" || two == "!=" || two == ">=" || two == "<=" {
				tokens = append(tokens, two)
				i += 2
				continue
			}
		}

		// Single-character tokens.
		if ch == '(' || ch == ')' || ch == ',' || ch == '>' || ch == '<' || ch == '=' || ch == '!' {
			tokens = append(tokens, string(ch))
			i++
			continue
		}

		// Quoted string; a backslash escapes the next character.
		if ch == '"' {
			j := i + 1
			for j < len(input) && input[j] != '"' {
				if input[j] == '\\' {
					j++
				}
				j++
			}
			if j < len(input) {
				j++ // include the closing quote
			}
			tokens = append(tokens, input[i:j])
			i = j
			continue
		}

		// Word: everything up to whitespace or an operator character.
		j := i
		for j < len(input) && !isWordBreak(input[j]) {
			j++
		}
		tokens = append(tokens, input[i:j])
		i = j
	}
	return tokens
}

// isWordBreak reports whether b terminates a bare word token.
func isWordBreak(b byte) bool {
	switch b {
	case ' ', '\t', '\n', '\r', '(', ')', ',', '>', '<', '=', '!':
		return true
	}
	return false
}
{ + j++ + } + if j > i { + tokens = append(tokens, input[i:j]) + i = j + } else { + i++ + } + } + return tokens +} diff --git a/oq/expr/expr_test.go b/oq/expr/expr_test.go new file mode 100644 index 0000000..7207ebb --- /dev/null +++ b/oq/expr/expr_test.go @@ -0,0 +1,143 @@ +package expr_test + +import ( + "testing" + + "github.com/speakeasy-api/openapi/oq/expr" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type testRow map[string]expr.Value + +func (r testRow) Field(name string) expr.Value { + if v, ok := r[name]; ok { + return v + } + return expr.NullVal() +} + +func TestParse_Comparison_Success(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + expr string + row testRow + expected bool + }{ + { + name: "integer equality", + expr: `depth == 5`, + row: testRow{"depth": expr.IntVal(5)}, + expected: true, + }, + { + name: "integer inequality", + expr: `depth != 5`, + row: testRow{"depth": expr.IntVal(3)}, + expected: true, + }, + { + name: "greater than", + expr: `depth > 3`, + row: testRow{"depth": expr.IntVal(5)}, + expected: true, + }, + { + name: "less than false", + expr: `depth < 3`, + row: testRow{"depth": expr.IntVal(5)}, + expected: false, + }, + { + name: "string equality", + expr: `type == "object"`, + row: testRow{"type": expr.StringVal("object")}, + expected: true, + }, + { + name: "boolean field", + expr: `is_component`, + row: testRow{"is_component": expr.BoolVal(true)}, + expected: true, + }, + { + name: "and operator", + expr: `depth > 3 and is_component`, + row: testRow{"depth": expr.IntVal(5), "is_component": expr.BoolVal(true)}, + expected: true, + }, + { + name: "or operator", + expr: `depth > 10 or is_component`, + row: testRow{"depth": expr.IntVal(2), "is_component": expr.BoolVal(true)}, + expected: true, + }, + { + name: "not operator", + expr: `not is_inline`, + row: testRow{"is_inline": expr.BoolVal(false)}, + expected: true, + }, + { + name: "has function", + expr: `has(oneOf)`, + 
row: testRow{"oneOf": expr.IntVal(2)}, + expected: true, + }, + { + name: "has function false", + expr: `has(oneOf)`, + row: testRow{"oneOf": expr.IntVal(0)}, + expected: false, + }, + { + name: "matches operator", + expr: `name matches "Error.*"`, + row: testRow{"name": expr.StringVal("ErrorResponse")}, + expected: true, + }, + { + name: "matches operator no match", + expr: `name matches "Error.*"`, + row: testRow{"name": expr.StringVal("Pet")}, + expected: false, + }, + { + name: "complex expression", + expr: `property_count > 0 and in_degree == 0`, + row: testRow{"property_count": expr.IntVal(3), "in_degree": expr.IntVal(0)}, + expected: true, + }, + { + name: "parenthesized expression", + expr: `(depth > 3 or depth < 1) and is_component`, + row: testRow{"depth": expr.IntVal(5), "is_component": expr.BoolVal(true)}, + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + parsed, err := expr.Parse(tt.expr) + require.NoError(t, err) + + result := parsed.Eval(tt.row) + assert.Equal(t, expr.KindBool, result.Kind) + assert.Equal(t, tt.expected, result.Bool) + }) + } +} + +func TestParse_Error(t *testing.T) { + t.Parallel() + + _, err := expr.Parse("") + assert.Error(t, err) + + _, err = expr.Parse("name matches \"[invalid\"") + assert.Error(t, err) +} diff --git a/oq/oq.go b/oq/oq.go new file mode 100644 index 0000000..67824f4 --- /dev/null +++ b/oq/oq.go @@ -0,0 +1,893 @@ +// Package oq implements a pipeline query language for OpenAPI schema graphs. +// +// Queries are written as pipeline expressions like: +// +// schemas.components | where depth > 5 | sort depth desc | take 10 | select name, depth +package oq + +import ( + "fmt" + "slices" + "sort" + "strconv" + "strings" + + "github.com/speakeasy-api/openapi/graph" + "github.com/speakeasy-api/openapi/oq/expr" +) + +// ResultKind distinguishes between schema and operation result rows. 
// ResultKind distinguishes between schema and operation result rows.
type ResultKind int

const (
	// SchemaResult marks a row that refers to an entry in SchemaGraph.Schemas.
	SchemaResult ResultKind = iota
	// OperationResult marks a row that refers to an entry in SchemaGraph.Operations.
	OperationResult
)

// Row represents a single result in the pipeline. Exactly one of
// SchemaIdx / OpIdx is meaningful, selected by Kind.
type Row struct {
	Kind      ResultKind
	SchemaIdx int // index into SchemaGraph.Schemas
	OpIdx     int // index into SchemaGraph.Operations
}

// Result is the output of a query execution. Exactly one of Rows,
// Count (when IsCount is set), or Groups carries the payload.
type Result struct {
	Rows    []Row
	Fields  []string // projected fields (empty = all)
	IsCount bool
	Count   int
	Groups  []GroupResult
}

// GroupResult represents a group-by aggregation result.
type GroupResult struct {
	Key   string
	Count int
	Names []string
}

// Execute parses and executes a query against the given graph.
func Execute(query string, g *graph.SchemaGraph) (*Result, error) {
	stages, err := Parse(query)
	if err != nil {
		return nil, fmt.Errorf("parse error: %w", err)
	}
	return run(stages, g)
}

// --- AST ---

// StageKind represents the type of pipeline stage.
type StageKind int

const (
	// StageSource seeds the row set (schemas, schemas.components, schemas.inline, operations).
	StageSource StageKind = iota
	// StageWhere filters rows by a predicate expression.
	StageWhere
	// StageSelect projects the named output fields.
	StageSelect
	// StageSort orders rows by a field, ascending or descending.
	StageSort
	// StageTake truncates the row set to a limit.
	StageTake
	// StageUnique removes duplicate rows.
	StageUnique
	// StageGroupBy aggregates rows by a field value.
	StageGroupBy
	// StageCount replaces the row set with its cardinality.
	StageCount
	// StageRefsOut expands to direct outgoing references.
	StageRefsOut
	// StageRefsIn expands to direct incoming references.
	StageRefsIn
	// StageReachable expands to the transitive closure of outgoing edges.
	StageReachable
	// StageAncestors expands to the transitive closure of incoming edges.
	StageAncestors
	// StageProperties expands to property child schemas.
	StageProperties
	// StageUnionMembers expands to allOf/oneOf/anyOf members.
	StageUnionMembers
	// StageItems expands to array item schemas.
	StageItems
	// StageOps maps schemas to the operations that use them.
	StageOps
	// StageSchemas maps operations to the schemas they use.
	StageSchemas
)

// Stage represents a single stage in the query pipeline. Only the
// fields relevant to Kind are populated.
type Stage struct {
	Kind      StageKind
	Source    string   // for StageSource
	Expr      string   // for StageWhere
	Fields    []string // for StageSelect, StageGroupBy
	SortField string   // for StageSort
	SortDesc  bool     // for StageSort
	Limit     int      // for StageTake
}
+func Parse(query string) ([]Stage, error) { + // Split by pipe, respecting quoted strings + parts := splitPipeline(query) + if len(parts) == 0 { + return nil, fmt.Errorf("empty query") + } + + var stages []Stage + + for i, part := range parts { + part = strings.TrimSpace(part) + if part == "" { + continue + } + + if i == 0 { + // First part is a source + stages = append(stages, Stage{Kind: StageSource, Source: part}) + continue + } + + stage, err := parseStage(part) + if err != nil { + return nil, err + } + stages = append(stages, stage) + } + + return stages, nil +} + +func parseStage(s string) (Stage, error) { + // Extract the keyword + keyword, rest := splitFirst(s) + keyword = strings.ToLower(keyword) + + switch keyword { + case "where": + if rest == "" { + return Stage{}, fmt.Errorf("where requires an expression") + } + return Stage{Kind: StageWhere, Expr: rest}, nil + + case "select": + if rest == "" { + return Stage{}, fmt.Errorf("select requires field names") + } + fields := parseCSV(rest) + return Stage{Kind: StageSelect, Fields: fields}, nil + + case "sort": + parts := strings.Fields(rest) + if len(parts) == 0 { + return Stage{}, fmt.Errorf("sort requires a field name") + } + desc := false + if len(parts) >= 2 && strings.ToLower(parts[1]) == "desc" { + desc = true + } + return Stage{Kind: StageSort, SortField: parts[0], SortDesc: desc}, nil + + case "take": + n, err := strconv.Atoi(strings.TrimSpace(rest)) + if err != nil { + return Stage{}, fmt.Errorf("take requires a number: %w", err) + } + return Stage{Kind: StageTake, Limit: n}, nil + + case "unique": + return Stage{Kind: StageUnique}, nil + + case "group-by": + if rest == "" { + return Stage{}, fmt.Errorf("group-by requires a field name") + } + fields := parseCSV(rest) + return Stage{Kind: StageGroupBy, Fields: fields}, nil + + case "count": + return Stage{Kind: StageCount}, nil + + case "refs-out": + return Stage{Kind: StageRefsOut}, nil + + case "refs-in": + return Stage{Kind: StageRefsIn}, nil + 
+ case "reachable": + return Stage{Kind: StageReachable}, nil + + case "ancestors": + return Stage{Kind: StageAncestors}, nil + + case "properties": + return Stage{Kind: StageProperties}, nil + + case "union-members": + return Stage{Kind: StageUnionMembers}, nil + + case "items": + return Stage{Kind: StageItems}, nil + + case "ops": + return Stage{Kind: StageOps}, nil + + case "schemas": + return Stage{Kind: StageSchemas}, nil + + default: + return Stage{}, fmt.Errorf("unknown stage: %q", keyword) + } +} + +// --- Executor --- + +func run(stages []Stage, g *graph.SchemaGraph) (*Result, error) { + if len(stages) == 0 { + return &Result{}, nil + } + + // Execute source stage + result, err := execSource(stages[0], g) + if err != nil { + return nil, err + } + + // Execute remaining stages + for _, stage := range stages[1:] { + result, err = execStage(stage, result, g) + if err != nil { + return nil, err + } + } + + return result, nil +} + +func execSource(stage Stage, g *graph.SchemaGraph) (*Result, error) { + result := &Result{} + switch stage.Source { + case "schemas": + for i := range g.Schemas { + result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) + } + case "schemas.components": + for i, s := range g.Schemas { + if s.IsComponent { + result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) + } + } + case "schemas.inline": + for i, s := range g.Schemas { + if s.IsInline { + result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) + } + } + case "operations": + for i := range g.Operations { + result.Rows = append(result.Rows, Row{Kind: OperationResult, OpIdx: i}) + } + default: + return nil, fmt.Errorf("unknown source: %q", stage.Source) + } + return result, nil +} + +func execStage(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + switch stage.Kind { + case StageWhere: + return execWhere(stage, result, g) + case StageSelect: + result.Fields = stage.Fields + return result, nil + case 
StageSort: + return execSort(stage, result, g) + case StageTake: + return execTake(stage, result) + case StageUnique: + return execUnique(result) + case StageGroupBy: + return execGroupBy(stage, result, g) + case StageCount: + return &Result{IsCount: true, Count: len(result.Rows)}, nil + case StageRefsOut: + return execTraversal(result, g, traverseRefsOut) + case StageRefsIn: + return execTraversal(result, g, traverseRefsIn) + case StageReachable: + return execTraversal(result, g, traverseReachable) + case StageAncestors: + return execTraversal(result, g, traverseAncestors) + case StageProperties: + return execTraversal(result, g, traverseProperties) + case StageUnionMembers: + return execTraversal(result, g, traverseUnionMembers) + case StageItems: + return execTraversal(result, g, traverseItems) + case StageOps: + return execSchemasToOps(result, g) + case StageSchemas: + return execOpsToSchemas(result, g) + default: + return nil, fmt.Errorf("unimplemented stage kind: %d", stage.Kind) + } +} + +func execWhere(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + predicate, err := expr.Parse(stage.Expr) + if err != nil { + return nil, fmt.Errorf("where expression error: %w", err) + } + + filtered := &Result{Fields: result.Fields} + for _, row := range result.Rows { + r := rowAdapter{row: row, g: g} + val := predicate.Eval(r) + if val.Kind == expr.KindBool && val.Bool { + filtered.Rows = append(filtered.Rows, row) + } + } + return filtered, nil +} + +func execSort(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + sort.SliceStable(result.Rows, func(i, j int) bool { + vi := fieldValue(result.Rows[i], stage.SortField, g) + vj := fieldValue(result.Rows[j], stage.SortField, g) + + cmp := compareValues(vi, vj) + if stage.SortDesc { + return cmp > 0 + } + return cmp < 0 + }) + return result, nil +} + +func execTake(stage Stage, result *Result) (*Result, error) { + if stage.Limit < len(result.Rows) { + result.Rows = 
result.Rows[:stage.Limit] + } + return result, nil +} + +func execUnique(result *Result) (*Result, error) { + seen := make(map[string]bool) + filtered := &Result{Fields: result.Fields} + for _, row := range result.Rows { + key := rowKey(row) + if !seen[key] { + seen[key] = true + filtered.Rows = append(filtered.Rows, row) + } + } + return filtered, nil +} + +func execGroupBy(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + if len(stage.Fields) == 0 { + return nil, fmt.Errorf("group-by requires at least one field") + } + field := stage.Fields[0] + + type group struct { + count int + names []string + } + groups := make(map[string]*group) + var order []string + + for _, row := range result.Rows { + v := fieldValue(row, field, g) + key := valueToString(v) + grp, exists := groups[key] + if !exists { + grp = &group{} + groups[key] = grp + order = append(order, key) + } + grp.count++ + nameV := fieldValue(row, "name", g) + grp.names = append(grp.names, valueToString(nameV)) + } + + grouped := &Result{Fields: result.Fields} + for _, key := range order { + grp := groups[key] + grouped.Groups = append(grouped.Groups, GroupResult{ + Key: key, + Count: grp.count, + Names: grp.names, + }) + } + return grouped, nil +} + +// --- Traversal --- + +type traversalFunc func(row Row, g *graph.SchemaGraph) []Row + +func execTraversal(result *Result, g *graph.SchemaGraph, fn traversalFunc) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[string]bool) + for _, row := range result.Rows { + for _, newRow := range fn(row, g) { + key := rowKey(newRow) + if !seen[key] { + seen[key] = true + out.Rows = append(out.Rows, newRow) + } + } + } + return out, nil +} + +func traverseRefsOut(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + result = append(result, Row{Kind: SchemaResult, SchemaIdx: int(edge.To)}) + } + return 
result +} + +func traverseRefsIn(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + var result []Row + for _, edge := range g.InEdges(graph.NodeID(row.SchemaIdx)) { + result = append(result, Row{Kind: SchemaResult, SchemaIdx: int(edge.From)}) + } + return result +} + +func traverseReachable(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + ids := g.Reachable(graph.NodeID(row.SchemaIdx)) + result := make([]Row, len(ids)) + for i, id := range ids { + result[i] = Row{Kind: SchemaResult, SchemaIdx: int(id)} + } + return result +} + +func traverseAncestors(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + ids := g.Ancestors(graph.NodeID(row.SchemaIdx)) + result := make([]Row, len(ids)) + for i, id := range ids { + result[i] = Row{Kind: SchemaResult, SchemaIdx: int(id)} + } + return result +} + +func traverseProperties(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + if edge.Kind == graph.EdgeProperty { + result = append(result, Row{Kind: SchemaResult, SchemaIdx: int(edge.To)}) + } + } + return result +} + +func traverseUnionMembers(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + if edge.Kind == graph.EdgeAllOf || edge.Kind == graph.EdgeOneOf || edge.Kind == graph.EdgeAnyOf { + // Follow through $ref nodes transparently + target := resolveRefTarget(int(edge.To), g) + result = append(result, Row{Kind: SchemaResult, SchemaIdx: target}) + } + } + return result +} + +func traverseItems(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + if edge.Kind == graph.EdgeItems { + result = 
append(result, Row{Kind: SchemaResult, SchemaIdx: int(edge.To)}) + } + } + return result +} + +// resolveRefTarget follows EdgeRef edges to get the actual target node. +// If the node at idx is a $ref wrapper, returns the target component's index. +// Otherwise returns idx unchanged. +func resolveRefTarget(idx int, g *graph.SchemaGraph) int { + if idx < 0 || idx >= len(g.Schemas) { + return idx + } + node := &g.Schemas[idx] + if !node.HasRef { + return idx + } + // Follow EdgeRef edges + for _, edge := range g.OutEdges(graph.NodeID(idx)) { + if edge.Kind == graph.EdgeRef { + return int(edge.To) + } + } + return idx +} + +func execSchemasToOps(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + opIDs := g.SchemaOperations(graph.NodeID(row.SchemaIdx)) + for _, opID := range opIDs { + idx := int(opID) + if !seen[idx] { + seen[idx] = true + out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: idx}) + } + } + } + return out, nil +} + +func execOpsToSchemas(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind != OperationResult { + continue + } + schemaIDs := g.OperationSchemas(graph.NodeID(row.OpIdx)) + for _, sid := range schemaIDs { + idx := int(sid) + if !seen[idx] { + seen[idx] = true + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: idx}) + } + } + } + return out, nil +} + +// --- Field access --- + +type rowAdapter struct { + row Row + g *graph.SchemaGraph +} + +func (r rowAdapter) Field(name string) expr.Value { + return fieldValue(r.row, name, r.g) +} + +// FieldValuePublic returns the value of a named field for the given row. +// Exported for testing and external consumers. 
+func FieldValuePublic(row Row, name string, g *graph.SchemaGraph) expr.Value { + return fieldValue(row, name, g) +} + +func fieldValue(row Row, name string, g *graph.SchemaGraph) expr.Value { + switch row.Kind { + case SchemaResult: + if row.SchemaIdx < 0 || row.SchemaIdx >= len(g.Schemas) { + return expr.NullVal() + } + s := &g.Schemas[row.SchemaIdx] + switch name { + case "name": + return expr.StringVal(s.Name) + case "type": + return expr.StringVal(s.Type) + case "depth": + return expr.IntVal(s.Depth) + case "in_degree": + return expr.IntVal(s.InDegree) + case "out_degree": + return expr.IntVal(s.OutDegree) + case "union_width": + return expr.IntVal(s.UnionWidth) + case "property_count": + return expr.IntVal(s.PropertyCount) + case "is_component": + return expr.BoolVal(s.IsComponent) + case "is_inline": + return expr.BoolVal(s.IsInline) + case "is_circular": + return expr.BoolVal(s.IsCircular) + case "has_ref": + return expr.BoolVal(s.HasRef) + case "hash": + return expr.StringVal(s.Hash) + case "path": + return expr.StringVal(s.Path) + } + case OperationResult: + if row.OpIdx < 0 || row.OpIdx >= len(g.Operations) { + return expr.NullVal() + } + o := &g.Operations[row.OpIdx] + switch name { + case "name": + return expr.StringVal(o.Name) + case "method": + return expr.StringVal(o.Method) + case "path": + return expr.StringVal(o.Path) + case "operation_id": + return expr.StringVal(o.OperationID) + case "schema_count": + return expr.IntVal(o.SchemaCount) + case "component_count": + return expr.IntVal(o.ComponentCount) + } + } + return expr.NullVal() +} + +func compareValues(a, b expr.Value) int { + if a.Kind == expr.KindInt && b.Kind == expr.KindInt { + if a.Int < b.Int { + return -1 + } + if a.Int > b.Int { + return 1 + } + return 0 + } + sa := valueToString(a) + sb := valueToString(b) + if sa < sb { + return -1 + } + if sa > sb { + return 1 + } + return 0 +} + +func valueToString(v expr.Value) string { + switch v.Kind { + case expr.KindString: + return v.Str + 
case expr.KindInt: + return strconv.Itoa(v.Int) + case expr.KindBool: + return strconv.FormatBool(v.Bool) + default: + return "" + } +} + +func rowKey(row Row) string { + if row.Kind == SchemaResult { + return "s:" + strconv.Itoa(row.SchemaIdx) + } + return "o:" + strconv.Itoa(row.OpIdx) +} + +// --- Formatting --- + +// FormatTable formats a result as a simple table string. +func FormatTable(result *Result, g *graph.SchemaGraph) string { + if result.IsCount { + return strconv.Itoa(result.Count) + } + + if len(result.Groups) > 0 { + return formatGroups(result) + } + + if len(result.Rows) == 0 { + return "(empty)" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + // Build header + widths := make([]int, len(fields)) + for i, f := range fields { + widths[i] = len(f) + } + + // Collect rows + var tableRows [][]string + for _, row := range result.Rows { + var cols []string + for i, f := range fields { + v := valueToString(fieldValue(row, f, g)) + cols = append(cols, v) + if len(v) > widths[i] { + widths[i] = len(v) + } + } + tableRows = append(tableRows, cols) + } + + // Format + var sb strings.Builder + // Header + for i, f := range fields { + if i > 0 { + sb.WriteString(" ") + } + sb.WriteString(padRight(f, widths[i])) + } + sb.WriteString("\n") + // Separator + for i, w := range widths { + if i > 0 { + sb.WriteString(" ") + } + sb.WriteString(strings.Repeat("-", w)) + } + sb.WriteString("\n") + // Data + for _, row := range tableRows { + for i, col := range row { + if i > 0 { + sb.WriteString(" ") + } + sb.WriteString(padRight(col, widths[i])) + } + sb.WriteString("\n") + } + + return sb.String() +} + +// FormatJSON formats a result as JSON. 
+func FormatJSON(result *Result, g *graph.SchemaGraph) string { + if result.IsCount { + return strconv.Itoa(result.Count) + } + + if len(result.Groups) > 0 { + return formatGroupsJSON(result) + } + + if len(result.Rows) == 0 { + return "[]" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + var sb strings.Builder + sb.WriteString("[\n") + for i, row := range result.Rows { + if i > 0 { + sb.WriteString(",\n") + } + sb.WriteString(" {") + for j, f := range fields { + if j > 0 { + sb.WriteString(", ") + } + v := fieldValue(row, f, g) + sb.WriteString(fmt.Sprintf("%q: %s", f, jsonValue(v))) + } + sb.WriteString("}") + } + sb.WriteString("\n]") + return sb.String() +} + +func jsonValue(v expr.Value) string { + switch v.Kind { + case expr.KindString: + return fmt.Sprintf("%q", v.Str) + case expr.KindInt: + return strconv.Itoa(v.Int) + case expr.KindBool: + return strconv.FormatBool(v.Bool) + default: + return "null" + } +} + +func formatGroups(result *Result) string { + var sb strings.Builder + for _, g := range result.Groups { + sb.WriteString(fmt.Sprintf("%s: count=%d", g.Key, g.Count)) + if len(g.Names) > 0 { + names := slices.Clone(g.Names) + if len(names) > 5 { + names = names[:5] + names = append(names, "...") + } + sb.WriteString(fmt.Sprintf(" names=[%s]", strings.Join(names, ", "))) + } + sb.WriteString("\n") + } + return sb.String() +} + +func formatGroupsJSON(result *Result) string { + var sb strings.Builder + sb.WriteString("[\n") + for i, g := range result.Groups { + if i > 0 { + sb.WriteString(",\n") + } + sb.WriteString(fmt.Sprintf(` {"key": %q, "count": %d, "names": [`, g.Key, g.Count)) + for j, n := range g.Names { + if j > 0 { + sb.WriteString(", ") + } + sb.WriteString(fmt.Sprintf("%q", n)) + } + sb.WriteString("]}") + } + sb.WriteString("\n]") + 
// padRight pads s with spaces on the right to the given display width;
// strings already at least that wide are returned unchanged.
func padRight(s string, width int) string {
	if len(s) >= width {
		return s
	}
	return s + strings.Repeat(" ", width-len(s))
}

// --- Pipeline splitting ---

// splitPipeline splits a query on '|' while ignoring pipes inside
// double-quoted strings.
//
// Fix: a backslash-escaped quote inside a string (the expr tokenizer
// supports \" escapes) previously toggled the quote state, so a pipe
// later in the same pattern incorrectly split the pipeline. Escaped
// characters are now consumed as a pair.
func splitPipeline(input string) []string {
	var parts []string
	var current strings.Builder
	inQuote := false

	for i := 0; i < len(input); i++ {
		ch := input[i]
		switch {
		case inQuote && ch == '\\' && i+1 < len(input):
			// Keep the escape and its target together; neither can
			// close the string.
			current.WriteByte(ch)
			i++
			current.WriteByte(input[i])
		case ch == '"':
			inQuote = !inQuote
			current.WriteByte(ch)
		case ch == '|' && !inQuote:
			parts = append(parts, current.String())
			current.Reset()
		default:
			current.WriteByte(ch)
		}
	}
	if current.Len() > 0 {
		parts = append(parts, current.String())
	}
	return parts
}

// splitFirst splits s into its first whitespace-delimited word and the
// trimmed remainder ("" when there is no remainder).
func splitFirst(s string) (string, string) {
	s = strings.TrimSpace(s)
	idx := strings.IndexAny(s, " \t")
	if idx < 0 {
		return s, ""
	}
	return s[:idx], strings.TrimSpace(s[idx+1:])
}

// parseCSV splits a comma-separated list, trimming whitespace and
// dropping empty entries.
func parseCSV(s string) []string {
	parts := strings.Split(s, ",")
	result := make([]string, 0, len(parts))
	for _, p := range parts {
		p = strings.TrimSpace(p)
		if p != "" {
			result = append(result, p)
		}
	}
	return result
}
TargetLocation: "testdata/petstore.yaml", + }) + + return graph.Build(ctx, idx) +} + +func TestParse_Success(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + query string + }{ + {"simple source", "schemas"}, + {"components source", "schemas.components"}, + {"inline source", "schemas.inline"}, + {"operations source", "operations"}, + {"sort", "schemas | sort depth desc"}, + {"take", "schemas | take 5"}, + {"where", "schemas | where depth > 3"}, + {"select", "schemas | select name, depth"}, + {"count", "schemas | count"}, + {"unique", "schemas | unique"}, + {"group-by", "schemas | group-by hash"}, + {"refs-out", "schemas | refs-out"}, + {"refs-in", "schemas | refs-in"}, + {"reachable", "schemas | reachable"}, + {"ancestors", "schemas | ancestors"}, + {"properties", "schemas | properties"}, + {"union-members", "schemas | union-members"}, + {"items", "schemas | items"}, + {"ops", "schemas | ops"}, + {"schemas from ops", "operations | schemas"}, + {"full pipeline", "schemas.components | where depth > 0 | sort depth desc | take 5 | select name, depth"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + stages, err := oq.Parse(tt.query) + require.NoError(t, err) + assert.NotEmpty(t, stages) + }) + } +} + +func TestParse_Error(t *testing.T) { + t.Parallel() + + _, err := oq.Parse("") + assert.Error(t, err) + + _, err = oq.Parse("schemas | unknown_stage") + assert.Error(t, err) + + _, err = oq.Parse("schemas | take abc") + assert.Error(t, err) +} + +func TestExecute_SchemasCount_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | count", g) + require.NoError(t, err) + assert.True(t, result.IsCount) + assert.Greater(t, result.Count, 0) +} + +func TestExecute_ComponentSchemas_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | select name", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) 
+ + // Check that we have the expected component schemas + names := collectNames(result, g) + assert.Contains(t, names, "Pet") + assert.Contains(t, names, "Owner") + assert.Contains(t, names, "Address") + assert.Contains(t, names, "Error") + assert.Contains(t, names, "Shape") + assert.Contains(t, names, "Unused") +} + +func TestExecute_Where_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where type == "object" | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + assert.Contains(t, names, "Pet") + assert.Contains(t, names, "Owner") +} + +func TestExecute_WhereInDegree_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Unused schema has no incoming references (from other schemas in components) + result, err := oq.Execute(`schemas.components | where in_degree == 0 | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + // Unused should have no references from other schemas + assert.Contains(t, names, "Unused") +} + +func TestExecute_Sort_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | sort property_count desc | take 3 | select name, property_count", g) + require.NoError(t, err) + assert.LessOrEqual(t, len(result.Rows), 3) +} + +func TestExecute_Reachable_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | reachable | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + // Pet references Owner, Owner references Address + assert.Contains(t, names, "Owner") + assert.Contains(t, names, "Address") +} + +func TestExecute_Ancestors_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Address" | ancestors | select name`, g) + require.NoError(t, err) + + names := 
collectNames(result, g) + // Address is referenced by Owner, which is referenced by Pet + assert.Contains(t, names, "Owner") +} + +func TestExecute_Properties_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | properties | select name`, g) + require.NoError(t, err) + // Pet has 4 properties: id, name, tag, owner + assert.NotEmpty(t, result.Rows) +} + +func TestExecute_UnionMembers_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Shape" | union-members | select name`, g) + require.NoError(t, err) + // Shape has oneOf with Circle and Square + names := collectNames(result, g) + assert.Contains(t, names, "Circle") + assert.Contains(t, names, "Square") +} + +func TestExecute_Operations_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("operations | select name, method, path", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) +} + +func TestExecute_OperationSchemas_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`operations | where operation_id == "listPets" | schemas | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + assert.Contains(t, names, "Pet") +} + +func TestExecute_GroupBy_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | group-by type`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Groups) +} + +func TestExecute_Unique_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | unique", g) + require.NoError(t, err) + + names := collectNames(result, g) + // Check no duplicates + seen := make(map[string]bool) + for _, n := range names { + assert.False(t, seen[n], "duplicate: %s", n) + seen[n] = true + } +} + +func 
TestExecute_SchemasToOps_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | ops | select name`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) +} + +func TestFormatTable_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | take 3 | select name, type", g) + require.NoError(t, err) + + table := oq.FormatTable(result, g) + assert.Contains(t, table, "name") + assert.Contains(t, table, "type") + assert.NotEmpty(t, table) +} + +func TestFormatJSON_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | take 3 | select name, type", g) + require.NoError(t, err) + + json := oq.FormatJSON(result, g) + assert.True(t, strings.HasPrefix(json, "[")) + assert.True(t, strings.HasSuffix(json, "]")) +} + +func TestFormatTable_Count_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | count", g) + require.NoError(t, err) + + table := oq.FormatTable(result, g) + assert.NotEmpty(t, table) +} + +func TestFormatTable_Empty_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "NonExistent"`, g) + require.NoError(t, err) + + table := oq.FormatTable(result, g) + assert.Equal(t, "(empty)", table) +} + +func TestExecute_MatchesExpression_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name matches ".*Error.*" | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + assert.Contains(t, names, "Error") +} + +func TestExecute_SortAsc_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | sort name asc | select name", g) + require.NoError(t, err) + + names := collectNames(result, g) + for i := 1; 
i < len(names); i++ { + assert.LessOrEqual(t, names[i-1], names[i]) + } +} + +// collectNames extracts the "name" field from all rows in the result. +func collectNames(result *oq.Result, g *graph.SchemaGraph) []string { + var names []string + for _, row := range result.Rows { + v := oq.FieldValuePublic(row, "name", g) + names = append(names, v.Str) + } + return names +} diff --git a/oq/testdata/petstore.yaml b/oq/testdata/petstore.yaml new file mode 100644 index 0000000..82deb95 --- /dev/null +++ b/oq/testdata/petstore.yaml @@ -0,0 +1,131 @@ +openapi: "3.1.0" +info: + title: Petstore + version: "1.0.0" +paths: + /pets: + get: + operationId: listPets + parameters: + - name: limit + in: query + schema: + type: integer + responses: + "200": + description: A list of pets + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/Pet' + post: + operationId: createPet + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/Pet' + responses: + "201": + description: Created + content: + application/json: + schema: + $ref: '#/components/schemas/Pet' + /pets/{petId}: + get: + operationId: showPetById + parameters: + - name: petId + in: path + required: true + schema: + type: string + responses: + "200": + description: A pet + content: + application/json: + schema: + $ref: '#/components/schemas/Pet' + default: + description: unexpected error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + /owners: + get: + operationId: listOwners + responses: + "200": + description: A list of owners + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/Owner' +components: + schemas: + Pet: + type: object + properties: + id: + type: integer + name: + type: string + tag: + type: string + owner: + $ref: '#/components/schemas/Owner' + required: + - id + - name + Owner: + type: object + properties: + id: + type: integer + name: + type: string + address: + $ref: 
'#/components/schemas/Address' + Address: + type: object + properties: + street: + type: string + city: + type: string + Error: + type: object + properties: + code: + type: integer + message: + type: string + required: + - code + - message + Shape: + oneOf: + - $ref: '#/components/schemas/Circle' + - $ref: '#/components/schemas/Square' + Circle: + type: object + properties: + radius: + type: number + Square: + type: object + properties: + side: + type: number + Unused: + type: object + properties: + data: + type: string From b5dc93a3c01d4b218c9602c74568a5b1b206d5c1 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 00:16:06 +0000 Subject: [PATCH 02/27] style: fix gofmt formatting Co-Authored-By: Claude Opus 4.6 --- graph/graph.go | 6 +++--- oq/expr/expr.go | 10 +++++----- oq/oq.go | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/graph/graph.go b/graph/graph.go index 1e87228..9985219 100644 --- a/graph/graph.go +++ b/graph/graph.go @@ -91,12 +91,12 @@ type SchemaGraph struct { inEdges map[NodeID][]Edge // Lookup maps - ptrToNode map[*oas3.JSONSchemaReferenceable]NodeID + ptrToNode map[*oas3.JSONSchemaReferenceable]NodeID nameToNode map[string]NodeID // Operation-schema relationships - opSchemas map[NodeID]map[NodeID]bool // operation -> set of schema NodeIDs - schemaOps map[NodeID]map[NodeID]bool // schema -> set of operation NodeIDs + opSchemas map[NodeID]map[NodeID]bool // operation -> set of schema NodeIDs + schemaOps map[NodeID]map[NodeID]bool // schema -> set of operation NodeIDs } // Build constructs a SchemaGraph from an openapi.Index. diff --git a/oq/expr/expr.go b/oq/expr/expr.go index b511823..086b8cd 100644 --- a/oq/expr/expr.go +++ b/oq/expr/expr.go @@ -10,11 +10,11 @@ import ( // Value represents a typed value in the expression system. 
type Value struct { - Kind ValueKind - Str string - Int int - Bool bool - isNull bool + Kind ValueKind + Str string + Int int + Bool bool + isNull bool } type ValueKind int diff --git a/oq/oq.go b/oq/oq.go index 67824f4..a43f1bd 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -20,7 +20,7 @@ import ( type ResultKind int const ( - SchemaResult ResultKind = iota + SchemaResult ResultKind = iota OperationResult ) From ded07af0550dd31dac18aa8e3ad1975e03db06b8 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 00:18:58 +0000 Subject: [PATCH 03/27] build: add replace directive for cmd/openapi to resolve local packages The cmd/openapi module needs a replace directive pointing to the root module so that go mod tidy can resolve the new graph/ and oq/ packages that aren't yet published. Co-Authored-By: Claude Opus 4.6 --- cmd/openapi/go.mod | 2 ++ cmd/openapi/go.sum | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/openapi/go.mod b/cmd/openapi/go.mod index d5ea064..4865210 100644 --- a/cmd/openapi/go.mod +++ b/cmd/openapi/go.mod @@ -2,6 +2,8 @@ module github.com/speakeasy-api/openapi/cmd/openapi go 1.24.3 +replace github.com/speakeasy-api/openapi => ../../ + require ( github.com/charmbracelet/bubbles v0.21.0 github.com/charmbracelet/bubbletea v1.3.10 diff --git a/cmd/openapi/go.sum b/cmd/openapi/go.sum index ca0478f..31f3ed1 100644 --- a/cmd/openapi/go.sum +++ b/cmd/openapi/go.sum @@ -84,8 +84,6 @@ github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/speakeasy-api/jsonpath v0.6.3 h1:c+QPwzAOdrWvzycuc9HFsIZcxKIaWcNpC+xhOW9rJxU= github.com/speakeasy-api/jsonpath v0.6.3/go.mod h1:2cXloNuQ+RSXi5HTRaeBh7JEmjRXTiaKpFTdZiL7URI= -github.com/speakeasy-api/openapi v1.19.5-0.20260309010446-7ff6a9590a7f h1:UjpoKOKoNqok2lxBTTQMq3Pv8metgqwRh6+ZeTxPFJw= -github.com/speakeasy-api/openapi v1.19.5-0.20260309010446-7ff6a9590a7f/go.mod 
h1:UfKa7FqE4jgexJZuj51MmdHAFGmDv0Zaw3+yOd81YKU= github.com/speakeasy-api/openapi/openapi/linter/customrules v0.0.0-20260309010446-7ff6a9590a7f h1:kwiHeGSILCUVEM9iSAUtifl1TLlvyDXmMjyW26/iX2k= github.com/speakeasy-api/openapi/openapi/linter/customrules v0.0.0-20260309010446-7ff6a9590a7f/go.mod h1:ALDg9E6LRTL5tMFlddVrLhc4JaarCHL65x2YkwL7xdg= github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s= From d88cea1ac111007b09d86162f7e9f437f0ef1224 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 00:24:42 +0000 Subject: [PATCH 04/27] fix: resolve remaining testifylint errors in test files Use require.Error for error assertions and assert.Positive for count checks. Co-Authored-By: Claude Opus 4.6 --- oq/expr/expr_test.go | 4 ++-- oq/oq_test.go | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/oq/expr/expr_test.go b/oq/expr/expr_test.go index 7207ebb..2057560 100644 --- a/oq/expr/expr_test.go +++ b/oq/expr/expr_test.go @@ -136,8 +136,8 @@ func TestParse_Error(t *testing.T) { t.Parallel() _, err := expr.Parse("") - assert.Error(t, err) + require.Error(t, err) _, err = expr.Parse("name matches \"[invalid\"") - assert.Error(t, err) + require.Error(t, err) } diff --git a/oq/oq_test.go b/oq/oq_test.go index 30d1dbf..d29cb09 100644 --- a/oq/oq_test.go +++ b/oq/oq_test.go @@ -79,13 +79,13 @@ func TestParse_Error(t *testing.T) { t.Parallel() _, err := oq.Parse("") - assert.Error(t, err) + require.Error(t, err) _, err = oq.Parse("schemas | unknown_stage") - assert.Error(t, err) + require.Error(t, err) _, err = oq.Parse("schemas | take abc") - assert.Error(t, err) + require.Error(t, err) } func TestExecute_SchemasCount_Success(t *testing.T) { @@ -95,7 +95,7 @@ func TestExecute_SchemasCount_Success(t *testing.T) { result, err := oq.Execute("schemas | count", g) require.NoError(t, err) assert.True(t, result.IsCount) - assert.Greater(t, result.Count, 0) + assert.Positive(t, result.Count) } func 
TestExecute_ComponentSchemas_Success(t *testing.T) { From dbdaafdcd5ef8de9794bc4fcf6ca593eeb47be63 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 00:28:33 +0000 Subject: [PATCH 05/27] fix: resolve all golangci-lint errors - Replace fmt.Errorf with errors.New where no format args (perfsprint) - Convert if-else chain to switch statement (gocritic) - Use assert.Len and assert.Positive in tests (testifylint) Co-Authored-By: Claude Opus 4.6 --- graph/graph_test.go | 6 +++--- oq/expr/expr.go | 3 ++- oq/oq.go | 20 +++++++++++--------- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/graph/graph_test.go b/graph/graph_test.go index 52a06a6..cf0192a 100644 --- a/graph/graph_test.go +++ b/graph/graph_test.go @@ -84,7 +84,7 @@ func TestBuild_Edges_Success(t *testing.T) { edges := g.OutEdges(pet.ID) // Pet has properties: id, name, tag, owner - assert.Equal(t, 4, len(edges), "Pet should have 4 out-edges") + assert.Len(t, edges, 4, "Pet should have 4 out-edges") edgeLabels := make(map[string]graph.EdgeKind) for _, e := range edges { @@ -153,7 +153,7 @@ func TestBuild_OperationSchemas_Success(t *testing.T) { if op.OperationID == "listPets" { schemas := g.OperationSchemas(op.ID) assert.NotEmpty(t, schemas, "listPets should reference schemas") - assert.Greater(t, op.SchemaCount, 0) + assert.Positive(t, op.SchemaCount) return } } @@ -167,7 +167,7 @@ func TestBuild_Metrics_Success(t *testing.T) { pet, _ := g.SchemaByName("Pet") assert.Equal(t, 4, pet.PropertyCount, "Pet should have 4 properties") assert.Equal(t, 4, pet.OutDegree, "Pet should have 4 out-edges") - assert.Greater(t, pet.InDegree, 0, "Pet should be referenced") + assert.Positive(t, pet.InDegree, "Pet should be referenced") assert.NotEmpty(t, pet.Hash, "Pet should have a hash") shape, _ := g.SchemaByName("Shape") diff --git a/oq/expr/expr.go b/oq/expr/expr.go index 086b8cd..ed02740 100644 --- a/oq/expr/expr.go +++ b/oq/expr/expr.go @@ -2,6 +2,7 @@ package expr import ( + "errors" "fmt" 
"regexp" "strconv" @@ -304,7 +305,7 @@ func (p *parser) parseComparison() (Expr, error) { // left must be a field reference fieldRef, ok := left.(*fieldExpr) if !ok { - return nil, fmt.Errorf("matches requires a field on the left side") + return nil, errors.New("matches requires a field on the left side") } return &matchesExpr{field: fieldRef.name, pattern: re}, nil } diff --git a/oq/oq.go b/oq/oq.go index a43f1bd..742c021 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -6,6 +6,7 @@ package oq import ( + "errors" "fmt" "slices" "sort" @@ -97,7 +98,7 @@ func Parse(query string) ([]Stage, error) { // Split by pipe, respecting quoted strings parts := splitPipeline(query) if len(parts) == 0 { - return nil, fmt.Errorf("empty query") + return nil, errors.New("empty query") } var stages []Stage @@ -132,13 +133,13 @@ func parseStage(s string) (Stage, error) { switch keyword { case "where": if rest == "" { - return Stage{}, fmt.Errorf("where requires an expression") + return Stage{}, errors.New("where requires an expression") } return Stage{Kind: StageWhere, Expr: rest}, nil case "select": if rest == "" { - return Stage{}, fmt.Errorf("select requires field names") + return Stage{}, errors.New("select requires field names") } fields := parseCSV(rest) return Stage{Kind: StageSelect, Fields: fields}, nil @@ -146,7 +147,7 @@ func parseStage(s string) (Stage, error) { case "sort": parts := strings.Fields(rest) if len(parts) == 0 { - return Stage{}, fmt.Errorf("sort requires a field name") + return Stage{}, errors.New("sort requires a field name") } desc := false if len(parts) >= 2 && strings.ToLower(parts[1]) == "desc" { @@ -166,7 +167,7 @@ func parseStage(s string) (Stage, error) { case "group-by": if rest == "" { - return Stage{}, fmt.Errorf("group-by requires a field name") + return Stage{}, errors.New("group-by requires a field name") } fields := parseCSV(rest) return Stage{Kind: StageGroupBy, Fields: fields}, nil @@ -352,7 +353,7 @@ func execUnique(result *Result) (*Result, error) 
{ func execGroupBy(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { if len(stage.Fields) == 0 { - return nil, fmt.Errorf("group-by requires at least one field") + return nil, errors.New("group-by requires at least one field") } field := stage.Fields[0] @@ -855,13 +856,14 @@ func splitPipeline(input string) []string { for i := 0; i < len(input); i++ { ch := input[i] - if ch == '"' { + switch { + case ch == '"': inQuote = !inQuote current.WriteByte(ch) - } else if ch == '|' && !inQuote { + case ch == '|' && !inQuote: parts = append(parts, current.String()) current.Reset() - } else { + default: current.WriteByte(ch) } } From c02147eb880aba07d9c6b5bfb12706f0ea47a8f6 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 07:53:47 +0000 Subject: [PATCH 06/27] fix: guard map lookup to satisfy nil-panic linter Co-Authored-By: Claude Opus 4.6 --- oq/oq.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/oq/oq.go b/oq/oq.go index 742c021..3a5a834 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -380,7 +380,10 @@ func execGroupBy(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, er grouped := &Result{Fields: result.Fields} for _, key := range order { - grp := groups[key] + grp, ok := groups[key] + if !ok { + continue + } grouped.Groups = append(grouped.Groups, GroupResult{ Key: key, Count: grp.count, From 26edf4a6f9fd35c7c824fc52c49f86697f1125ad Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 07:57:31 +0000 Subject: [PATCH 07/27] fix: address PR review feedback - Use t.Context() instead of context.Background() in tests - Replace WriteString(fmt.Sprintf(...)) with fmt.Fprintf - Remove development replace directive from cmd/openapi/go.mod - Fix trailing newline for count results in table format Co-Authored-By: Claude Opus 4.6 --- cmd/openapi/commands/openapi/query.go | 7 ++----- cmd/openapi/go.mod | 2 -- graph/graph_test.go | 3 +-- oq/oq.go | 10 +++++----- oq/oq_test.go | 3 +-- 5 files changed, 9 
insertions(+), 16 deletions(-) diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go index 681552f..5f80c0b 100644 --- a/cmd/openapi/commands/openapi/query.go +++ b/cmd/openapi/commands/openapi/query.go @@ -126,11 +126,8 @@ func queryOpenAPI(ctx context.Context, processor *OpenAPIProcessor, queryStr str } fmt.Fprint(processor.stdout(), output) - if !result.IsCount || queryOutputFormat != "table" { - // FormatTable already includes newlines for non-count results - if result.IsCount { - fmt.Fprintln(processor.stdout()) - } + if result.IsCount { + fmt.Fprintln(processor.stdout()) } return nil diff --git a/cmd/openapi/go.mod b/cmd/openapi/go.mod index 4865210..d5ea064 100644 --- a/cmd/openapi/go.mod +++ b/cmd/openapi/go.mod @@ -2,8 +2,6 @@ module github.com/speakeasy-api/openapi/cmd/openapi go 1.24.3 -replace github.com/speakeasy-api/openapi => ../../ - require ( github.com/charmbracelet/bubbles v0.21.0 github.com/charmbracelet/bubbletea v1.3.10 diff --git a/graph/graph_test.go b/graph/graph_test.go index cf0192a..88f12a3 100644 --- a/graph/graph_test.go +++ b/graph/graph_test.go @@ -1,7 +1,6 @@ package graph_test import ( - "context" "os" "testing" @@ -19,7 +18,7 @@ func loadTestGraph(t *testing.T) *graph.SchemaGraph { require.NoError(t, err) defer f.Close() - ctx := context.Background() + ctx := t.Context() doc, _, err := openapi.Unmarshal(ctx, f, openapi.WithSkipValidation()) require.NoError(t, err) require.NotNil(t, doc) diff --git a/oq/oq.go b/oq/oq.go index 3a5a834..a198f22 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -785,7 +785,7 @@ func FormatJSON(result *Result, g *graph.SchemaGraph) string { sb.WriteString(", ") } v := fieldValue(row, f, g) - sb.WriteString(fmt.Sprintf("%q: %s", f, jsonValue(v))) + fmt.Fprintf(&sb, "%q: %s", f, jsonValue(v)) } sb.WriteString("}") } @@ -809,14 +809,14 @@ func jsonValue(v expr.Value) string { func formatGroups(result *Result) string { var sb strings.Builder for _, g := range result.Groups { - 
sb.WriteString(fmt.Sprintf("%s: count=%d", g.Key, g.Count)) + fmt.Fprintf(&sb, "%s: count=%d", g.Key, g.Count) if len(g.Names) > 0 { names := slices.Clone(g.Names) if len(names) > 5 { names = names[:5] names = append(names, "...") } - sb.WriteString(fmt.Sprintf(" names=[%s]", strings.Join(names, ", "))) + fmt.Fprintf(&sb, " names=[%s]", strings.Join(names, ", ")) } sb.WriteString("\n") } @@ -830,12 +830,12 @@ func formatGroupsJSON(result *Result) string { if i > 0 { sb.WriteString(",\n") } - sb.WriteString(fmt.Sprintf(` {"key": %q, "count": %d, "names": [`, g.Key, g.Count)) + fmt.Fprintf(&sb, ` {"key": %q, "count": %d, "names": [`, g.Key, g.Count) for j, n := range g.Names { if j > 0 { sb.WriteString(", ") } - sb.WriteString(fmt.Sprintf("%q", n)) + fmt.Fprintf(&sb, "%q", n) } sb.WriteString("]}") } diff --git a/oq/oq_test.go b/oq/oq_test.go index d29cb09..21166d5 100644 --- a/oq/oq_test.go +++ b/oq/oq_test.go @@ -1,7 +1,6 @@ package oq_test import ( - "context" "os" "strings" "testing" @@ -21,7 +20,7 @@ func loadTestGraph(t *testing.T) *graph.SchemaGraph { require.NoError(t, err) defer f.Close() - ctx := context.Background() + ctx := t.Context() doc, _, err := openapi.Unmarshal(ctx, f, openapi.WithSkipValidation()) require.NoError(t, err) require.NotNil(t, doc) From 200bdd9b9cae7368e25e39ce31b5b268e405e4f0 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 08:17:08 +0000 Subject: [PATCH 08/27] feat: add new oq pipeline stages and operation fields New stages: explain, fields, head (alias), sample, path, top, bottom, format New operation fields: tag, parameter_count, deprecated, description, summary New graph method: ShortestPath for BFS pathfinding New formatter: FormatMarkdown for markdown table output Restore replace directive in cmd/openapi/go.mod (required for CI) Co-Authored-By: Claude Opus 4.6 --- cmd/openapi/commands/openapi/query.go | 30 +- cmd/openapi/go.mod | 2 + graph/graph.go | 44 +++ graph/graph_test.go | 22 ++ oq/oq.go | 412 
+++++++++++++++++++++++++- oq/oq_test.go | 175 +++++++++++ 6 files changed, 673 insertions(+), 12 deletions(-) diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go index 5f80c0b..2a2a3e8 100644 --- a/cmd/openapi/commands/openapi/query.go +++ b/cmd/openapi/commands/openapi/query.go @@ -44,10 +44,23 @@ Examples: Stdin is supported — either pipe data directly or use '-' explicitly: cat spec.yaml | openapi spec query - 'schemas | count' + # Shortest path between schemas + openapi spec query petstore.yaml 'schemas | path "Pet" "Address" | select name' + + # Top 5 most connected schemas + openapi spec query petstore.yaml 'schemas.components | top 5 in_degree | select name, in_degree' + + # Explain a query plan + openapi spec query petstore.yaml 'schemas.components | where depth > 5 | sort depth desc | explain' + + # List available fields + openapi spec query petstore.yaml 'schemas | fields' + Pipeline stages: Source: schemas, schemas.components, schemas.inline, operations - Traversal: refs-out, refs-in, reachable, ancestors, properties, union-members, items, ops, schemas - Filter: where , select , sort [asc|desc], take , unique, group-by , count + Traversal: refs-out, refs-in, reachable, ancestors, properties, union-members, items, ops, schemas, path + Filter: where , select , sort [asc|desc], take/head , sample , top , bottom , unique, group-by , count + Meta: explain, fields, format Where expressions support: ==, !=, >, <, >=, <=, and, or, not, has(), matches`, Args: stdinOrFileArgs(2, 2), @@ -58,7 +71,7 @@ var queryOutputFormat string var queryFromFile string func init() { - queryCmd.Flags().StringVar(&queryOutputFormat, "format", "table", "output format: table or json") + queryCmd.Flags().StringVar(&queryOutputFormat, "format", "table", "output format: table, json, or markdown") queryCmd.Flags().StringVarP(&queryFromFile, "file", "f", "", "read query from file instead of argument") } @@ -116,11 +129,18 @@ func queryOpenAPI(ctx 
context.Context, processor *OpenAPIProcessor, queryStr str return fmt.Errorf("query error: %w", err) } - // Format and output + // Format and output — inline format stage overrides CLI flag + format := queryOutputFormat + if result.FormatHint != "" { + format = result.FormatHint + } + var output string - switch queryOutputFormat { + switch format { case "json": output = oq.FormatJSON(result, g) + case "markdown": + output = oq.FormatMarkdown(result, g) default: output = oq.FormatTable(result, g) } diff --git a/cmd/openapi/go.mod b/cmd/openapi/go.mod index d5ea064..4865210 100644 --- a/cmd/openapi/go.mod +++ b/cmd/openapi/go.mod @@ -2,6 +2,8 @@ module github.com/speakeasy-api/openapi/cmd/openapi go 1.24.3 +replace github.com/speakeasy-api/openapi => ../../ + require ( github.com/charmbracelet/bubbles v0.21.0 github.com/charmbracelet/bubbletea v1.3.10 diff --git a/graph/graph.go b/graph/graph.go index 9985219..4b1fd29 100644 --- a/graph/graph.go +++ b/graph/graph.go @@ -673,6 +673,50 @@ func (g *SchemaGraph) Ancestors(id NodeID) []NodeID { return result } +// ShortestPath returns the shortest path from `from` to `to` using out-edges (BFS). +// Returns nil if no path exists. The returned slice includes both endpoints. 
+func (g *SchemaGraph) ShortestPath(from, to NodeID) []NodeID { + if from == to { + return []NodeID{from} + } + + parent := make(map[NodeID]NodeID) + visited := make(map[NodeID]bool) + visited[from] = true + queue := []NodeID{from} + + for len(queue) > 0 { + current := queue[0] + queue = queue[1:] + + for _, edge := range g.outEdges[current] { + if visited[edge.To] { + continue + } + visited[edge.To] = true + parent[edge.To] = current + + if edge.To == to { + // Reconstruct path + var path []NodeID + for n := to; n != from; n = parent[n] { + path = append(path, n) + } + path = append(path, from) + // Reverse + for i, j := 0, len(path)-1; i < j; i, j = i+1, j-1 { + path[i], path[j] = path[j], path[i] + } + return path + } + + queue = append(queue, edge.To) + } + } + + return nil +} + func intStr(i int) string { return strconv.Itoa(i) } diff --git a/graph/graph_test.go b/graph/graph_test.go index 88f12a3..7a09010 100644 --- a/graph/graph_test.go +++ b/graph/graph_test.go @@ -159,6 +159,28 @@ func TestBuild_OperationSchemas_Success(t *testing.T) { t.Fatal("listPets operation not found") } +func TestBuild_ShortestPath_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + addr, _ := g.SchemaByName("Address") + path := g.ShortestPath(pet.ID, addr.ID) + assert.NotEmpty(t, path, "should find path from Pet to Address") + assert.Equal(t, pet.ID, path[0]) + assert.Equal(t, addr.ID, path[len(path)-1]) +} + +func TestBuild_ShortestPath_NoPath_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + unused, _ := g.SchemaByName("Unused") + pet, _ := g.SchemaByName("Pet") + path := g.ShortestPath(unused.ID, pet.ID) + assert.Empty(t, path, "Unused should not reach Pet") +} + func TestBuild_Metrics_Success(t *testing.T) { t.Parallel() g := loadTestGraph(t) diff --git a/oq/oq.go b/oq/oq.go index a198f22..1867986 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -6,6 +6,8 @@ package oq import ( + "crypto/sha256" + "encoding/hex" "errors" 
"fmt" "slices" @@ -34,11 +36,13 @@ type Row struct { // Result is the output of a query execution. type Result struct { - Rows []Row - Fields []string // projected fields (empty = all) - IsCount bool - Count int - Groups []GroupResult + Rows []Row + Fields []string // projected fields (empty = all) + IsCount bool + Count int + Groups []GroupResult + Explain string // human-readable pipeline explanation + FormatHint string // format preference from format stage (table, json, markdown) } // GroupResult represents a group-by aggregation result. @@ -80,6 +84,13 @@ const ( StageItems StageOps StageSchemas + StageExplain + StageFields + StageSample + StagePath + StageTop + StageBottom + StageFormat ) // Stage represents a single stage in the query pipeline. @@ -90,7 +101,10 @@ type Stage struct { Fields []string // for StageSelect, StageGroupBy SortField string // for StageSort SortDesc bool // for StageSort - Limit int // for StageTake + Limit int // for StageTake, StageSample, StageTop, StageBottom + PathFrom string // for StagePath + PathTo string // for StagePath + Format string // for StageFormat } // Parse splits a pipeline query string into stages. 
@@ -155,7 +169,7 @@ func parseStage(s string) (Stage, error) { } return Stage{Kind: StageSort, SortField: parts[0], SortDesc: desc}, nil - case "take": + case "take", "head": n, err := strconv.Atoi(strings.TrimSpace(rest)) if err != nil { return Stage{}, fmt.Errorf("take requires a number: %w", err) @@ -202,6 +216,55 @@ func parseStage(s string) (Stage, error) { case "schemas": return Stage{Kind: StageSchemas}, nil + case "explain": + return Stage{Kind: StageExplain}, nil + + case "fields": + return Stage{Kind: StageFields}, nil + + case "sample": + n, err := strconv.Atoi(strings.TrimSpace(rest)) + if err != nil { + return Stage{}, fmt.Errorf("sample requires a number: %w", err) + } + return Stage{Kind: StageSample, Limit: n}, nil + + case "path": + from, to := parseTwoArgs(rest) + if from == "" || to == "" { + return Stage{}, errors.New("path requires two schema names") + } + return Stage{Kind: StagePath, PathFrom: from, PathTo: to}, nil + + case "top": + parts := strings.Fields(rest) + if len(parts) < 2 { + return Stage{}, errors.New("top requires a number and a field name") + } + n, err := strconv.Atoi(parts[0]) + if err != nil { + return Stage{}, fmt.Errorf("top requires a number: %w", err) + } + return Stage{Kind: StageTop, Limit: n, SortField: parts[1]}, nil + + case "bottom": + parts := strings.Fields(rest) + if len(parts) < 2 { + return Stage{}, errors.New("bottom requires a number and a field name") + } + n, err := strconv.Atoi(parts[0]) + if err != nil { + return Stage{}, fmt.Errorf("bottom requires a number: %w", err) + } + return Stage{Kind: StageBottom, Limit: n, SortField: parts[1]}, nil + + case "format": + f := strings.TrimSpace(rest) + if f != "table" && f != "json" && f != "markdown" { + return Stage{}, fmt.Errorf("format must be table, json, or markdown, got %q", f) + } + return Stage{Kind: StageFormat, Format: f}, nil + default: return Stage{}, fmt.Errorf("unknown stage: %q", keyword) } @@ -214,6 +277,13 @@ func run(stages []Stage, g 
*graph.SchemaGraph) (*Result, error) { return &Result{}, nil } + // Check if explain stage is present + for _, stage := range stages { + if stage.Kind == StageExplain { + return &Result{Explain: buildExplain(stages)}, nil + } + } + // Execute source stage result, err := execSource(stages[0], g) if err != nil { @@ -295,6 +365,29 @@ func execStage(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, erro return execSchemasToOps(result, g) case StageSchemas: return execOpsToSchemas(result, g) + case StageFields: + return execFields(result) + case StageSample: + return execSample(stage, result) + case StagePath: + return execPath(stage, g) + case StageTop: + // Expand to sort desc + take + sorted, err := execSort(Stage{Kind: StageSort, SortField: stage.SortField, SortDesc: true}, result, g) + if err != nil { + return nil, err + } + return execTake(Stage{Kind: StageTake, Limit: stage.Limit}, sorted) + case StageBottom: + // Expand to sort asc + take + sorted, err := execSort(Stage{Kind: StageSort, SortField: stage.SortField, SortDesc: false}, result, g) + if err != nil { + return nil, err + } + return execTake(Stage{Kind: StageTake, Limit: stage.Limit}, sorted) + case StageFormat: + result.FormatHint = stage.Format + return result, nil default: return nil, fmt.Errorf("unimplemented stage kind: %d", stage.Kind) } @@ -627,6 +720,31 @@ func fieldValue(row Row, name string, g *graph.SchemaGraph) expr.Value { return expr.IntVal(o.SchemaCount) case "component_count": return expr.IntVal(o.ComponentCount) + case "tag": + if o.Operation != nil && len(o.Operation.Tags) > 0 { + return expr.StringVal(o.Operation.Tags[0]) + } + return expr.StringVal("") + case "parameter_count": + if o.Operation != nil { + return expr.IntVal(len(o.Operation.Parameters)) + } + return expr.IntVal(0) + case "deprecated": + if o.Operation != nil { + return expr.BoolVal(o.Operation.Deprecated != nil && *o.Operation.Deprecated) + } + return expr.BoolVal(false) + case "description": + if 
o.Operation != nil { + return expr.StringVal(o.Operation.GetDescription()) + } + return expr.StringVal("") + case "summary": + if o.Operation != nil { + return expr.StringVal(o.Operation.GetSummary()) + } + return expr.StringVal("") } } return expr.NullVal() @@ -673,10 +791,226 @@ func rowKey(row Row) string { return "o:" + strconv.Itoa(row.OpIdx) } +// --- Explain --- + +func buildExplain(stages []Stage) string { + var sb strings.Builder + for i, stage := range stages { + if stage.Kind == StageExplain { + continue + } + if i == 0 { + fmt.Fprintf(&sb, "Source: %s\n", stage.Source) + } else { + desc := describeStage(stage) + fmt.Fprintf(&sb, " → %s\n", desc) + } + } + return sb.String() +} + +func describeStage(stage Stage) string { + switch stage.Kind { + case StageWhere: + return "Filter: where " + stage.Expr + case StageSelect: + return "Project: select " + strings.Join(stage.Fields, ", ") + case StageSort: + dir := "ascending" + if stage.SortDesc { + dir = "descending" + } + return "Sort: " + stage.SortField + " " + dir + case StageTake: + return "Limit: take " + strconv.Itoa(stage.Limit) + case StageUnique: + return "Unique: deduplicate rows" + case StageGroupBy: + return "Group: group-by " + strings.Join(stage.Fields, ", ") + case StageCount: + return "Count: count rows" + case StageRefsOut: + return "Traverse: outgoing references" + case StageRefsIn: + return "Traverse: incoming references" + case StageReachable: + return "Traverse: all reachable nodes" + case StageAncestors: + return "Traverse: all ancestor nodes" + case StageProperties: + return "Traverse: property children" + case StageUnionMembers: + return "Traverse: union members" + case StageItems: + return "Traverse: array items" + case StageOps: + return "Navigate: schemas to operations" + case StageSchemas: + return "Navigate: operations to schemas" + case StageFields: + return "Terminal: list available fields" + case StageSample: + return "Sample: random " + strconv.Itoa(stage.Limit) + " rows" + 
case StagePath: + return "Path: shortest path from " + stage.PathFrom + " to " + stage.PathTo + case StageTop: + return "Top: " + strconv.Itoa(stage.Limit) + " by " + stage.SortField + " descending" + case StageBottom: + return "Bottom: " + strconv.Itoa(stage.Limit) + " by " + stage.SortField + " ascending" + case StageFormat: + return "Format: " + stage.Format + default: + return "Unknown stage" + } +} + +// --- Fields --- + +func execFields(result *Result) (*Result, error) { + var sb strings.Builder + kind := SchemaResult + if len(result.Rows) > 0 { + kind = result.Rows[0].Kind + } + + if kind == SchemaResult { + sb.WriteString("Field Type\n") + sb.WriteString("----------- ------\n") + fields := []struct{ name, typ string }{ + {"name", "string"}, + {"type", "string"}, + {"depth", "int"}, + {"in_degree", "int"}, + {"out_degree", "int"}, + {"union_width", "int"}, + {"property_count", "int"}, + {"is_component", "bool"}, + {"is_inline", "bool"}, + {"is_circular", "bool"}, + {"has_ref", "bool"}, + {"hash", "string"}, + {"path", "string"}, + } + for _, f := range fields { + fmt.Fprintf(&sb, "%-17s %s\n", f.name, f.typ) + } + } else { + sb.WriteString("Field Type\n") + sb.WriteString("----------- ------\n") + fields := []struct{ name, typ string }{ + {"name", "string"}, + {"method", "string"}, + {"path", "string"}, + {"operation_id", "string"}, + {"schema_count", "int"}, + {"component_count", "int"}, + {"tag", "string"}, + {"parameter_count", "int"}, + {"deprecated", "bool"}, + {"description", "string"}, + {"summary", "string"}, + } + for _, f := range fields { + fmt.Fprintf(&sb, "%-17s %s\n", f.name, f.typ) + } + } + + return &Result{Explain: sb.String()}, nil +} + +// --- Sample --- + +func execSample(stage Stage, result *Result) (*Result, error) { + if stage.Limit >= len(result.Rows) { + return result, nil + } + + // Deterministic shuffle: sort by hash of row key, then take first n + type keyed struct { + hash string + row Row + } + items := make([]keyed, 
// --- Arg parsing helpers ---

// parseTwoArgs splits up to two whitespace-separated arguments from s.
// Arguments may be double-quoted to include spaces; an unterminated quote
// consumes the rest of the string. Missing arguments come back as "".
func parseTwoArgs(s string) (string, string) {
	args := make([]string, 0, 2)
	rest := strings.TrimSpace(s)

	for rest != "" && len(args) < 2 {
		if rest[0] == '"' {
			// Quoted argument: consume through the closing quote, or to
			// the end of the string when unterminated.
			closing := strings.IndexByte(rest[1:], '"')
			if closing < 0 {
				args = append(args, rest[1:])
				rest = ""
				continue
			}
			args = append(args, rest[1:closing+1])
			rest = strings.TrimSpace(rest[closing+2:])
			continue
		}

		// Bare argument: runs to the next space or tab.
		cut := strings.IndexAny(rest, " \t")
		if cut < 0 {
			args = append(args, rest)
			rest = ""
			continue
		}
		args = append(args, rest[:cut])
		rest = strings.TrimSpace(rest[cut+1:])
	}

	switch len(args) {
	case 0:
		return "", ""
	case 1:
		return args[0], ""
	default:
		return args[0], args[1]
	}
}
func FormatTable(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + if result.IsCount { return strconv.Itoa(result.Count) } @@ -752,6 +1086,10 @@ func FormatTable(result *Result, g *graph.SchemaGraph) string { // FormatJSON formats a result as JSON. func FormatJSON(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + if result.IsCount { return strconv.Itoa(result.Count) } @@ -793,6 +1131,66 @@ func FormatJSON(result *Result, g *graph.SchemaGraph) string { return sb.String() } +// FormatMarkdown formats a result as a markdown table. +func FormatMarkdown(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + + if result.IsCount { + return strconv.Itoa(result.Count) + } + + if len(result.Groups) > 0 { + var sb strings.Builder + sb.WriteString("| Key | Count |\n") + sb.WriteString("| --- | --- |\n") + for _, grp := range result.Groups { + fmt.Fprintf(&sb, "| %s | %d |\n", grp.Key, grp.Count) + } + return sb.String() + } + + if len(result.Rows) == 0 { + return "(empty)" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + var sb strings.Builder + // Header + sb.WriteString("| ") + sb.WriteString(strings.Join(fields, " | ")) + sb.WriteString(" |\n") + // Separator + sb.WriteString("|") + for range fields { + sb.WriteString(" --- |") + } + sb.WriteString("\n") + // Rows + for _, row := range result.Rows { + sb.WriteString("| ") + for i, f := range fields { + if i > 0 { + sb.WriteString(" | ") + } + v := valueToString(fieldValue(row, f, g)) + sb.WriteString(v) + } + sb.WriteString(" |\n") + } + + return sb.String() +} + func jsonValue(v expr.Value) string { switch v.Kind { case expr.KindString: diff --git a/oq/oq_test.go 
b/oq/oq_test.go index 21166d5..4a9853c 100644 --- a/oq/oq_test.go +++ b/oq/oq_test.go @@ -321,6 +321,181 @@ func TestExecute_SortAsc_Success(t *testing.T) { } } +func TestExecute_Explain_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | where depth > 5 | sort depth desc | take 10 | explain", g) + require.NoError(t, err) + assert.Contains(t, result.Explain, "Source: schemas.components") + assert.Contains(t, result.Explain, "Filter: where depth > 5") + assert.Contains(t, result.Explain, "Sort: depth descending") + assert.Contains(t, result.Explain, "Limit: take 10") +} + +func TestExecute_Fields_Schemas_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | fields", g) + require.NoError(t, err) + assert.Contains(t, result.Explain, "name") + assert.Contains(t, result.Explain, "depth") + assert.Contains(t, result.Explain, "property_count") + assert.Contains(t, result.Explain, "is_component") +} + +func TestExecute_Fields_Operations_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("operations | fields", g) + require.NoError(t, err) + assert.Contains(t, result.Explain, "method") + assert.Contains(t, result.Explain, "operation_id") + assert.Contains(t, result.Explain, "schema_count") + assert.Contains(t, result.Explain, "tag") + assert.Contains(t, result.Explain, "deprecated") +} + +func TestExecute_Head_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | head 3", g) + require.NoError(t, err) + assert.Len(t, result.Rows, 3) +} + +func TestExecute_Sample_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | sample 3", g) + require.NoError(t, err) + assert.Len(t, result.Rows, 3) + + // Running sample again should produce the same result (deterministic) + result2, err := 
oq.Execute("schemas.components | sample 3", g) + require.NoError(t, err) + assert.Equal(t, len(result.Rows), len(result2.Rows)) +} + +func TestExecute_Path_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas | path Pet Address | select name`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + names := collectNames(result, g) + // Path should include Pet, something in between, and Address + assert.Equal(t, "Pet", names[0]) + assert.Equal(t, "Address", names[len(names)-1]) +} + +func TestExecute_Path_NotFound_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Unused has no outgoing edges to reach Pet + result, err := oq.Execute(`schemas | path Unused Pet | select name`, g) + require.NoError(t, err) + assert.Empty(t, result.Rows) +} + +func TestExecute_Top_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | top 3 property_count | select name, property_count", g) + require.NoError(t, err) + assert.Len(t, result.Rows, 3) + + // Verify descending order + for i := 1; i < len(result.Rows); i++ { + prev := oq.FieldValuePublic(result.Rows[i-1], "property_count", g) + curr := oq.FieldValuePublic(result.Rows[i], "property_count", g) + assert.GreaterOrEqual(t, prev.Int, curr.Int) + } +} + +func TestExecute_Bottom_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | bottom 3 property_count | select name, property_count", g) + require.NoError(t, err) + assert.Len(t, result.Rows, 3) + + // Verify ascending order + for i := 1; i < len(result.Rows); i++ { + prev := oq.FieldValuePublic(result.Rows[i-1], "property_count", g) + curr := oq.FieldValuePublic(result.Rows[i], "property_count", g) + assert.LessOrEqual(t, prev.Int, curr.Int) + } +} + +func TestExecute_Format_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components 
| take 3 | format json", g) + require.NoError(t, err) + assert.Equal(t, "json", result.FormatHint) +} + +func TestFormatMarkdown_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | take 3 | select name, type", g) + require.NoError(t, err) + + md := oq.FormatMarkdown(result, g) + assert.Contains(t, md, "| name") + assert.Contains(t, md, "| --- |") +} + +func TestExecute_OperationTag_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("operations | select name, tag, parameter_count", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) +} + +func TestParse_NewStages_Success(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + query string + }{ + {"explain", "schemas | explain"}, + {"fields", "schemas | fields"}, + {"head", "schemas | head 5"}, + {"sample", "schemas | sample 10"}, + {"path", `schemas | path "User" "Order"`}, + {"path unquoted", "schemas | path User Order"}, + {"top", "schemas | top 5 depth"}, + {"bottom", "schemas | bottom 5 depth"}, + {"format", "schemas | format json"}, + {"format markdown", "schemas | format markdown"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + stages, err := oq.Parse(tt.query) + require.NoError(t, err) + assert.NotEmpty(t, stages) + }) + } +} + // collectNames extracts the "name" field from all rows in the result. 
func collectNames(result *oq.Result, g *graph.SchemaGraph) []string { var names []string From df5461d0b4c88c53dadec9553760d781d0f7d3db Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 08:20:41 +0000 Subject: [PATCH 09/27] fix: use assert.Len for testifylint compliance Co-Authored-By: Claude Opus 4.6 --- oq/oq_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oq/oq_test.go b/oq/oq_test.go index 4a9853c..f15ea8c 100644 --- a/oq/oq_test.go +++ b/oq/oq_test.go @@ -378,7 +378,7 @@ func TestExecute_Sample_Success(t *testing.T) { // Running sample again should produce the same result (deterministic) result2, err := oq.Execute("schemas.components | sample 3", g) require.NoError(t, err) - assert.Equal(t, len(result.Rows), len(result2.Rows)) + assert.Len(t, result2.Rows, len(result.Rows)) } func TestExecute_Path_Success(t *testing.T) { From 9f3ba40dd8b7080ddc72da649d4a730f5204482c Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 08:36:35 +0000 Subject: [PATCH 10/27] fix: address PR review feedback and improve test coverage - Fix stdinOrFileArgs(2,2) -> (1,2) so -f flag works with 1 positional arg - Fix OOB panic in expr tokenizer on unterminated backslash-terminated strings - Add tests for refs-out, refs-in, items, format groups, field coverage, empty/count edge cases bringing oq coverage from 72% to 83% Co-Authored-By: Claude Opus 4.6 --- cmd/openapi/commands/openapi/query.go | 2 +- oq/expr/expr.go | 2 +- oq/expr/expr_test.go | 9 ++ oq/oq_test.go | 143 ++++++++++++++++++++++++++ 4 files changed, 154 insertions(+), 2 deletions(-) diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go index 2a2a3e8..c3c69ca 100644 --- a/cmd/openapi/commands/openapi/query.go +++ b/cmd/openapi/commands/openapi/query.go @@ -63,7 +63,7 @@ Pipeline stages: Meta: explain, fields, format Where expressions support: ==, !=, >, <, >=, <=, and, or, not, has(), matches`, - Args: stdinOrFileArgs(2, 2), + Args: 
stdinOrFileArgs(1, 2), Run: runQuery, } diff --git a/oq/expr/expr.go b/oq/expr/expr.go index ed02740..5445b38 100644 --- a/oq/expr/expr.go +++ b/oq/expr/expr.go @@ -439,7 +439,7 @@ func tokenize(input string) []string { if ch == '"' { j := i + 1 for j < len(input) && input[j] != '"' { - if input[j] == '\\' { + if input[j] == '\\' && j+1 < len(input) { j++ } j++ diff --git a/oq/expr/expr_test.go b/oq/expr/expr_test.go index 2057560..8baeabd 100644 --- a/oq/expr/expr_test.go +++ b/oq/expr/expr_test.go @@ -141,3 +141,12 @@ func TestParse_Error(t *testing.T) { _, err = expr.Parse("name matches \"[invalid\"") require.Error(t, err) } + +func TestParse_UnterminatedBackslashString(t *testing.T) { + t.Parallel() + + // Should not panic on unterminated string ending with backslash + assert.NotPanics(t, func() { + expr.Parse(`name == "x\`) //nolint:errcheck + }) +} diff --git a/oq/oq_test.go b/oq/oq_test.go index f15ea8c..4c497b5 100644 --- a/oq/oq_test.go +++ b/oq/oq_test.go @@ -496,6 +496,149 @@ func TestParse_NewStages_Success(t *testing.T) { } } +func TestExecute_RefsOut_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | refs-out | select name`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) +} + +func TestExecute_RefsIn_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Owner" | refs-in | select name`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) +} + +func TestExecute_Items_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // listPets response includes an array with items + result, err := oq.Execute(`schemas | where type == "array" | items | select name`, g) + require.NoError(t, err) + // May or may not have results depending on spec, but should not error + assert.NotNil(t, result) +} + +func TestFormatTable_Groups_Success(t *testing.T) { + t.Parallel() + g 
:= loadTestGraph(t) + + result, err := oq.Execute("schemas.components | group-by type", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Groups) + + table := oq.FormatTable(result, g) + assert.Contains(t, table, "count=") +} + +func TestFormatJSON_Groups_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | group-by type", g) + require.NoError(t, err) + + json := oq.FormatJSON(result, g) + assert.Contains(t, json, "\"key\"") + assert.Contains(t, json, "\"count\"") +} + +func TestFormatMarkdown_Groups_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | group-by type", g) + require.NoError(t, err) + + md := oq.FormatMarkdown(result, g) + assert.Contains(t, md, "| Key |") +} + +func TestExecute_InlineSource_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.inline | count", g) + require.NoError(t, err) + assert.True(t, result.IsCount) +} + +func TestExecute_SchemaFields_Coverage(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Select all schema fields to cover fieldValue branches + result, err := oq.Execute("schemas.components | take 1 | select name, type, depth, in_degree, out_degree, union_width, property_count, is_component, is_inline, is_circular, has_ref, hash, path", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + table := oq.FormatTable(result, g) + assert.NotEmpty(t, table) + + json := oq.FormatJSON(result, g) + assert.Contains(t, json, "\"name\"") +} + +func TestExecute_OperationFields_Coverage(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Select all operation fields to cover fieldValue branches + result, err := oq.Execute("operations | take 1 | select name, method, path, operation_id, schema_count, component_count, tag, parameter_count, deprecated, description, summary", g) + require.NoError(t, err) + assert.NotEmpty(t, 
result.Rows) +} + +func TestFormatJSON_Empty_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "NonExistent"`, g) + require.NoError(t, err) + + json := oq.FormatJSON(result, g) + assert.Equal(t, "[]", json) +} + +func TestFormatMarkdown_Empty_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "NonExistent"`, g) + require.NoError(t, err) + + md := oq.FormatMarkdown(result, g) + assert.Equal(t, "(empty)", md) +} + +func TestFormatJSON_Count_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | count", g) + require.NoError(t, err) + + json := oq.FormatJSON(result, g) + assert.NotEmpty(t, json) +} + +func TestFormatMarkdown_Count_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | count", g) + require.NoError(t, err) + + md := oq.FormatMarkdown(result, g) + assert.NotEmpty(t, md) +} + // collectNames extracts the "name" field from all rows in the result. func collectNames(result *oq.Result, g *graph.SchemaGraph) []string { var names []string From 8af8105f1fdddc95f8dde813623f754a18857a4a Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 08:53:50 +0000 Subject: [PATCH 11/27] feat: add TOON output format for oq Implement FormatToon following the TOON (Token-Oriented Object Notation) spec: tabular array syntax with header[N]{fields}: and comma-delimited data rows. Includes proper string escaping per TOON quoting rules. 
See https://github.com/toon-format/toon Co-Authored-By: Claude Opus 4.6 --- cmd/openapi/commands/openapi/query.go | 6 +- oq/oq.go | 134 +++++++++++++++++++++++++- oq/oq_test.go | 59 ++++++++++++ 3 files changed, 195 insertions(+), 4 deletions(-) diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go index c3c69ca..84488d7 100644 --- a/cmd/openapi/commands/openapi/query.go +++ b/cmd/openapi/commands/openapi/query.go @@ -60,7 +60,7 @@ Pipeline stages: Source: schemas, schemas.components, schemas.inline, operations Traversal: refs-out, refs-in, reachable, ancestors, properties, union-members, items, ops, schemas, path Filter: where , select , sort [asc|desc], take/head , sample , top , bottom , unique, group-by , count - Meta: explain, fields, format + Meta: explain, fields, format Where expressions support: ==, !=, >, <, >=, <=, and, or, not, has(), matches`, Args: stdinOrFileArgs(1, 2), @@ -71,7 +71,7 @@ var queryOutputFormat string var queryFromFile string func init() { - queryCmd.Flags().StringVar(&queryOutputFormat, "format", "table", "output format: table, json, or markdown") + queryCmd.Flags().StringVar(&queryOutputFormat, "format", "table", "output format: table, json, markdown, or toon") queryCmd.Flags().StringVarP(&queryFromFile, "file", "f", "", "read query from file instead of argument") } @@ -141,6 +141,8 @@ func queryOpenAPI(ctx context.Context, processor *OpenAPIProcessor, queryStr str output = oq.FormatJSON(result, g) case "markdown": output = oq.FormatMarkdown(result, g) + case "toon": + output = oq.FormatToon(result, g) default: output = oq.FormatTable(result, g) } diff --git a/oq/oq.go b/oq/oq.go index 1867986..34bc91c 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -260,8 +260,8 @@ func parseStage(s string) (Stage, error) { case "format": f := strings.TrimSpace(rest) - if f != "table" && f != "json" && f != "markdown" { - return Stage{}, fmt.Errorf("format must be table, json, or markdown, got %q", f) + if f != "table" 
&& f != "json" && f != "markdown" && f != "toon" { + return Stage{}, fmt.Errorf("format must be table, json, markdown, or toon, got %q", f) } return Stage{Kind: StageFormat, Format: f}, nil @@ -1191,6 +1191,136 @@ func FormatMarkdown(result *Result, g *graph.SchemaGraph) string { return sb.String() } +// FormatToon formats a result in the TOON (Token-Oriented Object Notation) format. +// TOON uses tabular array syntax for uniform rows: header[N]{field1,field2,...}: +// followed by comma-delimited data rows. See https://github.com/toon-format/toon +func FormatToon(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + + if result.IsCount { + return "count: " + strconv.Itoa(result.Count) + "\n" + } + + if len(result.Groups) > 0 { + return formatGroupsToon(result) + } + + if len(result.Rows) == 0 { + return "results[0]:\n" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + var sb strings.Builder + + // Header: results[N]{field1,field2,...}: + fmt.Fprintf(&sb, "results[%d]{%s}:\n", len(result.Rows), strings.Join(fields, ",")) + + // Data rows: comma-separated values, indented by one space + for _, row := range result.Rows { + sb.WriteByte(' ') + for i, f := range fields { + if i > 0 { + sb.WriteByte(',') + } + v := fieldValue(row, f, g) + sb.WriteString(toonValue(v)) + } + sb.WriteByte('\n') + } + + return sb.String() +} + +func formatGroupsToon(result *Result) string { + var sb strings.Builder + + // Groups as tabular array + fmt.Fprintf(&sb, "groups[%d]{key,count,names}:\n", len(result.Groups)) + for _, grp := range result.Groups { + names := strings.Join(grp.Names, ";") + fmt.Fprintf(&sb, " %s,%d,%s\n", toonEscape(grp.Key), grp.Count, toonEscape(names)) + } + return sb.String() +} + +// toonValue encodes an 
// toonEscape returns s rendered as a TOON scalar: either the bare string
// when it is safe to emit verbatim, or a double-quoted form with
// backslash escapes. Quoting is required when the string is empty, could
// be mistaken for a keyword (true/false/null) or a number, carries a
// leading/trailing space, or contains structural characters
// (comma, colon, quote, backslash, brackets, braces) or control bytes.
func toonEscape(s string) string {
	if s == "" || s == "true" || s == "false" || s == "null" {
		return `"` + s + `"`
	}
	// Numeric-looking strings must be quoted so readers keep them as
	// strings rather than parsing them as numbers.
	if _, err := strconv.ParseFloat(s, 64); err == nil {
		return `"` + s + `"`
	}

	needsQuote := strings.HasPrefix(s, " ") || strings.HasSuffix(s, " ") ||
		strings.IndexFunc(s, func(r rune) bool {
			return r < 0x20 || strings.ContainsRune(`,:"\[]{}`, r)
		}) >= 0
	if !needsQuote {
		return s
	}

	// Quote with escaping.
	var b strings.Builder
	b.Grow(len(s) + 2)
	b.WriteByte('"')
	for _, r := range s {
		switch r {
		case '\\', '"':
			b.WriteByte('\\')
			b.WriteRune(r)
		case '\n':
			b.WriteString(`\n`)
		case '\r':
			b.WriteString(`\r`)
		case '\t':
			b.WriteString(`\t`)
		default:
			b.WriteRune(r)
		}
	}
	b.WriteByte('"')
	return b.String()
}
require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.Contains(t, toon, "results[3]{name,type}:") + assert.Contains(t, toon, "object") +} + +func TestFormatToon_Count_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | count", g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.Contains(t, toon, "count:") +} + +func TestFormatToon_Groups_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | group-by type", g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.Contains(t, toon, "groups[") + assert.Contains(t, toon, "{key,count,names}:") +} + +func TestFormatToon_Empty_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "NonExistent"`, g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.Equal(t, "results[0]:\n", toon) +} + +func TestFormatToon_Escaping_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Paths contain special chars like / that don't need escaping, + // but hash values and paths are good coverage + result, err := oq.Execute("schemas.components | take 1 | select name, hash, path", g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.Contains(t, toon, "results[1]{name,hash,path}:") +} + func TestFormatMarkdown_Count_Success(t *testing.T) { t.Parallel() g := loadTestGraph(t) From f4323f9429ef4b00e2014452a002eb044f7f3697 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 09:06:05 +0000 Subject: [PATCH 12/27] feat: add query-reference subcommand, oq README, and fix expr parser panic Add `openapi spec query-reference` subcommand that prints the complete oq language reference. Add README.md for the oq package. Fix OOB panic in expr parser's expect() method when tokens are exhausted mid-parse. 
Co-Authored-By: Claude Opus 4.6 --- .../commands/openapi/query_reference.go | 180 +++++++++++++++++ cmd/openapi/commands/openapi/root.go | 1 + oq/README.md | 189 ++++++++++++++++++ oq/expr/expr.go | 5 +- oq/expr/expr_test.go | 14 ++ 5 files changed, 387 insertions(+), 2 deletions(-) create mode 100644 cmd/openapi/commands/openapi/query_reference.go create mode 100644 oq/README.md diff --git a/cmd/openapi/commands/openapi/query_reference.go b/cmd/openapi/commands/openapi/query_reference.go new file mode 100644 index 0000000..7671457 --- /dev/null +++ b/cmd/openapi/commands/openapi/query_reference.go @@ -0,0 +1,180 @@ +package openapi + +import ( + "fmt" + + "github.com/spf13/cobra" +) + +var queryReferenceCmd = &cobra.Command{ + Use: "query-reference", + Short: "Print the oq query language reference", + Long: "Print the complete reference for the oq pipeline query language, including all stages, fields, operators, and examples.", + Run: func(_ *cobra.Command, _ []string) { + fmt.Print(queryReference) + }, +} + +const queryReference = `oq — OpenAPI Query Language Reference +===================================== + +oq is a pipeline query language for exploring OpenAPI schema graphs. +Queries are composed as left-to-right pipelines: + + source | stage | stage | ... | terminal + +SOURCES +------- +The first element of every pipeline is a source that selects the initial +result set. + + schemas All schemas (component + inline) + schemas.components Only component schemas (in #/components/schemas) + schemas.inline Only inline schemas + operations All operations + +TRAVERSAL STAGES +---------------- +Graph navigation stages replace the current result set by following edges +in the schema reference graph. 
+ + refs-out Direct outgoing references (1 hop) + refs-in Direct incoming references (1 hop) + reachable Transitive closure of outgoing references + ancestors Transitive closure of incoming references + properties Expand to property sub-schemas + union-members Expand allOf/oneOf/anyOf children + items Expand to array items schema + ops Schemas → operations that use them + schemas Operations → schemas they touch + path <from> <to> Shortest path between two named schemas + +FILTER & TRANSFORM STAGES +-------------------------- + + where <expr> Filter rows by predicate expression + select <fields> Project specific fields (comma-separated) + sort <field> [desc] Sort by field (default ascending, add "desc" for descending) + take <n> Limit to first N results + head <n> Alias for take + sample <n> Deterministic pseudo-random sample of N rows + top <n> <field> Sort descending by field and take N (shorthand) + bottom <n> <field> Sort ascending by field and take N (shorthand) + unique Deduplicate rows by identity + group-by <field> Group rows and aggregate counts + count Count rows (terminal — returns a single number) + +META STAGES +----------- + + explain Print the query execution plan instead of running it + fields List available fields for the current result kind + format <fmt> Set output format: table, json, markdown, or toon + +SCHEMA FIELDS +------------- + + Field Type Description + ───── ──── ─────────── + name string Component name or JSON pointer + type string Schema type (object, array, string, ...)
+ depth int Max nesting depth + in_degree int Number of schemas referencing this one + out_degree int Number of schemas this references + union_width int oneOf + anyOf + allOf member count + property_count int Number of properties + is_component bool In #/components/schemas + is_inline bool Defined inline + is_circular bool Part of a circular reference chain + has_ref bool Has a $ref + hash string Content hash + path string JSON pointer in document + +OPERATION FIELDS +---------------- + + Field Type Description + ───── ──── ─────────── + name string operationId or "METHOD /path" + method string HTTP method (GET, POST, ...) + path string URL path + operation_id string operationId + schema_count int Total reachable schema count + component_count int Reachable component schema count + tag string First tag + parameter_count int Number of parameters + deprecated bool Whether the operation is deprecated + description string Operation description + summary string Operation summary + +WHERE EXPRESSIONS +----------------- +The where clause supports a predicate expression language: + + Comparison: == != > < >= <= + Logical: and or not + Functions: has(<field>) — true if field is non-null/non-zero + matches(<field>, "<regex>") — regex match + Infix: <field> matches "<regex>" + Grouping: ( ...
) + Literals: "string" 42 true false + +OUTPUT FORMATS +-------------- + + table Aligned columns with header (default) + json JSON array of objects + markdown Markdown table + toon TOON (Token-Oriented Object Notation) tabular format + +Set via --format flag or inline format stage: + schemas | count | format json + +EXAMPLES +-------- + + # Deeply nested components + schemas.components | sort depth desc | take 10 | select name, depth + + # Wide union trees + schemas | where union_width > 0 | sort union_width desc | take 10 + + # Most referenced schemas + schemas.components | sort in_degree desc | take 10 | select name, in_degree + + # Dead components (no incoming references) + schemas.components | where in_degree == 0 | select name + + # Operation sprawl + operations | sort schema_count desc | take 10 | select name, schema_count + + # Circular references + schemas | where is_circular | select name, path + + # Schema count + schemas | count + + # Shortest path between schemas + schemas | path "Pet" "Address" | select name + + # Top 5 by in-degree + schemas.components | top 5 in_degree | select name, in_degree + + # Walk an operation to find all connected schemas + operations | where name == "GET /users" | schemas | select name, type + + # Schemas used by an operation, then find connected operations + operations | where name == "GET /users" | schemas | ops | select name, method, path + + # Explain a query plan + schemas.components | where depth > 5 | sort depth desc | explain + + # List available fields + schemas | fields + + # Regex filter + schemas | where name matches "Error.*" | select name, path + + # Complex filter + schemas | where property_count > 3 and not is_component | select name, property_count, path +` diff --git a/cmd/openapi/commands/openapi/root.go b/cmd/openapi/commands/openapi/root.go index 976abc6..72562b0 100644 --- a/cmd/openapi/commands/openapi/root.go +++ b/cmd/openapi/commands/openapi/root.go @@ -19,4 +19,5 @@ func Apply(rootCmd 
*cobra.Command) { rootCmd.AddCommand(exploreCmd) rootCmd.AddCommand(snipCmd) rootCmd.AddCommand(queryCmd) + rootCmd.AddCommand(queryReferenceCmd) } diff --git a/oq/README.md b/oq/README.md new file mode 100644 index 0000000..6953d5e --- /dev/null +++ b/oq/README.md @@ -0,0 +1,189 @@ +# oq — OpenAPI Query Language + +`oq` is a pipeline query language for exploring OpenAPI schema reference graphs. It lets you ask structural and semantic questions about schemas and operations at the command line. + +## Quick Start + +```bash +# Count all schemas +openapi spec query petstore.yaml 'schemas | count' + +# Top 10 deepest component schemas +openapi spec query petstore.yaml 'schemas.components | sort depth desc | take 10 | select name, depth' + +# Dead components (unreferenced) +openapi spec query petstore.yaml 'schemas.components | where in_degree == 0 | select name' +``` + +Stdin is supported: + +```bash +cat spec.yaml | openapi spec query - 'schemas | count' +``` + +## Pipeline Syntax + +Queries are left-to-right pipelines separated by `|`: + +``` +source | stage | stage | ... 
| terminal +``` + +### Sources + +| Source | Description | +|--------|-------------| +| `schemas` | All schemas (component + inline) | +| `schemas.components` | Component schemas only | +| `schemas.inline` | Inline schemas only | +| `operations` | All operations | + +### Traversal Stages + +| Stage | Description | +|-------|-------------| +| `refs-out` | Direct outgoing references | +| `refs-in` | Direct incoming references | +| `reachable` | Transitive closure of outgoing refs | +| `ancestors` | Transitive closure of incoming refs | +| `properties` | Property sub-schemas | +| `union-members` | allOf/oneOf/anyOf children | +| `items` | Array items schema | +| `ops` | Schemas → operations | +| `schemas` | Operations → schemas | +| `path ` | Shortest path between two schemas | + +### Filter & Transform Stages + +| Stage | Description | +|-------|-------------| +| `where ` | Filter by predicate | +| `select ` | Project fields | +| `sort [desc]` | Sort (ascending by default) | +| `take ` / `head ` | Limit results | +| `sample ` | Deterministic random sample | +| `top ` | Sort desc + take | +| `bottom ` | Sort asc + take | +| `unique` | Deduplicate | +| `group-by ` | Group and count | +| `count` | Count rows | + +### Meta Stages + +| Stage | Description | +|-------|-------------| +| `explain` | Print query plan | +| `fields` | List available fields | +| `format ` | Set output format (table/json/markdown/toon) | + +## Fields + +### Schema Fields + +| Field | Type | Description | +|-------|------|-------------| +| `name` | string | Component name or JSON pointer | +| `type` | string | Schema type | +| `depth` | int | Max nesting depth | +| `in_degree` | int | Incoming reference count | +| `out_degree` | int | Outgoing reference count | +| `union_width` | int | Union member count | +| `property_count` | int | Property count | +| `is_component` | bool | In components/schemas | +| `is_inline` | bool | Defined inline | +| `is_circular` | bool | Part of circular reference | +| 
`has_ref` | bool | Has $ref | +| `hash` | string | Content hash | +| `path` | string | JSON pointer | + +### Operation Fields + +| Field | Type | Description | +|-------|------|-------------| +| `name` | string | operationId or METHOD /path | +| `method` | string | HTTP method | +| `path` | string | URL path | +| `operation_id` | string | operationId | +| `schema_count` | int | Reachable schema count | +| `component_count` | int | Reachable component count | +| `tag` | string | First tag | +| `parameter_count` | int | Parameter count | +| `deprecated` | bool | Deprecated flag | +| `description` | string | Description | +| `summary` | string | Summary | + +## Where Expressions + +``` +depth > 5 +type == "object" +name matches "Error.*" +property_count > 3 and not is_component +has(oneOf) and not has(discriminator) +(depth > 10 or union_width > 5) and is_component +``` + +Operators: `==`, `!=`, `>`, `<`, `>=`, `<=`, `and`, `or`, `not`, `has()`, `matches()` + +## Output Formats + +Use `--format` flag or inline `format` stage: + +```bash +openapi spec query spec.yaml 'schemas | count' --format json +openapi spec query spec.yaml 'schemas | take 5 | format markdown' +``` + +| Format | Description | +|--------|-------------| +| `table` | Aligned columns (default) | +| `json` | JSON array | +| `markdown` | Markdown table | +| `toon` | [TOON](https://github.com/toon-format/toon) tabular format | + +## Examples + +```bash +# Wide union trees +schemas | where union_width > 0 | sort union_width desc | take 10 + +# Central schemas (most referenced) +schemas.components | sort in_degree desc | take 10 | select name, in_degree + +# Operation sprawl +operations | sort schema_count desc | take 10 | select name, schema_count + +# Circular references +schemas | where is_circular | select name, path + +# Shortest path between schemas +schemas | path "Pet" "Address" | select name + +# Walk an operation to connected schemas and back to operations +operations | where name == "GET /users" 
| schemas | ops | select name, method, path + +# Explain query plan +schemas.components | where depth > 5 | sort depth desc | explain + +# Regex filter +schemas | where name matches "Error.*" | select name, path + +# Group by type +schemas | group-by type +``` + +## CLI Reference + +```bash +# Run query-reference for the full language reference +openapi spec query-reference + +# Inline query +openapi spec query '' + +# Query from file +openapi spec query -f query.oq + +# With output format +openapi spec query '' --format json +``` diff --git a/oq/expr/expr.go b/oq/expr/expr.go index 5445b38..3463ae0 100644 --- a/oq/expr/expr.go +++ b/oq/expr/expr.go @@ -243,8 +243,9 @@ func (p *parser) next() string { } func (p *parser) expect(tok string) error { - if p.next() != tok { - return fmt.Errorf("expected %q, got %q", tok, p.tokens[p.pos-1]) + got := p.next() + if got != tok { + return fmt.Errorf("expected %q, got %q", tok, got) } return nil } diff --git a/oq/expr/expr_test.go b/oq/expr/expr_test.go index 8baeabd..ddc41ca 100644 --- a/oq/expr/expr_test.go +++ b/oq/expr/expr_test.go @@ -150,3 +150,17 @@ func TestParse_UnterminatedBackslashString(t *testing.T) { expr.Parse(`name == "x\`) //nolint:errcheck }) } + +func TestParse_UnterminatedFunction(t *testing.T) { + t.Parallel() + + // Should not panic when tokens are exhausted inside a function call + assert.NotPanics(t, func() { + _, err := expr.Parse(`has(field`) + require.Error(t, err) + }) + assert.NotPanics(t, func() { + _, err := expr.Parse(`matches(field,`) + require.Error(t, err) + }) +} From a91d68897aa4bd9165eabaaaa197b5836084262b Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 09:34:27 +0000 Subject: [PATCH 13/27] feat: add edge annotations, graph analysis stages, and new schema fields Edge annotations: 1-hop traversal stages (refs-out, refs-in, properties, union-members, items) now populate edge_kind, edge_label, and edge_from fields on result rows, making relationship types visible in query 
output. New traversal stages: connected, blast-radius, neighbors New analysis stages: orphans, leaves, cycles, clusters, tag-boundary, shared-refs New schema fields: op_count, tag_count Graph layer additions: Neighbors (depth-limited bidirectional BFS), StronglyConnectedComponents (Tarjan's SCC), SchemaOpCount. Co-Authored-By: Claude Opus 4.6 --- cmd/openapi/commands/openapi/query.go | 7 +- .../commands/openapi/query_reference.go | 73 ++- graph/graph.go | 181 +++++++ oq/README.md | 63 ++- oq/oq.go | 500 +++++++++++++++++- oq/oq_test.go | 192 +++++++ 6 files changed, 993 insertions(+), 23 deletions(-) diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go index 84488d7..aeefe54 100644 --- a/cmd/openapi/commands/openapi/query.go +++ b/cmd/openapi/commands/openapi/query.go @@ -58,8 +58,11 @@ Stdin is supported — either pipe data directly or use '-' explicitly: Pipeline stages: Source: schemas, schemas.components, schemas.inline, operations - Traversal: refs-out, refs-in, reachable, ancestors, properties, union-members, items, ops, schemas, path - Filter: where , select , sort [asc|desc], take/head , sample , top , bottom , unique, group-by , count + Traversal: refs-out, refs-in, reachable, ancestors, properties, union-members, items, + ops, schemas, path , connected, blast-radius, neighbors + Analysis: orphans, leaves, cycles, clusters, tag-boundary, shared-refs + Filter: where , select , sort [asc|desc], take/head , + sample , top , bottom , unique, group-by , count Meta: explain, fields, format Where expressions support: ==, !=, >, <, >=, <=, and, or, not, has(), matches`, diff --git a/cmd/openapi/commands/openapi/query_reference.go b/cmd/openapi/commands/openapi/query_reference.go index 7671457..2f6f6cf 100644 --- a/cmd/openapi/commands/openapi/query_reference.go +++ b/cmd/openapi/commands/openapi/query_reference.go @@ -38,16 +38,29 @@ TRAVERSAL STAGES Graph navigation stages replace the current result set by following edges in 
the schema reference graph. - refs-out Direct outgoing references (1 hop) - refs-in Direct incoming references (1 hop) - reachable Transitive closure of outgoing references - ancestors Transitive closure of incoming references - properties Expand to property sub-schemas - union-members Expand allOf/oneOf/anyOf children - items Expand to array items schema - ops Schemas → operations that use them - schemas Operations → schemas they touch - path Shortest path between two named schemas + refs-out Direct outgoing references (1 hop, with edge annotations) + refs-in Direct incoming references (1 hop, with edge annotations) + reachable Transitive closure of outgoing references + ancestors Transitive closure of incoming references + properties Expand to property sub-schemas (with edge annotations) + union-members Expand allOf/oneOf/anyOf children (with edge annotations) + items Expand to array items schema (with edge annotations) + ops Schemas → operations that use them + schemas Operations → schemas they touch + path Shortest path between two named schemas + connected Full connected component (schemas + operations) + blast-radius Ancestors + all affected operations (change impact) + neighbors Bidirectional neighborhood within N hops + +ANALYSIS STAGES +--------------- + + orphans Schemas with no incoming refs and no operation usage + leaves Schemas with no outgoing refs (leaf/terminal nodes) + cycles Strongly connected components (actual reference cycles) + clusters Weakly connected component grouping + tag-boundary Schemas used by operations across multiple tags + shared-refs Schemas shared by ALL operations in result set FILTER & TRANSFORM STAGES -------------------------- @@ -89,6 +102,8 @@ SCHEMA FIELDS has_ref bool Has a $ref hash string Content hash path string JSON pointer in document + op_count int Number of operations using this schema + tag_count int Number of distinct tags across operations OPERATION FIELDS ---------------- @@ -107,6 +122,17 @@ OPERATION FIELDS 
description string Operation description summary string Operation summary +EDGE ANNOTATION FIELDS +---------------------- +Available on rows produced by 1-hop traversal stages (refs-out, refs-in, +properties, union-members, items): + + Field Type Description + ───── ──── ─────────── + edge_kind string Edge type: property, items, allOf, oneOf, ref, ... + edge_label string Edge label: property name, array index, etc. + edge_from string Source node name + WHERE EXPRESSIONS ----------------- The where clause supports a predicate expression language: @@ -177,4 +203,31 @@ EXAMPLES # Complex filter schemas | where property_count > 3 and not is_component | select name, property_count, path + + # Edge annotations — see how Pet references other schemas + schemas.components | where name == "Pet" | refs-out | select name, edge_kind, edge_label, edge_from + + # Blast radius — what breaks if I change the Error schema? + schemas.components | where name == "Error" | blast-radius | count + + # Neighborhood — schemas within 2 hops of Pet + schemas.components | where name == "Pet" | neighbors 2 | select name + + # Orphaned schemas — unreferenced by anything + schemas.components | orphans | select name + + # Leaf schemas — terminal nodes with no outgoing refs + schemas.components | leaves | select name, in_degree + + # Detect reference cycles + schemas | cycles + + # Discover schema clusters + schemas.components | clusters + + # Cross-tag schemas — shared across team boundaries + schemas | tag-boundary | select name, tag_count + + # Schemas shared by all operations + operations | shared-refs | select name, op_count ` diff --git a/graph/graph.go b/graph/graph.go index 4b1fd29..8f804cf 100644 --- a/graph/graph.go +++ b/graph/graph.go @@ -717,6 +717,187 @@ func (g *SchemaGraph) ShortestPath(from, to NodeID) []NodeID { return nil } +// SchemaOpCount returns the number of operations that reference the given schema. 
+func (g *SchemaGraph) SchemaOpCount(id NodeID) int { + return len(g.schemaOps[id]) +} + +// Neighbors returns schema NodeIDs within maxDepth hops of the given node, +// following both out-edges and in-edges (bidirectional BFS). +// The result excludes the seed node itself. +func (g *SchemaGraph) Neighbors(id NodeID, maxDepth int) []NodeID { + visited := map[NodeID]bool{id: true} + current := []NodeID{id} + + for depth := 0; depth < maxDepth && len(current) > 0; depth++ { + var next []NodeID + for _, nid := range current { + for _, edge := range g.outEdges[nid] { + if !visited[edge.To] { + visited[edge.To] = true + next = append(next, edge.To) + } + } + for _, edge := range g.inEdges[nid] { + if !visited[edge.From] { + visited[edge.From] = true + next = append(next, edge.From) + } + } + } + current = next + } + + delete(visited, id) + result := make([]NodeID, 0, len(visited)) + for nid := range visited { + result = append(result, nid) + } + return result +} + +// StronglyConnectedComponents returns the SCCs of the schema graph using +// Tarjan's algorithm. Only returns components with more than one node +// (i.e., actual cycles, not singleton nodes). 
+func (g *SchemaGraph) StronglyConnectedComponents() [][]NodeID { + idx := 0 + var stack []NodeID + onStack := make(map[NodeID]bool) + indices := make(map[NodeID]int) + lowlinks := make(map[NodeID]int) + defined := make(map[NodeID]bool) + var sccs [][]NodeID + + var strongConnect func(v NodeID) + strongConnect = func(v NodeID) { + indices[v] = idx + lowlinks[v] = idx + defined[v] = true + idx++ + stack = append(stack, v) + onStack[v] = true + + for _, edge := range g.outEdges[v] { + w := edge.To + if !defined[w] { + strongConnect(w) + if lowlinks[w] < lowlinks[v] { + lowlinks[v] = lowlinks[w] + } + } else if onStack[w] { + if indices[w] < lowlinks[v] { + lowlinks[v] = indices[w] + } + } + } + + if lowlinks[v] == indices[v] { + var scc []NodeID + for { + w := stack[len(stack)-1] + stack = stack[:len(stack)-1] + onStack[w] = false + scc = append(scc, w) + if w == v { + break + } + } + if len(scc) > 1 { + sccs = append(sccs, scc) + } + } + } + + for i := range g.Schemas { + nid := NodeID(i) + if !defined[nid] { + strongConnect(nid) + } + } + + return sccs +} + +// ConnectedComponent computes the full connected component reachable from the +// given seed schema and operation nodes. It treats schema edges as undirected +// (follows both out-edges and in-edges) and crosses schema↔operation links. +// Returns the sets of reachable schema and operation NodeIDs (including seeds). 
+func (g *SchemaGraph) ConnectedComponent(schemaSeeds, opSeeds []NodeID) (schemas []NodeID, ops []NodeID) { + visitedSchemas := make(map[NodeID]bool) + visitedOps := make(map[NodeID]bool) + + // Queues for BFS across both node types + schemaQueue := make([]NodeID, 0, len(schemaSeeds)) + opQueue := make([]NodeID, 0, len(opSeeds)) + + for _, id := range schemaSeeds { + if !visitedSchemas[id] { + visitedSchemas[id] = true + schemaQueue = append(schemaQueue, id) + } + } + for _, id := range opSeeds { + if !visitedOps[id] { + visitedOps[id] = true + opQueue = append(opQueue, id) + } + } + + for len(schemaQueue) > 0 || len(opQueue) > 0 { + // Process schema nodes + for len(schemaQueue) > 0 { + current := schemaQueue[0] + schemaQueue = schemaQueue[1:] + + // Follow out-edges (undirected: treat as bidirectional) + for _, edge := range g.outEdges[current] { + if !visitedSchemas[edge.To] { + visitedSchemas[edge.To] = true + schemaQueue = append(schemaQueue, edge.To) + } + } + // Follow in-edges + for _, edge := range g.inEdges[current] { + if !visitedSchemas[edge.From] { + visitedSchemas[edge.From] = true + schemaQueue = append(schemaQueue, edge.From) + } + } + // Cross to operations + for opID := range g.schemaOps[current] { + if !visitedOps[opID] { + visitedOps[opID] = true + opQueue = append(opQueue, opID) + } + } + } + + // Process operation nodes + for len(opQueue) > 0 { + current := opQueue[0] + opQueue = opQueue[1:] + + // Cross to schemas + for sid := range g.opSchemas[current] { + if !visitedSchemas[sid] { + visitedSchemas[sid] = true + schemaQueue = append(schemaQueue, sid) + } + } + } + } + + schemas = make([]NodeID, 0, len(visitedSchemas)) + for id := range visitedSchemas { + schemas = append(schemas, id) + } + ops = make([]NodeID, 0, len(visitedOps)) + for id := range visitedOps { + ops = append(ops, id) + } + return schemas, ops +} + func intStr(i int) string { return strconv.Itoa(i) } diff --git a/oq/README.md b/oq/README.md index 6953d5e..a292e42 100644 --- 
a/oq/README.md +++ b/oq/README.md @@ -42,16 +42,30 @@ source | stage | stage | ... | terminal | Stage | Description | |-------|-------------| -| `refs-out` | Direct outgoing references | -| `refs-in` | Direct incoming references | +| `refs-out` | Direct outgoing references (with edge annotations) | +| `refs-in` | Direct incoming references (with edge annotations) | | `reachable` | Transitive closure of outgoing refs | | `ancestors` | Transitive closure of incoming refs | -| `properties` | Property sub-schemas | -| `union-members` | allOf/oneOf/anyOf children | -| `items` | Array items schema | +| `properties` | Property sub-schemas (with edge annotations) | +| `union-members` | allOf/oneOf/anyOf children (with edge annotations) | +| `items` | Array items schema (with edge annotations) | | `ops` | Schemas → operations | | `schemas` | Operations → schemas | | `path ` | Shortest path between two schemas | +| `connected` | Full connected component (schemas + operations) | +| `blast-radius` | Ancestors + all affected operations | +| `neighbors ` | Bidirectional neighborhood within N hops | + +### Analysis Stages + +| Stage | Description | +|-------|-------------| +| `orphans` | Schemas with no incoming refs and no operation usage | +| `leaves` | Schemas with no outgoing refs (terminal nodes) | +| `cycles` | Strongly connected components (actual cycles) | +| `clusters` | Weakly connected component grouping | +| `tag-boundary` | Schemas used by operations across multiple tags | +| `shared-refs` | Schemas shared by ALL operations in result set | ### Filter & Transform Stages @@ -95,6 +109,8 @@ source | stage | stage | ... | terminal | `has_ref` | bool | Has $ref | | `hash` | string | Content hash | | `path` | string | JSON pointer | +| `op_count` | int | Operations using this schema | +| `tag_count` | int | Distinct tags across operations | ### Operation Fields @@ -112,6 +128,16 @@ source | stage | stage | ... 
| terminal | `description` | string | Description | | `summary` | string | Summary | +### Edge Annotation Fields + +Available on rows produced by 1-hop traversal stages (`refs-out`, `refs-in`, `properties`, `union-members`, `items`): + +| Field | Type | Description | +|-------|------|-------------| +| `edge_kind` | string | Edge type: property, items, allOf, oneOf, ref, ... | +| `edge_label` | string | Edge label: property name, array index, etc. | +| `edge_from` | string | Source node name | + ## Where Expressions ``` @@ -170,6 +196,33 @@ schemas | where name matches "Error.*" | select name, path # Group by type schemas | group-by type + +# Edge annotations — how does Pet reference other schemas? +schemas.components | where name == "Pet" | refs-out | select name, edge_kind, edge_label, edge_from + +# Blast radius — what breaks if Error changes? +schemas.components | where name == "Error" | blast-radius | count + +# 2-hop neighborhood +schemas.components | where name == "Pet" | neighbors 2 | select name + +# Orphaned schemas +schemas.components | orphans | select name + +# Leaf nodes +schemas.components | leaves | select name, in_degree + +# Detect cycles +schemas | cycles + +# Discover clusters +schemas.components | clusters + +# Cross-tag schemas +schemas | tag-boundary | select name, tag_count + +# Schemas shared across all operations +operations | shared-refs | select name, op_count ``` ## CLI Reference diff --git a/oq/oq.go b/oq/oq.go index 34bc91c..10e12d2 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -32,6 +32,11 @@ type Row struct { Kind ResultKind SchemaIdx int // index into SchemaGraph.Schemas OpIdx int // index into SchemaGraph.Operations + + // Edge annotations (populated by 1-hop traversal stages) + EdgeKind string // edge type: "property", "items", "allOf", "oneOf", "ref", etc. + EdgeLabel string // edge label: property name, array index, etc. + EdgeFrom string // source node name } // Result is the output of a query execution. 
@@ -91,6 +96,15 @@ const ( StageTop StageBottom StageFormat + StageConnected + StageBlastRadius + StageNeighbors + StageOrphans + StageLeaves + StageCycles + StageClusters + StageTagBoundary + StageSharedRefs ) // Stage represents a single stage in the query pipeline. @@ -265,6 +279,37 @@ func parseStage(s string) (Stage, error) { } return Stage{Kind: StageFormat, Format: f}, nil + case "connected": + return Stage{Kind: StageConnected}, nil + + case "blast-radius": + return Stage{Kind: StageBlastRadius}, nil + + case "neighbors": + n, err := strconv.Atoi(strings.TrimSpace(rest)) + if err != nil { + return Stage{}, fmt.Errorf("neighbors requires a depth number: %w", err) + } + return Stage{Kind: StageNeighbors, Limit: n}, nil + + case "orphans": + return Stage{Kind: StageOrphans}, nil + + case "leaves": + return Stage{Kind: StageLeaves}, nil + + case "cycles": + return Stage{Kind: StageCycles}, nil + + case "clusters": + return Stage{Kind: StageClusters}, nil + + case "tag-boundary": + return Stage{Kind: StageTagBoundary}, nil + + case "shared-refs": + return Stage{Kind: StageSharedRefs}, nil + default: return Stage{}, fmt.Errorf("unknown stage: %q", keyword) } @@ -388,6 +433,24 @@ func execStage(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, erro case StageFormat: result.FormatHint = stage.Format return result, nil + case StageConnected: + return execConnected(result, g) + case StageBlastRadius: + return execBlastRadius(result, g) + case StageNeighbors: + return execNeighbors(stage, result, g) + case StageOrphans: + return execOrphans(result, g) + case StageLeaves: + return execLeaves(result, g) + case StageCycles: + return execCycles(result, g) + case StageClusters: + return execClusters(result, g) + case StageTagBoundary: + return execTagBoundary(result, g) + case StageSharedRefs: + return execSharedRefs(result, g) default: return nil, fmt.Errorf("unimplemented stage kind: %d", stage.Kind) } @@ -495,7 +558,7 @@ func execTraversal(result *Result, g 
*graph.SchemaGraph, fn traversalFunc) (*Res seen := make(map[string]bool) for _, row := range result.Rows { for _, newRow := range fn(row, g) { - key := rowKey(newRow) + key := edgeRowKey(newRow) if !seen[key] { seen[key] = true out.Rows = append(out.Rows, newRow) @@ -505,13 +568,28 @@ func execTraversal(result *Result, g *graph.SchemaGraph, fn traversalFunc) (*Res return out, nil } +func edgeRowKey(row Row) string { + base := rowKey(row) + if row.EdgeKind == "" { + return base + } + return base + "|" + row.EdgeFrom + "|" + row.EdgeKind + "|" + row.EdgeLabel +} + func traverseRefsOut(row Row, g *graph.SchemaGraph) []Row { if row.Kind != SchemaResult { return nil } + fromName := schemaName(row.SchemaIdx, g) var result []Row for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { - result = append(result, Row{Kind: SchemaResult, SchemaIdx: int(edge.To)}) + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.To), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) } return result } @@ -520,9 +598,16 @@ func traverseRefsIn(row Row, g *graph.SchemaGraph) []Row { if row.Kind != SchemaResult { return nil } + toName := schemaName(row.SchemaIdx, g) var result []Row for _, edge := range g.InEdges(graph.NodeID(row.SchemaIdx)) { - result = append(result, Row{Kind: SchemaResult, SchemaIdx: int(edge.From)}) + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.From), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: toName, + }) } return result } @@ -555,10 +640,17 @@ func traverseProperties(row Row, g *graph.SchemaGraph) []Row { if row.Kind != SchemaResult { return nil } + fromName := schemaName(row.SchemaIdx, g) var result []Row for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { if edge.Kind == graph.EdgeProperty { - result = append(result, Row{Kind: SchemaResult, SchemaIdx: int(edge.To)}) + result = append(result, Row{ + Kind: SchemaResult, + 
SchemaIdx: int(edge.To), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) } } return result @@ -568,12 +660,19 @@ func traverseUnionMembers(row Row, g *graph.SchemaGraph) []Row { if row.Kind != SchemaResult { return nil } + fromName := schemaName(row.SchemaIdx, g) var result []Row for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { if edge.Kind == graph.EdgeAllOf || edge.Kind == graph.EdgeOneOf || edge.Kind == graph.EdgeAnyOf { // Follow through $ref nodes transparently target := resolveRefTarget(int(edge.To), g) - result = append(result, Row{Kind: SchemaResult, SchemaIdx: target}) + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: target, + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) } } return result @@ -583,10 +682,17 @@ func traverseItems(row Row, g *graph.SchemaGraph) []Row { if row.Kind != SchemaResult { return nil } + fromName := schemaName(row.SchemaIdx, g) var result []Row for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { if edge.Kind == graph.EdgeItems { - result = append(result, Row{Kind: SchemaResult, SchemaIdx: int(edge.To)}) + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.To), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) } } return result @@ -650,6 +756,346 @@ func execOpsToSchemas(result *Result, g *graph.SchemaGraph) (*Result, error) { return out, nil } +func execConnected(result *Result, g *graph.SchemaGraph) (*Result, error) { + var schemaSeeds, opSeeds []graph.NodeID + for _, row := range result.Rows { + switch row.Kind { + case SchemaResult: + schemaSeeds = append(schemaSeeds, graph.NodeID(row.SchemaIdx)) + case OperationResult: + opSeeds = append(opSeeds, graph.NodeID(row.OpIdx)) + } + } + + schemas, ops := g.ConnectedComponent(schemaSeeds, opSeeds) + + out := &Result{Fields: result.Fields} + for _, id := range schemas { + out.Rows = 
append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) + } + for _, id := range ops { + out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: int(id)}) + } + return out, nil +} + +func execBlastRadius(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seenSchemas := make(map[int]bool) + seenOps := make(map[int]bool) + + // Collect seed schemas + var seeds []graph.NodeID + for _, row := range result.Rows { + if row.Kind == SchemaResult { + seeds = append(seeds, graph.NodeID(row.SchemaIdx)) + seenSchemas[row.SchemaIdx] = true + } + } + + // Find all ancestors (schemas that depend on the seeds) + for _, seed := range seeds { + for _, aid := range g.Ancestors(seed) { + seenSchemas[int(aid)] = true + } + } + + // Add schema rows + for idx := range seenSchemas { + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: idx}) + } + + // Find all operations that reference any affected schema + for idx := range seenSchemas { + for _, opID := range g.SchemaOperations(graph.NodeID(idx)) { + if !seenOps[int(opID)] { + seenOps[int(opID)] = true + out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: int(opID)}) + } + } + } + + return out, nil +} + +func execNeighbors(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[int]bool) + + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + // Include seed + if !seen[row.SchemaIdx] { + seen[row.SchemaIdx] = true + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: row.SchemaIdx}) + } + for _, id := range g.Neighbors(graph.NodeID(row.SchemaIdx), stage.Limit) { + if !seen[int(id)] { + seen[int(id)] = true + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) + } + } + } + + return out, nil +} + +func execOrphans(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + for _, row := 
range result.Rows { + if row.Kind != SchemaResult { + continue + } + s := &g.Schemas[row.SchemaIdx] + if s.InDegree == 0 && g.SchemaOpCount(graph.NodeID(row.SchemaIdx)) == 0 { + out.Rows = append(out.Rows, row) + } + } + return out, nil +} + +func execLeaves(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + if g.Schemas[row.SchemaIdx].OutDegree == 0 { + out.Rows = append(out.Rows, row) + } + } + return out, nil +} + +func execCycles(result *Result, g *graph.SchemaGraph) (*Result, error) { + sccs := g.StronglyConnectedComponents() + + // Filter SCCs to only include nodes present in the current result + resultNodes := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind == SchemaResult { + resultNodes[row.SchemaIdx] = true + } + } + + out := &Result{Fields: result.Fields} + for i, scc := range sccs { + hasMatch := false + for _, id := range scc { + if resultNodes[int(id)] { + hasMatch = true + break + } + } + if !hasMatch { + continue + } + var names []string + for _, id := range scc { + if int(id) < len(g.Schemas) { + names = append(names, g.Schemas[id].Name) + } + } + out.Groups = append(out.Groups, GroupResult{ + Key: "cycle-" + strconv.Itoa(i+1), + Count: len(scc), + Names: names, + }) + } + + return out, nil +} + +func execClusters(result *Result, g *graph.SchemaGraph) (*Result, error) { + resultNodes := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind == SchemaResult { + resultNodes[row.SchemaIdx] = true + } + } + + // BFS to find connected components. Follow ALL graph edges (including + // through intermediary nodes like $ref wrappers) but only collect + // nodes that are in the result set. 
+ assigned := make(map[int]bool) // result nodes already assigned to a cluster + out := &Result{Fields: result.Fields} + clusterNum := 0 + + for idx := range resultNodes { + if assigned[idx] { + continue + } + clusterNum++ + var component []int + + // BFS through the full graph + visited := make(map[int]bool) + queue := []int{idx} + visited[idx] = true + + for len(queue) > 0 { + cur := queue[0] + queue = queue[1:] + + if resultNodes[cur] && !assigned[cur] { + assigned[cur] = true + component = append(component, cur) + } + + for _, edge := range g.OutEdges(graph.NodeID(cur)) { + to := int(edge.To) + if !visited[to] { + visited[to] = true + queue = append(queue, to) + } + } + for _, edge := range g.InEdges(graph.NodeID(cur)) { + from := int(edge.From) + if !visited[from] { + visited[from] = true + queue = append(queue, from) + } + } + } + + var names []string + for _, id := range component { + if id < len(g.Schemas) { + names = append(names, g.Schemas[id].Name) + } + } + if len(component) > 0 { + out.Groups = append(out.Groups, GroupResult{ + Key: "cluster-" + strconv.Itoa(clusterNum), + Count: len(component), + Names: names, + }) + } + } + + return out, nil +} + +func execTagBoundary(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + if schemaTagCount(row.SchemaIdx, g) > 1 { + out.Rows = append(out.Rows, row) + } + } + return out, nil +} + +func schemaTagCount(schemaIdx int, g *graph.SchemaGraph) int { + tags := make(map[string]bool) + for _, opID := range g.SchemaOperations(graph.NodeID(schemaIdx)) { + if int(opID) < len(g.Operations) { + op := &g.Operations[opID] + if op.Operation != nil { + for _, tag := range op.Operation.Tags { + tags[tag] = true + } + } + } + } + return len(tags) +} + +func execSharedRefs(result *Result, g *graph.SchemaGraph) (*Result, error) { + var ops []graph.NodeID + for _, row := range result.Rows { + if 
row.Kind == OperationResult { + ops = append(ops, graph.NodeID(row.OpIdx)) + } + } + + if len(ops) == 0 { + return &Result{Fields: result.Fields}, nil + } + + // Start with first operation's schemas + intersection := make(map[graph.NodeID]bool) + for _, sid := range g.OperationSchemas(ops[0]) { + intersection[sid] = true + } + + // Intersect with each subsequent operation + for _, opID := range ops[1:] { + opSchemas := make(map[graph.NodeID]bool) + for _, sid := range g.OperationSchemas(opID) { + opSchemas[sid] = true + } + for sid := range intersection { + if !opSchemas[sid] { + delete(intersection, sid) + } + } + } + + out := &Result{Fields: result.Fields} + for sid := range intersection { + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(sid)}) + } + return out, nil +} + +// --- Edge annotation helpers --- + +func schemaName(idx int, g *graph.SchemaGraph) string { + if idx >= 0 && idx < len(g.Schemas) { + return g.Schemas[idx].Name + } + return "" +} + +func edgeKindString(k graph.EdgeKind) string { + switch k { + case graph.EdgeProperty: + return "property" + case graph.EdgeItems: + return "items" + case graph.EdgeAllOf: + return "allOf" + case graph.EdgeOneOf: + return "oneOf" + case graph.EdgeAnyOf: + return "anyOf" + case graph.EdgeAdditionalProps: + return "additionalProperties" + case graph.EdgeNot: + return "not" + case graph.EdgeIf: + return "if" + case graph.EdgeThen: + return "then" + case graph.EdgeElse: + return "else" + case graph.EdgeContains: + return "contains" + case graph.EdgePrefixItems: + return "prefixItems" + case graph.EdgeDependentSchema: + return "dependentSchema" + case graph.EdgePatternProperty: + return "patternProperty" + case graph.EdgePropertyNames: + return "propertyNames" + case graph.EdgeUnevaluatedItems: + return "unevaluatedItems" + case graph.EdgeUnevaluatedProps: + return "unevaluatedProperties" + case graph.EdgeRef: + return "ref" + default: + return "unknown" + } +} + // --- Field access --- type 
rowAdapter struct { @@ -701,6 +1147,16 @@ func fieldValue(row Row, name string, g *graph.SchemaGraph) expr.Value { return expr.StringVal(s.Hash) case "path": return expr.StringVal(s.Path) + case "op_count": + return expr.IntVal(g.SchemaOpCount(graph.NodeID(row.SchemaIdx))) + case "tag_count": + return expr.IntVal(schemaTagCount(row.SchemaIdx, g)) + case "edge_kind": + return expr.StringVal(row.EdgeKind) + case "edge_label": + return expr.StringVal(row.EdgeLabel) + case "edge_from": + return expr.StringVal(row.EdgeFrom) } case OperationResult: if row.OpIdx < 0 || row.OpIdx >= len(g.Operations) { @@ -745,6 +1201,12 @@ func fieldValue(row Row, name string, g *graph.SchemaGraph) expr.Value { return expr.StringVal(o.Operation.GetSummary()) } return expr.StringVal("") + case "edge_kind": + return expr.StringVal(row.EdgeKind) + case "edge_label": + return expr.StringVal(row.EdgeLabel) + case "edge_from": + return expr.StringVal(row.EdgeFrom) } } return expr.NullVal() @@ -859,6 +1321,24 @@ func describeStage(stage Stage) string { return "Bottom: " + strconv.Itoa(stage.Limit) + " by " + stage.SortField + " ascending" case StageFormat: return "Format: " + stage.Format + case StageConnected: + return "Traverse: full connected component (schemas + operations)" + case StageBlastRadius: + return "Traverse: blast radius (ancestors + affected operations)" + case StageNeighbors: + return "Traverse: bidirectional neighbors within " + strconv.Itoa(stage.Limit) + " hops" + case StageOrphans: + return "Filter: schemas with no incoming refs and no operation usage" + case StageLeaves: + return "Filter: schemas with no outgoing refs (leaf nodes)" + case StageCycles: + return "Analyze: strongly connected components (actual cycles)" + case StageClusters: + return "Analyze: weakly connected component clusters" + case StageTagBoundary: + return "Filter: schemas used by operations across multiple tags" + case StageSharedRefs: + return "Analyze: schemas shared by all operations in result" 
default: return "Unknown stage" } @@ -890,6 +1370,11 @@ func execFields(result *Result) (*Result, error) { {"has_ref", "bool"}, {"hash", "string"}, {"path", "string"}, + {"op_count", "int"}, + {"tag_count", "int"}, + {"edge_kind", "string"}, + {"edge_label", "string"}, + {"edge_from", "string"}, } for _, f := range fields { fmt.Fprintf(&sb, "%-17s %s\n", f.name, f.typ) @@ -909,6 +1394,9 @@ func execFields(result *Result) (*Result, error) { {"deprecated", "bool"}, {"description", "string"}, {"summary", "string"}, + {"edge_kind", "string"}, + {"edge_label", "string"}, + {"edge_from", "string"}, } for _, f := range fields { fmt.Fprintf(&sb, "%-17s %s\n", f.name, f.typ) diff --git a/oq/oq_test.go b/oq/oq_test.go index 428effe..419d247 100644 --- a/oq/oq_test.go +++ b/oq/oq_test.go @@ -61,6 +61,15 @@ func TestParse_Success(t *testing.T) { {"items", "schemas | items"}, {"ops", "schemas | ops"}, {"schemas from ops", "operations | schemas"}, + {"connected", "schemas.components | where name == \"Pet\" | connected"}, + {"blast-radius", "schemas.components | where name == \"Pet\" | blast-radius"}, + {"neighbors", "schemas.components | where name == \"Pet\" | neighbors 2"}, + {"orphans", "schemas.components | orphans"}, + {"leaves", "schemas.components | leaves"}, + {"cycles", "schemas | cycles"}, + {"clusters", "schemas.components | clusters"}, + {"tag-boundary", "schemas | tag-boundary"}, + {"shared-refs", "operations | take 2 | shared-refs"}, {"full pipeline", "schemas.components | where depth > 0 | sort depth desc | take 5 | select name, depth"}, } @@ -525,6 +534,189 @@ func TestExecute_Items_Success(t *testing.T) { assert.NotNil(t, result) } +func TestExecute_Connected_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Start from Pet, connected should return schemas and operations in the same component + result, err := oq.Execute(`schemas.components | where name == "Pet" | connected`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + // 
Should have both schema and operation rows + hasSchema := false + hasOp := false + for _, row := range result.Rows { + if row.Kind == oq.SchemaResult { + hasSchema = true + } + if row.Kind == oq.OperationResult { + hasOp = true + } + } + assert.True(t, hasSchema, "connected should include schema nodes") + assert.True(t, hasOp, "connected should include operation nodes") +} + +func TestExecute_Connected_FromOps_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Start from an operation, connected should also find schemas + result, err := oq.Execute(`operations | take 1 | connected`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + hasSchema := false + for _, row := range result.Rows { + if row.Kind == oq.SchemaResult { + hasSchema = true + } + } + assert.True(t, hasSchema, "connected from operation should include schema nodes") +} + +func TestExecute_EdgeAnnotations_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | refs-out | select name, edge_kind, edge_label, edge_from`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + // Every row should have edge annotations + for _, row := range result.Rows { + kind := oq.FieldValuePublic(row, "edge_kind", g) + assert.NotEmpty(t, kind.Str, "edge_kind should be set") + from := oq.FieldValuePublic(row, "edge_from", g) + assert.Equal(t, "Pet", from.Str) + } +} + +func TestExecute_BlastRadius_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | blast-radius`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + // Should include both schemas and operations + hasSchema := false + hasOp := false + for _, row := range result.Rows { + if row.Kind == oq.SchemaResult { + hasSchema = true + } + if row.Kind == oq.OperationResult { + hasOp = true + } + } + assert.True(t, hasSchema, "blast-radius should 
include schemas") + assert.True(t, hasOp, "blast-radius should include operations") +} + +func TestExecute_Neighbors_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | neighbors 1`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + // Depth-1 neighbors should include seed + direct refs in both directions + names := make(map[string]bool) + for _, row := range result.Rows { + n := oq.FieldValuePublic(row, "name", g) + names[n.Str] = true + } + assert.True(t, names["Pet"], "neighbors should include the seed node") +} + +func TestExecute_Orphans_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | orphans | select name`, g) + require.NoError(t, err) + // Result may be empty if all schemas are referenced, that's fine + assert.NotNil(t, result) +} + +func TestExecute_Leaves_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | leaves | select name, out_degree`, g) + require.NoError(t, err) + // All returned rows should have out_degree == 0 + for _, row := range result.Rows { + od := oq.FieldValuePublic(row, "out_degree", g) + assert.Equal(t, 0, od.Int) + } +} + +func TestExecute_Cycles_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas | cycles`, g) + require.NoError(t, err) + // Returns groups — may be empty if no cycles in petstore + assert.NotNil(t, result) +} + +func TestExecute_Clusters_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | clusters`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Groups) + + // Total names across all clusters should equal component count + total := 0 + for _, grp := range result.Groups { + total += grp.Count + } + // Count component schemas + compCount, err := oq.Execute(`schemas.components 
| count`, g) + require.NoError(t, err) + assert.Equal(t, compCount.Count, total) +} + +func TestExecute_TagBoundary_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas | tag-boundary | select name, tag_count`, g) + require.NoError(t, err) + // All returned rows should have tag_count > 1 + for _, row := range result.Rows { + tc := oq.FieldValuePublic(row, "tag_count", g) + assert.Greater(t, tc.Int, 1) + } +} + +func TestExecute_SharedRefs_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`operations | shared-refs | select name`, g) + require.NoError(t, err) + // Schemas shared by ALL operations + assert.NotNil(t, result) +} + +func TestExecute_OpCount_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | sort op_count desc | take 3 | select name, op_count`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) +} + func TestFormatTable_Groups_Success(t *testing.T) { t.Parallel() g := loadTestGraph(t) From 48b8cf30bcab995738e754da2f549aa3a028ad0c Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 09:45:18 +0000 Subject: [PATCH 14/27] refactor: swap query command arg order to query-first Change `openapi spec query ` to `openapi spec query [file]`. The query is the primary argument; the input file is optional and defaults to stdin when omitted. 
Co-Authored-By: Claude Opus 4.6 --- cmd/openapi/commands/openapi/query.go | 59 +++++++++++++++------------ oq/README.md | 21 ++++++---- 2 files changed, 46 insertions(+), 34 deletions(-) diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go index aeefe54..8321e4b 100644 --- a/cmd/openapi/commands/openapi/query.go +++ b/cmd/openapi/commands/openapi/query.go @@ -14,47 +14,47 @@ import ( ) var queryCmd = &cobra.Command{ - Use: "query ", + Use: "query [input-file]", Short: "Query an OpenAPI specification using the oq pipeline language", Long: `Query an OpenAPI specification using the oq pipeline language to answer structural and semantic questions about schemas and operations. +The query argument comes first, followed by an optional input file. If no file +is given, reads from stdin. + Examples: # Deeply nested components - openapi spec query petstore.yaml 'schemas.components | sort depth desc | take 10 | select name, depth' + openapi spec query 'schemas.components | sort depth desc | take 10 | select name, depth' petstore.yaml - # Wide union trees - openapi spec query petstore.yaml 'schemas | where union_width > 0 | sort union_width desc | take 10' + # Pipe from stdin + cat spec.yaml | openapi spec query 'schemas | count' + + # Explicit stdin + openapi spec query 'schemas | count' - - # Central components (highest in-degree) - openapi spec query petstore.yaml 'schemas.components | sort in_degree desc | take 10 | select name, in_degree' + # Wide union trees + openapi spec query 'schemas | where union_width > 0 | sort union_width desc | take 10' petstore.yaml # Dead components (no incoming references) - openapi spec query petstore.yaml 'schemas.components | where in_degree == 0 | select name' + openapi spec query 'schemas.components | where in_degree == 0 | select name' petstore.yaml # Operation sprawl - openapi spec query petstore.yaml 'operations | sort schema_count desc | take 10 | select name, schema_count' + openapi spec query 
'operations | sort schema_count desc | take 10 | select name, schema_count' petstore.yaml # Circular references - openapi spec query petstore.yaml 'schemas | where is_circular | select name, path' - - # Schema count - openapi spec query petstore.yaml 'schemas | count' - -Stdin is supported — either pipe data directly or use '-' explicitly: - cat spec.yaml | openapi spec query - 'schemas | count' + openapi spec query 'schemas | where is_circular | select name, path' petstore.yaml # Shortest path between schemas - openapi spec query petstore.yaml 'schemas | path "Pet" "Address" | select name' + openapi spec query 'schemas | path "Pet" "Address" | select name' petstore.yaml - # Top 5 most connected schemas - openapi spec query petstore.yaml 'schemas.components | top 5 in_degree | select name, in_degree' + # Edge annotations + openapi spec query 'schemas.components | where name == "Pet" | refs-out | select name, edge_kind, edge_label' petstore.yaml - # Explain a query plan - openapi spec query petstore.yaml 'schemas.components | where depth > 5 | sort depth desc | explain' + # Blast radius + openapi spec query 'schemas.components | where name == "Error" | blast-radius | count' petstore.yaml - # List available fields - openapi spec query petstore.yaml 'schemas | fields' + # Explain a query plan + openapi spec query 'schemas.components | where depth > 5 | sort depth desc | explain' petstore.yaml Pipeline stages: Source: schemas, schemas.components, schemas.inline, operations @@ -80,9 +80,11 @@ func init() { func runQuery(cmd *cobra.Command, args []string) { ctx := cmd.Context() - inputFile := inputFileFromArgs(args) + // args[0] = query (or input file if using -f), args[1] = input file (optional) queryStr := "" + inputFile := "-" // default to stdin + if queryFromFile != "" { data, err := os.ReadFile(queryFromFile) if err != nil { @@ -90,8 +92,15 @@ func runQuery(cmd *cobra.Command, args []string) { os.Exit(1) } queryStr = string(data) - } else if len(args) >= 2 { - 
queryStr = args[1] + // When using -f, all positional args are input files + if len(args) > 0 { + inputFile = args[0] + } + } else if len(args) >= 1 { + queryStr = args[0] + if len(args) >= 2 { + inputFile = args[1] + } } if queryStr == "" { diff --git a/oq/README.md b/oq/README.md index a292e42..65e6b34 100644 --- a/oq/README.md +++ b/oq/README.md @@ -6,19 +6,19 @@ ```bash # Count all schemas -openapi spec query petstore.yaml 'schemas | count' +openapi spec query 'schemas | count' petstore.yaml # Top 10 deepest component schemas -openapi spec query petstore.yaml 'schemas.components | sort depth desc | take 10 | select name, depth' +openapi spec query 'schemas.components | sort depth desc | take 10 | select name, depth' petstore.yaml # Dead components (unreferenced) -openapi spec query petstore.yaml 'schemas.components | where in_degree == 0 | select name' +openapi spec query 'schemas.components | where in_degree == 0 | select name' petstore.yaml ``` Stdin is supported: ```bash -cat spec.yaml | openapi spec query - 'schemas | count' +cat spec.yaml | openapi spec query 'schemas | count' ``` ## Pipeline Syntax @@ -156,8 +156,8 @@ Operators: `==`, `!=`, `>`, `<`, `>=`, `<=`, `and`, `or`, `not`, `has()`, `match Use `--format` flag or inline `format` stage: ```bash -openapi spec query spec.yaml 'schemas | count' --format json -openapi spec query spec.yaml 'schemas | take 5 | format markdown' +openapi spec query 'schemas | count' spec.yaml --format json +openapi spec query 'schemas | take 5 | format markdown' spec.yaml ``` | Format | Description | @@ -232,11 +232,14 @@ operations | shared-refs | select name, op_count openapi spec query-reference # Inline query -openapi spec query '' +openapi spec query '' # Query from file -openapi spec query -f query.oq +openapi spec query -f query.oq # With output format -openapi spec query '' --format json +openapi spec query '' --format json + +# From stdin +cat spec.yaml | openapi spec query '' ``` From 
41975c1d3bba39a2398c1b808c124f723870d8b3 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 16:24:12 +0000 Subject: [PATCH 15/27] fix: remove redundant isNull field and treat empty strings as falsy in has() --- oq/expr/expr.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/oq/expr/expr.go b/oq/expr/expr.go index 3463ae0..2cb9bcd 100644 --- a/oq/expr/expr.go +++ b/oq/expr/expr.go @@ -11,11 +11,10 @@ import ( // Value represents a typed value in the expression system. type Value struct { - Kind ValueKind - Str string - Int int - Bool bool - isNull bool + Kind ValueKind + Str string + Int int + Bool bool } type ValueKind int @@ -93,7 +92,7 @@ func (e *binaryExpr) Eval(row Row) Value { case "<=": return Value{Kind: KindBool, Bool: compare(e.left.Eval(row), e.right.Eval(row)) <= 0} default: - return Value{Kind: KindNull, isNull: true} + return Value{Kind: KindNull} } } @@ -103,7 +102,7 @@ func (e *notExpr) Eval(row Row) Value { func (e *hasExpr) Eval(row Row) Value { v := row.Field(e.field) - return Value{Kind: KindBool, Bool: !v.isNull && (v.Kind != KindInt || v.Int > 0) && (v.Kind != KindBool || v.Bool)} + return Value{Kind: KindBool, Bool: v.Kind != KindNull && (v.Kind != KindInt || v.Int > 0) && (v.Kind != KindBool || v.Bool) && (v.Kind != KindString || v.Str != "")} } func (e *matchesExpr) Eval(row Row) Value { @@ -206,7 +205,7 @@ func BoolVal(b bool) Value { // NullVal creates a null Value. 
func NullVal() Value { - return Value{Kind: KindNull, isNull: true} + return Value{Kind: KindNull} } // --- Parser --- From b71bcd73f9d9c84aae1b74bf90cb268f63a35e57 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 18:04:45 +0000 Subject: [PATCH 16/27] refactor: split oq/oq.go into parse, exec, format, field modules --- cmd/openapi/commands/openapi/query.go | 2 +- cmd/openapi/commands/openapi/shared.go | 17 + graph/graph.go | 36 +- oq/exec.go | 1016 +++++++++++++ oq/field.go | 165 +++ oq/format.go | 384 +++++ oq/oq.go | 1800 ------------------------ oq/parse.go | 284 ++++ 8 files changed, 1889 insertions(+), 1815 deletions(-) create mode 100644 oq/exec.go create mode 100644 oq/field.go create mode 100644 oq/format.go create mode 100644 oq/parse.go diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go index 8321e4b..17f0f13 100644 --- a/cmd/openapi/commands/openapi/query.go +++ b/cmd/openapi/commands/openapi/query.go @@ -66,7 +66,7 @@ Pipeline stages: Meta: explain, fields, format Where expressions support: ==, !=, >, <, >=, <=, and, or, not, has(), matches`, - Args: stdinOrFileArgs(1, 2), + Args: queryArgs(), Run: runQuery, } diff --git a/cmd/openapi/commands/openapi/shared.go b/cmd/openapi/commands/openapi/shared.go index b79a77b..f471aec 100644 --- a/cmd/openapi/commands/openapi/shared.go +++ b/cmd/openapi/commands/openapi/shared.go @@ -31,6 +31,23 @@ func stdinOrFileArgs(minArgs, maxArgs int) cobra.PositionalArgs { return cmdutil.StdinOrFileArgs(minArgs, maxArgs) } +// queryArgs returns a PositionalArgs validator for the query command. +// When -f/--file is provided, 0 positional args are allowed (spec from stdin). +// Otherwise requires 1–2 positional args (query + optional spec file). 
+func queryArgs() cobra.PositionalArgs { + return func(cmd *cobra.Command, args []string) error { + fromFile, _ := cmd.Flags().GetString("file") + if fromFile != "" { + // -f flag present: 0 or 1 positional arg (optional spec file) + if len(args) > 1 { + return fmt.Errorf("accepts at most 1 arg when using --file, received %d", len(args)) + } + return nil + } + return cmdutil.StdinOrFileArgs(1, 2)(cmd, args) + } +} + // OpenAPIProcessor handles common OpenAPI document processing operations type OpenAPIProcessor struct { InputFile string diff --git a/graph/graph.go b/graph/graph.go index 8f804cf..0f4953b 100644 --- a/graph/graph.go +++ b/graph/graph.go @@ -4,6 +4,7 @@ package graph import ( "context" + "sort" "strconv" "strings" @@ -100,7 +101,7 @@ type SchemaGraph struct { } // Build constructs a SchemaGraph from an openapi.Index. -func Build(ctx context.Context, idx *openapi.Index) *SchemaGraph { +func Build(_ context.Context, idx *openapi.Index) *SchemaGraph { g := &SchemaGraph{ outEdges: make(map[NodeID][]Edge), inEdges: make(map[NodeID][]Edge), @@ -144,22 +145,26 @@ func (g *SchemaGraph) SchemaByName(name string) (SchemaNode, bool) { } // OperationSchemas returns the schema NodeIDs reachable from the given operation. +// Results are sorted by NodeID for deterministic output. func (g *SchemaGraph) OperationSchemas(opID NodeID) []NodeID { set := g.opSchemas[opID] ids := make([]NodeID, 0, len(set)) for id := range set { ids = append(ids, id) } + sort.Slice(ids, func(i, j int) bool { return ids[i] < ids[j] }) return ids } // SchemaOperations returns the operation NodeIDs that reference the given schema. +// Results are sorted by NodeID for deterministic output. 
func (g *SchemaGraph) SchemaOperations(schemaID NodeID) []NodeID { set := g.schemaOps[schemaID] ids := make([]NodeID, 0, len(set)) for id := range set { ids = append(ids, id) } + sort.Slice(ids, func(i, j int) bool { return ids[i] < ids[j] }) return ids } @@ -269,23 +274,23 @@ func (g *SchemaGraph) buildEdges() { } // AllOf - for i, child := range schema.AllOf { + for j, child := range schema.AllOf { if childID, ok := g.resolveChild(child); ok { - g.addEdge(sn.ID, childID, EdgeAllOf, "allOf/"+intStr(i)) + g.addEdge(sn.ID, childID, EdgeAllOf, "allOf/"+intStr(j)) } } // OneOf - for i, child := range schema.OneOf { + for j, child := range schema.OneOf { if childID, ok := g.resolveChild(child); ok { - g.addEdge(sn.ID, childID, EdgeOneOf, "oneOf/"+intStr(i)) + g.addEdge(sn.ID, childID, EdgeOneOf, "oneOf/"+intStr(j)) } } // AnyOf - for i, child := range schema.AnyOf { + for j, child := range schema.AnyOf { if childID, ok := g.resolveChild(child); ok { - g.addEdge(sn.ID, childID, EdgeAnyOf, "anyOf/"+intStr(i)) + g.addEdge(sn.ID, childID, EdgeAnyOf, "anyOf/"+intStr(j)) } } @@ -328,9 +333,9 @@ func (g *SchemaGraph) buildEdges() { } // PrefixItems - for i, child := range schema.PrefixItems { + for j, child := range schema.PrefixItems { if childID, ok := g.resolveChild(child); ok { - g.addEdge(sn.ID, childID, EdgePrefixItems, "prefixItems/"+intStr(i)) + g.addEdge(sn.ID, childID, EdgePrefixItems, "prefixItems/"+intStr(j)) } } @@ -562,13 +567,16 @@ func (g *SchemaGraph) reachableBFS(start NodeID, visited map[NodeID]bool) { // Phase 4: Compute metrics for each schema node. 
func (g *SchemaGraph) computeMetrics() { - // Detect circular nodes + // Detect circular nodes with a single shared DFS (O(V+E)) circularNodes := make(map[NodeID]bool) + visited := make(map[NodeID]bool) + inStack := make(map[NodeID]bool) for i := range g.Schemas { - visited := make(map[NodeID]bool) - inStack := make(map[NodeID]bool) - if g.detectCycle(NodeID(i), visited, inStack, circularNodes) { - circularNodes[NodeID(i)] = true + nid := NodeID(i) + if !visited[nid] { + if g.detectCycle(nid, visited, inStack, circularNodes) { + circularNodes[nid] = true + } } } diff --git a/oq/exec.go b/oq/exec.go new file mode 100644 index 0000000..01e9177 --- /dev/null +++ b/oq/exec.go @@ -0,0 +1,1016 @@ +package oq + +import ( + "errors" + "fmt" + "math/rand/v2" + "slices" + "sort" + "strconv" + "strings" + + "github.com/speakeasy-api/openapi/graph" + "github.com/speakeasy-api/openapi/oq/expr" +) + +func run(stages []Stage, g *graph.SchemaGraph) (*Result, error) { + if len(stages) == 0 { + return &Result{}, nil + } + + // Check if explain stage is present + for _, stage := range stages { + if stage.Kind == StageExplain { + return &Result{Explain: buildExplain(stages)}, nil + } + } + + // Execute source stage + result, err := execSource(stages[0], g) + if err != nil { + return nil, err + } + + // Execute remaining stages + for _, stage := range stages[1:] { + result, err = execStage(stage, result, g) + if err != nil { + return nil, err + } + } + + return result, nil +} + +func execSource(stage Stage, g *graph.SchemaGraph) (*Result, error) { + result := &Result{} + switch stage.Source { + case "schemas": + for i := range g.Schemas { + result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) + } + case "schemas.components": + for i, s := range g.Schemas { + if s.IsComponent { + result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) + } + } + case "schemas.inline": + for i, s := range g.Schemas { + if s.IsInline { + result.Rows = 
append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) + } + } + case "operations": + for i := range g.Operations { + result.Rows = append(result.Rows, Row{Kind: OperationResult, OpIdx: i}) + } + default: + return nil, fmt.Errorf("unknown source: %q", stage.Source) + } + return result, nil +} + +func execStage(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + switch stage.Kind { + case StageWhere: + return execWhere(stage, result, g) + case StageSelect: + result.Fields = stage.Fields + return result, nil + case StageSort: + return execSort(stage, result, g) + case StageTake: + return execTake(stage, result) + case StageUnique: + return execUnique(result) + case StageGroupBy: + return execGroupBy(stage, result, g) + case StageCount: + return &Result{IsCount: true, Count: len(result.Rows)}, nil + case StageRefsOut: + return execTraversal(result, g, traverseRefsOut) + case StageRefsIn: + return execTraversal(result, g, traverseRefsIn) + case StageReachable: + return execTraversal(result, g, traverseReachable) + case StageAncestors: + return execTraversal(result, g, traverseAncestors) + case StageProperties: + return execTraversal(result, g, traverseProperties) + case StageUnionMembers: + return execTraversal(result, g, traverseUnionMembers) + case StageItems: + return execTraversal(result, g, traverseItems) + case StageOps: + return execSchemasToOps(result, g) + case StageSchemas: + return execOpsToSchemas(result, g) + case StageFields: + return execFields(result) + case StageSample: + return execSample(stage, result) + case StagePath: + return execPath(stage, g) + case StageTop: + // Expand to sort desc + take + sorted, err := execSort(Stage{Kind: StageSort, SortField: stage.SortField, SortDesc: true}, result, g) + if err != nil { + return nil, err + } + return execTake(Stage{Kind: StageTake, Limit: stage.Limit}, sorted) + case StageBottom: + // Expand to sort asc + take + sorted, err := execSort(Stage{Kind: StageSort, SortField: 
stage.SortField, SortDesc: false}, result, g) + if err != nil { + return nil, err + } + return execTake(Stage{Kind: StageTake, Limit: stage.Limit}, sorted) + case StageFormat: + result.FormatHint = stage.Format + return result, nil + case StageConnected: + return execConnected(result, g) + case StageBlastRadius: + return execBlastRadius(result, g) + case StageNeighbors: + return execNeighbors(stage, result, g) + case StageOrphans: + return execOrphans(result, g) + case StageLeaves: + return execLeaves(result, g) + case StageCycles: + return execCycles(result, g) + case StageClusters: + return execClusters(result, g) + case StageTagBoundary: + return execTagBoundary(result, g) + case StageSharedRefs: + return execSharedRefs(result, g) + default: + return nil, fmt.Errorf("unimplemented stage kind: %d", stage.Kind) + } +} + +func execWhere(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + predicate, err := expr.Parse(stage.Expr) + if err != nil { + return nil, fmt.Errorf("where expression error: %w", err) + } + + filtered := &Result{Fields: result.Fields} + for _, row := range result.Rows { + r := rowAdapter{row: row, g: g} + val := predicate.Eval(r) + if val.Kind == expr.KindBool && val.Bool { + filtered.Rows = append(filtered.Rows, row) + } + } + return filtered, nil +} + +func execSort(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + sorted := &Result{ + Fields: result.Fields, + FormatHint: result.FormatHint, + Rows: slices.Clone(result.Rows), + } + sort.SliceStable(sorted.Rows, func(i, j int) bool { + vi := fieldValue(sorted.Rows[i], stage.SortField, g) + vj := fieldValue(sorted.Rows[j], stage.SortField, g) + + cmp := compareValues(vi, vj) + if stage.SortDesc { + return cmp > 0 + } + return cmp < 0 + }) + return sorted, nil +} + +func execTake(stage Stage, result *Result) (*Result, error) { + rows := result.Rows + if stage.Limit < len(rows) { + rows = rows[:stage.Limit] + } + return &Result{ + Fields: result.Fields, + 
FormatHint: result.FormatHint, + Rows: slices.Clone(rows), + }, nil +} + +func execUnique(result *Result) (*Result, error) { + seen := make(map[string]bool) + filtered := &Result{Fields: result.Fields} + for _, row := range result.Rows { + key := rowKey(row) + if !seen[key] { + seen[key] = true + filtered.Rows = append(filtered.Rows, row) + } + } + return filtered, nil +} + +func execGroupBy(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + if len(stage.Fields) == 0 { + return nil, errors.New("group-by requires at least one field") + } + field := stage.Fields[0] + + type group struct { + count int + names []string + } + groups := make(map[string]*group) + var order []string + + for _, row := range result.Rows { + v := fieldValue(row, field, g) + key := valueToString(v) + grp, exists := groups[key] + if !exists { + grp = &group{} + groups[key] = grp + order = append(order, key) + } + grp.count++ + nameV := fieldValue(row, "name", g) + grp.names = append(grp.names, valueToString(nameV)) + } + + grouped := &Result{Fields: result.Fields} + for _, key := range order { + grp, ok := groups[key] + if !ok { + continue + } + grouped.Groups = append(grouped.Groups, GroupResult{ + Key: key, + Count: grp.count, + Names: grp.names, + }) + } + return grouped, nil +} + +// --- Traversal --- + +type traversalFunc func(row Row, g *graph.SchemaGraph) []Row + +func execTraversal(result *Result, g *graph.SchemaGraph, fn traversalFunc) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[string]bool) + for _, row := range result.Rows { + for _, newRow := range fn(row, g) { + key := edgeRowKey(newRow) + if !seen[key] { + seen[key] = true + out.Rows = append(out.Rows, newRow) + } + } + } + return out, nil +} + +func edgeRowKey(row Row) string { + base := rowKey(row) + if row.EdgeKind == "" { + return base + } + return base + "|" + row.EdgeFrom + "|" + row.EdgeKind + "|" + row.EdgeLabel +} + +func traverseRefsOut(row Row, g *graph.SchemaGraph) 
[]Row { + if row.Kind != SchemaResult { + return nil + } + fromName := schemaName(row.SchemaIdx, g) + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.To), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) + } + return result +} + +func traverseRefsIn(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + toName := schemaName(row.SchemaIdx, g) + var result []Row + for _, edge := range g.InEdges(graph.NodeID(row.SchemaIdx)) { + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.From), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: toName, + }) + } + return result +} + +func traverseReachable(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + ids := g.Reachable(graph.NodeID(row.SchemaIdx)) + result := make([]Row, len(ids)) + for i, id := range ids { + result[i] = Row{Kind: SchemaResult, SchemaIdx: int(id)} + } + return result +} + +func traverseAncestors(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + ids := g.Ancestors(graph.NodeID(row.SchemaIdx)) + result := make([]Row, len(ids)) + for i, id := range ids { + result[i] = Row{Kind: SchemaResult, SchemaIdx: int(id)} + } + return result +} + +func traverseProperties(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + fromName := schemaName(row.SchemaIdx, g) + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + if edge.Kind == graph.EdgeProperty { + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.To), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) + } + } + return result +} + +func traverseUnionMembers(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + 
return nil + } + fromName := schemaName(row.SchemaIdx, g) + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + if edge.Kind == graph.EdgeAllOf || edge.Kind == graph.EdgeOneOf || edge.Kind == graph.EdgeAnyOf { + // Follow through $ref nodes transparently + target := resolveRefTarget(int(edge.To), g) + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: target, + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) + } + } + return result +} + +func traverseItems(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + fromName := schemaName(row.SchemaIdx, g) + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + if edge.Kind == graph.EdgeItems { + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.To), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) + } + } + return result +} + +// resolveRefTarget follows EdgeRef edges to get the actual target node. +// If the node at idx is a $ref wrapper, returns the target component's index. +// Otherwise returns idx unchanged. 
+func resolveRefTarget(idx int, g *graph.SchemaGraph) int { + if idx < 0 || idx >= len(g.Schemas) { + return idx + } + node := &g.Schemas[idx] + if !node.HasRef { + return idx + } + // Follow EdgeRef edges + for _, edge := range g.OutEdges(graph.NodeID(idx)) { + if edge.Kind == graph.EdgeRef { + return int(edge.To) + } + } + return idx +} + +func execSchemasToOps(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + opIDs := g.SchemaOperations(graph.NodeID(row.SchemaIdx)) + for _, opID := range opIDs { + idx := int(opID) + if !seen[idx] { + seen[idx] = true + out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: idx}) + } + } + } + return out, nil +} + +func execOpsToSchemas(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind != OperationResult { + continue + } + schemaIDs := g.OperationSchemas(graph.NodeID(row.OpIdx)) + for _, sid := range schemaIDs { + idx := int(sid) + if !seen[idx] { + seen[idx] = true + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: idx}) + } + } + } + return out, nil +} + +func execConnected(result *Result, g *graph.SchemaGraph) (*Result, error) { + var schemaSeeds, opSeeds []graph.NodeID + for _, row := range result.Rows { + switch row.Kind { + case SchemaResult: + schemaSeeds = append(schemaSeeds, graph.NodeID(row.SchemaIdx)) + case OperationResult: + opSeeds = append(opSeeds, graph.NodeID(row.OpIdx)) + } + } + + schemas, ops := g.ConnectedComponent(schemaSeeds, opSeeds) + + out := &Result{Fields: result.Fields} + for _, id := range schemas { + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) + } + for _, id := range ops { + out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: int(id)}) + } + return out, nil 
+} + +func execBlastRadius(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seenSchemas := make(map[int]bool) + seenOps := make(map[int]bool) + + // Collect seed schemas + var seeds []graph.NodeID + for _, row := range result.Rows { + if row.Kind == SchemaResult { + seeds = append(seeds, graph.NodeID(row.SchemaIdx)) + seenSchemas[row.SchemaIdx] = true + } + } + + // Find all ancestors (schemas that depend on the seeds) + for _, seed := range seeds { + for _, aid := range g.Ancestors(seed) { + seenSchemas[int(aid)] = true + } + } + + // Collect and sort schema indices for deterministic output + schemaIndices := make([]int, 0, len(seenSchemas)) + for idx := range seenSchemas { + schemaIndices = append(schemaIndices, idx) + } + sort.Ints(schemaIndices) + + // Add schema rows + for _, idx := range schemaIndices { + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: idx}) + } + + // Find all operations that reference any affected schema + for _, idx := range schemaIndices { + for _, opID := range g.SchemaOperations(graph.NodeID(idx)) { + if !seenOps[int(opID)] { + seenOps[int(opID)] = true + out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: int(opID)}) + } + } + } + + return out, nil +} + +func execNeighbors(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[int]bool) + + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + // Include seed + if !seen[row.SchemaIdx] { + seen[row.SchemaIdx] = true + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: row.SchemaIdx}) + } + for _, id := range g.Neighbors(graph.NodeID(row.SchemaIdx), stage.Limit) { + if !seen[int(id)] { + seen[int(id)] = true + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) + } + } + } + + return out, nil +} + +func execOrphans(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := 
&Result{Fields: result.Fields} + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + s := &g.Schemas[row.SchemaIdx] + if s.InDegree == 0 && g.SchemaOpCount(graph.NodeID(row.SchemaIdx)) == 0 { + out.Rows = append(out.Rows, row) + } + } + return out, nil +} + +func execLeaves(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + if g.Schemas[row.SchemaIdx].OutDegree == 0 { + out.Rows = append(out.Rows, row) + } + } + return out, nil +} + +func execCycles(result *Result, g *graph.SchemaGraph) (*Result, error) { + sccs := g.StronglyConnectedComponents() + + // Filter SCCs to only include nodes present in the current result + resultNodes := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind == SchemaResult { + resultNodes[row.SchemaIdx] = true + } + } + + out := &Result{Fields: result.Fields} + for i, scc := range sccs { + hasMatch := false + for _, id := range scc { + if resultNodes[int(id)] { + hasMatch = true + break + } + } + if !hasMatch { + continue + } + var names []string + for _, id := range scc { + if int(id) < len(g.Schemas) { + names = append(names, g.Schemas[id].Name) + } + } + out.Groups = append(out.Groups, GroupResult{ + Key: "cycle-" + strconv.Itoa(i+1), + Count: len(scc), + Names: names, + }) + } + + return out, nil +} + +func execClusters(result *Result, g *graph.SchemaGraph) (*Result, error) { + resultNodes := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind == SchemaResult { + resultNodes[row.SchemaIdx] = true + } + } + + // Sort node indices for deterministic iteration + sortedNodes := make([]int, 0, len(resultNodes)) + for idx := range resultNodes { + sortedNodes = append(sortedNodes, idx) + } + sort.Ints(sortedNodes) + + // BFS to find connected components. 
Follow ALL graph edges (including + // through intermediary nodes like $ref wrappers) but only collect + // nodes that are in the result set. + assigned := make(map[int]bool) // result nodes already assigned to a cluster + out := &Result{Fields: result.Fields} + clusterNum := 0 + + for _, idx := range sortedNodes { + if assigned[idx] { + continue + } + clusterNum++ + var component []int + + // BFS through the full graph + visited := make(map[int]bool) + queue := []int{idx} + visited[idx] = true + + for len(queue) > 0 { + cur := queue[0] + queue = queue[1:] + + if resultNodes[cur] && !assigned[cur] { + assigned[cur] = true + component = append(component, cur) + } + + for _, edge := range g.OutEdges(graph.NodeID(cur)) { + to := int(edge.To) + if !visited[to] { + visited[to] = true + queue = append(queue, to) + } + } + for _, edge := range g.InEdges(graph.NodeID(cur)) { + from := int(edge.From) + if !visited[from] { + visited[from] = true + queue = append(queue, from) + } + } + } + + var names []string + for _, id := range component { + if id < len(g.Schemas) { + names = append(names, g.Schemas[id].Name) + } + } + if len(component) > 0 { + out.Groups = append(out.Groups, GroupResult{ + Key: "cluster-" + strconv.Itoa(clusterNum), + Count: len(component), + Names: names, + }) + } + } + + return out, nil +} + +func execTagBoundary(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + if schemaTagCount(row.SchemaIdx, g) > 1 { + out.Rows = append(out.Rows, row) + } + } + return out, nil +} + +func schemaTagCount(schemaIdx int, g *graph.SchemaGraph) int { + tags := make(map[string]bool) + for _, opID := range g.SchemaOperations(graph.NodeID(schemaIdx)) { + if int(opID) < len(g.Operations) { + op := &g.Operations[opID] + if op.Operation != nil { + for _, tag := range op.Operation.Tags { + tags[tag] = true + } + } + } + } + return len(tags) +} + 
+func execSharedRefs(result *Result, g *graph.SchemaGraph) (*Result, error) { + var ops []graph.NodeID + for _, row := range result.Rows { + if row.Kind == OperationResult { + ops = append(ops, graph.NodeID(row.OpIdx)) + } + } + + if len(ops) == 0 { + return &Result{Fields: result.Fields}, nil + } + + // Start with first operation's schemas + intersection := make(map[graph.NodeID]bool) + for _, sid := range g.OperationSchemas(ops[0]) { + intersection[sid] = true + } + + // Intersect with each subsequent operation + for _, opID := range ops[1:] { + opSchemas := make(map[graph.NodeID]bool) + for _, sid := range g.OperationSchemas(opID) { + opSchemas[sid] = true + } + for sid := range intersection { + if !opSchemas[sid] { + delete(intersection, sid) + } + } + } + + // Sort for deterministic output + sortedIDs := make([]int, 0, len(intersection)) + for sid := range intersection { + sortedIDs = append(sortedIDs, int(sid)) + } + sort.Ints(sortedIDs) + + out := &Result{Fields: result.Fields} + for _, sid := range sortedIDs { + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: sid}) + } + return out, nil +} + +// --- Edge annotation helpers --- + +func schemaName(idx int, g *graph.SchemaGraph) string { + if idx >= 0 && idx < len(g.Schemas) { + return g.Schemas[idx].Name + } + return "" +} + +func edgeKindString(k graph.EdgeKind) string { + switch k { + case graph.EdgeProperty: + return "property" + case graph.EdgeItems: + return "items" + case graph.EdgeAllOf: + return "allOf" + case graph.EdgeOneOf: + return "oneOf" + case graph.EdgeAnyOf: + return "anyOf" + case graph.EdgeAdditionalProps: + return "additionalProperties" + case graph.EdgeNot: + return "not" + case graph.EdgeIf: + return "if" + case graph.EdgeThen: + return "then" + case graph.EdgeElse: + return "else" + case graph.EdgeContains: + return "contains" + case graph.EdgePrefixItems: + return "prefixItems" + case graph.EdgeDependentSchema: + return "dependentSchema" + case graph.EdgePatternProperty: 
+ return "patternProperty" + case graph.EdgePropertyNames: + return "propertyNames" + case graph.EdgeUnevaluatedItems: + return "unevaluatedItems" + case graph.EdgeUnevaluatedProps: + return "unevaluatedProperties" + case graph.EdgeRef: + return "ref" + default: + return "unknown" + } +} + +// --- Explain --- + +func buildExplain(stages []Stage) string { + var sb strings.Builder + for i, stage := range stages { + if stage.Kind == StageExplain { + continue + } + if i == 0 { + fmt.Fprintf(&sb, "Source: %s\n", stage.Source) + } else { + desc := describeStage(stage) + fmt.Fprintf(&sb, " → %s\n", desc) + } + } + return sb.String() +} + +func describeStage(stage Stage) string { + switch stage.Kind { + case StageWhere: + return "Filter: where " + stage.Expr + case StageSelect: + return "Project: select " + strings.Join(stage.Fields, ", ") + case StageSort: + dir := "ascending" + if stage.SortDesc { + dir = "descending" + } + return "Sort: " + stage.SortField + " " + dir + case StageTake: + return "Limit: take " + strconv.Itoa(stage.Limit) + case StageUnique: + return "Unique: deduplicate rows" + case StageGroupBy: + return "Group: group-by " + strings.Join(stage.Fields, ", ") + case StageCount: + return "Count: count rows" + case StageRefsOut: + return "Traverse: outgoing references" + case StageRefsIn: + return "Traverse: incoming references" + case StageReachable: + return "Traverse: all reachable nodes" + case StageAncestors: + return "Traverse: all ancestor nodes" + case StageProperties: + return "Traverse: property children" + case StageUnionMembers: + return "Traverse: union members" + case StageItems: + return "Traverse: array items" + case StageOps: + return "Navigate: schemas to operations" + case StageSchemas: + return "Navigate: operations to schemas" + case StageFields: + return "Terminal: list available fields" + case StageSample: + return "Sample: random " + strconv.Itoa(stage.Limit) + " rows" + case StagePath: + return "Path: shortest path from " + 
stage.PathFrom + " to " + stage.PathTo + case StageTop: + return "Top: " + strconv.Itoa(stage.Limit) + " by " + stage.SortField + " descending" + case StageBottom: + return "Bottom: " + strconv.Itoa(stage.Limit) + " by " + stage.SortField + " ascending" + case StageFormat: + return "Format: " + stage.Format + case StageConnected: + return "Traverse: full connected component (schemas + operations)" + case StageBlastRadius: + return "Traverse: blast radius (ancestors + affected operations)" + case StageNeighbors: + return "Traverse: bidirectional neighbors within " + strconv.Itoa(stage.Limit) + " hops" + case StageOrphans: + return "Filter: schemas with no incoming refs and no operation usage" + case StageLeaves: + return "Filter: schemas with no outgoing refs (leaf nodes)" + case StageCycles: + return "Analyze: strongly connected components (actual cycles)" + case StageClusters: + return "Analyze: weakly connected component clusters" + case StageTagBoundary: + return "Filter: schemas used by operations across multiple tags" + case StageSharedRefs: + return "Analyze: schemas shared by all operations in result" + default: + return "Unknown stage" + } +} + +// --- Fields --- + +func execFields(result *Result) (*Result, error) { + var sb strings.Builder + kind := SchemaResult + if len(result.Rows) > 0 { + kind = result.Rows[0].Kind + } + + if kind == SchemaResult { + sb.WriteString("Field Type\n") + sb.WriteString("----------- ------\n") + fields := []struct{ name, typ string }{ + {"name", "string"}, + {"type", "string"}, + {"depth", "int"}, + {"in_degree", "int"}, + {"out_degree", "int"}, + {"union_width", "int"}, + {"property_count", "int"}, + {"is_component", "bool"}, + {"is_inline", "bool"}, + {"is_circular", "bool"}, + {"has_ref", "bool"}, + {"hash", "string"}, + {"path", "string"}, + {"op_count", "int"}, + {"tag_count", "int"}, + {"edge_kind", "string"}, + {"edge_label", "string"}, + {"edge_from", "string"}, + } + for _, f := range fields { + fmt.Fprintf(&sb, 
"%-17s %s\n", f.name, f.typ) + } + } else { + sb.WriteString("Field Type\n") + sb.WriteString("----------- ------\n") + fields := []struct{ name, typ string }{ + {"name", "string"}, + {"method", "string"}, + {"path", "string"}, + {"operation_id", "string"}, + {"schema_count", "int"}, + {"component_count", "int"}, + {"tag", "string"}, + {"parameter_count", "int"}, + {"deprecated", "bool"}, + {"description", "string"}, + {"summary", "string"}, + {"edge_kind", "string"}, + {"edge_label", "string"}, + {"edge_from", "string"}, + } + for _, f := range fields { + fmt.Fprintf(&sb, "%-17s %s\n", f.name, f.typ) + } + } + + return &Result{Explain: sb.String()}, nil +} + +// --- Sample --- + +func execSample(stage Stage, result *Result) (*Result, error) { + if stage.Limit >= len(result.Rows) { + return result, nil + } + + // Deterministic shuffle using Fisher-Yates with a fixed seed derived from row count. + rows := slices.Clone(result.Rows) + rng := rand.New(rand.NewPCG(uint64(len(rows)), 0)) //nolint:gosec // deterministic seed is intentional + rng.Shuffle(len(rows), func(i, j int) { + rows[i], rows[j] = rows[j], rows[i] + }) + + out := &Result{Fields: result.Fields} + out.Rows = rows[:stage.Limit] + return out, nil +} + +// --- Path --- + +func execPath(stage Stage, g *graph.SchemaGraph) (*Result, error) { + fromNode, ok := g.SchemaByName(stage.PathFrom) + if !ok { + return nil, fmt.Errorf("schema %q not found", stage.PathFrom) + } + toNode, ok := g.SchemaByName(stage.PathTo) + if !ok { + return nil, fmt.Errorf("schema %q not found", stage.PathTo) + } + + path := g.ShortestPath(fromNode.ID, toNode.ID) + out := &Result{} + for _, id := range path { + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) + } + return out, nil +} diff --git a/oq/field.go b/oq/field.go new file mode 100644 index 0000000..0db8956 --- /dev/null +++ b/oq/field.go @@ -0,0 +1,165 @@ +package oq + +import ( + "strconv" + + "github.com/speakeasy-api/openapi/graph" + 
"github.com/speakeasy-api/openapi/oq/expr" +) + +// --- Field access --- + +type rowAdapter struct { + row Row + g *graph.SchemaGraph +} + +func (r rowAdapter) Field(name string) expr.Value { + return fieldValue(r.row, name, r.g) +} + +// FieldValuePublic returns the value of a named field for the given row. +// Exported for testing and external consumers. +func FieldValuePublic(row Row, name string, g *graph.SchemaGraph) expr.Value { + return fieldValue(row, name, g) +} + +func fieldValue(row Row, name string, g *graph.SchemaGraph) expr.Value { + switch row.Kind { + case SchemaResult: + if row.SchemaIdx < 0 || row.SchemaIdx >= len(g.Schemas) { + return expr.NullVal() + } + s := &g.Schemas[row.SchemaIdx] + switch name { + case "name": + return expr.StringVal(s.Name) + case "type": + return expr.StringVal(s.Type) + case "depth": + return expr.IntVal(s.Depth) + case "in_degree": + return expr.IntVal(s.InDegree) + case "out_degree": + return expr.IntVal(s.OutDegree) + case "union_width": + return expr.IntVal(s.UnionWidth) + case "property_count": + return expr.IntVal(s.PropertyCount) + case "is_component": + return expr.BoolVal(s.IsComponent) + case "is_inline": + return expr.BoolVal(s.IsInline) + case "is_circular": + return expr.BoolVal(s.IsCircular) + case "has_ref": + return expr.BoolVal(s.HasRef) + case "hash": + return expr.StringVal(s.Hash) + case "path": + return expr.StringVal(s.Path) + case "op_count": + return expr.IntVal(g.SchemaOpCount(graph.NodeID(row.SchemaIdx))) + case "tag_count": + return expr.IntVal(schemaTagCount(row.SchemaIdx, g)) + case "edge_kind": + return expr.StringVal(row.EdgeKind) + case "edge_label": + return expr.StringVal(row.EdgeLabel) + case "edge_from": + return expr.StringVal(row.EdgeFrom) + } + case OperationResult: + if row.OpIdx < 0 || row.OpIdx >= len(g.Operations) { + return expr.NullVal() + } + o := &g.Operations[row.OpIdx] + switch name { + case "name": + return expr.StringVal(o.Name) + case "method": + return 
expr.StringVal(o.Method) + case "path": + return expr.StringVal(o.Path) + case "operation_id": + return expr.StringVal(o.OperationID) + case "schema_count": + return expr.IntVal(o.SchemaCount) + case "component_count": + return expr.IntVal(o.ComponentCount) + case "tag": + if o.Operation != nil && len(o.Operation.Tags) > 0 { + return expr.StringVal(o.Operation.Tags[0]) + } + return expr.StringVal("") + case "parameter_count": + if o.Operation != nil { + return expr.IntVal(len(o.Operation.Parameters)) + } + return expr.IntVal(0) + case "deprecated": + if o.Operation != nil { + return expr.BoolVal(o.Operation.Deprecated != nil && *o.Operation.Deprecated) + } + return expr.BoolVal(false) + case "description": + if o.Operation != nil { + return expr.StringVal(o.Operation.GetDescription()) + } + return expr.StringVal("") + case "summary": + if o.Operation != nil { + return expr.StringVal(o.Operation.GetSummary()) + } + return expr.StringVal("") + case "edge_kind": + return expr.StringVal(row.EdgeKind) + case "edge_label": + return expr.StringVal(row.EdgeLabel) + case "edge_from": + return expr.StringVal(row.EdgeFrom) + } + } + return expr.NullVal() +} + +func compareValues(a, b expr.Value) int { + if a.Kind == expr.KindInt && b.Kind == expr.KindInt { + if a.Int < b.Int { + return -1 + } + if a.Int > b.Int { + return 1 + } + return 0 + } + sa := valueToString(a) + sb := valueToString(b) + if sa < sb { + return -1 + } + if sa > sb { + return 1 + } + return 0 +} + +func valueToString(v expr.Value) string { + switch v.Kind { + case expr.KindString: + return v.Str + case expr.KindInt: + return strconv.Itoa(v.Int) + case expr.KindBool: + return strconv.FormatBool(v.Bool) + default: + return "" + } +} + +func rowKey(row Row) string { + if row.Kind == SchemaResult { + return "s:" + strconv.Itoa(row.SchemaIdx) + } + return "o:" + strconv.Itoa(row.OpIdx) +} diff --git a/oq/format.go b/oq/format.go new file mode 100644 index 0000000..8b51338 --- /dev/null +++ b/oq/format.go @@ 
-0,0 +1,384 @@ +package oq + +import ( + "fmt" + "slices" + "strconv" + "strings" + + "github.com/speakeasy-api/openapi/graph" + "github.com/speakeasy-api/openapi/oq/expr" +) + +// FormatTable formats a result as a simple table string. +func FormatTable(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + + if result.IsCount { + return strconv.Itoa(result.Count) + } + + if len(result.Groups) > 0 { + return formatGroups(result) + } + + if len(result.Rows) == 0 { + return "(empty)" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + // Build header + widths := make([]int, len(fields)) + for i, f := range fields { + widths[i] = len(f) + } + + // Collect rows + var tableRows [][]string + for _, row := range result.Rows { + var cols []string + for i, f := range fields { + v := valueToString(fieldValue(row, f, g)) + cols = append(cols, v) + if len(v) > widths[i] { + widths[i] = len(v) + } + } + tableRows = append(tableRows, cols) + } + + // Format + var sb strings.Builder + // Header + for i, f := range fields { + if i > 0 { + sb.WriteString(" ") + } + sb.WriteString(padRight(f, widths[i])) + } + sb.WriteString("\n") + // Separator + for i, w := range widths { + if i > 0 { + sb.WriteString(" ") + } + sb.WriteString(strings.Repeat("-", w)) + } + sb.WriteString("\n") + // Data + for _, row := range tableRows { + for i, col := range row { + if i > 0 { + sb.WriteString(" ") + } + sb.WriteString(padRight(col, widths[i])) + } + sb.WriteString("\n") + } + + return sb.String() +} + +// FormatJSON formats a result as JSON. 
+func FormatJSON(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + + if result.IsCount { + return strconv.Itoa(result.Count) + } + + if len(result.Groups) > 0 { + return formatGroupsJSON(result) + } + + if len(result.Rows) == 0 { + return "[]" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + var sb strings.Builder + sb.WriteString("[\n") + for i, row := range result.Rows { + if i > 0 { + sb.WriteString(",\n") + } + sb.WriteString(" {") + for j, f := range fields { + if j > 0 { + sb.WriteString(", ") + } + v := fieldValue(row, f, g) + fmt.Fprintf(&sb, "%q: %s", f, jsonValue(v)) + } + sb.WriteString("}") + } + sb.WriteString("\n]") + return sb.String() +} + +// FormatMarkdown formats a result as a markdown table. +func FormatMarkdown(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + + if result.IsCount { + return strconv.Itoa(result.Count) + } + + if len(result.Groups) > 0 { + var sb strings.Builder + sb.WriteString("| Key | Count |\n") + sb.WriteString("| --- | --- |\n") + for _, grp := range result.Groups { + fmt.Fprintf(&sb, "| %s | %d |\n", grp.Key, grp.Count) + } + return sb.String() + } + + if len(result.Rows) == 0 { + return "(empty)" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + var sb strings.Builder + // Header + sb.WriteString("| ") + sb.WriteString(strings.Join(fields, " | ")) + sb.WriteString(" |\n") + // Separator + sb.WriteString("|") + for range fields { + sb.WriteString(" --- |") + } + sb.WriteString("\n") + // Rows + for _, row := 
range result.Rows { + sb.WriteString("| ") + for i, f := range fields { + if i > 0 { + sb.WriteString(" | ") + } + v := valueToString(fieldValue(row, f, g)) + sb.WriteString(v) + } + sb.WriteString(" |\n") + } + + return sb.String() +} + +// FormatToon formats a result in the TOON (Token-Oriented Object Notation) format. +// TOON uses tabular array syntax for uniform rows: header[N]{field1,field2,...}: +// followed by comma-delimited data rows. See https://github.com/toon-format/toon +func FormatToon(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + + if result.IsCount { + return "count: " + strconv.Itoa(result.Count) + "\n" + } + + if len(result.Groups) > 0 { + return formatGroupsToon(result) + } + + if len(result.Rows) == 0 { + return "results[0]:\n" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + var sb strings.Builder + + // Header: results[N]{field1,field2,...}: + fmt.Fprintf(&sb, "results[%d]{%s}:\n", len(result.Rows), strings.Join(fields, ",")) + + // Data rows: comma-separated values, indented by one space + for _, row := range result.Rows { + sb.WriteByte(' ') + for i, f := range fields { + if i > 0 { + sb.WriteByte(',') + } + v := fieldValue(row, f, g) + sb.WriteString(toonValue(v)) + } + sb.WriteByte('\n') + } + + return sb.String() +} + +func formatGroupsToon(result *Result) string { + var sb strings.Builder + + // Groups as tabular array + fmt.Fprintf(&sb, "groups[%d]{key,count,names}:\n", len(result.Groups)) + for _, grp := range result.Groups { + names := strings.Join(grp.Names, ";") + fmt.Fprintf(&sb, " %s,%d,%s\n", toonEscape(grp.Key), grp.Count, toonEscape(names)) + } + return sb.String() +} + +// toonValue encodes an expr.Value for TOON format. 
+func toonValue(v expr.Value) string { + switch v.Kind { + case expr.KindString: + return toonEscape(v.Str) + case expr.KindInt: + return strconv.Itoa(v.Int) + case expr.KindBool: + return strconv.FormatBool(v.Bool) + default: + return "null" + } +} + +// toonEscape quotes a string if it needs escaping for TOON format. +// A string must be quoted if it: is empty, contains comma/colon/quote/backslash/ +// brackets/braces/control chars, has leading/trailing whitespace, or matches +// true/false/null or a numeric pattern. +func toonEscape(s string) string { + if s == "" { + return `""` + } + if s == "true" || s == "false" || s == "null" { + return `"` + s + `"` + } + // Check if it looks numeric + if _, err := strconv.ParseFloat(s, 64); err == nil { + return `"` + s + `"` + } + needsQuote := false + for _, ch := range s { + if ch == ',' || ch == ':' || ch == '"' || ch == '\\' || + ch == '[' || ch == ']' || ch == '{' || ch == '}' || + ch == '\n' || ch == '\r' || ch == '\t' || + ch < 0x20 { + needsQuote = true + break + } + } + if s[0] == ' ' || s[len(s)-1] == ' ' { + needsQuote = true + } + if !needsQuote { + return s + } + // Quote with escaping + var sb strings.Builder + sb.WriteByte('"') + for _, ch := range s { + switch ch { + case '\\': + sb.WriteString(`\\`) + case '"': + sb.WriteString(`\"`) + case '\n': + sb.WriteString(`\n`) + case '\r': + sb.WriteString(`\r`) + case '\t': + sb.WriteString(`\t`) + default: + sb.WriteRune(ch) + } + } + sb.WriteByte('"') + return sb.String() +} + +func jsonValue(v expr.Value) string { + switch v.Kind { + case expr.KindString: + return fmt.Sprintf("%q", v.Str) + case expr.KindInt: + return strconv.Itoa(v.Int) + case expr.KindBool: + return strconv.FormatBool(v.Bool) + default: + return "null" + } +} + +func formatGroups(result *Result) string { + var sb strings.Builder + for _, g := range result.Groups { + fmt.Fprintf(&sb, "%s: count=%d", g.Key, g.Count) + if len(g.Names) > 0 { + names := slices.Clone(g.Names) + if len(names) > 5 
{ + names = names[:5] + names = append(names, "...") + } + fmt.Fprintf(&sb, " names=[%s]", strings.Join(names, ", ")) + } + sb.WriteString("\n") + } + return sb.String() +} + +func formatGroupsJSON(result *Result) string { + var sb strings.Builder + sb.WriteString("[\n") + for i, g := range result.Groups { + if i > 0 { + sb.WriteString(",\n") + } + fmt.Fprintf(&sb, ` {"key": %q, "count": %d, "names": [`, g.Key, g.Count) + for j, n := range g.Names { + if j > 0 { + sb.WriteString(", ") + } + fmt.Fprintf(&sb, "%q", n) + } + sb.WriteString("]}") + } + sb.WriteString("\n]") + return sb.String() +} + +func padRight(s string, width int) string { + if len(s) >= width { + return s + } + return s + strings.Repeat(" ", width-len(s)) +} diff --git a/oq/oq.go b/oq/oq.go index 10e12d2..2809c27 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -6,17 +6,9 @@ package oq import ( - "crypto/sha256" - "encoding/hex" - "errors" "fmt" - "slices" - "sort" - "strconv" - "strings" "github.com/speakeasy-api/openapi/graph" - "github.com/speakeasy-api/openapi/oq/expr" ) // ResultKind distinguishes between schema and operation result rows. @@ -120,1795 +112,3 @@ type Stage struct { PathTo string // for StagePath Format string // for StageFormat } - -// Parse splits a pipeline query string into stages. 
-func Parse(query string) ([]Stage, error) { - // Split by pipe, respecting quoted strings - parts := splitPipeline(query) - if len(parts) == 0 { - return nil, errors.New("empty query") - } - - var stages []Stage - - for i, part := range parts { - part = strings.TrimSpace(part) - if part == "" { - continue - } - - if i == 0 { - // First part is a source - stages = append(stages, Stage{Kind: StageSource, Source: part}) - continue - } - - stage, err := parseStage(part) - if err != nil { - return nil, err - } - stages = append(stages, stage) - } - - return stages, nil -} - -func parseStage(s string) (Stage, error) { - // Extract the keyword - keyword, rest := splitFirst(s) - keyword = strings.ToLower(keyword) - - switch keyword { - case "where": - if rest == "" { - return Stage{}, errors.New("where requires an expression") - } - return Stage{Kind: StageWhere, Expr: rest}, nil - - case "select": - if rest == "" { - return Stage{}, errors.New("select requires field names") - } - fields := parseCSV(rest) - return Stage{Kind: StageSelect, Fields: fields}, nil - - case "sort": - parts := strings.Fields(rest) - if len(parts) == 0 { - return Stage{}, errors.New("sort requires a field name") - } - desc := false - if len(parts) >= 2 && strings.ToLower(parts[1]) == "desc" { - desc = true - } - return Stage{Kind: StageSort, SortField: parts[0], SortDesc: desc}, nil - - case "take", "head": - n, err := strconv.Atoi(strings.TrimSpace(rest)) - if err != nil { - return Stage{}, fmt.Errorf("take requires a number: %w", err) - } - return Stage{Kind: StageTake, Limit: n}, nil - - case "unique": - return Stage{Kind: StageUnique}, nil - - case "group-by": - if rest == "" { - return Stage{}, errors.New("group-by requires a field name") - } - fields := parseCSV(rest) - return Stage{Kind: StageGroupBy, Fields: fields}, nil - - case "count": - return Stage{Kind: StageCount}, nil - - case "refs-out": - return Stage{Kind: StageRefsOut}, nil - - case "refs-in": - return Stage{Kind: 
StageRefsIn}, nil - - case "reachable": - return Stage{Kind: StageReachable}, nil - - case "ancestors": - return Stage{Kind: StageAncestors}, nil - - case "properties": - return Stage{Kind: StageProperties}, nil - - case "union-members": - return Stage{Kind: StageUnionMembers}, nil - - case "items": - return Stage{Kind: StageItems}, nil - - case "ops": - return Stage{Kind: StageOps}, nil - - case "schemas": - return Stage{Kind: StageSchemas}, nil - - case "explain": - return Stage{Kind: StageExplain}, nil - - case "fields": - return Stage{Kind: StageFields}, nil - - case "sample": - n, err := strconv.Atoi(strings.TrimSpace(rest)) - if err != nil { - return Stage{}, fmt.Errorf("sample requires a number: %w", err) - } - return Stage{Kind: StageSample, Limit: n}, nil - - case "path": - from, to := parseTwoArgs(rest) - if from == "" || to == "" { - return Stage{}, errors.New("path requires two schema names") - } - return Stage{Kind: StagePath, PathFrom: from, PathTo: to}, nil - - case "top": - parts := strings.Fields(rest) - if len(parts) < 2 { - return Stage{}, errors.New("top requires a number and a field name") - } - n, err := strconv.Atoi(parts[0]) - if err != nil { - return Stage{}, fmt.Errorf("top requires a number: %w", err) - } - return Stage{Kind: StageTop, Limit: n, SortField: parts[1]}, nil - - case "bottom": - parts := strings.Fields(rest) - if len(parts) < 2 { - return Stage{}, errors.New("bottom requires a number and a field name") - } - n, err := strconv.Atoi(parts[0]) - if err != nil { - return Stage{}, fmt.Errorf("bottom requires a number: %w", err) - } - return Stage{Kind: StageBottom, Limit: n, SortField: parts[1]}, nil - - case "format": - f := strings.TrimSpace(rest) - if f != "table" && f != "json" && f != "markdown" && f != "toon" { - return Stage{}, fmt.Errorf("format must be table, json, markdown, or toon, got %q", f) - } - return Stage{Kind: StageFormat, Format: f}, nil - - case "connected": - return Stage{Kind: StageConnected}, nil - - case 
"blast-radius": - return Stage{Kind: StageBlastRadius}, nil - - case "neighbors": - n, err := strconv.Atoi(strings.TrimSpace(rest)) - if err != nil { - return Stage{}, fmt.Errorf("neighbors requires a depth number: %w", err) - } - return Stage{Kind: StageNeighbors, Limit: n}, nil - - case "orphans": - return Stage{Kind: StageOrphans}, nil - - case "leaves": - return Stage{Kind: StageLeaves}, nil - - case "cycles": - return Stage{Kind: StageCycles}, nil - - case "clusters": - return Stage{Kind: StageClusters}, nil - - case "tag-boundary": - return Stage{Kind: StageTagBoundary}, nil - - case "shared-refs": - return Stage{Kind: StageSharedRefs}, nil - - default: - return Stage{}, fmt.Errorf("unknown stage: %q", keyword) - } -} - -// --- Executor --- - -func run(stages []Stage, g *graph.SchemaGraph) (*Result, error) { - if len(stages) == 0 { - return &Result{}, nil - } - - // Check if explain stage is present - for _, stage := range stages { - if stage.Kind == StageExplain { - return &Result{Explain: buildExplain(stages)}, nil - } - } - - // Execute source stage - result, err := execSource(stages[0], g) - if err != nil { - return nil, err - } - - // Execute remaining stages - for _, stage := range stages[1:] { - result, err = execStage(stage, result, g) - if err != nil { - return nil, err - } - } - - return result, nil -} - -func execSource(stage Stage, g *graph.SchemaGraph) (*Result, error) { - result := &Result{} - switch stage.Source { - case "schemas": - for i := range g.Schemas { - result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) - } - case "schemas.components": - for i, s := range g.Schemas { - if s.IsComponent { - result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) - } - } - case "schemas.inline": - for i, s := range g.Schemas { - if s.IsInline { - result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) - } - } - case "operations": - for i := range g.Operations { - result.Rows = append(result.Rows, 
Row{Kind: OperationResult, OpIdx: i}) - } - default: - return nil, fmt.Errorf("unknown source: %q", stage.Source) - } - return result, nil -} - -func execStage(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { - switch stage.Kind { - case StageWhere: - return execWhere(stage, result, g) - case StageSelect: - result.Fields = stage.Fields - return result, nil - case StageSort: - return execSort(stage, result, g) - case StageTake: - return execTake(stage, result) - case StageUnique: - return execUnique(result) - case StageGroupBy: - return execGroupBy(stage, result, g) - case StageCount: - return &Result{IsCount: true, Count: len(result.Rows)}, nil - case StageRefsOut: - return execTraversal(result, g, traverseRefsOut) - case StageRefsIn: - return execTraversal(result, g, traverseRefsIn) - case StageReachable: - return execTraversal(result, g, traverseReachable) - case StageAncestors: - return execTraversal(result, g, traverseAncestors) - case StageProperties: - return execTraversal(result, g, traverseProperties) - case StageUnionMembers: - return execTraversal(result, g, traverseUnionMembers) - case StageItems: - return execTraversal(result, g, traverseItems) - case StageOps: - return execSchemasToOps(result, g) - case StageSchemas: - return execOpsToSchemas(result, g) - case StageFields: - return execFields(result) - case StageSample: - return execSample(stage, result) - case StagePath: - return execPath(stage, g) - case StageTop: - // Expand to sort desc + take - sorted, err := execSort(Stage{Kind: StageSort, SortField: stage.SortField, SortDesc: true}, result, g) - if err != nil { - return nil, err - } - return execTake(Stage{Kind: StageTake, Limit: stage.Limit}, sorted) - case StageBottom: - // Expand to sort asc + take - sorted, err := execSort(Stage{Kind: StageSort, SortField: stage.SortField, SortDesc: false}, result, g) - if err != nil { - return nil, err - } - return execTake(Stage{Kind: StageTake, Limit: stage.Limit}, sorted) - case 
StageFormat: - result.FormatHint = stage.Format - return result, nil - case StageConnected: - return execConnected(result, g) - case StageBlastRadius: - return execBlastRadius(result, g) - case StageNeighbors: - return execNeighbors(stage, result, g) - case StageOrphans: - return execOrphans(result, g) - case StageLeaves: - return execLeaves(result, g) - case StageCycles: - return execCycles(result, g) - case StageClusters: - return execClusters(result, g) - case StageTagBoundary: - return execTagBoundary(result, g) - case StageSharedRefs: - return execSharedRefs(result, g) - default: - return nil, fmt.Errorf("unimplemented stage kind: %d", stage.Kind) - } -} - -func execWhere(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { - predicate, err := expr.Parse(stage.Expr) - if err != nil { - return nil, fmt.Errorf("where expression error: %w", err) - } - - filtered := &Result{Fields: result.Fields} - for _, row := range result.Rows { - r := rowAdapter{row: row, g: g} - val := predicate.Eval(r) - if val.Kind == expr.KindBool && val.Bool { - filtered.Rows = append(filtered.Rows, row) - } - } - return filtered, nil -} - -func execSort(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { - sort.SliceStable(result.Rows, func(i, j int) bool { - vi := fieldValue(result.Rows[i], stage.SortField, g) - vj := fieldValue(result.Rows[j], stage.SortField, g) - - cmp := compareValues(vi, vj) - if stage.SortDesc { - return cmp > 0 - } - return cmp < 0 - }) - return result, nil -} - -func execTake(stage Stage, result *Result) (*Result, error) { - if stage.Limit < len(result.Rows) { - result.Rows = result.Rows[:stage.Limit] - } - return result, nil -} - -func execUnique(result *Result) (*Result, error) { - seen := make(map[string]bool) - filtered := &Result{Fields: result.Fields} - for _, row := range result.Rows { - key := rowKey(row) - if !seen[key] { - seen[key] = true - filtered.Rows = append(filtered.Rows, row) - } - } - return filtered, nil -} 
- -func execGroupBy(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { - if len(stage.Fields) == 0 { - return nil, errors.New("group-by requires at least one field") - } - field := stage.Fields[0] - - type group struct { - count int - names []string - } - groups := make(map[string]*group) - var order []string - - for _, row := range result.Rows { - v := fieldValue(row, field, g) - key := valueToString(v) - grp, exists := groups[key] - if !exists { - grp = &group{} - groups[key] = grp - order = append(order, key) - } - grp.count++ - nameV := fieldValue(row, "name", g) - grp.names = append(grp.names, valueToString(nameV)) - } - - grouped := &Result{Fields: result.Fields} - for _, key := range order { - grp, ok := groups[key] - if !ok { - continue - } - grouped.Groups = append(grouped.Groups, GroupResult{ - Key: key, - Count: grp.count, - Names: grp.names, - }) - } - return grouped, nil -} - -// --- Traversal --- - -type traversalFunc func(row Row, g *graph.SchemaGraph) []Row - -func execTraversal(result *Result, g *graph.SchemaGraph, fn traversalFunc) (*Result, error) { - out := &Result{Fields: result.Fields} - seen := make(map[string]bool) - for _, row := range result.Rows { - for _, newRow := range fn(row, g) { - key := edgeRowKey(newRow) - if !seen[key] { - seen[key] = true - out.Rows = append(out.Rows, newRow) - } - } - } - return out, nil -} - -func edgeRowKey(row Row) string { - base := rowKey(row) - if row.EdgeKind == "" { - return base - } - return base + "|" + row.EdgeFrom + "|" + row.EdgeKind + "|" + row.EdgeLabel -} - -func traverseRefsOut(row Row, g *graph.SchemaGraph) []Row { - if row.Kind != SchemaResult { - return nil - } - fromName := schemaName(row.SchemaIdx, g) - var result []Row - for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { - result = append(result, Row{ - Kind: SchemaResult, - SchemaIdx: int(edge.To), - EdgeKind: edgeKindString(edge.Kind), - EdgeLabel: edge.Label, - EdgeFrom: fromName, - }) - } - return result 
-} - -func traverseRefsIn(row Row, g *graph.SchemaGraph) []Row { - if row.Kind != SchemaResult { - return nil - } - toName := schemaName(row.SchemaIdx, g) - var result []Row - for _, edge := range g.InEdges(graph.NodeID(row.SchemaIdx)) { - result = append(result, Row{ - Kind: SchemaResult, - SchemaIdx: int(edge.From), - EdgeKind: edgeKindString(edge.Kind), - EdgeLabel: edge.Label, - EdgeFrom: toName, - }) - } - return result -} - -func traverseReachable(row Row, g *graph.SchemaGraph) []Row { - if row.Kind != SchemaResult { - return nil - } - ids := g.Reachable(graph.NodeID(row.SchemaIdx)) - result := make([]Row, len(ids)) - for i, id := range ids { - result[i] = Row{Kind: SchemaResult, SchemaIdx: int(id)} - } - return result -} - -func traverseAncestors(row Row, g *graph.SchemaGraph) []Row { - if row.Kind != SchemaResult { - return nil - } - ids := g.Ancestors(graph.NodeID(row.SchemaIdx)) - result := make([]Row, len(ids)) - for i, id := range ids { - result[i] = Row{Kind: SchemaResult, SchemaIdx: int(id)} - } - return result -} - -func traverseProperties(row Row, g *graph.SchemaGraph) []Row { - if row.Kind != SchemaResult { - return nil - } - fromName := schemaName(row.SchemaIdx, g) - var result []Row - for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { - if edge.Kind == graph.EdgeProperty { - result = append(result, Row{ - Kind: SchemaResult, - SchemaIdx: int(edge.To), - EdgeKind: edgeKindString(edge.Kind), - EdgeLabel: edge.Label, - EdgeFrom: fromName, - }) - } - } - return result -} - -func traverseUnionMembers(row Row, g *graph.SchemaGraph) []Row { - if row.Kind != SchemaResult { - return nil - } - fromName := schemaName(row.SchemaIdx, g) - var result []Row - for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { - if edge.Kind == graph.EdgeAllOf || edge.Kind == graph.EdgeOneOf || edge.Kind == graph.EdgeAnyOf { - // Follow through $ref nodes transparently - target := resolveRefTarget(int(edge.To), g) - result = append(result, Row{ - Kind: 
SchemaResult, - SchemaIdx: target, - EdgeKind: edgeKindString(edge.Kind), - EdgeLabel: edge.Label, - EdgeFrom: fromName, - }) - } - } - return result -} - -func traverseItems(row Row, g *graph.SchemaGraph) []Row { - if row.Kind != SchemaResult { - return nil - } - fromName := schemaName(row.SchemaIdx, g) - var result []Row - for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { - if edge.Kind == graph.EdgeItems { - result = append(result, Row{ - Kind: SchemaResult, - SchemaIdx: int(edge.To), - EdgeKind: edgeKindString(edge.Kind), - EdgeLabel: edge.Label, - EdgeFrom: fromName, - }) - } - } - return result -} - -// resolveRefTarget follows EdgeRef edges to get the actual target node. -// If the node at idx is a $ref wrapper, returns the target component's index. -// Otherwise returns idx unchanged. -func resolveRefTarget(idx int, g *graph.SchemaGraph) int { - if idx < 0 || idx >= len(g.Schemas) { - return idx - } - node := &g.Schemas[idx] - if !node.HasRef { - return idx - } - // Follow EdgeRef edges - for _, edge := range g.OutEdges(graph.NodeID(idx)) { - if edge.Kind == graph.EdgeRef { - return int(edge.To) - } - } - return idx -} - -func execSchemasToOps(result *Result, g *graph.SchemaGraph) (*Result, error) { - out := &Result{Fields: result.Fields} - seen := make(map[int]bool) - for _, row := range result.Rows { - if row.Kind != SchemaResult { - continue - } - opIDs := g.SchemaOperations(graph.NodeID(row.SchemaIdx)) - for _, opID := range opIDs { - idx := int(opID) - if !seen[idx] { - seen[idx] = true - out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: idx}) - } - } - } - return out, nil -} - -func execOpsToSchemas(result *Result, g *graph.SchemaGraph) (*Result, error) { - out := &Result{Fields: result.Fields} - seen := make(map[int]bool) - for _, row := range result.Rows { - if row.Kind != OperationResult { - continue - } - schemaIDs := g.OperationSchemas(graph.NodeID(row.OpIdx)) - for _, sid := range schemaIDs { - idx := int(sid) - if 
!seen[idx] { - seen[idx] = true - out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: idx}) - } - } - } - return out, nil -} - -func execConnected(result *Result, g *graph.SchemaGraph) (*Result, error) { - var schemaSeeds, opSeeds []graph.NodeID - for _, row := range result.Rows { - switch row.Kind { - case SchemaResult: - schemaSeeds = append(schemaSeeds, graph.NodeID(row.SchemaIdx)) - case OperationResult: - opSeeds = append(opSeeds, graph.NodeID(row.OpIdx)) - } - } - - schemas, ops := g.ConnectedComponent(schemaSeeds, opSeeds) - - out := &Result{Fields: result.Fields} - for _, id := range schemas { - out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) - } - for _, id := range ops { - out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: int(id)}) - } - return out, nil -} - -func execBlastRadius(result *Result, g *graph.SchemaGraph) (*Result, error) { - out := &Result{Fields: result.Fields} - seenSchemas := make(map[int]bool) - seenOps := make(map[int]bool) - - // Collect seed schemas - var seeds []graph.NodeID - for _, row := range result.Rows { - if row.Kind == SchemaResult { - seeds = append(seeds, graph.NodeID(row.SchemaIdx)) - seenSchemas[row.SchemaIdx] = true - } - } - - // Find all ancestors (schemas that depend on the seeds) - for _, seed := range seeds { - for _, aid := range g.Ancestors(seed) { - seenSchemas[int(aid)] = true - } - } - - // Add schema rows - for idx := range seenSchemas { - out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: idx}) - } - - // Find all operations that reference any affected schema - for idx := range seenSchemas { - for _, opID := range g.SchemaOperations(graph.NodeID(idx)) { - if !seenOps[int(opID)] { - seenOps[int(opID)] = true - out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: int(opID)}) - } - } - } - - return out, nil -} - -func execNeighbors(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { - out := &Result{Fields: result.Fields} - 
seen := make(map[int]bool) - - for _, row := range result.Rows { - if row.Kind != SchemaResult { - continue - } - // Include seed - if !seen[row.SchemaIdx] { - seen[row.SchemaIdx] = true - out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: row.SchemaIdx}) - } - for _, id := range g.Neighbors(graph.NodeID(row.SchemaIdx), stage.Limit) { - if !seen[int(id)] { - seen[int(id)] = true - out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) - } - } - } - - return out, nil -} - -func execOrphans(result *Result, g *graph.SchemaGraph) (*Result, error) { - out := &Result{Fields: result.Fields} - for _, row := range result.Rows { - if row.Kind != SchemaResult { - continue - } - s := &g.Schemas[row.SchemaIdx] - if s.InDegree == 0 && g.SchemaOpCount(graph.NodeID(row.SchemaIdx)) == 0 { - out.Rows = append(out.Rows, row) - } - } - return out, nil -} - -func execLeaves(result *Result, g *graph.SchemaGraph) (*Result, error) { - out := &Result{Fields: result.Fields} - for _, row := range result.Rows { - if row.Kind != SchemaResult { - continue - } - if g.Schemas[row.SchemaIdx].OutDegree == 0 { - out.Rows = append(out.Rows, row) - } - } - return out, nil -} - -func execCycles(result *Result, g *graph.SchemaGraph) (*Result, error) { - sccs := g.StronglyConnectedComponents() - - // Filter SCCs to only include nodes present in the current result - resultNodes := make(map[int]bool) - for _, row := range result.Rows { - if row.Kind == SchemaResult { - resultNodes[row.SchemaIdx] = true - } - } - - out := &Result{Fields: result.Fields} - for i, scc := range sccs { - hasMatch := false - for _, id := range scc { - if resultNodes[int(id)] { - hasMatch = true - break - } - } - if !hasMatch { - continue - } - var names []string - for _, id := range scc { - if int(id) < len(g.Schemas) { - names = append(names, g.Schemas[id].Name) - } - } - out.Groups = append(out.Groups, GroupResult{ - Key: "cycle-" + strconv.Itoa(i+1), - Count: len(scc), - Names: names, - }) - } - - 
return out, nil -} - -func execClusters(result *Result, g *graph.SchemaGraph) (*Result, error) { - resultNodes := make(map[int]bool) - for _, row := range result.Rows { - if row.Kind == SchemaResult { - resultNodes[row.SchemaIdx] = true - } - } - - // BFS to find connected components. Follow ALL graph edges (including - // through intermediary nodes like $ref wrappers) but only collect - // nodes that are in the result set. - assigned := make(map[int]bool) // result nodes already assigned to a cluster - out := &Result{Fields: result.Fields} - clusterNum := 0 - - for idx := range resultNodes { - if assigned[idx] { - continue - } - clusterNum++ - var component []int - - // BFS through the full graph - visited := make(map[int]bool) - queue := []int{idx} - visited[idx] = true - - for len(queue) > 0 { - cur := queue[0] - queue = queue[1:] - - if resultNodes[cur] && !assigned[cur] { - assigned[cur] = true - component = append(component, cur) - } - - for _, edge := range g.OutEdges(graph.NodeID(cur)) { - to := int(edge.To) - if !visited[to] { - visited[to] = true - queue = append(queue, to) - } - } - for _, edge := range g.InEdges(graph.NodeID(cur)) { - from := int(edge.From) - if !visited[from] { - visited[from] = true - queue = append(queue, from) - } - } - } - - var names []string - for _, id := range component { - if id < len(g.Schemas) { - names = append(names, g.Schemas[id].Name) - } - } - if len(component) > 0 { - out.Groups = append(out.Groups, GroupResult{ - Key: "cluster-" + strconv.Itoa(clusterNum), - Count: len(component), - Names: names, - }) - } - } - - return out, nil -} - -func execTagBoundary(result *Result, g *graph.SchemaGraph) (*Result, error) { - out := &Result{Fields: result.Fields} - for _, row := range result.Rows { - if row.Kind != SchemaResult { - continue - } - if schemaTagCount(row.SchemaIdx, g) > 1 { - out.Rows = append(out.Rows, row) - } - } - return out, nil -} - -func schemaTagCount(schemaIdx int, g *graph.SchemaGraph) int { - tags := 
make(map[string]bool) - for _, opID := range g.SchemaOperations(graph.NodeID(schemaIdx)) { - if int(opID) < len(g.Operations) { - op := &g.Operations[opID] - if op.Operation != nil { - for _, tag := range op.Operation.Tags { - tags[tag] = true - } - } - } - } - return len(tags) -} - -func execSharedRefs(result *Result, g *graph.SchemaGraph) (*Result, error) { - var ops []graph.NodeID - for _, row := range result.Rows { - if row.Kind == OperationResult { - ops = append(ops, graph.NodeID(row.OpIdx)) - } - } - - if len(ops) == 0 { - return &Result{Fields: result.Fields}, nil - } - - // Start with first operation's schemas - intersection := make(map[graph.NodeID]bool) - for _, sid := range g.OperationSchemas(ops[0]) { - intersection[sid] = true - } - - // Intersect with each subsequent operation - for _, opID := range ops[1:] { - opSchemas := make(map[graph.NodeID]bool) - for _, sid := range g.OperationSchemas(opID) { - opSchemas[sid] = true - } - for sid := range intersection { - if !opSchemas[sid] { - delete(intersection, sid) - } - } - } - - out := &Result{Fields: result.Fields} - for sid := range intersection { - out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(sid)}) - } - return out, nil -} - -// --- Edge annotation helpers --- - -func schemaName(idx int, g *graph.SchemaGraph) string { - if idx >= 0 && idx < len(g.Schemas) { - return g.Schemas[idx].Name - } - return "" -} - -func edgeKindString(k graph.EdgeKind) string { - switch k { - case graph.EdgeProperty: - return "property" - case graph.EdgeItems: - return "items" - case graph.EdgeAllOf: - return "allOf" - case graph.EdgeOneOf: - return "oneOf" - case graph.EdgeAnyOf: - return "anyOf" - case graph.EdgeAdditionalProps: - return "additionalProperties" - case graph.EdgeNot: - return "not" - case graph.EdgeIf: - return "if" - case graph.EdgeThen: - return "then" - case graph.EdgeElse: - return "else" - case graph.EdgeContains: - return "contains" - case graph.EdgePrefixItems: - return 
"prefixItems" - case graph.EdgeDependentSchema: - return "dependentSchema" - case graph.EdgePatternProperty: - return "patternProperty" - case graph.EdgePropertyNames: - return "propertyNames" - case graph.EdgeUnevaluatedItems: - return "unevaluatedItems" - case graph.EdgeUnevaluatedProps: - return "unevaluatedProperties" - case graph.EdgeRef: - return "ref" - default: - return "unknown" - } -} - -// --- Field access --- - -type rowAdapter struct { - row Row - g *graph.SchemaGraph -} - -func (r rowAdapter) Field(name string) expr.Value { - return fieldValue(r.row, name, r.g) -} - -// FieldValuePublic returns the value of a named field for the given row. -// Exported for testing and external consumers. -func FieldValuePublic(row Row, name string, g *graph.SchemaGraph) expr.Value { - return fieldValue(row, name, g) -} - -func fieldValue(row Row, name string, g *graph.SchemaGraph) expr.Value { - switch row.Kind { - case SchemaResult: - if row.SchemaIdx < 0 || row.SchemaIdx >= len(g.Schemas) { - return expr.NullVal() - } - s := &g.Schemas[row.SchemaIdx] - switch name { - case "name": - return expr.StringVal(s.Name) - case "type": - return expr.StringVal(s.Type) - case "depth": - return expr.IntVal(s.Depth) - case "in_degree": - return expr.IntVal(s.InDegree) - case "out_degree": - return expr.IntVal(s.OutDegree) - case "union_width": - return expr.IntVal(s.UnionWidth) - case "property_count": - return expr.IntVal(s.PropertyCount) - case "is_component": - return expr.BoolVal(s.IsComponent) - case "is_inline": - return expr.BoolVal(s.IsInline) - case "is_circular": - return expr.BoolVal(s.IsCircular) - case "has_ref": - return expr.BoolVal(s.HasRef) - case "hash": - return expr.StringVal(s.Hash) - case "path": - return expr.StringVal(s.Path) - case "op_count": - return expr.IntVal(g.SchemaOpCount(graph.NodeID(row.SchemaIdx))) - case "tag_count": - return expr.IntVal(schemaTagCount(row.SchemaIdx, g)) - case "edge_kind": - return expr.StringVal(row.EdgeKind) - case 
"edge_label": - return expr.StringVal(row.EdgeLabel) - case "edge_from": - return expr.StringVal(row.EdgeFrom) - } - case OperationResult: - if row.OpIdx < 0 || row.OpIdx >= len(g.Operations) { - return expr.NullVal() - } - o := &g.Operations[row.OpIdx] - switch name { - case "name": - return expr.StringVal(o.Name) - case "method": - return expr.StringVal(o.Method) - case "path": - return expr.StringVal(o.Path) - case "operation_id": - return expr.StringVal(o.OperationID) - case "schema_count": - return expr.IntVal(o.SchemaCount) - case "component_count": - return expr.IntVal(o.ComponentCount) - case "tag": - if o.Operation != nil && len(o.Operation.Tags) > 0 { - return expr.StringVal(o.Operation.Tags[0]) - } - return expr.StringVal("") - case "parameter_count": - if o.Operation != nil { - return expr.IntVal(len(o.Operation.Parameters)) - } - return expr.IntVal(0) - case "deprecated": - if o.Operation != nil { - return expr.BoolVal(o.Operation.Deprecated != nil && *o.Operation.Deprecated) - } - return expr.BoolVal(false) - case "description": - if o.Operation != nil { - return expr.StringVal(o.Operation.GetDescription()) - } - return expr.StringVal("") - case "summary": - if o.Operation != nil { - return expr.StringVal(o.Operation.GetSummary()) - } - return expr.StringVal("") - case "edge_kind": - return expr.StringVal(row.EdgeKind) - case "edge_label": - return expr.StringVal(row.EdgeLabel) - case "edge_from": - return expr.StringVal(row.EdgeFrom) - } - } - return expr.NullVal() -} - -func compareValues(a, b expr.Value) int { - if a.Kind == expr.KindInt && b.Kind == expr.KindInt { - if a.Int < b.Int { - return -1 - } - if a.Int > b.Int { - return 1 - } - return 0 - } - sa := valueToString(a) - sb := valueToString(b) - if sa < sb { - return -1 - } - if sa > sb { - return 1 - } - return 0 -} - -func valueToString(v expr.Value) string { - switch v.Kind { - case expr.KindString: - return v.Str - case expr.KindInt: - return strconv.Itoa(v.Int) - case expr.KindBool: - 
return strconv.FormatBool(v.Bool) - default: - return "" - } -} - -func rowKey(row Row) string { - if row.Kind == SchemaResult { - return "s:" + strconv.Itoa(row.SchemaIdx) - } - return "o:" + strconv.Itoa(row.OpIdx) -} - -// --- Explain --- - -func buildExplain(stages []Stage) string { - var sb strings.Builder - for i, stage := range stages { - if stage.Kind == StageExplain { - continue - } - if i == 0 { - fmt.Fprintf(&sb, "Source: %s\n", stage.Source) - } else { - desc := describeStage(stage) - fmt.Fprintf(&sb, " → %s\n", desc) - } - } - return sb.String() -} - -func describeStage(stage Stage) string { - switch stage.Kind { - case StageWhere: - return "Filter: where " + stage.Expr - case StageSelect: - return "Project: select " + strings.Join(stage.Fields, ", ") - case StageSort: - dir := "ascending" - if stage.SortDesc { - dir = "descending" - } - return "Sort: " + stage.SortField + " " + dir - case StageTake: - return "Limit: take " + strconv.Itoa(stage.Limit) - case StageUnique: - return "Unique: deduplicate rows" - case StageGroupBy: - return "Group: group-by " + strings.Join(stage.Fields, ", ") - case StageCount: - return "Count: count rows" - case StageRefsOut: - return "Traverse: outgoing references" - case StageRefsIn: - return "Traverse: incoming references" - case StageReachable: - return "Traverse: all reachable nodes" - case StageAncestors: - return "Traverse: all ancestor nodes" - case StageProperties: - return "Traverse: property children" - case StageUnionMembers: - return "Traverse: union members" - case StageItems: - return "Traverse: array items" - case StageOps: - return "Navigate: schemas to operations" - case StageSchemas: - return "Navigate: operations to schemas" - case StageFields: - return "Terminal: list available fields" - case StageSample: - return "Sample: random " + strconv.Itoa(stage.Limit) + " rows" - case StagePath: - return "Path: shortest path from " + stage.PathFrom + " to " + stage.PathTo - case StageTop: - return "Top: " + 
strconv.Itoa(stage.Limit) + " by " + stage.SortField + " descending" - case StageBottom: - return "Bottom: " + strconv.Itoa(stage.Limit) + " by " + stage.SortField + " ascending" - case StageFormat: - return "Format: " + stage.Format - case StageConnected: - return "Traverse: full connected component (schemas + operations)" - case StageBlastRadius: - return "Traverse: blast radius (ancestors + affected operations)" - case StageNeighbors: - return "Traverse: bidirectional neighbors within " + strconv.Itoa(stage.Limit) + " hops" - case StageOrphans: - return "Filter: schemas with no incoming refs and no operation usage" - case StageLeaves: - return "Filter: schemas with no outgoing refs (leaf nodes)" - case StageCycles: - return "Analyze: strongly connected components (actual cycles)" - case StageClusters: - return "Analyze: weakly connected component clusters" - case StageTagBoundary: - return "Filter: schemas used by operations across multiple tags" - case StageSharedRefs: - return "Analyze: schemas shared by all operations in result" - default: - return "Unknown stage" - } -} - -// --- Fields --- - -func execFields(result *Result) (*Result, error) { - var sb strings.Builder - kind := SchemaResult - if len(result.Rows) > 0 { - kind = result.Rows[0].Kind - } - - if kind == SchemaResult { - sb.WriteString("Field Type\n") - sb.WriteString("----------- ------\n") - fields := []struct{ name, typ string }{ - {"name", "string"}, - {"type", "string"}, - {"depth", "int"}, - {"in_degree", "int"}, - {"out_degree", "int"}, - {"union_width", "int"}, - {"property_count", "int"}, - {"is_component", "bool"}, - {"is_inline", "bool"}, - {"is_circular", "bool"}, - {"has_ref", "bool"}, - {"hash", "string"}, - {"path", "string"}, - {"op_count", "int"}, - {"tag_count", "int"}, - {"edge_kind", "string"}, - {"edge_label", "string"}, - {"edge_from", "string"}, - } - for _, f := range fields { - fmt.Fprintf(&sb, "%-17s %s\n", f.name, f.typ) - } - } else { - sb.WriteString("Field Type\n") - 
sb.WriteString("----------- ------\n") - fields := []struct{ name, typ string }{ - {"name", "string"}, - {"method", "string"}, - {"path", "string"}, - {"operation_id", "string"}, - {"schema_count", "int"}, - {"component_count", "int"}, - {"tag", "string"}, - {"parameter_count", "int"}, - {"deprecated", "bool"}, - {"description", "string"}, - {"summary", "string"}, - {"edge_kind", "string"}, - {"edge_label", "string"}, - {"edge_from", "string"}, - } - for _, f := range fields { - fmt.Fprintf(&sb, "%-17s %s\n", f.name, f.typ) - } - } - - return &Result{Explain: sb.String()}, nil -} - -// --- Sample --- - -func execSample(stage Stage, result *Result) (*Result, error) { - if stage.Limit >= len(result.Rows) { - return result, nil - } - - // Deterministic shuffle: sort by hash of row key, then take first n - type keyed struct { - hash string - row Row - } - items := make([]keyed, len(result.Rows)) - for i, row := range result.Rows { - h := sha256.Sum256([]byte(rowKey(row))) - items[i] = keyed{hash: hex.EncodeToString(h[:]), row: row} - } - sort.SliceStable(items, func(i, j int) bool { - return items[i].hash < items[j].hash - }) - - out := &Result{Fields: result.Fields} - for i := 0; i < stage.Limit && i < len(items); i++ { - out.Rows = append(out.Rows, items[i].row) - } - return out, nil -} - -// --- Path --- - -func execPath(stage Stage, g *graph.SchemaGraph) (*Result, error) { - fromNode, ok := g.SchemaByName(stage.PathFrom) - if !ok { - return nil, fmt.Errorf("schema %q not found", stage.PathFrom) - } - toNode, ok := g.SchemaByName(stage.PathTo) - if !ok { - return nil, fmt.Errorf("schema %q not found", stage.PathTo) - } - - path := g.ShortestPath(fromNode.ID, toNode.ID) - out := &Result{} - for _, id := range path { - out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) - } - return out, nil -} - -// --- Arg parsing helpers --- - -func parseTwoArgs(s string) (string, string) { - s = strings.TrimSpace(s) - var args []string - for len(s) > 0 { - if 
s[0] == '"' { - // Quoted arg - end := strings.Index(s[1:], "\"") - if end < 0 { - args = append(args, s[1:]) - break - } - args = append(args, s[1:end+1]) - s = strings.TrimSpace(s[end+2:]) - } else { - idx := strings.IndexAny(s, " \t") - if idx < 0 { - args = append(args, s) - break - } - args = append(args, s[:idx]) - s = strings.TrimSpace(s[idx+1:]) - } - if len(args) == 2 { - break - } - } - if len(args) < 2 { - if len(args) == 1 { - return args[0], "" - } - return "", "" - } - return args[0], args[1] -} - -// --- Formatting --- - -// FormatTable formats a result as a simple table string. -func FormatTable(result *Result, g *graph.SchemaGraph) string { - if result.Explain != "" { - return result.Explain - } - - if result.IsCount { - return strconv.Itoa(result.Count) - } - - if len(result.Groups) > 0 { - return formatGroups(result) - } - - if len(result.Rows) == 0 { - return "(empty)" - } - - fields := result.Fields - if len(fields) == 0 { - if result.Rows[0].Kind == SchemaResult { - fields = []string{"name", "type", "depth", "in_degree", "out_degree"} - } else { - fields = []string{"name", "method", "path", "schema_count"} - } - } - - // Build header - widths := make([]int, len(fields)) - for i, f := range fields { - widths[i] = len(f) - } - - // Collect rows - var tableRows [][]string - for _, row := range result.Rows { - var cols []string - for i, f := range fields { - v := valueToString(fieldValue(row, f, g)) - cols = append(cols, v) - if len(v) > widths[i] { - widths[i] = len(v) - } - } - tableRows = append(tableRows, cols) - } - - // Format - var sb strings.Builder - // Header - for i, f := range fields { - if i > 0 { - sb.WriteString(" ") - } - sb.WriteString(padRight(f, widths[i])) - } - sb.WriteString("\n") - // Separator - for i, w := range widths { - if i > 0 { - sb.WriteString(" ") - } - sb.WriteString(strings.Repeat("-", w)) - } - sb.WriteString("\n") - // Data - for _, row := range tableRows { - for i, col := range row { - if i > 0 { - 
sb.WriteString(" ") - } - sb.WriteString(padRight(col, widths[i])) - } - sb.WriteString("\n") - } - - return sb.String() -} - -// FormatJSON formats a result as JSON. -func FormatJSON(result *Result, g *graph.SchemaGraph) string { - if result.Explain != "" { - return result.Explain - } - - if result.IsCount { - return strconv.Itoa(result.Count) - } - - if len(result.Groups) > 0 { - return formatGroupsJSON(result) - } - - if len(result.Rows) == 0 { - return "[]" - } - - fields := result.Fields - if len(fields) == 0 { - if result.Rows[0].Kind == SchemaResult { - fields = []string{"name", "type", "depth", "in_degree", "out_degree"} - } else { - fields = []string{"name", "method", "path", "schema_count"} - } - } - - var sb strings.Builder - sb.WriteString("[\n") - for i, row := range result.Rows { - if i > 0 { - sb.WriteString(",\n") - } - sb.WriteString(" {") - for j, f := range fields { - if j > 0 { - sb.WriteString(", ") - } - v := fieldValue(row, f, g) - fmt.Fprintf(&sb, "%q: %s", f, jsonValue(v)) - } - sb.WriteString("}") - } - sb.WriteString("\n]") - return sb.String() -} - -// FormatMarkdown formats a result as a markdown table. 
-func FormatMarkdown(result *Result, g *graph.SchemaGraph) string { - if result.Explain != "" { - return result.Explain - } - - if result.IsCount { - return strconv.Itoa(result.Count) - } - - if len(result.Groups) > 0 { - var sb strings.Builder - sb.WriteString("| Key | Count |\n") - sb.WriteString("| --- | --- |\n") - for _, grp := range result.Groups { - fmt.Fprintf(&sb, "| %s | %d |\n", grp.Key, grp.Count) - } - return sb.String() - } - - if len(result.Rows) == 0 { - return "(empty)" - } - - fields := result.Fields - if len(fields) == 0 { - if result.Rows[0].Kind == SchemaResult { - fields = []string{"name", "type", "depth", "in_degree", "out_degree"} - } else { - fields = []string{"name", "method", "path", "schema_count"} - } - } - - var sb strings.Builder - // Header - sb.WriteString("| ") - sb.WriteString(strings.Join(fields, " | ")) - sb.WriteString(" |\n") - // Separator - sb.WriteString("|") - for range fields { - sb.WriteString(" --- |") - } - sb.WriteString("\n") - // Rows - for _, row := range result.Rows { - sb.WriteString("| ") - for i, f := range fields { - if i > 0 { - sb.WriteString(" | ") - } - v := valueToString(fieldValue(row, f, g)) - sb.WriteString(v) - } - sb.WriteString(" |\n") - } - - return sb.String() -} - -// FormatToon formats a result in the TOON (Token-Oriented Object Notation) format. -// TOON uses tabular array syntax for uniform rows: header[N]{field1,field2,...}: -// followed by comma-delimited data rows. 
See https://github.com/toon-format/toon -func FormatToon(result *Result, g *graph.SchemaGraph) string { - if result.Explain != "" { - return result.Explain - } - - if result.IsCount { - return "count: " + strconv.Itoa(result.Count) + "\n" - } - - if len(result.Groups) > 0 { - return formatGroupsToon(result) - } - - if len(result.Rows) == 0 { - return "results[0]:\n" - } - - fields := result.Fields - if len(fields) == 0 { - if result.Rows[0].Kind == SchemaResult { - fields = []string{"name", "type", "depth", "in_degree", "out_degree"} - } else { - fields = []string{"name", "method", "path", "schema_count"} - } - } - - var sb strings.Builder - - // Header: results[N]{field1,field2,...}: - fmt.Fprintf(&sb, "results[%d]{%s}:\n", len(result.Rows), strings.Join(fields, ",")) - - // Data rows: comma-separated values, indented by one space - for _, row := range result.Rows { - sb.WriteByte(' ') - for i, f := range fields { - if i > 0 { - sb.WriteByte(',') - } - v := fieldValue(row, f, g) - sb.WriteString(toonValue(v)) - } - sb.WriteByte('\n') - } - - return sb.String() -} - -func formatGroupsToon(result *Result) string { - var sb strings.Builder - - // Groups as tabular array - fmt.Fprintf(&sb, "groups[%d]{key,count,names}:\n", len(result.Groups)) - for _, grp := range result.Groups { - names := strings.Join(grp.Names, ";") - fmt.Fprintf(&sb, " %s,%d,%s\n", toonEscape(grp.Key), grp.Count, toonEscape(names)) - } - return sb.String() -} - -// toonValue encodes an expr.Value for TOON format. -func toonValue(v expr.Value) string { - switch v.Kind { - case expr.KindString: - return toonEscape(v.Str) - case expr.KindInt: - return strconv.Itoa(v.Int) - case expr.KindBool: - return strconv.FormatBool(v.Bool) - default: - return "null" - } -} - -// toonEscape quotes a string if it needs escaping for TOON format. 
-// A string must be quoted if it: is empty, contains comma/colon/quote/backslash/ -// brackets/braces/control chars, has leading/trailing whitespace, or matches -// true/false/null or a numeric pattern. -func toonEscape(s string) string { - if s == "" { - return `""` - } - if s == "true" || s == "false" || s == "null" { - return `"` + s + `"` - } - // Check if it looks numeric - if _, err := strconv.ParseFloat(s, 64); err == nil { - return `"` + s + `"` - } - needsQuote := false - for _, ch := range s { - if ch == ',' || ch == ':' || ch == '"' || ch == '\\' || - ch == '[' || ch == ']' || ch == '{' || ch == '}' || - ch == '\n' || ch == '\r' || ch == '\t' || - ch < 0x20 { - needsQuote = true - break - } - } - if s[0] == ' ' || s[len(s)-1] == ' ' { - needsQuote = true - } - if !needsQuote { - return s - } - // Quote with escaping - var sb strings.Builder - sb.WriteByte('"') - for _, ch := range s { - switch ch { - case '\\': - sb.WriteString(`\\`) - case '"': - sb.WriteString(`\"`) - case '\n': - sb.WriteString(`\n`) - case '\r': - sb.WriteString(`\r`) - case '\t': - sb.WriteString(`\t`) - default: - sb.WriteRune(ch) - } - } - sb.WriteByte('"') - return sb.String() -} - -func jsonValue(v expr.Value) string { - switch v.Kind { - case expr.KindString: - return fmt.Sprintf("%q", v.Str) - case expr.KindInt: - return strconv.Itoa(v.Int) - case expr.KindBool: - return strconv.FormatBool(v.Bool) - default: - return "null" - } -} - -func formatGroups(result *Result) string { - var sb strings.Builder - for _, g := range result.Groups { - fmt.Fprintf(&sb, "%s: count=%d", g.Key, g.Count) - if len(g.Names) > 0 { - names := slices.Clone(g.Names) - if len(names) > 5 { - names = names[:5] - names = append(names, "...") - } - fmt.Fprintf(&sb, " names=[%s]", strings.Join(names, ", ")) - } - sb.WriteString("\n") - } - return sb.String() -} - -func formatGroupsJSON(result *Result) string { - var sb strings.Builder - sb.WriteString("[\n") - for i, g := range result.Groups { - if i > 0 { 
- sb.WriteString(",\n") - } - fmt.Fprintf(&sb, ` {"key": %q, "count": %d, "names": [`, g.Key, g.Count) - for j, n := range g.Names { - if j > 0 { - sb.WriteString(", ") - } - fmt.Fprintf(&sb, "%q", n) - } - sb.WriteString("]}") - } - sb.WriteString("\n]") - return sb.String() -} - -func padRight(s string, width int) string { - if len(s) >= width { - return s - } - return s + strings.Repeat(" ", width-len(s)) -} - -// --- Pipeline splitting --- - -func splitPipeline(input string) []string { - var parts []string - var current strings.Builder - inQuote := false - - for i := 0; i < len(input); i++ { - ch := input[i] - switch { - case ch == '"': - inQuote = !inQuote - current.WriteByte(ch) - case ch == '|' && !inQuote: - parts = append(parts, current.String()) - current.Reset() - default: - current.WriteByte(ch) - } - } - if current.Len() > 0 { - parts = append(parts, current.String()) - } - return parts -} - -func splitFirst(s string) (string, string) { - s = strings.TrimSpace(s) - idx := strings.IndexAny(s, " \t") - if idx < 0 { - return s, "" - } - return s[:idx], strings.TrimSpace(s[idx+1:]) -} - -func parseCSV(s string) []string { - parts := strings.Split(s, ",") - result := make([]string, 0, len(parts)) - for _, p := range parts { - p = strings.TrimSpace(p) - if p != "" { - result = append(result, p) - } - } - return result -} diff --git a/oq/parse.go b/oq/parse.go new file mode 100644 index 0000000..a0c8835 --- /dev/null +++ b/oq/parse.go @@ -0,0 +1,284 @@ +package oq + +import ( + "errors" + "fmt" + "strconv" + "strings" +) + +// Parse splits a pipeline query string into stages. 
+func Parse(query string) ([]Stage, error) { + // Split by pipe, respecting quoted strings + parts := splitPipeline(query) + if len(parts) == 0 { + return nil, errors.New("empty query") + } + + var stages []Stage + + for i, part := range parts { + part = strings.TrimSpace(part) + if part == "" { + continue + } + + if i == 0 { + // First part is a source + stages = append(stages, Stage{Kind: StageSource, Source: part}) + continue + } + + stage, err := parseStage(part) + if err != nil { + return nil, err + } + stages = append(stages, stage) + } + + return stages, nil +} + +func parseStage(s string) (Stage, error) { + // Extract the keyword + keyword, rest := splitFirst(s) + keyword = strings.ToLower(keyword) + + switch keyword { + case "where": + if rest == "" { + return Stage{}, errors.New("where requires an expression") + } + return Stage{Kind: StageWhere, Expr: rest}, nil + + case "select": + if rest == "" { + return Stage{}, errors.New("select requires field names") + } + fields := parseCSV(rest) + return Stage{Kind: StageSelect, Fields: fields}, nil + + case "sort": + parts := strings.Fields(rest) + if len(parts) == 0 { + return Stage{}, errors.New("sort requires a field name") + } + desc := false + if len(parts) >= 2 && strings.ToLower(parts[1]) == "desc" { + desc = true + } + return Stage{Kind: StageSort, SortField: parts[0], SortDesc: desc}, nil + + case "take", "head": + n, err := strconv.Atoi(strings.TrimSpace(rest)) + if err != nil { + return Stage{}, fmt.Errorf("take requires a number: %w", err) + } + return Stage{Kind: StageTake, Limit: n}, nil + + case "unique": + return Stage{Kind: StageUnique}, nil + + case "group-by": + if rest == "" { + return Stage{}, errors.New("group-by requires a field name") + } + fields := parseCSV(rest) + return Stage{Kind: StageGroupBy, Fields: fields}, nil + + case "count": + return Stage{Kind: StageCount}, nil + + case "refs-out": + return Stage{Kind: StageRefsOut}, nil + + case "refs-in": + return Stage{Kind: 
StageRefsIn}, nil + + case "reachable": + return Stage{Kind: StageReachable}, nil + + case "ancestors": + return Stage{Kind: StageAncestors}, nil + + case "properties": + return Stage{Kind: StageProperties}, nil + + case "union-members": + return Stage{Kind: StageUnionMembers}, nil + + case "items": + return Stage{Kind: StageItems}, nil + + case "ops": + return Stage{Kind: StageOps}, nil + + case "schemas": + return Stage{Kind: StageSchemas}, nil + + case "explain": + return Stage{Kind: StageExplain}, nil + + case "fields": + return Stage{Kind: StageFields}, nil + + case "sample": + n, err := strconv.Atoi(strings.TrimSpace(rest)) + if err != nil { + return Stage{}, fmt.Errorf("sample requires a number: %w", err) + } + return Stage{Kind: StageSample, Limit: n}, nil + + case "path": + from, to := parseTwoArgs(rest) + if from == "" || to == "" { + return Stage{}, errors.New("path requires two schema names") + } + return Stage{Kind: StagePath, PathFrom: from, PathTo: to}, nil + + case "top": + parts := strings.Fields(rest) + if len(parts) < 2 { + return Stage{}, errors.New("top requires a number and a field name") + } + n, err := strconv.Atoi(parts[0]) + if err != nil { + return Stage{}, fmt.Errorf("top requires a number: %w", err) + } + return Stage{Kind: StageTop, Limit: n, SortField: parts[1]}, nil + + case "bottom": + parts := strings.Fields(rest) + if len(parts) < 2 { + return Stage{}, errors.New("bottom requires a number and a field name") + } + n, err := strconv.Atoi(parts[0]) + if err != nil { + return Stage{}, fmt.Errorf("bottom requires a number: %w", err) + } + return Stage{Kind: StageBottom, Limit: n, SortField: parts[1]}, nil + + case "format": + f := strings.TrimSpace(rest) + if f != "table" && f != "json" && f != "markdown" && f != "toon" { + return Stage{}, fmt.Errorf("format must be table, json, markdown, or toon, got %q", f) + } + return Stage{Kind: StageFormat, Format: f}, nil + + case "connected": + return Stage{Kind: StageConnected}, nil + + case 
"blast-radius": + return Stage{Kind: StageBlastRadius}, nil + + case "neighbors": + n, err := strconv.Atoi(strings.TrimSpace(rest)) + if err != nil { + return Stage{}, fmt.Errorf("neighbors requires a depth number: %w", err) + } + return Stage{Kind: StageNeighbors, Limit: n}, nil + + case "orphans": + return Stage{Kind: StageOrphans}, nil + + case "leaves": + return Stage{Kind: StageLeaves}, nil + + case "cycles": + return Stage{Kind: StageCycles}, nil + + case "clusters": + return Stage{Kind: StageClusters}, nil + + case "tag-boundary": + return Stage{Kind: StageTagBoundary}, nil + + case "shared-refs": + return Stage{Kind: StageSharedRefs}, nil + + default: + return Stage{}, fmt.Errorf("unknown stage: %q", keyword) + } +} + +func parseTwoArgs(s string) (string, string) { + s = strings.TrimSpace(s) + var args []string + for len(s) > 0 { + if s[0] == '"' { + // Quoted arg + end := strings.Index(s[1:], "\"") + if end < 0 { + args = append(args, s[1:]) + break + } + args = append(args, s[1:end+1]) + s = strings.TrimSpace(s[end+2:]) + } else { + idx := strings.IndexAny(s, " \t") + if idx < 0 { + args = append(args, s) + break + } + args = append(args, s[:idx]) + s = strings.TrimSpace(s[idx+1:]) + } + if len(args) == 2 { + break + } + } + if len(args) < 2 { + if len(args) == 1 { + return args[0], "" + } + return "", "" + } + return args[0], args[1] +} + +// --- Pipeline splitting --- + +func splitPipeline(input string) []string { + var parts []string + var current strings.Builder + inQuote := false + + for i := 0; i < len(input); i++ { + ch := input[i] + switch { + case ch == '"': + inQuote = !inQuote + current.WriteByte(ch) + case ch == '|' && !inQuote: + parts = append(parts, current.String()) + current.Reset() + default: + current.WriteByte(ch) + } + } + if current.Len() > 0 { + parts = append(parts, current.String()) + } + return parts +} + +func splitFirst(s string) (string, string) { + s = strings.TrimSpace(s) + idx := strings.IndexAny(s, " \t") + if idx < 0 { + 
return s, "" + } + return s[:idx], strings.TrimSpace(s[idx+1:]) +} + +func parseCSV(s string) []string { + parts := strings.Split(s, ",") + result := make([]string, 0, len(parts)) + for _, p := range parts { + p = strings.TrimSpace(p) + if p != "" { + result = append(result, p) + } + } + return result +} From 395c19cd8edd702149221630155f08863f2d180a Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 18:33:35 +0000 Subject: [PATCH 17/27] fix: re-trigger CI for mod-check From de1339add833e1f894a78dcc0849fa150dff3133 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Fri, 13 Mar 2026 11:56:51 +0000 Subject: [PATCH 18/27] fix: remove replace directive, increase test coverage, and document multi-module workflow --- AGENTS.md | 24 ++ cmd/openapi/go.mod | 4 +- cmd/openapi/go.sum | 2 + graph/graph_test.go | 105 ++++++++ oq/expr/expr_test.go | 194 +++++++++++++++ oq/oq_test.go | 537 ++++++++++++++++++++++++++++++++++++++++ oq/testdata/cyclic.yaml | 89 +++++++ 7 files changed, 952 insertions(+), 3 deletions(-) create mode 100644 oq/testdata/cyclic.yaml diff --git a/AGENTS.md b/AGENTS.md index 085cee3..a200d91 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -114,6 +114,30 @@ git commit -m "feat: implement prefixEncoding and itemEncoding for OpenAPI 3.2 3. **Searchability**: Easier to search and filter commits 4. **Tool Compatibility**: Works better with automated tools and scripts +## Multi-Module Dependency Management + +This repository uses Go workspaces (`go.work`) with multiple modules. The `cmd/openapi` module depends on the root `github.com/speakeasy-api/openapi` module. + +### How Local Development Works + +The `go.work` file lists all modules, so during local development the workspace resolves cross-module imports automatically. You do **not** need a `replace` directive in `cmd/openapi/go.mod`. 
+
+### When Adding New Packages to the Root Module
+
+If you add new packages to the root module (e.g., `oq/`, `graph/`) that `cmd/openapi` imports, the published module version won't contain them yet. The workspace handles this locally, but `cmd/openapi/go.mod` must reference a version that includes the new packages for CI to pass `mod-check`.
+
+**Do NOT use `replace` directives.** Instead:
+
+1. Push your branch with the new root module packages.
+2. From the repo root, update `cmd/openapi` to reference your branch commit:
+   ```bash
+   GOWORK=off go get -C cmd/openapi github.com/speakeasy-api/openapi@<commit-sha>
+   GOWORK=off go mod tidy -C cmd/openapi
+   ```
+3. Verify with `mise run mod-check`.
+
+This gives `cmd/openapi/go.mod` a pseudo-version (e.g., `v1.19.6-0.20260312183335-395c19cd8edd`) that resolves correctly both locally and in CI. Each subsequent push that changes the root module requires repeating step 2 with the new commit SHA.
+
 ## Linter Rules
 
 This project uses `golangci-lint` with strict rules. Run `mise lint` to check. The most common violations are listed below.
**When you encounter a new common lint pattern not documented here, add it to this section so future sessions avoid the same mistakes.** diff --git a/cmd/openapi/go.mod b/cmd/openapi/go.mod index 4865210..2b0dc2b 100644 --- a/cmd/openapi/go.mod +++ b/cmd/openapi/go.mod @@ -2,13 +2,11 @@ module github.com/speakeasy-api/openapi/cmd/openapi go 1.24.3 -replace github.com/speakeasy-api/openapi => ../../ - require ( github.com/charmbracelet/bubbles v0.21.0 github.com/charmbracelet/bubbletea v1.3.10 github.com/charmbracelet/lipgloss v1.1.0 - github.com/speakeasy-api/openapi v1.19.5-0.20260309010446-7ff6a9590a7f + github.com/speakeasy-api/openapi v1.19.6-0.20260312183335-395c19cd8edd github.com/speakeasy-api/openapi/openapi/linter/customrules v0.0.0-20260309010446-7ff6a9590a7f github.com/spf13/cobra v1.10.1 github.com/stretchr/testify v1.11.1 diff --git a/cmd/openapi/go.sum b/cmd/openapi/go.sum index 31f3ed1..4082af8 100644 --- a/cmd/openapi/go.sum +++ b/cmd/openapi/go.sum @@ -84,6 +84,8 @@ github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/speakeasy-api/jsonpath v0.6.3 h1:c+QPwzAOdrWvzycuc9HFsIZcxKIaWcNpC+xhOW9rJxU= github.com/speakeasy-api/jsonpath v0.6.3/go.mod h1:2cXloNuQ+RSXi5HTRaeBh7JEmjRXTiaKpFTdZiL7URI= +github.com/speakeasy-api/openapi v1.19.6-0.20260312183335-395c19cd8edd h1:tsvAmrswd6tB0jeBE5DlIr4fB8WcMeWfCn4HUB6Vg44= +github.com/speakeasy-api/openapi v1.19.6-0.20260312183335-395c19cd8edd/go.mod h1:UfKa7FqE4jgexJZuj51MmdHAFGmDv0Zaw3+yOd81YKU= github.com/speakeasy-api/openapi/openapi/linter/customrules v0.0.0-20260309010446-7ff6a9590a7f h1:kwiHeGSILCUVEM9iSAUtifl1TLlvyDXmMjyW26/iX2k= github.com/speakeasy-api/openapi/openapi/linter/customrules v0.0.0-20260309010446-7ff6a9590a7f/go.mod h1:ALDg9E6LRTL5tMFlddVrLhc4JaarCHL65x2YkwL7xdg= github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s= diff --git 
a/graph/graph_test.go b/graph/graph_test.go index 7a09010..8833685 100644 --- a/graph/graph_test.go +++ b/graph/graph_test.go @@ -197,3 +197,108 @@ func TestBuild_Metrics_Success(t *testing.T) { unused, _ := g.SchemaByName("Unused") assert.Equal(t, 0, unused.InDegree, "Unused should have no incoming edges from other schemas") } + +func TestBuild_InEdges_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Owner is referenced by Pet via the "owner" property (possibly through a $ref node) + owner, _ := g.SchemaByName("Owner") + inEdges := g.InEdges(owner.ID) + assert.NotEmpty(t, inEdges, "Owner should have incoming edges") + + // Verify the InEdges returns edges with correct To field + for _, e := range inEdges { + assert.Equal(t, owner.ID, e.To, "InEdge To should match the queried node") + } +} + +func TestBuild_SchemaOperations_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + ops := g.SchemaOperations(pet.ID) + assert.NotEmpty(t, ops, "Pet should be referenced by operations") +} + +func TestBuild_SchemaOpCount_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + count := g.SchemaOpCount(pet.ID) + assert.Positive(t, count, "Pet should have operations referencing it") + + unused, _ := g.SchemaByName("Unused") + count = g.SchemaOpCount(unused.ID) + assert.Equal(t, 0, count, "Unused should have no operations") +} + +func TestBuild_Neighbors_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + + // Depth 1: direct out-edges and in-edges + n1 := g.Neighbors(pet.ID, 1) + assert.NotEmpty(t, n1, "Pet should have depth-1 neighbors") + + // Depth 0: should return nothing (no hops) + n0 := g.Neighbors(pet.ID, 0) + assert.Empty(t, n0, "depth-0 neighbors should be empty") + + // Depth 2: should be >= depth 1 + n2 := g.Neighbors(pet.ID, 2) + assert.GreaterOrEqual(t, len(n2), len(n1), "depth-2 should include at least 
depth-1 neighbors") +} + +func TestBuild_StronglyConnectedComponents_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + sccs := g.StronglyConnectedComponents() + // Petstore shouldn't have cycles, so SCCs should be empty (no multi-node components) + assert.Empty(t, sccs, "petstore should have no strongly connected components") +} + +func TestBuild_ConnectedComponent_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + schemas, ops := g.ConnectedComponent([]graph.NodeID{pet.ID}, nil) + assert.NotEmpty(t, schemas, "connected component from Pet should include schemas") + assert.NotEmpty(t, ops, "connected component from Pet should include operations") + + // Should include Pet itself + hasPet := false + for _, id := range schemas { + if id == pet.ID { + hasPet = true + } + } + assert.True(t, hasPet, "connected component should include the seed") +} + +func TestBuild_ConnectedComponent_FromOp_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Start from first operation + require.NotEmpty(t, g.Operations) + schemas, ops := g.ConnectedComponent(nil, []graph.NodeID{g.Operations[0].ID}) + assert.NotEmpty(t, schemas, "connected component from operation should include schemas") + assert.NotEmpty(t, ops, "connected component from operation should include the seed operation") +} + +func TestBuild_ShortestPath_SameNode_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + path := g.ShortestPath(pet.ID, pet.ID) + assert.Len(t, path, 1, "path from node to itself should be length 1") + assert.Equal(t, pet.ID, path[0]) +} diff --git a/oq/expr/expr_test.go b/oq/expr/expr_test.go index ddc41ca..98c1cab 100644 --- a/oq/expr/expr_test.go +++ b/oq/expr/expr_test.go @@ -164,3 +164,197 @@ func TestParse_UnterminatedFunction(t *testing.T) { require.Error(t, err) }) } + +func TestEval_Operators_Coverage(t *testing.T) { + t.Parallel() + + tests := []struct { + 
name string + exprStr string + row testRow + expected bool + }{ + { + name: "greater or equal true", + exprStr: `depth >= 5`, + row: testRow{"depth": expr.IntVal(5)}, + expected: true, + }, + { + name: "less or equal true", + exprStr: `depth <= 5`, + row: testRow{"depth": expr.IntVal(3)}, + expected: true, + }, + { + name: "less than true", + exprStr: `depth < 10`, + row: testRow{"depth": expr.IntVal(3)}, + expected: true, + }, + { + name: "and short-circuit false", + exprStr: `depth > 100 and is_component`, + row: testRow{"depth": expr.IntVal(1), "is_component": expr.BoolVal(true)}, + expected: false, + }, + { + name: "or short-circuit true", + exprStr: `is_component or depth > 100`, + row: testRow{"depth": expr.IntVal(1), "is_component": expr.BoolVal(true)}, + expected: true, + }, + { + name: "not true value", + exprStr: `not is_component`, + row: testRow{"is_component": expr.BoolVal(true)}, + expected: false, + }, + { + name: "has null field", + exprStr: `has(missing)`, + row: testRow{}, + expected: false, + }, + { + name: "has empty string", + exprStr: `has(name)`, + row: testRow{"name": expr.StringVal("")}, + expected: false, + }, + { + name: "has non-empty string", + exprStr: `has(name)`, + row: testRow{"name": expr.StringVal("Pet")}, + expected: true, + }, + { + name: "has false bool", + exprStr: `has(flag)`, + row: testRow{"flag": expr.BoolVal(false)}, + expected: false, + }, + { + name: "matches non-string field", + exprStr: `name matches ".*"`, + row: testRow{"name": expr.IntVal(42)}, + expected: false, + }, + { + name: "integer equality both sides", + exprStr: `depth == 0`, + row: testRow{"depth": expr.IntVal(0)}, + expected: true, + }, + { + name: "boolean equality", + exprStr: `is_component == is_inline`, + row: testRow{"is_component": expr.BoolVal(true), "is_inline": expr.BoolVal(true)}, + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + parsed, err := expr.Parse(tt.exprStr) + 
require.NoError(t, err) + result := parsed.Eval(tt.row) + assert.Equal(t, tt.expected, result.Bool) + }) + } +} + +func TestEval_TypeConversion_Coverage(t *testing.T) { + t.Parallel() + + // Test toBool with int + e, err := expr.Parse(`depth`) + require.NoError(t, err) + row := testRow{"depth": expr.IntVal(5)} + result := e.Eval(row) + assert.Equal(t, expr.KindInt, result.Kind) + + // Test toBool with string (non-empty = truthy in boolean context) + e, err = expr.Parse(`name and depth > 0`) + require.NoError(t, err) + row = testRow{"name": expr.StringVal("Pet"), "depth": expr.IntVal(1)} + result = e.Eval(row) + assert.True(t, result.Bool) + + // Test toBool with empty string (falsy) + e, err = expr.Parse(`name and depth > 0`) + require.NoError(t, err) + row = testRow{"name": expr.StringVal(""), "depth": expr.IntVal(1)} + result = e.Eval(row) + assert.False(t, result.Bool) + + // Test comparison with string-to-int coercion + e, err = expr.Parse(`depth > 0`) + require.NoError(t, err) + row = testRow{"depth": expr.BoolVal(true)} // bool true -> 1 in comparison + result = e.Eval(row) + assert.True(t, result.Bool) + + // Test string equality with int (cross-type via toString) + e, err = expr.Parse(`name == "5"`) + require.NoError(t, err) + row = testRow{"name": expr.IntVal(5)} + result = e.Eval(row) + assert.True(t, result.Bool) +} + +func TestParse_NullVal(t *testing.T) { + t.Parallel() + + v := expr.NullVal() + assert.Equal(t, expr.KindNull, v.Kind) +} + +func TestParse_LiteralValues(t *testing.T) { + t.Parallel() + + // true literal + e, err := expr.Parse(`true`) + require.NoError(t, err) + result := e.Eval(testRow{}) + assert.Equal(t, expr.KindBool, result.Kind) + assert.True(t, result.Bool) + + // false literal + e, err = expr.Parse(`false`) + require.NoError(t, err) + result = e.Eval(testRow{}) + assert.Equal(t, expr.KindBool, result.Kind) + assert.False(t, result.Bool) + + // numeric literal + e, err = expr.Parse(`depth > 0`) + require.NoError(t, err) + result = 
e.Eval(testRow{"depth": expr.IntVal(5)}) + assert.True(t, result.Bool) +} + +func TestParse_ComplexPrecedence(t *testing.T) { + t.Parallel() + + // a and b or c and d — "and" binds tighter, so this is (a and b) or (c and d) + e, err := expr.Parse(`depth > 0 and is_component or depth < 0 and is_inline`) + require.NoError(t, err) + + // Both "and" groups are false -> false + result := e.Eval(testRow{ + "depth": expr.IntVal(0), + "is_component": expr.BoolVal(true), + "is_inline": expr.BoolVal(true), + }) + assert.False(t, result.Bool) + + // First "and" group is true -> true + result = e.Eval(testRow{ + "depth": expr.IntVal(5), + "is_component": expr.BoolVal(true), + "is_inline": expr.BoolVal(false), + }) + assert.True(t, result.Bool) +} diff --git a/oq/oq_test.go b/oq/oq_test.go index 419d247..8bac5f2 100644 --- a/oq/oq_test.go +++ b/oq/oq_test.go @@ -8,6 +8,7 @@ import ( "github.com/speakeasy-api/openapi/graph" "github.com/speakeasy-api/openapi/openapi" "github.com/speakeasy-api/openapi/oq" + "github.com/speakeasy-api/openapi/oq/expr" "github.com/speakeasy-api/openapi/references" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -890,6 +891,542 @@ func TestFormatMarkdown_Count_Success(t *testing.T) { assert.NotEmpty(t, md) } +func TestExecute_Explain_AllStages_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Cover more stage descriptions in explain + tests := []struct { + name string + query string + expects []string + }{ + { + "explain with unique and count", + "schemas.components | unique | count | explain", + []string{"Unique:", "Count:"}, + }, + { + "explain with group-by", + "schemas.components | group-by type | explain", + []string{"Group: group-by"}, + }, + { + "explain with traversals", + "schemas.components | where name == \"Pet\" | refs-out | explain", + []string{"Traverse: outgoing references"}, + }, + { + "explain with refs-in", + "schemas.components | where name == \"Owner\" | refs-in | explain", + 
[]string{"Traverse: incoming references"}, + }, + { + "explain with reachable", + "schemas.components | where name == \"Pet\" | reachable | explain", + []string{"Traverse: all reachable"}, + }, + { + "explain with ancestors", + "schemas.components | where name == \"Address\" | ancestors | explain", + []string{"Traverse: all ancestor"}, + }, + { + "explain with properties", + "schemas.components | where name == \"Pet\" | properties | explain", + []string{"Traverse: property children"}, + }, + { + "explain with union-members", + "schemas.components | where name == \"Shape\" | union-members | explain", + []string{"Traverse: union members"}, + }, + { + "explain with items", + "schemas | where type == \"array\" | items | explain", + []string{"Traverse: array items"}, + }, + { + "explain with ops", + "schemas.components | where name == \"Pet\" | ops | explain", + []string{"Navigate: schemas to operations"}, + }, + { + "explain with schemas from ops", + "operations | schemas | explain", + []string{"Navigate: operations to schemas"}, + }, + { + "explain with sample", + "schemas.components | sample 3 | explain", + []string{"Sample: random 3"}, + }, + { + "explain with path", + "schemas | path Pet Address | explain", + []string{"Path: shortest path from Pet to Address"}, + }, + { + "explain with top", + "schemas.components | top 3 depth | explain", + []string{"Top: 3 by depth"}, + }, + { + "explain with bottom", + "schemas.components | bottom 3 depth | explain", + []string{"Bottom: 3 by depth"}, + }, + { + "explain with format", + "schemas.components | format json | explain", + []string{"Format: json"}, + }, + { + "explain with connected", + "schemas.components | where name == \"Pet\" | connected | explain", + []string{"Traverse: full connected"}, + }, + { + "explain with blast-radius", + "schemas.components | where name == \"Pet\" | blast-radius | explain", + []string{"Traverse: blast radius"}, + }, + { + "explain with neighbors", + "schemas.components | where name == 
\"Pet\" | neighbors 2 | explain", + []string{"Traverse: bidirectional neighbors within 2"}, + }, + { + "explain with orphans", + "schemas.components | orphans | explain", + []string{"Filter: schemas with no incoming"}, + }, + { + "explain with leaves", + "schemas.components | leaves | explain", + []string{"Filter: schemas with no outgoing"}, + }, + { + "explain with cycles", + "schemas | cycles | explain", + []string{"Analyze: strongly connected"}, + }, + { + "explain with clusters", + "schemas.components | clusters | explain", + []string{"Analyze: weakly connected"}, + }, + { + "explain with tag-boundary", + "schemas | tag-boundary | explain", + []string{"Filter: schemas used by operations across multiple"}, + }, + { + "explain with shared-refs", + "operations | shared-refs | explain", + []string{"Analyze: schemas shared"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + result, err := oq.Execute(tt.query, g) + require.NoError(t, err) + for _, exp := range tt.expects { + assert.Contains(t, result.Explain, exp) + } + }) + } +} + +func TestExecute_FieldValue_EdgeCases(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Test operation fields that require nil checks + result, err := oq.Execute("operations | take 1 | select name, tag, parameter_count, deprecated, description, summary", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + // Test edge fields on non-traversal rows (should be empty strings) + result, err = oq.Execute("schemas.components | take 1 | select name, edge_kind, edge_label, edge_from", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + edgeKind := oq.FieldValuePublic(result.Rows[0], "edge_kind", g) + assert.Equal(t, "", edgeKind.Str) + + // Test tag_count field + result, err = oq.Execute("schemas.components | take 1 | select name, tag_count", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + // Test op_count field + result, err = 
oq.Execute("schemas.components | take 1 | select name, op_count", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + // Test unknown field returns null (KindNull == 0) + v := oq.FieldValuePublic(result.Rows[0], "nonexistent_field", g) + assert.Equal(t, expr.KindNull, v.Kind) +} + +func TestExecute_Cycles_NoCycles(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Petstore has no cycles, so cycles should return empty groups + result, err := oq.Execute("schemas | cycles", g) + require.NoError(t, err) + assert.Empty(t, result.Groups, "petstore should have no cycles") +} + +func TestExecute_SharedRefs_AllOps(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // shared-refs with all operations — returns schemas shared by all operations + result, err := oq.Execute("operations | shared-refs | select name", g) + require.NoError(t, err) + assert.NotNil(t, result) +} + +func TestFormatToon_SpecialChars(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Test TOON format with bool and int fields to cover toonValue branches + result, err := oq.Execute("schemas.components | take 1 | select name, depth, is_component", g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.NotEmpty(t, toon) + assert.Contains(t, toon, "results[1]") +} + +func TestFormatJSON_Operations(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("operations | take 2 | select name, method, path", g) + require.NoError(t, err) + + json := oq.FormatJSON(result, g) + assert.True(t, strings.HasPrefix(json, "[")) + assert.Contains(t, json, "\"name\"") + assert.Contains(t, json, "\"method\"") +} + +func TestFormatMarkdown_Operations(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("operations | take 2 | select name, method", g) + require.NoError(t, err) + + md := oq.FormatMarkdown(result, g) + assert.Contains(t, md, "| name") + assert.Contains(t, md, "| method") +} + +func 
TestParse_Error_MoreCases(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + query string + }{ + {"empty query", ""}, + {"unknown stage", "schemas | bogus_stage"}, + {"take non-integer", "schemas | take abc"}, + {"sample non-integer", "schemas | sample xyz"}, + {"head non-integer", "schemas | head xyz"}, + {"neighbors non-integer", "schemas | neighbors abc"}, + {"top missing field", "schemas | top 5"}, + {"bottom missing field", "schemas | bottom 5"}, + {"path missing args", "schemas | path"}, + {"path one arg", "schemas | path Pet"}, + {"where empty expr", "schemas | where"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + _, err := oq.Parse(tt.query) + assert.Error(t, err) + }) + } +} + +func TestParse_MoreStages_Success(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + query string + }{ + {"format table", "schemas | format table"}, + {"format toon", "schemas | format toon"}, + {"sort asc explicit", "schemas | sort name asc"}, + {"sort default asc", "schemas | sort name"}, + {"select single field", "schemas | select name"}, + {"select many fields", "schemas | select name, type, depth, in_degree"}, + {"where with string", `schemas | where name == "Pet"`}, + {"where with bool", "schemas | where is_component"}, + {"where with not", "schemas | where not is_inline"}, + {"where with has", "schemas | where has(hash)"}, + {"where with matches", `schemas | where name matches ".*Pet.*"`}, + {"path quoted", `schemas | path "Pet" "Address"`}, + {"shared-refs stage", "operations | take 2 | shared-refs"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + stages, err := oq.Parse(tt.query) + require.NoError(t, err) + assert.NotEmpty(t, stages) + }) + } +} + +func TestExecute_WhereAndOr_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Test compound where expressions + result, err := oq.Execute(`schemas.components | where depth > 0 and 
is_component`, g) + require.NoError(t, err) + assert.NotNil(t, result) + + result, err = oq.Execute(`schemas.components | where depth > 100 or is_component`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "or should match is_component=true schemas") +} + +func TestExecute_SortStringField_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Sort by string field + result, err := oq.Execute("schemas.components | sort type asc | select name, type", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) +} + +func TestExecute_GroupBy_Type_Details(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | group-by type", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Groups) + + // Each group should have Count and Names + for _, grp := range result.Groups { + assert.Positive(t, grp.Count) + assert.Len(t, grp.Names, grp.Count) + } +} + +func TestFormatMarkdown_Groups_Details(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | group-by type", g) + require.NoError(t, err) + + md := oq.FormatMarkdown(result, g) + assert.Contains(t, md, "| Key |") + assert.Contains(t, md, "| Count |") +} + +func TestFormatJSON_Explain(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | explain", g) + require.NoError(t, err) + + // All formats should handle explain + table := oq.FormatTable(result, g) + assert.Contains(t, table, "Source: schemas") + + json := oq.FormatJSON(result, g) + assert.Contains(t, json, "Source: schemas") + + md := oq.FormatMarkdown(result, g) + assert.Contains(t, md, "Source: schemas") + + toon := oq.FormatToon(result, g) + assert.Contains(t, toon, "Source: schemas") +} + +func TestExecute_Leaves_AllZeroOutDegree(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | leaves | select name, out_degree", g) + 
require.NoError(t, err) + + // Verify leaves are leaf nodes + for _, row := range result.Rows { + od := oq.FieldValuePublic(row, "out_degree", g) + assert.Equal(t, 0, od.Int, "leaves should have 0 out_degree") + } +} + +func TestExecute_OperationsTraversals(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Operations going to schemas and back + result, err := oq.Execute("operations | take 1 | schemas | select name", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + // Schema to operations roundtrip + result, err = oq.Execute("schemas.components | where name == \"Pet\" | ops | select name", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) +} + +func loadCyclicGraph(t *testing.T) *graph.SchemaGraph { + t.Helper() + + f, err := os.Open("testdata/cyclic.yaml") + require.NoError(t, err) + defer f.Close() + + ctx := t.Context() + doc, _, err := openapi.Unmarshal(ctx, f, openapi.WithSkipValidation()) + require.NoError(t, err) + require.NotNil(t, doc) + + idx := openapi.BuildIndex(ctx, doc, references.ResolveOptions{ + RootDocument: doc, + TargetDocument: doc, + TargetLocation: "testdata/cyclic.yaml", + }) + + return graph.Build(ctx, idx) +} + +func TestExecute_Cycles_WithCyclicSpec(t *testing.T) { + t.Parallel() + g := loadCyclicGraph(t) + + // NodeA -> NodeB -> NodeA is a cycle + result, err := oq.Execute("schemas | cycles", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Groups, "cyclic spec should have cycles") + + // Format the groups + table := oq.FormatTable(result, g) + assert.Contains(t, table, "cycle-") + + json := oq.FormatJSON(result, g) + assert.Contains(t, json, "cycle-") +} + +func TestExecute_CyclicSpec_EdgeAnnotations(t *testing.T) { + t.Parallel() + g := loadCyclicGraph(t) + + // Test refs-out to cover edgeKindString branches + result, err := oq.Execute(`schemas.components | where name == "NodeA" | refs-out | select name, edge_kind, edge_label`, g) + require.NoError(t, err) + assert.NotEmpty(t, 
result.Rows) + + // Collect edge kinds + edgeKinds := make(map[string]bool) + for _, row := range result.Rows { + k := oq.FieldValuePublic(row, "edge_kind", g) + edgeKinds[k.Str] = true + } + // NodeA has properties, allOf, anyOf, items etc. + assert.True(t, edgeKinds["property"], "should have property edges") +} + +func TestExecute_CyclicSpec_IsCircular(t *testing.T) { + t.Parallel() + g := loadCyclicGraph(t) + + result, err := oq.Execute("schemas.components | where is_circular | select name", g) + require.NoError(t, err) + names := collectNames(result, g) + assert.Contains(t, names, "NodeA") + assert.Contains(t, names, "NodeB") +} + +func TestExecute_CyclicSpec_DeprecatedOp(t *testing.T) { + t.Parallel() + g := loadCyclicGraph(t) + + // The listNodes operation is deprecated with tags, summary, and description + result, err := oq.Execute("operations | select name, deprecated, summary, description, tag, parameter_count", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + dep := oq.FieldValuePublic(result.Rows[0], "deprecated", g) + assert.True(t, dep.Bool, "listNodes should be deprecated") + + summary := oq.FieldValuePublic(result.Rows[0], "summary", g) + assert.Equal(t, "List all nodes", summary.Str) + + desc := oq.FieldValuePublic(result.Rows[0], "description", g) + assert.NotEmpty(t, desc.Str) + + tag := oq.FieldValuePublic(result.Rows[0], "tag", g) + assert.Equal(t, "nodes", tag.Str) +} + +func TestExecute_ToonFormat_WithBoolAndInt(t *testing.T) { + t.Parallel() + g := loadCyclicGraph(t) + + // Select fields that cover all toonValue branches (string, int, bool) + result, err := oq.Execute("schemas.components | take 1 | select name, depth, is_circular", g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.NotEmpty(t, toon) +} + +func TestExecute_ToonEscape_SpecialChars(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // path fields contain "/" which doesn't need quoting, but let's cover the formatter + result, err 
:= oq.Execute("schemas | take 3 | select path", g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.NotEmpty(t, toon) +} + +func TestFormatToon_Explain(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | where depth > 0 | explain", g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.Contains(t, toon, "Source: schemas") +} + +func TestFormatMarkdown_Explain(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | explain", g) + require.NoError(t, err) + + md := oq.FormatMarkdown(result, g) + assert.Contains(t, md, "Source: schemas") +} + // collectNames extracts the "name" field from all rows in the result. func collectNames(result *oq.Result, g *graph.SchemaGraph) []string { var names []string diff --git a/oq/testdata/cyclic.yaml b/oq/testdata/cyclic.yaml new file mode 100644 index 0000000..bed5052 --- /dev/null +++ b/oq/testdata/cyclic.yaml @@ -0,0 +1,89 @@ +openapi: "3.1.0" +info: + title: Cyclic Test + version: "1.0.0" +paths: + /nodes: + get: + operationId: listNodes + tags: + - nodes + - admin + deprecated: true + summary: List all nodes + description: Returns a list of all node objects + responses: + "200": + description: A list of nodes + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/NodeA' +components: + schemas: + NodeA: + type: object + properties: + name: + type: string + child: + $ref: '#/components/schemas/NodeB' + extra: + allOf: + - $ref: '#/components/schemas/NodeC' + variant: + anyOf: + - type: string + - type: integer + meta: + additionalProperties: + type: string + items: + type: array + items: + type: string + prefixItems: + - type: integer + guard: + if: + type: string + then: + type: string + else: + type: integer + negative: + not: + type: null + contained: + type: array + contains: + type: string + propNames: + type: object + propertyNames: + type: string + pattern: + 
type: object + patternProperties: + "^S_": + type: string + deps: + type: object + dependentSchemas: + bar: + type: object + properties: + barValue: + type: string + NodeB: + type: object + properties: + ref_back: + $ref: '#/components/schemas/NodeA' + NodeC: + type: object + properties: + value: + type: string From cfaf308ff8520abd6fc12c4f9cf05df655304c4e Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Fri, 13 Mar 2026 12:06:39 +0000 Subject: [PATCH 19/27] fix: detectCycle marking non-cycle ancestors as circular, FormatToon double newline, and lint issues --- graph/graph.go | 26 +++++++++------- oq/format.go | 2 +- oq/oq_test.go | 81 ++++++++++++++++++++++++++------------------------ 3 files changed, 59 insertions(+), 50 deletions(-) diff --git a/graph/graph.go b/graph/graph.go index 0f4953b..7da6876 100644 --- a/graph/graph.go +++ b/graph/graph.go @@ -574,9 +574,7 @@ func (g *SchemaGraph) computeMetrics() { for i := range g.Schemas { nid := NodeID(i) if !visited[nid] { - if g.detectCycle(nid, visited, inStack, circularNodes) { - circularNodes[nid] = true - } + g.detectCycle(nid, visited, inStack, circularNodes) } } @@ -620,27 +618,35 @@ func (g *SchemaGraph) computeDepth(id NodeID, visited map[NodeID]bool) int { return maxChild } -func (g *SchemaGraph) detectCycle(id NodeID, visited, inStack map[NodeID]bool, circular map[NodeID]bool) bool { +// detectCycle performs a DFS from id, marking nodes that participate in cycles. +// It returns the NodeID of the cycle entry point that still needs to be "closed" +// by an ancestor frame, or -1 if no open cycle passes through this node. 
+func (g *SchemaGraph) detectCycle(id NodeID, visited, inStack map[NodeID]bool, circular map[NodeID]bool) NodeID { if inStack[id] { circular[id] = true - return true + return id // back-edge found; id is the cycle entry point } if visited[id] { - return false + return -1 } visited[id] = true inStack[id] = true - found := false + var outerEntry NodeID = -1 for _, edge := range g.outEdges[id] { - if g.detectCycle(edge.To, visited, inStack, circular) { + entry := g.detectCycle(edge.To, visited, inStack, circular) + if entry != -1 { circular[id] = true - found = true + // If the cycle entry is this node, the cycle is closed — don't propagate. + // Otherwise, remember the outermost open entry to propagate upward. + if entry != id { + outerEntry = entry + } } } inStack[id] = false - return found + return outerEntry } // Reachable returns all schema NodeIDs transitively reachable from the given node via out-edges. diff --git a/oq/format.go b/oq/format.go index 8b51338..aa689b3 100644 --- a/oq/format.go +++ b/oq/format.go @@ -205,7 +205,7 @@ func FormatToon(result *Result, g *graph.SchemaGraph) string { } if result.IsCount { - return "count: " + strconv.Itoa(result.Count) + "\n" + return "count: " + strconv.Itoa(result.Count) } if len(result.Groups) > 0 { diff --git a/oq/oq_test.go b/oq/oq_test.go index 8bac5f2..d1278cb 100644 --- a/oq/oq_test.go +++ b/oq/oq_test.go @@ -103,8 +103,8 @@ func TestExecute_SchemasCount_Success(t *testing.T) { result, err := oq.Execute("schemas | count", g) require.NoError(t, err) - assert.True(t, result.IsCount) - assert.Positive(t, result.Count) + assert.True(t, result.IsCount, "should be a count result") + assert.Positive(t, result.Count, "count should be positive") } func TestExecute_ComponentSchemas_Success(t *testing.T) { @@ -113,16 +113,16 @@ func TestExecute_ComponentSchemas_Success(t *testing.T) { result, err := oq.Execute("schemas.components | select name", g) require.NoError(t, err) - assert.NotEmpty(t, result.Rows) + 
assert.NotEmpty(t, result.Rows, "should have component schema rows") // Check that we have the expected component schemas names := collectNames(result, g) - assert.Contains(t, names, "Pet") - assert.Contains(t, names, "Owner") - assert.Contains(t, names, "Address") - assert.Contains(t, names, "Error") - assert.Contains(t, names, "Shape") - assert.Contains(t, names, "Unused") + assert.Contains(t, names, "Pet", "should include Pet schema") + assert.Contains(t, names, "Owner", "should include Owner schema") + assert.Contains(t, names, "Address", "should include Address schema") + assert.Contains(t, names, "Error", "should include Error schema") + assert.Contains(t, names, "Shape", "should include Shape schema") + assert.Contains(t, names, "Unused", "should include Unused schema") } func TestExecute_Where_Success(t *testing.T) { @@ -133,8 +133,8 @@ func TestExecute_Where_Success(t *testing.T) { require.NoError(t, err) names := collectNames(result, g) - assert.Contains(t, names, "Pet") - assert.Contains(t, names, "Owner") + assert.Contains(t, names, "Pet", "should include Pet schema") + assert.Contains(t, names, "Owner", "should include Owner schema") } func TestExecute_WhereInDegree_Success(t *testing.T) { @@ -147,7 +147,7 @@ func TestExecute_WhereInDegree_Success(t *testing.T) { names := collectNames(result, g) // Unused should have no references from other schemas - assert.Contains(t, names, "Unused") + assert.Contains(t, names, "Unused", "should include Unused schema with in_degree 0") } func TestExecute_Sort_Success(t *testing.T) { @@ -156,7 +156,7 @@ func TestExecute_Sort_Success(t *testing.T) { result, err := oq.Execute("schemas.components | sort property_count desc | take 3 | select name, property_count", g) require.NoError(t, err) - assert.LessOrEqual(t, len(result.Rows), 3) + assert.LessOrEqual(t, len(result.Rows), 3, "should return at most 3 rows") } func TestExecute_Reachable_Success(t *testing.T) { @@ -168,8 +168,8 @@ func TestExecute_Reachable_Success(t 
*testing.T) { names := collectNames(result, g) // Pet references Owner, Owner references Address - assert.Contains(t, names, "Owner") - assert.Contains(t, names, "Address") + assert.Contains(t, names, "Owner", "Pet should reach Owner") + assert.Contains(t, names, "Address", "Pet should reach Address") } func TestExecute_Ancestors_Success(t *testing.T) { @@ -181,7 +181,7 @@ func TestExecute_Ancestors_Success(t *testing.T) { names := collectNames(result, g) // Address is referenced by Owner, which is referenced by Pet - assert.Contains(t, names, "Owner") + assert.Contains(t, names, "Owner", "Address ancestors should include Owner") } func TestExecute_Properties_Success(t *testing.T) { @@ -191,7 +191,7 @@ func TestExecute_Properties_Success(t *testing.T) { result, err := oq.Execute(`schemas.components | where name == "Pet" | properties | select name`, g) require.NoError(t, err) // Pet has 4 properties: id, name, tag, owner - assert.NotEmpty(t, result.Rows) + assert.NotEmpty(t, result.Rows, "Pet should have properties") } func TestExecute_UnionMembers_Success(t *testing.T) { @@ -202,8 +202,8 @@ func TestExecute_UnionMembers_Success(t *testing.T) { require.NoError(t, err) // Shape has oneOf with Circle and Square names := collectNames(result, g) - assert.Contains(t, names, "Circle") - assert.Contains(t, names, "Square") + assert.Contains(t, names, "Circle", "Shape union members should include Circle") + assert.Contains(t, names, "Square", "Shape union members should include Square") } func TestExecute_Operations_Success(t *testing.T) { @@ -212,7 +212,7 @@ func TestExecute_Operations_Success(t *testing.T) { result, err := oq.Execute("operations | select name, method, path", g) require.NoError(t, err) - assert.NotEmpty(t, result.Rows) + assert.NotEmpty(t, result.Rows, "should have operations") } func TestExecute_OperationSchemas_Success(t *testing.T) { @@ -223,7 +223,7 @@ func TestExecute_OperationSchemas_Success(t *testing.T) { require.NoError(t, err) names := 
collectNames(result, g) - assert.Contains(t, names, "Pet") + assert.Contains(t, names, "Pet", "listPets operation should reference Pet schema") } func TestExecute_GroupBy_Success(t *testing.T) { @@ -232,7 +232,7 @@ func TestExecute_GroupBy_Success(t *testing.T) { result, err := oq.Execute(`schemas.components | group-by type`, g) require.NoError(t, err) - assert.NotEmpty(t, result.Groups) + assert.NotEmpty(t, result.Groups, "should have groups") } func TestExecute_Unique_Success(t *testing.T) { @@ -257,7 +257,7 @@ func TestExecute_SchemasToOps_Success(t *testing.T) { result, err := oq.Execute(`schemas.components | where name == "Pet" | ops | select name`, g) require.NoError(t, err) - assert.NotEmpty(t, result.Rows) + assert.NotEmpty(t, result.Rows, "should have operations using Pet schema") } func TestFormatTable_Success(t *testing.T) { @@ -268,9 +268,9 @@ func TestFormatTable_Success(t *testing.T) { require.NoError(t, err) table := oq.FormatTable(result, g) - assert.Contains(t, table, "name") - assert.Contains(t, table, "type") - assert.NotEmpty(t, table) + assert.Contains(t, table, "name", "table should include name column") + assert.Contains(t, table, "type", "table should include type column") + assert.NotEmpty(t, table, "table should not be empty") } func TestFormatJSON_Success(t *testing.T) { @@ -281,8 +281,8 @@ func TestFormatJSON_Success(t *testing.T) { require.NoError(t, err) json := oq.FormatJSON(result, g) - assert.True(t, strings.HasPrefix(json, "[")) - assert.True(t, strings.HasSuffix(json, "]")) + assert.True(t, strings.HasPrefix(json, "["), "JSON output should start with [") + assert.True(t, strings.HasSuffix(json, "]"), "JSON output should end with ]") } func TestFormatTable_Count_Success(t *testing.T) { @@ -293,7 +293,7 @@ func TestFormatTable_Count_Success(t *testing.T) { require.NoError(t, err) table := oq.FormatTable(result, g) - assert.NotEmpty(t, table) + assert.NotEmpty(t, table, "count table should not be empty") } func 
TestFormatTable_Empty_Success(t *testing.T) { @@ -304,7 +304,7 @@ func TestFormatTable_Empty_Success(t *testing.T) { require.NoError(t, err) table := oq.FormatTable(result, g) - assert.Equal(t, "(empty)", table) + assert.Equal(t, "(empty)", table, "empty result should format as (empty)") } func TestExecute_MatchesExpression_Success(t *testing.T) { @@ -315,7 +315,7 @@ func TestExecute_MatchesExpression_Success(t *testing.T) { require.NoError(t, err) names := collectNames(result, g) - assert.Contains(t, names, "Error") + assert.Contains(t, names, "Error", "regex match should return Error schema") } func TestExecute_SortAsc_Success(t *testing.T) { @@ -327,7 +327,7 @@ func TestExecute_SortAsc_Success(t *testing.T) { names := collectNames(result, g) for i := 1; i < len(names); i++ { - assert.LessOrEqual(t, names[i-1], names[i]) + assert.LessOrEqual(t, names[i-1], names[i], "names should be in ascending order") } } @@ -337,10 +337,10 @@ func TestExecute_Explain_Success(t *testing.T) { result, err := oq.Execute("schemas.components | where depth > 5 | sort depth desc | take 10 | explain", g) require.NoError(t, err) - assert.Contains(t, result.Explain, "Source: schemas.components") - assert.Contains(t, result.Explain, "Filter: where depth > 5") - assert.Contains(t, result.Explain, "Sort: depth descending") - assert.Contains(t, result.Explain, "Limit: take 10") + assert.Contains(t, result.Explain, "Source: schemas.components", "explain should show source") + assert.Contains(t, result.Explain, "Filter: where depth > 5", "explain should show filter stage") + assert.Contains(t, result.Explain, "Sort: depth descending", "explain should show sort stage") + assert.Contains(t, result.Explain, "Limit: take 10", "explain should show limit stage") } func TestExecute_Fields_Schemas_Success(t *testing.T) { @@ -1054,7 +1054,7 @@ func TestExecute_FieldValue_EdgeCases(t *testing.T) { require.NoError(t, err) assert.NotEmpty(t, result.Rows) edgeKind := oq.FieldValuePublic(result.Rows[0], 
"edge_kind", g) - assert.Equal(t, "", edgeKind.Str) + assert.Empty(t, edgeKind.Str, "edge_kind should be empty for non-traversal rows") // Test tag_count field result, err = oq.Execute("schemas.components | take 1 | select name, tag_count", g) @@ -1355,8 +1355,11 @@ func TestExecute_CyclicSpec_IsCircular(t *testing.T) { result, err := oq.Execute("schemas.components | where is_circular | select name", g) require.NoError(t, err) names := collectNames(result, g) - assert.Contains(t, names, "NodeA") - assert.Contains(t, names, "NodeB") + assert.Contains(t, names, "NodeA", "NodeA is in the A↔B cycle") + assert.Contains(t, names, "NodeB", "NodeB is in the A↔B cycle") + + // NodeC is NOT in the cycle — it's only referenced by NodeA via allOf + assert.NotContains(t, names, "NodeC", "NodeC should not be marked circular") } func TestExecute_CyclicSpec_DeprecatedOp(t *testing.T) { From c5c24955129f21e0a143551d321184c17e6865c9 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Fri, 13 Mar 2026 12:07:09 +0000 Subject: [PATCH 20/27] fix: update cmd/openapi dependency to latest commit --- cmd/openapi/go.mod | 2 +- cmd/openapi/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/openapi/go.mod b/cmd/openapi/go.mod index 2b0dc2b..706c5c9 100644 --- a/cmd/openapi/go.mod +++ b/cmd/openapi/go.mod @@ -6,7 +6,7 @@ require ( github.com/charmbracelet/bubbles v0.21.0 github.com/charmbracelet/bubbletea v1.3.10 github.com/charmbracelet/lipgloss v1.1.0 - github.com/speakeasy-api/openapi v1.19.6-0.20260312183335-395c19cd8edd + github.com/speakeasy-api/openapi v1.19.6-0.20260313120639-cfaf308ff852 github.com/speakeasy-api/openapi/openapi/linter/customrules v0.0.0-20260309010446-7ff6a9590a7f github.com/spf13/cobra v1.10.1 github.com/stretchr/testify v1.11.1 diff --git a/cmd/openapi/go.sum b/cmd/openapi/go.sum index 4082af8..867276a 100644 --- a/cmd/openapi/go.sum +++ b/cmd/openapi/go.sum @@ -84,8 +84,8 @@ github.com/sergi/go-diff v1.1.0 
h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/speakeasy-api/jsonpath v0.6.3 h1:c+QPwzAOdrWvzycuc9HFsIZcxKIaWcNpC+xhOW9rJxU= github.com/speakeasy-api/jsonpath v0.6.3/go.mod h1:2cXloNuQ+RSXi5HTRaeBh7JEmjRXTiaKpFTdZiL7URI= -github.com/speakeasy-api/openapi v1.19.6-0.20260312183335-395c19cd8edd h1:tsvAmrswd6tB0jeBE5DlIr4fB8WcMeWfCn4HUB6Vg44= -github.com/speakeasy-api/openapi v1.19.6-0.20260312183335-395c19cd8edd/go.mod h1:UfKa7FqE4jgexJZuj51MmdHAFGmDv0Zaw3+yOd81YKU= +github.com/speakeasy-api/openapi v1.19.6-0.20260313120639-cfaf308ff852 h1:z/lpRQjRfXDKF3jRpVWwIMzxiUDtcfFCNtJTs0thSKE= +github.com/speakeasy-api/openapi v1.19.6-0.20260313120639-cfaf308ff852/go.mod h1:UfKa7FqE4jgexJZuj51MmdHAFGmDv0Zaw3+yOd81YKU= github.com/speakeasy-api/openapi/openapi/linter/customrules v0.0.0-20260309010446-7ff6a9590a7f h1:kwiHeGSILCUVEM9iSAUtifl1TLlvyDXmMjyW26/iX2k= github.com/speakeasy-api/openapi/openapi/linter/customrules v0.0.0-20260309010446-7ff6a9590a7f/go.mod h1:ALDg9E6LRTL5tMFlddVrLhc4JaarCHL65x2YkwL7xdg= github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s= From e4afdd233c38ffc79fe87b5c7950410b37c4cb8a Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Fri, 13 Mar 2026 12:11:36 +0000 Subject: [PATCH 21/27] test: add descriptive assertion messages to oq tests --- oq/oq_test.go | 200 +++++++++++++++++++++++++------------------------- 1 file changed, 100 insertions(+), 100 deletions(-) diff --git a/oq/oq_test.go b/oq/oq_test.go index d1278cb..5c7a059 100644 --- a/oq/oq_test.go +++ b/oq/oq_test.go @@ -349,10 +349,10 @@ func TestExecute_Fields_Schemas_Success(t *testing.T) { result, err := oq.Execute("schemas | fields", g) require.NoError(t, err) - assert.Contains(t, result.Explain, "name") - assert.Contains(t, result.Explain, "depth") - assert.Contains(t, result.Explain, "property_count") - assert.Contains(t, result.Explain, "is_component") + 
assert.Contains(t, result.Explain, "name", "fields output should list name") + assert.Contains(t, result.Explain, "depth", "fields output should list depth") + assert.Contains(t, result.Explain, "property_count", "fields output should list property_count") + assert.Contains(t, result.Explain, "is_component", "fields output should list is_component") } func TestExecute_Fields_Operations_Success(t *testing.T) { @@ -361,11 +361,11 @@ func TestExecute_Fields_Operations_Success(t *testing.T) { result, err := oq.Execute("operations | fields", g) require.NoError(t, err) - assert.Contains(t, result.Explain, "method") - assert.Contains(t, result.Explain, "operation_id") - assert.Contains(t, result.Explain, "schema_count") - assert.Contains(t, result.Explain, "tag") - assert.Contains(t, result.Explain, "deprecated") + assert.Contains(t, result.Explain, "method", "fields output should list method") + assert.Contains(t, result.Explain, "operation_id", "fields output should list operation_id") + assert.Contains(t, result.Explain, "schema_count", "fields output should list schema_count") + assert.Contains(t, result.Explain, "tag", "fields output should list tag") + assert.Contains(t, result.Explain, "deprecated", "fields output should list deprecated") } func TestExecute_Head_Success(t *testing.T) { @@ -374,7 +374,7 @@ func TestExecute_Head_Success(t *testing.T) { result, err := oq.Execute("schemas.components | head 3", g) require.NoError(t, err) - assert.Len(t, result.Rows, 3) + assert.Len(t, result.Rows, 3, "head should return exactly 3 rows") } func TestExecute_Sample_Success(t *testing.T) { @@ -383,12 +383,12 @@ func TestExecute_Sample_Success(t *testing.T) { result, err := oq.Execute("schemas.components | sample 3", g) require.NoError(t, err) - assert.Len(t, result.Rows, 3) + assert.Len(t, result.Rows, 3, "sample should return exactly 3 rows") // Running sample again should produce the same result (deterministic) result2, err := oq.Execute("schemas.components | sample 3", 
g) require.NoError(t, err) - assert.Len(t, result2.Rows, len(result.Rows)) + assert.Len(t, result2.Rows, len(result.Rows), "sample should be deterministic") } func TestExecute_Path_Success(t *testing.T) { @@ -397,12 +397,12 @@ func TestExecute_Path_Success(t *testing.T) { result, err := oq.Execute(`schemas | path Pet Address | select name`, g) require.NoError(t, err) - assert.NotEmpty(t, result.Rows) + assert.NotEmpty(t, result.Rows, "path from Pet to Address should have results") names := collectNames(result, g) // Path should include Pet, something in between, and Address - assert.Equal(t, "Pet", names[0]) - assert.Equal(t, "Address", names[len(names)-1]) + assert.Equal(t, "Pet", names[0], "path should start at Pet") + assert.Equal(t, "Address", names[len(names)-1], "path should end at Address") } func TestExecute_Path_NotFound_Success(t *testing.T) { @@ -412,7 +412,7 @@ func TestExecute_Path_NotFound_Success(t *testing.T) { // Unused has no outgoing edges to reach Pet result, err := oq.Execute(`schemas | path Unused Pet | select name`, g) require.NoError(t, err) - assert.Empty(t, result.Rows) + assert.Empty(t, result.Rows, "no path should exist from Unused to Pet") } func TestExecute_Top_Success(t *testing.T) { @@ -421,13 +421,13 @@ func TestExecute_Top_Success(t *testing.T) { result, err := oq.Execute("schemas.components | top 3 property_count | select name, property_count", g) require.NoError(t, err) - assert.Len(t, result.Rows, 3) + assert.Len(t, result.Rows, 3, "top should return exactly 3 rows") // Verify descending order for i := 1; i < len(result.Rows); i++ { prev := oq.FieldValuePublic(result.Rows[i-1], "property_count", g) curr := oq.FieldValuePublic(result.Rows[i], "property_count", g) - assert.GreaterOrEqual(t, prev.Int, curr.Int) + assert.GreaterOrEqual(t, prev.Int, curr.Int, "top should be in descending order") } } @@ -437,13 +437,13 @@ func TestExecute_Bottom_Success(t *testing.T) { result, err := oq.Execute("schemas.components | bottom 3 
property_count | select name, property_count", g) require.NoError(t, err) - assert.Len(t, result.Rows, 3) + assert.Len(t, result.Rows, 3, "bottom should return exactly 3 rows") // Verify ascending order for i := 1; i < len(result.Rows); i++ { prev := oq.FieldValuePublic(result.Rows[i-1], "property_count", g) curr := oq.FieldValuePublic(result.Rows[i], "property_count", g) - assert.LessOrEqual(t, prev.Int, curr.Int) + assert.LessOrEqual(t, prev.Int, curr.Int, "bottom should be in ascending order") } } @@ -453,7 +453,7 @@ func TestExecute_Format_Success(t *testing.T) { result, err := oq.Execute("schemas.components | take 3 | format json", g) require.NoError(t, err) - assert.Equal(t, "json", result.FormatHint) + assert.Equal(t, "json", result.FormatHint, "format hint should be json") } func TestFormatMarkdown_Success(t *testing.T) { @@ -464,8 +464,8 @@ func TestFormatMarkdown_Success(t *testing.T) { require.NoError(t, err) md := oq.FormatMarkdown(result, g) - assert.Contains(t, md, "| name") - assert.Contains(t, md, "| --- |") + assert.Contains(t, md, "| name", "markdown should include name column header") + assert.Contains(t, md, "| --- |", "markdown should include separator row") } func TestExecute_OperationTag_Success(t *testing.T) { @@ -474,7 +474,7 @@ func TestExecute_OperationTag_Success(t *testing.T) { result, err := oq.Execute("operations | select name, tag, parameter_count", g) require.NoError(t, err) - assert.NotEmpty(t, result.Rows) + assert.NotEmpty(t, result.Rows, "should have operation rows") } func TestParse_NewStages_Success(t *testing.T) { @@ -501,7 +501,7 @@ func TestParse_NewStages_Success(t *testing.T) { t.Parallel() stages, err := oq.Parse(tt.query) require.NoError(t, err) - assert.NotEmpty(t, stages) + assert.NotEmpty(t, stages, "should parse into non-empty stages") }) } } @@ -512,7 +512,7 @@ func TestExecute_RefsOut_Success(t *testing.T) { result, err := oq.Execute(`schemas.components | where name == "Pet" | refs-out | select name`, g) 
require.NoError(t, err) - assert.NotEmpty(t, result.Rows) + assert.NotEmpty(t, result.Rows, "Pet should have outgoing refs") } func TestExecute_RefsIn_Success(t *testing.T) { @@ -521,7 +521,7 @@ func TestExecute_RefsIn_Success(t *testing.T) { result, err := oq.Execute(`schemas.components | where name == "Owner" | refs-in | select name`, g) require.NoError(t, err) - assert.NotEmpty(t, result.Rows) + assert.NotEmpty(t, result.Rows, "Owner should have incoming refs") } func TestExecute_Items_Success(t *testing.T) { @@ -532,7 +532,7 @@ func TestExecute_Items_Success(t *testing.T) { result, err := oq.Execute(`schemas | where type == "array" | items | select name`, g) require.NoError(t, err) // May or may not have results depending on spec, but should not error - assert.NotNil(t, result) + assert.NotNil(t, result, "result should not be nil") } func TestExecute_Connected_Success(t *testing.T) { @@ -542,7 +542,7 @@ func TestExecute_Connected_Success(t *testing.T) { // Start from Pet, connected should return schemas and operations in the same component result, err := oq.Execute(`schemas.components | where name == "Pet" | connected`, g) require.NoError(t, err) - assert.NotEmpty(t, result.Rows) + assert.NotEmpty(t, result.Rows, "connected should return rows") // Should have both schema and operation rows hasSchema := false @@ -566,7 +566,7 @@ func TestExecute_Connected_FromOps_Success(t *testing.T) { // Start from an operation, connected should also find schemas result, err := oq.Execute(`operations | take 1 | connected`, g) require.NoError(t, err) - assert.NotEmpty(t, result.Rows) + assert.NotEmpty(t, result.Rows, "connected from operation should return rows") hasSchema := false for _, row := range result.Rows { @@ -583,14 +583,14 @@ func TestExecute_EdgeAnnotations_Success(t *testing.T) { result, err := oq.Execute(`schemas.components | where name == "Pet" | refs-out | select name, edge_kind, edge_label, edge_from`, g) require.NoError(t, err) - assert.NotEmpty(t, 
result.Rows) + assert.NotEmpty(t, result.Rows, "refs-out from Pet should have results") // Every row should have edge annotations for _, row := range result.Rows { kind := oq.FieldValuePublic(row, "edge_kind", g) assert.NotEmpty(t, kind.Str, "edge_kind should be set") from := oq.FieldValuePublic(row, "edge_from", g) - assert.Equal(t, "Pet", from.Str) + assert.Equal(t, "Pet", from.Str, "edge_from should be Pet") } } @@ -600,7 +600,7 @@ func TestExecute_BlastRadius_Success(t *testing.T) { result, err := oq.Execute(`schemas.components | where name == "Pet" | blast-radius`, g) require.NoError(t, err) - assert.NotEmpty(t, result.Rows) + assert.NotEmpty(t, result.Rows, "blast-radius should return rows") // Should include both schemas and operations hasSchema := false @@ -623,7 +623,7 @@ func TestExecute_Neighbors_Success(t *testing.T) { result, err := oq.Execute(`schemas.components | where name == "Pet" | neighbors 1`, g) require.NoError(t, err) - assert.NotEmpty(t, result.Rows) + assert.NotEmpty(t, result.Rows, "neighbors should return rows") // Depth-1 neighbors should include seed + direct refs in both directions names := make(map[string]bool) @@ -641,7 +641,7 @@ func TestExecute_Orphans_Success(t *testing.T) { result, err := oq.Execute(`schemas.components | orphans | select name`, g) require.NoError(t, err) // Result may be empty if all schemas are referenced, that's fine - assert.NotNil(t, result) + assert.NotNil(t, result, "result should not be nil") } func TestExecute_Leaves_Success(t *testing.T) { @@ -653,7 +653,7 @@ func TestExecute_Leaves_Success(t *testing.T) { // All returned rows should have out_degree == 0 for _, row := range result.Rows { od := oq.FieldValuePublic(row, "out_degree", g) - assert.Equal(t, 0, od.Int) + assert.Equal(t, 0, od.Int, "leaf nodes should have out_degree 0") } } @@ -664,7 +664,7 @@ func TestExecute_Cycles_Success(t *testing.T) { result, err := oq.Execute(`schemas | cycles`, g) require.NoError(t, err) // Returns groups — may be empty 
if no cycles in petstore - assert.NotNil(t, result) + assert.NotNil(t, result, "result should not be nil") } func TestExecute_Clusters_Success(t *testing.T) { @@ -673,7 +673,7 @@ func TestExecute_Clusters_Success(t *testing.T) { result, err := oq.Execute(`schemas.components | clusters`, g) require.NoError(t, err) - assert.NotEmpty(t, result.Groups) + assert.NotEmpty(t, result.Groups, "should have clusters") // Total names across all clusters should equal component count total := 0 @@ -683,7 +683,7 @@ func TestExecute_Clusters_Success(t *testing.T) { // Count component schemas compCount, err := oq.Execute(`schemas.components | count`, g) require.NoError(t, err) - assert.Equal(t, compCount.Count, total) + assert.Equal(t, compCount.Count, total, "cluster totals should equal component count") } func TestExecute_TagBoundary_Success(t *testing.T) { @@ -695,7 +695,7 @@ func TestExecute_TagBoundary_Success(t *testing.T) { // All returned rows should have tag_count > 1 for _, row := range result.Rows { tc := oq.FieldValuePublic(row, "tag_count", g) - assert.Greater(t, tc.Int, 1) + assert.Greater(t, tc.Int, 1, "tag-boundary schemas should have tag_count > 1") } } @@ -706,7 +706,7 @@ func TestExecute_SharedRefs_Success(t *testing.T) { result, err := oq.Execute(`operations | shared-refs | select name`, g) require.NoError(t, err) // Schemas shared by ALL operations - assert.NotNil(t, result) + assert.NotNil(t, result, "result should not be nil") } func TestExecute_OpCount_Success(t *testing.T) { @@ -715,7 +715,7 @@ func TestExecute_OpCount_Success(t *testing.T) { result, err := oq.Execute(`schemas.components | sort op_count desc | take 3 | select name, op_count`, g) require.NoError(t, err) - assert.NotEmpty(t, result.Rows) + assert.NotEmpty(t, result.Rows, "should have schemas sorted by op_count") } func TestFormatTable_Groups_Success(t *testing.T) { @@ -724,10 +724,10 @@ func TestFormatTable_Groups_Success(t *testing.T) { result, err := oq.Execute("schemas.components | 
group-by type", g) require.NoError(t, err) - assert.NotEmpty(t, result.Groups) + assert.NotEmpty(t, result.Groups, "should have groups") table := oq.FormatTable(result, g) - assert.Contains(t, table, "count=") + assert.Contains(t, table, "count=", "group table should show count") } func TestFormatJSON_Groups_Success(t *testing.T) { @@ -738,8 +738,8 @@ func TestFormatJSON_Groups_Success(t *testing.T) { require.NoError(t, err) json := oq.FormatJSON(result, g) - assert.Contains(t, json, "\"key\"") - assert.Contains(t, json, "\"count\"") + assert.Contains(t, json, "\"key\"", "group JSON should include key field") + assert.Contains(t, json, "\"count\"", "group JSON should include count field") } func TestFormatMarkdown_Groups_Success(t *testing.T) { @@ -750,7 +750,7 @@ func TestFormatMarkdown_Groups_Success(t *testing.T) { require.NoError(t, err) md := oq.FormatMarkdown(result, g) - assert.Contains(t, md, "| Key |") + assert.Contains(t, md, "| Key |", "group markdown should include Key column") } func TestExecute_InlineSource_Success(t *testing.T) { @@ -759,7 +759,7 @@ func TestExecute_InlineSource_Success(t *testing.T) { result, err := oq.Execute("schemas.inline | count", g) require.NoError(t, err) - assert.True(t, result.IsCount) + assert.True(t, result.IsCount, "should be a count result") } func TestExecute_SchemaFields_Coverage(t *testing.T) { @@ -769,13 +769,13 @@ func TestExecute_SchemaFields_Coverage(t *testing.T) { // Select all schema fields to cover fieldValue branches result, err := oq.Execute("schemas.components | take 1 | select name, type, depth, in_degree, out_degree, union_width, property_count, is_component, is_inline, is_circular, has_ref, hash, path", g) require.NoError(t, err) - assert.NotEmpty(t, result.Rows) + assert.NotEmpty(t, result.Rows, "should have schema rows") table := oq.FormatTable(result, g) - assert.NotEmpty(t, table) + assert.NotEmpty(t, table, "table output should not be empty") json := oq.FormatJSON(result, g) - assert.Contains(t, 
json, "\"name\"") + assert.Contains(t, json, "\"name\"", "JSON should include name field") } func TestExecute_OperationFields_Coverage(t *testing.T) { @@ -785,7 +785,7 @@ func TestExecute_OperationFields_Coverage(t *testing.T) { // Select all operation fields to cover fieldValue branches result, err := oq.Execute("operations | take 1 | select name, method, path, operation_id, schema_count, component_count, tag, parameter_count, deprecated, description, summary", g) require.NoError(t, err) - assert.NotEmpty(t, result.Rows) + assert.NotEmpty(t, result.Rows, "should have operation rows") } func TestFormatJSON_Empty_Success(t *testing.T) { @@ -796,7 +796,7 @@ func TestFormatJSON_Empty_Success(t *testing.T) { require.NoError(t, err) json := oq.FormatJSON(result, g) - assert.Equal(t, "[]", json) + assert.Equal(t, "[]", json, "empty result JSON should be []") } func TestFormatMarkdown_Empty_Success(t *testing.T) { @@ -807,7 +807,7 @@ func TestFormatMarkdown_Empty_Success(t *testing.T) { require.NoError(t, err) md := oq.FormatMarkdown(result, g) - assert.Equal(t, "(empty)", md) + assert.Equal(t, "(empty)", md, "empty result markdown should be (empty)") } func TestFormatJSON_Count_Success(t *testing.T) { @@ -818,7 +818,7 @@ func TestFormatJSON_Count_Success(t *testing.T) { require.NoError(t, err) json := oq.FormatJSON(result, g) - assert.NotEmpty(t, json) + assert.NotEmpty(t, json, "count JSON should not be empty") } func TestFormatToon_Success(t *testing.T) { @@ -829,8 +829,8 @@ func TestFormatToon_Success(t *testing.T) { require.NoError(t, err) toon := oq.FormatToon(result, g) - assert.Contains(t, toon, "results[3]{name,type}:") - assert.Contains(t, toon, "object") + assert.Contains(t, toon, "results[3]{name,type}:", "toon should show result count and fields") + assert.Contains(t, toon, "object", "toon should include object type value") } func TestFormatToon_Count_Success(t *testing.T) { @@ -841,7 +841,7 @@ func TestFormatToon_Count_Success(t *testing.T) { 
require.NoError(t, err) toon := oq.FormatToon(result, g) - assert.Contains(t, toon, "count:") + assert.Contains(t, toon, "count:", "toon should show count label") } func TestFormatToon_Groups_Success(t *testing.T) { @@ -852,8 +852,8 @@ func TestFormatToon_Groups_Success(t *testing.T) { require.NoError(t, err) toon := oq.FormatToon(result, g) - assert.Contains(t, toon, "groups[") - assert.Contains(t, toon, "{key,count,names}:") + assert.Contains(t, toon, "groups[", "toon should show groups header") + assert.Contains(t, toon, "{key,count,names}:", "toon should show group fields") } func TestFormatToon_Empty_Success(t *testing.T) { @@ -864,7 +864,7 @@ func TestFormatToon_Empty_Success(t *testing.T) { require.NoError(t, err) toon := oq.FormatToon(result, g) - assert.Equal(t, "results[0]:\n", toon) + assert.Equal(t, "results[0]:\n", toon, "empty toon should show results[0]") } func TestFormatToon_Escaping_Success(t *testing.T) { @@ -877,7 +877,7 @@ func TestFormatToon_Escaping_Success(t *testing.T) { require.NoError(t, err) toon := oq.FormatToon(result, g) - assert.Contains(t, toon, "results[1]{name,hash,path}:") + assert.Contains(t, toon, "results[1]{name,hash,path}:", "toon should show result count and selected fields") } func TestFormatMarkdown_Count_Success(t *testing.T) { @@ -888,7 +888,7 @@ func TestFormatMarkdown_Count_Success(t *testing.T) { require.NoError(t, err) md := oq.FormatMarkdown(result, g) - assert.NotEmpty(t, md) + assert.NotEmpty(t, md, "count markdown should not be empty") } func TestExecute_Explain_AllStages_Success(t *testing.T) { @@ -1034,7 +1034,7 @@ func TestExecute_Explain_AllStages_Success(t *testing.T) { result, err := oq.Execute(tt.query, g) require.NoError(t, err) for _, exp := range tt.expects { - assert.Contains(t, result.Explain, exp) + assert.Contains(t, result.Explain, exp, "explain should contain: "+exp) } }) } @@ -1047,28 +1047,28 @@ func TestExecute_FieldValue_EdgeCases(t *testing.T) { // Test operation fields that require nil 
checks result, err := oq.Execute("operations | take 1 | select name, tag, parameter_count, deprecated, description, summary", g) require.NoError(t, err) - assert.NotEmpty(t, result.Rows) + assert.NotEmpty(t, result.Rows, "should have operation rows") // Test edge fields on non-traversal rows (should be empty strings) result, err = oq.Execute("schemas.components | take 1 | select name, edge_kind, edge_label, edge_from", g) require.NoError(t, err) - assert.NotEmpty(t, result.Rows) + assert.NotEmpty(t, result.Rows, "should have schema rows") edgeKind := oq.FieldValuePublic(result.Rows[0], "edge_kind", g) assert.Empty(t, edgeKind.Str, "edge_kind should be empty for non-traversal rows") // Test tag_count field result, err = oq.Execute("schemas.components | take 1 | select name, tag_count", g) require.NoError(t, err) - assert.NotEmpty(t, result.Rows) + assert.NotEmpty(t, result.Rows, "should have rows for tag_count test") // Test op_count field result, err = oq.Execute("schemas.components | take 1 | select name, op_count", g) require.NoError(t, err) - assert.NotEmpty(t, result.Rows) + assert.NotEmpty(t, result.Rows, "should have rows for op_count test") // Test unknown field returns null (KindNull == 0) v := oq.FieldValuePublic(result.Rows[0], "nonexistent_field", g) - assert.Equal(t, expr.KindNull, v.Kind) + assert.Equal(t, expr.KindNull, v.Kind, "unknown field should return KindNull") } func TestExecute_Cycles_NoCycles(t *testing.T) { @@ -1088,7 +1088,7 @@ func TestExecute_SharedRefs_AllOps(t *testing.T) { // shared-refs with all operations — returns schemas shared by all operations result, err := oq.Execute("operations | shared-refs | select name", g) require.NoError(t, err) - assert.NotNil(t, result) + assert.NotNil(t, result, "result should not be nil") } func TestFormatToon_SpecialChars(t *testing.T) { @@ -1100,8 +1100,8 @@ func TestFormatToon_SpecialChars(t *testing.T) { require.NoError(t, err) toon := oq.FormatToon(result, g) - assert.NotEmpty(t, toon) - 
assert.Contains(t, toon, "results[1]") + assert.NotEmpty(t, toon, "toon output should not be empty") + assert.Contains(t, toon, "results[1]", "toon should show one result") } func TestFormatJSON_Operations(t *testing.T) { @@ -1112,9 +1112,9 @@ func TestFormatJSON_Operations(t *testing.T) { require.NoError(t, err) json := oq.FormatJSON(result, g) - assert.True(t, strings.HasPrefix(json, "[")) - assert.Contains(t, json, "\"name\"") - assert.Contains(t, json, "\"method\"") + assert.True(t, strings.HasPrefix(json, "["), "JSON output should start with [") + assert.Contains(t, json, "\"name\"", "JSON should include name field") + assert.Contains(t, json, "\"method\"", "JSON should include method field") } func TestFormatMarkdown_Operations(t *testing.T) { @@ -1125,8 +1125,8 @@ func TestFormatMarkdown_Operations(t *testing.T) { require.NoError(t, err) md := oq.FormatMarkdown(result, g) - assert.Contains(t, md, "| name") - assert.Contains(t, md, "| method") + assert.Contains(t, md, "| name", "markdown should include name column") + assert.Contains(t, md, "| method", "markdown should include method column") } func TestParse_Error_MoreCases(t *testing.T) { @@ -1185,7 +1185,7 @@ func TestParse_MoreStages_Success(t *testing.T) { t.Parallel() stages, err := oq.Parse(tt.query) require.NoError(t, err) - assert.NotEmpty(t, stages) + assert.NotEmpty(t, stages, "should parse into non-empty stages") }) } } @@ -1197,7 +1197,7 @@ func TestExecute_WhereAndOr_Success(t *testing.T) { // Test compound where expressions result, err := oq.Execute(`schemas.components | where depth > 0 and is_component`, g) require.NoError(t, err) - assert.NotNil(t, result) + assert.NotNil(t, result, "result should not be nil") result, err = oq.Execute(`schemas.components | where depth > 100 or is_component`, g) require.NoError(t, err) @@ -1211,7 +1211,7 @@ func TestExecute_SortStringField_Success(t *testing.T) { // Sort by string field result, err := oq.Execute("schemas.components | sort type asc | select 
name, type", g) require.NoError(t, err) - assert.NotEmpty(t, result.Rows) + assert.NotEmpty(t, result.Rows, "should have schemas sorted by type") } func TestExecute_GroupBy_Type_Details(t *testing.T) { @@ -1220,12 +1220,12 @@ func TestExecute_GroupBy_Type_Details(t *testing.T) { result, err := oq.Execute("schemas.components | group-by type", g) require.NoError(t, err) - assert.NotEmpty(t, result.Groups) + assert.NotEmpty(t, result.Groups, "should have groups") // Each group should have Count and Names for _, grp := range result.Groups { - assert.Positive(t, grp.Count) - assert.Len(t, grp.Names, grp.Count) + assert.Positive(t, grp.Count, "group count should be positive") + assert.Len(t, grp.Names, grp.Count, "group names length should match count") } } @@ -1237,8 +1237,8 @@ func TestFormatMarkdown_Groups_Details(t *testing.T) { require.NoError(t, err) md := oq.FormatMarkdown(result, g) - assert.Contains(t, md, "| Key |") - assert.Contains(t, md, "| Count |") + assert.Contains(t, md, "| Key |", "group markdown should include Key column") + assert.Contains(t, md, "| Count |", "group markdown should include Count column") } func TestFormatJSON_Explain(t *testing.T) { @@ -1250,16 +1250,16 @@ func TestFormatJSON_Explain(t *testing.T) { // All formats should handle explain table := oq.FormatTable(result, g) - assert.Contains(t, table, "Source: schemas") + assert.Contains(t, table, "Source: schemas", "table should render explain output") json := oq.FormatJSON(result, g) - assert.Contains(t, json, "Source: schemas") + assert.Contains(t, json, "Source: schemas", "JSON should render explain output") md := oq.FormatMarkdown(result, g) - assert.Contains(t, md, "Source: schemas") + assert.Contains(t, md, "Source: schemas", "markdown should render explain output") toon := oq.FormatToon(result, g) - assert.Contains(t, toon, "Source: schemas") + assert.Contains(t, toon, "Source: schemas", "toon should render explain output") } func TestExecute_Leaves_AllZeroOutDegree(t *testing.T) 
{ @@ -1283,12 +1283,12 @@ func TestExecute_OperationsTraversals(t *testing.T) { // Operations going to schemas and back result, err := oq.Execute("operations | take 1 | schemas | select name", g) require.NoError(t, err) - assert.NotEmpty(t, result.Rows) + assert.NotEmpty(t, result.Rows, "operation schemas should have results") // Schema to operations roundtrip result, err = oq.Execute("schemas.components | where name == \"Pet\" | ops | select name", g) require.NoError(t, err) - assert.NotEmpty(t, result.Rows) + assert.NotEmpty(t, result.Rows, "Pet should be used by operations") } func loadCyclicGraph(t *testing.T) *graph.SchemaGraph { @@ -1336,7 +1336,7 @@ func TestExecute_CyclicSpec_EdgeAnnotations(t *testing.T) { // Test refs-out to cover edgeKindString branches result, err := oq.Execute(`schemas.components | where name == "NodeA" | refs-out | select name, edge_kind, edge_label`, g) require.NoError(t, err) - assert.NotEmpty(t, result.Rows) + assert.NotEmpty(t, result.Rows, "NodeA should have outgoing refs") // Collect edge kinds edgeKinds := make(map[string]bool) @@ -1369,19 +1369,19 @@ func TestExecute_CyclicSpec_DeprecatedOp(t *testing.T) { // The listNodes operation is deprecated with tags, summary, and description result, err := oq.Execute("operations | select name, deprecated, summary, description, tag, parameter_count", g) require.NoError(t, err) - assert.NotEmpty(t, result.Rows) + assert.NotEmpty(t, result.Rows, "should have operation rows") dep := oq.FieldValuePublic(result.Rows[0], "deprecated", g) assert.True(t, dep.Bool, "listNodes should be deprecated") summary := oq.FieldValuePublic(result.Rows[0], "summary", g) - assert.Equal(t, "List all nodes", summary.Str) + assert.Equal(t, "List all nodes", summary.Str, "summary should match spec") desc := oq.FieldValuePublic(result.Rows[0], "description", g) - assert.NotEmpty(t, desc.Str) + assert.NotEmpty(t, desc.Str, "description should not be empty") tag := oq.FieldValuePublic(result.Rows[0], "tag", g) - 
assert.Equal(t, "nodes", tag.Str) + assert.Equal(t, "nodes", tag.Str, "tag should be nodes") } func TestExecute_ToonFormat_WithBoolAndInt(t *testing.T) { @@ -1393,7 +1393,7 @@ func TestExecute_ToonFormat_WithBoolAndInt(t *testing.T) { require.NoError(t, err) toon := oq.FormatToon(result, g) - assert.NotEmpty(t, toon) + assert.NotEmpty(t, toon, "toon output should not be empty") } func TestExecute_ToonEscape_SpecialChars(t *testing.T) { @@ -1405,7 +1405,7 @@ func TestExecute_ToonEscape_SpecialChars(t *testing.T) { require.NoError(t, err) toon := oq.FormatToon(result, g) - assert.NotEmpty(t, toon) + assert.NotEmpty(t, toon, "toon output should not be empty") } func TestFormatToon_Explain(t *testing.T) { @@ -1416,7 +1416,7 @@ func TestFormatToon_Explain(t *testing.T) { require.NoError(t, err) toon := oq.FormatToon(result, g) - assert.Contains(t, toon, "Source: schemas") + assert.Contains(t, toon, "Source: schemas", "toon should render explain output") } func TestFormatMarkdown_Explain(t *testing.T) { @@ -1427,7 +1427,7 @@ func TestFormatMarkdown_Explain(t *testing.T) { require.NoError(t, err) md := oq.FormatMarkdown(result, g) - assert.Contains(t, md, "Source: schemas") + assert.Contains(t, md, "Source: schemas", "markdown should render explain output") } // collectNames extracts the "name" field from all rows in the result. 
From 0becad8e629b1b773478e4823ff36142371ec64b Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Fri, 13 Mar 2026 12:12:11 +0000 Subject: [PATCH 22/27] fix: update cmd/openapi dependency to latest commit --- cmd/openapi/go.mod | 2 +- cmd/openapi/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/openapi/go.mod b/cmd/openapi/go.mod index 706c5c9..ce8f3ba 100644 --- a/cmd/openapi/go.mod +++ b/cmd/openapi/go.mod @@ -6,7 +6,7 @@ require ( github.com/charmbracelet/bubbles v0.21.0 github.com/charmbracelet/bubbletea v1.3.10 github.com/charmbracelet/lipgloss v1.1.0 - github.com/speakeasy-api/openapi v1.19.6-0.20260313120639-cfaf308ff852 + github.com/speakeasy-api/openapi v1.19.6-0.20260313121136-e4afdd233c38 github.com/speakeasy-api/openapi/openapi/linter/customrules v0.0.0-20260309010446-7ff6a9590a7f github.com/spf13/cobra v1.10.1 github.com/stretchr/testify v1.11.1 diff --git a/cmd/openapi/go.sum b/cmd/openapi/go.sum index 867276a..d90bc8b 100644 --- a/cmd/openapi/go.sum +++ b/cmd/openapi/go.sum @@ -84,8 +84,8 @@ github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/speakeasy-api/jsonpath v0.6.3 h1:c+QPwzAOdrWvzycuc9HFsIZcxKIaWcNpC+xhOW9rJxU= github.com/speakeasy-api/jsonpath v0.6.3/go.mod h1:2cXloNuQ+RSXi5HTRaeBh7JEmjRXTiaKpFTdZiL7URI= -github.com/speakeasy-api/openapi v1.19.6-0.20260313120639-cfaf308ff852 h1:z/lpRQjRfXDKF3jRpVWwIMzxiUDtcfFCNtJTs0thSKE= -github.com/speakeasy-api/openapi v1.19.6-0.20260313120639-cfaf308ff852/go.mod h1:UfKa7FqE4jgexJZuj51MmdHAFGmDv0Zaw3+yOd81YKU= +github.com/speakeasy-api/openapi v1.19.6-0.20260313121136-e4afdd233c38 h1:D3ZfpT/5sVEJRhKtK1J7mLdq3mF2JjBcVT/3aejRjLk= +github.com/speakeasy-api/openapi v1.19.6-0.20260313121136-e4afdd233c38/go.mod h1:UfKa7FqE4jgexJZuj51MmdHAFGmDv0Zaw3+yOd81YKU= github.com/speakeasy-api/openapi/openapi/linter/customrules v0.0.0-20260309010446-7ff6a9590a7f 
h1:kwiHeGSILCUVEM9iSAUtifl1TLlvyDXmMjyW26/iX2k= github.com/speakeasy-api/openapi/openapi/linter/customrules v0.0.0-20260309010446-7ff6a9590a7f/go.mod h1:ALDg9E6LRTL5tMFlddVrLhc4JaarCHL65x2YkwL7xdg= github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s= From e705517fb4e26786c7bb45276e020dac80647754 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Fri, 13 Mar 2026 12:19:36 +0000 Subject: [PATCH 23/27] fix: avoid nilaway false positive in execSample by replacing slices.Clone with append --- oq/exec.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oq/exec.go b/oq/exec.go index 01e9177..9ce7aa7 100644 --- a/oq/exec.go +++ b/oq/exec.go @@ -984,7 +984,7 @@ func execSample(stage Stage, result *Result) (*Result, error) { } // Deterministic shuffle using Fisher-Yates with a fixed seed derived from row count. - rows := slices.Clone(result.Rows) + rows := append([]Row{}, result.Rows...) rng := rand.New(rand.NewPCG(uint64(len(rows)), 0)) //nolint:gosec // deterministic seed is intentional rng.Shuffle(len(rows), func(i, j int) { rows[i], rows[j] = rows[j], rows[i] From 06a0016572627876b6e3e8c3f0924e549505727e Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Fri, 13 Mar 2026 12:19:55 +0000 Subject: [PATCH 24/27] fix: update cmd/openapi dependency to latest commit --- cmd/openapi/go.mod | 2 +- cmd/openapi/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/openapi/go.mod b/cmd/openapi/go.mod index ce8f3ba..a59174e 100644 --- a/cmd/openapi/go.mod +++ b/cmd/openapi/go.mod @@ -6,7 +6,7 @@ require ( github.com/charmbracelet/bubbles v0.21.0 github.com/charmbracelet/bubbletea v1.3.10 github.com/charmbracelet/lipgloss v1.1.0 - github.com/speakeasy-api/openapi v1.19.6-0.20260313121136-e4afdd233c38 + github.com/speakeasy-api/openapi v1.19.6-0.20260313121936-e705517fb4e2 github.com/speakeasy-api/openapi/openapi/linter/customrules v0.0.0-20260309010446-7ff6a9590a7f github.com/spf13/cobra v1.10.1 
github.com/stretchr/testify v1.11.1 diff --git a/cmd/openapi/go.sum b/cmd/openapi/go.sum index d90bc8b..7048275 100644 --- a/cmd/openapi/go.sum +++ b/cmd/openapi/go.sum @@ -84,8 +84,8 @@ github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/speakeasy-api/jsonpath v0.6.3 h1:c+QPwzAOdrWvzycuc9HFsIZcxKIaWcNpC+xhOW9rJxU= github.com/speakeasy-api/jsonpath v0.6.3/go.mod h1:2cXloNuQ+RSXi5HTRaeBh7JEmjRXTiaKpFTdZiL7URI= -github.com/speakeasy-api/openapi v1.19.6-0.20260313121136-e4afdd233c38 h1:D3ZfpT/5sVEJRhKtK1J7mLdq3mF2JjBcVT/3aejRjLk= -github.com/speakeasy-api/openapi v1.19.6-0.20260313121136-e4afdd233c38/go.mod h1:UfKa7FqE4jgexJZuj51MmdHAFGmDv0Zaw3+yOd81YKU= +github.com/speakeasy-api/openapi v1.19.6-0.20260313121936-e705517fb4e2 h1:97FEnVTs9WkTzjrEmDCmwjKYA7JaCZMFFcR4mO4Jcxc= +github.com/speakeasy-api/openapi v1.19.6-0.20260313121936-e705517fb4e2/go.mod h1:UfKa7FqE4jgexJZuj51MmdHAFGmDv0Zaw3+yOd81YKU= github.com/speakeasy-api/openapi/openapi/linter/customrules v0.0.0-20260309010446-7ff6a9590a7f h1:kwiHeGSILCUVEM9iSAUtifl1TLlvyDXmMjyW26/iX2k= github.com/speakeasy-api/openapi/openapi/linter/customrules v0.0.0-20260309010446-7ff6a9590a7f/go.mod h1:ALDg9E6LRTL5tMFlddVrLhc4JaarCHL65x2YkwL7xdg= github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s= From b927a3f97a3bcf706dc9acc78ba752a1d21a9696 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Fri, 13 Mar 2026 13:07:58 +0000 Subject: [PATCH 25/27] feat: add jq-inspired syntax to oq query language Modernize oq with jq-style syntax: select(expr) for filtering, pick for field projection, sort_by(field; desc), first/last/length, group_by(), def/include for user-defined functions and modules, let $var for variable binding, // alternative operator, if-then-else-end, and string interpolation \(expr). All legacy syntax remains supported. 
Co-Authored-By: Claude Opus 4.6 --- oq/exec.go | 78 +++++++- oq/expr/expr.go | 161 ++++++++++++++- oq/expr/expr_test.go | 186 ++++++++++++++++++ oq/field.go | 8 + oq/module.go | 113 +++++++++++ oq/oq.go | 53 ++++- oq/oq_test.go | 215 +++++++++++++++++++- oq/parse.go | 455 ++++++++++++++++++++++++++++++++++++++++--- 8 files changed, 1219 insertions(+), 50 deletions(-) create mode 100644 oq/module.go diff --git a/oq/exec.go b/oq/exec.go index 9ce7aa7..e0b6516 100644 --- a/oq/exec.go +++ b/oq/exec.go @@ -31,9 +31,12 @@ func run(stages []Stage, g *graph.SchemaGraph) (*Result, error) { return nil, err } + // Thread env through stages for let bindings + env := map[string]expr.Value{} + // Execute remaining stages for _, stage := range stages[1:] { - result, err = execStage(stage, result, g) + result, env, err = execStageWithEnv(stage, result, g, env) if err != nil { return nil, err } @@ -71,10 +74,26 @@ func execSource(stage Stage, g *graph.SchemaGraph) (*Result, error) { return result, nil } +func execStageWithEnv(stage Stage, result *Result, g *graph.SchemaGraph, env map[string]expr.Value) (*Result, map[string]expr.Value, error) { + switch stage.Kind { + case StageLet: + r, newEnv, err := execLet(stage, result, g, env) + return r, newEnv, err + case StageWhere: + r, err := execWhere(stage, result, g, env) + return r, env, err + default: + r, err := execStage(stage, result, g) + return r, env, err + } +} + func execStage(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { switch stage.Kind { case StageWhere: - return execWhere(stage, result, g) + return execWhere(stage, result, g, nil) + case StageLast: + return execLast(stage, result) case StageSelect: result.Fields = stage.Fields return result, nil @@ -152,7 +171,7 @@ func execStage(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, erro } } -func execWhere(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { +func execWhere(stage Stage, result *Result, g 
*graph.SchemaGraph, env map[string]expr.Value) (*Result, error) { predicate, err := expr.Parse(stage.Expr) if err != nil { return nil, fmt.Errorf("where expression error: %w", err) @@ -160,7 +179,7 @@ func execWhere(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, erro filtered := &Result{Fields: result.Fields} for _, row := range result.Rows { - r := rowAdapter{row: row, g: g} + r := rowAdapter{row: row, g: g, env: env} val := predicate.Eval(r) if val.Kind == expr.KindBool && val.Bool { filtered.Rows = append(filtered.Rows, row) @@ -169,6 +188,41 @@ func execWhere(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, erro return filtered, nil } +func execLast(stage Stage, result *Result) (*Result, error) { + rows := result.Rows + if stage.Limit < len(rows) { + rows = rows[len(rows)-stage.Limit:] + } + return &Result{ + Fields: result.Fields, + FormatHint: result.FormatHint, + Rows: slices.Clone(rows), + }, nil +} + +func execLet(stage Stage, result *Result, g *graph.SchemaGraph, env map[string]expr.Value) (*Result, map[string]expr.Value, error) { + predicate, err := expr.Parse(stage.Expr) + if err != nil { + return nil, env, fmt.Errorf("let expression error: %w", err) + } + + // Evaluate against first row + newEnv := make(map[string]expr.Value, len(env)+1) + for k, v := range env { + newEnv[k] = v + } + + if len(result.Rows) > 0 { + r := rowAdapter{row: result.Rows[0], g: g, env: env} + val := predicate.Eval(r) + newEnv[stage.VarName] = val + } else { + newEnv[stage.VarName] = expr.NullVal() + } + + return result, newEnv, nil +} + func execSort(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { sorted := &Result{ Fields: result.Fields, @@ -844,17 +898,21 @@ func buildExplain(stages []Stage) string { func describeStage(stage Stage) string { switch stage.Kind { case StageWhere: - return "Filter: where " + stage.Expr + return "Filter: select(" + stage.Expr + ")" case StageSelect: - return "Project: select " + 
strings.Join(stage.Fields, ", ") + return "Project: pick " + strings.Join(stage.Fields, ", ") case StageSort: - dir := "ascending" + dir := "asc" if stage.SortDesc { - dir = "descending" + dir = "desc" } - return "Sort: " + stage.SortField + " " + dir + return "Sort: sort_by(" + stage.SortField + "; " + dir + ")" case StageTake: - return "Limit: take " + strconv.Itoa(stage.Limit) + return "Limit: first(" + strconv.Itoa(stage.Limit) + ")" + case StageLast: + return "Limit: last(" + strconv.Itoa(stage.Limit) + ")" + case StageLet: + return "Bind: let " + stage.VarName + " = " + stage.Expr case StageUnique: return "Unique: deduplicate rows" case StageGroupBy: diff --git a/oq/expr/expr.go b/oq/expr/expr.go index 2cb9bcd..c027436 100644 --- a/oq/expr/expr.go +++ b/oq/expr/expr.go @@ -44,6 +44,21 @@ type binaryExpr struct { right Expr } +type alternativeExpr struct { + left Expr + right Expr +} + +type ifExpr struct { + cond Expr + then_ Expr + else_ Expr // nil means return null +} + +type interpExpr struct { + parts []Expr +} + type notExpr struct { inner Expr } @@ -118,6 +133,34 @@ func (e *literalExpr) Eval(_ Row) Value { return e.val } +func (e *alternativeExpr) Eval(row Row) Value { + l := e.left.Eval(row) + if l.Kind != KindNull && toBool(l) { + return l + } + return e.right.Eval(row) +} + +func (e *ifExpr) Eval(row Row) Value { + cond := e.cond.Eval(row) + if toBool(cond) { + return e.then_.Eval(row) + } + if e.else_ != nil { + return e.else_.Eval(row) + } + return Value{Kind: KindNull} +} + +func (e *interpExpr) Eval(row Row) Value { + var sb strings.Builder + for _, part := range e.parts { + v := part.Eval(row) + sb.WriteString(toString(v)) + } + return StringVal(sb.String()) +} + // --- Helpers --- func toBool(v Value) bool { @@ -282,7 +325,7 @@ func (p *parser) parseAnd() (Expr, error) { } func (p *parser) parseComparison() (Expr, error) { - left, err := p.parseUnary() + left, err := p.parseAlternative() if err != nil { return nil, err } @@ -312,6 +355,22 @@ 
func (p *parser) parseComparison() (Expr, error) { return left, nil } +func (p *parser) parseAlternative() (Expr, error) { + left, err := p.parseUnary() + if err != nil { + return nil, err + } + for p.peek() == "//" { + p.next() + right, err := p.parseUnary() + if err != nil { + return nil, err + } + left = &alternativeExpr{left: left, right: right} + } + return left, nil +} + func (p *parser) parseUnary() (Expr, error) { if p.peek() == "not" { p.next() @@ -327,6 +386,11 @@ func (p *parser) parseUnary() (Expr, error) { func (p *parser) parsePrimary() (Expr, error) { tok := p.peek() + // if-then-else-end + if tok == "if" { + return p.parseIf() + } + // Parenthesized expression if tok == "(" { p.next() @@ -374,10 +438,14 @@ func (p *parser) parsePrimary() (Expr, error) { return &matchesExpr{field: field, pattern: re}, nil } - // String literal + // String literal (possibly with interpolation) if strings.HasPrefix(tok, "\"") { p.next() - return &literalExpr{val: StringVal(strings.Trim(tok, "\""))}, nil + inner := tok[1 : len(tok)-1] // strip quotes + if strings.Contains(inner, "\\(") { + return parseInterpolation(inner) + } + return &literalExpr{val: StringVal(inner)}, nil } // Boolean literals @@ -405,6 +473,89 @@ func (p *parser) parsePrimary() (Expr, error) { return nil, fmt.Errorf("unexpected token: %q", tok) } +func (p *parser) parseIf() (Expr, error) { + p.next() // consume "if" + cond, err := p.parseOr() + if err != nil { + return nil, err + } + if err := p.expect("then"); err != nil { + return nil, err + } + then_, err := p.parseOr() + if err != nil { + return nil, err + } + var else_ Expr + switch p.peek() { + case "elif": + // elif chains into a nested ifExpr + // Rewrite "elif" token as "if" for recursive parsing + p.tokens[p.pos] = "if" + else_, err = p.parseIf() + if err != nil { + return nil, err + } + case "else": + p.next() + else_, err = p.parseOr() + if err != nil { + return nil, err + } + if err := p.expect("end"); err != nil { + return nil, err + } 
+ case "end": + p.next() + default: + return nil, fmt.Errorf("expected \"else\", \"elif\", or \"end\", got %q", p.peek()) + } + return &ifExpr{cond: cond, then_: then_, else_: else_}, nil +} + +func parseInterpolation(s string) (Expr, error) { + var parts []Expr + for len(s) > 0 { + idx := strings.Index(s, "\\(") + if idx < 0 { + parts = append(parts, &literalExpr{val: StringVal(s)}) + break + } + if idx > 0 { + parts = append(parts, &literalExpr{val: StringVal(s[:idx])}) + } + s = s[idx+2:] + // Find matching closing paren + depth := 1 + end := 0 + for end < len(s) { + if s[end] == '(' { + depth++ + } else if s[end] == ')' { + depth-- + if depth == 0 { + break + } + } + end++ + } + if depth != 0 { + return nil, errors.New("unterminated interpolation \\(") + } + inner := s[:end] + e, err := Parse(inner) + if err != nil { + return nil, fmt.Errorf("interpolation error: %w", err) + } + parts = append(parts, e) + s = s[end+1:] + } + if len(parts) == 1 { + return parts[0], nil + } + return &interpExpr{parts: parts}, nil +} + // tokenize splits an expression into tokens. func tokenize(input string) []string { var tokens []string @@ -421,7 +572,7 @@ func tokenize(input string) []string { // Two-character operators if i+1 < len(input) { two := input[i : i+2] - if two == "==" || two == "!=" || two == ">=" || two == "<=" { + if two == "==" || two == "!=" || two == ">=" || two == "<=" || two == "//" { tokens = append(tokens, two) i += 2 continue @@ -456,7 +607,7 @@ func tokenize(input string) []string { j := i for j < len(input) && input[j] != ' ' && input[j] != '\t' && input[j] != '\n' && input[j] != '(' && input[j] != ')' && input[j] != ',' && - input[j] != '>' && input[j] != '<' && input[j] != '=' && input[j] != '!' { + input[j] != '>' && input[j] != '<' && input[j] != '=' && input[j] != '!' 
&& input[j] != '/' { j++ } if j > i { diff --git a/oq/expr/expr_test.go b/oq/expr/expr_test.go index 98c1cab..3c3924d 100644 --- a/oq/expr/expr_test.go +++ b/oq/expr/expr_test.go @@ -335,6 +335,192 @@ func TestParse_LiteralValues(t *testing.T) { assert.True(t, result.Bool) } +func TestParse_AlternativeOperator(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + exprStr string + row testRow + expected expr.Value + }{ + { + name: "left is truthy", + exprStr: `name // "default"`, + row: testRow{"name": expr.StringVal("Pet")}, + expected: expr.StringVal("Pet"), + }, + { + name: "left is null", + exprStr: `missing // "default"`, + row: testRow{}, + expected: expr.StringVal("default"), + }, + { + name: "left is empty string (falsy)", + exprStr: `name // "default"`, + row: testRow{"name": expr.StringVal("")}, + expected: expr.StringVal("default"), + }, + { + name: "left is false", + exprStr: `flag // true`, + row: testRow{"flag": expr.BoolVal(false)}, + expected: expr.BoolVal(true), + }, + { + name: "left is zero (falsy int)", + exprStr: `count // 42`, + row: testRow{"count": expr.IntVal(0)}, + expected: expr.IntVal(42), + }, + { + name: "left is nonzero int (truthy)", + exprStr: `count // 42`, + row: testRow{"count": expr.IntVal(5)}, + expected: expr.IntVal(5), + }, + { + name: "chained alternative", + exprStr: `a // b // "fallback"`, + row: testRow{"a": expr.NullVal(), "b": expr.StringVal("")}, + expected: expr.StringVal("fallback"), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + parsed, err := expr.Parse(tt.exprStr) + require.NoError(t, err) + result := parsed.Eval(tt.row) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestParse_IfThenElse(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + exprStr string + row testRow + expected expr.Value + }{ + { + name: "if true then value", + exprStr: `if is_component then depth else 0 end`, + row: testRow{"is_component": 
expr.BoolVal(true), "depth": expr.IntVal(5)}, + expected: expr.IntVal(5), + }, + { + name: "if false else value", + exprStr: `if is_component then depth else 0 end`, + row: testRow{"is_component": expr.BoolVal(false), "depth": expr.IntVal(5)}, + expected: expr.IntVal(0), + }, + { + name: "if without else returns null", + exprStr: `if is_component then depth end`, + row: testRow{"is_component": expr.BoolVal(false), "depth": expr.IntVal(5)}, + expected: expr.NullVal(), + }, + { + name: "nested if-then-else", + exprStr: `if depth > 10 then "deep" elif depth > 5 then "medium" else "shallow" end`, + row: testRow{"depth": expr.IntVal(7)}, + expected: expr.StringVal("medium"), + }, + { + name: "if in boolean context", + exprStr: `if is_component then depth > 3 else depth > 5 end`, + row: testRow{"is_component": expr.BoolVal(true), "depth": expr.IntVal(4)}, + expected: expr.BoolVal(true), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + parsed, err := expr.Parse(tt.exprStr) + require.NoError(t, err) + result := parsed.Eval(tt.row) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestParse_StringInterpolation(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + exprStr string + row testRow + expected string + }{ + { + name: "simple interpolation", + exprStr: `"hello \(name)"`, + row: testRow{"name": expr.StringVal("world")}, + expected: "hello world", + }, + { + name: "interpolation with expr", + exprStr: `"\(name) has depth \(depth)"`, + row: testRow{"name": expr.StringVal("Pet"), "depth": expr.IntVal(3)}, + expected: "Pet has depth 3", + }, + { + name: "no interpolation", + exprStr: `"plain string"`, + row: testRow{}, + expected: "plain string", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + parsed, err := expr.Parse(tt.exprStr) + require.NoError(t, err) + result := parsed.Eval(tt.row) + assert.Equal(t, expr.KindString, result.Kind) + assert.Equal(t, 
tt.expected, result.Str) + }) + } +} + +func TestParse_IfThenElse_Error(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + exprStr string + }{ + {"missing then", `if true depth end`}, + {"missing end", `if true then depth`}, + {"missing end after else", `if true then depth else 0`}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + _, err := expr.Parse(tt.exprStr) + assert.Error(t, err) + }) + } +} + +func TestParse_InterpolationError(t *testing.T) { + t.Parallel() + + // Unterminated interpolation + _, err := expr.Parse(`"hello \(name"`) + require.Error(t, err) +} + func TestParse_ComplexPrecedence(t *testing.T) { t.Parallel() diff --git a/oq/field.go b/oq/field.go index 0db8956..fbca276 100644 --- a/oq/field.go +++ b/oq/field.go @@ -2,6 +2,7 @@ package oq import ( "strconv" + "strings" "github.com/speakeasy-api/openapi/graph" "github.com/speakeasy-api/openapi/oq/expr" @@ -12,9 +13,16 @@ import ( type rowAdapter struct { row Row g *graph.SchemaGraph + env map[string]expr.Value } func (r rowAdapter) Field(name string) expr.Value { + if strings.HasPrefix(name, "$") && r.env != nil { + if v, ok := r.env[name]; ok { + return v + } + return expr.NullVal() + } return fieldValue(r.row, name, r.g) } diff --git a/oq/module.go b/oq/module.go new file mode 100644 index 0000000..8566c24 --- /dev/null +++ b/oq/module.go @@ -0,0 +1,113 @@ +package oq + +import ( + "fmt" + "os" + "path/filepath" + "strings" +) + +// LoadModule loads function definitions from a .oq module file. 
+func LoadModule(path string, searchPaths []string) ([]FuncDef, error) { + resolved, err := resolveModulePath(path, searchPaths) + if err != nil { + return nil, err + } + + data, err := os.ReadFile(resolved) + if err != nil { + return nil, fmt.Errorf("reading module %q: %w", resolved, err) + } + + q, err := parseDeclarations(string(data)) + if err != nil { + return nil, fmt.Errorf("parsing module %q: %w", resolved, err) + } + + return q.Defs, nil +} + +func resolveModulePath(path string, searchPaths []string) (string, error) { + if !strings.HasSuffix(path, ".oq") { + path = path + ".oq" + } + + if filepath.IsAbs(path) { + if _, err := os.Stat(path); err == nil { + return path, nil + } + } + + allPaths := make([]string, 0, len(searchPaths)+2) + allPaths = append(allPaths, ".") + allPaths = append(allPaths, searchPaths...) + if home, err := os.UserHomeDir(); err == nil { + allPaths = append(allPaths, filepath.Join(home, ".config", "oq")) + } + + for _, dir := range allPaths { + full := filepath.Join(dir, path) + if _, err := os.Stat(full); err == nil { + return full, nil + } + } + + return "", fmt.Errorf("module %q not found in search paths", path) +} + +// ExpandDefs performs text-level macro expansion on pipeline segments. +// Each segment that matches a def name gets replaced with the def's body +// (with params substituted). 
+func ExpandDefs(pipelineText string, defs []FuncDef) (string, error) { + if len(defs) == 0 { + return pipelineText, nil + } + + defMap := make(map[string]FuncDef, len(defs)) + for _, d := range defs { + defMap[d.Name] = d + } + + parts := splitPipeline(pipelineText) + var expanded []string + + for i, part := range parts { + part = strings.TrimSpace(part) + if part == "" { + continue + } + if i == 0 { + // Source — don't expand + expanded = append(expanded, part) + continue + } + + keyword, args, isCall := splitKeywordCall(part) + if !isCall { + keyword, _ = splitFirst(part) + } + + def, ok := defMap[strings.ToLower(keyword)] + if !ok { + expanded = append(expanded, part) + continue + } + + body := def.Body + if isCall && len(def.Params) > 0 { + callArgs := splitSemicolonArgs(args) + if len(callArgs) != len(def.Params) { + return "", fmt.Errorf("def %q expects %d params, got %d", def.Name, len(def.Params), len(callArgs)) + } + for j, param := range def.Params { + body = strings.ReplaceAll(body, param, strings.TrimSpace(callArgs[j])) + } + } else if !isCall && len(def.Params) > 0 { + return "", fmt.Errorf("def %q requires %d params", def.Name, len(def.Params)) + } + + expanded = append(expanded, body) + } + + return strings.Join(expanded, " | "), nil +} diff --git a/oq/oq.go b/oq/oq.go index 2809c27..447efb4 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -51,10 +51,40 @@ type GroupResult struct { // Execute parses and executes a query against the given graph. func Execute(query string, g *graph.SchemaGraph) (*Result, error) { - stages, err := Parse(query) + return ExecuteWithSearchPaths(query, g, nil) +} + +// ExecuteWithSearchPaths parses and executes a query, searching for modules in the given paths. 
+func ExecuteWithSearchPaths(query string, g *graph.SchemaGraph, searchPaths []string) (*Result, error) { + decls, err := parseDeclarations(query) + if err != nil { + return nil, fmt.Errorf("parse error: %w", err) + } + + // Resolve includes + for _, inc := range decls.Includes { + defs, loadErr := LoadModule(inc, searchPaths) + if loadErr != nil { + return nil, fmt.Errorf("include %q: %w", inc, loadErr) + } + decls.Defs = append(decls.Defs, defs...) + } + + // Text-level def expansion before parsing pipeline + pipelineText, err := ExpandDefs(decls.PipelineText, decls.Defs) + if err != nil { + return nil, fmt.Errorf("def expansion: %w", err) + } + + if pipelineText == "" { + return &Result{}, nil + } + + stages, err := parsePipeline(pipelineText) if err != nil { return nil, fmt.Errorf("parse error: %w", err) } + return run(stages, g) } @@ -97,18 +127,35 @@ const ( StageClusters StageTagBoundary StageSharedRefs + StageLast + StageLet ) // Stage represents a single stage in the query pipeline. type Stage struct { Kind StageKind Source string // for StageSource - Expr string // for StageWhere + Expr string // for StageWhere, StageLet Fields []string // for StageSelect, StageGroupBy SortField string // for StageSort SortDesc bool // for StageSort - Limit int // for StageTake, StageSample, StageTop, StageBottom + Limit int // for StageTake, StageLast, StageSample, StageTop, StageBottom PathFrom string // for StagePath PathTo string // for StagePath Format string // for StageFormat + VarName string // for StageLet +} + +// Query represents a parsed query with optional includes, defs, and pipeline stages. +type Query struct { + Includes []string + Defs []FuncDef + Stages []Stage +} + +// FuncDef represents a user-defined function. 
+type FuncDef struct { + Name string + Params []string // with $ prefix + Body string // raw pipeline text } diff --git a/oq/oq_test.go b/oq/oq_test.go index 5c7a059..e72eedc 100644 --- a/oq/oq_test.go +++ b/oq/oq_test.go @@ -338,9 +338,9 @@ func TestExecute_Explain_Success(t *testing.T) { result, err := oq.Execute("schemas.components | where depth > 5 | sort depth desc | take 10 | explain", g) require.NoError(t, err) assert.Contains(t, result.Explain, "Source: schemas.components", "explain should show source") - assert.Contains(t, result.Explain, "Filter: where depth > 5", "explain should show filter stage") - assert.Contains(t, result.Explain, "Sort: depth descending", "explain should show sort stage") - assert.Contains(t, result.Explain, "Limit: take 10", "explain should show limit stage") + assert.Contains(t, result.Explain, "Filter: select(depth > 5)", "explain should show filter stage") + assert.Contains(t, result.Explain, "Sort: sort_by(depth; desc)", "explain should show sort stage") + assert.Contains(t, result.Explain, "Limit: first(10)", "explain should show limit stage") } func TestExecute_Fields_Schemas_Success(t *testing.T) { @@ -1430,6 +1430,215 @@ func TestFormatMarkdown_Explain(t *testing.T) { assert.Contains(t, md, "Source: schemas", "markdown should render explain output") } +// --- New jq-style syntax tests --- + +func TestParse_NewSyntax_Success(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + query string + }{ + {"select filter", `schemas | select(depth > 3)`}, + {"pick fields", "schemas | pick name, depth"}, + {"sort_by asc", "schemas | sort_by(depth)"}, + {"sort_by desc", "schemas | sort_by(depth; desc)"}, + {"first", "schemas | first(5)"}, + {"last", "schemas | last(5)"}, + {"length", "schemas | length"}, + {"group_by", "schemas | group_by(type)"}, + {"sample call", "schemas | sample(3)"}, + {"neighbors call", "schemas | neighbors(2)"}, + {"path call", "schemas | path(Pet; Address)"}, + {"top call", "schemas | top(3; 
depth)"}, + {"bottom call", "schemas | bottom(3; depth)"}, + {"format call", "schemas | format(json)"}, + {"let binding", `schemas | select(name == "Pet") | let $pet = name`}, + {"full new pipeline", `schemas.components | select(depth > 5) | sort_by(depth; desc) | first(10) | pick name, depth`}, + {"def inline", `def hot: select(in_degree > 0); schemas.components | hot`}, + {"def with params", `def impact($name): select(name == $name); schemas.components | impact("Pet")`}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + stages, err := oq.Parse(tt.query) + require.NoError(t, err, "query: %s", tt.query) + assert.NotEmpty(t, stages, "should parse into non-empty stages") + }) + } +} + +func TestExecute_SelectFilter_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | select(type == "object") | pick name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + assert.Contains(t, names, "Pet", "select filter should match Pet") + assert.Contains(t, names, "Owner", "select filter should match Owner") +} + +func TestExecute_SortBy_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | sort_by(property_count; desc) | first(3) | pick name, property_count", g) + require.NoError(t, err) + assert.LessOrEqual(t, len(result.Rows), 3, "should return at most 3 rows") +} + +func TestExecute_First_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | first(3)", g) + require.NoError(t, err) + assert.Len(t, result.Rows, 3, "first should return exactly 3 rows") +} + +func TestExecute_Last_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | last(2)", g) + require.NoError(t, err) + assert.Len(t, result.Rows, 2, "last should return exactly 2 rows") +} + +func TestExecute_Length_Success(t 
*testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | length", g) + require.NoError(t, err) + assert.True(t, result.IsCount, "length should be a count result") + assert.Positive(t, result.Count, "count should be positive") +} + +func TestExecute_GroupBy_NewSyntax_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | group_by(type)", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Groups, "should have groups") +} + +func TestExecute_LetBinding_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // let $pet = name, then use $pet in subsequent filter + result, err := oq.Execute(`schemas.components | select(name == "Pet") | let $pet = name | reachable | select(name != $pet) | pick name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + assert.NotContains(t, names, "Pet", "should not include the $pet variable value") + assert.Contains(t, names, "Owner", "should include reachable schemas") +} + +func TestExecute_DefExpansion_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`def hot: select(in_degree > 0); schemas.components | hot | pick name`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "def expansion should produce results") + + // All results should have in_degree > 0 + for _, row := range result.Rows { + v := oq.FieldValuePublic(row, "in_degree", g) + assert.Greater(t, v.Int, 0, "hot filter should require in_degree > 0") + } +} + +func TestExecute_DefWithParams_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`def impact($name): select(name == $name) | blast-radius; schemas.components | impact("Pet")`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "parameterized def should produce results") +} + +func TestExecute_AlternativeOperator_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) 
+ + // name // "none" — name is always set, so should not be "none" + result, err := oq.Execute(`schemas.components | select(name // "none" != "none") | pick name`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "alternative operator should work") +} + +func TestExecute_IfThenElse_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | select(if is_component then depth >= 0 else true end) | pick name`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows, "if-then-else should work in select") +} + +func TestExecute_ExplainNewSyntax_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | select(depth > 5) | sort_by(depth; desc) | first(10) | pick name, depth | explain`, g) + require.NoError(t, err) + assert.Contains(t, result.Explain, "Filter: select(depth > 5)", "explain should show select filter") + assert.Contains(t, result.Explain, "Sort: sort_by(depth; desc)", "explain should show sort_by") + assert.Contains(t, result.Explain, "Limit: first(10)", "explain should show first") + assert.Contains(t, result.Explain, "Project: pick name, depth", "explain should show pick") +} + +func TestExecute_ExplainLast_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | last(3) | explain", g) + require.NoError(t, err) + assert.Contains(t, result.Explain, "Limit: last(3)", "explain should show last") +} + +func TestExecute_ExplainLet_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | select(name == "Pet") | let $pet = name | explain`, g) + require.NoError(t, err) + assert.Contains(t, result.Explain, "Bind: let $pet = name", "explain should show let binding") +} + +func TestParse_NewSyntax_Error(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + query string + }{ + {"select call empty", "schemas 
| select()"}, + {"sort_by no parens", "schemas | sort_by depth"}, + {"group_by no parens", "schemas | group_by type"}, + {"let no dollar", "schemas | let x = name"}, + {"let no equals", "schemas | let $x name"}, + {"let empty expr", "schemas | let $x ="}, + {"def missing colon", "def hot select(depth > 0); schemas | hot"}, + {"def missing semicolon", "def hot: select(depth > 0) schemas | hot"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + _, err := oq.Parse(tt.query) + assert.Error(t, err, "query should fail: %s", tt.query) + }) + } +} + // collectNames extracts the "name" field from all rows in the result. func collectNames(result *oq.Result, g *graph.SchemaGraph) []string { var names []string diff --git a/oq/parse.go b/oq/parse.go index a0c8835..98b8258 100644 --- a/oq/parse.go +++ b/oq/parse.go @@ -7,9 +7,111 @@ import ( "strings" ) -// Parse splits a pipeline query string into stages. +// declarations holds parsed includes, defs, and the raw remaining pipeline text. +type declarations struct { + Includes []string + Defs []FuncDef + PipelineText string +} + +// parseDeclarations scans for include/def declarations at the start of a query. 
+func parseDeclarations(query string) (*declarations, error) { + d := &declarations{} + remaining := strings.TrimSpace(query) + + for { + remaining = strings.TrimSpace(remaining) + if remaining == "" { + break + } + if strings.HasPrefix(remaining, "include ") { + rest := remaining[len("include "):] + semi := findUnquotedSemicolon(rest) + if semi < 0 { + return nil, errors.New("include missing terminating ;") + } + path := strings.TrimSpace(rest[:semi]) + path = strings.Trim(path, "\"") + if path == "" { + return nil, errors.New("include requires a path") + } + d.Includes = append(d.Includes, path) + remaining = rest[semi+1:] + continue + } + if strings.HasPrefix(remaining, "def ") { + rest := remaining[len("def "):] + colonIdx := strings.Index(rest, ":") + if colonIdx < 0 { + return nil, errors.New("def missing :") + } + sig := strings.TrimSpace(rest[:colonIdx]) + body := rest[colonIdx+1:] + semi := findUnquotedSemicolon(body) + if semi < 0 { + return nil, errors.New("def missing terminating ;") + } + bodyStr := strings.TrimSpace(body[:semi]) + remaining = body[semi+1:] + + fd, err := parseFuncSig(sig) + if err != nil { + return nil, err + } + fd.Body = bodyStr + d.Defs = append(d.Defs, fd) + continue + } + break + } + + d.PipelineText = remaining + return d, nil +} + +// ParseQuery parses a full query string including optional includes, defs, and pipeline. 
+func ParseQuery(query string) (*Query, error) { + d, err := parseDeclarations(query) + if err != nil { + return nil, err + } + + q := &Query{ + Includes: d.Includes, + Defs: d.Defs, + } + + if d.PipelineText == "" { + if len(q.Defs) > 0 || len(q.Includes) > 0 { + return q, nil + } + return nil, errors.New("empty query") + } + + // Expand defs at text level before parsing + expanded, err := ExpandDefs(d.PipelineText, d.Defs) + if err != nil { + return nil, err + } + + stages, err := parsePipeline(expanded) + if err != nil { + return nil, err + } + q.Stages = stages + return q, nil +} + +// Parse splits a pipeline query string into stages (backward compat). func Parse(query string) ([]Stage, error) { - // Split by pipe, respecting quoted strings + q, err := ParseQuery(query) + if err != nil { + return nil, err + } + return q.Stages, nil +} + +func parsePipeline(query string) ([]Stage, error) { parts := splitPipeline(query) if len(parts) == 0 { return nil, errors.New("empty query") @@ -24,7 +126,6 @@ func Parse(query string) ([]Stage, error) { } if i == 0 { - // First part is a source stages = append(stages, Stage{Kind: StageSource, Source: part}) continue } @@ -40,26 +141,62 @@ func Parse(query string) ([]Stage, error) { } func parseStage(s string) (Stage, error) { - // Extract the keyword - keyword, rest := splitFirst(s) + // Try keyword-call syntax first: select(...), sort_by(...), etc. 
+ keyword, args, isCall := splitKeywordCall(s) + if !isCall { + keyword, args = splitFirst(s) + } keyword = strings.ToLower(keyword) switch keyword { + // New jq-style: select(expr) replaces where + case "select": + if isCall { + // select(expr) → filter + if args == "" { + return Stage{}, errors.New("select() requires an expression") + } + return Stage{Kind: StageWhere, Expr: args}, nil + } + // select f1, f2 → old-style field projection — use pick instead + // But keep for backward compat during migration + if args == "" { + return Stage{}, errors.New("select requires field names") + } + fields := parseCSV(args) + return Stage{Kind: StageSelect, Fields: fields}, nil + + case "pick": + if args == "" { + return Stage{}, errors.New("pick requires field names") + } + fields := parseCSV(args) + return Stage{Kind: StageSelect, Fields: fields}, nil + + // where (legacy, still supported) case "where": - if rest == "" { + if args == "" { return Stage{}, errors.New("where requires an expression") } - return Stage{Kind: StageWhere, Expr: rest}, nil + return Stage{Kind: StageWhere, Expr: args}, nil - case "select": - if rest == "" { - return Stage{}, errors.New("select requires field names") + case "sort_by": + if isCall { + parts := splitSemicolonArgs(args) + if len(parts) == 0 || parts[0] == "" { + return Stage{}, errors.New("sort_by requires a field name") + } + desc := false + if len(parts) >= 2 && strings.TrimSpace(parts[1]) == "desc" { + desc = true + } + return Stage{Kind: StageSort, SortField: strings.TrimSpace(parts[0]), SortDesc: desc}, nil } - fields := parseCSV(rest) - return Stage{Kind: StageSelect, Fields: fields}, nil + return Stage{}, errors.New("sort_by requires parentheses: sort_by(field) or sort_by(field; desc)") + // Legacy sort case "sort": - parts := strings.Fields(rest) + parts := strings.Fields(args) if len(parts) == 0 { return Stage{}, errors.New("sort requires a field name") } @@ -69,21 +206,65 @@ func parseStage(s string) (Stage, error) { } return 
Stage{Kind: StageSort, SortField: parts[0], SortDesc: desc}, nil + case "first": + if isCall { + n, err := strconv.Atoi(strings.TrimSpace(args)) + if err != nil { + return Stage{}, fmt.Errorf("first requires a number: %w", err) + } + return Stage{Kind: StageTake, Limit: n}, nil + } + // bare "first" with space arg + n, err := strconv.Atoi(strings.TrimSpace(args)) + if err != nil { + return Stage{}, fmt.Errorf("first requires a number: %w", err) + } + return Stage{Kind: StageTake, Limit: n}, nil + + case "last": + if isCall { + n, err := strconv.Atoi(strings.TrimSpace(args)) + if err != nil { + return Stage{}, fmt.Errorf("last requires a number: %w", err) + } + return Stage{Kind: StageLast, Limit: n}, nil + } + n, err := strconv.Atoi(strings.TrimSpace(args)) + if err != nil { + return Stage{}, fmt.Errorf("last requires a number: %w", err) + } + return Stage{Kind: StageLast, Limit: n}, nil + + // Legacy take/head case "take", "head": - n, err := strconv.Atoi(strings.TrimSpace(rest)) + n, err := strconv.Atoi(strings.TrimSpace(args)) if err != nil { return Stage{}, fmt.Errorf("take requires a number: %w", err) } return Stage{Kind: StageTake, Limit: n}, nil + case "length": + return Stage{Kind: StageCount}, nil + case "unique": return Stage{Kind: StageUnique}, nil + case "group_by": + if isCall { + if args == "" { + return Stage{}, errors.New("group_by requires a field name") + } + fields := parseCSV(args) + return Stage{Kind: StageGroupBy, Fields: fields}, nil + } + return Stage{}, errors.New("group_by requires parentheses: group_by(field)") + + // Legacy group-by case "group-by": - if rest == "" { + if args == "" { return Stage{}, errors.New("group-by requires a field name") } - fields := parseCSV(rest) + fields := parseCSV(args) return Stage{Kind: StageGroupBy, Fields: fields}, nil case "count": @@ -123,21 +304,60 @@ func parseStage(s string) (Stage, error) { return Stage{Kind: StageFields}, nil case "sample": - n, err := strconv.Atoi(strings.TrimSpace(rest)) + if 
isCall { + n, err := strconv.Atoi(strings.TrimSpace(args)) + if err != nil { + return Stage{}, fmt.Errorf("sample requires a number: %w", err) + } + return Stage{Kind: StageSample, Limit: n}, nil + } + n, err := strconv.Atoi(strings.TrimSpace(args)) if err != nil { return Stage{}, fmt.Errorf("sample requires a number: %w", err) } return Stage{Kind: StageSample, Limit: n}, nil + case "neighbors": + if isCall { + n, err := strconv.Atoi(strings.TrimSpace(args)) + if err != nil { + return Stage{}, fmt.Errorf("neighbors requires a depth number: %w", err) + } + return Stage{Kind: StageNeighbors, Limit: n}, nil + } + n, err := strconv.Atoi(strings.TrimSpace(args)) + if err != nil { + return Stage{}, fmt.Errorf("neighbors requires a depth number: %w", err) + } + return Stage{Kind: StageNeighbors, Limit: n}, nil + case "path": - from, to := parseTwoArgs(rest) + if isCall { + parts := splitSemicolonArgs(args) + if len(parts) < 2 || strings.TrimSpace(parts[0]) == "" || strings.TrimSpace(parts[1]) == "" { + return Stage{}, errors.New("path requires two schema names") + } + return Stage{Kind: StagePath, PathFrom: strings.TrimSpace(parts[0]), PathTo: strings.TrimSpace(parts[1])}, nil + } + from, to := parseTwoArgs(args) if from == "" || to == "" { return Stage{}, errors.New("path requires two schema names") } return Stage{Kind: StagePath, PathFrom: from, PathTo: to}, nil case "top": - parts := strings.Fields(rest) + if isCall { + parts := splitSemicolonArgs(args) + if len(parts) < 2 { + return Stage{}, errors.New("top requires a number and a field name") + } + n, err := strconv.Atoi(strings.TrimSpace(parts[0])) + if err != nil { + return Stage{}, fmt.Errorf("top requires a number: %w", err) + } + return Stage{Kind: StageTop, Limit: n, SortField: strings.TrimSpace(parts[1])}, nil + } + parts := strings.Fields(args) if len(parts) < 2 { return Stage{}, errors.New("top requires a number and a field name") } @@ -148,7 +368,18 @@ func parseStage(s string) (Stage, error) { return 
Stage{Kind: StageTop, Limit: n, SortField: parts[1]}, nil case "bottom": - parts := strings.Fields(rest) + if isCall { + parts := splitSemicolonArgs(args) + if len(parts) < 2 { + return Stage{}, errors.New("bottom requires a number and a field name") + } + n, err := strconv.Atoi(strings.TrimSpace(parts[0])) + if err != nil { + return Stage{}, fmt.Errorf("bottom requires a number: %w", err) + } + return Stage{Kind: StageBottom, Limit: n, SortField: strings.TrimSpace(parts[1])}, nil + } + parts := strings.Fields(args) if len(parts) < 2 { return Stage{}, errors.New("bottom requires a number and a field name") } @@ -159,7 +390,10 @@ func parseStage(s string) (Stage, error) { return Stage{Kind: StageBottom, Limit: n, SortField: parts[1]}, nil case "format": - f := strings.TrimSpace(rest) + f := strings.TrimSpace(args) + if isCall { + f = strings.TrimSpace(args) + } if f != "table" && f != "json" && f != "markdown" && f != "toon" { return Stage{}, fmt.Errorf("format must be table, json, markdown, or toon, got %q", f) } @@ -171,13 +405,6 @@ func parseStage(s string) (Stage, error) { case "blast-radius": return Stage{Kind: StageBlastRadius}, nil - case "neighbors": - n, err := strconv.Atoi(strings.TrimSpace(rest)) - if err != nil { - return Stage{}, fmt.Errorf("neighbors requires a depth number: %w", err) - } - return Stage{Kind: StageNeighbors, Limit: n}, nil - case "orphans": return Stage{Kind: StageOrphans}, nil @@ -196,11 +423,174 @@ func parseStage(s string) (Stage, error) { case "shared-refs": return Stage{Kind: StageSharedRefs}, nil + case "let": + return parseLet(args) + default: return Stage{}, fmt.Errorf("unknown stage: %q", keyword) } } +func parseLet(args string) (Stage, error) { + // let $var = expr + if args == "" || !strings.HasPrefix(args, "$") { + return Stage{}, errors.New("let requires $variable = expression") + } + eqIdx := strings.Index(args, "=") + if eqIdx < 0 { + return Stage{}, errors.New("let requires $variable = expression") + } + varName := 
strings.TrimSpace(args[:eqIdx]) + exprStr := strings.TrimSpace(args[eqIdx+1:]) + if !strings.HasPrefix(varName, "$") || len(varName) < 2 { + return Stage{}, errors.New("let variable must start with $") + } + if exprStr == "" { + return Stage{}, errors.New("let requires an expression after =") + } + return Stage{Kind: StageLet, VarName: varName, Expr: exprStr}, nil +} + +func parseFuncSig(sig string) (FuncDef, error) { + fd := FuncDef{} + parenIdx := strings.Index(sig, "(") + if parenIdx < 0 { + fd.Name = strings.TrimSpace(sig) + if fd.Name == "" { + return fd, errors.New("def requires a name") + } + return fd, nil + } + fd.Name = strings.TrimSpace(sig[:parenIdx]) + if fd.Name == "" { + return fd, errors.New("def requires a name") + } + closeIdx := strings.LastIndex(sig, ")") + if closeIdx < 0 { + return fd, errors.New("def params missing closing )") + } + paramStr := sig[parenIdx+1 : closeIdx] + for _, p := range splitSemicolonArgs(paramStr) { + p = strings.TrimSpace(p) + if p != "" { + if !strings.HasPrefix(p, "$") { + return fd, fmt.Errorf("def param %q must start with $", p) + } + fd.Params = append(fd.Params, p) + } + } + return fd, nil +} + +func findUnquotedSemicolon(s string) int { + inQuote := false + depth := 0 + for i := 0; i < len(s); i++ { + switch s[i] { + case '"': + inQuote = !inQuote + case '(': + if !inQuote { + depth++ + } + case ')': + if !inQuote { + depth-- + } + case ';': + if !inQuote && depth == 0 { + return i + } + } + } + return -1 +} + +// splitKeywordCall splits "select(expr)" into ("select", "expr", true). +// Returns ("", "", false) if s doesn't match keyword(...) form. +// The keyword must be a single word (no spaces before the opening paren). 
+func splitKeywordCall(s string) (string, string, bool) { + s = strings.TrimSpace(s) + parenIdx := strings.Index(s, "(") + if parenIdx < 0 { + return "", "", false + } + keyword := s[:parenIdx] + // Keyword must not contain spaces (single word only) + if strings.ContainsAny(keyword, " \t") { + return "", "", false + } + if keyword == "" { + return "", "", false + } + // Find matching closing paren (not just the last one — handle nested parens) + rest := s[parenIdx+1:] + depth := 1 + inQuote := false + end := -1 + for i := 0; i < len(rest); i++ { + switch rest[i] { + case '"': + inQuote = !inQuote + case '(': + if !inQuote { + depth++ + } + case ')': + if !inQuote { + depth-- + if depth == 0 { + end = i + } + } + } + if end >= 0 { + break + } + } + if end < 0 { + return "", "", false + } + // Ensure nothing after the closing paren + trailing := strings.TrimSpace(rest[end+1:]) + if trailing != "" { + return "", "", false + } + args := rest[:end] + return keyword, args, true +} + +func splitSemicolonArgs(s string) []string { + var parts []string + var current strings.Builder + depth := 0 + inQuote := false + + for i := 0; i < len(s); i++ { + ch := s[i] + switch { + case ch == '"': + inQuote = !inQuote + current.WriteByte(ch) + case ch == '(' && !inQuote: + depth++ + current.WriteByte(ch) + case ch == ')' && !inQuote: + depth-- + current.WriteByte(ch) + case ch == ';' && !inQuote && depth == 0: + parts = append(parts, current.String()) + current.Reset() + default: + current.WriteByte(ch) + } + } + if current.Len() > 0 { + parts = append(parts, current.String()) + } + return parts +} + func parseTwoArgs(s string) (string, string) { s = strings.TrimSpace(s) var args []string @@ -242,6 +632,7 @@ func splitPipeline(input string) []string { var parts []string var current strings.Builder inQuote := false + depth := 0 // paren depth — don't split | inside parens for i := 0; i < len(input); i++ { ch := input[i] @@ -249,7 +640,13 @@ func splitPipeline(input string) []string { 
case ch == '"': inQuote = !inQuote current.WriteByte(ch) - case ch == '|' && !inQuote: + case ch == '(' && !inQuote: + depth++ + current.WriteByte(ch) + case ch == ')' && !inQuote: + depth-- + current.WriteByte(ch) + case ch == '|' && !inQuote && depth == 0: parts = append(parts, current.String()) current.Reset() default: From 4bf74e8a198df7d18f4e29d848b34d42903544ff Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Fri, 13 Mar 2026 13:13:07 +0000 Subject: [PATCH 26/27] fix: lint issues, update docs for jq-style oq syntax Fix gocritic assignOp, gosec G304, staticcheck punctuation, and testifylint issues. Update README, query help text, and language reference to document new jq-style syntax (select, pick, sort_by, first/last, //, if-then-else, def/include, let). Fix has() to use != 0 instead of > 0 for correct non-zero semantics. Co-Authored-By: Claude Opus 4.6 --- cmd/openapi/commands/openapi/query.go | 47 +++--- .../commands/openapi/query_reference.go | 140 ++++++++++++------ oq/README.md | 124 +++++++++++----- oq/expr/expr.go | 2 +- oq/module.go | 4 +- oq/oq.go | 6 +- oq/oq_test.go | 2 +- oq/parse.go | 2 +- 8 files changed, 216 insertions(+), 111 deletions(-) diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go index 17f0f13..166bfa3 100644 --- a/cmd/openapi/commands/openapi/query.go +++ b/cmd/openapi/commands/openapi/query.go @@ -23,8 +23,8 @@ The query argument comes first, followed by an optional input file. If no file is given, reads from stdin. 
Examples: - # Deeply nested components - openapi spec query 'schemas.components | sort depth desc | take 10 | select name, depth' petstore.yaml + # Deeply nested components (jq-style syntax) + openapi spec query 'schemas.components | sort_by(depth; desc) | first(10) | pick name, depth' petstore.yaml # Pipe from stdin cat spec.yaml | openapi spec query 'schemas | count' @@ -32,40 +32,45 @@ Examples: # Explicit stdin openapi spec query 'schemas | count' - - # Wide union trees - openapi spec query 'schemas | where union_width > 0 | sort union_width desc | take 10' petstore.yaml + # Filter with select() + openapi spec query 'schemas | select(union_width > 0) | sort_by(union_width; desc) | first(10)' petstore.yaml # Dead components (no incoming references) - openapi spec query 'schemas.components | where in_degree == 0 | select name' petstore.yaml + openapi spec query 'schemas.components | select(in_degree == 0) | pick name' petstore.yaml - # Operation sprawl - openapi spec query 'operations | sort schema_count desc | take 10 | select name, schema_count' petstore.yaml + # Variable binding — exclude seed from reachable results + openapi spec query 'schemas | select(name == "Pet") | let $pet = name | reachable | select(name != $pet)' petstore.yaml - # Circular references - openapi spec query 'schemas | where is_circular | select name, path' petstore.yaml + # User-defined functions + openapi spec query 'def hot: select(in_degree > 5); schemas.components | hot | pick name' petstore.yaml - # Shortest path between schemas - openapi spec query 'schemas | path "Pet" "Address" | select name' petstore.yaml + # Alternative operator — fallback for null/falsy values + openapi spec query 'schemas | select(name // "none" != "none")' petstore.yaml - # Edge annotations - openapi spec query 'schemas.components | where name == "Pet" | refs-out | select name, edge_kind, edge_label' petstore.yaml + # If-then-else conditional + openapi spec query 'schemas | select(if is_component then depth 
> 3 else true end)' petstore.yaml # Blast radius - openapi spec query 'schemas.components | where name == "Error" | blast-radius | count' petstore.yaml + openapi spec query 'schemas.components | select(name == "Error") | blast-radius | length' petstore.yaml # Explain a query plan - openapi spec query 'schemas.components | where depth > 5 | sort depth desc | explain' petstore.yaml + openapi spec query 'schemas.components | select(depth > 5) | sort_by(depth; desc) | explain' petstore.yaml -Pipeline stages: +Pipeline stages (jq-style): Source: schemas, schemas.components, schemas.inline, operations Traversal: refs-out, refs-in, reachable, ancestors, properties, union-members, items, - ops, schemas, path <from> <to>, connected, blast-radius, neighbors <n> + ops, schemas, path(A; B), connected, blast-radius, neighbors(N) Analysis: orphans, leaves, cycles, clusters, tag-boundary, shared-refs - Filter: where <expr>, select <fields>, sort <field> [asc|desc], take/head <n>, - sample <n>, top <n> <field>, bottom <n> <field>, unique, group-by <field>, count - Meta: explain, fields, format + Filter: select(expr), pick <fields>, sort_by(field; desc), first(N), last(N), + sample(N), top(N; field), bottom(N; field), unique, group_by(field), length + Variables: let $var = expr + Functions: def name: body; def name($p): body; include "file.oq"; + Meta: explain, fields, format(table|json|markdown|toon) + + Legacy syntax (where, sort, take, head, select fields, group-by, count) is still supported. + +Expression operators: ==, !=, >, <, >=, <=, and, or, not, //, has(<field>), matches, + if-then-else-end, string interpolation \(expr)
 items Expand to array items schema (with edge annotations) ops Schemas → operations that use them schemas Operations → schemas they touch - path <from> <to> Shortest path between two named schemas + path(A; B) Shortest path between two named schemas connected Full connected component (schemas + operations) blast-radius Ancestors + all affected operations (change impact) - neighbors <n> Bidirectional neighborhood within N hops + neighbors(N) Bidirectional neighborhood within N hops ANALYSIS STAGES --------------- @@ -65,24 +65,46 @@ FILTER & TRANSFORM STAGES -------------------------- - where <expr> Filter rows by predicate expression - select <fields> Project specific fields (comma-separated) - sort <field> [desc] Sort by field (default ascending, add "desc" for descending) - take <n> Limit to first N results - head <n> Alias for take - sample <n> Deterministic pseudo-random sample of N rows - top <n> <field> Sort descending by field and take N (shorthand) - bottom <n> <field> Sort ascending by field and take N (shorthand) + select(expr) Filter rows by predicate expression (jq-style) + pick <fields> Project specific fields (comma-separated) + sort_by(field) Sort ascending by field + sort_by(field; desc) Sort descending by field + first(N) Limit to first N results + last(N) Limit to last N results + sample(N) Deterministic pseudo-random sample of N rows + top(N; field) Sort descending by field and take N (shorthand) + bottom(N; field) Sort ascending by field and take N (shorthand) unique Deduplicate rows by identity - group-by <field> Group rows and aggregate counts - count Count rows (terminal — returns a single number) + group_by(field) Group rows and aggregate counts + length Count rows (terminal — returns a single number) + let $var = expr Bind expression result to a variable for later stages + + Legacy syntax is still supported: + where <expr>, select <fields>, sort <field> [desc], take/head <n>, + group-by <field>, count + +FUNCTION DEFINITIONS & MODULES +------------------------------- +Define reusable pipeline fragments: + + def hot: select(in_degree > 10); + def 
 impact($name): select(name == $name) | blast-radius; + + Syntax: def name: body; + def name($p1; $p2): body; + +Load definitions from .oq files: + + include "stdlib.oq"; + + Search paths: current directory, then ~/.config/oq/ META STAGES ----------- explain Print the query execution plan instead of running it fields List available fields for the current result kind - format <fmt> Set output format: table, json, markdown, or toon + format(fmt) Set output format: table, json, markdown, or toon SCHEMA FIELDS ------------- @@ -133,17 +155,22 @@ properties, union-members, items): edge_label string Edge label: property name, array index, etc. edge_from string Source node name -WHERE EXPRESSIONS ------------------ -The where clause supports a predicate expression language: - - Comparison: == != > < >= <= - Logical: and or not - Functions: has(<field>) — true if field is non-null/non-zero - matches(<field>, "<regex>") — regex match - Infix: <field> matches "<regex>" - Grouping: ( ... ) - Literals: "string" 42 true false +EXPRESSIONS +----------- +The expression language is used in select(), let, and if-then-else: + + Comparison: == != > < >= <= + Logical: and or not + Alternative: // (returns left if truthy, else right) + Functions: has(<field>) — true if field is non-null/non-zero + matches(<field>, "<regex>") — regex match + Infix: <field> matches "<regex>" + Conditional: if <cond> then <expr> else <expr> end + if <cond> then <expr> elif <cond> then <expr> else <expr> end + Interpolation: "\(<expr>)" inside string literals + Grouping: ( ... 
) + Literals: "string" 42 true false + Variables: $var (bound by let) OUTPUT FORMATS -------------- @@ -154,70 +181,67 @@ OUTPUT FORMATS toon TOON (Token-Oriented Object Notation) tabular format Set via --format flag or inline format stage: - schemas | count | format json + schemas | length | format(json) EXAMPLES -------- - # Deeply nested components - schemas.components | sort depth desc | take 10 | select name, depth + # Deeply nested components (jq-style) + schemas.components | sort_by(depth; desc) | first(10) | pick name, depth # Wide union trees - schemas | where union_width > 0 | sort union_width desc | take 10 + schemas | select(union_width > 0) | sort_by(union_width; desc) | first(10) # Most referenced schemas - schemas.components | sort in_degree desc | take 10 | select name, in_degree + schemas.components | sort_by(in_degree; desc) | first(10) | pick name, in_degree # Dead components (no incoming references) - schemas.components | where in_degree == 0 | select name + schemas.components | select(in_degree == 0) | pick name # Operation sprawl - operations | sort schema_count desc | take 10 | select name, schema_count + operations | sort_by(schema_count; desc) | first(10) | pick name, schema_count # Circular references - schemas | where is_circular | select name, path + schemas | select(is_circular) | pick name, path # Schema count - schemas | count + schemas | length # Shortest path between schemas - schemas | path "Pet" "Address" | select name + schemas | path(Pet; Address) | pick name # Top 5 by in-degree - schemas.components | top 5 in_degree | select name, in_degree + schemas.components | top(5; in_degree) | pick name, in_degree # Walk an operation to find all connected schemas - operations | where name == "GET /users" | schemas | select name, type - - # Schemas used by an operation, then find connected operations - operations | where name == "GET /users" | schemas | ops | select name, method, path + operations | select(name == "GET /users") | schemas 
| pick name, type # Explain a query plan - schemas.components | where depth > 5 | sort depth desc | explain + schemas.components | select(depth > 5) | sort_by(depth; desc) | explain # List available fields schemas | fields # Regex filter - schemas | where name matches "Error.*" | select name, path + schemas | select(name matches "Error.*") | pick name, path # Complex filter - schemas | where property_count > 3 and not is_component | select name, property_count, path + schemas | select(property_count > 3 and not is_component) | pick name, property_count, path # Edge annotations — see how Pet references other schemas - schemas.components | where name == "Pet" | refs-out | select name, edge_kind, edge_label, edge_from + schemas.components | select(name == "Pet") | refs-out | pick name, edge_kind, edge_label, edge_from # Blast radius — what breaks if I change the Error schema? - schemas.components | where name == "Error" | blast-radius | count + schemas.components | select(name == "Error") | blast-radius | length # Neighborhood — schemas within 2 hops of Pet - schemas.components | where name == "Pet" | neighbors 2 | select name + schemas.components | select(name == "Pet") | neighbors(2) | pick name # Orphaned schemas — unreferenced by anything - schemas.components | orphans | select name + schemas.components | orphans | pick name # Leaf schemas — terminal nodes with no outgoing refs - schemas.components | leaves | select name, in_degree + schemas.components | leaves | pick name, in_degree # Detect reference cycles schemas | cycles @@ -226,8 +250,26 @@ EXAMPLES schemas.components | clusters # Cross-tag schemas — shared across team boundaries - schemas | tag-boundary | select name, tag_count + schemas | tag-boundary | pick name, tag_count # Schemas shared by all operations - operations | shared-refs | select name, op_count + operations | shared-refs | pick name, op_count + + # Variable binding — find Pet's reachable schemas (excluding Pet itself) + schemas | select(name 
== "Pet") | let $pet = name | reachable | select(name != $pet) | pick name + + # Alternative operator — fallback for missing values + schemas | select(name // "unnamed" != "unnamed") + + # If-then-else — conditional filtering + schemas | select(if is_component then depth > 3 else true end) + + # User-defined functions + def hot: select(in_degree > 10); + def impact($name): select(name == $name) | blast-radius; + schemas.components | hot | pick name, in_degree + + # Load functions from a module file + include "stdlib.oq"; + schemas.components | hot | pick name, in_degree ` diff --git a/oq/README.md b/oq/README.md index 65e6b34..21e4d83 100644 --- a/oq/README.md +++ b/oq/README.md @@ -8,11 +8,11 @@ # Count all schemas openapi spec query 'schemas | count' petstore.yaml -# Top 10 deepest component schemas -openapi spec query 'schemas.components | sort depth desc | take 10 | select name, depth' petstore.yaml +# Top 10 deepest component schemas (new jq-style) +openapi spec query 'schemas.components | sort_by(depth; desc) | first(10) | pick name, depth' petstore.yaml # Dead components (unreferenced) -openapi spec query 'schemas.components | where in_degree == 0 | select name' petstore.yaml +openapi spec query 'schemas.components | select(in_degree == 0) | pick name' petstore.yaml ``` Stdin is supported: @@ -51,10 +51,10 @@ source | stage | stage | ... | terminal | `items` | Array items schema (with edge annotations) | | `ops` | Schemas → operations | | `schemas` | Operations → schemas | -| `path ` | Shortest path between two schemas | +| `path(A; B)` | Shortest path between two schemas | | `connected` | Full connected component (schemas + operations) | | `blast-radius` | Ancestors + all affected operations | -| `neighbors ` | Bidirectional neighborhood within N hops | +| `neighbors(N)` | Bidirectional neighborhood within N hops | ### Analysis Stages @@ -71,16 +71,20 @@ source | stage | stage | ... 
| terminal | Stage | Description | |-------|-------------| -| `where ` | Filter by predicate | -| `select ` | Project fields | -| `sort [desc]` | Sort (ascending by default) | -| `take ` / `head ` | Limit results | -| `sample ` | Deterministic random sample | -| `top ` | Sort desc + take | -| `bottom ` | Sort asc + take | +| `select(expr)` | Filter by predicate (jq-style) | +| `pick f1, f2` | Project fields | +| `sort_by(field)` / `sort_by(field; desc)` | Sort (ascending by default) | +| `first(N)` | Limit to first N results | +| `last(N)` | Limit to last N results | +| `sample(N)` | Deterministic random sample | +| `top(N; field)` | Sort desc + take | +| `bottom(N; field)` | Sort asc + take | | `unique` | Deduplicate | -| `group-by ` | Group and count | -| `count` | Count rows | +| `group_by(field)` | Group and count | +| `length` | Count rows | +| `let $var = expr` | Bind expression result to a variable | + +**Legacy syntax** (`where`, `sort`, `take`, `head`, `select fields`, `group-by`, `count`) is still supported. ### Meta Stages @@ -88,7 +92,25 @@ source | stage | stage | ... 
| terminal |-------|-------------| | `explain` | Print query plan | | `fields` | List available fields | -| `format ` | Set output format (table/json/markdown/toon) | +| `format(fmt)` | Set output format (table/json/markdown/toon) | + +### Function Definitions & Modules + +Define reusable functions with `def` and load them from `.oq` files with `include`: + +``` +# Inline definitions +def hot: select(in_degree > 10); +def impact($name): select(name == $name) | blast-radius; +schemas.components | hot | pick name, in_degree + +# Load from file +include "stdlib.oq"; +schemas.components | hot | pick name, in_degree +``` + +Def syntax: `def name: body;` or `def name($p1; $p2): body;` +Module search paths: current directory, then `~/.config/oq/` ## Fields @@ -138,7 +160,9 @@ Available on rows produced by 1-hop traversal stages (`refs-out`, `refs-in`, `pr | `edge_label` | string | Edge label: property name, array index, etc. | | `edge_from` | string | Source node name | -## Where Expressions +## Expressions + +oq supports a rich expression language used in `select()`, `let`, and `if-then-else`: ``` depth > 5 @@ -147,9 +171,30 @@ name matches "Error.*" property_count > 3 and not is_component has(oneOf) and not has(discriminator) (depth > 10 or union_width > 5) and is_component +name // "unnamed" # alternative: fallback if null/falsy +if is_component then depth > 3 else true end # conditional +"prefix_\(name)" # string interpolation ``` -Operators: `==`, `!=`, `>`, `<`, `>=`, `<=`, `and`, `or`, `not`, `has()`, `matches()` +### Operators + +| Operator | Description | +|----------|-------------| +| `==`, `!=`, `>`, `<`, `>=`, `<=` | Comparison | +| `and`, `or`, `not` | Logical | +| `//` | Alternative (returns left if truthy, else right) | +| `has(field)` | True if field is non-null/non-zero | +| `matches "regex"` | Regex match | +| `if cond then a else b end` | Conditional (elif supported) | +| `\(expr)` | String interpolation inside `"..."` | + +### Variables + +Use `let` to 
bind values for use in later stages: + +``` +schemas | select(name == "Pet") | let $pet = name | reachable | select(name != $pet) +``` ## Output Formats @@ -157,7 +202,7 @@ Use `--format` flag or inline `format` stage: ```bash openapi spec query 'schemas | count' spec.yaml --format json -openapi spec query 'schemas | take 5 | format markdown' spec.yaml +openapi spec query 'schemas | first(5) | format(markdown)' spec.yaml ``` | Format | Description | @@ -171,46 +216,46 @@ openapi spec query 'schemas | take 5 | format markdown' spec.yaml ```bash # Wide union trees -schemas | where union_width > 0 | sort union_width desc | take 10 +schemas | select(union_width > 0) | sort_by(union_width; desc) | first(10) # Central schemas (most referenced) -schemas.components | sort in_degree desc | take 10 | select name, in_degree +schemas.components | sort_by(in_degree; desc) | first(10) | pick name, in_degree # Operation sprawl -operations | sort schema_count desc | take 10 | select name, schema_count +operations | sort_by(schema_count; desc) | first(10) | pick name, schema_count # Circular references -schemas | where is_circular | select name, path +schemas | select(is_circular) | pick name, path # Shortest path between schemas -schemas | path "Pet" "Address" | select name +schemas | path(Pet; Address) | pick name # Walk an operation to connected schemas and back to operations -operations | where name == "GET /users" | schemas | ops | select name, method, path +operations | select(name == "GET /users") | schemas | ops | pick name, method, path # Explain query plan -schemas.components | where depth > 5 | sort depth desc | explain +schemas.components | select(depth > 5) | sort_by(depth; desc) | explain # Regex filter -schemas | where name matches "Error.*" | select name, path +schemas | select(name matches "Error.*") | pick name, path # Group by type -schemas | group-by type +schemas | group_by(type) # Edge annotations — how does Pet reference other schemas? 
-schemas.components | where name == "Pet" | refs-out | select name, edge_kind, edge_label, edge_from +schemas.components | select(name == "Pet") | refs-out | pick name, edge_kind, edge_label, edge_from # Blast radius — what breaks if Error changes? -schemas.components | where name == "Error" | blast-radius | count +schemas.components | select(name == "Error") | blast-radius | length # 2-hop neighborhood -schemas.components | where name == "Pet" | neighbors 2 | select name +schemas.components | select(name == "Pet") | neighbors(2) | pick name # Orphaned schemas -schemas.components | orphans | select name +schemas.components | orphans | pick name # Leaf nodes -schemas.components | leaves | select name, in_degree +schemas.components | leaves | pick name, in_degree # Detect cycles schemas | cycles @@ -219,10 +264,21 @@ schemas | cycles schemas.components | clusters # Cross-tag schemas -schemas | tag-boundary | select name, tag_count +schemas | tag-boundary | pick name, tag_count # Schemas shared across all operations -operations | shared-refs | select name, op_count +operations | shared-refs | pick name, op_count + +# Variable binding — find Pet's reachable schemas (excluding Pet itself) +schemas | select(name == "Pet") | let $pet = name | reachable | select(name != $pet) | pick name + +# User-defined functions +def hot: select(in_degree > 10); +def impact($name): select(name == $name) | blast-radius; +schemas.components | hot | pick name, in_degree + +# Alternative operator — fallback for missing values +schemas | select(name // "unnamed" != "unnamed") | pick name ``` ## CLI Reference diff --git a/oq/expr/expr.go b/oq/expr/expr.go index c027436..45af718 100644 --- a/oq/expr/expr.go +++ b/oq/expr/expr.go @@ -117,7 +117,7 @@ func (e *notExpr) Eval(row Row) Value { func (e *hasExpr) Eval(row Row) Value { v := row.Field(e.field) - return Value{Kind: KindBool, Bool: v.Kind != KindNull && (v.Kind != KindInt || v.Int > 0) && (v.Kind != KindBool || v.Bool) && (v.Kind != 
KindString || v.Str != "")} + return Value{Kind: KindBool, Bool: v.Kind != KindNull && (v.Kind != KindInt || v.Int != 0) && (v.Kind != KindBool || v.Bool) && (v.Kind != KindString || v.Str != "")} } func (e *matchesExpr) Eval(row Row) Value { diff --git a/oq/module.go b/oq/module.go index 8566c24..a691e56 100644 --- a/oq/module.go +++ b/oq/module.go @@ -14,7 +14,7 @@ func LoadModule(path string, searchPaths []string) ([]FuncDef, error) { return nil, err } - data, err := os.ReadFile(resolved) + data, err := os.ReadFile(resolved) //nolint:gosec // module paths are user-provided query inputs, not untrusted if err != nil { return nil, fmt.Errorf("reading module %q: %w", resolved, err) } @@ -29,7 +29,7 @@ func LoadModule(path string, searchPaths []string) ([]FuncDef, error) { func resolveModulePath(path string, searchPaths []string) (string, error) { if !strings.HasSuffix(path, ".oq") { - path = path + ".oq" + path += ".oq" } if filepath.IsAbs(path) { diff --git a/oq/oq.go b/oq/oq.go index 447efb4..1973c38 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -1,8 +1,10 @@ // Package oq implements a pipeline query language for OpenAPI schema graphs. // -// Queries are written as pipeline expressions like: +// Queries are written as pipeline expressions with jq-inspired syntax: // -// schemas.components | where depth > 5 | sort depth desc | take 10 | select name, depth +// schemas.components | select(depth > 5) | sort_by(depth; desc) | first(10) | pick name, depth +// +// Legacy syntax (where, sort, take, select fields) is also supported. 
package oq import ( diff --git a/oq/oq_test.go b/oq/oq_test.go index e72eedc..7e50d04 100644 --- a/oq/oq_test.go +++ b/oq/oq_test.go @@ -1551,7 +1551,7 @@ func TestExecute_DefExpansion_Success(t *testing.T) { // All results should have in_degree > 0 for _, row := range result.Rows { v := oq.FieldValuePublic(row, "in_degree", g) - assert.Greater(t, v.Int, 0, "hot filter should require in_degree > 0") + assert.Positive(t, v.Int, "hot filter should require in_degree > 0") } } diff --git a/oq/parse.go b/oq/parse.go index 98b8258..72517df 100644 --- a/oq/parse.go +++ b/oq/parse.go @@ -43,7 +43,7 @@ func parseDeclarations(query string) (*declarations, error) { rest := remaining[len("def "):] colonIdx := strings.Index(rest, ":") if colonIdx < 0 { - return nil, errors.New("def missing :") + return nil, errors.New("def missing colon separator") } sig := strings.TrimSpace(rest[:colonIdx]) body := rest[colonIdx+1:] From 833a44d2df0005f9bc9b9c0a47b790dfa46bb1d8 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Fri, 13 Mar 2026 14:42:08 +0000 Subject: [PATCH 27/27] fix: update cmd/openapi dependency to latest commit Co-Authored-By: Claude Opus 4.6 --- cmd/openapi/go.mod | 6 +++--- cmd/openapi/go.sum | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cmd/openapi/go.mod b/cmd/openapi/go.mod index a59174e..2548055 100644 --- a/cmd/openapi/go.mod +++ b/cmd/openapi/go.mod @@ -1,12 +1,12 @@ module github.com/speakeasy-api/openapi/cmd/openapi -go 1.24.3 +go 1.25.0 require ( github.com/charmbracelet/bubbles v0.21.0 github.com/charmbracelet/bubbletea v1.3.10 github.com/charmbracelet/lipgloss v1.1.0 - github.com/speakeasy-api/openapi v1.19.6-0.20260313121936-e705517fb4e2 + github.com/speakeasy-api/openapi v1.20.1-0.20260313143718-19d9a40d4c09 github.com/speakeasy-api/openapi/openapi/linter/customrules v0.0.0-20260309010446-7ff6a9590a7f github.com/spf13/cobra v1.10.1 github.com/stretchr/testify v1.11.1 @@ -43,7 +43,7 @@ require ( github.com/spf13/pflag v1.0.9 // 
indirect github.com/vmware-labs/yaml-jsonpath v0.3.2 // indirect github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect - golang.org/x/sync v0.19.0 // indirect + golang.org/x/sync v0.20.0 // indirect golang.org/x/sys v0.36.0 // indirect golang.org/x/text v0.34.0 // indirect ) diff --git a/cmd/openapi/go.sum b/cmd/openapi/go.sum index 7048275..9544910 100644 --- a/cmd/openapi/go.sum +++ b/cmd/openapi/go.sum @@ -84,8 +84,8 @@ github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/speakeasy-api/jsonpath v0.6.3 h1:c+QPwzAOdrWvzycuc9HFsIZcxKIaWcNpC+xhOW9rJxU= github.com/speakeasy-api/jsonpath v0.6.3/go.mod h1:2cXloNuQ+RSXi5HTRaeBh7JEmjRXTiaKpFTdZiL7URI= -github.com/speakeasy-api/openapi v1.19.6-0.20260313121936-e705517fb4e2 h1:97FEnVTs9WkTzjrEmDCmwjKYA7JaCZMFFcR4mO4Jcxc= -github.com/speakeasy-api/openapi v1.19.6-0.20260313121936-e705517fb4e2/go.mod h1:UfKa7FqE4jgexJZuj51MmdHAFGmDv0Zaw3+yOd81YKU= +github.com/speakeasy-api/openapi v1.20.1-0.20260313143718-19d9a40d4c09 h1:aWkreRg/x9FEle9bDBidEB6P6JBCdzZrQRGgCd6E+Ys= +github.com/speakeasy-api/openapi v1.20.1-0.20260313143718-19d9a40d4c09/go.mod h1:5gOzfAL1nSm57JswBgbpLqoBMGFlabSlTbxTNgHHO/0= github.com/speakeasy-api/openapi/openapi/linter/customrules v0.0.0-20260309010446-7ff6a9590a7f h1:kwiHeGSILCUVEM9iSAUtifl1TLlvyDXmMjyW26/iX2k= github.com/speakeasy-api/openapi/openapi/linter/customrules v0.0.0-20260309010446-7ff6a9590a7f/go.mod h1:ALDg9E6LRTL5tMFlddVrLhc4JaarCHL65x2YkwL7xdg= github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s= @@ -107,8 +107,8 @@ golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs= golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod 
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= -golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= +golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=