diff --git a/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java b/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java index 02b2e2bcb1d..f1b48214b17 100644 --- a/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java +++ b/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java @@ -164,6 +164,16 @@ public static ExprValue fromObjectValue(Object o) { return timestampValue(((LocalDateTime) o).toInstant(ZoneOffset.UTC)); } else if (o instanceof TemporalAmount) { return intervalValue((TemporalAmount) o); + } else if (o instanceof byte[] bytes) { + // No type context here: OpenSearch `ip` and `binary` fields both collapse to + // VARBINARY in `OpenSearchSchemaBuilder`, so a 4- or 16-byte payload could be + // a genuine address or arbitrary binary that happens to be that length (4-byte + // ASCII strings like "INFO" or "WARN" are indistinguishable from a public IPv4 + // address byte-for-byte). Default to a safe, unambiguous encoding. Typed call + // sites should route IP cells through {@link #ipValue(String)} explicitly. + // TODO: split `ip` and `binary` into separate UDTs upstream so this branch can + // render IP cells in dotted form again. See `OpenSearchSchemaBuilder.mapFieldType`. + return stringValue(java.util.Base64.getEncoder().encodeToString(bytes)); } else { throw new ExpressionEvaluationException("unsupported object " + o.getClass()); } diff --git a/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java b/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java index ddfe5fd3556..19e2cda324a 100644 --- a/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java +++ b/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java @@ -6,13 +6,20 @@ package org.opensearch.sql.executor.analytics; import java.util.ArrayList; +import java.util.EnumSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.type.SqlTypeName; import org.opensearch.analytics.exec.QueryPlanExecutor; import org.opensearch.core.action.ActionListener; import org.opensearch.sql.ast.statement.ExplainMode; @@ -87,7 +94,7 @@ public void onResponse(Iterable rows) { try { List fields = plan.getRowType().getFieldList(); List results = convertRows(rows, fields); - Schema schema = buildSchema(fields); + Schema schema = buildSchema(fields, recoverOriginalDatetimeTypes(plan)); execMetric.set(System.nanoTime() - execStart); listener.onResponse(new QueryResponse(schema, results, Cursor.None)); } catch (Exception e) { @@ -132,11 +139,11 @@ private List convertRows(Iterable rows, List fields) { + private Schema buildSchema(List fields, List reportedTypes) { List columns = new ArrayList<>(); - for (RelDataTypeField field : fields) { - ExprType exprType = convertType(field.getType()); - columns.add(new Schema.Column(field.getName(), null, exprType)); + for (int i = 0; i < fields.size(); i++) { + ExprType exprType = convertType(reportedTypes.get(i)); + columns.add(new Schema.Column(fields.get(i).getName(), null, exprType)); } return new Schema(columns); } @@ -148,4 +155,68 @@ private ExprType convertType(RelDataType type) { return org.opensearch.sql.data.type.ExprCoreType.UNKNOWN; } } + + /** + * Datetime SqlTypeNames produced by the SQL-plugin engine pipeline. Used by {@link + * #recoverOriginalDatetimeTypes} to detect {@code DatetimeOutputCastRule}'s {@code + * CAST( AS VARCHAR)} output slots. + */ + private static final EnumSet DATETIME_TYPES = + EnumSet.of( + SqlTypeName.TIMESTAMP, + SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE, + SqlTypeName.DATE, + SqlTypeName.TIME); + + /** + * Recovers the pre-cast datetime types for the schema. {@code DatetimeOutputCastRule} (api + * post-analysis pass) wraps every datetime-typed root column in {@code CAST(... AS VARCHAR)} so + * the analytics-engine backend can emit PPL's documented space-separator format via {@code + * to_char} ({@code DatetimeOutputCastRewriter} on the OpenSearch sandbox side). The downside is + * that {@code plan.getRowType()} then advertises those columns as VARCHAR — the response schema + * would report {@code "string"} for what tests (and callers) still expect as {@code "timestamp"}. + * The legacy / Calcite-reference path doesn't have this divergence: the wire value is a formatted + * string while the schema column type is the original datetime type. + * + *

This walk inspects the OUTERMOST {@link Project} (descending through a single {@link Sort}, + * the {@code LogicalSystemLimit} wrapper) and, for any slot of the exact shape {@code + * CAST( AS VARCHAR)}, swaps in the inner {@link RelDataType} for the schema. Values + * still come back as the strings DataFusion emitted; only the schema column type is restored. + * + *

Falls back to the slot's reported type for non-cast slots, non-datetime sources, or when the + * root isn't a Project (raw scan / aggregate fragment). + */ + private static List recoverOriginalDatetimeTypes(RelNode plan) { + List fields = plan.getRowType().getFieldList(); + List reportedTypes = new ArrayList<>(fields.size()); + for (RelDataTypeField field : fields) { + reportedTypes.add(field.getType()); + } + RelNode root = plan; + if (root instanceof Sort sort && sort.getInput() instanceof Project) { + root = sort.getInput(); + } + if (!(root instanceof Project project)) { + return reportedTypes; + } + List slots = project.getProjects(); + int n = Math.min(slots.size(), reportedTypes.size()); + for (int i = 0; i < n; i++) { + RexNode slot = slots.get(i); + if (!(slot instanceof RexCall call)) { + continue; + } + if (call.getKind() != SqlKind.CAST) { + continue; + } + if (call.getType().getSqlTypeName() != SqlTypeName.VARCHAR) { + continue; + } + RexNode source = call.getOperands().get(0); + if (DATETIME_TYPES.contains(source.getType().getSqlTypeName())) { + reportedTypes.set(i, source.getType()); + } + } + return reportedTypes; + } } diff --git a/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java b/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java index 888042ad789..4cb7b48f24c 100644 --- a/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java +++ b/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java @@ -244,6 +244,18 @@ public void constructDateAndTimeValue() { ExprValueUtils.fromObjectValue("2012-07-07 01:01:01", TIMESTAMP)); } + @Test + public void fromObjectValue_byte_array_returns_base64_string() { + // Without type context the factory can't distinguish `ip` from `binary` + // (both collapse to VARBINARY in OpenSearchSchemaBuilder), so every byte[] + // is base64-encoded. Typed routing happens at the call site. + assertEquals( + new ExprStringValue("AQIDBA=="), ExprValueUtils.fromObjectValue(new byte[] {1, 2, 3, 4})); + assertEquals( + new ExprStringValue("AQIDBAU="), + ExprValueUtils.fromObjectValue(new byte[] {1, 2, 3, 4, 5})); + } + @Test public void fromObjectValue_double_infinity_returns_null() { assertTrue(ExprValueUtils.fromObjectValue(Double.POSITIVE_INFINITY).isNull()); diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java index b8e5b2a5722..791d7ca7db7 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java @@ -54,10 +54,18 @@ public void testSearchCommandWithoutSearchKeyword() throws IOException { @Test public void testSearchCommandWithSpecialIndexName() throws IOException { - executeRequest(new Request("PUT", "/logs-2021.01.11")); + // A minimal `{"properties":{}}` mapping is required so the analytics-engine + // catalog (OpenSearchSchemaBuilder.buildSchema) actually surfaces the index — + // mapping-less indices are skipped and the parser then fails with + // "Table 'logs-2021.01.11' not found". Routing through TestUtils also + // honors the tests.analytics.parquet_indices toggle. + String minimalMapping = "{\"mappings\":{\"properties\":{}}}"; + org.opensearch.sql.legacy.TestUtils.createIndexByRestClient( + client(), "logs-2021.01.11", minimalMapping); verifyDataRows(executeQuery("search source=logs-2021.01.11")); - executeRequest(new Request("PUT", "/logs-7.10.0-2021.01.11")); + org.opensearch.sql.legacy.TestUtils.createIndexByRestClient( + client(), "logs-7.10.0-2021.01.11", minimalMapping); verifyDataRows(executeQuery("search source=logs-7.10.0-2021.01.11")); }