From b283dcc78e9f122ccadff4113a6eddf726575d12 Mon Sep 17 00:00:00 2001
From: Kai Huang <ahkcs@amazon.com>
Date: Fri, 15 May 2026 11:19:50 -0700
Subject: [PATCH 1/4] Inject parquet settings in
 testSearchCommandWithSpecialIndexName

The test creates `logs-2021.01.11` and `logs-7.10.0-2021.01.11` directly
via raw PUTs to exercise the parser's handling of dot/version-prefixed
index names. The raw path bypasses `TestUtils.createIndexByRestClient`,
so the analytics-engine `tests.analytics.parquet_indices=true` toggle
never injects the composite/parquet settings. On the analytics-engine
route this surfaces as `Table 'logs-2021.01.11' not found` (the
catalog-builder only exposes indices that match an analytics-engine-
recognized data format), failing the test.

Routing the creates through `TestUtils.createIndexByRestClient` makes
both code paths honor the toggle: legacy / Calcite continues to create
plain Lucene-backed indices, analytics-engine gets parquet-backed
composite indices. The index names are still tested verbatim.

Surfaced by `CalciteSearchCommandIT` on the analytics-engine route.

Signed-off-by: Kai Huang <ahkcs@amazon.com>
---
 .../java/org/opensearch/sql/ppl/SearchCommandIT.java     | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java
index b8e5b2a5722..a13dbb3c625 100644
--- a/integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java
+++ b/integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java
@@ -54,10 +54,15 @@ public void testSearchCommandWithoutSearchKeyword() throws IOException {
 
   @Test
   public void testSearchCommandWithSpecialIndexName() throws IOException {
-    executeRequest(new Request("PUT", "/logs-2021.01.11"));
+    // Route through TestUtils so the analytics-engine parquet-indices toggle
+    // (tests.analytics.parquet_indices=true) applies the composite/parquet
+    // settings; without them the analytics-engine planner can't find a backend
+    // for the scan and the query 500s.
+    org.opensearch.sql.legacy.TestUtils.createIndexByRestClient(client(), "logs-2021.01.11", null);
     verifyDataRows(executeQuery("search source=logs-2021.01.11"));
 
-    executeRequest(new Request("PUT", "/logs-7.10.0-2021.01.11"));
+    org.opensearch.sql.legacy.TestUtils.createIndexByRestClient(
+        client(), "logs-7.10.0-2021.01.11", null);
     verifyDataRows(executeQuery("search source=logs-7.10.0-2021.01.11"));
   }
 

From ee5798d8bbe6a99524c24de9f755fc967359afe7 Mon Sep 17 00:00:00 2001
From: Kai Huang <ahkcs@amazon.com>
Date: Fri, 15 May 2026 14:14:07 -0700
Subject: [PATCH 2/4] Pass minimal `{"properties":{}}` mapping in
 testSearchCommandWithSpecialIndexName
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`OpenSearchSchemaBuilder.buildSchema` skips any index whose mapping has
no `properties` block — the analytics catalog then doesn't surface the
index, and the PPL parser fails with `Table 'logs-2021.01.11' not
found` before reaching the row type. Routing the special-index create
through `TestUtils.createIndexByRestClient` with `null` mapping
produces an empty `{}` body, so the index is created with no mapping
at all and the catalog still skips it.

Pass a minimal `{"mappings":{"properties":{}}}` body. OpenSearch
strips the empty `properties` to `{}` server-side but the schema
builder sees a non-null mapping and registers the (zero-column) table.
The query then parses cleanly and the test's `verifyDataRows()` (no
expected rows) succeeds against the empty index.

Signed-off-by: Kai Huang <ahkcs@amazon.com>
---
 .../org/opensearch/sql/ppl/SearchCommandIT.java   | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java
index a13dbb3c625..791d7ca7db7 100644
--- a/integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java
+++ b/integ-test/src/test/java/org/opensearch/sql/ppl/SearchCommandIT.java
@@ -54,15 +54,18 @@ public void testSearchCommandWithoutSearchKeyword() throws IOException {
 
   @Test
   public void testSearchCommandWithSpecialIndexName() throws IOException {
-    // Route through TestUtils so the analytics-engine parquet-indices toggle
-    // (tests.analytics.parquet_indices=true) applies the composite/parquet
-    // settings; without them the analytics-engine planner can't find a backend
-    // for the scan and the query 500s.
-    org.opensearch.sql.legacy.TestUtils.createIndexByRestClient(client(), "logs-2021.01.11", null);
+    // A minimal `{"properties":{}}` mapping is required so the analytics-engine
+    // catalog (OpenSearchSchemaBuilder.buildSchema) actually surfaces the index —
+    // mapping-less indices are skipped and the parser then fails with
+    // "Table 'logs-2021.01.11' not found". Routing through TestUtils also
+    // honors the tests.analytics.parquet_indices toggle.
+    String minimalMapping = "{\"mappings\":{\"properties\":{}}}";
+    org.opensearch.sql.legacy.TestUtils.createIndexByRestClient(
+        client(), "logs-2021.01.11", minimalMapping);
     verifyDataRows(executeQuery("search source=logs-2021.01.11"));
 
     org.opensearch.sql.legacy.TestUtils.createIndexByRestClient(
-        client(), "logs-7.10.0-2021.01.11", null);
+        client(), "logs-7.10.0-2021.01.11", minimalMapping);
     verifyDataRows(executeQuery("search source=logs-7.10.0-2021.01.11"));
   }
 

From 4070c28ebfafc4308ab4b501a7fe46dbe7ce4d54 Mon Sep 17 00:00:00 2001
From: Kai Huang <ahkcs@amazon.com>
Date: Fri, 15 May 2026 14:48:22 -0700
Subject: [PATCH 3/4] Preserve datetime schema-column types in
 AnalyticsExecutionEngine
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`DatetimeOutputCastRule` (api/spec/datetime) wraps every datetime-typed
root column in `CAST(... AS VARCHAR)` so the analytics-engine backend
can emit PPL's documented space-separator format via `to_char`
(`DatetimeOutputCastRewriter` on the OpenSearch side). The downside is
that `plan.getRowType()` then advertises those columns as VARCHAR — the
response schema reports `"string"` for what callers expect as
`"timestamp"`. The legacy / Calcite-reference path doesn't have this
divergence: the wire value is the formatted string while the schema
column type stays as the original datetime type.

Walk the outermost `Project` (descending through a single `Sort`, the
`LogicalSystemLimit` system query-size wrapper) and, for any slot of
shape `CAST(<datetime> AS VARCHAR)`, swap in the inner `RelDataType`
for the schema column. Values still come back as the strings DataFusion
emitted — only the schema column type is restored.

Falls back to the slot's reported type for non-cast slots, non-datetime
sources, or when the root isn't a Project (raw scan / aggregate fragment).

Surfaced by `verifySchema(... schema("@timestamp", "timestamp") ...)`
assertions in `CalciteSearchCommandIT` time-modifier tests; this commit
removes the schema-type divergence for those queries (the remaining
data-row mismatches are downstream Lucene-secondary issues filed
separately).

Signed-off-by: Kai Huang <ahkcs@amazon.com>
---
 .../analytics/AnalyticsExecutionEngine.java   | 81 +++++++++++++++++--
 1 file changed, 76 insertions(+), 5 deletions(-)

diff --git a/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java b/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java
index ddfe5fd3556..19e2cda324a 100644
--- a/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java
+++ b/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java
@@ -6,13 +6,20 @@
 package org.opensearch.sql.executor.analytics;
 
 import java.util.ArrayList;
+import java.util.EnumSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.Sort;
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.type.SqlTypeName;
 import org.opensearch.analytics.exec.QueryPlanExecutor;
 import org.opensearch.core.action.ActionListener;
 import org.opensearch.sql.ast.statement.ExplainMode;
@@ -87,7 +94,7 @@ public void onResponse(Iterable<Object[]> rows) {
             try {
               List<RelDataTypeField> fields = plan.getRowType().getFieldList();
               List<ExprValue> results = convertRows(rows, fields);
-              Schema schema = buildSchema(fields);
+              Schema schema = buildSchema(fields, recoverOriginalDatetimeTypes(plan));
               execMetric.set(System.nanoTime() - execStart);
               listener.onResponse(new QueryResponse(schema, results, Cursor.None));
             } catch (Exception e) {
@@ -132,11 +139,11 @@ private List<ExprValue> convertRows(Iterable<Object[]> rows, List<RelDataTypeFie
     return results;
   }
 
-  private Schema buildSchema(List<RelDataTypeField> fields) {
+  private Schema buildSchema(List<RelDataTypeField> fields, List<RelDataType> reportedTypes) {
     List<Schema.Column> columns = new ArrayList<>();
-    for (RelDataTypeField field : fields) {
-      ExprType exprType = convertType(field.getType());
-      columns.add(new Schema.Column(field.getName(), null, exprType));
+    for (int i = 0; i < fields.size(); i++) {
+      ExprType exprType = convertType(reportedTypes.get(i));
+      columns.add(new Schema.Column(fields.get(i).getName(), null, exprType));
     }
     return new Schema(columns);
   }
@@ -148,4 +155,68 @@ private ExprType convertType(RelDataType type) {
       return org.opensearch.sql.data.type.ExprCoreType.UNKNOWN;
     }
   }
+
+  /**
+   * Datetime SqlTypeNames produced by the SQL-plugin engine pipeline. Used by {@link
+   * #recoverOriginalDatetimeTypes} to detect {@code DatetimeOutputCastRule}'s {@code
+   * CAST(<datetime> AS VARCHAR)} output slots.
+   */
+  private static final EnumSet<SqlTypeName> DATETIME_TYPES =
+      EnumSet.of(
+          SqlTypeName.TIMESTAMP,
+          SqlTypeName.TIMESTAMP_WITH_LOCAL_TIME_ZONE,
+          SqlTypeName.DATE,
+          SqlTypeName.TIME);
+
+  /**
+   * Recovers the pre-cast datetime types for the schema. {@code DatetimeOutputCastRule} (api
+   * post-analysis pass) wraps every datetime-typed root column in {@code CAST(... AS VARCHAR)} so
+   * the analytics-engine backend can emit PPL's documented space-separator format via {@code
+   * to_char} ({@code DatetimeOutputCastRewriter} on the OpenSearch sandbox side). The downside is
+   * that {@code plan.getRowType()} then advertises those columns as VARCHAR — the response schema
+   * would report {@code "string"} for what tests (and callers) still expect as {@code "timestamp"}.
+   * The legacy / Calcite-reference path doesn't have this divergence: the wire value is a formatted
+   * string while the schema column type is the original datetime type.
+   *
+   * <p>This walk inspects the OUTERMOST {@link Project} (descending through a single {@link Sort},
+   * the {@code LogicalSystemLimit} wrapper) and, for any slot of the exact shape {@code
+   * CAST(<datetime> AS VARCHAR)}, swaps in the inner {@link RelDataType} for the schema. Values
+   * still come back as the strings DataFusion emitted; only the schema column type is restored.
+   *
+   * <p>Falls back to the slot's reported type for non-cast slots, non-datetime sources, or when the
+   * root isn't a Project (raw scan / aggregate fragment).
+   */
+  private static List<RelDataType> recoverOriginalDatetimeTypes(RelNode plan) {
+    List<RelDataTypeField> fields = plan.getRowType().getFieldList();
+    List<RelDataType> reportedTypes = new ArrayList<>(fields.size());
+    for (RelDataTypeField field : fields) {
+      reportedTypes.add(field.getType());
+    }
+    RelNode root = plan;
+    if (root instanceof Sort sort && sort.getInput() instanceof Project) {
+      root = sort.getInput();
+    }
+    if (!(root instanceof Project project)) {
+      return reportedTypes;
+    }
+    List<RexNode> slots = project.getProjects();
+    int n = Math.min(slots.size(), reportedTypes.size());
+    for (int i = 0; i < n; i++) {
+      RexNode slot = slots.get(i);
+      if (!(slot instanceof RexCall call)) {
+        continue;
+      }
+      if (call.getKind() != SqlKind.CAST) {
+        continue;
+      }
+      if (call.getType().getSqlTypeName() != SqlTypeName.VARCHAR) {
+        continue;
+      }
+      RexNode source = call.getOperands().get(0);
+      if (DATETIME_TYPES.contains(source.getType().getSqlTypeName())) {
+        reportedTypes.set(i, source.getType());
+      }
+    }
+    return reportedTypes;
+  }
 }

From cdac62ede405470dc3cf22b6e8bf04d8b68f6125 Mon Sep 17 00:00:00 2001
From: Kai Huang <ahkcs@amazon.com>
Date: Mon, 18 May 2026 15:09:15 -0700
Subject: [PATCH 4/4] Convert byte[] cells to base64 ExprStringValue for IP /
 BINARY fields
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`AnalyticsExecutionEngine.convertRows` reaches `ExprValueUtils.fromObjectValue`
for every cell in every row. OpenSearch `ip` and `binary` fields are stored
as raw bytes, so once the analytics-engine reader (paired with OpenSearch
PR #21681 commit 4) materializes a parquet column, each cell arrives as a
Java `byte[]`. The fall-through catch-all previously threw:

    ExpressionEvaluationException: unsupported object class [B
      at ExprValueUtils.fromObjectValue(ExprValueUtils.java:168)
      at AnalyticsExecutionEngine.convertRows(AnalyticsExecutionEngine.java:128)

Every PPL query that even projects an ip column fails on the analytics route
with this error, regardless of operator or predicate.

`OpenSearchSchemaBuilder.mapFieldType` collapses both `ip` and `binary` into
Calcite `VARBINARY`, so the cell-level type system can't distinguish the two.
An earlier draft of this commit gated IP rendering on byte length (4 or 16
bytes → `ExprIpValue` via `InetAddress.getByAddress`), but that misinterprets
any 4-byte ASCII payload (e.g. `"INFO"`, `"WARN"`, `"POST"`) as a public
IPv4 address — the classic "Norway problem" applied to bytes. With no schema
context here we can't safely guess, so default to a base64-encoded
`ExprStringValue`. Typed call sites that DO know the column is `ip` should
route through `ipValue(String)` explicitly.

Once `OpenSearchSchemaBuilder` splits `ip` and `binary` into separate UDTs
(TODO already filed in that file), this branch can render IP cells in
dotted-decimal form again without the length-guessing.

Signed-off-by: Kai Huang <ahkcs@amazon.com>
---
 .../opensearch/sql/data/model/ExprValueUtils.java    | 10 ++++++++++
 .../sql/data/model/ExprValueUtilsTest.java           | 12 ++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java b/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java
index 02b2e2bcb1d..f1b48214b17 100644
--- a/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java
+++ b/core/src/main/java/org/opensearch/sql/data/model/ExprValueUtils.java
@@ -164,6 +164,16 @@ public static ExprValue fromObjectValue(Object o) {
       return timestampValue(((LocalDateTime) o).toInstant(ZoneOffset.UTC));
     } else if (o instanceof TemporalAmount) {
       return intervalValue((TemporalAmount) o);
+    } else if (o instanceof byte[] bytes) {
+      // No type context here: OpenSearch `ip` and `binary` fields both collapse to
+      // VARBINARY in `OpenSearchSchemaBuilder`, so a 4- or 16-byte payload could be
+      // a genuine address or arbitrary binary that happens to be that length (4-byte
+      // ASCII strings like "INFO" or "WARN" are indistinguishable from a public IPv4
+      // address byte-for-byte). Default to a safe, unambiguous encoding. Typed call
+      // sites should route IP cells through {@link #ipValue(String)} explicitly.
+      // TODO: split `ip` and `binary` into separate UDTs upstream so this branch can
+      //   render IP cells in dotted form again. See `OpenSearchSchemaBuilder.mapFieldType`.
+      return stringValue(java.util.Base64.getEncoder().encodeToString(bytes));
     } else {
       throw new ExpressionEvaluationException("unsupported object " + o.getClass());
     }
diff --git a/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java b/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java
index 888042ad789..4cb7b48f24c 100644
--- a/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java
+++ b/core/src/test/java/org/opensearch/sql/data/model/ExprValueUtilsTest.java
@@ -244,6 +244,18 @@ public void constructDateAndTimeValue() {
         ExprValueUtils.fromObjectValue("2012-07-07 01:01:01", TIMESTAMP));
   }
 
+  @Test
+  public void fromObjectValue_byte_array_returns_base64_string() {
+    // Without type context the factory can't distinguish `ip` from `binary`
+    // (both collapse to VARBINARY in OpenSearchSchemaBuilder), so every byte[]
+    // is base64-encoded. Typed routing happens at the call site.
+    assertEquals(
+        new ExprStringValue("AQIDBA=="), ExprValueUtils.fromObjectValue(new byte[] {1, 2, 3, 4}));
+    assertEquals(
+        new ExprStringValue("AQIDBAU="),
+        ExprValueUtils.fromObjectValue(new byte[] {1, 2, 3, 4, 5}));
+  }
+
   @Test
   public void fromObjectValue_double_infinity_returns_null() {
     assertTrue(ExprValueUtils.fromObjectValue(Double.POSITIVE_INFINITY).isNull());