apache · Stefanietry · May 6, 2026 · JingsongLi · Jun 3, 2026
diff --git a/docs/generated/core_configuration.html b/docs/generated/core_configuration.html
@@ -1620,6 +1620,12 @@
             <td>String</td>
             <td>Specifies column names that should be stored as vector type. This is used when you want to treat a ARRAY column as a VECTOR.</td>
         </tr>
+        <tr>
+            <td><h5>vector-search.distribute.enabled</h5></td>
+            <td style="word-wrap: break-word;">false</td>
+            <td>Boolean</td>
+            <td>Whether to process distributed vector search.</td>
+        </tr>
         <tr>
             <td><h5>vector.file.format</h5></td>
             <td style="word-wrap: break-word;">(none)</td>

diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java
@@ -2519,6 +2519,12 @@ public InlineElement getDescription() {
                                                     + " Default is the same as TARGET_FILE_SIZE.")
                                     .build());
 
+    public static final ConfigOption<Boolean> VECTOR_SEARCH_DISTRIBUTE_ENABLED =
+            key("vector-search.distribute.enabled")
+                    .booleanType()
+                    .defaultValue(false)
+                    .withDescription("Whether to process distributed vector search.");
+
     @Immutable
     public static final ConfigOption<Boolean> PK_CLUSTERING_OVERRIDE =
             key("pk-clustering-override")
@@ -3978,6 +3984,11 @@ public long vectorTargetFileSize() {
                 .orElse(targetFileSize(false));
     }
 
+    /** Reads {@link #VECTOR_SEARCH_DISTRIBUTE_ENABLED}; defaults to {@code false}. */
+    public boolean vectorSearchDistributeEnabled() {
+        return options.get(VECTOR_SEARCH_DISTRIBUTE_ENABLED);
+    }
+
     /** Specifies the merge engine for table with primary key. */
     public enum MergeEngine implements DescribedEnum {
         DEDUPLICATE("deduplicate", "De-duplicate and keep the last row."),

diff --git a/paimon-common/src/main/java/org/apache/paimon/globalindex/GlobalIndexResultSerializer.java b/paimon-common/src/main/java/org/apache/paimon/globalindex/GlobalIndexResultSerializer.java
@@ -23,6 +23,7 @@
 import org.apache.paimon.io.DataInputView;
 import org.apache.paimon.io.DataOutputSerializer;
 import org.apache.paimon.io.DataOutputView;
+import org.apache.paimon.utils.Preconditions;
 import org.apache.paimon.utils.RoaringNavigableMap64;
 
 import java.io.IOException;
@@ -116,4 +117,25 @@ public GlobalIndexResult deserialize(DataInputView dataInput) throws IOException
 
         return ScoredGlobalIndexResult.create(roaringNavigableMap64, scoreMap::get);
     }
+
+    /** Serializes {@code globalIndexResult} and returns a copy of the written bytes. */
+    public byte[] serialize(GlobalIndexResult globalIndexResult) throws IOException {
+        DataOutputSerializer buffer = new DataOutputSerializer(1024);
+        serialize(globalIndexResult, buffer);
+        return buffer.getCopyOfBuffer();
+    }
+
+    /**
+     * Deserializes {@code data} and returns it as a {@link ScoredGlobalIndexResult}; the
+     * {@code Preconditions.checkArgument} call rejects any other decoded result type.
+     */
+    public ScoredGlobalIndexResult deserialize(byte[] data) throws IOException {
+        GlobalIndexResult decoded = deserialize(new DataInputDeserializer(data));
+        String decodedType = decoded == null ? "null" : decoded.getClass().getName();
+        Preconditions.checkArgument(
+                decoded instanceof ScoredGlobalIndexResult,
+                "Expected ScoredGlobalIndexResult, but got %s",
+                decodedType);
+        return (ScoredGlobalIndexResult) decoded;
+    }
 }
diff --git a/paimon-common/src/main/java/org/apache/paimon/utils/RoaringNavigableMap64.java b/paimon-common/src/main/java/org/apache/paimon/utils/RoaringNavigableMap64.java
@@ -25,12 +25,15 @@
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
+import java.io.Serializable;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Objects;
 
 /** A compressed bitmap for 64-bit integer aggregated by tree. */
-public class RoaringNavigableMap64 implements Iterable<Long> {
+public class RoaringNavigableMap64 implements Iterable<Long>, Serializable {
+
+    private static final long serialVersionUID = 1L;
 
     private final Roaring64NavigableMap roaring64NavigableMap;
 

diff --git a/paimon-core/src/main/java/org/apache/paimon/table/InnerTable.java b/paimon-core/src/main/java/org/apache/paimon/table/InnerTable.java
@@ -33,7 +33,7 @@
 import org.apache.paimon.table.source.ReadBuilderImpl;
 import org.apache.paimon.table.source.StreamDataTableScan;
 import org.apache.paimon.table.source.VectorSearchBuilder;
-import org.apache.paimon.table.source.VectorSearchBuilderImpl;
+import org.apache.paimon.table.source.VectorSearchBuilderFactory;
 
 import java.util.Optional;
 
@@ -59,7 +59,9 @@ default ReadBuilder newReadBuilder() {
 
     @Override
     default VectorSearchBuilder newVectorSearchBuilder() {
-        return new VectorSearchBuilderImpl(this);
+        // Goes through the factory so that a discovered VectorSearchBuilderProvider can supply
+        // the builder; without a matching provider it still yields VectorSearchBuilderImpl.
+        return VectorSearchBuilderFactory.create(this);
     }
 
     @Override

diff --git a/paimon-core/src/main/java/org/apache/paimon/table/source/VectorReadImpl.java b/paimon-core/src/main/java/org/apache/paimon/table/source/VectorReadImpl.java
@@ -42,6 +42,7 @@
 import javax.annotation.Nullable;
 
 import java.io.IOException;
+import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.List;
@@ -55,13 +56,15 @@
 import static org.apache.paimon.utils.Preconditions.checkNotNull;
 
 /** Implementation for {@link VectorRead}. */
-public class VectorReadImpl implements VectorRead {
+public class VectorReadImpl implements VectorRead, Serializable {
 
-    private final FileStoreTable table;
+    private static final long serialVersionUID = 1L;
+
+    protected final FileStoreTable table;
     private final Predicate filter;
-    private final int limit;
-    private final DataField vectorColumn;
-    private final float[] vector;
+    protected final int limit;
+    protected final DataField vectorColumn;
+    protected final float[] vector;
 
     public VectorReadImpl(
             FileStoreTable table,
@@ -120,7 +123,7 @@ public GlobalIndexResult read(List<VectorSearchSplit> splits) {
         return result.topK(limit);
     }
 
-    private Optional<RoaringNavigableMap64> preFilter(List<VectorSearchSplit> splits) {
+    protected Optional<RoaringNavigableMap64> preFilter(List<VectorSearchSplit> splits) {
         Set<IndexFileMeta> scalarIndexFiles =
                 new TreeSet<>(Comparator.comparing(IndexFileMeta::fileName));
         for (VectorSearchSplit split : splits) {
@@ -139,7 +142,7 @@ private Optional<RoaringNavigableMap64> preFilter(List<VectorSearchSplit> splits
         }
     }
 
-    private CompletableFuture<Optional<ScoredGlobalIndexResult>> eval(
+    protected CompletableFuture<Optional<ScoredGlobalIndexResult>> eval(
             GlobalIndexer globalIndexer,
             IndexPathFactory indexPathFactory,
             long rowRangeStart,

diff --git a/paimon-core/src/main/java/org/apache/paimon/table/source/VectorSearchBuilderFactory.java b/paimon-core/src/main/java/org/apache/paimon/table/source/VectorSearchBuilderFactory.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.table.source;
+
+import org.apache.paimon.factories.FactoryException;
+import org.apache.paimon.factories.FactoryUtil;
+import org.apache.paimon.table.InnerTable;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Factory for {@link VectorSearchBuilder}.
+ *
+ * <p>Implementations of {@link VectorSearchBuilderProvider} are discovered through {@link
+ * FactoryUtil#discoverFactories}; when none of them returns a builder, {@link
+ * VectorSearchBuilderImpl} is used.
+ */
+public class VectorSearchBuilderFactory {
+
+    private VectorSearchBuilderFactory() {}
+
+    /**
+     * Creates the {@link VectorSearchBuilder} for {@code table}.
+     *
+     * @param table table to search
+     * @return the builder of the single matching provider, or a {@link VectorSearchBuilderImpl}
+     *     when no provider matches
+     * @throws FactoryException if more than one provider returns a builder
+     */
+    public static VectorSearchBuilder create(InnerTable table) {
+        ClassLoader contextLoader = Thread.currentThread().getContextClassLoader();
+        ClassLoader loader =
+                contextLoader != null
+                        ? contextLoader
+                        : VectorSearchBuilderFactory.class.getClassLoader();
+
+        List<VectorSearchBuilderProvider> providers =
+                FactoryUtil.discoverFactories(loader, VectorSearchBuilderProvider.class);
+
+        // Every provider is consulted so that ambiguous matches can be reported by name.
+        List<VectorSearchBuilder> candidates = new ArrayList<>();
+        List<String> candidateOrigins = new ArrayList<>();
+        for (VectorSearchBuilderProvider provider : providers) {
+            VectorSearchBuilder candidate = provider.create(table);
+            if (candidate == null) {
+                continue;
+            }
+            candidates.add(candidate);
+            candidateOrigins.add(provider.getClass().getName());
+        }
+
+        switch (candidates.size()) {
+            case 0:
+                return new VectorSearchBuilderImpl(table);
+            case 1:
+                return candidates.get(0);
+            default:
+                throw new FactoryException(
+                        String.format(
+                                "Multiple VectorSearchBuilder providers matched table '%s': %s",
+                                table.name(), String.join(", ", candidateOrigins)));
+        }
+    }
+}
diff --git a/paimon-core/src/main/java/org/apache/paimon/table/source/VectorSearchBuilderImpl.java b/paimon-core/src/main/java/org/apache/paimon/table/source/VectorSearchBuilderImpl.java
@@ -32,13 +32,13 @@ public class VectorSearchBuilderImpl implements VectorSearchBuilder {
 
     private static final long serialVersionUID = 1L;
 
-    private final FileStoreTable table;
+    protected final FileStoreTable table;
 
-    private PartitionPredicate partitionFilter;
-    private Predicate filter;
-    private int limit;
-    private DataField vectorColumn;
-    private float[] vector;
+    protected PartitionPredicate partitionFilter;
+    protected Predicate filter;
+    protected int limit;
+    protected DataField vectorColumn;
+    protected float[] vector;
 
     public VectorSearchBuilderImpl(InnerTable table) {
         this.table = (FileStoreTable) table;

diff --git a/paimon-core/src/main/java/org/apache/paimon/table/source/VectorSearchBuilderProvider.java b/paimon-core/src/main/java/org/apache/paimon/table/source/VectorSearchBuilderProvider.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.table.source;
+
+import org.apache.paimon.table.InnerTable;
+
+import javax.annotation.Nullable;
+
+/**
+ * SPI for engine specific {@link VectorSearchBuilder} creation.
+ *
+ * <p>{@link VectorSearchBuilderFactory} calls {@link #create(InnerTable)} on every discovered
+ * provider and fails when more than one of them returns a non-null builder.
+ */
+public interface VectorSearchBuilderProvider {
+
+    /**
+     * Creates a builder for {@code table}.
+     *
+     * @param table table to search
+     * @return the builder, or {@code null} when this provider does not handle {@code table}
+     */
+    @Nullable
+    VectorSearchBuilder create(InnerTable table);
+}
diff --git a/...rk/paimon-spark-common/src/main/java/org/apache/paimon/spark/read/SparkEngineContext.java b/...rk/paimon-spark-common/src/main/java/org/apache/paimon/spark/read/SparkEngineContext.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.spark.read;
+
+import org.apache.paimon.utils.SerializableFunction;
+
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.broadcast.Broadcast;
+import org.apache.spark.sql.SparkSession;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Stream;
+
+/**
+ * Tiny wrapper around the active {@link SparkSession} that exposes RDD style {@code map} / {@code
+ * flatMap} primitives over a Java {@link List}. Used by Paimon-on-Spark to dispatch
+ * embarrassingly-parallel work (e.g. per-split vector search) to the cluster without forcing the
+ * caller to depend on Spark types directly.
+ */
+public class SparkEngineContext {
+
+    private final JavaSparkContext jsc;
+
+    /** Binds this context to the currently active {@link SparkSession}. */
+    public SparkEngineContext() {
+        this.jsc = JavaSparkContext.fromSparkContext(SparkSession.active().sparkContext());
+    }
+
+    /** Broadcasts {@code value} through the underlying {@link JavaSparkContext}. */
+    public <T> Broadcast<T> broadcast(T value) {
+        return jsc.broadcast(value);
+    }
+
+    /**
+     * Runs {@code func} over every element of {@code data} as a Spark job and collects the
+     * results on the driver.
+     *
+     * @param data elements to process; when empty, no Spark job is submitted
+     * @param func function applied to each element
+     * @param parallelism requested number of slices; clamped to {@code [1, data.size()]}
+     * @return the collected results, empty if {@code data} is empty
+     */
+    public <I, O> List<O> map(List<I> data, SerializableFunction<I, O> func, int parallelism) {
+        if (data.isEmpty()) {
+            return Collections.emptyList();
+        }
+        int slices = numSlices(parallelism, data.size());
+        return jsc.parallelize(data, slices).map(func::apply).collect();
+    }
+
+    /**
+     * Runs {@code func} over every element of {@code data} as a Spark job, flattens the returned
+     * streams and collects all elements on the driver.
+     *
+     * @param data elements to process; when empty, no Spark job is submitted
+     * @param func function producing a stream of results per element
+     * @param parallelism requested number of slices; clamped to {@code [1, data.size()]}
+     * @return the collected, flattened results, empty if {@code data} is empty
+     */
+    public <I, O> List<O> flatMap(
+            List<I> data, SerializableFunction<I, Stream<O>> func, int parallelism) {
+        if (data.isEmpty()) {
+            return Collections.emptyList();
+        }
+        int slices = numSlices(parallelism, data.size());
+        return jsc.parallelize(data, slices).flatMap(x -> func.apply(x).iterator()).collect();
+    }
+
+    /**
+     * Bounds the requested parallelism to {@code [1, size]}: Spark refuses to parallelize into
+     * fewer than one slice, and more slices than elements would only add empty tasks.
+     */
+    private static int numSlices(int parallelism, int size) {
+        return Math.max(1, Math.min(parallelism, size));
+    }
+}