From 7bd5cb1f2ecea99c19c7b9c614507e4c9e32b01d Mon Sep 17 00:00:00 2001 From: Pierre Salagnac Date: Wed, 11 Mar 2026 16:53:24 +0100 Subject: [PATCH 1/2] SOLR-18157: Optimize buffer allocation in JavaBinCodec Implements a smarter allocation strategy using powers of 2 for the internal buffer of JavaBinCodec. --- .../unreleased/SOLR-18157-javabin-buffer.yml | 8 ++ .../solr/bench/search/RequestWriters.java | 121 ++++++++++++++++++ .../apache/solr/common/util/JavaBinCodec.java | 36 +++++- .../solr/common/util/TestJavaBinCodec.java | 10 ++ 4 files changed, 172 insertions(+), 3 deletions(-) create mode 100644 changelog/unreleased/SOLR-18157-javabin-buffer.yml create mode 100644 solr/benchmark/src/java/org/apache/solr/bench/search/RequestWriters.java diff --git a/changelog/unreleased/SOLR-18157-javabin-buffer.yml b/changelog/unreleased/SOLR-18157-javabin-buffer.yml new file mode 100644 index 000000000000..2b16606d3928 --- /dev/null +++ b/changelog/unreleased/SOLR-18157-javabin-buffer.yml @@ -0,0 +1,8 @@ +title: Optimize the size of the internal buffer of JavaBin codec to reduce the number of allocations. This reduces GC pressure on SolrJ client under high indexing load. +type: changed # Could be 'optimized' ?? +authors: + - name: Pierre Salagnac +links: + - name: SOLR-18157 + url: https://issues.apache.org/jira/browse/SOLR-18157 + diff --git a/solr/benchmark/src/java/org/apache/solr/bench/search/RequestWriters.java b/solr/benchmark/src/java/org/apache/solr/bench/search/RequestWriters.java new file mode 100644 index 000000000000..d55800ad06e4 --- /dev/null +++ b/solr/benchmark/src/java/org/apache/solr/bench/search/RequestWriters.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.bench.search; + +import static org.apache.solr.bench.Docs.docs; +import static org.apache.solr.bench.generators.SourceDSL.integers; +import static org.apache.solr.bench.generators.SourceDSL.longs; +import static org.apache.solr.bench.generators.SourceDSL.strings; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.Iterator; +import java.util.function.Supplier; +import org.apache.commons.io.output.NullOutputStream; +import org.apache.solr.bench.Docs; +import org.apache.solr.client.solrj.request.JavaBinRequestWriter; +import org.apache.solr.client.solrj.request.RequestWriter; +import org.apache.solr.client.solrj.request.UpdateRequest; +import org.apache.solr.client.solrj.request.XMLRequestWriter; +import org.apache.solr.common.SolrInputDocument; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; + +/** + * Benchmark for serialization of requests by the client. This only focuses on the serialization + * itself, ignoring sending the request over the network (and for sure ignoring processing the + * request). + */ +@Fork(value = 1) +@BenchmarkMode(Mode.Throughput) +@Warmup(time = 2, iterations = 1) +@Measurement(time = 5, iterations = 5) +@Threads(value = 1) +public class RequestWriters { + + @State(Scope.Benchmark) + public static class BenchState { + + @Param({"xml", "binary"}) + String type; + + @Param({"10", "100", "1000", "10000"}) + int batchSize; + + private final int docCount = 50000; + + private Supplier writerSupplier; + private Iterator docIterator; + + @Setup(Level.Trial) + public void setup() throws Exception { + preGenerateDocs(); + + switch (type) { + case "xml": + writerSupplier = XMLRequestWriter::new; + break; + case "javabin": + writerSupplier = JavaBinRequestWriter::new; + break; + + default: + throw new Error("Unsupported type: " + type); + } + } + + private void preGenerateDocs() throws Exception { + Docs docs = + docs() + .field("id", integers().incrementing()) + .field(strings().basicLatinAlphabet().ofLengthBetween(10, 64)) + .field(strings().basicLatinAlphabet().ofLengthBetween(10, 64)) + .field(strings().basicLatinAlphabet().multi(312).ofLengthBetween(10, 64)) + .field(strings().basicLatinAlphabet().multi(312).ofLengthBetween(10, 64)) + .field(integers().all()) + .field(integers().all()) + .field(longs().all()); + + docs.preGenerate(docCount); + docIterator = docs.generatedDocsCircularIterator(); + } + } + + @Benchmark + public void writeUpdate(BenchState state) throws IOException { + + OutputStream sink = NullOutputStream.INSTANCE; + + UpdateRequest request = new UpdateRequest(); + for (int i = 0; i < state.batchSize; i++) { + request.add(state.docIterator.next()); + } + + RequestWriter writer = state.writerSupplier.get(); + writer.write(request, sink); + } +} diff --git a/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java b/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java index a19896ff2b3a..9cd5171f125b 100644 --- a/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java @@ -113,9 +113,10 @@ public class JavaBinCodec implements PushWriter { NAMED_LST = (byte) (6 << 5), // NamedList EXTERN_STRING = (byte) (7 << 5); + private static final int MIN_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY = 512; private static final int MAX_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY = 65536; - private static byte VERSION = 2; + private static final byte VERSION = 2; private final ObjectResolver resolver; protected FastOutputStream daos; private StringCache stringCache; @@ -1072,7 +1073,11 @@ public void writeStr(CharSequence s) throws IOException { int maxSize = end * ByteUtils.MAX_UTF8_BYTES_PER_CHAR; if (maxSize <= MAX_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY) { - if (bytes == null || bytes.length < maxSize) bytes = new byte[maxSize]; + if (bytes == null || bytes.length < maxSize) { + int bufferSize = getBufferSize(maxSize); + bytes = new byte[bufferSize]; + } + int sz = ByteUtils.UTF16toUTF8(s, 0, end, bytes, 0); writeTag(STR, sz); daos.write(bytes, 0, sz); @@ -1105,7 +1110,10 @@ public CharSequence readStr( private CharSequence _readStr(DataInputInputStream dis, StringCache stringCache, int sz) throws IOException { - if (bytes == null || bytes.length < sz) bytes = new byte[sz]; + if (bytes == null || bytes.length < sz) { + int bufferSize = getBufferSize(sz); + bytes = new byte[bufferSize]; + } dis.readFully(bytes, 0, sz); if (stringCache != null) { return stringCache.get(bytesRef.reset(bytes, 0, sz)); @@ -1116,6 +1124,28 @@ private CharSequence _readStr(DataInputInputStream dis, StringCache stringCache, } } + /** + * Compute the buffer size for given required size. This returns the next power of 2 that is + * greater than or equal to the given size. + * + *

This is a trade-off so we don't start with a useless too big buffer, but we don't do too + * many allocations. + */ + static int getBufferSize(int required) { + + if (required < MIN_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY) { + return MIN_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY; + } + + int oneBit = Integer.highestOneBit(required); + + if (oneBit == required) { + return oneBit; + } else { + return oneBit << 1; + } + } + /////////// code to optimize reading UTF8 static final int MAX_UTF8_SZ = 1024 * 64; // too big strings can cause too much memory allocation private Function stringProvider; diff --git a/solr/solrj/src/test/org/apache/solr/common/util/TestJavaBinCodec.java b/solr/solrj/src/test/org/apache/solr/common/util/TestJavaBinCodec.java index 52d5cf1e3c98..65a89bbc21bd 100644 --- a/solr/solrj/src/test/org/apache/solr/common/util/TestJavaBinCodec.java +++ b/solr/solrj/src/test/org/apache/solr/common/util/TestJavaBinCodec.java @@ -496,6 +496,16 @@ public void testStringCaching() throws Exception { assertSame(l1.get(1), l2.get(1)); } + @Test + public void testBufferSize() { + assertEquals(512, JavaBinCodec.getBufferSize(1)); + assertEquals(512, JavaBinCodec.getBufferSize(200)); + assertEquals(512, JavaBinCodec.getBufferSize(500)); + assertEquals(512, JavaBinCodec.getBufferSize(512)); + assertEquals(1024, JavaBinCodec.getBufferSize(513)); + assertEquals(2048, JavaBinCodec.getBufferSize(1500)); + } + public void genBinaryFiles() throws IOException { Object data = generateAllDataTypes(); From 65b8ab04e806ff30f2e2ace21477ca4714445cd2 Mon Sep 17 00:00:00 2001 From: Pierre Salagnac Date: Thu, 19 Mar 2026 11:10:43 +0100 Subject: [PATCH 2/2] Remove commit in changelog --- changelog/unreleased/SOLR-18157-javabin-buffer.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog/unreleased/SOLR-18157-javabin-buffer.yml b/changelog/unreleased/SOLR-18157-javabin-buffer.yml index 2b16606d3928..dc5999cc7f00 100644 --- a/changelog/unreleased/SOLR-18157-javabin-buffer.yml +++ b/changelog/unreleased/SOLR-18157-javabin-buffer.yml @@ -1,5 +1,5 @@ title: Optimize the size of the internal buffer of JavaBin codec to reduce the number of allocations. This reduces GC pressure on SolrJ client under high indexing load. -type: changed # Could be 'optimized' ?? +type: changed authors: - name: Pierre Salagnac links: