diff --git a/core/src/main/java/com/google/adk/models/Gemini.java b/core/src/main/java/com/google/adk/models/Gemini.java index 6f145e1de..40809b85e 100644 --- a/core/src/main/java/com/google/adk/models/Gemini.java +++ b/core/src/main/java/com/google/adk/models/Gemini.java @@ -226,21 +226,7 @@ public Flowable generateContent(LlmRequest llmRequest, boolean stre () -> processRawResponses( Flowable.fromFuture(streamFuture).flatMapIterable(iterable -> iterable))) - .filter( - llmResponse -> - llmResponse - .content() - .flatMap(Content::parts) - .map( - parts -> - !parts.isEmpty() - && parts.stream() - .anyMatch( - p -> - p.functionCall().isPresent() - || p.functionResponse().isPresent() - || p.text().isPresent())) - .orElse(false)); + .filter(Gemini::shouldEmit); } else { logger.debug("Sending generateContent request to model {}", effectiveModelName); return Flowable.fromFuture( @@ -298,7 +284,28 @@ static Flowable processRawResponses(FlowableDrops chunks that carry neither semantic content (i.e. they are an empty-text-only response + * per {@link #isEmptyTextOnlyResponse}) nor any useful metadata (per {@link #hasUsefulMetadata}). + * + *

Package-private for testing. + */ + static boolean shouldEmit(LlmResponse response) { + return !isEmptyTextOnlyResponse(response) || hasUsefulMetadata(response); + } + + /** + * Returns true if {@code response} carries any non-content metadata that should be propagated + * downstream, i.e. if any of {@code usageMetadata}, {@code finishReason}, {@code errorCode}, + * {@code groundingMetadata}, {@code inputTranscription} or {@code outputTranscription} is + * present. Inspects only these top-level {@link LlmResponse} fields; the response's content/parts + * are intentionally not considered here. + */ + private static boolean hasUsefulMetadata(LlmResponse response) { + return response.usageMetadata().isPresent() + || response.finishReason().isPresent() + || response.errorCode().isPresent() + || response.groundingMetadata().isPresent() + || response.inputTranscription().isPresent() + || response.outputTranscription().isPresent(); + } + + /** + * Returns true if {@code response} consists of exactly one {@link Part} whose only meaningful + * payload is an empty text string (i.e. {@code parts:[{text:""}]}). Such a chunk can be safely + * dropped from the streaming aggregator because it carries no semantic content for the agent + * pipeline. A part is considered to carry semantic content if any of its non-text payloads + * ({@code functionCall}, {@code functionResponse}, {@code inlineData}, {@code executableCode}, + * {@code codeExecutionResult}, {@code fileData}, {@code thoughtSignature}, {@code videoMetadata}, + * {@code toolCall}, {@code toolResponse}) is present. Returns false when the response has no + * content, when its content has no parts, or when the number of parts is not exactly one. 
+ */ + private static boolean isEmptyTextOnlyResponse(LlmResponse response) { + return response + .content() + .flatMap(Content::parts) + .map( + parts -> { + if (parts.size() != 1) { + return false; + } + Part part = parts.get(0); + return part.text().map(String::isEmpty).orElse(false) + && part.functionCall().isEmpty() + && part.functionResponse().isEmpty() + && part.inlineData().isEmpty() + && part.executableCode().isEmpty() + && part.codeExecutionResult().isEmpty() + && part.fileData().isEmpty() + && part.thoughtSignature().isEmpty() + && part.videoMetadata().isEmpty() + && part.toolCall().isEmpty() + && part.toolResponse().isEmpty(); + }) + .orElse(false); + } + @Override public BaseLlmConnection connect(LlmRequest llmRequest) { if (!apiClient.vertexAI()) { diff --git a/core/src/test/java/com/google/adk/models/GeminiTest.java b/core/src/test/java/com/google/adk/models/GeminiTest.java index c230f5f68..656b5e596 100644 --- a/core/src/test/java/com/google/adk/models/GeminiTest.java +++ b/core/src/test/java/com/google/adk/models/GeminiTest.java @@ -63,6 +63,81 @@ public void processRawResponses_withTextChunks_emitsPartialResponses() { isFunctionCallResponse()); } + // Regression test for b/513501918. gemini-3.1-flash-lite emits an extra trailing chunk after a + // function call: `{parts:[{text:""}], finishReason:STOP}`. That chunk must not be propagated as + // a non-partial event because BaseLlmFlow#run would treat it as the final response and + // terminate the loop before the function response is sent back to the model. The chunk's + // metadata (e.g. `finishReason`, `usageMetadata`) is preserved by emitting it on a content-less + // partial response instead of dropping the chunk entirely. 
+ @Test + public void + processRawResponses_functionCallThenEmptyTextWithStop_emitsFunctionCallAndMetadataOnlyPartial() { + Flowable rawResponses = + Flowable.just( + toResponse(Part.fromFunctionCall("test_function", ImmutableMap.of())), + toResponseWithText("", FinishReason.Known.STOP)); + + Flowable llmResponses = Gemini.processRawResponses(rawResponses); + + assertLlmResponses( + llmResponses, + isFunctionCallResponse(), + isContentlessPartialWithFinishReason(FinishReason.Known.STOP)); + } + + // Same scenario as the previous test (a function call followed by an empty-text chunk with + // STOP), but the trailing chunk also carries `usageMetadata`: the metadata must survive on the + // emitted content-less partial. + @Test + public void + processRawResponses_functionCallThenEmptyTextWithUsageMetadata_preservesUsageMetadata() { + GenerateContentResponseUsageMetadata metadata = createUsageMetadata(5, 10, 15); + Flowable rawResponses = + Flowable.just( + toResponse(Part.fromFunctionCall("test_function", ImmutableMap.of())), + toResponseWithText("", FinishReason.Known.STOP, metadata)); + + Flowable llmResponses = Gemini.processRawResponses(rawResponses); + + assertLlmResponses( + llmResponses, isFunctionCallResponse(), isContentlessPartialWithUsageMetadata(metadata)); + } + + // A function call followed by an empty-text chunk that has neither a finishReason nor + // usageMetadata: the trailing chunk carries no useful payload and must be suppressed entirely. + @Test + public void processRawResponses_functionCallThenEmptyText_doesNotEmitExtraEmptyResponse() { + Flowable rawResponses = + Flowable.just( + toResponse(Part.fromFunctionCall("test_function", ImmutableMap.of())), + toResponseWithText("")); + + Flowable llmResponses = Gemini.processRawResponses(rawResponses); + + assertLlmResponses(llmResponses, isFunctionCallResponse()); + } + + // Combined scenario: leading partial text, then a function call, then the trailing empty-text + // chunk with STOP. 
Accumulated text must still be flushed, the function call must still be + // emitted, and the trailing chunk must surface only its metadata on a content-less partial. + @Test + public void + processRawResponses_textThenFunctionCallThenEmptyTextWithStop_emitsTextFunctionCallAndMetadata() { + Flowable rawResponses = + Flowable.just( + toResponseWithText("Thinking..."), + toResponse(Part.fromFunctionCall("test_function", ImmutableMap.of())), + toResponseWithText("", FinishReason.Known.STOP)); + + Flowable llmResponses = Gemini.processRawResponses(rawResponses); + + assertLlmResponses( + llmResponses, + isPartialTextResponse("Thinking..."), + isFinalTextResponse("Thinking..."), + isFunctionCallResponse(), + isContentlessPartialWithFinishReason(FinishReason.Known.STOP)); + } + @Test public void processRawResponses_textAndStopReason_emitsPartialThenFinalText() { Flowable rawResponses = @@ -175,6 +250,93 @@ public void processRawResponses_thoughtChunksAndStop_includeUsageMetadata() { isFinalThoughtResponseWithUsageMetadata("Thinking deeply", metadata2)); } + // Test cases for the shouldEmit filter applied by generateContent after processRawResponses. + // shouldEmit drops chunks that are empty-text-only AND carry no useful metadata; everything else + // is forwarded. processRawResponses normally already strips empty-text-only chunks, so shouldEmit + // is defense-in-depth, but it must still behave correctly when fed any LlmResponse directly. 
+ + @Test + public void shouldEmit_emptyTextOnlyResponseWithNoMetadata_returnsFalse() { + LlmResponse response = + LlmResponse.builder() + .content(Content.builder().role("model").parts(Part.fromText("")).build()) + .build(); + + assertThat(Gemini.shouldEmit(response)).isFalse(); + } + + @Test + public void shouldEmit_emptyTextOnlyResponseWithFinishReason_returnsTrue() { + LlmResponse response = + LlmResponse.builder() + .content(Content.builder().role("model").parts(Part.fromText("")).build()) + .finishReason(new FinishReason(FinishReason.Known.STOP)) + .build(); + + assertThat(Gemini.shouldEmit(response)).isTrue(); + } + + @Test + public void shouldEmit_emptyTextOnlyResponseWithUsageMetadata_returnsTrue() { + LlmResponse response = + LlmResponse.builder() + .content(Content.builder().role("model").parts(Part.fromText("")).build()) + .usageMetadata(createUsageMetadata(5, 10, 15)) + .build(); + + assertThat(Gemini.shouldEmit(response)).isTrue(); + } + + @Test + public void shouldEmit_nonEmptyTextResponse_returnsTrue() { + LlmResponse response = + LlmResponse.builder() + .content(Content.builder().role("model").parts(Part.fromText("hello")).build()) + .build(); + + assertThat(Gemini.shouldEmit(response)).isTrue(); + } + + @Test + public void shouldEmit_functionCallResponse_returnsTrue() { + LlmResponse response = + LlmResponse.builder() + .content( + Content.builder() + .role("model") + .parts(Part.fromFunctionCall("test_function", ImmutableMap.of())) + .build()) + .build(); + + assertThat(Gemini.shouldEmit(response)).isTrue(); + } + + @Test + public void shouldEmit_contentlessResponse_returnsTrue() { + // A response with no content at all is not an empty-text-only response, so it passes through + // even when, as here, it carries no metadata either. processRawResponses emits a similar + // content-less shape (with metadata preserved) after it strips empty-text content. 
+ LlmResponse response = LlmResponse.builder().build(); + + assertThat(Gemini.shouldEmit(response)).isTrue(); + } + + @Test + public void shouldEmit_multiPartResponseWithEmptyTextPart_returnsTrue() { + // Only single-part empty-text responses are considered "empty-text-only". A multi-part response + // always passes through, even when one of its parts is an empty text (the first part here). + LlmResponse response = + LlmResponse.builder() + .content( + Content.builder() + .role("model") + .parts(Part.fromText(""), Part.fromText("hello")) + .build()) + .build(); + + assertThat(Gemini.shouldEmit(response)).isTrue(); + } + @Test public void processRawResponses_thoughtAndTextWithStop_onlyFinalTextIncludesUsageMetadata() { GenerateContentResponseUsageMetadata metadata1 = createUsageMetadata(5, 5, 10); @@ -232,6 +394,26 @@ private static Predicate isFunctionCallResponse() { }; } + private static Predicate isContentlessPartialWithFinishReason( + FinishReason.Known expectedFinishReason) { + return response -> { + assertThat(response.partial()).hasValue(true); + assertThat(response.content()).isEmpty(); + assertThat(response.finishReason().map(fr -> fr.knownEnum())).hasValue(expectedFinishReason); + return true; + }; + } + + private static Predicate isContentlessPartialWithUsageMetadata( + GenerateContentResponseUsageMetadata expectedMetadata) { + return response -> { + assertThat(response.partial()).hasValue(true); + assertThat(response.content()).isEmpty(); + assertThat(response.usageMetadata()).hasValue(expectedMetadata); + return true; + }; + } + private static Predicate isEmptyResponse() { return response -> { assertThat(response.partial()).isEmpty();