Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .code-samples.meilisearch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ get_all_tasks_1: |-
client.get_tasks()
get_task_1: |-
client.get_task(1)
get_task_documents_1: |-
client.get_task_documents(1)
delete_tasks_1: |-
client.delete_tasks({'uids': ['1', '2']})
cancel_tasks_1: |-
Expand Down
26 changes: 26 additions & 0 deletions meilisearch/_httprequests.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,32 @@ def post_stream(

raise MeilisearchCommunicationError(str(err)) from err

def get_stream(self, path: str) -> requests.Response:
    """Send a GET request with streaming enabled.

    The request is issued with ``stream=True`` and the raw response object
    is returned for streaming consumption by the caller.

    Parameters
    ----------
    path:
        Route to request; it is appended to the configured base URL after a
        ``/`` separator.

    Returns
    -------
    response:
        The raw ``requests.Response`` object.

    Raises
    ------
    MeilisearchTimeoutError
        The request timed out.
    MeilisearchCommunicationError
        A connection error occurred, or the configured URL was rejected by
        requests as having an invalid schema (for example when no
        scheme/protocol was supplied).
    MeilisearchApiError
        The server answered with an HTTP error status.
    """
    try:
        request_path = self.config.url + "/" + path
        response = requests.get(
            request_path,
            timeout=self.config.timeout,
            headers=self.headers,
            stream=True,
        )

        if not response.ok:
            response.raise_for_status()

        return response

    except requests.exceptions.Timeout as err:
        raise MeilisearchTimeoutError(str(err)) from err
    except requests.exceptions.ConnectionError as err:
        raise MeilisearchCommunicationError(str(err)) from err
    except requests.exceptions.HTTPError as err:
        # HTTPError is only raised by raise_for_status() above, so
        # ``response`` is always bound here.
        raise MeilisearchApiError(str(err), response) from err
    except requests.exceptions.InvalidSchema as err:
        # Wrap schema errors in the SDK's own exception type so callers never
        # see a raw requests exception for a malformed base URL.
        if "://" not in self.config.url:
            raise MeilisearchCommunicationError(
                f"""
                Invalid URL {self.config.url}, no scheme/protocol supplied.
                Did you mean https://{self.config.url}?
                """
            ) from err

        raise MeilisearchCommunicationError(str(err)) from err

Comment on lines +242 to +248

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🩺 Stability & Availability | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

cd /tmp && find . -name "_httprequests.py" -o -name "*http*requests*" 2>/dev/null | head -20

Repository: meilisearch/meilisearch-python

Length of output: 168


🏁 Script executed:

cd /tmp && git ls-files | grep -i http

Repository: meilisearch/meilisearch-python

Length of output: 237


🏁 Script executed:

cd /tmp && git ls-files | head -30

Repository: meilisearch/meilisearch-python

Length of output: 237


🏁 Script executed:

find . -type f -name "_httprequests.py" 2>/dev/null

Repository: meilisearch/meilisearch-python

Length of output: 199


🏁 Script executed:

find . -type f -name "*.py" | grep -i http | head -20

Repository: meilisearch/meilisearch-python

Length of output: 236


🏁 Script executed:

ls -la

Repository: meilisearch/meilisearch-python

Length of output: 1413


🏁 Script executed:

git ls-files | grep -E "\.(py|js|ts)$" | head -30

Repository: meilisearch/meilisearch-python

Length of output: 1078


🏁 Script executed:

wc -l ./meilisearch/_httprequests.py

Repository: meilisearch/meilisearch-python

Length of output: 203


🏁 Script executed:

ast-grep outline ./meilisearch/_httprequests.py

Repository: meilisearch/meilisearch-python

Length of output: 425


🏁 Script executed:

sed -n '1,270p' ./meilisearch/_httprequests.py | cat -n

Repository: meilisearch/meilisearch-python

Length of output: 11223


Add InvalidSchema exception handler to get_stream for consistency.

The exception handling of get_stream (lines 242-248) lacks the InvalidSchema handler that both send_request and post_stream have. A malformed base URL therefore surfaces as a raw requests.exceptions.InvalidSchema instead of being wrapped in MeilisearchCommunicationError, which makes the SDK's exception behavior inconsistent across its request methods.

Suggested fix
         except requests.exceptions.Timeout as err:
             raise MeilisearchTimeoutError(str(err)) from err
         except requests.exceptions.ConnectionError as err:
             raise MeilisearchCommunicationError(str(err)) from err
         except requests.exceptions.HTTPError as err:
             raise MeilisearchApiError(str(err), response) from err
+        except requests.exceptions.InvalidSchema as err:
+            if "://" not in self.config.url:
+                raise MeilisearchCommunicationError(
+                    f"""
+                    Invalid URL {self.config.url}, no scheme/protocol supplied.
+                    Did you mean https://{self.config.url}?
+                    """
+                ) from err
+
+            raise MeilisearchCommunicationError(str(err)) from err
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@meilisearch/_httprequests.py` around lines 242 - 248, add an exception
handler for `requests.exceptions.InvalidSchema` to the `get_stream` method's
exception handling block (after the existing `HTTPError` handler, as in the
suggested fix) to match the pattern used in `send_request` and `post_stream`.
When `self.config.url` contains no "://", the handler should raise
`MeilisearchCommunicationError` with a hint that no scheme/protocol was
supplied; otherwise it should raise `MeilisearchCommunicationError` wrapping
the original error message. This keeps exception behavior consistent across
all HTTP request methods in the SDK.

@staticmethod
def __to_json(request: requests.Response) -> Any:
if request.content == b"":
Expand Down
31 changes: 30 additions & 1 deletion meilisearch/_utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
import json
import re
from datetime import datetime
from functools import lru_cache
from typing import Union
from typing import Any, Dict, List, Union

import pydantic

_CONCATENATED_JSON = re.compile(r"(?<=\})\s*(?=\{)")


@lru_cache(maxsize=1)
def is_pydantic_2() -> bool:
Expand Down Expand Up @@ -41,3 +45,28 @@ def iso_to_date_time(iso_date: Union[datetime, str, None]) -> Union[datetime, No
reduce = len(split[1]) - 6
reduced = f"{split[0]}.{split[1][:-reduce]}Z"
return datetime.strptime(reduced, "%Y-%m-%dT%H:%M:%S.%fZ")


def parse_task_documents(raw_documents: str) -> List[Dict[str, Any]]:
"""Parse the payload returned by ``GET /tasks/{uid}/documents``.

The endpoint may return a JSON array, a single JSON object, NDJSON, or
several JSON objects concatenated without a separator. This normalizes all
of those formats into a list of documents.
"""
payload = raw_documents.strip()
if not payload:
return []

try:
parsed = json.loads(payload)
except json.JSONDecodeError:
documents: List[Dict[str, Any]] = []
for line in payload.splitlines():
for chunk in _CONCATENATED_JSON.split(line):
stripped = chunk.strip()
Comment on lines +65 to +67

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🎯 Functional Correctness | 🟠 Major | ⚡ Quick win

Use a JSON-aware concatenation parser instead of regex boundary splitting.

Lines 65-67 can mis-split valid payloads when a document string contains "}{" (e.g., {"text":"a}{b"}{"id":2}), causing false decode failures.

Suggested fix
     except json.JSONDecodeError:
-        documents: List[Dict[str, Any]] = []
-        for line in payload.splitlines():
-            for chunk in _CONCATENATED_JSON.split(line):
-                stripped = chunk.strip()
-                if stripped:
-                    documents.append(json.loads(stripped))
+        decoder = json.JSONDecoder()
+        documents: List[Dict[str, Any]] = []
+        idx = 0
+        while idx < len(payload):
+            while idx < len(payload) and payload[idx].isspace():
+                idx += 1
+            if idx >= len(payload):
+                break
+            document, idx = decoder.raw_decode(payload, idx)
+            documents.append(document)
         return documents
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@meilisearch/_utils.py` around lines 65 - 67, the fallback loop that iterates
over payload.splitlines() and splits each line on the _CONCATENATED_JSON regex
mis-handles valid JSON objects whose string values contain "}{". Replace that
regex-based splitting (the two nested for loops) with a JSON-aware parser: walk
the payload with `json.JSONDecoder().raw_decode`, skipping whitespace between
values, so that object boundaries are determined by the JSON decoder itself
rather than by simple string pattern matching.

if stripped:
documents.append(json.loads(stripped))
return documents

return parsed if isinstance(parsed, list) else [parsed]
23 changes: 23 additions & 0 deletions meilisearch/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -783,6 +783,29 @@ def get_task(self, uid: int) -> Task:
"""
return self.task_handler.get_task(uid)

def get_task_documents(self, uid: int) -> List[Dict[str, Any]]:
    """Return the documents that a task added or updated.

    Experimental: the Meilisearch instance must have the
    ``getTaskDocumentsRoute`` experimental feature enabled.

    Parameters
    ----------
    uid:
        Identifier of the task.

    Returns
    -------
    documents:
        List of the documents associated with the task.

    Raises
    ------
    MeilisearchApiError
        An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
    """
    # Pure delegation: the task handler performs the request and the parsing.
    handler = self.task_handler
    documents = handler.get_task_documents(uid)
    return documents

def cancel_tasks(
self, parameters: MutableMapping[str, Any], *, metadata: Optional[str] = None
) -> TaskInfo:
Expand Down
27 changes: 26 additions & 1 deletion meilisearch/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@

from datetime import datetime
from time import sleep
from typing import Any, Mapping, MutableMapping, Optional
from typing import Any, Dict, List, Mapping, MutableMapping, Optional
from urllib import parse

from meilisearch._httprequests import HttpRequests
from meilisearch._utils import parse_task_documents
from meilisearch.config import Config
from meilisearch.errors import MeilisearchTimeoutError
from meilisearch.models.task import Batch, BatchResults, Task, TaskInfo, TaskResults
Expand Down Expand Up @@ -122,6 +123,30 @@ def get_task(self, uid: int) -> Task:
task = self.http.get(f"{self.config.paths.task}/{uid}")
return Task(**task)

def get_task_documents(self, uid: int) -> List[Dict[str, Any]]:
    """Get the documents added or updated by a task.

    This is an experimental feature; the ``getTaskDocumentsRoute`` experimental
    feature must be enabled on the Meilisearch instance.

    Parameters
    ----------
    uid:
        Identifier of the task.

    Returns
    -------
    documents:
        List of the documents associated with the task.

    Raises
    ------
    MeilisearchApiError
        An error containing details about why Meilisearch can't process your request. Meilisearch error codes are described here: https://www.meilisearch.com/docs/reference/errors/error_codes#meilisearch-errors
    """
    response = self.http.get_stream(f"{self.config.paths.task}/{uid}/documents")
    try:
        # JSON / NDJSON payloads are UTF-8. When the server declares no
        # charset, pin the encoding instead of letting requests guess it
        # while building ``.text``.
        if response.encoding is None:
            response.encoding = "utf-8"
        raw_documents = response.text
    finally:
        # The response comes from a streaming request, so release the
        # connection explicitly even if reading the body fails.
        response.close()
    return parse_task_documents(raw_documents)

def cancel_tasks(
self, parameters: MutableMapping[str, Any], *, metadata: Optional[str] = None
) -> TaskInfo:
Expand Down
15 changes: 15 additions & 0 deletions tests/client/test_client_task_meilisearch.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# pylint: disable=invalid-name

from unittest.mock import MagicMock, patch

import pytest

from meilisearch.models.task import TaskInfo
Expand Down Expand Up @@ -189,3 +191,16 @@ def test_get_batch(client):
uid = batches.results[0].uid
batch = client.get_batch(uid)
assert batch.uid == uid


def test_get_task_documents_calls_endpoint_and_parses(client):
    """get_task_documents requests tasks/{uid}/documents and parses the body."""
    # Stand-in for the streamed HTTP response: only ``.text`` is set, to an
    # NDJSON payload holding two documents.
    streamed = MagicMock()
    streamed.text = '{"id": 1}\n{"id": 2}'

    http = client.task_handler.http
    with patch.object(http, "get_stream", return_value=streamed) as get_stream_mock:
        result = client.get_task_documents(42)

    # Exactly one request, to the documents route of the given task.
    get_stream_mock.assert_called_once_with("tasks/42/documents")
    assert result == [{"id": 1}, {"id": 2}]
17 changes: 16 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pytest

from meilisearch._utils import is_pydantic_2, iso_to_date_time
from meilisearch._utils import is_pydantic_2, iso_to_date_time, parse_task_documents


def test_is_pydantic_2():
Expand Down Expand Up @@ -33,6 +33,21 @@ def test_iso_to_date_time_invalid_format():
iso_to_date_time("2023-07-13T23:37:20Z")


# (raw payload, expected documents) pairs, one per supported payload format.
_PARSE_TASK_DOCUMENTS_CASES = [
    ('[{"id": 1}, {"id": 2}]', [{"id": 1}, {"id": 2}]),  # JSON array
    ('{"id": 1}', [{"id": 1}]),  # single JSON object
    ('{"id": 1}\n{"id": 2}', [{"id": 1}, {"id": 2}]),  # NDJSON
    ('{"id": 1}{"id": 2}', [{"id": 1}, {"id": 2}]),  # concatenated, no separator
    ("", []),  # empty
    (" \n ", []),  # whitespace only
]


@pytest.mark.parametrize(("raw", "expected"), _PARSE_TASK_DOCUMENTS_CASES)
def test_parse_task_documents(raw, expected):
    """Every supported payload format is normalized to a list of documents."""
    documents = parse_task_documents(raw)
    assert documents == expected


# Refactor to use the unified API to toggle experimental features
def reset_network_config(client):
client.add_or_update_networks(body={"remotes": {}, "leader": None})