Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
0159f47
set up folder structure and base code
geetu040 Dec 30, 2025
58e9175
Merge branch 'main' into migration
fkiraly Dec 31, 2025
bdd65ff
Merge branch 'main' into migration
geetu040 Jan 1, 2026
52ef379
fix pre-commit
geetu040 Jan 5, 2026
5dfcbce
refactor
geetu040 Jan 7, 2026
2acbe99
implement cache_dir
geetu040 Jan 7, 2026
af99880
refactor
geetu040 Jan 7, 2026
74ab366
Merge branch 'main' into pr/1576
fkiraly Jan 7, 2026
17a7178
git commit --no-verify
satvshr Jan 9, 2026
510b286
Merge branch 'main' into tasks
satvshr Jan 9, 2026
c2b9e1a
committing latest changes
satvshr Jan 11, 2026
056cf3a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 11, 2026
fb1ff40
Merge remote-tracking branch 'geetu040/migration' into tasks
satvshr Jan 11, 2026
17ab23c
bug fixing
satvshr Jan 11, 2026
e07ef73
committing intermediate changes
satvshr Jan 14, 2026
fb57a3e
removed caching
satvshr Jan 14, 2026
8e041a4
removed unnecessary imports
satvshr Jan 14, 2026
61ca98c
merge main
satvshr Jan 14, 2026
e5dd2d9
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 14, 2026
4c75e16
undo changes in tasks/functions.py
geetu040 Jan 15, 2026
5762185
Merge branch 'main' into migration
geetu040 Jan 15, 2026
202314e
small comments
satvshr Jan 15, 2026
3a2f1c4
Merge branch 'tasks' of https://github.com/satvshr/openml-python into…
satvshr Jan 15, 2026
a0c2267
Merge branch 'main' into tasks
satvshr Jan 15, 2026
249efec
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 15, 2026
69dd3c6
Merge branch 'main' into tasks
satvshr Jan 16, 2026
0d5ce53
requested changes
satvshr Jan 16, 2026
e15e892
requested changes
satvshr Jan 16, 2026
1b19c08
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 16, 2026
6913294
requested changes
satvshr Jan 19, 2026
6404f21
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 19, 2026
7e9bc1f
Merge branch 'main' into migration
geetu040 Jan 21, 2026
c603383
add tests directory
geetu040 Jan 21, 2026
ff6a8b0
use enum for delay method
geetu040 Jan 21, 2026
f01898f
implement cache
geetu040 Jan 21, 2026
5c4511e
refactor clients
geetu040 Jan 21, 2026
e9a6b21
req changes
satvshr Jan 21, 2026
824ffd9
pull tasks from main
satvshr Jan 21, 2026
1c00abb
update main migration branch
satvshr Jan 21, 2026
fdb2449
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 21, 2026
e71a885
added tests
satvshr Jan 21, 2026
5b1ba46
Merge branch 'tasks' of https://github.com/satvshr/openml-python into…
satvshr Jan 21, 2026
0f062fb
Merge branch 'main' into tasks
satvshr Jan 22, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions openml/_api/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from openml._api.runtime.core import APIContext


def set_api_version(version: str, *, strict: bool = False) -> None:
    """Forward the requested API version to the module-level ``api_context``.

    Parameters
    ----------
    version : str
        Version identifier handed to ``api_context.set_version``.
    strict : bool, default False
        Keyword-only flag passed through unchanged to
        ``api_context.set_version``.
    """
    # ``api_context`` is a module-level name, so it is looked up when this
    # function is called, not when it is defined.
    api_context.set_version(strict=strict, version=version)


# Module-level context instance; ``set_api_version`` delegates to it.
api_context = APIContext()
6 changes: 6 additions & 0 deletions openml/_api/clients/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from .http import HTTPCache, HTTPClient

__all__ = [
"HTTPCache",
"HTTPClient",
]
211 changes: 211 additions & 0 deletions openml/_api/clients/http.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
from __future__ import annotations

import json
import time
from pathlib import Path
from typing import TYPE_CHECKING, Any
from urllib.parse import urlencode, urljoin, urlparse

import requests
from requests import Response

from openml.__version__ import __version__

if TYPE_CHECKING:
from openml._api.config import DelayMethod


class HTTPCache:
    """File-system cache for HTTP responses.

    Every cached response lives in its own directory below ``path`` and is
    stored as three files: ``body.bin`` (raw content), ``headers.json``
    (response headers) and ``meta.json`` (status, URL, timing and request
    details).

    Parameters
    ----------
    path : Path
        Root directory under which cache entries are created.
    ttl : int
        Maximum age in seconds; older entries are rejected by :meth:`load`.
    """

    def __init__(self, *, path: Path, ttl: int) -> None:
        self.path = path
        self.ttl = ttl

    def get_key(self, url: str, params: dict[str, Any]) -> str:
        """Build the relative cache key for a URL and its query parameters.

        The key is a relative path made of the host labels in reverse order,
        the URL path segments and, when parameters remain, their URL-encoded
        form as a final component.

        Parameters
        ----------
        url : str
            Full request URL.
        params : dict[str, Any]
            Query parameters of the request.

        Returns
        -------
        str
            Relative path identifying the cache entry.
        """
        parsed_url = urlparse(url)
        netloc_parts = parsed_url.netloc.split(".")[::-1]
        path_parts = parsed_url.path.strip("/").split("/")

        # ``api_key`` is dropped so the secret never becomes part of a
        # directory name. The remaining parameters are sorted by name so that
        # the same request yields the same key regardless of dict order.
        filtered_params = sorted(
            ((k, v) for k, v in params.items() if k != "api_key"),
            key=lambda item: str(item[0]),
        )
        params_part = [urlencode(filtered_params)] if filtered_params else []

        return str(Path(*netloc_parts, *path_parts, *params_part))

    def _key_to_path(self, key: str) -> Path:
        """Return the directory below ``self.path`` that belongs to ``key``."""
        return self.path.joinpath(key)

    def load(self, key: str) -> Response:
        """Rebuild a ``Response`` from the cache entry stored under ``key``.

        Parameters
        ----------
        key : str
            Cache key as produced by :meth:`get_key`.

        Returns
        -------
        Response
            Response populated with the cached status, URL, reason, headers,
            body and encoding.

        Raises
        ------
        FileNotFoundError
            If the entry directory or any of its three files is missing.
        ValueError
            If ``meta.json`` has no ``created_at`` value.
        TimeoutError
            If the entry is older than ``self.ttl`` seconds.
        KeyError
            If ``meta.json`` lacks one of the fields restored below.
        """
        # Local import keeps this change self-contained; ``requests`` is
        # already a dependency of this module.
        from requests.structures import CaseInsensitiveDict

        path = self._key_to_path(key)

        if not path.exists():
            raise FileNotFoundError(f"Cache directory not found: {path}")

        meta_path = path / "meta.json"
        headers_path = path / "headers.json"
        body_path = path / "body.bin"

        if not (meta_path.exists() and headers_path.exists() and body_path.exists()):
            raise FileNotFoundError(f"Incomplete cache at {path}")

        with meta_path.open("r", encoding="utf-8") as f:
            meta = json.load(f)

        created_at = meta.get("created_at")
        if created_at is None:
            raise ValueError("Cache metadata missing 'created_at'")

        if time.time() - created_at > self.ttl:
            raise TimeoutError(f"Cache expired for {path}")

        with headers_path.open("r", encoding="utf-8") as f:
            headers = json.load(f)

        body = body_path.read_bytes()

        response = Response()
        response.status_code = meta["status_code"]
        response.url = meta["url"]
        response.reason = meta["reason"]
        # A live ``requests`` response exposes case-insensitive headers; keep
        # that behaviour for cached responses so header lookups do not depend
        # on whether the response came from disk.
        response.headers = CaseInsensitiveDict(headers)
        response._content = body
        response.encoding = meta["encoding"]

        return response

    def save(self, key: str, response: Response) -> None:
        """Write ``response`` to the cache entry stored under ``key``.

        Parameters
        ----------
        key : str
            Cache key as produced by :meth:`get_key`.
        response : Response
            Response whose content, headers and metadata are persisted.
        """
        path = self._key_to_path(key)
        path.mkdir(parents=True, exist_ok=True)

        (path / "body.bin").write_bytes(response.content)

        with (path / "headers.json").open("w", encoding="utf-8") as f:
            json.dump(dict(response.headers), f)

        request = response.request if response.request else None

        # The request body may be bytes (or another non-JSON type), which
        # ``json.dump`` cannot serialise; store a text form instead of failing
        # after the body and headers files were already written.
        request_body = request.body if request else None
        if isinstance(request_body, (bytes, bytearray)):
            request_body = bytes(request_body).decode("utf-8", errors="replace")
        elif request_body is not None and not isinstance(request_body, str):
            request_body = repr(request_body)

        meta = {
            "status_code": response.status_code,
            "url": response.url,
            "reason": response.reason,
            "encoding": response.encoding,
            "elapsed": response.elapsed.total_seconds(),
            "created_at": time.time(),
            "request": {
                "method": request.method if request else None,
                "url": request.url if request else None,
                "headers": dict(request.headers) if request else None,
                "body": request_body,
            },
        }

        # ``meta.json`` is written last; ``load`` requires all three files, so
        # an interrupted save of a new entry is treated as a cache miss.
        with (path / "meta.json").open("w", encoding="utf-8") as f:
            json.dump(meta, f)


class HTTPClient:
    """Small wrapper around ``requests.request`` bound to one server.

    Parameters
    ----------
    server : str
        Server address that request URLs are joined onto.
    base_url : str
        Path prefix joined between ``server`` and the per-request path.
    api_key : str
        Key added as the ``api_key`` query parameter when requested.
    timeout : int
        Default timeout passed to ``requests.request``.
    retries : int
        Stored on the instance; not used by the methods of this class.
    delay_method : DelayMethod
        Stored on the instance; not used by the methods of this class.
    delay_time : int
        Stored on the instance; not used by the methods of this class.
    cache : HTTPCache or None, default None
        Optional cache consulted when a request is made with
        ``use_cache=True``.
    """

    def __init__(  # noqa: PLR0913
        self,
        *,
        server: str,
        base_url: str,
        api_key: str,
        timeout: int,
        retries: int,
        delay_method: DelayMethod,
        delay_time: int,
        cache: HTTPCache | None = None,
    ) -> None:
        self.server = server
        self.base_url = base_url
        self.api_key = api_key
        self.timeout = timeout
        self.retries = retries
        self.delay_method = delay_method
        self.delay_time = delay_time
        self.cache = cache

        self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"}

    def request(
        self,
        method: str,
        path: str,
        *,
        use_cache: bool = False,
        use_api_key: bool = False,
        **request_kwargs: Any,
    ) -> Response:
        """Send a request, optionally served from or stored in the cache.

        Parameters
        ----------
        method : str
            HTTP method name passed to ``requests.request``.
        path : str
            Path joined onto ``base_url`` and ``server`` with ``urljoin``.
        use_cache : bool, default False
            If true and a cache is configured, try the cache first and store
            successful responses afterwards.
        use_api_key : bool, default False
            If true, add ``api_key`` to the query parameters.
        **request_kwargs : Any
            Extra keyword arguments for ``requests.request``. ``params``,
            ``headers`` and ``timeout`` are extracted and handled here.

        Returns
        -------
        Response
            The cached response on a cache hit, otherwise the live response.
        """
        url = urljoin(self.server, urljoin(self.base_url, path))

        # Work on copies so caller-supplied mappings are never mutated. An
        # explicit ``None`` is treated the same as an omitted argument.
        params = request_kwargs.pop("params", None)
        params = {} if params is None else params.copy()
        if use_api_key:
            params["api_key"] = self.api_key

        headers = request_kwargs.pop("headers", None)
        headers = {} if headers is None else headers.copy()
        # Client-level headers take precedence over per-call headers.
        headers.update(self.headers)

        timeout = request_kwargs.pop("timeout", self.timeout)

        cache = self.cache if use_cache else None
        cache_key: str | None = None
        if cache is not None:
            cache_key = cache.get_key(url, params)
            try:
                return cache.load(cache_key)
            except (FileNotFoundError, TimeoutError):
                # Missing, incomplete or expired entry: fall through to a live
                # request. Any other cache error propagates to the caller.
                pass

        response = requests.request(
            method=method,
            url=url,
            params=params,
            headers=headers,
            timeout=timeout,
            **request_kwargs,
        )

        # Only successful responses are cached; storing an error response
        # would replay it until the cache entry expires.
        if cache is not None and cache_key is not None and response.ok:
            cache.save(cache_key, response)

        return response

    def get(
        self,
        path: str,
        *,
        use_cache: bool = False,
        use_api_key: bool = False,
        **request_kwargs: Any,
    ) -> Response:
        """Send a ``GET`` request; see :meth:`request` for the arguments."""
        return self.request(
            method="GET",
            path=path,
            use_cache=use_cache,
            use_api_key=use_api_key,
            **request_kwargs,
        )

    def post(
        self,
        path: str,
        **request_kwargs: Any,
    ) -> Response:
        """Send a ``POST`` request with the API key and without caching."""
        return self.request(
            method="POST",
            path=path,
            use_cache=False,
            use_api_key=True,
            **request_kwargs,
        )

    def delete(
        self,
        path: str,
        **request_kwargs: Any,
    ) -> Response:
        """Send a ``DELETE`` request with the API key and without caching."""
        return self.request(
            method="DELETE",
            path=path,
            use_cache=False,
            use_api_key=True,
            **request_kwargs,
        )
Empty file added openml/_api/clients/minio.py
Empty file.
61 changes: 61 additions & 0 deletions openml/_api/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from __future__ import annotations

from dataclasses import dataclass
from enum import Enum


class DelayMethod(str, Enum):
    """String-valued enumeration of the supported delay methods."""

    HUMAN = "human"
    ROBOT = "robot"


@dataclass
class APIConfig:
    """Connection details for one API version.

    Attributes
    ----------
    server : str
        Server address (the defaults in this module use a URL ending in "/").
    base_url : str
        Path prefix below ``server``; may be empty.
    api_key : str
        API key for this server.
    timeout : int, default 10
        Timeout in seconds.
    """

    server: str
    base_url: str
    api_key: str
    timeout: int = 10  # seconds


@dataclass
class APISettings:
    """Pair of per-version API configurations.

    Attributes
    ----------
    v1 : APIConfig
        Configuration for the v1 API.
    v2 : APIConfig
        Configuration for the v2 API.
    """

    v1: APIConfig
    v2: APIConfig


@dataclass
class ConnectionConfig:
    """Retry and delay settings for connections.

    Attributes
    ----------
    retries : int, default 3
        Number of retries (presumably per request; confirm against the
        consumer of this setting).
    delay_method : DelayMethod, default DelayMethod.HUMAN
        Selected delay method.
    delay_time : int, default 1
        Delay in seconds.
    """

    retries: int = 3
    delay_method: DelayMethod = DelayMethod.HUMAN
    delay_time: int = 1  # seconds


@dataclass
class CacheConfig:
    """Location and lifetime settings for the cache.

    Attributes
    ----------
    dir : str, default "~/.openml/cache"
        Cache directory. The default contains an unexpanded ``~``.
    ttl : int, default 604800
        Time-to-live in seconds (one week).
    """

    dir: str = "~/.openml/cache"
    ttl: int = 7 * 24 * 60 * 60  # one week, in seconds


@dataclass
class Settings:
    """Top-level container grouping all configuration sections.

    Attributes
    ----------
    api : APISettings
        Per-version API configuration.
    connection : ConnectionConfig
        Retry and delay settings.
    cache : CacheConfig
        Cache location and lifetime.
    """

    api: APISettings
    connection: ConnectionConfig
    cache: CacheConfig


# Module-level default settings instance.
# NOTE(review): both ``api_key`` values are the literal placeholder "..." and
# the v2 server is a loopback address -- confirm how real values are injected.
settings = Settings(
    api=APISettings(
        v1=APIConfig(
            server="https://www.openml.org/",
            base_url="api/v1/xml/",
            api_key="...",
        ),
        v2=APIConfig(
            server="http://127.0.0.1:8001/",
            base_url="",
            api_key="...",
        ),
    ),
    # Connection and cache sections use their dataclass defaults.
    connection=ConnectionConfig(),
    cache=CacheConfig(),
)
4 changes: 4 additions & 0 deletions openml/_api/resources/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from openml._api.resources.datasets import DatasetsV1, DatasetsV2
from openml._api.resources.tasks import TasksV1, TasksV2

__all__ = ["DatasetsV1", "DatasetsV2", "TasksV1", "TasksV2"]
62 changes: 62 additions & 0 deletions openml/_api/resources/base.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please see the previous comments on TasksAPI

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Solved with the latest commit

Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any

# Imports needed only for type annotations; never executed at runtime.
if TYPE_CHECKING:
    import pandas as pd
    from requests import Response

    # ``HTTPClient`` is exported by ``openml._api.clients``; the previous
    # ``_api.http`` path does not name an importable module.
    from openml._api.clients import HTTPClient
    from openml.datasets.dataset import OpenMLDataset
    from openml.tasks.task import OpenMLTask, TaskType


class ResourceAPI:
    """Base class for API resources; keeps the HTTP client they use.

    Parameters
    ----------
    http : HTTPClient
        Client stored on the instance as ``_http``.
    """

    def __init__(self, http: HTTPClient) -> None:
        self._http = http


class DatasetsAPI(ResourceAPI, ABC):
    """Abstract interface that dataset resources must implement."""

    @abstractmethod
    def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        """Return the dataset identified by ``dataset_id``.

        Per the annotation, implementations return either the dataset alone
        or a ``(dataset, response)`` pair.
        """


class TasksAPI(ResourceAPI, ABC):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why are the methods commented out?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was going to remove them, if I add abstract methods they have to be for shared functions right? The only shared function right now is get.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if I add abstract methods they have to be for shared functions right?

  1. they create blueprint of this resource, so one can look at the resource class to see which are the public methods and what do their inputs and outputs look like
  2. these methods are expected to be implemented in all the child classes, so yes they are used for shared functions

The only shared function right now is get.

list, delete, ...?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

list, delete, ...?

Not there for v2

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

still the base class should have these, in the v2 class just raise an exception or maybe skip it and the exception will be raised automatically

@abstractmethod
def get(self, task_id: int) -> OpenMLTask:
    """Return the task identified by ``task_id``.

    Endpoints::

        API v1: GET /task/{task_id}
        API v2: GET /tasks/{task_id}

    Parameters
    ----------
    task_id : int
        Identifier of the task to fetch.

    Returns
    -------
    OpenMLTask
    """

# Task listing (V1 only)
@abstractmethod
def list(
    self,
    limit: int,
    offset: int,
    task_type: TaskType | int | None = None,
    **kwargs: Any,
) -> pd.DataFrame:
    """Return a table of tasks matching the given filters.

    Endpoints::

        API v1: GET /task/list
        API v2: not available

    Parameters
    ----------
    limit : int
        Limit value for the listing (presumably the maximum number of rows;
        confirm against the implementation).
    offset : int
        Offset value for the listing.
    task_type : TaskType or int or None, default None
        Optional task type to filter on.
    **kwargs : Any
        Further keyword arguments (presumably additional filters; confirm
        against the implementation).

    Returns
    -------
    pandas.DataFrame
    """
20 changes: 20 additions & 0 deletions openml/_api/resources/datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from openml._api.resources.base import DatasetsAPI

# Imports needed only for type annotations; never executed at runtime.
if TYPE_CHECKING:
    # ``Response`` must be the ``requests`` type used by the ``DatasetsAPI``
    # signature; ``responses`` is a separate HTTP-mocking package.
    from requests import Response

    from openml.datasets.dataset import OpenMLDataset


class DatasetsV1(DatasetsAPI):
    """Placeholder V1 implementation of ``DatasetsAPI``."""

    def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        """Not implemented yet; always raises ``NotImplementedError``."""
        raise NotImplementedError()


class DatasetsV2(DatasetsAPI):
    """Placeholder V2 implementation of ``DatasetsAPI``."""

    def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        """Not implemented yet; always raises ``NotImplementedError``."""
        raise NotImplementedError()
Loading
Loading