-
-
Notifications
You must be signed in to change notification settings - Fork 213
[ENH] V1 → V2 API Migration - Tasks #1611
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
0159f47
58e9175
bdd65ff
52ef379
5dfcbce
2acbe99
af99880
74ab366
17a7178
510b286
c2b9e1a
056cf3a
fb1ff40
17ab23c
e07ef73
fb57a3e
8e041a4
61ca98c
e5dd2d9
4c75e16
5762185
202314e
3a2f1c4
a0c2267
249efec
69dd3c6
0d5ce53
e15e892
1b19c08
6913294
6404f21
7e9bc1f
c603383
ff6a8b0
f01898f
5c4511e
e9a6b21
824ffd9
1c00abb
fdb2449
e71a885
5b1ba46
0f062fb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
| from openml._api.runtime.core import APIContext | ||
|
|
||
|
|
||
def set_api_version(version: str, *, strict: bool = False) -> None:
    """Switch the module-level API context to *version*.

    Thin convenience wrapper that forwards both arguments to
    ``APIContext.set_version``. The *strict* flag is passed through
    unchanged — its exact semantics live in ``APIContext`` (presumably it
    controls whether an unsupported version raises — TODO confirm).
    """
    api_context.set_version(version=version, strict=strict)


# Module-level singleton holding the active API-version state; mutated
# only through set_api_version above.
api_context = APIContext()
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
from .http import HTTPCache, HTTPClient

# Public surface of the HTTP subpackage: re-export the client and its
# response cache.
__all__ = [
    "HTTPCache",
    "HTTPClient",
]
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,211 @@ | ||
| from __future__ import annotations | ||
|
|
||
| import json | ||
| import time | ||
| from pathlib import Path | ||
| from typing import TYPE_CHECKING, Any | ||
| from urllib.parse import urlencode, urljoin, urlparse | ||
|
|
||
| import requests | ||
| from requests import Response | ||
|
|
||
| from openml.__version__ import __version__ | ||
|
|
||
| if TYPE_CHECKING: | ||
| from openml._api.config import DelayMethod | ||
|
|
||
|
|
||
class HTTPCache:
    """Filesystem cache for HTTP responses with a time-to-live policy.

    Each cached response lives in its own directory (derived from the
    request URL and query parameters) containing three files:

    - ``meta.json``    status code, URL, reason, encoding, timestamps and
      a snapshot of the originating request;
    - ``headers.json`` the response headers;
    - ``body.bin``     the raw response body bytes.
    """

    def __init__(self, *, path: Path, ttl: int) -> None:
        # path: root directory of the cache; ttl: maximum entry age in seconds.
        self.path = path
        self.ttl = ttl

    def get_key(self, url: str, params: dict[str, Any]) -> str:
        """Build a filesystem-safe cache key from *url* and query *params*.

        The hostname components are reversed (``www.openml.org`` ->
        ``org/openml/www``) so related hosts cluster under a common prefix.
        The ``api_key`` parameter is excluded so credentials never end up
        encoded into cache paths on disk.
        """
        parsed_url = urlparse(url)
        netloc_parts = parsed_url.netloc.split(".")[::-1]
        path_parts = parsed_url.path.strip("/").split("/")

        filtered_params = {k: v for k, v in params.items() if k != "api_key"}
        params_part = [urlencode(filtered_params)] if filtered_params else []

        return str(Path(*netloc_parts, *path_parts, *params_part))

    def _key_to_path(self, key: str) -> Path:
        """Map a cache key to its entry directory below the cache root."""
        return self.path.joinpath(key)

    def load(self, key: str) -> Response:
        """Reconstruct a cached ``requests.Response`` for *key*.

        Raises
        ------
        FileNotFoundError
            If the entry directory is missing or incomplete.
        ValueError
            If the metadata lacks a ``created_at`` timestamp.
        TimeoutError
            If the entry is older than the configured TTL.
        """
        path = self._key_to_path(key)

        if not path.exists():
            raise FileNotFoundError(f"Cache directory not found: {path}")

        meta_path = path / "meta.json"
        headers_path = path / "headers.json"
        body_path = path / "body.bin"

        if not (meta_path.exists() and headers_path.exists() and body_path.exists()):
            raise FileNotFoundError(f"Incomplete cache at {path}")

        with meta_path.open("r", encoding="utf-8") as f:
            meta = json.load(f)

        created_at = meta.get("created_at")
        if created_at is None:
            raise ValueError("Cache metadata missing 'created_at'")

        if time.time() - created_at > self.ttl:
            raise TimeoutError(f"Cache expired for {path}")

        with headers_path.open("r", encoding="utf-8") as f:
            headers = json.load(f)

        body = body_path.read_bytes()

        response = Response()
        response.status_code = meta["status_code"]
        response.url = meta["url"]
        response.reason = meta["reason"]
        # requests exposes headers case-insensitively; assigning a plain
        # dict (as deserialized from JSON) would break lookups such as
        # response.headers["content-type"] when the stored casing differs.
        response.headers = requests.structures.CaseInsensitiveDict(headers)
        response._content = body
        response.encoding = meta["encoding"]

        return response

    def save(self, key: str, response: Response) -> None:
        """Persist *response* under *key*: body, headers, then metadata.

        ``meta.json`` is written last so that an interrupted save leaves an
        entry that :meth:`load` rejects as incomplete.
        """
        path = self._key_to_path(key)
        path.mkdir(parents=True, exist_ok=True)

        (path / "body.bin").write_bytes(response.content)

        with (path / "headers.json").open("w", encoding="utf-8") as f:
            json.dump(dict(response.headers), f)

        request = response.request  # may be None for synthesized responses
        meta = {
            "status_code": response.status_code,
            "url": response.url,
            "reason": response.reason,
            "encoding": response.encoding,
            "elapsed": response.elapsed.total_seconds(),
            "created_at": time.time(),
            "request": {
                "method": request.method if request else None,
                "url": request.url if request else None,
                "headers": dict(request.headers) if request else None,
                "body": request.body if request else None,
            },
        }

        with (path / "meta.json").open("w", encoding="utf-8") as f:
            json.dump(meta, f)
|
|
||
|
|
||
class HTTPClient:
    """HTTP transport for the OpenML API.

    Handles URL construction (server + base_url + path), API-key query
    authentication, default headers, timeouts, and optional response
    caching on top of ``requests``.
    """

    def __init__(  # noqa: PLR0913
        self,
        *,
        server: str,
        base_url: str,
        api_key: str,
        timeout: int,
        retries: int,
        delay_method: DelayMethod,
        delay_time: int,
        cache: HTTPCache | None = None,
    ) -> None:
        self.server = server
        self.base_url = base_url
        self.api_key = api_key
        self.timeout = timeout
        # NOTE(review): retries, delay_method and delay_time are stored but
        # no retry logic in this class uses them yet — confirm intended.
        self.retries = retries
        self.delay_method = delay_method
        self.delay_time = delay_time
        self.cache = cache

        # Default headers sent with every request; these override any
        # caller-supplied header of the same name (see request()).
        self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"}

    def request(
        self,
        method: str,
        path: str,
        *,
        use_cache: bool = False,
        use_api_key: bool = False,
        **request_kwargs: Any,
    ) -> Response:
        """Send *method* to *path* joined onto ``server``/``base_url``.

        When *use_cache* is true and a cache is configured, a fresh cached
        response is returned without hitting the network; otherwise the
        live response is fetched and stored back into the cache. When
        *use_api_key* is true the key is sent as the ``api_key`` query
        parameter.
        """
        url = urljoin(self.server, urljoin(self.base_url, path))

        # Copy caller-supplied params/headers so their dicts are never mutated.
        params = request_kwargs.pop("params", {}).copy()
        if use_api_key:
            params["api_key"] = self.api_key

        headers = request_kwargs.pop("headers", {}).copy()
        headers.update(self.headers)  # client defaults win on conflicts

        timeout = request_kwargs.pop("timeout", self.timeout)

        # Compute the cache key once; it doubles as the "caching is active"
        # flag for the save step after the network call.
        cache_key = None
        if use_cache and self.cache is not None:
            cache_key = self.cache.get_key(url, params)
            try:
                return self.cache.load(cache_key)
            except (FileNotFoundError, TimeoutError):
                pass  # cache miss or expired entry: fall through to the network

        response = requests.request(
            method=method,
            url=url,
            params=params,
            headers=headers,
            timeout=timeout,
            **request_kwargs,
        )

        if cache_key is not None and self.cache is not None:
            self.cache.save(cache_key, response)

        return response

    def get(
        self,
        path: str,
        *,
        use_cache: bool = False,
        use_api_key: bool = False,
        **request_kwargs: Any,
    ) -> Response:
        """GET *path*; caching and API-key authentication are opt-in."""
        return self.request(
            method="GET",
            path=path,
            use_cache=use_cache,
            use_api_key=use_api_key,
            **request_kwargs,
        )

    def post(
        self,
        path: str,
        **request_kwargs: Any,
    ) -> Response:
        """POST *path*; always authenticated, never cached."""
        return self.request(
            method="POST",
            path=path,
            use_cache=False,
            use_api_key=True,
            **request_kwargs,
        )

    def delete(
        self,
        path: str,
        **request_kwargs: Any,
    ) -> Response:
        """DELETE *path*; always authenticated, never cached."""
        return self.request(
            method="DELETE",
            path=path,
            use_cache=False,
            use_api_key=True,
            **request_kwargs,
        )
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,61 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from dataclasses import dataclass | ||
| from enum import Enum | ||
|
|
||
|
|
||
class DelayMethod(str, Enum):
    """Strategy label for spacing out connection retries.

    Mixes in ``str`` so members compare equal to their plain string values.
    """

    HUMAN = "human"
    ROBOT = "robot"
|
|
||
|
|
||
@dataclass
class APIConfig:
    """Connection details for one OpenML API version."""

    server: str  # scheme + host, e.g. "https://www.openml.org/"
    base_url: str  # path prefix joined onto the server for every request
    api_key: str  # credential sent as the "api_key" query parameter
    timeout: int = 10  # seconds
|
|
||
|
|
||
@dataclass
class APISettings:
    """Per-version API configurations (v1 XML API and v2)."""

    v1: APIConfig
    v2: APIConfig
|
|
||
|
|
||
@dataclass
class ConnectionConfig:
    """Retry behaviour for HTTP connections."""

    retries: int = 3
    delay_method: DelayMethod = DelayMethod.HUMAN
    delay_time: int = 1  # seconds
|
|
||
|
|
||
@dataclass
class CacheConfig:
    """HTTP response-cache location and entry lifetime."""

    # NOTE(review): "~" is not expanded here — presumably the consumer
    # calls Path.expanduser(); confirm. Field name shadows builtin dir()
    # within the class body only.
    dir: str = "~/.openml/cache"
    ttl: int = 60 * 60 * 24 * 7  # one week
|
|
||
|
|
||
@dataclass
class Settings:
    """Top-level configuration bundle for the API layer."""

    api: APISettings
    connection: ConnectionConfig
    cache: CacheConfig
|
|
||
|
|
||
# Module-level defaults: v1 targets the public OpenML XML API, v2 points
# at a local development server. The "..." api_key values are placeholders
# — TODO confirm where real keys are injected.
settings = Settings(
    api=APISettings(
        v1=APIConfig(
            server="https://www.openml.org/",
            base_url="api/v1/xml/",
            api_key="...",
        ),
        v2=APIConfig(
            server="http://127.0.0.1:8001/",
            base_url="",
            api_key="...",
        ),
    ),
    connection=ConnectionConfig(),
    cache=CacheConfig(),
)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
from openml._api.resources.datasets import DatasetsV1, DatasetsV2
from openml._api.resources.tasks import TasksV1, TasksV2

# Public surface of the resources subpackage: version-specific
# implementations of the dataset and task endpoints.
__all__ = ["DatasetsV1", "DatasetsV2", "TasksV1", "TasksV2"]
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,62 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from abc import ABC, abstractmethod | ||
| from typing import TYPE_CHECKING, Any | ||
|
|
||
| if TYPE_CHECKING: | ||
| import pandas as pd | ||
| from _api.http import HTTPClient | ||
| from requests import Response | ||
|
|
||
| from openml.datasets.dataset import OpenMLDataset | ||
| from openml.tasks.task import OpenMLTask, TaskType | ||
|
|
||
|
|
||
class ResourceAPI:
    """Base class for resource endpoint groups; stores the shared HTTP client.

    NOTE(review): the TYPE_CHECKING import above pulls HTTPClient from
    "_api.http"; it should likely be "openml._api.http" — confirm.
    """

    def __init__(self, http: HTTPClient):
        self._http = http
|
|
||
|
|
||
class DatasetsAPI(ResourceAPI, ABC):
    """Version-agnostic interface for dataset endpoints."""

    @abstractmethod
    def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: ...
|
|
||
|
|
||
| class TasksAPI(ResourceAPI, ABC): | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why are the methods commented out?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was going to remove them, if I add abstract methods they have to be for shared functions right? The only shared function right now is
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Not there for
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Still, the base class should have these; in the v2 class, just raise an exception — or maybe skip it, and the exception will be raised automatically. |
||
    @abstractmethod
    def get(
        self,
        task_id: int,
    ) -> OpenMLTask:
        """Fetch a single task by its id.

        API v1:
        GET /task/{task_id}

        API v2:
        GET /tasks/{task_id}
        """
        ...
|
|
||
    # Task listing (currently available through the v1 API only)
    @abstractmethod
    def list(
        self,
        limit: int,
        offset: int,
        task_type: TaskType | int | None = None,
        **kwargs: Any,
    ) -> pd.DataFrame:
        """
        List tasks with filters.

        API v1:
        GET /task/list

        API v2:
        Not available.

        Returns
        -------
        pandas.DataFrame
        """
        ...
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from typing import TYPE_CHECKING | ||
|
|
||
| from openml._api.resources.base import DatasetsAPI | ||
|
|
||
| if TYPE_CHECKING: | ||
| from responses import Response | ||
|
|
||
| from openml.datasets.dataset import OpenMLDataset | ||
|
|
||
|
|
||
class DatasetsV1(DatasetsAPI):
    """Dataset endpoints implemented against the v1 (XML) API."""

    # NOTE(review): this module's TYPE_CHECKING block imports Response from
    # "responses" (the HTTP-mocking library); it should be "requests".
    def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        """Fetch dataset *dataset_id*. Not implemented yet."""
        # Identify the unimplemented call site in tracebacks instead of a
        # bare, message-less NotImplementedError.
        raise NotImplementedError("DatasetsV1.get is not implemented yet")
|
|
||
|
|
||
class DatasetsV2(DatasetsAPI):
    """Dataset endpoints implemented against the v2 API."""

    def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        """Fetch dataset *dataset_id*. Not implemented yet."""
        # Identify the unimplemented call site in tracebacks instead of a
        # bare, message-less NotImplementedError.
        raise NotImplementedError("DatasetsV2.get is not implemented yet")
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
please see the previous comments on
TasksAPI. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Solved with the latest commit