Skip to content

Commit 8d7305f

Browse files
author
Mahitab Ayman
committed
Fix docstring examples and line-length issues
1 parent 06eb009 commit 8d7305f

File tree

3 files changed

+489
-50
lines changed

3 files changed

+489
-50
lines changed

et_id = 61

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
2+
index 0dc1eec..0b908d6 100644
3+
--- a/openml/datasets/functions.py
4+
+++ b/openml/datasets/functions.py
5+
@@ -68,7 +68,8 @@ def list_qualities() -> list[str]:
6+
Examples
7+
--------
8+
>>> import openml
9+
- >>> qualities = openml.datasets.list_qualities()
10+
+ >>> from openml.datasets import list_qualities
11+
+ >>> qualities = list_qualities()
12+
>>> print(qualities[:5])
13+
"""
14+
api_call = "data/qualities/list"
15+
@@ -97,50 +98,53 @@ def list_datasets(
16+
number_classes: int | str | None = None,
17+
number_missing_values: int | str | None = None,
18+
) -> pd.DataFrame:
19+
- """Return a dataframe of all dataset which are on OpenML.
20+
+ """Return a dataframe of all datasets on OpenML.
21+

22+
- Supports large amount of results.
23+
+ Supports large numbers of results.
24+

25+
Parameters
26+
----------
27+
- data_id : list, optional
28+
- A list of data ids, to specify which datasets should be
29+
- listed
30+
+ data_id : list of int, optional
31+
+ List of dataset ids to specify which datasets should be listed.
32+
offset : int, optional
33+
- The number of datasets to skip, starting from the first.
34+
+ Number of datasets to skip, starting from the first.
35+
size : int, optional
36+
- The maximum number of datasets to show.
37+
+ Maximum number of datasets to return.
38+
status : str, optional
39+
- Should be {active, in_preparation, deactivated}. By
40+
- default active datasets are returned, but also datasets
41+
- from another status can be requested.
42+
+ Should be one of {'active', 'in_preparation', 'deactivated'}.
43+
+ By default, active datasets are returned.
44+
tag : str, optional
45+
+ Tag to filter datasets.
46+
data_name : str, optional
47+
+ Name of dataset to filter.
48+
data_version : int, optional
49+
- number_instances : int | str, optional
50+
- number_features : int | str, optional
51+
- number_classes : int | str, optional
52+
- number_missing_values : int | str, optional
53+
+ Version of dataset to filter.
54+
+ number_instances : int or str, optional
55+
+ Filter datasets by number of instances.
56+
+ number_features : int or str, optional
57+
+ Filter datasets by number of features.
58+
+ number_classes : int or str, optional
59+
+ Filter datasets by number of classes.
60+
+ number_missing_values : int or str, optional
61+
+ Filter datasets by number of missing values.
62+

63+
Returns
64+
-------
65+
- datasets: dataframe
66+
- Each row maps to a dataset
67+
- Each column contains the following information:
68+
+ pd.DataFrame
69+
+ Each row maps to a dataset.
70+
+ Columns include:
71+
- dataset id
72+
- name
73+
- format
74+
- status
75+
- If qualities are calculated for the dataset, some of
76+
- these are also included as columns.
77+
+ - additional quality columns, if qualities are calculated for the dataset
78+

79+
Examples
80+
--------
81+
>>> import openml
82+
- >>> datasets = openml.datasets.list_datasets(size=5)
83+
+ >>> from openml.datasets import list_datasets
84+
+ >>> datasets = list_datasets(size=5)
85+
>>> print(datasets.head())
86+
-
87+
-
88+
"""
89+
listing_call = partial(
90+
_list_datasets,
91+
@@ -358,31 +362,35 @@ def get_datasets(
92+
download_data: bool = False, # noqa: FBT002
93+
download_qualities: bool = False, # noqa: FBT002
94+
) -> list[OpenMLDataset]:
95+
- """Download datasets.
96+
+ """Download datasets from OpenML.
97+

98+
- This function iterates :meth:`openml.datasets.get_dataset`.
99+
+ This function calls :func:`openml.datasets.get_dataset` once per id
99+
+ to download multiple datasets at once.
101+

102+
Parameters
103+
----------
104+
- dataset_ids : iterable
105+
- Integers or strings representing dataset ids or dataset names.
106+
- If dataset names are specified, the least recent still active dataset version is returned.
107+
+ dataset_ids : list of str or int
108+
+ Dataset ids or names. If dataset names are specified, the least recent still active dataset
109+
+ version is returned.
110+
download_data : bool, optional
111+
- If True, also download the data file. Beware that some datasets are large and it might
112+
- make the operation noticeably slower. Metadata is also still retrieved.
113+
- If False, create the OpenMLDataset and only populate it with the metadata.
114+
- The data may later be retrieved through the `OpenMLDataset.get_data` method.
115+
- download_qualities : bool, optional (default=True)
116+
- If True, also download qualities.xml file. If False it skip the qualities.xml.
117+
+ If True, also download the data file. Some datasets are large,
118+
+ so this may noticeably slow down the operation.
119+
+ Metadata is always retrieved. If False, only the metadata is
120+
+ retrieved; the data can be obtained later through the
121+
+ `OpenMLDataset.get_data` method.
122+
+ download_qualities : bool, optional
123+
+ If True, also download the qualities.xml file. If False, qualities are skipped.
124+

125+
Returns
126+
-------
127+
- datasets : list of datasets
128+
- A list of dataset objects.
129+
- Examples
130+
+ list of OpenMLDataset
131+
+ A list of OpenMLDataset objects containing metadata (and data/qualities if requested).
132+
+
133+
+ Examples
134+
--------
135+
>>> import openml
136+
- >>> datasets = openml.datasets.get_datasets([31, 32])
137+
+ >>> from openml.datasets import get_datasets
138+
+ >>> datasets = get_datasets([31, 32])
139+
>>> for dataset in datasets:
140+
... print(dataset.name)
141+
"""
142+
@@ -1067,27 +1075,30 @@ def _topic_add_dataset(data_id: int, topic: str) -> int:
143+

144+
def _topic_delete_dataset(data_id: int, topic: str) -> int:
145+
"""
146+
- Removes a topic from a dataset.
147+
- This API is not available for all OpenML users and is accessible only by admins.
148+
+ Remove a topic from a dataset on OpenML.
149+
+
150+
+ This API is not available for all users; it is accessible only by admins.
151+

152+
Parameters
153+
----------
154+
data_id : int
155+
- id of the dataset to be forked
156+
+ ID of the dataset to remove the topic from.
157+
topic : str
158+
- Topic to be deleted
159+
+ The topic name to delete from the dataset.
160+

161+
Returns
162+
-------
163+
- Dataset id
164+
+ int
165+
+ The dataset ID after the topic removal.
166+

167+
Examples
168+
--------
169+
- >>> import openml
170+
+ >>> from openml.datasets.functions import _topic_delete_dataset
171+
>>> dataset_id = 61
172+
>>> topic = "biology"
173+
- >>> result = openml.datasets.functions._topic_delete_dataset(dataset_id, topic)
174+
+ >>> result = _topic_delete_dataset(dataset_id, topic)
175+
>>> print(result)
176+
+ 61
177+
"""
178+
if not isinstance(data_id, int):
179+
raise TypeError(f"`data_id` must be of type `int`, not {type(data_id)}.")
180+
@@ -1473,25 +1484,27 @@ def _get_online_dataset_format(dataset_id: int) -> str:
181+

182+

183+
def delete_dataset(dataset_id: int) -> bool:
184+
- """Delete dataset with id `dataset_id` from the OpenML server.
185+
+ """
186+
+ Delete a dataset from the OpenML server.
187+

188+
This can only be done if you are the owner of the dataset and
189+
- no tasks are attached to the dataset.
190+
+ no tasks are attached to it.
191+

192+
Parameters
193+
----------
194+
dataset_id : int
195+
- OpenML id of the dataset
196+
+ OpenML ID of the dataset to delete.
197+

198+
Returns
199+
-------
200+
bool
201+
- True if the deletion was successful. False otherwise.
202+
+ True if the deletion was successful, False otherwise.
203+

204+
Examples
205+
--------
206+
>>> import openml
207+
>>> success = openml.datasets.delete_dataset(123456)
208+
>>> print(success)
209+
+ True
210+
"""
211+
- return openml.utils._delete_entity("data", dataset_id)
212+
\ No newline at end of file
213+
+ return openml.utils._delete_entity("data", dataset_id)

0 commit comments

Comments
 (0)