|
| 1 | +[1mdiff --git a/openml/datasets/functions.py b/openml/datasets/functions.py[m |
| 2 | +[1mindex 0dc1eec..0b908d6 100644[m |
| 3 | +[1m--- a/openml/datasets/functions.py[m |
| 4 | +[1m+++ b/openml/datasets/functions.py[m |
| 5 | +[36m@@ -68,7 +68,8 @@[m [mdef list_qualities() -> list[str]:[m |
| 6 | + Examples[m |
| 7 | + --------[m |
| 8 | + >>> import openml[m |
| 9 | +[31m- >>> qualities = openml.datasets.list_qualities()[m |
| 10 | +[32m+[m[32m >>> from openml.datasets import list_qualities[m |
| 11 | +[32m+[m[32m >>> qualities = list_qualities()[m |
| 12 | + >>> print(qualities[:5])[m |
| 13 | + """[m |
| 14 | + api_call = "data/qualities/list"[m |
| 15 | +[36m@@ -97,50 +98,53 @@[m [mdef list_datasets([m |
| 16 | + number_classes: int | str | None = None,[m |
| 17 | + number_missing_values: int | str | None = None,[m |
| 18 | + ) -> pd.DataFrame:[m |
| 19 | +[31m- """Return a dataframe of all dataset which are on OpenML.[m |
| 20 | +[32m+[m[32m """Return a dataframe of all datasets on OpenML.[m |
| 21 | + [m |
| 22 | +[31m- Supports large amount of results.[m |
| 23 | +[32m+[m[32m Supports a large number of results.[m |
| 24 | + [m |
| 25 | + Parameters[m |
| 26 | + ----------[m |
| 27 | +[31m- data_id : list, optional[m |
| 28 | +[31m- A list of data ids, to specify which datasets should be[m |
| 29 | +[31m- listed[m |
| 30 | +[32m+[m[32m data_id : list of int, optional[m |
| 31 | +[32m+[m[32m List of dataset ids to specify which datasets should be listed.[m |
| 32 | + offset : int, optional[m |
| 33 | +[31m- The number of datasets to skip, starting from the first.[m |
| 34 | +[32m+[m[32m Number of datasets to skip, starting from the first.[m |
| 35 | + size : int, optional[m |
| 36 | +[31m- The maximum number of datasets to show.[m |
| 37 | +[32m+[m[32m Maximum number of datasets to return.[m |
| 38 | + status : str, optional[m |
| 39 | +[31m- Should be {active, in_preparation, deactivated}. By[m |
| 40 | +[31m- default active datasets are returned, but also datasets[m |
| 41 | +[31m- from another status can be requested.[m |
| 42 | +[32m+[m[32m Should be one of {'active', 'in_preparation', 'deactivated'}.[m |
| 43 | +[32m+[m[32m By default, active datasets are returned.[m |
| 44 | + tag : str, optional[m |
| 45 | +[32m+[m[32m Tag to filter datasets.[m |
| 46 | + data_name : str, optional[m |
| 47 | +[32m+[m[32m Name of dataset to filter.[m |
| 48 | + data_version : int, optional[m |
| 49 | +[31m- number_instances : int | str, optional[m |
| 50 | +[31m- number_features : int | str, optional[m |
| 51 | +[31m- number_classes : int | str, optional[m |
| 52 | +[31m- number_missing_values : int | str, optional[m |
| 53 | +[32m+[m[32m Version of dataset to filter.[m |
| 54 | +[32m+[m[32m number_instances : int or str, optional[m |
| 55 | +[32m+[m[32m Filter datasets by number of instances.[m |
| 56 | +[32m+[m[32m number_features : int or str, optional[m |
| 57 | +[32m+[m[32m Filter datasets by number of features.[m |
| 58 | +[32m+[m[32m number_classes : int or str, optional[m |
| 59 | +[32m+[m[32m Filter datasets by number of classes.[m |
| 60 | +[32m+[m[32m number_missing_values : int or str, optional[m |
| 61 | +[32m+[m[32m Filter datasets by number of missing values.[m |
| 62 | + [m |
| 63 | + Returns[m |
| 64 | + -------[m |
| 65 | +[31m- datasets: dataframe[m |
| 66 | +[31m- Each row maps to a dataset[m |
| 67 | +[31m- Each column contains the following information:[m |
| 68 | +[32m+[m[32m pd.DataFrame[m |
| 69 | +[32m+[m[32m Each row maps to a dataset.[m |
| 70 | +[32m+[m[32m Columns include:[m |
| 71 | + - dataset id[m |
| 72 | + - name[m |
| 73 | + - format[m |
| 74 | + - status[m |
| 75 | +[31m- If qualities are calculated for the dataset, some of[m |
| 76 | +[31m- these are also included as columns.[m |
| 77 | +[32m+[m[32m - additional columns for dataset qualities, if they have been calculated.[m |
| 78 | + [m |
| 79 | + Examples[m |
| 80 | + --------[m |
| 81 | + >>> import openml[m |
| 82 | +[31m- >>> datasets = openml.datasets.list_datasets(size=5)[m |
| 83 | +[32m+[m[32m >>> from openml.datasets import list_datasets[m |
| 84 | +[32m+[m[32m >>> datasets = list_datasets(size=5)[m |
| 85 | + >>> print(datasets.head())[m |
| 86 | +[31m-[m |
| 87 | +[31m-[m |
| 88 | + """[m |
| 89 | + listing_call = partial([m |
| 90 | + _list_datasets,[m |
| 91 | +[36m@@ -358,31 +362,35 @@[m [mdef get_datasets([m |
| 92 | + download_data: bool = False, # noqa: FBT002[m |
| 93 | + download_qualities: bool = False, # noqa: FBT002[m |
| 94 | + ) -> list[OpenMLDataset]:[m |
| 95 | +[31m- """Download datasets.[m |
| 96 | +[32m+[m[32m """Download datasets from OpenML.[m |
| 97 | + [m |
| 98 | +[31m- This function iterates :meth:`openml.datasets.get_dataset`.[m |
| 99 | +[32m+[m[32m This function iterates :meth:`openml.datasets.get_dataset`[m |
| 100 | +[32m+[m[32m to download multiple datasets at once.[m |
| 101 | + [m |
| 102 | + Parameters[m |
| 103 | + ----------[m |
| 104 | +[31m- dataset_ids : iterable[m |
| 105 | +[31m- Integers or strings representing dataset ids or dataset names.[m |
| 106 | +[31m- If dataset names are specified, the least recent still active dataset version is returned.[m |
| 107 | +[32m+[m[32m dataset_ids : list of str or int[m |
| 108 | +[32m+[m[32m Dataset ids or names. If dataset names are specified, the least recent still active dataset[m |
| 109 | +[32m+[m[32m version is returned.[m |
| 110 | + download_data : bool, optional[m |
| 111 | +[31m- If True, also download the data file. Beware that some datasets are large and it might[m |
| 112 | +[31m- make the operation noticeably slower. Metadata is also still retrieved.[m |
| 113 | +[31m- If False, create the OpenMLDataset and only populate it with the metadata.[m |
| 114 | +[31m- The data may later be retrieved through the `OpenMLDataset.get_data` method.[m |
| 115 | +[31m- download_qualities : bool, optional (default=True)[m |
| 116 | +[31m- If True, also download qualities.xml file. If False it skip the qualities.xml.[m |
| 117 | +[32m+[m[32m If True, download the data file. Some datasets are large[m |
| 118 | +[32m+[m[32m and this may slow down the operation.[m |
| 119 | +[32m+[m[32m Metadata is always retrieved. If False, only metadata is retrieved;[m |
| 120 | +[32m+[m[32m the actual data can later[m |
| 121 | +[32m+[m[32m be obtained via `OpenMLDataset.get_data`.[m |
| 122 | +[32m+[m[32m download_qualities : bool, optional[m |
| 123 | +[32m+[m[32m If True, also download the qualities.xml file. If False, qualities are skipped.[m |
| 124 | + [m |
| 125 | + Returns[m |
| 126 | + -------[m |
| 127 | +[31m- datasets : list of datasets[m |
| 128 | +[31m- A list of dataset objects.[m |
| 129 | +[31m- Examples[m |
| 130 | +[32m+[m[32m list of OpenMLDataset[m |
| 131 | +[32m+[m[32m A list of OpenMLDataset objects containing metadata (and data/qualities if requested).[m |
| 132 | +[32m+[m |
| 133 | +[32m+[m[32m Examples[m |
| 134 | + --------[m |
| 135 | + >>> import openml[m |
| 136 | +[31m- >>> datasets = openml.datasets.get_datasets([31, 32])[m |
| 137 | +[32m+[m[32m >>> from openml.datasets import get_datasets[m |
| 138 | +[32m+[m[32m >>> datasets = get_datasets([31, 32])[m |
| 139 | + >>> for dataset in datasets:[m |
| 140 | + ... print(dataset.name)[m |
| 141 | + """[m |
| 142 | +[36m@@ -1067,27 +1075,30 @@[m [mdef _topic_add_dataset(data_id: int, topic: str) -> int:[m |
| 143 | + [m |
| 144 | + def _topic_delete_dataset(data_id: int, topic: str) -> int:[m |
| 145 | + """[m |
| 146 | +[31m- Removes a topic from a dataset.[m |
| 147 | +[31m- This API is not available for all OpenML users and is accessible only by admins.[m |
| 148 | +[32m+[m[32m Remove a topic from a dataset on OpenML.[m |
| 149 | +[32m+[m |
| 150 | +[32m+[m[32m This API is not available for all users; it is accessible only by admins.[m |
| 151 | + [m |
| 152 | + Parameters[m |
| 153 | + ----------[m |
| 154 | + data_id : int[m |
| 155 | +[31m- id of the dataset to be forked[m |
| 156 | +[32m+[m[32m ID of the dataset to remove the topic from.[m |
| 157 | + topic : str[m |
| 158 | +[31m- Topic to be deleted[m |
| 159 | +[32m+[m[32m The topic name to delete from the dataset.[m |
| 160 | + [m |
| 161 | + Returns[m |
| 162 | + -------[m |
| 163 | +[31m- Dataset id[m |
| 164 | +[32m+[m[32m int[m |
| 165 | +[32m+[m[32m ID of the dataset from which the topic was removed.[m |
| 166 | + [m |
| 167 | + Examples[m |
| 168 | + --------[m |
| 169 | +[31m- >>> import openml[m |
| 170 | +[32m+[m[32m >>> from openml.datasets.functions import _topic_delete_dataset[m |
| 171 | + >>> dataset_id = 61[m |
| 172 | + >>> topic = "biology"[m |
| 173 | +[31m- >>> result = openml.datasets.functions._topic_delete_dataset(dataset_id, topic)[m |
| 174 | +[32m+[m[32m >>> result = _topic_delete_dataset(dataset_id, topic)[m |
| 175 | + >>> print(result)[m |
| 176 | +[32m+[m[32m 61[m |
| 177 | + """[m |
| 178 | + if not isinstance(data_id, int):[m |
| 179 | + raise TypeError(f"`data_id` must be of type `int`, not {type(data_id)}.")[m |
| 180 | +[36m@@ -1473,25 +1484,27 @@[m [mdef _get_online_dataset_format(dataset_id: int) -> str:[m |
| 181 | + [m |
| 182 | + [m |
| 183 | + def delete_dataset(dataset_id: int) -> bool:[m |
| 184 | +[31m- """Delete dataset with id `dataset_id` from the OpenML server.[m |
| 185 | +[32m+[m[32m """[m |
| 186 | +[32m+[m[32m Delete a dataset from the OpenML server.[m |
| 187 | + [m |
| 188 | + This can only be done if you are the owner of the dataset and[m |
| 189 | +[31m- no tasks are attached to the dataset.[m |
| 190 | +[32m+[m[32m no tasks are attached to it.[m |
| 191 | + [m |
| 192 | + Parameters[m |
| 193 | + ----------[m |
| 194 | + dataset_id : int[m |
| 195 | +[31m- OpenML id of the dataset[m |
| 196 | +[32m+[m[32m OpenML ID of the dataset to delete.[m |
| 197 | + [m |
| 198 | + Returns[m |
| 199 | + -------[m |
| 200 | + bool[m |
| 201 | +[31m- True if the deletion was successful. False otherwise.[m |
| 202 | +[32m+[m[32m True if the deletion was successful, False otherwise.[m |
| 203 | + [m |
| 204 | + Examples[m |
| 205 | + --------[m |
| 206 | + >>> import openml[m |
| 207 | + >>> success = openml.datasets.delete_dataset(123456)[m |
| 208 | + >>> print(success)[m |
| 209 | +[32m+[m[32m True[m |
| 210 | + """[m |
| 211 | +[31m- return openml.utils._delete_entity("data", dataset_id)[m |
| 212 | +\ No newline at end of file[m |
| 213 | +[32m+[m[32m return openml.utils._delete_entity("data", dataset_id)[m |
0 commit comments