|
def server2dirname(server):
    """Return a directory name derived from a server URL.

    Every ``http://`` / ``https://`` occurrence is removed and each
    remaining forward slash is replaced by an underscore.
    """
    import re

    without_scheme = re.sub(r"https*://", "", server)
    return without_scheme.replace('/', '_')
| 8 | + |
| 9 | + |
def cachedir(*args):
    """HAPI cache directory.

    cachedir() returns tempfile.gettempdir() + os.path.sep + 'hapi-data'

    cachedir(basedir, server) returns
    basedir + os.path.sep + server2dirname(server)

    Any call that does not pass exactly two arguments is handled like
    cachedir().
    """
    import os
    import tempfile

    if len(args) != 2:
        # cachedir()
        return os.path.sep.join([tempfile.gettempdir(), 'hapi-data'])

    # cachedir(base_dir, server)
    base_dir, server = args
    return os.path.sep.join([base_dir, server2dirname(server)])
| 26 | + |
| 27 | + |
def request2path(*args):
    """Return the cache file path (without extension) for a request.

    request2path(server, dataset, parameters, start, stop)
    request2path(server, dataset, parameters, start, stop, cachedir)

    The result is
    os.path.join(cachedir, server2dirname(server), fname), where fname is
    '<dataset>_<parameters>_<start>_<stop>' after forbidden file name
    characters have been replaced in dataset and parameters and the
    characters '-', ':', '.', and 'Z' have been removed from start and stop.

    If cachedir is not given, the default returned by cachedir() is used.
    """
    import os
    import re
    import platform

    # Use default if cachedir not given.
    cachedirectory = cachedir() if len(args) == 5 else args[5]

    server, dataset, parameters, start, stop = args[:5]

    # Replace forbidden characters in directory and filename.
    # Replacements assume that there will be no name collisions,
    # e.g., one parameter named abc-< and another abc-@lt@.
    # This also introduces an incompatibility between caches on Windows
    # and Unix.
    if platform.system() == 'Windows':
        # List from responses in
        # https://stackoverflow.com/q/1976007
        # Plain string replacement is used so that no regular expression
        # escaping is needed; the backslash entry is a literal backslash.
        reps = (
            ('<', '@lt@'),
            ('>', '@gt@'),
            (':', '@colon@'),
            ('"', '@doublequote@'),
            ('/', '@forwardslash@'),
            ('\\', '@backslash@'),
            ('|', '@pipe@'),
            ('?', '@questionmark@'),
            ('*', '@asterisk@'),
        )
    else:
        reps = (('/', '@forwardslash@'),)

    for forbidden, token in reps:
        dataset = dataset.replace(forbidden, token)
        parameters = parameters.replace(forbidden, token)

    # To shorten filenames.
    start = re.sub(r'-|:|\.|Z', '', start)
    stop = re.sub(r'-|:|\.|Z', '', stop)

    # URL subdirectory
    urldirectory = server2dirname(server)
    fname = '%s_%s_%s_%s' % (dataset, parameters, start, stop)

    return os.path.join(cachedirectory, urldirectory, fname)
| 80 | + |
| 81 | + |
def meta_cache_paths(SERVER, DATASET, opts):
    """Return (urld, fnamejson, fnamepkl) for metadata cache files.

    urld is the server's cache directory under opts['cachedir']; the two
    file names share the root returned by request2path() called with empty
    parameters, start, and stop.
    """
    server_dir = cachedir(opts["cachedir"], SERVER)
    stem = request2path(SERVER, DATASET, '', '', '', opts['cachedir'])
    json_path, pkl_path = (stem + ext for ext in ('.json', '.pkl'))
    return server_dir, json_path, pkl_path
| 87 | + |
| 88 | + |
def meta_cache_read(SERVER, DATASET, opts):
    """Read metadata from PKL cache. Returns meta dict or None.

    None is returned when opts['usecache'] is false or when the metadata
    PKL file for (SERVER, DATASET) does not exist under opts['cachedir'].
    """
    import os
    import pickle
    from hapiclient.util import log

    if not opts["usecache"]:
        log('Not checking metadata cache because usecache is False.', opts)
        return None

    urld, _, fnamepkl = meta_cache_paths(SERVER, DATASET, opts)

    # Log the file name relative to the server cache directory. The cache
    # paths are joined with the OS path separator, so strip with it too.
    shortname = fnamepkl.replace(urld + os.path.sep, '')

    if not os.path.isfile(fnamepkl):
        log('No metadata cache file found: %s' % shortname, opts)
        return None

    log('Reading %s' % shortname, opts)
    # NOTE(review): pickle.load can execute arbitrary code; this assumes the
    # cache directory holds only files written by this client.
    with open(fnamepkl, 'rb') as f:
        return pickle.load(f)
| 109 | + |
| 110 | + |
def meta_cache_write(meta, SERVER, DATASET, opts):
    """Write metadata to JSON and PKL cache files.

    Nothing is written when opts['cache'] is false. The server cache
    directory is created if it does not exist.
    """
    import os
    import json
    import pickle
    from hapiclient.util import log

    if not opts["cache"]:
        return

    urld, fnamejson, fnamepkl = meta_cache_paths(SERVER, DATASET, opts)
    if not os.path.exists(urld):
        os.makedirs(urld)

    # Log file names relative to the server cache directory. The cache paths
    # are joined with the OS path separator, so strip with it too.
    prefix = urld + os.path.sep

    log('Writing %s ' % fnamejson.replace(prefix, ''), opts)
    with open(fnamejson, 'w') as f:
        json.dump(meta, f, indent=4)

    log('Writing %s ' % fnamepkl.replace(prefix, ''), opts)
    with open(fnamepkl, 'wb') as f:
        # protocol=2 used for Python 2.7 compatibility.
        pickle.dump(meta, f, protocol=2)
| 133 | + |
| 134 | + |
def data_cache_paths(SERVER, DATASET, PARAMETERS, START, STOP, opts):
    """Return (fnamecsv, fnamebin, fnamenpy, fnamepklx) for data cache files.

    All four names share the root returned by request2path() for the
    request and opts['cachedir'], and differ only in extension.
    """
    stem = request2path(SERVER, DATASET, PARAMETERS, START, STOP, opts['cachedir'])
    csv_path, bin_path, npy_path, pklx_path = (
        stem + ext for ext in ('.csv', '.bin', '.npy', '.pkl')
    )
    return csv_path, bin_path, npy_path, pklx_path
| 139 | + |
| 140 | + |
def data_cache_read_metax(SERVER, DATASET, PARAMETERS, START, STOP, opts):
    """Read extended request metadata from PKL cache. Returns meta dict or None.

    None is returned when opts['usecache'] is false or when the PKL file
    for the request does not exist under opts['cachedir'].
    """
    import os
    import pickle
    from hapiclient.util import log

    if not opts["usecache"]:
        log('Not checking data cache because usecache is False.', opts)
        return None

    urld = cachedir(opts["cachedir"], SERVER)
    _, _, _, fnamepklx = data_cache_paths(SERVER, DATASET, PARAMETERS, START, STOP, opts)

    # Log the file name relative to the server cache directory. The cache
    # paths are joined with the OS path separator, so strip with it too.
    shortname = fnamepklx.replace(urld + os.path.sep, '')

    if not os.path.isfile(fnamepklx):
        log('No data cache file found: %s' % shortname, opts)
        return None

    log('Reading %s' % shortname, opts)
    # NOTE(review): pickle.load can execute arbitrary code; this assumes the
    # cache directory holds only files written by this client.
    with open(fnamepklx, 'rb') as f:
        return pickle.load(f)
| 161 | + |
| 162 | + |
def data_cache_read_npy(SERVER, DATASET, PARAMETERS, START, STOP, opts):
    """Read cached numpy data array. Returns None if not cached.

    None is also returned when opts['usecache'] is false.
    """
    import os
    import numpy as np
    from hapiclient.util import log

    if not opts["usecache"]:
        return None

    urld = cachedir(opts["cachedir"], SERVER)
    _, _, fnamenpy, _ = data_cache_paths(SERVER, DATASET, PARAMETERS, START, STOP, opts)

    if not os.path.isfile(fnamenpy):
        return None

    # Log the file name relative to the server cache directory. The cache
    # paths are joined with the OS path separator, so strip with it too.
    log('Reading %s ' % fnamenpy.replace(urld + os.path.sep, ''), opts)
    with open(fnamenpy, 'rb') as f:
        data = np.load(f)

    return data
| 183 | + |
| 184 | + |
def data_cache_write(data_result, meta, SERVER, DATASET, PARAMETERS, START, STOP, opts):
    """Write data array and extended metadata to cache files.

    meta is first updated in place with the file-related x_ fields
    (x_metaFileParsed, x_dataFileParsed, x_metaFile, x_dataFile); this
    happens even when opts['cache'] is false, in which case nothing is
    written to disk.
    """
    import os
    import pickle
    import warnings
    import numpy as np
    from hapiclient.util import log

    server_dir = cachedir(opts["cachedir"], SERVER)
    csv_path, bin_path, npy_path, pklx_path = data_cache_paths(
        SERVER, DATASET, PARAMETERS, START, STOP, opts)
    _, json_path, pkl_path = meta_cache_paths(SERVER, DATASET, opts)

    meta["x_metaFileParsed"] = pkl_path
    meta["x_dataFileParsed"] = npy_path
    meta["x_metaFile"] = json_path
    if opts['format'] == 'binary':
        meta["x_dataFile"] = bin_path
    else:
        meta["x_dataFile"] = csv_path

    if not opts["cache"]:
        return

    # Create the base cache directory first, then the server subdirectory.
    for directory in (opts["cachedir"], server_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    log('Writing %s' % pklx_path, opts)
    with open(pklx_path, 'wb') as pkl_file:
        pickle.dump(meta, pkl_file, protocol=2)

    log('Writing %s' % npy_path, opts)
    with warnings.catch_warnings():
        # Ignore warning that occurs when saving Unicode data.
        warnings.filterwarnings(
            "ignore",
            message=r"Stored array in format 3\.0.*",
            category=UserWarning,
            module=r"numpy\.lib\.format",
        )
        np.save(npy_path, data_result)
| 225 | + |
| 226 | + |
| 227 | +def _missing_length(meta, opts): |
| 228 | + """Return True if any string or isotime parameter is missing length attribute in metadata.""" |
| 229 | + |
| 230 | + """ |
| 231 | + missing_length = True will be set if HAPI String or ISOTime |
| 232 | + parameter has no length attribute in metadata (length attribute is |
| 233 | + required for both in binary but only for primary time column in CSV). |
| 234 | + When missing_length=True the CSV read gets more complicated. |
| 235 | + """ |
| 236 | + |
| 237 | + if opts['format'] == 'csv': |
| 238 | + if opts['method'] == 'numpynolength' or opts['method'] == 'pandasnolength': |
| 239 | + return True |
| 240 | + |
| 241 | + for param in meta['parameters']: |
| 242 | + if param['type'] in ['string', 'isotime'] and 'length' not in param: |
| 243 | + return True |
| 244 | + |
| 245 | + return False |
| 246 | + |
| 247 | + |
| 248 | +def _compute_dt(meta, opts): |
| 249 | + import numpy as np |
| 250 | + |
| 251 | + # Compute data type variable dt used to read HAPI response into |
| 252 | + # a data structure. |
| 253 | + pnames, psizes, ptypes, dt = [], [], [], [] |
| 254 | + |
| 255 | + # Each element of cols is an array with start/end column number of |
| 256 | + # parameter. |
| 257 | + cols = np.zeros([len(meta["parameters"]), 2], dtype=np.int32) |
| 258 | + ss = 0 # running sum of prod(size) |
| 259 | + |
| 260 | + # Extract sizes and types of parameters. |
| 261 | + for i in range(0, len(meta["parameters"])): |
| 262 | + ptype = meta["parameters"][i]["type"] |
| 263 | + |
| 264 | + ptypes.append(ptype) |
| 265 | + |
| 266 | + pnames.append(str(meta["parameters"][i]["name"])) |
| 267 | + if 'size' in meta["parameters"][i]: |
| 268 | + psizes.append(meta["parameters"][i]['size']) |
| 269 | + else: |
| 270 | + psizes.append(1) |
| 271 | + |
| 272 | + # For size = [N] case, readers want |
| 273 | + # dtype = ('name', type, N) |
| 274 | + # not |
| 275 | + # dtype = ('name', type, [N]) |
| 276 | + if type(psizes[i]) is list and len(psizes[i]) == 1: |
| 277 | + psizes[i] = psizes[i][0] |
| 278 | + |
| 279 | + if type(psizes[i]) is list and len(psizes[i]) > 1: |
| 280 | + # psizes[i] = list(reversed(psizes[i])) |
| 281 | + psizes[i] = list(psizes[i]) |
| 282 | + |
| 283 | + # First column of ith parameter. |
| 284 | + cols[i][0] = ss |
| 285 | + # Last column of ith parameter. |
| 286 | + cols[i][1] = ss + np.prod(psizes[i]) - 1 |
| 287 | + # Running sum of columns. |
| 288 | + ss = cols[i][1] + 1 |
| 289 | + |
| 290 | + # HAPI numerical formats are 64-bit LE floating point and 32-bit LE |
| 291 | + # signed integers. |
| 292 | + if ptype == 'double': |
| 293 | + dtype = (pnames[i], '<d', psizes[i]) |
| 294 | + if ptype == 'integer': |
| 295 | + dtype = (pnames[i], np.dtype('<i4'), psizes[i]) |
| 296 | + |
| 297 | + if ptype == 'string' or ptype == 'isotime': |
| 298 | + if 'length' in meta["parameters"][i]: |
| 299 | + # length is specified for parameter in metadata. Use it. |
| 300 | + if ptype == 'string': |
| 301 | + dtype = (pnames[i], 'U' + str(meta["parameters"][i]["length"]), psizes[i]) |
| 302 | + if ptype == 'isotime': |
| 303 | + dtype = (pnames[i], 'S' + str(meta["parameters"][i]["length"]), psizes[i]) |
| 304 | + else: |
| 305 | + # A string or isotime parameter did not have a length. |
| 306 | + # Will need to use slower CSV read method. |
| 307 | + if ptype == 'string' or ptype == 'isotime': |
| 308 | + dtype = (pnames[i], object, psizes[i]) |
| 309 | + |
| 310 | + # For testing reader. Force use of slow read method. |
| 311 | + if opts['format'] == 'csv': |
| 312 | + if opts['method'] == 'numpynolength' or opts['method'] == 'pandasnolength': |
| 313 | + if ptype == 'string' or ptype == 'isotime': |
| 314 | + dtype = (pnames[i], object, psizes[i]) |
| 315 | + |
| 316 | + # https://numpy.org/doc/stable/release/1.17.0-notes.html#shape-1-fields-in-dtypes-won-t-be-collapsed-to-scalars-in-a-future-version |
| 317 | + if dtype[2] == 1: |
| 318 | + dtype = dtype[0:2] |
| 319 | + |
| 320 | + dt.append(dtype) |
| 321 | + |
| 322 | + return dt, cols, psizes, pnames, ptypes |
0 commit comments