diff --git a/swvo/io/RBMDataSet/RBMDataSet.py b/swvo/io/RBMDataSet/RBMDataSet.py index a470780..f18621e 100644 --- a/swvo/io/RBMDataSet/RBMDataSet.py +++ b/swvo/io/RBMDataSet/RBMDataSet.py @@ -470,14 +470,12 @@ def __eq__(self, other: RBMDataSet) -> bool: return len(different_vars) == 0 def get_different_variables(self, rbm_other: RBMDataSet) -> list[str]: - different_vars: list[str] = [] self_vars = self.get_loaded_variables() other_vars = rbm_other.get_loaded_variables() for var in set(self_vars + other_vars): - if var not in other_vars or var not in self_vars: different_vars.append(var) continue @@ -503,6 +501,5 @@ def get_different_variables(self, rbm_other: RBMDataSet) -> list[str]: return different_vars - from .bin_and_interpolate_to_model_grid import bin_and_interpolate_to_model_grid from .interp_functions import interp_flux, interp_psd diff --git a/swvo/io/omni/omni_high_res.py b/swvo/io/omni/omni_high_res.py index 5d1476b..8bf5b95 100644 --- a/swvo/io/omni/omni_high_res.py +++ b/swvo/io/omni/omni_high_res.py @@ -288,6 +288,7 @@ def _process_single_year(self, data: list[str]) -> pd.DataFrame: "proton_density": 999.8, "temperature": 9999998.0, "pdyn": 99.0, + "sym-h": 99999.0, } df.columns = maxes.keys() @@ -351,7 +352,7 @@ def _get_data_from_omni(self, start: datetime, end: datetime, cadence: int = 1) "start_date": start.strftime("%Y%m%d"), "end_date": end.strftime("%Y%m%d"), } - common_vars = {"vars": ["13", "14", "17", "18", "21", "25", "26", "27"]} + common_vars = {"vars": ["13", "14", "17", "18", "21", "25", "26", "27", "41"]} if cadence == 1: params = {"res": "min", "spacecraft": "omni_min"} payload.update(params) diff --git a/swvo/io/symh/__init__.py b/swvo/io/symh/__init__.py new file mode 100644 index 0000000..cd7a6d6 --- /dev/null +++ b/swvo/io/symh/__init__.py @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: 2025 GFZ Helmholtz Centre for Geosciences +# +# SPDX-License-Identifier: Apache-2.0 + +from swvo.io.symh.omni import SymhOMNI as SymhOMNI diff --git a/swvo/io/symh/omni.py b/swvo/io/symh/omni.py new file mode 100644 index 0000000..0a5cb81 --- /dev/null +++ b/swvo/io/symh/omni.py @@ -0,0 +1,83 @@ +# SPDX-FileCopyrightText: 2025 GFZ Helmholtz Centre for Geosciences +# +# SPDX-License-Identifier: Apache-2.0 + +""" +Module for handling OMNI SYM-H data. +""" + +from __future__ import annotations + +import logging +from datetime import datetime, timedelta, timezone +from pathlib import Path +from typing import Optional + +import pandas as pd + +from swvo.io.omni import OMNIHighRes + +logging.captureWarnings(True) + + +class SymhOMNI(OMNIHighRes): + """ + Class for reading SYM-H data from OMNI High Resolution files. + Inherits the `download_and_process`, other private methods and attributes from OMNIHighRes. + """ + + def __init__(self, data_dir: Optional[Path] = None) -> None: + """ + Initialize a SymhOMNI object. + + Parameters + ---------- + data_dir : Path | None + Data directory for the SYM-H OMNI data. If not provided, it will be read from the environment variable + """ + super().__init__(data_dir=data_dir) + + def read( + self, + start_time: datetime, + end_time: datetime, + cadence_min: float = 1, + download: bool = True, + ) -> pd.DataFrame: + """ + Read OMNI SYM-H data for the given time range. + + Parameters + ---------- + start_time : datetime + Start time of the data to read. Must be timezone-aware. + end_time : datetime + End time of the data to read. Must be timezone-aware. + cadence_min : float, optional + Cadence of the data in minutes, defaults to 1 + download : bool, optional + Download data on the go, defaults to True. + + Returns + ------- + :class:`pandas.DataFrame` + OMNI SYM-H data. + """ + data_out = super().read(start_time, end_time, cadence_min=cadence_min, download=download) + + if not start_time.tzinfo: + start_time = start_time.replace(tzinfo=timezone.utc) + if not end_time.tzinfo: + end_time = end_time.replace(tzinfo=timezone.utc) + + symh_df = pd.DataFrame(index=data_out.index) + + symh_df["sym-h"] = data_out["sym-h"] + symh_df["file_name"] = data_out["file_name"] + + symh_df = symh_df.truncate( + before=start_time - timedelta(minutes=cadence_min - 0.0000001), + after=end_time + timedelta(minutes=cadence_min + 0.0000001), + ) + + return symh_df diff --git a/tests/io/dst/test_dst_omni.py b/tests/io/dst/test_dst_omni.py index 3ac3283..d818499 100644 --- a/tests/io/dst/test_dst_omni.py +++ b/tests/io/dst/test_dst_omni.py @@ -61,7 +61,6 @@ def test_download_and_process(self, dstomni, mocker): end_time = datetime(2020, 12, 31, tzinfo=timezone.utc) dstomni.download_and_process(start_time, end_time) - assert (TEST_DIR / Path("data/omni2_2020.dat")).exists() def test_read_without_download(self, dstomni): diff --git a/tests/io/omni/data/OMNI_HIGH_RES_1min_2012.csv b/tests/io/omni/data/OMNI_HIGH_RES_1min_2012.csv index b0d79a5..b65892b 100644 --- a/tests/io/omni/data/OMNI_HIGH_RES_1min_2012.csv +++ b/tests/io/omni/data/OMNI_HIGH_RES_1min_2012.csv @@ -1,11 +1,11 @@ -timestamp,bavg,by_gsm,bz_gsm,speed,proton_density,temperature,bx_gsm -2012-12-31 23:50:00,2.82,-1.17,-0.28,349.8,2.02,37672.0,2.53 -2012-12-31 23:51:00,2.81,-1.51,-0.18,355.7,1.94,31228.0,2.35 -2012-12-31 23:52:00,2.81,-1.53,-0.19,353.1,2.07,34439.0,2.33 -2012-12-31 23:53:00,2.79,-1.6,-0.15,351.4,2.15,36537.0,2.27 -2012-12-31 23:54:00,2.82,-1.42,-0.05,,,,2.43 -2012-12-31 23:55:00,2.81,-1.12,-0.01,353.3,2.09,33264.0,2.54 -2012-12-31 23:56:00,2.75,-1.25,0.16,353.5,1.97,39095.0,2.44 -2012-12-31 23:57:00,2.77,-1.23,0.15,,,,2.48 -2012-12-31 23:58:00,2.73,-1.11,0.08,,,,2.49 -2012-12-31 23:59:00,,,,,,, +timestamp,bavg,bx_gsm,by_gsm,bz_gsm,speed,proton_density,temperature,pdyn,sym-h +2012-12-31 23:50:00,2.82,2.53,-1.17,-0.28,349.8,2.02,37672.0,1.4,-12 +2012-12-31 23:51:00,2.81,2.35,-1.51,-0.18,355.7,1.94,31228.0,1.5,-11 +2012-12-31 23:52:00,2.81,2.33,-1.53,-0.19,353.1,2.07,34439.0,1.6,-13 +2012-12-31 23:53:00,2.79,2.27,-1.6,-0.15,351.4,2.15,36537.0,1.5,-14 +2012-12-31 23:54:00,2.82,2.43,-1.42,-0.05,,,,1.4, +2012-12-31 23:55:00,2.81,2.54,-1.12,-0.01,353.3,2.09,33264.0,1.5,-12 +2012-12-31 23:56:00,2.75,2.44,-1.25,0.16,353.5,1.97,39095.0,1.4,-11 +2012-12-31 23:57:00,2.77,2.48,-1.23,0.15,,,,1.5, +2012-12-31 23:58:00,2.73,2.49,-1.11,0.08,,,,1.4, +2012-12-31 23:59:00,,,,,,,,, \ No newline at end of file diff --git a/tests/io/omni/data/OMNI_HIGH_RES_1min_2013.csv b/tests/io/omni/data/OMNI_HIGH_RES_1min_2013.csv index 3639157..dad4207 100644 --- a/tests/io/omni/data/OMNI_HIGH_RES_1min_2013.csv +++ b/tests/io/omni/data/OMNI_HIGH_RES_1min_2013.csv @@ -1,2 +1,2 @@ -timestamp,bavg,by_gsm,bz_gsm,speed,proton_density,temperature,bx_gsm -2013-01-01 00:00:00,2.45,-0.31,-0.18,358.8,1.94,46455.0,2.42 \ No newline at end of file +timestamp,bavg,bx_gsm,by_gsm,bz_gsm,speed,proton_density,temperature,pdyn,sym-h +2013-01-01 00:00:00,2.45,2.42,-0.31,-0.18,358.8,1.94,46455.0,1.5,-10 \ No newline at end of file diff --git a/tests/io/omni/test_omni_high_res.py b/tests/io/omni/test_omni_high_res.py index 86bae8e..7810fdb 100644 --- a/tests/io/omni/test_omni_high_res.py +++ b/tests/io/omni/test_omni_high_res.py @@ -104,15 +104,14 @@ def test_remove_processed_file(self): def test_process_single_year_parses_data_correctly(self, omni_high_res): data = [ - "YYYY DOY HR MN bavg bx_gsm by_gsm bz_gsm speed proton_density temperature pdyn", - "2020 1 0 0 5.1 1.2 2.3 3.4 400 5.5 1000000 99", - "2020 1 0 1 9999.9 9999.9 9999.9 9999.9 99999.8 999.8 9999998.0 99", + "YYYY DOY HR MN bavg bx_gsm by_gsm bz_gsm speed proton_density temperature pdyn sym-h", + "2020 1 0 0 5.1 1.2 2.3 3.4 400 5.5 1000000 99 -15", + "2020 1 0 1 9999.9 9999.9 9999.9 9999.9 99999.8 999.8 9999998.0 99 99999.0", ] df = omni_high_res._process_single_year(data) assert isinstance(df.index[0], pd.Timestamp) assert len(df) >= 2 - # Check columns expected_cols = [ "bavg", "bx_gsm", @@ -122,6 +121,7 @@ def test_process_single_year_parses_data_correctly(self, omni_high_res): "proton_density", "temperature", "pdyn", + "sym-h", ] assert list(df.columns) == expected_cols assert np.isnan(df.iloc[1]["bavg"]) @@ -131,6 +131,7 @@ def test_process_single_year_parses_data_correctly(self, omni_high_res): assert np.isnan(df.iloc[1]["speed"]) assert np.isnan(df.iloc[1]["proton_density"]) assert np.isnan(df.iloc[1]["temperature"]) + assert np.isnan(df.iloc[1]["sym-h"]) assert df.iloc[0]["bavg"] == 5.1 assert df.iloc[0]["bx_gsm"] == 1.2 assert df.iloc[0]["by_gsm"] == 2.3 @@ -138,13 +139,14 @@ def test_process_single_year_parses_data_correctly(self, omni_high_res): assert df.iloc[0]["speed"] == 400 assert df.iloc[0]["proton_density"] == 5.5 assert df.iloc[0]["temperature"] == 1000000 + assert df.iloc[0]["sym-h"] == -15 def test_process_single_year_handles_missing_data_lines(self, omni_high_res): - data = ["YYYY DOY HR MN bavg bx_gsm by_gsm bz_gsm speed proton_density temperature"] + data = ["YYYY DOY HR MN bavg bx_gsm by_gsm bz_gsm speed proton_density temperature sym-h"] with pytest.raises(ValueError): _ = omni_high_res._process_single_year(data) def test_process_single_year_raises_on_missing_header(self, omni_high_res): - data = ["2020 1 0 0 5.1 1.2 2.3 3.4 400 5.5 1000000"] + data = ["2020 1 0 0 5.1 1.2 2.3 3.4 400 5.5 1000000 -15"] with pytest.raises(StopIteration): omni_high_res._process_single_year(data) diff --git a/tests/io/symh/test_symh_omni.py b/tests/io/symh/test_symh_omni.py new file mode 100644 index 0000000..b6d5e75 --- /dev/null +++ b/tests/io/symh/test_symh_omni.py @@ -0,0 +1,101 @@ +# SPDX-FileCopyrightText: 2025 GFZ Helmholtz Centre for Geosciences +# +# SPDX-License-Identifier: Apache-2.0 + +import os +from datetime import datetime, timezone +from pathlib import Path + +import pandas as pd +import pytest + +from swvo.io.symh import SymhOMNI + +TEST_DIR = os.path.dirname(__file__) +DATA_DIR = Path(os.path.join(TEST_DIR, "../omni/data/")) + + +class TestSymhOMNI: + @pytest.fixture + def symhomni(self): + os.environ["OMNI_HIGH_RES_STREAM_DIR"] = str(DATA_DIR) + yield SymhOMNI() + + @pytest.fixture + def mock_symhomni_data(self): + test_dates = pd.date_range(start=datetime(2020, 1, 1), end=datetime(2020, 12, 31, 23, 59, 0), freq="min") + test_data = pd.DataFrame( + { + "t": test_dates, + "sym-h": [-15.0] * len(test_dates), + "file_name": "some_file", + "timestamp": test_dates.strftime("%Y-%m-%d %H:%M:%S"), + } + ) + test_data.index = test_dates.tz_localize("UTC") + return test_data + + def test_initialization_with_env_var(self, symhomni): + assert symhomni.data_dir.exists() + + def test_initialization_with_data_dir(self): + symhomni = SymhOMNI(data_dir=DATA_DIR) + assert symhomni.data_dir == DATA_DIR + + def test_initialization_without_env_var(self): + if "OMNI_HIGH_RES_STREAM_DIR" in os.environ: + del os.environ["OMNI_HIGH_RES_STREAM_DIR"] + with pytest.raises(ValueError): + SymhOMNI() + + def test_download_and_process(self, symhomni): + start_time = datetime(2020, 1, 1, tzinfo=timezone.utc) + end_time = datetime(2020, 12, 31, tzinfo=timezone.utc) + # download this file without mocking + symhomni.download_and_process(start_time, end_time) + + assert (DATA_DIR / "OMNI_HIGH_RES_1min_2020.csv").exists() + + def test_read_without_download(self, symhomni): + start_time = datetime(2021, 1, 1, tzinfo=timezone.utc) + end_time = datetime(2021, 2, 28, tzinfo=timezone.utc) + with pytest.raises( + ValueError + ): # value error is raised when no files are found hence no concatenation is possible + symhomni.read(start_time, end_time, download=False) + + def test_read_with_download(self, symhomni, mock_symhomni_data, mocker): + mocker.patch("pathlib.Path.exists", return_value=False) + mocker.patch.object(symhomni, "_read_single_file", return_value=mock_symhomni_data) + mocker.patch.object(symhomni, "download_and_process") + + start_time = datetime(2020, 1, 1) + end_time = datetime(2020, 12, 31) + + df = symhomni.read(start_time, end_time, download=True) + symhomni.download_and_process.assert_called_once() + + assert not df.empty + assert all(df["sym-h"] == -15.0) + assert "sym-h" in df.columns + assert all(idx.tzinfo is not None for idx in df.index) + assert all(idx.tzinfo is timezone.utc for idx in df.index) + + def test_read_single_file(self, symhomni): + csv_file = Path(DATA_DIR) / "OMNI_HIGH_RES_1min_2020.csv" + df = symhomni._read_single_file(csv_file) + assert isinstance(df, pd.DataFrame) + assert len(df) > 0 + assert "sym-h" in df.columns + + def test_year_transition(self, symhomni): + start_time = datetime(2012, 12, 31, 23, 50, 0, tzinfo=timezone.utc) + end_time = datetime(2012, 12, 31, 23, 59, 59, tzinfo=timezone.utc) + + result_df = symhomni.read(start_time, end_time, download=False) + + assert result_df.index.min() == pd.Timestamp("2012-12-31 23:50:00+00:00") + assert result_df.index.max() == pd.Timestamp("2013-01-01 00:00:00+00:00") + + def test_remove_processed_file(self): + os.remove(Path(DATA_DIR) / "OMNI_HIGH_RES_1min_2020.csv")