3 changes: 0 additions & 3 deletions swvo/io/RBMDataSet/RBMDataSet.py
@@ -470,14 +470,12 @@ def __eq__(self, other: RBMDataSet) -> bool:
return len(different_vars) == 0

def get_different_variables(self, rbm_other: RBMDataSet) -> list[str]:

different_vars: list[str] = []

self_vars = self.get_loaded_variables()
other_vars = rbm_other.get_loaded_variables()

for var in set(self_vars + other_vars):

if var not in other_vars or var not in self_vars:
different_vars.append(var)
continue
@@ -503,6 +501,5 @@ def get_different_variables(self, rbm_other: RBMDataSet) -> list[str]:

return different_vars


from .bin_and_interpolate_to_model_grid import bin_and_interpolate_to_model_grid
from .interp_functions import interp_flux, interp_psd
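
For context, the hunk above only trims blank lines; the comparison logic is unchanged. As a minimal standalone sketch (with hypothetical variable lists standing in for get_loaded_variables()), a variable counts as different as soon as it is loaded in only one of the two data sets:

# Membership part of get_different_variables, illustrated with made-up lists.
self_vars = ["flux", "psd", "energy_channels"]
other_vars = ["flux", "psd", "mlt"]

different_vars: list[str] = []
for var in set(self_vars + other_vars):
    # Loaded in only one data set -> different by definition;
    # variables present in both are additionally compared value-wise (elided here).
    if var not in other_vars or var not in self_vars:
        different_vars.append(var)

print(sorted(different_vars))  # ['energy_channels', 'mlt']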
3 changes: 2 additions & 1 deletion swvo/io/omni/omni_high_res.py
@@ -288,6 +288,7 @@ def _process_single_year(self, data: list[str]) -> pd.DataFrame:
"proton_density": 999.8,
"temperature": 9999998.0,
"pdyn": 99.0,
"sym-h": 99999.0,
}

df.columns = maxes.keys()
@@ -351,7 +352,7 @@ def _get_data_from_omni(self, start: datetime, end: datetime, cadence: int = 1)
"start_date": start.strftime("%Y%m%d"),
"end_date": end.strftime("%Y%m%d"),
}
common_vars = {"vars": ["13", "14", "17", "18", "21", "25", "26", "27"]}
common_vars = {"vars": ["13", "14", "17", "18", "21", "25", "26", "27", "41"]}
if cadence == 1:
params = {"res": "min", "spacecraft": "omni_min"}
payload.update(params)
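
For reference, the new "sym-h" entry in the fill-value table and the extra variable "41" in the web-service request (presumably the SYM-H field) let the parser treat the documented maximum as missing data. A minimal sketch of that masking step, using a hypothetical two-row frame rather than the real parser:

import numpy as np
import pandas as pd

# Hypothetical rows mimicking parsed OMNI high-res columns.
df = pd.DataFrame(
    {"pdyn": [1.4, 99.0], "sym-h": [-15.0, 99999.0]},
    index=pd.to_datetime(["2020-01-01 00:00", "2020-01-01 00:01"]),
)

maxes = {"pdyn": 99.0, "sym-h": 99999.0}

# Values at or above the documented fill value are treated as missing.
for col, fill in maxes.items():
    df.loc[df[col] >= fill, col] = np.nan

print(df)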
5 changes: 5 additions & 0 deletions swvo/io/symh/__init__.py
@@ -0,0 +1,5 @@
# SPDX-FileCopyrightText: 2025 GFZ Helmholtz Centre for Geosciences
#
# SPDX-License-Identifier: Apache-2.0

from swvo.io.symh.omni import SymhOMNI as SymhOMNI
83 changes: 83 additions & 0 deletions swvo/io/symh/omni.py
@@ -0,0 +1,83 @@
# SPDX-FileCopyrightText: 2025 GFZ Helmholtz Centre for Geosciences
#
# SPDX-License-Identifier: Apache-2.0

"""
Module for handling OMNI SYM-H data.
"""

from __future__ import annotations

import logging
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Optional

import pandas as pd

from swvo.io.omni import OMNIHighRes

logging.captureWarnings(True)


class SymhOMNI(OMNIHighRes):
"""
Class for reading SYM-H data from OMNI High Resolution files.
Inherits `download_and_process` as well as other private methods and attributes from OMNIHighRes.
"""

def __init__(self, data_dir: Optional[Path] = None) -> None:
"""
Initialize a SymhOMNI object.

Parameters
----------
data_dir : Path | None
Data directory for the SYM-H OMNI data. If not provided, it is read from the environment variable `OMNI_HIGH_RES_STREAM_DIR`.
"""
super().__init__(data_dir=data_dir)

def read(
self,
start_time: datetime,
end_time: datetime,
cadence_min: float = 1,
download: bool = True,
) -> pd.DataFrame:
"""
Read OMNI SYM-H data for the given time range.

Parameters
----------
start_time : datetime
Start time of the data to read. Must be timezone-aware.
end_time : datetime
End time of the data to read. Must be timezone-aware.
cadence_min : float, optional
Cadence of the data in minutes, defaults to 1.
download : bool, optional
Download missing data on the fly, defaults to True.

Returns
-------
:class:`pandas.DataFrame`
OMNI SYM-H data.
"""
data_out = super().read(start_time, end_time, cadence_min=cadence_min, download=download)

if not start_time.tzinfo:
start_time = start_time.replace(tzinfo=timezone.utc)
if not end_time.tzinfo:
end_time = end_time.replace(tzinfo=timezone.utc)

symh_df = pd.DataFrame(index=data_out.index)

symh_df["sym-h"] = data_out["sym-h"]
symh_df["file_name"] = data_out["file_name"]

symh_df = symh_df.truncate(
before=start_time - timedelta(minutes=cadence_min - 0.0000001),
after=end_time + timedelta(minutes=cadence_min + 0.0000001),
)

return symh_df
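
A minimal usage sketch of the new class, assuming `OMNI_HIGH_RES_STREAM_DIR` (the environment variable used by the tests below) points at a writable local directory; the path here is made up:

import os
from datetime import datetime, timezone
from pathlib import Path

from swvo.io.symh import SymhOMNI

# Assumed, writable data directory for downloaded and processed yearly files.
os.environ["OMNI_HIGH_RES_STREAM_DIR"] = str(Path.home() / "data" / "omni_high_res")

reader = SymhOMNI()
df = reader.read(
    datetime(2020, 1, 1, tzinfo=timezone.utc),
    datetime(2020, 1, 2, tzinfo=timezone.utc),
    cadence_min=1,
    download=True,  # fetch and process missing files on the fly
)
print(df["sym-h"].describe())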
1 change: 0 additions & 1 deletion tests/io/dst/test_dst_omni.py
@@ -61,7 +61,6 @@ def test_download_and_process(self, dstomni, mocker):
end_time = datetime(2020, 12, 31, tzinfo=timezone.utc)

dstomni.download_and_process(start_time, end_time)

assert (TEST_DIR / Path("data/omni2_2020.dat")).exists()

def test_read_without_download(self, dstomni):
22 changes: 11 additions & 11 deletions tests/io/omni/data/OMNI_HIGH_RES_1min_2012.csv
@@ -1,11 +1,11 @@
timestamp,bavg,by_gsm,bz_gsm,speed,proton_density,temperature,bx_gsm
2012-12-31 23:50:00,2.82,-1.17,-0.28,349.8,2.02,37672.0,2.53
2012-12-31 23:51:00,2.81,-1.51,-0.18,355.7,1.94,31228.0,2.35
2012-12-31 23:52:00,2.81,-1.53,-0.19,353.1,2.07,34439.0,2.33
2012-12-31 23:53:00,2.79,-1.6,-0.15,351.4,2.15,36537.0,2.27
2012-12-31 23:54:00,2.82,-1.42,-0.05,,,,2.43
2012-12-31 23:55:00,2.81,-1.12,-0.01,353.3,2.09,33264.0,2.54
2012-12-31 23:56:00,2.75,-1.25,0.16,353.5,1.97,39095.0,2.44
2012-12-31 23:57:00,2.77,-1.23,0.15,,,,2.48
2012-12-31 23:58:00,2.73,-1.11,0.08,,,,2.49
2012-12-31 23:59:00,,,,,,,
timestamp,bavg,bx_gsm,by_gsm,bz_gsm,speed,proton_density,temperature,pdyn,sym-h
2012-12-31 23:50:00,2.82,2.53,-1.17,-0.28,349.8,2.02,37672.0,1.4,-12
2012-12-31 23:51:00,2.81,2.35,-1.51,-0.18,355.7,1.94,31228.0,1.5,-11
2012-12-31 23:52:00,2.81,2.33,-1.53,-0.19,353.1,2.07,34439.0,1.6,-13
2012-12-31 23:53:00,2.79,2.27,-1.6,-0.15,351.4,2.15,36537.0,1.5,-14
2012-12-31 23:54:00,2.82,2.43,-1.42,-0.05,,,,1.4,
2012-12-31 23:55:00,2.81,2.54,-1.12,-0.01,353.3,2.09,33264.0,1.5,-12
2012-12-31 23:56:00,2.75,2.44,-1.25,0.16,353.5,1.97,39095.0,1.4,-11
2012-12-31 23:57:00,2.77,2.48,-1.23,0.15,,,,1.5,
2012-12-31 23:58:00,2.73,2.49,-1.11,0.08,,,,1.4,
2012-12-31 23:59:00,,,,,,,,,
4 changes: 2 additions & 2 deletions tests/io/omni/data/OMNI_HIGH_RES_1min_2013.csv
@@ -1,2 +1,2 @@
timestamp,bavg,by_gsm,bz_gsm,speed,proton_density,temperature,bx_gsm
2013-01-01 00:00:00,2.45,-0.31,-0.18,358.8,1.94,46455.0,2.42
timestamp,bavg,bx_gsm,by_gsm,bz_gsm,speed,proton_density,temperature,pdyn,sym-h
2013-01-01 00:00:00,2.45,2.42,-0.31,-0.18,358.8,1.94,46455.0,1.5,-10
14 changes: 8 additions & 6 deletions tests/io/omni/test_omni_high_res.py
@@ -104,15 +104,14 @@ def test_remove_processed_file(self):

def test_process_single_year_parses_data_correctly(self, omni_high_res):
data = [
"YYYY DOY HR MN bavg bx_gsm by_gsm bz_gsm speed proton_density temperature pdyn",
"2020 1 0 0 5.1 1.2 2.3 3.4 400 5.5 1000000 99",
"2020 1 0 1 9999.9 9999.9 9999.9 9999.9 99999.8 999.8 9999998.0 99",
"YYYY DOY HR MN bavg bx_gsm by_gsm bz_gsm speed proton_density temperature pdyn sym-h",
"2020 1 0 0 5.1 1.2 2.3 3.4 400 5.5 1000000 99 -15",
"2020 1 0 1 9999.9 9999.9 9999.9 9999.9 99999.8 999.8 9999998.0 99 99999.0",
]

df = omni_high_res._process_single_year(data)
assert isinstance(df.index[0], pd.Timestamp)
assert len(df) >= 2
# Check columns
expected_cols = [
"bavg",
"bx_gsm",
@@ -122,6 +121,7 @@ def test_process_single_year_parses_data_correctly(self, omni_high_res):
"proton_density",
"temperature",
"pdyn",
"sym-h",
]
assert list(df.columns) == expected_cols
assert np.isnan(df.iloc[1]["bavg"])
@@ -131,20 +131,22 @@ def test_process_single_year_parses_data_correctly(self, omni_high_res):
assert np.isnan(df.iloc[1]["speed"])
assert np.isnan(df.iloc[1]["proton_density"])
assert np.isnan(df.iloc[1]["temperature"])
assert np.isnan(df.iloc[1]["sym-h"])
assert df.iloc[0]["bavg"] == 5.1
assert df.iloc[0]["bx_gsm"] == 1.2
assert df.iloc[0]["by_gsm"] == 2.3
assert df.iloc[0]["bz_gsm"] == 3.4
assert df.iloc[0]["speed"] == 400
assert df.iloc[0]["proton_density"] == 5.5
assert df.iloc[0]["temperature"] == 1000000
assert df.iloc[0]["sym-h"] == -15

def test_process_single_year_handles_missing_data_lines(self, omni_high_res):
data = ["YYYY DOY HR MN bavg bx_gsm by_gsm bz_gsm speed proton_density temperature"]
data = ["YYYY DOY HR MN bavg bx_gsm by_gsm bz_gsm speed proton_density temperature sym-h"]
with pytest.raises(ValueError):
_ = omni_high_res._process_single_year(data)

def test_process_single_year_raises_on_missing_header(self, omni_high_res):
data = ["2020 1 0 0 5.1 1.2 2.3 3.4 400 5.5 1000000"]
data = ["2020 1 0 0 5.1 1.2 2.3 3.4 400 5.5 1000000 -15"]
with pytest.raises(StopIteration):
omni_high_res._process_single_year(data)
101 changes: 101 additions & 0 deletions tests/io/symh/test_symh_omni.py
@@ -0,0 +1,101 @@
# SPDX-FileCopyrightText: 2025 GFZ Helmholtz Centre for Geosciences
#
# SPDX-License-Identifier: Apache-2.0

import os
from datetime import datetime, timezone
from pathlib import Path

import pandas as pd
import pytest

from swvo.io.symh import SymhOMNI

TEST_DIR = os.path.dirname(__file__)
DATA_DIR = Path(os.path.join(TEST_DIR, "../omni/data/"))


class TestSymhOMNI:
@pytest.fixture
def symhomni(self):
os.environ["OMNI_HIGH_RES_STREAM_DIR"] = str(DATA_DIR)
yield SymhOMNI()

@pytest.fixture
def mock_symhomni_data(self):
test_dates = pd.date_range(start=datetime(2020, 1, 1), end=datetime(2020, 12, 31, 23, 59, 0), freq="min")
test_data = pd.DataFrame(
{
"t": test_dates,
"sym-h": [-15.0] * len(test_dates),
"file_name": "some_file",
"timestamp": test_dates.strftime("%Y-%m-%d %H:%M:%S"),
}
)
test_data.index = test_dates.tz_localize("UTC")
return test_data

def test_initialization_with_env_var(self, symhomni):
assert symhomni.data_dir.exists()

def test_initialization_with_data_dir(self):
symhomni = SymhOMNI(data_dir=DATA_DIR)
assert symhomni.data_dir == DATA_DIR

def test_initialization_without_env_var(self):
if "OMNI_HIGH_RES_STREAM_DIR" in os.environ:
del os.environ["OMNI_HIGH_RES_STREAM_DIR"]
with pytest.raises(ValueError):
SymhOMNI()

def test_download_and_process(self, symhomni):
start_time = datetime(2020, 1, 1, tzinfo=timezone.utc)
end_time = datetime(2020, 12, 31, tzinfo=timezone.utc)
# download this file without mocking
symhomni.download_and_process(start_time, end_time)

assert (DATA_DIR / "OMNI_HIGH_RES_1min_2020.csv").exists()

def test_read_without_download(self, symhomni):
start_time = datetime(2021, 1, 1, tzinfo=timezone.utc)
end_time = datetime(2021, 2, 28, tzinfo=timezone.utc)
with pytest.raises(
ValueError
): # ValueError is raised when no files are found, hence no concatenation is possible
symhomni.read(start_time, end_time, download=False)

def test_read_with_download(self, symhomni, mock_symhomni_data, mocker):
mocker.patch("pathlib.Path.exists", return_value=False)
mocker.patch.object(symhomni, "_read_single_file", return_value=mock_symhomni_data)
mocker.patch.object(symhomni, "download_and_process")

start_time = datetime(2020, 1, 1)
end_time = datetime(2020, 12, 31)

df = symhomni.read(start_time, end_time, download=True)
symhomni.download_and_process.assert_called_once()

assert not df.empty
assert all(df["sym-h"] == -15.0)
assert "sym-h" in df.columns
assert all(idx.tzinfo is not None for idx in df.index)
assert all(idx.tzinfo is timezone.utc for idx in df.index)

def test_read_single_file(self, symhomni):
csv_file = Path(DATA_DIR) / "OMNI_HIGH_RES_1min_2020.csv"
df = symhomni._read_single_file(csv_file)
assert isinstance(df, pd.DataFrame)
assert len(df) > 0
assert "sym-h" in df.columns

def test_year_transition(self, symhomni):
start_time = datetime(2012, 12, 31, 23, 50, 0, tzinfo=timezone.utc)
end_time = datetime(2012, 12, 31, 23, 59, 59, tzinfo=timezone.utc)

result_df = symhomni.read(start_time, end_time, download=False)

assert result_df.index.min() == pd.Timestamp("2012-12-31 23:50:00+00:00")
assert result_df.index.max() == pd.Timestamp("2013-01-01 00:00:00+00:00")

def test_remove_processed_file(self):
os.remove(Path(DATA_DIR) / "OMNI_HIGH_RES_1min_2020.csv")