3 changes: 0 additions & 3 deletions swvo/io/RBMDataSet/RBMDataSet.py
@@ -470,14 +470,12 @@ def __eq__(self, other: RBMDataSet) -> bool:
return len(different_vars) == 0

def get_different_variables(self, rbm_other: RBMDataSet) -> list[str]:

different_vars: list[str] = []

self_vars = self.get_loaded_variables()
other_vars = rbm_other.get_loaded_variables()

for var in set(self_vars + other_vars):

if var not in other_vars or var not in self_vars:
different_vars.append(var)
continue
@@ -503,6 +501,5 @@ def get_different_variables(self, rbm_other: RBMDataSet) -> list[str]:

return different_vars


from .bin_and_interpolate_to_model_grid import bin_and_interpolate_to_model_grid
from .interp_functions import interp_flux, interp_psd
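
For context, the hunk above only trims blank lines; the comparison logic is unchanged. As a minimal standalone sketch (with hypothetical variable lists standing in for get_loaded_variables()), a variable counts as different as soon as it is loaded in only one of the two data sets:

# Membership part of get_different_variables, illustrated with made-up lists.
self_vars = ["flux", "psd", "energy_channels"]
other_vars = ["flux", "psd", "mlt"]

different_vars: list[str] = []
for var in set(self_vars + other_vars):
    # Loaded in only one data set -> different by definition;
    # variables present in both are additionally compared value-wise (elided here).
    if var not in other_vars or var not in self_vars:
        different_vars.append(var)

print(sorted(different_vars))  # ['energy_channels', 'mlt']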
3 changes: 2 additions & 1 deletion swvo/io/omni/omni_high_res.py
@@ -288,6 +288,7 @@ def _process_single_year(self, data: list[str]) -> pd.DataFrame:
"proton_density": 999.8,
"temperature": 9999998.0,
"pdyn": 99.0,
"sym-h": 99999.0,
}

df.columns = maxes.keys()
@@ -351,7 +352,7 @@ def _get_data_from_omni(self, start: datetime, end: datetime, cadence: int = 1)
"start_date": start.strftime("%Y%m%d"),
"end_date": end.strftime("%Y%m%d"),
}
common_vars = {"vars": ["13", "14", "17", "18", "21", "25", "26", "27"]}
common_vars = {"vars": ["13", "14", "17", "18", "21", "25", "26", "27", "41"]}
if cadence == 1:
params = {"res": "min", "spacecraft": "omni_min"}
payload.update(params)
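
For reference, the new "sym-h" entry in the fill-value table and the extra variable "41" in the web-service request (presumably the SYM-H field) let the parser treat the documented maximum as missing data. A minimal sketch of that masking step, using a hypothetical two-row frame rather than the real parser:

import numpy as np
import pandas as pd

# Hypothetical rows mimicking parsed OMNI high-res columns.
df = pd.DataFrame(
    {"pdyn": [1.4, 99.0], "sym-h": [-15.0, 99999.0]},
    index=pd.to_datetime(["2020-01-01 00:00", "2020-01-01 00:01"]),
)

maxes = {"pdyn": 99.0, "sym-h": 99999.0}

# Values at or above the documented fill value are treated as missing.
for col, fill in maxes.items():
    df.loc[df[col] >= fill, col] = np.nan

print(df)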
5 changes: 5 additions & 0 deletions swvo/io/symh/__init__.py
@@ -0,0 +1,5 @@
# SPDX-FileCopyrightText: 2025 GFZ Helmholtz Centre for Geosciences
#
# SPDX-License-Identifier: Apache-2.0

from swvo.io.symh.omni import SymhOMNI as SymhOMNI
83 changes: 83 additions & 0 deletions swvo/io/symh/omni.py
@@ -0,0 +1,83 @@
# SPDX-FileCopyrightText: 2025 GFZ Helmholtz Centre for Geosciences
#
# SPDX-License-Identifier: Apache-2.0

"""
Module for handling OMNI SYM-H data.
"""

from __future__ import annotations

import logging
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Optional

import pandas as pd

from swvo.io.omni import OMNIHighRes

logging.captureWarnings(True)


class SymhOMNI(OMNIHighRes):
"""
Class for reading SYM-H data from OMNI High Resolution files.
Inherits `download_and_process` as well as other private methods and attributes from OMNIHighRes.
"""

def __init__(self, data_dir: Optional[Path] = None) -> None:
"""
Initialize a SymhOMNI object.

Parameters
----------
data_dir : Path | None
Data directory for the SYM-H OMNI data. If not provided, it is read from the environment variable `OMNI_HIGH_RES_STREAM_DIR`.
"""
super().__init__(data_dir=data_dir)

def read(
self,
start_time: datetime,
end_time: datetime,
cadence_min: float = 1,
download: bool = True,
) -> pd.DataFrame:
"""
Read OMNI SYM-H data for the given time range.

Parameters
----------
start_time : datetime
Start time of the data to read. Must be timezone-aware.
end_time : datetime
End time of the data to read. Must be timezone-aware.
cadence_min : float, optional
Cadence of the data in minutes, defaults to 1.
download : bool, optional
Download missing data on the fly, defaults to True.

Returns
-------
:class:`pandas.DataFrame`
OMNI SYM-H data.
"""
data_out = super().read(start_time, end_time, cadence_min=cadence_min, download=download)

if not start_time.tzinfo:
start_time = start_time.replace(tzinfo=timezone.utc)
if not end_time.tzinfo:
end_time = end_time.replace(tzinfo=timezone.utc)

symh_df = pd.DataFrame(index=data_out.index)

symh_df["sym-h"] = data_out["sym-h"]
symh_df["file_name"] = data_out["file_name"]

symh_df = symh_df.truncate(
before=start_time - timedelta(minutes=cadence_min - 0.0000001),
after=end_time + timedelta(minutes=cadence_min + 0.0000001),
)

return symh_df
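
A minimal usage sketch of the new class, assuming `OMNI_HIGH_RES_STREAM_DIR` (the environment variable used by the tests below) points at a writable local directory; the path here is made up:

import os
from datetime import datetime, timezone
from pathlib import Path

from swvo.io.symh import SymhOMNI

# Assumed, writable data directory for downloaded and processed yearly files.
os.environ["OMNI_HIGH_RES_STREAM_DIR"] = str(Path.home() / "data" / "omni_high_res")

reader = SymhOMNI()
df = reader.read(
    datetime(2020, 1, 1, tzinfo=timezone.utc),
    datetime(2020, 1, 2, tzinfo=timezone.utc),
    cadence_min=1,
    download=True,  # fetch and process missing files on the fly
)
print(df["sym-h"].describe())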
1 change: 0 additions & 1 deletion tests/io/dst/test_dst_omni.py
@@ -61,7 +61,6 @@ def test_download_and_process(self, dstomni, mocker):
end_time = datetime(2020, 12, 31, tzinfo=timezone.utc)

dstomni.download_and_process(start_time, end_time)

assert (TEST_DIR / Path("data/omni2_2020.dat")).exists()

def test_read_without_download(self, dstomni):
22 changes: 11 additions & 11 deletions tests/io/omni/data/OMNI_HIGH_RES_1min_2012.csv
@@ -1,11 +1,11 @@
timestamp,bavg,by_gsm,bz_gsm,speed,proton_density,temperature,bx_gsm
2012-12-31 23:50:00,2.82,-1.17,-0.28,349.8,2.02,37672.0,2.53
2012-12-31 23:51:00,2.81,-1.51,-0.18,355.7,1.94,31228.0,2.35
2012-12-31 23:52:00,2.81,-1.53,-0.19,353.1,2.07,34439.0,2.33
2012-12-31 23:53:00,2.79,-1.6,-0.15,351.4,2.15,36537.0,2.27
2012-12-31 23:54:00,2.82,-1.42,-0.05,,,,2.43
2012-12-31 23:55:00,2.81,-1.12,-0.01,353.3,2.09,33264.0,2.54
2012-12-31 23:56:00,2.75,-1.25,0.16,353.5,1.97,39095.0,2.44
2012-12-31 23:57:00,2.77,-1.23,0.15,,,,2.48
2012-12-31 23:58:00,2.73,-1.11,0.08,,,,2.49
2012-12-31 23:59:00,,,,,,,
timestamp,bavg,bx_gsm,by_gsm,bz_gsm,speed,proton_density,temperature,pdyn,sym-h
2012-12-31 23:50:00,2.82,2.53,-1.17,-0.28,349.8,2.02,37672.0,1.4,-12
2012-12-31 23:51:00,2.81,2.35,-1.51,-0.18,355.7,1.94,31228.0,1.5,-11
2012-12-31 23:52:00,2.81,2.33,-1.53,-0.19,353.1,2.07,34439.0,1.6,-13
2012-12-31 23:53:00,2.79,2.27,-1.6,-0.15,351.4,2.15,36537.0,1.5,-14
2012-12-31 23:54:00,2.82,2.43,-1.42,-0.05,,,,1.4,
2012-12-31 23:55:00,2.81,2.54,-1.12,-0.01,353.3,2.09,33264.0,1.5,-12
2012-12-31 23:56:00,2.75,2.44,-1.25,0.16,353.5,1.97,39095.0,1.4,-11
2012-12-31 23:57:00,2.77,2.48,-1.23,0.15,,,,1.5,
2012-12-31 23:58:00,2.73,2.49,-1.11,0.08,,,,1.4,
2012-12-31 23:59:00,,,,,,,,,
4 changes: 2 additions & 2 deletions tests/io/omni/data/OMNI_HIGH_RES_1min_2013.csv
@@ -1,2 +1,2 @@
timestamp,bavg,by_gsm,bz_gsm,speed,proton_density,temperature,bx_gsm
2013-01-01 00:00:00,2.45,-0.31,-0.18,358.8,1.94,46455.0,2.42
timestamp,bavg,bx_gsm,by_gsm,bz_gsm,speed,proton_density,temperature,pdyn,sym-h
2013-01-01 00:00:00,2.45,2.42,-0.31,-0.18,358.8,1.94,46455.0,1.5,-10
14 changes: 8 additions & 6 deletions tests/io/omni/test_omni_high_res.py
@@ -104,15 +104,14 @@ def test_remove_processed_file(self):

def test_process_single_year_parses_data_correctly(self, omni_high_res):
data = [
"YYYY DOY HR MN bavg bx_gsm by_gsm bz_gsm speed proton_density temperature pdyn",
"2020 1 0 0 5.1 1.2 2.3 3.4 400 5.5 1000000 99",
"2020 1 0 1 9999.9 9999.9 9999.9 9999.9 99999.8 999.8 9999998.0 99",
"YYYY DOY HR MN bavg bx_gsm by_gsm bz_gsm speed proton_density temperature pdyn sym-h",
"2020 1 0 0 5.1 1.2 2.3 3.4 400 5.5 1000000 99 -15",
"2020 1 0 1 9999.9 9999.9 9999.9 9999.9 99999.8 999.8 9999998.0 99 99999.0",
]

df = omni_high_res._process_single_year(data)
assert isinstance(df.index[0], pd.Timestamp)
assert len(df) >= 2
# Check columns
expected_cols = [
"bavg",
"bx_gsm",
@@ -122,6 +121,7 @@ def test_process_single_year_parses_data_correctly(self, omni_high_res):
"proton_density",
"temperature",
"pdyn",
"sym-h",
]
assert list(df.columns) == expected_cols
assert np.isnan(df.iloc[1]["bavg"])
@@ -131,20 +131,22 @@ def test_process_single_year_parses_data_correctly(self, omni_high_res):
assert np.isnan(df.iloc[1]["speed"])
assert np.isnan(df.iloc[1]["proton_density"])
assert np.isnan(df.iloc[1]["temperature"])
assert np.isnan(df.iloc[1]["sym-h"])
assert df.iloc[0]["bavg"] == 5.1
assert df.iloc[0]["bx_gsm"] == 1.2
assert df.iloc[0]["by_gsm"] == 2.3
assert df.iloc[0]["bz_gsm"] == 3.4
assert df.iloc[0]["speed"] == 400
assert df.iloc[0]["proton_density"] == 5.5
assert df.iloc[0]["temperature"] == 1000000
assert df.iloc[0]["sym-h"] == -15

def test_process_single_year_handles_missing_data_lines(self, omni_high_res):
data = ["YYYY DOY HR MN bavg bx_gsm by_gsm bz_gsm speed proton_density temperature"]
data = ["YYYY DOY HR MN bavg bx_gsm by_gsm bz_gsm speed proton_density temperature sym-h"]
with pytest.raises(ValueError):
_ = omni_high_res._process_single_year(data)

def test_process_single_year_raises_on_missing_header(self, omni_high_res):
data = ["2020 1 0 0 5.1 1.2 2.3 3.4 400 5.5 1000000"]
data = ["2020 1 0 0 5.1 1.2 2.3 3.4 400 5.5 1000000 -15"]
with pytest.raises(StopIteration):
omni_high_res._process_single_year(data)
101 changes: 101 additions & 0 deletions tests/io/symh/test_symh_omni.py
@@ -0,0 +1,101 @@
# SPDX-FileCopyrightText: 2025 GFZ Helmholtz Centre for Geosciences
#
# SPDX-License-Identifier: Apache-2.0

import os
from datetime import datetime, timezone
from pathlib import Path

import pandas as pd
import pytest

from swvo.io.symh import SymhOMNI

TEST_DIR = os.path.dirname(__file__)
DATA_DIR = Path(os.path.join(TEST_DIR, "../omni/data/"))


class TestSymhOMNI:
@pytest.fixture
def symhomni(self):
os.environ["OMNI_HIGH_RES_STREAM_DIR"] = str(DATA_DIR)
yield SymhOMNI()

@pytest.fixture
def mock_symhomni_data(self):
test_dates = pd.date_range(start=datetime(2020, 1, 1), end=datetime(2020, 12, 31, 23, 59, 0), freq="min")
test_data = pd.DataFrame(
{
"t": test_dates,
"sym-h": [-15.0] * len(test_dates),
"file_name": "some_file",
"timestamp": test_dates.strftime("%Y-%m-%d %H:%M:%S"),
}
)
test_data.index = test_dates.tz_localize("UTC")
return test_data

def test_initialization_with_env_var(self, symhomni):
assert symhomni.data_dir.exists()

def test_initialization_with_data_dir(self):
symhomni = SymhOMNI(data_dir=DATA_DIR)
assert symhomni.data_dir == DATA_DIR

def test_initialization_without_env_var(self):
if "OMNI_HIGH_RES_STREAM_DIR" in os.environ:
del os.environ["OMNI_HIGH_RES_STREAM_DIR"]
with pytest.raises(ValueError):
SymhOMNI()

def test_download_and_process(self, symhomni):
start_time = datetime(2020, 1, 1, tzinfo=timezone.utc)
end_time = datetime(2020, 12, 31, tzinfo=timezone.utc)
# download this file without mocking
symhomni.download_and_process(start_time, end_time)

assert (DATA_DIR / "OMNI_HIGH_RES_1min_2020.csv").exists()

def test_read_without_download(self, symhomni):
start_time = datetime(2021, 1, 1, tzinfo=timezone.utc)
end_time = datetime(2021, 2, 28, tzinfo=timezone.utc)
with pytest.raises(
ValueError
): # ValueError is raised when no files are found, hence no concatenation is possible
symhomni.read(start_time, end_time, download=False)

def test_read_with_download(self, symhomni, mock_symhomni_data, mocker):
mocker.patch("pathlib.Path.exists", return_value=False)
mocker.patch.object(symhomni, "_read_single_file", return_value=mock_symhomni_data)
mocker.patch.object(symhomni, "download_and_process")

start_time = datetime(2020, 1, 1)
end_time = datetime(2020, 12, 31)

df = symhomni.read(start_time, end_time, download=True)
symhomni.download_and_process.assert_called_once()

assert not df.empty
assert all(df["sym-h"] == -15.0)
assert "sym-h" in df.columns
assert all(idx.tzinfo is not None for idx in df.index)
assert all(idx.tzinfo is timezone.utc for idx in df.index)

def test_read_single_file(self, symhomni):
csv_file = Path(DATA_DIR) / "OMNI_HIGH_RES_1min_2020.csv"
df = symhomni._read_single_file(csv_file)
assert isinstance(df, pd.DataFrame)
assert len(df) > 0
assert "sym-h" in df.columns

def test_year_transition(self, symhomni):
start_time = datetime(2012, 12, 31, 23, 50, 0, tzinfo=timezone.utc)
end_time = datetime(2012, 12, 31, 23, 59, 59, tzinfo=timezone.utc)

result_df = symhomni.read(start_time, end_time, download=False)

assert result_df.index.min() == pd.Timestamp("2012-12-31 23:50:00+00:00")
assert result_df.index.max() == pd.Timestamp("2013-01-01 00:00:00+00:00")

def test_remove_processed_file(self):
os.remove(Path(DATA_DIR) / "OMNI_HIGH_RES_1min_2020.csv")