Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions examples/plot_hk_sp2xr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""
Example for plotting selected variables from a SP2-XR housekeeping file
-----------------------------------------------------------------------

"""
import pysp2
import matplotlib.pyplot as plt

my_hk = pysp2.io.read_sp2xr_hk_file(pysp2.testing.EXAMPLE_SP2XR_HK)
print(my_hk)

# A few representative housekeeping channels: temperature, flow, pressure,
# and particle event count
variables = [
'Laser TEC Temp',
'Sample Flow Controller Read',
'Cavity Pressure',
'Threshold Crossing Events',
]

fig, axes = plt.subplots(len(variables), 1, sharex=True, figsize=(8, 8))
for ax, var in zip(axes, variables):
my_hk[var].plot(ax=ax)
ax.set_title(var)
plt.tight_layout()
plt.show()
19 changes: 19 additions & 0 deletions examples/plot_read_sp2xr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""
Example for plotting a waveform in a SP2-XR .sp2b file
------------------------------------------------------

"""
import pysp2
import matplotlib.pyplot as plt

my_sp2xr = pysp2.io.read_sp2xr(pysp2.testing.EXAMPLE_SP2XR_SP2B)
print(my_sp2xr)

# Plot ch0 and ch1 waveforms for the first particle
fig, (ax0, ax1) = plt.subplots(2, 1, sharex=True, figsize=(8, 6))
my_sp2xr['Data_ch0'].isel(event_index=0).plot(ax=ax0)
ax0.set_title('Particle 0 - Data_ch0')
my_sp2xr['Data_ch1'].isel(event_index=0).plot(ax=ax1)
ax1.set_title('Particle 0 - Data_ch1')
plt.tight_layout()
plt.show()
11 changes: 8 additions & 3 deletions pysp2/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,12 @@
:toctree: generated/

read_hk_file
read_sp2xr_hk_file
read_sp2xr_hk_psd
get_hk_variable_names
read_sp2
read_sp2xr
read_sp2xr_pbp
read_config
write_dat
write_dat_concs
Expand All @@ -20,9 +24,10 @@

"""

from .read_hk import read_hk_file, get_hk_variable_names
from .read_hk import read_multi_hk_file
from .read_sp2b import read_sp2
from .read_hk import read_hk_file, get_hk_variable_names, read_sp2xr_hk_file
from .read_hk import read_sp2xr_hk_psd, read_multi_hk_file
from .read_sp2b import read_sp2, read_sp2xr
from .read_pbp import read_sp2xr_pbp
from .read_ini import read_config
from .write_dat import write_dat, write_dat_concs, write_dat_concs_arm
from .read_dat import read_dat, read_arm_dat, read_calibration
246 changes: 246 additions & 0 deletions pysp2/io/read_hk.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,18 @@
import act
import datetime
import os
import re
import warnings
import numpy as np
import pandas as pd

from glob import glob

# Regex for SP2-XR firmware-computed PSD bin columns. The number of bins per
# channel is not assumed; it is determined from however many columns match
# the pattern in the file at read time.
_SP2XR_BIN_COL_REGEX = re.compile(r'^(?P<channel>Scatter|Incand) Bin (?P<idx>\d+)$')

def read_hk_file(file_name):
"""
This procedure will read in an SP2 housekeeping file and then
Expand Down Expand Up @@ -48,6 +55,245 @@ def read_hk_file(file_name):
return my_df


def read_sp2xr_hk_file(file_name):
"""
This procedure will read in an SP2-XR housekeeping file (.csv or .zip)
and store the timeseries data into an xarray Dataset.

The SP2-XR housekeeping file uses comma-separated values (rather than
tab-separated like the original SP2). The 'Time Stamp (UTC sec)' column
is used to build the time coordinate, sharing the same 1904-01-01 epoch
as the original SP2 'Timestamp' column.

The SP2-XR firmware-computed PSD bin columns (named like 'Scatter Bin N'
and 'Incand Bin N') are excluded from the output to keep the variable
set similar to read_hk_file(). Use read_sp2xr_hk_psd() to access them
as a 2D dataset.

The retained 'Scattering Mass Conc' and 'Incand Mass Conc' columns are
firmware-computed using the on-board calibration curves. To preserve
provenance, the calibration CSV files alongside the HK file (matching
'*_Scatt_*.csv' and '*_Incan_*.csv') are auto-located and attached as
dataset attributes; a warning is emitted for any missing file.

Parameters
----------
file_name: str
The file name to read in (.csv or .zip).

Returns
-------
hk_ds: xarray.Dataset
The housekeeping information as an xarray Dataset indexed by time,
with the firmware-computed PSD bin columns removed.
"""

my_df = act.io.read_csv(file_name, sep=",")

# Parse time from the UTC seconds column (same 1904 epoch as SP2)
start_time = pd.Timestamp('1904-01-01')
my_df = my_df.rename({'index': 'time'})
my_df['time'] = np.array([
start_time + datetime.timedelta(seconds=x)
for x in my_df['Time Stamp (UTC sec)'].values
])
my_df['time'].attrs['units'] = "datetime"
my_df['time'].attrs['long_name'] = "Time [SP2-XR time]"

# Drop firmware-computed PSD bin columns
bin_vars = [v for v in my_df.data_vars
if _SP2XR_BIN_COL_REGEX.match(str(v))]
if bin_vars:
my_df = my_df.drop_vars(bin_vars)

# Extract units from column names following the "Name (units)" convention
for vars in my_df.variables.keys():
splits = vars.split("(")
try:
units = splits[1][:-1]
my_df[vars].attrs['units'] = units
my_df[vars].attrs['long_name'] = vars
my_df = my_df.rename({vars: splits[0][:-1]})
except (IndexError, ValueError):
continue

# Attach calibration curve provenance (used by the firmware to derive
# the retained Mass Conc columns).
_attach_sp2xr_calibration_attrs(my_df, file_name)

return my_df


def _find_sp2xr_calibration_files(hk_path):
"""
Locate the SP2-XR scattering and incandescence calibration CSVs that
sit alongside an HK file. The convention is filenames containing
'_Scatt_' (scattering) and '_Incan_' (incandescence) in the same
directory as the HK file.

Returns
-------
(scatt_path, incan_path): tuple
Each element is the absolute path to the calibration CSV, or None
if no matching file is present.
"""
dir_path = os.path.dirname(os.path.abspath(hk_path))
scatt = sorted(glob(os.path.join(dir_path, '*_Scatt_*.csv')))
incan = sorted(glob(os.path.join(dir_path, '*_Incan_*.csv')))
return (scatt[0] if scatt else None,
incan[0] if incan else None)


def _load_sp2xr_calibration_curve(csv_path):
"""
Load a SP2-XR calibration CSV (no header, two columns: physical value
and instrument signal). Empty trailing rows are dropped.
"""
df = pd.read_csv(csv_path, header=None).dropna()
return df.iloc[:, 0].to_numpy(), df.iloc[:, 1].to_numpy()


def _attach_sp2xr_calibration_attrs(ds, file_path):
"""
Auto-locate scattering and incandescence calibration CSVs alongside
file_path and attach them as dataset attributes. Emits a UserWarning
for each calibration file that cannot be found.

Attached attributes (when available):
ScatCalibrationFile, ScatCalibration_Diameter_nm,
ScatCalibration_Signal,
IncanCalibrationFile, IncanCalibration_Mass_fg,
IncanCalibration_Signal.
"""
scatt_cal, incan_cal = _find_sp2xr_calibration_files(file_path)

if scatt_cal is not None:
diam_nm, signal = _load_sp2xr_calibration_curve(scatt_cal)
ds.attrs['ScatCalibrationFile'] = scatt_cal
ds.attrs['ScatCalibration_Diameter_nm'] = diam_nm
ds.attrs['ScatCalibration_Signal'] = signal
else:
warnings.warn(
"No SP2-XR scattering calibration file ('*_Scatt_*.csv') found "
"next to %r; ScatCalibration_* attributes will not be attached."
% file_path,
UserWarning,
stacklevel=3,
)

if incan_cal is not None:
mass_fg, signal = _load_sp2xr_calibration_curve(incan_cal)
ds.attrs['IncanCalibrationFile'] = incan_cal
ds.attrs['IncanCalibration_Mass_fg'] = mass_fg
ds.attrs['IncanCalibration_Signal'] = signal
else:
warnings.warn(
"No SP2-XR incandescence calibration file ('*_Incan_*.csv') "
"found next to %r; IncanCalibration_* attributes will not be "
"attached." % file_path,
UserWarning,
stacklevel=3,
)


def read_sp2xr_hk_psd(file_name):
"""
Read the SP2-XR firmware-computed PSD histograms from a HK file.

The on-board firmware accumulates per-second particle counts into
size/mass bins ('Scatter Bin N' for scattering size, 'Incand Bin N'
for incandescence mass) using its own calibration. This procedure
extracts those columns into a 2D dataset shaped (time, num_bins),
matching the convention used by pysp2.util.process_psds() for
PySP2-computed distributions (ScatNumEnsemble, IncanNumEnsemble).

Calibration curves for scattering diameter and incandescence mass are
auto-located alongside the HK file (files matching '*_Scatt_*.csv' and
'*_Incan_*.csv' respectively) and attached as dataset attributes when
present. A warning is issued for any missing calibration file.

Parameters
----------
file_name: str
The HK file name to read in (.csv or .zip).

Returns
-------
psd_ds: xarray.Dataset
Dataset with dimensions (time, num_bins) and data variables
ScatNumEnsemble and IncanNumEnsemble (raw counts per bin per
time). Sample volume must be applied to convert to concentration.
"""

my_df = act.io.read_csv(file_name, sep=",")

start_time = pd.Timestamp('1904-01-01')
my_df = my_df.rename({'index': 'time'})
my_df['time'] = np.array([
start_time + datetime.timedelta(seconds=x)
for x in my_df['Time Stamp (UTC sec)'].values
])
my_df['time'].attrs['units'] = "datetime"
my_df['time'].attrs['long_name'] = "Time [SP2-XR time]"

# Discover bin columns dynamically (do not assume a fixed bin count)
scat_cols, incan_cols = {}, {}
for var in list(my_df.data_vars):
m = _SP2XR_BIN_COL_REGEX.match(str(var))
if m is None:
continue
idx = int(m.group('idx'))
if m.group('channel') == 'Scatter':
scat_cols[idx] = str(var)
else:
incan_cols[idx] = str(var)

if not scat_cols and not incan_cols:
raise ValueError(
"No SP2-XR firmware PSD bin columns ('Scatter Bin N', "
"'Incand Bin N') found in %r." % file_name)

def _stack(cols_by_idx):
ordered = [cols_by_idx[i] for i in sorted(cols_by_idx)]
return np.stack([my_df[c].values for c in ordered], axis=-1)

note = ('Raw particle counts per bin per time. To get concentration in '
'cts/cm^3 divide by the sample volume per record '
'(Sample Flow Controller Read (vccm) / 60).')

psd_ds = xr.Dataset(coords={'time': my_df['time']})

if scat_cols:
psd_ds['ScatNumEnsemble'] = xr.DataArray(
_stack(scat_cols), dims=('time', 'num_bins'),
attrs={
'long_name':
'Scattering number distribution (SP2-XR firmware, raw counts)',
'standard_name': 'scattering_number_distribution',
'units': 'count',
'source': 'SP2-XR firmware on-board calculation',
'note': note,
})

if incan_cols:
psd_ds['IncanNumEnsemble'] = xr.DataArray(
_stack(incan_cols), dims=('time', 'num_bins'),
attrs={
'long_name':
'Incandescence number distribution (SP2-XR firmware, raw counts)',
'standard_name': 'incandescence_number_distribution',
'units': 'count',
'source': 'SP2-XR firmware on-board calculation',
'note': note,
})

psd_ds.attrs['source'] = 'SP2-XR firmware'

_attach_sp2xr_calibration_attrs(psd_ds, file_name)

return psd_ds


def get_hk_variable_names(my_df):
"""
This procedure will return al ist of variables in the
Expand Down
Loading