diff --git a/examples/plot_hk_sp2xr.py b/examples/plot_hk_sp2xr.py new file mode 100644 index 00000000..49a7e61a --- /dev/null +++ b/examples/plot_hk_sp2xr.py @@ -0,0 +1,26 @@ +""" +Example for plotting selected variables from a SP2-XR housekeeping file +----------------------------------------------------------------------- + +""" +import pysp2 +import matplotlib.pyplot as plt + +my_hk = pysp2.io.read_sp2xr_hk_file(pysp2.testing.EXAMPLE_SP2XR_HK) +print(my_hk) + +# A few representative housekeeping channels: temperature, flow, pressure, +# and particle event count +variables = [ + 'Laser TEC Temp', + 'Sample Flow Controller Read', + 'Cavity Pressure', + 'Threshold Crossing Events', +] + +fig, axes = plt.subplots(len(variables), 1, sharex=True, figsize=(8, 8)) +for ax, var in zip(axes, variables): + my_hk[var].plot(ax=ax) + ax.set_title(var) +plt.tight_layout() +plt.show() diff --git a/examples/plot_read_sp2xr.py b/examples/plot_read_sp2xr.py new file mode 100644 index 00000000..4998d030 --- /dev/null +++ b/examples/plot_read_sp2xr.py @@ -0,0 +1,19 @@ +""" +Example for plotting a waveform in a SP2-XR .sp2b file +------------------------------------------------------ + +""" +import pysp2 +import matplotlib.pyplot as plt + +my_sp2xr = pysp2.io.read_sp2xr(pysp2.testing.EXAMPLE_SP2XR_SP2B) +print(my_sp2xr) + +# Plot ch0 and ch1 waveforms for the first particle +fig, (ax0, ax1) = plt.subplots(2, 1, sharex=True, figsize=(8, 6)) +my_sp2xr['Data_ch0'].isel(event_index=0).plot(ax=ax0) +ax0.set_title('Particle 0 - Data_ch0') +my_sp2xr['Data_ch1'].isel(event_index=0).plot(ax=ax1) +ax1.set_title('Particle 0 - Data_ch1') +plt.tight_layout() +plt.show() diff --git a/pysp2/io/__init__.py b/pysp2/io/__init__.py index e9f5d489..90edb188 100644 --- a/pysp2/io/__init__.py +++ b/pysp2/io/__init__.py @@ -8,8 +8,12 @@ :toctree: generated/ read_hk_file + read_sp2xr_hk_file + read_sp2xr_hk_psd get_hk_variable_names read_sp2 + read_sp2xr + read_sp2xr_pbp read_config write_dat write_dat_concs @@ -20,9 +24,10 @@ """ -from .read_hk import read_hk_file, get_hk_variable_names -from .read_hk import read_multi_hk_file -from .read_sp2b import read_sp2 +from .read_hk import read_hk_file, get_hk_variable_names, read_sp2xr_hk_file +from .read_hk import read_sp2xr_hk_psd, read_multi_hk_file +from .read_sp2b import read_sp2, read_sp2xr +from .read_pbp import read_sp2xr_pbp from .read_ini import read_config from .write_dat import write_dat, write_dat_concs, write_dat_concs_arm from .read_dat import read_dat, read_arm_dat, read_calibration diff --git a/pysp2/io/read_hk.py b/pysp2/io/read_hk.py index b2f26825..23285e04 100644 --- a/pysp2/io/read_hk.py +++ b/pysp2/io/read_hk.py @@ -6,11 +6,18 @@ import act import datetime import os +import re +import warnings import numpy as np import pandas as pd from glob import glob +# Regex for SP2-XR firmware-computed PSD bin columns. The number of bins per +# channel is not assumed; it is determined from however many columns match +# the pattern in the file at read time. +_SP2XR_BIN_COL_REGEX = re.compile(r'^(?PScatter|Incand) Bin (?P\d+)$') + def read_hk_file(file_name): """ This procedure will read in an SP2 housekeeping file and then @@ -48,6 +55,245 @@ def read_hk_file(file_name): return my_df +def read_sp2xr_hk_file(file_name): + """ + This procedure will read in an SP2-XR housekeeping file (.csv or .zip) + and store the timeseries data into an xarray Dataset. + + The SP2-XR housekeeping file uses comma-separated values (rather than + tab-separated like the original SP2). The 'Time Stamp (UTC sec)' column + is used to build the time coordinate, sharing the same 1904-01-01 epoch + as the original SP2 'Timestamp' column. + + The SP2-XR firmware-computed PSD bin columns (named like 'Scatter Bin N' + and 'Incand Bin N') are excluded from the output to keep the variable + set similar to read_hk_file(). Use read_sp2xr_hk_psd() to access them + as a 2D dataset. + + The retained 'Scattering Mass Conc' and 'Incand Mass Conc' columns are + firmware-computed using the on-board calibration curves. To preserve + provenance, the calibration CSV files alongside the HK file (matching + '*_Scatt_*.csv' and '*_Incan_*.csv') are auto-located and attached as + dataset attributes; a warning is emitted for any missing file. + + Parameters + ---------- + file_name: str + The file name to read in (.csv or .zip). + + Returns + ------- + hk_ds: xarray.Dataset + The housekeeping information as an xarray Dataset indexed by time, + with the firmware-computed PSD bin columns removed. + """ + + my_df = act.io.read_csv(file_name, sep=",") + + # Parse time from the UTC seconds column (same 1904 epoch as SP2) + start_time = pd.Timestamp('1904-01-01') + my_df = my_df.rename({'index': 'time'}) + my_df['time'] = np.array([ + start_time + datetime.timedelta(seconds=x) + for x in my_df['Time Stamp (UTC sec)'].values + ]) + my_df['time'].attrs['units'] = "datetime" + my_df['time'].attrs['long_name'] = "Time [SP2-XR time]" + + # Drop firmware-computed PSD bin columns + bin_vars = [v for v in my_df.data_vars + if _SP2XR_BIN_COL_REGEX.match(str(v))] + if bin_vars: + my_df = my_df.drop_vars(bin_vars) + + # Extract units from column names following the "Name (units)" convention + for vars in my_df.variables.keys(): + splits = vars.split("(") + try: + units = splits[1][:-1] + my_df[vars].attrs['units'] = units + my_df[vars].attrs['long_name'] = vars + my_df = my_df.rename({vars: splits[0][:-1]}) + except (IndexError, ValueError): + continue + + # Attach calibration curve provenance (used by the firmware to derive + # the retained Mass Conc columns). + _attach_sp2xr_calibration_attrs(my_df, file_name) + + return my_df + + +def _find_sp2xr_calibration_files(hk_path): + """ + Locate the SP2-XR scattering and incandescence calibration CSVs that + sit alongside an HK file. The convention is filenames containing + '_Scatt_' (scattering) and '_Incan_' (incandescence) in the same + directory as the HK file. + + Returns + ------- + (scatt_path, incan_path): tuple + Each element is the absolute path to the calibration CSV, or None + if no matching file is present. + """ + dir_path = os.path.dirname(os.path.abspath(hk_path)) + scatt = sorted(glob(os.path.join(dir_path, '*_Scatt_*.csv'))) + incan = sorted(glob(os.path.join(dir_path, '*_Incan_*.csv'))) + return (scatt[0] if scatt else None, + incan[0] if incan else None) + + +def _load_sp2xr_calibration_curve(csv_path): + """ + Load a SP2-XR calibration CSV (no header, two columns: physical value + and instrument signal). Empty trailing rows are dropped. + """ + df = pd.read_csv(csv_path, header=None).dropna() + return df.iloc[:, 0].to_numpy(), df.iloc[:, 1].to_numpy() + + +def _attach_sp2xr_calibration_attrs(ds, file_path): + """ + Auto-locate scattering and incandescence calibration CSVs alongside + file_path and attach them as dataset attributes. Emits a UserWarning + for each calibration file that cannot be found. + + Attached attributes (when available): + ScatCalibrationFile, ScatCalibration_Diameter_nm, + ScatCalibration_Signal, + IncanCalibrationFile, IncanCalibration_Mass_fg, + IncanCalibration_Signal. + """ + scatt_cal, incan_cal = _find_sp2xr_calibration_files(file_path) + + if scatt_cal is not None: + diam_nm, signal = _load_sp2xr_calibration_curve(scatt_cal) + ds.attrs['ScatCalibrationFile'] = scatt_cal + ds.attrs['ScatCalibration_Diameter_nm'] = diam_nm + ds.attrs['ScatCalibration_Signal'] = signal + else: + warnings.warn( + "No SP2-XR scattering calibration file ('*_Scatt_*.csv') found " + "next to %r; ScatCalibration_* attributes will not be attached." + % file_path, + UserWarning, + stacklevel=3, + ) + + if incan_cal is not None: + mass_fg, signal = _load_sp2xr_calibration_curve(incan_cal) + ds.attrs['IncanCalibrationFile'] = incan_cal + ds.attrs['IncanCalibration_Mass_fg'] = mass_fg + ds.attrs['IncanCalibration_Signal'] = signal + else: + warnings.warn( + "No SP2-XR incandescence calibration file ('*_Incan_*.csv') " + "found next to %r; IncanCalibration_* attributes will not be " + "attached." % file_path, + UserWarning, + stacklevel=3, + ) + + +def read_sp2xr_hk_psd(file_name): + """ + Read the SP2-XR firmware-computed PSD histograms from a HK file. + + The on-board firmware accumulates per-second particle counts into + size/mass bins ('Scatter Bin N' for scattering size, 'Incand Bin N' + for incandescence mass) using its own calibration. This procedure + extracts those columns into a 2D dataset shaped (time, num_bins), + matching the convention used by pysp2.util.process_psds() for + PySP2-computed distributions (ScatNumEnsemble, IncanNumEnsemble). + + Calibration curves for scattering diameter and incandescence mass are + auto-located alongside the HK file (files matching '*_Scatt_*.csv' and + '*_Incan_*.csv' respectively) and attached as dataset attributes when + present. A warning is issued for any missing calibration file. + + Parameters + ---------- + file_name: str + The HK file name to read in (.csv or .zip). + + Returns + ------- + psd_ds: xarray.Dataset + Dataset with dimensions (time, num_bins) and data variables + ScatNumEnsemble and IncanNumEnsemble (raw counts per bin per + time). Sample volume must be applied to convert to concentration. + """ + + my_df = act.io.read_csv(file_name, sep=",") + + start_time = pd.Timestamp('1904-01-01') + my_df = my_df.rename({'index': 'time'}) + my_df['time'] = np.array([ + start_time + datetime.timedelta(seconds=x) + for x in my_df['Time Stamp (UTC sec)'].values + ]) + my_df['time'].attrs['units'] = "datetime" + my_df['time'].attrs['long_name'] = "Time [SP2-XR time]" + + # Discover bin columns dynamically (do not assume a fixed bin count) + scat_cols, incan_cols = {}, {} + for var in list(my_df.data_vars): + m = _SP2XR_BIN_COL_REGEX.match(str(var)) + if m is None: + continue + idx = int(m.group('idx')) + if m.group('channel') == 'Scatter': + scat_cols[idx] = str(var) + else: + incan_cols[idx] = str(var) + + if not scat_cols and not incan_cols: + raise ValueError( + "No SP2-XR firmware PSD bin columns ('Scatter Bin N', " + "'Incand Bin N') found in %r." % file_name) + + def _stack(cols_by_idx): + ordered = [cols_by_idx[i] for i in sorted(cols_by_idx)] + return np.stack([my_df[c].values for c in ordered], axis=-1) + + note = ('Raw particle counts per bin per time. To get concentration in ' + 'cts/cm^3 divide by the sample volume per record ' + '(Sample Flow Controller Read (vccm) / 60).') + + psd_ds = xr.Dataset(coords={'time': my_df['time']}) + + if scat_cols: + psd_ds['ScatNumEnsemble'] = xr.DataArray( + _stack(scat_cols), dims=('time', 'num_bins'), + attrs={ + 'long_name': + 'Scattering number distribution (SP2-XR firmware, raw counts)', + 'standard_name': 'scattering_number_distribution', + 'units': 'count', + 'source': 'SP2-XR firmware on-board calculation', + 'note': note, + }) + + if incan_cols: + psd_ds['IncanNumEnsemble'] = xr.DataArray( + _stack(incan_cols), dims=('time', 'num_bins'), + attrs={ + 'long_name': + 'Incandescence number distribution (SP2-XR firmware, raw counts)', + 'standard_name': 'incandescence_number_distribution', + 'units': 'count', + 'source': 'SP2-XR firmware on-board calculation', + 'note': note, + }) + + psd_ds.attrs['source'] = 'SP2-XR firmware' + + _attach_sp2xr_calibration_attrs(psd_ds, file_name) + + return psd_ds + + def get_hk_variable_names(my_df): """ This procedure will return al ist of variables in the diff --git a/pysp2/io/read_pbp.py b/pysp2/io/read_pbp.py new file mode 100644 index 00000000..9660126a --- /dev/null +++ b/pysp2/io/read_pbp.py @@ -0,0 +1,163 @@ +""" +Python module for reading SP2-XR Particle-by-Particle (PbP) files. +""" + +import os +import re +import warnings + +import numpy as np +import pandas as pd + +from .read_hk import read_sp2xr_hk_file, _attach_sp2xr_calibration_attrs + + +# SP2-XR PbP column names mapped to PySP2-style equivalents. Variables on the +# right are intended to look like the output of pysp2.util.gaussian_fit() so +# that downstream calibration utilities can be used with SP2-XR data. +_PBP_RENAME_MAP = { + 'Scatter relPeak': 'PkHt_ch0', + 'Scatter Peak Time': 'PkPos_ch0', + 'Scatter FWHM': 'PkFWHM_ch0', + 'Scatter Transit Time': 'ScatTransitTime', + 'Incand relPeak': 'PkHt_ch1', + 'Incand Peak Time': 'PkPos_ch1', + 'Incand FWHM': 'PkFWHM_ch1', + 'Incand Transit Time': 'IncTransitTime', + 'Incand Delay': 'IncanDelay', + 'Particle Flags': 'ParticleFlags', + 'Particle Time Stamp': 'ParticleTimeStamp', + 'Packet Time Stamp': 'PacketTimeStamp', + 'Time (sec)': 'TimeWave', + 'Dropped Records': 'DroppedRecords', + 'Record Count': 'RecordCount', + 'Record Size': 'RecordSize', +} + + +def _find_matching_hk_file(pbp_path): + """ + Locate the SP2-XR HK file that corresponds to a PbP file. + + There is one HK file per acquisition session (always named ``_x0001``), + while a session may produce many PbP files (``_x0001``, ``_x0002``, ...). + Both .csv and .zip variants are checked. + + Returns None if no candidate exists on disk. + """ + base = re.sub(r'PbP', 'hk', pbp_path) + base = re.sub(r'(_x)\d{4}', r'\g<1>0001', base) + base = re.sub(r'\.(csv|zip)$', '', base) + + for ext in ('.csv', '.zip'): + candidate = base + ext + if os.path.exists(candidate): + return candidate + return None + + +def read_sp2xr_pbp(file_name, keep_firmware_calibration=False): + """ + Read a SP2-XR Particle-by-Particle (PbP) CSV or ZIP file into an + xarray Dataset. + + The SP2-XR firmware performs peak fitting on-board, so the PbP file + already contains per-particle peak heights, positions, FWHMs, transit + times, and pre-calibrated optical diameter / BC mass. This reader + maps the peak-fit outputs to PySP2-style names (PkHt_ch0, PkHt_ch1, + ...) so that the dataset can be fed into PySP2 calibration utilities + (e.g. pysp2.util.calc_diams_masses) with appropriate SP2-XR + coefficients. + + By default, the pre-calibrated 'Scatter Size (nm)' and 'Incand Mass + (fg)' columns are dropped to encourage recalibration with consistent + curves. Pass keep_firmware_calibration=True to retain them as + 'firmware_ScatterSize_nm' and 'firmware_IncandMass_fg'; in that case + the scattering / incandescence calibration CSVs ('*_Scatt_*.csv' and + '*_Incan_*.csv') are auto-located alongside the PbP file and attached + as dataset attributes for provenance. + + Absolute particle datetimes are derived from the matching SP2-XR HK + file, which is auto-located in the same directory. Each acquisition + session writes one HK file (always ``_x0001``) and one or more PbP + files; the HK provides the wall-clock reference that converts the + PbP instrument seconds to UTC. If no matching HK file is found, a + warning is emitted and the dataset is returned without an absolute + 'time' coordinate. + + Parameters + ---------- + file_name: str + Path to the PbP CSV or ZIP file. + keep_firmware_calibration: bool + If True, retain the SP2-XR firmware's 'Scatter Size (nm)' and + 'Incand Mass (fg)' as 'firmware_ScatterSize_nm' and + 'firmware_IncandMass_fg'. Default False. + + Returns + ------- + pbp_ds: xarray.Dataset + Per-particle data indexed by 'event_index'. Variable names follow + PySP2 conventions for peak fit outputs (PkHt_ch0, PkHt_ch1, + PkFWHM_ch0, ...), with SP2-XR-specific columns kept under + descriptive names (ScatTransitTime, IncTransitTime, IncanDelay, + ParticleFlags). + """ + + # pandas handles both .csv and .zip transparently + df = pd.read_csv(file_name) + + # Drop the 'Reserved' column (always empty) and, by default, the + # firmware-calibrated diameter / mass. + drop_cols = ['Reserved'] + if not keep_firmware_calibration: + drop_cols += ['Scatter Size (nm)', 'Incand Mass (fg)'] + df = df.drop(columns=[c for c in drop_cols if c in df.columns]) + + rename_map = dict(_PBP_RENAME_MAP) + if keep_firmware_calibration: + rename_map['Scatter Size (nm)'] = 'firmware_ScatterSize_nm' + rename_map['Incand Mass (fg)'] = 'firmware_IncandMass_fg' + # The packet-level 'Flag' shares its name with the per-particle flag + # column; rename it explicitly to avoid the collision. + if 'Flag' in df.columns: + rename_map['Flag'] = 'PacketFlag' + df = df.rename(columns=rename_map) + + # Use a monotonic event_index as the primary dimension, consistent + # with read_sp2()/read_sp2xr(). + df.index = pd.Index(np.arange(len(df), dtype='float32'), name='event_index') + ds = df.to_xarray() + ds['EventIndex'] = ds['event_index'] + + # Attach calibration curve provenance only when the firmware-calibrated + # size / mass columns are retained. + if keep_firmware_calibration: + _attach_sp2xr_calibration_attrs(ds, file_name) + + # Auto-locate the matching HK file to derive absolute datetimes. + # The filename itself cannot be trusted: it always encodes the session + # start time, so PbP files past _x0001 would be off by the duration + # of the preceding files. The HK file provides the only safe anchor. + hk_file_name = _find_matching_hk_file(file_name) + if hk_file_name is None: + warnings.warn( + "No matching SP2-XR HK file found next to %r; " + "returning relative instrument seconds only. " + "Place the HK file alongside the PbP file (with '_x0001') to " + "get absolute particle datetimes." % file_name, + UserWarning, + stacklevel=2, + ) + return ds + + hk_ds = read_sp2xr_hk_file(hk_file_name) + t0 = pd.Timestamp(hk_ds['time'].values[0]) + # After read_sp2xr_hk_file strips the unit suffix, the HK 'Time (sec)' + # column is exposed as 'Time'. + hk_first_sec = float(hk_ds['Time'].values[0]) + delta_sec = ds['ParticleTimeStamp'].values - hk_first_sec + time_values = t0 + pd.to_timedelta(delta_sec, unit='s') + ds = ds.assign_coords(time=('event_index', time_values)) + + return ds diff --git a/pysp2/io/read_sp2b.py b/pysp2/io/read_sp2b.py index 1b2b491e..3f1ba978 100644 --- a/pysp2/io/read_sp2b.py +++ b/pysp2/io/read_sp2b.py @@ -2,6 +2,7 @@ import struct import numpy as np import platform +import zipfile from datetime import datetime, timezone @@ -153,3 +154,163 @@ def read_sp2(file_name, debug=False, arm_convention=True): return my_ds else: return None + + +def read_sp2xr(file_name, debug=False): + """ + Loads a binary SP2-XR raw data file and returns all of the waveforms + into an xarray Dataset. + + The SP2-XR file format differs from the original SP2: + each record carries its own (rows, cols) header instead of a single + file-level header, the waveform samples are stored as 4-byte integers + (rather than 2-byte) and only two detector channels are written + (ch0 = scattering, ch1 = incandescence). The time encoding is the + same as the SP2 (TimeDiv10000 * 10000 + TimeRemainder, in seconds + since 1904-01-01). + + Parameters + ---------- + file_name: str + The name of the .sp2b file (or a .zip containing the .sp2b) to read. + debug: bool + Set to true for verbose output. + + Returns + ------- + dataset: xarray.Dataset + The xarray Dataset containing the raw SP2-XR waveforms for each + particle. Variable names mirror read_sp2() (Data_ch0, Data_ch1, + Flag, EventIndex, time, ...). + """ + + if file_name.lower().endswith(".zip"): + with zipfile.ZipFile(file_name, "r") as zf: + inner = zf.namelist()[0] + my_data = zf.read(inner) + else: + my_data = open(file_name, "rb").read() + + if len(my_data) == 0: + return None + + # Pre-scan: locate the start byte of every record and verify shape consistency + record_starts = [] + pos = 0 + numSamples = None + numChannels = None + + while pos + 8 <= len(my_data): + rows, cols = struct.unpack(">2I", my_data[pos:pos + 8]) + # rows == 0 marks an explicit end-of-data marker + if rows == 0: + break + + if numSamples is None: + numSamples, numChannels = rows, cols + elif (rows, cols) != (numSamples, numChannels): + raise ValueError( + "SP2-XR records with varying waveform dimensions are not " + "supported (record %d: (%d, %d) vs first (%d, %d))." % + (len(record_starts), rows, cols, numSamples, numChannels)) + + wave_bytes = rows * cols * 4 + # waveform + Flag(2) + 7 floats(28) + 2 doubles(16) + array_2_dim(4) + meta_end = pos + 8 + wave_bytes + 2 + 28 + 16 + 4 + if meta_end > len(my_data): + break + a2_dim = struct.unpack(">I", my_data[meta_end - 4:meta_end])[0] + rec_end = meta_end + a2_dim * 4 + if rec_end > len(my_data): + break + + record_starts.append(pos) + pos = rec_end + + numRecords = len(record_starts) + if numRecords == 0: + return None + + if debug: + print("Loaded file with numRecords = %d, numChannels = %d, " + "numSamples = %d" % (numRecords, numChannels, numSamples)) + + DataWave = np.zeros((numRecords, numChannels, numSamples), dtype='int32') + Flag = np.zeros(numRecords, dtype='int32') + TimeWave = np.zeros(numRecords, dtype='float32') + Res1 = np.zeros(numRecords, dtype='float32') + EventIndex = np.zeros(numRecords, dtype='float32') + TimeDiv10000 = np.zeros(numRecords, dtype='float32') + TimeRemainder = np.zeros(numRecords, dtype='float32') + Res5 = np.zeros(numRecords, dtype='float32') + Res6 = np.zeros(numRecords, dtype='float32') + Res7 = np.zeros(numRecords, dtype='float64') + Res8 = np.zeros(numRecords, dtype='float64') + + wave_bytes = numSamples * numChannels * 4 + wave_fmt = ">" + "i" * (numSamples * numChannels) + + for i, start in enumerate(record_starts): + # Skip the per-record (rows, cols) header + p = start + 8 + + # Waveform: interleaved (ch0[0], ch1[0], ch0[1], ch1[1], ...) + raw = np.array(struct.unpack(wave_fmt, my_data[p:p + wave_bytes]), + dtype='int32') + # Reshape (numSamples, numChannels) then transpose to (numChannels, numSamples) + DataWave[i] = raw.reshape(numSamples, numChannels).T + p += wave_bytes + + Flag[i] = struct.unpack(">H", my_data[p:p + 2])[0] + p += 2 + + floats = struct.unpack(">7f", my_data[p:p + 28]) + TimeWave[i] = floats[0] + Res1[i] = floats[1] + EventIndex[i] = floats[2] + TimeDiv10000[i] = floats[3] + TimeRemainder[i] = floats[4] + Res5[i] = floats[5] + Res6[i] = floats[6] + p += 28 + + doubles = struct.unpack(">2d", my_data[p:p + 16]) + Res7[i] = doubles[0] + Res8[i] = doubles[1] + + # Reconstruct UTC datetime exactly like read_sp2() + UTCtime = TimeDiv10000.astype('float64') * 10000 + TimeRemainder.astype('float64') + diff_epoch_1904 = ( + datetime(1970, 1, 1) - datetime(1904, 1, 1)).total_seconds() + UTCdatetime = np.array([ + datetime.fromtimestamp( + x - diff_epoch_1904, tz=timezone.utc).replace(tzinfo=None) + for x in UTCtime]) + + # Build xarray Dataset + Flag_da = xr.DataArray(Flag, dims={'event_index': EventIndex}) + Res1_da = xr.DataArray(Res1, dims={'event_index': EventIndex}) + Res5_da = xr.DataArray(Res5, dims={'event_index': EventIndex}) + Res6_da = xr.DataArray(Res6, dims={'event_index': EventIndex}) + Res7_da = xr.DataArray(Res7, dims={'event_index': EventIndex}) + Res8_da = xr.DataArray(Res8, dims={'event_index': EventIndex}) + Time = xr.DataArray(UTCdatetime, dims={'event_index': EventIndex}) + EventInd = xr.DataArray(EventIndex, dims={'event_index': EventIndex}) + DateTimeWaveUTC = xr.DataArray(UTCtime, dims={'event_index': EventIndex}) + TimeWave_da = xr.DataArray(TimeWave, dims={'event_index': EventIndex}) + + my_ds = xr.Dataset({ + 'time': Time, 'Flag': Flag_da, + 'Res1': Res1_da, 'Res5': Res5_da, 'Res6': Res6_da, + 'Res7': Res7_da, 'Res8': Res8_da, + 'EventIndex': EventInd, + 'DateTimeWaveUTC': DateTimeWaveUTC, + 'TimeWave': TimeWave_da}) + + columns = np.arange(numSamples) + for ch in range(numChannels): + my_ds['Data_ch' + str(ch)] = xr.DataArray( + DataWave[:, ch, :], + dims={'event_index': EventIndex, 'columns': columns}) + + return my_ds diff --git a/pysp2/testing/data/SP2XR/20190508172218 20181009_Incan_SN3_PSI.csv b/pysp2/testing/data/SP2XR/20190508172218 20181009_Incan_SN3_PSI.csv new file mode 100644 index 00000000..bb43f45a --- /dev/null +++ b/pysp2/testing/data/SP2XR/20190508172218 20181009_Incan_SN3_PSI.csv @@ -0,0 +1,21 @@ +0.05,5.07E+04 +5.20526,3.26E+07 +10.3605,6.88E+07 +15.5158,1.07E+08 +20.6711,1.47E+08 +25.8263,1.87E+08 +30.9816,2.29E+08 +36.1368,2.73E+08 +41.2921,3.18E+08 +46.4474,3.65E+08 +51.6026,4.14E+08 +56.7579,4.65E+08 +61.9132,5.16E+08 +67.0684,5.69E+08 +72.2237,6.22E+08 +77.3789,6.75E+08 +82.5342,7.28E+08 +87.6895,7.82E+08 +92.8447,8.36E+08 +98,8.89E+08 +,2.40E+09 \ No newline at end of file diff --git a/pysp2/testing/data/SP2XR/20190508172218 20181009_Scatt_SN3_PSI.csv b/pysp2/testing/data/SP2XR/20190508172218 20181009_Scatt_SN3_PSI.csv new file mode 100644 index 00000000..fa4d1121 --- /dev/null +++ b/pysp2/testing/data/SP2XR/20190508172218 20181009_Scatt_SN3_PSI.csv @@ -0,0 +1,21 @@ +100,3.61E+04 +121.053,1.10E+05 +142.105,2.81E+05 +163.158,6.30E+05 +184.211,1.28E+06 +205.263,2.42E+06 +226.316,4.30E+06 +247.368,7.25E+06 +268.421,1.17E+07 +289.474,1.83E+07 +310.526,2.76E+07 +331.579,4.07E+07 +352.632,5.84E+07 +373.684,8.22E+07 +394.737,1.14E+08 +415.789,1.54E+08 +436.842,2.06E+08 +457.895,2.72E+08 +478.947,3.55E+08 +500,4.57E+08 +,5.59E+08 \ No newline at end of file diff --git a/pysp2/testing/data/SP2XR/20190508172218 Calibration 20181005.ini b/pysp2/testing/data/SP2XR/20190508172218 Calibration 20181005.ini new file mode 100644 index 00000000..6441aefc --- /dev/null +++ b/pysp2/testing/data/SP2XR/20190508172218 Calibration 20181005.ini @@ -0,0 +1,266 @@ +[Custom] +Display Tab=TRUE +Display Names=<2> +Display Names 0=set 1 +Display Names 1=set 2 +Sets=<2> +Sets 0.Cluster.Graph 1=<8> +Sets 0.Cluster.Graph 1 0.Plot.Channel=+5V Laser Rail (V) +Sets 0.Cluster.Graph 1 0.Plot.Left/Right=FALSE +Sets 0.Cluster.Graph 1 1.Plot.Channel= +5V Rail (V) +Sets 0.Cluster.Graph 1 1.Plot.Left/Right=FALSE +Sets 0.Cluster.Graph 1 2.Plot.Channel=+12V Rail (V) +Sets 0.Cluster.Graph 1 2.Plot.Left/Right=FALSE +Sets 0.Cluster.Graph 1 3.Plot.Channel=3.3V Iso Rail (V) +Sets 0.Cluster.Graph 1 3.Plot.Left/Right=FALSE +Sets 0.Cluster.Graph 1 4.Plot.Channel=UPS Output (V) +Sets 0.Cluster.Graph 1 4.Plot.Left/Right=TRUE +Sets 0.Cluster.Graph 1 5.Plot.Channel=Inlet Air Temp (C) +Sets 0.Cluster.Graph 1 5.Plot.Left/Right=TRUE +Sets 0.Cluster.Graph 1 6.Plot.Channel=Crystal TEC Temp (C) +Sets 0.Cluster.Graph 1 6.Plot.Left/Right=TRUE +Sets 0.Cluster.Graph 1 7.Plot.Channel=Laser Heatsink Temp (C) +Sets 0.Cluster.Graph 1 7.Plot.Left/Right=TRUE +Sets 0.Cluster.Graph 2=<8> +Sets 0.Cluster.Graph 2 0.Plot.Channel=Laser TEC Temp (C) +Sets 0.Cluster.Graph 2 0.Plot.Left/Right=FALSE +Sets 0.Cluster.Graph 2 1.Plot.Channel=Crystal TEC Temp (C) +Sets 0.Cluster.Graph 2 1.Plot.Left/Right=FALSE +Sets 0.Cluster.Graph 2 2.Plot.Channel=Inlet Air Temp (C) +Sets 0.Cluster.Graph 2 2.Plot.Left/Right=FALSE +Sets 0.Cluster.Graph 2 3.Plot.Channel=Computer Heatsink Temp (C) +Sets 0.Cluster.Graph 2 3.Plot.Left/Right=FALSE +Sets 0.Cluster.Graph 2 4.Plot.Channel=Laser Heatsink Temp (C) +Sets 0.Cluster.Graph 2 4.Plot.Left/Right=FALSE +Sets 0.Cluster.Graph 2 5.Plot.Channel=Outlet Air Temp (C) +Sets 0.Cluster.Graph 2 5.Plot.Left/Right=FALSE +Sets 0.Cluster.Graph 2 6.Plot.Channel=Battery Temp (C) +Sets 0.Cluster.Graph 2 6.Plot.Left/Right=FALSE +Sets 0.Cluster.Graph 2 7.Plot.Channel=Laser TEC Sense +Sets 0.Cluster.Graph 2 7.Plot.Left/Right=TRUE +Sets 1.Cluster.Graph 1=<4> +Sets 1.Cluster.Graph 1 0.Plot.Channel=Threshold Crossing Events +Sets 1.Cluster.Graph 1 0.Plot.Left/Right=FALSE +Sets 1.Cluster.Graph 1 1.Plot.Channel=Dual Qualified Scatter and Incand Particles +Sets 1.Cluster.Graph 1 1.Plot.Left/Right=FALSE +Sets 1.Cluster.Graph 1 2.Plot.Channel=Qualified Scatter Only Particles +Sets 1.Cluster.Graph 1 2.Plot.Left/Right=FALSE +Sets 1.Cluster.Graph 1 3.Plot.Channel=Qualified Incand Only Particles +Sets 1.Cluster.Graph 1 3.Plot.Left/Right=FALSE +Sets 1.Cluster.Graph 2=<8> +Sets 1.Cluster.Graph 2 0.Plot.Channel=Baseline Sizer Lo +Sets 1.Cluster.Graph 2 0.Plot.Left/Right=FALSE +Sets 1.Cluster.Graph 2 1.Plot.Channel=Baseline Sizer Hi +Sets 1.Cluster.Graph 2 1.Plot.Left/Right=FALSE +Sets 1.Cluster.Graph 2 2.Plot.Channel=Baseline Incand Lo +Sets 1.Cluster.Graph 2 2.Plot.Left/Right=FALSE +Sets 1.Cluster.Graph 2 3.Plot.Channel=Baseline Incand Hi +Sets 1.Cluster.Graph 2 3.Plot.Left/Right=FALSE +Sets 1.Cluster.Graph 2 4.Plot.Channel=Bandwidth Sizer Hi +Sets 1.Cluster.Graph 2 4.Plot.Left/Right=TRUE +Sets 1.Cluster.Graph 2 5.Plot.Channel=Bandwidth Sizer Lo +Sets 1.Cluster.Graph 2 5.Plot.Left/Right=TRUE +Sets 1.Cluster.Graph 2 6.Plot.Channel=Bandwidth Incand Lo +Sets 1.Cluster.Graph 2 6.Plot.Left/Right=TRUE +Sets 1.Cluster.Graph 2 7.Plot.Channel=Bandwidth Incand Hi +Sets 1.Cluster.Graph 2 7.Plot.Left/Right=TRUE +[Raw Options] +byte 0: data mux=High Dynamic Range Traces +Raw Data Particle Selection=First Scatter +Scatter relPeak=0 +Incand relPeak=0 +Inter-raw Period (ms)=100 +leader sample count=400 +footer sample count=400 +[Scatter Parameters] +Graph=Counts +X Mode=Size +Norm?=FALSE +Cumulative=FALSE +[Versions] +Instrument Name=SP2XR +SP2 Version=2.01.01.19 +Acq Version=2.00.00.00 +Last Date Updated=4/11/2019 6:24:36 AM +[Trigger Settings] +Scatter Transit Time Min=10 +Scatter Transit Time Max=65535 +Scatter FWHM Min=30 +Scatter FWHM Max=65535 +Scatter Inter Particle Time Min=10 +Incand Transit Time Min=5 +Incand Transit Time Max=65535 +Incand FWHM Min=30 +Incand FWHM Max=65535 +Incand Inter Particle Time Min=10 +Paired Particle Delay Max=10 +Scatter Threshold Min=36100 +Scatter Hysteresis Min=2703 +Incand Threshold Min=50700 +Incand Hysteresis Min=5394 +Scatter Threshold Max=559000000 +Scatter Hysteresis Max=0 +Incand Threshold Max=2147483647 +Incand Hysteresis Max=0 +Forced Trigger=FALSE +Forced Trigger Interval(ms)=1000 +[# Samples S] +# Samples S=0 +[Program] +Data File Path=D:\DMT\SP2XR Data +Restart Files=FALSE +Graph 0 Left=YAG Output Monitor (V) +Graph 0 Right=Laser Driver Current Monitor (A) +Graph 1 Left=Scattering Particle Conc (cts/ccm) +Graph 1 Right=Incand Particle Conc (cts/ccm) +Control Cycle Time=0 +NTP Server= +Write File?=FALSE +Graph Backgrounds=16448250 +Graph 2 Left=Sheath Flow Controller Read (sccm) +Graph 2 Right=Sample Flow Controller Read (sccm) +Description= +Serial Number=0001 +2 or 3 Graphs=TRUE +Time Range=12 Hours +OSDS Format= +Num to Avg=0 +Global 2=0 +Global 3=0 +Global 4=0 +Global 5=0 +Shut Down Sequence= +Crisis Shut Down Seq=turn off pump and laser +Write SP2b Data File=TRUE +Write HK File=TRUE +Write Raw Binary Data=TRUE +TabChannelNum=0 +OptimizeChannelNum=0 +Write HDF5 File=TRUE +NumParticlesPerHDF5File=100000 +Laser Temp Set=29 +Laser Current Set=1.9 +Spare 4 Set=0 +Spare 5 Set=0 +PMT HV Set=0.46 +Interface Board Scaling=<24> +Interface Board Scaling 0=1/(0.000849+0.000261*ln(10000/(65536/VAR-1))+0.000000125*ln(10000/(65536/VAR-1))^3)-273.15 +Interface Board Scaling 1= +Interface Board Scaling 2=(1.0 / (1.1135E-3 + 2.368E-4 * (ln(1E4 * (1-(VAR / (65536.0 * 1.001))) / (VAR / (65536.0 * 1.001)))) + 7.396E-8 * (ln(1E4 * (1-(VAR / (65536.0 * 1.001))) / (VAR / (65536.0 * 1.001))))^3)) - 273.15 +Interface Board Scaling 3=(1.0 / (1.1135E-3 + 2.368E-4 * (ln(1E4 * (1-(VAR / (65536.0 * 1.001))) / (VAR / (65536.0 * 1.001)))) + 7.396E-8 * (ln(1E4 * (1-(VAR / (65536.0 * 1.001))) / (VAR / (65536.0 * 1.001))))^3)) - 273.15 +Interface Board Scaling 4=(1.0 / (1.1135E-3 + 2.368E-4 * (ln(1E4 * (1-(VAR / (65536.0 * 1.001))) / (VAR / (65536.0 * 1.001)))) + 7.396E-8 * (ln(1E4 * (1-(VAR / (65536.0 * 1.001))) / (VAR / (65536.0 * 1.001))))^3)) - 273.15 +Interface Board Scaling 5=(1.0 / (1.1135E-3 + 2.368E-4 * (ln(1E4 * (1-(VAR / (65536.0 * 1.001))) / (VAR / (65536.0 * 1.001)))) + 7.396E-8 * (ln(1E4 * (1-(VAR / (65536.0 * 1.001))) / (VAR / (65536.0 * 1.001))))^3)) - 273.15 +Interface Board Scaling 6=0.0000625*VAR +Interface Board Scaling 7=VAR/72+105.55 +Interface Board Scaling 8=0.125*VAR +Interface Board Scaling 9=0.000125*VAR +Interface Board Scaling 10=0.000125*VAR +Interface Board Scaling 11=2.01*0.0000625 *VAR +Interface Board Scaling 12= +Interface Board Scaling 13=0.000125*VAR +Interface Board Scaling 14=0.000125*VAR +Interface Board Scaling 15=4.57*0.0000625*VAR +Interface Board Scaling 16=0.0152587890625*VAR +Interface Board Scaling 17=1/(0.000894+0.00025*ln((1662.22* VAR)/(39897.3 - VAR))+0.0000002*ln((1662.22*VAR)/(39897.3 -VAR))^3)-273.15 +Interface Board Scaling 18=(69.8+11.5) /11.5*0.0000625*VAR +Interface Board Scaling 19=4.57*0.0000625*VAR +Interface Board Scaling 20=0.000125*VAR +Interface Board Scaling 21=1.1*0.0000625 *VAR +Interface Board Scaling 22= +Interface Board Scaling 23= +ABD 0408 Scaling=<16> +ABD 0408 Scaling 0= +ABD 0408 Scaling 1= +ABD 0408 Scaling 2= +ABD 0408 Scaling 3= +ABD 0408 Scaling 4= +ABD 0408 Scaling 5=VAR*0.00625 +ABD 0408 Scaling 6=7.98*(6.25E-5*VAR) +ABD 0408 Scaling 7=-84.962*(6.25E-5*VAR-1.8639) +ABD 0408 Scaling 8= +ABD 0408 Scaling 9= +ABD 0408 Scaling 10= +ABD 0408 Scaling 11= +ABD 0408 Scaling 12= +ABD 0408 Scaling 13= +ABD 0408 Scaling 14= +ABD 0408 Scaling 15= +Save Every Nth Particle=1 +zip files=TRUE +Particle Density (g/cc)=1.8 +Pump Start-Up State=FALSE +[Detector DAC Settings] +Scatter unused A=23790 +Scatter unused B=65535 +[Incand Parameters] +Graph=Counts +X Mode=Mass +Norm?=FALSE +Cumulative=FALSE +[Control] +Alarms=<3> +Alarms 0.Alarm.Name=TurnPumpON +Alarms 0.Alarm.Channel=Elapsed Time +Alarms 0.Alarm.Condition=> +Alarms 0.Alarm.Threshold=0 +Alarms 0.Alarm.Action=Turn Pump On +Alarms 0.Alarm.Hysteresis=0 +Alarms 0.Alarm.Target Channel= +Alarms 0.Alarm.Set Value=0 +Alarms 0.Alarm.Min Time=0 +Alarms 0.Alarm.Sequence=turn off pump and laser +Alarms 0.Alarm.Target Alarm=TurnPumpON +Alarms 1.Alarm.Name=Turn Laser On +Alarms 1.Alarm.Channel=Elapsed Time +Alarms 1.Alarm.Condition=> +Alarms 1.Alarm.Threshold=5 +Alarms 1.Alarm.Action=Turn Laser On +Alarms 1.Alarm.Hysteresis=0 +Alarms 1.Alarm.Target Channel= +Alarms 1.Alarm.Set Value=0 +Alarms 1.Alarm.Min Time=0 +Alarms 1.Alarm.Sequence= +Alarms 1.Alarm.Target Alarm=Turn Laser On +Alarms 2.Alarm.Name=StartRecording +Alarms 2.Alarm.Channel=Elapsed Time +Alarms 2.Alarm.Condition=> +Alarms 2.Alarm.Threshold=10 +Alarms 2.Alarm.Action=Start Writing Data +Alarms 2.Alarm.Hysteresis=0 +Alarms 2.Alarm.Target Channel= +Alarms 2.Alarm.Set Value=0 +Alarms 2.Alarm.Min Time=0 +Alarms 2.Alarm.Sequence= +Alarms 2.Alarm.Target Alarm=Turn Laser On +Sequences=<0> +Timers=<0> +[Pump] +Pump=TRUE +[# Samples I] +# Samples I=0 +[SampleFlow] +SampleFlow (sccm)=30 +[SheathFlow] +SheathFlow (sccm)=600 +[Polling Interval] +HK Stream Interval (ms)=1000 +PbP Stream Interval (ms)=1000 +[Fans Settings] +Case Fan Mode=normal +Case Fan On Threshold=35 +Case Fan Off Threshold=33 +Laser Fan Mode=forced off +Laser Fan On Threshold=27 +Laser Fan Off Threshold=24 +[Channel Order] +Channel Order=<0> +Digits=<0> +[Streaming Data] +Port=0 +Baud Rate=0 +Channels=<0> +Bus=Serial Port +[Calculated Channels] +Calculated Channels=<0> +[Calculations] +Calculations=<0> diff --git a/pysp2/testing/data/SP2XR/SP2XR_PbP_20190508172218_x0001.zip b/pysp2/testing/data/SP2XR/SP2XR_PbP_20190508172218_x0001.zip new file mode 100644 index 00000000..490f5458 Binary files /dev/null and b/pysp2/testing/data/SP2XR/SP2XR_PbP_20190508172218_x0001.zip differ diff --git a/pysp2/testing/data/SP2XR/SP2XR_PbP_20190508172218_x0002.zip b/pysp2/testing/data/SP2XR/SP2XR_PbP_20190508172218_x0002.zip new file mode 100644 index 00000000..f79aaa5a Binary files /dev/null and b/pysp2/testing/data/SP2XR/SP2XR_PbP_20190508172218_x0002.zip differ diff --git a/pysp2/testing/data/SP2XR/SP2XR_hk_20190508172218_x0001.zip b/pysp2/testing/data/SP2XR/SP2XR_hk_20190508172218_x0001.zip new file mode 100644 index 00000000..c7f75925 Binary files /dev/null and b/pysp2/testing/data/SP2XR/SP2XR_hk_20190508172218_x0001.zip differ diff --git a/pysp2/testing/data/SP2XR/SP2XR_sp2b_20190508172218_x0001.zip b/pysp2/testing/data/SP2XR/SP2XR_sp2b_20190508172218_x0001.zip new file mode 100644 index 00000000..68f30b79 Binary files /dev/null and b/pysp2/testing/data/SP2XR/SP2XR_sp2b_20190508172218_x0001.zip differ diff --git a/pysp2/testing/sample_files.py b/pysp2/testing/sample_files.py index 13b8e421..3af4a0af 100644 --- a/pysp2/testing/sample_files.py +++ b/pysp2/testing/sample_files.py @@ -26,4 +26,14 @@ def _sample_file(local_name: str) -> str: print("DATASETS:", DATASETS.registry_files) EXAMPLE_SP2B_PSL = _sample_file("20230721x002.sp2b") EXAMPLE_INI_PSL = _sample_file("20230721121710.ini") -EXAMPLE_HK_PSL = _sample_file("20230721121711.hk") \ No newline at end of file +EXAMPLE_HK_PSL = _sample_file("20230721121711.hk") + +SP2XR_DATA_PATH = os.path.join(DATA_PATH, 'SP2XR') +EXAMPLE_SP2XR_SP2B = os.path.join( + SP2XR_DATA_PATH, 'SP2XR_sp2b_20190508172218_x0001.zip') +EXAMPLE_SP2XR_HK = os.path.join( + SP2XR_DATA_PATH, 'SP2XR_hk_20190508172218_x0001.zip') +EXAMPLE_SP2XR_INI = os.path.join( + SP2XR_DATA_PATH, '20190508172218 Calibration 20181005.ini') +EXAMPLE_SP2XR_PBP = os.path.join( + SP2XR_DATA_PATH, 'SP2XR_PbP_20190508172218_x0001.zip') \ No newline at end of file diff --git a/tests/test_read_sp2xr.py b/tests/test_read_sp2xr.py new file mode 100644 index 00000000..cea38e59 --- /dev/null +++ b/tests/test_read_sp2xr.py @@ -0,0 +1,53 @@ +import numpy as np + +import pysp2 + + +def test_read_sp2xr_hk_file(): + hk = pysp2.io.read_sp2xr_hk_file(pysp2.testing.EXAMPLE_SP2XR_HK) + assert hk.sizes['time'] == 20650 + assert 'Laser TEC Temp' in hk.data_vars + np.testing.assert_almost_equal( + float(hk['Cavity Pressure'].mean()), 288.88, decimal=2) + # Firmware-computed PSD bin columns should be excluded; use + # read_sp2xr_hk_psd() to access them. + assert not any(str(v).startswith(('Scatter Bin', 'Incand Bin')) + for v in hk.data_vars) + + +def test_read_sp2xr_hk_psd(): + psd = pysp2.io.read_sp2xr_hk_psd(pysp2.testing.EXAMPLE_SP2XR_HK) + assert psd.sizes['time'] == 20650 + # The number of bins is read dynamically from the file; just verify + # that some bins are present and that both channels agree. + assert psd.sizes['num_bins'] > 0 + assert 'ScatNumEnsemble' in psd.data_vars + assert 'IncanNumEnsemble' in psd.data_vars + assert psd['ScatNumEnsemble'].shape == psd['IncanNumEnsemble'].shape + # Calibration sample CSVs are committed alongside the HK file and + # should be auto-detected and attached as dataset attributes. + assert 'ScatCalibration_Diameter_nm' in psd.attrs + assert 'IncanCalibration_Mass_fg' in psd.attrs + + +def test_read_sp2xr(): + ds = pysp2.io.read_sp2xr(pysp2.testing.EXAMPLE_SP2XR_SP2B) + assert ds.sizes['event_index'] == 457 + assert ds.sizes['columns'] == 4096 + assert 'Data_ch0' in ds.data_vars + assert 'Data_ch1' in ds.data_vars + assert ds['Data_ch0'].isel(event_index=0).max().item() > 6_000_000 + + +def test_read_sp2xr_pbp(): + ds = pysp2.io.read_sp2xr_pbp(pysp2.testing.EXAMPLE_SP2XR_PBP) + assert ds.sizes['event_index'] == 341715 + # The matching HK file lives alongside the PbP file, so an absolute + # 'time' coordinate should be derived automatically. + assert 'time' in ds.coords + # Peak-fit outputs are exposed under PySP2-style names. + assert 'PkHt_ch0' in ds.data_vars + assert 'PkHt_ch1' in ds.data_vars + # Firmware-calibrated columns are dropped by default. + assert 'Scatter Size (nm)' not in ds.data_vars + assert 'Incand Mass (fg)' not in ds.data_vars