Skip to content

Commit ff79a12

Browse files
authored
Merge pull request #177 from florisvb/robustify-tvr
Robustifying TVR
2 parents 21b0b5f + 4ae5d6c commit ff79a12

6 files changed

Lines changed: 65 additions & 57 deletions

File tree

pynumdiff/kalman_smooth/_kalman_smooth.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
import numpy as np
22
from warnings import warn
33
from scipy.linalg import expm, sqrtm
4-
from scipy.stats import norm
54
from time import time
6-
75
try: import cvxpy
86
except ImportError: pass
97

8+
from pynumdiff.utils.utility import huber_const
9+
1010

1111
def kalman_filter(y, dt_or_t, xhat0, P0, A, Q, C, R, B=None, u=None, save_P=True):
1212
"""Run the forward pass of a Kalman filter, with regular or irregular step size.
@@ -278,7 +278,7 @@ def robustdiff(x, dt, order, log_q, log_r, proc_huberM=6, meas_huberM=0):
278278
proper :math:`\\ell_1` normalization.
279279
280280
Note that :code:`log_q` and :code:`proc_huberM` are coupled, as are :code:`log_r` and :code:`meas_huberM`, via the relation
281-
:math:`\\text{Huber}(q^{-1/2}v, M) = q^{-1}\\text{Huber}(v, Mq^{-1/2})`, but these are still independent enough that for
281+
:math:`\\text{Huber}(q^{-1/2}v, M) = q^{-1}\\text{Huber}(v, Mq^{1/2})`, but these are still independent enough that for
282282
the purposes of optimization we cannot collapse them. Nor can :code:`log_q` and :code:`log_r` be combined into
283283
:code:`log_qr_ratio` as in RTS smoothing without the addition of a new absolute scale parameter, because :math:`q` and
284284
:math:`r` interact with the distinct Huber :math:`M` parameters.
@@ -329,11 +329,6 @@ def convex_smooth(y, A, Q, C, R, B=None, u=None, proc_huberM=6, meas_huberM=0):
329329
x_states = cvxpy.Variable((A.shape[0], N)) # each column is [position, velocity, acceleration, ...] at step n
330330
control = isinstance(B, np.ndarray) and isinstance(u, np.ndarray) # whether there is a control input
331331

332-
def huber_const(M): # from https://jmlr.org/papers/volume14/aravkin13a/aravkin13a.pdf, with correction for missing sqrt
333-
a = 2*np.exp(-M**2 / 2)/M # huber_const smoothly transitions Huber between 1-norm and 2-norm squared cases
334-
b = np.sqrt(2*np.pi)*(2*norm.cdf(M) - 1)
335-
return np.sqrt((2*a*(1 + M**2)/M**2 + b)/(a + b))
336-
337332
# It is extremely important to run time that CVXPY expressions be in vectorized form
338333
proc_resids = np.linalg.inv(sqrtm(Q)) @ (x_states[:,1:] - A @ x_states[:,:-1] - (0 if not control else B @ u[1:].T)) # all Q^(-1/2)(x_n - (A x_{n-1} + B u_n))
339334
meas_resids = np.linalg.inv(sqrtm(R)) @ (y.reshape(C.shape[0],-1) - C @ x_states) # all R^(-1/2)(y_n - C x_n)

pynumdiff/optimize/_optimize.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,10 @@
6565
{'sigma': (1e-2, 1e3),
6666
'lmbd': (1e-3, 0.5)}),
6767
tvrdiff: ({'gamma': [1e-2, 1e-1, 1, 10, 100, 1000],
68-
'order': {1, 2, 3}}, # warning: order 1 hacks the loss function when tvgamma is used, tends to win but is usually suboptimal choice in terms of true RMSE
69-
{'gamma': (1e-4, 1e7)}),
68+
'order': {1, 2, 3}, # warning: order 1 hacks the loss function when tvgamma is used, tends to win but is usually suboptimal choice in terms of true RMSE
69+
'huberM': [0., 1, 2, 6]}, # comb lower values more finely, because the scale of sigma is mad(x), bigger than mad(y-x) residuals
70+
{'gamma': (1e-4, 1e7),
71+
'huberM': (0, 6)}),
7072
velocity: ({'gamma': [1e-2, 1e-1, 1, 10, 100, 1000]}, # Deprecated method
7173
{'gamma': (1e-4, 1e7)}),
7274
iterative_velocity: ({'scale': 'small', # Rare to optimize this one, because it's longer-running than convex version

pynumdiff/tests/test_diff_methods.py

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
5757
(jerk, {'gamma':10}), (jerk, [10]),
5858
(iterative_velocity, {'num_iterations':5, 'gamma':0.05}), (iterative_velocity, [5, 0.05]),
5959
(smooth_acceleration, {'gamma':2, 'window_size':5}), (smooth_acceleration, [2, 5]),
60-
(lineardiff, {'order':3, 'gamma':5, 'window_size':11, 'solver':'CLARABEL'}), (lineardiff, [3, 5, 11], {'solver':'CLARABEL'})
60+
(lineardiff, {'order':3, 'gamma':0.01, 'window_size':11, 'solver':'CLARABEL'}), (lineardiff, [3, 0.01, 11], {'solver':'CLARABEL'})
6161
]
6262

6363
# All the testing methodology follows the exact same pattern; the only thing that changes is the
@@ -108,8 +108,8 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
108108
[(-25, -25), (0, -1), (0, 0), (1, 1)],
109109
[(-25, -25), (1, 1), (0, 0), (1, 1)],
110110
[(-25, -25), (3, 3), (0, 0), (3, 3)]],
111-
iterated_second_order: [[(-7, -8), (-25, -25), (0, -1), (0, 0)],
112-
[(-7, -8), (-14, -14), (0, -1), (0, 0)],
111+
iterated_second_order: [[(-25, -25), (-25, -25), (0, -1), (0, 0)],
112+
[(-14, -14), (-14, -14), (0, -1), (0, 0)],
113113
[(-1, -1), (0, 0), (0, -1), (0, 0)],
114114
[(0, 0), (1, 0), (0, 0), (1, 0)],
115115
[(1, 1), (2, 2), (1, 1), (2, 2)],
@@ -120,8 +120,8 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
120120
[(-25, -25), (-2, -2), (0, 0), (1, 1)],
121121
[(-25, -25), (1, 0), (0, 0), (1, 1)],
122122
[(-25, -25), (2, 2), (0, 0), (2, 2)]],
123-
iterated_fourth_order: [[(-7, -8), (-25, -25), (0, -1), (0, 0)],
124-
[(-7, -8), (-13, -13), (0, -1), (0, 0)],
123+
iterated_fourth_order: [[(-25, -25), (-25, -25), (0, -1), (0, 0)],
124+
[(-14, -14), (-13, -13), (0, -1), (0, 0)],
125125
[(-1, -1), (0, 0), (-1, -1), (0, 0)],
126126
[(0, -1), (1, 1), (0, 0), (1, 1)],
127127
[(1, 1), (2, 2), (1, 1), (2, 2)],
@@ -132,8 +132,8 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
132132
[(-2, -2), (0, 0), (0, -1), (1, 1)],
133133
[(0, 0), (1, 1), (0, -1), (1, 1)],
134134
[(0, 0), (3, 3), (0, 0), (3, 3)]],
135-
savgoldiff: [[(-7, -7), (-13, -14), (0, -1), (0, 0)],
136-
[(-7, -7), (-13, -13), (0, -1), (0, 0)],
135+
savgoldiff: [[(-13, -14), (-13, -14), (0, -1), (0, 0)],
136+
[(-13, -13), (-13, -13), (0, -1), (0, 0)],
137137
[(-2, -2), (-1, -1), (0, -1), (0, 0)],
138138
[(0, -1), (0, 0), (0, 0), (1, 0)],
139139
[(1, 1), (2, 2), (1, 1), (2, 2)],
@@ -164,16 +164,16 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
164164
[(1, 1), (3, 3), (1, 1), (3, 3)]],
165165
velocity: [[(-25, -25), (-18, -19), (0, -1), (1, 0)],
166166
[(-12, -12), (-11, -12), (-1, -1), (-1, -2)],
167-
[(0, 0), (1, 0), (0, 0), (1, 0)],
167+
[(0, -1), (1, 0), (0, -1), (1, 0)],
168168
[(0, -1), (1, 1), (0, 0), (1, 0)],
169-
[(1, 1), (2, 2), (1, 1), (2, 2)],
170-
[(1, 0), (3, 3), (1, 0), (3, 3)]],
171-
acceleration: [[(-25, -25), (-18, -18), (0, -1), (0, 0)],
169+
[(1, 0), (2, 2), (1, 0), (2, 2)],
170+
[(0, 0), (3, 3), (0, 0), (3, 3)]],
171+
acceleration: [[(-25, -25), (-18, -18), (0, -1), (1, 0)],
172172
[(-10, -10), (-9, -9), (-1, -1), (0, -1)],
173173
[(-10, -10), (-9, -10), (-1, -1), (0, -1)],
174174
[(0, -1), (1, 0), (0, -1), (1, 0)],
175-
[(1, 1), (2, 2), (1, 1), (2, 2)],
176-
[(1, 1), (3, 3), (1, 1), (3, 3)]],
175+
[(1, 0), (2, 2), (1, 0), (2, 2)],
176+
[(0, 0), (3, 3), (0, 0), (3, 3)]],
177177
jerk: [[(-25, -25), (-18, -18), (-1, -1), (0, 0)],
178178
[(-9, -10), (-9, -9), (-1, -1), (0, 0)],
179179
[(-10, -10), (-9, -10), (-1, -1), (0, 0)],
@@ -186,8 +186,8 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
186186
[(1, 0), (1, 1), (1, 0), (1, 1)],
187187
[(2, 1), (2, 2), (2, 1), (2, 2)],
188188
[(1, 1), (3, 3), (1, 1), (3, 3)]],
189-
smooth_acceleration: [[(-7, -8), (-18, -18), (0, -1), (0, 0)],
190-
[(-7, -7), (-10, -10), (-1, -1), (-1, -1)],
189+
smooth_acceleration: [[(-25, -25), (-21, -21), (0, -1), (0, 0)],
190+
[(-10, -11), (-10, -10), (-1, -1), (-1, -1)],
191191
[(-2, -2), (-1, -1), (-1, -1), (0, -1)],
192192
[(0, 0), (1, 0), (0, -1), (1, 0)],
193193
[(1, 1), (2, 2), (1, 1), (2, 2)],
@@ -222,12 +222,12 @@ def spline_irreg_step(*args, **kwargs): return splinediff(*args, **kwargs)
222222
[(-7, -7), (-2, -2), (0, -1), (1, 1)],
223223
[(0, 0), (2, 2), (0, 0), (2, 2)],
224224
[(1, 1), (3, 3), (1, 1), (3, 3)]],
225-
lineardiff: [[(-7, -8), (-14, -14), (0, -1), (0, 0)],
225+
lineardiff: [[(-3, -4), (-3, -3), (0, -1), (1, 0)],
226+
[(-1, -2), (0, 0), (0, -1), (1, 0)],
227+
[(-1, -1), (0, 0), (0, -1), (1, 1)],
228+
[(-1, -2), (0, 0), (0, -1), (1, 1)],
226229
[(0, 0), (2, 1), (0, 0), (2, 1)],
227-
[(1, 0), (2, 2), (1, 0), (2, 2)],
228-
[(1, 0), (2, 1), (1, 0), (2, 1)],
229-
[(1, 1), (2, 2), (1, 1), (2, 2)],
230-
[(1, 1), (3, 3), (1, 1), (3, 3)]]
230+
[(0, -1), (3, 3), (0, 0), (3, 3)]]
231231
}
232232

233233
# Essentially run the cartesian product of [diff methods] x [test functions] through this one test

pynumdiff/total_variation_regularization/_total_variation_regularization.py

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import numpy as np
22
from warnings import warn
3+
from scipy.stats import median_abs_deviation
34

45
from pynumdiff.total_variation_regularization import _chartrand_tvregdiff
56
from pynumdiff.utils import utility
@@ -53,25 +54,28 @@ def iterative_velocity(x, dt, params=None, options=None, num_iterations=None, ga
5354
return x_hat, dxdt_hat
5455

5556

56-
def tvrdiff(x, dt, order, gamma, solver=None):
57+
def tvrdiff(x, dt, order, gamma, huberM=float('inf'), solver=None):
5758
"""Generalized total variation regularized derivatives. Use convex optimization (cvxpy) to solve for a
5859
total variation regularized derivative. Other convex-solver-based methods in this module call this function.
5960
6061
:param np.array[float] x: data to differentiate
6162
:param float dt: step size
6263
:param int order: 1, 2, or 3, the derivative to regularize
6364
:param float gamma: regularization parameter
65+
:param float huberM: Huber loss parameter, in units of scaled median absolute deviation of input data, :code:`x`.
66+
:math:`M = \\infty` reduces to squared :math:`\\ell_2` loss on the first, fidelity cost term, and
67+
:math:`M = 0` reduces to :math:`\\ell_1` loss.
6468
:param str solver: Solver to use. Solver options include: 'MOSEK', 'CVXOPT', 'CLARABEL', 'ECOS'.
65-
In testing, 'MOSEK' was the most robust. If not given, fall back to CVXPY's default.
69+
If not given, fall back to CVXPY's default.
6670
6771
:return: - **x_hat** (np.array) -- estimated (smoothed) x
6872
- **dxdt_hat** (np.array) -- estimated derivative of x
6973
"""
7074
# Normalize for numerical consistency with convex solver
71-
mean = np.mean(x)
72-
std = np.std(x)
73-
if std == 0: std = 1 # safety guard
74-
x = (x-mean)/std
75+
mu = np.mean(x)
76+
sigma = median_abs_deviation(x, scale='normal') # robust alternative to std()
77+
if sigma == 0: sigma = 1 # safety guard
78+
x = (x-mu)/sigma
7579

7680
# Define the variables for the highest order derivative and the integration constants
7781
deriv_values = cvxpy.Variable(len(x)) # values of the order^th derivative, in which we're penalizing variation
@@ -84,10 +88,13 @@ def tvrdiff(x, dt, order, gamma, solver=None):
8488
for i in range(order):
8589
y = cvxpy.cumsum(y) + integration_constants[i]
8690

91+
# Compare the recursively integrated position to the noisy position. \ell_2 doesn't get scaled by 1/2 here,
92+
# so cvxpy's doubled Huber is already the right scale, and \ell_1 should be scaled by 2\sqrt{2} to match.
93+
fidelity_cost = cvxpy.sum_squares(y - x) if huberM == float('inf') \
94+
else np.sqrt(8)*cvxpy.norm(y - x, 1) if huberM == 0 \
95+
else utility.huber_const(huberM)*cvxpy.sum(cvxpy.huber(y - x, huberM*sigma))
8796
# Set up and solve the optimization problem
88-
prob = cvxpy.Problem(cvxpy.Minimize(
89-
# Compare the recursively integrated position to the noisy position, and add TVR penalty
90-
cvxpy.sum_squares(y - x) + gamma*cvxpy.sum(cvxpy.tv(deriv_values)) ))
97+
prob = cvxpy.Problem(cvxpy.Minimize(fidelity_cost + gamma*cvxpy.sum(cvxpy.tv(deriv_values)) ))
9198
prob.solve(solver=solver)
9299

93100
# Recursively integrate the final derivative values to get back to the function and derivative values
@@ -102,7 +109,7 @@ def tvrdiff(x, dt, order, gamma, solver=None):
102109
dxdt_hat = (dxdt_hat[:-1] + dxdt_hat[1:])/2
103110
dxdt_hat = np.hstack((dxdt_hat, 2*dxdt_hat[-1] - dxdt_hat[-2])) # last value = penultimate value [-1] + diff between [-1] and [-2]
104111

105-
return x_hat*std+mean, dxdt_hat*std # derivative is linear, so scale derivative by std
112+
return x_hat*sigma+mu, dxdt_hat*sigma # derivative is linear, so scale derivative by scatter
106113

107114

108115
def velocity(x, dt, params=None, options=None, gamma=None, solver=None):

pynumdiff/utils/evaluate.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -86,18 +86,16 @@ def robust_rme(u, v, padding=0, M=6):
8686
:param np.array[float] v: e.g. estimated smoothed signal, reconstructed from derivative
8787
:param int padding: number of snapshots on either side of the array to ignore when calculating
8888
the metric. If :code:`'auto'`, defaults to 2.5% of the size of inputs
89-
:param float M: Huber loss parameter in units of ~1.4*mean absolute deviation, intended to approximate
90-
standard deviation robustly.
89+
:param float M: Huber loss parameter in units of ~1.4*mean absolute deviation of population of residual
90+
errors, intended to approximate standard deviation robustly.
9191
9292
:return: (float) -- Robust root mean error between u and v
9393
"""
9494
if padding == 'auto': padding = max(1, int(0.025*len(u)))
9595
s = slice(padding, len(u)-padding) # slice out data we want to measure
96-
N = s.stop - s.start
9796

9897
sigma = stats.median_abs_deviation(u[s] - v[s], scale='normal') # M is in units of this robust scatter metric
99-
if sigma < 1e-6: sigma = 1 # guard against divide by zero
100-
return np.sqrt(2*np.mean(utility.huber((u[s] - v[s])/sigma, M))) * sigma
98+
return np.sqrt(2*np.mean(utility.huber(u[s] - v[s], M*sigma)))
10199

102100

103101
def rmse(u, v, padding=0):

pynumdiff/utils/utility.py

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import numpy as np
33
from scipy.integrate import cumulative_trapezoid
44
from scipy.optimize import minimize
5-
from scipy.stats import median_abs_deviation
5+
from scipy.stats import median_abs_deviation, norm
66

77

88
def hankel_matrix(x, num_delays, pad=False): # fixed delay step of 1
@@ -102,6 +102,13 @@ def huber(x, M):
102102
absx = np.abs(x)
103103
return np.where(absx <= M, 0.5*x**2, M*(absx - 0.5*M))
104104

105+
def huber_const(M):
106+
"""Scale that makes :code:`sum(huber())` interpolate :math:`\\sqrt{2}\\|\\cdot\\|_1` and :math:`\\frac{1}{2}\\|\\cdot\\|_2^2`,
107+
from https://jmlr.org/papers/volume14/aravkin13a/aravkin13a.pdf, with correction for missing sqrt"""
108+
a = 2*np.exp(-M**2 / 2)/M
109+
b = np.sqrt(2*np.pi)*(2*norm.cdf(M) - 1)
110+
return np.sqrt((2*a*(1 + M**2)/M**2 + b)/(a + b))
111+
105112

106113
def integrate_dxdt_hat(dxdt_hat, dt_or_t):
107114
"""Wrapper for scipy.integrate.cumulative_trapezoid. Use 0 as first value so lengths match, see #88.
@@ -123,21 +130,20 @@ def estimate_integration_constant(x, x_hat, M=6):
123130
124131
:param np.array[float] x: timeseries of measurements
125132
:param np.array[float] x_hat: smoothed estimate of x
126-
:param float M: robustifies constant estimation using Huber loss. The default is intended to capture the idea
127-
of "six sigma": Assuming Gaussian inliers and M in units of standard deviation, the portion of inliers
128-
beyond the Huber loss' transition is only about 1.97e-9. M here is in units of scaled mean absolute deviation,
129-
so scatter can be calculated and used to normalize data without being thrown off by outliers.
133+
:param float M: constant estimation is robustified with the Huber loss. :code:`M` here is in units of scaled
134+
mean absolute deviation of residuals, so scatter can be calculated and used to normalize without being
135+
thrown off by outliers. The default is intended to capture the idea of "six sigma": Assuming Gaussian
136+
:code:`x - xhat` errors, the portion of inliers beyond the Huber loss' transition is only about 1.97e-9.
130137
131138
:return: **integration constant** (float) -- initial condition that best aligns x_hat with x
132139
"""
133-
if M == float('inf'): # calculates the constant to be mean(diff(x, x_hat)), equivalent to argmin_{x0} ||x_hat + x0 - x||_2^2
134-
return np.mean(x - x_hat) # Solves the L2 distance minimization
135-
elif M < 0.1: # small M looks like L1 loss, and Huber gets too flat to work well
136-
return np.median(x - x_hat) # Solves the L1 distance minimization
140+
sigma = median_abs_deviation(x - x_hat, scale='normal') # M is in units of this robust scatter metric
141+
if M == float('inf') or sigma < 1e-3: # If no scatter, then no outliers, so use L2
142+
return np.mean(x - x_hat) # Solves the l2 distance minimization, argmin_{x0} ||x_hat + x0 - x||_2^2
143+
elif M < 1e-3: # small M looks like l1 loss, and Huber gets too flat to work well
144+
return np.median(x - x_hat) # Solves the l1 distance minimization, argmin_{x0} ||x_hat + x0 - x||_1
137145
else:
138-
sigma = median_abs_deviation(x - x_hat, scale='normal') # M is in units of this robust scatter metric
139-
if sigma < 1e-6: sigma = 1 # guard against divide by zero
140-
return minimize(lambda x0: np.mean(huber((x - (x_hat+x0))/sigma, M)), # fn to minimize in 1st argument
146+
return minimize(lambda x0: np.sum(huber(x - (x_hat+x0), M*sigma)), # fn to minimize in 1st argument
141147
0, method='SLSQP').x[0] # result is a vector, even if initial guess is just a scalar
142148

143149

0 commit comments

Comments
 (0)