From e85efce0f6cabc6a60c8bfeebe967321a1a587fb Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Tue, 5 May 2026 10:42:32 +0100 Subject: [PATCH 1/7] Run autoformat on any branch beginning devel --- .github/workflows/autoformat.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/autoformat.yml b/.github/workflows/autoformat.yml index 41247db..1fe667e 100644 --- a/.github/workflows/autoformat.yml +++ b/.github/workflows/autoformat.yml @@ -2,7 +2,7 @@ name: Auto-format Code on: push: - branches: [main, develop] + branches: [main, devel*] workflow_dispatch: jobs: From c4538789ad73f582b8686f7a31c7f7b4bba97778 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Tue, 5 May 2026 11:10:09 +0100 Subject: [PATCH 2/7] Bump version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index de87979..73035e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "pySEQTarget" -version = "0.13.2" +version = "0.13.3" description = "Sequentially Nested Target Trial Emulation" readme = "README.md" license = {text = "MIT"} From 51496ff9d3317ee8496c328ea29d23f8b1068dc6 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Tue, 5 May 2026 11:07:52 +0100 Subject: [PATCH 3/7] Pre-compute spline knots from full dataset to ensure consistency across bootstraps `cr(followup, df=3)` in patsy computes interior knot positions from the quantiles of the training data. When bootstrapping, each replicate is fit on a resampled dataset that may have a different followup distribution, causing the spline basis to differ between the original model and bootstrap models, and between bootstrap replicates. Fix this by computing knot positions (interior knots, lower bound, upper bound) once from the full unexpanded dataset at the start of the original fit, storing them on the object, and embedding them as literals in the formula string passed to patsy for all subsequent fits (bootstrap replicates included). This mirrors the fix applied to the R package SEQTaRget. --- pySEQTarget/analysis/_outcome_fit.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/pySEQTarget/analysis/_outcome_fit.py b/pySEQTarget/analysis/_outcome_fit.py index 1bfc045..08bba1f 100644 --- a/pySEQTarget/analysis/_outcome_fit.py +++ b/pySEQTarget/analysis/_outcome_fit.py @@ -1,12 +1,28 @@ import re +import numpy as np import polars as pl import statsmodels.api as sm import statsmodels.formula.api as smf -def _apply_spline_formula(formula, indicator_squared): - spline = "cr(followup, df=3)" +def _compute_spline_knots(followup_arr, df=3): + lower = float(np.min(followup_arr)) + upper = float(np.max(followup_arr)) + n_inner = df - 2 + if n_inner == 0: + inner_knots = [] + else: + # Replicate patsy's knot placement: percentiles of unique values in [lower, upper] + x = np.unique(followup_arr[(lower <= followup_arr) & (followup_arr <= upper)]) + q = np.linspace(0, 100, n_inner + 2)[1:-1] + inner_knots = np.percentile(x, q.tolist()).tolist() + return inner_knots, lower, upper + + +def _apply_spline_formula(formula, indicator_squared, spline_knots): + inner_knots, lower, upper = spline_knots + spline = f"cr(followup, knots={inner_knots}, lower_bound={lower}, upper_bound={upper})" formula = re.sub(r"(\w+)\s*\*\s*followup\b", rf"\1*{spline}", formula) formula = re.sub(r"\bfollowup\s*\*\s*(\w+)", rf"{spline}*\1", formula) @@ -64,7 +80,13 @@ def _outcome_fit( df_pd = _cast_categories(self, df.to_pandas()) if self.followup_spline: - formula = _apply_spline_formula(formula, self.indicator_squared) + if getattr(self, "_current_boot_idx", None) is None: + self._followup_spline_knots = _compute_spline_knots( + self.DT["followup"].to_numpy() + ) + formula = _apply_spline_formula( + formula, self.indicator_squared, self._followup_spline_knots + ) full_formula = f"{outcome} ~ {formula}" From ebf85d95f81240d4150e8e28f76c4a1cf4996d96 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Tue, 5 May 2026 11:13:24 +0100 Subject: [PATCH 4/7] Replace squared spline term with the spline basis itself `I(cr(followup, df=3)**2)` was being appended to the formula to represent the non-linear followup effect, but this squares the spline basis matrix element-wise rather than including the spline basis directly. When no treatment-by-followup interaction term is present (e.g. hazard estimation or km_curves=False), this meant the model contained only a squared spline with no main spline effect, completely misrepresenting the followup relationship. Replace the squared term with the spline basis itself. When an interaction term such as `treatment*cr(followup, ...)` is also present, patsy's formula expansion already includes the spline as a main effect and deduplicates it. Update expected test coefficients accordingly. --- pySEQTarget/analysis/_outcome_fit.py | 4 ++-- tests/test_followup_options.py | 22 +++++++++++----------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/pySEQTarget/analysis/_outcome_fit.py b/pySEQTarget/analysis/_outcome_fit.py index 08bba1f..8746943 100644 --- a/pySEQTarget/analysis/_outcome_fit.py +++ b/pySEQTarget/analysis/_outcome_fit.py @@ -34,8 +34,8 @@ def _apply_spline_formula(formula, indicator_squared, spline_knots): formula = re.sub(r"^\s*\+\s*|\s*\+\s*$", "", formula).strip() if formula: - return f"{formula} + I({spline}**2)" - return f"I({spline}**2)" + return f"{formula} + {spline}" + return spline def _cast_categories(self, df_pd): diff --git a/tests/test_followup_options.py b/tests/test_followup_options.py index 74d5451..96ff012 100644 --- a/tests/test_followup_options.py +++ b/tests/test_followup_options.py @@ -57,17 +57,17 @@ def test_followup_spline(): s.fit() matrix = s.outcome_model[0]["outcome"].summary2().tables[1]["Coef."].to_list() expected = [ - -6.264817962084417, - 0.20125056343026881, - 0.12568743032952776, - 0.03823426390103046, - 0.0006607691746414019, - 0.003343365539743267, - -0.01319460158923785, - 0.19601796921732118, - -0.5186462478511427, - 0.37598656666756103, - 1.6553848469346044, + -4.529508842861102, + 0.18920531994426618, + 0.12716410019183344, + 0.044763329389737344, + 0.0005746738824170931, + 0.0032912518104595678, + -0.013437129896165089, + 0.20082826545396634, + -2.2908167902810934, + -1.4324371106348253, + -0.806254941945183, ] assert [round(x, 3) for x in matrix] == [round(x, 3) for x in expected] From 03cea5297a3e821f48302d72a31c2b1113c4632b Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Tue, 5 May 2026 11:29:21 +0100 Subject: [PATCH 5/7] Add followup_spline_df parameter to control spline degrees of freedom Add `followup_spline_df` (default 4) to `SEQopts` to allow users to control the number of degrees of freedom for the natural cubic spline fit to followup, matching the `followup.spline.df` parameter introduced in the R package SEQTaRget. The default changes from the previously hardcoded value of 3 to 4, consistent with R. A validation check ensures the value is at least 2, which is the minimum supported by patsy's natural cubic spline implementation. Update expected test coefficients accordingly. --- pySEQTarget/SEQopts.py | 3 +++ pySEQTarget/analysis/_outcome_fit.py | 2 +- pySEQTarget/error/_param_checker.py | 3 +++ tests/test_followup_options.py | 23 ++++++++++++----------- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/pySEQTarget/SEQopts.py b/pySEQTarget/SEQopts.py index 5cd32ae..c4fc496 100644 --- a/pySEQTarget/SEQopts.py +++ b/pySEQTarget/SEQopts.py @@ -45,6 +45,8 @@ class SEQopts: :type followup_include: bool :param followup_spline: Boolean to force followup values to be fit to cubic spline :type followup_spline: bool + :param followup_spline_df: Degrees of freedom for the followup cubic spline, default ``4`` + :type followup_spline_df: int :param followup_max: Maximum allowed followup in analysis :type followup_max: int or None :param followup_min: Minimum allowed followup in analysis @@ -130,6 +132,7 @@ class SEQopts: followup_max: int = None followup_min: int = 0 followup_spline: bool = False + followup_spline_df: int = 4 hazard_estimate: bool = False indicator_baseline: str = "_bas" indicator_squared: str = "_sq" diff --git a/pySEQTarget/analysis/_outcome_fit.py b/pySEQTarget/analysis/_outcome_fit.py index 8746943..167adee 100644 --- a/pySEQTarget/analysis/_outcome_fit.py +++ b/pySEQTarget/analysis/_outcome_fit.py @@ -82,7 +82,7 @@ def _outcome_fit( if self.followup_spline: if getattr(self, "_current_boot_idx", None) is None: self._followup_spline_knots = _compute_spline_knots( - self.DT["followup"].to_numpy() + self.DT["followup"].to_numpy(), df=self.followup_spline_df ) formula = _apply_spline_formula( formula, self.indicator_squared, self._followup_spline_knots diff --git a/pySEQTarget/error/_param_checker.py b/pySEQTarget/error/_param_checker.py index a926569..66492cb 100644 --- a/pySEQTarget/error/_param_checker.py +++ b/pySEQTarget/error/_param_checker.py @@ -44,6 +44,9 @@ def _param_checker(self): "Only one of followup_class or followup_include can be set to True." ) + if self.followup_spline_df < 2: + raise ValueError("followup_spline_df must be at least 2.") + if ( self.weighted and self.method == "ITT" diff --git a/tests/test_followup_options.py b/tests/test_followup_options.py index 96ff012..c2005d5 100644 --- a/tests/test_followup_options.py +++ b/tests/test_followup_options.py @@ -57,17 +57,18 @@ def test_followup_spline(): s.fit() matrix = s.outcome_model[0]["outcome"].summary2().tables[1]["Coef."].to_list() expected = [ - -4.529508842861102, - 0.18920531994426618, - 0.12716410019183344, - 0.044763329389737344, - 0.0005746738824170931, - 0.0032912518104595678, - -0.013437129896165089, - 0.20082826545396634, - -2.2908167902810934, - -1.4324371106348253, - -0.806254941945183, + -4.804282252748607, + 0.19115933860001255, + 0.12717121164606823, + 0.044310717515918724, + 0.0005814999431447507, + 0.0032948355025455216, + -0.013371824500839971, + 0.19972467861548412, + -2.027245615586753, + -1.395729861856384, + -0.9397731941281695, + -0.4415335811772879, ] assert [round(x, 3) for x in matrix] == [round(x, 3) for x in expected] From 9f65ea6ba69afb3cc6f782237118aaa1fe84cf59 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Tue, 5 May 2026 11:36:36 +0100 Subject: [PATCH 6/7] Add Modelling Followup with a Natural Cubic Spline section --- docs/vignettes/more_advanced_models.md | 27 ++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/docs/vignettes/more_advanced_models.md b/docs/vignettes/more_advanced_models.md index ed9fd5a..d31bd06 100644 --- a/docs/vignettes/more_advanced_models.md +++ b/docs/vignettes/more_advanced_models.md @@ -81,6 +81,33 @@ my_output = my_analysis.collect() my_output.to_md() ``` +## Modelling Followup with a Natural Cubic Spline + +By default, followup time enters the outcome model as a linear and quadratic term (`followup` and `followup_sq`). If you prefer a more flexible non-linear representation, you can replace these with a natural cubic spline basis by setting `followup_spline=True`. Because the spline replaces the standard followup terms, you must also set `followup_include=False` to avoid conflicting specifications. + +```python +my_options = SEQopts( + followup_spline=True, + followup_include=False, + km_curves=True, +) +``` + +The spline basis is constructed using patsy's `cr()` function. Knot positions are computed once from the full expanded dataset before any bootstrap resampling, and are held fixed across all bootstrap iterations and the survival prediction grid. This ensures the spline basis is identical at fit time and prediction time regardless of how the bootstrap sample happens to be distributed. + +The degrees of freedom for the spline are controlled by `followup_spline_df`, which defaults to `4` (matching the default in the R package SEQTaRget). Increasing this allows a more flexible curve; decreasing it towards the minimum of `2` produces a stiffer fit. + +```python +my_options = SEQopts( + followup_spline=True, + followup_include=False, + followup_spline_df=6, # more flexible spline + km_curves=True, +) +``` + +The rest of the analytical pipeline is unchanged — `expand()`, `fit()`, `survival()`, and `collect()` all work exactly as before. + ## That's it? Yes! There are very few differences between the code for more straightforward and more difficult analyses using this package. Our hope is that through utilizing almost only the SEQopts to work with your analysis, that this is a streamlined process that is also easy to manipulate. From e381fe4d4855dad8f5dec32cc48a8f4ec083eb1e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 5 May 2026 10:39:00 +0000 Subject: [PATCH 7/7] Auto-format code --- pySEQTarget/analysis/_outcome_fit.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pySEQTarget/analysis/_outcome_fit.py b/pySEQTarget/analysis/_outcome_fit.py index 167adee..a0fdda6 100644 --- a/pySEQTarget/analysis/_outcome_fit.py +++ b/pySEQTarget/analysis/_outcome_fit.py @@ -22,7 +22,9 @@ def _compute_spline_knots(followup_arr, df=3): def _apply_spline_formula(formula, indicator_squared, spline_knots): inner_knots, lower, upper = spline_knots - spline = f"cr(followup, knots={inner_knots}, lower_bound={lower}, upper_bound={upper})" + spline = ( + f"cr(followup, knots={inner_knots}, lower_bound={lower}, upper_bound={upper})" + ) formula = re.sub(r"(\w+)\s*\*\s*followup\b", rf"\1*{spline}", formula) formula = re.sub(r"\bfollowup\s*\*\s*(\w+)", rf"{spline}*\1", formula)