stan-dev · florence-bockting · Jul 2, 2026 · Jul 2, 2026 · Jul 2, 2026 · Jul 2, 2026
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -18,7 +18,7 @@ vignettes/loo2-non-factorized_cache/*
 .github/*
 .vscode/*
 ^\.github$
-^vignettes/online-only$
+^vignettes/articles-online-only$
 
 ^CRAN-SUBMISSION$
 ^release-prep\.R$
@@ -27,3 +27,4 @@ vignettes/loo2-non-factorized_cache/*
 ^touchstone$
 ^.git-blame-ignore-revs
 ^notes-release.md
+^notes$
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
@@ -38,7 +38,7 @@ jobs:
             local::.
             any::bayesplot
             any::BH
-            any::brms
+            paul-buerkner/brms
             any::ggplot2
             any::rmarkdown
             any::Rcpp
@@ -51,8 +51,23 @@ jobs:
             any::StanHeaders
             any::knitr
             any::withr
+            any::lme4
+            any::palmerpenguins
             stan-dev/pkgdown-config
 
+      # pred-measure-workflow needs pre-fitted brms models (~99 MB, not in git).
+      # Skipped when data-for-vignettes/ already holds at least 5 .Rds files (e.g. local cache).
+      - name: Generate vignette data if missing (slow — fits brms models)
+        run: |
+          VIG_DATA="vignettes/articles-online-only/data-for-vignettes"
+          N=$(find "$VIG_DATA" -name '*.Rds' 2>/dev/null | wc -l)
+          if [ "$N" -lt 5 ]; then
+            echo "::notice title=Vignette data generation::Fitting brms models for data-for-vignettes/ — expect ~10-20 minutes."
+            Rscript tests/testthat/data-for-tests/test_data_generation.R --vignettes-only
+          else
+            echo "Vignette data already present ($N RDS files) — skipping generation."
+          fi
+
       - name: Build site
         run: |
           withr::with_envvar(

diff --git a/.gitignore b/.gitignore
@@ -19,10 +19,14 @@ vignettes/loo2-non-factorizable_cache/*
 vignettes/*.html
 vignettes/*.pdf
 inst/doc
+*.html
 
 revdep/*
 tests/testthat/Rplots.pdf
 
 cran-comments.md
 CRAN-RELEASE
 release-prep.R
+
+# personal maintainer scratch (not shared)
+internal-notes/
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -36,6 +36,7 @@ Depends:
     R (>= 3.5)
 Imports:
     checkmate,
+    cli (>= 3.4.0),
     matrixStats (>= 0.52),
     parallel,
     posterior (>= 1.7.0),
@@ -46,6 +47,7 @@ Suggests:
     ggplot2,
     graphics,
     knitr,
+    lme4,
     rmarkdown,
     rstan,
     rstanarm (>= 2.19.0),
@@ -62,3 +64,4 @@ LazyData: TRUE
 Roxygen: list(markdown = TRUE)
 SystemRequirements: pandoc (>= 1.12.3), pandoc-citeproc
 Config/roxygen2/version: 8.0.0
+RoxygenNote: 7.3.3
diff --git a/NAMESPACE b/NAMESPACE
@@ -57,14 +57,19 @@ S3method(print,compare.loo_ss)
 S3method(print,importance_sampling)
 S3method(print,importance_sampling_loo)
 S3method(print,kfold)
+S3method(print,kfold_pred_measure)
 S3method(print,loo)
+S3method(print,loo_pred_measure)
+S3method(print,measure)
 S3method(print,pareto_k_table)
+S3method(print,pred_measure)
 S3method(print,pseudobma_bb_weights)
 S3method(print,pseudobma_weights)
 S3method(print,psis)
 S3method(print,psis_loo)
 S3method(print,psis_loo_ap)
 S3method(print,stacking_weights)
+S3method(print,test_pred_measure)
 S3method(print,waic)
 S3method(print_dims,elpd_generic)
 S3method(print_dims,importance_sampling)
@@ -108,6 +113,7 @@ export(example_loglik_matrix)
 export(extract_log_lik)
 export(find_model_names)
 export(gpdfit)
+export(insample_pred_measure)
 export(is.kfold)
 export(is.loo)
 export(is.psis)
@@ -116,6 +122,7 @@ export(is.sis)
 export(is.tis)
 export(is.waic)
 export(kfold)
+export(kfold_pred_measure)
 export(kfold_split_grouped)
 export(kfold_split_random)
 export(kfold_split_stratified)
@@ -134,27 +141,44 @@ export(loo_model_weights)
 export(loo_model_weights.default)
 export(loo_moment_match)
 export(loo_moment_match.default)
+export(loo_pred_measure)
 export(loo_predictive_metric)
 export(loo_scrps)
 export(loo_subsample)
 export(loo_subsample.function)
 export(mcse_loo)
+export(measure_acc)
+export(measure_bacc)
+export(measure_brier)
+export(measure_elpd)
+export(measure_ic)
+export(measure_mae)
+export(measure_mlpd)
+export(measure_mse)
+export(measure_r2)
+export(measure_rmse)
+export(measure_rps)
+export(measure_srps)
 export(nlist)
 export(obs_idx)
 export(pareto_k_ids)
 export(pareto_k_influence_values)
 export(pareto_k_table)
 export(pareto_k_values)
 export(pointwise)
+export(pred_measure)
 export(print_dims)
 export(pseudobma_weights)
 export(psis)
 export(psis_n_eff_values)
 export(psislw)
+export(ptw_log_pred_density)
 export(relative_eff)
 export(scrps)
 export(sis)
 export(stacking_weights)
+export(supported_measures_list)
+export(test_pred_measure)
 export(tis)
 export(waic)
 export(waic.array)

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,13 @@
 # loo (development version)
 
+* `elpd()`, `crps()`, `scrps()`, `loo_crps()`, `loo_scrps()`, and
+  `loo_predictive_metric()` are deprecated in favour of the `measure_*()`
+  functions and the `*_pred_measure()` workflow. See `vignette("migration-guide")`
+  for a full mapping table.
+* New predictive performance API: `insample_pred_measure()`, `loo_pred_measure()`,
+  `kfold_pred_measure()`, `test_pred_measure()`, and `pred_measure()` with
+  built-in measures via `measure_*()` and `supported_measures_list()`.
+
 # loo 2.10.0
 
 * Updates to `loo_compare` output by @jgabry, @avehtari, @florence-bockting in #300: 

diff --git a/R/compare.R b/R/compare.R
@@ -1,7 +1,8 @@
 #' Model comparison (deprecated, old version)
 #'
 #' **This function is deprecated**. Please use the new [loo_compare()] function
-#' instead.
+#' instead. See `vignette("migration-guide", package = "loo")` for details.
+#' `compare()` and `R/compare.R` are scheduled for removal in a future release.
 #'
 #' @export
 #' @param ... At least two objects returned by [loo()] (or [waic()]).

diff --git a/R/crps.R b/R/crps.R
@@ -1,4 +1,15 @@
-#' Continuously ranked probability score
+#' Continuously ranked probability score (deprecated)
+#'
+#' As of loo 3.0.0, `crps()`, `scrps()`, `loo_crps()`, and `loo_scrps()` are
+#' **deprecated**. Please use [measure_rps()] and [measure_srps()] instead,
+#' or [loo_pred_measure()] for leave-one-out predictive performance.
+#' See `vignette("migration-guide", package = "loo")` for a full mapping table.
+#'
+#' @details
+#' The new functions use a single matrix of posterior predictive draws
+#' (`ypred`) and a PWM estimator. The deprecated functions require two
+#' independent draw matrices (`x` and `x2`) and use a permutation-based
+#' estimator. See [measure_rps()] for migration details.
 #'
 #' The `crps()` and `scrps()` functions and their `loo_*()` counterparts can be
 #' used to compute the continuously ranked probability score (CRPS) and scaled
@@ -34,16 +45,19 @@
 #'   The former reports estimator and standard error and latter the pointwise
 #'   values. Following Bolin & Wallin (2023), a larger value is better.
 #'
+#' @seealso [measure_rps()], [measure_srps()], [loo_pred_measure()]
+#'
 #' @examples
 #' \dontrun{
-#' # An example using rstanarm
+#' # Deprecated:
 #' library(rstanarm)
 #' data("kidiq")
 #' fit <- stan_glm(kid_score ~ mom_hs + mom_iq, data = kidiq)
 #' ypred1 <- posterior_predict(fit)
 #' ypred2 <- posterior_predict(fit)
 #' crps(ypred1, ypred2, y = fit$y)
-#' loo_crps(ypred1, ypred2, y = fit$y, log_lik = log_lik(fit))
+#' # Replacement:
+#' measure_rps(y = fit$y, ypred = ypred1)
 #' }
 #'
 #' @references
@@ -80,6 +94,7 @@ loo_scrps <- function(x, ...) {
 #' @rdname crps
 #' @export
 crps.matrix <- function(x, x2, y, ..., permutations = 1) {
+  .Deprecated("measure_rps")
   validate_crps_input(x, x2, y)
   repeats <- replicate(permutations, EXX_compute(x, x2), simplify = F)
   EXX <- Reduce(`+`, repeats) / permutations
@@ -114,6 +129,7 @@ loo_crps.matrix <-
            permutations = 1,
            r_eff = 1,
            cores = getOption("mc.cores", 1)) {
+  .Deprecated("loo_pred_measure")
   validate_crps_input(x, x2, y, log_lik)
   repeats <- replicate(permutations,
                        EXX_loo_compute(x, x2, log_lik, r_eff = r_eff, ...),
@@ -128,6 +144,7 @@ loo_crps.matrix <-
 #' @rdname crps
 #' @export
 scrps.matrix <- function(x, x2, y, ..., permutations = 1) {
+  .Deprecated("measure_srps")
   validate_crps_input(x, x2, y)
   repeats <- replicate(permutations, EXX_compute(x, x2), simplify = F)
   EXX <- Reduce(`+`, repeats) / permutations
@@ -156,6 +173,7 @@ loo_scrps.matrix <-
     permutations = 1,
     r_eff = 1,
     cores = getOption("mc.cores", 1)) {
+  .Deprecated("loo_pred_measure")
   validate_crps_input(x, x2, y, log_lik)
   repeats <- replicate(permutations,
                        EXX_loo_compute(x, x2, log_lik, r_eff = r_eff, ...),

diff --git a/R/elpd.R b/R/elpd.R
@@ -1,26 +1,38 @@
-#' Generic (expected) log-predictive density
+#' Generic (expected) log-predictive density (deprecated)
+#'
+#' As of loo 3.0.0, `elpd()` is **deprecated**. Please use [measure_elpd()]
+#' instead. For full predictive performance workflows, see
+#' [insample_pred_measure()] and [loo_pred_measure()].
+#' See `vignette("migration-guide", package = "loo")` for a full mapping table.
+#'
+#' @details
+#' The return type differs: `elpd()` returns class `"elpd_generic"` with
+#' `elpd` and `ic` in `pointwise`; `measure_elpd()` returns class `"measure"`.
 #'
 #' The `elpd()` methods for arrays and matrices can compute the expected log
 #' pointwise predictive density for a new dataset or the log pointwise
 #' predictive density of the observed data (an overestimate of the elpd).
+#' The `elpd()` function is an S3 generic and methods are provided for
+#' 3-D pointwise log-likelihood arrays and matrices.
 #'
 #' @export
 #' @param x A log-likelihood array or matrix. The **Methods (by class)**
 #'   section, below, has detailed descriptions of how to specify the inputs for
 #'   each method.
 #' @param ... Currently ignored.
 #'
-#' @details The `elpd()` function is an S3 generic and methods are provided for
-#'   3-D pointwise log-likelihood arrays and matrices.
-#'
-#' @seealso The vignette *Holdout validation and K-fold cross-validation of Stan
-#'   programs with the loo package* for demonstrations of using the `elpd()`
-#'   methods.
+#' @seealso [measure_elpd()], [insample_pred_measure()], [loo_pred_measure()],
+#'   and the vignette *Holdout validation and K-fold cross-validation of Stan
+#'   programs with the loo package*.
 #'
 #' @examples
-#' # Calculate the lpd of the observed data
+#' \dontrun{
+#' # Deprecated:
 #' LLarr <- example_loglik_array()
 #' elpd(LLarr)
+#' # Replacement:
+#' measure_elpd(LLarr)
+#' }
 #'
 elpd <- function(x, ...) {
   UseMethod("elpd")
@@ -31,22 +43,28 @@ elpd <- function(x, ...) {
 #' @template array
 #'
 elpd.array <- function(x, ...) {
+  .Deprecated("measure_elpd")
   ll <- llarray_to_matrix(x)
-  elpd.matrix(ll)
+  .elpd_matrix_impl(ll)
 }
 
 #' @export
 #' @templateVar fn elpd
 #' @template matrix
 #'
 elpd.matrix <- function(x, ...) {
-  pointwise <- pointwise_elpd_calcs(x)
-  elpd_object(pointwise, dim(x))
+  .Deprecated("measure_elpd")
+  .elpd_matrix_impl(x)
 }
 
 
-
 # internal ----------------------------------------------------------------
+# Shared by elpd.array() and elpd.matrix() so only one deprecation warning is emitted.
+.elpd_matrix_impl <- function(x) {
+  pointwise <- pointwise_elpd_calcs(x)
+  elpd_object(pointwise, dim(x))
+}
+
 pointwise_elpd_calcs <- function(ll){
   elpd <- colLogSumExps(ll) - log(nrow(ll))
   ic <- -2 * elpd

diff --git a/R/loo_predictive_metric.R b/R/loo_predictive_metric.R
@@ -1,4 +1,15 @@
-#' Estimate leave-one-out predictive performance..
+#' Estimate leave-one-out predictive performance (deprecated)
+#'
+#' As of loo 3.0.0, `loo_predictive_metric()` is **deprecated**. Please use
+#' [loo_pred_measure()] instead, or the standalone [measure_mae()],
+#' [measure_rmse()], [measure_mse()], [measure_acc()], and [measure_bacc()]
+#' functions with PSIS log-weights.
+#' See `vignette("migration-guide", package = "loo")` for a full mapping table.
+#'
+#' @details
+#' The replacement API uses `mupred` (posterior expected values) rather than
+#' pre-computed LOO expectations passed as `x`. Metric names also differ:
+#' `"balanced_acc"` maps to `"bacc"` in [supported_measures_list()].
 #'
 #' The `loo_predictive_metric()` function computes estimates of leave-one-out
 #' predictive metrics given a set of predictions and observations. Currently
@@ -49,6 +60,7 @@
 #'   Standard error of the estimate.
 #'   }
 #'  }
+#' @seealso [loo_pred_measure()], [measure_mae()], [supported_measures_list()]
 #' @export
 #'
 #' @examples
@@ -93,6 +105,7 @@ loo_predictive_metric.matrix <-
            metric = c("mae", "rmse", "mse", "acc", "balanced_acc"),
            r_eff = 1,
            cores = getOption("mc.cores", 1)) {
+    .Deprecated("loo_pred_measure")
     stopifnot(
       is.numeric(x),
       is.numeric(y),