From bb21b2e7c329c2c10b320d8acc14879c2edbb5fa Mon Sep 17 00:00:00 2001 From: Ivan Seleznev Date: Tue, 24 Feb 2026 16:01:13 +0100 Subject: [PATCH 01/10] Remove unnecessary open modifier --- .../letsPlot/core/plot/base/tooltip/text/DataFrameField.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/tooltip/text/DataFrameField.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/tooltip/text/DataFrameField.kt index 7ba88229046..779733ae134 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/tooltip/text/DataFrameField.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/tooltip/text/DataFrameField.kt @@ -12,7 +12,7 @@ import org.jetbrains.letsPlot.core.plot.base.FormatterUtil import org.jetbrains.letsPlot.core.plot.base.PlotContext import org.jetbrains.letsPlot.core.plot.base.data.DataFrameUtil -open class DataFrameField( +class DataFrameField( private val name: String, private val format: String? 
= null ) : ValueSource { From 7c8dd9fbbc176b1017a712523da9645ae6837292 Mon Sep 17 00:00:00 2001 From: Ivan Seleznev Date: Wed, 25 Feb 2026 11:31:09 +0100 Subject: [PATCH 02/10] Add sample size (N) to smooth stat summary output --- .../jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt | 1 + .../core/plot/base/stat/regression/RegressionEvaluator.kt | 2 +- .../org/jetbrains/letsPlot/core/spec/back/PlotConfigBackend.kt | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt index 291c20d116f..47106310b3a 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt @@ -113,6 +113,7 @@ class SmoothStatSummary( .put(Stats.Y, listOf(0.0)) .put(Stats.R2, listOf(regression.r2)) .put(Stats.R2_ADJ, listOf(regression.adjR2)) + .put(Stats.N, listOf(regression.n)) val vars = myVariables ?: initVariables(regression.eq.size) regression.eq.forEachIndexed { index, coef -> diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionEvaluator.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionEvaluator.kt index 151c3a21844..0bf08e572c0 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionEvaluator.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionEvaluator.kt @@ -10,7 +10,7 @@ import kotlin.math.pow import kotlin.math.sqrt abstract class RegressionEvaluator protected constructor( - private val n: Int, + val n: Int, private val meanX: Double, private val sumXX: Double, private val model: (Double) -> Double, diff --git 
a/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/back/PlotConfigBackend.kt b/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/back/PlotConfigBackend.kt index eacdb25f232..30ba8ee8baf 100644 --- a/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/back/PlotConfigBackend.kt +++ b/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/back/PlotConfigBackend.kt @@ -334,7 +334,7 @@ open class PlotConfigBackend( varsToKeep.removeAll(notRenderedVars) varsToKeep.addAll(renderedVars) - varsToKeep.addAll(listOf(Stats.R2, Stats.R2_ADJ)) + varsToKeep.addAll(listOf(Stats.R2, Stats.R2_ADJ, Stats.N)) varsToKeep.addAll(layerConfig.ownData.variables().filter { it.label.contains("smooth_eq_coef_") }) return HashSet() + From 541392cbaced13868bbebf14127c49d3c8fde8b0 Mon Sep 17 00:00:00 2001 From: Ivan Seleznev Date: Wed, 25 Feb 2026 12:12:09 +0100 Subject: [PATCH 03/10] Add smoothing method to stat summary output --- .../letsPlot/core/plot/base/stat/SmoothStatSummary.kt | 2 ++ .../jetbrains/letsPlot/core/plot/base/stat/Stats.kt | 2 ++ .../core/plot/base/stat/regression/RegressionUtil.kt | 11 +++++++++++ .../letsPlot/core/spec/back/PlotConfigBackend.kt | 2 +- 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt index 47106310b3a..5c4da892c48 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt @@ -15,6 +15,7 @@ import org.jetbrains.letsPlot.core.plot.base.stat.SmoothStat.Method import org.jetbrains.letsPlot.core.plot.base.stat.regression.LinearRegression import org.jetbrains.letsPlot.core.plot.base.stat.regression.LocalPolynomialRegression import 
org.jetbrains.letsPlot.core.plot.base.stat.regression.PolynomialRegression +import org.jetbrains.letsPlot.core.plot.base.stat.regression.smoothingMethodLabel import org.jetbrains.letsPlot.core.plot.base.util.SamplingUtil import kotlin.random.Random @@ -114,6 +115,7 @@ class SmoothStatSummary( .put(Stats.R2, listOf(regression.r2)) .put(Stats.R2_ADJ, listOf(regression.adjR2)) .put(Stats.N, listOf(regression.n)) + .put(Stats.METHOD, listOf(smoothingMethodLabel(smoothingMethod))) val vars = myVariables ?: initVariables(regression.eq.size) regression.eq.forEachIndexed { index, coef -> diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/Stats.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/Stats.kt index 67705378b66..5e78d5da390 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/Stats.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/Stats.kt @@ -40,6 +40,7 @@ object Stats { val R2_ADJ = DataFrame.Variable("..adjr2..", STAT, "adjr2") val R2 = DataFrame.Variable("..r2..", STAT, "r2") + val METHOD = DataFrame.Variable("..method..", STAT, "method") val SCALED = DataFrame.Variable("..scaled..", STAT, "scaled") @@ -79,6 +80,7 @@ object Stats { GROUP, R2, R2_ADJ, + METHOD, ) val result = HashMap() diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionUtil.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionUtil.kt index cf055a22dce..44064b10535 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionUtil.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionUtil.kt @@ -7,6 +7,7 @@ package org.jetbrains.letsPlot.core.plot.base.stat.regression import org.jetbrains.letsPlot.core.plot.base.stat.math3.Percentile import 
org.jetbrains.letsPlot.core.commons.data.SeriesUtil +import org.jetbrains.letsPlot.core.plot.base.stat.SmoothStat import kotlin.random.Random internal object RegressionUtil { @@ -114,3 +115,13 @@ fun averageByX(xs: List, ys: List): Pair "lm" + SmoothStat.Method.LOESS -> "loess" + SmoothStat.Method.GLM -> "glm" + SmoothStat.Method.GAM -> "gam" + SmoothStat.Method.RLM -> "rlm" + } +} diff --git a/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/back/PlotConfigBackend.kt b/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/back/PlotConfigBackend.kt index 30ba8ee8baf..22ffb3d2955 100644 --- a/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/back/PlotConfigBackend.kt +++ b/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/back/PlotConfigBackend.kt @@ -334,7 +334,7 @@ open class PlotConfigBackend( varsToKeep.removeAll(notRenderedVars) varsToKeep.addAll(renderedVars) - varsToKeep.addAll(listOf(Stats.R2, Stats.R2_ADJ, Stats.N)) + varsToKeep.addAll(listOf(Stats.R2, Stats.R2_ADJ, Stats.N, Stats.METHOD)) varsToKeep.addAll(layerConfig.ownData.variables().filter { it.label.contains("smooth_eq_coef_") }) return HashSet() + From f3cb9c0bc3630497b93387f94a20fe5656418d00 Mon Sep 17 00:00:00 2001 From: Ivan Seleznev Date: Wed, 25 Feb 2026 14:47:14 +0100 Subject: [PATCH 04/10] Add AIC and BIC calculations to regression models and update stat summary output --- .../core/plot/base/stat/SmoothStatSummary.kt | 2 ++ .../letsPlot/core/plot/base/stat/Stats.kt | 4 +++ .../base/stat/regression/LinearRegression.kt | 11 +++++-- .../regression/LocalPolynomialRegression.kt | 2 +- .../stat/regression/PolynomialRegression.kt | 10 +++++-- .../stat/regression/RegressionEvaluator.kt | 30 +++++++++++++++++++ .../core/spec/back/PlotConfigBackend.kt | 2 +- 7 files changed, 55 insertions(+), 6 deletions(-) diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt 
b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt index 5c4da892c48..6fb415f1635 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt @@ -116,6 +116,8 @@ class SmoothStatSummary( .put(Stats.R2_ADJ, listOf(regression.adjR2)) .put(Stats.N, listOf(regression.n)) .put(Stats.METHOD, listOf(smoothingMethodLabel(smoothingMethod))) + .put(Stats.AIC, listOf(regression.aic)) + .put(Stats.BIC, listOf(regression.bic)) val vars = myVariables ?: initVariables(regression.eq.size) regression.eq.forEachIndexed { index, coef -> diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/Stats.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/Stats.kt index 5e78d5da390..386f80e7214 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/Stats.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/Stats.kt @@ -41,6 +41,8 @@ object Stats { val R2_ADJ = DataFrame.Variable("..adjr2..", STAT, "adjr2") val R2 = DataFrame.Variable("..r2..", STAT, "r2") val METHOD = DataFrame.Variable("..method..", STAT, "method") + val AIC = DataFrame.Variable("..aic..", STAT, "aic") + val BIC = DataFrame.Variable("..bic..", STAT, "bic") val SCALED = DataFrame.Variable("..scaled..", STAT, "scaled") @@ -81,6 +83,8 @@ object Stats { R2, R2_ADJ, METHOD, + AIC, + BIC, ) val result = HashMap() diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LinearRegression.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LinearRegression.kt index 9f93704259d..d178448f42c 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LinearRegression.kt +++ 
b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LinearRegression.kt @@ -14,7 +14,9 @@ class LinearRegression private constructor ( tCritical: Double, eq: List, r2: Double, -) : RegressionEvaluator(n, meanX, sumXX, model, standardErrorOfEstimate, tCritical, eq, r2) { + aic: Double, + bic: Double +) : RegressionEvaluator(n, meanX, sumXX, model, standardErrorOfEstimate, tCritical, eq, r2, aic, bic) { companion object { fun fit(xs: List, ys: List, confidenceLevel: Double): LinearRegression? { check(xs, ys, confidenceLevel) @@ -40,6 +42,9 @@ class LinearRegression private constructor ( val intercept = meanY - slope * meanX val model: (Double) -> Double = { x -> slope * x + intercept } + val k = 2 // number of predictors (slope, intercept, sigma^2) + val rss = calcRss(xVals, yVals, model) + return LinearRegression( n, meanX, @@ -48,7 +53,9 @@ class LinearRegression private constructor ( calcStandardErrorOfEstimate(xVals, yVals, model, degreesOfFreedom), calcTCritical(degreesOfFreedom, confidenceLevel), listOf(intercept, slope), - calcRSquared(xVals, yVals, model) + calcRSquared(xVals, yVals, model), + calcAic(n, rss, k), + calcBic(n, rss, k) ) } } diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LocalPolynomialRegression.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LocalPolynomialRegression.kt index 4bbeec76c77..6c227b3d0fa 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LocalPolynomialRegression.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LocalPolynomialRegression.kt @@ -16,7 +16,7 @@ class LocalPolynomialRegression private constructor ( standardErrorOfEstimate: Double, tCritical: Double, r2: Double, -) : RegressionEvaluator(n, meanX, sumXX, model, standardErrorOfEstimate, tCritical, emptyList(), r2) { +) : RegressionEvaluator(n, meanX, 
sumXX, model, standardErrorOfEstimate, tCritical, emptyList(), r2, Double.NaN, Double.NaN) { companion object { fun fit(xs: List, ys: List, confidenceLevel: Double, bandwidth: Double): LocalPolynomialRegression? { check(xs, ys, confidenceLevel) diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/PolynomialRegression.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/PolynomialRegression.kt index 71f8e5a8ad4..c0c45997daa 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/PolynomialRegression.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/PolynomialRegression.kt @@ -18,7 +18,9 @@ class PolynomialRegression private constructor ( tCritical: Double, eq: List, r2: Double, -) : RegressionEvaluator(n, meanX, sumXX, model, standardErrorOfEstimate, tCritical, eq, r2) { + aic: Double, + bic: Double +) : RegressionEvaluator(n, meanX, sumXX, model, standardErrorOfEstimate, tCritical, eq, r2, aic, bic) { companion object { fun fit(xs: List, ys: List, confidenceLevel: Double, deg: Int): PolynomialRegression? 
{ check(xs, ys, confidenceLevel) @@ -42,6 +44,8 @@ class PolynomialRegression private constructor ( val polynomial = calculatePolynomial(deg, xVals, yVals) val model: (Double) -> Double = { x -> polynomial.value(x) } + val rss = calcRss(xVals, yVals, model) + return PolynomialRegression( n, meanX, @@ -50,7 +54,9 @@ class PolynomialRegression private constructor ( calcStandardErrorOfEstimate(xVals, yVals, model, degreesOfFreedom), calcTCritical(degreesOfFreedom, confidenceLevel), polynomial.getCoefficients(), - calcRSquared(xVals, yVals, model) + calcRSquared(xVals, yVals, model), + calcAic(n, rss, deg + 2), + calcBic(n, rss, deg + 2) ) } diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionEvaluator.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionEvaluator.kt index 0bf08e572c0..c9ebc862c20 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionEvaluator.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionEvaluator.kt @@ -6,6 +6,7 @@ package org.jetbrains.letsPlot.core.plot.base.stat.regression import org.jetbrains.letsPlot.core.stat.tQuantile +import kotlin.math.PI import kotlin.math.pow import kotlin.math.sqrt @@ -18,6 +19,8 @@ abstract class RegressionEvaluator protected constructor( private val tCritical: Double, val eq: List, val r2: Double, + val aic: Double, + val bic: Double ) { val adjR2: Double get() { @@ -112,5 +115,32 @@ abstract class RegressionEvaluator protected constructor( 1.0 - ssRes / ssTot } } + + fun calcRss(xVals: DoubleArray, yVals: DoubleArray, model: (Double) -> Double): Double { + var rss = 0.0 + for (i in xVals.indices) { + val e = yVals[i] - model(xVals[i]) + rss += e * e + } + return rss + } + + fun calcAic(n: Int, rss: Double, k: Int): Double { + if (n <= 0 || k <= 0 || !rss.isFinite()) return Double.NaN + // Guard against 
log(0) in a perfect fit + val rssSafe = maxOf(rss, 1e-12) + return n * kotlin.math.ln(rssSafe / n) + + n * (1.0 + kotlin.math.ln(2.0 * PI)) + + 2.0 * k + } + + fun calcBic(n: Int, rss: Double, k: Int): Double { + if (n <= 0 || k <= 0 || !rss.isFinite()) return Double.NaN + // Guard against log(0) in a perfect fit + val rssSafe = maxOf(rss, 1e-12) + return n * kotlin.math.ln(rssSafe / n) + + n * (1.0 + kotlin.math.ln(2.0 * PI)) + + k * kotlin.math.ln(n.toDouble()) + } } } diff --git a/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/back/PlotConfigBackend.kt b/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/back/PlotConfigBackend.kt index 22ffb3d2955..3a0b1491424 100644 --- a/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/back/PlotConfigBackend.kt +++ b/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/back/PlotConfigBackend.kt @@ -334,7 +334,7 @@ open class PlotConfigBackend( varsToKeep.removeAll(notRenderedVars) varsToKeep.addAll(renderedVars) - varsToKeep.addAll(listOf(Stats.R2, Stats.R2_ADJ, Stats.N, Stats.METHOD)) + varsToKeep.addAll(listOf(Stats.R2, Stats.R2_ADJ, Stats.N, Stats.AIC, Stats.BIC, Stats.METHOD)) varsToKeep.addAll(layerConfig.ownData.variables().filter { it.label.contains("smooth_eq_coef_") }) return HashSet() + From 9f851be8dc0a2a907eb1f1286071b6240dbbb3f1 Mon Sep 17 00:00:00 2001 From: Ivan Seleznev Date: Thu, 26 Feb 2026 11:35:11 +0100 Subject: [PATCH 05/10] Add F-distribution implementation and integrate F-test results into regression evaluations --- .../core/plot/base/stat/SmoothStatSummary.kt | 5 ++ .../letsPlot/core/plot/base/stat/Stats.kt | 8 ++ .../base/stat/regression/FDistribution.kt | 74 +++++++++++++++++++ .../base/stat/regression/LinearRegression.kt | 13 ++-- .../regression/LocalPolynomialRegression.kt | 5 +- .../stat/regression/PolynomialRegression.kt | 11 ++- .../stat/regression/RegressionEvaluator.kt | 72 +++++++++++++++++- .../core/spec/back/PlotConfigBackend.kt | 2 
+- 8 files changed, 178 insertions(+), 12 deletions(-) create mode 100644 plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/FDistribution.kt diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt index 6fb415f1635..d920ffc10dc 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt @@ -109,6 +109,7 @@ class SmoothStatSummary( ) } ?: return DataFrame.Builder.emptyFrame() + val dfb = DataFrame.Builder() .put(Stats.X, listOf(0.0)) .put(Stats.Y, listOf(0.0)) @@ -118,6 +119,10 @@ class SmoothStatSummary( .put(Stats.METHOD, listOf(smoothingMethodLabel(smoothingMethod))) .put(Stats.AIC, listOf(regression.aic)) .put(Stats.BIC, listOf(regression.bic)) + .put(Stats.F, listOf(regression.fTest.fValue)) + .put(Stats.DF1, listOf(regression.fTest.df1)) + .put(Stats.DF2, listOf(regression.fTest.df2)) + .put(Stats.P, listOf(regression.fTest.pValue)) val vars = myVariables ?: initVariables(regression.eq.size) regression.eq.forEachIndexed { index, coef -> diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/Stats.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/Stats.kt index 386f80e7214..7e711fd2d3d 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/Stats.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/Stats.kt @@ -43,6 +43,10 @@ object Stats { val METHOD = DataFrame.Variable("..method..", STAT, "method") val AIC = DataFrame.Variable("..aic..", STAT, "aic") val BIC = DataFrame.Variable("..bic..", STAT, "bic") + val F = DataFrame.Variable("..f..", STAT, "f") + val DF1 = DataFrame.Variable("..df1..", STAT, 
"df1") + val DF2 = DataFrame.Variable("..df2..", STAT, "df2") + val P = DataFrame.Variable("..p..", STAT, "p") val SCALED = DataFrame.Variable("..scaled..", STAT, "scaled") @@ -85,6 +89,10 @@ object Stats { METHOD, AIC, BIC, + F, + DF1, + DF2, + P, ) val result = HashMap() diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/FDistribution.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/FDistribution.kt new file mode 100644 index 00000000000..42e5e051027 --- /dev/null +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/FDistribution.kt @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2026. JetBrains s.r.o. + * Use of this source code is governed by the MIT license that can be found in the LICENSE file. + */ + +package org.jetbrains.letsPlot.core.plot.base.stat.regression + +import org.jetbrains.letsPlot.core.plot.base.stat.math3.Beta +import kotlin.math.exp +import kotlin.math.ln + +/** + * Implementation of Fisher-Snedecor F-distribution. 
+ * + * @param numeratorDegreesOfFreedom numerator degrees of freedom (df1), must be > 0 + * @param denominatorDegreesOfFreedom denominator degrees of freedom (df2), must be > 0 + */ +internal class FDistribution( + private val numeratorDegreesOfFreedom: Double, + private val denominatorDegreesOfFreedom: Double +) { + + init { + if (numeratorDegreesOfFreedom <= 0.0) { + error("NotStrictlyPositive - NUMERATOR_DEGREES_OF_FREEDOM: $numeratorDegreesOfFreedom") + } + if (denominatorDegreesOfFreedom <= 0.0) { + error("NotStrictlyPositive - DENOMINATOR_DEGREES_OF_FREEDOM: $denominatorDegreesOfFreedom") + } + } + + fun density(x: Double): Double { + if (x < 0.0) return 0.0 + if (x == 0.0) { + // density at 0 can be finite/infinite depending on df1; returning exact handling is optional + return when { + numeratorDegreesOfFreedom < 2.0 -> Double.POSITIVE_INFINITY + numeratorDegreesOfFreedom == 2.0 -> 1.0 + else -> 0.0 + } + } + + val d1 = numeratorDegreesOfFreedom + val d2 = denominatorDegreesOfFreedom + + // log-density for numerical stability: + // f(x) = (d1/d2)^(d1/2) * x^(d1/2 - 1) / B(d1/2, d2/2) * (1 + d1*x/d2)^(-(d1+d2)/2) + val nhalf = d1 / 2.0 + val mhalf = d2 / 2.0 + + val logNumerator = + nhalf * ln(d1 / d2) + + (nhalf - 1.0) * ln(x) - + Beta.logBeta(nhalf, mhalf) + + val logDenominatorPart = ((d1 + d2) / 2.0) * ln(1.0 + (d1 / d2) * x) + + return exp(logNumerator - logDenominatorPart) + } + + fun cumulativeProbability(x: Double): Double { + if (x <= 0.0) return 0.0 + if (x.isNaN()) return Double.NaN + + val d1 = numeratorDegreesOfFreedom + val d2 = denominatorDegreesOfFreedom + + // CDF relation: + // F(x; d1, d2) = I_{ d1*x / (d1*x + d2) }(d1/2, d2/2) + val z = (d1 * x) / (d1 * x + d2) + + return Beta.regularizedBeta(z, d1 / 2.0, d2 / 2.0) + } +} \ No newline at end of file diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LinearRegression.kt 
b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LinearRegression.kt index d178448f42c..4664c95b30f 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LinearRegression.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LinearRegression.kt @@ -15,8 +15,9 @@ class LinearRegression private constructor ( eq: List, r2: Double, aic: Double, - bic: Double -) : RegressionEvaluator(n, meanX, sumXX, model, standardErrorOfEstimate, tCritical, eq, r2, aic, bic) { + bic: Double, + fTest: RegressionEvaluator.Companion.FTestResult +) : RegressionEvaluator(n, meanX, sumXX, model, standardErrorOfEstimate, tCritical, eq, r2, aic, bic, fTest) { companion object { fun fit(xs: List, ys: List, confidenceLevel: Double): LinearRegression? { check(xs, ys, confidenceLevel) @@ -42,8 +43,9 @@ class LinearRegression private constructor ( val intercept = meanY - slope * meanX val model: (Double) -> Double = { x -> slope * x + intercept } - val k = 2 // number of predictors (slope, intercept, sigma^2) + val k = 3 // number of predictors (slope, intercept, sigma^2) val rss = calcRss(xVals, yVals, model) + val r2 = calcRSquared(xVals, yVals, model) return LinearRegression( n, @@ -53,9 +55,10 @@ class LinearRegression private constructor ( calcStandardErrorOfEstimate(xVals, yVals, model, degreesOfFreedom), calcTCritical(degreesOfFreedom, confidenceLevel), listOf(intercept, slope), - calcRSquared(xVals, yVals, model), + r2, calcAic(n, rss, k), - calcBic(n, rss, k) + calcBic(n, rss, k), + calcOverallModelFTest(n, 2, r2) ) } } diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LocalPolynomialRegression.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LocalPolynomialRegression.kt index 6c227b3d0fa..e6ccb459038 100644 --- 
a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LocalPolynomialRegression.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LocalPolynomialRegression.kt @@ -7,6 +7,7 @@ package org.jetbrains.letsPlot.core.plot.base.stat.regression import org.jetbrains.letsPlot.core.plot.base.stat.math3.LoessInterpolator import org.jetbrains.letsPlot.core.plot.base.stat.math3.PolynomialSplineFunction +import org.jetbrains.letsPlot.core.plot.base.stat.regression.RegressionEvaluator.Companion.FTestResult class LocalPolynomialRegression private constructor ( n: Int, @@ -16,7 +17,9 @@ class LocalPolynomialRegression private constructor ( standardErrorOfEstimate: Double, tCritical: Double, r2: Double, -) : RegressionEvaluator(n, meanX, sumXX, model, standardErrorOfEstimate, tCritical, emptyList(), r2, Double.NaN, Double.NaN) { +) : RegressionEvaluator(n, meanX, sumXX, model, standardErrorOfEstimate, tCritical, emptyList(), r2, Double.NaN, Double.NaN, + FTestResult(Double.NaN, Double.NaN, Double.NaN, Double.NaN) +) { companion object { fun fit(xs: List, ys: List, confidenceLevel: Double, bandwidth: Double): LocalPolynomialRegression? 
{ check(xs, ys, confidenceLevel) diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/PolynomialRegression.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/PolynomialRegression.kt index c0c45997daa..b1bf51d2ad4 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/PolynomialRegression.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/PolynomialRegression.kt @@ -19,8 +19,9 @@ class PolynomialRegression private constructor ( eq: List, r2: Double, aic: Double, - bic: Double -) : RegressionEvaluator(n, meanX, sumXX, model, standardErrorOfEstimate, tCritical, eq, r2, aic, bic) { + bic: Double, + fTest: RegressionEvaluator.Companion.FTestResult, +) : RegressionEvaluator(n, meanX, sumXX, model, standardErrorOfEstimate, tCritical, eq, r2, aic, bic, fTest) { companion object { fun fit(xs: List, ys: List, confidenceLevel: Double, deg: Int): PolynomialRegression? 
{ check(xs, ys, confidenceLevel) @@ -44,6 +45,7 @@ class PolynomialRegression private constructor ( val polynomial = calculatePolynomial(deg, xVals, yVals) val model: (Double) -> Double = { x -> polynomial.value(x) } + val r2 = calcRSquared(xVals, yVals, model) val rss = calcRss(xVals, yVals, model) return PolynomialRegression( @@ -54,9 +56,10 @@ class PolynomialRegression private constructor ( calcStandardErrorOfEstimate(xVals, yVals, model, degreesOfFreedom), calcTCritical(degreesOfFreedom, confidenceLevel), polynomial.getCoefficients(), - calcRSquared(xVals, yVals, model), + r2, calcAic(n, rss, deg + 2), - calcBic(n, rss, deg + 2) + calcBic(n, rss, deg + 2), + calcOverallModelFTest(n, deg + 1, r2) ) } diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionEvaluator.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionEvaluator.kt index c9ebc862c20..317408a3636 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionEvaluator.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionEvaluator.kt @@ -5,6 +5,7 @@ package org.jetbrains.letsPlot.core.plot.base.stat.regression +import org.jetbrains.letsPlot.core.plot.base.stat.regression.FDistribution import org.jetbrains.letsPlot.core.stat.tQuantile import kotlin.math.PI import kotlin.math.pow @@ -20,7 +21,8 @@ abstract class RegressionEvaluator protected constructor( val eq: List, val r2: Double, val aic: Double, - val bic: Double + val bic: Double, + val fTest: FTestResult ) { val adjR2: Double get() { @@ -142,5 +144,73 @@ abstract class RegressionEvaluator protected constructor( n * (1.0 + kotlin.math.ln(2.0 * PI)) + k * kotlin.math.ln(n.toDouble()) } + + data class FTestResult( + val fValue: Double, + val pValue: Double, + val df1: Double, + val df2: Double + ) + + internal fun calcOverallModelFTest( + nRaw: Int, + 
eqSizeRaw: Int, // includes intercept + r2Raw: Double + ): FTestResult { + val n = nRaw.toDouble() + val p = (eqSizeRaw - 1).toDouble() // predictors without intercept + + val df1 = p + val df2 = n - p - 1.0 + + // Invalid setup + if (!r2Raw.isFinite() || n <= 0.0 || eqSizeRaw <= 0 || df1 <= 0.0 || df2 <= 0.0) { + return FTestResult(Double.NaN, Double.NaN, df1, df2) + } + + // Clamp possible floating-point overshoots + val r2 = r2Raw.coerceIn(0.0, 1.0) + + // Edge cases + if (r2 == 0.0) { + return FTestResult(0.0, 1.0, df1, df2) + } + if (r2 == 1.0) { + return FTestResult(Double.POSITIVE_INFINITY, 0.0, df1, df2) + } + + val numerator = r2 / df1 + val denominator = (1.0 - r2) / df2 + + if (!numerator.isFinite() || !denominator.isFinite() || denominator <= 0.0) { + return FTestResult(Double.NaN, Double.NaN, df1, df2) + } + + val fValue = numerator / denominator + + if (!fValue.isFinite()) { + return if (fValue == Double.POSITIVE_INFINITY) { + FTestResult(Double.POSITIVE_INFINITY, 0.0, df1, df2) + } else { + FTestResult(Double.NaN, Double.NaN, df1, df2) + } + } + + val pValue = fTestPValueUpperTail(fValue, df1, df2) + return FTestResult(fValue, pValue, df1, df2) + } + + internal fun fTestPValueUpperTail(fValue: Double, df1: Double, df2: Double): Double { + if (!fValue.isFinite() || df1 <= 0.0 || df2 <= 0.0) return Double.NaN + + if (fValue < 0.0) return Double.NaN + if (fValue == Double.POSITIVE_INFINITY) return 0.0 + + val cdf = FDistribution(df1, df2).cumulativeProbability(fValue) + + // Guard against tiny numerical drift outside [0, 1] + return (1.0 - cdf).coerceIn(0.0, 1.0) + } + } } diff --git a/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/back/PlotConfigBackend.kt b/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/back/PlotConfigBackend.kt index 3a0b1491424..da72b449d29 100644 --- a/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/back/PlotConfigBackend.kt +++ 
b/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/back/PlotConfigBackend.kt @@ -334,7 +334,7 @@ open class PlotConfigBackend( varsToKeep.removeAll(notRenderedVars) varsToKeep.addAll(renderedVars) - varsToKeep.addAll(listOf(Stats.R2, Stats.R2_ADJ, Stats.N, Stats.AIC, Stats.BIC, Stats.METHOD)) + varsToKeep.addAll(listOf(Stats.R2, Stats.R2_ADJ, Stats.N, Stats.AIC, Stats.BIC, Stats.METHOD, Stats.F, Stats.DF1, Stats.DF2, Stats.P)) varsToKeep.addAll(layerConfig.ownData.variables().filter { it.label.contains("smooth_eq_coef_") }) return HashSet() + From 8c9c61f256af773a812e9be30b267a5f64e638d7 Mon Sep 17 00:00:00 2001 From: Ivan Seleznev Date: Thu, 26 Feb 2026 14:12:03 +0100 Subject: [PATCH 06/10] =?UTF-8?q?Add=20non-central=20F=20distribution=20ca?= =?UTF-8?q?lculations=20and=20R=C2=B2=20confidence=20interval=20support=20?= =?UTF-8?q?to=20regression=20models?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../core/plot/base/stat/SmoothStatSummary.kt | 3 + .../letsPlot/core/plot/base/stat/Stats.kt | 6 + .../base/stat/regression/FDistribution.kt | 44 +++++ .../base/stat/regression/FNoncentralityCI.kt | 129 ++++++++++++++ .../base/stat/regression/LinearRegression.kt | 11 +- .../regression/LocalPolynomialRegression.kt | 3 +- .../plot/base/stat/regression/NoncentralF.kt | 159 ++++++++++++++++++ .../stat/regression/PolynomialRegression.kt | 7 +- .../stat/regression/RegressionEvaluator.kt | 5 +- .../plot/base/stat/regression/RsquaredCI.kt | 105 ++++++++++++ .../core/spec/back/PlotConfigBackend.kt | 18 +- 11 files changed, 480 insertions(+), 10 deletions(-) create mode 100644 plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/FNoncentralityCI.kt create mode 100644 plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/NoncentralF.kt create mode 100644 plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RsquaredCI.kt 
diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt index d920ffc10dc..615df532527 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt @@ -123,6 +123,9 @@ class SmoothStatSummary( .put(Stats.DF1, listOf(regression.fTest.df1)) .put(Stats.DF2, listOf(regression.fTest.df2)) .put(Stats.P, listOf(regression.fTest.pValue)) + .put(Stats.CI_LEVEL, listOf(regression.r2ConfInt.level)) + .put(Stats.CI_LOW, listOf(regression.r2ConfInt.low)) + .put(Stats.CI_HIGH, listOf(regression.r2ConfInt.high)) val vars = myVariables ?: initVariables(regression.eq.size) regression.eq.forEachIndexed { index, coef -> diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/Stats.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/Stats.kt index 7e711fd2d3d..9d306daea33 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/Stats.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/Stats.kt @@ -47,6 +47,9 @@ object Stats { val DF1 = DataFrame.Variable("..df1..", STAT, "df1") val DF2 = DataFrame.Variable("..df2..", STAT, "df2") val P = DataFrame.Variable("..p..", STAT, "p") + val CI_LEVEL = DataFrame.Variable("..cilevel..", STAT, "cilevel") + val CI_LOW = DataFrame.Variable("..cilow..", STAT, "cilow") + val CI_HIGH = DataFrame.Variable("..cihigh..", STAT, "cihigh") val SCALED = DataFrame.Variable("..scaled..", STAT, "scaled") @@ -93,6 +96,9 @@ object Stats { DF1, DF2, P, + CI_LEVEL, + CI_LOW, + CI_HIGH, ) val result = HashMap() diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/FDistribution.kt 
b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/FDistribution.kt index 42e5e051027..71a5ad075e9 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/FDistribution.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/FDistribution.kt @@ -71,4 +71,48 @@ internal class FDistribution( return Beta.regularizedBeta(z, d1 / 2.0, d2 / 2.0) } + + fun inverseCumulativeProbability(p: Double, absAccuracy: Double = 1e-9): Double { + if (p.isNaN() || p < 0.0 || p > 1.0) { + error("OutOfRange - p: $p") + } + if (p == 0.0) return 0.0 + if (p == 1.0) return Double.POSITIVE_INFINITY + + var lo = 0.0 + var hi = 1.0 + + // Expand upper bound until CDF(hi) >= p + while (true) { + val cdfHi = cumulativeProbability(hi) + if (!cdfHi.isFinite() || cdfHi >= p) break + + hi *= 2.0 + if (!hi.isFinite() || hi > 1e12) { + // F quantiles can be huge for extreme probs / dfs; return best effort bound + break + } + } + + // Binary search + repeat(200) { + val mid = (lo + hi) / 2.0 + val cdfMid = cumulativeProbability(mid) + + if (!cdfMid.isFinite()) { + hi = mid + } else if (cdfMid < p) { + lo = mid + } else { + hi = mid + } + + if ((hi - lo) <= absAccuracy * (1.0 + lo + hi)) { + return (lo + hi) / 2.0 + } + } + + return (lo + hi) / 2.0 + } + } \ No newline at end of file diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/FNoncentralityCI.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/FNoncentralityCI.kt new file mode 100644 index 00000000000..34afdf1891f --- /dev/null +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/FNoncentralityCI.kt @@ -0,0 +1,129 @@ +package org.jetbrains.letsPlot.core.plot.base.stat.regression + +internal data class NcpConfIntResult( + val estimate: Double, + val low: Double, + val high: Double +) + +internal object 
FNoncentralityCI { + + // Same mapping as confintr source (Smithson p. 38), visible in ci_f_ncp code: + // f_to_ncp <- function(f, df1, df2) { df1 * f * (df1 + df2 + 1) / df2 } + fun fToNcp(f: Double, df1: Double, df2: Double): Double { + if (!f.isFinite() || f < 0.0 || df1 <= 0.0 || df2 <= 0.0) return Double.NaN + return df1 * f * (df1 + df2 + 1.0) / df2 + } + + /** + * Two-sided CI for non-centrality parameter lambda of the F distribution. + * Mirrors confintr::ci_f_ncp logic (test inversion on noncentral F CDF). + * + * probs are lower/upper cumulative probs for the CI limits, e.g. (0.025, 0.975). + */ + fun ciFNoncentrality( + fStat: Double, + df1: Double, + df2: Double, + probsLow: Double, + probsHigh: Double, + absTol: Double = 1e-10 + ): NcpConfIntResult { + if (!fStat.isFinite() || fStat < 0.0 || df1 <= 0.0 || df2 <= 0.0) { + return NcpConfIntResult(Double.NaN, Double.NaN, Double.NaN) + } + if (!(probsLow in 0.0..1.0) || !(probsHigh in 0.0..1.0) || probsLow > probsHigh) { + return NcpConfIntResult(Double.NaN, Double.NaN, Double.NaN) + } + + val estimate = fToNcp(fStat, df1, df2) + if (!estimate.isFinite()) { + return NcpConfIntResult(Double.NaN, Double.NaN, Double.NaN) + } + + // confintr uses iprobs = 1 - probs + val targetLower = 1.0 - probsLow // for lower CI root + val targetUpper = 1.0 - probsHigh // for upper CI root + + val low = if (probsLow == 0.0) { + 0.0 + } else { + val fn: (Double) -> Double = { ncp -> + NoncentralF.cumulativeProbability(fStat, df1, df2, ncp) - targetLower + } + // R code searches interval [0, estimate] + bisectionRootOrNull(fn, 0.0, estimate.coerceAtLeast(0.0), absTol) ?: 0.0 + } + + val high = if (probsHigh == 1.0) { + Double.POSITIVE_INFINITY + } else { + val fn: (Double) -> Double = { ncp -> + NoncentralF.cumulativeProbability(fStat, df1, df2, ncp) - targetUpper + } + + // confintr upper heuristic: + // pmax(4 * estimate, stat * df1 * 4, df1 * 100) + var upper = maxOf(4.0 * estimate, fStat * df1 * 4.0, df1 * 100.0) + + // 
expand if needed until sign change + var root = bisectionRootOrNull(fn, estimate.coerceAtLeast(0.0), upper, absTol) + var tries = 0 + while (root == null && tries < 20 && upper.isFinite()) { + upper *= 2.0 + root = bisectionRootOrNull(fn, estimate.coerceAtLeast(0.0), upper, absTol) + tries++ + } + root ?: Double.POSITIVE_INFINITY + } + + return NcpConfIntResult(estimate, low, high) + } + + /** + * Monotone root search by bisection. Returns null if no sign change / invalid. + */ + private fun bisectionRootOrNull( + f: (Double) -> Double, + a0: Double, + b0: Double, + absTol: Double, + maxIter: Int = 200 + ): Double? { + var a = a0 + var b = b0 + if (!a.isFinite() || !b.isFinite() || a > b) return null + + var fa = f(a) + var fb = f(b) + if (!fa.isFinite() || !fb.isFinite()) return null + + // exact hits + if (fa == 0.0) return a + if (fb == 0.0) return b + + // need sign change + if (fa * fb > 0.0) return null + + repeat(maxIter) { + val m = 0.5 * (a + b) + val fm = f(m) + if (!fm.isFinite()) return null + + if (fm == 0.0) return m + if ((b - a) <= absTol * (1.0 + kotlin.math.abs(a) + kotlin.math.abs(b))) { + return 0.5 * (a + b) + } + + if (fa * fm <= 0.0) { + b = m + fb = fm + } else { + a = m + fa = fm + } + } + + return 0.5 * (a + b) + } +} diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LinearRegression.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LinearRegression.kt index 4664c95b30f..a86b4c0a4ea 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LinearRegression.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LinearRegression.kt @@ -5,6 +5,8 @@ package org.jetbrains.letsPlot.core.plot.base.stat.regression +import org.jetbrains.letsPlot.core.plot.base.stat.regression.RsquaredCI.ciRsquaredLikeConfintrFromR2 + class LinearRegression private constructor ( n: Int, meanX: 
Double, @@ -16,8 +18,10 @@ class LinearRegression private constructor ( r2: Double, aic: Double, bic: Double, - fTest: RegressionEvaluator.Companion.FTestResult -) : RegressionEvaluator(n, meanX, sumXX, model, standardErrorOfEstimate, tCritical, eq, r2, aic, bic, fTest) { + fTest: RegressionEvaluator.Companion.FTestResult, + r2ConfInt: R2ConfIntResult + +) : RegressionEvaluator(n, meanX, sumXX, model, standardErrorOfEstimate, tCritical, eq, r2, aic, bic, fTest, r2ConfInt) { companion object { fun fit(xs: List, ys: List, confidenceLevel: Double): LinearRegression? { check(xs, ys, confidenceLevel) @@ -58,7 +62,8 @@ class LinearRegression private constructor ( r2, calcAic(n, rss, k), calcBic(n, rss, k), - calcOverallModelFTest(n, 2, r2) + calcOverallModelFTest(n, 2, r2), + ciRsquaredLikeConfintrFromR2(n, 2, r2, confidenceLevel) ) } } diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LocalPolynomialRegression.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LocalPolynomialRegression.kt index e6ccb459038..84f4f841c0d 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LocalPolynomialRegression.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LocalPolynomialRegression.kt @@ -18,7 +18,8 @@ class LocalPolynomialRegression private constructor ( tCritical: Double, r2: Double, ) : RegressionEvaluator(n, meanX, sumXX, model, standardErrorOfEstimate, tCritical, emptyList(), r2, Double.NaN, Double.NaN, - FTestResult(Double.NaN, Double.NaN, Double.NaN, Double.NaN) + FTestResult(Double.NaN, Double.NaN, Double.NaN, Double.NaN), + R2ConfIntResult(Double.NaN, Double.NaN, Double.NaN) ) { companion object { fun fit(xs: List, ys: List, confidenceLevel: Double, bandwidth: Double): LocalPolynomialRegression? 
{ diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/NoncentralF.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/NoncentralF.kt new file mode 100644 index 00000000000..57190915e49 --- /dev/null +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/NoncentralF.kt @@ -0,0 +1,159 @@ +package org.jetbrains.letsPlot.core.plot.base.stat.regression + +import org.jetbrains.letsPlot.core.plot.base.stat.math3.Beta +import kotlin.math.abs +import kotlin.math.exp +import kotlin.math.ln + +internal object NoncentralF { + + /** + * CDF of the non-central F distribution via Poisson mixture of central F distributions. + * + * P(F <= x) = sum_{j>=0} w_j * I_z(df1/2 + j, df2/2), + * where z = df1*x / (df1*x + df2), and w_j ~ Poisson(lambda/2). + * + * This is the standard representation and is the key ingredient needed to reproduce + * confintr::ci_f_ncp / ci_rsquared logic. + */ + fun cumulativeProbability( + x: Double, + df1: Double, + df2: Double, + ncp: Double, + eps: Double = 1e-12, + maxTerms: Int = 100000 + ): Double { + if (x.isNaN() || df1 <= 0.0 || df2 <= 0.0 || ncp < 0.0) return Double.NaN + if (x <= 0.0) return 0.0 + if (!x.isFinite()) return 1.0 + + val z = (df1 * x) / (df1 * x + df2) + if (!z.isFinite()) return Double.NaN + if (z <= 0.0) return 0.0 + if (z >= 1.0) return 1.0 + + val a0 = df1 / 2.0 + val b = df2 / 2.0 + val mu = ncp / 2.0 + + // Central F case + if (mu == 0.0) { + return Beta.regularizedBeta(z, a0, b).coerceIn(0.0, 1.0) + } + + // Poisson weights recurrence from j = 0 + // w0 = exp(-mu), w_{j+1} = w_j * mu / (j+1) + var w = exp(-mu) + if (w == 0.0) { + // For large mu, forward start underflows. Fallback to a center-start summation. 
+ return cumulativeProbabilityCenterSummation(x, df1, df2, ncp, eps, maxTerms) + } + + var sum = 0.0 + var weightSum = 0.0 + var j = 0 + + while (j < maxTerms) { + val a = a0 + j + val termCdf = Beta.regularizedBeta(z, a, b) + val term = w * termCdf + + sum += term + weightSum += w + + // stop when remaining probability mass is tiny and terms are tiny + if (w < eps && (1.0 - weightSum) < 10 * eps) { + break + } + + j += 1 + w *= mu / j.toDouble() + + if (!w.isFinite()) return Double.NaN + if (w == 0.0 && (1.0 - weightSum) < 1e-8) break + } + + // Numerical guard + return sum.coerceIn(0.0, 1.0) + } + + /** + * More stable fallback for large ncp when exp(-mu) underflows. + * Start near the Poisson mode and sum both directions with recursive weights. + */ + private fun cumulativeProbabilityCenterSummation( + x: Double, + df1: Double, + df2: Double, + ncp: Double, + eps: Double, + maxTerms: Int + ): Double { + val z = (df1 * x) / (df1 * x + df2) + val a0 = df1 / 2.0 + val b = df2 / 2.0 + val mu = ncp / 2.0 + + val m = kotlin.math.floor(mu).toInt().coerceAtLeast(0) + + // log w_m = -mu + m ln(mu) - ln(m!) + var logFact = 0.0 + for (k in 2..m) logFact += ln(k.toDouble()) + var wM = exp(-mu + if (m == 0) 0.0 else m * ln(mu) - logFact) + + if (!wM.isFinite() || wM == 0.0) { + // If still underflowed, we are in an extreme numeric regime. + // Best effort fallback: return NaN. 
+ return Double.NaN + } + + var sum = 0.0 + var weightAccum = 0.0 + + // Center term + run { + val cdfM = Beta.regularizedBeta(z, a0 + m, b) + sum += wM * cdfM + weightAccum += wM + } + + // Upward + var wUp = wM + var j = m + var upSteps = 0 + while (upSteps < maxTerms) { + j += 1 + wUp *= mu / j.toDouble() + if (!wUp.isFinite() || wUp <= 0.0) break + + val cdf = Beta.regularizedBeta(z, a0 + j, b) + sum += wUp * cdf + weightAccum += wUp + + upSteps++ + if (wUp < eps) break + } + + // Downward + var wDown = wM + j = m + var downSteps = 0 + while (j > 0 && downSteps < maxTerms) { + // w_{j-1} = w_j * j / mu + wDown *= j.toDouble() / mu + j -= 1 + if (!wDown.isFinite() || wDown <= 0.0) break + + val cdf = Beta.regularizedBeta(z, a0 + j, b) + sum += wDown * cdf + weightAccum += wDown + + downSteps++ + if (wDown < eps) break + } + + // Not all Poisson mass may be covered in extreme cases; still clamp. + return sum.coerceIn(0.0, 1.0) + } +} diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/PolynomialRegression.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/PolynomialRegression.kt index b1bf51d2ad4..051c3dde830 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/PolynomialRegression.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/PolynomialRegression.kt @@ -8,6 +8,7 @@ package org.jetbrains.letsPlot.core.plot.base.stat.regression import org.jetbrains.letsPlot.core.plot.base.stat.math3.ForsythePolynomialGenerator import org.jetbrains.letsPlot.core.plot.base.stat.math3.PolynomialFunction import org.jetbrains.letsPlot.core.plot.base.stat.math3.times +import org.jetbrains.letsPlot.core.plot.base.stat.regression.RsquaredCI.ciRsquaredLikeConfintrFromR2 class PolynomialRegression private constructor ( n: Int, @@ -21,7 +22,8 @@ class PolynomialRegression private constructor ( aic: Double, bic: 
Double, fTest: RegressionEvaluator.Companion.FTestResult, -) : RegressionEvaluator(n, meanX, sumXX, model, standardErrorOfEstimate, tCritical, eq, r2, aic, bic, fTest) { + r2ConfInt: R2ConfIntResult, +) : RegressionEvaluator(n, meanX, sumXX, model, standardErrorOfEstimate, tCritical, eq, r2, aic, bic, fTest, r2ConfInt) { companion object { fun fit(xs: List, ys: List, confidenceLevel: Double, deg: Int): PolynomialRegression? { check(xs, ys, confidenceLevel) @@ -59,7 +61,8 @@ class PolynomialRegression private constructor ( r2, calcAic(n, rss, deg + 2), calcBic(n, rss, deg + 2), - calcOverallModelFTest(n, deg + 1, r2) + calcOverallModelFTest(n, deg + 1, r2), + ciRsquaredLikeConfintrFromR2(n, deg + 1, r2, confidenceLevel) ) } diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionEvaluator.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionEvaluator.kt index 317408a3636..678fa53246f 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionEvaluator.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionEvaluator.kt @@ -5,7 +5,6 @@ package org.jetbrains.letsPlot.core.plot.base.stat.regression -import org.jetbrains.letsPlot.core.plot.base.stat.regression.FDistribution import org.jetbrains.letsPlot.core.stat.tQuantile import kotlin.math.PI import kotlin.math.pow @@ -22,7 +21,8 @@ abstract class RegressionEvaluator protected constructor( val r2: Double, val aic: Double, val bic: Double, - val fTest: FTestResult + val fTest: FTestResult, + val r2ConfInt: R2ConfIntResult ) { val adjR2: Double get() { @@ -211,6 +211,5 @@ abstract class RegressionEvaluator protected constructor( // Guard against tiny numerical drift outside [0, 1] return (1.0 - cdf).coerceIn(0.0, 1.0) } - } } diff --git 
a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RsquaredCI.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RsquaredCI.kt new file mode 100644 index 00000000000..f73855b66d5 --- /dev/null +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RsquaredCI.kt @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2026. JetBrains s.r.o. + * Use of this source code is governed by the MIT license that can be found in the LICENSE file. + */ + +package org.jetbrains.letsPlot.core.plot.base.stat.regression + +data class R2ConfIntResult( + val level: Double, + val low: Double, + val high: Double +) + +internal object RsquaredCI { + + // confintr::f_to_r2 + // f / (f + df2 / df1) + fun fToR2(f: Double, df1: Double, df2: Double): Double { + if (!f.isFinite() || f < 0.0 || df1 <= 0.0 || df2 <= 0.0) return Double.NaN + return (f / (f + df2 / df1)).coerceIn(0.0, 1.0) + } + + // confintr::ncp_to_r2 + // ncp / (ncp + df1 + df2 + 1) + fun ncpToR2(ncp: Double, df1: Double, df2: Double): Double { + if (ncp.isNaN() || ncp < 0.0 || df1 <= 0.0 || df2 <= 0.0) return Double.NaN + if (ncp == Double.POSITIVE_INFINITY) return 1.0 + return (ncp / (ncp + df1 + df2 + 1.0)).coerceIn(0.0, 1.0) + } + + /** + * CI for population R^2 using the same method as confintr::ci_rsquared(): + * test inversion for noncentral F NCP, then map NCP -> R^2. 
+ */ + fun ciRsquaredLikeConfintr( + fStat: Double, + df1: Double, + df2: Double, + confidenceLevel: Double + ): R2ConfIntResult { + val level = confidenceLevel + if (!fStat.isFinite() || fStat < 0.0 || df1 <= 0.0 || df2 <= 0.0 || level <= 0.0 || level >= 1.0) { + return R2ConfIntResult(level, Double.NaN, Double.NaN) + } + + val alpha = 1.0 - level + val probsLow = alpha / 2.0 + val probsHigh = 1.0 - alpha / 2.0 + + val ncpCi = FNoncentralityCI.ciFNoncentrality( + fStat = fStat, + df1 = df1, + df2 = df2, + probsLow = probsLow, + probsHigh = probsHigh + ) + + val low = ncpToR2(ncpCi.low, df1, df2) + val high = ncpToR2(ncpCi.high, df1, df2) + + return R2ConfIntResult(level, low, high) + } + + /** + * Convenience wrapper from your regression summary fields. + * eqSize includes intercept, so p = eqSize - 1 = df1. + */ + fun ciRsquaredLikeConfintrFromR2( + n: Int, + eqSize: Int, + r2: Double, + confidenceLevel: Double + ): R2ConfIntResult { + if (n <= 0 || eqSize <= 0 || !r2.isFinite()) { + return R2ConfIntResult(confidenceLevel, Double.NaN, Double.NaN) + } + + val df1 = (eqSize - 1).toDouble() + val df2 = n - eqSize.toDouble() // same as n - p - 1 + + if (df1 <= 0.0 || df2 <= 0.0) { + return R2ConfIntResult(confidenceLevel, Double.NaN, Double.NaN) + } + + val r2c = r2.coerceIn(0.0, 1.0) + + // Convert observed R² -> observed F (same relationship used in confintr helpers) + val fStat = when { + r2c == 0.0 -> 0.0 + r2c == 1.0 -> Double.POSITIVE_INFINITY + else -> (r2c / (1.0 - r2c)) * (df2 / df1) + } + + if (fStat == Double.POSITIVE_INFINITY) { + return R2ConfIntResult(confidenceLevel, 1.0, 1.0) + } + + return ciRsquaredLikeConfintr( + fStat = fStat, + df1 = df1, + df2 = df2, + confidenceLevel = confidenceLevel + ) + } +} diff --git a/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/back/PlotConfigBackend.kt b/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/back/PlotConfigBackend.kt index da72b449d29..49ecf252658 100644 --- 
a/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/back/PlotConfigBackend.kt +++ b/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/back/PlotConfigBackend.kt @@ -300,6 +300,22 @@ open class PlotConfigBackend( companion object { + private val SMOOTH_STAT_VARS_TO_KEEP = listOf( + Stats.R2, + Stats.R2_ADJ, + Stats.N, + Stats.AIC, + Stats.BIC, + Stats.METHOD, + Stats.F, + Stats.DF1, + Stats.DF2, + Stats.P, + Stats.CI_LEVEL, + Stats.CI_LOW, + Stats.CI_HIGH + ) + private fun variablesToKeep(facets: PlotFacets, layerConfig: LayerConfig): Set { val stat = layerConfig.stat // keep all original vars @@ -334,7 +350,7 @@ open class PlotConfigBackend( varsToKeep.removeAll(notRenderedVars) varsToKeep.addAll(renderedVars) - varsToKeep.addAll(listOf(Stats.R2, Stats.R2_ADJ, Stats.N, Stats.AIC, Stats.BIC, Stats.METHOD, Stats.F, Stats.DF1, Stats.DF2, Stats.P)) + varsToKeep.addAll(SMOOTH_STAT_VARS_TO_KEEP) varsToKeep.addAll(layerConfig.ownData.variables().filter { it.label.contains("smooth_eq_coef_") }) return HashSet() + From 77cfadb749ca0923f0ab655fa8b526144af83d41 Mon Sep 17 00:00:00 2001 From: Ivan Seleznev Date: Fri, 27 Feb 2026 07:48:50 +0100 Subject: [PATCH 07/10] Refactor regression classes to include xVals and yVals parameters, and update statistical calculations in SmoothStatSummary --- .../core/plot/base/stat/SmoothStatSummary.kt | 33 +- .../plot/base/stat/SmoothStatSummaryUtil.kt | 516 ++++++++++++++++++ .../base/stat/regression/FDistribution.kt | 118 ---- .../base/stat/regression/FNoncentralityCI.kt | 129 ----- .../base/stat/regression/LinearRegression.kt | 25 +- .../regression/LocalPolynomialRegression.kt | 16 +- .../plot/base/stat/regression/NoncentralF.kt | 159 ------ .../stat/regression/PolynomialRegression.kt | 24 +- .../stat/regression/RegressionEvaluator.kt | 142 +---- .../plot/base/stat/regression/RsquaredCI.kt | 105 ---- 10 files changed, 562 insertions(+), 705 deletions(-) create mode 100644 
plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummaryUtil.kt delete mode 100644 plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/FDistribution.kt delete mode 100644 plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/FNoncentralityCI.kt delete mode 100644 plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/NoncentralF.kt delete mode 100644 plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RsquaredCI.kt diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt index 615df532527..a83678cb31c 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt @@ -12,6 +12,13 @@ import org.jetbrains.letsPlot.core.plot.base.DataFrame.Variable.Source.STAT import org.jetbrains.letsPlot.core.plot.base.StatContext import org.jetbrains.letsPlot.core.plot.base.data.TransformVar import org.jetbrains.letsPlot.core.plot.base.stat.SmoothStat.Method +import org.jetbrains.letsPlot.core.plot.base.stat.SmoothStatSummaryUtil.calcAdjustedRSquared +import org.jetbrains.letsPlot.core.plot.base.stat.SmoothStatSummaryUtil.calcAic +import org.jetbrains.letsPlot.core.plot.base.stat.SmoothStatSummaryUtil.calcBic +import org.jetbrains.letsPlot.core.plot.base.stat.SmoothStatSummaryUtil.calcOverallModelFTest +import org.jetbrains.letsPlot.core.plot.base.stat.SmoothStatSummaryUtil.calcRSquared +import org.jetbrains.letsPlot.core.plot.base.stat.SmoothStatSummaryUtil.calcRss +import org.jetbrains.letsPlot.core.plot.base.stat.SmoothStatSummaryUtil.calcR2ConfInt import 
org.jetbrains.letsPlot.core.plot.base.stat.regression.LinearRegression import org.jetbrains.letsPlot.core.plot.base.stat.regression.LocalPolynomialRegression import org.jetbrains.letsPlot.core.plot.base.stat.regression.PolynomialRegression @@ -109,23 +116,27 @@ class SmoothStatSummary( ) } ?: return DataFrame.Builder.emptyFrame() + val r2 = calcRSquared(regression.xVals, regression.yVals, regression.model) + val rss = calcRss(regression.xVals, regression.yVals, regression.model) + val fTest = calcOverallModelFTest(regression.n, regression.eq.size, r2) + val r2ConfInt = calcR2ConfInt(regression.n, regression.eq.size, r2, confidenceLevel) val dfb = DataFrame.Builder() .put(Stats.X, listOf(0.0)) .put(Stats.Y, listOf(0.0)) - .put(Stats.R2, listOf(regression.r2)) - .put(Stats.R2_ADJ, listOf(regression.adjR2)) + .put(Stats.R2, listOf(r2)) + .put(Stats.R2_ADJ, listOf(calcAdjustedRSquared(regression.n, regression.eq.size, r2))) .put(Stats.N, listOf(regression.n)) .put(Stats.METHOD, listOf(smoothingMethodLabel(smoothingMethod))) - .put(Stats.AIC, listOf(regression.aic)) - .put(Stats.BIC, listOf(regression.bic)) - .put(Stats.F, listOf(regression.fTest.fValue)) - .put(Stats.DF1, listOf(regression.fTest.df1)) - .put(Stats.DF2, listOf(regression.fTest.df2)) - .put(Stats.P, listOf(regression.fTest.pValue)) - .put(Stats.CI_LEVEL, listOf(regression.r2ConfInt.level)) - .put(Stats.CI_LOW, listOf(regression.r2ConfInt.low)) - .put(Stats.CI_HIGH, listOf(regression.r2ConfInt.high)) + .put(Stats.AIC, listOf(calcAic(regression.n, rss, regression.eq.size))) + .put(Stats.BIC, listOf(calcBic(regression.n, rss, regression.eq.size))) + .put(Stats.F, listOf(fTest.fValue)) + .put(Stats.DF1, listOf(fTest.df1)) + .put(Stats.DF2, listOf(fTest.df2)) + .put(Stats.P, listOf(fTest.pValue)) + .put(Stats.CI_LEVEL, listOf(r2ConfInt.level)) + .put(Stats.CI_LOW, listOf(r2ConfInt.low)) + .put(Stats.CI_HIGH, listOf(r2ConfInt.high)) val vars = myVariables ?: initVariables(regression.eq.size) 
regression.eq.forEachIndexed { index, coef -> diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummaryUtil.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummaryUtil.kt new file mode 100644 index 00000000000..818ccd0dd3c --- /dev/null +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummaryUtil.kt @@ -0,0 +1,516 @@ +/* + * Copyright (c) 2026. JetBrains s.r.o. + * Use of this source code is governed by the MIT license that can be found in the LICENSE file. + */ + +package org.jetbrains.letsPlot.core.plot.base.stat + +import org.jetbrains.letsPlot.core.plot.base.stat.math3.Beta +import kotlin.math.PI +import kotlin.math.exp +import kotlin.math.ln + +object SmoothStatSummaryUtil { + + data class FTestResult( + val fValue: Double, + val pValue: Double, + val df1: Double, + val df2: Double + ) + + data class R2ConfIntResult( + val level: Double, + val low: Double, + val high: Double + ) + + private data class NcpConfIntResult( + val estimate: Double, + val low: Double, + val high: Double + ) + + internal fun calcR2ConfInt( + n: Int, + eqSize: Int, + r2: Double, + confidenceLevel: Double + ): R2ConfIntResult { + if (n <= 0 || eqSize <= 0 || !r2.isFinite()) { + return R2ConfIntResult(confidenceLevel, Double.NaN, Double.NaN) + } + + val df1 = (eqSize - 1).toDouble() + val df2 = n - eqSize.toDouble() // same as n - p - 1 + + if (df1 <= 0.0 || df2 <= 0.0) { + return R2ConfIntResult(confidenceLevel, Double.NaN, Double.NaN) + } + + // Convert observed R² -> observed F (same relationship used in confintr helpers) + val fStat = when (val r2c = r2.coerceIn(0.0, 1.0)) { + 0.0 -> 0.0 + 1.0 -> Double.POSITIVE_INFINITY + else -> (r2c / (1.0 - r2c)) * (df2 / df1) + } + + if (fStat == Double.POSITIVE_INFINITY) { + return R2ConfIntResult(confidenceLevel, 1.0, 1.0) + } + + return ciRSquaredLikeConfIntR( + fStat = fStat, + df1 = df1, + df2 = df2, + 
confidenceLevel = confidenceLevel + ) + } + + internal fun calcRSquared( + xVals: DoubleArray, + yVals: DoubleArray, + model: (Double) -> Double + ): Double { + val meanY = yVals.average() + + var ssTot = 0.0 + var ssRes = 0.0 + + for (i in xVals.indices) { + val y = yVals[i] + val yHat = model(xVals[i]) + + val diffRes = y - yHat + ssRes += diffRes * diffRes + + val diffMean = y - meanY + ssTot += diffMean * diffMean + } + + return if (ssTot == 0.0) { + 0.0 + } else { + 1.0 - ssRes / ssTot + } + } + + internal fun calcAdjustedRSquared(n: Int, nCoef: Int, r2: Double): Double { + val predictorsCount = (nCoef - 1).coerceAtLeast(0) + if (n <= predictorsCount + 1 || r2.isNaN()) { + return Double.NaN + } + return 1.0 - (1.0 - r2) * ((n - 1.0) / (n - predictorsCount - 1.0)) + } + + internal fun calcRss(xVals: DoubleArray, yVals: DoubleArray, model: (Double) -> Double): Double { + var rss = 0.0 + for (i in xVals.indices) { + val e = yVals[i] - model(xVals[i]) + rss += e * e + } + return rss + } + + internal fun calcAic(n: Int, rss: Double, predictorsCount: Int): Double { + val k = predictorsCount + 1 + if (n <= 0 || k <= 0 || !rss.isFinite()) return Double.NaN + // Guard against log(0) in a perfect fit + val rssSafe = maxOf(rss, 1e-12) + return n * ln(rssSafe / n) + + n * (1.0 + ln(2.0 * PI)) + + 2.0 * k + } + + internal fun calcBic(n: Int, rss: Double, predictorsCount: Int): Double { + val k = predictorsCount + 1 + if (n <= 0 || k <= 0 || !rss.isFinite()) return Double.NaN + // Guard against log(0) in a perfect fit + val rssSafe = maxOf(rss, 1e-12) + return n * ln(rssSafe / n) + + n * (1.0 + ln(2.0 * PI)) + + k * ln(n.toDouble()) + } + + + internal fun calcOverallModelFTest( + nRaw: Int, + eqSizeRaw: Int, // includes intercept + r2Raw: Double + ): FTestResult { + val n = nRaw.toDouble() + val p = (eqSizeRaw - 1).toDouble() // predictors without intercept + + val df1 = p + val df2 = n - p - 1.0 + + // Invalid setup + if (!r2Raw.isFinite() || n <= 0.0 || eqSizeRaw <= 0 || 
df1 <= 0.0 || df2 <= 0.0) { + return FTestResult(Double.NaN, Double.NaN, df1, df2) + } + + // Clamp possible floating-point overshoots + val r2 = r2Raw.coerceIn(0.0, 1.0) + + // Edge cases + if (r2 == 0.0) { + return FTestResult(0.0, 1.0, df1, df2) + } + if (r2 == 1.0) { + return FTestResult(Double.POSITIVE_INFINITY, 0.0, df1, df2) + } + + val numerator = r2 / df1 + val denominator = (1.0 - r2) / df2 + + if (!numerator.isFinite() || !denominator.isFinite() || denominator <= 0.0) { + return FTestResult(Double.NaN, Double.NaN, df1, df2) + } + + val fValue = numerator / denominator + + if (!fValue.isFinite()) { + return if (fValue == Double.POSITIVE_INFINITY) { + FTestResult(Double.POSITIVE_INFINITY, 0.0, df1, df2) + } else { + FTestResult(Double.NaN, Double.NaN, df1, df2) + } + } + + val pValue = fTestPValueUpperTail(fValue, df1, df2) + return FTestResult(fValue, pValue, df1, df2) + } + + private fun fDistributionCdf(x: Double, df1: Double, df2: Double): Double { + if (x <= 0.0) return 0.0 + if (x.isNaN()) return Double.NaN + if (df1 <= 0.0) return Double.NaN + if (df2 <= 0.0) return Double.NaN + + // CDF relation: + // F(x; d1, d2) = I_{ d1*x / (d1*x + d2) }(d1/2, d2/2) + val z = (df1 * x) / (df1 * x + df2) + + return Beta.regularizedBeta(z, df1 / 2.0, df2 / 2.0) + } + + // Same mapping as confintr source (Smithson p. 38), visible in ci_f_ncp code: + // f_to_ncp <- function(f, df1, df2) { df1 * f * (df1 + df2 + 1) / df2 } + private fun fToNcp(f: Double, df1: Double, df2: Double): Double { + if (!f.isFinite() || f < 0.0 || df1 <= 0.0 || df2 <= 0.0) return Double.NaN + return df1 * f * (df1 + df2 + 1.0) / df2 + } + + /** + * Two-sided CI for non-centrality parameter lambda of the F distribution. + * Mirrors confintr::ci_f_ncp logic (test inversion on noncentral F CDF). + * + * probs are lower/upper cumulative probs for the CI limits, e.g. (0.025, 0.975). 
+ */ + private fun ciFNoncentrality( + fStat: Double, + df1: Double, + df2: Double, + probsLow: Double, + probsHigh: Double, + absTol: Double = 1e-10 + ): NcpConfIntResult { + if (!fStat.isFinite() || fStat < 0.0 || df1 <= 0.0 || df2 <= 0.0) { + return NcpConfIntResult(Double.NaN, Double.NaN, Double.NaN) + } + if (probsLow !in 0.0..1.0 || probsHigh !in 0.0..1.0 || probsLow > probsHigh) { + return NcpConfIntResult(Double.NaN, Double.NaN, Double.NaN) + } + + val estimate = fToNcp(fStat, df1, df2) + if (!estimate.isFinite()) { + return NcpConfIntResult(Double.NaN, Double.NaN, Double.NaN) + } + + // confintr uses iprobs = 1 - probs + val targetLower = 1.0 - probsLow // for lower CI root + val targetUpper = 1.0 - probsHigh // for upper CI root + + val low = if (probsLow == 0.0) { + 0.0 + } else { + val fn: (Double) -> Double = { ncp -> + nonCentralFDistributionCDF(fStat, df1, df2, ncp) - targetLower + } + // R code searches interval [0, estimate] + bisectionRootOrNull(fn, 0.0, estimate.coerceAtLeast(0.0), absTol) ?: 0.0 + } + + val high = if (probsHigh == 1.0) { + Double.POSITIVE_INFINITY + } else { + val fn: (Double) -> Double = { ncp -> + nonCentralFDistributionCDF(fStat, df1, df2, ncp) - targetUpper + } + + // confintr upper heuristic: + // pmax(4 * estimate, stat * df1 * 4, df1 * 100) + var upper = maxOf(4.0 * estimate, fStat * df1 * 4.0, df1 * 100.0) + + // expand if needed until sign change + var root = bisectionRootOrNull(fn, estimate.coerceAtLeast(0.0), upper, absTol) + var tries = 0 + while (root == null && tries < 20 && upper.isFinite()) { + upper *= 2.0 + root = bisectionRootOrNull(fn, estimate.coerceAtLeast(0.0), upper, absTol) + tries++ + } + root ?: Double.POSITIVE_INFINITY + } + + return NcpConfIntResult(estimate, low, high) + } + + /** + * Monotone root search by bisection. Returns null if no sign change / invalid. 
+ */ + private fun bisectionRootOrNull( + f: (Double) -> Double, + a0: Double, + b0: Double, + absTol: Double, + maxIter: Int = 200 + ): Double? { + var a = a0 + var b = b0 + if (!a.isFinite() || !b.isFinite() || a > b) return null + + var fa = f(a) + val fb = f(b) + if (!fa.isFinite() || !fb.isFinite()) return null + + // exact hits + if (fa == 0.0) return a + if (fb == 0.0) return b + + // need sign change + if (fa * fb > 0.0) return null + + repeat(maxIter) { + val m = 0.5 * (a + b) + val fm = f(m) + if (!fm.isFinite()) return null + + if (fm == 0.0) return m + if ((b - a) <= absTol * (1.0 + kotlin.math.abs(a) + kotlin.math.abs(b))) { + return 0.5 * (a + b) + } + + if (fa * fm <= 0.0) { + b = m + } else { + a = m + fa = fm + } + } + + return 0.5 * (a + b) + } + + /** + * CDF of the non-central F distribution via Poisson mixture of central F distributions. + * + * P(F <= x) = sum_{j>=0} w_j * I_z(df1/2 + j, df2/2), + * where z = df1*x / (df1*x + df2), and w_j ~ Poisson(lambda/2). + * + * This is the standard representation and is the key ingredient needed to reproduce + * confintr::ci_f_ncp / ci_rsquared logic. + */ + private fun nonCentralFDistributionCDF( + x: Double, + df1: Double, + df2: Double, + ncp: Double, + eps: Double = 1e-12, + maxTerms: Int = 100000 + ): Double { + if (x.isNaN() || df1 <= 0.0 || df2 <= 0.0 || ncp < 0.0) return Double.NaN + if (x <= 0.0) return 0.0 + if (!x.isFinite()) return 1.0 + + val z = (df1 * x) / (df1 * x + df2) + if (!z.isFinite()) return Double.NaN + if (z <= 0.0) return 0.0 + if (z >= 1.0) return 1.0 + + val a0 = df1 / 2.0 + val b = df2 / 2.0 + val mu = ncp / 2.0 + + // Central F case + if (mu == 0.0) { + return Beta.regularizedBeta(z, a0, b).coerceIn(0.0, 1.0) + } + + // Poisson weights recurrence from j = 0 + // w0 = exp(-mu), w_{j+1} = w_j * mu / (j+1) + var w = exp(-mu) + if (w == 0.0) { + // For large mu, forward start underflows. Fallback to a center-start summation. 
+ return cumulativeProbabilityCenterSummation(x, df1, df2, ncp, eps, maxTerms) + } + + var sum = 0.0 + var weightSum = 0.0 + var j = 0 + + while (j < maxTerms) { + val a = a0 + j + val termCdf = Beta.regularizedBeta(z, a, b) + val term = w * termCdf + + sum += term + weightSum += w + + // stop when remaining probability mass is tiny and terms are tiny + if (w < eps && (1.0 - weightSum) < 10 * eps) { + break + } + + j += 1 + w *= mu / j.toDouble() + + if (!w.isFinite()) return Double.NaN + if (w == 0.0 && (1.0 - weightSum) < 1e-8) break + } + + // Numerical guard + return sum.coerceIn(0.0, 1.0) + } + + /** + * More stable fallback for large ncp when exp(-mu) underflows. + * Start near the Poisson mode and sum both directions with recursive weights. + */ + private fun cumulativeProbabilityCenterSummation( + x: Double, + df1: Double, + df2: Double, + ncp: Double, + eps: Double, + maxTerms: Int + ): Double { + val z = (df1 * x) / (df1 * x + df2) + val a0 = df1 / 2.0 + val b = df2 / 2.0 + val mu = ncp / 2.0 + + val m = kotlin.math.floor(mu).toInt().coerceAtLeast(0) + + // log w_m = -mu + m ln(mu) - ln(m!) + var logFact = 0.0 + for (k in 2..m) logFact += ln(k.toDouble()) + val wM = exp(-mu + if (m == 0) 0.0 else m * ln(mu) - logFact) + + if (!wM.isFinite() || wM == 0.0) { + // If still under flowed, we are in an extreme numeric regime. + // Best effort fallback: return NaN. 
+ return Double.NaN + } + + var sum = 0.0 + var weightAccum = 0.0 + + // Center term + run { + val cdfM = Beta.regularizedBeta(z, a0 + m, b) + sum += wM * cdfM + weightAccum += wM + } + + // Upward + var wUp = wM + var j = m + var upSteps = 0 + while (upSteps < maxTerms) { + j += 1 + wUp *= mu / j.toDouble() + if (!wUp.isFinite() || wUp <= 0.0) break + + val cdf = Beta.regularizedBeta(z, a0 + j, b) + sum += wUp * cdf + weightAccum += wUp + + upSteps++ + if (wUp < eps) break + } + + // Downward + var wDown = wM + j = m + var downSteps = 0 + while (j > 0 && downSteps < maxTerms) { + // w_{j-1} = w_j * j / mu + wDown *= j.toDouble() / mu + j -= 1 + if (!wDown.isFinite() || wDown <= 0.0) break + + val cdf = Beta.regularizedBeta(z, a0 + j, b) + sum += wDown * cdf + weightAccum += wDown + + downSteps++ + if (wDown < eps) break + } + + // Not all Poisson mass may be covered in extreme cases; still clamp. + return sum.coerceIn(0.0, 1.0) + } + + // confintr::ncp_to_r2 + // ncp / (ncp + df1 + df2 + 1) + private fun ncpToR2(ncp: Double, df1: Double, df2: Double): Double { + if (ncp.isNaN() || ncp < 0.0 || df1 <= 0.0 || df2 <= 0.0) return Double.NaN + if (ncp == Double.POSITIVE_INFINITY) return 1.0 + return (ncp / (ncp + df1 + df2 + 1.0)).coerceIn(0.0, 1.0) + } + + /** + * CI for population R^2 using the same method as confintr::ci_rsquared(): + * test inversion for noncentral F NCP, then map NCP -> R^2. 
+ */ + private fun ciRSquaredLikeConfIntR( + fStat: Double, + df1: Double, + df2: Double, + confidenceLevel: Double + ): R2ConfIntResult { + if (!fStat.isFinite() || fStat < 0.0 || df1 <= 0.0 || df2 <= 0.0 || confidenceLevel <= 0.0 || confidenceLevel >= 1.0) { + return R2ConfIntResult(confidenceLevel, Double.NaN, Double.NaN) + } + + val alpha = 1.0 - confidenceLevel + val probsLow = alpha / 2.0 + val probsHigh = 1.0 - alpha / 2.0 + + val ncpCi = ciFNoncentrality( + fStat = fStat, + df1 = df1, + df2 = df2, + probsLow = probsLow, + probsHigh = probsHigh + ) + + val low = ncpToR2(ncpCi.low, df1, df2) + val high = ncpToR2(ncpCi.high, df1, df2) + + return R2ConfIntResult(confidenceLevel, low, high) + } + + private fun fTestPValueUpperTail(fValue: Double, df1: Double, df2: Double): Double { + if (!fValue.isFinite() || df1 <= 0.0 || df2 <= 0.0) return Double.NaN + + if (fValue < 0.0) return Double.NaN + if (fValue == Double.POSITIVE_INFINITY) return 0.0 + + val cdf = fDistributionCdf(fValue, df1, df2) + + // Guard against tiny numerical drift outside [0, 1] + return (1.0 - cdf).coerceIn(0.0, 1.0) + } +} \ No newline at end of file diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/FDistribution.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/FDistribution.kt deleted file mode 100644 index 71a5ad075e9..00000000000 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/FDistribution.kt +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) 2026. JetBrains s.r.o. - * Use of this source code is governed by the MIT license that can be found in the LICENSE file. - */ - -package org.jetbrains.letsPlot.core.plot.base.stat.regression - -import org.jetbrains.letsPlot.core.plot.base.stat.math3.Beta -import kotlin.math.exp -import kotlin.math.ln - -/** - * Implementation of Fisher-Snedecor F-distribution. 
- * - * @param numeratorDegreesOfFreedom numerator degrees of freedom (df1), must be > 0 - * @param denominatorDegreesOfFreedom denominator degrees of freedom (df2), must be > 0 - */ -internal class FDistribution( - private val numeratorDegreesOfFreedom: Double, - private val denominatorDegreesOfFreedom: Double -) { - - init { - if (numeratorDegreesOfFreedom <= 0.0) { - error("NotStrictlyPositive - NUMERATOR_DEGREES_OF_FREEDOM: $numeratorDegreesOfFreedom") - } - if (denominatorDegreesOfFreedom <= 0.0) { - error("NotStrictlyPositive - DENOMINATOR_DEGREES_OF_FREEDOM: $denominatorDegreesOfFreedom") - } - } - - fun density(x: Double): Double { - if (x < 0.0) return 0.0 - if (x == 0.0) { - // density at 0 can be finite/infinite depending on df1; returning exact handling is optional - return when { - numeratorDegreesOfFreedom < 2.0 -> Double.POSITIVE_INFINITY - numeratorDegreesOfFreedom == 2.0 -> 1.0 - else -> 0.0 - } - } - - val d1 = numeratorDegreesOfFreedom - val d2 = denominatorDegreesOfFreedom - - // log-density for numerical stability: - // f(x) = (d1/d2)^(d1/2) * x^(d1/2 - 1) / B(d1/2, d2/2) * (1 + d1*x/d2)^(-(d1+d2)/2) - val nhalf = d1 / 2.0 - val mhalf = d2 / 2.0 - - val logNumerator = - nhalf * ln(d1 / d2) + - (nhalf - 1.0) * ln(x) - - Beta.logBeta(nhalf, mhalf) - - val logDenominatorPart = ((d1 + d2) / 2.0) * ln(1.0 + (d1 / d2) * x) - - return exp(logNumerator - logDenominatorPart) - } - - fun cumulativeProbability(x: Double): Double { - if (x <= 0.0) return 0.0 - if (x.isNaN()) return Double.NaN - - val d1 = numeratorDegreesOfFreedom - val d2 = denominatorDegreesOfFreedom - - // CDF relation: - // F(x; d1, d2) = I_{ d1*x / (d1*x + d2) }(d1/2, d2/2) - val z = (d1 * x) / (d1 * x + d2) - - return Beta.regularizedBeta(z, d1 / 2.0, d2 / 2.0) - } - - fun inverseCumulativeProbability(p: Double, absAccuracy: Double = 1e-9): Double { - if (p.isNaN() || p < 0.0 || p > 1.0) { - error("OutOfRange - p: $p") - } - if (p == 0.0) return 0.0 - if (p == 1.0) return 
Double.POSITIVE_INFINITY - - var lo = 0.0 - var hi = 1.0 - - // Expand upper bound until CDF(hi) >= p - while (true) { - val cdfHi = cumulativeProbability(hi) - if (!cdfHi.isFinite() || cdfHi >= p) break - - hi *= 2.0 - if (!hi.isFinite() || hi > 1e12) { - // F quantiles can be huge for extreme probs / dfs; return best effort bound - break - } - } - - // Binary search - repeat(200) { - val mid = (lo + hi) / 2.0 - val cdfMid = cumulativeProbability(mid) - - if (!cdfMid.isFinite()) { - hi = mid - } else if (cdfMid < p) { - lo = mid - } else { - hi = mid - } - - if ((hi - lo) <= absAccuracy * (1.0 + lo + hi)) { - return (lo + hi) / 2.0 - } - } - - return (lo + hi) / 2.0 - } - -} \ No newline at end of file diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/FNoncentralityCI.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/FNoncentralityCI.kt deleted file mode 100644 index 34afdf1891f..00000000000 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/FNoncentralityCI.kt +++ /dev/null @@ -1,129 +0,0 @@ -package org.jetbrains.letsPlot.core.plot.base.stat.regression - -internal data class NcpConfIntResult( - val estimate: Double, - val low: Double, - val high: Double -) - -internal object FNoncentralityCI { - - // Same mapping as confintr source (Smithson p. 38), visible in ci_f_ncp code: - // f_to_ncp <- function(f, df1, df2) { df1 * f * (df1 + df2 + 1) / df2 } - fun fToNcp(f: Double, df1: Double, df2: Double): Double { - if (!f.isFinite() || f < 0.0 || df1 <= 0.0 || df2 <= 0.0) return Double.NaN - return df1 * f * (df1 + df2 + 1.0) / df2 - } - - /** - * Two-sided CI for non-centrality parameter lambda of the F distribution. - * Mirrors confintr::ci_f_ncp logic (test inversion on noncentral F CDF). - * - * probs are lower/upper cumulative probs for the CI limits, e.g. (0.025, 0.975). 
- */ - fun ciFNoncentrality( - fStat: Double, - df1: Double, - df2: Double, - probsLow: Double, - probsHigh: Double, - absTol: Double = 1e-10 - ): NcpConfIntResult { - if (!fStat.isFinite() || fStat < 0.0 || df1 <= 0.0 || df2 <= 0.0) { - return NcpConfIntResult(Double.NaN, Double.NaN, Double.NaN) - } - if (!(probsLow in 0.0..1.0) || !(probsHigh in 0.0..1.0) || probsLow > probsHigh) { - return NcpConfIntResult(Double.NaN, Double.NaN, Double.NaN) - } - - val estimate = fToNcp(fStat, df1, df2) - if (!estimate.isFinite()) { - return NcpConfIntResult(Double.NaN, Double.NaN, Double.NaN) - } - - // confintr uses iprobs = 1 - probs - val targetLower = 1.0 - probsLow // for lower CI root - val targetUpper = 1.0 - probsHigh // for upper CI root - - val low = if (probsLow == 0.0) { - 0.0 - } else { - val fn: (Double) -> Double = { ncp -> - NoncentralF.cumulativeProbability(fStat, df1, df2, ncp) - targetLower - } - // R code searches interval [0, estimate] - bisectionRootOrNull(fn, 0.0, estimate.coerceAtLeast(0.0), absTol) ?: 0.0 - } - - val high = if (probsHigh == 1.0) { - Double.POSITIVE_INFINITY - } else { - val fn: (Double) -> Double = { ncp -> - NoncentralF.cumulativeProbability(fStat, df1, df2, ncp) - targetUpper - } - - // confintr upper heuristic: - // pmax(4 * estimate, stat * df1 * 4, df1 * 100) - var upper = maxOf(4.0 * estimate, fStat * df1 * 4.0, df1 * 100.0) - - // expand if needed until sign change - var root = bisectionRootOrNull(fn, estimate.coerceAtLeast(0.0), upper, absTol) - var tries = 0 - while (root == null && tries < 20 && upper.isFinite()) { - upper *= 2.0 - root = bisectionRootOrNull(fn, estimate.coerceAtLeast(0.0), upper, absTol) - tries++ - } - root ?: Double.POSITIVE_INFINITY - } - - return NcpConfIntResult(estimate, low, high) - } - - /** - * Monotone root search by bisection. Returns null if no sign change / invalid. 
- */ - private fun bisectionRootOrNull( - f: (Double) -> Double, - a0: Double, - b0: Double, - absTol: Double, - maxIter: Int = 200 - ): Double? { - var a = a0 - var b = b0 - if (!a.isFinite() || !b.isFinite() || a > b) return null - - var fa = f(a) - var fb = f(b) - if (!fa.isFinite() || !fb.isFinite()) return null - - // exact hits - if (fa == 0.0) return a - if (fb == 0.0) return b - - // need sign change - if (fa * fb > 0.0) return null - - repeat(maxIter) { - val m = 0.5 * (a + b) - val fm = f(m) - if (!fm.isFinite()) return null - - if (fm == 0.0) return m - if ((b - a) <= absTol * (1.0 + kotlin.math.abs(a) + kotlin.math.abs(b))) { - return 0.5 * (a + b) - } - - if (fa * fm <= 0.0) { - b = m - fb = fm - } else { - a = m - fa = fm - } - } - - return 0.5 * (a + b) - } -} diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LinearRegression.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LinearRegression.kt index a86b4c0a4ea..57ad21006b9 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LinearRegression.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LinearRegression.kt @@ -5,23 +5,17 @@ package org.jetbrains.letsPlot.core.plot.base.stat.regression -import org.jetbrains.letsPlot.core.plot.base.stat.regression.RsquaredCI.ciRsquaredLikeConfintrFromR2 - class LinearRegression private constructor ( n: Int, meanX: Double, sumXX: Double, model: (Double) -> Double, + xVals: DoubleArray, + yVals: DoubleArray, standardErrorOfEstimate: Double, tCritical: Double, eq: List, - r2: Double, - aic: Double, - bic: Double, - fTest: RegressionEvaluator.Companion.FTestResult, - r2ConfInt: R2ConfIntResult - -) : RegressionEvaluator(n, meanX, sumXX, model, standardErrorOfEstimate, tCritical, eq, r2, aic, bic, fTest, r2ConfInt) { +) : RegressionEvaluator(n, meanX, sumXX, model, xVals, yVals, 
standardErrorOfEstimate, tCritical, eq) { companion object { fun fit(xs: List, ys: List, confidenceLevel: Double): LinearRegression? { check(xs, ys, confidenceLevel) @@ -47,23 +41,16 @@ class LinearRegression private constructor ( val intercept = meanY - slope * meanX val model: (Double) -> Double = { x -> slope * x + intercept } - val k = 3 // number of predictors (slope, intercept, sigma^2) - val rss = calcRss(xVals, yVals, model) - val r2 = calcRSquared(xVals, yVals, model) - return LinearRegression( n, meanX, sumXX, model, + xVals, + yVals, calcStandardErrorOfEstimate(xVals, yVals, model, degreesOfFreedom), calcTCritical(degreesOfFreedom, confidenceLevel), - listOf(intercept, slope), - r2, - calcAic(n, rss, k), - calcBic(n, rss, k), - calcOverallModelFTest(n, 2, r2), - ciRsquaredLikeConfintrFromR2(n, 2, r2, confidenceLevel) + listOf(intercept, slope) ) } } diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LocalPolynomialRegression.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LocalPolynomialRegression.kt index 84f4f841c0d..aaf2b5ca6bb 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LocalPolynomialRegression.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LocalPolynomialRegression.kt @@ -7,20 +7,17 @@ package org.jetbrains.letsPlot.core.plot.base.stat.regression import org.jetbrains.letsPlot.core.plot.base.stat.math3.LoessInterpolator import org.jetbrains.letsPlot.core.plot.base.stat.math3.PolynomialSplineFunction -import org.jetbrains.letsPlot.core.plot.base.stat.regression.RegressionEvaluator.Companion.FTestResult class LocalPolynomialRegression private constructor ( n: Int, meanX: Double, sumXX: Double, model: (Double) -> Double, + xVals: DoubleArray, + yVals: DoubleArray, standardErrorOfEstimate: Double, - tCritical: Double, - r2: Double, -) : RegressionEvaluator(n, 
meanX, sumXX, model, standardErrorOfEstimate, tCritical, emptyList(), r2, Double.NaN, Double.NaN, - FTestResult(Double.NaN, Double.NaN, Double.NaN, Double.NaN), - R2ConfIntResult(Double.NaN, Double.NaN, Double.NaN) -) { + tCritical: Double +) : RegressionEvaluator(n, meanX, sumXX, model, xVals, yVals, standardErrorOfEstimate, tCritical, emptyList()) { companion object { fun fit(xs: List, ys: List, confidenceLevel: Double, bandwidth: Double): LocalPolynomialRegression? { check(xs, ys, confidenceLevel) @@ -48,9 +45,10 @@ class LocalPolynomialRegression private constructor ( meanX, sumXX, model, + xVals, + yVals, calcStandardErrorOfEstimate(xVals, yVals, model, degreesOfFreedom), - calcTCritical(degreesOfFreedom, confidenceLevel), - calcRSquared(xVals, yVals, model), + calcTCritical(degreesOfFreedom, confidenceLevel) ) } diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/NoncentralF.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/NoncentralF.kt deleted file mode 100644 index 57190915e49..00000000000 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/NoncentralF.kt +++ /dev/null @@ -1,159 +0,0 @@ -package org.jetbrains.letsPlot.core.plot.base.stat.regression - -import org.jetbrains.letsPlot.core.plot.base.stat.math3.Beta -import kotlin.math.abs -import kotlin.math.exp -import kotlin.math.ln - -internal object NoncentralF { - - /** - * CDF of the non-central F distribution via Poisson mixture of central F distributions. - * - * P(F <= x) = sum_{j>=0} w_j * I_z(df1/2 + j, df2/2), - * where z = df1*x / (df1*x + df2), and w_j ~ Poisson(lambda/2). - * - * This is the standard representation and is the key ingredient needed to reproduce - * confintr::ci_f_ncp / ci_rsquared logic. 
- */ - fun cumulativeProbability( - x: Double, - df1: Double, - df2: Double, - ncp: Double, - eps: Double = 1e-12, - maxTerms: Int = 100000 - ): Double { - if (x.isNaN() || df1 <= 0.0 || df2 <= 0.0 || ncp < 0.0) return Double.NaN - if (x <= 0.0) return 0.0 - if (!x.isFinite()) return 1.0 - - val z = (df1 * x) / (df1 * x + df2) - if (!z.isFinite()) return Double.NaN - if (z <= 0.0) return 0.0 - if (z >= 1.0) return 1.0 - - val a0 = df1 / 2.0 - val b = df2 / 2.0 - val mu = ncp / 2.0 - - // Central F case - if (mu == 0.0) { - return Beta.regularizedBeta(z, a0, b).coerceIn(0.0, 1.0) - } - - // Poisson weights recurrence from j = 0 - // w0 = exp(-mu), w_{j+1} = w_j * mu / (j+1) - var w = exp(-mu) - if (w == 0.0) { - // For large mu, forward start underflows. Fallback to a center-start summation. - return cumulativeProbabilityCenterSummation(x, df1, df2, ncp, eps, maxTerms) - } - - var sum = 0.0 - var weightSum = 0.0 - var j = 0 - - while (j < maxTerms) { - val a = a0 + j - val termCdf = Beta.regularizedBeta(z, a, b) - val term = w * termCdf - - sum += term - weightSum += w - - // stop when remaining probability mass is tiny and terms are tiny - if (w < eps && (1.0 - weightSum) < 10 * eps) { - break - } - - j += 1 - w *= mu / j.toDouble() - - if (!w.isFinite()) return Double.NaN - if (w == 0.0 && (1.0 - weightSum) < 1e-8) break - } - - // Numerical guard - return sum.coerceIn(0.0, 1.0) - } - - /** - * More stable fallback for large ncp when exp(-mu) underflows. - * Start near the Poisson mode and sum both directions with recursive weights. - */ - private fun cumulativeProbabilityCenterSummation( - x: Double, - df1: Double, - df2: Double, - ncp: Double, - eps: Double, - maxTerms: Int - ): Double { - val z = (df1 * x) / (df1 * x + df2) - val a0 = df1 / 2.0 - val b = df2 / 2.0 - val mu = ncp / 2.0 - - val m = kotlin.math.floor(mu).toInt().coerceAtLeast(0) - - // log w_m = -mu + m ln(mu) - ln(m!) 
- var logFact = 0.0 - for (k in 2..m) logFact += ln(k.toDouble()) - var wM = exp(-mu + if (m == 0) 0.0 else m * ln(mu) - logFact) - - if (!wM.isFinite() || wM == 0.0) { - // If still underflowed, we are in an extreme numeric regime. - // Best effort fallback: return NaN. - return Double.NaN - } - - var sum = 0.0 - var weightAccum = 0.0 - - // Center term - run { - val cdfM = Beta.regularizedBeta(z, a0 + m, b) - sum += wM * cdfM - weightAccum += wM - } - - // Upward - var wUp = wM - var j = m - var upSteps = 0 - while (upSteps < maxTerms) { - j += 1 - wUp *= mu / j.toDouble() - if (!wUp.isFinite() || wUp <= 0.0) break - - val cdf = Beta.regularizedBeta(z, a0 + j, b) - sum += wUp * cdf - weightAccum += wUp - - upSteps++ - if (wUp < eps) break - } - - // Downward - var wDown = wM - j = m - var downSteps = 0 - while (j > 0 && downSteps < maxTerms) { - // w_{j-1} = w_j * j / mu - wDown *= j.toDouble() / mu - j -= 1 - if (!wDown.isFinite() || wDown <= 0.0) break - - val cdf = Beta.regularizedBeta(z, a0 + j, b) - sum += wDown * cdf - weightAccum += wDown - - downSteps++ - if (wDown < eps) break - } - - // Not all Poisson mass may be covered in extreme cases; still clamp. 
- return sum.coerceIn(0.0, 1.0) - } -} diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/PolynomialRegression.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/PolynomialRegression.kt index 051c3dde830..391bd334b52 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/PolynomialRegression.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/PolynomialRegression.kt @@ -8,22 +8,18 @@ package org.jetbrains.letsPlot.core.plot.base.stat.regression import org.jetbrains.letsPlot.core.plot.base.stat.math3.ForsythePolynomialGenerator import org.jetbrains.letsPlot.core.plot.base.stat.math3.PolynomialFunction import org.jetbrains.letsPlot.core.plot.base.stat.math3.times -import org.jetbrains.letsPlot.core.plot.base.stat.regression.RsquaredCI.ciRsquaredLikeConfintrFromR2 class PolynomialRegression private constructor ( n: Int, meanX: Double, sumXX: Double, model: (Double) -> Double, + xVals: DoubleArray, + yVals: DoubleArray, standardErrorOfEstimate: Double, tCritical: Double, - eq: List, - r2: Double, - aic: Double, - bic: Double, - fTest: RegressionEvaluator.Companion.FTestResult, - r2ConfInt: R2ConfIntResult, -) : RegressionEvaluator(n, meanX, sumXX, model, standardErrorOfEstimate, tCritical, eq, r2, aic, bic, fTest, r2ConfInt) { + eq: List +) : RegressionEvaluator(n, meanX, sumXX, model, xVals, yVals, standardErrorOfEstimate, tCritical, eq) { companion object { fun fit(xs: List, ys: List, confidenceLevel: Double, deg: Int): PolynomialRegression? 
{ check(xs, ys, confidenceLevel) @@ -47,22 +43,16 @@ class PolynomialRegression private constructor ( val polynomial = calculatePolynomial(deg, xVals, yVals) val model: (Double) -> Double = { x -> polynomial.value(x) } - val r2 = calcRSquared(xVals, yVals, model) - val rss = calcRss(xVals, yVals, model) - return PolynomialRegression( n, meanX, sumXX, model, + xVals, + yVals, calcStandardErrorOfEstimate(xVals, yVals, model, degreesOfFreedom), calcTCritical(degreesOfFreedom, confidenceLevel), - polynomial.getCoefficients(), - r2, - calcAic(n, rss, deg + 2), - calcBic(n, rss, deg + 2), - calcOverallModelFTest(n, deg + 1, r2), - ciRsquaredLikeConfintrFromR2(n, deg + 1, r2, confidenceLevel) + polynomial.getCoefficients() ) } diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionEvaluator.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionEvaluator.kt index 678fa53246f..e1cc7eb0cdc 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionEvaluator.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionEvaluator.kt @@ -6,7 +6,6 @@ package org.jetbrains.letsPlot.core.plot.base.stat.regression import org.jetbrains.letsPlot.core.stat.tQuantile -import kotlin.math.PI import kotlin.math.pow import kotlin.math.sqrt @@ -14,24 +13,13 @@ abstract class RegressionEvaluator protected constructor( val n: Int, private val meanX: Double, private val sumXX: Double, - private val model: (Double) -> Double, + val model: (Double) -> Double, + val xVals: DoubleArray, + val yVals: DoubleArray, private val standardErrorOfEstimate: Double, private val tCritical: Double, - val eq: List, - val r2: Double, - val aic: Double, - val bic: Double, - val fTest: FTestResult, - val r2ConfInt: R2ConfIntResult + val eq: List ) { - val adjR2: Double - get() { - val predictorsCount = (eq.size - 
1).coerceAtLeast(0) - if (n <= predictorsCount + 1 || r2.isNaN()) { - return Double.NaN - } - return 1.0 - (1.0 - r2) * ((n - 1.0) / (n - predictorsCount - 1.0)) - } fun value(x: Double): Double { return model(x) @@ -89,127 +77,5 @@ abstract class RegressionEvaluator protected constructor( Double.NaN } } - - fun calcRSquared( - xVals: DoubleArray, - yVals: DoubleArray, - model: (Double) -> Double - ): Double { - val meanY = yVals.average() - - var ssTot = 0.0 - var ssRes = 0.0 - - for (i in xVals.indices) { - val y = yVals[i] - val yHat = model(xVals[i]) - - val diffRes = y - yHat - ssRes += diffRes * diffRes - - val diffMean = y - meanY - ssTot += diffMean * diffMean - } - - return if (ssTot == 0.0) { - 0.0 - } else { - 1.0 - ssRes / ssTot - } - } - - fun calcRss(xVals: DoubleArray, yVals: DoubleArray, model: (Double) -> Double): Double { - var rss = 0.0 - for (i in xVals.indices) { - val e = yVals[i] - model(xVals[i]) - rss += e * e - } - return rss - } - - fun calcAic(n: Int, rss: Double, k: Int): Double { - if (n <= 0 || k <= 0 || !rss.isFinite()) return Double.NaN - // Guard against log(0) in a perfect fit - val rssSafe = maxOf(rss, 1e-12) - return n * kotlin.math.ln(rssSafe / n) + - n * (1.0 + kotlin.math.ln(2.0 * PI)) + - 2.0 * k - } - - fun calcBic(n: Int, rss: Double, k: Int): Double { - if (n <= 0 || k <= 0 || !rss.isFinite()) return Double.NaN - // Guard against log(0) in a perfect fit - val rssSafe = maxOf(rss, 1e-12) - return n * kotlin.math.ln(rssSafe / n) + - n * (1.0 + kotlin.math.ln(2.0 * PI)) + - k * kotlin.math.ln(n.toDouble()) - } - - data class FTestResult( - val fValue: Double, - val pValue: Double, - val df1: Double, - val df2: Double - ) - - internal fun calcOverallModelFTest( - nRaw: Int, - eqSizeRaw: Int, // includes intercept - r2Raw: Double - ): FTestResult { - val n = nRaw.toDouble() - val p = (eqSizeRaw - 1).toDouble() // predictors without intercept - - val df1 = p - val df2 = n - p - 1.0 - - // Invalid setup - if (!r2Raw.isFinite() 
|| n <= 0.0 || eqSizeRaw <= 0 || df1 <= 0.0 || df2 <= 0.0) { - return FTestResult(Double.NaN, Double.NaN, df1, df2) - } - - // Clamp possible floating-point overshoots - val r2 = r2Raw.coerceIn(0.0, 1.0) - - // Edge cases - if (r2 == 0.0) { - return FTestResult(0.0, 1.0, df1, df2) - } - if (r2 == 1.0) { - return FTestResult(Double.POSITIVE_INFINITY, 0.0, df1, df2) - } - - val numerator = r2 / df1 - val denominator = (1.0 - r2) / df2 - - if (!numerator.isFinite() || !denominator.isFinite() || denominator <= 0.0) { - return FTestResult(Double.NaN, Double.NaN, df1, df2) - } - - val fValue = numerator / denominator - - if (!fValue.isFinite()) { - return if (fValue == Double.POSITIVE_INFINITY) { - FTestResult(Double.POSITIVE_INFINITY, 0.0, df1, df2) - } else { - FTestResult(Double.NaN, Double.NaN, df1, df2) - } - } - - val pValue = fTestPValueUpperTail(fValue, df1, df2) - return FTestResult(fValue, pValue, df1, df2) - } - - internal fun fTestPValueUpperTail(fValue: Double, df1: Double, df2: Double): Double { - if (!fValue.isFinite() || df1 <= 0.0 || df2 <= 0.0) return Double.NaN - - if (fValue < 0.0) return Double.NaN - if (fValue == Double.POSITIVE_INFINITY) return 0.0 - - val cdf = FDistribution(df1, df2).cumulativeProbability(fValue) - - // Guard against tiny numerical drift outside [0, 1] - return (1.0 - cdf).coerceIn(0.0, 1.0) - } } } diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RsquaredCI.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RsquaredCI.kt deleted file mode 100644 index f73855b66d5..00000000000 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RsquaredCI.kt +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2026. JetBrains s.r.o. - * Use of this source code is governed by the MIT license that can be found in the LICENSE file. 
- */ - -package org.jetbrains.letsPlot.core.plot.base.stat.regression - -data class R2ConfIntResult( - val level: Double, - val low: Double, - val high: Double -) - -internal object RsquaredCI { - - // confintr::f_to_r2 - // f / (f + df2 / df1) - fun fToR2(f: Double, df1: Double, df2: Double): Double { - if (!f.isFinite() || f < 0.0 || df1 <= 0.0 || df2 <= 0.0) return Double.NaN - return (f / (f + df2 / df1)).coerceIn(0.0, 1.0) - } - - // confintr::ncp_to_r2 - // ncp / (ncp + df1 + df2 + 1) - fun ncpToR2(ncp: Double, df1: Double, df2: Double): Double { - if (ncp.isNaN() || ncp < 0.0 || df1 <= 0.0 || df2 <= 0.0) return Double.NaN - if (ncp == Double.POSITIVE_INFINITY) return 1.0 - return (ncp / (ncp + df1 + df2 + 1.0)).coerceIn(0.0, 1.0) - } - - /** - * CI for population R^2 using the same method as confintr::ci_rsquared(): - * test inversion for noncentral F NCP, then map NCP -> R^2. - */ - fun ciRsquaredLikeConfintr( - fStat: Double, - df1: Double, - df2: Double, - confidenceLevel: Double - ): R2ConfIntResult { - val level = confidenceLevel - if (!fStat.isFinite() || fStat < 0.0 || df1 <= 0.0 || df2 <= 0.0 || level <= 0.0 || level >= 1.0) { - return R2ConfIntResult(level, Double.NaN, Double.NaN) - } - - val alpha = 1.0 - level - val probsLow = alpha / 2.0 - val probsHigh = 1.0 - alpha / 2.0 - - val ncpCi = FNoncentralityCI.ciFNoncentrality( - fStat = fStat, - df1 = df1, - df2 = df2, - probsLow = probsLow, - probsHigh = probsHigh - ) - - val low = ncpToR2(ncpCi.low, df1, df2) - val high = ncpToR2(ncpCi.high, df1, df2) - - return R2ConfIntResult(level, low, high) - } - - /** - * Convenience wrapper from your regression summary fields. - * eqSize includes intercept, so p = eqSize - 1 = df1. 
- */ - fun ciRsquaredLikeConfintrFromR2( - n: Int, - eqSize: Int, - r2: Double, - confidenceLevel: Double - ): R2ConfIntResult { - if (n <= 0 || eqSize <= 0 || !r2.isFinite()) { - return R2ConfIntResult(confidenceLevel, Double.NaN, Double.NaN) - } - - val df1 = (eqSize - 1).toDouble() - val df2 = n - eqSize.toDouble() // same as n - p - 1 - - if (df1 <= 0.0 || df2 <= 0.0) { - return R2ConfIntResult(confidenceLevel, Double.NaN, Double.NaN) - } - - val r2c = r2.coerceIn(0.0, 1.0) - - // Convert observed R² -> observed F (same relationship used in confintr helpers) - val fStat = when { - r2c == 0.0 -> 0.0 - r2c == 1.0 -> Double.POSITIVE_INFINITY - else -> (r2c / (1.0 - r2c)) * (df2 / df1) - } - - if (fStat == Double.POSITIVE_INFINITY) { - return R2ConfIntResult(confidenceLevel, 1.0, 1.0) - } - - return ciRsquaredLikeConfintr( - fStat = fStat, - df1 = df1, - df2 = df2, - confidenceLevel = confidenceLevel - ) - } -} From 2dfce62e50bc23df036927723016465eecc413cd Mon Sep 17 00:00:00 2001 From: Ivan Seleznev Date: Fri, 27 Feb 2026 16:33:03 +0100 Subject: [PATCH 08/10] fix --- .../core/plot/base/stat/SmoothStatSummary.kt | 472 +++++++++++++++- .../plot/base/stat/SmoothStatSummaryUtil.kt | 516 ------------------ .../base/stat/regression/RegressionUtil.kt | 10 - 3 files changed, 464 insertions(+), 534 deletions(-) delete mode 100644 plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummaryUtil.kt diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt index a83678cb31c..02cde242bc9 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt @@ -12,18 +12,14 @@ import org.jetbrains.letsPlot.core.plot.base.DataFrame.Variable.Source.STAT import 
org.jetbrains.letsPlot.core.plot.base.StatContext import org.jetbrains.letsPlot.core.plot.base.data.TransformVar import org.jetbrains.letsPlot.core.plot.base.stat.SmoothStat.Method -import org.jetbrains.letsPlot.core.plot.base.stat.SmoothStatSummaryUtil.calcAdjustedRSquared -import org.jetbrains.letsPlot.core.plot.base.stat.SmoothStatSummaryUtil.calcAic -import org.jetbrains.letsPlot.core.plot.base.stat.SmoothStatSummaryUtil.calcBic -import org.jetbrains.letsPlot.core.plot.base.stat.SmoothStatSummaryUtil.calcOverallModelFTest -import org.jetbrains.letsPlot.core.plot.base.stat.SmoothStatSummaryUtil.calcRSquared -import org.jetbrains.letsPlot.core.plot.base.stat.SmoothStatSummaryUtil.calcRss -import org.jetbrains.letsPlot.core.plot.base.stat.SmoothStatSummaryUtil.calcR2ConfInt +import org.jetbrains.letsPlot.core.plot.base.stat.math3.Beta import org.jetbrains.letsPlot.core.plot.base.stat.regression.LinearRegression import org.jetbrains.letsPlot.core.plot.base.stat.regression.LocalPolynomialRegression import org.jetbrains.letsPlot.core.plot.base.stat.regression.PolynomialRegression -import org.jetbrains.letsPlot.core.plot.base.stat.regression.smoothingMethodLabel import org.jetbrains.letsPlot.core.plot.base.util.SamplingUtil +import kotlin.math.PI +import kotlin.math.exp +import kotlin.math.ln import kotlin.random.Random // TODO: fix duplication SmoothStat @@ -155,6 +151,466 @@ class SmoothStatSummary( return myVariables!! 
} + + private data class FTestResult( + val fValue: Double, + val pValue: Double, + val df1: Double, + val df2: Double + ) + + private data class R2ConfIntResult( + val level: Double, + val low: Double, + val high: Double + ) + + private data class NcpConfIntResult( + val estimate: Double, + val low: Double, + val high: Double + ) + + private fun smoothingMethodLabel(method: Method): String { + return when (method) { + Method.LM -> "lm" + Method.LOESS -> "loess" + Method.GLM -> "glm" + Method.GAM -> "gam" + Method.RLM -> "rlm" + } + } + + private fun calcR2ConfInt( + n: Int, + eqSize: Int, + r2: Double, + confidenceLevel: Double + ): R2ConfIntResult { + if (n <= 0 || eqSize <= 0 || !r2.isFinite()) { + return R2ConfIntResult(confidenceLevel, Double.NaN, Double.NaN) + } + + val df1 = (eqSize - 1).toDouble() + val df2 = n - eqSize.toDouble() + + if (df1 <= 0.0 || df2 <= 0.0) { + return R2ConfIntResult(confidenceLevel, Double.NaN, Double.NaN) + } + + val fStat = when (val r2c = r2.coerceIn(0.0, 1.0)) { + 0.0 -> 0.0 + 1.0 -> Double.POSITIVE_INFINITY + else -> (r2c / (1.0 - r2c)) * (df2 / df1) + } + + if (fStat == Double.POSITIVE_INFINITY) { + return R2ConfIntResult(confidenceLevel, 1.0, 1.0) + } + + return ciRSquaredLikeConfIntR( + fStat = fStat, + df1 = df1, + df2 = df2, + confidenceLevel = confidenceLevel + ) + } + + private fun calcRSquared( + xVals: DoubleArray, + yVals: DoubleArray, + model: (Double) -> Double + ): Double { + val meanY = yVals.average() + + var ssTot = 0.0 + var ssRes = 0.0 + + for (i in xVals.indices) { + val y = yVals[i] + val yHat = model(xVals[i]) + + val diffRes = y - yHat + ssRes += diffRes * diffRes + + val diffMean = y - meanY + ssTot += diffMean * diffMean + } + + return if (ssTot == 0.0) { + 0.0 + } else { + 1.0 - ssRes / ssTot + } + } + + private fun calcAdjustedRSquared(n: Int, nCoef: Int, r2: Double): Double { + val predictorsCount = (nCoef - 1).coerceAtLeast(0) + if (n <= predictorsCount + 1 || r2.isNaN()) { + return Double.NaN + } + 
return 1.0 - (1.0 - r2) * ((n - 1.0) / (n - predictorsCount - 1.0)) + } + + private fun calcRss(xVals: DoubleArray, yVals: DoubleArray, model: (Double) -> Double): Double { + var rss = 0.0 + for (i in xVals.indices) { + val e = yVals[i] - model(xVals[i]) + rss += e * e + } + return rss + } + + private fun calcAic(n: Int, rss: Double, predictorsCount: Int): Double { + val k = predictorsCount + 1 + if (n <= 0 || k <= 0 || !rss.isFinite()) return Double.NaN + // Guard against log(0) in a perfect fit + val rssSafe = maxOf(rss, 1e-12) + return n * ln(rssSafe / n) + + n * (1.0 + ln(2.0 * PI)) + + 2.0 * k + } + + private fun calcBic(n: Int, rss: Double, predictorsCount: Int): Double { + val k = predictorsCount + 1 + if (n <= 0 || k <= 0 || !rss.isFinite()) return Double.NaN + // Guard against log(0) in a perfect fit + val rssSafe = maxOf(rss, 1e-12) + return n * ln(rssSafe / n) + + n * (1.0 + ln(2.0 * PI)) + + k * ln(n.toDouble()) + } + + + private fun calcOverallModelFTest( + nRaw: Int, + eqSizeRaw: Int, + r2Raw: Double + ): FTestResult { + val n = nRaw.toDouble() + val p = (eqSizeRaw - 1).toDouble() + + val df1 = p + val df2 = n - p - 1.0 + + if (!r2Raw.isFinite() || n <= 0.0 || eqSizeRaw <= 0 || df1 <= 0.0 || df2 <= 0.0) { + return FTestResult(Double.NaN, Double.NaN, df1, df2) + } + + val r2 = r2Raw.coerceIn(0.0, 1.0) + + if (r2 == 0.0) { + return FTestResult(0.0, 1.0, df1, df2) + } + if (r2 == 1.0) { + return FTestResult(Double.POSITIVE_INFINITY, 0.0, df1, df2) + } + + val numerator = r2 / df1 + val denominator = (1.0 - r2) / df2 + + if (!numerator.isFinite() || !denominator.isFinite() || denominator <= 0.0) { + return FTestResult(Double.NaN, Double.NaN, df1, df2) + } + + val fValue = numerator / denominator + + if (!fValue.isFinite()) { + return if (fValue == Double.POSITIVE_INFINITY) { + FTestResult(Double.POSITIVE_INFINITY, 0.0, df1, df2) + } else { + FTestResult(Double.NaN, Double.NaN, df1, df2) + } + } + + val pValue = fTestPValueUpperTail(fValue, df1, df2) + 
return FTestResult(fValue, pValue, df1, df2) + } + + private fun fDistributionCdf(x: Double, df1: Double, df2: Double): Double { + if (x <= 0.0) return 0.0 + if (x.isNaN()) return Double.NaN + if (df1 <= 0.0) return Double.NaN + if (df2 <= 0.0) return Double.NaN + + val z = (df1 * x) / (df1 * x + df2) + + return Beta.regularizedBeta(z, df1 / 2.0, df2 / 2.0) + } + + private fun fToNcp(f: Double, df1: Double, df2: Double): Double { + if (!f.isFinite() || f < 0.0 || df1 <= 0.0 || df2 <= 0.0) return Double.NaN + return df1 * f * (df1 + df2 + 1.0) / df2 + } + + private fun ciFNoncentrality( + fStat: Double, + df1: Double, + df2: Double, + probsLow: Double, + probsHigh: Double, + absTol: Double = 1e-10 + ): NcpConfIntResult { + if (!fStat.isFinite() || fStat < 0.0 || df1 <= 0.0 || df2 <= 0.0) { + return NcpConfIntResult(Double.NaN, Double.NaN, Double.NaN) + } + if (probsLow !in 0.0..1.0 || probsHigh !in 0.0..1.0 || probsLow > probsHigh) { + return NcpConfIntResult(Double.NaN, Double.NaN, Double.NaN) + } + + val estimate = fToNcp(fStat, df1, df2) + if (!estimate.isFinite()) { + return NcpConfIntResult(Double.NaN, Double.NaN, Double.NaN) + } + + val targetLower = 1.0 - probsLow + val targetUpper = 1.0 - probsHigh + + val low = if (probsLow == 0.0) { + 0.0 + } else { + val fn: (Double) -> Double = { ncp -> + nonCentralFDistributionCDF(fStat, df1, df2, ncp) - targetLower + } + + bisectionRootOrNull(fn, 0.0, estimate.coerceAtLeast(0.0), absTol) ?: 0.0 + } + + val high = if (probsHigh == 1.0) { + Double.POSITIVE_INFINITY + } else { + val fn: (Double) -> Double = { ncp -> + nonCentralFDistributionCDF(fStat, df1, df2, ncp) - targetUpper + } + + var upper = maxOf(4.0 * estimate, fStat * df1 * 4.0, df1 * 100.0) + + var root = bisectionRootOrNull(fn, estimate.coerceAtLeast(0.0), upper, absTol) + var tries = 0 + while (root == null && tries < 20 && upper.isFinite()) { + upper *= 2.0 + root = bisectionRootOrNull(fn, estimate.coerceAtLeast(0.0), upper, absTol) + tries++ + } + root ?: 
Double.POSITIVE_INFINITY + } + + return NcpConfIntResult(estimate, low, high) + } + + private fun bisectionRootOrNull( + f: (Double) -> Double, + a0: Double, + b0: Double, + absTol: Double, + maxIter: Int = 200 + ): Double? { + var a = a0 + var b = b0 + if (!a.isFinite() || !b.isFinite() || a > b) return null + + var fa = f(a) + val fb = f(b) + if (!fa.isFinite() || !fb.isFinite()) return null + + if (fa == 0.0) return a + if (fb == 0.0) return b + + if (fa * fb > 0.0) return null + + repeat(maxIter) { + val m = 0.5 * (a + b) + val fm = f(m) + if (!fm.isFinite()) return null + + if (fm == 0.0) return m + if ((b - a) <= absTol * (1.0 + kotlin.math.abs(a) + kotlin.math.abs(b))) { + return 0.5 * (a + b) + } + + if (fa * fm <= 0.0) { + b = m + } else { + a = m + fa = fm + } + } + + return 0.5 * (a + b) + } + + // CDF of the non-central F distribution via Poisson mixture of central F distributions. + private fun nonCentralFDistributionCDF( + x: Double, + df1: Double, + df2: Double, + ncp: Double, + eps: Double = 1e-12, + maxTerms: Int = 100000 + ): Double { + if (x.isNaN() || df1 <= 0.0 || df2 <= 0.0 || ncp < 0.0) return Double.NaN + if (x <= 0.0) return 0.0 + if (!x.isFinite()) return 1.0 + + val z = (df1 * x) / (df1 * x + df2) + if (!z.isFinite()) return Double.NaN + if (z <= 0.0) return 0.0 + if (z >= 1.0) return 1.0 + + val a0 = df1 / 2.0 + val b = df2 / 2.0 + val mu = ncp / 2.0 + + if (mu == 0.0) { + return Beta.regularizedBeta(z, a0, b).coerceIn(0.0, 1.0) + } + + var w = exp(-mu) + if (w == 0.0) { + return cumulativeProbabilityCenterSummation(x, df1, df2, ncp, eps, maxTerms) + } + + var sum = 0.0 + var weightSum = 0.0 + var j = 0 + + while (j < maxTerms) { + val a = a0 + j + val termCdf = Beta.regularizedBeta(z, a, b) + val term = w * termCdf + + sum += term + weightSum += w + + if (w < eps && (1.0 - weightSum) < 10 * eps) { + break + } + + j += 1 + w *= mu / j.toDouble() + + if (!w.isFinite()) return Double.NaN + if (w == 0.0 && (1.0 - weightSum) < 1e-8) break + 
} + + return sum.coerceIn(0.0, 1.0) + } + + /** + * More stable fallback for large ncp when exp(-mu) underflows. + * Start near the Poisson mode and sum both directions with recursive weights. + */ + private fun cumulativeProbabilityCenterSummation( + x: Double, + df1: Double, + df2: Double, + ncp: Double, + eps: Double, + maxTerms: Int + ): Double { + val z = (df1 * x) / (df1 * x + df2) + val a0 = df1 / 2.0 + val b = df2 / 2.0 + val mu = ncp / 2.0 + + val m = kotlin.math.floor(mu).toInt().coerceAtLeast(0) + + var logFact = 0.0 + for (k in 2..m) logFact += ln(k.toDouble()) + val wM = exp(-mu + if (m == 0) 0.0 else m * ln(mu) - logFact) + + if (!wM.isFinite() || wM == 0.0) { + return Double.NaN + } + + var sum = 0.0 + var weightAccum = 0.0 + + run { + val cdfM = Beta.regularizedBeta(z, a0 + m, b) + sum += wM * cdfM + weightAccum += wM + } + + var wUp = wM + var j = m + var upSteps = 0 + while (upSteps < maxTerms) { + j += 1 + wUp *= mu / j.toDouble() + if (!wUp.isFinite() || wUp <= 0.0) break + + val cdf = Beta.regularizedBeta(z, a0 + j, b) + sum += wUp * cdf + weightAccum += wUp + + upSteps++ + if (wUp < eps) break + } + + var wDown = wM + j = m + var downSteps = 0 + while (j > 0 && downSteps < maxTerms) { + wDown *= j.toDouble() / mu + j -= 1 + if (!wDown.isFinite() || wDown <= 0.0) break + + val cdf = Beta.regularizedBeta(z, a0 + j, b) + sum += wDown * cdf + weightAccum += wDown + + downSteps++ + if (wDown < eps) break + } + + return sum.coerceIn(0.0, 1.0) + } + + private fun ncpToR2(ncp: Double, df1: Double, df2: Double): Double { + if (ncp.isNaN() || ncp < 0.0 || df1 <= 0.0 || df2 <= 0.0) return Double.NaN + if (ncp == Double.POSITIVE_INFINITY) return 1.0 + return (ncp / (ncp + df1 + df2 + 1.0)).coerceIn(0.0, 1.0) + } + + private fun ciRSquaredLikeConfIntR( + fStat: Double, + df1: Double, + df2: Double, + confidenceLevel: Double + ): R2ConfIntResult { + if (!fStat.isFinite() || fStat < 0.0 || df1 <= 0.0 || df2 <= 0.0 || confidenceLevel <= 0.0 || confidenceLevel 
>= 1.0) { + return R2ConfIntResult(confidenceLevel, Double.NaN, Double.NaN) + } + + val alpha = 1.0 - confidenceLevel + val probsLow = alpha / 2.0 + val probsHigh = 1.0 - alpha / 2.0 + + val ncpCi = ciFNoncentrality( + fStat = fStat, + df1 = df1, + df2 = df2, + probsLow = probsLow, + probsHigh = probsHigh + ) + + val low = ncpToR2(ncpCi.low, df1, df2) + val high = ncpToR2(ncpCi.high, df1, df2) + + return R2ConfIntResult(confidenceLevel, low, high) + } + + private fun fTestPValueUpperTail(fValue: Double, df1: Double, df2: Double): Double { + if (!fValue.isFinite() || df1 <= 0.0 || df2 <= 0.0) return Double.NaN + + if (fValue < 0.0) return Double.NaN + if (fValue == Double.POSITIVE_INFINITY) return 0.0 + + val cdf = fDistributionCdf(fValue, df1, df2) + + return (1.0 - cdf).coerceIn(0.0, 1.0) + } } diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummaryUtil.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummaryUtil.kt deleted file mode 100644 index 818ccd0dd3c..00000000000 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummaryUtil.kt +++ /dev/null @@ -1,516 +0,0 @@ -/* - * Copyright (c) 2026. JetBrains s.r.o. - * Use of this source code is governed by the MIT license that can be found in the LICENSE file. 
- */ - -package org.jetbrains.letsPlot.core.plot.base.stat - -import org.jetbrains.letsPlot.core.plot.base.stat.math3.Beta -import kotlin.math.PI -import kotlin.math.exp -import kotlin.math.ln - -object SmoothStatSummaryUtil { - - data class FTestResult( - val fValue: Double, - val pValue: Double, - val df1: Double, - val df2: Double - ) - - data class R2ConfIntResult( - val level: Double, - val low: Double, - val high: Double - ) - - private data class NcpConfIntResult( - val estimate: Double, - val low: Double, - val high: Double - ) - - internal fun calcR2ConfInt( - n: Int, - eqSize: Int, - r2: Double, - confidenceLevel: Double - ): R2ConfIntResult { - if (n <= 0 || eqSize <= 0 || !r2.isFinite()) { - return R2ConfIntResult(confidenceLevel, Double.NaN, Double.NaN) - } - - val df1 = (eqSize - 1).toDouble() - val df2 = n - eqSize.toDouble() // same as n - p - 1 - - if (df1 <= 0.0 || df2 <= 0.0) { - return R2ConfIntResult(confidenceLevel, Double.NaN, Double.NaN) - } - - // Convert observed R² -> observed F (same relationship used in confintr helpers) - val fStat = when (val r2c = r2.coerceIn(0.0, 1.0)) { - 0.0 -> 0.0 - 1.0 -> Double.POSITIVE_INFINITY - else -> (r2c / (1.0 - r2c)) * (df2 / df1) - } - - if (fStat == Double.POSITIVE_INFINITY) { - return R2ConfIntResult(confidenceLevel, 1.0, 1.0) - } - - return ciRSquaredLikeConfIntR( - fStat = fStat, - df1 = df1, - df2 = df2, - confidenceLevel = confidenceLevel - ) - } - - internal fun calcRSquared( - xVals: DoubleArray, - yVals: DoubleArray, - model: (Double) -> Double - ): Double { - val meanY = yVals.average() - - var ssTot = 0.0 - var ssRes = 0.0 - - for (i in xVals.indices) { - val y = yVals[i] - val yHat = model(xVals[i]) - - val diffRes = y - yHat - ssRes += diffRes * diffRes - - val diffMean = y - meanY - ssTot += diffMean * diffMean - } - - return if (ssTot == 0.0) { - 0.0 - } else { - 1.0 - ssRes / ssTot - } - } - - internal fun calcAdjustedRSquared(n: Int, nCoef: Int, r2: Double): Double { - val 
predictorsCount = (nCoef - 1).coerceAtLeast(0) - if (n <= predictorsCount + 1 || r2.isNaN()) { - return Double.NaN - } - return 1.0 - (1.0 - r2) * ((n - 1.0) / (n - predictorsCount - 1.0)) - } - - internal fun calcRss(xVals: DoubleArray, yVals: DoubleArray, model: (Double) -> Double): Double { - var rss = 0.0 - for (i in xVals.indices) { - val e = yVals[i] - model(xVals[i]) - rss += e * e - } - return rss - } - - internal fun calcAic(n: Int, rss: Double, predictorsCount: Int): Double { - val k = predictorsCount + 1 - if (n <= 0 || k <= 0 || !rss.isFinite()) return Double.NaN - // Guard against log(0) in a perfect fit - val rssSafe = maxOf(rss, 1e-12) - return n * ln(rssSafe / n) + - n * (1.0 + ln(2.0 * PI)) + - 2.0 * k - } - - internal fun calcBic(n: Int, rss: Double, predictorsCount: Int): Double { - val k = predictorsCount + 1 - if (n <= 0 || k <= 0 || !rss.isFinite()) return Double.NaN - // Guard against log(0) in a perfect fit - val rssSafe = maxOf(rss, 1e-12) - return n * ln(rssSafe / n) + - n * (1.0 + ln(2.0 * PI)) + - k * ln(n.toDouble()) - } - - - internal fun calcOverallModelFTest( - nRaw: Int, - eqSizeRaw: Int, // includes intercept - r2Raw: Double - ): FTestResult { - val n = nRaw.toDouble() - val p = (eqSizeRaw - 1).toDouble() // predictors without intercept - - val df1 = p - val df2 = n - p - 1.0 - - // Invalid setup - if (!r2Raw.isFinite() || n <= 0.0 || eqSizeRaw <= 0 || df1 <= 0.0 || df2 <= 0.0) { - return FTestResult(Double.NaN, Double.NaN, df1, df2) - } - - // Clamp possible floating-point overshoots - val r2 = r2Raw.coerceIn(0.0, 1.0) - - // Edge cases - if (r2 == 0.0) { - return FTestResult(0.0, 1.0, df1, df2) - } - if (r2 == 1.0) { - return FTestResult(Double.POSITIVE_INFINITY, 0.0, df1, df2) - } - - val numerator = r2 / df1 - val denominator = (1.0 - r2) / df2 - - if (!numerator.isFinite() || !denominator.isFinite() || denominator <= 0.0) { - return FTestResult(Double.NaN, Double.NaN, df1, df2) - } - - val fValue = numerator / denominator - - 
if (!fValue.isFinite()) { - return if (fValue == Double.POSITIVE_INFINITY) { - FTestResult(Double.POSITIVE_INFINITY, 0.0, df1, df2) - } else { - FTestResult(Double.NaN, Double.NaN, df1, df2) - } - } - - val pValue = fTestPValueUpperTail(fValue, df1, df2) - return FTestResult(fValue, pValue, df1, df2) - } - - private fun fDistributionCdf(x: Double, df1: Double, df2: Double): Double { - if (x <= 0.0) return 0.0 - if (x.isNaN()) return Double.NaN - if (df1 <= 0.0) return Double.NaN - if (df2 <= 0.0) return Double.NaN - - // CDF relation: - // F(x; d1, d2) = I_{ d1*x / (d1*x + d2) }(d1/2, d2/2) - val z = (df1 * x) / (df1 * x + df2) - - return Beta.regularizedBeta(z, df1 / 2.0, df2 / 2.0) - } - - // Same mapping as confintr source (Smithson p. 38), visible in ci_f_ncp code: - // f_to_ncp <- function(f, df1, df2) { df1 * f * (df1 + df2 + 1) / df2 } - private fun fToNcp(f: Double, df1: Double, df2: Double): Double { - if (!f.isFinite() || f < 0.0 || df1 <= 0.0 || df2 <= 0.0) return Double.NaN - return df1 * f * (df1 + df2 + 1.0) / df2 - } - - /** - * Two-sided CI for non-centrality parameter lambda of the F distribution. - * Mirrors confintr::ci_f_ncp logic (test inversion on noncentral F CDF). - * - * probs are lower/upper cumulative probs for the CI limits, e.g. (0.025, 0.975). 
- */ - private fun ciFNoncentrality( - fStat: Double, - df1: Double, - df2: Double, - probsLow: Double, - probsHigh: Double, - absTol: Double = 1e-10 - ): NcpConfIntResult { - if (!fStat.isFinite() || fStat < 0.0 || df1 <= 0.0 || df2 <= 0.0) { - return NcpConfIntResult(Double.NaN, Double.NaN, Double.NaN) - } - if (probsLow !in 0.0..1.0 || probsHigh !in 0.0..1.0 || probsLow > probsHigh) { - return NcpConfIntResult(Double.NaN, Double.NaN, Double.NaN) - } - - val estimate = fToNcp(fStat, df1, df2) - if (!estimate.isFinite()) { - return NcpConfIntResult(Double.NaN, Double.NaN, Double.NaN) - } - - // confintr uses iprobs = 1 - probs - val targetLower = 1.0 - probsLow // for lower CI root - val targetUpper = 1.0 - probsHigh // for upper CI root - - val low = if (probsLow == 0.0) { - 0.0 - } else { - val fn: (Double) -> Double = { ncp -> - nonCentralFDistributionCDF(fStat, df1, df2, ncp) - targetLower - } - // R code searches interval [0, estimate] - bisectionRootOrNull(fn, 0.0, estimate.coerceAtLeast(0.0), absTol) ?: 0.0 - } - - val high = if (probsHigh == 1.0) { - Double.POSITIVE_INFINITY - } else { - val fn: (Double) -> Double = { ncp -> - nonCentralFDistributionCDF(fStat, df1, df2, ncp) - targetUpper - } - - // confintr upper heuristic: - // pmax(4 * estimate, stat * df1 * 4, df1 * 100) - var upper = maxOf(4.0 * estimate, fStat * df1 * 4.0, df1 * 100.0) - - // expand if needed until sign change - var root = bisectionRootOrNull(fn, estimate.coerceAtLeast(0.0), upper, absTol) - var tries = 0 - while (root == null && tries < 20 && upper.isFinite()) { - upper *= 2.0 - root = bisectionRootOrNull(fn, estimate.coerceAtLeast(0.0), upper, absTol) - tries++ - } - root ?: Double.POSITIVE_INFINITY - } - - return NcpConfIntResult(estimate, low, high) - } - - /** - * Monotone root search by bisection. Returns null if no sign change / invalid. 
- */ - private fun bisectionRootOrNull( - f: (Double) -> Double, - a0: Double, - b0: Double, - absTol: Double, - maxIter: Int = 200 - ): Double? { - var a = a0 - var b = b0 - if (!a.isFinite() || !b.isFinite() || a > b) return null - - var fa = f(a) - val fb = f(b) - if (!fa.isFinite() || !fb.isFinite()) return null - - // exact hits - if (fa == 0.0) return a - if (fb == 0.0) return b - - // need sign change - if (fa * fb > 0.0) return null - - repeat(maxIter) { - val m = 0.5 * (a + b) - val fm = f(m) - if (!fm.isFinite()) return null - - if (fm == 0.0) return m - if ((b - a) <= absTol * (1.0 + kotlin.math.abs(a) + kotlin.math.abs(b))) { - return 0.5 * (a + b) - } - - if (fa * fm <= 0.0) { - b = m - } else { - a = m - fa = fm - } - } - - return 0.5 * (a + b) - } - - /** - * CDF of the non-central F distribution via Poisson mixture of central F distributions. - * - * P(F <= x) = sum_{j>=0} w_j * I_z(df1/2 + j, df2/2), - * where z = df1*x / (df1*x + df2), and w_j ~ Poisson(lambda/2). - * - * This is the standard representation and is the key ingredient needed to reproduce - * confintr::ci_f_ncp / ci_rsquared logic. - */ - private fun nonCentralFDistributionCDF( - x: Double, - df1: Double, - df2: Double, - ncp: Double, - eps: Double = 1e-12, - maxTerms: Int = 100000 - ): Double { - if (x.isNaN() || df1 <= 0.0 || df2 <= 0.0 || ncp < 0.0) return Double.NaN - if (x <= 0.0) return 0.0 - if (!x.isFinite()) return 1.0 - - val z = (df1 * x) / (df1 * x + df2) - if (!z.isFinite()) return Double.NaN - if (z <= 0.0) return 0.0 - if (z >= 1.0) return 1.0 - - val a0 = df1 / 2.0 - val b = df2 / 2.0 - val mu = ncp / 2.0 - - // Central F case - if (mu == 0.0) { - return Beta.regularizedBeta(z, a0, b).coerceIn(0.0, 1.0) - } - - // Poisson weights recurrence from j = 0 - // w0 = exp(-mu), w_{j+1} = w_j * mu / (j+1) - var w = exp(-mu) - if (w == 0.0) { - // For large mu, forward start underflows. Fallback to a center-start summation. 
- return cumulativeProbabilityCenterSummation(x, df1, df2, ncp, eps, maxTerms) - } - - var sum = 0.0 - var weightSum = 0.0 - var j = 0 - - while (j < maxTerms) { - val a = a0 + j - val termCdf = Beta.regularizedBeta(z, a, b) - val term = w * termCdf - - sum += term - weightSum += w - - // stop when remaining probability mass is tiny and terms are tiny - if (w < eps && (1.0 - weightSum) < 10 * eps) { - break - } - - j += 1 - w *= mu / j.toDouble() - - if (!w.isFinite()) return Double.NaN - if (w == 0.0 && (1.0 - weightSum) < 1e-8) break - } - - // Numerical guard - return sum.coerceIn(0.0, 1.0) - } - - /** - * More stable fallback for large ncp when exp(-mu) underflows. - * Start near the Poisson mode and sum both directions with recursive weights. - */ - private fun cumulativeProbabilityCenterSummation( - x: Double, - df1: Double, - df2: Double, - ncp: Double, - eps: Double, - maxTerms: Int - ): Double { - val z = (df1 * x) / (df1 * x + df2) - val a0 = df1 / 2.0 - val b = df2 / 2.0 - val mu = ncp / 2.0 - - val m = kotlin.math.floor(mu).toInt().coerceAtLeast(0) - - // log w_m = -mu + m ln(mu) - ln(m!) - var logFact = 0.0 - for (k in 2..m) logFact += ln(k.toDouble()) - val wM = exp(-mu + if (m == 0) 0.0 else m * ln(mu) - logFact) - - if (!wM.isFinite() || wM == 0.0) { - // If still under flowed, we are in an extreme numeric regime. - // Best effort fallback: return NaN. 
- return Double.NaN - } - - var sum = 0.0 - var weightAccum = 0.0 - - // Center term - run { - val cdfM = Beta.regularizedBeta(z, a0 + m, b) - sum += wM * cdfM - weightAccum += wM - } - - // Upward - var wUp = wM - var j = m - var upSteps = 0 - while (upSteps < maxTerms) { - j += 1 - wUp *= mu / j.toDouble() - if (!wUp.isFinite() || wUp <= 0.0) break - - val cdf = Beta.regularizedBeta(z, a0 + j, b) - sum += wUp * cdf - weightAccum += wUp - - upSteps++ - if (wUp < eps) break - } - - // Downward - var wDown = wM - j = m - var downSteps = 0 - while (j > 0 && downSteps < maxTerms) { - // w_{j-1} = w_j * j / mu - wDown *= j.toDouble() / mu - j -= 1 - if (!wDown.isFinite() || wDown <= 0.0) break - - val cdf = Beta.regularizedBeta(z, a0 + j, b) - sum += wDown * cdf - weightAccum += wDown - - downSteps++ - if (wDown < eps) break - } - - // Not all Poisson mass may be covered in extreme cases; still clamp. - return sum.coerceIn(0.0, 1.0) - } - - // confintr::ncp_to_r2 - // ncp / (ncp + df1 + df2 + 1) - private fun ncpToR2(ncp: Double, df1: Double, df2: Double): Double { - if (ncp.isNaN() || ncp < 0.0 || df1 <= 0.0 || df2 <= 0.0) return Double.NaN - if (ncp == Double.POSITIVE_INFINITY) return 1.0 - return (ncp / (ncp + df1 + df2 + 1.0)).coerceIn(0.0, 1.0) - } - - /** - * CI for population R^2 using the same method as confintr::ci_rsquared(): - * test inversion for noncentral F NCP, then map NCP -> R^2. 
- */ - private fun ciRSquaredLikeConfIntR( - fStat: Double, - df1: Double, - df2: Double, - confidenceLevel: Double - ): R2ConfIntResult { - if (!fStat.isFinite() || fStat < 0.0 || df1 <= 0.0 || df2 <= 0.0 || confidenceLevel <= 0.0 || confidenceLevel >= 1.0) { - return R2ConfIntResult(confidenceLevel, Double.NaN, Double.NaN) - } - - val alpha = 1.0 - confidenceLevel - val probsLow = alpha / 2.0 - val probsHigh = 1.0 - alpha / 2.0 - - val ncpCi = ciFNoncentrality( - fStat = fStat, - df1 = df1, - df2 = df2, - probsLow = probsLow, - probsHigh = probsHigh - ) - - val low = ncpToR2(ncpCi.low, df1, df2) - val high = ncpToR2(ncpCi.high, df1, df2) - - return R2ConfIntResult(confidenceLevel, low, high) - } - - private fun fTestPValueUpperTail(fValue: Double, df1: Double, df2: Double): Double { - if (!fValue.isFinite() || df1 <= 0.0 || df2 <= 0.0) return Double.NaN - - if (fValue < 0.0) return Double.NaN - if (fValue == Double.POSITIVE_INFINITY) return 0.0 - - val cdf = fDistributionCdf(fValue, df1, df2) - - // Guard against tiny numerical drift outside [0, 1] - return (1.0 - cdf).coerceIn(0.0, 1.0) - } -} \ No newline at end of file diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionUtil.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionUtil.kt index 44064b10535..b6e015c967e 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionUtil.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionUtil.kt @@ -115,13 +115,3 @@ fun averageByX(xs: List, ys: List): Pair "lm" - SmoothStat.Method.LOESS -> "loess" - SmoothStat.Method.GLM -> "glm" - SmoothStat.Method.GAM -> "gam" - SmoothStat.Method.RLM -> "rlm" - } -} From abac5944db888e16c2be1c5d05bb278a44b0cb88 Mon Sep 17 00:00:00 2001 From: Ivan Seleznev Date: Fri, 27 Feb 2026 16:35:01 +0100 Subject: [PATCH 09/10] Remove 
inherit_color() call in smooth_labels --- python-package/lets_plot/plot/annotation.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python-package/lets_plot/plot/annotation.py b/python-package/lets_plot/plot/annotation.py index 029d31d2aa2..27a4e0e78ff 100644 --- a/python-package/lets_plot/plot/annotation.py +++ b/python-package/lets_plot/plot/annotation.py @@ -355,8 +355,6 @@ def __init__(self, variables: List[str] = None): self._label_x = None self._label_y = None - self.inherit_color() - def eq(self, lhs=None, rhs=None, format=None, threshold=None) -> "smooth_labels": """ From dd74334010a0000d132fbe20c59b07ae7a9c0ee9 Mon Sep 17 00:00:00 2001 From: Ivan Seleznev Date: Fri, 27 Feb 2026 16:49:36 +0100 Subject: [PATCH 10/10] Remove unused SmoothStat import --- .../letsPlot/core/plot/base/stat/regression/RegressionUtil.kt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionUtil.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionUtil.kt index b6e015c967e..f7a4ad2887b 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionUtil.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionUtil.kt @@ -5,9 +5,8 @@ package org.jetbrains.letsPlot.core.plot.base.stat.regression -import org.jetbrains.letsPlot.core.plot.base.stat.math3.Percentile import org.jetbrains.letsPlot.core.commons.data.SeriesUtil -import org.jetbrains.letsPlot.core.plot.base.stat.SmoothStat +import org.jetbrains.letsPlot.core.plot.base.stat.math3.Percentile import kotlin.random.Random internal object RegressionUtil {