diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt index 291c20d116f..02cde242bc9 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/SmoothStatSummary.kt @@ -12,10 +12,14 @@ import org.jetbrains.letsPlot.core.plot.base.DataFrame.Variable.Source.STAT import org.jetbrains.letsPlot.core.plot.base.StatContext import org.jetbrains.letsPlot.core.plot.base.data.TransformVar import org.jetbrains.letsPlot.core.plot.base.stat.SmoothStat.Method +import org.jetbrains.letsPlot.core.plot.base.stat.math3.Beta import org.jetbrains.letsPlot.core.plot.base.stat.regression.LinearRegression import org.jetbrains.letsPlot.core.plot.base.stat.regression.LocalPolynomialRegression import org.jetbrains.letsPlot.core.plot.base.stat.regression.PolynomialRegression import org.jetbrains.letsPlot.core.plot.base.util.SamplingUtil +import kotlin.math.PI +import kotlin.math.exp +import kotlin.math.ln import kotlin.random.Random // TODO: fix duplication SmoothStat @@ -108,11 +112,27 @@ class SmoothStatSummary( ) } ?: return DataFrame.Builder.emptyFrame() + val r2 = calcRSquared(regression.xVals, regression.yVals, regression.model) + val rss = calcRss(regression.xVals, regression.yVals, regression.model) + val fTest = calcOverallModelFTest(regression.n, regression.eq.size, r2) + val r2ConfInt = calcR2ConfInt(regression.n, regression.eq.size, r2, confidenceLevel) + val dfb = DataFrame.Builder() .put(Stats.X, listOf(0.0)) .put(Stats.Y, listOf(0.0)) - .put(Stats.R2, listOf(regression.r2)) - .put(Stats.R2_ADJ, listOf(regression.adjR2)) + .put(Stats.R2, listOf(r2)) + .put(Stats.R2_ADJ, listOf(calcAdjustedRSquared(regression.n, regression.eq.size, r2))) + .put(Stats.N, listOf(regression.n)) + .put(Stats.METHOD, listOf(smoothingMethodLabel(smoothingMethod))) + .put(Stats.AIC, listOf(calcAic(regression.n, rss, regression.eq.size))) + .put(Stats.BIC, listOf(calcBic(regression.n, rss, regression.eq.size))) + .put(Stats.F, listOf(fTest.fValue)) + .put(Stats.DF1, listOf(fTest.df1)) + .put(Stats.DF2, listOf(fTest.df2)) + .put(Stats.P, listOf(fTest.pValue)) + .put(Stats.CI_LEVEL, listOf(r2ConfInt.level)) + .put(Stats.CI_LOW, listOf(r2ConfInt.low)) + .put(Stats.CI_HIGH, listOf(r2ConfInt.high)) val vars = myVariables ?: initVariables(regression.eq.size) regression.eq.forEachIndexed { index, coef -> @@ -131,6 +151,466 @@ class SmoothStatSummary( return myVariables!! } + + private data class FTestResult( + val fValue: Double, + val pValue: Double, + val df1: Double, + val df2: Double + ) + + private data class R2ConfIntResult( + val level: Double, + val low: Double, + val high: Double + ) + + private data class NcpConfIntResult( + val estimate: Double, + val low: Double, + val high: Double + ) + + private fun smoothingMethodLabel(method: Method): String { + return when (method) { + Method.LM -> "lm" + Method.LOESS -> "loess" + Method.GLM -> "glm" + Method.GAM -> "gam" + Method.RLM -> "rlm" + } + } + + private fun calcR2ConfInt( + n: Int, + eqSize: Int, + r2: Double, + confidenceLevel: Double + ): R2ConfIntResult { + if (n <= 0 || eqSize <= 0 || !r2.isFinite()) { + return R2ConfIntResult(confidenceLevel, Double.NaN, Double.NaN) + } + + val df1 = (eqSize - 1).toDouble() + val df2 = n - eqSize.toDouble() + + if (df1 <= 0.0 || df2 <= 0.0) { + return R2ConfIntResult(confidenceLevel, Double.NaN, Double.NaN) + } + + val fStat = when (val r2c = r2.coerceIn(0.0, 1.0)) { + 0.0 -> 0.0 + 1.0 -> Double.POSITIVE_INFINITY + else -> (r2c / (1.0 - r2c)) * (df2 / df1) + } + + if (fStat == Double.POSITIVE_INFINITY) { + return R2ConfIntResult(confidenceLevel, 1.0, 1.0) + } + + return ciRSquaredLikeConfIntR( + fStat = fStat, + df1 = df1, + df2 = df2, + confidenceLevel = confidenceLevel + ) + } + + private fun calcRSquared( + xVals: DoubleArray, + yVals: DoubleArray, + model: (Double) -> Double + ): Double { + val meanY = yVals.average() + + var ssTot = 0.0 + var ssRes = 0.0 + + for (i in xVals.indices) { + val y = yVals[i] + val yHat = model(xVals[i]) + + val diffRes = y - yHat + ssRes += diffRes * diffRes + + val diffMean = y - meanY + ssTot += diffMean * diffMean + } + + return if (ssTot == 0.0) { + 0.0 + } else { + 1.0 - ssRes / ssTot + } + } + + private fun calcAdjustedRSquared(n: Int, nCoef: Int, r2: Double): Double { + val predictorsCount = (nCoef - 1).coerceAtLeast(0) + if (n <= predictorsCount + 1 || r2.isNaN()) { + return Double.NaN + } + return 1.0 - (1.0 - r2) * ((n - 1.0) / (n - predictorsCount - 1.0)) + } + + private fun calcRss(xVals: DoubleArray, yVals: DoubleArray, model: (Double) -> Double): Double { + var rss = 0.0 + for (i in xVals.indices) { + val e = yVals[i] - model(xVals[i]) + rss += e * e + } + return rss + } + + private fun calcAic(n: Int, rss: Double, predictorsCount: Int): Double { + val k = predictorsCount + 1 + if (n <= 0 || k <= 0 || !rss.isFinite()) return Double.NaN + // Guard against log(0) in a perfect fit + val rssSafe = maxOf(rss, 1e-12) + return n * ln(rssSafe / n) + + n * (1.0 + ln(2.0 * PI)) + + 2.0 * k + } + + private fun calcBic(n: Int, rss: Double, predictorsCount: Int): Double { + val k = predictorsCount + 1 + if (n <= 0 || k <= 0 || !rss.isFinite()) return Double.NaN + // Guard against log(0) in a perfect fit + val rssSafe = maxOf(rss, 1e-12) + return n * ln(rssSafe / n) + + n * (1.0 + ln(2.0 * PI)) + + k * ln(n.toDouble()) + } + + + private fun calcOverallModelFTest( + nRaw: Int, + eqSizeRaw: Int, + r2Raw: Double + ): FTestResult { + val n = nRaw.toDouble() + val p = (eqSizeRaw - 1).toDouble() + + val df1 = p + val df2 = n - p - 1.0 + + if (!r2Raw.isFinite() || n <= 0.0 || eqSizeRaw <= 0 || df1 <= 0.0 || df2 <= 0.0) { + return FTestResult(Double.NaN, Double.NaN, df1, df2) + } + + val r2 = r2Raw.coerceIn(0.0, 1.0) + + if (r2 == 0.0) { + return FTestResult(0.0, 1.0, df1, df2) + } + if (r2 == 1.0) { + return FTestResult(Double.POSITIVE_INFINITY, 0.0, df1, df2) + } + + val numerator = r2 / df1 + val denominator = (1.0 - r2) / df2 + + if (!numerator.isFinite() || !denominator.isFinite() || denominator <= 0.0) { + return FTestResult(Double.NaN, Double.NaN, df1, df2) + } + + val fValue = numerator / denominator + + if (!fValue.isFinite()) { + return if (fValue == Double.POSITIVE_INFINITY) { + FTestResult(Double.POSITIVE_INFINITY, 0.0, df1, df2) + } else { + FTestResult(Double.NaN, Double.NaN, df1, df2) + } + } + + val pValue = fTestPValueUpperTail(fValue, df1, df2) + return FTestResult(fValue, pValue, df1, df2) + } + + private fun fDistributionCdf(x: Double, df1: Double, df2: Double): Double { + if (x <= 0.0) return 0.0 + if (x.isNaN()) return Double.NaN + if (df1 <= 0.0) return Double.NaN + if (df2 <= 0.0) return Double.NaN + + val z = (df1 * x) / (df1 * x + df2) + + return Beta.regularizedBeta(z, df1 / 2.0, df2 / 2.0) + } + + private fun fToNcp(f: Double, df1: Double, df2: Double): Double { + if (!f.isFinite() || f < 0.0 || df1 <= 0.0 || df2 <= 0.0) return Double.NaN + return df1 * f * (df1 + df2 + 1.0) / df2 + } + + private fun ciFNoncentrality( + fStat: Double, + df1: Double, + df2: Double, + probsLow: Double, + probsHigh: Double, + absTol: Double = 1e-10 + ): NcpConfIntResult { + if (!fStat.isFinite() || fStat < 0.0 || df1 <= 0.0 || df2 <= 0.0) { + return NcpConfIntResult(Double.NaN, Double.NaN, Double.NaN) + } + if (probsLow !in 0.0..1.0 || probsHigh !in 0.0..1.0 || probsLow > probsHigh) { + return NcpConfIntResult(Double.NaN, Double.NaN, Double.NaN) + } + + val estimate = fToNcp(fStat, df1, df2) + if (!estimate.isFinite()) { + return NcpConfIntResult(Double.NaN, Double.NaN, Double.NaN) + } + + val targetLower = 1.0 - probsLow + val targetUpper = 1.0 - probsHigh + + val low = if (probsLow == 0.0) { + 0.0 + } else { + val fn: (Double) -> Double = { ncp -> + nonCentralFDistributionCDF(fStat, df1, df2, ncp) - targetLower + } + + bisectionRootOrNull(fn, 0.0, estimate.coerceAtLeast(0.0), absTol) ?: 0.0 + } + + val high = if (probsHigh == 1.0) { + Double.POSITIVE_INFINITY + } else { + val fn: (Double) -> Double = { ncp -> + nonCentralFDistributionCDF(fStat, df1, df2, ncp) - targetUpper + } + + var upper = maxOf(4.0 * estimate, fStat * df1 * 4.0, df1 * 100.0) + + var root = bisectionRootOrNull(fn, estimate.coerceAtLeast(0.0), upper, absTol) + var tries = 0 + while (root == null && tries < 20 && upper.isFinite()) { + upper *= 2.0 + root = bisectionRootOrNull(fn, estimate.coerceAtLeast(0.0), upper, absTol) + tries++ + } + root ?: Double.POSITIVE_INFINITY + } + + return NcpConfIntResult(estimate, low, high) + } + + private fun bisectionRootOrNull( + f: (Double) -> Double, + a0: Double, + b0: Double, + absTol: Double, + maxIter: Int = 200 + ): Double? { + var a = a0 + var b = b0 + if (!a.isFinite() || !b.isFinite() || a > b) return null + + var fa = f(a) + val fb = f(b) + if (!fa.isFinite() || !fb.isFinite()) return null + + if (fa == 0.0) return a + if (fb == 0.0) return b + + if (fa * fb > 0.0) return null + + repeat(maxIter) { + val m = 0.5 * (a + b) + val fm = f(m) + if (!fm.isFinite()) return null + + if (fm == 0.0) return m + if ((b - a) <= absTol * (1.0 + kotlin.math.abs(a) + kotlin.math.abs(b))) { + return 0.5 * (a + b) + } + + if (fa * fm <= 0.0) { + b = m + } else { + a = m + fa = fm + } + } + + return 0.5 * (a + b) + } + + // CDF of the non-central F distribution via Poisson mixture of central F distributions. + private fun nonCentralFDistributionCDF( + x: Double, + df1: Double, + df2: Double, + ncp: Double, + eps: Double = 1e-12, + maxTerms: Int = 100000 + ): Double { + if (x.isNaN() || df1 <= 0.0 || df2 <= 0.0 || ncp < 0.0) return Double.NaN + if (x <= 0.0) return 0.0 + if (!x.isFinite()) return 1.0 + + val z = (df1 * x) / (df1 * x + df2) + if (!z.isFinite()) return Double.NaN + if (z <= 0.0) return 0.0 + if (z >= 1.0) return 1.0 + + val a0 = df1 / 2.0 + val b = df2 / 2.0 + val mu = ncp / 2.0 + + if (mu == 0.0) { + return Beta.regularizedBeta(z, a0, b).coerceIn(0.0, 1.0) + } + + var w = exp(-mu) + if (w == 0.0) { + return cumulativeProbabilityCenterSummation(x, df1, df2, ncp, eps, maxTerms) + } + + var sum = 0.0 + var weightSum = 0.0 + var j = 0 + + while (j < maxTerms) { + val a = a0 + j + val termCdf = Beta.regularizedBeta(z, a, b) + val term = w * termCdf + + sum += term + weightSum += w + + if (w < eps && (1.0 - weightSum) < 10 * eps) { + break + } + + j += 1 + w *= mu / j.toDouble() + + if (!w.isFinite()) return Double.NaN + if (w == 0.0 && (1.0 - weightSum) < 1e-8) break + } + + return sum.coerceIn(0.0, 1.0) + } + + /** + * More stable fallback for large ncp when exp(-mu) underflows. + * Start near the Poisson mode and sum both directions with recursive weights. + */ + private fun cumulativeProbabilityCenterSummation( + x: Double, + df1: Double, + df2: Double, + ncp: Double, + eps: Double, + maxTerms: Int + ): Double { + val z = (df1 * x) / (df1 * x + df2) + val a0 = df1 / 2.0 + val b = df2 / 2.0 + val mu = ncp / 2.0 + + val m = kotlin.math.floor(mu).toInt().coerceAtLeast(0) + + var logFact = 0.0 + for (k in 2..m) logFact += ln(k.toDouble()) + val wM = exp(-mu + if (m == 0) 0.0 else m * ln(mu) - logFact) + + if (!wM.isFinite() || wM == 0.0) { + return Double.NaN + } + + var sum = 0.0 + var weightAccum = 0.0 + + run { + val cdfM = Beta.regularizedBeta(z, a0 + m, b) + sum += wM * cdfM + weightAccum += wM + } + + var wUp = wM + var j = m + var upSteps = 0 + while (upSteps < maxTerms) { + j += 1 + wUp *= mu / j.toDouble() + if (!wUp.isFinite() || wUp <= 0.0) break + + val cdf = Beta.regularizedBeta(z, a0 + j, b) + sum += wUp * cdf + weightAccum += wUp + + upSteps++ + if (wUp < eps) break + } + + var wDown = wM + j = m + var downSteps = 0 + while (j > 0 && downSteps < maxTerms) { + wDown *= j.toDouble() / mu + j -= 1 + if (!wDown.isFinite() || wDown <= 0.0) break + + val cdf = Beta.regularizedBeta(z, a0 + j, b) + sum += wDown * cdf + weightAccum += wDown + + downSteps++ + if (wDown < eps) break + } + + return sum.coerceIn(0.0, 1.0) + } + + private fun ncpToR2(ncp: Double, df1: Double, df2: Double): Double { + if (ncp.isNaN() || ncp < 0.0 || df1 <= 0.0 || df2 <= 0.0) return Double.NaN + if (ncp == Double.POSITIVE_INFINITY) return 1.0 + return (ncp / (ncp + df1 + df2 + 1.0)).coerceIn(0.0, 1.0) + } + + private fun ciRSquaredLikeConfIntR( + fStat: Double, + df1: Double, + df2: Double, + confidenceLevel: Double + ): R2ConfIntResult { + if (!fStat.isFinite() || fStat < 0.0 || df1 <= 0.0 || df2 <= 0.0 || confidenceLevel <= 0.0 || confidenceLevel >= 1.0) { + return R2ConfIntResult(confidenceLevel, Double.NaN, Double.NaN) + } + + val alpha = 1.0 - confidenceLevel + val probsLow = alpha / 2.0 + val probsHigh = 1.0 - alpha / 2.0 + + val ncpCi = ciFNoncentrality( + fStat = fStat, + df1 = df1, + df2 = df2, + probsLow = probsLow, + probsHigh = probsHigh + ) + + val low = ncpToR2(ncpCi.low, df1, df2) + val high = ncpToR2(ncpCi.high, df1, df2) + + return R2ConfIntResult(confidenceLevel, low, high) + } + + private fun fTestPValueUpperTail(fValue: Double, df1: Double, df2: Double): Double { + if (!fValue.isFinite() || df1 <= 0.0 || df2 <= 0.0) return Double.NaN + + if (fValue < 0.0) return Double.NaN + if (fValue == Double.POSITIVE_INFINITY) return 0.0 + + val cdf = fDistributionCdf(fValue, df1, df2) + + return (1.0 - cdf).coerceIn(0.0, 1.0) + } } diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/Stats.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/Stats.kt index 67705378b66..9d306daea33 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/Stats.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/Stats.kt @@ -40,6 +40,16 @@ object Stats { val R2_ADJ = DataFrame.Variable("..adjr2..", STAT, "adjr2") val R2 = DataFrame.Variable("..r2..", STAT, "r2") + val METHOD = DataFrame.Variable("..method..", STAT, "method") + val AIC = DataFrame.Variable("..aic..", STAT, "aic") + val BIC = DataFrame.Variable("..bic..", STAT, "bic") + val F = DataFrame.Variable("..f..", STAT, "f") + val DF1 = DataFrame.Variable("..df1..", STAT, "df1") + val DF2 = DataFrame.Variable("..df2..", STAT, "df2") + val P = DataFrame.Variable("..p..", STAT, "p") + val CI_LEVEL = DataFrame.Variable("..cilevel..", STAT, "cilevel") + val CI_LOW = DataFrame.Variable("..cilow..", STAT, "cilow") + val CI_HIGH = DataFrame.Variable("..cihigh..", STAT, "cihigh") val SCALED = DataFrame.Variable("..scaled..", STAT, "scaled") @@ -79,6 +89,16 @@ object Stats { GROUP, R2, R2_ADJ, + METHOD, + AIC, + BIC, + F, + DF1, + DF2, + P, + CI_LEVEL, + CI_LOW, + CI_HIGH, ) val result = HashMap() diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LinearRegression.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LinearRegression.kt index 9f93704259d..57ad21006b9 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LinearRegression.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LinearRegression.kt @@ -10,11 +10,12 @@ class LinearRegression private constructor ( meanX: Double, sumXX: Double, model: (Double) -> Double, + xVals: DoubleArray, + yVals: DoubleArray, standardErrorOfEstimate: Double, tCritical: Double, eq: List, - r2: Double, -) : RegressionEvaluator(n, meanX, sumXX, model, standardErrorOfEstimate, tCritical, eq, r2) { +) : RegressionEvaluator(n, meanX, sumXX, model, xVals, yVals, standardErrorOfEstimate, tCritical, eq) { companion object { fun fit(xs: List, ys: List, confidenceLevel: Double): LinearRegression? { check(xs, ys, confidenceLevel) @@ -45,10 +46,11 @@ class LinearRegression private constructor ( meanX, sumXX, model, + xVals, + yVals, calcStandardErrorOfEstimate(xVals, yVals, model, degreesOfFreedom), calcTCritical(degreesOfFreedom, confidenceLevel), - listOf(intercept, slope), - calcRSquared(xVals, yVals, model) + listOf(intercept, slope) ) } } diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LocalPolynomialRegression.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LocalPolynomialRegression.kt index 4bbeec76c77..aaf2b5ca6bb 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LocalPolynomialRegression.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/LocalPolynomialRegression.kt @@ -13,10 +13,11 @@ class LocalPolynomialRegression private constructor ( meanX: Double, sumXX: Double, model: (Double) -> Double, + xVals: DoubleArray, + yVals: DoubleArray, standardErrorOfEstimate: Double, - tCritical: Double, - r2: Double, -) : RegressionEvaluator(n, meanX, sumXX, model, standardErrorOfEstimate, tCritical, emptyList(), r2) { + tCritical: Double +) : RegressionEvaluator(n, meanX, sumXX, model, xVals, yVals, standardErrorOfEstimate, tCritical, emptyList()) { companion object { fun fit(xs: List, ys: List, confidenceLevel: Double, bandwidth: Double): LocalPolynomialRegression? { check(xs, ys, confidenceLevel) @@ -44,9 +45,10 @@ class LocalPolynomialRegression private constructor ( meanX, sumXX, model, + xVals, + yVals, calcStandardErrorOfEstimate(xVals, yVals, model, degreesOfFreedom), - calcTCritical(degreesOfFreedom, confidenceLevel), - calcRSquared(xVals, yVals, model), + calcTCritical(degreesOfFreedom, confidenceLevel) ) } diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/PolynomialRegression.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/PolynomialRegression.kt index 71f8e5a8ad4..391bd334b52 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/PolynomialRegression.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/PolynomialRegression.kt @@ -14,11 +14,12 @@ class PolynomialRegression private constructor ( meanX: Double, sumXX: Double, model: (Double) -> Double, + xVals: DoubleArray, + yVals: DoubleArray, standardErrorOfEstimate: Double, tCritical: Double, - eq: List, - r2: Double, -) : RegressionEvaluator(n, meanX, sumXX, model, standardErrorOfEstimate, tCritical, eq, r2) { + eq: List +) : RegressionEvaluator(n, meanX, sumXX, model, xVals, yVals, standardErrorOfEstimate, tCritical, eq) { companion object { fun fit(xs: List, ys: List, confidenceLevel: Double, deg: Int): PolynomialRegression? { check(xs, ys, confidenceLevel) @@ -47,10 +48,11 @@ class PolynomialRegression private constructor ( meanX, sumXX, model, + xVals, + yVals, calcStandardErrorOfEstimate(xVals, yVals, model, degreesOfFreedom), calcTCritical(degreesOfFreedom, confidenceLevel), - polynomial.getCoefficients(), - calcRSquared(xVals, yVals, model) + polynomial.getCoefficients() ) } diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionEvaluator.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionEvaluator.kt index 151c3a21844..e1cc7eb0cdc 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionEvaluator.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionEvaluator.kt @@ -10,23 +10,16 @@ import kotlin.math.pow import kotlin.math.sqrt abstract class RegressionEvaluator protected constructor( - private val n: Int, + val n: Int, private val meanX: Double, private val sumXX: Double, - private val model: (Double) -> Double, + val model: (Double) -> Double, + val xVals: DoubleArray, + val yVals: DoubleArray, private val standardErrorOfEstimate: Double, private val tCritical: Double, - val eq: List, - val r2: Double, + val eq: List ) { - val adjR2: Double - get() { - val predictorsCount = (eq.size - 1).coerceAtLeast(0) - if (n <= predictorsCount + 1 || r2.isNaN()) { - return Double.NaN - } - return 1.0 - (1.0 - r2) * ((n - 1.0) / (n - predictorsCount - 1.0)) - } fun value(x: Double): Double { return model(x) @@ -84,33 +77,5 @@ abstract class RegressionEvaluator protected constructor( Double.NaN } } - - fun calcRSquared( - xVals: DoubleArray, - yVals: DoubleArray, - model: (Double) -> Double - ): Double { - val meanY = yVals.average() - - var ssTot = 0.0 - var ssRes = 0.0 - - for (i in xVals.indices) { - val y = yVals[i] - val yHat = model(xVals[i]) - - val diffRes = y - yHat - ssRes += diffRes * diffRes - - val diffMean = y - meanY - ssTot += diffMean * diffMean - } - - return if (ssTot == 0.0) { - 0.0 - } else { - 1.0 - ssRes / ssTot - } - } } } diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionUtil.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionUtil.kt index cf055a22dce..f7a4ad2887b 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionUtil.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/stat/regression/RegressionUtil.kt @@ -5,8 +5,8 @@ package org.jetbrains.letsPlot.core.plot.base.stat.regression -import org.jetbrains.letsPlot.core.plot.base.stat.math3.Percentile import org.jetbrains.letsPlot.core.commons.data.SeriesUtil +import org.jetbrains.letsPlot.core.plot.base.stat.math3.Percentile import kotlin.random.Random internal object RegressionUtil { diff --git a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/tooltip/text/DataFrameField.kt b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/tooltip/text/DataFrameField.kt index 7ba88229046..779733ae134 100644 --- a/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/tooltip/text/DataFrameField.kt +++ b/plot-base/src/commonMain/kotlin/org/jetbrains/letsPlot/core/plot/base/tooltip/text/DataFrameField.kt @@ -12,7 +12,7 @@ import org.jetbrains.letsPlot.core.plot.base.FormatterUtil import org.jetbrains.letsPlot.core.plot.base.PlotContext import org.jetbrains.letsPlot.core.plot.base.data.DataFrameUtil -open class DataFrameField( +class DataFrameField( private val name: String, private val format: String? = null ) : ValueSource { diff --git a/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/back/PlotConfigBackend.kt b/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/back/PlotConfigBackend.kt index eacdb25f232..49ecf252658 100644 --- a/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/back/PlotConfigBackend.kt +++ b/plot-stem/src/commonMain/kotlin/org/jetbrains/letsPlot/core/spec/back/PlotConfigBackend.kt @@ -300,6 +300,22 @@ open class PlotConfigBackend( companion object { + private val SMOOTH_STAT_VARS_TO_KEEP = listOf( + Stats.R2, + Stats.R2_ADJ, + Stats.N, + Stats.AIC, + Stats.BIC, + Stats.METHOD, + Stats.F, + Stats.DF1, + Stats.DF2, + Stats.P, + Stats.CI_LEVEL, + Stats.CI_LOW, + Stats.CI_HIGH + ) + private fun variablesToKeep(facets: PlotFacets, layerConfig: LayerConfig): Set { val stat = layerConfig.stat // keep all original vars @@ -334,7 +350,7 @@ open class PlotConfigBackend( varsToKeep.removeAll(notRenderedVars) varsToKeep.addAll(renderedVars) - varsToKeep.addAll(listOf(Stats.R2, Stats.R2_ADJ)) + varsToKeep.addAll(SMOOTH_STAT_VARS_TO_KEEP) varsToKeep.addAll(layerConfig.ownData.variables().filter { it.label.contains("smooth_eq_coef_") }) return HashSet() + diff --git a/python-package/lets_plot/plot/annotation.py b/python-package/lets_plot/plot/annotation.py index 029d31d2aa2..27a4e0e78ff 100644 --- a/python-package/lets_plot/plot/annotation.py +++ b/python-package/lets_plot/plot/annotation.py @@ -355,8 +355,6 @@ def __init__(self, variables: List[str] = None): self._label_x = None self._label_y = None - self.inherit_color() - def eq(self, lhs=None, rhs=None, format=None, threshold=None) -> "smooth_labels": """