From 664fcadcb02f77cb2b8711338c45cd3770e13beb Mon Sep 17 00:00:00 2001 From: Ling Zhi Date: Wed, 18 Feb 2026 17:58:33 +0800 Subject: [PATCH 1/4] add yule_simon_cdf --- stan/math/prim/prob/yule_simon_cdf.hpp | 94 ++++++++++++++++++++ test/prob/yule_simon/yule_simon_cdf_test.hpp | 65 ++++++++++++++ 2 files changed, 159 insertions(+) create mode 100644 stan/math/prim/prob/yule_simon_cdf.hpp create mode 100644 test/prob/yule_simon/yule_simon_cdf_test.hpp diff --git a/stan/math/prim/prob/yule_simon_cdf.hpp b/stan/math/prim/prob/yule_simon_cdf.hpp new file mode 100644 index 00000000000..10a5ba2637e --- /dev/null +++ b/stan/math/prim/prob/yule_simon_cdf.hpp @@ -0,0 +1,94 @@ +#ifndef STAN_MATH_PRIM_PROB_YULE_SIMON_CDF_HPP +#define STAN_MATH_PRIM_PROB_YULE_SIMON_CDF_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace stan { +namespace math { + +/** \ingroup prob_dists + * Returns the CDF of the Yule-Simon distribution with shape parameter. + * Given containers of matching sizes, returns the product of probabilities. + * + * @tparam T_n type of outcome parameter + * @tparam T_alpha type of shape parameter + * + * @param n outcome variable + * @param alpha shape parameter + * @return probability or product of probabilities + * @throw std::domain_error if alpha fails to be positive + * @throw std::invalid_argument if container sizes mismatch + */ +template +inline return_type_t yule_simon_cdf(const T_n& n, + const T_alpha& alpha) { + using T_partials_return = partials_return_t; + using T_n_ref = ref_type_t; + using T_alpha_ref = ref_type_t; + static constexpr const char* function = "yule_simon_cdf"; + + check_consistent_sizes(function, "Outcome variable", n, "Shape parameter", + alpha); + if (size_zero(n, alpha)) { + return 1.0; + } + + T_n_ref n_ref = n; + T_alpha_ref alpha_ref = alpha; + check_positive_finite(function, "Shape parameter", alpha_ref); + + scalar_seq_view n_vec(n); + scalar_seq_view alpha_vec(alpha_ref); + const size_t max_size_seq_view = max_size(n_ref, alpha_ref); + + // Explicit return for invalid or extreme values + // The gradients are technically ill-defined, but treated as zero + for (int i = 0; i < stan::math::size(n); i++) { + if (n_vec.val(i) < 1.0) { + return 0.0; + } + if (n_vec.val(i) == std::numeric_limits::max()) { + return 1.0; + } + } + + T_partials_return cdf(1.0); + auto ops_partials = make_partials_propagator(alpha_ref); + for (size_t i = 0; i < max_size_seq_view; i++) { + auto np1 = n_vec.val(i) + 1.0; + auto ap1 = alpha_vec.val(i) + 1.0; + auto nap1 = n_vec.val(i) + ap1; + + auto ccdf = stan::math::exp(lgamma(ap1) + lgamma(np1) - lgamma(nap1)); + cdf *= 1.0 - ccdf; + + if constexpr (is_autodiff_v) { + auto chain_rule_term = -ccdf / (1.0 - ccdf); + partials<0>(ops_partials)[i] + += (digamma(ap1) - digamma(nap1)) * chain_rule_term; + } + } + + if constexpr (is_autodiff_v) { + for (size_t i = 0; i < stan::math::size(alpha); ++i) { + partials<0>(ops_partials)[i] *= cdf; + } + } + + return ops_partials.build(cdf); +} + +} // namespace math +} // namespace stan +#endif diff --git a/test/prob/yule_simon/yule_simon_cdf_test.hpp b/test/prob/yule_simon/yule_simon_cdf_test.hpp new file mode 100644 index 00000000000..769d2b4b15e --- /dev/null +++ b/test/prob/yule_simon/yule_simon_cdf_test.hpp @@ -0,0 +1,65 @@ +// Arguments: Ints, Doubles +#include +#include + +using std::numeric_limits; +using std::vector; + +class AgradCdfYuleSimon : public AgradCdfTest { + public: + void valid_values(vector>& parameters, + vector& cdf) { + vector param(2); + + param[0] = 5; // n + param[1] = 20.0; // alpha + parameters.push_back(param); + cdf.push_back(0.9999811782420478); // expected cdf + + param[0] = 10; // n + param[1] = 5.5; // alpha + parameters.push_back(param); + cdf.push_back(0.9997987132162779); // expected cdf + } + + void invalid_values(vector& index, vector& value) { + // n + + // alpha + index.push_back(1U); + value.push_back(0.0); + + index.push_back(1U); + value.push_back(-1.0); + + index.push_back(1U); + value.push_back(std::numeric_limits::infinity()); + } + + // BOUND INCLUDED IN ORDER FOR TEST TO PASS WITH CURRENT FRAMEWORK + bool has_lower_bound() { return false; } + + bool has_upper_bound() { return false; } + + template + stan::return_type_t cdf(const T_n& n, const T_alpha& alpha, + const T2&, const T3&, const T4&, + const T5&) { + return stan::math::yule_simon_cdf(n, alpha); + } + + template + stan::return_type_t cdf_function(const T_n& n, + const T_alpha& alpha, + const T2&, const T3&, + const T4&, const T5&) { + using stan::math::lgamma; + + auto log_ccdf + = lgamma(alpha + 1.0) + lgamma(n + 1.0) - lgamma(n + alpha + 1.0); + + return 1.0 - stan::math::exp(log_ccdf); + } +}; From c185ee6677d7d56805c57ef6a82191f858c593a2 Mon Sep 17 00:00:00 2001 From: Stan Jenkins Date: Wed, 18 Feb 2026 06:08:00 -0500 Subject: [PATCH 2/4] [Jenkins] auto-formatting by clang-format version 10.0.0-4ubuntu1 --- test/prob/yule_simon/yule_simon_cdf_test.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/prob/yule_simon/yule_simon_cdf_test.hpp b/test/prob/yule_simon/yule_simon_cdf_test.hpp index 769d2b4b15e..6194b64ac75 100644 --- a/test/prob/yule_simon/yule_simon_cdf_test.hpp +++ b/test/prob/yule_simon/yule_simon_cdf_test.hpp @@ -7,8 +7,7 @@ using std::vector; class AgradCdfYuleSimon : public AgradCdfTest { public: - void valid_values(vector>& parameters, - vector& cdf) { + void valid_values(vector>& parameters, vector& cdf) { vector param(2); param[0] = 5; // n From dd1cd2102327a6293d1381a1eaf4721b8e64c8cd Mon Sep 17 00:00:00 2001 From: Zhi Ling Date: Thu, 19 Feb 2026 17:05:50 +0800 Subject: [PATCH 3/4] Add test cases for yule-simon cdf also check the lower tail --- test/prob/yule_simon/yule_simon_cdf_test.hpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/prob/yule_simon/yule_simon_cdf_test.hpp b/test/prob/yule_simon/yule_simon_cdf_test.hpp index 6194b64ac75..5dc304dff7d 100644 --- a/test/prob/yule_simon/yule_simon_cdf_test.hpp +++ b/test/prob/yule_simon/yule_simon_cdf_test.hpp @@ -19,6 +19,16 @@ class AgradCdfYuleSimon : public AgradCdfTest { param[1] = 5.5; // alpha parameters.push_back(param); cdf.push_back(0.9997987132162779); // expected cdf + + param[0] = 1; // n + param[1] = 0.1; // alpha + parameters.push_back(param); + cdf.push_back(0.0909090909090918); // expected cdf + + param[0] = 1; // n + param[1] = 0.01; // alpha + parameters.push_back(param); + cdf.push_back(0.0099009900990106); // expected cdf } void invalid_values(vector& index, vector& value) { From 13125df2d0d2107485a948edcd8fc46171a7a777 Mon Sep 17 00:00:00 2001 From: Stan Jenkins Date: Thu, 19 Feb 2026 04:06:49 -0500 Subject: [PATCH 4/4] [Jenkins] auto-formatting by clang-format version 10.0.0-4ubuntu1 --- test/prob/yule_simon/yule_simon_cdf_test.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/prob/yule_simon/yule_simon_cdf_test.hpp b/test/prob/yule_simon/yule_simon_cdf_test.hpp index 5dc304dff7d..add428a29ec 100644 --- a/test/prob/yule_simon/yule_simon_cdf_test.hpp +++ b/test/prob/yule_simon/yule_simon_cdf_test.hpp @@ -20,12 +20,12 @@ class AgradCdfYuleSimon : public AgradCdfTest { parameters.push_back(param); cdf.push_back(0.9997987132162779); // expected cdf - param[0] = 1; // n + param[0] = 1; // n param[1] = 0.1; // alpha parameters.push_back(param); cdf.push_back(0.0909090909090918); // expected cdf - param[0] = 1; // n + param[0] = 1; // n param[1] = 0.01; // alpha parameters.push_back(param); cdf.push_back(0.0099009900990106); // expected cdf