From 664fcadcb02f77cb2b8711338c45cd3770e13beb Mon Sep 17 00:00:00 2001
From: Ling Zhi <lingzhi255@gmail.com>
Date: Wed, 18 Feb 2026 17:58:33 +0800
Subject: [PATCH 1/4] add yule_simon_cdf

---
 stan/math/prim/prob/yule_simon_cdf.hpp       | 94 ++++++++++++++++++++
 test/prob/yule_simon/yule_simon_cdf_test.hpp | 65 ++++++++++++++
 2 files changed, 159 insertions(+)
 create mode 100644 stan/math/prim/prob/yule_simon_cdf.hpp
 create mode 100644 test/prob/yule_simon/yule_simon_cdf_test.hpp
diff --git a/stan/math/prim/prob/yule_simon_cdf.hpp b/stan/math/prim/prob/yule_simon_cdf.hpp
new file mode 100644
index 00000000000..10a5ba2637e
--- /dev/null
+++ b/stan/math/prim/prob/yule_simon_cdf.hpp
@@ -0,0 +1,94 @@
+#ifndef STAN_MATH_PRIM_PROB_YULE_SIMON_CDF_HPP
+#define STAN_MATH_PRIM_PROB_YULE_SIMON_CDF_HPP
+
+#include <stan/math/prim/meta.hpp>
+#include <stan/math/prim/err.hpp>
+#include <stan/math/prim/fun/constants.hpp>
+#include <stan/math/prim/fun/digamma.hpp>
+#include <stan/math/prim/fun/beta.hpp>
+#include <stan/math/prim/fun/lgamma.hpp>
+#include <stan/math/prim/fun/max_size.hpp>
+#include <stan/math/prim/fun/scalar_seq_view.hpp>
+#include <stan/math/prim/fun/size.hpp>
+#include <stan/math/prim/fun/size_zero.hpp>
+#include <stan/math/prim/fun/value_of.hpp>
+#include <stan/math/prim/functor/partials_propagator.hpp>
+
+namespace stan {
+namespace math {
+
+/** \ingroup prob_dists
+ * Returns the CDF of the Yule-Simon distribution with shape parameter.
+ * Given containers of matching sizes, returns the product of probabilities.
+ *
+ * @tparam T_n type of outcome parameter
+ * @tparam T_alpha type of shape parameter
+ *
+ * @param n outcome variable
+ * @param alpha shape parameter
+ * @return probability or product of probabilities
+ * @throw std::domain_error if alpha fails to be positive
+ * @throw std::invalid_argument if container sizes mismatch
+ */
+template <typename T_n, typename T_alpha>
+inline return_type_t<T_alpha> yule_simon_cdf(const T_n& n,
+                                             const T_alpha& alpha) {
+  using T_partials_return = partials_return_t<T_n, T_alpha>;
+  using T_n_ref = ref_type_t<T_n>;
+  using T_alpha_ref = ref_type_t<T_alpha>;
+  static constexpr const char* function = "yule_simon_cdf";
+
+  check_consistent_sizes(function, "Outcome variable", n, "Shape parameter",
+                         alpha);
+  if (size_zero(n, alpha)) {
+    return 1.0;
+  }
+
+  T_n_ref n_ref = n;
+  T_alpha_ref alpha_ref = alpha;
+  check_positive_finite(function, "Shape parameter", alpha_ref);
+
+  scalar_seq_view<T_n> n_vec(n);
+  scalar_seq_view<T_alpha_ref> alpha_vec(alpha_ref);
+  const size_t max_size_seq_view = max_size(n_ref, alpha_ref);
+
+  // Explicit return for invalid or extreme values
+  // The gradients are technically ill-defined, but treated as zero
+  for (int i = 0; i < stan::math::size(n); i++) {
+    if (n_vec.val(i) < 1.0) {
+      return 0.0;
+    }
+    if (n_vec.val(i) == std::numeric_limits<int>::max()) {
+      return 1.0;
+    }
+  }
+
+  T_partials_return cdf(1.0);
+  auto ops_partials = make_partials_propagator(alpha_ref);
+  for (size_t i = 0; i < max_size_seq_view; i++) {
+    auto np1 = n_vec.val(i) + 1.0;
+    auto ap1 = alpha_vec.val(i) + 1.0;
+    auto nap1 = n_vec.val(i) + ap1;
+
+    auto ccdf = stan::math::exp(lgamma(ap1) + lgamma(np1) - lgamma(nap1));
+    cdf *= 1.0 - ccdf;
+
+    if constexpr (is_autodiff_v<T_alpha>) {
+      auto chain_rule_term = -ccdf / (1.0 - ccdf);
+      partials<0>(ops_partials)[i]
+          += (digamma(ap1) - digamma(nap1)) * chain_rule_term;
+    }
+  }
+
+  if constexpr (is_autodiff_v<T_alpha>) {
+    for (size_t i = 0; i < stan::math::size(alpha); ++i) {
+      partials<0>(ops_partials)[i] *= cdf;
+    }
+  }
+
+  return ops_partials.build(cdf);
+}
+
+}  // namespace math
+}  // namespace stan
+#endif
diff --git a/test/prob/yule_simon/yule_simon_cdf_test.hpp b/test/prob/yule_simon/yule_simon_cdf_test.hpp
new file mode 100644
index 00000000000..769d2b4b15e
--- /dev/null
+++ b/test/prob/yule_simon/yule_simon_cdf_test.hpp
@@ -0,0 +1,65 @@
+// Arguments: Ints, Doubles
+#include <stan/math/prim/prob/yule_simon_cdf.hpp>
+#include <stan/math/prim/fun/lgamma.hpp>
+
+using std::numeric_limits;
+using std::vector;
+
+class AgradCdfYuleSimon : public AgradCdfTest {
+ public:
+  void valid_values(vector<vector<double>>& parameters,
+                    vector<double>& cdf) {
+    vector<double> param(2);
+
+    param[0] = 5;     // n
+    param[1] = 20.0;  // alpha
+    parameters.push_back(param);
+    cdf.push_back(0.9999811782420478);  // expected cdf
+
+    param[0] = 10;   // n
+    param[1] = 5.5;  // alpha
+    parameters.push_back(param);
+    cdf.push_back(0.9997987132162779);  // expected cdf
+  }
+
+  void invalid_values(vector<size_t>& index, vector<double>& value) {
+    // n
+
+    // alpha
+    index.push_back(1U);
+    value.push_back(0.0);
+
+    index.push_back(1U);
+    value.push_back(-1.0);
+
+    index.push_back(1U);
+    value.push_back(std::numeric_limits<double>::infinity());
+  }
+
+  // BOUND INCLUDED IN ORDER FOR TEST TO PASS WITH CURRENT FRAMEWORK
+  bool has_lower_bound() { return false; }
+
+  bool has_upper_bound() { return false; }
+
+  template <class T_n, class T_alpha, typename T2, typename T3, typename T4,
+            typename T5>
+  stan::return_type_t<T_n, T_alpha> cdf(const T_n& n, const T_alpha& alpha,
+                                        const T2&, const T3&, const T4&,
+                                        const T5&) {
+    return stan::math::yule_simon_cdf(n, alpha);
+  }
+
+  template <class T_n, class T_alpha, typename T2, typename T3, typename T4,
+            typename T5>
+  stan::return_type_t<T_n, T_alpha> cdf_function(const T_n& n,
+                                                 const T_alpha& alpha,
+                                                 const T2&, const T3&,
+                                                 const T4&, const T5&) {
+    using stan::math::lgamma;
+
+    auto log_ccdf
+        = lgamma(alpha + 1.0) + lgamma(n + 1.0) - lgamma(n + alpha + 1.0);
+
+    return 1.0 - stan::math::exp(log_ccdf);
+  }
+};

From c185ee6677d7d56805c57ef6a82191f858c593a2 Mon Sep 17 00:00:00 2001
From: Stan Jenkins <mc.stanislaw@gmail.com>
Date: Wed, 18 Feb 2026 06:08:00 -0500
Subject: [PATCH 2/4] [Jenkins] auto-formatting by clang-format version
 10.0.0-4ubuntu1

---
 test/prob/yule_simon/yule_simon_cdf_test.hpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/test/prob/yule_simon/yule_simon_cdf_test.hpp b/test/prob/yule_simon/yule_simon_cdf_test.hpp
index 769d2b4b15e..6194b64ac75 100644
--- a/test/prob/yule_simon/yule_simon_cdf_test.hpp
+++ b/test/prob/yule_simon/yule_simon_cdf_test.hpp
@@ -7,8 +7,7 @@ using std::vector;
 
 class AgradCdfYuleSimon : public AgradCdfTest {
  public:
-  void valid_values(vector<vector<double>>& parameters,
-                    vector<double>& cdf) {
+  void valid_values(vector<vector<double>>& parameters, vector<double>& cdf) {
     vector<double> param(2);
 
     param[0] = 5;     // n

From dd1cd2102327a6293d1381a1eaf4721b8e64c8cd Mon Sep 17 00:00:00 2001
From: Zhi Ling <lingzhi255@gmail.com>
Date: Thu, 19 Feb 2026 17:05:50 +0800
Subject: [PATCH 3/4] Add test cases for yule-simon cdf

also check the lower tail
---
 test/prob/yule_simon/yule_simon_cdf_test.hpp | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/test/prob/yule_simon/yule_simon_cdf_test.hpp b/test/prob/yule_simon/yule_simon_cdf_test.hpp
index 6194b64ac75..5dc304dff7d 100644
--- a/test/prob/yule_simon/yule_simon_cdf_test.hpp
+++ b/test/prob/yule_simon/yule_simon_cdf_test.hpp
@@ -19,6 +19,16 @@ class AgradCdfYuleSimon : public AgradCdfTest {
     param[1] = 5.5;  // alpha
     parameters.push_back(param);
     cdf.push_back(0.9997987132162779);  // expected cdf
+
+    param[0] = 1;   // n
+    param[1] = 0.1;  // alpha
+    parameters.push_back(param);
+    cdf.push_back(0.0909090909090918);  // expected cdf
+
+    param[0] = 1;   // n
+    param[1] = 0.01;  // alpha
+    parameters.push_back(param);
+    cdf.push_back(0.0099009900990106);  // expected cdf
   }
 
   void invalid_values(vector<size_t>& index, vector<double>& value) {

From 13125df2d0d2107485a948edcd8fc46171a7a777 Mon Sep 17 00:00:00 2001
From: Stan Jenkins <mc.stanislaw@gmail.com>
Date: Thu, 19 Feb 2026 04:06:49 -0500
Subject: [PATCH 4/4] [Jenkins] auto-formatting by clang-format version
 10.0.0-4ubuntu1

---
 test/prob/yule_simon/yule_simon_cdf_test.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/prob/yule_simon/yule_simon_cdf_test.hpp b/test/prob/yule_simon/yule_simon_cdf_test.hpp
index 5dc304dff7d..add428a29ec 100644
--- a/test/prob/yule_simon/yule_simon_cdf_test.hpp
+++ b/test/prob/yule_simon/yule_simon_cdf_test.hpp
@@ -20,12 +20,12 @@ class AgradCdfYuleSimon : public AgradCdfTest {
     parameters.push_back(param);
     cdf.push_back(0.9997987132162779);  // expected cdf
 
-    param[0] = 1;   // n
+    param[0] = 1;    // n
     param[1] = 0.1;  // alpha
     parameters.push_back(param);
     cdf.push_back(0.0909090909090918);  // expected cdf
 
-    param[0] = 1;   // n
+    param[0] = 1;     // n
     param[1] = 0.01;  // alpha
     parameters.push_back(param);
     cdf.push_back(0.0099009900990106);  // expected cdf