LostRuins · CasualAutopsy · Jun 26, 2026 · Jun 26, 2026
diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
@@ -1772,13 +1772,18 @@ void sample_top_p(llama_token_data_array * cur_p, float p, size_t min_keep) {
     cur_p->size = last_idx;
 }
 
-void sample_min_p(llama_token_data_array * cur_p, float p, size_t min_keep) {
+void sample_min_p(llama_token_data_array * cur_p, float p, size_t min_keep, bool * norm_minp) {
     if (p <= 0.0f || !cur_p->size) {
         return;
     }
 
     bool min_p_applied = false;
 
+    if (norm_minp && *norm_minp) { // re-run softmax only when an earlier sampler changed logits without refreshing p
+        sample_softmax(cur_p);
+        *norm_minp = false; // probabilities now match the logits again
+    }
+
     // if the cur_p aren't sorted, try the unsorted implementation first
     if (!cur_p->sorted) {
         std::vector<llama_token_data> filtered_tokens;
@@ -1959,11 +1964,9 @@ void sample_top_n_sigma(llama_token_data_array * cur_p, float nsigma) {
     auto last   = std::remove_if(cur_p->data, cur_p->data + cur_p->size,
                                  [&](auto & tk) { return tk.logit < nsigmax - (nsigma * nsigstd); });
     cur_p->size = last - cur_p->data;
-
-    sample_softmax(cur_p);
 }
 
-void sample_entropy(llama_token_data_array * cur_p, float min_temp, float max_temp, float exponent_val, float smoothing_factor, float smoothing_curve) {
+void sample_entropy(llama_token_data_array * cur_p, float min_temp, float max_temp, float exponent_val, float smoothing_factor, float smoothing_curve, bool * norm_minp) {
     // no need to do anything if there is only one (or zero) candidates
     if (cur_p->size <= 1) {
         return;
@@ -1994,19 +1997,7 @@ void sample_entropy(llama_token_data_array * cur_p, float min_temp, float max_te
         cur_p->data[i].logit /= dyn_temp;
     }
 
-    // Re-compute softmax probabilities after scaling logits with dynamic temperature
-    const double max_l_double = cur_p->data[0].logit;
-
-    double cum_sum_double = 0.0;
-    for (size_t i = 0; i < cur_p->size; ++i) {
-        double p = exp(cur_p->data[i].logit - max_l_double);
-        cur_p->data[i].p = p; // Store the scaled probability
-        cum_sum_double += p;
-    }
-
-    for (size_t i = 0; i < cur_p->size; ++i) {
-        cur_p->data[i].p /= cum_sum_double; // Re-normalize the probabilities
-    }
+    if (norm_minp) { *norm_minp = true; } // logits were rescaled without recomputing p; a null flag pointer is tolerated
 
     // Only apply smoothing if smoothing_factor is > 0. Do not change base implementation otherwise.
     if (smoothing_factor > 0 && cur_p->size > 1) {
@@ -2019,12 +2010,11 @@ void sample_entropy(llama_token_data_array * cur_p, float min_temp, float max_te
             float s = (smoothing_curve - 1) / 2;
             cur_p->data[i].logit = -(k * smoothing_factor * logit_shifted * logit_shifted) + (s * smoothing_factor * logit_shifted * logit_shifted * logit_shifted) + h;
         }
-        sample_softmax(cur_p);
     }
 
 }
 
-void sample_temperature(llama_token_data_array * candidates_p, float temp, float smoothing_factor, float smoothing_curve)
+void sample_temperature(llama_token_data_array * candidates_p, float temp, float smoothing_factor, float smoothing_curve, bool * norm_minp)
 {
     if (temp <= 0)
     {
@@ -2046,7 +2036,8 @@ void sample_temperature(llama_token_data_array * candidates_p, float temp, float
             float s = (smoothing_curve - 1) / 2;
             candidates_p->data[i].logit = -(k * smoothing_factor * logit_shifted * logit_shifted) + (s * smoothing_factor * logit_shifted * logit_shifted * logit_shifted) + h;
         }
-        sample_softmax(candidates_p);
+
+        if (norm_minp) { *norm_minp = true; } // smoothing rewrote the logits without recomputing p; a null flag pointer is tolerated
     }
 }
 
@@ -2231,12 +2222,14 @@ const std::vector<int> & think_start_seq, const std::vector<int> & think_end_seq
         }
     }
 
+    bool norm_minp = false;
+
     if (mirostat == 1 || mirostat == 2)
     {
         static float mirostat_mu = 2.0f * mirostat_tau;
         const int mirostat_m = 100;
         sample_rep_pen(n_ctx, rep_pen_range, rep_pen, rep_pen_slope, presence_penalty, &candidates_p);
-        sample_temperature(&candidates_p, temp, smoothing_factor, smoothing_curve);
+        sample_temperature(&candidates_p, temp, smoothing_factor, smoothing_curve, &norm_minp);
         if (mirostat == 1)
         {
             id = sample_token_mirostat(n_vocab, &candidates_p, rng, mirostat_tau, mirostat_eta, mirostat_m, &mirostat_mu);
@@ -2260,7 +2253,7 @@ const std::vector<int> & think_start_seq, const std::vector<int> & think_end_seq
                     break;
                 case KCPP_SAMPLER_TOP_P:
                     sample_top_p(&candidates_p, top_p, 1);
-                    sample_min_p(&candidates_p, min_p, 1);
+                    sample_min_p(&candidates_p, min_p, 1, &norm_minp);
                     break;
                 case KCPP_SAMPLER_TFS:
                     sample_tail_free(&candidates_p, tfs, 1);
@@ -2277,11 +2270,11 @@ const std::vector<int> & think_start_seq, const std::vector<int> & think_end_seq
                         dynatemp_min = dynatemp_min<0?0:dynatemp_min;
                         dynatemp_max = dynatemp_max<0?0:dynatemp_max;
                         dynatemp_exponent = dynatemp_exponent<0?0:dynatemp_exponent;
-                        sample_entropy(&candidates_p, dynatemp_min, dynatemp_max, dynatemp_exponent, smoothing_factor, smoothing_curve);
+                        sample_entropy(&candidates_p, dynatemp_min, dynatemp_max, dynatemp_exponent, smoothing_factor, smoothing_curve, &norm_minp);
                     }
                     else
                     {
-                        sample_temperature(&candidates_p, temp, smoothing_factor, smoothing_curve);
+                        sample_temperature(&candidates_p, temp, smoothing_factor, smoothing_curve, &norm_minp);
                     }
                     if (nsigma > 0.0f)
                     {