diff --git a/src/denoiser.hpp b/src/denoiser.hpp index c9c9d881d..e3fc82464 100644 --- a/src/denoiser.hpp +++ b/src/denoiser.hpp @@ -1290,32 +1290,12 @@ static sd::Tensor sample_ddim_trailing(denoise_cb_t model, const std::vector& sigmas, std::shared_ptr rng, float eta) { - float beta_start = 0.00085f; - float beta_end = 0.0120f; - std::vector alphas_cumprod(TIMESTEPS); - std::vector compvis_sigmas(TIMESTEPS); - for (int i = 0; i < TIMESTEPS; i++) { - alphas_cumprod[i] = - (i == 0 ? 1.0f : alphas_cumprod[i - 1]) * - (1.0f - - std::pow(sqrtf(beta_start) + - (sqrtf(beta_end) - sqrtf(beta_start)) * - ((float)i / (TIMESTEPS - 1)), - 2)); - compvis_sigmas[i] = - std::sqrt((1 - alphas_cumprod[i]) / alphas_cumprod[i]); - } int steps = static_cast(sigmas.size()) - 1; for (int i = 0; i < steps; i++) { - int timestep = static_cast(roundf(TIMESTEPS - i * ((float)TIMESTEPS / steps))) - 1; - int prev_timestep = timestep - TIMESTEPS / steps; - float sigma = static_cast(compvis_sigmas[timestep]); - if (i == 0) { - x *= std::sqrt(sigma * sigma + 1) / sigma; - } else { - x *= std::sqrt(sigma * sigma + 1); - } + + float sigma = sigmas[i]; + float sigma_to = sigmas[i + 1]; auto model_output_opt = model(x, sigma, i + 1); if (model_output_opt.empty()) { @@ -1324,8 +1304,8 @@ static sd::Tensor sample_ddim_trailing(denoise_cb_t model, sd::Tensor model_output = std::move(model_output_opt); model_output = (x - model_output) * (1.0f / sigma); - float alpha_prod_t = static_cast(alphas_cumprod[timestep]); - float alpha_prod_t_prev = static_cast(prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0]); + float alpha_prod_t = 1.0f / (sigma * sigma + 1.0f); + float alpha_prod_t_prev = 1.0f / (sigma_to * sigma_to + 1.0f); float beta_prod_t = 1.0f - alpha_prod_t; sd::Tensor pred_original_sample = ((x / std::sqrt(sigma * sigma + 1)) - @@ -1337,12 +1317,13 @@ static sd::Tensor sample_ddim_trailing(denoise_cb_t model, (1.0f - alpha_prod_t / alpha_prod_t_prev); float std_dev_t = eta * std::sqrt(variance); - x = std::sqrt(alpha_prod_t_prev) * pred_original_sample + - std::sqrt(1.0f - alpha_prod_t_prev - std::pow(std_dev_t, 2)) * model_output; + x = pred_original_sample + + std::sqrt((1.0f - alpha_prod_t_prev - std::pow(std_dev_t, 2))/ alpha_prod_t_prev) * model_output; if (eta > 0) { - x += std_dev_t * sd::Tensor::randn_like(x, rng); + x+= std_dev_t / std::sqrt(alpha_prod_t_prev) * sd::Tensor::randn_like(x, rng); } + } return x; } @@ -1368,19 +1349,25 @@ static sd::Tensor sample_tcd(denoise_cb_t model, std::sqrt((1 - alphas_cumprod[i]) / alphas_cumprod[i]); } - int original_steps = 50; - int steps = static_cast(sigmas.size()) - 1; + auto get_timestep_from_sigma = [&](float s) -> int { + auto it = std::lower_bound(compvis_sigmas.begin(), compvis_sigmas.end(), s); + if (it == compvis_sigmas.begin()) return 0; + if (it == compvis_sigmas.end()) return TIMESTEPS - 1; + int idx_high = static_cast(std::distance(compvis_sigmas.begin(), it)); + int idx_low = idx_high - 1; + if (std::abs(compvis_sigmas[idx_high] - s) < std::abs(compvis_sigmas[idx_low] - s)) { + return idx_high; + } + return idx_low; + }; + + int steps = static_cast(sigmas.size()) - 1; for (int i = 0; i < steps; i++) { - int timestep = TIMESTEPS - 1 - (TIMESTEPS / original_steps) * (int)floor(i * ((float)original_steps / steps)); - int prev_timestep = i >= steps - 1 ? 0 : TIMESTEPS - 1 - (TIMESTEPS / original_steps) * (int)floor((i + 1) * ((float)original_steps / steps)); - int timestep_s = (int)floor((1 - eta) * prev_timestep); - float sigma = static_cast(compvis_sigmas[timestep]); - if (i == 0) { - x *= std::sqrt(sigma * sigma + 1) / sigma; - } else { - x *= std::sqrt(sigma * sigma + 1); - } + float sigma_to = sigmas[i + 1]; + int prev_timestep = get_timestep_from_sigma(sigma_to); + int timestep_s = (int)floor((1 - eta) * prev_timestep); + float sigma = sigmas[i]; auto model_output_opt = model(x, sigma, i + 1); if (model_output_opt.empty()) { @@ -1389,9 +1376,9 @@ static sd::Tensor sample_tcd(denoise_cb_t model, sd::Tensor model_output = std::move(model_output_opt); model_output = (x - model_output) * (1.0f / sigma); - float alpha_prod_t = static_cast(alphas_cumprod[timestep]); + float alpha_prod_t = 1.0f / (sigma * sigma + 1.0f); float beta_prod_t = 1.0f - alpha_prod_t; - float alpha_prod_t_prev = static_cast(prev_timestep >= 0 ? alphas_cumprod[prev_timestep] : alphas_cumprod[0]); + float alpha_prod_t_prev = 1.0f / (sigma_to * sigma_to + 1.0f); float alpha_prod_s = static_cast(alphas_cumprod[timestep_s]); float beta_prod_s = 1.0f - alpha_prod_s; @@ -1399,13 +1386,14 @@ static sd::Tensor sample_tcd(denoise_cb_t model, std::sqrt(beta_prod_t) * model_output) * (1.0f / std::sqrt(alpha_prod_t)); - x = std::sqrt(alpha_prod_s) * pred_original_sample + - std::sqrt(beta_prod_s) * model_output; + x = std::sqrt(alpha_prod_s / alpha_prod_t_prev) * pred_original_sample + + std::sqrt(beta_prod_s / alpha_prod_t_prev) * model_output; - if (eta > 0 && i != steps - 1) { + if (eta > 0 && sigma_to > 0.0f) { x = std::sqrt(alpha_prod_t_prev / alpha_prod_s) * x + - std::sqrt(1.0f - alpha_prod_t_prev / alpha_prod_s) * sd::Tensor::randn_like(x, rng); + std::sqrt(1.0f / alpha_prod_t_prev - 1.0f / alpha_prod_s) * sd::Tensor::randn_like(x, rng); } + } return x; } diff --git a/src/stable-diffusion.cpp b/src/stable-diffusion.cpp index 683a07d53..c07531e82 100644 --- a/src/stable-diffusion.cpp +++ b/src/stable-diffusion.cpp @@ -2415,8 +2415,10 @@ enum scheduler_t sd_get_default_scheduler(const sd_ctx_t* sd_ctx, enum sample_me return EXPONENTIAL_SCHEDULER; } } - if (sample_method == LCM_SAMPLE_METHOD) { + if (sample_method == LCM_SAMPLE_METHOD || sample_method == TCD_SAMPLE_METHOD) { return LCM_SCHEDULER; + } else if (sample_method == DDIM_TRAILING_SAMPLE_METHOD) { + return SIMPLE_SCHEDULER; } return DISCRETE_SCHEDULER; }