@@ -970,6 +970,102 @@ static sd::Tensor<float> sample_dpmpp_2s_ancestral(denoise_cb_t model,
970970 return x;
971971}
972972
973+ static sd::Tensor<float > sample_dpmpp_2s_ancestral_flow (denoise_cb_t model,
974+ sd::Tensor<float > x,
975+ const std::vector<float >& sigmas,
976+ std::shared_ptr<RNG> rng,
977+ float eta = 1 .0f ) {
978+ int steps = static_cast <int >(sigmas.size ()) - 1 ;
979+ for (int i = 0 ; i < steps; i++) {
980+ float sigma = sigmas[i];
981+ float sigma_to = sigmas[i + 1 ];
982+
983+ bool opt_first_step = (1.0 - sigma < 1e-6 );
984+
985+ auto denoised_opt = model (x, sigma, (opt_first_step ? 1 : -1 ) * (i + 1 ));
986+ if (denoised_opt.empty ()) {
987+ return {};
988+ }
989+ sd::Tensor<float > denoised = std::move (denoised_opt);
990+
991+ if (sigma_to == 0 .0f ) {
992+ // Euler method (final step, no noise)
993+ // sigma_to == 0 --> sigma_down = 0, so:
994+ // x + d * (sigma_down - sigma)
995+ // = x + ((x - denoised) / sigma) * (sigma_down - sigma)
996+ // = x + ((x - denoised) / sigma) * ( 0 - sigma)
997+ // = x + ((x - denoised) ) * -1
998+ // = x -x + denoised
999+ x = denoised;
1000+
1001+ } else {
1002+ auto [sigma_down, sigma_up, alpha_scale] = get_ancestral_step_flow (sigma, sigma_to, eta);
1003+ sd::Tensor<float > D_i;
1004+
1005+ if (opt_first_step) {
1006+ // the reformulated exp_s calc already accounts for this, but we can avoid
1007+ // a redundant model call for the typical sigma 1 at the first step:
1008+ // exp_s = sqrt((1-sigma)/sigma * (1-sigma_down)/sigma_down)
1009+ // = sqrt((1- 1)/ 1 * (1-sigma_down)/sigma_down)
1010+ // = 0
1011+ // so sigma_s = 1 = sigma, and sigma_s_i_ratio = sigma_s / sigma = 1
1012+ // u = (x*sigma_s_i_ratio)+(denoised*(1.0f-sigma_s_i_ratio))
1013+ // = (x*1)+(denoised*0) = x
1014+ // so D_i = model(u, sigma_s, i + 1)
1015+ // = model(x, sigma, i + 1)
1016+ // = denoised
1017+ D_i = denoised;
1018+
1019+ } else {
1020+ float sigma_s;
1021+
1022+ // ref implementation would be:
1023+ // auto lambda_fn = [](float sigma) -> float {
1024+ // return std::log((1.0f - sigma) / sigma); };
1025+ // auto sigma_fn = [](float lbda) -> float {
1026+ // return 1.0f / (std::exp(lbda) + 1.0f); };
1027+ // t_i = lambda_fn(sigma);
1028+ // t_down = lambda_fn(sigma_down);
1029+ // float r = 0.5f;
1030+ // h = t_down - t_i;
1031+ // s = t_i + r * h;
1032+ // sigma_s = sigma_fn(s);
1033+
1034+ // assuming r is constant, we sidestep the singularity at sigma -> 1 by:
1035+ // s = 0.5 * (lambda_fn(sigma) + lambda_fn(sigma_down))
1036+ // = 0.5 * (log((1-sigma)/sigma) + log((1-sigma_down)/sigma_down))
1037+ // = 0.5 * log(((1-sigma)/sigma) * ((1-sigma_down)/sigma_down))
1038+ // = log(sqrt (((1-sigma)/sigma) * ((1-sigma_down)/sigma_down)))
1039+ // so exp(s) = sqrt((1-sigma)/sigma * (1-sigma_down)/sigma_down)
1040+ // and sigma_s = sigma_fn(s) = 1.0f / (exp(s) + 1.0f)
1041+
1042+ float exp_s = std::sqrt (((1 - sigma) / sigma) * ((1 - sigma_down) / sigma_down));
1043+ sigma_s = 1 .0f / (exp_s + 1 .0f );
1044+
1045+ float sigma_s_i_ratio = sigma_s / sigma;
1046+ sd::Tensor<float > u = (x * sigma_s_i_ratio) + (denoised * (1 .0f - sigma_s_i_ratio));
1047+
1048+ auto denoised2_opt = model (u, sigma_s, i + 1 );
1049+ if (denoised2_opt.empty ()) {
1050+ return {};
1051+ }
1052+ D_i = std::move (denoised2_opt);
1053+ }
1054+
1055+ float sigma_down_i_ratio = sigma_down / sigma;
1056+ x = (x * sigma_down_i_ratio) + (D_i * (1 .0f - sigma_down_i_ratio));
1057+
1058+ if (sigma_to > 0 .0f && eta > 0 .0f ) {
1059+ x = alpha_scale * x + sd::Tensor<float >::randn_like (x, rng) * sigma_up;
1060+ }
1061+ }
1062+ }
1063+
1064+ return x;
1065+ }
1066+
1067+
1068+
9731069static sd::Tensor<float > sample_dpmpp_2m (denoise_cb_t model,
9741070 sd::Tensor<float > x,
9751071 const std::vector<float >& sigmas) {
@@ -1566,7 +1662,10 @@ static sd::Tensor<float> sample_k_diffusion(sample_method_t method,
15661662 case DPM2_SAMPLE_METHOD:
15671663 return sample_dpm2 (model, std::move (x), sigmas);
15681664 case DPMPP2S_A_SAMPLE_METHOD:
1569- return sample_dpmpp_2s_ancestral (model, std::move (x), sigmas, rng, eta);
1665+ if (is_flow_denoiser)
1666+ return sample_dpmpp_2s_ancestral_flow (model, std::move (x), sigmas, rng, eta);
1667+ else
1668+ return sample_dpmpp_2s_ancestral (model, std::move (x), sigmas, rng, eta);
15701669 case DPMPP2M_SAMPLE_METHOD:
15711670 return sample_dpmpp_2m (model, std::move (x), sigmas);
15721671 case DPMPP2Mv2_SAMPLE_METHOD:
0 commit comments