diff --git a/acb_hypgeom.h b/acb_hypgeom.h index 3631980a..3e7176e0 100644 --- a/acb_hypgeom.h +++ b/acb_hypgeom.h @@ -31,6 +31,12 @@ void acb_hypgeom_rising_ui_jet_rs(acb_ptr res, const acb_t x, ulong n, ulong m, void acb_hypgeom_rising_ui_jet_bs(acb_ptr res, const acb_t x, ulong n, slong len, slong prec); void acb_hypgeom_rising_ui_jet(acb_ptr res, const acb_t x, ulong n, slong len, slong prec); +void acb_hypgeom_gamma_stirling_sum_horner(acb_t s, const acb_t z, slong N, slong prec); +void acb_hypgeom_gamma_stirling_sum_improved(acb_t s, const acb_t z, slong N, slong K, slong prec); +void acb_hypgeom_gamma_stirling(acb_t res, const acb_t x, int reciprocal, slong prec); +int acb_hypgeom_gamma_taylor(acb_t res, const acb_t x, int reciprocal, slong prec); +void acb_hypgeom_gamma(acb_t y, const acb_t x, slong prec); +void acb_hypgeom_rgamma(acb_t y, const acb_t x, slong prec); void acb_hypgeom_pfq_bound_factor(mag_t C, acb_srcptr a, slong p, acb_srcptr b, slong q, const acb_t z, ulong n); diff --git a/acb_hypgeom/gamma.c b/acb_hypgeom/gamma.c new file mode 100644 index 00000000..fca3b8ac --- /dev/null +++ b/acb_hypgeom/gamma.c @@ -0,0 +1,185 @@ +/* + Copyright (C) 2014, 2015, 2021 Fredrik Johansson + + This file is part of Arb. + + Arb is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. See . +*/ + +#include "acb_hypgeom.h" +#include "arb_hypgeom.h" + +void +acb_hypgeom_gamma_stirling_choose_param(int * reflect, slong * r, slong * n, + const acb_t z, int use_reflect, int digamma, slong prec); + +void acb_gamma_stirling_bound(mag_ptr err, const acb_t x, slong k0, slong knum, slong n); + +void +acb_hypgeom_gamma_stirling_inner(acb_t s, const acb_t z, slong N, slong prec) +{ + acb_t logz, t; + mag_t err; + + mag_init(err); + acb_init(t); + acb_init(logz); + + acb_gamma_stirling_bound(err, z, 0, 1, N); + + /* t = (z-0.5)*log(z) - z + log(2*pi)/2 */ + acb_log(logz, z, prec); + arb_one(acb_realref(t)); + arb_mul_2exp_si(acb_realref(t), acb_realref(t), -1); + acb_sub(t, z, t, prec); + acb_mul(t, logz, t, prec); + acb_sub(t, t, z, prec); + arb_const_log_sqrt2pi(acb_realref(logz), prec); + arb_add(acb_realref(t), acb_realref(t), acb_realref(logz), prec); + + /* sum part */ + if (prec <= 384) + acb_hypgeom_gamma_stirling_sum_horner(s, z, N, prec); + else + acb_hypgeom_gamma_stirling_sum_improved(s, z, N, 0, prec); + + acb_add(s, s, t, prec); + acb_add_error_mag(s, err); + + acb_clear(t); + acb_clear(logz); + mag_clear(err); +} + + +void +acb_hypgeom_gamma_stirling(acb_t y, const acb_t x, int reciprocal, slong prec) +{ + int reflect; + slong r, n, wp; + acb_t t, u, v; + double acc; + + wp = prec + FLINT_BIT_COUNT(prec); + + /* todo: for large x (if exact or accurate enough), increase precision */ + acc = acb_rel_accuracy_bits(x); + acc = FLINT_MAX(acc, 0); + wp = FLINT_MIN(prec, acc + 20); + wp = FLINT_MAX(wp, 2); + wp = wp + FLINT_BIT_COUNT(wp); + + if (acc < 3) /* try to avoid divisions blowing up */ + { + if (arf_cmp_d(arb_midref(acb_realref(x)), -0.5) < 0) + { + reflect = 1; + r = 0; + } + else if (arf_cmp_si(arb_midref(acb_realref(x)), 1) < 0) + { + reflect = 0; + r = 1; + } + else + { + reflect = 0; + r = 0; + } + + n = 1; + } + else + { + acb_hypgeom_gamma_stirling_choose_param(&reflect, &r, &n, x, 1, 0, wp); + } + + acb_init(t); + acb_init(u); + acb_init(v); + + if (reflect) + { + acb_sub_ui(t, x, 1, 
wp);
+        acb_neg(t, t);
+        acb_rising_ui_rec(u, t, r, wp);
+        arb_const_pi(acb_realref(v), wp);
+        acb_mul_arb(u, u, acb_realref(v), wp);
+        acb_add_ui(t, t, r, wp);
+        acb_hypgeom_gamma_stirling_inner(v, t, n, wp);
+
+        if (reciprocal)
+        {
+            /* rgamma(x) = gamma(1-x+r) sin(pi x) / (rf(1-x, r) * pi) */
+            acb_exp(v, v, wp);
+            acb_sin_pi(t, x, wp);
+            acb_mul(v, v, t, wp);
+            acb_div(y, v, u, prec);
+        }
+        else
+        {
+            /* gamma(x) = (rf(1-x, r) * pi) rgamma(1-x+r) csc(pi x) */
+            acb_neg(v, v);
+            acb_exp(v, v, wp);
+            acb_csc_pi(t, x, wp);
+            acb_mul(v, v, t, wp);
+            acb_mul(y, v, u, prec);
+        }
+    }
+    else
+    {
+        acb_add_ui(t, x, r, wp);
+        acb_hypgeom_gamma_stirling_inner(u, t, n, wp);
+
+        if (reciprocal)
+        {
+            /* rgamma(x) = rf(x,r) rgamma(x+r) */
+            acb_neg(u, u);
+            acb_exp(u, u, prec);
+            acb_rising_ui_rec(v, x, r, wp);
+            acb_mul(y, v, u, prec);
+        }
+        else
+        {
+            /* gamma(x) = gamma(x+r) / rf(x,r) */
+            acb_exp(u, u, prec);
+            acb_rising_ui_rec(v, x, r, wp);
+            acb_div(y, u, v, prec);
+        }
+    }
+
+    acb_clear(t);
+    acb_clear(u);
+    acb_clear(v);
+}
+
+void
+acb_hypgeom_gamma(acb_t y, const acb_t x, slong prec)
+{
+    if (acb_is_real(x))
+    {
+        arb_hypgeom_gamma(acb_realref(y), acb_realref(x), prec);
+        arb_zero(acb_imagref(y));
+        return;
+    }
+
+    acb_hypgeom_gamma_stirling(y, x, 0, prec);
+}
+
+void
+acb_hypgeom_rgamma(acb_t y, const acb_t x, slong prec)
+{
+    if (acb_is_real(x))
+    {
+        arb_hypgeom_rgamma(acb_realref(y), acb_realref(x), prec);
+        arb_zero(acb_imagref(y));
+        return;
+    }
+
+    acb_hypgeom_gamma_stirling(y, x, 1, prec);
+}
+
diff --git a/acb_hypgeom/gamma_stirling_sum_horner.c b/acb_hypgeom/gamma_stirling_sum_horner.c
new file mode 100644
index 00000000..07d05416
--- /dev/null
+++ b/acb_hypgeom/gamma_stirling_sum_horner.c
@@ -0,0 +1,76 @@
+/*
+    Copyright (C) 2021 Fredrik Johansson
+
+    This file is part of Arb.
+
+    Arb is free software: you can redistribute it and/or modify it under
+    the terms of the GNU Lesser General Public License (LGPL) as published
+    by the Free Software Foundation; either version 2.1 of the License, or
+    (at your option) any later version.  See <http://www.gnu.org/licenses/>.
+*/ + +#include "arb_hypgeom.h" +#include "acb_hypgeom.h" + +void arb_gamma_stirling_coeff(acb_t b, ulong k, int digamma, slong prec); + +void +acb_hypgeom_gamma_stirling_sum_horner(acb_t s, const acb_t z, slong N, slong prec) +{ + acb_t b, t, zinv, w; + mag_t zinv_mag; + slong n, term_mag, term_prec; + slong * term_mags; + + if (N <= 1) + { + acb_zero(s); + return; + } + + acb_init(b); + acb_init(t); + acb_init(zinv); + acb_init(w); + mag_init(zinv_mag); + + acb_inv(zinv, z, prec); + acb_mul(w, zinv, zinv, prec); + + acb_get_mag(zinv_mag, zinv); + term_mags = flint_malloc(sizeof(ulong) * N); + + _arb_hypgeom_gamma_stirling_term_bounds(term_mags, zinv_mag, N); + + acb_zero(s); + + for (n = N - 1; n >= 1; n--) + { + term_mag = term_mags[n]; + term_prec = prec + term_mag; + term_prec = FLINT_MIN(term_prec, prec); + term_prec = FLINT_MAX(term_prec, 10); + + if (prec - term_prec > 200) + { + acb_set_round(t, w, term_prec); + acb_mul(s, s, t, term_prec); + } + else + acb_mul(s, s, w, term_prec); + + arb_gamma_stirling_coeff(b, n, 0, term_prec); + acb_add(s, s, b, term_prec); + } + + acb_mul(s, s, zinv, prec); + + flint_free(term_mags); + + acb_clear(t); + acb_clear(b); + acb_clear(zinv); + acb_clear(w); + mag_clear(zinv_mag); +} + diff --git a/acb_hypgeom/gamma_stirling_sum_improved.c b/acb_hypgeom/gamma_stirling_sum_improved.c new file mode 100644 index 00000000..3c1d8dfd --- /dev/null +++ b/acb_hypgeom/gamma_stirling_sum_improved.c @@ -0,0 +1,439 @@ +/* + Copyright (C) 2021 Fredrik Johansson + + This file is part of Arb. + + Arb is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. See . +*/ + +#include "arb_hypgeom.h" +#include "acb_hypgeom.h" +#include "bernoulli.h" + +/* todo: move to acb module */ +static void +acb_fma_ui(acb_t res, const acb_t x, ulong y, const acb_t z, slong prec) +{ + arf_t t; + arf_init_set_ui(t, y); /* no need to free */ + arb_fma_arf(acb_realref(res), acb_realref(x), t, acb_realref(z), prec); + arb_fma_arf(acb_imagref(res), acb_imagref(x), t, acb_imagref(z), prec); +} + +void +acb_hypgeom_gamma_stirling_sum_improved(acb_t s, const acb_t z, slong N, slong K, slong prec) +{ + acb_t b, t, zinv, w, u, S2, S3, S4; + mag_t zinv_mag, err; + slong n, term_mag, term_prec; + slong * term_mags; + slong i, j, k, M; + slong * Mk; + acb_ptr upow, ukpow; + slong kodd, kpow_exp, wp; + fmpz_t kpow; + slong m; + + if (N <= 1) + { + acb_zero(s); + return; + } + + if (N == 2) + { + acb_mul_ui(s, z, 12, prec); + acb_inv(s, s, prec); + return; + } + + if (K == 0) + { + if (prec <= 128) + K = 1; + else if (prec <= 1024) + K = 2; + else + { + K = 4 + 0.1 * sqrt(FLINT_MAX(prec - 4096, 0)); + K = FLINT_MIN(K, 100); + } + } + + acb_init(b); + acb_init(t); + acb_init(zinv); + acb_init(w); + acb_init(u); + acb_init(S2); + acb_init(S3); + acb_init(S4); + mag_init(zinv_mag); + mag_init(err); + + acb_inv(zinv, z, prec); + acb_mul(w, zinv, zinv, prec); + + acb_get_mag(zinv_mag, zinv); + + term_mags = flint_malloc(sizeof(ulong) * (N + 1)); + + /* Avoid possible overflow. (This case is not interesting to handle well, + but we need to handle it.) */ + if (mag_cmp_2exp_si(zinv_mag, 10) > 0) + K = 1; + + /* Avoid possible underflow. 
*/
+    if (mag_cmp_2exp_si(zinv_mag, -100000) < 0)
+        mag_set_ui_2exp_si(zinv_mag, 1, -100000);
+
+    _arb_hypgeom_gamma_stirling_term_bounds(term_mags, zinv_mag, N + 1);
+
+    /* We will assume below that term_mags is nonincreasing */
+    for (n = N - 2; n >= 1; n--)
+    {
+        if (term_mags[n] < term_mags[n + 1])
+            term_mags[n] = term_mags[n + 1];
+    }
+
+    Mk = NULL;
+
+    if (K > 1)
+    {
+        Mk = flint_malloc(sizeof(slong) * (K + 1));
+        Mk[0] = Mk[1] = N;
+
+        for (k = 2; k <= K; k++)
+        {
+            double log2_k;
+
+            Mk[k] = N;
+
+            log2_k = log(k) * (1.0 / 0.693147180559945309);
+
+            while (Mk[k] > 2)
+            {
+                slong err, Mnew;
+
+                Mnew = Mk[k] - 1;
+                err = term_mags[Mnew] - log2_k * (2 * Mnew) + FLINT_BIT_COUNT(N - Mnew);
+
+                if (err < -prec)
+                    Mk[k] = Mnew;
+                else
+                    break;
+            }
+        }
+
+        /* We will assume that Mk is nonincreasing. */
+        for (k = 2; k <= K; k++)
+        {
+            if (Mk[k] > Mk[k - 1])
+                Mk[k] = Mk[k - 1];
+        }
+
+        while (K >= 2 && Mk[K] == Mk[K - 1])
+            K--;
+
+        M = Mk[K];
+
+        mag_hurwitz_zeta_uiui(err, 2 * M, K);
+        mag_mul_ui(err, err, N - M);
+        mag_mul_2exp_si(err, err, term_mags[M]);
+
+        for (k = 1; k < K; k++)
+        {
+            mag_t t;
+            mag_init(t);
+            mag_set_ui_lower(t, k);
+            mag_inv(t, t);
+            mag_pow_ui(t, t, 2 * Mk[k]);
+            mag_mul_ui(t, t, N - Mk[k]);
+            mag_mul_2exp_si(t, t, term_mags[Mk[k]]);
+            mag_add(err, err, t);
+            mag_clear(t);
+        }
+    }
+    else
+    {
+        M = N;
+    }
+
+    m = sqrt(N - M);
+    m = FLINT_MAX(m, 1);
+
+    /* S3 precision */
+    wp = prec + term_mags[M];
+    wp = FLINT_MIN(wp, prec);
+    wp = FLINT_MAX(wp, 10);
+
+    acb_zero(S3);
+
+    /* todo: could avoid one mul + div with precomputation here */
+    /* u = -1 / (2 pi z)^2 */
+    acb_const_pi(u, wp);
+    acb_mul(u, u, z, wp);
+    acb_mul_2exp_si(u, u, 1);
+    acb_mul(u, u, u, wp);
+    acb_inv(u, u, wp);
+    acb_neg(u, u);
+
+    fmpz_init(kpow);
+    upow = _acb_vec_init(m + 1);
+    ukpow = _acb_vec_init(m + 1);
+
+    _acb_vec_set_powers(upow, u, m + 1, wp);
+
+    for (kodd = 1; kodd < K; kodd += 2)
+    {
+        for (k = kodd; k < K; k *= 2)
+        {
+            if (k == 1)
+            {
+                _acb_vec_set(ukpow, upow, m + 1);
+                fmpz_one(kpow);
+                kpow_exp = 0;
+            }
+            else if (k == kodd)
+            {
+                acb_set(ukpow + 0, upow + 0);
+                for (j = 1; j <= FLINT_MIN(Mk[k] - M - 1, m); j++)
+                {
+                    if (j == 1)
+                        fmpz_set_ui(kpow, k * k);
+                    else
+                        fmpz_mul_ui(kpow, kpow, k * k);
+                    acb_div_fmpz(ukpow + j, upow + j, kpow, wp);
+                }
+
+                /* set kpow = k^(2M) */
+                fmpz_ui_pow_ui(kpow, k * k, M);
+                kpow_exp = 0;
+            }
+            else
+            {
+                /* compute x / k^(2j) given x / (k/2)^(2j) */
+                for (j = 1; j <= FLINT_MIN(Mk[k] - M - 1, m); j++)
+                    acb_mul_2exp_si(ukpow + j, ukpow + j, -2 * j);
+                kpow_exp += 2 * M;
+            }
+
+            acb_zero(S4);
+
+            for (n = Mk[k] - 1; n >= M; n--)
+            {
+                i = n - M;
+
+                term_prec = prec + term_mags[n];
+                term_prec = FLINT_MIN(term_prec, wp);
+                term_prec = FLINT_MAX(term_prec, 10);
+
+                /* note: set_round makes small difference here */
+                acb_fma_ui(S4, S4, (2 * n) * (2 * n - 1), ukpow + i % m, term_prec);
+
+                if (i != 0 && i % m == 0)
+                {
+                    /* note: set_round makes small difference here */
+                    acb_mul(S4, S4, ukpow + m, term_prec);
+                }
+            }
+
+            /* divide by k^(2M) */
+            if (k != 1)
+            {
+                if (!fmpz_is_one(kpow))
+                    acb_div_fmpz(S4, S4, kpow, wp);
+
+                acb_mul_2exp_si(S4, S4, -kpow_exp);
+            }
+
+            acb_add(S3, S3, S4, wp);
+        }
+    }
+
+    /* multiply by -2 (2M-2)! 
u^M z */ + fmpz_fac_ui(kpow, 2 * M - 2); + acb_mul_fmpz(S3, S3, kpow, wp); + acb_pow_ui(u, u, M, wp); + acb_mul(S3, S3, u, wp); + acb_set_round(t, z, wp); + acb_mul(S3, S3, t, wp); + acb_mul_2exp_si(S3, S3, 1); + acb_neg(S3, S3); + + acb_add_error_mag(S3, err); + + _acb_vec_clear(upow, m + 1); + _acb_vec_clear(ukpow, m + 1); + fmpz_clear(kpow); + + acb_zero(S2); + + m = sqrt(M); + upow = _acb_vec_init(m + 1); + _acb_vec_set_powers(upow, w, m + 1, prec); + + BERNOULLI_ENSURE_CACHED(2 * M - 2); + + { + fmpz_t d, e, f, g, h; + fmpz q[4]; + + fmpz_init(d); + fmpz_init(e); + fmpz_init(f); + fmpz_init(g); + fmpz_init(h); + + fmpz_init(q); + fmpz_init(q + 1); + fmpz_init(q + 2); + fmpz_init(q + 3); + + for (n = M - 1; n >= 1; n--) + { + i = n - 1; + + if (i >= 4 && i % m >= 3 && prec >= 512 && prec <= 4096) + { + term_mag = term_mags[n - 3]; + term_prec = prec + term_mag; + term_prec = FLINT_MIN(term_prec, prec); + term_prec = FLINT_MAX(term_prec, 10); + + fmpz_mul_ui(d, fmpq_denref(bernoulli_cache + 2 * (n - 0)), 2 * (n - 0) * (2 * (n - 0) - 1)); + fmpz_mul_ui(e, fmpq_denref(bernoulli_cache + 2 * (n - 1)), 2 * (n - 1) * (2 * (n - 1) - 1)); + fmpz_mul_ui(g, fmpq_denref(bernoulli_cache + 2 * (n - 2)), 2 * (n - 2) * (2 * (n - 2) - 1)); + fmpz_mul_ui(h, fmpq_denref(bernoulli_cache + 2 * (n - 3)), 2 * (n - 3) * (2 * (n - 3) - 1)); + + /* q3 = egh q2 = dgh q1 = deh q0 = deg d = degh */ + fmpz_mul(q + 3, e, g); + fmpz_mul(q + 0, q + 3, d); + fmpz_mul(q + 3, q + 3, h); + fmpz_mul(q + 2, d, h); + fmpz_mul(q + 1, q + 2, e); + fmpz_mul(q + 2, q + 2, g); + fmpz_mul(d, q + 3, d); + + fmpz_mul(q + 3, q + 3, fmpq_numref(bernoulli_cache + 2 * (n - 0))); + fmpz_mul(q + 2, q + 2, fmpq_numref(bernoulli_cache + 2 * (n - 1))); + fmpz_mul(q + 1, q + 1, fmpq_numref(bernoulli_cache + 2 * (n - 2))); + fmpz_mul(q + 0, q + 0, fmpq_numref(bernoulli_cache + 2 * (n - 3))); + + acb_dot_fmpz(t, NULL, 0, upow + i % m - 3, 1, q, 1, 4, term_prec); + acb_div_fmpz(t, t, d, term_prec); + acb_add(S2, S2, t, term_prec); + + n -= 3; + i -= 3; + } + else if (i >= 3 && i % m >= 2) + { + term_mag = term_mags[n - 2]; + term_prec = prec + term_mag; + term_prec = FLINT_MIN(term_prec, prec); + term_prec = FLINT_MAX(term_prec, 10); + + fmpz_mul_ui(d, fmpq_denref(bernoulli_cache + 2 * (n - 0)), 2 * (n - 0) * (2 * (n - 0) - 1)); + fmpz_mul_ui(e, fmpq_denref(bernoulli_cache + 2 * (n - 1)), 2 * (n - 1) * (2 * (n - 1) - 1)); + fmpz_mul_ui(g, fmpq_denref(bernoulli_cache + 2 * (n - 2)), 2 * (n - 2) * (2 * (n - 2) - 1)); + + fmpz_mul(q + 2, e, g); + fmpz_mul(q + 2, q + 2, fmpq_numref(bernoulli_cache + 2 * (n - 0))); + fmpz_mul(q + 1, d, g); + fmpz_mul(q + 1, q + 1, fmpq_numref(bernoulli_cache + 2 * (n - 1))); + fmpz_mul(q + 0, d, e); + fmpz_mul(d, q + 0, g); + fmpz_mul(q + 0, q + 0, fmpq_numref(bernoulli_cache + 2 * (n - 2))); + + acb_dot_fmpz(t, NULL, 0, upow + i % m - 2, 1, q, 1, 3, term_prec); + acb_div_fmpz(t, t, d, term_prec); + acb_add(S2, S2, t, term_prec); + + n -= 2; + i -= 2; + } + else if (i >= 1 && i % m >= 1) + { + term_mag = term_mags[n - 1]; + term_prec = prec + term_mag; + term_prec = FLINT_MIN(term_prec, prec); + term_prec = FLINT_MAX(term_prec, 10); + + fmpz_mul_ui(d, fmpq_denref(bernoulli_cache + 2 * (n - 0)), 2 * (n - 0) * (2 * (n - 0) - 1)); + fmpz_mul_ui(e, fmpq_denref(bernoulli_cache + 2 * (n - 1)), 2 * (n - 1) * (2 * (n - 1) - 1)); + + fmpz_mul(f, fmpq_numref(bernoulli_cache + 2 * (n - 0)), e); + acb_set_round(u, upow + i % m, term_prec); + acb_mul_fmpz(t, u, f, term_prec); /* todo: output-sensitive mul */ + fmpz_mul(f, 
fmpq_numref(bernoulli_cache + 2 * (n - 1)), d); + acb_set_round(u, upow + i % m - 1, term_prec); + acb_mul_fmpz(u, u, f, term_prec); /* todo: output-sensitive mul */ + acb_add(t, t, u, term_prec); + + fmpz_mul(d, d, e); + acb_div_fmpz(t, t, d, term_prec); + acb_add(S2, S2, t, term_prec); + + n--; + i--; + } + else + { + term_mag = term_mags[n]; + term_prec = prec + term_mag; + term_prec = FLINT_MIN(term_prec, prec); + term_prec = FLINT_MAX(term_prec, 10); + + acb_set_round(u, upow + i % m, term_prec); + acb_mul_fmpz(t, u, fmpq_numref(bernoulli_cache + 2 * n), term_prec); /* todo: output-sensitive mul */ + fmpz_mul_ui(d, fmpq_denref(bernoulli_cache + 2 * n), 2 * n * (2 * n - 1)); + acb_div_fmpz(t, t, d, term_prec); + + acb_add(S2, S2, t, term_prec); + } + + if (i != 0 && i % m == 0) + { + acb_set_round(u, upow + m, term_prec); /* todo: output-sensitive mul */ + acb_mul(S2, S2, u, term_prec); + } + } + + fmpz_clear(q); + fmpz_clear(q + 1); + fmpz_clear(q + 2); + fmpz_clear(q + 3); + + fmpz_clear(d); + fmpz_clear(e); + fmpz_clear(f); + fmpz_clear(g); + fmpz_clear(h); + } + + _acb_vec_clear(upow, m + 1); + + acb_mul(S2, S2, zinv, prec); + acb_add(s, S2, S3, prec); + + flint_free(term_mags); + + if (Mk != NULL) + flint_free(Mk); + + acb_clear(b); + acb_clear(t); + acb_clear(zinv); + acb_clear(w); + acb_clear(u); + acb_clear(S2); + acb_clear(S3); + acb_clear(S4); + mag_clear(zinv_mag); + mag_clear(err); +} + diff --git a/acb_hypgeom/test/t-gamma_stirling_sum.c b/acb_hypgeom/test/t-gamma_stirling_sum.c new file mode 100644 index 00000000..019b84f9 --- /dev/null +++ b/acb_hypgeom/test/t-gamma_stirling_sum.c @@ -0,0 +1,62 @@ +/* + Copyright (C) 2021 Fredrik Johansson + + This file is part of Arb. + + Arb is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. See . 
+*/ + +#include "acb_hypgeom.h" + +int main() +{ + slong iter; + flint_rand_t state; + + flint_printf("gamma_stirling_sum...."); + fflush(stdout); + + flint_randinit(state); + + for (iter = 0; iter < 10000 * arb_test_multiplier(); iter++) + { + acb_t z, s1, s2; + slong prec, N, K; + + prec = 2 + n_randint(state, 800); + N = n_randint(state, 200); + K = n_randint(state, 20); + + acb_init(z); + acb_init(s1); + acb_init(s2); + + acb_randtest(z, state, prec, 10 + n_randint(state, 200)); + + acb_hypgeom_gamma_stirling_sum_horner(s1, z, N, prec); + acb_hypgeom_gamma_stirling_sum_improved(s2, z, N, K, prec); + + if (!acb_overlaps(s1, s2)) + { + flint_printf("FAIL\n\n"); + flint_printf("N = %wd, K = %wd, prec = %wd\n\n", N, K, prec); + flint_printf("z = "); acb_printn(z, 1000, 0); flint_printf("\n\n"); + flint_printf("s1 = "); acb_printn(s1, 1000, 0); flint_printf("\n\n"); + flint_printf("s2 = "); acb_printn(s2, 1000, 0); flint_printf("\n\n"); + flint_abort(); + } + + acb_clear(z); + acb_clear(s1); + acb_clear(s2); + } + + flint_randclear(state); + flint_cleanup(); + flint_printf("PASS\n"); + return EXIT_SUCCESS; +} + diff --git a/arb_hypgeom/gamma.c b/arb_hypgeom/gamma.c index 59b3b7ad..f3bae73f 100644 --- a/arb_hypgeom/gamma.c +++ b/arb_hypgeom/gamma.c @@ -13,7 +13,7 @@ #include "bernoulli.h" /* tuning factor */ -double GAMMA_STIRLING_BETA = 0.27; +double GAMMA_STIRLING_BETA = 0.0; #define PI 3.1415926535897932385 @@ -50,7 +50,7 @@ static void choose_small(int * reflect, slong * r, slong * n, double x, double y, int use_reflect, int digamma, slong prec) { - double w, argz, log2z; + double w, argz, log2z, BETA; slong rr; /* use reflection formula if very negative */ @@ -64,8 +64,20 @@ choose_small(int * reflect, slong * r, slong * n, *reflect = 0; } + BETA = GAMMA_STIRLING_BETA; + + if (BETA < 0.12) + { + if (prec <= 32768) + BETA = 0.17; + else if (prec <= 131072) + BETA = 0.20; + else + BETA = 0.24; + } + /* argument reduction until |z| >= w */ - w = FLINT_MAX(1.0, GAMMA_STIRLING_BETA * prec); + w = FLINT_MAX(1.0, BETA * prec); rr = 0; while (x < 1.0 || x*x + y*y < w*w) diff --git a/doc/source/acb_hypgeom.rst b/doc/source/acb_hypgeom.rst index 30ecf7b2..a0d8accd 100644 --- a/doc/source/acb_hypgeom.rst +++ b/doc/source/acb_hypgeom.rst @@ -55,6 +55,45 @@ Rising factorials parameter *m* which can be set to zero to choose automatically. The default version chooses an algorithm automatically. +Gamma function +------------------------------------------------------------------------------- + +.. function:: void acb_hypgeom_gamma_stirling_sum_horner(acb_t s, const acb_t z, slong N, slong prec) + void acb_hypgeom_gamma_stirling_sum_improved(acb_t s, const acb_t z, slong N, slong K, slong prec) + + Sets *res* to the final sum in the Stirling series for the gamma function + truncated before the term with index *N*, i.e. computes + `\sum_{n=1}^{N-1} B_{2n} / (2n(2n-1) z^{2n-1})`. + The *horner* version uses Horner scheme with gradual precision adjustments. + The *improved* version uses rectangular splitting for the low-index + terms and reexpands the high-index terms as hypergeometric polynomials, + using a splitting parameter *K* (which can be set to 0 to use a default + value). + +.. function:: void acb_hypgeom_gamma_stirling(acb_t res, const acb_t x, int reciprocal, slong prec) + + Sets *res* to the gamma function of *x* computed using the Stirling + series together with argument reduction. If *reciprocal* is set, + the reciprocal gamma function is computed instead. + +.. 
function:: int acb_hypgeom_gamma_taylor(acb_t res, const acb_t x, int reciprocal, slong prec)
+
+    Attempts to compute the gamma function of *x* using Taylor series
+    together with argument reduction. This is only supported if *x* and *prec*
+    are both small enough. If successful, returns 1; otherwise, does nothing
+    and returns 0. If *reciprocal* is set, the reciprocal gamma function is
+    computed instead.
+
+.. function:: void acb_hypgeom_gamma(acb_t y, const acb_t x, slong prec)
+
+    Sets *y* to the gamma function of *x* computed using a default
+    algorithm choice.
+
+.. function:: void acb_hypgeom_rgamma(acb_t y, const acb_t x, slong prec)
+
+    Sets *y* to the reciprocal gamma function of *x* computed using a default
+    algorithm choice.
+
 Convergent series
 -------------------------------------------------------------------------------
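
As a usage illustration for the new top-level functions documented above, here is a minimal standalone sketch (not part of the diff itself). It assumes an Arb build with this patch applied, linked against Arb and FLINT in the usual way; the evaluation point 2 + 3i and the 128-bit precision are arbitrary choices for the example:

    #include "acb.h"
    #include "acb_hypgeom.h"

    int main(void)
    {
        acb_t z, g, rg, p;
        slong prec = 128;

        acb_init(z); acb_init(g); acb_init(rg); acb_init(p);

        /* z = 2 + 3i */
        acb_set_d_d(z, 2.0, 3.0);

        acb_hypgeom_gamma(g, z, prec);    /* enclosure of gamma(z) */
        acb_hypgeom_rgamma(rg, z, prec);  /* enclosure of 1/gamma(z) */

        /* sanity check: the product should be an enclosure of 1 */
        acb_mul(p, g, rg, prec);

        flint_printf("gamma(z)   = "); acb_printn(g, 30, 0); flint_printf("\n");
        flint_printf("1/gamma(z) = "); acb_printn(rg, 30, 0); flint_printf("\n");
        flint_printf("product    = "); acb_printn(p, 30, 0); flint_printf("\n");

        acb_clear(z); acb_clear(g); acb_clear(rg); acb_clear(p);
        flint_cleanup();
        return 0;
    }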
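
The sum returned by acb_hypgeom_gamma_stirling_sum_horner and acb_hypgeom_gamma_stirling_sum_improved is the truncated series `S(z) = \sum_{n=1}^{N-1} B_{2n} / (2n(2n-1) z^{2n-1})` from the documentation above, which acb_hypgeom_gamma_stirling_inner combines with `(z - 1/2) \log z - z + \log(2\pi)/2` to obtain `\log \Gamma(z)`. The sketch below redoes that combination by hand as a heuristic cross-check against acb_hypgeom_gamma; it deliberately omits the rigorous truncation bound that the library adds via acb_gamma_stirling_bound, and the values z = 10 + 7i and N = 20 are assumptions chosen so the neglected tail is negligible at 64-bit precision:

    #include "acb.h"
    #include "acb_hypgeom.h"

    int main(void)
    {
        acb_t z, s, t, g;
        slong prec = 64, N = 20;

        acb_init(z); acb_init(s); acb_init(t); acb_init(g);

        acb_set_d_d(z, 10.0, 7.0);

        /* s = S(z), the truncated Stirling sum (K = 0 picks the default splitting) */
        acb_hypgeom_gamma_stirling_sum_improved(s, z, N, 0, prec);

        /* t = (z - 1/2) log(z) - z + log(2 pi)/2 + S(z), then exponentiate */
        acb_one(t);
        acb_mul_2exp_si(t, t, -1);           /* t = 1/2 */
        acb_sub(t, z, t, prec);              /* t = z - 1/2 */
        acb_log(g, z, prec);                 /* g = log(z) */
        acb_mul(t, t, g, prec);              /* t = (z - 1/2) log(z) */
        acb_sub(t, t, z, prec);
        arb_const_log_sqrt2pi(acb_realref(g), prec);
        arb_zero(acb_imagref(g));            /* g = log(2 pi)/2 */
        acb_add(t, t, g, prec);
        acb_add(t, t, s, prec);
        acb_exp(t, t, prec);                 /* heuristic Stirling value for gamma(z) */

        acb_hypgeom_gamma(g, z, prec);       /* rigorous enclosure for comparison */

        flint_printf("Stirling: "); acb_printn(t, 20, 0); flint_printf("\n");
        flint_printf("gamma(z): "); acb_printn(g, 20, 0); flint_printf("\n");

        acb_clear(z); acb_clear(s); acb_clear(t); acb_clear(g);
        flint_cleanup();
        return 0;
    }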