diff --git a/arb_hypgeom/legendre_p_ui_asymp.c b/arb_hypgeom/legendre_p_ui_asymp.c index 59a5a4ea..4e66004d 100644 --- a/arb_hypgeom/legendre_p_ui_asymp.c +++ b/arb_hypgeom/legendre_p_ui_asymp.c @@ -11,60 +11,77 @@ #include "arb_hypgeom.h" -#define BIG (1 << (FLINT_BITS / 2 - 2)) - -static void PP(fmpz_t res, ulong n, slong k) -{ - /* (2 * k - 1) * (2 * k - 1) */ - if (k < BIG) - { - fmpz_set_ui(res, (2 * k - 1) * (2 * k - 1)); - } - else - { - fmpz_set_ui(res, 2 * k - 1); - fmpz_mul_ui(res, res, 2 * k - 1); - } -} - -static void QQ(fmpz_t res, ulong n, slong k) -{ - if (k < BIG && n < BIG) - { - fmpz_set_ui(res, k * (2 * k + 2 * n + 1)); - } - else - { - fmpz_set_ui(res, n); - fmpz_mul_2exp(res, res, 1); - fmpz_add_ui(res, res, 2 * k + 1); - fmpz_mul_ui(res, res, k); - } -} +#define UNROLL 4 static void asymp_series(acb_t res, ulong n, acb_srcptr xpow, slong m, slong K, slong prec) { slong j, k, khi, klo, u, r; + fmpz * c; acb_t s; - fmpz_t cc, dd; acb_init(s); - fmpz_init(cc); - fmpz_init(dd); + c = _fmpz_vec_init(UNROLL + 1); k = K - 1; while (k >= 1) { - u = FLINT_MIN(5, k); + u = FLINT_MIN(UNROLL, k); + khi = k; klo = k - u + 1; - PP(cc, n, khi); - for (j = klo; j < khi; j++) + for (j = klo; j <= khi; j++) { - PP(dd, n, j); - fmpz_mul(cc, cc, dd); + ulong aa = (2 * j - 1); + ulong bb = (2 * j - 1); + + if (j == klo) + fmpz_ui_mul_ui(c + khi - j, aa, bb); + else + fmpz_mul2_uiui(c + khi - j, c + khi - j + 1, aa, bb); + } + + for (j = khi; j >= klo; j--) + { + ulong aa = (j); + ulong bb = (2 * j + 2 * n + 1); + + if (n < UWORD_MAX / 8) + { + if (j == khi) + { + fmpz_ui_mul_ui(c + u, aa, bb); + } + else + { + fmpz_mul(c + khi - j, c + khi - j, c + u); + fmpz_mul2_uiui(c + u, c + u, aa, bb); + } + } + else + { + fmpz_t t; + fmpz_init(t); + + fmpz_set_ui(t, n); + fmpz_add_ui(t, t, j); + fmpz_mul_2exp(t, t, 1); + fmpz_add_ui(t, t, 1); + + if (j == khi) + { + fmpz_mul_ui(c + u, t, aa); + } + else + { + fmpz_mul(c + khi - j, c + khi - j, c + u); + fmpz_mul_ui(t, t, aa); + fmpz_mul(c + u, c + u, t); + } + + fmpz_clear(t); + } } while (k >= klo) @@ -74,36 +91,30 @@ asymp_series(acb_t res, ulong n, acb_srcptr xpow, slong m, slong K, slong prec) if (k == khi) { acb_add(s, s, xpow + r, prec); - acb_mul_fmpz(s, s, cc, prec); + acb_mul_fmpz(s, s, c + khi - k, prec); } else if (r == 0) { - acb_add_fmpz(s, s, cc, prec); + acb_add_fmpz(s, s, c + khi - k, prec); } else { - acb_addmul_fmpz(s, xpow + r, cc, prec); + acb_addmul_fmpz(s, xpow + r, c + khi - k, prec); } if (r == 0 && k != 0) acb_mul(s, s, xpow + m, prec); - PP(dd, n, k); - fmpz_divexact(cc, cc, dd); - QQ(dd, n, k); - fmpz_mul(cc, cc, dd); - k--; } - acb_div_fmpz(s, s, cc, prec); + acb_div_fmpz(s, s, c + u, prec); } acb_add_ui(res, s, 1, prec); acb_clear(s); - fmpz_clear(cc); - fmpz_clear(dd); + _fmpz_vec_clear(c, UNROLL + 1); } diff --git a/arb_hypgeom/legendre_p_ui_one.c b/arb_hypgeom/legendre_p_ui_one.c index e53b3bbe..3d75e9a8 100644 --- a/arb_hypgeom/legendre_p_ui_one.c +++ b/arb_hypgeom/legendre_p_ui_one.c @@ -11,58 +11,50 @@ #include "arb_hypgeom.h" -#define BIG (1 << (FLINT_BITS / 2 - 2)) - -static void PP(fmpz_t r, ulong n, ulong k, ulong prime) -{ - if (n < BIG && k < BIG) - { - fmpz_set_ui(r, (n - k + 1 - prime) * (n + k + prime)); - } - else - { - fmpz_set_ui(r, n - k + 1 - prime); - fmpz_mul_ui(r, r, n + k + prime); - } -} - -static void QQ(fmpz_t r, ulong n, ulong k, ulong prime) -{ - if (k < BIG) - { - fmpz_set_ui(r, k * (k + prime)); - } - else - { - fmpz_set_ui(r, k); - fmpz_mul_ui(r, r, k + prime); - } -} +#define UNROLL 4 static void sum_rs_inner(arb_t s, arb_srcptr xpow, slong m, ulong n, slong K, ulong prime, slong prec) { slong j, k, khi, klo, u, r; - fmpz_t cc, dd; - - fmpz_init(cc); - fmpz_init(dd); + fmpz * c; arb_zero(s); - fmpz_zero(cc); + c = _fmpz_vec_init(UNROLL + 1); k = K - 1; while (k >= 1) { - u = FLINT_MIN(5, k); + u = FLINT_MIN(UNROLL, k); + khi = k; klo = k - u + 1; - PP(cc, n, khi, prime); - for (j = klo; j < khi; j++) + for (j = klo; j <= khi; j++) { - PP(dd, n, j, prime); - fmpz_mul(cc, cc, dd); + ulong aa = (n - j + 1 - prime); + ulong bb = (n + j + prime); + + if (j == klo) + fmpz_ui_mul_ui(c + khi - j, aa, bb); + else + fmpz_mul2_uiui(c + khi - j, c + khi - j + 1, aa, bb); + } + + for (j = khi; j >= klo; j--) + { + ulong aa = (j); + ulong bb = (j + prime); + + if (j == khi) + { + fmpz_ui_mul_ui(c + u, aa, bb); + } + else + { + fmpz_mul(c + khi - j, c + khi - j, c + u); + fmpz_mul2_uiui(c + u, c + u, aa, bb); + } } while (k >= klo) @@ -71,28 +63,23 @@ sum_rs_inner(arb_t s, arb_srcptr xpow, slong m, ulong n, slong K, ulong prime, s if (k == khi) { arb_add(s, s, xpow + r, prec); - arb_mul_fmpz(s, s, cc, prec); + arb_mul_fmpz(s, s, c + khi - k, prec); } else if (r == 0) - arb_add_fmpz(s, s, cc, prec); + arb_add_fmpz(s, s, c + khi - k, prec); else - arb_addmul_fmpz(s, xpow + r, cc, prec); + arb_addmul_fmpz(s, xpow + r, c + khi - k, prec); if (r == 0 && k != 0) arb_mul(s, s, xpow + m, prec); - PP(dd, n, k, prime); - fmpz_divexact(cc, cc, dd); - QQ(dd, n, k, prime); - fmpz_mul(cc, cc, dd); k--; } - arb_div_fmpz(s, s, cc, prec); + arb_div_fmpz(s, s, c + u, prec); } - fmpz_clear(cc); - fmpz_clear(dd); + _fmpz_vec_clear(c, UNROLL + 1); } void diff --git a/arb_hypgeom/legendre_p_ui_zero.c b/arb_hypgeom/legendre_p_ui_zero.c index 39d160ac..3efaeb15 100644 --- a/arb_hypgeom/legendre_p_ui_zero.c +++ b/arb_hypgeom/legendre_p_ui_zero.c @@ -11,63 +11,55 @@ #include "arb_hypgeom.h" -#define BIG (1 << (FLINT_BITS / 2 - 2)) - -static __inline__ void PP(fmpz_t r, ulong n, ulong k) -{ - slong sigma = n % 2 ? 1 : -1; - ulong d = n / 2; - - if (n < BIG && k < BIG) - { - fmpz_set_ui(r, (d - k + 1) * (2 * d + 2 * k + sigma)); - } - else - { - fmpz_set_ui(r, d - k + 1); - fmpz_mul_ui(r, r, 2 * d + 2 * k + sigma); - } -} - -static __inline__ void QQ(fmpz_t r, ulong n, ulong k) -{ - slong sigma = n % 2 ? 1 : -1; - - if (k < BIG) - { - fmpz_set_ui(r, k * (2 * k + sigma)); - } - else - { - fmpz_set_ui(r, k); - fmpz_mul_ui(r, r, 2 * k + sigma); - } -} +#define UNROLL 4 static void sum_rs_inner(arb_t s, arb_srcptr xpow, slong m, ulong n, slong K, slong prec) { slong j, k, khi, klo, u, r; - fmpz_t cc, dd; + ulong d; + slong sigma; + fmpz * c; - fmpz_init(cc); - fmpz_init(dd); + sigma = n % 2 ? 1 : -1; + d = n / 2; arb_zero(s); - fmpz_zero(cc); + c = _fmpz_vec_init(UNROLL + 1); k = K - 1; while (k >= 1) { - u = FLINT_MIN(5, k); + u = FLINT_MIN(UNROLL, k); + khi = k; klo = k - u + 1; - PP(cc, n, khi); - for (j = klo; j < khi; j++) + for (j = klo; j <= khi; j++) { - PP(dd, n, j); - fmpz_mul(cc, cc, dd); + ulong aa = (d - j + 1); + ulong bb = (2 * d + 2 * j + sigma); + + if (j == klo) + fmpz_ui_mul_ui(c + khi - j, aa, bb); + else + fmpz_mul2_uiui(c + khi - j, c + khi - j + 1, aa, bb); + } + + for (j = khi; j >= klo; j--) + { + ulong aa = (j); + ulong bb = (2 * j + sigma); + + if (j == khi) + { + fmpz_ui_mul_ui(c + u, aa, bb); + } + else + { + fmpz_mul(c + khi - j, c + khi - j, c + u); + fmpz_mul2_uiui(c + u, c + u, aa, bb); + } } while (k >= klo) @@ -76,28 +68,23 @@ sum_rs_inner(arb_t s, arb_srcptr xpow, slong m, ulong n, slong K, slong prec) if (k == khi) { arb_add(s, s, xpow + r, prec); - arb_mul_fmpz(s, s, cc, prec); + arb_mul_fmpz(s, s, c + khi - k, prec); } else if (r == 0) - arb_add_fmpz(s, s, cc, prec); + arb_add_fmpz(s, s, c + khi - k, prec); else - arb_addmul_fmpz(s, xpow + r, cc, prec); + arb_addmul_fmpz(s, xpow + r, c + khi - k, prec); if (r == 0 && k != 0) arb_mul(s, s, xpow + m, prec); - PP(dd, n, k); - fmpz_divexact(cc, cc, dd); - QQ(dd, n, k); - fmpz_mul(cc, cc, dd); k--; } - arb_div_fmpz(s, s, cc, prec); + arb_div_fmpz(s, s, c + u, prec); } - fmpz_clear(cc); - fmpz_clear(dd); + _fmpz_vec_clear(c, UNROLL + 1); } static void diff --git a/doc/source/fmpz_extras.rst b/doc/source/fmpz_extras.rst index f5152987..a2e97459 100644 --- a/doc/source/fmpz_extras.rst +++ b/doc/source/fmpz_extras.rst @@ -28,6 +28,10 @@ Convenience methods Sets *z* to `x / 2^{exp}`, rounded away from zero. +.. function:: void fmpz_ui_mul_ui(fmpz_t x, ulong a, ulong b) + + Sets *x* to *a* times *b*. + .. function:: void fmpz_ui_pow_ui(fmpz_t x, ulong b, ulong e) Sets *x* to *b* raised to the power *e*. diff --git a/fmpz_extras.h b/fmpz_extras.h index 10ddd80f..6b3387d5 100644 --- a/fmpz_extras.h +++ b/fmpz_extras.h @@ -172,6 +172,20 @@ _fmpz_size(const fmpz_t f) return mpz_size(COEFF_TO_PTR(d)); } +static __inline__ void +fmpz_ui_mul_ui(fmpz_t r, ulong a, ulong b) +{ + if (a < (UWORD(1) << (FLINT_BITS / 2)) && b < (UWORD(1) << (FLINT_BITS / 2))) + { + fmpz_set_ui(r, a * b); + } + else + { + fmpz_set_ui(r, a); + fmpz_mul_ui(r, r, b); + } +} + static __inline__ void fmpz_ui_pow_ui(fmpz_t x, ulong b, ulong e) {