slight speedup of Legendre polynomials

This commit is contained in:
Fredrik Johansson 2017-10-18 15:17:52 +02:00
parent 0346fe4df7
commit ac044b4e29
5 changed files with 150 additions and 147 deletions

View file

@ -11,60 +11,77 @@
#include "arb_hypgeom.h"
#define BIG (1 << (FLINT_BITS / 2 - 2))
static void PP(fmpz_t res, ulong n, slong k)
{
/* (2 * k - 1) * (2 * k - 1) */
if (k < BIG)
{
fmpz_set_ui(res, (2 * k - 1) * (2 * k - 1));
}
else
{
fmpz_set_ui(res, 2 * k - 1);
fmpz_mul_ui(res, res, 2 * k - 1);
}
}
static void QQ(fmpz_t res, ulong n, slong k)
{
if (k < BIG && n < BIG)
{
fmpz_set_ui(res, k * (2 * k + 2 * n + 1));
}
else
{
fmpz_set_ui(res, n);
fmpz_mul_2exp(res, res, 1);
fmpz_add_ui(res, res, 2 * k + 1);
fmpz_mul_ui(res, res, k);
}
}
#define UNROLL 4
static void
asymp_series(acb_t res, ulong n, acb_srcptr xpow, slong m, slong K, slong prec)
{
slong j, k, khi, klo, u, r;
fmpz * c;
acb_t s;
fmpz_t cc, dd;
acb_init(s);
fmpz_init(cc);
fmpz_init(dd);
c = _fmpz_vec_init(UNROLL + 1);
k = K - 1;
while (k >= 1)
{
u = FLINT_MIN(5, k);
u = FLINT_MIN(UNROLL, k);
khi = k;
klo = k - u + 1;
PP(cc, n, khi);
for (j = klo; j < khi; j++)
for (j = klo; j <= khi; j++)
{
PP(dd, n, j);
fmpz_mul(cc, cc, dd);
ulong aa = (2 * j - 1);
ulong bb = (2 * j - 1);
if (j == klo)
fmpz_ui_mul_ui(c + khi - j, aa, bb);
else
fmpz_mul2_uiui(c + khi - j, c + khi - j + 1, aa, bb);
}
for (j = khi; j >= klo; j--)
{
ulong aa = (j);
ulong bb = (2 * j + 2 * n + 1);
if (n < UWORD_MAX / 8)
{
if (j == khi)
{
fmpz_ui_mul_ui(c + u, aa, bb);
}
else
{
fmpz_mul(c + khi - j, c + khi - j, c + u);
fmpz_mul2_uiui(c + u, c + u, aa, bb);
}
}
else
{
fmpz_t t;
fmpz_init(t);
fmpz_set_ui(t, n);
fmpz_add_ui(t, t, j);
fmpz_mul_2exp(t, t, 1);
fmpz_add_ui(t, t, 1);
if (j == khi)
{
fmpz_mul_ui(c + u, t, aa);
}
else
{
fmpz_mul(c + khi - j, c + khi - j, c + u);
fmpz_mul_ui(t, t, aa);
fmpz_mul(c + u, c + u, t);
}
fmpz_clear(t);
}
}
while (k >= klo)
@ -74,36 +91,30 @@ asymp_series(acb_t res, ulong n, acb_srcptr xpow, slong m, slong K, slong prec)
if (k == khi)
{
acb_add(s, s, xpow + r, prec);
acb_mul_fmpz(s, s, cc, prec);
acb_mul_fmpz(s, s, c + khi - k, prec);
}
else if (r == 0)
{
acb_add_fmpz(s, s, cc, prec);
acb_add_fmpz(s, s, c + khi - k, prec);
}
else
{
acb_addmul_fmpz(s, xpow + r, cc, prec);
acb_addmul_fmpz(s, xpow + r, c + khi - k, prec);
}
if (r == 0 && k != 0)
acb_mul(s, s, xpow + m, prec);
PP(dd, n, k);
fmpz_divexact(cc, cc, dd);
QQ(dd, n, k);
fmpz_mul(cc, cc, dd);
k--;
}
acb_div_fmpz(s, s, cc, prec);
acb_div_fmpz(s, s, c + u, prec);
}
acb_add_ui(res, s, 1, prec);
acb_clear(s);
fmpz_clear(cc);
fmpz_clear(dd);
_fmpz_vec_clear(c, UNROLL + 1);
}

View file

@ -11,58 +11,50 @@
#include "arb_hypgeom.h"
#define BIG (1 << (FLINT_BITS / 2 - 2))
static void PP(fmpz_t r, ulong n, ulong k, ulong prime)
{
if (n < BIG && k < BIG)
{
fmpz_set_ui(r, (n - k + 1 - prime) * (n + k + prime));
}
else
{
fmpz_set_ui(r, n - k + 1 - prime);
fmpz_mul_ui(r, r, n + k + prime);
}
}
static void QQ(fmpz_t r, ulong n, ulong k, ulong prime)
{
if (k < BIG)
{
fmpz_set_ui(r, k * (k + prime));
}
else
{
fmpz_set_ui(r, k);
fmpz_mul_ui(r, r, k + prime);
}
}
#define UNROLL 4
static void
sum_rs_inner(arb_t s, arb_srcptr xpow, slong m, ulong n, slong K, ulong prime, slong prec)
{
slong j, k, khi, klo, u, r;
fmpz_t cc, dd;
fmpz_init(cc);
fmpz_init(dd);
fmpz * c;
arb_zero(s);
fmpz_zero(cc);
c = _fmpz_vec_init(UNROLL + 1);
k = K - 1;
while (k >= 1)
{
u = FLINT_MIN(5, k);
u = FLINT_MIN(UNROLL, k);
khi = k;
klo = k - u + 1;
PP(cc, n, khi, prime);
for (j = klo; j < khi; j++)
for (j = klo; j <= khi; j++)
{
PP(dd, n, j, prime);
fmpz_mul(cc, cc, dd);
ulong aa = (n - j + 1 - prime);
ulong bb = (n + j + prime);
if (j == klo)
fmpz_ui_mul_ui(c + khi - j, aa, bb);
else
fmpz_mul2_uiui(c + khi - j, c + khi - j + 1, aa, bb);
}
for (j = khi; j >= klo; j--)
{
ulong aa = (j);
ulong bb = (j + prime);
if (j == khi)
{
fmpz_ui_mul_ui(c + u, aa, bb);
}
else
{
fmpz_mul(c + khi - j, c + khi - j, c + u);
fmpz_mul2_uiui(c + u, c + u, aa, bb);
}
}
while (k >= klo)
@ -71,28 +63,23 @@ sum_rs_inner(arb_t s, arb_srcptr xpow, slong m, ulong n, slong K, ulong prime, s
if (k == khi)
{
arb_add(s, s, xpow + r, prec);
arb_mul_fmpz(s, s, cc, prec);
arb_mul_fmpz(s, s, c + khi - k, prec);
}
else if (r == 0)
arb_add_fmpz(s, s, cc, prec);
arb_add_fmpz(s, s, c + khi - k, prec);
else
arb_addmul_fmpz(s, xpow + r, cc, prec);
arb_addmul_fmpz(s, xpow + r, c + khi - k, prec);
if (r == 0 && k != 0)
arb_mul(s, s, xpow + m, prec);
PP(dd, n, k, prime);
fmpz_divexact(cc, cc, dd);
QQ(dd, n, k, prime);
fmpz_mul(cc, cc, dd);
k--;
}
arb_div_fmpz(s, s, cc, prec);
arb_div_fmpz(s, s, c + u, prec);
}
fmpz_clear(cc);
fmpz_clear(dd);
_fmpz_vec_clear(c, UNROLL + 1);
}
void

View file

@ -11,63 +11,55 @@
#include "arb_hypgeom.h"
#define BIG (1 << (FLINT_BITS / 2 - 2))
static __inline__ void PP(fmpz_t r, ulong n, ulong k)
{
slong sigma = n % 2 ? 1 : -1;
ulong d = n / 2;
if (n < BIG && k < BIG)
{
fmpz_set_ui(r, (d - k + 1) * (2 * d + 2 * k + sigma));
}
else
{
fmpz_set_ui(r, d - k + 1);
fmpz_mul_ui(r, r, 2 * d + 2 * k + sigma);
}
}
static __inline__ void QQ(fmpz_t r, ulong n, ulong k)
{
slong sigma = n % 2 ? 1 : -1;
if (k < BIG)
{
fmpz_set_ui(r, k * (2 * k + sigma));
}
else
{
fmpz_set_ui(r, k);
fmpz_mul_ui(r, r, 2 * k + sigma);
}
}
#define UNROLL 4
static void
sum_rs_inner(arb_t s, arb_srcptr xpow, slong m, ulong n, slong K, slong prec)
{
slong j, k, khi, klo, u, r;
fmpz_t cc, dd;
ulong d;
slong sigma;
fmpz * c;
fmpz_init(cc);
fmpz_init(dd);
sigma = n % 2 ? 1 : -1;
d = n / 2;
arb_zero(s);
fmpz_zero(cc);
c = _fmpz_vec_init(UNROLL + 1);
k = K - 1;
while (k >= 1)
{
u = FLINT_MIN(5, k);
u = FLINT_MIN(UNROLL, k);
khi = k;
klo = k - u + 1;
PP(cc, n, khi);
for (j = klo; j < khi; j++)
for (j = klo; j <= khi; j++)
{
PP(dd, n, j);
fmpz_mul(cc, cc, dd);
ulong aa = (d - j + 1);
ulong bb = (2 * d + 2 * j + sigma);
if (j == klo)
fmpz_ui_mul_ui(c + khi - j, aa, bb);
else
fmpz_mul2_uiui(c + khi - j, c + khi - j + 1, aa, bb);
}
for (j = khi; j >= klo; j--)
{
ulong aa = (j);
ulong bb = (2 * j + sigma);
if (j == khi)
{
fmpz_ui_mul_ui(c + u, aa, bb);
}
else
{
fmpz_mul(c + khi - j, c + khi - j, c + u);
fmpz_mul2_uiui(c + u, c + u, aa, bb);
}
}
while (k >= klo)
@ -76,28 +68,23 @@ sum_rs_inner(arb_t s, arb_srcptr xpow, slong m, ulong n, slong K, slong prec)
if (k == khi)
{
arb_add(s, s, xpow + r, prec);
arb_mul_fmpz(s, s, cc, prec);
arb_mul_fmpz(s, s, c + khi - k, prec);
}
else if (r == 0)
arb_add_fmpz(s, s, cc, prec);
arb_add_fmpz(s, s, c + khi - k, prec);
else
arb_addmul_fmpz(s, xpow + r, cc, prec);
arb_addmul_fmpz(s, xpow + r, c + khi - k, prec);
if (r == 0 && k != 0)
arb_mul(s, s, xpow + m, prec);
PP(dd, n, k);
fmpz_divexact(cc, cc, dd);
QQ(dd, n, k);
fmpz_mul(cc, cc, dd);
k--;
}
arb_div_fmpz(s, s, cc, prec);
arb_div_fmpz(s, s, c + u, prec);
}
fmpz_clear(cc);
fmpz_clear(dd);
_fmpz_vec_clear(c, UNROLL + 1);
}
static void

View file

@ -28,6 +28,10 @@ Convenience methods
Sets *z* to `x / 2^{exp}`, rounded away from zero.
.. function:: void fmpz_ui_mul_ui(fmpz_t x, ulong a, ulong b)
Sets *x* to *a* times *b*.
.. function:: void fmpz_ui_pow_ui(fmpz_t x, ulong b, ulong e)
Sets *x* to *b* raised to the power *e*.

View file

@ -172,6 +172,20 @@ _fmpz_size(const fmpz_t f)
return mpz_size(COEFF_TO_PTR(d));
}
static __inline__ void
fmpz_ui_mul_ui(fmpz_t r, ulong a, ulong b)
{
if (a < (UWORD(1) << (FLINT_BITS / 2)) && b < (UWORD(1) << (FLINT_BITS / 2)))
{
fmpz_set_ui(r, a * b);
}
else
{
fmpz_set_ui(r, a);
fmpz_mul_ui(r, r, b);
}
}
static __inline__ void
fmpz_ui_pow_ui(fmpz_t x, ulong b, ulong e)
{