even faster arb_gamma_fmpq at high precision

This commit is contained in:
fredrik 2021-06-29 19:13:07 +02:00
parent daf9755097
commit 38e0c1d552
2 changed files with 162 additions and 48 deletions

View file

@ -683,19 +683,145 @@ bsplit2(arb_t P, arb_t Q, const fmpz_t zp, const fmpz_t zq,
}
}
static void
bsplit3(arb_t P, arb_t Q, const fmpz_t zp, const fmpz_t zq,
const slong * xexp, arb_srcptr xpow,
ulong N, slong a, slong b, int cont, slong prec)
{
if (b - a == 1)
{
fmpz_t t;
fmpz_init(t);
arb_set(P, xpow + 0); /* N zq */
fmpz_set(t, zp);
fmpz_submul_ui(t, zq, a + 1); /* zp - (a + 1) zq */
arb_set_fmpz(Q, t);
fmpz_clear(t);
}
else
{
arb_t Pb, Qb;
slong step, i, m;
arb_init(Pb);
arb_init(Qb);
step = (b - a) / 2;
m = a + step;
bsplit3(P, Q, zp, zq, xexp, xpow, N, a, m, 1, prec);
bsplit3(Pb, Qb, zp, zq, xexp, xpow, N, m, b, 1, prec);
i = _arb_get_exp_pos(xexp, m - a);
arb_mul(P, P, xpow + i, prec);
if (b - m != m - a)
arb_mul(P, P, xpow + 0, prec);
arb_addmul(P, Q, Pb, prec);
if (cont)
{
arb_mul(Q, Q, Qb, prec);
}
else
{
i = _arb_get_exp_pos(xexp, m - a);
arb_mul(Q, xpow + i, xpow + i, prec);
if (b - m != m - a)
arb_mul(Q, Q, xpow + 0, prec);
}
arb_clear(Pb);
arb_clear(Qb);
}
}
double d_lambertw_branch1(double x);
static ulong
more_trailing_zeros(ulong N)
{
ulong bc, N2;
bc = FLINT_BIT_COUNT(N);
if (bc >= 8)
{
N2 = (N >> (bc - 5)) << (bc - 5);
N2 += ((N2 != N) << (bc - 5));
return N2;
}
else
{
return N;
}
}
#define C_LOG2 0.69314718055994530942
#define C_EXP1 2.7182818284590452354
static void
build_bsplit_power_table(arb_ptr xpow, const slong * xexp, slong len, slong prec)
{
slong i;
for (i = 1; i < len; i++)
{
if (xexp[i] == 2 * xexp[i-1])
{
arb_mul(xpow + i, xpow + i - 1, xpow + i - 1, prec);
}
else if (xexp[i] == 2 * xexp[i-2]) /* prefer squaring if possible */
{
arb_mul(xpow + i, xpow + i - 2, xpow + i - 2, prec);
}
else if (xexp[i] == 2 * xexp[i-1] + 1)
{
arb_mul(xpow + i, xpow + i - 1, xpow + i - 1, prec);
arb_mul(xpow + i, xpow + i, xpow, prec);
}
else if (xexp[i] == 2 * xexp[i-2] + 1)
{
arb_mul(xpow + i, xpow + i - 2, xpow + i - 2, prec);
arb_mul(xpow + i, xpow + i, xpow, prec);
}
else
{
flint_printf("power table has the wrong structure!\n");
flint_abort();
}
}
}
/* assumes z in [1, 2] */
static void
arb_gamma_fmpq_general_off1(arb_t res, const fmpq_t z, slong prec)
{
slong wp, N, n, i, length;
slong wp, N, n, n2, length, length2, wp2;
double alpha;
arb_t P, Q;
slong * xexp;
slong *xexp, *xexp2;
arb_ptr xpow;
mag_t err, err2;
wp = prec + 30;
N = 0.69314718056 * wp;
n = 2.71828182846 * N;
alpha = 0.52; /* tuning parameter between 0.5 and 1 */
N = alpha * C_LOG2 * wp;
N = more_trailing_zeros(N);
alpha = N / (C_LOG2 * wp);
/* Terms in convergent series */
n = (1 - alpha) / d_lambertw((1 - alpha) / (alpha * C_EXP1)) * C_LOG2 * wp;
/* Precision and terms in asymptotic series */
wp2 = wp * (1 - alpha);
wp2 = FLINT_MAX(wp2, 30);
n2 = (alpha - 1) / d_lambertw_branch1((alpha - 1) / (alpha * C_EXP1)) * C_LOG2 * wp;
n2 = FLINT_MAX(n2, 2); /* binary splitting correctness */
mag_init(err);
mag_init(err2);
@ -704,45 +830,23 @@ arb_gamma_fmpq_general_off1(arb_t res, const fmpq_t z, slong prec)
/* compute the powers of x = N*zq that will appear (at least x^1) */
xexp = flint_calloc(2 * FLINT_BITS, sizeof(slong));
xexp2 = flint_calloc(2 * FLINT_BITS, sizeof(slong));
length = _arb_compute_bs_exponents(xexp, n);
xpow = _arb_vec_init(length);
length2 = _arb_compute_bs_exponents(xexp2, n2);
xpow = _arb_vec_init(FLINT_MAX(length, length2));
arb_set_fmpz(xpow + 0, fmpq_denref(z));
arb_mul_ui(xpow + 0, xpow + 0, N, wp);
/* build x^i table */
for (i = 1; i < length; i++)
{
if (xexp[i] == 2 * xexp[i-1])
{
arb_mul(xpow + i, xpow + i - 1, xpow + i - 1, wp);
}
else if (xexp[i] == 2 * xexp[i-2]) /* prefer squaring if possible */
{
arb_mul(xpow + i, xpow + i - 2, xpow + i - 2, wp);
}
else if (xexp[i] == 2 * xexp[i-1] + 1)
{
arb_mul(xpow + i, xpow + i - 1, xpow + i - 1, wp);
arb_mul(xpow + i, xpow + i, xpow, wp);
}
else if (xexp[i] == 2 * xexp[i-2] + 1)
{
arb_mul(xpow + i, xpow + i - 2, xpow + i - 2, wp);
arb_mul(xpow + i, xpow + i, xpow, wp);
}
else
{
flint_printf("power table has the wrong structure!\n");
flint_abort();
}
}
build_bsplit_power_table(xpow, xexp, length, wp);
/* 1F1(1, 1+z, N) */
bsplit2(P, Q, fmpq_numref(z), fmpq_denref(z), xexp, xpow, N, 0, n, 0, wp);
arb_div(P, Q, P, wp);
/* series error bound: N^n / n! (1 + (N/n) + ...) */
/* Convergent series error bound: N^n / n! (1 + (N/n) + ...) */
mag_set_ui(err, N);
mag_pow_ui(err, err, n);
mag_rfac_ui(err2, n);
@ -753,28 +857,38 @@ arb_gamma_fmpq_general_off1(arb_t res, const fmpq_t z, slong prec)
mag_mul(err, err, err2);
arb_add_error_mag(P, err);
_arb_vec_clear(xpow, length);
flint_free(xexp);
/* N^z * exp(-N) * 1F1 / z */
/* divide 1F1 by z */
arb_mul_fmpz(P, P, fmpq_denref(z), wp);
arb_div_fmpz(P, P, fmpq_numref(z), wp);
arb_swap(res, P);
build_bsplit_power_table(xpow, xexp2, length2, wp2);
bsplit3(P, Q, fmpq_numref(z), fmpq_denref(z), xexp2, xpow, N, 0, n2, 0, wp2);
arb_div(P, P, Q, wp2);
/* 2F0 error bound (bounded by first omitted term) */
mag_fac_ui(err, n2);
mag_set_ui_lower(err2, N);
mag_pow_ui_lower(err2, err2, n2);
mag_div(err, err, err2);
arb_add_error_mag(P, err);
/* N^z * exp(-N) * (1F1/z + 2F0/N) */
arb_div_ui(P, P, N, wp2);
arb_add(res, res, P, wp);
arb_set_ui(Q, N);
arb_pow_fmpq(Q, Q, z, wp);
arb_mul(P, P, Q, wp);
arb_mul(res, res, Q, wp);
arb_set_si(Q, -N);
arb_exp(Q, Q, wp);
arb_mul(P, P, Q, wp);
arb_mul(res, res, Q, wp);
/* integral error bound: (N+1) exp(-N) */
mag_set_ui_lower(err, N);
mag_expinv(err, err);
mag_mul_ui(err, err, N + 1);
arb_add_error_mag(P, err);
arb_swap(res, P);
_arb_vec_clear(xpow, FLINT_MAX(length, length2));
flint_free(xexp);
flint_free(xexp2);
arb_clear(P);
arb_clear(Q);
@ -898,7 +1012,7 @@ arb_gamma_fmpq(arb_t y, const fmpq_t x, slong prec)
}
}
if (q != 1 && prec > 9000 + 400 * fmpz_bits(fmpq_denref(x)) &&
if (q != 1 && prec > 7000 + 300 * fmpz_bits(fmpq_denref(x)) &&
(slong) fmpz_bits(fmpq_numref(x)) - (slong) fmpz_bits(fmpq_denref(x)) < FLINT_BITS &&
fabs(fmpq_get_d(x)) < 0.03 * prec * sqrt(prec))
{

View file

@ -63,7 +63,7 @@ static const double pol7[5] = {
1.0,-3556.4306263369027831,1.4761527435056145298e6,
-9.8425904825010893103e7,7.0373606710750560344e8 };
static double
double
d_lambertw_branch1(double x)
{
double w, u;