speed up arb_exp() between 4600 and 19000 bits

This commit is contained in:
Fredrik Johansson 2017-09-06 16:45:56 +02:00
parent fcd3d3bf12
commit d3e395f4f4
6 changed files with 322 additions and 3 deletions

1
arb.h
View file

@ -937,6 +937,7 @@ void _arb_exp_taylor_rs(mp_ptr y, mp_limb_t * error,
mp_srcptr x, mp_size_t xn, ulong N);
void arb_exp_arf_bb(arb_t z, const arf_t x, slong prec, int minus_one);
void arb_exp_arf_rs_generic(arb_t res, const arf_t x, slong prec, int minus_one);
int _arb_get_mpn_fixed_mod_log2(mp_ptr w, fmpz_t q, mp_limb_t * error,
const arf_t x, mp_size_t wn);

View file

@ -77,8 +77,12 @@ arb_exp_arf_overflow(arb_t z, slong expbound, int negative, int minus_one, slong
static void
arb_exp_arf_fallback(arb_t z, const arf_t x, slong mag, slong prec, int minus_one)
{
if (mag > 64)
/* reduce by log(2) if needed, but avoid computing log(2) unnecessarily at
extremely high precision */
if (mag > 64 || (mag > 8 && prec < 1000000))
arb_exp_arf_huge(z, x, mag, prec, minus_one);
else if (prec < 19000)
arb_exp_arf_rs_generic(z, x, prec, minus_one);
else
arb_exp_arf_bb(z, x, prec, minus_one);
}

180
arb/exp_arf_rs_generic.c Normal file
View file

@ -0,0 +1,180 @@
/*
Copyright (C) 2014 Fredrik Johansson
This file is part of Arb.
Arb is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version. See <http://www.gnu.org/licenses/>.
*/
#include "arb.h"
void
arb_exp_taylor_sum_rs_generic(arb_t res, const arb_t x, slong N, slong prec)
{
arb_t s;
mag_t err;
arb_init(s);
mag_init(err);
arb_get_mag(err, x);
mag_exp_tail(err, err, N);
if (N <= 2)
{
if (N == 0)
arb_zero(res);
else if (N == 1)
arb_one(res);
else if (N == 2)
arb_add_ui(res, x, 1, prec);
arb_add_error_mag(res, err);
}
else
{
arb_ptr tpow;
slong j, k, m, M, tp, xmag;
mp_limb_t c, d, chi, clo;
xmag = arf_abs_bound_lt_2exp_si(arb_midref(x));
/* Convert to order as a series in x^2. */
M = N / 2;
m = n_sqrt(M);
/* not intended (and not 32-bit safe...) */
if (M > 30000)
{
flint_abort();
}
tpow = _arb_vec_init(m + 1);
arb_mul(s, x, x, prec);
_arb_vec_set_powers(tpow, s, m + 1, prec);
arb_zero(s);
c = 1;
j = (M - 1) % m;
for (k = M - 1; k >= 0; k--)
{
tp = prec - 2 * k * (-xmag) + 10;
tp = FLINT_MAX(tp, 2);
tp = FLINT_MIN(tp, prec);
d = (2 * k) * (2 * k + 1);
if (k != 0)
{
umul_ppmm(chi, clo, c, d);
if (chi != 0)
{
arb_div_ui(s, s, c, tp);
c = 1;
}
}
arb_addmul_ui(s, tpow + j, c, tp);
if (k != 0)
{
c *= d;
if (j == 0)
{
arb_mul(s, s, tpow + m, tp);
j = m - 1;
}
else
{
j--;
}
}
}
arb_div_ui(s, s, c, prec);
arb_mul(s, s, x, prec);
arb_add_error_mag(s, err);
/* exp = sinh + sqrt(1 + sinh^2) */
arb_mul(res, s, s, prec);
arb_add_ui(res, res, 1, prec);
arb_sqrt(res, res, prec);
arb_add(res, res, s, prec);
_arb_vec_clear(tpow, m + 1);
}
mag_clear(err);
arb_clear(s);
}
void
arb_exp_arf_rs_generic(arb_t res, const arf_t x, slong prec, int minus_one)
{
slong q, xmag, wp, k, N;
arb_t t;
if (arf_is_zero(x))
{
if (minus_one)
arb_zero(res);
else
arb_one(res);
return;
}
xmag = arf_abs_bound_lt_2exp_si(x);
/* 1 + x + O(x^2) */
/* We don't really need to worry too much about degenerate input
because the main exp function already takes care of it. */
if (xmag < -prec - 4)
{
mag_t t;
mag_init(t);
arf_get_mag(t, x);
mag_exp_tail(t, t, 2);
arb_set_arf(res, x);
arb_add_ui(res, res, minus_one ? 0 : 1, prec);
arb_add_error_mag(res, t);
mag_clear(t);
return;
}
arb_init(t);
/* generic tuning value */
q = 4.5 * pow(prec, 0.2);
q = FLINT_MAX(q, 6);
/* adjust to magnitude */
q = FLINT_MAX(0, xmag + q);
wp = prec + 10 + 2 * q + 2 * FLINT_BIT_COUNT(prec);
if (minus_one && xmag < 0)
wp += (-xmag);
/* t = x/2^q */
arf_mul_2exp_si(arb_midref(t), x, -q);
N = _arb_exp_taylor_bound(xmag - q, wp);
arb_exp_taylor_sum_rs_generic(t, t, N, wp);
/* exp(x) = exp(x/2^q)^(2^q) */
for (k = 0; k < q; k++)
arb_mul(t, t, t, wp);
if (minus_one)
arb_sub_ui(t, t, 1, wp);
arb_set_round(res, t, prec);
arb_clear(t);
}

View file

@ -11,8 +11,6 @@
#include "arb.h"
slong _arb_get_exp_pos(const slong * tab, slong step);
static void
arb_zero_pm_one(arb_t res)
{

View file

@ -0,0 +1,131 @@
/*
Copyright (C) 2017 Fredrik Johansson
This file is part of Arb.
Arb is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version. See <http://www.gnu.org/licenses/>.
*/
#include "arb.h"
/* these functions are not exposed to the public for now,
but it still makes sense to test them explicitly */
void arb_exp_taylor_sum_rs_generic(arb_t s, const arb_t x, slong N, slong prec);
int main()
{
slong iter;
flint_rand_t state;
flint_printf("exp_arf_rs_generic....");
fflush(stdout);
flint_randinit(state);
/* test the rs algorithm explicitly */
for (iter = 0; iter < 10000 * arb_test_multiplier(); iter++)
{
arb_t x, s1, s2;
slong prec;
int minus1;
arb_init(x);
arb_init(s1);
arb_init(s2);
prec = 2 + n_randint(state, 2000);
minus1 = n_randint(state, 2);
arb_randtest(x, state, 1 + n_randint(state, 2000), 3);
mag_zero(arb_radref(x));
if (n_randint(state, 2))
arb_mul_2exp_si(x, x, -n_randint(state, 2 * prec));
if (minus1)
arb_expm1(s1, x, prec);
else
arb_exp(s1, x, prec);
switch (n_randint(state, 2))
{
case 0:
arb_exp_arf_rs_generic(s2, arb_midref(x), prec, minus1);
break;
case 1:
arb_set(s2, x);
arb_exp_arf_rs_generic(s2, arb_midref(s2), prec, minus1);
break;
}
if (!arb_overlaps(s1, s2))
{
flint_printf("FAIL: overlap\n\n");
flint_printf("prec = %wd, minus1 = %d\n\n", prec, minus1);
flint_printf("x = "); arb_printn(x, 500, 0); flint_printf("\n\n");
flint_printf("s1 = "); arb_printn(s1, 500, 0); flint_printf("\n\n");
flint_printf("s2 = "); arb_printn(s2, 500, 0); flint_printf("\n\n");
flint_abort();
}
if (arb_rel_accuracy_bits(s2) < prec - 2)
{
flint_printf("FAIL: poor accuracy\n\n");
flint_printf("prec = %wd, acc1 = %wd, minus1 = %d\n\n",
prec, arb_rel_accuracy_bits(s2), minus1);
flint_printf("x = "); arb_printn(x, 500, 0); flint_printf("\n\n");
flint_printf("s1 = "); arb_printn(s1, 500, 0); flint_printf("\n\n");
flint_printf("s2 = "); arb_printn(s2, 500, 0); flint_printf("\n\n");
flint_abort();
}
arb_clear(x);
arb_clear(s1);
arb_clear(s2);
}
/* test the series evaluation code directly */
for (iter = 0; iter < 10000 * arb_test_multiplier(); iter++)
{
arb_t x, y, z;
slong prec;
slong N;
arb_init(x);
arb_init(y);
arb_init(z);
prec = 2 + n_randint(state, 2000);
N = n_randint(state, 100);
arb_randtest(x, state, 1 + n_randint(state, 2000), 1);
mag_zero(arb_radref(x));
if (n_randint(state, 2))
arb_mul_2exp_si(x, x, -n_randint(state, 2 * prec));
arb_exp(y, x, prec);
arb_exp_taylor_sum_rs_generic(z, x, N, prec);
if (!arb_overlaps(z, y))
{
flint_printf("FAIL: overlap\n\n");
flint_printf("prec = %wd, N = %wd\n\n", prec, N);
flint_printf("x = "); arb_printn(x, 500, 0); flint_printf("\n\n");
flint_printf("y = "); arb_printn(y, 500, 0); flint_printf("\n\n");
flint_printf("z = "); arb_printn(z, 500, 0); flint_printf("\n\n");
flint_abort();
}
arb_clear(x);
arb_clear(y);
arb_clear(z);
}
flint_randclear(state);
flint_cleanup();
flint_printf("PASS\n");
return EXIT_SUCCESS;
}

View file

@ -1541,6 +1541,11 @@ Internals for computing elementary functions
resulting in slightly higher memory usage but better speed. For best
efficiency, *N* should have many trailing zero bits.
.. function:: void arb_exp_arf_rs_generic(arb_t res, const arf_t x, slong prec, int minus_one)
Computes the exponential function using a generic version of the rectangular
splitting strategy, intended for intermediate precision.
.. function:: void _arb_atan_sum_bs_simple(fmpz_t T, fmpz_t Q, mp_bitcnt_t * Qexp, const fmpz_t x, mp_bitcnt_t r, slong N)
.. function:: void _arb_atan_sum_bs_powtab(fmpz_t T, fmpz_t Q, mp_bitcnt_t * Qexp, const fmpz_t x, mp_bitcnt_t r, slong N)