mirror of
https://github.com/vale981/arb
synced 2025-03-05 09:21:38 -05:00
speed up arb_exp() between 4600 and 19000 bits
This commit is contained in:
parent
fcd3d3bf12
commit
d3e395f4f4
6 changed files with 322 additions and 3 deletions
1
arb.h
1
arb.h
|
@ -937,6 +937,7 @@ void _arb_exp_taylor_rs(mp_ptr y, mp_limb_t * error,
|
|||
mp_srcptr x, mp_size_t xn, ulong N);
|
||||
|
||||
void arb_exp_arf_bb(arb_t z, const arf_t x, slong prec, int minus_one);
|
||||
void arb_exp_arf_rs_generic(arb_t res, const arf_t x, slong prec, int minus_one);
|
||||
|
||||
int _arb_get_mpn_fixed_mod_log2(mp_ptr w, fmpz_t q, mp_limb_t * error,
|
||||
const arf_t x, mp_size_t wn);
|
||||
|
|
|
@ -77,8 +77,12 @@ arb_exp_arf_overflow(arb_t z, slong expbound, int negative, int minus_one, slong
|
|||
static void
|
||||
arb_exp_arf_fallback(arb_t z, const arf_t x, slong mag, slong prec, int minus_one)
|
||||
{
|
||||
if (mag > 64)
|
||||
/* reduce by log(2) if needed, but avoid computing log(2) unnecessarily at
|
||||
extremely high precision */
|
||||
if (mag > 64 || (mag > 8 && prec < 1000000))
|
||||
arb_exp_arf_huge(z, x, mag, prec, minus_one);
|
||||
else if (prec < 19000)
|
||||
arb_exp_arf_rs_generic(z, x, prec, minus_one);
|
||||
else
|
||||
arb_exp_arf_bb(z, x, prec, minus_one);
|
||||
}
|
||||
|
|
180
arb/exp_arf_rs_generic.c
Normal file
180
arb/exp_arf_rs_generic.c
Normal file
|
@ -0,0 +1,180 @@
|
|||
/*
|
||||
Copyright (C) 2014 Fredrik Johansson
|
||||
|
||||
This file is part of Arb.
|
||||
|
||||
Arb is free software: you can redistribute it and/or modify it under
|
||||
the terms of the GNU Lesser General Public License (LGPL) as published
|
||||
by the Free Software Foundation; either version 2.1 of the License, or
|
||||
(at your option) any later version. See <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "arb.h"
|
||||
|
||||
void
|
||||
arb_exp_taylor_sum_rs_generic(arb_t res, const arb_t x, slong N, slong prec)
|
||||
{
|
||||
arb_t s;
|
||||
mag_t err;
|
||||
|
||||
arb_init(s);
|
||||
mag_init(err);
|
||||
|
||||
arb_get_mag(err, x);
|
||||
mag_exp_tail(err, err, N);
|
||||
|
||||
if (N <= 2)
|
||||
{
|
||||
if (N == 0)
|
||||
arb_zero(res);
|
||||
else if (N == 1)
|
||||
arb_one(res);
|
||||
else if (N == 2)
|
||||
arb_add_ui(res, x, 1, prec);
|
||||
arb_add_error_mag(res, err);
|
||||
}
|
||||
else
|
||||
{
|
||||
arb_ptr tpow;
|
||||
slong j, k, m, M, tp, xmag;
|
||||
mp_limb_t c, d, chi, clo;
|
||||
|
||||
xmag = arf_abs_bound_lt_2exp_si(arb_midref(x));
|
||||
|
||||
/* Convert to order as a series in x^2. */
|
||||
M = N / 2;
|
||||
|
||||
m = n_sqrt(M);
|
||||
|
||||
/* not intended (and not 32-bit safe...) */
|
||||
if (M > 30000)
|
||||
{
|
||||
flint_abort();
|
||||
}
|
||||
|
||||
tpow = _arb_vec_init(m + 1);
|
||||
|
||||
arb_mul(s, x, x, prec);
|
||||
_arb_vec_set_powers(tpow, s, m + 1, prec);
|
||||
arb_zero(s);
|
||||
|
||||
c = 1;
|
||||
j = (M - 1) % m;
|
||||
|
||||
for (k = M - 1; k >= 0; k--)
|
||||
{
|
||||
tp = prec - 2 * k * (-xmag) + 10;
|
||||
tp = FLINT_MAX(tp, 2);
|
||||
tp = FLINT_MIN(tp, prec);
|
||||
|
||||
d = (2 * k) * (2 * k + 1);
|
||||
|
||||
if (k != 0)
|
||||
{
|
||||
umul_ppmm(chi, clo, c, d);
|
||||
|
||||
if (chi != 0)
|
||||
{
|
||||
arb_div_ui(s, s, c, tp);
|
||||
c = 1;
|
||||
}
|
||||
}
|
||||
|
||||
arb_addmul_ui(s, tpow + j, c, tp);
|
||||
|
||||
if (k != 0)
|
||||
{
|
||||
c *= d;
|
||||
|
||||
if (j == 0)
|
||||
{
|
||||
arb_mul(s, s, tpow + m, tp);
|
||||
j = m - 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
j--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
arb_div_ui(s, s, c, prec);
|
||||
arb_mul(s, s, x, prec);
|
||||
|
||||
arb_add_error_mag(s, err);
|
||||
|
||||
/* exp = sinh + sqrt(1 + sinh^2) */
|
||||
arb_mul(res, s, s, prec);
|
||||
arb_add_ui(res, res, 1, prec);
|
||||
arb_sqrt(res, res, prec);
|
||||
arb_add(res, res, s, prec);
|
||||
|
||||
_arb_vec_clear(tpow, m + 1);
|
||||
}
|
||||
|
||||
mag_clear(err);
|
||||
arb_clear(s);
|
||||
}
|
||||
|
||||
void
|
||||
arb_exp_arf_rs_generic(arb_t res, const arf_t x, slong prec, int minus_one)
|
||||
{
|
||||
slong q, xmag, wp, k, N;
|
||||
arb_t t;
|
||||
|
||||
if (arf_is_zero(x))
|
||||
{
|
||||
if (minus_one)
|
||||
arb_zero(res);
|
||||
else
|
||||
arb_one(res);
|
||||
return;
|
||||
}
|
||||
|
||||
xmag = arf_abs_bound_lt_2exp_si(x);
|
||||
|
||||
/* 1 + x + O(x^2) */
|
||||
/* We don't really need to worry too much about degenerate input
|
||||
because the main exp function already takes care of it. */
|
||||
if (xmag < -prec - 4)
|
||||
{
|
||||
mag_t t;
|
||||
mag_init(t);
|
||||
arf_get_mag(t, x);
|
||||
mag_exp_tail(t, t, 2);
|
||||
arb_set_arf(res, x);
|
||||
arb_add_ui(res, res, minus_one ? 0 : 1, prec);
|
||||
arb_add_error_mag(res, t);
|
||||
mag_clear(t);
|
||||
return;
|
||||
}
|
||||
|
||||
arb_init(t);
|
||||
|
||||
/* generic tuning value */
|
||||
q = 4.5 * pow(prec, 0.2);
|
||||
q = FLINT_MAX(q, 6);
|
||||
/* adjust to magnitude */
|
||||
q = FLINT_MAX(0, xmag + q);
|
||||
|
||||
wp = prec + 10 + 2 * q + 2 * FLINT_BIT_COUNT(prec);
|
||||
if (minus_one && xmag < 0)
|
||||
wp += (-xmag);
|
||||
|
||||
/* t = x/2^q */
|
||||
arf_mul_2exp_si(arb_midref(t), x, -q);
|
||||
|
||||
N = _arb_exp_taylor_bound(xmag - q, wp);
|
||||
arb_exp_taylor_sum_rs_generic(t, t, N, wp);
|
||||
|
||||
/* exp(x) = exp(x/2^q)^(2^q) */
|
||||
for (k = 0; k < q; k++)
|
||||
arb_mul(t, t, t, wp);
|
||||
|
||||
if (minus_one)
|
||||
arb_sub_ui(t, t, 1, wp);
|
||||
|
||||
arb_set_round(res, t, prec);
|
||||
arb_clear(t);
|
||||
}
|
||||
|
|
@ -11,8 +11,6 @@
|
|||
|
||||
#include "arb.h"
|
||||
|
||||
slong _arb_get_exp_pos(const slong * tab, slong step);
|
||||
|
||||
static void
|
||||
arb_zero_pm_one(arb_t res)
|
||||
{
|
||||
|
|
131
arb/test/t-exp_arf_rs_generic.c
Normal file
131
arb/test/t-exp_arf_rs_generic.c
Normal file
|
@ -0,0 +1,131 @@
|
|||
/*
|
||||
Copyright (C) 2017 Fredrik Johansson
|
||||
|
||||
This file is part of Arb.
|
||||
|
||||
Arb is free software: you can redistribute it and/or modify it under
|
||||
the terms of the GNU Lesser General Public License (LGPL) as published
|
||||
by the Free Software Foundation; either version 2.1 of the License, or
|
||||
(at your option) any later version. See <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "arb.h"
|
||||
|
||||
/* these functions are not exposed to the public for now,
|
||||
but it still makes sense to test them explicitly */
|
||||
void arb_exp_taylor_sum_rs_generic(arb_t s, const arb_t x, slong N, slong prec);
|
||||
|
||||
int main()
|
||||
{
|
||||
slong iter;
|
||||
flint_rand_t state;
|
||||
|
||||
flint_printf("exp_arf_rs_generic....");
|
||||
fflush(stdout);
|
||||
|
||||
flint_randinit(state);
|
||||
|
||||
/* test the rs algorithm explicitly */
|
||||
for (iter = 0; iter < 10000 * arb_test_multiplier(); iter++)
|
||||
{
|
||||
arb_t x, s1, s2;
|
||||
slong prec;
|
||||
int minus1;
|
||||
|
||||
arb_init(x);
|
||||
arb_init(s1);
|
||||
arb_init(s2);
|
||||
|
||||
prec = 2 + n_randint(state, 2000);
|
||||
minus1 = n_randint(state, 2);
|
||||
arb_randtest(x, state, 1 + n_randint(state, 2000), 3);
|
||||
mag_zero(arb_radref(x));
|
||||
|
||||
if (n_randint(state, 2))
|
||||
arb_mul_2exp_si(x, x, -n_randint(state, 2 * prec));
|
||||
|
||||
if (minus1)
|
||||
arb_expm1(s1, x, prec);
|
||||
else
|
||||
arb_exp(s1, x, prec);
|
||||
|
||||
switch (n_randint(state, 2))
|
||||
{
|
||||
case 0:
|
||||
arb_exp_arf_rs_generic(s2, arb_midref(x), prec, minus1);
|
||||
break;
|
||||
case 1:
|
||||
arb_set(s2, x);
|
||||
arb_exp_arf_rs_generic(s2, arb_midref(s2), prec, minus1);
|
||||
break;
|
||||
}
|
||||
|
||||
if (!arb_overlaps(s1, s2))
|
||||
{
|
||||
flint_printf("FAIL: overlap\n\n");
|
||||
flint_printf("prec = %wd, minus1 = %d\n\n", prec, minus1);
|
||||
flint_printf("x = "); arb_printn(x, 500, 0); flint_printf("\n\n");
|
||||
flint_printf("s1 = "); arb_printn(s1, 500, 0); flint_printf("\n\n");
|
||||
flint_printf("s2 = "); arb_printn(s2, 500, 0); flint_printf("\n\n");
|
||||
flint_abort();
|
||||
}
|
||||
|
||||
if (arb_rel_accuracy_bits(s2) < prec - 2)
|
||||
{
|
||||
flint_printf("FAIL: poor accuracy\n\n");
|
||||
flint_printf("prec = %wd, acc1 = %wd, minus1 = %d\n\n",
|
||||
prec, arb_rel_accuracy_bits(s2), minus1);
|
||||
flint_printf("x = "); arb_printn(x, 500, 0); flint_printf("\n\n");
|
||||
flint_printf("s1 = "); arb_printn(s1, 500, 0); flint_printf("\n\n");
|
||||
flint_printf("s2 = "); arb_printn(s2, 500, 0); flint_printf("\n\n");
|
||||
flint_abort();
|
||||
}
|
||||
|
||||
arb_clear(x);
|
||||
arb_clear(s1);
|
||||
arb_clear(s2);
|
||||
}
|
||||
|
||||
/* test the series evaluation code directly */
|
||||
for (iter = 0; iter < 10000 * arb_test_multiplier(); iter++)
|
||||
{
|
||||
arb_t x, y, z;
|
||||
slong prec;
|
||||
slong N;
|
||||
|
||||
arb_init(x);
|
||||
arb_init(y);
|
||||
arb_init(z);
|
||||
|
||||
prec = 2 + n_randint(state, 2000);
|
||||
N = n_randint(state, 100);
|
||||
arb_randtest(x, state, 1 + n_randint(state, 2000), 1);
|
||||
mag_zero(arb_radref(x));
|
||||
|
||||
if (n_randint(state, 2))
|
||||
arb_mul_2exp_si(x, x, -n_randint(state, 2 * prec));
|
||||
|
||||
arb_exp(y, x, prec);
|
||||
arb_exp_taylor_sum_rs_generic(z, x, N, prec);
|
||||
|
||||
if (!arb_overlaps(z, y))
|
||||
{
|
||||
flint_printf("FAIL: overlap\n\n");
|
||||
flint_printf("prec = %wd, N = %wd\n\n", prec, N);
|
||||
flint_printf("x = "); arb_printn(x, 500, 0); flint_printf("\n\n");
|
||||
flint_printf("y = "); arb_printn(y, 500, 0); flint_printf("\n\n");
|
||||
flint_printf("z = "); arb_printn(z, 500, 0); flint_printf("\n\n");
|
||||
flint_abort();
|
||||
}
|
||||
|
||||
arb_clear(x);
|
||||
arb_clear(y);
|
||||
arb_clear(z);
|
||||
}
|
||||
|
||||
flint_randclear(state);
|
||||
flint_cleanup();
|
||||
flint_printf("PASS\n");
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
|
@ -1541,6 +1541,11 @@ Internals for computing elementary functions
|
|||
resulting in slightly higher memory usage but better speed. For best
|
||||
efficiency, *N* should have many trailing zero bits.
|
||||
|
||||
.. function:: void arb_exp_arf_rs_generic(arb_t res, const arf_t x, slong prec, int minus_one)
|
||||
|
||||
Computes the exponential function using a generic version of the rectangular
|
||||
splitting strategy, intended for intermediate precision.
|
||||
|
||||
.. function:: void _arb_atan_sum_bs_simple(fmpz_t T, fmpz_t Q, mp_bitcnt_t * Qexp, const fmpz_t x, mp_bitcnt_t r, slong N)
|
||||
|
||||
.. function:: void _arb_atan_sum_bs_powtab(fmpz_t T, fmpz_t Q, mp_bitcnt_t * Qexp, const fmpz_t x, mp_bitcnt_t r, slong N)
|
||||
|
|
Loading…
Add table
Reference in a new issue