arb/elefun/exp_fixed_precomp.c
2013-07-30 13:06:08 +02:00

218 lines
5.6 KiB
C

/*=============================================================================
This file is part of ARB.
ARB is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
ARB is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ARB; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
=============================================================================*/
/******************************************************************************
Copyright (C) 2013 Fredrik Johansson
******************************************************************************/
#include "elefun.h"
/* todo: dynamic alloc */
TLS_PREFIX fmpz_t ln2_cache;
TLS_PREFIX fmpz exp_cache[EXP_CACHE_LEVELS][EXP_CACHE_NUM];
TLS_PREFIX long exp_cache_exp[EXP_CACHE_LEVELS][EXP_CACHE_NUM];
TLS_PREFIX int exp_cache_init = 0;
void
fmprb_get_fmpz_fixed_si_check_1ulp(fmpz_t r, const fmprb_t x, long exponent)
{
fmprb_t t;
fmprb_init(t);
fmpr_get_fmpz_fixed_si(r, fmprb_midref(x), exponent);
/* now compute an upper bound for the error */
fmprb_set_fmpz(t, r);
fmprb_mul_2exp_si(t, t, exponent);
fmprb_sub(t, t, x, FMPRB_RAD_PREC);
fmprb_abs(t, t);
fmpr_add(fmprb_radref(t), fmprb_midref(t), fmprb_radref(t),
FMPRB_RAD_PREC, FMPR_RND_CEIL);
if (fmpr_cmp_2exp_si(fmprb_radref(t), exponent) > 0)
{
printf("error larger than 1 ulp!\n");
abort();
}
fmprb_clear(t);
}
void
exp_cache_cleanup()
{
long i, j;
for (i = 0; i < EXP_CACHE_LEVELS; i++)
for (j = 0; j < EXP_CACHE_NUM; j++)
fmpz_clear(exp_cache[i] + j);
fmpz_clear(ln2_cache);
exp_cache_init = 0;
}
void
compute_exp_cache()
{
long i, j, wp;
fmprb_t x, y;
wp = EXP_CACHE_PREC + 30;
fmprb_init(x);
fmprb_init(y);
for (i = 0; i < EXP_CACHE_LEVELS; i++)
{
fmpr_set_ui_2exp_si(fmprb_midref(x), 1, -(i+1) * EXP_CACHE_BITS);
fmpr_zero(fmprb_radref(x));
elefun_exp_via_mpfr(x, x, wp);
fmprb_one(y);
for (j = 0; j < EXP_CACHE_NUM; j++)
{
fmpz_init(exp_cache[i] + j);
fmprb_get_fmpz_fixed_si_check_1ulp(exp_cache[i] + j, y, -EXP_CACHE_PREC);
fmprb_mul(y, y, x, wp);
}
}
fmprb_const_log2(x, wp);
fmpz_init(ln2_cache);
fmprb_get_fmpz_fixed_si_check_1ulp(ln2_cache, x, -EXP_CACHE_PREC);
fmprb_clear(x);
fmprb_clear(y);
exp_cache_init = 1;
flint_register_cleanup_function(exp_cache_cleanup);
}
void
elefun_exp_fixed_precomp(fmpz_t y, fmpz_t yerr, fmpz_t exponent,
const fmpz_t x, const fmpz_t xerr, long prec)
{
long i, n;
ulong r, red;
fmpz_t t, u, uerr;
fmpz_init(t);
fmpz_init(u);
fmpz_init(uerr);
if (!exp_cache_init)
compute_exp_cache();
/* x = u + exponent * ln2 */
fmpz_tdiv_q_2exp(t, ln2_cache, EXP_CACHE_PREC - prec);
fmpz_fdiv_qr(exponent, u, x, t);
/* ln2 has 1 ulp error -> u gains exponent * ulp error */
if (fmpz_sgn(exponent) >= 0)
fmpz_add(uerr, xerr, exponent);
else
fmpz_sub(uerr, xerr, exponent);
/* extract top bits of u */
if (prec < EXP_CACHE_LEVELS * EXP_CACHE_BITS)
{
fmpz_mul_2exp(t, u, EXP_CACHE_LEVELS * EXP_CACHE_BITS - prec);
red = fmpz_get_ui(t);
fmpz_zero(u);
}
else
{
fmpz_tdiv_q_2exp(t, u, prec - EXP_CACHE_LEVELS * EXP_CACHE_BITS);
red = fmpz_get_ui(t);
fmpz_mul_2exp(t, t, prec - EXP_CACHE_LEVELS * EXP_CACHE_BITS);
fmpz_sub(u, u, t);
}
/* Taylor series for remaining bits */
n = elefun_exp_taylor_bound(-EXP_CACHE_REDUCTION, prec);
elefun_exp_fixed_taylor_horner_precomp(y, yerr, u, n, prec);
/* 1 ulp error for truncation of Taylor series */
fmpz_add_ui(yerr, yerr, 1);
/* cache for high bits */
for (i = EXP_CACHE_LEVELS - 1; i >= 0; i--)
{
r = (red >> ((EXP_CACHE_LEVELS - i - 1) * EXP_CACHE_BITS));
r &= ((1UL << (EXP_CACHE_BITS)) - 1);
fmpz_tdiv_q_2exp(u, exp_cache[i] + r, EXP_CACHE_PREC - prec);
fmpz_mul_tdiv_q_2exp(y, y, u, prec);
}
if (EXP_CACHE_LEVELS == 2)
{
fmpz_mul_ui(yerr, yerr, 7);
fmpz_add_ui(yerr, yerr, 9);
}
else
abort();
/* propagated error <= (err(u) * exp(uerr)) * exp(x) */
/* set uerr to a bound for err(u) * exp(err(u)) */
if (fmpz_bits(uerr) <= prec) /* for a < 1.79329, a*exp(a) < a + a^2 + a^3 */
{
fmpz_mul(u, uerr, uerr); /* uerr^2 */
fmpz_cdiv_q_2exp(u, u, prec);
fmpz_mul(t, u, uerr); /* uerr^3 */
fmpz_cdiv_q_2exp(t, t, prec);
fmpz_add(uerr, uerr, t);
fmpz_add(uerr, uerr, u);
}
else
{
/* crude bound: a*exp(a) <= 4^ceil(a) */
fmpz_cdiv_q_2exp(u, uerr, prec);
if (fmpz_bits(u) > 20)
abort();
fmpz_one(uerr);
fmpz_mul_2exp(uerr, uerr, prec + 2 * (*u));
}
fmpz_add(t, y, yerr); /* upper bound for exp(u) */
fmpz_mul(uerr, uerr, t); /* multiply by (err(u) * exp(err(u))) */
fmpz_cdiv_q_2exp(uerr, uerr, prec);
fmpz_add(yerr, yerr, uerr); /* add to final error for y = exp(x) */
fmpz_clear(t);
fmpz_clear(u);
fmpz_clear(uerr);
}