arb/mpr/exp_basecase.c
2012-04-30 12:58:08 +02:00

206 lines
5.1 KiB
C

/*=============================================================================
This file is part of ARB.
ARB is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
ARB is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ARB; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
=============================================================================*/
/******************************************************************************
Copyright (C) 2012 Fredrik Johansson
******************************************************************************/
#include <alloca.h>
#include "mpr.h"
#define EXP_CACHE_PREC_LIMBS 6
#define EXP_CACHE1_BITS 8
#define EXP_CACHE2_BITS 8
mp_limb_t exp_cache_1[1 << EXP_CACHE1_BITS][EXP_CACHE_PREC_LIMBS + 1];
mp_limb_t exp_cache_2[1 << EXP_CACHE1_BITS][EXP_CACHE_PREC_LIMBS + 1];
int exp_cache_initialised = 0;
mp_size_t
fixed_set_mpfr(mp_ptr y, mpfr_srcptr t, mp_size_t prec)
{
mp_size_t n;
mpz_t z;
mpfr_t u;
mpz_init(z);
mpfr_init2(u, mpfr_get_prec(t));
mpfr_mul_2exp(u, t, prec * FLINT_BITS, GMP_RNDD);
mpfr_get_z(z, u, GMP_RNDD);
n = z->_mp_size;
mpn_copyi(y, z->_mp_d, n);
mpn_zero(y + n, prec - n);
mpz_clear(z);
mpfr_clear(u);
return n;
}
void
exp_cache_init()
{
int i, prec;
mpfr_t h, exph;
prec = EXP_CACHE_PREC_LIMBS * FLINT_BITS + 1;
mpfr_init2(h, prec + 16);
mpfr_init2(exph, prec + 16);
mpfr_set_ui_2exp(h, 1, -EXP_CACHE1_BITS, GMP_RNDN);
mpfr_exp(exph, h, GMP_RNDN);
mpfr_set_ui(h, 1, GMP_RNDN);
for (i = 0; i < (1 << EXP_CACHE1_BITS); i++)
{
fixed_set_mpfr(exp_cache_1[i], h, EXP_CACHE_PREC_LIMBS);
mpfr_mul(h, h, exph, GMP_RNDN);
}
mpfr_set_ui_2exp(h, 1, -EXP_CACHE1_BITS-EXP_CACHE2_BITS, GMP_RNDN);
mpfr_exp(exph, h, GMP_RNDN);
mpfr_set_ui(h, 1, GMP_RNDN);
for (i = 0; i < (1 << EXP_CACHE2_BITS); i++)
{
fixed_set_mpfr(exp_cache_2[i], h, EXP_CACHE_PREC_LIMBS);
mpfr_mul(h, h, exph, GMP_RNDN);
}
mpfr_clear(h);
mpfr_clear(exph);
exp_cache_initialised = 1;
}
static const mp_limb_t exp_coeffs[] =
{
#if FLINT64
0UL,
2432902008176640000UL,
1216451004088320000UL,
405483668029440000UL,
101370917007360000UL,
20274183401472000UL,
3379030566912000UL,
482718652416000UL,
60339831552000UL,
6704425728000UL,
670442572800UL,
60949324800UL,
5079110400UL,
390700800UL,
27907200UL,
1860480UL,
116280UL,
6840UL,
380UL,
20UL,
1UL,
#else
0UL,
479001600UL,
239500800UL,
79833600UL,
19958400UL,
3991680UL,
665280UL,
95040UL,
11880UL,
1320UL,
132UL,
12UL,
1UL,
#endif
};
#define EXP_DIVFREE_MAXTERMS (sizeof(exp_coeffs) / sizeof(mp_limb_t))
static const unsigned char fac_bits[] =
{
1, 1, 2, 3, 5, 7, 10, 13, 16, 19, 22, 26, 29, 33, 37, 41, 45, 49,
53, 57, 62, 66, 70, 75, 80, 84, 89, 94, 98, 103, 108, 113, 118, 123,
128, 133, 139, 144, 149, 154, 160, 165, 170, 176, 181, 187, 192, 198,
203, 209, 215, 220, 226, 232, 238, 243, 249, 255
};
static __inline__ long
exp_needed_terms(long reduced, long tol_bits)
{
int i;
i = 2 + (tol_bits + 1) / (1 + reduced);
while (reduced*i + fac_bits[i] - 1 > tol_bits) i--;
return i + 1;
}
void
mpr_exp_basecase(mp_ptr y, mp_srcptr x, long limbs, mp_bitcnt_t tol_bits)
{
mp_limb_t top;
long terms;
int i1, i2;
if (exp_cache_initialised == 0)
exp_cache_init();
top = x[limbs - 1];
i1 = top >> (FLINT_BITS - EXP_CACHE1_BITS);
i2 = (top >> (FLINT_BITS - (EXP_CACHE1_BITS + EXP_CACHE2_BITS))) & \
((1UL<<EXP_CACHE2_BITS) - 1);
terms = exp_needed_terms(EXP_CACHE1_BITS + EXP_CACHE2_BITS, tol_bits);
if (terms <= EXP_DIVFREE_MAXTERMS)
{
mp_limb_t t[EXP_CACHE_PREC_LIMBS*2 + 2];
mp_limb_t u[EXP_CACHE_PREC_LIMBS*2 + 2];
mpn_copyi(t, x, limbs - 1);
t[limbs - 1] = (top << (EXP_CACHE1_BITS + EXP_CACHE2_BITS)) >> \
(EXP_CACHE1_BITS + EXP_CACHE2_BITS);
mpr_polyval_1(u, t, limbs, exp_coeffs, terms);
mpn_divrem_1(u, 0, u, limbs + 1, exp_coeffs[1]);
u[limbs] = 1;
/* exp(x1+x2+t) = exp(x1)*exp(x2)*exp(t) */
mpn_mul_basecase(t, u, limbs + 1,
exp_cache_1[i1] + (EXP_CACHE_PREC_LIMBS - limbs), limbs + 1);
mpn_mul_basecase(u, t + limbs, limbs + 1,
exp_cache_2[i2] + (EXP_CACHE_PREC_LIMBS - limbs), limbs + 1);
mpn_copyi(y, u + limbs, limbs + 1);
return;
}
printf("exp: not implemented: %ld terms\n", terms);
abort();
}