diff --git a/arf.h b/arf.h index f3ff8cb3..bbba6050 100644 --- a/arf.h +++ b/arf.h @@ -104,7 +104,7 @@ arf_rnd_to_mpfr(arf_rnd_t rnd) #define ARF_IS_LAGOM(x) (ARF_EXP(x) >= ARF_MIN_LAGOM_EXP && \ ARF_EXP(x) <= ARF_MAX_LAGOM_EXP) -/* At least two limbs (needs pointer). */ +/* More than two limbs (needs pointer). */ #define ARF_HAS_PTR(x) ((x)->size > ((2 << 1) + 1)) /* Raw size field (encodes both limb size and sign). */ @@ -466,8 +466,9 @@ int _arf_set_round_ui(arf_t x, ulong v, int sgnbit, long prec, arf_rnd_t rnd); /* Assumes xn > 0, x[0] != 0. */ /* TBD: 1, 2 limb versions, top-aligned version */ -int arf_set_round_mpn(arf_t y, mp_srcptr x, mp_size_t xn, - int sgnbit, const fmpz_t topexp, long prec, arf_rnd_t rnd); +int +_arf_set_round_mpn(arf_t y, long * exp_shift, mp_srcptr x, mp_size_t xn, + int sgnbit, long prec, arf_rnd_t rnd); static __inline__ int arf_set_round_ui(arf_t x, ulong v, long prec, arf_rnd_t rnd) @@ -484,7 +485,9 @@ arf_set_round_si(arf_t x, long v, long prec, arf_rnd_t rnd) static __inline__ int arf_set_round_mpz(arf_t y, const mpz_t x, long prec, arf_rnd_t rnd) { + int inexact; long size = x->_mp_size; + long fix; if (size == 0) { @@ -492,8 +495,11 @@ arf_set_round_mpz(arf_t y, const mpz_t x, long prec, arf_rnd_t rnd) return 0; } - return arf_set_round_mpn(y, x->_mp_d, FLINT_ABS(size), - (size < 0), NULL, prec, rnd); + inexact = _arf_set_round_mpn(y, &fix, x->_mp_d, FLINT_ABS(size), + (size < 0), prec, rnd); + _fmpz_demote(ARF_EXPREF(y)); + ARF_EXP(y) = FLINT_ABS(size) * FLINT_BITS + fix; + return inexact; } static __inline__ int @@ -703,6 +709,29 @@ void arf_randtest_special(arf_t x, flint_rand_t state, long bits, long mag_bits) #define ADD2_FAST_MAX (COEFF_MAX / 4) #define ADD2_FAST_MIN (-ADD2_FAST_MAX) +static __inline__ void +_fmpz_set_fast(fmpz_t f, const fmpz_t g) +{ + if (!COEFF_IS_MPZ(*f) && !COEFF_IS_MPZ(*g)) + *f = *g; + else + fmpz_set(f, g); +} + +static __inline__ void +_fmpz_add_fast(fmpz_t z, const fmpz_t x, long c) +{ + fmpz ze, xe; + + ze = *z; + xe = *x; + + if (!COEFF_IS_MPZ(ze) && (xe > ADD2_FAST_MIN && xe < ADD2_FAST_MAX)) + *z = xe + c; + else + fmpz_add_si(z, x, c); +} + static __inline__ void _fmpz_add2_fast(fmpz_t z, const fmpz_t x, const fmpz_t y, long c) { @@ -799,6 +828,50 @@ int arf_mul_rnd_down(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec); ? arf_mul_rnd_down(z, x, y, prec) \ : arf_mul_rnd_any(z, x, y, prec, rnd)) +#define ARF_ADD_STACK_ALLOC 40 +#define ARF_ADD_TLS_ALLOC 1000 + +extern TLS_PREFIX mp_ptr __arf_add_tmp; +extern TLS_PREFIX long __arf_add_alloc; + +extern void _arf_add_tmp_cleanup(void); + +#define ARF_ADD_TMP_DECL \ + mp_limb_t tmp_stack[ARF_ADD_STACK_ALLOC]; \ + +#define ARF_ADD_TMP_ALLOC(tmp, alloc) \ + if (alloc <= ARF_ADD_STACK_ALLOC) \ + { \ + tmp = tmp_stack; \ + } \ + else if (alloc <= ARF_ADD_TLS_ALLOC) \ + { \ + if (__arf_add_alloc < alloc) \ + { \ + if (__arf_add_alloc == 0) \ + { \ + flint_register_cleanup_function(_arf_add_tmp_cleanup); \ + } \ + __arf_add_tmp = flint_realloc(__arf_add_tmp, sizeof(mp_limb_t) * alloc); \ + __arf_add_alloc = alloc; \ + } \ + tmp = __arf_add_tmp; \ + } \ + else \ + { \ + tmp = flint_malloc(sizeof(mp_limb_t) * alloc); \ + } + +#define ARF_ADD_TMP_FREE(tmp, alloc) \ + if (alloc > ARF_ADD_TLS_ALLOC) \ + flint_free(tmp); + +int _arf_add_mpn(arf_t z, mp_srcptr xp, mp_size_t xn, int xsgnbit, + const fmpz_t xexp, mp_srcptr yp, mp_size_t yn, int ysgnbit, + mp_bitcnt_t shift, long prec, arf_rnd_t rnd); + +int arf_add(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec, arf_rnd_t rnd); + #ifdef __cplusplus } #endif diff --git a/arf/add.c b/arf/add.c new file mode 100644 index 00000000..04fec923 --- /dev/null +++ b/arf/add.c @@ -0,0 +1,91 @@ +/*============================================================================= + + This file is part of ARB. + + ARB is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + ARB is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ARB; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +=============================================================================*/ +/****************************************************************************** + + Copyright (C) 2014 Fredrik Johansson + +******************************************************************************/ + +#include "arf.h" + +int +arf_add_special(arf_t z, const arf_t x, const arf_t y, long prec, arf_rnd_t rnd) +{ + if (arf_is_zero(x)) + { + if (arf_is_zero(y)) + { + arf_zero(z); + return 0; + } + else + return arf_set_round(z, y, prec, rnd); + } + else if (arf_is_zero(y)) + { + return arf_set_round(z, x, prec, rnd); + } + else if (arf_is_nan(x) || arf_is_nan(y) + || (arf_is_pos_inf(x) && arf_is_neg_inf(y)) + || (arf_is_neg_inf(x) && arf_is_pos_inf(y))) + { + arf_nan(z); + return 0; + } + else if (arf_is_special(x)) + { + arf_set(z, x); + return 0; + } + else + { + arf_set(z, y); + return 0; + } +} + +int +arf_add(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec, arf_rnd_t rnd) +{ + mp_size_t xn, yn; + mp_srcptr xptr, yptr; + long shift; + + if (arf_is_special(x) || arf_is_special(y)) + { + return arf_add_special(z, x, y, prec, rnd); + } + + shift = _fmpz_sub_small(ARF_EXPREF(x), ARF_EXPREF(y)); + + if (shift < 0) + { + arf_srcptr __t; + __t = x; x = y; y = __t; + shift = -shift; + } + + ARF_GET_MPN_READONLY(xptr, xn, x); + ARF_GET_MPN_READONLY(yptr, yn, y); + + return _arf_add_mpn(z, xptr, xn, ARF_SGNBIT(x), ARF_EXPREF(x), + yptr, yn, ARF_SGNBIT(y), shift, prec, rnd); +} + diff --git a/arf/add_mpn.c b/arf/add_mpn.c new file mode 100644 index 00000000..5f523665 --- /dev/null +++ b/arf/add_mpn.c @@ -0,0 +1,248 @@ +/*============================================================================= + + This file is part of ARB. + + ARB is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + ARB is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ARB; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +=============================================================================*/ +/****************************************************************************** + + Copyright (C) 2014 Fredrik Johansson + +******************************************************************************/ + +#include "arf.h" + +TLS_PREFIX mp_ptr __arf_add_tmp = NULL; +TLS_PREFIX long __arf_add_alloc = 0; + +void _arf_add_tmp_cleanup(void) +{ + flint_free(__arf_add_tmp); + __arf_add_tmp = NULL; + __arf_add_alloc = 0; +} + +/* Assumptions: top limbs of x and y nonzero. */ +int +_arf_add_mpn(arf_t z, mp_srcptr xp, mp_size_t xn, int xsgnbit, const fmpz_t xexp, + mp_srcptr yp, mp_size_t yn, int ysgnbit, mp_bitcnt_t shift, + long prec, arf_rnd_t rnd) +{ + mp_size_t wn, zn, zn_original, alloc, xbase, wbase; + mp_size_t shift_limbs; + mp_bitcnt_t shift_bits; + int inexact; + long fix; + mp_limb_t cy; + mp_ptr tmp; + ARF_ADD_TMP_DECL + + /* x +/- eps */ + if (shift > prec + 1 && + prec != ARF_PREC_EXACT && + shift > (xn + 1) * FLINT_BITS) + { + zn = (prec + FLINT_BITS - 1) / FLINT_BITS; + zn_original = zn = FLINT_MAX(zn, xn) + 2; + shift_limbs = zn - xn; + alloc = zn + 1; + + ARF_ADD_TMP_ALLOC(tmp, alloc) + + flint_mpn_zero(tmp, shift_limbs); + flint_mpn_copyi(tmp + shift_limbs, xp, xn); + + if (xsgnbit == ysgnbit) + { + tmp[0] = 1; + } + else + { + mpn_sub_1(tmp, tmp, zn, 1); + while (tmp[zn-1] == 0) + zn--; + } + + _arf_set_round_mpn(z, &fix, tmp, zn, xsgnbit, prec, rnd); + fix += (zn - zn_original) * FLINT_BITS; + _fmpz_add_fast(ARF_EXPREF(z), xexp, fix); + + ARF_ADD_TMP_FREE(tmp, alloc) + return 1; + } + + shift_limbs = shift / FLINT_BITS; + shift_bits = shift % FLINT_BITS; + + /* TODO: shift == 0 optimization! (common case) */ + + /* w = y shifted */ + wn = yn + (shift_bits != 0); + + /* Size of sum (possibly one more limb than this for carry). */ + zn_original = zn = FLINT_MAX(xn, wn + shift_limbs); + alloc = zn + 1; + + ARF_ADD_TMP_ALLOC(tmp, alloc) + + /* Low limbs of both operands */ + xbase = zn - xn; + wbase = zn - shift_limbs - wn; + + /* Shift y to its correct offset in the sum. */ + if (shift_bits == 0) + flint_mpn_copyi(tmp + wbase, yp, yn); + else + tmp[wbase] = cy = mpn_rshift(tmp + wbase + 1, yp, yn, shift_bits); + + if (xsgnbit == ysgnbit) + { + if (shift_limbs >= xn) + { + /* XXXXXX + WWWWWWW */ + flint_mpn_zero(tmp + wn, zn - xn - wn); + flint_mpn_copyi(tmp + zn - xn, xp, xn); + cy = 0; + } + else if (shift_limbs == 0) + { + /* XXXXXX or XXXXX + WWWW WWWWWWW */ + if (wn < xn) + { + flint_mpn_copyi(tmp, xp, xn - wn); + cy = mpn_add_n(tmp + wbase, xp + wbase, tmp + wbase, wn); + } + else + { + cy = mpn_add_n(tmp + xbase, xp, tmp + xbase, xn); + } + } + else if (xbase >= wbase) + { + /* XXXXXX or XXXXXX + WWWWWWW WWWW */ + cy = mpn_add_n(tmp + xbase, tmp + xbase, xp, wn - xbase); + cy = mpn_add_1(tmp + wn, xp + wn - xbase, zn - wn, cy); + } + else + { + /* XXXXXXXX todo: merge with above? + WWW */ + flint_mpn_copyi(tmp, xp, wbase); + cy = mpn_add_n(tmp + wbase, tmp + wbase, xp + wbase, wn); + cy = mpn_add_1(tmp + zn - shift_limbs, xp + xn - shift_limbs, shift_limbs, cy); + } + + /* There could be a carry. */ + tmp[zn] = cy; + zn += cy; + } + else + { + if (shift_limbs >= xn) + { + /* XXXXXX + WWWWWWW */ + mpn_neg(tmp, tmp, wn); + flint_mpn_store(tmp + wn, zn - xn - wn, -LIMB_ONE); + mpn_sub_1(tmp + zn - xn, xp, xn, 1); + } + else if (shift_limbs == 0) + { + /* XXXXXX (1) or XXXXX (2) + WWWW WWWWWWW */ + if (wn <= xn) /* (1) */ + { + if (mpn_cmp(xp + wbase, tmp + wbase, wn) >= 0) + { + flint_mpn_copyi(tmp, xp, wbase); + mpn_sub_n(tmp + wbase, xp + wbase, tmp + wbase, wn); + } + else + { + xsgnbit = ysgnbit; + mpn_sub_n(tmp + wbase, tmp + wbase, xp + wbase, wn); + if (wbase != 0) + { + cy = mpn_neg_n(tmp, xp, wbase); + mpn_sub_1(tmp + wbase, tmp + wbase, wn, cy); + } + } + } + else /* (2) */ + { + if (mpn_cmp(tmp + xbase, xp, xn) >= 0) + { + xsgnbit = ysgnbit; + mpn_sub_n(tmp + xbase, tmp + xbase, xp, xn); + } + else + { + cy = mpn_neg(tmp, tmp, xbase); + mpn_sub_n(tmp + xbase, xp, tmp + xbase, xn); + mpn_sub_1(tmp + xbase, tmp + xbase, xn, cy); + } + } + } + else if (xbase >= wbase) + { + /* XXXXXX or XXXXXX + WWWWWWW WWWW */ + if (xbase > 0) + { + cy = mpn_sub_n(tmp + xbase, xp, tmp + xbase, wn - xbase); + cy = mpn_sub_1(tmp + wn, xp + xn - shift_limbs, shift_limbs, cy); + cy = mpn_neg(tmp, tmp, xbase); + mpn_sub_1(tmp + xbase, tmp + xbase, xn, cy); + } + else + { + cy = mpn_sub_n(tmp, xp, tmp, wn); + cy = mpn_sub_1(tmp + wn, xp + wn, xn - wn, cy); + } + } + else + { + /* XXXXXXXX + WWW */ + flint_mpn_copyi(tmp, xp, wbase); + cy = mpn_sub_n(tmp + wbase, xp + wbase, tmp + wbase, wn); + mpn_sub_1(tmp + zn - shift_limbs, xp + xn - shift_limbs, shift_limbs, cy); + } + + /* There could be cancellation. */ + while (zn > 0 && tmp[zn - 1] == 0) + zn--; + } + + if (zn == 0) + { + arf_zero(z); + inexact = 0; + } + else + { + inexact = _arf_set_round_mpn(z, &fix, tmp, zn, xsgnbit, prec, rnd); + fix += (zn - zn_original) * FLINT_BITS; + _fmpz_add_fast(ARF_EXPREF(z), xexp, fix); + } + + ARF_ADD_TMP_FREE(tmp, alloc) + return inexact; +} + diff --git a/arf/mul_rnd_any.c b/arf/mul_rnd_any.c index 9a34639f..08b32d19 100644 --- a/arf/mul_rnd_any.c +++ b/arf/mul_rnd_any.c @@ -40,6 +40,7 @@ arf_mul_rnd_any(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec, arf_rnd_t rnd) { mp_size_t xn, yn; + long fix; int sgnbit, inexact; xn = ARF_XSIZE(x); @@ -90,8 +91,8 @@ arf_mul_rnd_any(arf_ptr z, arf_srcptr x, arf_srcptr y, mpn_mul(tmp, xptr, xn, yptr, yn); } - _fmpz_add2_fast(ARF_EXPREF(z), ARF_EXPREF(x), ARF_EXPREF(y), 0); - inexact = arf_set_round_mpn(z, tmp, zn, sgnbit, ARF_EXPREF(z), prec, rnd); + inexact = _arf_set_round_mpn(z, &fix, tmp, zn, sgnbit, prec, rnd); + _fmpz_add2_fast(ARF_EXPREF(z), ARF_EXPREF(x), ARF_EXPREF(y), fix); ARF_MUL_TMP_FREE(tmp, alloc) return inexact; diff --git a/arf/mul_rnd_down.c b/arf/mul_rnd_down.c index c8f031df..0fe7b8b4 100644 --- a/arf/mul_rnd_down.c +++ b/arf/mul_rnd_down.c @@ -30,6 +30,7 @@ arf_mul_rnd_down(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec) { mp_size_t xn, yn, zn; mp_limb_t hi, lo; + long expfix; int sgnbit, ret, fix; mp_ptr zptr; @@ -184,8 +185,8 @@ arf_mul_rnd_down(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec) } zn = xn + yn; - _fmpz_add2_fast(ARF_EXPREF(z), ARF_EXPREF(x), ARF_EXPREF(y), 0); - ret = arf_set_round_mpn(z, zz, zn, sgnbit, ARF_EXPREF(z), prec, ARF_RND_DOWN); + ret = _arf_set_round_mpn(z, &expfix, zz, zn, sgnbit, prec, ARF_RND_DOWN); + _fmpz_add2_fast(ARF_EXPREF(z), ARF_EXPREF(x), ARF_EXPREF(y), expfix); return ret; } else if (yn > MUL_MPFR_MIN_LIMBS && prec != ARF_PREC_EXACT @@ -223,8 +224,8 @@ arf_mul_rnd_down(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec) mpn_mul(tmp, xptr, xn, yptr, yn); } - _fmpz_add2_fast(ARF_EXPREF(z), ARF_EXPREF(x), ARF_EXPREF(y), 0); - ret = arf_set_round_mpn(z, tmp, zn, sgnbit, ARF_EXPREF(z), prec, ARF_RND_DOWN); + ret = _arf_set_round_mpn(z, &expfix, tmp, zn, sgnbit, prec, ARF_RND_DOWN); + _fmpz_add2_fast(ARF_EXPREF(z), ARF_EXPREF(x), ARF_EXPREF(y), expfix); ARF_MUL_TMP_FREE(tmp, alloc) return ret; diff --git a/arf/neg_round.c b/arf/neg_round.c index 55884a71..2dac26d1 100644 --- a/arf/neg_round.c +++ b/arf/neg_round.c @@ -50,10 +50,14 @@ arf_neg_round(arf_t y, const arf_t x, long prec, arf_rnd_t rnd) { mp_srcptr xptr; mp_size_t xn; + int inexact; + long fix; ARF_GET_MPN_READONLY(xptr, xn, x); - return arf_set_round_mpn(y, xptr, xn, ARF_SGNBIT(x) ^ 1, - ARF_EXPREF(x), prec, rnd); + inexact = _arf_set_round_mpn(y, &fix, xptr, xn, + ARF_SGNBIT(x) ^ 1, prec, rnd); + _fmpz_add_fast(ARF_EXPREF(y), ARF_EXPREF(x), fix); + return inexact; } } } diff --git a/arf/set_round.c b/arf/set_round.c index aa32b32c..4ded050d 100644 --- a/arf/set_round.c +++ b/arf/set_round.c @@ -50,10 +50,14 @@ arf_set_round(arf_t y, const arf_t x, long prec, arf_rnd_t rnd) { mp_srcptr xptr; mp_size_t xn; + long fix; + int inexact; ARF_GET_MPN_READONLY(xptr, xn, x); - return arf_set_round_mpn(y, xptr, xn, ARF_SGNBIT(x), - ARF_EXPREF(x), prec, rnd); + inexact = _arf_set_round_mpn(y, &fix, xptr, xn, + ARF_SGNBIT(x), prec, rnd); + _fmpz_add_fast(ARF_EXPREF(y), ARF_EXPREF(x), fix); + return inexact; } } } diff --git a/arf/set_round_mpn.c b/arf/set_round_mpn.c index 4f830ce2..c9425cfb 100644 --- a/arf/set_round_mpn.c +++ b/arf/set_round_mpn.c @@ -36,8 +36,8 @@ _arf_set_mpn_roundup_power_two(arf_t y, int sgnbit) } int -arf_set_round_mpn(arf_t y, mp_srcptr x, mp_size_t xn, - int sgnbit, const fmpz_t topexp, long prec, arf_rnd_t rnd) +_arf_set_round_mpn(arf_t y, long * exp_shift, mp_srcptr x, mp_size_t xn, + int sgnbit, long prec, arf_rnd_t rnd) { unsigned int leading; mp_bitcnt_t exp, bc, val, val_bits; @@ -51,10 +51,7 @@ arf_set_round_mpn(arf_t y, mp_srcptr x, mp_size_t xn, exp = xn * FLINT_BITS - leading; /* Set exponent. */ - if (topexp == NULL) - fmpz_set_ui(ARF_EXPREF(y), exp); - else - fmpz_sub_si_inline(ARF_EXPREF(y), topexp, leading); + *exp_shift = -(long) leading; /* Find first nonzero bit. */ val_limbs = 0; @@ -112,7 +109,10 @@ arf_set_round_mpn(arf_t y, mp_srcptr x, mp_size_t xn, /* Overflow to next power of two (unlikely). */ if (val == exp) { - _arf_set_mpn_roundup_power_two(y, sgnbit); + exp_shift[0]++; + ARF_DEMOTE(y); + ARF_NOPTR_D(y)[0] = LIMB_TOP; + ARF_XSIZE(y) = ARF_MAKE_XSIZE(1, sgnbit); return 1; } } diff --git a/arf/set_round_ui.c b/arf/set_round_ui.c index 99a22c4d..af40cb85 100644 --- a/arf/set_round_ui.c +++ b/arf/set_round_ui.c @@ -25,6 +25,35 @@ #include "arf.h" +/* Top-aligns the nonzero limb v and rounds it to prec bit. */ +#define ARF_NORMALISE_ROUND_LIMB(inexact, exp, v, \ + sgnbit, prec, rnd) \ + do { \ + count_leading_zeros(exp, v); \ + v <<= exp; \ + exp = FLINT_BITS - exp; \ + if (prec >= exp) \ + { \ + inexact = 0; \ + } \ + else \ + { \ + mp_limb_t __t = v; \ + v = MASK_LIMB(v, FLINT_BITS - prec); \ + inexact = (__t != v); \ + if (inexact && arf_rounds_up(rnd, sgnbit)) \ + { \ + v += (LIMB_ONE << (FLINT_BITS - prec)); \ + if (v == 0) \ + { \ + v = LIMB_TOP; \ + exp++; \ + } \ + } \ + } \ + } \ + while (0) + int _arf_set_round_ui(arf_t x, ulong v, int sgnbit, long prec, arf_rnd_t rnd) { @@ -35,40 +64,12 @@ _arf_set_round_ui(arf_t x, ulong v, int sgnbit, long prec, arf_rnd_t rnd) } else { - unsigned int bc, mask_bits; - int inexact; - mp_limb_t t; + int exp, inexact; - count_leading_zeros(bc, v); - v <<= bc; - bc = FLINT_BITS - bc; + ARF_NORMALISE_ROUND_LIMB(inexact, exp, v, sgnbit, prec, rnd); - if (prec >= bc) - { - inexact = 0; - } - else - { - mask_bits = FLINT_BITS - prec; - - t = v; - v = (v >> mask_bits) << mask_bits; - inexact = (t != v); - - if (inexact && arf_rounds_up(rnd, sgnbit)) - { - v += (LIMB_ONE << (FLINT_BITS - prec)); - - /* Overflow to the next power of two (unlikely). */ - if (v == 0) - { - v = LIMB_TOP; - bc++; - } - } - } - - fmpz_set_ui(ARF_EXPREF(x), bc); + _fmpz_demote(ARF_EXPREF(x)); + ARF_EXP(x) = exp; ARF_DEMOTE(x); ARF_XSIZE(x) = ARF_MAKE_XSIZE(1, sgnbit); ARF_NOPTR_D(x)[0] = v; diff --git a/arf/test/t-add.c b/arf/test/t-add.c new file mode 100644 index 00000000..0cd0dcb2 --- /dev/null +++ b/arf/test/t-add.c @@ -0,0 +1,182 @@ +/*============================================================================= + + This file is part of ARB. + + ARB is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + ARB is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ARB; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +=============================================================================*/ +/****************************************************************************** + + Copyright (C) 2012 Fredrik Johansson + +******************************************************************************/ + +#include "arf.h" + +int +arf_add_naive(arf_t z, const arf_t x, const arf_t y, long prec, arf_rnd_t rnd) +{ + fmpr_t a, b; + long r; + + fmpr_init(a); + fmpr_init(b); + + arf_get_fmpr(a, x); + arf_get_fmpr(b, y); + + r = fmpr_add(a, a, b, prec, rnd); + + arf_set_fmpr(z, a); + + fmpr_clear(a); + fmpr_clear(b); + + return (r == FMPR_RESULT_EXACT) ? 0 : 1; +} + +int main() +{ + long iter, iter2; + flint_rand_t state; + + printf("add...."); + fflush(stdout); + + flint_randinit(state); + + for (iter = 0; iter < 10000; iter++) + { + arf_t x, y, z, v; + long prec, r1, r2; + arf_rnd_t rnd; + + arf_init(x); + arf_init(y); + arf_init(z); + arf_init(v); + + for (iter2 = 0; iter2 < 100; iter2++) + { + arf_randtest_special(x, state, 2000, 100); + arf_randtest_special(y, state, 2000, 100); + prec = 2 + n_randint(state, 2000); + + if (n_randint(state, 10) == 0 && + fmpz_bits(ARF_EXPREF(x)) < 10 && + fmpz_bits(ARF_EXPREF(y)) < 10) + { + prec = ARF_PREC_EXACT; + } + + switch (n_randint(state, 4)) + { + case 0: rnd = ARF_RND_DOWN; break; + case 1: rnd = ARF_RND_UP; break; + case 2: rnd = ARF_RND_FLOOR; break; + default: rnd = ARF_RND_CEIL; break; + } + + switch (n_randint(state, 5)) + { + case 0: + r1 = arf_add(z, x, y, prec, rnd); + r2 = arf_add_naive(v, x, y, prec, rnd); + if (!arf_equal(z, v) || r1 != r2) + { + printf("FAIL!\n"); + printf("prec = %ld, rnd = %d\n\n", prec, rnd); + printf("x = "); arf_print(x); printf("\n\n"); + printf("y = "); arf_print(y); printf("\n\n"); + printf("z = "); arf_print(z); printf("\n\n"); + printf("v = "); arf_print(v); printf("\n\n"); + printf("r1 = %ld, r2 = %ld\n", r1, r2); + abort(); + } + break; + + case 1: + r1 = arf_add(z, x, x, prec, rnd); + r2 = arf_add_naive(v, x, x, prec, rnd); + if (!arf_equal(z, v) || r1 != r2) + { + printf("FAIL (aliasing 1)!\n"); + printf("prec = %ld, rnd = %d\n\n", prec, rnd); + printf("x = "); arf_print(x); printf("\n\n"); + printf("z = "); arf_print(z); printf("\n\n"); + printf("v = "); arf_print(v); printf("\n\n"); + printf("r1 = %ld, r2 = %ld\n", r1, r2); + abort(); + } + break; + + case 2: + r2 = arf_add_naive(v, x, x, prec, rnd); + r1 = arf_add(x, x, x, prec, rnd); + if (!arf_equal(v, x) || r1 != r2) + { + printf("FAIL (aliasing 2)!\n"); + printf("prec = %ld, rnd = %d\n\n", prec, rnd); + printf("x = "); arf_print(x); printf("\n\n"); + printf("z = "); arf_print(z); printf("\n\n"); + printf("v = "); arf_print(v); printf("\n\n"); + printf("r1 = %ld, r2 = %ld\n", r1, r2); + abort(); + } + break; + + case 3: + r2 = arf_add_naive(v, x, y, prec, rnd); + r1 = arf_add(x, x, y, prec, rnd); + if (!arf_equal(x, v) || r1 != r2) + { + printf("FAIL (aliasing 3)!\n"); + printf("prec = %ld, rnd = %d\n\n", prec, rnd); + printf("x = "); arf_print(x); printf("\n\n"); + printf("y = "); arf_print(y); printf("\n\n"); + printf("v = "); arf_print(v); printf("\n\n"); + printf("r1 = %ld, r2 = %ld\n", r1, r2); + abort(); + } + break; + + default: + r2 = arf_add_naive(v, x, y, prec, rnd); + r1 = arf_add(x, y, x, prec, rnd); + if (!arf_equal(x, v) || r1 != r2) + { + printf("FAIL (aliasing 4)!\n"); + printf("prec = %ld, rnd = %d\n\n", prec, rnd); + printf("x = "); arf_print(x); printf("\n\n"); + printf("y = "); arf_print(y); printf("\n\n"); + printf("v = "); arf_print(v); printf("\n\n"); + printf("r1 = %ld, r2 = %ld\n", r1, r2); + abort(); + } + break; + } + } + + arf_clear(x); + arf_clear(y); + arf_clear(z); + arf_clear(v); + } + + flint_randclear(state); + flint_cleanup(); + printf("PASS\n"); + return EXIT_SUCCESS; +} diff --git a/fmpz_extras.h b/fmpz_extras.h index 140ee312..5ca26651 100644 --- a/fmpz_extras.h +++ b/fmpz_extras.h @@ -200,25 +200,10 @@ static __inline__ void fmpz_sub_mul2exp(fmpz_t z, const fmpz_t x, const fmpz_t y fmpz_clear(t); } -static long _fmpz_sub_small_large(const fmpz_t x, const fmpz_t y) -{ - fmpz_t t; - fmpz_init(t); - fmpz_sub(t, x, y); - if (!COEFF_IS_MPZ(*t)) - { - /* no need to free t */ - return *t; - } - else - { - int sign = fmpz_sgn(t); - fmpz_clear(t); - return (sign > 0) ? LONG_MAX : -LONG_MAX; - } -} +long _fmpz_sub_small_large(const fmpz_t x, const fmpz_t y); -static __inline__ long _fmpz_sub_small(const fmpz_t x, const fmpz_t y) +static __inline__ long +_fmpz_sub_small(const fmpz_t x, const fmpz_t y) { if (!COEFF_IS_MPZ(*x) && !COEFF_IS_MPZ(*y)) { diff --git a/fmpz_extras.h.save b/fmpz_extras.h.save new file mode 100644 index 00000000..d4a98b9e --- /dev/null +++ b/fmpz_extras.h.save @@ -0,0 +1,299 @@ +/*============================================================================= + + This file is part of ARB. + + ARB is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + ARB is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ARB; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +=============================================================================*/ +/****************************************************************************** + + Copyright (C) 2013 Fredrik Johansson + +******************************************************************************/ + +#ifndef FMPZ_EXTRAS_H +#define FMPZ_EXTRAS_H + +#include +#include "flint.h" +#include "fmpz.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static __inline__ void +fmpz_add_inline(fmpz_t z, const fmpz_t x, const fmpz_t y) +{ + fmpz f, g; + + f = *x; + g = *y; + + if (!COEFF_IS_MPZ(f) && !COEFF_IS_MPZ(g)) + fmpz_set_si(z, f + g); + else + fmpz_add(z, x, y); +} + +static __inline__ void +fmpz_add_si(fmpz_t z, const fmpz_t x, long y) +{ + if (y >= 0) + fmpz_add_ui(z, x, y); + else + fmpz_sub_ui(z, x, -y); +} + +static __inline__ void +fmpz_sub_si(fmpz_t z, const fmpz_t x, long y) +{ + if (y >= 0) + fmpz_sub_ui(z, x, y); + else + fmpz_add_ui(z, x, -y); +} + +static __inline__ void +fmpz_add_si_inline(fmpz_t z, const fmpz_t x, long y) +{ + fmpz f; + + f = *x; + + if (!COEFF_IS_MPZ(f) && (COEFF_MIN <= y && y <= COEFF_MAX)) + fmpz_set_si(z, f + y); + else + fmpz_add_si(z, x, y); +} + +static __inline__ void +fmpz_sub_si_inline(fmpz_t z, const fmpz_t x, long y) +{ + fmpz f; + + f = *x; + + if (!COEFF_IS_MPZ(f) && (COEFF_MIN <= y && y <= COEFF_MAX)) + fmpz_set_si(z, f - y); + else + fmpz_sub_si(z, x, y); +} + +static __inline__ void +fmpz_add_ui_inline(fmpz_t z, const fmpz_t x, ulong y) +{ + fmpz f = *x; + + if (!COEFF_IS_MPZ(f) && y <= COEFF_MAX) + fmpz_set_si(z, f + y); + else + fmpz_add_ui(z, x, y); + +#if} + +static __inline__ void +fmpz_add2_fmpz_si_inline(fmpz_t z, const fmpz_t x, const fmpz_t y, long c) +{ + fmpz f, g, h; + + f = *x; + g = *y; + + if (!COEFF_IS_MPZ(f) && !COEFF_IS_MPZ(g)) + { + h = f + g; + + if ((COEFF_MIN <= h && h <= COEFF_MAX) && + (COEFF_MIN <= c && c <= COEFF_MAX)) + { + fmpz_set_si(z, h + c); + return; + } + } + + fmpz_add(z, x, y); + fmpz_add_si(z, z, c); +} + +/* sets z = +/- ({src, n} >> shift) where 0 <= shift < FLINT_BITS + and the top limb of src is nonzero */ +/* TODO: optimize for result = 1 limb */ + +static __inline__ void +fmpz_set_mpn_rshift(fmpz_t z, mp_srcptr src, mp_size_t n, unsigned int shift, int negative) +{ + __mpz_struct * zptr; + zptr = _fmpz_promote(z); + + if (zptr->_mp_alloc < n) + mpz_realloc2(zptr, n * FLINT_BITS); + + if (shift == 0) + { + flint_mpn_copyi(zptr->_mp_d, src, n); + } + else + { + mpn_rshift(zptr->_mp_d, src, n, shift); + while (zptr->_mp_d[n - 1] == 0) /* todo: can only do one iter? */ + n--; + } + + zptr->_mp_size = negative ? -n : n; + _fmpz_demote_val(z); +} + +/* sets z = +/- {src, n} where n >= 2 and the top limb of src is nonzero */ +static __inline__ void +fmpz_set_mpn_large(fmpz_t z, mp_srcptr src, mp_size_t n, int negative) +{ + __mpz_struct * zz; + zz = _fmpz_promote(z); + + if (zz->_mp_alloc < n) + mpz_realloc2(zz, n * FLINT_BITS); + + flint_mpn_copyi(zz->_mp_d, src, n); + zz->_mp_size = negative ? -n : n; +} + +/* round away from zero */ +static __inline__ void +fmpz_adiv_q_2exp(fmpz_t z, const fmpz_t x, mp_bitcnt_t exp) +{ + int sign = fmpz_sgn(x); + + if (sign > 0) + fmpz_cdiv_q_2exp(z, x, exp); + else + fmpz_fdiv_q_2exp(z, x, exp); +} + +/* sets z = x + y*2^shift */ +static __inline__ void fmpz_add_mul2exp(fmpz_t z, const fmpz_t x, const fmpz_t y, ulong shift) +{ + fmpz_t t; + fmpz_init(t); + fmpz_mul_2exp(t, y, shift); + fmpz_add(z, x, t); + fmpz_clear(t); +} + +static __inline__ void fmpz_sub_mul2exp(fmpz_t z, const fmpz_t x, const fmpz_t y, ulong shift) +{ + fmpz_t t; + fmpz_init(t); + fmpz_mul_2exp(t, y, shift); + fmpz_sub(z, x, t); + fmpz_clear(t); +} + +static long _fmpz_sub_small_large(const fmpz_t x, const fmpz_t y) +{ + fmpz_t t; + fmpz_init(t); + fmpz_sub(t, x, y); + if (!COEFF_IS_MPZ(*t)) + { + /* no need to free t */ + return *t; + } + else + { + int sign = fmpz_sgn(t); + fmpz_clear(t); + return (sign > 0) ? LONG_MAX : -LONG_MAX; + } +} + +static __inline__ long _fmpz_sub_small(const fmpz_t x, const fmpz_t y) +{ + if (!COEFF_IS_MPZ(*x) && !COEFF_IS_MPZ(*y)) + { + return (*x) - (*y); + } + else + { + return _fmpz_sub_small_large(x, y); + } +} + +static __inline__ mp_size_t +_fmpz_size(const fmpz_t f) +{ + fmpz d = *f; + + if (!COEFF_IS_MPZ(d)) + return 1; + else + return mpz_size(COEFF_TO_PTR(d)); +} + +static __inline__ void +fmpz_ui_pow_ui(fmpz_t x, ulong b, ulong e) +{ + if (e <= 1) + { + fmpz_set_ui(x, e == 0 ? 1UL : b); + } + else + { + fmpz_set_ui(x, b); + fmpz_pow_ui(x, x, e); + } +} + +static __inline__ void +fmpz_max(fmpz_t z, const fmpz_t x, const fmpz_t y) +{ + if (fmpz_cmp(x, y) >= 0) + fmpz_set(z, x); + else + fmpz_set(z, y); +} + +static __inline__ void +fmpz_min(fmpz_t z, const fmpz_t x, const fmpz_t y) +{ + if (fmpz_cmp(x, y) < 0) + fmpz_set(z, x); + else + fmpz_set(z, y); +} + +#define FMPZ_GET_MPN_READONLY(zsign, zn, zptr, ztmp, zv) \ + if (!COEFF_IS_MPZ(zv)) \ + { \ + (zsign) = (zv) < 0; \ + (ztmp) = FLINT_ABS(zv); \ + (zptr) = &(ztmp); \ + (zn) = 1; \ + } \ + else \ + { \ + __mpz_struct * ___zz = COEFF_TO_PTR(zv); \ + (zptr) = ___zz->_mp_d; \ + (zn) = ___zz->_mp_size; \ + (zsign) = (zn) < 0; \ + (zn) = FLINT_ABS(zn); \ + } + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/fmpz_extras/sub_small_large.c b/fmpz_extras/sub_small_large.c new file mode 100644 index 00000000..e59769df --- /dev/null +++ b/fmpz_extras/sub_small_large.c @@ -0,0 +1,45 @@ +/*============================================================================= + + This file is part of ARB. + + ARB is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + ARB is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ARB; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +=============================================================================*/ +/****************************************************************************** + + Copyright (C) 2014 Fredrik Johansson + +******************************************************************************/ + +#include "fmpz_extras.h" + +long +_fmpz_sub_small_large(const fmpz_t x, const fmpz_t y) +{ + fmpz_t t; + fmpz_init(t); + fmpz_sub(t, x, y); + if (!COEFF_IS_MPZ(*t)) + { + /* no need to free t */ + return *t; + } + else + { + int sign = fmpz_sgn(t); + fmpz_clear(t); + return (sign > 0) ? LONG_MAX : -LONG_MAX; + } +}