preliminary addition code; some other code adjustments

This commit is contained in:
Fredrik Johansson 2014-05-02 15:09:11 +02:00
parent 0c4dcd412f
commit 81055070cf
13 changed files with 1006 additions and 72 deletions

83
arf.h
View file

@ -104,7 +104,7 @@ arf_rnd_to_mpfr(arf_rnd_t rnd)
#define ARF_IS_LAGOM(x) (ARF_EXP(x) >= ARF_MIN_LAGOM_EXP && \ #define ARF_IS_LAGOM(x) (ARF_EXP(x) >= ARF_MIN_LAGOM_EXP && \
ARF_EXP(x) <= ARF_MAX_LAGOM_EXP) ARF_EXP(x) <= ARF_MAX_LAGOM_EXP)
/* At least two limbs (needs pointer). */ /* More than two limbs (needs pointer). */
#define ARF_HAS_PTR(x) ((x)->size > ((2 << 1) + 1)) #define ARF_HAS_PTR(x) ((x)->size > ((2 << 1) + 1))
/* Raw size field (encodes both limb size and sign). */ /* Raw size field (encodes both limb size and sign). */
@ -466,8 +466,9 @@ int _arf_set_round_ui(arf_t x, ulong v, int sgnbit, long prec, arf_rnd_t rnd);
/* Assumes xn > 0, x[0] != 0. */ /* Assumes xn > 0, x[0] != 0. */
/* TBD: 1, 2 limb versions, top-aligned version */ /* TBD: 1, 2 limb versions, top-aligned version */
int arf_set_round_mpn(arf_t y, mp_srcptr x, mp_size_t xn, int
int sgnbit, const fmpz_t topexp, long prec, arf_rnd_t rnd); _arf_set_round_mpn(arf_t y, long * exp_shift, mp_srcptr x, mp_size_t xn,
int sgnbit, long prec, arf_rnd_t rnd);
static __inline__ int static __inline__ int
arf_set_round_ui(arf_t x, ulong v, long prec, arf_rnd_t rnd) arf_set_round_ui(arf_t x, ulong v, long prec, arf_rnd_t rnd)
@ -484,7 +485,9 @@ arf_set_round_si(arf_t x, long v, long prec, arf_rnd_t rnd)
static __inline__ int static __inline__ int
arf_set_round_mpz(arf_t y, const mpz_t x, long prec, arf_rnd_t rnd) arf_set_round_mpz(arf_t y, const mpz_t x, long prec, arf_rnd_t rnd)
{ {
int inexact;
long size = x->_mp_size; long size = x->_mp_size;
long fix;
if (size == 0) if (size == 0)
{ {
@ -492,8 +495,11 @@ arf_set_round_mpz(arf_t y, const mpz_t x, long prec, arf_rnd_t rnd)
return 0; return 0;
} }
return arf_set_round_mpn(y, x->_mp_d, FLINT_ABS(size), inexact = _arf_set_round_mpn(y, &fix, x->_mp_d, FLINT_ABS(size),
(size < 0), NULL, prec, rnd); (size < 0), prec, rnd);
_fmpz_demote(ARF_EXPREF(y));
ARF_EXP(y) = FLINT_ABS(size) * FLINT_BITS + fix;
return inexact;
} }
static __inline__ int static __inline__ int
@ -703,6 +709,29 @@ void arf_randtest_special(arf_t x, flint_rand_t state, long bits, long mag_bits)
#define ADD2_FAST_MAX (COEFF_MAX / 4) #define ADD2_FAST_MAX (COEFF_MAX / 4)
#define ADD2_FAST_MIN (-ADD2_FAST_MAX) #define ADD2_FAST_MIN (-ADD2_FAST_MAX)
static __inline__ void
_fmpz_set_fast(fmpz_t f, const fmpz_t g)
{
if (!COEFF_IS_MPZ(*f) && !COEFF_IS_MPZ(*g))
*f = *g;
else
fmpz_set(f, g);
}
static __inline__ void
_fmpz_add_fast(fmpz_t z, const fmpz_t x, long c)
{
fmpz ze, xe;
ze = *z;
xe = *x;
if (!COEFF_IS_MPZ(ze) && (xe > ADD2_FAST_MIN && xe < ADD2_FAST_MAX))
*z = xe + c;
else
fmpz_add_si(z, x, c);
}
static __inline__ void static __inline__ void
_fmpz_add2_fast(fmpz_t z, const fmpz_t x, const fmpz_t y, long c) _fmpz_add2_fast(fmpz_t z, const fmpz_t x, const fmpz_t y, long c)
{ {
@ -799,6 +828,50 @@ int arf_mul_rnd_down(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec);
? arf_mul_rnd_down(z, x, y, prec) \ ? arf_mul_rnd_down(z, x, y, prec) \
: arf_mul_rnd_any(z, x, y, prec, rnd)) : arf_mul_rnd_any(z, x, y, prec, rnd))
#define ARF_ADD_STACK_ALLOC 40
#define ARF_ADD_TLS_ALLOC 1000
extern TLS_PREFIX mp_ptr __arf_add_tmp;
extern TLS_PREFIX long __arf_add_alloc;
extern void _arf_add_tmp_cleanup(void);
#define ARF_ADD_TMP_DECL \
mp_limb_t tmp_stack[ARF_ADD_STACK_ALLOC]; \
#define ARF_ADD_TMP_ALLOC(tmp, alloc) \
if (alloc <= ARF_ADD_STACK_ALLOC) \
{ \
tmp = tmp_stack; \
} \
else if (alloc <= ARF_ADD_TLS_ALLOC) \
{ \
if (__arf_add_alloc < alloc) \
{ \
if (__arf_add_alloc == 0) \
{ \
flint_register_cleanup_function(_arf_add_tmp_cleanup); \
} \
__arf_add_tmp = flint_realloc(__arf_add_tmp, sizeof(mp_limb_t) * alloc); \
__arf_add_alloc = alloc; \
} \
tmp = __arf_add_tmp; \
} \
else \
{ \
tmp = flint_malloc(sizeof(mp_limb_t) * alloc); \
}
#define ARF_ADD_TMP_FREE(tmp, alloc) \
if (alloc > ARF_ADD_TLS_ALLOC) \
flint_free(tmp);
int _arf_add_mpn(arf_t z, mp_srcptr xp, mp_size_t xn, int xsgnbit,
const fmpz_t xexp, mp_srcptr yp, mp_size_t yn, int ysgnbit,
mp_bitcnt_t shift, long prec, arf_rnd_t rnd);
int arf_add(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec, arf_rnd_t rnd);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

91
arf/add.c Normal file
View file

@ -0,0 +1,91 @@
/*=============================================================================
This file is part of ARB.
ARB is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
ARB is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ARB; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
=============================================================================*/
/******************************************************************************
Copyright (C) 2014 Fredrik Johansson
******************************************************************************/
#include "arf.h"
int
arf_add_special(arf_t z, const arf_t x, const arf_t y, long prec, arf_rnd_t rnd)
{
if (arf_is_zero(x))
{
if (arf_is_zero(y))
{
arf_zero(z);
return 0;
}
else
return arf_set_round(z, y, prec, rnd);
}
else if (arf_is_zero(y))
{
return arf_set_round(z, x, prec, rnd);
}
else if (arf_is_nan(x) || arf_is_nan(y)
|| (arf_is_pos_inf(x) && arf_is_neg_inf(y))
|| (arf_is_neg_inf(x) && arf_is_pos_inf(y)))
{
arf_nan(z);
return 0;
}
else if (arf_is_special(x))
{
arf_set(z, x);
return 0;
}
else
{
arf_set(z, y);
return 0;
}
}
int
arf_add(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec, arf_rnd_t rnd)
{
mp_size_t xn, yn;
mp_srcptr xptr, yptr;
long shift;
if (arf_is_special(x) || arf_is_special(y))
{
return arf_add_special(z, x, y, prec, rnd);
}
shift = _fmpz_sub_small(ARF_EXPREF(x), ARF_EXPREF(y));
if (shift < 0)
{
arf_srcptr __t;
__t = x; x = y; y = __t;
shift = -shift;
}
ARF_GET_MPN_READONLY(xptr, xn, x);
ARF_GET_MPN_READONLY(yptr, yn, y);
return _arf_add_mpn(z, xptr, xn, ARF_SGNBIT(x), ARF_EXPREF(x),
yptr, yn, ARF_SGNBIT(y), shift, prec, rnd);
}

248
arf/add_mpn.c Normal file
View file

@ -0,0 +1,248 @@
/*=============================================================================
This file is part of ARB.
ARB is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
ARB is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ARB; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
=============================================================================*/
/******************************************************************************
Copyright (C) 2014 Fredrik Johansson
******************************************************************************/
#include "arf.h"
TLS_PREFIX mp_ptr __arf_add_tmp = NULL;
TLS_PREFIX long __arf_add_alloc = 0;
void _arf_add_tmp_cleanup(void)
{
flint_free(__arf_add_tmp);
__arf_add_tmp = NULL;
__arf_add_alloc = 0;
}
/* Assumptions: top limbs of x and y nonzero. */
int
_arf_add_mpn(arf_t z, mp_srcptr xp, mp_size_t xn, int xsgnbit, const fmpz_t xexp,
mp_srcptr yp, mp_size_t yn, int ysgnbit, mp_bitcnt_t shift,
long prec, arf_rnd_t rnd)
{
mp_size_t wn, zn, zn_original, alloc, xbase, wbase;
mp_size_t shift_limbs;
mp_bitcnt_t shift_bits;
int inexact;
long fix;
mp_limb_t cy;
mp_ptr tmp;
ARF_ADD_TMP_DECL
/* x +/- eps */
if (shift > prec + 1 &&
prec != ARF_PREC_EXACT &&
shift > (xn + 1) * FLINT_BITS)
{
zn = (prec + FLINT_BITS - 1) / FLINT_BITS;
zn_original = zn = FLINT_MAX(zn, xn) + 2;
shift_limbs = zn - xn;
alloc = zn + 1;
ARF_ADD_TMP_ALLOC(tmp, alloc)
flint_mpn_zero(tmp, shift_limbs);
flint_mpn_copyi(tmp + shift_limbs, xp, xn);
if (xsgnbit == ysgnbit)
{
tmp[0] = 1;
}
else
{
mpn_sub_1(tmp, tmp, zn, 1);
while (tmp[zn-1] == 0)
zn--;
}
_arf_set_round_mpn(z, &fix, tmp, zn, xsgnbit, prec, rnd);
fix += (zn - zn_original) * FLINT_BITS;
_fmpz_add_fast(ARF_EXPREF(z), xexp, fix);
ARF_ADD_TMP_FREE(tmp, alloc)
return 1;
}
shift_limbs = shift / FLINT_BITS;
shift_bits = shift % FLINT_BITS;
/* TODO: shift == 0 optimization! (common case) */
/* w = y shifted */
wn = yn + (shift_bits != 0);
/* Size of sum (possibly one more limb than this for carry). */
zn_original = zn = FLINT_MAX(xn, wn + shift_limbs);
alloc = zn + 1;
ARF_ADD_TMP_ALLOC(tmp, alloc)
/* Low limbs of both operands */
xbase = zn - xn;
wbase = zn - shift_limbs - wn;
/* Shift y to its correct offset in the sum. */
if (shift_bits == 0)
flint_mpn_copyi(tmp + wbase, yp, yn);
else
tmp[wbase] = cy = mpn_rshift(tmp + wbase + 1, yp, yn, shift_bits);
if (xsgnbit == ysgnbit)
{
if (shift_limbs >= xn)
{
/* XXXXXX
WWWWWWW */
flint_mpn_zero(tmp + wn, zn - xn - wn);
flint_mpn_copyi(tmp + zn - xn, xp, xn);
cy = 0;
}
else if (shift_limbs == 0)
{
/* XXXXXX or XXXXX
WWWW WWWWWWW */
if (wn < xn)
{
flint_mpn_copyi(tmp, xp, xn - wn);
cy = mpn_add_n(tmp + wbase, xp + wbase, tmp + wbase, wn);
}
else
{
cy = mpn_add_n(tmp + xbase, xp, tmp + xbase, xn);
}
}
else if (xbase >= wbase)
{
/* XXXXXX or XXXXXX
WWWWWWW WWWW */
cy = mpn_add_n(tmp + xbase, tmp + xbase, xp, wn - xbase);
cy = mpn_add_1(tmp + wn, xp + wn - xbase, zn - wn, cy);
}
else
{
/* XXXXXXXX todo: merge with above?
WWW */
flint_mpn_copyi(tmp, xp, wbase);
cy = mpn_add_n(tmp + wbase, tmp + wbase, xp + wbase, wn);
cy = mpn_add_1(tmp + zn - shift_limbs, xp + xn - shift_limbs, shift_limbs, cy);
}
/* There could be a carry. */
tmp[zn] = cy;
zn += cy;
}
else
{
if (shift_limbs >= xn)
{
/* XXXXXX
WWWWWWW */
mpn_neg(tmp, tmp, wn);
flint_mpn_store(tmp + wn, zn - xn - wn, -LIMB_ONE);
mpn_sub_1(tmp + zn - xn, xp, xn, 1);
}
else if (shift_limbs == 0)
{
/* XXXXXX (1) or XXXXX (2)
WWWW WWWWWWW */
if (wn <= xn) /* (1) */
{
if (mpn_cmp(xp + wbase, tmp + wbase, wn) >= 0)
{
flint_mpn_copyi(tmp, xp, wbase);
mpn_sub_n(tmp + wbase, xp + wbase, tmp + wbase, wn);
}
else
{
xsgnbit = ysgnbit;
mpn_sub_n(tmp + wbase, tmp + wbase, xp + wbase, wn);
if (wbase != 0)
{
cy = mpn_neg_n(tmp, xp, wbase);
mpn_sub_1(tmp + wbase, tmp + wbase, wn, cy);
}
}
}
else /* (2) */
{
if (mpn_cmp(tmp + xbase, xp, xn) >= 0)
{
xsgnbit = ysgnbit;
mpn_sub_n(tmp + xbase, tmp + xbase, xp, xn);
}
else
{
cy = mpn_neg(tmp, tmp, xbase);
mpn_sub_n(tmp + xbase, xp, tmp + xbase, xn);
mpn_sub_1(tmp + xbase, tmp + xbase, xn, cy);
}
}
}
else if (xbase >= wbase)
{
/* XXXXXX or XXXXXX
WWWWWWW WWWW */
if (xbase > 0)
{
cy = mpn_sub_n(tmp + xbase, xp, tmp + xbase, wn - xbase);
cy = mpn_sub_1(tmp + wn, xp + xn - shift_limbs, shift_limbs, cy);
cy = mpn_neg(tmp, tmp, xbase);
mpn_sub_1(tmp + xbase, tmp + xbase, xn, cy);
}
else
{
cy = mpn_sub_n(tmp, xp, tmp, wn);
cy = mpn_sub_1(tmp + wn, xp + wn, xn - wn, cy);
}
}
else
{
/* XXXXXXXX
WWW */
flint_mpn_copyi(tmp, xp, wbase);
cy = mpn_sub_n(tmp + wbase, xp + wbase, tmp + wbase, wn);
mpn_sub_1(tmp + zn - shift_limbs, xp + xn - shift_limbs, shift_limbs, cy);
}
/* There could be cancellation. */
while (zn > 0 && tmp[zn - 1] == 0)
zn--;
}
if (zn == 0)
{
arf_zero(z);
inexact = 0;
}
else
{
inexact = _arf_set_round_mpn(z, &fix, tmp, zn, xsgnbit, prec, rnd);
fix += (zn - zn_original) * FLINT_BITS;
_fmpz_add_fast(ARF_EXPREF(z), xexp, fix);
}
ARF_ADD_TMP_FREE(tmp, alloc)
return inexact;
}

View file

@ -40,6 +40,7 @@ arf_mul_rnd_any(arf_ptr z, arf_srcptr x, arf_srcptr y,
long prec, arf_rnd_t rnd) long prec, arf_rnd_t rnd)
{ {
mp_size_t xn, yn; mp_size_t xn, yn;
long fix;
int sgnbit, inexact; int sgnbit, inexact;
xn = ARF_XSIZE(x); xn = ARF_XSIZE(x);
@ -90,8 +91,8 @@ arf_mul_rnd_any(arf_ptr z, arf_srcptr x, arf_srcptr y,
mpn_mul(tmp, xptr, xn, yptr, yn); mpn_mul(tmp, xptr, xn, yptr, yn);
} }
_fmpz_add2_fast(ARF_EXPREF(z), ARF_EXPREF(x), ARF_EXPREF(y), 0); inexact = _arf_set_round_mpn(z, &fix, tmp, zn, sgnbit, prec, rnd);
inexact = arf_set_round_mpn(z, tmp, zn, sgnbit, ARF_EXPREF(z), prec, rnd); _fmpz_add2_fast(ARF_EXPREF(z), ARF_EXPREF(x), ARF_EXPREF(y), fix);
ARF_MUL_TMP_FREE(tmp, alloc) ARF_MUL_TMP_FREE(tmp, alloc)
return inexact; return inexact;

View file

@ -30,6 +30,7 @@ arf_mul_rnd_down(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec)
{ {
mp_size_t xn, yn, zn; mp_size_t xn, yn, zn;
mp_limb_t hi, lo; mp_limb_t hi, lo;
long expfix;
int sgnbit, ret, fix; int sgnbit, ret, fix;
mp_ptr zptr; mp_ptr zptr;
@ -184,8 +185,8 @@ arf_mul_rnd_down(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec)
} }
zn = xn + yn; zn = xn + yn;
_fmpz_add2_fast(ARF_EXPREF(z), ARF_EXPREF(x), ARF_EXPREF(y), 0); ret = _arf_set_round_mpn(z, &expfix, zz, zn, sgnbit, prec, ARF_RND_DOWN);
ret = arf_set_round_mpn(z, zz, zn, sgnbit, ARF_EXPREF(z), prec, ARF_RND_DOWN); _fmpz_add2_fast(ARF_EXPREF(z), ARF_EXPREF(x), ARF_EXPREF(y), expfix);
return ret; return ret;
} }
else if (yn > MUL_MPFR_MIN_LIMBS && prec != ARF_PREC_EXACT else if (yn > MUL_MPFR_MIN_LIMBS && prec != ARF_PREC_EXACT
@ -223,8 +224,8 @@ arf_mul_rnd_down(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec)
mpn_mul(tmp, xptr, xn, yptr, yn); mpn_mul(tmp, xptr, xn, yptr, yn);
} }
_fmpz_add2_fast(ARF_EXPREF(z), ARF_EXPREF(x), ARF_EXPREF(y), 0); ret = _arf_set_round_mpn(z, &expfix, tmp, zn, sgnbit, prec, ARF_RND_DOWN);
ret = arf_set_round_mpn(z, tmp, zn, sgnbit, ARF_EXPREF(z), prec, ARF_RND_DOWN); _fmpz_add2_fast(ARF_EXPREF(z), ARF_EXPREF(x), ARF_EXPREF(y), expfix);
ARF_MUL_TMP_FREE(tmp, alloc) ARF_MUL_TMP_FREE(tmp, alloc)
return ret; return ret;

View file

@ -50,10 +50,14 @@ arf_neg_round(arf_t y, const arf_t x, long prec, arf_rnd_t rnd)
{ {
mp_srcptr xptr; mp_srcptr xptr;
mp_size_t xn; mp_size_t xn;
int inexact;
long fix;
ARF_GET_MPN_READONLY(xptr, xn, x); ARF_GET_MPN_READONLY(xptr, xn, x);
return arf_set_round_mpn(y, xptr, xn, ARF_SGNBIT(x) ^ 1, inexact = _arf_set_round_mpn(y, &fix, xptr, xn,
ARF_EXPREF(x), prec, rnd); ARF_SGNBIT(x) ^ 1, prec, rnd);
_fmpz_add_fast(ARF_EXPREF(y), ARF_EXPREF(x), fix);
return inexact;
} }
} }
} }

View file

@ -50,10 +50,14 @@ arf_set_round(arf_t y, const arf_t x, long prec, arf_rnd_t rnd)
{ {
mp_srcptr xptr; mp_srcptr xptr;
mp_size_t xn; mp_size_t xn;
long fix;
int inexact;
ARF_GET_MPN_READONLY(xptr, xn, x); ARF_GET_MPN_READONLY(xptr, xn, x);
return arf_set_round_mpn(y, xptr, xn, ARF_SGNBIT(x), inexact = _arf_set_round_mpn(y, &fix, xptr, xn,
ARF_EXPREF(x), prec, rnd); ARF_SGNBIT(x), prec, rnd);
_fmpz_add_fast(ARF_EXPREF(y), ARF_EXPREF(x), fix);
return inexact;
} }
} }
} }

View file

@ -36,8 +36,8 @@ _arf_set_mpn_roundup_power_two(arf_t y, int sgnbit)
} }
int int
arf_set_round_mpn(arf_t y, mp_srcptr x, mp_size_t xn, _arf_set_round_mpn(arf_t y, long * exp_shift, mp_srcptr x, mp_size_t xn,
int sgnbit, const fmpz_t topexp, long prec, arf_rnd_t rnd) int sgnbit, long prec, arf_rnd_t rnd)
{ {
unsigned int leading; unsigned int leading;
mp_bitcnt_t exp, bc, val, val_bits; mp_bitcnt_t exp, bc, val, val_bits;
@ -51,10 +51,7 @@ arf_set_round_mpn(arf_t y, mp_srcptr x, mp_size_t xn,
exp = xn * FLINT_BITS - leading; exp = xn * FLINT_BITS - leading;
/* Set exponent. */ /* Set exponent. */
if (topexp == NULL) *exp_shift = -(long) leading;
fmpz_set_ui(ARF_EXPREF(y), exp);
else
fmpz_sub_si_inline(ARF_EXPREF(y), topexp, leading);
/* Find first nonzero bit. */ /* Find first nonzero bit. */
val_limbs = 0; val_limbs = 0;
@ -112,7 +109,10 @@ arf_set_round_mpn(arf_t y, mp_srcptr x, mp_size_t xn,
/* Overflow to next power of two (unlikely). */ /* Overflow to next power of two (unlikely). */
if (val == exp) if (val == exp)
{ {
_arf_set_mpn_roundup_power_two(y, sgnbit); exp_shift[0]++;
ARF_DEMOTE(y);
ARF_NOPTR_D(y)[0] = LIMB_TOP;
ARF_XSIZE(y) = ARF_MAKE_XSIZE(1, sgnbit);
return 1; return 1;
} }
} }

View file

@ -25,6 +25,35 @@
#include "arf.h" #include "arf.h"
/* Top-aligns the nonzero limb v and rounds it to prec bit. */
#define ARF_NORMALISE_ROUND_LIMB(inexact, exp, v, \
sgnbit, prec, rnd) \
do { \
count_leading_zeros(exp, v); \
v <<= exp; \
exp = FLINT_BITS - exp; \
if (prec >= exp) \
{ \
inexact = 0; \
} \
else \
{ \
mp_limb_t __t = v; \
v = MASK_LIMB(v, FLINT_BITS - prec); \
inexact = (__t != v); \
if (inexact && arf_rounds_up(rnd, sgnbit)) \
{ \
v += (LIMB_ONE << (FLINT_BITS - prec)); \
if (v == 0) \
{ \
v = LIMB_TOP; \
exp++; \
} \
} \
} \
} \
while (0)
int int
_arf_set_round_ui(arf_t x, ulong v, int sgnbit, long prec, arf_rnd_t rnd) _arf_set_round_ui(arf_t x, ulong v, int sgnbit, long prec, arf_rnd_t rnd)
{ {
@ -35,40 +64,12 @@ _arf_set_round_ui(arf_t x, ulong v, int sgnbit, long prec, arf_rnd_t rnd)
} }
else else
{ {
unsigned int bc, mask_bits; int exp, inexact;
int inexact;
mp_limb_t t;
count_leading_zeros(bc, v); ARF_NORMALISE_ROUND_LIMB(inexact, exp, v, sgnbit, prec, rnd);
v <<= bc;
bc = FLINT_BITS - bc;
if (prec >= bc) _fmpz_demote(ARF_EXPREF(x));
{ ARF_EXP(x) = exp;
inexact = 0;
}
else
{
mask_bits = FLINT_BITS - prec;
t = v;
v = (v >> mask_bits) << mask_bits;
inexact = (t != v);
if (inexact && arf_rounds_up(rnd, sgnbit))
{
v += (LIMB_ONE << (FLINT_BITS - prec));
/* Overflow to the next power of two (unlikely). */
if (v == 0)
{
v = LIMB_TOP;
bc++;
}
}
}
fmpz_set_ui(ARF_EXPREF(x), bc);
ARF_DEMOTE(x); ARF_DEMOTE(x);
ARF_XSIZE(x) = ARF_MAKE_XSIZE(1, sgnbit); ARF_XSIZE(x) = ARF_MAKE_XSIZE(1, sgnbit);
ARF_NOPTR_D(x)[0] = v; ARF_NOPTR_D(x)[0] = v;

182
arf/test/t-add.c Normal file
View file

@ -0,0 +1,182 @@
/*=============================================================================
This file is part of ARB.
ARB is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
ARB is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ARB; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
=============================================================================*/
/******************************************************************************
Copyright (C) 2012 Fredrik Johansson
******************************************************************************/
#include "arf.h"
int
arf_add_naive(arf_t z, const arf_t x, const arf_t y, long prec, arf_rnd_t rnd)
{
fmpr_t a, b;
long r;
fmpr_init(a);
fmpr_init(b);
arf_get_fmpr(a, x);
arf_get_fmpr(b, y);
r = fmpr_add(a, a, b, prec, rnd);
arf_set_fmpr(z, a);
fmpr_clear(a);
fmpr_clear(b);
return (r == FMPR_RESULT_EXACT) ? 0 : 1;
}
int main()
{
long iter, iter2;
flint_rand_t state;
printf("add....");
fflush(stdout);
flint_randinit(state);
for (iter = 0; iter < 10000; iter++)
{
arf_t x, y, z, v;
long prec, r1, r2;
arf_rnd_t rnd;
arf_init(x);
arf_init(y);
arf_init(z);
arf_init(v);
for (iter2 = 0; iter2 < 100; iter2++)
{
arf_randtest_special(x, state, 2000, 100);
arf_randtest_special(y, state, 2000, 100);
prec = 2 + n_randint(state, 2000);
if (n_randint(state, 10) == 0 &&
fmpz_bits(ARF_EXPREF(x)) < 10 &&
fmpz_bits(ARF_EXPREF(y)) < 10)
{
prec = ARF_PREC_EXACT;
}
switch (n_randint(state, 4))
{
case 0: rnd = ARF_RND_DOWN; break;
case 1: rnd = ARF_RND_UP; break;
case 2: rnd = ARF_RND_FLOOR; break;
default: rnd = ARF_RND_CEIL; break;
}
switch (n_randint(state, 5))
{
case 0:
r1 = arf_add(z, x, y, prec, rnd);
r2 = arf_add_naive(v, x, y, prec, rnd);
if (!arf_equal(z, v) || r1 != r2)
{
printf("FAIL!\n");
printf("prec = %ld, rnd = %d\n\n", prec, rnd);
printf("x = "); arf_print(x); printf("\n\n");
printf("y = "); arf_print(y); printf("\n\n");
printf("z = "); arf_print(z); printf("\n\n");
printf("v = "); arf_print(v); printf("\n\n");
printf("r1 = %ld, r2 = %ld\n", r1, r2);
abort();
}
break;
case 1:
r1 = arf_add(z, x, x, prec, rnd);
r2 = arf_add_naive(v, x, x, prec, rnd);
if (!arf_equal(z, v) || r1 != r2)
{
printf("FAIL (aliasing 1)!\n");
printf("prec = %ld, rnd = %d\n\n", prec, rnd);
printf("x = "); arf_print(x); printf("\n\n");
printf("z = "); arf_print(z); printf("\n\n");
printf("v = "); arf_print(v); printf("\n\n");
printf("r1 = %ld, r2 = %ld\n", r1, r2);
abort();
}
break;
case 2:
r2 = arf_add_naive(v, x, x, prec, rnd);
r1 = arf_add(x, x, x, prec, rnd);
if (!arf_equal(v, x) || r1 != r2)
{
printf("FAIL (aliasing 2)!\n");
printf("prec = %ld, rnd = %d\n\n", prec, rnd);
printf("x = "); arf_print(x); printf("\n\n");
printf("z = "); arf_print(z); printf("\n\n");
printf("v = "); arf_print(v); printf("\n\n");
printf("r1 = %ld, r2 = %ld\n", r1, r2);
abort();
}
break;
case 3:
r2 = arf_add_naive(v, x, y, prec, rnd);
r1 = arf_add(x, x, y, prec, rnd);
if (!arf_equal(x, v) || r1 != r2)
{
printf("FAIL (aliasing 3)!\n");
printf("prec = %ld, rnd = %d\n\n", prec, rnd);
printf("x = "); arf_print(x); printf("\n\n");
printf("y = "); arf_print(y); printf("\n\n");
printf("v = "); arf_print(v); printf("\n\n");
printf("r1 = %ld, r2 = %ld\n", r1, r2);
abort();
}
break;
default:
r2 = arf_add_naive(v, x, y, prec, rnd);
r1 = arf_add(x, y, x, prec, rnd);
if (!arf_equal(x, v) || r1 != r2)
{
printf("FAIL (aliasing 4)!\n");
printf("prec = %ld, rnd = %d\n\n", prec, rnd);
printf("x = "); arf_print(x); printf("\n\n");
printf("y = "); arf_print(y); printf("\n\n");
printf("v = "); arf_print(v); printf("\n\n");
printf("r1 = %ld, r2 = %ld\n", r1, r2);
abort();
}
break;
}
}
arf_clear(x);
arf_clear(y);
arf_clear(z);
arf_clear(v);
}
flint_randclear(state);
flint_cleanup();
printf("PASS\n");
return EXIT_SUCCESS;
}

View file

@ -200,25 +200,10 @@ static __inline__ void fmpz_sub_mul2exp(fmpz_t z, const fmpz_t x, const fmpz_t y
fmpz_clear(t); fmpz_clear(t);
} }
static long _fmpz_sub_small_large(const fmpz_t x, const fmpz_t y) long _fmpz_sub_small_large(const fmpz_t x, const fmpz_t y);
{
fmpz_t t;
fmpz_init(t);
fmpz_sub(t, x, y);
if (!COEFF_IS_MPZ(*t))
{
/* no need to free t */
return *t;
}
else
{
int sign = fmpz_sgn(t);
fmpz_clear(t);
return (sign > 0) ? LONG_MAX : -LONG_MAX;
}
}
static __inline__ long _fmpz_sub_small(const fmpz_t x, const fmpz_t y) static __inline__ long
_fmpz_sub_small(const fmpz_t x, const fmpz_t y)
{ {
if (!COEFF_IS_MPZ(*x) && !COEFF_IS_MPZ(*y)) if (!COEFF_IS_MPZ(*x) && !COEFF_IS_MPZ(*y))
{ {

299
fmpz_extras.h.save Normal file
View file

@ -0,0 +1,299 @@
/*=============================================================================
This file is part of ARB.
ARB is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
ARB is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ARB; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
=============================================================================*/
/******************************************************************************
Copyright (C) 2013 Fredrik Johansson
******************************************************************************/
#ifndef FMPZ_EXTRAS_H
#define FMPZ_EXTRAS_H
#include <limits.h>
#include "flint.h"
#include "fmpz.h"
#ifdef __cplusplus
extern "C" {
#endif
static __inline__ void
fmpz_add_inline(fmpz_t z, const fmpz_t x, const fmpz_t y)
{
fmpz f, g;
f = *x;
g = *y;
if (!COEFF_IS_MPZ(f) && !COEFF_IS_MPZ(g))
fmpz_set_si(z, f + g);
else
fmpz_add(z, x, y);
}
static __inline__ void
fmpz_add_si(fmpz_t z, const fmpz_t x, long y)
{
if (y >= 0)
fmpz_add_ui(z, x, y);
else
fmpz_sub_ui(z, x, -y);
}
static __inline__ void
fmpz_sub_si(fmpz_t z, const fmpz_t x, long y)
{
if (y >= 0)
fmpz_sub_ui(z, x, y);
else
fmpz_add_ui(z, x, -y);
}
static __inline__ void
fmpz_add_si_inline(fmpz_t z, const fmpz_t x, long y)
{
fmpz f;
f = *x;
if (!COEFF_IS_MPZ(f) && (COEFF_MIN <= y && y <= COEFF_MAX))
fmpz_set_si(z, f + y);
else
fmpz_add_si(z, x, y);
}
static __inline__ void
fmpz_sub_si_inline(fmpz_t z, const fmpz_t x, long y)
{
fmpz f;
f = *x;
if (!COEFF_IS_MPZ(f) && (COEFF_MIN <= y && y <= COEFF_MAX))
fmpz_set_si(z, f - y);
else
fmpz_sub_si(z, x, y);
}
static __inline__ void
fmpz_add_ui_inline(fmpz_t z, const fmpz_t x, ulong y)
{
fmpz f = *x;
if (!COEFF_IS_MPZ(f) && y <= COEFF_MAX)
fmpz_set_si(z, f + y);
else
fmpz_add_ui(z, x, y);
#if}
static __inline__ void
fmpz_add2_fmpz_si_inline(fmpz_t z, const fmpz_t x, const fmpz_t y, long c)
{
fmpz f, g, h;
f = *x;
g = *y;
if (!COEFF_IS_MPZ(f) && !COEFF_IS_MPZ(g))
{
h = f + g;
if ((COEFF_MIN <= h && h <= COEFF_MAX) &&
(COEFF_MIN <= c && c <= COEFF_MAX))
{
fmpz_set_si(z, h + c);
return;
}
}
fmpz_add(z, x, y);
fmpz_add_si(z, z, c);
}
/* sets z = +/- ({src, n} >> shift) where 0 <= shift < FLINT_BITS
and the top limb of src is nonzero */
/* TODO: optimize for result = 1 limb */
static __inline__ void
fmpz_set_mpn_rshift(fmpz_t z, mp_srcptr src, mp_size_t n, unsigned int shift, int negative)
{
__mpz_struct * zptr;
zptr = _fmpz_promote(z);
if (zptr->_mp_alloc < n)
mpz_realloc2(zptr, n * FLINT_BITS);
if (shift == 0)
{
flint_mpn_copyi(zptr->_mp_d, src, n);
}
else
{
mpn_rshift(zptr->_mp_d, src, n, shift);
while (zptr->_mp_d[n - 1] == 0) /* todo: can only do one iter? */
n--;
}
zptr->_mp_size = negative ? -n : n;
_fmpz_demote_val(z);
}
/* sets z = +/- {src, n} where n >= 2 and the top limb of src is nonzero */
static __inline__ void
fmpz_set_mpn_large(fmpz_t z, mp_srcptr src, mp_size_t n, int negative)
{
__mpz_struct * zz;
zz = _fmpz_promote(z);
if (zz->_mp_alloc < n)
mpz_realloc2(zz, n * FLINT_BITS);
flint_mpn_copyi(zz->_mp_d, src, n);
zz->_mp_size = negative ? -n : n;
}
/* round away from zero */
static __inline__ void
fmpz_adiv_q_2exp(fmpz_t z, const fmpz_t x, mp_bitcnt_t exp)
{
int sign = fmpz_sgn(x);
if (sign > 0)
fmpz_cdiv_q_2exp(z, x, exp);
else
fmpz_fdiv_q_2exp(z, x, exp);
}
/* sets z = x + y*2^shift */
static __inline__ void fmpz_add_mul2exp(fmpz_t z, const fmpz_t x, const fmpz_t y, ulong shift)
{
fmpz_t t;
fmpz_init(t);
fmpz_mul_2exp(t, y, shift);
fmpz_add(z, x, t);
fmpz_clear(t);
}
static __inline__ void fmpz_sub_mul2exp(fmpz_t z, const fmpz_t x, const fmpz_t y, ulong shift)
{
fmpz_t t;
fmpz_init(t);
fmpz_mul_2exp(t, y, shift);
fmpz_sub(z, x, t);
fmpz_clear(t);
}
static long _fmpz_sub_small_large(const fmpz_t x, const fmpz_t y)
{
fmpz_t t;
fmpz_init(t);
fmpz_sub(t, x, y);
if (!COEFF_IS_MPZ(*t))
{
/* no need to free t */
return *t;
}
else
{
int sign = fmpz_sgn(t);
fmpz_clear(t);
return (sign > 0) ? LONG_MAX : -LONG_MAX;
}
}
static __inline__ long _fmpz_sub_small(const fmpz_t x, const fmpz_t y)
{
if (!COEFF_IS_MPZ(*x) && !COEFF_IS_MPZ(*y))
{
return (*x) - (*y);
}
else
{
return _fmpz_sub_small_large(x, y);
}
}
static __inline__ mp_size_t
_fmpz_size(const fmpz_t f)
{
fmpz d = *f;
if (!COEFF_IS_MPZ(d))
return 1;
else
return mpz_size(COEFF_TO_PTR(d));
}
static __inline__ void
fmpz_ui_pow_ui(fmpz_t x, ulong b, ulong e)
{
if (e <= 1)
{
fmpz_set_ui(x, e == 0 ? 1UL : b);
}
else
{
fmpz_set_ui(x, b);
fmpz_pow_ui(x, x, e);
}
}
static __inline__ void
fmpz_max(fmpz_t z, const fmpz_t x, const fmpz_t y)
{
if (fmpz_cmp(x, y) >= 0)
fmpz_set(z, x);
else
fmpz_set(z, y);
}
static __inline__ void
fmpz_min(fmpz_t z, const fmpz_t x, const fmpz_t y)
{
if (fmpz_cmp(x, y) < 0)
fmpz_set(z, x);
else
fmpz_set(z, y);
}
#define FMPZ_GET_MPN_READONLY(zsign, zn, zptr, ztmp, zv) \
if (!COEFF_IS_MPZ(zv)) \
{ \
(zsign) = (zv) < 0; \
(ztmp) = FLINT_ABS(zv); \
(zptr) = &(ztmp); \
(zn) = 1; \
} \
else \
{ \
__mpz_struct * ___zz = COEFF_TO_PTR(zv); \
(zptr) = ___zz->_mp_d; \
(zn) = ___zz->_mp_size; \
(zsign) = (zn) < 0; \
(zn) = FLINT_ABS(zn); \
}
#ifdef __cplusplus
}
#endif
#endif

View file

@ -0,0 +1,45 @@
/*=============================================================================
This file is part of ARB.
ARB is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
ARB is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ARB; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
=============================================================================*/
/******************************************************************************
Copyright (C) 2014 Fredrik Johansson
******************************************************************************/
#include "fmpz_extras.h"
long
_fmpz_sub_small_large(const fmpz_t x, const fmpz_t y)
{
fmpz_t t;
fmpz_init(t);
fmpz_sub(t, x, y);
if (!COEFF_IS_MPZ(*t))
{
/* no need to free t */
return *t;
}
else
{
int sign = fmpz_sgn(t);
fmpz_clear(t);
return (sign > 0) ? LONG_MAX : -LONG_MAX;
}
}