arb/fmpr/add.c

495 lines
14 KiB
C
Raw Normal View History

/*=============================================================================
This file is part of ARB.
ARB is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
ARB is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ARB; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
=============================================================================*/
/******************************************************************************
Copyright (C) 2012, 2013 Fredrik Johansson
******************************************************************************/
#include "fmpr.h"
#define ADD_STACK_ALLOC 40
#define ADD_TLS_ALLOC 1000
2013-05-28 17:45:52 +02:00
TLS_PREFIX mp_ptr __add_tmp = NULL;
TLS_PREFIX long __add_alloc = 0;
2013-05-28 12:23:53 +02:00
#define ADD_TMP_ALLOC \
if (alloc <= ADD_STACK_ALLOC) \
{ \
tmp = tmp_stack; \
} \
else if (alloc <= ADD_TLS_ALLOC) \
{ \
if (__add_alloc < alloc) \
{ \
__add_tmp = flint_realloc(__add_tmp, sizeof(mp_limb_t) * alloc); \
__add_alloc = alloc; \
} \
tmp = __add_tmp; \
} \
else \
{ \
tmp = flint_malloc(sizeof(mp_limb_t) * alloc); \
}
#define ADD_TMP_FREE \
if (alloc > ADD_TLS_ALLOC) \
flint_free(tmp);
#define READ_SIGN_PTR(zsign, ztmp, zptr, zn, zv) \
if (!COEFF_IS_MPZ(zv)) \
{ \
zsign = zv < 0; \
ztmp = FLINT_ABS(zv); \
zptr = &ztmp; \
zn = 1; \
} \
else \
{ \
__mpz_struct * zz = COEFF_TO_PTR(zv); \
zptr = zz->_mp_d; \
zn = zz->_mp_size; \
zsign = zn < 0; \
zn = FLINT_ABS(zn); \
}
static __inline__ long
_fmpr_add_small(fmpr_t z, mp_limb_t x, int xsign, const fmpz_t xexp, mp_limb_t y, int ysign, const fmpz_t yexp, long shift, long prec, long rnd)
{
mp_limb_t hi, lo, t, u;
int sign = ysign;
t = x;
u = y;
lo = u << shift;
hi = (shift == 0) ? 0 : (u >> (FLINT_BITS - shift));
if (xsign == ysign)
{
add_ssaaaa(hi, lo, hi, lo, 0, t);
}
else
{
if (hi == 0)
{
if (lo >= t)
{
lo = lo - t;
}
else
{
lo = t - lo;
sign = !sign;
}
}
else
{
sub_ddmmss(hi, lo, hi, lo, 0, t);
}
}
if (hi == 0)
return fmpr_set_round_ui_2exp_fmpz(z, lo, xexp, sign, prec, rnd);
else
return fmpr_set_round_uiui_2exp_fmpz(z, hi, lo, xexp, sign, prec, rnd);
}
/* computes x + y * 2^shift (optionally negated) */
long
_fmpr_add_large(fmpr_t z,
mp_srcptr xman, mp_size_t xn, int xsign, const fmpz_t xexp,
mp_srcptr yman, mp_size_t yn, int ysign, const fmpz_t yexp,
long shift, long prec, fmpr_rnd_t rnd)
{
long tn, zn, alloc, ret, shift_bits, shift_limbs;
int negative;
mp_limb_t tmp_stack[ADD_STACK_ALLOC];
mp_limb_t cy;
mp_ptr tmp, tmp2;
shift_limbs = shift / FLINT_BITS;
shift_bits = shift % FLINT_BITS;
/* x does not overlap with y or the result -- outcome is
equivalent to adding/subtracting a small number to/from y
and rounding */
if (shift > xn * FLINT_BITS &&
prec != FMPR_PREC_EXACT &&
xn * FLINT_BITS + prec - (FLINT_BITS * (yn - 1)) < shift)
{
zn = (prec + FLINT_BITS - 1) / FLINT_BITS;
zn = FLINT_MAX(zn, yn) + 2;
shift_limbs = zn - yn;
alloc = zn;
ADD_TMP_ALLOC
flint_mpn_zero(tmp, shift_limbs);
flint_mpn_copyi(tmp + shift_limbs, yman, yn);
if (xsign == ysign)
{
tmp[0] = 1;
}
else
{
mpn_sub_1(tmp, tmp, zn, 1);
while (tmp[zn-1] == 0)
zn--;
}
ret = _fmpr_set_round_mpn(&shift, fmpr_manref(z), tmp, zn, ysign, prec, rnd);
shift -= shift_limbs * FLINT_BITS;
fmpz_add_si_inline(fmpr_expref(z), yexp, shift);
ADD_TMP_FREE
return ret;
}
if (xsign == ysign)
{
negative = xsign;
/* certainly no overlap between x and y * 2^shift
XXXXX
YYYYY
|-------|
shift_limbs
todo: handle huge case efficiently!
*/
if (shift_limbs >= xn)
{
alloc = shift_limbs + yn + 1;
2013-05-28 12:23:53 +02:00
ADD_TMP_ALLOC
flint_mpn_copyi(tmp, xman, xn);
flint_mpn_zero(tmp + xn, shift_limbs - xn);
if (shift_bits == 0)
{
flint_mpn_copyi(tmp + shift_limbs, yman, yn);
zn = shift_limbs + yn;
}
else
{
cy = mpn_lshift(tmp + shift_limbs, yman, yn, shift_bits);
tmp[shift_limbs + yn] = cy;
zn = shift_limbs + yn + (cy != 0);
}
}
else /* may have overlap between x and y * 2^shift */
{
long alloc1, alloc2;
alloc2 = yn + 1; /* shifted value of y */
alloc1 = FLINT_MAX(alloc2 + shift_limbs, xn) + 1; /* space for sum */
alloc = alloc1 + alloc2;
2013-05-28 12:23:53 +02:00
ADD_TMP_ALLOC
tmp2 = tmp + alloc1;
flint_mpn_copyi(tmp, xman, shift_limbs);
/* {t, tn} = shift of y (ignoring shift_limbs zero limbs) */
if (shift_bits == 0)
{
flint_mpn_copyi(tmp2, yman, yn);
tn = yn;
}
else
{
cy = mpn_lshift(tmp2, yman, yn, shift_bits);
tmp2[yn] = cy;
tn = yn + (cy != 0);
}
if (tn + shift_limbs == xn)
{
/* XXXXXXXX
TTTTT */
cy = mpn_add_n(tmp + shift_limbs, xman + shift_limbs, tmp2, tn);
tmp[xn] = cy;
zn = xn + (cy != 0);
}
else if (tn + shift_limbs < xn)
{
/* XXXXXXXX
TTTTT */
cy = mpn_add_n(tmp + shift_limbs, xman + shift_limbs, tmp2, tn);
cy = mpn_add_1(tmp + shift_limbs + tn, xman + shift_limbs + tn, xn - tn - shift_limbs, cy);
tmp[xn] = cy;
zn = xn + (cy != 0);
}
else
{
/* XXXXXXXX
TTTTTTT */
cy = mpn_add_n(tmp + shift_limbs, xman + shift_limbs, tmp2, xn - shift_limbs);
cy = mpn_add_1(tmp + xn, tmp2 + xn - shift_limbs, tn + shift_limbs - xn, cy);
tmp[tn + shift_limbs] = cy;
zn = tn + shift_limbs + (cy != 0);
}
}
}
else
{
/* certainly no overlap between x and y * 2^shift
XXXXX
YYYYY
|-------|
shift_limbs
todo: handle huge case efficiently!
*/
if (shift_limbs >= xn)
{
alloc = shift_limbs + yn + 1;
2013-05-28 12:23:53 +02:00
ADD_TMP_ALLOC
mpn_neg_n(tmp, xman, xn);
/* since x is nonzero, there is certainly carry (borrow) */
flint_mpn_store(tmp + xn, shift_limbs - xn, (mp_limb_t) -1);
if (shift_bits == 0)
{
flint_mpn_copyi(tmp + shift_limbs, yman, yn);
zn = shift_limbs + yn;
}
else
{
cy = mpn_lshift(tmp + shift_limbs, yman, yn, shift_bits);
tmp[shift_limbs + yn] = cy;
zn = shift_limbs + yn + (cy != 0);
}
mpn_sub_1(tmp + shift_limbs, tmp + shift_limbs, zn - shift_limbs, 1);
/* y has larger absolute value, and determines the sign */
negative = ysign;
}
else /* may have overlap between x and y * 2^shift */
{
long alloc1, alloc2;
alloc2 = yn + 1; /* shifted value of y */
alloc1 = FLINT_MAX(alloc2 + shift_limbs, xn); /* space for difference */
alloc = alloc1 + alloc2;
2013-05-28 12:23:53 +02:00
ADD_TMP_ALLOC
tmp2 = tmp + alloc1;
/* {t, tn} = shift of y (ignoring shift_limbs zero limbs) */
if (shift_bits == 0)
{
flint_mpn_copyi(tmp2, yman, yn);
tn = yn;
}
else
{
cy = mpn_lshift(tmp2, yman, yn, shift_bits);
tmp2[yn] = cy;
tn = yn + (cy != 0);
}
if (tn + shift_limbs == xn)
{
int comp;
/* XXXXXXXX
TTTTT */
comp = mpn_cmp(xman + shift_limbs, tmp2, tn);
if (comp == 0)
{
long i;
for (i = 0; i < shift_limbs; i++)
{
if (xman[i] != 0)
{
comp = 1;
break;
}
}
/* the result is exactly zero */
if (comp == 0)
{
fmpr_zero(z);
ret = FMPR_RESULT_EXACT;
goto cleanup;
}
}
/* x is larger */
if (comp > 0)
{
flint_mpn_copyi(tmp, xman, shift_limbs);
mpn_sub_n(tmp + shift_limbs, xman + shift_limbs, tmp2, tn);
negative = xsign;
}
else
{
mpn_sub_n(tmp + shift_limbs, tmp2, xman + shift_limbs, tn);
if (shift_limbs != 0)
{
cy = mpn_neg_n(tmp, xman, shift_limbs);
mpn_sub_1(tmp + shift_limbs, tmp + shift_limbs, tn, cy);
}
negative = ysign;
}
zn = xn;
}
else if (tn + shift_limbs < xn)
{
/* XXXXXXXX
TTTTT */
flint_mpn_copyi(tmp, xman, shift_limbs);
cy = mpn_sub_n(tmp + shift_limbs, xman + shift_limbs, tmp2, tn);
mpn_sub_1(tmp + shift_limbs + tn, xman + shift_limbs + tn, xn - tn - shift_limbs, cy);
zn = xn;
negative = xsign;
}
else
{
/* XXXXXXXX
TTTTTTT */
cy = mpn_sub_n(tmp + shift_limbs, tmp2, xman + shift_limbs, xn - shift_limbs);
mpn_sub_1(tmp + xn, tmp2 + xn - shift_limbs, tn + shift_limbs - xn, cy);
if (shift_limbs != 0)
{
cy = mpn_neg_n(tmp, xman, shift_limbs);
mpn_sub_1(tmp + shift_limbs, tmp + shift_limbs, tn, cy);
}
zn = shift_limbs + tn;
negative = ysign;
}
}
2013-05-28 12:23:53 +02:00
/* there might have been cancellation */
while (tmp[zn-1] == 0)
zn--;
}
ret = _fmpr_set_round_mpn(&shift, fmpr_manref(z), tmp, zn, negative, prec, rnd);
fmpz_add_si_inline(fmpr_expref(z), xexp, shift);
cleanup:
2013-05-28 12:23:53 +02:00
ADD_TMP_FREE
return ret;
}
static long
_fmpr_add_special(fmpr_t z, const fmpr_t x, const fmpr_t y, long prec, fmpr_rnd_t rnd)
{
if (fmpr_is_zero(x))
{
if (fmpr_is_zero(y))
{
fmpr_zero(z);
return FMPR_RESULT_EXACT;
}
else
return fmpr_set_round(z, y, prec, rnd);
}
else if (fmpr_is_zero(y))
{
return fmpr_set_round(z, x, prec, rnd);
}
else if (fmpr_is_nan(x) || fmpr_is_nan(y)
|| (fmpr_is_pos_inf(x) && fmpr_is_neg_inf(y))
|| (fmpr_is_neg_inf(x) && fmpr_is_pos_inf(y)))
{
fmpr_nan(z);
return FMPR_RESULT_EXACT;
}
else if (fmpr_is_special(x))
{
fmpr_set(z, x);
return FMPR_RESULT_EXACT;
}
else
{
fmpr_set(z, y);
return FMPR_RESULT_EXACT;
}
}
long
fmpr_add(fmpr_t z, const fmpr_t x, const fmpr_t y, long prec, fmpr_rnd_t rnd)
{
2013-05-28 12:23:53 +02:00
long shift, xn, yn;
mp_limb_t xtmp, ytmp;
mp_ptr xptr, yptr;
fmpz xv, yv;
const fmpz * xexp;
const fmpz * yexp;
int xsign, ysign;
if (fmpr_is_special(x) || fmpr_is_special(y))
{
return _fmpr_add_special(z, x, y, prec, rnd);
}
shift = _fmpz_sub_small(fmpr_expref(y), fmpr_expref(x));
if (shift >= 0)
{
xexp = fmpr_expref(x);
yexp = fmpr_expref(y);
xv = *fmpr_manref(x);
yv = *fmpr_manref(y);
}
else
{
xexp = fmpr_expref(y);
yexp = fmpr_expref(x);
xv = *fmpr_manref(y);
yv = *fmpr_manref(x);
shift = -shift;
}
2012-09-06 13:18:02 +02:00
READ_SIGN_PTR(xsign, xtmp, xptr, xn, xv)
READ_SIGN_PTR(ysign, ytmp, yptr, yn, yv)
2012-09-06 13:18:02 +02:00
if ((xn == 1) && (yn == 1) && (shift < FLINT_BITS))
return _fmpr_add_small(z, xptr[0], xsign, xexp, yptr[0], ysign, yexp, shift, prec, rnd);
else
return _fmpr_add_large(z, xptr, xn, xsign, xexp, yptr, yn, ysign, yexp, shift, prec, rnd);
2012-09-06 13:18:02 +02:00
}