/*=============================================================================

    This file is part of ARB.

    ARB is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    ARB is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with ARB; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

=============================================================================*/
/******************************************************************************

    Copyright (C) 2012 Fredrik Johansson

******************************************************************************/

#include "fmpr.h"
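
/* Returns nonzero if rounding in direction rnd moves a value with the
   given sign away from zero (i.e. the truncated mantissa must be
   incremented). */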
static __inline__ int
rounds_up(fmpr_rnd_t rnd, int negative)
{
    if (rnd == FMPR_RND_DOWN) return 0;
    if (rnd == FMPR_RND_UP) return 1;
    if (rnd == FMPR_RND_FLOOR) return negative;
    return !negative;
}
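
/* Products of up to this many limbs use a stack buffer; larger products
   are heap allocated through flint_malloc. */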
#define MUL_STACK_ALLOC 40

/* requires xn >= yn, xman and yman both normalised and odd */
static long
_fmpr_mul_large(fmpz_t zman, fmpz_t zexp,
    mp_srcptr xman, mp_size_t xn, const fmpz_t xexp,
    mp_srcptr yman, mp_size_t yn, const fmpz_t yexp,
    int negative, long prec, fmpr_rnd_t rnd)
{
    long zn, alloc, lead, bc, ret;
    mp_limb_t tmp_stack[MUL_STACK_ALLOC];
    mp_ptr tmp;

    zn = xn + yn;
    alloc = zn + 1;  /* may need one extra temp limb for rounding carry */

    if (alloc > MUL_STACK_ALLOC)
        tmp = flint_malloc(alloc * sizeof(mp_limb_t));
    else
        tmp = tmp_stack;

    mpn_mul(tmp, xman, xn, yman, yn);
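
    /* the product of xn and yn limbs has xn + yn limbs, or xn + yn - 1
       when there is no carry into the top limb */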
    zn = zn - (tmp[zn-1] == 0);

    count_leading_zeros(lead, tmp[zn-1]);
    bc = zn * FLINT_BITS - lead;
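
    /* bc is the exact bit length of the product; if it already fits in
       prec bits, no rounding is needed */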
    if (bc <= prec)
    {
        if (zn == 1)
        {
            if (!negative)
                fmpz_set_ui(zman, tmp[0]);
            else
                fmpz_neg_ui(zman, tmp[0]);
        }
        else
        {
            fmpz_set_mpn_large(zman, tmp, zn, negative);
        }

        fmpz_add_inline(zexp, xexp, yexp);
        ret = FMPR_RESULT_EXACT;
    }
    else
    {
        long cut_limb, cut_bit, shift, trail_limbs, trail_bits;
        mp_limb_t cy;
        mp_ptr res;

        shift = bc - prec;
        cut_limb = shift / FLINT_BITS;
        cut_bit = shift % FLINT_BITS;

        /* cut off at the rounding bit */
        tmp[cut_limb] &= ~(((mp_limb_t) 1UL << cut_bit) - 1UL);
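
        /* if rounding away from zero, add one ulp at the cut; the carry
           can ripple through every limb and lengthen the result by one
           limb, which the extra limb in alloc accounts for */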
        if (rounds_up(rnd, negative))
        {
            cy = mpn_add_1(tmp + cut_limb,
                tmp + cut_limb, zn - cut_limb, (1UL << cut_bit));
            tmp[zn] = cy;
            zn += cy;
        }

        /* remove trailing zero limbs */
        trail_limbs = 0;
        while (tmp[cut_limb + trail_limbs] == 0)
            trail_limbs++;

        res = tmp + (cut_limb + trail_limbs);
        zn -= (cut_limb + trail_limbs);

        count_trailing_zeros(trail_bits, res[0]);
        ret = (trail_limbs * FLINT_BITS) + trail_bits - cut_bit;
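
        /* ret: how many bits at and above the rounding cut turned out to
           be trailing zeros in the rounded mantissa */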
        shift = (cut_limb + trail_limbs) * FLINT_BITS + trail_bits;

        fmpz_set_mpn_rshift(zman, res, zn, trail_bits, negative);
        fmpz_add2_fmpz_si_inline(zexp, xexp, yexp, shift);
    }

    if (alloc > MUL_STACK_ALLOC)
        flint_free(tmp);

    return ret;
}
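
/* Special-value semantics: 0 * (0 or finite) = 0, 0 * (inf or nan) = nan,
   inf times a nonzero finite value or inf = +/-inf with the product sign,
   and any other combination involving a special value gives nan. */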
static void
_fmpr_mul_special(fmpr_t z, const fmpr_t x, const fmpr_t y)
{
    if (fmpr_is_zero(x))
    {
        if (!fmpr_is_special(y) || fmpr_is_zero(y))
            fmpr_zero(z);
        else
            fmpr_nan(z);
        return;
    }

    if (fmpr_is_zero(y))
    {
        if (!fmpr_is_special(x))
            fmpr_zero(z);
        else
            fmpr_nan(z);
        return;
    }

    if ((fmpr_is_inf(x) && (fmpr_is_inf(y) || !fmpr_is_special(y))) ||
        (fmpr_is_inf(y) && !fmpr_is_special(x)))
    {
        if (fmpr_sgn(x) == fmpr_sgn(y))
            fmpr_pos_inf(z);
        else
            fmpr_neg_inf(z);
        return;
    }

    fmpr_nan(z);
}

long
fmpr_mul(fmpr_t z, const fmpr_t x, const fmpr_t y, long prec, fmpr_rnd_t rnd)
{
    __mpz_struct *xmpz, *ympz;
    long xn, yn;
    int negative;
    fmpz u, v;

    if (fmpr_is_special(x) || fmpr_is_special(y))
    {
        _fmpr_mul_special(z, x, y);
        return FMPR_RESULT_EXACT;
    }

    u = *fmpr_manref(x);
    v = *fmpr_manref(y);
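
    /* dispatch on the mantissa representations: both small fmpz values
       (the product fits in two limbs), small times mpz, or mpz times mpz */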
    if (!COEFF_IS_MPZ(u))
    {
        if (!COEFF_IS_MPZ(v))
        {
            mp_limb_t hi, lo;

            negative = (u ^ v) < 0;
            u = FLINT_ABS(u);
            v = FLINT_ABS(v);

            umul_ppmm(hi, lo, u, v);
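
            /* hi == 0 means the full product fits in a single limb */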
            if (hi == 0)
            {
                /* 1 limb */
                long lead, trail, bc, shift, ret;

                count_leading_zeros(lead, lo);
                bc = FLINT_BITS - lead;

                shift = 0;
                ret = FMPR_RESULT_EXACT;

                if (bc > prec)
                {
                    shift = bc - prec;
                    lo = (lo >> shift) + rounds_up(rnd, negative);
                    count_trailing_zeros(trail, lo);
                    lo >>= trail;
                    shift += trail;
                    ret = trail;
                }

                if (!negative)
                    fmpz_set_ui(fmpr_manref(z), lo);
                else
                    fmpz_neg_ui(fmpr_manref(z), lo);

                fmpz_add2_fmpz_si_inline(fmpr_expref(z),
                    fmpr_expref(x), fmpr_expref(y), shift);

                return ret;
            }
            else
            {
                /* 2 limbs */
                long lead, trail, bc, shift, ret;

                count_leading_zeros(lead, hi);
                bc = 2 * FLINT_BITS - lead;

                shift = 0;
                ret = FMPR_RESULT_EXACT;

                if (bc > prec)
                {
                    shift = bc - prec;

                    /* round */
                    if (shift < FLINT_BITS)
                    {
                        lo = (lo >> shift) | (hi << (FLINT_BITS - shift));
                        hi >>= shift;
                    }
                    else
                    {
                        lo = hi >> (shift - FLINT_BITS);
                        hi = 0;
                    }
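
                    /* add one ulp if rounding away from zero */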
                    if (rounds_up(rnd, negative))
                        add_ssaaaa(hi, lo, hi, lo, 0, 1);

                    /* remove trailing zeros */
                    if (lo == 0)
                    {
                        count_trailing_zeros(trail, hi);
                        hi >>= trail;
                        shift += FLINT_BITS + trail;
                        ret = FLINT_BITS + trail;

                        if (!negative)
                            fmpz_set_ui(fmpr_manref(z), hi);
                        else
                            fmpz_neg_ui(fmpr_manref(z), hi);
                    }
                    else
                    {
                        count_trailing_zeros(trail, lo);

                        if (trail != 0)
                        {
                            lo = (lo >> trail) | (hi << (FLINT_BITS - trail));
                            hi >>= trail;
                            shift += trail;
                        }
                        ret = trail;

                        if (!negative)
                            fmpz_set_uiui(fmpr_manref(z), hi, lo);
                        else
                            fmpz_neg_uiui(fmpr_manref(z), hi, lo);
                    }
                }
                else
                {
                    if (!negative)
                        fmpz_set_uiui(fmpr_manref(z), hi, lo);
                    else
                        fmpz_neg_uiui(fmpr_manref(z), hi, lo);
                }

                fmpz_add2_fmpz_si_inline(fmpr_expref(z),
                    fmpr_expref(x), fmpr_expref(y), shift);

                return ret;
            }
        }
        else
        {
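            /* u is a small fmpz and v a multi-limb mpz: treat |u| as a
               1-limb mpn and reuse the general mpn multiplication */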
            mp_limb_t t;

            ympz = COEFF_TO_PTR(v);
            yn = ympz->_mp_size;

            negative = (yn < 0) ^ (u < 0);
            t = FLINT_ABS(u);
            yn = FLINT_ABS(yn);

            return _fmpr_mul_large(fmpr_manref(z), fmpr_expref(z),
                ympz->_mp_d, yn, fmpr_expref(y),
                &t, 1, fmpr_expref(x), negative, prec, rnd);
        }
    }
    else
    {
        xmpz = COEFF_TO_PTR(u);
        xn = xmpz->_mp_size;

        if (!COEFF_IS_MPZ(v))
        {
            mp_limb_t t;

            negative = (xn < 0) ^ (v < 0);
            t = FLINT_ABS(v);
            xn = FLINT_ABS(xn);

            return _fmpr_mul_large(fmpr_manref(z), fmpr_expref(z),
                xmpz->_mp_d, xn, fmpr_expref(x),
                &t, 1, fmpr_expref(y), negative, prec, rnd);
        }
        else
        {
            ympz = COEFF_TO_PTR(v);
            yn = ympz->_mp_size;

            negative = (xn ^ yn) < 0;
            xn = FLINT_ABS(xn);
            yn = FLINT_ABS(yn);
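
            /* _fmpr_mul_large requires its first operand to have at least
               as many limbs as the second, so order the operands */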
            if (xn >= yn)
                return _fmpr_mul_large(fmpr_manref(z), fmpr_expref(z),
                    xmpz->_mp_d, xn, fmpr_expref(x),
                    ympz->_mp_d, yn, fmpr_expref(y), negative, prec, rnd);
            else
                return _fmpr_mul_large(fmpr_manref(z), fmpr_expref(z),
                    ympz->_mp_d, yn, fmpr_expref(y),
                    xmpz->_mp_d, xn, fmpr_expref(x), negative, prec, rnd);
        }
    }
}
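
/* A minimal usage sketch (hypothetical driver, not part of this file),
   assuming the init/set/clear interface declared in fmpr.h: compute
   z = x * y rounded toward zero at 53 bits. The product 3 * 5 = 15 fits
   in 53 bits, so the call returns FMPR_RESULT_EXACT.

       fmpr_t x, y, z;
       fmpr_init(x); fmpr_init(y); fmpr_init(z);
       fmpr_set_ui(x, 3);
       fmpr_set_ui(y, 5);
       fmpr_mul(z, x, y, 53, FMPR_RND_DOWN);
       fmpr_clear(x); fmpr_clear(y); fmpr_clear(z);
*/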