/*=============================================================================

    This file is part of ARB.

    ARB is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    ARB is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with ARB; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

=============================================================================*/
/******************************************************************************

    Copyright (C) 2012 Fredrik Johansson

******************************************************************************/

#include "fmprb.h"

/*
We speed up the radius operations by working with mantissas aligned to
FMPRB_RAD_PREC bits (possibly one less bit after multiplying).

The error is computed as x*b + y*a + a*b + r where x, y, a, b and r are
floating-point numbers of the form m * 2^e and m has
exactly FMPRB_RAD_PREC bits, i.e. m <= 2^FMPRB_RAD_PREC - 1.

The mantissas of the products x*b, y*a, a*b are computed as
m3 = floor((m1 * m2) / 2^FMPRB_RAD_PREC) + 1.
One can then verify that m3 <= 2^FMPRB_RAD_PREC - 1.

In the additions, we do not normalise the output mantissa. The result is
largest when exponents are the same: then we have precisely m3 = m1 + m2.

Thus the final mantissa is <= 4 * (2^FMPRB_RAD_PREC - 1).
If FMPRB_RAD_PREC <= FLINT_BITS - 2, this precisely fits in a limb
without overflow.
*/

#define _RAD_MUL(a, ae, b, be) \
    do { \
        mp_limb_t hi, lo; \
        umul_ppmm(hi, lo, a, b); \
        a = ((hi << (FLINT_BITS - FMPRB_RAD_PREC)) | (lo >> FMPRB_RAD_PREC)) + 1; \
        fmpz_add_inline(ae, ae, be); \
    } while (0); \

static __inline__ void
_rad_bound(mp_limb_t * m, fmpz_t exp, const fmpr_t t)
{
    slong e;
    *m = fmpz_abs_ubound_ui_2exp(&e, fmpr_manref(t), FMPRB_RAD_PREC);
    fmpz_add_si_inline(exp, fmpr_expref(t), e);
}

static __inline__ mp_limb_t
_rad_add(mp_limb_t a, fmpz_t aexp, mp_limb_t b, fmpz_t bexp)
{
    slong shift;

    shift = _fmpz_sub_small(aexp, bexp);

    if (shift == 0)
    {
        return a + b;
    }
    else if (shift > 0)
    {
        if (shift <= FMPRB_RAD_PREC)
            return a + (b >> shift) + 1;
        else
            return a + 1;
    }
    else
    {
        fmpz_swap(aexp, bexp);

        if ((-shift) <= FMPRB_RAD_PREC)
            return b + (a >> (-shift)) + 1;
        else
            return b + 1;
    }
}

void _fmprb_mul_main(fmpr_t z, fmpr_t c,
    const fmpr_t x, const fmpr_t a,
    const fmpr_t y, const fmpr_t b, slong prec)
{
    mp_limb_t xm, am, ym, bm;
    fmpz_t xe, ae, ye, be;
    slong r, shift;

    fmpz_init(xe);
    fmpz_init(ae);
    fmpz_init(ye);
    fmpz_init(be);

    _rad_bound(&xm, xe, x);
    _rad_bound(&ym, ye, y);
    _rad_bound(&am, ae, a);
    _rad_bound(&bm, be, b);

    _RAD_MUL(xm, xe, bm, be);
    _RAD_MUL(ym, ye, am, ae);
    xm = _rad_add(xm, xe, ym, ye);

    _RAD_MUL(am, ae, bm, be);
    xm = _rad_add(xm, xe, am, ae);

    r = fmpr_mul(z, x, y, prec, FMPR_RND_DOWN);

    if (r != FMPR_RESULT_EXACT)
    {
        am = UWORD(1) << FMPRB_RAD_PREC;
        fmpz_add_si_inline(ae, fmpr_expref(z), -r - 2*FMPRB_RAD_PREC);
        xm = _rad_add(xm, xe, am, ae);
    }

    shift = FMPRB_RAD_PREC;

    /* make the radius mantissa odd and small */
    xm += !(xm & 1);
    while (xm >= (UWORD(1) << FMPRB_RAD_PREC))
    {
        xm = (xm >> 1) + 1;
        xm += !(xm & 1);
        shift++;
    }

    fmpz_set_ui(fmpr_manref(c), xm);
    fmpz_add_si_inline(fmpr_expref(c), xe, shift);

    fmpz_clear(xe);
    fmpz_clear(ae);
    fmpz_clear(ye);
    fmpz_clear(be);
}

void _fmprb_mul_fmpr_main(fmpr_t z, fmpr_t c,
    const fmpr_t x, const fmpr_t a,
    const fmpr_t y, slong prec)
{
    mp_limb_t ym, am;
    fmpz_t ye, ae;
    slong r, shift;

    fmpz_init(ye);
    fmpz_init(ae);

    _rad_bound(&ym, ye, y);
    _rad_bound(&am, ae, a);
    _RAD_MUL(ym, ye, am, ae);

    r = fmpr_mul(z, x, y, prec, FMPR_RND_DOWN);

    if (r != FMPR_RESULT_EXACT)
    {
        am = UWORD(1) << FMPRB_RAD_PREC;
        fmpz_add_si_inline(ae, fmpr_expref(z), -r - 2*FMPRB_RAD_PREC);
        ym = _rad_add(ym, ye, am, ae);
    }

    shift = FMPRB_RAD_PREC;

    /* make the radius mantissa odd and small */
    ym += !(ym & 1);
    while (ym >= (UWORD(1) << FMPRB_RAD_PREC))
    {
        ym = (ym >> 1) + 1;
        ym += !(ym & 1);
        shift++;
    }

    fmpz_set_ui(fmpr_manref(c), ym);
    fmpz_add_si_inline(fmpr_expref(c), ye, shift);

    fmpz_clear(ye);
    fmpz_clear(ae);
}

void
fmprb_mul_fmpr(fmprb_t z, const fmprb_t x, const fmpr_t y, slong prec)
{
    if (fmprb_is_exact(x))
    {
        slong r;
        r = fmpr_mul(fmprb_midref(z), fmprb_midref(x), y, prec, FMPR_RND_DOWN);
        fmpr_set_error_result(fmprb_radref(z), fmprb_midref(z), r);
    }
    else
    {
        if (fmpr_is_special(fmprb_midref(x)) ||
            fmpr_is_special(fmprb_radref(x)) || fmpr_is_special(y))
        {
            fmprb_mul_fmpr_naive(z, x, y, prec);
        }
        else
        {
            _fmprb_mul_fmpr_main(fmprb_midref(z), fmprb_radref(z),
                fmprb_midref(x), fmprb_radref(x), y, prec);
        }
    }
}

void
fmprb_mul(fmprb_t z, const fmprb_t x, const fmprb_t y, slong prec)
{
    if (fmprb_is_exact(x))
    {
        fmprb_mul_fmpr(z, y, fmprb_midref(x), prec);
    }
    else if (fmprb_is_exact(y))
    {
        fmprb_mul_fmpr(z, x, fmprb_midref(y), prec);
    }
    else
    {
        if (fmpr_is_special(fmprb_midref(x)) || 
                fmpr_is_special(fmprb_radref(x)) ||
                fmpr_is_special(fmprb_midref(y)) ||
                fmpr_is_special(fmprb_radref(y)))
        {
            fmprb_mul_main_naive(z, x, y, prec);
        }
        else
        {
            _fmprb_mul_main(fmprb_midref(z), fmprb_radref(z),
                fmprb_midref(x), fmprb_radref(x),
                fmprb_midref(y), fmprb_radref(y), prec);
        }
    }
}

void
fmprb_mul_ui(fmprb_t z, const fmprb_t x, ulong y, slong prec)
{
    fmpr_t t;
    fmpr_init(t);
    fmpr_set_ui(t, y);
    fmprb_mul_fmpr(z, x, t, prec);
    fmpr_clear(t);
}

void
fmprb_mul_si(fmprb_t z, const fmprb_t x, slong y, slong prec)
{
    fmpr_t t;
    fmpr_init(t);
    fmpr_set_si(t, y);
    fmprb_mul_fmpr(z, x, t, prec);
    fmpr_clear(t);
}

void
fmprb_mul_fmpz(fmprb_t z, const fmprb_t x, const fmpz_t y, slong prec)
{
    fmpr_t t;
    fmpr_init(t);
    fmpr_set_fmpz(t, y);
    fmprb_mul_fmpr(z, x, t, prec);
    fmpr_clear(t);
}