preliminary addition code; some other code adjustments

2025-03-05 09:21:38 -05:00 · 2014-05-02 15:09:11 +02:00 · 2014-05-02 15:09:11 +02:00 · 81055070cf
commit 81055070cf
parent 0c4dcd412f
13 changed files with 1006 additions and 72 deletions
--- a/arf.h
+++ b/arf.h
@ -104,7 +104,7 @@ arf_rnd_to_mpfr(arf_rnd_t rnd)
 #define ARF_IS_LAGOM(x) (ARF_EXP(x) >= ARF_MIN_LAGOM_EXP && \
                         ARF_EXP(x) <= ARF_MAX_LAGOM_EXP)
-/* At least two limbs (needs pointer). */
+/* More than two limbs (needs pointer). */
 #define ARF_HAS_PTR(x) ((x)->size > ((2 << 1) + 1))
 /* Raw size field (encodes both limb size and sign). */
@ -466,8 +466,9 @@ int _arf_set_round_ui(arf_t x, ulong v, int sgnbit, long prec, arf_rnd_t rnd);
 /* Assumes xn > 0, x[0] != 0. */
 /* TBD: 1, 2 limb versions, top-aligned version */
-int arf_set_round_mpn(arf_t y, mp_srcptr x, mp_size_t xn,
+int
-    int sgnbit, const fmpz_t topexp, long prec, arf_rnd_t rnd);
+_arf_set_round_mpn(arf_t y, long * exp_shift, mp_srcptr x, mp_size_t xn,
    int sgnbit, long prec, arf_rnd_t rnd);
 static __inline__ int
 arf_set_round_ui(arf_t x, ulong v, long prec, arf_rnd_t rnd)
@ -484,7 +485,9 @@ arf_set_round_si(arf_t x, long v, long prec, arf_rnd_t rnd)
 static __inline__ int
 arf_set_round_mpz(arf_t y, const mpz_t x, long prec, arf_rnd_t rnd)
 {
    int inexact;
    long size = x->_mp_size;
    long fix;
    if (size == 0)
    {
@ -492,8 +495,11 @@ arf_set_round_mpz(arf_t y, const mpz_t x, long prec, arf_rnd_t rnd)
        return 0;
    }
-    return arf_set_round_mpn(y, x->_mp_d, FLINT_ABS(size),
+    inexact = _arf_set_round_mpn(y, &fix, x->_mp_d, FLINT_ABS(size),
-        (size < 0), NULL, prec, rnd);
+        (size < 0), prec, rnd);
    _fmpz_demote(ARF_EXPREF(y));
    ARF_EXP(y) = FLINT_ABS(size) * FLINT_BITS + fix;
    return inexact;
 }
 static __inline__ int
@ -703,6 +709,29 @@ void arf_randtest_special(arf_t x, flint_rand_t state, long bits, long mag_bits)
 #define ADD2_FAST_MAX (COEFF_MAX / 4)
 #define ADD2_FAST_MIN (-ADD2_FAST_MAX)
 static __inline__ void
 _fmpz_set_fast(fmpz_t f, const fmpz_t g)
 {
    if (!COEFF_IS_MPZ(*f) && !COEFF_IS_MPZ(*g))
        *f = *g;
    else
        fmpz_set(f, g);
 }
 static __inline__ void
 _fmpz_add_fast(fmpz_t z, const fmpz_t x, long c)
 {
    fmpz ze, xe;
    ze = *z;
    xe = *x;
    if (!COEFF_IS_MPZ(ze) && (xe > ADD2_FAST_MIN && xe < ADD2_FAST_MAX))
        *z = xe + c;
    else
        fmpz_add_si(z, x, c);
 }
 static __inline__ void
 _fmpz_add2_fast(fmpz_t z, const fmpz_t x, const fmpz_t y, long c)
 {
@ -799,6 +828,50 @@ int arf_mul_rnd_down(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec);
        ? arf_mul_rnd_down(z, x, y, prec)        \
        : arf_mul_rnd_any(z, x, y, prec, rnd))
 #define ARF_ADD_STACK_ALLOC 40
 #define ARF_ADD_TLS_ALLOC 1000
 extern TLS_PREFIX mp_ptr __arf_add_tmp;
 extern TLS_PREFIX long __arf_add_alloc;
 extern void _arf_add_tmp_cleanup(void);
 #define ARF_ADD_TMP_DECL \
    mp_limb_t tmp_stack[ARF_ADD_STACK_ALLOC]; \
 #define ARF_ADD_TMP_ALLOC(tmp, alloc) \
    if (alloc <= ARF_ADD_STACK_ALLOC) \
    { \
        tmp = tmp_stack; \
    } \
    else if (alloc <= ARF_ADD_TLS_ALLOC) \
    { \
        if (__arf_add_alloc < alloc) \
        { \
            if (__arf_add_alloc == 0) \
            { \
                flint_register_cleanup_function(_arf_add_tmp_cleanup); \
            } \
            __arf_add_tmp = flint_realloc(__arf_add_tmp, sizeof(mp_limb_t) * alloc); \
            __arf_add_alloc = alloc; \
        } \
        tmp = __arf_add_tmp; \
    } \
    else \
    { \
        tmp = flint_malloc(sizeof(mp_limb_t) * alloc); \
    }
 #define ARF_ADD_TMP_FREE(tmp, alloc) \
    if (alloc > ARF_ADD_TLS_ALLOC) \
        flint_free(tmp);
 int _arf_add_mpn(arf_t z, mp_srcptr xp, mp_size_t xn, int xsgnbit,
    const fmpz_t xexp, mp_srcptr yp, mp_size_t yn, int ysgnbit,
    mp_bitcnt_t shift, long prec, arf_rnd_t rnd);
 int arf_add(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec, arf_rnd_t rnd);
 #ifdef __cplusplus
 }
 #endif
--- a/arf/add.c
+++ b/arf/add.c
@ -0,0 +1,91 @@
 /*=============================================================================
    This file is part of ARB.
    ARB is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    ARB is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with ARB; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 =============================================================================*/
 /******************************************************************************
    Copyright (C) 2014 Fredrik Johansson
 ******************************************************************************/
 #include "arf.h"
 int
 arf_add_special(arf_t z, const arf_t x, const arf_t y, long prec, arf_rnd_t rnd)
 {
    if (arf_is_zero(x))
    {
        if (arf_is_zero(y))
        {
            arf_zero(z);
            return 0;
        }
        else
            return arf_set_round(z, y, prec, rnd);
    }
    else if (arf_is_zero(y))
    {
        return arf_set_round(z, x, prec, rnd);
    }
    else if (arf_is_nan(x) || arf_is_nan(y)
        || (arf_is_pos_inf(x) && arf_is_neg_inf(y))
        || (arf_is_neg_inf(x) && arf_is_pos_inf(y)))
    {
        arf_nan(z);
        return 0;
    }
    else if (arf_is_special(x))
    {
        arf_set(z, x);
        return 0;
    }
    else
    {
        arf_set(z, y);
        return 0;
    }
 }
 int
 arf_add(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec, arf_rnd_t rnd)
 {
    mp_size_t xn, yn;
    mp_srcptr xptr, yptr;
    long shift;
    if (arf_is_special(x) || arf_is_special(y))
    {
        return arf_add_special(z, x, y, prec, rnd);
    }
    shift = _fmpz_sub_small(ARF_EXPREF(x), ARF_EXPREF(y));
    if (shift < 0)
    {
        arf_srcptr __t;
        __t = x; x = y; y = __t;
        shift = -shift;
    }
    ARF_GET_MPN_READONLY(xptr, xn, x);
    ARF_GET_MPN_READONLY(yptr, yn, y);
    return _arf_add_mpn(z, xptr, xn, ARF_SGNBIT(x), ARF_EXPREF(x),
                           yptr, yn, ARF_SGNBIT(y), shift, prec, rnd);
 }
--- a/arf/add_mpn.c
+++ b/arf/add_mpn.c
@ -0,0 +1,248 @@
 /*=============================================================================
    This file is part of ARB.
    ARB is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    ARB is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with ARB; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 =============================================================================*/
 /******************************************************************************
    Copyright (C) 2014 Fredrik Johansson
 ******************************************************************************/
 #include "arf.h"
 TLS_PREFIX mp_ptr __arf_add_tmp = NULL;
 TLS_PREFIX long __arf_add_alloc = 0;
 void _arf_add_tmp_cleanup(void)
 {
    flint_free(__arf_add_tmp);
    __arf_add_tmp = NULL;
    __arf_add_alloc = 0;
 }
 /* Assumptions: top limbs of x and y nonzero. */
 int
 _arf_add_mpn(arf_t z, mp_srcptr xp, mp_size_t xn, int xsgnbit, const fmpz_t xexp,
                      mp_srcptr yp, mp_size_t yn, int ysgnbit, mp_bitcnt_t shift,
                      long prec, arf_rnd_t rnd)
 {
    mp_size_t wn, zn, zn_original, alloc, xbase, wbase;
    mp_size_t shift_limbs;
    mp_bitcnt_t shift_bits;
    int inexact;
    long fix;
    mp_limb_t cy;
    mp_ptr tmp;
    ARF_ADD_TMP_DECL
    /* x +/- eps */
    if (shift > prec + 1 &&
        prec != ARF_PREC_EXACT &&
        shift > (xn + 1) * FLINT_BITS)
    {
        zn = (prec + FLINT_BITS - 1) / FLINT_BITS;
        zn_original = zn = FLINT_MAX(zn, xn) + 2;
        shift_limbs = zn - xn;
        alloc = zn + 1;
        ARF_ADD_TMP_ALLOC(tmp, alloc)
        flint_mpn_zero(tmp, shift_limbs);
        flint_mpn_copyi(tmp + shift_limbs, xp, xn);
        if (xsgnbit == ysgnbit)
        {
            tmp[0] = 1;
        }
        else
        {
            mpn_sub_1(tmp, tmp, zn, 1);
            while (tmp[zn-1] == 0)
                zn--;
        }
        _arf_set_round_mpn(z, &fix, tmp, zn, xsgnbit, prec, rnd);
        fix += (zn - zn_original) * FLINT_BITS;
        _fmpz_add_fast(ARF_EXPREF(z), xexp, fix);
        ARF_ADD_TMP_FREE(tmp, alloc)
        return 1;
    }
    shift_limbs = shift / FLINT_BITS;
    shift_bits = shift % FLINT_BITS;
    /* TODO: shift == 0 optimization! (common case) */
    /* w = y shifted */
    wn = yn + (shift_bits != 0);
    /* Size of sum (possibly one more limb than this for carry). */
    zn_original = zn = FLINT_MAX(xn, wn + shift_limbs);
    alloc = zn + 1;
    ARF_ADD_TMP_ALLOC(tmp, alloc)
    /* Low limbs of both operands */
    xbase = zn - xn;
    wbase = zn - shift_limbs - wn;
    /* Shift y to its correct offset in the sum. */
    if (shift_bits == 0)
        flint_mpn_copyi(tmp + wbase, yp, yn);
    else
        tmp[wbase] = cy = mpn_rshift(tmp + wbase + 1, yp, yn, shift_bits);
    if (xsgnbit == ysgnbit)
    {
        if (shift_limbs >= xn)
        {
            /*   XXXXXX
                         WWWWWWW    */
            flint_mpn_zero(tmp + wn, zn - xn - wn);
            flint_mpn_copyi(tmp + zn - xn, xp, xn);
            cy = 0;
        }
        else if (shift_limbs == 0)
        {
            /*  XXXXXX        or    XXXXX
                WWWW                WWWWWWW     */
            if (wn < xn)
            {
                flint_mpn_copyi(tmp, xp, xn - wn);
                cy = mpn_add_n(tmp + wbase, xp + wbase, tmp + wbase, wn);
            }
            else
            {
                cy = mpn_add_n(tmp + xbase, xp, tmp + xbase, xn);
            }
        }
        else if (xbase >= wbase)
        {
            /* XXXXXX         or  XXXXXX
                 WWWWWWW            WWWW    */
            cy = mpn_add_n(tmp + xbase, tmp + xbase, xp, wn - xbase);
            cy = mpn_add_1(tmp + wn, xp + wn - xbase, zn - wn, cy);
        }
        else
        {
            /* XXXXXXXX           todo: merge with above?
                  WWW    */
            flint_mpn_copyi(tmp, xp, wbase);
            cy = mpn_add_n(tmp + wbase, tmp + wbase, xp + wbase, wn);
            cy = mpn_add_1(tmp + zn - shift_limbs, xp + xn - shift_limbs, shift_limbs, cy);
        }
        /* There could be a carry. */
        tmp[zn] = cy;
        zn += cy;
    }
    else
    {
        if (shift_limbs >= xn)
        {
            /*   XXXXXX
                         WWWWWWW    */
            mpn_neg(tmp, tmp, wn);
            flint_mpn_store(tmp + wn, zn - xn - wn, -LIMB_ONE);
            mpn_sub_1(tmp + zn - xn, xp, xn, 1);
        }
        else if (shift_limbs == 0)
        {
            /*  XXXXXX (1)    or    XXXXX   (2)
                WWWW                WWWWWWW     */
            if (wn <= xn) /* (1) */
            {
                if (mpn_cmp(xp + wbase, tmp + wbase, wn) >= 0)
                {
                    flint_mpn_copyi(tmp, xp, wbase);
                    mpn_sub_n(tmp + wbase, xp + wbase, tmp + wbase, wn);
                }
                else
                {
                    xsgnbit = ysgnbit;
                    mpn_sub_n(tmp + wbase, tmp + wbase, xp + wbase, wn);
                    if (wbase != 0)
                    {
                        cy = mpn_neg_n(tmp, xp, wbase);
                        mpn_sub_1(tmp + wbase, tmp + wbase, wn, cy);
                    }
                }
            }
            else /* (2) */
            {
                if (mpn_cmp(tmp + xbase, xp, xn) >= 0)
                {
                    xsgnbit = ysgnbit;
                    mpn_sub_n(tmp + xbase, tmp + xbase, xp, xn);
                }
                else
                {
                    cy = mpn_neg(tmp, tmp, xbase);
                    mpn_sub_n(tmp + xbase, xp, tmp + xbase, xn);
                    mpn_sub_1(tmp + xbase, tmp + xbase, xn, cy);
                }
            }
        }
        else if (xbase >= wbase)
        {
            /* XXXXXX         or  XXXXXX
                 WWWWWWW            WWWW    */
            if (xbase > 0)
            {
                cy = mpn_sub_n(tmp + xbase, xp, tmp + xbase, wn - xbase);
                cy = mpn_sub_1(tmp + wn, xp + xn - shift_limbs, shift_limbs, cy);
                cy = mpn_neg(tmp, tmp, xbase);
                mpn_sub_1(tmp + xbase, tmp + xbase, xn, cy);
            }
            else
            {
                cy = mpn_sub_n(tmp, xp, tmp, wn);
                cy = mpn_sub_1(tmp + wn, xp + wn, xn - wn, cy);
            }
        }
        else
        {
            /* XXXXXXXX
                  WWW    */
            flint_mpn_copyi(tmp, xp, wbase);
            cy = mpn_sub_n(tmp + wbase, xp + wbase, tmp + wbase, wn);
            mpn_sub_1(tmp + zn - shift_limbs, xp + xn - shift_limbs, shift_limbs, cy);
        }
        /* There could be cancellation. */
        while (zn > 0 && tmp[zn - 1] == 0)
            zn--;
    }
    if (zn == 0)
    {
        arf_zero(z);
        inexact = 0;
    }
    else
    {
        inexact = _arf_set_round_mpn(z, &fix, tmp, zn, xsgnbit, prec, rnd);
        fix += (zn - zn_original) * FLINT_BITS;
        _fmpz_add_fast(ARF_EXPREF(z), xexp, fix);
    }
    ARF_ADD_TMP_FREE(tmp, alloc)
    return inexact;
 }
--- a/arf/mul_rnd_any.c
+++ b/arf/mul_rnd_any.c
@ -40,6 +40,7 @@ arf_mul_rnd_any(arf_ptr z, arf_srcptr x, arf_srcptr y,
        long prec, arf_rnd_t rnd)
 {
    mp_size_t xn, yn;
    long fix;
    int sgnbit, inexact;
    xn = ARF_XSIZE(x);
@ -90,8 +91,8 @@ arf_mul_rnd_any(arf_ptr z, arf_srcptr x, arf_srcptr y,
            mpn_mul(tmp, xptr, xn, yptr, yn);
        }
-        _fmpz_add2_fast(ARF_EXPREF(z), ARF_EXPREF(x), ARF_EXPREF(y), 0);
+        inexact = _arf_set_round_mpn(z, &fix, tmp, zn, sgnbit, prec, rnd);
-        inexact = arf_set_round_mpn(z, tmp, zn, sgnbit, ARF_EXPREF(z), prec, rnd);
+        _fmpz_add2_fast(ARF_EXPREF(z), ARF_EXPREF(x), ARF_EXPREF(y), fix);
        ARF_MUL_TMP_FREE(tmp, alloc)
        return inexact;
--- a/arf/mul_rnd_down.c
+++ b/arf/mul_rnd_down.c
@ -30,6 +30,7 @@ arf_mul_rnd_down(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec)
 {
    mp_size_t xn, yn, zn;
    mp_limb_t hi, lo;
    long expfix;
    int sgnbit, ret, fix;
    mp_ptr zptr;
@ -184,8 +185,8 @@ arf_mul_rnd_down(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec)
        }
        zn = xn + yn;
-        _fmpz_add2_fast(ARF_EXPREF(z), ARF_EXPREF(x), ARF_EXPREF(y), 0);
+        ret = _arf_set_round_mpn(z, &expfix, zz, zn, sgnbit, prec, ARF_RND_DOWN);
-        ret = arf_set_round_mpn(z, zz, zn, sgnbit, ARF_EXPREF(z), prec, ARF_RND_DOWN);
+        _fmpz_add2_fast(ARF_EXPREF(z), ARF_EXPREF(x), ARF_EXPREF(y), expfix);
        return ret;
    }
    else if (yn > MUL_MPFR_MIN_LIMBS && prec != ARF_PREC_EXACT
@ -223,8 +224,8 @@ arf_mul_rnd_down(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec)
            mpn_mul(tmp, xptr, xn, yptr, yn);
        }
-        _fmpz_add2_fast(ARF_EXPREF(z), ARF_EXPREF(x), ARF_EXPREF(y), 0);
+        ret = _arf_set_round_mpn(z, &expfix, tmp, zn, sgnbit, prec, ARF_RND_DOWN);
-        ret = arf_set_round_mpn(z, tmp, zn, sgnbit, ARF_EXPREF(z), prec, ARF_RND_DOWN);
+        _fmpz_add2_fast(ARF_EXPREF(z), ARF_EXPREF(x), ARF_EXPREF(y), expfix);
        ARF_MUL_TMP_FREE(tmp, alloc)
        return ret;
--- a/arf/neg_round.c
+++ b/arf/neg_round.c
@ -50,10 +50,14 @@ arf_neg_round(arf_t y, const arf_t x, long prec, arf_rnd_t rnd)
        {
            mp_srcptr xptr;
            mp_size_t xn;
            int inexact;
            long fix;
            ARF_GET_MPN_READONLY(xptr, xn, x);
-            return arf_set_round_mpn(y, xptr, xn, ARF_SGNBIT(x) ^ 1,
+            inexact = _arf_set_round_mpn(y, &fix, xptr, xn,
-                ARF_EXPREF(x), prec, rnd);
+                ARF_SGNBIT(x) ^ 1, prec, rnd);
            _fmpz_add_fast(ARF_EXPREF(y), ARF_EXPREF(x), fix);
            return inexact;
        }
    }
 }
--- a/arf/set_round.c
+++ b/arf/set_round.c
@ -50,10 +50,14 @@ arf_set_round(arf_t y, const arf_t x, long prec, arf_rnd_t rnd)
        {
            mp_srcptr xptr;
            mp_size_t xn;
            long fix;
            int inexact;
            ARF_GET_MPN_READONLY(xptr, xn, x);
-            return arf_set_round_mpn(y, xptr, xn, ARF_SGNBIT(x),
+            inexact = _arf_set_round_mpn(y, &fix, xptr, xn,
-                ARF_EXPREF(x), prec, rnd);
+                ARF_SGNBIT(x), prec, rnd);
            _fmpz_add_fast(ARF_EXPREF(y), ARF_EXPREF(x), fix);
            return inexact;
        }
    }
 }
--- a/arf/set_round_mpn.c
+++ b/arf/set_round_mpn.c
@ -36,8 +36,8 @@ _arf_set_mpn_roundup_power_two(arf_t y, int sgnbit)
 }
 int
-arf_set_round_mpn(arf_t y, mp_srcptr x, mp_size_t xn,
+_arf_set_round_mpn(arf_t y, long * exp_shift, mp_srcptr x, mp_size_t xn,
-    int sgnbit, const fmpz_t topexp, long prec, arf_rnd_t rnd)
+    int sgnbit, long prec, arf_rnd_t rnd)
 {
    unsigned int leading;
    mp_bitcnt_t exp, bc, val, val_bits;
@ -51,10 +51,7 @@ arf_set_round_mpn(arf_t y, mp_srcptr x, mp_size_t xn,
    exp = xn * FLINT_BITS - leading;
    /* Set exponent. */
-    if (topexp == NULL)
+    *exp_shift = -(long) leading;
        fmpz_set_ui(ARF_EXPREF(y), exp);
    else
        fmpz_sub_si_inline(ARF_EXPREF(y), topexp, leading);
    /* Find first nonzero bit. */
    val_limbs = 0;
@ -112,7 +109,10 @@ arf_set_round_mpn(arf_t y, mp_srcptr x, mp_size_t xn,
            /* Overflow to next power of two (unlikely). */
            if (val == exp)
            {
-                _arf_set_mpn_roundup_power_two(y, sgnbit);
+                exp_shift[0]++;
                ARF_DEMOTE(y);
                ARF_NOPTR_D(y)[0] = LIMB_TOP;
                ARF_XSIZE(y) = ARF_MAKE_XSIZE(1, sgnbit);
                return 1;
            }
        }
--- a/arf/set_round_ui.c
+++ b/arf/set_round_ui.c
@ -25,6 +25,35 @@
 #include "arf.h"
 /* Top-aligns the nonzero limb v and rounds it to prec bit. */
 #define ARF_NORMALISE_ROUND_LIMB(inexact, exp, v,               \
                                    sgnbit, prec, rnd)          \
    do {                                                        \
        count_leading_zeros(exp, v);                            \
        v <<= exp;                                              \
        exp = FLINT_BITS - exp;                                 \
        if (prec >= exp)                                        \
        {                                                       \
            inexact = 0;                                        \
        }                                                       \
        else                                                    \
        {                                                       \
            mp_limb_t __t = v;                                  \
            v = MASK_LIMB(v, FLINT_BITS - prec);                \
            inexact = (__t != v);                               \
            if (inexact && arf_rounds_up(rnd, sgnbit))          \
            {                                                   \
                v += (LIMB_ONE << (FLINT_BITS - prec));         \
                if (v == 0)                                     \
                {                                               \
                    v = LIMB_TOP;                               \
                    exp++;                                      \
                }                                               \
            }                                                   \
        }                                                       \
    }                                                           \
    while (0)
 int
 _arf_set_round_ui(arf_t x, ulong v, int sgnbit, long prec, arf_rnd_t rnd)
 {
@ -35,40 +64,12 @@ _arf_set_round_ui(arf_t x, ulong v, int sgnbit, long prec, arf_rnd_t rnd)
    }
    else
    {
-        unsigned int bc, mask_bits;
+        int exp, inexact;
        int inexact;
        mp_limb_t t;
-        count_leading_zeros(bc, v);
+        ARF_NORMALISE_ROUND_LIMB(inexact, exp, v, sgnbit, prec, rnd);
        v <<= bc;
        bc = FLINT_BITS - bc;
-        if (prec >= bc)
+        _fmpz_demote(ARF_EXPREF(x));
-        {
+        ARF_EXP(x) = exp;
            inexact = 0;
        }
        else
        {
            mask_bits = FLINT_BITS - prec;
            t = v;
            v = (v >> mask_bits) << mask_bits;
            inexact = (t != v);
            if (inexact && arf_rounds_up(rnd, sgnbit))
            {
                v += (LIMB_ONE << (FLINT_BITS - prec));
                /* Overflow to the next power of two (unlikely). */
                if (v == 0)
                {
                    v = LIMB_TOP;
                    bc++;
                }
            }
        }
        fmpz_set_ui(ARF_EXPREF(x), bc);
        ARF_DEMOTE(x);
        ARF_XSIZE(x) = ARF_MAKE_XSIZE(1, sgnbit);
        ARF_NOPTR_D(x)[0] = v;
--- a/arf/test/t-add.c
+++ b/arf/test/t-add.c
@ -0,0 +1,182 @@
 /*=============================================================================
    This file is part of ARB.
    ARB is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    ARB is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with ARB; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 =============================================================================*/
 /******************************************************************************
    Copyright (C) 2012 Fredrik Johansson
 ******************************************************************************/
 #include "arf.h"
 int
 arf_add_naive(arf_t z, const arf_t x, const arf_t y, long prec, arf_rnd_t rnd)
 {
    fmpr_t a, b;
    long r;
    fmpr_init(a);
    fmpr_init(b);
    arf_get_fmpr(a, x);
    arf_get_fmpr(b, y);
    r = fmpr_add(a, a, b, prec, rnd);
    arf_set_fmpr(z, a);
    fmpr_clear(a);
    fmpr_clear(b);
    return (r == FMPR_RESULT_EXACT) ? 0 : 1;
 }
 int main()
 {
    long iter, iter2;
    flint_rand_t state;
    printf("add....");
    fflush(stdout);
    flint_randinit(state);
    for (iter = 0; iter < 10000; iter++)
    {
        arf_t x, y, z, v;
        long prec, r1, r2;
        arf_rnd_t rnd;
        arf_init(x);
        arf_init(y);
        arf_init(z);
        arf_init(v);
        for (iter2 = 0; iter2 < 100; iter2++)
        {
            arf_randtest_special(x, state, 2000, 100);
            arf_randtest_special(y, state, 2000, 100);
            prec = 2 + n_randint(state, 2000);
            if (n_randint(state, 10) == 0 &&
                fmpz_bits(ARF_EXPREF(x)) < 10 &&
                fmpz_bits(ARF_EXPREF(y)) < 10)
            {
                prec = ARF_PREC_EXACT;
            }
            switch (n_randint(state, 4))
            {
                case 0:  rnd = ARF_RND_DOWN; break;
                case 1:  rnd = ARF_RND_UP; break;
                case 2:  rnd = ARF_RND_FLOOR; break;
                default: rnd = ARF_RND_CEIL; break;
            }
            switch (n_randint(state, 5))
            {
            case 0:
                r1 = arf_add(z, x, y, prec, rnd);
                r2 = arf_add_naive(v, x, y, prec, rnd);
                if (!arf_equal(z, v) || r1 != r2)
                {
                    printf("FAIL!\n");
                    printf("prec = %ld, rnd = %d\n\n", prec, rnd);
                    printf("x = "); arf_print(x); printf("\n\n");
                    printf("y = "); arf_print(y); printf("\n\n");
                    printf("z = "); arf_print(z); printf("\n\n");
                    printf("v = "); arf_print(v); printf("\n\n");
                    printf("r1 = %ld, r2 = %ld\n", r1, r2);
                    abort();
                }
                break;
            case 1:
                r1 = arf_add(z, x, x, prec, rnd);
                r2 = arf_add_naive(v, x, x, prec, rnd);
                if (!arf_equal(z, v) || r1 != r2)
                {
                    printf("FAIL (aliasing 1)!\n");
                    printf("prec = %ld, rnd = %d\n\n", prec, rnd);
                    printf("x = "); arf_print(x); printf("\n\n");
                    printf("z = "); arf_print(z); printf("\n\n");
                    printf("v = "); arf_print(v); printf("\n\n");
                    printf("r1 = %ld, r2 = %ld\n", r1, r2);
                    abort();
                }
                break;
            case 2:
                r2 = arf_add_naive(v, x, x, prec, rnd);
                r1 = arf_add(x, x, x, prec, rnd);
                if (!arf_equal(v, x) || r1 != r2)
                {
                    printf("FAIL (aliasing 2)!\n");
                    printf("prec = %ld, rnd = %d\n\n", prec, rnd);
                    printf("x = "); arf_print(x); printf("\n\n");
                    printf("z = "); arf_print(z); printf("\n\n");
                    printf("v = "); arf_print(v); printf("\n\n");
                    printf("r1 = %ld, r2 = %ld\n", r1, r2);
                    abort();
                }
                break;
            case 3:
                r2 = arf_add_naive(v, x, y, prec, rnd);
                r1 = arf_add(x, x, y, prec, rnd);
                if (!arf_equal(x, v) || r1 != r2)
                {
                    printf("FAIL (aliasing 3)!\n");
                    printf("prec = %ld, rnd = %d\n\n", prec, rnd);
                    printf("x = "); arf_print(x); printf("\n\n");
                    printf("y = "); arf_print(y); printf("\n\n");
                    printf("v = "); arf_print(v); printf("\n\n");
                    printf("r1 = %ld, r2 = %ld\n", r1, r2);
                    abort();
                }
                break;
            default:
                r2 = arf_add_naive(v, x, y, prec, rnd);
                r1 = arf_add(x, y, x, prec, rnd);
                if (!arf_equal(x, v) || r1 != r2)
                {
                    printf("FAIL (aliasing 4)!\n");
                    printf("prec = %ld, rnd = %d\n\n", prec, rnd);
                    printf("x = "); arf_print(x); printf("\n\n");
                    printf("y = "); arf_print(y); printf("\n\n");
                    printf("v = "); arf_print(v); printf("\n\n");
                    printf("r1 = %ld, r2 = %ld\n", r1, r2);
                    abort();
                }
                break;
            }
        }
        arf_clear(x);
        arf_clear(y);
        arf_clear(z);
        arf_clear(v);
    }
    flint_randclear(state);
    flint_cleanup();
    printf("PASS\n");
    return EXIT_SUCCESS;
 }
--- a/fmpz_extras.h
+++ b/fmpz_extras.h
@ -200,25 +200,10 @@ static __inline__ void fmpz_sub_mul2exp(fmpz_t z, const fmpz_t x, const fmpz_t y
    fmpz_clear(t);
 }
-static long _fmpz_sub_small_large(const fmpz_t x, const fmpz_t y)
+long _fmpz_sub_small_large(const fmpz_t x, const fmpz_t y);
 {
    fmpz_t t;
    fmpz_init(t);
    fmpz_sub(t, x, y);
    if (!COEFF_IS_MPZ(*t))
    {
        /* no need to free t */
        return *t;
    }
    else
    {
        int sign = fmpz_sgn(t);
        fmpz_clear(t);
        return (sign > 0) ? LONG_MAX : -LONG_MAX;
    }
 }
-static __inline__ long _fmpz_sub_small(const fmpz_t x, const fmpz_t y)
+static __inline__ long
 _fmpz_sub_small(const fmpz_t x, const fmpz_t y)
 {
    if (!COEFF_IS_MPZ(*x) && !COEFF_IS_MPZ(*y))
    {
--- a/fmpz_extras.h.save
+++ b/fmpz_extras.h.save
@ -0,0 +1,299 @@
 /*=============================================================================
    This file is part of ARB.
    ARB is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    ARB is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with ARB; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 =============================================================================*/
 /******************************************************************************
    Copyright (C) 2013 Fredrik Johansson
 ******************************************************************************/
 #ifndef FMPZ_EXTRAS_H
 #define FMPZ_EXTRAS_H
 #include <limits.h>
 #include "flint.h"
 #include "fmpz.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 static __inline__ void
 fmpz_add_inline(fmpz_t z, const fmpz_t x, const fmpz_t y)
 {
    fmpz f, g;
    f = *x;
    g = *y;
    if (!COEFF_IS_MPZ(f) && !COEFF_IS_MPZ(g))
        fmpz_set_si(z, f + g);
    else
        fmpz_add(z, x, y);
 }
 static __inline__ void
 fmpz_add_si(fmpz_t z, const fmpz_t x, long y)
 {
    if (y >= 0)
        fmpz_add_ui(z, x, y);
    else
        fmpz_sub_ui(z, x, -y);
 }
 static __inline__ void
 fmpz_sub_si(fmpz_t z, const fmpz_t x, long y)
 {
    if (y >= 0)
        fmpz_sub_ui(z, x, y);
    else
        fmpz_add_ui(z, x, -y);
 }
 static __inline__ void
 fmpz_add_si_inline(fmpz_t z, const fmpz_t x, long y)
 {
    fmpz f;
    f = *x;
    if (!COEFF_IS_MPZ(f) && (COEFF_MIN <= y && y <= COEFF_MAX))
        fmpz_set_si(z, f + y);
    else
        fmpz_add_si(z, x, y);
 }
 static __inline__ void
 fmpz_sub_si_inline(fmpz_t z, const fmpz_t x, long y)
 {
    fmpz f;
    f = *x;
    if (!COEFF_IS_MPZ(f) && (COEFF_MIN <= y && y <= COEFF_MAX))
        fmpz_set_si(z, f - y);
    else
        fmpz_sub_si(z, x, y);
 }
 static __inline__ void
 fmpz_add_ui_inline(fmpz_t z, const fmpz_t x, ulong y)
 {
    fmpz f = *x;
    if (!COEFF_IS_MPZ(f) && y <= COEFF_MAX)
        fmpz_set_si(z, f + y);
    else
        fmpz_add_ui(z, x, y);
 #if}
 static __inline__ void
 fmpz_add2_fmpz_si_inline(fmpz_t z, const fmpz_t x, const fmpz_t y, long c)
 {
    fmpz f, g, h;
    f = *x;
    g = *y;
    if (!COEFF_IS_MPZ(f) && !COEFF_IS_MPZ(g))
    {
        h = f + g;
        if ((COEFF_MIN <= h && h <= COEFF_MAX) &&
            (COEFF_MIN <= c && c <= COEFF_MAX))
        {
            fmpz_set_si(z, h + c);
            return;
        }
    }
    fmpz_add(z, x, y);
    fmpz_add_si(z, z, c);
 }
 /* sets z = +/- ({src, n} >> shift) where 0 <= shift < FLINT_BITS
   and the top limb of src is nonzero */
 /* TODO: optimize for result = 1 limb */
 static __inline__ void
 fmpz_set_mpn_rshift(fmpz_t z, mp_srcptr src, mp_size_t n, unsigned int shift, int negative)
 {
    __mpz_struct * zptr;
    zptr = _fmpz_promote(z);
    if (zptr->_mp_alloc < n)
        mpz_realloc2(zptr, n * FLINT_BITS);
    if (shift == 0)
    {
        flint_mpn_copyi(zptr->_mp_d, src, n);
    }
    else
    {
        mpn_rshift(zptr->_mp_d, src, n, shift);
        while (zptr->_mp_d[n - 1] == 0) /* todo: can only do one iter? */
            n--;
    }
    zptr->_mp_size = negative ? -n : n;
    _fmpz_demote_val(z);
 }
 /* sets z = +/- {src, n} where n >= 2 and the top limb of src is nonzero */
 static __inline__ void
 fmpz_set_mpn_large(fmpz_t z, mp_srcptr src, mp_size_t n, int negative)
 {
    __mpz_struct * zz;
    zz = _fmpz_promote(z);
    if (zz->_mp_alloc < n)
        mpz_realloc2(zz, n * FLINT_BITS);
    flint_mpn_copyi(zz->_mp_d, src, n);
    zz->_mp_size = negative ? -n : n;
 }
 /* round away from zero */
 static __inline__ void
 fmpz_adiv_q_2exp(fmpz_t z, const fmpz_t x, mp_bitcnt_t exp)
 {
    int sign = fmpz_sgn(x);
    if (sign > 0)
        fmpz_cdiv_q_2exp(z, x, exp);
    else
        fmpz_fdiv_q_2exp(z, x, exp);
 }
 /* sets z = x + y*2^shift */
 static __inline__ void fmpz_add_mul2exp(fmpz_t z, const fmpz_t x, const fmpz_t y, ulong shift)
 {
    fmpz_t t;
    fmpz_init(t);
    fmpz_mul_2exp(t, y, shift);
    fmpz_add(z, x, t);
    fmpz_clear(t);
 }
 static __inline__ void fmpz_sub_mul2exp(fmpz_t z, const fmpz_t x, const fmpz_t y, ulong shift)
 {
    fmpz_t t;
    fmpz_init(t);
    fmpz_mul_2exp(t, y, shift);
    fmpz_sub(z, x, t);
    fmpz_clear(t);
 }
 static long _fmpz_sub_small_large(const fmpz_t x, const fmpz_t y)
 {
    fmpz_t t;
    fmpz_init(t);
    fmpz_sub(t, x, y);
    if (!COEFF_IS_MPZ(*t))
    {
        /* no need to free t */
        return *t;
    }
    else
    {
        int sign = fmpz_sgn(t);
        fmpz_clear(t);
        return (sign > 0) ? LONG_MAX : -LONG_MAX;
    }
 }
 static __inline__ long _fmpz_sub_small(const fmpz_t x, const fmpz_t y)
 {
    if (!COEFF_IS_MPZ(*x) && !COEFF_IS_MPZ(*y))
    {
        return (*x) - (*y);
    }
    else
    {
        return _fmpz_sub_small_large(x, y);
    }
 }
 static __inline__ mp_size_t
 _fmpz_size(const fmpz_t f)
 {
    fmpz d = *f;
    if (!COEFF_IS_MPZ(d))
        return 1;
    else
        return mpz_size(COEFF_TO_PTR(d));
 }
 static __inline__ void
 fmpz_ui_pow_ui(fmpz_t x, ulong b, ulong e)
 {
    if (e <= 1)
    {
        fmpz_set_ui(x, e == 0 ? 1UL : b);
    }
    else
    {
        fmpz_set_ui(x, b);
        fmpz_pow_ui(x, x, e);
    }
 }
 static __inline__ void
 fmpz_max(fmpz_t z, const fmpz_t x, const fmpz_t y)
 {
    if (fmpz_cmp(x, y) >= 0)
        fmpz_set(z, x);
    else
        fmpz_set(z, y);
 }
 static __inline__ void
 fmpz_min(fmpz_t z, const fmpz_t x, const fmpz_t y)
 {
    if (fmpz_cmp(x, y) < 0)
        fmpz_set(z, x);
    else
        fmpz_set(z, y);
 }
 #define FMPZ_GET_MPN_READONLY(zsign, zn, zptr, ztmp, zv) \
    if (!COEFF_IS_MPZ(zv)) \
    { \
        (zsign) = (zv) < 0; \
        (ztmp) = FLINT_ABS(zv); \
        (zptr) = &(ztmp); \
        (zn) = 1; \
    } \
    else \
    { \
        __mpz_struct * ___zz = COEFF_TO_PTR(zv); \
        (zptr) = ___zz->_mp_d; \
        (zn) = ___zz->_mp_size; \
        (zsign) = (zn) < 0; \
        (zn) = FLINT_ABS(zn); \
    }
 #ifdef __cplusplus
 }
 #endif
 #endif
--- a/fmpz_extras/sub_small_large.c
+++ b/fmpz_extras/sub_small_large.c
@ -0,0 +1,45 @@
 /*=============================================================================
    This file is part of ARB.
    ARB is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
    ARB is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with ARB; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 =============================================================================*/
 /******************************************************************************
    Copyright (C) 2014 Fredrik Johansson
 ******************************************************************************/
 #include "fmpz_extras.h"
 long
 _fmpz_sub_small_large(const fmpz_t x, const fmpz_t y)
 {
    fmpz_t t;
    fmpz_init(t);
    fmpz_sub(t, x, y);
    if (!COEFF_IS_MPZ(*t))
    {
        /* no need to free t */
        return *t;
    }
    else
    {
        int sign = fmpz_sgn(t);
        fmpz_clear(t);
        return (sign > 0) ? LONG_MAX : -LONG_MAX;
    }
 }