diff --git a/arf.h b/arf.h
index f3ff8cb3..bbba6050 100644
--- a/arf.h
+++ b/arf.h
@@ -104,7 +104,7 @@ arf_rnd_to_mpfr(arf_rnd_t rnd)
 #define ARF_IS_LAGOM(x) (ARF_EXP(x) >= ARF_MIN_LAGOM_EXP && \
                          ARF_EXP(x) <= ARF_MAX_LAGOM_EXP)
 
-/* At least two limbs (needs pointer). */
+/* More than two limbs (needs pointer). */
 #define ARF_HAS_PTR(x) ((x)->size > ((2 << 1) + 1))
 
 /* Raw size field (encodes both limb size and sign). */
@@ -466,8 +466,9 @@ int _arf_set_round_ui(arf_t x, ulong v, int sgnbit, long prec, arf_rnd_t rnd);
 
 /* Assumes xn > 0, x[0] != 0. */
 /* TBD: 1, 2 limb versions, top-aligned version */
-int arf_set_round_mpn(arf_t y, mp_srcptr x, mp_size_t xn,
-    int sgnbit, const fmpz_t topexp, long prec, arf_rnd_t rnd);
+int
+_arf_set_round_mpn(arf_t y, long * exp_shift, mp_srcptr x, mp_size_t xn,
+    int sgnbit, long prec, arf_rnd_t rnd);
 
 static __inline__ int
 arf_set_round_ui(arf_t x, ulong v, long prec, arf_rnd_t rnd)
@@ -484,7 +485,9 @@ arf_set_round_si(arf_t x, long v, long prec, arf_rnd_t rnd)
 static __inline__ int
 arf_set_round_mpz(arf_t y, const mpz_t x, long prec, arf_rnd_t rnd)
 {
+    int inexact;
     long size = x->_mp_size;
+    long fix;
 
     if (size == 0)
     {
@@ -492,8 +495,11 @@ arf_set_round_mpz(arf_t y, const mpz_t x, long prec, arf_rnd_t rnd)
         return 0;
     }
 
-    return arf_set_round_mpn(y, x->_mp_d, FLINT_ABS(size),
-        (size < 0), NULL, prec, rnd);
+    inexact = _arf_set_round_mpn(y, &fix, x->_mp_d, FLINT_ABS(size),
+        (size < 0), prec, rnd);
+    _fmpz_demote(ARF_EXPREF(y));
+    ARF_EXP(y) = FLINT_ABS(size) * FLINT_BITS + fix;
+    return inexact;
 }
 
 static __inline__ int
@@ -703,6 +709,29 @@ void arf_randtest_special(arf_t x, flint_rand_t state, long bits, long mag_bits)
 #define ADD2_FAST_MAX (COEFF_MAX / 4)
 #define ADD2_FAST_MIN (-ADD2_FAST_MAX)
 
+static __inline__ void
+_fmpz_set_fast(fmpz_t f, const fmpz_t g)
+{
+    if (!COEFF_IS_MPZ(*f) && !COEFF_IS_MPZ(*g))
+        *f = *g;
+    else
+        fmpz_set(f, g);
+}
+
+static __inline__ void
+_fmpz_add_fast(fmpz_t z, const fmpz_t x, long c)
+{
+    fmpz ze, xe;
+
+    ze = *z;
+    xe = *x;
+
+    if (!COEFF_IS_MPZ(ze) && (xe > ADD2_FAST_MIN && xe < ADD2_FAST_MAX))
+        *z = xe + c;
+    else
+        fmpz_add_si(z, x, c);
+}
+
 static __inline__ void
 _fmpz_add2_fast(fmpz_t z, const fmpz_t x, const fmpz_t y, long c)
 {
@@ -799,6 +828,50 @@ int arf_mul_rnd_down(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec);
         ? arf_mul_rnd_down(z, x, y, prec)        \
         : arf_mul_rnd_any(z, x, y, prec, rnd))
 
+#define ARF_ADD_STACK_ALLOC 40
+#define ARF_ADD_TLS_ALLOC 1000
+
+extern TLS_PREFIX mp_ptr __arf_add_tmp;
+extern TLS_PREFIX long __arf_add_alloc;
+
+extern void _arf_add_tmp_cleanup(void);
+
+#define ARF_ADD_TMP_DECL \
+    mp_limb_t tmp_stack[ARF_ADD_STACK_ALLOC]; \
+
+#define ARF_ADD_TMP_ALLOC(tmp, alloc) \
+    if (alloc <= ARF_ADD_STACK_ALLOC) \
+    { \
+        tmp = tmp_stack; \
+    } \
+    else if (alloc <= ARF_ADD_TLS_ALLOC) \
+    { \
+        if (__arf_add_alloc < alloc) \
+        { \
+            if (__arf_add_alloc == 0) \
+            { \
+                flint_register_cleanup_function(_arf_add_tmp_cleanup); \
+            } \
+            __arf_add_tmp = flint_realloc(__arf_add_tmp, sizeof(mp_limb_t) * alloc); \
+            __arf_add_alloc = alloc; \
+        } \
+        tmp = __arf_add_tmp; \
+    } \
+    else \
+    { \
+        tmp = flint_malloc(sizeof(mp_limb_t) * alloc); \
+    }
+
+#define ARF_ADD_TMP_FREE(tmp, alloc) \
+    if (alloc > ARF_ADD_TLS_ALLOC) \
+        flint_free(tmp);
+
+int _arf_add_mpn(arf_t z, mp_srcptr xp, mp_size_t xn, int xsgnbit,
+    const fmpz_t xexp, mp_srcptr yp, mp_size_t yn, int ysgnbit,
+    mp_bitcnt_t shift, long prec, arf_rnd_t rnd);
+
+int arf_add(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec, arf_rnd_t rnd);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/arf/add.c b/arf/add.c
new file mode 100644
index 00000000..04fec923
--- /dev/null
+++ b/arf/add.c
@@ -0,0 +1,91 @@
+/*=============================================================================
+
+    This file is part of ARB.
+
+    ARB is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    ARB is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with ARB; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+=============================================================================*/
+/******************************************************************************
+
+    Copyright (C) 2014 Fredrik Johansson
+
+******************************************************************************/
+
+#include "arf.h"
+
+int
+arf_add_special(arf_t z, const arf_t x, const arf_t y, long prec, arf_rnd_t rnd)
+{
+    if (arf_is_zero(x))
+    {
+        if (arf_is_zero(y))
+        {
+            arf_zero(z);
+            return 0;
+        }
+        else
+            return arf_set_round(z, y, prec, rnd);
+    }
+    else if (arf_is_zero(y))
+    {
+        return arf_set_round(z, x, prec, rnd);
+    }
+    else if (arf_is_nan(x) || arf_is_nan(y)
+        || (arf_is_pos_inf(x) && arf_is_neg_inf(y))
+        || (arf_is_neg_inf(x) && arf_is_pos_inf(y)))
+    {
+        arf_nan(z);
+        return 0;
+    }
+    else if (arf_is_special(x))
+    {
+        arf_set(z, x);
+        return 0;
+    }
+    else
+    {
+        arf_set(z, y);
+        return 0;
+    }
+}
+
+int
+arf_add(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec, arf_rnd_t rnd)
+{
+    mp_size_t xn, yn;
+    mp_srcptr xptr, yptr;
+    long shift;
+
+    if (arf_is_special(x) || arf_is_special(y))
+    {
+        return arf_add_special(z, x, y, prec, rnd);
+    }
+
+    shift = _fmpz_sub_small(ARF_EXPREF(x), ARF_EXPREF(y));
+
+    if (shift < 0)
+    {
+        arf_srcptr __t;
+        __t = x; x = y; y = __t;
+        shift = -shift;
+    }
+
+    ARF_GET_MPN_READONLY(xptr, xn, x);
+    ARF_GET_MPN_READONLY(yptr, yn, y);
+
+    return _arf_add_mpn(z, xptr, xn, ARF_SGNBIT(x), ARF_EXPREF(x),
+                           yptr, yn, ARF_SGNBIT(y), shift, prec, rnd);
+}
+
diff --git a/arf/add_mpn.c b/arf/add_mpn.c
new file mode 100644
index 00000000..5f523665
--- /dev/null
+++ b/arf/add_mpn.c
@@ -0,0 +1,248 @@
+/*=============================================================================
+
+    This file is part of ARB.
+
+    ARB is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    ARB is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with ARB; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+=============================================================================*/
+/******************************************************************************
+
+    Copyright (C) 2014 Fredrik Johansson
+
+******************************************************************************/
+
+#include "arf.h"
+
+TLS_PREFIX mp_ptr __arf_add_tmp = NULL;
+TLS_PREFIX long __arf_add_alloc = 0;
+
+void _arf_add_tmp_cleanup(void)
+{
+    flint_free(__arf_add_tmp);
+    __arf_add_tmp = NULL;
+    __arf_add_alloc = 0;
+}
+
+/* Assumptions: top limbs of x and y nonzero. */
+int
+_arf_add_mpn(arf_t z, mp_srcptr xp, mp_size_t xn, int xsgnbit, const fmpz_t xexp,
+                      mp_srcptr yp, mp_size_t yn, int ysgnbit, mp_bitcnt_t shift,
+                      long prec, arf_rnd_t rnd)
+{
+    mp_size_t wn, zn, zn_original, alloc, xbase, wbase;
+    mp_size_t shift_limbs;
+    mp_bitcnt_t shift_bits;
+    int inexact;
+    long fix;
+    mp_limb_t cy;
+    mp_ptr tmp;
+    ARF_ADD_TMP_DECL
+
+    /* x +/- eps */
+    if (shift > prec + 1 &&
+        prec != ARF_PREC_EXACT &&
+        shift > (xn + 1) * FLINT_BITS)
+    {
+        zn = (prec + FLINT_BITS - 1) / FLINT_BITS;
+        zn_original = zn = FLINT_MAX(zn, xn) + 2;
+        shift_limbs = zn - xn;
+        alloc = zn + 1;
+
+        ARF_ADD_TMP_ALLOC(tmp, alloc)
+
+        flint_mpn_zero(tmp, shift_limbs);
+        flint_mpn_copyi(tmp + shift_limbs, xp, xn);
+
+        if (xsgnbit == ysgnbit)
+        {
+            tmp[0] = 1;
+        }
+        else
+        {
+            mpn_sub_1(tmp, tmp, zn, 1);
+            while (tmp[zn-1] == 0)
+                zn--;
+        }
+
+        _arf_set_round_mpn(z, &fix, tmp, zn, xsgnbit, prec, rnd);
+        fix += (zn - zn_original) * FLINT_BITS;
+        _fmpz_add_fast(ARF_EXPREF(z), xexp, fix);
+
+        ARF_ADD_TMP_FREE(tmp, alloc)
+        return 1;
+    }
+
+    shift_limbs = shift / FLINT_BITS;
+    shift_bits = shift % FLINT_BITS;
+
+    /* TODO: shift == 0 optimization! (common case) */
+
+    /* w = y shifted */
+    wn = yn + (shift_bits != 0);
+
+    /* Size of sum (possibly one more limb than this for carry). */
+    zn_original = zn = FLINT_MAX(xn, wn + shift_limbs);
+    alloc = zn + 1;
+
+    ARF_ADD_TMP_ALLOC(tmp, alloc)
+
+    /* Low limbs of both operands */
+    xbase = zn - xn;
+    wbase = zn - shift_limbs - wn;
+
+    /* Shift y to its correct offset in the sum. */
+    if (shift_bits == 0)
+        flint_mpn_copyi(tmp + wbase, yp, yn);
+    else
+        tmp[wbase] = cy = mpn_rshift(tmp + wbase + 1, yp, yn, shift_bits);
+
+    if (xsgnbit == ysgnbit)
+    {
+        if (shift_limbs >= xn)
+        {
+            /*   XXXXXX
+                         WWWWWWW    */
+            flint_mpn_zero(tmp + wn, zn - xn - wn);
+            flint_mpn_copyi(tmp + zn - xn, xp, xn);
+            cy = 0;
+        }
+        else if (shift_limbs == 0)
+        {
+            /*  XXXXXX        or    XXXXX
+                WWWW                WWWWWWW     */
+            if (wn < xn)
+            {
+                flint_mpn_copyi(tmp, xp, xn - wn);
+                cy = mpn_add_n(tmp + wbase, xp + wbase, tmp + wbase, wn);
+            }
+            else
+            {
+                cy = mpn_add_n(tmp + xbase, xp, tmp + xbase, xn);
+            }
+        }
+        else if (xbase >= wbase)
+        {
+            /* XXXXXX         or  XXXXXX
+                 WWWWWWW            WWWW    */
+            cy = mpn_add_n(tmp + xbase, tmp + xbase, xp, wn - xbase);
+            cy = mpn_add_1(tmp + wn, xp + wn - xbase, zn - wn, cy);
+        }
+        else
+        {
+            /* XXXXXXXX           todo: merge with above?
+                  WWW    */
+            flint_mpn_copyi(tmp, xp, wbase);
+            cy = mpn_add_n(tmp + wbase, tmp + wbase, xp + wbase, wn);
+            cy = mpn_add_1(tmp + zn - shift_limbs, xp + xn - shift_limbs, shift_limbs, cy);
+        }
+
+        /* There could be a carry. */
+        tmp[zn] = cy;
+        zn += cy;
+    }
+    else
+    {
+        if (shift_limbs >= xn)
+        {
+            /*   XXXXXX
+                         WWWWWWW    */
+            mpn_neg(tmp, tmp, wn);
+            flint_mpn_store(tmp + wn, zn - xn - wn, -LIMB_ONE);
+            mpn_sub_1(tmp + zn - xn, xp, xn, 1);
+        }
+        else if (shift_limbs == 0)
+        {
+            /*  XXXXXX (1)    or    XXXXX   (2)
+                WWWW                WWWWWWW     */
+            if (wn <= xn) /* (1) */
+            {
+                if (mpn_cmp(xp + wbase, tmp + wbase, wn) >= 0)
+                {
+                    flint_mpn_copyi(tmp, xp, wbase);
+                    mpn_sub_n(tmp + wbase, xp + wbase, tmp + wbase, wn);
+                }
+                else
+                {
+                    xsgnbit = ysgnbit;
+                    mpn_sub_n(tmp + wbase, tmp + wbase, xp + wbase, wn);
+                    if (wbase != 0)
+                    {
+                        cy = mpn_neg_n(tmp, xp, wbase);
+                        mpn_sub_1(tmp + wbase, tmp + wbase, wn, cy);
+                    }
+                }
+            }
+            else /* (2) */
+            {
+                if (mpn_cmp(tmp + xbase, xp, xn) >= 0)
+                {
+                    xsgnbit = ysgnbit;
+                    mpn_sub_n(tmp + xbase, tmp + xbase, xp, xn);
+                }
+                else
+                {
+                    cy = mpn_neg(tmp, tmp, xbase);
+                    mpn_sub_n(tmp + xbase, xp, tmp + xbase, xn);
+                    mpn_sub_1(tmp + xbase, tmp + xbase, xn, cy);
+                }
+            }
+        }
+        else if (xbase >= wbase)
+        {
+            /* XXXXXX         or  XXXXXX
+                 WWWWWWW            WWWW    */
+            if (xbase > 0)
+            {
+                cy = mpn_sub_n(tmp + xbase, xp, tmp + xbase, wn - xbase);
+                cy = mpn_sub_1(tmp + wn, xp + xn - shift_limbs, shift_limbs, cy);
+                cy = mpn_neg(tmp, tmp, xbase);
+                mpn_sub_1(tmp + xbase, tmp + xbase, xn, cy);
+            }
+            else
+            {
+                cy = mpn_sub_n(tmp, xp, tmp, wn);
+                cy = mpn_sub_1(tmp + wn, xp + wn, xn - wn, cy);
+            }
+        }
+        else
+        {
+            /* XXXXXXXX
+                  WWW    */
+            flint_mpn_copyi(tmp, xp, wbase);
+            cy = mpn_sub_n(tmp + wbase, xp + wbase, tmp + wbase, wn);
+            mpn_sub_1(tmp + zn - shift_limbs, xp + xn - shift_limbs, shift_limbs, cy);
+        }
+
+        /* There could be cancellation. */
+        while (zn > 0 && tmp[zn - 1] == 0)
+            zn--;
+    }
+
+    if (zn == 0)
+    {
+        arf_zero(z);
+        inexact = 0;
+    }
+    else
+    {
+        inexact = _arf_set_round_mpn(z, &fix, tmp, zn, xsgnbit, prec, rnd);
+        fix += (zn - zn_original) * FLINT_BITS;
+        _fmpz_add_fast(ARF_EXPREF(z), xexp, fix);
+    }
+
+    ARF_ADD_TMP_FREE(tmp, alloc)
+    return inexact;
+}
+
diff --git a/arf/mul_rnd_any.c b/arf/mul_rnd_any.c
index 9a34639f..08b32d19 100644
--- a/arf/mul_rnd_any.c
+++ b/arf/mul_rnd_any.c
@@ -40,6 +40,7 @@ arf_mul_rnd_any(arf_ptr z, arf_srcptr x, arf_srcptr y,
         long prec, arf_rnd_t rnd)
 {
     mp_size_t xn, yn;
+    long fix;
     int sgnbit, inexact;
 
     xn = ARF_XSIZE(x);
@@ -90,8 +91,8 @@ arf_mul_rnd_any(arf_ptr z, arf_srcptr x, arf_srcptr y,
             mpn_mul(tmp, xptr, xn, yptr, yn);
         }
 
-        _fmpz_add2_fast(ARF_EXPREF(z), ARF_EXPREF(x), ARF_EXPREF(y), 0);
-        inexact = arf_set_round_mpn(z, tmp, zn, sgnbit, ARF_EXPREF(z), prec, rnd);
+        inexact = _arf_set_round_mpn(z, &fix, tmp, zn, sgnbit, prec, rnd);
+        _fmpz_add2_fast(ARF_EXPREF(z), ARF_EXPREF(x), ARF_EXPREF(y), fix);
         ARF_MUL_TMP_FREE(tmp, alloc)
 
         return inexact;
diff --git a/arf/mul_rnd_down.c b/arf/mul_rnd_down.c
index c8f031df..0fe7b8b4 100644
--- a/arf/mul_rnd_down.c
+++ b/arf/mul_rnd_down.c
@@ -30,6 +30,7 @@ arf_mul_rnd_down(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec)
 {
     mp_size_t xn, yn, zn;
     mp_limb_t hi, lo;
+    long expfix;
     int sgnbit, ret, fix;
     mp_ptr zptr;
 
@@ -184,8 +185,8 @@ arf_mul_rnd_down(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec)
         }
 
         zn = xn + yn;
-        _fmpz_add2_fast(ARF_EXPREF(z), ARF_EXPREF(x), ARF_EXPREF(y), 0);
-        ret = arf_set_round_mpn(z, zz, zn, sgnbit, ARF_EXPREF(z), prec, ARF_RND_DOWN);
+        ret = _arf_set_round_mpn(z, &expfix, zz, zn, sgnbit, prec, ARF_RND_DOWN);
+        _fmpz_add2_fast(ARF_EXPREF(z), ARF_EXPREF(x), ARF_EXPREF(y), expfix);
         return ret;
     }
     else if (yn > MUL_MPFR_MIN_LIMBS && prec != ARF_PREC_EXACT
@@ -223,8 +224,8 @@ arf_mul_rnd_down(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec)
             mpn_mul(tmp, xptr, xn, yptr, yn);
         }
 
-        _fmpz_add2_fast(ARF_EXPREF(z), ARF_EXPREF(x), ARF_EXPREF(y), 0);
-        ret = arf_set_round_mpn(z, tmp, zn, sgnbit, ARF_EXPREF(z), prec, ARF_RND_DOWN);
+        ret = _arf_set_round_mpn(z, &expfix, tmp, zn, sgnbit, prec, ARF_RND_DOWN);
+        _fmpz_add2_fast(ARF_EXPREF(z), ARF_EXPREF(x), ARF_EXPREF(y), expfix);
         ARF_MUL_TMP_FREE(tmp, alloc)
 
         return ret;
diff --git a/arf/neg_round.c b/arf/neg_round.c
index 55884a71..2dac26d1 100644
--- a/arf/neg_round.c
+++ b/arf/neg_round.c
@@ -50,10 +50,14 @@ arf_neg_round(arf_t y, const arf_t x, long prec, arf_rnd_t rnd)
         {
             mp_srcptr xptr;
             mp_size_t xn;
+            int inexact;
+            long fix;
 
             ARF_GET_MPN_READONLY(xptr, xn, x);
-            return arf_set_round_mpn(y, xptr, xn, ARF_SGNBIT(x) ^ 1,
-                ARF_EXPREF(x), prec, rnd);
+            inexact = _arf_set_round_mpn(y, &fix, xptr, xn,
+                ARF_SGNBIT(x) ^ 1, prec, rnd);
+            _fmpz_add_fast(ARF_EXPREF(y), ARF_EXPREF(x), fix);
+            return inexact;
         }
     }
 }
diff --git a/arf/set_round.c b/arf/set_round.c
index aa32b32c..4ded050d 100644
--- a/arf/set_round.c
+++ b/arf/set_round.c
@@ -50,10 +50,14 @@ arf_set_round(arf_t y, const arf_t x, long prec, arf_rnd_t rnd)
         {
             mp_srcptr xptr;
             mp_size_t xn;
+            long fix;
+            int inexact;
 
             ARF_GET_MPN_READONLY(xptr, xn, x);
-            return arf_set_round_mpn(y, xptr, xn, ARF_SGNBIT(x),
-                ARF_EXPREF(x), prec, rnd);
+            inexact = _arf_set_round_mpn(y, &fix, xptr, xn,
+                ARF_SGNBIT(x), prec, rnd);
+            _fmpz_add_fast(ARF_EXPREF(y), ARF_EXPREF(x), fix);
+            return inexact;
         }
     }
 }
diff --git a/arf/set_round_mpn.c b/arf/set_round_mpn.c
index 4f830ce2..c9425cfb 100644
--- a/arf/set_round_mpn.c
+++ b/arf/set_round_mpn.c
@@ -36,8 +36,8 @@ _arf_set_mpn_roundup_power_two(arf_t y, int sgnbit)
 }
 
 int
-arf_set_round_mpn(arf_t y, mp_srcptr x, mp_size_t xn,
-    int sgnbit, const fmpz_t topexp, long prec, arf_rnd_t rnd)
+_arf_set_round_mpn(arf_t y, long * exp_shift, mp_srcptr x, mp_size_t xn,
+    int sgnbit, long prec, arf_rnd_t rnd)
 {
     unsigned int leading;
     mp_bitcnt_t exp, bc, val, val_bits;
@@ -51,10 +51,7 @@ arf_set_round_mpn(arf_t y, mp_srcptr x, mp_size_t xn,
     exp = xn * FLINT_BITS - leading;
 
     /* Set exponent. */
-    if (topexp == NULL)
-        fmpz_set_ui(ARF_EXPREF(y), exp);
-    else
-        fmpz_sub_si_inline(ARF_EXPREF(y), topexp, leading);
+    *exp_shift = -(long) leading;
 
     /* Find first nonzero bit. */
     val_limbs = 0;
@@ -112,7 +109,10 @@ arf_set_round_mpn(arf_t y, mp_srcptr x, mp_size_t xn,
             /* Overflow to next power of two (unlikely). */
             if (val == exp)
             {
-                _arf_set_mpn_roundup_power_two(y, sgnbit);
+                exp_shift[0]++;
+                ARF_DEMOTE(y);
+                ARF_NOPTR_D(y)[0] = LIMB_TOP;
+                ARF_XSIZE(y) = ARF_MAKE_XSIZE(1, sgnbit);
                 return 1;
             }
         }
diff --git a/arf/set_round_ui.c b/arf/set_round_ui.c
index 99a22c4d..af40cb85 100644
--- a/arf/set_round_ui.c
+++ b/arf/set_round_ui.c
@@ -25,6 +25,35 @@
 
 #include "arf.h"
 
+/* Top-aligns the nonzero limb v and rounds it to prec bit. */
+#define ARF_NORMALISE_ROUND_LIMB(inexact, exp, v,               \
+                                    sgnbit, prec, rnd)          \
+    do {                                                        \
+        count_leading_zeros(exp, v);                            \
+        v <<= exp;                                              \
+        exp = FLINT_BITS - exp;                                 \
+        if (prec >= exp)                                        \
+        {                                                       \
+            inexact = 0;                                        \
+        }                                                       \
+        else                                                    \
+        {                                                       \
+            mp_limb_t __t = v;                                  \
+            v = MASK_LIMB(v, FLINT_BITS - prec);                \
+            inexact = (__t != v);                               \
+            if (inexact && arf_rounds_up(rnd, sgnbit))          \
+            {                                                   \
+                v += (LIMB_ONE << (FLINT_BITS - prec));         \
+                if (v == 0)                                     \
+                {                                               \
+                    v = LIMB_TOP;                               \
+                    exp++;                                      \
+                }                                               \
+            }                                                   \
+        }                                                       \
+    }                                                           \
+    while (0)
+
 int
 _arf_set_round_ui(arf_t x, ulong v, int sgnbit, long prec, arf_rnd_t rnd)
 {
@@ -35,40 +64,12 @@ _arf_set_round_ui(arf_t x, ulong v, int sgnbit, long prec, arf_rnd_t rnd)
     }
     else
     {
-        unsigned int bc, mask_bits;
-        int inexact;
-        mp_limb_t t;
+        int exp, inexact;
 
-        count_leading_zeros(bc, v);
-        v <<= bc;
-        bc = FLINT_BITS - bc;
+        ARF_NORMALISE_ROUND_LIMB(inexact, exp, v, sgnbit, prec, rnd);
 
-        if (prec >= bc)
-        {
-            inexact = 0;
-        }
-        else
-        {
-            mask_bits = FLINT_BITS - prec;
-
-            t = v;
-            v = (v >> mask_bits) << mask_bits;
-            inexact = (t != v);
-
-            if (inexact && arf_rounds_up(rnd, sgnbit))
-            {
-                v += (LIMB_ONE << (FLINT_BITS - prec));
-
-                /* Overflow to the next power of two (unlikely). */
-                if (v == 0)
-                {
-                    v = LIMB_TOP;
-                    bc++;
-                }
-            }
-        }
-
-        fmpz_set_ui(ARF_EXPREF(x), bc);
+        _fmpz_demote(ARF_EXPREF(x));
+        ARF_EXP(x) = exp;
         ARF_DEMOTE(x);
         ARF_XSIZE(x) = ARF_MAKE_XSIZE(1, sgnbit);
         ARF_NOPTR_D(x)[0] = v;
diff --git a/arf/test/t-add.c b/arf/test/t-add.c
new file mode 100644
index 00000000..0cd0dcb2
--- /dev/null
+++ b/arf/test/t-add.c
@@ -0,0 +1,182 @@
+/*=============================================================================
+
+    This file is part of ARB.
+
+    ARB is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    ARB is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with ARB; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+=============================================================================*/
+/******************************************************************************
+
+    Copyright (C) 2012 Fredrik Johansson
+
+******************************************************************************/
+
+#include "arf.h"
+
+int
+arf_add_naive(arf_t z, const arf_t x, const arf_t y, long prec, arf_rnd_t rnd)
+{
+    fmpr_t a, b;
+    long r;
+
+    fmpr_init(a);
+    fmpr_init(b);
+
+    arf_get_fmpr(a, x);
+    arf_get_fmpr(b, y);
+
+    r = fmpr_add(a, a, b, prec, rnd);
+
+    arf_set_fmpr(z, a);
+
+    fmpr_clear(a);
+    fmpr_clear(b);
+
+    return (r == FMPR_RESULT_EXACT) ? 0 : 1;
+}
+
+int main()
+{
+    long iter, iter2;
+    flint_rand_t state;
+
+    printf("add....");
+    fflush(stdout);
+
+    flint_randinit(state);
+
+    for (iter = 0; iter < 10000; iter++)
+    {
+        arf_t x, y, z, v;
+        long prec, r1, r2;
+        arf_rnd_t rnd;
+
+        arf_init(x);
+        arf_init(y);
+        arf_init(z);
+        arf_init(v);
+
+        for (iter2 = 0; iter2 < 100; iter2++)
+        {
+            arf_randtest_special(x, state, 2000, 100);
+            arf_randtest_special(y, state, 2000, 100);
+            prec = 2 + n_randint(state, 2000);
+
+            if (n_randint(state, 10) == 0 &&
+                fmpz_bits(ARF_EXPREF(x)) < 10 &&
+                fmpz_bits(ARF_EXPREF(y)) < 10)
+            {
+                prec = ARF_PREC_EXACT;
+            }
+
+            switch (n_randint(state, 4))
+            {
+                case 0:  rnd = ARF_RND_DOWN; break;
+                case 1:  rnd = ARF_RND_UP; break;
+                case 2:  rnd = ARF_RND_FLOOR; break;
+                default: rnd = ARF_RND_CEIL; break;
+            }
+
+            switch (n_randint(state, 5))
+            {
+            case 0:
+                r1 = arf_add(z, x, y, prec, rnd);
+                r2 = arf_add_naive(v, x, y, prec, rnd);
+                if (!arf_equal(z, v) || r1 != r2)
+                {
+                    printf("FAIL!\n");
+                    printf("prec = %ld, rnd = %d\n\n", prec, rnd);
+                    printf("x = "); arf_print(x); printf("\n\n");
+                    printf("y = "); arf_print(y); printf("\n\n");
+                    printf("z = "); arf_print(z); printf("\n\n");
+                    printf("v = "); arf_print(v); printf("\n\n");
+                    printf("r1 = %ld, r2 = %ld\n", r1, r2);
+                    abort();
+                }
+                break;
+
+            case 1:
+                r1 = arf_add(z, x, x, prec, rnd);
+                r2 = arf_add_naive(v, x, x, prec, rnd);
+                if (!arf_equal(z, v) || r1 != r2)
+                {
+                    printf("FAIL (aliasing 1)!\n");
+                    printf("prec = %ld, rnd = %d\n\n", prec, rnd);
+                    printf("x = "); arf_print(x); printf("\n\n");
+                    printf("z = "); arf_print(z); printf("\n\n");
+                    printf("v = "); arf_print(v); printf("\n\n");
+                    printf("r1 = %ld, r2 = %ld\n", r1, r2);
+                    abort();
+                }
+                break;
+
+            case 2:
+                r2 = arf_add_naive(v, x, x, prec, rnd);
+                r1 = arf_add(x, x, x, prec, rnd);
+                if (!arf_equal(v, x) || r1 != r2)
+                {
+                    printf("FAIL (aliasing 2)!\n");
+                    printf("prec = %ld, rnd = %d\n\n", prec, rnd);
+                    printf("x = "); arf_print(x); printf("\n\n");
+                    printf("z = "); arf_print(z); printf("\n\n");
+                    printf("v = "); arf_print(v); printf("\n\n");
+                    printf("r1 = %ld, r2 = %ld\n", r1, r2);
+                    abort();
+                }
+                break;
+
+            case 3:
+                r2 = arf_add_naive(v, x, y, prec, rnd);
+                r1 = arf_add(x, x, y, prec, rnd);
+                if (!arf_equal(x, v) || r1 != r2)
+                {
+                    printf("FAIL (aliasing 3)!\n");
+                    printf("prec = %ld, rnd = %d\n\n", prec, rnd);
+                    printf("x = "); arf_print(x); printf("\n\n");
+                    printf("y = "); arf_print(y); printf("\n\n");
+                    printf("v = "); arf_print(v); printf("\n\n");
+                    printf("r1 = %ld, r2 = %ld\n", r1, r2);
+                    abort();
+                }
+                break;
+
+            default:
+                r2 = arf_add_naive(v, x, y, prec, rnd);
+                r1 = arf_add(x, y, x, prec, rnd);
+                if (!arf_equal(x, v) || r1 != r2)
+                {
+                    printf("FAIL (aliasing 4)!\n");
+                    printf("prec = %ld, rnd = %d\n\n", prec, rnd);
+                    printf("x = "); arf_print(x); printf("\n\n");
+                    printf("y = "); arf_print(y); printf("\n\n");
+                    printf("v = "); arf_print(v); printf("\n\n");
+                    printf("r1 = %ld, r2 = %ld\n", r1, r2);
+                    abort();
+                }
+                break;
+            }
+        }
+
+        arf_clear(x);
+        arf_clear(y);
+        arf_clear(z);
+        arf_clear(v);
+    }
+
+    flint_randclear(state);
+    flint_cleanup();
+    printf("PASS\n");
+    return EXIT_SUCCESS;
+}
diff --git a/fmpz_extras.h b/fmpz_extras.h
index 140ee312..5ca26651 100644
--- a/fmpz_extras.h
+++ b/fmpz_extras.h
@@ -200,25 +200,10 @@ static __inline__ void fmpz_sub_mul2exp(fmpz_t z, const fmpz_t x, const fmpz_t y
     fmpz_clear(t);
 }
 
-static long _fmpz_sub_small_large(const fmpz_t x, const fmpz_t y)
-{
-    fmpz_t t;
-    fmpz_init(t);
-    fmpz_sub(t, x, y);
-    if (!COEFF_IS_MPZ(*t))
-    {
-        /* no need to free t */
-        return *t;
-    }
-    else
-    {
-        int sign = fmpz_sgn(t);
-        fmpz_clear(t);
-        return (sign > 0) ? LONG_MAX : -LONG_MAX;
-    }
-}
+long _fmpz_sub_small_large(const fmpz_t x, const fmpz_t y);
 
-static __inline__ long _fmpz_sub_small(const fmpz_t x, const fmpz_t y)
+static __inline__ long
+_fmpz_sub_small(const fmpz_t x, const fmpz_t y)
 {
     if (!COEFF_IS_MPZ(*x) && !COEFF_IS_MPZ(*y))
     {
diff --git a/fmpz_extras.h.save b/fmpz_extras.h.save
new file mode 100644
index 00000000..d4a98b9e
--- /dev/null
+++ b/fmpz_extras.h.save
@@ -0,0 +1,299 @@
+/*=============================================================================
+
+    This file is part of ARB.
+
+    ARB is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    ARB is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with ARB; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+=============================================================================*/
+/******************************************************************************
+
+    Copyright (C) 2013 Fredrik Johansson
+
+******************************************************************************/
+
+#ifndef FMPZ_EXTRAS_H
+#define FMPZ_EXTRAS_H
+
+#include <limits.h>
+#include "flint.h"
+#include "fmpz.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static __inline__ void
+fmpz_add_inline(fmpz_t z, const fmpz_t x, const fmpz_t y)
+{
+    fmpz f, g;
+
+    f = *x;
+    g = *y;
+
+    if (!COEFF_IS_MPZ(f) && !COEFF_IS_MPZ(g))
+        fmpz_set_si(z, f + g);
+    else
+        fmpz_add(z, x, y);
+}
+
+static __inline__ void
+fmpz_add_si(fmpz_t z, const fmpz_t x, long y)
+{
+    if (y >= 0)
+        fmpz_add_ui(z, x, y);
+    else
+        fmpz_sub_ui(z, x, -y);
+}
+
+static __inline__ void
+fmpz_sub_si(fmpz_t z, const fmpz_t x, long y)
+{
+    if (y >= 0)
+        fmpz_sub_ui(z, x, y);
+    else
+        fmpz_add_ui(z, x, -y);
+}
+
+static __inline__ void
+fmpz_add_si_inline(fmpz_t z, const fmpz_t x, long y)
+{
+    fmpz f;
+
+    f = *x;
+
+    if (!COEFF_IS_MPZ(f) && (COEFF_MIN <= y && y <= COEFF_MAX))
+        fmpz_set_si(z, f + y);
+    else
+        fmpz_add_si(z, x, y);
+}
+
+static __inline__ void
+fmpz_sub_si_inline(fmpz_t z, const fmpz_t x, long y)
+{
+    fmpz f;
+
+    f = *x;
+
+    if (!COEFF_IS_MPZ(f) && (COEFF_MIN <= y && y <= COEFF_MAX))
+        fmpz_set_si(z, f - y);
+    else
+        fmpz_sub_si(z, x, y);
+}
+
+static __inline__ void
+fmpz_add_ui_inline(fmpz_t z, const fmpz_t x, ulong y)
+{
+    fmpz f = *x;
+
+    if (!COEFF_IS_MPZ(f) && y <= COEFF_MAX)
+        fmpz_set_si(z, f + y);
+    else
+        fmpz_add_ui(z, x, y);
+
+#if}
+
+static __inline__ void
+fmpz_add2_fmpz_si_inline(fmpz_t z, const fmpz_t x, const fmpz_t y, long c)
+{
+    fmpz f, g, h;
+
+    f = *x;
+    g = *y;
+
+    if (!COEFF_IS_MPZ(f) && !COEFF_IS_MPZ(g))
+    {
+        h = f + g;
+
+        if ((COEFF_MIN <= h && h <= COEFF_MAX) &&
+            (COEFF_MIN <= c && c <= COEFF_MAX))
+        {
+            fmpz_set_si(z, h + c);
+            return;
+        }
+    }
+
+    fmpz_add(z, x, y);
+    fmpz_add_si(z, z, c);
+}
+
+/* sets z = +/- ({src, n} >> shift) where 0 <= shift < FLINT_BITS
+   and the top limb of src is nonzero */
+/* TODO: optimize for result = 1 limb */
+
+static __inline__ void
+fmpz_set_mpn_rshift(fmpz_t z, mp_srcptr src, mp_size_t n, unsigned int shift, int negative)
+{
+    __mpz_struct * zptr;
+    zptr = _fmpz_promote(z);
+
+    if (zptr->_mp_alloc < n)
+        mpz_realloc2(zptr, n * FLINT_BITS);
+
+    if (shift == 0)
+    {
+        flint_mpn_copyi(zptr->_mp_d, src, n);
+    }
+    else
+    {
+        mpn_rshift(zptr->_mp_d, src, n, shift);
+        while (zptr->_mp_d[n - 1] == 0) /* todo: can only do one iter? */
+            n--;
+    }
+
+    zptr->_mp_size = negative ? -n : n;
+    _fmpz_demote_val(z);
+}
+
+/* sets z = +/- {src, n} where n >= 2 and the top limb of src is nonzero */
+static __inline__ void
+fmpz_set_mpn_large(fmpz_t z, mp_srcptr src, mp_size_t n, int negative)
+{
+    __mpz_struct * zz;
+    zz = _fmpz_promote(z);
+
+    if (zz->_mp_alloc < n)
+        mpz_realloc2(zz, n * FLINT_BITS);
+
+    flint_mpn_copyi(zz->_mp_d, src, n);
+    zz->_mp_size = negative ? -n : n;
+}
+
+/* round away from zero */
+static __inline__ void
+fmpz_adiv_q_2exp(fmpz_t z, const fmpz_t x, mp_bitcnt_t exp)
+{
+    int sign = fmpz_sgn(x);
+
+    if (sign > 0)
+        fmpz_cdiv_q_2exp(z, x, exp);
+    else
+        fmpz_fdiv_q_2exp(z, x, exp);
+}
+
+/* sets z = x + y*2^shift */
+static __inline__ void fmpz_add_mul2exp(fmpz_t z, const fmpz_t x, const fmpz_t y, ulong shift)
+{
+    fmpz_t t;
+    fmpz_init(t);
+    fmpz_mul_2exp(t, y, shift);
+    fmpz_add(z, x, t);
+    fmpz_clear(t);
+}
+
+static __inline__ void fmpz_sub_mul2exp(fmpz_t z, const fmpz_t x, const fmpz_t y, ulong shift)
+{
+    fmpz_t t;
+    fmpz_init(t);
+    fmpz_mul_2exp(t, y, shift);
+    fmpz_sub(z, x, t);
+    fmpz_clear(t);
+}
+
+static long _fmpz_sub_small_large(const fmpz_t x, const fmpz_t y)
+{
+    fmpz_t t;
+    fmpz_init(t);
+    fmpz_sub(t, x, y);
+    if (!COEFF_IS_MPZ(*t))
+    {
+        /* no need to free t */
+        return *t;
+    }
+    else
+    {
+        int sign = fmpz_sgn(t);
+        fmpz_clear(t);
+        return (sign > 0) ? LONG_MAX : -LONG_MAX;
+    }
+}
+
+static __inline__ long _fmpz_sub_small(const fmpz_t x, const fmpz_t y)
+{
+    if (!COEFF_IS_MPZ(*x) && !COEFF_IS_MPZ(*y))
+    {
+        return (*x) - (*y);
+    }
+    else
+    {
+        return _fmpz_sub_small_large(x, y);
+    }
+}
+
+static __inline__ mp_size_t
+_fmpz_size(const fmpz_t f)
+{
+    fmpz d = *f;
+
+    if (!COEFF_IS_MPZ(d))
+        return 1;
+    else
+        return mpz_size(COEFF_TO_PTR(d));
+}
+
+static __inline__ void
+fmpz_ui_pow_ui(fmpz_t x, ulong b, ulong e)
+{
+    if (e <= 1)
+    {
+        fmpz_set_ui(x, e == 0 ? 1UL : b);
+    }
+    else
+    {
+        fmpz_set_ui(x, b);
+        fmpz_pow_ui(x, x, e);
+    }
+}
+
+static __inline__ void
+fmpz_max(fmpz_t z, const fmpz_t x, const fmpz_t y)
+{
+    if (fmpz_cmp(x, y) >= 0)
+        fmpz_set(z, x);
+    else
+        fmpz_set(z, y);
+}
+
+static __inline__ void
+fmpz_min(fmpz_t z, const fmpz_t x, const fmpz_t y)
+{
+    if (fmpz_cmp(x, y) < 0)
+        fmpz_set(z, x);
+    else
+        fmpz_set(z, y);
+}
+
+#define FMPZ_GET_MPN_READONLY(zsign, zn, zptr, ztmp, zv) \
+    if (!COEFF_IS_MPZ(zv)) \
+    { \
+        (zsign) = (zv) < 0; \
+        (ztmp) = FLINT_ABS(zv); \
+        (zptr) = &(ztmp); \
+        (zn) = 1; \
+    } \
+    else \
+    { \
+        __mpz_struct * ___zz = COEFF_TO_PTR(zv); \
+        (zptr) = ___zz->_mp_d; \
+        (zn) = ___zz->_mp_size; \
+        (zsign) = (zn) < 0; \
+        (zn) = FLINT_ABS(zn); \
+    }
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
diff --git a/fmpz_extras/sub_small_large.c b/fmpz_extras/sub_small_large.c
new file mode 100644
index 00000000..e59769df
--- /dev/null
+++ b/fmpz_extras/sub_small_large.c
@@ -0,0 +1,45 @@
+/*=============================================================================
+
+    This file is part of ARB.
+
+    ARB is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    ARB is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with ARB; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+=============================================================================*/
+/******************************************************************************
+
+    Copyright (C) 2014 Fredrik Johansson
+
+******************************************************************************/
+
+#include "fmpz_extras.h"
+
+long
+_fmpz_sub_small_large(const fmpz_t x, const fmpz_t y)
+{
+    fmpz_t t;
+    fmpz_init(t);
+    fmpz_sub(t, x, y);
+    if (!COEFF_IS_MPZ(*t))
+    {
+        /* no need to free t */
+        return *t;
+    }
+    else
+    {
+        int sign = fmpz_sgn(t);
+        fmpz_clear(t);
+        return (sign > 0) ? LONG_MAX : -LONG_MAX;
+    }
+}