From ef3a687c042be0e4b865ce37f46b5b8b6391b261 Mon Sep 17 00:00:00 2001
From: Fredrik Johansson
Date: Wed, 9 Jan 2013 16:35:49 +0100
Subject: [PATCH] low-level set_round/normalise code

---
 fmpr.h                  |  39 ++++++-
 fmpr/normalise.c        |   3 +-
 fmpr/set_round.c        | 222 ++++++++++++++++++++++++++++++++++++++++
 fmpr/test/t-normalise.c |  85 +++++++++++++++
 4 files changed, 345 insertions(+), 4 deletions(-)
 create mode 100644 fmpr/set_round.c
 create mode 100644 fmpr/test/t-normalise.c

diff --git a/fmpr.h b/fmpr.h
index e2346b66..a582b05b 100644
--- a/fmpr.h
+++ b/fmpr.h
@@ -155,8 +155,7 @@ fmpr_one(fmpr_t x)
 
 /* ------------------------------------------------------------------------ */
 
-/* TODO: version for just the val2 reduction! */
-long _fmpr_normalise(fmpz_t man, fmpz_t exp, long prec, fmpr_rnd_t rnd);
+long _fmpr_normalise_naive(fmpz_t man, fmpz_t exp, long prec, fmpr_rnd_t rnd);
 
 static __inline__ void
 fmpr_set(fmpr_t y, const fmpr_t x)
@@ -168,8 +167,26 @@ fmpr_set(fmpr_t y, const fmpr_t x)
     }
 }
 
+long _fmpr_set_round(fmpz_t rman, fmpz_t rexp,
+    const fmpz_t man, const fmpz_t exp, long prec, fmpr_rnd_t rnd);
+
 static __inline__ long
-fmpr_set_round(fmpr_t y, const fmpr_t x, long prec, fmpr_rnd_t rnd)
+_fmpr_normalise(fmpz_t man, fmpz_t exp, long prec, fmpr_rnd_t rnd)
+{
+    if (fmpz_is_zero(man))
+    {
+        fmpz_zero(man);
+        fmpz_zero(exp);
+        return FMPR_PREC_EXACT;
+    }
+    else
+    {
+        return _fmpr_set_round(man, exp, man, exp, prec, rnd);
+    }
+}
+
+static __inline__ long
+fmpr_set_round_naive(fmpr_t y, const fmpr_t x, long prec, fmpr_rnd_t rnd)
 {
     fmpr_set(y, x);
     if (fmpr_is_special(y))
@@ -178,6 +195,22 @@
     return _fmpr_normalise(fmpr_manref(y), fmpr_expref(y), prec, rnd);
 }
 
 
+static __inline__ long
+fmpr_set_round(fmpr_t y, const fmpr_t x, long prec, fmpr_rnd_t rnd)
+{
+    if (fmpr_is_special(x))
+    {
+        fmpr_set(y, x);
+        return FMPR_PREC_EXACT;
+    }
+    else
+    {
+        return _fmpr_set_round(fmpr_manref(y), fmpr_expref(y),
+            fmpr_manref(x), fmpr_expref(x), prec, rnd);
+    }
+}
+
+
 static __inline__ int
 fmpr_equal(const fmpr_t x, const fmpr_t y)
diff --git a/fmpr/normalise.c b/fmpr/normalise.c
index a4cc101e..6b9049cf 100644
--- a/fmpr/normalise.c
+++ b/fmpr/normalise.c
@@ -25,7 +25,8 @@
 
 #include "fmpr.h"
 
-long _fmpr_normalise(fmpz_t man, fmpz_t exp, long prec, fmpr_rnd_t rnd)
+long
+_fmpr_normalise_naive(fmpz_t man, fmpz_t exp, long prec, fmpr_rnd_t rnd)
 {
     /* TODO: this should perhaps raise an exception to avoid ambiguity */
     if (fmpz_is_zero(man))
diff --git a/fmpr/set_round.c b/fmpr/set_round.c
new file mode 100644
index 00000000..98db8a68
--- /dev/null
+++ b/fmpr/set_round.c
@@ -0,0 +1,222 @@
+/*=============================================================================
+
+    This file is part of ARB.
+
+    ARB is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    ARB is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with ARB; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+=============================================================================*/
+/******************************************************************************
+
+    Copyright (C) 2012 Fredrik Johansson
+
+******************************************************************************/
+
+#include "fmpr.h"
+
+static __inline__ void
+_fmpz_add_ui(fmpz_t z, const fmpz_t x, ulong y)
+{
+    fmpz f = *x;
+
+    if (!COEFF_IS_MPZ(f) && y <= COEFF_MAX)
+        fmpz_set_si(z, f + y);
+    else
+        fmpz_add_ui(z, x, y);
+}
+
+static __inline__ int
+rounds_up(fmpr_rnd_t rnd, int negative)
+{
+    if (rnd == FMPR_RND_DOWN) return 0;
+    if (rnd == FMPR_RND_UP) return 1;
+    if (rnd == FMPR_RND_FLOOR) return negative;
+    return !negative;
+}
+
+/* like mpn_scan0, but takes an upper size */
+static __inline__ mp_bitcnt_t
+mpn_scan0b(mp_srcptr up, mp_size_t size, mp_bitcnt_t from_bit)
+{
+    mp_limb_t t;
+    long i, c;
+
+    i = from_bit / FLINT_BITS;
+    c = from_bit % FLINT_BITS;
+    t = ((~up[i]) >> c) << c;
+
+    while (t == 0)
+    {
+        i++;
+        if (i == size)
+            return size * FLINT_BITS;
+        else
+            t = ~up[i];
+    }
+
+    count_trailing_zeros(c, t);
+    return (i * FLINT_BITS) + c;
+}
+
+long
+_fmpr_set_round(fmpz_t rman, fmpz_t rexp,
+    const fmpz_t man, const fmpz_t exp, long prec, fmpr_rnd_t rnd)
+{
+    if (!COEFF_IS_MPZ(*man))
+    {
+        long lead, trail, bc, v, w, shift, ret;
+        int negative;
+
+        v = *man;
+
+        count_trailing_zeros(trail, v);
+        v >>= trail;
+        shift = trail;
+        ret = FMPR_PREC_EXACT;
+
+        /* may need to round */
+        if (prec < FLINT_BITS - 2 - trail)
+        {
+            if (v < 0)
+            {
+                negative = 1;
+                w = -v;
+            }
+            else
+            {
+                negative = 0;
+                w = v;
+            }
+
+            count_leading_zeros(lead, w);
+            bc = FLINT_BITS - lead;
+
+            /* round */
+            if (prec < bc)
+            {
+                w = (w >> (bc - prec)) + rounds_up(rnd, negative);
+                shift += bc - prec;
+                count_trailing_zeros(trail, w);
+                w >>= trail;
+                shift += trail;
+                v = negative ? -w : w;
+                ret = trail;
+            }
+        }
+
+        /* the input is small, so the output must be small too */
+        _fmpz_demote(rman);
+        *rman = v;
+        _fmpz_add_ui(rexp, exp, shift);
+        return ret;
+    }
+    else
+    {
+        long size, bc, val, val_bits, val_limbs, ret, old_val;
+        int negative, increment;
+        mp_ptr d;
+        __mpz_struct * z = COEFF_TO_PTR(*man);
+
+        size = z->_mp_size;
+        negative = size < 0;
+        size = FLINT_ABS(size);
+        d = z->_mp_d;
+
+        /* bit size */
+        count_leading_zeros(bc, d[size - 1]);
+        bc = FLINT_BITS - bc;
+        bc += (size - 1) * FLINT_BITS;
+
+        /* quick exit */
+        if (bc <= prec && (d[0] & 1))
+        {
+            if (rman != man) fmpz_set(rman, man);
+            if (rexp != exp) fmpz_set(rexp, exp);
+            return FMPR_PREC_EXACT;
+        }
+
+        /* trailing zeros */
+        val_limbs = 0;
+        while (d[val_limbs] == 0)
+            val_limbs++;
+        count_trailing_zeros(val_bits, d[val_limbs]);
+        val = val_bits + (val_limbs * FLINT_BITS);
+
+        /* no rounding necessary; just copy or shift to destination */
+        if (bc - val <= prec)
+        {
+            ret = FMPR_PREC_EXACT;
+            increment = 0;
+        }
+        else
+        {
+            /* truncation */
+            if (!rounds_up(rnd, negative))
+            {
+                old_val = val;
+                val = mpn_scan1(d, bc - prec);
+                increment = 0;
+            }
+            /* round to next higher odd mantissa */
+            else
+            {
+                old_val = val;
+                val = mpn_scan0b(d, size, bc - prec);
+
+                /* can overflow to next power of 2 */
+                if (val == bc)
+                {
+                    fmpz_set_si(rman, negative ? -1 : 1);
+                    _fmpz_add_ui(rexp, exp, bc);
+                    return prec;
+                }
+
+                /* otherwise, incrementing will not cause overflow below */
+                increment = 1;
+            }
+
+            val_limbs = val / FLINT_BITS;
+            val_bits = val % FLINT_BITS;
+            ret = prec - (bc - val);
+        }
+
+        /* the output mantissa is a small fmpz */
+        if (bc - val <= FLINT_BITS - 2)
+        {
+            mp_limb_t h;
+
+            if (val_limbs + 1 == size || val_bits == 0)
+                h = d[val_limbs] >> val_bits;
+            else
+                h = (d[val_limbs] >> val_bits)
+                    | (d[val_limbs + 1] << (FLINT_BITS - val_bits));
+            h += increment;
+            _fmpz_demote(rman);
+            *rman = negative ? -h : h;
+        }
+        /* the output mantissa is an mpz */
+        else
+        {
+            if (rman == man)
+                mpz_tdiv_q_2exp(z, z, val);
+            else
+                mpz_tdiv_q_2exp(_fmpz_promote(rman), z, val);
+            if (increment)
+                COEFF_TO_PTR(*rman)->_mp_d[0]++;
+        }
+
+        _fmpz_add_ui(rexp, exp, val);
+        return ret;
+    }
+}
diff --git a/fmpr/test/t-normalise.c b/fmpr/test/t-normalise.c
new file mode 100644
index 00000000..907d448d
--- /dev/null
+++ b/fmpr/test/t-normalise.c
@@ -0,0 +1,85 @@
+/*=============================================================================
+
+    This file is part of ARB.
+
+    ARB is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    ARB is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with ARB; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+=============================================================================*/
+/******************************************************************************
+
+    Copyright (C) 2012 Fredrik Johansson
+
+******************************************************************************/
+
+#include "fmpr.h"
+
+
+int main()
+{
+    long iter;
+    flint_rand_t state;
+
+    printf("normalise....");
+    fflush(stdout);
+
+    flint_randinit(state);
+
+    for (iter = 0; iter < 1000000; iter++)
+    {
+        fmpr_t x, y;
+        long prec, ret1, ret2;
+        fmpr_rnd_t rnd;
+
+        fmpr_init(x);
+        fmpr_init(y);
+
+        fmpz_randtest(fmpr_manref(x), state, 2000);
+        fmpz_randtest(fmpr_expref(x), state, 200);
+
+        fmpz_set(fmpr_manref(y), fmpr_manref(x));
+        fmpz_set(fmpr_expref(y), fmpr_expref(x));
+
+        switch (n_randint(state, 4))
+        {
+            case 0: rnd = FMPR_RND_DOWN; break;
+            case 1: rnd = FMPR_RND_UP; break;
+            case 2: rnd = FMPR_RND_FLOOR; break;
+            default: rnd = FMPR_RND_CEIL; break;
+        }
+
+        prec = 2 + n_randint(state, 2000);
+
+        ret1 = _fmpr_normalise(fmpr_manref(x), fmpr_expref(x), prec, rnd);
+        ret2 = _fmpr_normalise_naive(fmpr_manref(y), fmpr_expref(y), prec, rnd);
+
+        if (!fmpr_equal(x, y) || ret1 != ret2)
+        {
+            printf("FAIL!\n");
+            printf("x = "); fmpr_print(x); printf("\n\n");
+            printf("y = "); fmpr_print(y); printf("\n\n");
+            printf("ret1 = %ld, ret2 = %ld\n", ret1, ret2);
+            abort();
+        }
+
+        fmpr_clear(x);
+        fmpr_clear(y);
+    }
+
+    flint_randclear(state);
+    _fmpz_cleanup();
+    printf("PASS\n");
+    return EXIT_SUCCESS;
+}
+
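
Usage note (not part of the patch): the sketch below exercises the new fmpr_set_round entry point on an exact 65-bit input, using only interfaces that appear in the patch (fmpr_init, fmpr_clear, fmpr_print, fmpr_manref/fmpr_expref, FMPR_RND_DOWN/FMPR_RND_UP) plus standard FLINT fmpz functions. Reading _fmpr_set_round above, the return value r appears to bound the rounding error by 2^(-r) times one ulp of the result (with FMPR_PREC_EXACT meaning no error); treat that interpretation, and the expected values in the comments, as assumptions rather than documented behaviour.

    #include <stdio.h>
    #include "fmpr.h"

    int main()
    {
        fmpr_t x, y;
        long r;

        fmpr_init(x);
        fmpr_init(y);

        /* x = 2^64 + 1: mantissa 2^64 + 1 (odd, 65 bits), exponent 0 */
        fmpz_one(fmpr_manref(x));
        fmpz_mul_2exp(fmpr_manref(x), fmpr_manref(x), 64);
        fmpz_add_ui(fmpr_manref(x), fmpr_manref(x), 1);
        fmpz_zero(fmpr_expref(x));

        /* truncate to 10 bits: the low bit is discarded,
           so y should become 1 * 2^64 and r should be 9 */
        r = fmpr_set_round(y, x, 10, FMPR_RND_DOWN);
        printf("down: "); fmpr_print(y); printf("   r = %ld\n", r);

        /* round up to 10 bits: the mantissa is bumped to the next
           odd value, so y should become 513 * 2^55 with r = 0 */
        r = fmpr_set_round(y, x, 10, FMPR_RND_UP);
        printf("up:   "); fmpr_print(y); printf("   r = %ld\n", r);

        fmpr_clear(x);
        fmpr_clear(y);
        return 0;
    }

In both cases the actual error (1 when rounding down, 2^55 - 1 when rounding up) stays below 2^(-r) times the result's ulp, which is consistent with the reading of the return value given above.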