arf_get_fmpz: speed up rounding to nearest in common cases

2025-03-06 09:51:39 -05:00 · 2016-02-25 15:03:09 +01:00 · 2016-02-25 15:03:09 +01:00 · 8c5d26de65
commit 8c5d26de65
parent 1e2333c48b
1 changed files with 51 additions and 21 deletions
--- a/arf/get_fmpz.c
+++ b/arf/get_fmpz.c
@ -131,33 +131,44 @@ arf_get_fmpz(fmpz_t z, const arf_t x, arf_rnd_t rnd)
        mp_srcptr xp;
        __mpz_struct * zz;
        /* TBD: implement efficiently */
        if (rnd == ARF_RND_NEAR)
        {
            fmpr_t t;
            fmpr_init(t);
            arf_get_fmpr(t, x);
            fmpr_get_fmpz(z, t, rnd);
            fmpr_clear(t);
            return;
        }
        exp = ARF_EXP(x);
        negative = ARF_SGNBIT(x);
        /* |x| < 1 */
        if (exp <= 0)
        {
-            if (rnd == ARF_RND_DOWN ||
+            int value;
            if (rnd == ARF_RND_NEAR)
            {
                if (exp == 0)
                {
                    /* check for the special case +/- 1/2 */
                    ARF_GET_MPN_READONLY(xp, xn, x);
                    if (xp[xn - 1] < LIMB_TOP || (xn == 1 && xp[xn - 1] == LIMB_TOP))
                        value = 0;
                    else
                        value = negative ? -1 : 1;
                }
                else
                {
                    value = 0;
                }
            }
            else if (rnd == ARF_RND_DOWN ||
                (rnd == ARF_RND_FLOOR && !negative) ||
                (rnd == ARF_RND_CEIL && negative))
            {
-                fmpz_zero(z);
+                value = 0;
            }
            else
            {
-                fmpz_set_si(z, negative ? -1 : 1);
+                value = negative ? -1 : 1;
            }
            _fmpz_demote(z);
            *z = value;
            return;
        }
@ -166,18 +177,26 @@ arf_get_fmpz(fmpz_t z, const arf_t x, arf_rnd_t rnd)
        /* |x| < 2^31 or 2^63 (must save 1 bit for rounding up!) */
        if (exp < FLINT_BITS)
        {
-            mp_limb_t v, v2;
+            mp_limb_t v, v2, v3;
            v = xp[xn - 1];
-            v2 = v >> (FLINT_BITS - exp);
+            v2 = v >> (FLINT_BITS - exp); /* integral part */
-            inexact = (xn > 1) || ((v2 << (FLINT_BITS - exp)) != v);
+            v3 = v << exp;                /* fractional part (truncated, at least 1 bit) */
            inexact = (xn > 1) || (v3 != 0);
            if (inexact && rnd != ARF_RND_DOWN)
            {
-                if (negative && (rnd == ARF_RND_UP || rnd == ARF_RND_FLOOR))
+                if (rnd == ARF_RND_NEAR)
-                    v2++;
+                {
-                if (!negative && (rnd == ARF_RND_UP || rnd == ARF_RND_CEIL))
+                    /* round up if fractional part is > 1/2,
-                    v2++;
+                       or if equal to 1/2 and the integral part is odd */
                    v2 += (v3 > LIMB_TOP) || (v3 == LIMB_TOP && (xn > 1 || (v2 & 1)));
                }
                else
                {
                    v2 += (rnd == ARF_RND_UP) || (negative ^ (rnd == ARF_RND_CEIL));
                }
            }
            if (negative)
@ -188,6 +207,17 @@ arf_get_fmpz(fmpz_t z, const arf_t x, arf_rnd_t rnd)
            return;
        }
        /* TBD: implement efficiently */
        if (rnd == ARF_RND_NEAR)
        {
            fmpr_t t;
            fmpr_init(t);
            arf_get_fmpr(t, x);
            fmpr_get_fmpz(z, t, rnd);
            fmpr_clear(t);
            return;
        }
        /* |x| >= 1 */
        zn = (exp + FLINT_BITS - 1) / FLINT_BITS;
        zz = _fmpz_promote(z);