refactoring; slightly speed up elementary functions for small input

2025-03-05 09:21:38 -05:00 · 2014-12-07 15:06:01 +01:00 · 2014-12-07 15:06:01 +01:00 · 6d4cbbb86b
commit 6d4cbbb86b
parent e3f7ef1f66
5 changed files with 33 additions and 23 deletions
--- a/arb.h
+++ b/arb.h
@ -1001,6 +1001,32 @@ void _arb_sin_cos_taylor_rs(mp_ptr ysin, mp_ptr ycos,
 int _arb_get_mpn_fixed_mod_pi4(mp_ptr w, fmpz_t q, int * octant,
    mp_limb_t * error, const arf_t x, mp_size_t wn);

+static __inline__ mp_bitcnt_t
+_arb_mpn_leading_zeros(mp_srcptr d, mp_size_t n)
+{
+    mp_limb_t t;
+    mp_size_t zero_limbs;
+    mp_bitcnt_t bits;
+
+    zero_limbs = 0;
+
+    while (1)
+    {
+        t = d[n - zero_limbs - 1];
+
+        if (t != 0)
+        {
+            count_leading_zeros(bits, t);
+            return bits + FLINT_BITS * zero_limbs;
+        }
+
+        zero_limbs++;
+
+        if (zero_limbs == n)
+            return FLINT_BITS * zero_limbs;
+    }
+}
+
 #ifdef __cplusplus
 }
 #endif
--- a/arb/atan.c
+++ b/arb/atan.c
@ -28,7 +28,7 @@
 #define TMP_ALLOC_LIMBS(size) TMP_ALLOC((size) * sizeof(mp_limb_t))

 /* atan(x) = x + eps, |eps| < x^3*/
-static void
+void
 arb_atan_eps(arb_t z, const arf_t x, long prec)
 {
    fmpz_t mag;
@ -41,7 +41,7 @@ arb_atan_eps(arb_t z, const arf_t x, long prec)
 }

 /* atan(x) = pi/2 - eps, eps < 1/x <= 2^(1-mag) */
-static void
+void
 arb_atan_inf_eps(arb_t z, const arf_t x, long prec)
 {
    fmpz_t mag;
@ -140,6 +140,7 @@ _arf_set_mpn_fixed(arf_t z, mp_srcptr xp, mp_size_t xn, mp_size_t fixn, int nega
    }
 }

+/* TODO: move out */
 void
 mag_add_ui_2exp_si(mag_t z, const mag_t x, ulong y, long e)
 {
@ -332,7 +333,7 @@ arb_atan_arf(arb_t z, const arf_t x, long prec)
        }

        /* |w| <= 2^-r */
-        r = FLINT_BITS - FLINT_BIT_COUNT(w[wn-1]);
+        r = _arb_mpn_leading_zeros(w, wn);

        /* N >= (wp-r)/(2r) */
        N = (wp - r + (2*r-1)) / (2*r);
--- a/arb/exp.c
+++ b/arb/exp.c
@ -253,7 +253,7 @@ arb_exp_arf(arb_t z, const arf_t x, long prec, int minus_one)
        }

        /* |w| <= 2^-r */
-        r = FLINT_BITS - FLINT_BIT_COUNT(w[wn-1]);
+        r = _arb_mpn_leading_zeros(w, wn);

        /* Choose number of terms N such that Taylor series truncation
           error is <= 2^-wp */
--- a/arb/log.c
+++ b/arb/log.c
@ -76,23 +76,6 @@ int _arf_set_mpn_fixed(arf_t z, mp_srcptr xp, mp_size_t xn, mp_size_t fixn, int

 void mag_add_ui_2exp_si(mag_t z, const mag_t x, ulong y, long e);

-static __inline__ mp_bitcnt_t
-_mpn_leading_zeros(mp_srcptr w, mp_size_t wn)
-{
-    mp_bitcnt_t r = 0;
-
-    while (wn > 0 && w[wn-1] == 0)
-    {
-        wn--;
-        r += FLINT_BITS;
-    }
-
-    if (wn > 0)
-        r += FLINT_BITS - FLINT_BIT_COUNT(w[wn-1]);
-
-    return r;
-}
-
 void
 arb_log_arf_huge(arb_t z, const arf_t x, long prec)
 {
@ -308,7 +291,7 @@ arb_log_arf(arb_t z, const arf_t x, long prec)
            error += 3;

            /* |w| <= 2^-r */
-            r = _mpn_leading_zeros(w, wn);
+            r = _arb_mpn_leading_zeros(w, wn);

            /* N >= (wp-r)/(2r) */
            N = (wp - r + (2*r-1)) / (2*r);
--- a/arb/sin_cos.c
+++ b/arb/sin_cos.c
@ -291,7 +291,7 @@ arb_sin_cos_arf_new(arb_t zsin, arb_t zcos, const arf_t x, long prec)
    }

    /* |w| <= 2^-r */
-    r = FLINT_BITS - FLINT_BIT_COUNT(w[wn-1]);
+    r = _arb_mpn_leading_zeros(w, wn);

    /* Choose number of terms N such that Taylor series truncation
       error is <= 2^-wp */