speed up sin and cos using sqrt trick and extend their table to higher precision

2025-03-04 17:01:40 -05:00 · 2014-10-24 16:32:56 +02:00 · 2014-10-24 16:32:56 +02:00 · 2639be4e60
commit 2639be4e60
parent eeae809300
6 changed files with 975 additions and 15 deletions
--- a/arb.h
+++ b/arb.h
@ -964,12 +964,12 @@ void _arb_exp_sum_bs_simple(fmpz_t T, fmpz_t Q, mp_bitcnt_t * Qexp,
 #define ARB_SIN_COS_TAB1_PREC 512
 #define ARB_SIN_COS_TAB1_LIMBS (ARB_SIN_COS_TAB1_PREC / FLINT_BITS)

-/* only goes up to log(2) * 32 */
+/* only goes up to (pi/4) * 32 */
 #define ARB_SIN_COS_TAB21_NUM 26
 #define ARB_SIN_COS_TAB21_BITS 5
 #define ARB_SIN_COS_TAB22_NUM (1 << ARB_SIN_COS_TAB22_BITS)
 #define ARB_SIN_COS_TAB22_BITS 5
-#define ARB_SIN_COS_TAB2_PREC 2560
+#define ARB_SIN_COS_TAB2_PREC 4608
 #define ARB_SIN_COS_TAB2_LIMBS (ARB_SIN_COS_TAB2_PREC / FLINT_BITS)

 extern const mp_limb_t arb_sin_cos_tab1[2 * ARB_SIN_COS_TAB1_NUM][ARB_SIN_COS_TAB1_LIMBS];
@ -983,7 +983,7 @@ void _arb_sin_cos_taylor_naive(mp_ptr ysin, mp_ptr ycos, mp_limb_t * error,
    mp_srcptr x, mp_size_t xn, ulong N);

 void _arb_sin_cos_taylor_rs(mp_ptr ysin, mp_ptr ycos,
-    mp_limb_t * error, mp_srcptr x, mp_size_t xn, ulong N);
+    mp_limb_t * error, mp_srcptr x, mp_size_t xn, ulong N, int sinonly);

 int _arb_get_mpn_fixed_mod_pi4(mp_ptr w, fmpz_t q, int * octant,
    mp_limb_t * error, const arf_t x, mp_size_t wn);
--- a/arb/sin_cos.c
+++ b/arb/sin_cos.c
@ -24,6 +24,7 @@
 ******************************************************************************/

 #include "arb.h"
+#include "mpn_extras.h"

 #define TMP_ALLOC_LIMBS(__n) TMP_ALLOC((__n) * sizeof(mp_limb_t))

@ -299,14 +300,42 @@ arb_sin_cos_arf_new(arb_t zsin, arb_t zcos, const arf_t x, long prec)
    /* the summation for sin/cos is actually done to (2N-1)! */
    N = (N + 1) / 2;

-    /* Evaluate Taylor series */
-    _arb_sin_cos_taylor_rs(sina, cosa, &error2, w, wn, N);
+    if (N < 14)
+    {
+        /* Evaluate Taylor series */
+        _arb_sin_cos_taylor_rs(sina, cosa, &error2, w, wn, N, 0);
+        /* Taylor series evaluation error */
+        error += error2;
+        /* Taylor series truncation error */
+        error += 1UL << (wprounded-wp);
+    }
+    else  /* Compute cos(a) from sin(a) using a square root. */
+    {
+        /* Evaluate Taylor series */
+        _arb_sin_cos_taylor_rs(sina, cosa, &error2, w, wn, N, 1);
+        error += error2;
+        error += 1UL << (wprounded-wp);

-    /* Taylor series evaluation error */
-    error += error2;
+        if (flint_mpn_zero_p(sina, wn))
+        {
+            flint_mpn_store(cosa, wn, LIMB_ONES);
+            error = FLINT_MAX(error, 1);
+        }
+        else
+        {
+            mpn_sqr(ta, sina, wn);
+            /* 1 - s^2 (negation guaranteed to have borrow) */
+            mpn_neg(ta, ta, 2 * wn);
+            /* top limb of ta must be nonzero, so no need to normalize
+               before calling mpn_sqrtrem */
+            mpn_sqrtrem(cosa, ta, ta, 2 * wn);

-    /* Taylor series truncation error */
-    error += 1UL << (wprounded-wp);
+            /* When converting sin to cos, the error for cos must be
+               smaller than the error for sin; but we also get 1 ulp
+               extra error from the square root. */
+            error += 1;
+        }
+    }

    /*
    sin(a+b) = sin(a)*cos(b) + cos(a)*sin(b)
--- a/arb/sin_cos_tab.c
+++ b/arb/sin_cos_tab.c
--- a/arb/sin_cos_taylor_rs.c
+++ b/arb/sin_cos_taylor_rs.c
@ -33,7 +33,7 @@ extern const mp_limb_t factorial_tab_numer[FACTORIAL_TAB_SIZE];
 extern const mp_limb_t factorial_tab_denom[FACTORIAL_TAB_SIZE];

 void _arb_sin_cos_taylor_rs(mp_ptr ysin, mp_ptr ycos,
-    mp_limb_t * error, mp_srcptr x, mp_size_t xn, ulong N)
+    mp_limb_t * error, mp_srcptr x, mp_size_t xn, ulong N, int sinonly)
 {
    mp_ptr s, t, xpow;
    mp_limb_t new_denom, old_denom, c;
@ -54,13 +54,13 @@ void _arb_sin_cos_taylor_rs(mp_ptr ysin, mp_ptr ycos,
        if (N == 0)
        {
            flint_mpn_zero(ysin, xn);
-            flint_mpn_zero(ycos, xn);
+            if (!sinonly) flint_mpn_zero(ycos, xn);
            error[0] = 0;
        }
        else if (N == 1)
        {
-            flint_mpn_store(ycos, xn, LIMB_ONES);
            flint_mpn_copyi(ysin, x, xn);
+            if (!sinonly) flint_mpn_store(ycos, xn, LIMB_ONES);
            error[0] = 1;
        }
    }
@ -92,7 +92,7 @@ void _arb_sin_cos_taylor_rs(mp_ptr ysin, mp_ptr ycos,
            mpn_sqr(XPOW_WRITE(k), XPOW_READ(k / 2), xn);
        }

-        for (cosorsin = 0; cosorsin < 2; cosorsin++)
+        for (cosorsin = sinonly; cosorsin < 2; cosorsin++)
        {
            flint_mpn_zero(s, xn + 1);

--- a/arb/test/t-sin_cos_taylor_rf.c
+++ b/arb/test/t-sin_cos_taylor_rf.c
@ -63,7 +63,7 @@ int main()
        x[xn - 1] &= (LIMB_ONES >> 4);

        _arb_sin_cos_taylor_naive(y1s, y1c, &err1, x, xn, N);
-        _arb_sin_cos_taylor_rs(y2s, y2c, &err2, x, xn, N);
+        _arb_sin_cos_taylor_rs(y2s, y2c, &err2, x, xn, N, 0);

        cmp = mpn_cmp(y1s, y2s, xn);

--- a/doc/source/arb.rst
+++ b/doc/source/arb.rst
@ -1030,7 +1030,7 @@ Internal helper functions

 .. function:: void _arb_sin_cos_taylor_naive(mp_ptr ysin, mp_ptr ycos, mp_limb_t * error, mp_srcptr x, mp_size_t xn, ulong N)

-.. function:: void _arb_sin_cos_taylor_rs(mp_ptr ysin, mp_ptr ycos, mp_limb_t * error, mp_srcptr x, mp_size_t xn, ulong N)
+.. function:: void _arb_sin_cos_taylor_rs(mp_ptr ysin, mp_ptr ycos, mp_limb_t * error, mp_srcptr x, mp_size_t xn, ulong N, int sinonly)

    Computes approximations of `y_s = \sum_{k=0}^{N-1} (-1)^k x^{2k+1} / (2k+1)!`
    and `y_c = \sum_{k=0}^{N-1} (-1)^k x^{2k} / (2k)!`.
@ -1042,6 +1042,9 @@ Internal helper functions
    The input *x* and outputs *ysin*, *ycos* are fixed-point numbers with
    *xn* fractional limbs. A bound for the ulp error is written to *error*.

+    If *sinonly* is 1, only the sine is computed; if *sinonly* is 0
+    both the sine and cosine are computed.
+
 .. function:: int _arb_get_mpn_fixed_mod_log2(mp_ptr w, fmpz_t q, mp_limb_t * error, const arf_t x, mp_size_t wn)

    Attempts to write `w = x - q \log(2)` with `0 \le w < \log(2)`, where *w*