optimize Euler's constant, rename function, and add error bounds

2025-03-06 01:41:39 -05:00 · 2013-03-21 15:30:28 +01:00 · 2013-03-21 15:30:28 +01:00 · d11adcae3c
commit d11adcae3c
parent 5b717cd6d7
7 changed files with 201 additions and 41 deletions
--- a/doc/source/fmprb.rst
+++ b/doc/source/fmprb.rst
@ -655,6 +655,32 @@ Constants
    using the generic hypergeometric series code.
    The value is cached for repeated use.

+.. function:: void fmprb_const_euler(fmprb_t res, long prec)
+
+    Sets *res* to Euler's constant `\gamma = \lim_{k \rightarrow \infty} (H_k - \log k)`
+    where `H_k` denotes a harmonic number. The value is cached for repeated use.
+    Uses the Brent-McMillan formula ([BM1980]_, [MPFR2012]_)
+
+    .. math ::
+
+        \gamma = \frac{S_0(2n) - K_0(2n)}{I_0(2n)} - \log(n)
+
+    in which `n` is a free parameter and
+
+    .. math ::
+
+        S_0(x) = \sum_{k=0}^{\infty} \frac{H_k}{(k!)^2} \left(\frac{x}{2}\right)^{2k}, \quad
+        I_0(x) = \sum_{k=0}^{\infty} \frac{1}{(k!)^2} \left(\frac{x}{2}\right)^{2k}
+
+        2x I_0(x) K_0(x) \sim \sum_{k=0}^{\infty} \frac{[(2k)!]^3}{(k!)^4 8^{2k} x^{2k}}.
+
+    The first two series are evaluated simultaneously, and the error
+    is easily bounded.
+    The third series is a divergent asymptotic expansion. With some work, it
+    can be shown (to be published) that the error when `x = 2n` and
+    the sum goes up to `k = 2n-1` is bounded by `8e^{-4n}`. Dividing this by
+    `4n I_0(2n)^2`, where `I_0(2n) \sim e^{2n} / n^{1/2}`, gives a final error of `O(e^{-8n})`.
+
 .. function:: void fmprb_const_catalan(fmprb_t s, long prec)

    Sets *x* to Catalan's constant `C = \sum_{n=0}^{\infty} (-1)^n / (2n+1)^2`.
--- a/doc/source/zeta.rst
+++ b/doc/source/zeta.rst
@ -173,15 +173,6 @@ Integer zeta values
 Related constants
 -------------------------------------------------------------------------------

-.. function:: void fmprb_const_euler_brent_mcmillan(fmprb_t res, long prec)
-
-    Sets *x* to Euler's constant `\gamma`, computed using the second
-    Bessel function formula of Brent and McMillan ([BM1980]_,  [MPFR2012]_).
-    Brent and McMillan conjectured that the error depending
-    on the internal parameter *n* is of order `O(e^{-8n})`. Brent has
-    recently proved that this bound is correct, but without determining
-    an explicit big-O factor [Bre2010]_.
-
 .. function:: void fmprb_const_khinchin(fmprb_t res, long prec)

    Sets *res* to Khinchin's constant `K_0`, computed as
--- a/fmprb.h
+++ b/fmprb.h
@ -308,6 +308,7 @@ void fmprb_const_sqrt_pi(fmprb_t t, long prec);
 void fmprb_const_log_sqrt2pi(fmprb_t t, long prec);
 void fmprb_const_log2(fmprb_t s, long prec);
 void fmprb_const_log10(fmprb_t s, long prec);
+void fmprb_const_euler(fmprb_t s, long prec);
 void fmprb_const_catalan(fmprb_t s, long prec);
 void fmprb_const_e(fmprb_t s, long prec);

--- a/zeta/const_euler_brent_mcmillan.c
+++ b/zeta/const_euler_brent_mcmillan.c
@ -19,11 +19,12 @@
 =============================================================================*/
 /******************************************************************************

-    Copyright (C) 2012 Fredrik Johansson
+    Copyright (C) 2012, 2013 Fredrik Johansson

 ******************************************************************************/

 #include "zeta.h"
+#include "hypgeom.h"

 typedef struct
 {
@ -103,7 +104,7 @@ euler_bsplit_1(euler_bsplit_t s, long n1, long n2, long N, long wp, int cont)
 {
    if (n2 - n1 == 1)
    {
-        fmprb_set_si(s->P, N); /* p = N^2 todo: shift optimization */
+        fmprb_set_si(s->P, N); /* p = N^2 */
        fmprb_mul(s->P, s->P, s->P, wp);
        fmprb_set_si(s->Q, n1 + 1); /* q = (k + 1)^2 */
        fmprb_mul(s->Q, s->Q, s->Q, wp);
@ -178,19 +179,103 @@ euler_bsplit_2(fmprb_t P, fmprb_t Q, fmprb_t T, long n1, long n2,
    }
 }

+static void
+atanh_bsplit(fmprb_t s, ulong c, long a, long prec)
+{
+    fmprb_t t;
+    hypgeom_t series;
+    hypgeom_init(series);
+    fmprb_init(t);
+
+    fmpz_poly_set_ui(series->A, 1);
+    fmpz_poly_set_coeff_ui(series->B, 0, 1);
+    fmpz_poly_set_coeff_ui(series->B, 1, 2);
+    fmpz_poly_set_ui(series->P, 1);
+    fmpz_poly_set_ui(series->Q, c * c);
+
+    fmprb_hypgeom_infsum(s, t, series, prec, prec);
+    fmprb_mul_si(s, s, a, prec);
+    fmprb_mul_ui(t, t, c, prec);
+    fmprb_div(s, s, t, prec);
+
+    fmprb_clear(t);
+    hypgeom_clear(series);
+}
+
+static ulong
+next_smooth(ulong n)
+{
+    ulong t, k;
+
+    for (k = n; ; k++)
+    {
+        t = k;
+        while (t % 2 == 0) t /= 2;
+        while (t % 3 == 0) t /= 3;
+        while (t % 5 == 0) t /= 5;
+        if (t == 1)
+            return k;
+    }
+}
+
 void
-fmprb_const_euler_brent_mcmillan(fmprb_t res, long prec)
+fmprb_log_ui_smooth(fmprb_t s, ulong n, long prec)
+{
+    ulong m, i, j, k;
+    fmprb_t t;
+
+    m = n;
+    i = j = k = 0;
+    while (m % 2 == 0) { m /= 2; i++; }
+    while (m % 3 == 0) { m /= 3; j++; } 
+    while (m % 5 == 0) { m /= 5; k++; }
+
+    if (m != 1)
+        abort();
+
+    fmprb_init(t);
+
+    prec += FLINT_CLOG2(prec);
+
+    atanh_bsplit(s, 31, 14*i + 22*j + 32*k, prec);
+    atanh_bsplit(t, 49, 10*i + 16*j + 24*k, prec);
+    fmprb_add(s, s, t, prec);
+    atanh_bsplit(t, 161, 6*i + 10*j + 14*k, prec);
+    fmprb_add(s, s, t, prec);
+
+    fmprb_clear(t);
+}
+
+void fmpr_gamma_ui_lbound(fmpr_t x, ulong n, long prec);
+
+
+void
+fmprb_const_euler_eval(fmprb_t res, long prec)
 {
    euler_bsplit_t sum;
    fmprb_t t, u, v, P2, T2, Q2;
-    long bits, wp, n, nterms1, nterms2;
+    long bits, wp, wp2, n, K, M;

-    bits = prec + 20;
-    n = 0.08665 * bits + 1;
+    bits = prec + 10;
+    n = 0.086643397569993163677 * bits + 1;  /* log(2) / 8 */

-    nterms1 = 4.9706258 * n + 1;
-    nterms2 = 2 * n + 1;
-    wp = bits + FLINT_BIT_COUNT(n);
+    /* round n to have many trailing zeros, speeding up arithmetic,
+       and make it smooth to allow computing the logarithm cheaply */
+    if (n > 256)
+    {
+        int b = FLINT_BIT_COUNT(n);
+        n = next_smooth((n >> (b-4)) + 1) << (b-4);
+    }
+    else
+    {
+        n = next_smooth(n);
+    }
+
+    K = 4.9706257595442318644 * n;  /* 3/W(3/e) */
+    M = 2 * n;
+
+    wp  = bits   + 2 * FLINT_BIT_COUNT(n);
+    wp2 = bits/2 + 2 * FLINT_BIT_COUNT(n);

    euler_bsplit_init(sum);
    fmprb_init(P2);
@ -200,29 +285,84 @@ fmprb_const_euler_brent_mcmillan(fmprb_t res, long prec)
    fmprb_init(u);
    fmprb_init(v);

-    /* Compute S0 = V / (Q * D), I0 = 1 + T / Q */
-    euler_bsplit_1(sum, 0, nterms1, n, wp, 0);
+    /* Compute S0 = V / (Q D) + eps1
+               I0 = 1 + T / Q + eps2 */
+    euler_bsplit_1(sum, 0, K, n, wp, 0);
+    /* replace T by T + Q, so that I0 = T / Q + eps2 */
+    fmprb_add(sum->T, sum->T, sum->Q, wp);

-    /* Compute K0 = T2 / Q2 */
-    euler_bsplit_2(P2, Q2, T2, 0, nterms2, n, wp, 0);
+    /* Assuming K > 2 and K >= 4n, eps1 and eps2 are both bounded by
+       2 H_K / (K!)^2 * n^(2K) < 4 log(K) * n^(2K) / (K!)^2
+    */
+    {
+        fmpr_t e, f;
+        fmpr_init(e);
+        fmpr_init(f);

-    /* Compute (S0/I0 + K0/I0^2) = (Q2*(Q+T)*V - D*Q^2*T2)/(D*Q2*(Q+T)^2) */
-    fmprb_add(v, sum->Q, sum->T, wp);
-    fmprb_mul(t, v, Q2, wp);
-    fmprb_mul(u, sum->Q, sum->Q, wp);
-    fmprb_mul(u, u, T2, wp);
-    fmprb_mul(u, u, sum->D, wp);
-    fmprb_mul(sum->V, t, sum->V, wp);
-    fmprb_sub(sum->V, sum->V, u, wp);
-    fmprb_mul(u, sum->D, t, wp);
-    fmprb_mul(u, u, v, wp);
-    fmprb_div(t, sum->V, u, wp);
+        fmpr_set_ui(e, n);
+        fmpr_pow_sloppy_ui(e, e, 2 * K, FMPRB_RAD_PREC, FMPR_RND_UP);
+
+        fmpr_set_ui(f, K);
+        fmpr_log(f, f, FMPRB_RAD_PREC, FMPR_RND_UP);
+        fmpr_mul(e, e, f, FMPRB_RAD_PREC, FMPR_RND_UP);
+        fmpr_mul_2exp_si(e, e, 2);
+
+        fmpr_gamma_ui_lbound(f, K + 1, FMPRB_RAD_PREC);
+        fmpr_mul(f, f, f, FMPRB_RAD_PREC, FMPR_RND_DOWN);
+        fmpr_div(e, e, f, FMPRB_RAD_PREC, FMPR_RND_UP);
+
+        /* T / Q + eps = (T + eps Q) / Q */
+        fmprb_get_abs_ubound_fmpr(f, sum->Q, FMPRB_RAD_PREC);
+        fmpr_mul(e, e, f, FMPRB_RAD_PREC, FMPR_RND_UP);
+        fmprb_add_error_fmpr(sum->T, e);
+
+        /* V / (Q D) + eps = (V + eps Q D) / (Q D) */
+        fmprb_get_abs_ubound_fmpr(f, sum->D, FMPRB_RAD_PREC);
+        fmpr_mul(e, e, f, FMPRB_RAD_PREC, FMPR_RND_UP);
+        fmprb_add_error_fmpr(sum->V, e);
+
+        fmpr_clear(e);
+        fmpr_clear(f);
+    }
+
+    /* Compute S0 / I0 = V / (D T) */
+    fmprb_mul(t, sum->T, sum->D, wp);
+    fmprb_div(res, sum->V, t, wp);
+
+    /* Compute K0 (actually I_0(2n) K_0(2n)) = T2 / Q2 + eps */
+    euler_bsplit_2(P2, Q2, T2, 0, M, n, wp2, 0);
+
+    /* assuming M = 2n, eps is bounded by 2 exp(-4n) / n, and
+       T2 / Q2 = s + eps <=> (T2 - Q2 eps) / Q2 = s */
+    {
+        fmpr_t e, f;
+        fmpr_init(e);
+        fmpr_init(f);
+        fmpr_set_si(f, -4*n);
+        fmpr_exp(f, f, FMPRB_RAD_PREC, FMPR_RND_UP);
+        fmpr_div_ui(f, f, n, FMPRB_RAD_PREC, FMPR_RND_UP);
+        fmpr_mul_2exp_si(f, f, 1);
+        fmprb_get_abs_ubound_fmpr(e, Q2, FMPRB_RAD_PREC);
+        fmpr_mul(e, e, f, FMPRB_RAD_PREC, FMPR_RND_UP);
+        fmprb_add_error_fmpr(T2, e);
+        fmpr_clear(e);
+        fmpr_clear(f);
+    }
+
+    /* Compute K0 / I0 = (I0 K0) / I0^2 = Q^2 * T2 / (Q2 * T^2) */
+    fmprb_set_round(t, sum->Q, wp2);
+    fmprb_mul(t, t, t, wp2);
+    fmprb_mul(t, t, T2, wp2);
+    fmprb_set_round(u, sum->T, wp2);
+    fmprb_mul(u, u, u, wp2);
+    fmprb_mul(u, u, Q2, wp2);
+    fmprb_div(t, t, u, wp2);
+
+    fmprb_sub(res, res, t, wp);

    /* subtract log(n) */
-    fmprb_log_ui(u, n, wp);
-    fmprb_sub(res, t, u, wp);
-
-    /* TODO: add error term */
+    fmprb_log_ui_smooth(u, n, wp);
+    fmprb_sub(res, res, u, wp);

    fmprb_clear(P2);
    fmprb_clear(T2);
@ -233,3 +373,6 @@ fmprb_const_euler_brent_mcmillan(fmprb_t res, long prec)

    euler_bsplit_clear(sum);
 }
+
+DEF_CACHED_CONSTANT(fmprb_const_euler, fmprb_const_euler_eval)
+
--- a/zeta/test/t-const_euler_brent_mcmillan.c
+++ b/zeta/test/t-const_euler_brent_mcmillan.c
@ -30,7 +30,7 @@ int main()
    long iter;
    flint_rand_t state;

-    printf("const_euler_brent_mcmillan....");
+    printf("const_euler....");
    fflush(stdout);
    flint_randinit(state);

@ -45,7 +45,7 @@ int main()
        fmprb_init(r);
        mpfr_init2(s, prec + 1000);

-        fmprb_const_euler_brent_mcmillan(r, prec);
+        fmprb_const_euler(r, prec);
        mpfr_const_euler(s, MPFR_RNDN);

        if (!fmprb_contains_mpfr(r, s))
--- a/fmprb_poly/log_gamma_series.c
+++ b/fmprb_poly/log_gamma_series.c
@ -37,7 +37,7 @@ fmprb_poly_log_gamma_series(fmprb_poly_t z, long n, long prec)
    _fmprb_poly_set_length(z, n);

    if (n > 0) fmprb_zero(z->coeffs);
-    if (n > 1) fmprb_const_euler_brent_mcmillan(z->coeffs + 1, prec);
+    if (n > 1) fmprb_const_euler(z->coeffs + 1, prec);
    if (n > 2) fmprb_zeta_ui_vec(z->coeffs + 2, 2, n - 2, prec);

    for (i = 2; i < n; i++)
--- a/zeta.h
+++ b/zeta.h
@ -31,7 +31,6 @@
 #include "fmprb.h"
 #include "fmpcb.h"

-void fmprb_const_euler_brent_mcmillan(fmprb_t res, long prec);
 void fmprb_const_zeta3_bsplit(fmprb_t x, long prec);

 void fmprb_const_khinchin(fmprb_t K, long prec);