use hurwitz_precomp to speed up acb_dirichlet_l

2025-03-04 17:01:40 -05:00 · 2016-11-26 10:39:27 +01:00 · 2016-11-26 10:39:27 +01:00 · 098d4437ad
commit 098d4437ad
parent 483253cbd4
10 changed files with 174 additions and 18 deletions
--- a/acb_dirichlet.h
+++ b/acb_dirichlet.h
@ -46,6 +46,7 @@ typedef struct
    acb_struct s;
    mag_struct err;
    acb_ptr coeffs;
+    int deflate;
    slong A;
    slong N;
    slong K;
@ -54,10 +55,11 @@ acb_dirichlet_hurwitz_precomp_struct;

 typedef acb_dirichlet_hurwitz_precomp_struct acb_dirichlet_hurwitz_precomp_t[1];

-void acb_dirichlet_hurwitz_precomp_init(acb_dirichlet_hurwitz_precomp_t pre, const acb_t s, slong A, slong K, slong N, slong prec);
+void acb_dirichlet_hurwitz_precomp_init(acb_dirichlet_hurwitz_precomp_t pre, const acb_t s, int deflate, slong A, slong K, slong N, slong prec);
 void acb_dirichlet_hurwitz_precomp_clear(acb_dirichlet_hurwitz_precomp_t pre);
 void acb_dirichlet_hurwitz_precomp_bound(mag_t res, const acb_t s, slong A, slong K, slong N);
 void acb_dirichlet_hurwitz_precomp_eval(acb_t res, const acb_dirichlet_hurwitz_precomp_t pre, ulong p, ulong q, slong prec);
+void acb_dirichlet_hurwitz_precomp_choose_param(ulong * A, ulong * K, ulong * N, const acb_t s, double num_eval, slong prec);

 void _acb_dirichlet_euler_product_real_ui(arb_t res, ulong s,
    const signed char * chi, int mod, int reciprocal, slong prec);
@ -122,7 +124,9 @@ void acb_dirichlet_jacobi_sum_ui(acb_t res, const dirichlet_group_t G, ulong a,

 void acb_dirichlet_l_euler_product(acb_t res, const acb_t s, const dirichlet_group_t G, const dirichlet_char_t chi, slong prec);

-void acb_dirichlet_l_hurwitz(acb_t res, const acb_t s, const dirichlet_group_t G, const dirichlet_char_t chi, slong prec);
+void acb_dirichlet_l_hurwitz(acb_t res, const acb_t s, const acb_dirichlet_hurwitz_precomp_t precomp,
+    const dirichlet_group_t G, const dirichlet_char_t chi, slong prec);
+
 void acb_dirichlet_l(acb_t res, const acb_t s, const dirichlet_group_t G, const dirichlet_char_t chi, slong prec);

 void acb_dirichlet_l_jet(acb_ptr res, const acb_t s, const dirichlet_group_t G, const dirichlet_char_t chi, int deflate, slong len, slong prec);
--- a/acb_dirichlet/hurwitz_precomp_choose_param.c
+++ b/acb_dirichlet/hurwitz_precomp_choose_param.c
@ -0,0 +1,100 @@
+/*
+    Copyright (C) 2016 Fredrik Johansson
+
+    This file is part of Arb.
+
+    Arb is free software: you can redistribute it and/or modify it under
+    the terms of the GNU Lesser General Public License (LGPL) as published
+    by the Free Software Foundation; either version 2.1 of the License, or
+    (at your option) any later version.  See <http://www.gnu.org/licenses/>.
+*/
+
+#include "acb_dirichlet.h"
+
+void
+acb_dirichlet_hurwitz_precomp_choose_param(ulong * _A, ulong *_K, ulong *_N,
+        const acb_t s, double num_eval, slong prec)
+{
+    double MUL_COST, POW_COST, ZETA_COST, height, abss, cost, best_cost;
+    ulong A, K, N, best_A, best_K, best_N;
+    mag_t err;
+
+    /* Default algorithm: separate evaluations with no precomputation. */
+    best_A = best_K = best_N = 0;
+    *_A = *_K = *_N = 0;
+
+    /* This is pointless. */
+    if (num_eval < 10)
+        return;
+
+    /* Precomputation fails at nonpositive integers. */
+    if (acb_contains_int(s) && !arb_is_positive(acb_realref(s)))
+        return;
+
+    prec = FLINT_MAX(prec, 8);
+    height = arf_get_d(arb_midref(acb_imagref(s)), ARF_RND_DOWN);
+    height = fabs(height);
+    abss = arf_get_d(arb_midref(acb_realref(s)), ARF_RND_DOWN);
+    abss = sqrt(abss*abss + height*height);
+
+    /* Relative evaluation time, estimated empirically. */
+    MUL_COST = 1.0;
+    POW_COST = 10.0 + FLINT_MIN(0.005 * prec, 200.0);
+    ZETA_COST = 200.0 + 2.0*height + (3.0*prec + 0.0002*height*prec)*prec;
+
+    /* Cost for the default algorithm. */
+    best_cost = num_eval * ZETA_COST;
+    /* Give the default algorithm some more leeway. */
+    best_cost *= 0.5;
+    if (acb_is_int(s))
+        best_cost *= 0.5;
+
+    mag_init(err);
+
+    for (N = 1; N <= FLINT_MIN(num_eval, 2048); N = FLINT_MAX(N+1, N*1.2))
+    {
+        /* AN should be at least as large as |s| */
+        A = FLINT_MAX(abss / N + 1.0, 1);
+
+        /* Estimate K based on asymptotics for the error term. We will
+           have to adjust up by actually computing the error bound. */
+        K = FLINT_MAX(prec * log(2) / (log(2*A*N) + 1.0) + 1.0, 1);
+
+        for ( ; K < num_eval; K = FLINT_MAX(K+1, K*1.2))
+        {
+            /* Too much space. */
+            if (_acb_vec_estimate_allocated_bytes(N * K, prec) > 1e9)
+                break;
+
+            acb_dirichlet_hurwitz_precomp_bound(err, s, A, K, N);
+
+            cost = (K * num_eval) * MUL_COST +     /* polynomial evaluation */
+                   (A * num_eval) * POW_COST +     /* power sum */
+                   (N * K) * ZETA_COST;            /* precomputation cost */
+
+            /* The accuracy is good enough. */
+            if (mag_cmp_2exp_si(err, -prec) <= 0)
+            {
+                if (cost < best_cost)
+                {
+                    best_cost = cost;
+                    best_A = A;
+                    best_K = K;
+                    best_N = N;
+                }
+                break;
+            }
+
+            /* It will only get worse from here. */
+            if (cost > best_cost)
+                break;
+        }
+    }
+
+    *_A = best_A;
+    *_K = best_K;
+    *_N = best_N;
+
+    mag_clear(err);
+}
+
--- a/acb_dirichlet/hurwitz_precomp_init.c
+++ b/acb_dirichlet/hurwitz_precomp_init.c
@ -10,16 +10,18 @@
 */

 #include "acb_dirichlet.h"
+#include "acb_poly.h"

 void
 acb_dirichlet_hurwitz_precomp_init(acb_dirichlet_hurwitz_precomp_t pre,
-        const acb_t s, slong A, slong K, slong N, slong prec)
+        const acb_t s, int deflate, slong A, slong K, slong N, slong prec)
 {
    slong i, k;

    if (A < 1 || K < 1 || N < 1)
        abort();

+    pre->deflate = deflate;
    pre->A = A;
    pre->K = K;
    pre->N = N;
@ -61,7 +63,12 @@ acb_dirichlet_hurwitz_precomp_init(acb_dirichlet_hurwitz_precomp_t pre,
            for (k = 0; k < K; k++)
            {
                acb_add_ui(t, s, k, prec);
-                acb_hurwitz_zeta(t, t, a, prec);
+
+                if (deflate && k == 0)
+                    _acb_poly_zeta_cpx_series(t, t, a, 1, 1, prec);
+                else
+                    acb_hurwitz_zeta(t, t, a, prec);
+
                acb_mul(pre->coeffs + i * K + k,
                        pre->coeffs + i * K + k, t, prec);
            }
--- a/acb_dirichlet/l.c
+++ b/acb_dirichlet/l.c
@ -23,7 +23,23 @@ acb_dirichlet_l_general(acb_t res, const acb_t s,
    }
    else
    {
-        acb_dirichlet_l_hurwitz(res, s, G, chi, prec);
+        ulong A, K, N;
+        slong wp;
+
+        wp = prec + n_clog(G->phi_q, 2);
+        acb_dirichlet_hurwitz_precomp_choose_param(&A, &K, &N, s, G->phi_q, wp);
+
+        if (A == 0)
+        {
+            acb_dirichlet_l_hurwitz(res, s, NULL, G, chi, prec);
+        }
+        else
+        {
+            acb_dirichlet_hurwitz_precomp_t pre;
+            acb_dirichlet_hurwitz_precomp_init(pre, s, acb_is_one(s), A, K, N, wp);
+            acb_dirichlet_l_hurwitz(res, s, pre, G, chi, prec);
+            acb_dirichlet_hurwitz_precomp_clear(pre);
+        }
    }
 }

--- a/acb_dirichlet/l_hurwitz.c
+++ b/acb_dirichlet/l_hurwitz.c
@ -15,6 +15,7 @@

 void
 acb_dirichlet_l_hurwitz(acb_t res, const acb_t s,
+    const acb_dirichlet_hurwitz_precomp_t precomp,
    const dirichlet_group_t G, const dirichlet_char_t chi, slong prec)
 {
    ulong order, chin, mult;
@ -48,18 +49,26 @@ acb_dirichlet_l_hurwitz(acb_t res, const acb_t s,
    order = dirichlet_order_char(G, chi);
    mult = G->expo / order;
    z = _acb_vec_init(order);
+    /* todo: use roots object */
    _acb_vec_nth_roots(z, order, prec);

    do {
        chin = dirichlet_pairing_char(G, chi, cn) / mult;

-        acb_set_ui(a, cn->n);
-        acb_div_ui(a, a, G->q, prec);
+        if (precomp == NULL)
+        {
+            acb_set_ui(a, cn->n);
+            acb_div_ui(a, a, G->q, prec);

-        if (deflate == 0)
-            acb_hurwitz_zeta(u, s, a, prec);
+            if (deflate == 0)
+                acb_hurwitz_zeta(u, s, a, prec);
+            else
+                _acb_poly_zeta_cpx_series(u, s, a, 1, 1, prec);
+        }
        else
-            _acb_poly_zeta_cpx_series(u, s, a, 1, 1, prec);
+        {
+            acb_dirichlet_hurwitz_precomp_eval(u, precomp, cn->n, G->q, prec);
+        }

        acb_addmul(t, z + chin, u, prec);

--- a/acb_dirichlet/test/t-hurwitz_precomp.c
+++ b/acb_dirichlet/test/t-hurwitz_precomp.c
@ -10,6 +10,7 @@
 */

 #include "acb_dirichlet.h"
+#include "acb_poly.h"

 int main()
 {
@ -27,12 +28,14 @@ int main()
        ulong p, q;
        slong prec1, prec2, A, K, N, i;
        acb_dirichlet_hurwitz_precomp_t pre;
+        int deflate;

        prec1 = 2 + n_randint(state, 100);
        prec2 = 2 + n_randint(state, 100);
        A = 1 + n_randint(state, 10);
        K = 1 + n_randint(state, 10);
        N = 1 + n_randint(state, 10);
+        deflate = (n_randint(state, 3) == 0);

        acb_init(s);
        acb_init(a);
@ -40,7 +43,7 @@ int main()
        acb_init(z2);

        acb_randtest(s, state, 1 + n_randint(state, 200), 2);
-        acb_dirichlet_hurwitz_precomp_init(pre, s, A, K, N, prec1);
+        acb_dirichlet_hurwitz_precomp_init(pre, s, deflate, A, K, N, prec1);

        for (i = 0; i < 10; i++)
        {
@ -51,7 +54,10 @@ int main()

            acb_set_ui(a, p);
            acb_div_ui(a, a, q, prec2);
-            acb_hurwitz_zeta(z2, s, a, prec2);
+            if (deflate)
+                _acb_poly_zeta_cpx_series(z2, s, a, 1, 1, prec2);
+            else
+                acb_hurwitz_zeta(z2, s, a, prec2);

            if (!acb_overlaps(z1, z2))
            {
--- a/acb_dirichlet/test/t-l.c
+++ b/acb_dirichlet/test/t-l.c
@ -46,7 +46,7 @@ int main()
        else
            acb_randtest(s, state, 2 + n_randint(state, 200), 2);

-        acb_dirichlet_l_hurwitz(t, s, G, chi, prec);
+        acb_dirichlet_l_hurwitz(t, s, NULL, G, chi, prec);
        acb_dirichlet_l(u, s, G, chi, prec);

        if (!acb_overlaps(t, u))
--- a/acb_dirichlet/test/t-l_euler_product.c
+++ b/acb_dirichlet/test/t-l_euler_product.c
@ -54,7 +54,7 @@ int main()
        }

        if (n_randint(state, 2))
-            acb_dirichlet_l_hurwitz(t, s, G, chi, prec);
+            acb_dirichlet_l_hurwitz(t, s, NULL, G, chi, prec);
        else
            acb_dirichlet_l_euler_product(t, s, G, chi, prec * 1.5);

--- a/acb_dirichlet/test/t-l_hurwitz.c
+++ b/acb_dirichlet/test/t-l_hurwitz.c
@ -112,7 +112,7 @@ int main()
                abort();
            }

-            acb_dirichlet_l_hurwitz(res, x + j, G, chi, prec + 10);
+            acb_dirichlet_l_hurwitz(res, x + j, NULL, G, chi, prec + 10);

            if (!acb_contains(ref, res))
            {
--- a/doc/source/acb_dirichlet.rst
+++ b/doc/source/acb_dirichlet.rst
@ -179,7 +179,7 @@ Hurwitz zeta function

 .. type:: acb_dirichlet_hurwitz_precomp_t

-.. function:: void acb_dirichlet_hurwitz_precomp_init(acb_dirichlet_hurwitz_precomp_t pre, const acb_t s, ulong A, ulong K, ulong N, slong prec)
+.. function:: void acb_dirichlet_hurwitz_precomp_init(acb_dirichlet_hurwitz_precomp_t pre, const acb_t s, int deflate, ulong A, ulong K, ulong N, slong prec)

    Precomputes a grid of Taylor polynomials for fast evaluation of
    `\zeta(s,a)` on `a \in (0,1]` with fixed *s*.
@ -195,10 +195,22 @@ Hurwitz zeta function
    and slows convergence, *AN* should be at least roughly as large as `|s|`,
    e.g. it is a good idea to have at least `AN > 0.5 |s|`.

+    If *deflate* is set, the deflated Hurwitz zeta function is used,
+    removing the pole at `s = 1`.
+
 .. function:: void acb_dirichlet_hurwitz_precomp_clear(acb_dirichlet_hurwitz_precomp_t pre)

    Clears the precomputed data.

+.. function:: void acb_dirichler_hurwitz_precomp_choose_param(ulong * A, ulong * K, ulong * N, const acb_t s, double num_eval, slong prec)
+
+    Chooses precomputation parameters *A*, *K* and *N* to minimize
+    the cost of *num_eval* evaluations of the Hurwitz zeta function
+    at argument *s* to precision *prec*.
+    If it is estimated that evaluating each Hurwitz zeta function from
+    scratch would be better than performing a precomputation, *A*, *K* and *N*
+    are all set to 0.
+
 .. function:: void acb_dirichlet_hurwitz_precomp_bound(mag_t res, const acb_t s, ulong A, ulong K, ulong N)

    Computes an upper bound for the truncation error (not accounting for
@ -344,7 +356,7 @@ L-functions

   - The default version computes it via the gauss sum.

-.. function:: void acb_dirichlet_l_hurwitz(acb_t res, const acb_t s, const dirichlet_group_t G, const dirichlet_char_t chi, slong prec)
+.. function:: void acb_dirichlet_l_hurwitz(acb_t res, const acb_t s, const acb_dirichlet_hurwitz_precomp_t precomp, const dirichlet_group_t G, const dirichlet_char_t chi, slong prec)

    Computes `L(s,\chi)` using decomposition in terms of the Hurwitz zeta function

@ -355,7 +367,9 @@ L-functions
    If `s = 1` and `\chi` is non-principal, the deflated Hurwitz zeta function
    is used to avoid poles.

-    This formula is slow for large *q*.
+    If *precomp* is *NULL*, each Hurwitz zeta function value is computed
+    directly. If a pre-initialized *precomp* object is provided, this will be
+    used instead to evaluate the Hurwitz zeta function.

 .. function:: void acb_dirichlet_l_euler_product(acb_t res, const acb_t s, const dirichlet_group_t G, const dirichlet_char_t chi, slong prec)