add space-efficient powsum_smooth version

This commit is contained in:
Fredrik Johansson 2016-10-22 00:26:10 +02:00
parent c4af23b1c5
commit 1b92a14213
5 changed files with 297 additions and 2 deletions

View file

@ -31,6 +31,7 @@ void acb_dirichlet_powsum_term(acb_ptr res, arb_t log_prev, ulong * prev,
const acb_t s, ulong k, int integer, int critical_line, slong len, slong prec);
void acb_dirichlet_powsum_sieved(acb_ptr z, const acb_t s, ulong n, slong len, slong prec);
void acb_dirichlet_powsum_smooth(acb_ptr z, const acb_t s, ulong n, slong len, slong prec);
typedef struct
{

View file

@ -0,0 +1,202 @@
/*
Copyright (C) 2016 Fredrik Johansson
This file is part of Arb.
Arb is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version. See <http://www.gnu.org/licenses/>.
*/
#include "acb_dirichlet.h"
#include "acb_poly.h"
/* bound number of 5-smooth numbers up to n; see http://oeis.org/A188425 */
/* this does not need to be tight */
static slong
smooth_bound(ulong n)
{
if (n <= 256) return 52;
if (n <= 65536) return 284;
if (n <= 16777216) return 836;
return 13283; /* ok up to 2^64 */
}
static ulong smul(ulong x, ulong y)
{
ulong hi, lo;
umul_ppmm(hi, lo, x, y);
if (hi)
return UWORD_MAX;
else
return lo;
}
static slong index(const ulong * v, ulong c)
{
slong i;
for (i = 0; ; i++)
if (v[i] == c)
return i;
}
void
acb_dirichlet_powsum_smooth(acb_ptr res, const acb_t s, ulong N, slong d, slong prec)
{
ulong * smooth; /* numbers 2^i 3^j 5^k <= N */
slong num_smooth; /* the number of such numbers */
acb_ptr sums; /* partial sum for each smooth prefactor */
acb_ptr powers; /* (3^j 5^k)^(-s) */
acb_ptr t; /* temporary */
arb_t log_n;
ulong x2, x3, x5, m, n, nprev;
slong i2, i3, i5, i, j, iu;
int critical_line, integer;
if (N <= 1)
{
acb_set_ui(res, N);
_acb_vec_zero(res + 1, d - 1);
return;
}
if (N >= UWORD_MAX - 2)
abort();
critical_line = arb_is_exact(acb_realref(s)) &&
(arf_cmp_2exp_si(arb_midref(acb_realref(s)), -1) == 0);
integer = arb_is_zero(acb_imagref(s)) && arb_is_int(acb_realref(s));
/* generate the smooth numbers */
smooth = flint_malloc(smooth_bound(N) * sizeof(ulong));
smooth[0] = 1;
num_smooth = 1;
x2 = 2; x3 = 3; x5 = 5;
i2 = i3 = i5 = 0;
while ((m = FLINT_MIN(FLINT_MIN(x2, x3), x5)) <= N)
{
smooth[num_smooth++] = m;
if (m == x2) x2 = smul(2, smooth[++i2]);
if (m == x3) x3 = smul(3, smooth[++i3]);
if (m == x5) x5 = smul(5, smooth[++i5]);
}
sums = _acb_vec_init(num_smooth * d);
powers = _acb_vec_init(num_smooth * d);
t = _acb_vec_init(d);
arb_init(log_n);
arb_zero(log_n);
nprev = 1;
/* add 1^-s */
for (i = 0; i < num_smooth; i++)
acb_one(sums + i * d);
/* compute all the non-smooth index terms (bulk of the work) */
for (n = 7; n <= N; n += 2)
{
if ((n % 3 != 0) && (n % 5 != 0))
{
acb_dirichlet_powsum_term(t, log_n, &nprev, s, n, integer, critical_line, d, prec);
_acb_vec_add(sums, sums, t, d, prec);
for (i = 1; i < num_smooth && (smooth[i] <= (N / n)); i++)
_acb_vec_add(sums + i * d, sums + i * d, t, d, prec);
}
}
/* compute 2^(-s) and powers (3^j 3^k)^(-s) */
arb_zero(log_n);
nprev = 1;
for (i = 1; i < num_smooth; i++)
{
n = smooth[i];
if (n == 2)
{
acb_dirichlet_powsum_term(powers + i * d, log_n, &nprev, s,
n, integer, critical_line, d, prec);
}
else if (n % 2 != 0)
{
if (n <= 5)
{
acb_dirichlet_powsum_term(powers + i * d, log_n, &nprev, s,
n, integer, critical_line, d, prec);
}
else if (n % 3 == 0)
{
i3 = index(smooth, 3);
iu = index(smooth, n / 3);
_acb_poly_mullow(powers + i * d,
powers + i3 * d, d, powers + iu * d, d, d, prec);
}
else
{
i5 = index(smooth, 5);
iu = index(smooth, n / 5);
_acb_poly_mullow(powers + i * d,
powers + i5 * d, d, powers + iu * d, d, d, prec);
}
}
}
/* merge the sums into the power-of-two sums */
for (i = 0; i < num_smooth; i++)
{
ulong u, v;
m = smooth[i];
u = m;
v = 0;
while ((u & 1) == 0)
{
u >>= 1;
v++;
}
if ((UWORD(1) << v) != m)
{
j = index(smooth, UWORD(1) << v);
iu = index(smooth, u);
if (u == 1)
{
_acb_vec_add(sums + j * d, sums + j * d, sums + i * d, d, prec);
}
else
{
_acb_poly_mullow(t, sums + i * d, d, powers + iu * d, d, d, prec);
_acb_vec_add(sums + j * d, sums + j * d, t, d, prec);
}
}
}
/* finally evaluate with respect to powers of 2 using horner */
_acb_vec_zero(res, d);
i2 = index(smooth, 2);
for (i = num_smooth - 1; i >= 0; i--)
{
n = smooth[i];
if ((n & (n - 1)) == 0)
{
_acb_poly_mullow(t, powers + i2 * d, d, res, d, d, prec);
_acb_vec_add(res, sums + i * d, t, d, prec);
}
}
_acb_vec_clear(sums, num_smooth * d);
_acb_vec_clear(powers, num_smooth * d);
_acb_vec_clear(t, d);
arb_clear(log_n);
flint_free(smooth);
}

View file

@ -0,0 +1,77 @@
/*
Copyright (C) 2013 Fredrik Johansson
This file is part of Arb.
Arb is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version. See <http://www.gnu.org/licenses/>.
*/
#include "acb_dirichlet.h"
#include "acb_poly.h"
int main()
{
slong iter;
flint_rand_t state;
flint_printf("powsum_smooth....");
fflush(stdout);
flint_randinit(state);
for (iter = 0; iter < 2000 * arb_test_multiplier(); iter++)
{
acb_t s;
acb_ptr z1, z2;
slong i, n, len, prec;
acb_init(s);
if (n_randint(state, 2))
{
acb_randtest(s, state, 1 + n_randint(state, 200), 3);
}
else
{
arb_set_ui(acb_realref(s), 1);
arb_mul_2exp_si(acb_realref(s), acb_realref(s), -1);
arb_randtest(acb_imagref(s), state, 1 + n_randint(state, 200), 4);
}
prec = 2 + n_randint(state, 200);
n = n_randtest(state) % 500;
len = 1 + n_randint(state, 4);
z1 = _acb_vec_init(len);
z2 = _acb_vec_init(len);
acb_dirichlet_powsum_sieved(z1, s, n, len, prec);
acb_dirichlet_powsum_smooth(z2, s, n, len, prec);
for (i = 0; i < len; i++)
{
if (!acb_overlaps(z1 + i, z2 + i))
{
flint_printf("FAIL: overlap\n\n");
flint_printf("iter = %wd\n", iter);
flint_printf("n = %wd, prec = %wd, len = %wd, i = %wd\n\n", n, prec, len, i);
flint_printf("s = "); acb_printd(s, prec / 3.33); flint_printf("\n\n");
flint_printf("z1 = "); acb_printd(z1 + i, prec / 3.33); flint_printf("\n\n");
flint_printf("z2 = "); acb_printd(z2 + i, prec / 3.33); flint_printf("\n\n");
abort();
}
}
acb_clear(s);
_acb_vec_clear(z1, len);
_acb_vec_clear(z2, len);
}
flint_randclear(state);
flint_cleanup();
flint_printf("PASS\n");
return EXIT_SUCCESS;
}

View file

@ -10,6 +10,7 @@
*/
#include "acb_poly.h"
#include "acb_dirichlet.h"
/* res = src * (c + x) */
void _acb_poly_mullow_cpx(acb_ptr res, acb_srcptr src, slong len, const acb_t c, slong trunc, slong prec)
@ -50,8 +51,10 @@ _acb_poly_zeta_em_sum(acb_ptr z, const acb_t s, const acb_t a, int deflate, ulon
acb_one(one);
/* sum 1/(k+a)^(s+x) */
if (acb_is_one(a) && d <= 3 && _acb_vec_estimate_allocated_bytes(d * N / 6, prec) < SIEVE_ALLOC_LIMIT)
_acb_poly_powsum_one_series_sieved(sum, s, N, d, prec);
if (acb_is_one(a) && d <= 2 && _acb_vec_estimate_allocated_bytes(d * N / 6, prec) < SIEVE_ALLOC_LIMIT)
acb_dirichlet_powsum_sieved(sum, s, N, d, prec);
else if (acb_is_one(a) && d <= 4) /* todo: also better for slightly larger d, if N and prec large enough */
acb_dirichlet_powsum_smooth(sum, s, N, d, prec);
else if (N > 50 && flint_get_num_threads() > 1)
_acb_poly_powsum_series_naive_threaded(sum, s, a, one, N, d, prec);
else

View file

@ -51,6 +51,18 @@ Truncated L-series and power sums
power series multiplications, it is only faster than the naive
algorithm when *len* is small.
.. function:: void acb_dirichlet_powsum_smooth(acb_ptr res, const acb_t s, ulong n, slong len, slong prec)
Sets *res* to `\sum_{k=1}^n k^{-(s+x)}`
as a power series in *x* truncated to length *len*.
This function performs partial sieving by adding multiples of 5-smooth *k*
into separate buckets. Asymptotically, this requires computing 4/15
of the powers, which is slower than *sieved*, but only requires
logarithmic extra space. It is also faster for large *len*, since most
power series multiplications are traded for additions.
A slightly bigger gain for larger *n* could be achieved by using more
small prime factors, at the expense of space.
Hurwitz zeta function
-------------------------------------------------------------------------------