mirror of
https://github.com/vale981/arb
synced 2025-03-04 17:01:40 -05:00
add space-efficient powsum_smooth version
This commit is contained in:
parent
c4af23b1c5
commit
1b92a14213
5 changed files with 297 additions and 2 deletions
|
@ -31,6 +31,7 @@ void acb_dirichlet_powsum_term(acb_ptr res, arb_t log_prev, ulong * prev,
|
|||
const acb_t s, ulong k, int integer, int critical_line, slong len, slong prec);
|
||||
|
||||
void acb_dirichlet_powsum_sieved(acb_ptr z, const acb_t s, ulong n, slong len, slong prec);
|
||||
void acb_dirichlet_powsum_smooth(acb_ptr z, const acb_t s, ulong n, slong len, slong prec);
|
||||
|
||||
typedef struct
|
||||
{
|
||||
|
|
202
acb_dirichlet/powsum_smooth.c
Normal file
202
acb_dirichlet/powsum_smooth.c
Normal file
|
@ -0,0 +1,202 @@
|
|||
/*
|
||||
Copyright (C) 2016 Fredrik Johansson
|
||||
|
||||
This file is part of Arb.
|
||||
|
||||
Arb is free software: you can redistribute it and/or modify it under
|
||||
the terms of the GNU Lesser General Public License (LGPL) as published
|
||||
by the Free Software Foundation; either version 2.1 of the License, or
|
||||
(at your option) any later version. See <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "acb_dirichlet.h"
|
||||
#include "acb_poly.h"
|
||||
|
||||
/* bound number of 5-smooth numbers up to n; see http://oeis.org/A188425 */
|
||||
/* this does not need to be tight */
|
||||
static slong
|
||||
smooth_bound(ulong n)
|
||||
{
|
||||
if (n <= 256) return 52;
|
||||
if (n <= 65536) return 284;
|
||||
if (n <= 16777216) return 836;
|
||||
return 13283; /* ok up to 2^64 */
|
||||
}
|
||||
|
||||
static ulong smul(ulong x, ulong y)
|
||||
{
|
||||
ulong hi, lo;
|
||||
umul_ppmm(hi, lo, x, y);
|
||||
if (hi)
|
||||
return UWORD_MAX;
|
||||
else
|
||||
return lo;
|
||||
}
|
||||
|
||||
static slong index(const ulong * v, ulong c)
|
||||
{
|
||||
slong i;
|
||||
for (i = 0; ; i++)
|
||||
if (v[i] == c)
|
||||
return i;
|
||||
}
|
||||
|
||||
void
|
||||
acb_dirichlet_powsum_smooth(acb_ptr res, const acb_t s, ulong N, slong d, slong prec)
|
||||
{
|
||||
ulong * smooth; /* numbers 2^i 3^j 5^k <= N */
|
||||
slong num_smooth; /* the number of such numbers */
|
||||
acb_ptr sums; /* partial sum for each smooth prefactor */
|
||||
acb_ptr powers; /* (3^j 5^k)^(-s) */
|
||||
acb_ptr t; /* temporary */
|
||||
arb_t log_n;
|
||||
ulong x2, x3, x5, m, n, nprev;
|
||||
slong i2, i3, i5, i, j, iu;
|
||||
int critical_line, integer;
|
||||
|
||||
if (N <= 1)
|
||||
{
|
||||
acb_set_ui(res, N);
|
||||
_acb_vec_zero(res + 1, d - 1);
|
||||
return;
|
||||
}
|
||||
|
||||
if (N >= UWORD_MAX - 2)
|
||||
abort();
|
||||
|
||||
critical_line = arb_is_exact(acb_realref(s)) &&
|
||||
(arf_cmp_2exp_si(arb_midref(acb_realref(s)), -1) == 0);
|
||||
|
||||
integer = arb_is_zero(acb_imagref(s)) && arb_is_int(acb_realref(s));
|
||||
|
||||
/* generate the smooth numbers */
|
||||
smooth = flint_malloc(smooth_bound(N) * sizeof(ulong));
|
||||
smooth[0] = 1;
|
||||
num_smooth = 1;
|
||||
x2 = 2; x3 = 3; x5 = 5;
|
||||
i2 = i3 = i5 = 0;
|
||||
|
||||
while ((m = FLINT_MIN(FLINT_MIN(x2, x3), x5)) <= N)
|
||||
{
|
||||
smooth[num_smooth++] = m;
|
||||
if (m == x2) x2 = smul(2, smooth[++i2]);
|
||||
if (m == x3) x3 = smul(3, smooth[++i3]);
|
||||
if (m == x5) x5 = smul(5, smooth[++i5]);
|
||||
}
|
||||
|
||||
sums = _acb_vec_init(num_smooth * d);
|
||||
powers = _acb_vec_init(num_smooth * d);
|
||||
t = _acb_vec_init(d);
|
||||
arb_init(log_n);
|
||||
|
||||
arb_zero(log_n);
|
||||
nprev = 1;
|
||||
|
||||
/* add 1^-s */
|
||||
for (i = 0; i < num_smooth; i++)
|
||||
acb_one(sums + i * d);
|
||||
|
||||
/* compute all the non-smooth index terms (bulk of the work) */
|
||||
for (n = 7; n <= N; n += 2)
|
||||
{
|
||||
if ((n % 3 != 0) && (n % 5 != 0))
|
||||
{
|
||||
acb_dirichlet_powsum_term(t, log_n, &nprev, s, n, integer, critical_line, d, prec);
|
||||
_acb_vec_add(sums, sums, t, d, prec);
|
||||
|
||||
for (i = 1; i < num_smooth && (smooth[i] <= (N / n)); i++)
|
||||
_acb_vec_add(sums + i * d, sums + i * d, t, d, prec);
|
||||
}
|
||||
}
|
||||
|
||||
/* compute 2^(-s) and powers (3^j 3^k)^(-s) */
|
||||
arb_zero(log_n);
|
||||
nprev = 1;
|
||||
|
||||
for (i = 1; i < num_smooth; i++)
|
||||
{
|
||||
n = smooth[i];
|
||||
|
||||
if (n == 2)
|
||||
{
|
||||
acb_dirichlet_powsum_term(powers + i * d, log_n, &nprev, s,
|
||||
n, integer, critical_line, d, prec);
|
||||
}
|
||||
else if (n % 2 != 0)
|
||||
{
|
||||
if (n <= 5)
|
||||
{
|
||||
acb_dirichlet_powsum_term(powers + i * d, log_n, &nprev, s,
|
||||
n, integer, critical_line, d, prec);
|
||||
}
|
||||
else if (n % 3 == 0)
|
||||
{
|
||||
i3 = index(smooth, 3);
|
||||
iu = index(smooth, n / 3);
|
||||
_acb_poly_mullow(powers + i * d,
|
||||
powers + i3 * d, d, powers + iu * d, d, d, prec);
|
||||
}
|
||||
else
|
||||
{
|
||||
i5 = index(smooth, 5);
|
||||
iu = index(smooth, n / 5);
|
||||
_acb_poly_mullow(powers + i * d,
|
||||
powers + i5 * d, d, powers + iu * d, d, d, prec);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* merge the sums into the power-of-two sums */
|
||||
for (i = 0; i < num_smooth; i++)
|
||||
{
|
||||
ulong u, v;
|
||||
|
||||
m = smooth[i];
|
||||
u = m;
|
||||
v = 0;
|
||||
|
||||
while ((u & 1) == 0)
|
||||
{
|
||||
u >>= 1;
|
||||
v++;
|
||||
}
|
||||
|
||||
if ((UWORD(1) << v) != m)
|
||||
{
|
||||
j = index(smooth, UWORD(1) << v);
|
||||
iu = index(smooth, u);
|
||||
|
||||
if (u == 1)
|
||||
{
|
||||
_acb_vec_add(sums + j * d, sums + j * d, sums + i * d, d, prec);
|
||||
}
|
||||
else
|
||||
{
|
||||
_acb_poly_mullow(t, sums + i * d, d, powers + iu * d, d, d, prec);
|
||||
_acb_vec_add(sums + j * d, sums + j * d, t, d, prec);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* finally evaluate with respect to powers of 2 using horner */
|
||||
_acb_vec_zero(res, d);
|
||||
i2 = index(smooth, 2);
|
||||
|
||||
for (i = num_smooth - 1; i >= 0; i--)
|
||||
{
|
||||
n = smooth[i];
|
||||
|
||||
if ((n & (n - 1)) == 0)
|
||||
{
|
||||
_acb_poly_mullow(t, powers + i2 * d, d, res, d, d, prec);
|
||||
_acb_vec_add(res, sums + i * d, t, d, prec);
|
||||
}
|
||||
}
|
||||
|
||||
_acb_vec_clear(sums, num_smooth * d);
|
||||
_acb_vec_clear(powers, num_smooth * d);
|
||||
_acb_vec_clear(t, d);
|
||||
arb_clear(log_n);
|
||||
flint_free(smooth);
|
||||
}
|
||||
|
77
acb_dirichlet/test/t-powsum_smooth.c
Normal file
77
acb_dirichlet/test/t-powsum_smooth.c
Normal file
|
@ -0,0 +1,77 @@
|
|||
/*
|
||||
Copyright (C) 2013 Fredrik Johansson
|
||||
|
||||
This file is part of Arb.
|
||||
|
||||
Arb is free software: you can redistribute it and/or modify it under
|
||||
the terms of the GNU Lesser General Public License (LGPL) as published
|
||||
by the Free Software Foundation; either version 2.1 of the License, or
|
||||
(at your option) any later version. See <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "acb_dirichlet.h"
|
||||
#include "acb_poly.h"
|
||||
|
||||
int main()
|
||||
{
|
||||
slong iter;
|
||||
flint_rand_t state;
|
||||
|
||||
flint_printf("powsum_smooth....");
|
||||
fflush(stdout);
|
||||
|
||||
flint_randinit(state);
|
||||
|
||||
for (iter = 0; iter < 2000 * arb_test_multiplier(); iter++)
|
||||
{
|
||||
acb_t s;
|
||||
acb_ptr z1, z2;
|
||||
slong i, n, len, prec;
|
||||
|
||||
acb_init(s);
|
||||
|
||||
if (n_randint(state, 2))
|
||||
{
|
||||
acb_randtest(s, state, 1 + n_randint(state, 200), 3);
|
||||
}
|
||||
else
|
||||
{
|
||||
arb_set_ui(acb_realref(s), 1);
|
||||
arb_mul_2exp_si(acb_realref(s), acb_realref(s), -1);
|
||||
arb_randtest(acb_imagref(s), state, 1 + n_randint(state, 200), 4);
|
||||
}
|
||||
|
||||
prec = 2 + n_randint(state, 200);
|
||||
n = n_randtest(state) % 500;
|
||||
len = 1 + n_randint(state, 4);
|
||||
|
||||
z1 = _acb_vec_init(len);
|
||||
z2 = _acb_vec_init(len);
|
||||
|
||||
acb_dirichlet_powsum_sieved(z1, s, n, len, prec);
|
||||
acb_dirichlet_powsum_smooth(z2, s, n, len, prec);
|
||||
|
||||
for (i = 0; i < len; i++)
|
||||
{
|
||||
if (!acb_overlaps(z1 + i, z2 + i))
|
||||
{
|
||||
flint_printf("FAIL: overlap\n\n");
|
||||
flint_printf("iter = %wd\n", iter);
|
||||
flint_printf("n = %wd, prec = %wd, len = %wd, i = %wd\n\n", n, prec, len, i);
|
||||
flint_printf("s = "); acb_printd(s, prec / 3.33); flint_printf("\n\n");
|
||||
flint_printf("z1 = "); acb_printd(z1 + i, prec / 3.33); flint_printf("\n\n");
|
||||
flint_printf("z2 = "); acb_printd(z2 + i, prec / 3.33); flint_printf("\n\n");
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
acb_clear(s);
|
||||
_acb_vec_clear(z1, len);
|
||||
_acb_vec_clear(z2, len);
|
||||
}
|
||||
|
||||
flint_randclear(state);
|
||||
flint_cleanup();
|
||||
flint_printf("PASS\n");
|
||||
return EXIT_SUCCESS;
|
||||
}
|
|
@ -10,6 +10,7 @@
|
|||
*/
|
||||
|
||||
#include "acb_poly.h"
|
||||
#include "acb_dirichlet.h"
|
||||
|
||||
/* res = src * (c + x) */
|
||||
void _acb_poly_mullow_cpx(acb_ptr res, acb_srcptr src, slong len, const acb_t c, slong trunc, slong prec)
|
||||
|
@ -50,8 +51,10 @@ _acb_poly_zeta_em_sum(acb_ptr z, const acb_t s, const acb_t a, int deflate, ulon
|
|||
acb_one(one);
|
||||
|
||||
/* sum 1/(k+a)^(s+x) */
|
||||
if (acb_is_one(a) && d <= 3 && _acb_vec_estimate_allocated_bytes(d * N / 6, prec) < SIEVE_ALLOC_LIMIT)
|
||||
_acb_poly_powsum_one_series_sieved(sum, s, N, d, prec);
|
||||
if (acb_is_one(a) && d <= 2 && _acb_vec_estimate_allocated_bytes(d * N / 6, prec) < SIEVE_ALLOC_LIMIT)
|
||||
acb_dirichlet_powsum_sieved(sum, s, N, d, prec);
|
||||
else if (acb_is_one(a) && d <= 4) /* todo: also better for slightly larger d, if N and prec large enough */
|
||||
acb_dirichlet_powsum_smooth(sum, s, N, d, prec);
|
||||
else if (N > 50 && flint_get_num_threads() > 1)
|
||||
_acb_poly_powsum_series_naive_threaded(sum, s, a, one, N, d, prec);
|
||||
else
|
||||
|
|
|
@ -51,6 +51,18 @@ Truncated L-series and power sums
|
|||
power series multiplications, it is only faster than the naive
|
||||
algorithm when *len* is small.
|
||||
|
||||
.. function:: void acb_dirichlet_powsum_smooth(acb_ptr res, const acb_t s, ulong n, slong len, slong prec)
|
||||
|
||||
Sets *res* to `\sum_{k=1}^n k^{-(s+x)}`
|
||||
as a power series in *x* truncated to length *len*.
|
||||
This function performs partial sieving by adding multiples of 5-smooth *k*
|
||||
into separate buckets. Asymptotically, this requires computing 4/15
|
||||
of the powers, which is slower than *sieved*, but only requires
|
||||
logarithmic extra space. It is also faster for large *len*, since most
|
||||
power series multiplications are traded for additions.
|
||||
A slightly bigger gain for larger *n* could be achieved by using more
|
||||
small prime factors, at the expense of space.
|
||||
|
||||
Hurwitz zeta function
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue