faster Bernoulli number generation

This commit is contained in:
Fredrik Johansson 2012-12-19 14:04:00 +01:00
parent 2ba582364a
commit 76a4b8dcc1
13 changed files with 641 additions and 34 deletions

View file

@ -102,5 +102,5 @@ build/%.lo: %.c
build/%.o: %.c
$(CC) -fPIC $(CFLAGS) $(INCS) -c $< -o $@
BUILD_DIRS = fmpr fmprb fmprb_poly fmprb_mat fmpz_holonomic fmpcb fmpcb_poly fmpcb_mat
BUILD_DIRS = fmpr fmprb fmprb_poly fmprb_mat fmpz_holonomic fmpcb fmpcb_poly fmpcb_mat bernoulli

107
bernoulli.h Normal file
View file

@ -0,0 +1,107 @@
/*=============================================================================
This file is part of ARB.
ARB is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
ARB is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ARB; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
=============================================================================*/
/******************************************************************************
Copyright (C) 2012 Fredrik Johansson
******************************************************************************/
#ifndef BERNOULLI_H
#define BERNOULLI_H
#include <math.h>
#include "flint.h"
#include "fmpz.h"
#include "fmpz_vec.h"
#include "fmpq.h"
#include "arith.h"
#include "fmprb.h"
extern long bernoulli_cache_num;
extern fmpq * bernoulli_cache;
void bernoulli_cache_compute(long n);
/*
Crude bound for the bits in d(n) = denom(B_n).
By von Staudt-Clausen, d(n) = prod_{p-1 | n} p
<= prod_{k | n} 2k
<= n^{sigma_0(n)}.
We get a more accurate estimate taking the square root of this.
Further, at least for sufficiently large n,
sigma_0(n) < exp(1.066 log(n) / log(log(n))).
*/
static __inline__ long denom_size(long n)
{
return 0.5 * 1.4427 * log(n) * pow(n, 1.066 / log(log(n)));
}
static __inline__ long zeta_terms(ulong s, long prec)
{
long N;
N = pow(2.0, (prec + 1.0) / (s - 1.0));
N += ((N % 2) == 0);
return N;
}
static __inline__ long power_prec(long i, ulong s1, long wp)
{
long p = wp - s1 * log(i) * 1.44269504088896341;
return FLINT_MAX(p, 10);
}
/* we should technically add O(log(n)) guard bits, but this is unnecessary
in practice since the denominator estimate is quite a bit larger
than the true denominators
*/
static __inline__ long global_prec(ulong nmax)
{
return arith_bernoulli_number_size(nmax) + denom_size(nmax);
}
/* avoid potential numerical problems for very small n */
#define bernoulli_rev_MIN 32
typedef struct
{
long alloc;
long prec;
long max_power;
fmpz * powers;
fmpz_t pow_error;
fmprb_t prefactor;
fmprb_t two_pi_squared;
ulong n;
}
bernoulli_rev_struct;
typedef bernoulli_rev_struct bernoulli_rev_t[1];
void bernoulli_rev_init(bernoulli_rev_t iter, ulong nmax);
void bernoulli_rev_next(fmpz_t numer, fmpz_t denom, bernoulli_rev_t iter);
void bernoulli_rev_clear(bernoulli_rev_t iter);
#endif

43
bernoulli/Makefile Normal file
View file

@ -0,0 +1,43 @@
SOURCES = $(wildcard *.c)
OBJS = $(patsubst %.c, $(BUILD_DIR)/%.o, $(SOURCES))
LIB_OBJS = $(patsubst %.c, $(BUILD_DIR)/%.lo, $(SOURCES))
TEST_SOURCES = $(wildcard test/*.c)
PROF_SOURCES = $(wildcard profile/*.c)
TUNE_SOURCES = $(wildcard tune/*.c)
TESTS = $(patsubst %.c, %, $(TEST_SOURCES))
PROFS = $(patsubst %.c, %, $(PROF_SOURCES))
TUNE = $(patsubst %.c, %, $(TUNE_SOURCES))
all: $(OBJS)
library: $(LIB_OBJS)
profile:
$(foreach prog, $(PROFS), $(CC) -O2 -std=c99 $(INCS) $(prog).c ../profiler.o -o $(BUILD_DIR)/$(prog) $(LIBS) -lflint;)
tune: $(TUNE_SOURCES)
$(foreach prog, $(TUNE), $(CC) -O2 -std=c99 $(INCS) $(prog).c -o $(BUILD_DIR)/$(prog) $(LIBS) -lflint;)
$(BUILD_DIR)/%.o: %.c
$(CC) $(CFLAGS) -c $(INCS) $< -o $@
$(BUILD_DIR)/%.lo: %.c
$(CC) -fPIC $(CFLAGS) $(INCS) -c $< -o $@
clean:
rm -rf $(BUILD_DIR)
check: library
$(foreach prog, $(TESTS), $(CC) $(CFLAGS) $(INCS) $(prog).c -o $(BUILD_DIR)/$(prog) $(LIBS) -lflint;)
$(foreach prog, $(TESTS), $(BUILD_DIR)/$(prog);)
.PHONY: profile clean check all

61
bernoulli/cache_compute.c Normal file
View file

@ -0,0 +1,61 @@
/*=============================================================================
This file is part of ARB.
ARB is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
ARB is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ARB; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
=============================================================================*/
/******************************************************************************
Copyright (C) 2012 Fredrik Johansson
******************************************************************************/
#include "bernoulli.h"
long bernoulli_cache_num;
fmpq * bernoulli_cache;
void
bernoulli_cache_compute(long n)
{
if (bernoulli_cache_num < n)
{
long i, new_num;
bernoulli_rev_t iter;
new_num = FLINT_MAX(bernoulli_cache_num * 2, n);
new_num = n;
bernoulli_cache = flint_realloc(bernoulli_cache, new_num * sizeof(fmpq));
for (i = n; i < new_num; i++)
fmpq_init(bernoulli_cache + i);
i = new_num - 1;
i -= (i % 2);
bernoulli_rev_init(iter, i);
for ( ; i >= 0; i -= 2)
bernoulli_rev_next(fmpq_numref(bernoulli_cache + i),
fmpq_denref(bernoulli_cache + i), i);
bernoulli_rev_clear(iter);
if (new_num > 1)
fmpq_set_si(bernoulli_cache + i, -1, 2);
bernoulli_cache_num = new_num;
}
}

39
bernoulli/rev_clear.c Normal file
View file

@ -0,0 +1,39 @@
/*=============================================================================
This file is part of ARB.
ARB is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
ARB is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ARB; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
=============================================================================*/
/******************************************************************************
Copyright (C) 2012 Fredrik Johansson
******************************************************************************/
#include "bernoulli.h"
void
bernoulli_rev_clear(bernoulli_rev_t iter)
{
if (iter->alloc != 0)
{
_fmpz_vec_clear(iter->powers, iter->alloc);
fmpz_clear(iter->pow_error);
fmprb_clear(iter->prefactor);
fmprb_clear(iter->two_pi_squared);
}
}

85
bernoulli/rev_init.c Normal file
View file

@ -0,0 +1,85 @@
/*=============================================================================
This file is part of ARB.
ARB is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
ARB is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ARB; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
=============================================================================*/
/******************************************************************************
Copyright (C) 2012 Fredrik Johansson
******************************************************************************/
#include "bernoulli.h"
void
bernoulli_rev_init(bernoulli_rev_t iter, ulong nmax)
{
long j;
fmpz_t t;
fmprb_t x;
int round1, round2;
long wp;
nmax -= (nmax % 2);
iter->n = nmax;
iter->alloc = 0;
if (nmax < bernoulli_rev_MIN)
return;
iter->prec = wp = global_prec(nmax);
iter->max_power = zeta_terms(nmax, iter->prec);
iter->alloc = iter->max_power + 1;
iter->powers = _fmpz_vec_init(iter->alloc);
fmpz_init(iter->pow_error);
fmprb_init(iter->prefactor);
fmprb_init(iter->two_pi_squared);
fmprb_init(x);
fmpz_init(t);
/* precompute powers */
for (j = 3; j <= iter->max_power; j += 2)
{
fmprb_ui_pow_ui(x, j, nmax, power_prec(j, nmax, wp));
fmprb_ui_div(x, 1UL, x, power_prec(j, nmax, wp));
round1 = fmpr_get_fmpz_fixed_si(t, fmprb_midref(x), -wp);
fmpz_set(iter->powers + j, t);
/* error: the radius, plus two roundings */
round2 = fmpr_get_fmpz_fixed_si(t, fmprb_radref(x), -wp);
fmpz_add_ui(t, t, (round1 != 0) + (round2 != 0));
if (fmpz_cmp(iter->pow_error, t) < 0)
fmpz_set(iter->pow_error, t);
}
/* precompute (2pi)^2 and 2*(n!)/(2pi)^n */
fmprb_fac_ui(iter->prefactor, nmax, wp);
fmprb_mul_2exp_si(iter->prefactor, iter->prefactor, 1);
fmprb_const_pi(x, wp);
fmprb_mul_2exp_si(x, x, 1);
fmprb_mul(iter->two_pi_squared, x, x, wp);
fmprb_pow_ui(x, iter->two_pi_squared, nmax / 2, wp);
fmprb_div(iter->prefactor, iter->prefactor, x, wp);
fmpz_clear(t);
fmprb_clear(x);
}

143
bernoulli/rev_next.c Normal file
View file

@ -0,0 +1,143 @@
/*=============================================================================
This file is part of ARB.
ARB is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
ARB is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ARB; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
=============================================================================*/
/******************************************************************************
Copyright (C) 2012 Fredrik Johansson
******************************************************************************/
#include "bernoulli.h"
void
bernoulli_rev_next(fmpz_t numer, fmpz_t denom, bernoulli_rev_t iter)
{
ulong n;
long j, wp;
fmpz_t sum;
fmpr_t err;
fmprb_t z, h;
n = iter->n;
wp = iter->prec;
if (n < bernoulli_rev_MIN)
{
_arith_bernoulli_number(numer, denom, n);
if (n != 0)
iter->n -= 2;
return;
}
fmpz_init(sum);
fmpr_init(err);
fmprb_init(z);
fmprb_init(h);
/* add all odd powers */
fmpz_zero(sum);
for (j = iter->max_power; j >= 3; j -= 2)
fmpz_add(sum, sum, iter->powers + j);
fmprb_set_fmpz(z, sum);
/* bound numerical error from the powers */
fmpz_mul_ui(sum, iter->pow_error, iter->max_power / 2);
fmpr_set_fmpz(err, sum);
fmprb_add_error_fmpr(z, err);
fmprb_mul_2exp_si(z, z, -wp);
fmprb_add_ui(z, z, 1, wp);
/* add truncation error: sum_{k > N} 1/k^n <= 1/N^(i-1) */
fmpr_set_ui(err, iter->max_power);
fmpr_pow_sloppy_ui(err, err, n - 1, FMPRB_RAD_PREC, FMPR_RND_DOWN);
fmpr_ui_div(err, 1, err, FMPRB_RAD_PREC, FMPR_RND_UP);
fmprb_add_error_fmpr(z, err);
/* convert zeta to Bernoulli number */
fmprb_div_2expm1_ui(h, z, n, wp);
fmprb_add(z, z, h, wp);
fmprb_mul(z, z, iter->prefactor, wp);
arith_bernoulli_number_denom(denom, n);
fmprb_mul_fmpz(z, z, denom, wp);
if (n % 4 == 0)
fmprb_neg(z, z);
/* printf("%ld: ", n); fmprb_printd(z, 5); printf("\n"); */
if (!fmprb_get_unique_fmpz(numer, z))
{
printf("warning: insufficient precision for B_%ld\n", n);
_arith_bernoulli_number(numer, denom, n);
}
/* update prefactor */
if (n > 0)
{
fmprb_mul(iter->prefactor, iter->prefactor, iter->two_pi_squared, wp);
fmprb_div_ui(iter->prefactor, iter->prefactor, n, wp);
fmprb_div_ui(iter->prefactor, iter->prefactor, n - 1, wp);
}
/* update powers */
for (j = 3; j <= iter->max_power; j += 2)
fmpz_mul2_uiui(iter->powers + j, iter->powers + j, j, j);
/* bound error after update */
fmpz_mul2_uiui(iter->pow_error, iter->pow_error,
iter->max_power, iter->max_power);
/* readjust precision */
if (n % 64 == 0 && n > bernoulli_rev_MIN)
{
long new_prec, new_max;
new_prec = global_prec(n);
new_max = zeta_terms(n, new_prec);
if (new_prec < iter->prec && new_max <= iter->max_power)
{
/* change precision of the powers */
for (j = 3; j <= new_max; j += 2)
fmpz_tdiv_q_2exp(iter->powers + j, iter->powers + j,
iter->prec - new_prec);
/* the error also changes precision */
fmpz_cdiv_q_2exp(iter->pow_error, iter->pow_error,
iter->prec - new_prec);
/* contribution of rounding error when changing the precision
of the powers */
fmpz_add_ui(iter->pow_error, iter->pow_error, 1);
/* speed improvement (could be skipped with better multiplication) */
fmprb_set_round(iter->two_pi_squared, iter->two_pi_squared, new_prec);
iter->max_power = new_max;
iter->prec = new_prec;
}
}
iter->n -= 2;
fmpz_clear(sum);
fmpr_clear(err);
fmprb_clear(z);
fmprb_clear(h);
}

112
bernoulli/test/t-rev.c Normal file
View file

@ -0,0 +1,112 @@
/*=============================================================================
This file is part of ARB.
ARB is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
ARB is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ARB; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
=============================================================================*/
/******************************************************************************
Copyright (C) 2012 Fredrik Johansson
******************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include "bernoulli.h"
#include "ulong_extras.h"
#include "nmod_poly.h"
#include "nmod_vec.h"
int main()
{
flint_rand_t state;
long nmax, n, bound, count;
mp_limb_t p, pinv, m1, m2;
nmod_poly_t A;
printf("rev....");
fflush(stdout);
flint_randinit(state);
bound = 100000;
p = n_nextprime(1UL << (FLINT_BITS - 1), 0);
pinv = n_preinvert_limb(p);
nmod_poly_init(A, p);
nmod_poly_set_coeff_ui(A, 1, 1);
nmod_poly_exp_series(A, A, bound);
nmod_poly_shift_right(A, A, 1);
nmod_poly_inv_series(A, A, bound);
m1 = 1;
for (n = 0; n < A->length; n++)
{
A->coeffs[n] = n_mulmod2_preinv(A->coeffs[n], m1, p, pinv);
m1 = n_mulmod2_preinv(m1, n + 1, p, pinv);
}
for (nmax = 0; nmax < bound; nmax = 1.5 * nmax + 2)
{
fmpz_t numer, denom;
bernoulli_rev_t iter;
fmpz_init(numer);
fmpz_init(denom);
nmax += (nmax % 2);
bernoulli_rev_init(iter, nmax);
if (nmax < 8000)
count = 4000;
else
count = 100;
/* printf("nmax = %ld, count = %ld\n", nmax, count); */
for (n = nmax; n >= 0 && count > 0; n -= 2, count--)
{
bernoulli_rev_next(numer, denom, iter);
m1 = fmpz_fdiv_ui(numer, p);
m2 = fmpz_fdiv_ui(denom, p);
m2 = n_invmod(m2, p);
m1 = n_mulmod2_preinv(m1, m2, p, pinv);
m2 = nmod_poly_get_coeff_ui(A, n);
if (m1 != m2)
{
printf("FAIL:\n");
printf("nmax = %ld, n = %ld\n", nmax, n);
printf("m1 = %lu mod %lu\n", m1, p);
printf("m2 = %lu mod %lu\n", m2, p);
abort();
}
}
bernoulli_rev_clear(iter);
fmpz_clear(numer);
fmpz_clear(denom);
}
flint_randclear(state);
_fmpz_cleanup();
printf("PASS\n");
return EXIT_SUCCESS;
}

46
doc/source/bernoulli.rst Normal file
View file

@ -0,0 +1,46 @@
**bernoulli.h** -- support code for Bernoulli numbers
===============================================================================
Generation of Bernoulli numbers
--------------------------------------------------------------------------------
.. type:: bernoulli_rev_t
.. function:: bernoulli_rev_init(bernrev_iter_t iter, ulong n)
.. function:: bernoulli_rev_next(fmpz_t numer, fmpz_t denom, bernrev_iter_t iter)
.. function:: bernoulli_rev_clear(bernrev_iter_t iter)
Generates a range of even-indexed Bernoulli numbers in reverse order,
i.e. generates `B_n, B_{n-2}, B_{n-4}, \ldots, B_0`.
The exact, minimal numerators and denominators are produced.
The Bernoulli numbers are computed by direct summation of the zeta series.
This is made fast by storing a table of powers (as done by Bloemen et al.
http://remcobloemen.nl/2009/11/even-faster-zeta-calculation.html).
As an optimization, we only include the odd powers, and use
fixed-point arithmetic.
The reverse iteration order is preferred for performance reasons,
as the powers can be updated using multiplications instead of divisions,
and we avoid having to periodically recompute terms to higher precision.
To generate Bernoulli numbers in the forward direction without having
to store all of them, one can split the desired range into smaller
blocks and compute each block with a single reverse pass.
Caching
-------------------------------------------------------------------------------
.. var:: long bernoulli_cache_num;
.. var:: fmpq * bernoulli_cache;
Global cache of Bernoulli numbers.
.. function:: void bernoulli_cache_compute(long n)
Makes sure that the Bernoulli numbers up to at least `B_{n-1}` are cached
globally. Warning: this function is not currently threadsafe.

View file

@ -36,6 +36,7 @@ Module documentation
fmpcb_poly.rst
fmpcb_mat.rst
fmpz_holonomic.rst
bernoulli.rst
Credits and references
::::::::::::::::::::::::

View file

@ -24,9 +24,7 @@
******************************************************************************/
#include "fmpcb.h"
void bernoulli_cache_compute(long n);
extern fmpq * bernoulli_cache;
#include "bernoulli.h"
void
fmpcb_zeta_em_sum(fmpcb_t z, const fmpcb_t s, ulong N, ulong M, long prec)

View file

@ -25,9 +25,7 @@
#include "fmpcb.h"
#include "fmpcb_poly.h"
void bernoulli_cache_compute(long n);
extern fmpq * bernoulli_cache;
#include "bernoulli.h"
/* res = src * (c + x) */
void _fmpcb_poly_mullow_cpx(fmpcb_struct * res, const fmpcb_struct * src, long len, const fmpcb_t c, long trunc, long prec)

View file

@ -26,33 +26,7 @@
#include <math.h>
#include "arith.h"
#include "fmprb.h"
/* TODO: the following helper functions should be moved to separate files,
and need test code */
long bernoulli_cache_num = 0;
fmpq * bernoulli_cache = NULL;
/* makes sure that b_0, b_1 ... b_{n-1} are cached
TODO: don't recompute from scratch if nearly large enough */
void
bernoulli_cache_compute(long n)
{
if (bernoulli_cache_num < n)
{
long new_num;
new_num = FLINT_MAX(bernoulli_cache_num * 2, n);
new_num = n;
_fmpz_vec_clear((fmpz *) bernoulli_cache, 2 * bernoulli_cache_num);
bernoulli_cache = (fmpq *) _fmpz_vec_init(2 * new_num);
arith_bernoulli_number_vec(bernoulli_cache, new_num);
bernoulli_cache_num = new_num;
}
}
#include "bernoulli.h"
void
fmprb_stirling_series_coeff(fmprb_t b, ulong k, long prec)