diff --git a/bernoulli.h b/bernoulli.h index 68d37266..0c6c144b 100644 --- a/bernoulli.h +++ b/bernoulli.h @@ -93,6 +93,7 @@ void bernoulli_rev_next(fmpz_t numer, fmpz_t denom, bernoulli_rev_t iter); void bernoulli_rev_clear(bernoulli_rev_t iter); +void bernoulli_fmpq_vec_no_cache(fmpq * res, ulong a, slong num); #define BERNOULLI_ENSURE_CACHED(n) \ do { \ diff --git a/bernoulli/cache_compute.c b/bernoulli/cache_compute.c index b6a7c472..b114e4a6 100644 --- a/bernoulli/cache_compute.c +++ b/bernoulli/cache_compute.c @@ -31,43 +31,34 @@ bernoulli_cleanup(void) void bernoulli_cache_compute(slong n) { - if (bernoulli_cache_num < n) + slong old_num = bernoulli_cache_num; + + if (old_num < n) { slong i, new_num; - bernoulli_rev_t iter; - if (bernoulli_cache_num == 0) + if (old_num == 0) { flint_register_cleanup_function(bernoulli_cleanup); } if (n <= 128) - new_num = FLINT_MAX(bernoulli_cache_num + 32, n); + new_num = FLINT_MAX(old_num + 32, n); else - new_num = FLINT_MAX(bernoulli_cache_num + 128, n); + new_num = FLINT_MAX(old_num + 128, n); bernoulli_cache = flint_realloc(bernoulli_cache, new_num * sizeof(fmpq)); - for (i = bernoulli_cache_num; i < new_num; i++) + for (i = old_num; i < new_num; i++) fmpq_init(bernoulli_cache + i); if (new_num <= 128) { + /* todo: use recursion, but only compute new entries */ arith_bernoulli_number_vec(bernoulli_cache, new_num); } else { - i = new_num - 1; - i -= (i % 2); - bernoulli_rev_init(iter, i); - for ( ; i >= bernoulli_cache_num; i -= 2) - { - bernoulli_rev_next(fmpq_numref(bernoulli_cache + i), - fmpq_denref(bernoulli_cache + i), iter); - } - bernoulli_rev_clear(iter); - - if (new_num > 1) - fmpq_set_si(bernoulli_cache + 1, -1, 2); + bernoulli_fmpq_vec_no_cache(bernoulli_cache + old_num, old_num, new_num - old_num); } bernoulli_cache_num = new_num; diff --git a/bernoulli/fmpq_ui_multi_mod.c b/bernoulli/fmpq_ui_multi_mod.c index 398fa741..218066e7 100644 --- a/bernoulli/fmpq_ui_multi_mod.c +++ b/bernoulli/fmpq_ui_multi_mod.c @@ -91,8 +91,9 @@ crt_basecase(crt_res_t * res, slong a, slong b, crt_args_t * args) } } -static void -tree_crt(fmpz_t r, fmpz_t m, mp_srcptr residues, mp_srcptr primes, slong len) +/* todo: optimize basecase and move to flint */ +void +_arb_tree_crt(fmpz_t r, fmpz_t m, mp_srcptr residues, mp_srcptr primes, slong len) { crt_res_t res; crt_args_t args; @@ -234,7 +235,7 @@ _bernoulli_fmpq_ui_multi_mod(fmpz_t num, fmpz_t den, ulong n, double alpha) #endif fmpz_init(M); - tree_crt(num, M, residues, primes, num_primes); + _arb_tree_crt(num, M, residues, primes, num_primes); fmpz_mul(num, num, den); fmpz_mod(num, num, M); diff --git a/bernoulli/fmpq_vec.c b/bernoulli/fmpq_vec.c new file mode 100644 index 00000000..8474ee76 --- /dev/null +++ b/bernoulli/fmpq_vec.c @@ -0,0 +1,101 @@ +/* + Copyright (C) 2012 Fredrik Johansson + + This file is part of Arb. + + Arb is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. See . +*/ + +#include "bernoulli.h" + +static void +bernoulli_vec_compute_one_thread(fmpq * res, slong a, slong b) +{ + slong i; + bernoulli_rev_t iter; + + if (b <= a) + return; + + /* Even indices */ + i = b - 1; + i -= (i % 2); + bernoulli_rev_init(iter, i); + for ( ; i >= a; i -= 2) + bernoulli_rev_next(fmpq_numref(res + i - a), fmpq_denref(res + i - a), iter); + bernoulli_rev_clear(iter); + + /* Odd indices */ + for (i = b - 1 - (b % 2); i >= a; i -= 2) + { + if (i == 1) + fmpq_set_si(res + i - a, -1, 2); + else + fmpq_zero(res + i - a); + } +} + +typedef struct +{ + fmpq * res; + slong a; + slong b; + slong block_size; + slong num_blocks; +} +work_chunk_t; + +static void +worker(slong i, void * _work) +{ + work_chunk_t work = *((work_chunk_t *) _work); + slong a, b; + + /* reverse strided scheduling */ + i = work.num_blocks - 1 - i; + + a = work.a + i * work.block_size; + b = FLINT_MIN(a + work.block_size, work.b); + + bernoulli_vec_compute_one_thread(work.res + a - work.a, a, b); +} + +void +bernoulli_fmpq_vec_no_cache(fmpq * res, ulong a, slong num) +{ + if (a > (UWORD(1) << 31) || num > 1000000000) + { + flint_printf("bernoulli_fmpq_vec_no_cache: excessive input\n"); + flint_abort(); + } + + if (a == 0 && num <= 128) + { + arith_bernoulli_number_vec(res, num); + return; + } + + if (num < 200 || flint_get_num_threads() == 1) + { + bernoulli_vec_compute_one_thread(res, a, a + num); + } + else + { + slong num_blocks, block_size; + work_chunk_t work; + + block_size = FLINT_MAX((a + num) / 32, 128); + num_blocks = (num + block_size - 1) / block_size; + + work.res = res; + work.a = a; + work.b = a + num; + work.block_size = block_size; + work.num_blocks = num_blocks; + + flint_parallel_do(worker, &work, num_blocks, -1, FLINT_PARALLEL_STRIDED); + } +} diff --git a/bernoulli/test/t-fmpq_vec.c b/bernoulli/test/t-fmpq_vec.c new file mode 100644 index 00000000..105b087f --- /dev/null +++ b/bernoulli/test/t-fmpq_vec.c @@ -0,0 +1,98 @@ +/* + Copyright (C) 2012 Fredrik Johansson + + This file is part of Arb. + + Arb is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. See . +*/ + +#include +#include +#include "bernoulli.h" +#include "flint/ulong_extras.h" +#include "flint/nmod_poly.h" +#include "flint/nmod_vec.h" + +int main() +{ + slong iter; + flint_rand_t state; + slong n, bound; + mp_limb_t p, pinv, m1, m2; + nmod_poly_t A; + + flint_printf("fmpq_vec...."); + fflush(stdout); + flint_randinit(state); + + bound = 1000 * FLINT_MIN(1.0, arb_test_multiplier()); + + p = n_nextprime(UWORD(1) << (FLINT_BITS - 1), 0); + pinv = n_preinvert_limb(p); + + nmod_poly_init(A, p); + nmod_poly_set_coeff_ui(A, 1, 1); + nmod_poly_exp_series(A, A, bound); + nmod_poly_shift_right(A, A, 1); + nmod_poly_inv_series(A, A, bound); + + m1 = 1; + for (n = 0; n < A->length; n++) + { + A->coeffs[n] = n_mulmod2_preinv(A->coeffs[n], m1, p, pinv); + m1 = n_mulmod2_preinv(m1, n + 1, p, pinv); + } + + for (iter = 0; iter < 100 * arb_test_multiplier(); iter++) + { + slong a, b, num; + fmpq * res; + slong i; + + b = n_randint(state, bound); + a = n_randint(state, b + 1); + num = b - a; + + flint_set_num_threads(1 + n_randint(state, 4)); + + printf("a = %ld, b = %ld, num = %ld\n", a, b, num); + + res = _fmpq_vec_init(num); + + for (i = 0; i < num; i++) + fmpq_randtest(res + i, state, 100); + + bernoulli_fmpq_vec_no_cache(res, a, num); + + for (i = 0; i < num; i++) + { + m1 = fmpz_fdiv_ui(fmpq_numref(res + i), p); + m2 = fmpz_fdiv_ui(fmpq_denref(res + i), p); + m2 = n_invmod(m2, p); + m1 = n_mulmod2_preinv(m1, m2, p, pinv); + m2 = nmod_poly_get_coeff_ui(A, a + i); + + if (m1 != m2) + { + flint_printf("FAIL:\n"); + flint_printf("a = %wd, b = %wd, num = %wd, n = %wd\n", a, b, num, a + i); + flint_printf("m1 = %wu mod %wu\n", m1, p); + flint_printf("m2 = %wu mod %wu\n", m2, p); + flint_abort(); + } + } + + _fmpq_vec_clear(res, num); + } + + nmod_poly_clear(A); + + flint_randclear(state); + flint_cleanup_master(); + flint_printf("PASS\n"); + return EXIT_SUCCESS; +} + diff --git a/doc/source/bernoulli.rst b/doc/source/bernoulli.rst index b6e91fc8..89743527 100644 --- a/doc/source/bernoulli.rst +++ b/doc/source/bernoulli.rst @@ -58,6 +58,18 @@ Generation of Bernoulli numbers Frees all memory allocated internally by *iter*. +.. function:: void bernoulli_fmpq_vec_no_cache(fmpq * res, ulong a, slong num) + + Writes *num* consecutive Bernoulli numbers to *res* starting + with `B_a`. This function is not currently optimized for a small + count *num*. The entries are not read from or written + to the Bernoulli number cache; if retrieving a vector of + Bernoulli numbers is needed more than once, + use :func:`bernoulli_cache_compute` + followed by :func:`bernoulli_fmpq_ui` instead. + + This function is a wrapper for the *rev* iterators. It can use + multiple threads internally. Caching ------------------------------------------------------------------------------- @@ -74,6 +86,9 @@ Caching Makes sure that the Bernoulli numbers up to at least `B_{n-1}` are cached. Calling :func:`flint_cleanup()` frees the cache. + The cache is extended by calling :func:`bernoulli_fmpq_vec_no_cache` + internally. + Bounding -------------------------------------------------------------------------------