diff --git a/doc/source/credits.rst b/doc/source/credits.rst index aa04a697..72c98db0 100644 --- a/doc/source/credits.rst +++ b/doc/source/credits.rst @@ -66,6 +66,8 @@ Bibliography .. [PS1973] M.S. Paterson and L. J. Stockmeyer, "On the number of nonscalar multiplications necessary to evaluate polynomials", SIAM J. Comput (1973) +.. [Smi2001] D.M. Smith, "Algorithm: Fortran 90 Software for Floating-Point Multiple Precision Arithmetic, Gamma and Related Functions", Transactions on Mathematical Software 27 (2001) 377-387, http://myweb.lmu.edu/dmsmith/toms2001.pdf + .. [Tak2000] D.Takahashi, "A fast algorithm for computing large Fibonacci numbers", Information Processing Letters 75 (2000) 243-246. http://www.ii.uni.wroc.pl/~lorys/IPL/article75-6-1.pdf diff --git a/doc/source/gamma.rst b/doc/source/gamma.rst index eb2d62ed..63be9f40 100644 --- a/doc/source/gamma.rst +++ b/doc/source/gamma.rst @@ -170,6 +170,43 @@ Rising factorials *gamma_rising_fmpcb_ui_bsplit* automatically choose an algorithm depending on the inputs. +.. function :: void gamma_rising_fmprb_ui_delta(fmprb_t y, const fmprb_t x, ulong n, ulong m, long prec) + +.. function :: void gamma_rising_fmpcb_ui_delta(fmpcb_t y, const fmpcb_t x, ulong n, ulong m, long prec) + + Sets `y` to the rising factorial `x (x+1) (x+2) \cdots (x+n-1)`, + computed as a product of partial products + `(x+k)(x+k+1)\cdots(x+k+m-1)`. Each partial product is obtained + from the previous by using a precomputed table of powers of `x` to + evaluate the difference + + .. math :: + + \Delta_m(x,k) = (x+k+m)_{(m)} - (x+k)_{(m)}. + + The instance `m = 4` of this algorithm was used by Smith ([Smi2001]_), + but we generalize it to a variable `m` which can be chosen nearly + optimally depending on the precision and `n`. + + The polynomials `\Delta_m(x,k) \in \mathbb{Z}[k][x]` are generated dynamically. + Expanding the rising factorials, applying the binomial theorem + a couple of times, and doing several rearrangements of the sums, we + find the closed form + + .. math :: + + \Delta_m(x,k) = \sum_{v=0}^{m-1} x^v \sum_{i=0}^{m-v-1} k^i C_m(v,i), + + where + + .. math :: + + C_m(v,i) = \sum_{j=i+1}^{m-v} m^{j-i} \left[{m \atop v+j}\right] {{v+j} \choose v} {j \choose i} + + in which the square bracket denotes an unsigned Stirling number + of the first kind. + + .. function :: void gamma_rising_fmprb_ui_multipoint(fmprb_t f, const fmprb_t c, ulong n, long prec) Sets `y` to the rising factorial `x (x+1) (x+2) \cdots (x+n-1)`, diff --git a/gamma.h b/gamma.h index b27f72ca..08864d73 100644 --- a/gamma.h +++ b/gamma.h @@ -81,6 +81,9 @@ void gamma_rising_fmprb_ui_bsplit_eight(fmprb_t y, const fmprb_t x, ulong n, lon void gamma_rising_fmprb_ui_bsplit_rectangular(fmprb_t y, const fmprb_t x, ulong n, ulong step, long prec); void gamma_rising_fmprb_ui_bsplit(fmprb_t y, const fmprb_t x, ulong n, long prec); +void gamma_rising_fmprb_ui_delta(fmprb_t y, const fmprb_t x, ulong n, ulong m, long prec); +void gamma_rising_fmpcb_ui_delta(fmpcb_t y, const fmpcb_t x, ulong n, ulong m, long prec); + void gamma_rising_fmpcb_ui_bsplit_simple(fmpcb_t y, const fmpcb_t x, ulong n, long prec); void gamma_rising_fmpcb_ui_bsplit_eight(fmpcb_t y, const fmpcb_t x, ulong n, long prec); void gamma_rising_fmpcb_ui_bsplit_rectangular(fmpcb_t y, const fmpcb_t x, ulong n, ulong step, long prec); diff --git a/gamma/rising_fmpcb_ui_bsplit.c b/gamma/rising_fmpcb_ui_bsplit.c index 12c5adf4..64334eb3 100644 --- a/gamma/rising_fmpcb_ui_bsplit.c +++ b/gamma/rising_fmpcb_ui_bsplit.c @@ -28,11 +28,9 @@ void gamma_rising_fmpcb_ui_bsplit(fmpcb_t y, const fmpcb_t x, ulong n, long prec) { - if (n < 8 || fmpcb_bits(x) < prec / 8) + if (prec < 256 || n < 8 || fmpcb_bits(x) < prec / 8) gamma_rising_fmpcb_ui_bsplit_simple(y, x, n, prec); - else if (prec < 250 || n < 250000 / prec) - gamma_rising_fmpcb_ui_bsplit_eight(y, x, n, prec); else - gamma_rising_fmpcb_ui_bsplit_rectangular(y, x, n, 0, prec); + gamma_rising_fmpcb_ui_delta(y, x, n, 0, prec); } diff --git a/gamma/rising_fmpcb_ui_delta.c b/gamma/rising_fmpcb_ui_delta.c new file mode 100644 index 00000000..fa84c28a --- /dev/null +++ b/gamma/rising_fmpcb_ui_delta.c @@ -0,0 +1,139 @@ +/*============================================================================= + + This file is part of ARB. + + ARB is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + ARB is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ARB; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +=============================================================================*/ +/****************************************************************************** + + Copyright (C) 2013 Fredrik Johansson + +******************************************************************************/ + +#include "gamma.h" +#include "arith.h" + +void rising_difference_polynomial(fmpz * s, fmpz * c, ulong m); + +void +gamma_rising_fmpcb_ui_delta(fmpcb_t y, const fmpcb_t x, ulong n, ulong m, long prec) +{ + fmpcb_struct * xs; + fmpcb_t t, u, v; + ulong i, k, rem; + fmpz_t c, h; + fmpz *s, *d; + long wp; + + if (n == 0) + { + fmpcb_one(y); + return; + } + + if (n == 1) + { + fmpcb_set_round(y, x, prec); + return; + } + + wp = FMPR_PREC_ADD(prec, FLINT_BIT_COUNT(n)); + + fmpcb_init(t); + fmpcb_init(u); + fmpcb_init(v); + fmpz_init(c); + fmpz_init(h); + + if (m == 0) + { + ulong m1, m2; + m1 = 0.04 * pow(2.0 * wp, 0.65); + m2 = n_sqrt(n); + m = FLINT_MIN(m1, m2); + } + + m = FLINT_MIN(m, n); + m = FLINT_MAX(m, 1); + + xs = _fmpcb_vec_init(m + 1); + d = _fmpz_vec_init(m * m); + s = _fmpz_vec_init(m + 1); + + for (i = 0; i <= m; i++) + { + if (i == 0) + fmpcb_one(xs + i); + else if (i == 1) + fmpcb_set(xs + i, x); + else if (i % 2 == 0) + fmpcb_mul(xs + i, xs + i / 2, xs + i / 2, wp); + else + fmpcb_mul(xs + i, xs + i - 1, x, wp); + } + + rising_difference_polynomial(s, d, m); + + /* tail */ + rem = m; + while (rem + m <= n) + rem += m; + fmpcb_one(y); + for (k = rem; k < n; k++) + { + fmpcb_add_ui(t, xs + 1, k, wp); + fmpcb_mul(y, y, t, wp); + } + + /* initial rising factorial */ + fmpcb_zero(t); + for (i = 1; i <= m; i++) + fmpcb_addmul_fmpz(t, xs + i, s + i, wp); + + fmpcb_mul(y, y, t, wp); + + /* the leading coefficient is always the same */ + fmpcb_mul_fmpz(xs + m - 1, xs + m - 1, d + m - 1 + 0, wp); + + for (k = 0; k + 2 * m <= n; k += m) + { + for (i = 0; i < m - 1; i++) + { + fmpz_set_ui(h, k); + _fmpz_poly_evaluate_horner_fmpz(c, d + i * m, m - i, h); + + if (i == 0) + fmpcb_add_fmpz(t, t, c, wp); + else + fmpcb_addmul_fmpz(t, xs + i, c, wp); + } + + fmpcb_add(t, t, xs + m - 1, wp); + fmpcb_mul(y, y, t, wp); + } + + fmpcb_set_round(y, y, prec); + + fmpcb_clear(t); + fmpcb_clear(u); + fmpcb_clear(v); + _fmpcb_vec_clear(xs, m + 1); + _fmpz_vec_clear(d, m * m); + _fmpz_vec_clear(s, m + 1); + fmpz_clear(c); + fmpz_clear(h); +} + diff --git a/gamma/rising_fmprb_ui_bsplit.c b/gamma/rising_fmprb_ui_bsplit.c index b82089b0..1dc66c39 100644 --- a/gamma/rising_fmprb_ui_bsplit.c +++ b/gamma/rising_fmprb_ui_bsplit.c @@ -28,11 +28,9 @@ void gamma_rising_fmprb_ui_bsplit(fmprb_t y, const fmprb_t x, ulong n, long prec) { - if (prec < 768 || n < 8 || fmprb_bits(x) < prec / 8) + if (prec < 512 || n < 8 || fmprb_bits(x) < prec / 8) gamma_rising_fmprb_ui_bsplit_simple(y, x, n, prec); - else if (prec < 1500 || n < 500000 / prec) - gamma_rising_fmprb_ui_bsplit_eight(y, x, n, prec); else - gamma_rising_fmprb_ui_bsplit_rectangular(y, x, n, 0, prec); + gamma_rising_fmprb_ui_delta(y, x, n, 0, prec); } diff --git a/gamma/rising_fmprb_ui_delta.c b/gamma/rising_fmprb_ui_delta.c new file mode 100644 index 00000000..bd726c28 --- /dev/null +++ b/gamma/rising_fmprb_ui_delta.c @@ -0,0 +1,174 @@ +/*============================================================================= + + This file is part of ARB. + + ARB is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + ARB is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ARB; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +=============================================================================*/ +/****************************************************************************** + + Copyright (C) 2013 Fredrik Johansson + +******************************************************************************/ + +#include "gamma.h" +#include "arith.h" + +void +rising_difference_polynomial(fmpz * s, fmpz * c, ulong m) +{ + long i, j, k; + fmpz_t t; + + fmpz_init(t); + + arith_stirling_number_1u_vec(s, m, m + 1); + + for (k = 0; k < m; k++) + { + for (i = 0; i <= m - k - 1; i++) + { + fmpz_zero(c + m * k + i); + + for (j = i + 1; j + k <= m; j++) + { + if (j == i + 1) + { + fmpz_bin_uiui(t, 1+i+k, k); + fmpz_mul_ui(t, t, m * (i+1)); + } + else + { + fmpz_mul_ui(t, t, m * (k + j)); + fmpz_divexact_ui(t, t, j - i); + } + + fmpz_addmul(c + m * k + i, s + j + k, t); + } + } + } + + fmpz_clear(t); +} + +void +gamma_rising_fmprb_ui_delta(fmprb_t y, const fmprb_t x, ulong n, ulong m, long prec) +{ + fmprb_struct * xs; + fmprb_t t, u, v; + ulong i, k, rem; + fmpz_t c, h; + fmpz *s, *d; + long wp; + + if (n == 0) + { + fmprb_one(y); + return; + } + + if (n == 1) + { + fmprb_set_round(y, x, prec); + return; + } + + wp = FMPR_PREC_ADD(prec, FLINT_BIT_COUNT(n)); + + fmprb_init(t); + fmprb_init(u); + fmprb_init(v); + fmpz_init(c); + fmpz_init(h); + + if (m == 0) + { + ulong m1, m2; + m1 = 0.04 * pow(wp, 0.65); + m2 = n_sqrt(n); + m = FLINT_MIN(m1, m2); + } + + m = FLINT_MIN(m, n); + m = FLINT_MAX(m, 1); + + xs = _fmprb_vec_init(m + 1); + d = _fmpz_vec_init(m * m); + s = _fmpz_vec_init(m + 1); + + for (i = 0; i <= m; i++) + { + if (i == 0) + fmprb_one(xs + i); + else if (i == 1) + fmprb_set(xs + i, x); + else if (i % 2 == 0) + fmprb_mul(xs + i, xs + i / 2, xs + i / 2, wp); + else + fmprb_mul(xs + i, xs + i - 1, x, wp); + } + + rising_difference_polynomial(s, d, m); + + /* tail */ + rem = m; + while (rem + m <= n) + rem += m; + fmprb_one(y); + for (k = rem; k < n; k++) + { + fmprb_add_ui(t, xs + 1, k, wp); + fmprb_mul(y, y, t, wp); + } + + /* initial rising factorial */ + fmprb_zero(t); + for (i = 1; i <= m; i++) + fmprb_addmul_fmpz(t, xs + i, s + i, wp); + + fmprb_mul(y, y, t, wp); + + /* the leading coefficient is always the same */ + fmprb_mul_fmpz(xs + m - 1, xs + m - 1, d + m - 1 + 0, wp); + + for (k = 0; k + 2 * m <= n; k += m) + { + for (i = 0; i < m - 1; i++) + { + fmpz_set_ui(h, k); + _fmpz_poly_evaluate_horner_fmpz(c, d + i * m, m - i, h); + + if (i == 0) + fmprb_add_fmpz(t, t, c, wp); + else + fmprb_addmul_fmpz(t, xs + i, c, wp); + } + + fmprb_add(t, t, xs + m - 1, wp); + fmprb_mul(y, y, t, wp); + } + + fmprb_set_round(y, y, prec); + + fmprb_clear(t); + fmprb_clear(u); + fmprb_clear(v); + _fmprb_vec_clear(xs, m + 1); + _fmpz_vec_clear(d, m * m); + _fmpz_vec_clear(s, m + 1); + fmpz_clear(c); + fmpz_clear(h); +} + diff --git a/gamma/stirling_choose_param.c b/gamma/stirling_choose_param.c index a5784955..e3a60531 100644 --- a/gamma/stirling_choose_param.c +++ b/gamma/stirling_choose_param.c @@ -27,7 +27,7 @@ #include "bernoulli.h" /* tuning factor */ -#define GAMMA_STIRLING_BETA 0.23 +#define GAMMA_STIRLING_BETA 0.27 static __inline__ long _fmpr_mag(const fmpr_t x) diff --git a/gamma/test/t-rising_fmpcb_ui_delta.c b/gamma/test/t-rising_fmpcb_ui_delta.c new file mode 100644 index 00000000..1e1a8037 --- /dev/null +++ b/gamma/test/t-rising_fmpcb_ui_delta.c @@ -0,0 +1,120 @@ +/*============================================================================= + + This file is part of fmpcb. + + fmpcb is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + fmpcb is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with fmpcb; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +=============================================================================*/ +/****************************************************************************** + + Copyright (C) 2012 Fredrik Johansson + +******************************************************************************/ + +#include "gamma.h" + +int main() +{ + long iter; + flint_rand_t state; + + printf("rising_fmpcb_ui_delta...."); + fflush(stdout); + + flint_randinit(state); + + /* check functional equation */ + for (iter = 0; iter < 1000; iter++) + { + fmpcb_t x, xn, y, z; + ulong n, m, step1, step2, step3; + + fmpcb_init(x); + fmpcb_init(xn); + fmpcb_init(y); + fmpcb_init(z); + + fmpcb_randtest(x, state, 1 + n_randint(state, 4000), 10); + n = n_randint(state, 80); + m = n_randint(state, 40); + fmpcb_add_ui(xn, x, n, 1 + n_randint(state, 4000)); + + step1 = n_randint(state, 20); + step2 = n_randint(state, 20); + step3 = n_randint(state, 20); + + gamma_rising_fmpcb_ui_delta(y, x, n, step1, 2 + n_randint(state, 4000)); + gamma_rising_fmpcb_ui_delta(z, xn, m, step2, 2 + n_randint(state, 4000)); + fmpcb_mul(y, y, z, 2 + n_randint(state, 4000)); + + gamma_rising_fmpcb_ui_delta(z, x, n + m, step3, 2 + n_randint(state, 4000)); + + if (!fmpcb_overlaps(y, z)) + { + printf("FAIL: overlap\n\n"); + printf("n = %lu\n", n); + printf("m = %lu\n", m); + printf("x = "); fmpcb_print(x); printf("\n\n"); + printf("xn = "); fmpcb_print(xn); printf("\n\n"); + printf("y = "); fmpcb_print(y); printf("\n\n"); + printf("z = "); fmpcb_print(z); printf("\n\n"); + abort(); + } + + fmpcb_clear(x); + fmpcb_clear(xn); + fmpcb_clear(y); + fmpcb_clear(z); + } + + /* aliasing of y and x */ + for (iter = 0; iter < 1000; iter++) + { + fmpcb_t x, y; + ulong n; + long prec; + ulong step; + + fmpcb_init(x); + fmpcb_init(y); + + fmpcb_randtest(x, state, 1 + n_randint(state, 200), 10); + fmpcb_randtest(y, state, 1 + n_randint(state, 200), 10); + n = n_randint(state, 100); + + step = n_randint(state, 20); + + prec = 2 + n_randint(state, 4000); + gamma_rising_fmpcb_ui_delta(y, x, n, step, prec); + gamma_rising_fmpcb_ui_delta(x, x, n, step, prec); + + if (!fmpcb_equal(x, y)) + { + printf("FAIL: aliasing\n\n"); + printf("x = "); fmpcb_print(x); printf("\n\n"); + printf("y = "); fmpcb_print(y); printf("\n\n"); + printf("n = %lu\n", n); + abort(); + } + + fmpcb_clear(x); + fmpcb_clear(y); + } + + flint_randclear(state); + _fmpz_cleanup(); + printf("PASS\n"); + return EXIT_SUCCESS; +} diff --git a/gamma/test/t-rising_fmprb_ui_delta.c b/gamma/test/t-rising_fmprb_ui_delta.c new file mode 100644 index 00000000..19d6939f --- /dev/null +++ b/gamma/test/t-rising_fmprb_ui_delta.c @@ -0,0 +1,126 @@ +/*============================================================================= + + This file is part of fmprb. + + fmprb is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + fmprb is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with fmprb; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +=============================================================================*/ +/****************************************************************************** + + Copyright (C) 2012 Fredrik Johansson + +******************************************************************************/ + +#include "gamma.h" + +int main() +{ + long iter; + flint_rand_t state; + + printf("rising_fmprb_ui_delta...."); + fflush(stdout); + + flint_randinit(state); + + /* compare with fmpq */ + for (iter = 0; iter < 1000; iter++) + { + fmprb_t a, b; + fmpq_t x, y, z; + ulong n, step; + long i; + + fmprb_init(a); + fmprb_init(b); + + fmpq_init(x); + fmpq_init(y); + fmpq_init(z); + + fmprb_randtest(a, state, 1 + n_randint(state, 1000), 10); + fmprb_randtest(b, state, 1 + n_randint(state, 1000), 10); + n = n_randint(state, 80); + + fmprb_get_rand_fmpq(x, state, a, 1 + n_randint(state, 1000)); + + step = n_randint(state, 20); + gamma_rising_fmprb_ui_delta(b, a, n, step, 2 + n_randint(state, 1000)); + + fmpq_one(y); + for (i = 0; i < n; i++) + { + fmpq_set_si(z, i, 1); + fmpq_add(z, x, z); + fmpq_mul(y, y, z); + } + + if (!fmprb_contains_fmpq(b, y)) + { + printf("FAIL: containment\n\n"); + printf("n = %lu\n", n); + printf("a = "); fmprb_print(a); printf("\n\n"); + printf("x = "); fmpq_print(x); printf("\n\n"); + printf("b = "); fmprb_print(b); printf("\n\n"); + printf("y = "); fmpq_print(y); printf("\n\n"); + abort(); + } + + fmprb_clear(a); + fmprb_clear(b); + + fmpq_clear(x); + fmpq_clear(y); + fmpq_clear(z); + } + + /* aliasing of y and x */ + for (iter = 0; iter < 1000; iter++) + { + fmprb_t x, y; + ulong n, step; + long prec; + + fmprb_init(x); + fmprb_init(y); + + fmprb_randtest(x, state, 1 + n_randint(state, 200), 10); + fmprb_randtest(y, state, 1 + n_randint(state, 200), 10); + n = n_randint(state, 100); + + prec = 2 + n_randint(state, 1000); + + step = n_randint(state, 20); + gamma_rising_fmprb_ui_delta(y, x, n, step, prec); + gamma_rising_fmprb_ui_delta(x, x, n, step, prec); + + if (!fmprb_equal(x, y)) + { + printf("FAIL: aliasing\n\n"); + printf("x = "); fmprb_print(x); printf("\n\n"); + printf("y = "); fmprb_print(y); printf("\n\n"); + printf("n = %lu\n", n); + abort(); + } + + fmprb_clear(x); + fmprb_clear(y); + } + + flint_randclear(state); + _fmpz_cleanup(); + printf("PASS\n"); + return EXIT_SUCCESS; +}