faster rising factorials again

This commit is contained in:
Fredrik Johansson 2013-03-09 12:32:56 +01:00
parent 79143c1c7e
commit 72e4d68b63
10 changed files with 606 additions and 9 deletions

View file

@ -66,6 +66,8 @@ Bibliography
.. [PS1973] M.S. Paterson and L. J. Stockmeyer, "On the number of nonscalar multiplications necessary to evaluate polynomials", SIAM J. Comput (1973) .. [PS1973] M.S. Paterson and L. J. Stockmeyer, "On the number of nonscalar multiplications necessary to evaluate polynomials", SIAM J. Comput (1973)
.. [Smi2001] D.M. Smith, "Algorithm: Fortran 90 Software for Floating-Point Multiple Precision Arithmetic, Gamma and Related Functions", Transactions on Mathematical Software 27 (2001) 377-387, http://myweb.lmu.edu/dmsmith/toms2001.pdf
.. [Tak2000] D.Takahashi, "A fast algorithm for computing large Fibonacci numbers", Information Processing Letters 75 (2000) 243-246. http://www.ii.uni.wroc.pl/~lorys/IPL/article75-6-1.pdf .. [Tak2000] D.Takahashi, "A fast algorithm for computing large Fibonacci numbers", Information Processing Letters 75 (2000) 243-246. http://www.ii.uni.wroc.pl/~lorys/IPL/article75-6-1.pdf

View file

@ -170,6 +170,43 @@ Rising factorials
*gamma_rising_fmpcb_ui_bsplit* automatically choose *gamma_rising_fmpcb_ui_bsplit* automatically choose
an algorithm depending on the inputs. an algorithm depending on the inputs.
.. function :: void gamma_rising_fmprb_ui_delta(fmprb_t y, const fmprb_t x, ulong n, ulong m, long prec)
.. function :: void gamma_rising_fmpcb_ui_delta(fmpcb_t y, const fmpcb_t x, ulong n, ulong m, long prec)
Sets `y` to the rising factorial `x (x+1) (x+2) \cdots (x+n-1)`,
computed as a product of partial products
`(x+k)(x+k+1)\cdots(x+k+m-1)`. Each partial product is obtained
from the previous by using a precomputed table of powers of `x` to
evaluate the difference
.. math ::
\Delta_m(x,k) = (x+k+m)_{(m)} - (x+k)_{(m)}.
The instance `m = 4` of this algorithm was used by Smith ([Smi2001]_),
but we generalize it to a variable `m` which can be chosen nearly
optimally depending on the precision and `n`.
The polynomials `\Delta_m(x,k) \in \mathbb{Z}[k][x]` are generated dynamically.
Expanding the rising factorials, applying the binomial theorem
a couple of times, and doing several rearrangements of the sums, we
find the closed form
.. math ::
\Delta_m(x,k) = \sum_{v=0}^{m-1} x^v \sum_{i=0}^{m-v-1} k^i C_m(v,i),
where
.. math ::
C_m(v,i) = \sum_{j=i+1}^{m-v} m^{j-i} \left[{m \atop v+j}\right] {{v+j} \choose v} {j \choose i}
in which the square bracket denotes an unsigned Stirling number
of the first kind.
.. function :: void gamma_rising_fmprb_ui_multipoint(fmprb_t f, const fmprb_t c, ulong n, long prec) .. function :: void gamma_rising_fmprb_ui_multipoint(fmprb_t f, const fmprb_t c, ulong n, long prec)
Sets `y` to the rising factorial `x (x+1) (x+2) \cdots (x+n-1)`, Sets `y` to the rising factorial `x (x+1) (x+2) \cdots (x+n-1)`,

View file

@ -81,6 +81,9 @@ void gamma_rising_fmprb_ui_bsplit_eight(fmprb_t y, const fmprb_t x, ulong n, lon
void gamma_rising_fmprb_ui_bsplit_rectangular(fmprb_t y, const fmprb_t x, ulong n, ulong step, long prec); void gamma_rising_fmprb_ui_bsplit_rectangular(fmprb_t y, const fmprb_t x, ulong n, ulong step, long prec);
void gamma_rising_fmprb_ui_bsplit(fmprb_t y, const fmprb_t x, ulong n, long prec); void gamma_rising_fmprb_ui_bsplit(fmprb_t y, const fmprb_t x, ulong n, long prec);
void gamma_rising_fmprb_ui_delta(fmprb_t y, const fmprb_t x, ulong n, ulong m, long prec);
void gamma_rising_fmpcb_ui_delta(fmpcb_t y, const fmpcb_t x, ulong n, ulong m, long prec);
void gamma_rising_fmpcb_ui_bsplit_simple(fmpcb_t y, const fmpcb_t x, ulong n, long prec); void gamma_rising_fmpcb_ui_bsplit_simple(fmpcb_t y, const fmpcb_t x, ulong n, long prec);
void gamma_rising_fmpcb_ui_bsplit_eight(fmpcb_t y, const fmpcb_t x, ulong n, long prec); void gamma_rising_fmpcb_ui_bsplit_eight(fmpcb_t y, const fmpcb_t x, ulong n, long prec);
void gamma_rising_fmpcb_ui_bsplit_rectangular(fmpcb_t y, const fmpcb_t x, ulong n, ulong step, long prec); void gamma_rising_fmpcb_ui_bsplit_rectangular(fmpcb_t y, const fmpcb_t x, ulong n, ulong step, long prec);

View file

@ -28,11 +28,9 @@
void void
gamma_rising_fmpcb_ui_bsplit(fmpcb_t y, const fmpcb_t x, ulong n, long prec) gamma_rising_fmpcb_ui_bsplit(fmpcb_t y, const fmpcb_t x, ulong n, long prec)
{ {
if (n < 8 || fmpcb_bits(x) < prec / 8) if (prec < 256 || n < 8 || fmpcb_bits(x) < prec / 8)
gamma_rising_fmpcb_ui_bsplit_simple(y, x, n, prec); gamma_rising_fmpcb_ui_bsplit_simple(y, x, n, prec);
else if (prec < 250 || n < 250000 / prec)
gamma_rising_fmpcb_ui_bsplit_eight(y, x, n, prec);
else else
gamma_rising_fmpcb_ui_bsplit_rectangular(y, x, n, 0, prec); gamma_rising_fmpcb_ui_delta(y, x, n, 0, prec);
} }

View file

@ -0,0 +1,139 @@
/*=============================================================================
This file is part of ARB.
ARB is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
ARB is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ARB; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
=============================================================================*/
/******************************************************************************
Copyright (C) 2013 Fredrik Johansson
******************************************************************************/
#include "gamma.h"
#include "arith.h"
void rising_difference_polynomial(fmpz * s, fmpz * c, ulong m);
void
gamma_rising_fmpcb_ui_delta(fmpcb_t y, const fmpcb_t x, ulong n, ulong m, long prec)
{
fmpcb_struct * xs;
fmpcb_t t, u, v;
ulong i, k, rem;
fmpz_t c, h;
fmpz *s, *d;
long wp;
if (n == 0)
{
fmpcb_one(y);
return;
}
if (n == 1)
{
fmpcb_set_round(y, x, prec);
return;
}
wp = FMPR_PREC_ADD(prec, FLINT_BIT_COUNT(n));
fmpcb_init(t);
fmpcb_init(u);
fmpcb_init(v);
fmpz_init(c);
fmpz_init(h);
if (m == 0)
{
ulong m1, m2;
m1 = 0.04 * pow(2.0 * wp, 0.65);
m2 = n_sqrt(n);
m = FLINT_MIN(m1, m2);
}
m = FLINT_MIN(m, n);
m = FLINT_MAX(m, 1);
xs = _fmpcb_vec_init(m + 1);
d = _fmpz_vec_init(m * m);
s = _fmpz_vec_init(m + 1);
for (i = 0; i <= m; i++)
{
if (i == 0)
fmpcb_one(xs + i);
else if (i == 1)
fmpcb_set(xs + i, x);
else if (i % 2 == 0)
fmpcb_mul(xs + i, xs + i / 2, xs + i / 2, wp);
else
fmpcb_mul(xs + i, xs + i - 1, x, wp);
}
rising_difference_polynomial(s, d, m);
/* tail */
rem = m;
while (rem + m <= n)
rem += m;
fmpcb_one(y);
for (k = rem; k < n; k++)
{
fmpcb_add_ui(t, xs + 1, k, wp);
fmpcb_mul(y, y, t, wp);
}
/* initial rising factorial */
fmpcb_zero(t);
for (i = 1; i <= m; i++)
fmpcb_addmul_fmpz(t, xs + i, s + i, wp);
fmpcb_mul(y, y, t, wp);
/* the leading coefficient is always the same */
fmpcb_mul_fmpz(xs + m - 1, xs + m - 1, d + m - 1 + 0, wp);
for (k = 0; k + 2 * m <= n; k += m)
{
for (i = 0; i < m - 1; i++)
{
fmpz_set_ui(h, k);
_fmpz_poly_evaluate_horner_fmpz(c, d + i * m, m - i, h);
if (i == 0)
fmpcb_add_fmpz(t, t, c, wp);
else
fmpcb_addmul_fmpz(t, xs + i, c, wp);
}
fmpcb_add(t, t, xs + m - 1, wp);
fmpcb_mul(y, y, t, wp);
}
fmpcb_set_round(y, y, prec);
fmpcb_clear(t);
fmpcb_clear(u);
fmpcb_clear(v);
_fmpcb_vec_clear(xs, m + 1);
_fmpz_vec_clear(d, m * m);
_fmpz_vec_clear(s, m + 1);
fmpz_clear(c);
fmpz_clear(h);
}

View file

@ -28,11 +28,9 @@
void void
gamma_rising_fmprb_ui_bsplit(fmprb_t y, const fmprb_t x, ulong n, long prec) gamma_rising_fmprb_ui_bsplit(fmprb_t y, const fmprb_t x, ulong n, long prec)
{ {
if (prec < 768 || n < 8 || fmprb_bits(x) < prec / 8) if (prec < 512 || n < 8 || fmprb_bits(x) < prec / 8)
gamma_rising_fmprb_ui_bsplit_simple(y, x, n, prec); gamma_rising_fmprb_ui_bsplit_simple(y, x, n, prec);
else if (prec < 1500 || n < 500000 / prec)
gamma_rising_fmprb_ui_bsplit_eight(y, x, n, prec);
else else
gamma_rising_fmprb_ui_bsplit_rectangular(y, x, n, 0, prec); gamma_rising_fmprb_ui_delta(y, x, n, 0, prec);
} }

View file

@ -0,0 +1,174 @@
/*=============================================================================
This file is part of ARB.
ARB is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
ARB is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ARB; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
=============================================================================*/
/******************************************************************************
Copyright (C) 2013 Fredrik Johansson
******************************************************************************/
#include "gamma.h"
#include "arith.h"
void
rising_difference_polynomial(fmpz * s, fmpz * c, ulong m)
{
long i, j, k;
fmpz_t t;
fmpz_init(t);
arith_stirling_number_1u_vec(s, m, m + 1);
for (k = 0; k < m; k++)
{
for (i = 0; i <= m - k - 1; i++)
{
fmpz_zero(c + m * k + i);
for (j = i + 1; j + k <= m; j++)
{
if (j == i + 1)
{
fmpz_bin_uiui(t, 1+i+k, k);
fmpz_mul_ui(t, t, m * (i+1));
}
else
{
fmpz_mul_ui(t, t, m * (k + j));
fmpz_divexact_ui(t, t, j - i);
}
fmpz_addmul(c + m * k + i, s + j + k, t);
}
}
}
fmpz_clear(t);
}
void
gamma_rising_fmprb_ui_delta(fmprb_t y, const fmprb_t x, ulong n, ulong m, long prec)
{
fmprb_struct * xs;
fmprb_t t, u, v;
ulong i, k, rem;
fmpz_t c, h;
fmpz *s, *d;
long wp;
if (n == 0)
{
fmprb_one(y);
return;
}
if (n == 1)
{
fmprb_set_round(y, x, prec);
return;
}
wp = FMPR_PREC_ADD(prec, FLINT_BIT_COUNT(n));
fmprb_init(t);
fmprb_init(u);
fmprb_init(v);
fmpz_init(c);
fmpz_init(h);
if (m == 0)
{
ulong m1, m2;
m1 = 0.04 * pow(wp, 0.65);
m2 = n_sqrt(n);
m = FLINT_MIN(m1, m2);
}
m = FLINT_MIN(m, n);
m = FLINT_MAX(m, 1);
xs = _fmprb_vec_init(m + 1);
d = _fmpz_vec_init(m * m);
s = _fmpz_vec_init(m + 1);
for (i = 0; i <= m; i++)
{
if (i == 0)
fmprb_one(xs + i);
else if (i == 1)
fmprb_set(xs + i, x);
else if (i % 2 == 0)
fmprb_mul(xs + i, xs + i / 2, xs + i / 2, wp);
else
fmprb_mul(xs + i, xs + i - 1, x, wp);
}
rising_difference_polynomial(s, d, m);
/* tail */
rem = m;
while (rem + m <= n)
rem += m;
fmprb_one(y);
for (k = rem; k < n; k++)
{
fmprb_add_ui(t, xs + 1, k, wp);
fmprb_mul(y, y, t, wp);
}
/* initial rising factorial */
fmprb_zero(t);
for (i = 1; i <= m; i++)
fmprb_addmul_fmpz(t, xs + i, s + i, wp);
fmprb_mul(y, y, t, wp);
/* the leading coefficient is always the same */
fmprb_mul_fmpz(xs + m - 1, xs + m - 1, d + m - 1 + 0, wp);
for (k = 0; k + 2 * m <= n; k += m)
{
for (i = 0; i < m - 1; i++)
{
fmpz_set_ui(h, k);
_fmpz_poly_evaluate_horner_fmpz(c, d + i * m, m - i, h);
if (i == 0)
fmprb_add_fmpz(t, t, c, wp);
else
fmprb_addmul_fmpz(t, xs + i, c, wp);
}
fmprb_add(t, t, xs + m - 1, wp);
fmprb_mul(y, y, t, wp);
}
fmprb_set_round(y, y, prec);
fmprb_clear(t);
fmprb_clear(u);
fmprb_clear(v);
_fmprb_vec_clear(xs, m + 1);
_fmpz_vec_clear(d, m * m);
_fmpz_vec_clear(s, m + 1);
fmpz_clear(c);
fmpz_clear(h);
}

View file

@ -27,7 +27,7 @@
#include "bernoulli.h" #include "bernoulli.h"
/* tuning factor */ /* tuning factor */
#define GAMMA_STIRLING_BETA 0.23 #define GAMMA_STIRLING_BETA 0.27
static __inline__ long static __inline__ long
_fmpr_mag(const fmpr_t x) _fmpr_mag(const fmpr_t x)

View file

@ -0,0 +1,120 @@
/*=============================================================================
This file is part of fmpcb.
fmpcb is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
fmpcb is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with fmpcb; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
=============================================================================*/
/******************************************************************************
Copyright (C) 2012 Fredrik Johansson
******************************************************************************/
#include "gamma.h"
int main()
{
long iter;
flint_rand_t state;
printf("rising_fmpcb_ui_delta....");
fflush(stdout);
flint_randinit(state);
/* check functional equation */
for (iter = 0; iter < 1000; iter++)
{
fmpcb_t x, xn, y, z;
ulong n, m, step1, step2, step3;
fmpcb_init(x);
fmpcb_init(xn);
fmpcb_init(y);
fmpcb_init(z);
fmpcb_randtest(x, state, 1 + n_randint(state, 4000), 10);
n = n_randint(state, 80);
m = n_randint(state, 40);
fmpcb_add_ui(xn, x, n, 1 + n_randint(state, 4000));
step1 = n_randint(state, 20);
step2 = n_randint(state, 20);
step3 = n_randint(state, 20);
gamma_rising_fmpcb_ui_delta(y, x, n, step1, 2 + n_randint(state, 4000));
gamma_rising_fmpcb_ui_delta(z, xn, m, step2, 2 + n_randint(state, 4000));
fmpcb_mul(y, y, z, 2 + n_randint(state, 4000));
gamma_rising_fmpcb_ui_delta(z, x, n + m, step3, 2 + n_randint(state, 4000));
if (!fmpcb_overlaps(y, z))
{
printf("FAIL: overlap\n\n");
printf("n = %lu\n", n);
printf("m = %lu\n", m);
printf("x = "); fmpcb_print(x); printf("\n\n");
printf("xn = "); fmpcb_print(xn); printf("\n\n");
printf("y = "); fmpcb_print(y); printf("\n\n");
printf("z = "); fmpcb_print(z); printf("\n\n");
abort();
}
fmpcb_clear(x);
fmpcb_clear(xn);
fmpcb_clear(y);
fmpcb_clear(z);
}
/* aliasing of y and x */
for (iter = 0; iter < 1000; iter++)
{
fmpcb_t x, y;
ulong n;
long prec;
ulong step;
fmpcb_init(x);
fmpcb_init(y);
fmpcb_randtest(x, state, 1 + n_randint(state, 200), 10);
fmpcb_randtest(y, state, 1 + n_randint(state, 200), 10);
n = n_randint(state, 100);
step = n_randint(state, 20);
prec = 2 + n_randint(state, 4000);
gamma_rising_fmpcb_ui_delta(y, x, n, step, prec);
gamma_rising_fmpcb_ui_delta(x, x, n, step, prec);
if (!fmpcb_equal(x, y))
{
printf("FAIL: aliasing\n\n");
printf("x = "); fmpcb_print(x); printf("\n\n");
printf("y = "); fmpcb_print(y); printf("\n\n");
printf("n = %lu\n", n);
abort();
}
fmpcb_clear(x);
fmpcb_clear(y);
}
flint_randclear(state);
_fmpz_cleanup();
printf("PASS\n");
return EXIT_SUCCESS;
}

View file

@ -0,0 +1,126 @@
/*=============================================================================
This file is part of fmprb.
fmprb is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
fmprb is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with fmprb; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
=============================================================================*/
/******************************************************************************
Copyright (C) 2012 Fredrik Johansson
******************************************************************************/
#include "gamma.h"
int main()
{
long iter;
flint_rand_t state;
printf("rising_fmprb_ui_delta....");
fflush(stdout);
flint_randinit(state);
/* compare with fmpq */
for (iter = 0; iter < 1000; iter++)
{
fmprb_t a, b;
fmpq_t x, y, z;
ulong n, step;
long i;
fmprb_init(a);
fmprb_init(b);
fmpq_init(x);
fmpq_init(y);
fmpq_init(z);
fmprb_randtest(a, state, 1 + n_randint(state, 1000), 10);
fmprb_randtest(b, state, 1 + n_randint(state, 1000), 10);
n = n_randint(state, 80);
fmprb_get_rand_fmpq(x, state, a, 1 + n_randint(state, 1000));
step = n_randint(state, 20);
gamma_rising_fmprb_ui_delta(b, a, n, step, 2 + n_randint(state, 1000));
fmpq_one(y);
for (i = 0; i < n; i++)
{
fmpq_set_si(z, i, 1);
fmpq_add(z, x, z);
fmpq_mul(y, y, z);
}
if (!fmprb_contains_fmpq(b, y))
{
printf("FAIL: containment\n\n");
printf("n = %lu\n", n);
printf("a = "); fmprb_print(a); printf("\n\n");
printf("x = "); fmpq_print(x); printf("\n\n");
printf("b = "); fmprb_print(b); printf("\n\n");
printf("y = "); fmpq_print(y); printf("\n\n");
abort();
}
fmprb_clear(a);
fmprb_clear(b);
fmpq_clear(x);
fmpq_clear(y);
fmpq_clear(z);
}
/* aliasing of y and x */
for (iter = 0; iter < 1000; iter++)
{
fmprb_t x, y;
ulong n, step;
long prec;
fmprb_init(x);
fmprb_init(y);
fmprb_randtest(x, state, 1 + n_randint(state, 200), 10);
fmprb_randtest(y, state, 1 + n_randint(state, 200), 10);
n = n_randint(state, 100);
prec = 2 + n_randint(state, 1000);
step = n_randint(state, 20);
gamma_rising_fmprb_ui_delta(y, x, n, step, prec);
gamma_rising_fmprb_ui_delta(x, x, n, step, prec);
if (!fmprb_equal(x, y))
{
printf("FAIL: aliasing\n\n");
printf("x = "); fmprb_print(x); printf("\n\n");
printf("y = "); fmprb_print(y); printf("\n\n");
printf("n = %lu\n", n);
abort();
}
fmprb_clear(x);
fmprb_clear(y);
}
flint_randclear(state);
_fmpz_cleanup();
printf("PASS\n");
return EXIT_SUCCESS;
}