arb/fmprb/gamma_fmpq_karatsuba.c
2012-09-16 22:36:49 +02:00

227 lines
6.2 KiB
C

/*=============================================================================
This file is part of ARB.
ARB is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
ARB is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ARB; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
=============================================================================*/
/******************************************************************************
Copyright (C) 2012 Fredrik Johansson
******************************************************************************/
#include "fmprb.h"
#include "fmprb_poly.h"
static void
bsplit(fmprb_t P, fmprb_t Q, fmprb_t B, fmprb_t T, long n1, long n2,
const fmprb_t anum, const fmprb_t aden, long n, long m, long prec)
{
if (n2 - n1 == 1)
{
long k = n1;
if (k == 0)
fmprb_pow_ui(P, aden, m + 1, prec);
else
fmprb_set_ui(P, n);
fmprb_set_ui(Q, (k == 0) ? 1 : k);
fmprb_set(B, anum);
fmprb_addmul_ui(B, aden, k, prec);
fmprb_pow_ui(B, B, m + 1, prec);
fmprb_mul_si(T, P, (k % 2) ? -1 : 1, prec);
}
else
{
long mid = (n1 + n2) / 2;
fmprb_t P2, Q2, B2, T2;
fmprb_init(P2);
fmprb_init(Q2);
fmprb_init(B2);
fmprb_init(T2);
bsplit(P, Q, B, T, n1, mid, anum, aden, n, m, prec);
bsplit(P2, Q2, B2, T2, mid, n2, anum, aden, n, m, prec);
if (!fmprb_is_one(B2))
fmprb_mul(T, T, B2, prec);
fmprb_mul(T, T, Q2, prec);
if (!fmprb_is_one(B))
fmprb_mul(T2, T2, B, prec);
fmprb_mul(T2, T2, P, prec);
fmprb_add(T, T, T2, prec);
fmprb_mul(P, P, P2, prec);
fmprb_mul(Q, Q, Q2, prec);
fmprb_mul(B, B, B2, prec);
fmprb_clear(P2);
fmprb_clear(Q2);
fmprb_clear(B2);
fmprb_clear(T2);
}
}
static void
karatsuba_error(fmpr_t bound, long s, long n, long r)
{
fmpr_t t, u, A, B;
long wp = FMPRB_RAD_PREC;
if (r < n || s < 2)
abort();
/* t = log(n)^s */
fmpr_set_ui(t, n);
fmpr_log(t, t, wp, FMPR_RND_UP);
fmpr_pow_sloppy_ui(t, t, s, wp, FMPR_RND_UP);
/* A = 5/3 * exp(-n) * log(n) ** s */
fmpr_set_si(u, -n);
fmpr_exp(u, u, wp, FMPR_RND_UP);
fmpr_mul(u, u, t, wp, FMPR_RND_UP);
fmpr_mul_ui(u, u, 5, wp, FMPR_RND_UP);
fmpr_div_ui(A, u, 3, wp, FMPR_RND_UP);
/* B = (e/(r+2))^(r+2) * (1 + n^(r+2) * log(n)**2) */
fmpr_set_ui(u, n);
fmpr_pow_sloppy_ui(u, u, r + 2, wp, FMPR_RND_UP);
fmpr_mul(t, t, u, wp, FMPR_RND_UP);
fmpr_add_ui(t, t, 1UL, wp, FMPR_RND_UP);
fmpr_set_ui(u, 1UL);
fmpr_exp(u, u, wp, FMPR_RND_UP);
fmpr_div_ui(u, u, r + 2, wp, FMPR_RND_UP);
fmpr_pow_sloppy_ui(u, u, r + 2, wp, FMPR_RND_UP);
fmpr_mul(B, t, u, wp, FMPR_RND_UP);
/* A + B */
fmpr_add(bound, A, B, wp, FMPR_RND_UP);
}
void
fmprb_gamma_fmpq_karatsuba(fmprb_struct * v, const fmpq_t a, long num, long prec)
{
long s, jmax, m, n, r, wp, bsplit_wp;
fmprb_struct * Sm, * logs;
fmprb_t t, u, anum, aden;
fmprb_t P, Q, B, T;
fmpr_t truncation_error;
fmpr_init(truncation_error);
/* Select parameters (heuristic) */
jmax = num - 1;
s = FLINT_MAX(2, jmax);
wp = prec * 1.01 + 10 + 2*jmax;
n = FLINT_MAX(wp * 0.693147180559945, 2*s*FLINT_BIT_COUNT(2*s) + 1);
/* round n to have many trailing zeros, speeding up arithmetic */
if (n > 256)
{
int b = FLINT_BIT_COUNT(n);
n = ((n >> (b-4)) + 1) << (b-4);
}
r = 3.59112147666862*n;
/* the bound computation is rigorous */
karatsuba_error(truncation_error, s, n, r);
/* Heuristic: when s is small, we can truncate in the binary splitting.
The factor 2.2 is not generally valid and should be replaced
based on theory. */
if (15 * s < wp)
bsplit_wp = 2.2 * wp;
else
bsplit_wp = FMPR_PREC_EXACT;
#if 0
printf("s = %ld n = %ld r = %ld err = ", s, n, r);
fmpr_printd(truncation_error, 10); printf("\n");
#endif
fmprb_init(t);
fmprb_init(u);
fmprb_init(anum);
fmprb_init(aden);
fmprb_init(P);
fmprb_init(Q);
fmprb_init(B);
fmprb_init(T);
fmprb_set_fmpz(anum, fmpq_numref(a));
fmprb_set_fmpz(aden, fmpq_denref(a));
Sm = _fmprb_vec_init(jmax + 1);
logs = _fmprb_vec_init(jmax + 1);
/* Compute S sums */
for (m = 0; m <= jmax; m++) {
/* 2.2 * wp */
bsplit(P, Q, B, T, 0, r + 1, anum, aden, n, m, bsplit_wp);
fmprb_mul(Q, Q, B, wp);
fmprb_div(Sm + m, T, Q, wp);
#if 0
printf("%ld %ld ", m, jmax); fmprb_printd(Sm + m, 10); printf("\n");
#endif
if (m % 2)
fmprb_neg(Sm + m, Sm + m);
}
/* Compute (log(n))^m / m! */
for (m = 0; m <= jmax; m++)
{
if (m == 0) {
fmprb_one(logs + m);
} else if (m == 1) {
fmprb_log_ui(logs + m, n, wp);
} else {
fmprb_mul(logs + m, logs + m - 1, logs + 1, wp);
fmprb_div_ui(logs + m, logs + m, m, wp);
}
}
/* Convolution */
_fmprb_poly_mullow(v, logs, jmax + 1, Sm, jmax + 1, jmax + 1, wp);
/* multiply by n^a */
fmprb_set_ui(t, n);
fmprb_pow_fmpq(t, t, a, wp);
for (m = 0; m <= jmax; m++)
fmprb_mul(v + m, v + m, t, wp);
/* Add error. TODO: the bound is for the derivatives directly,
so we should probably divide it by m! ? */
for (m = 0; m <= jmax; m++)
fmprb_add_error_fmpr(v + m, truncation_error);
fmprb_clear(t);
fmprb_clear(u);
fmprb_clear(anum);
fmprb_clear(aden);
fmprb_clear(P);
fmprb_clear(Q);
fmprb_clear(B);
fmprb_clear(T);
fmpr_clear(truncation_error);
_fmprb_vec_clear(Sm, jmax + 1);
_fmprb_vec_clear(logs, jmax + 1);
}