implement acb_dot and use in the acb_poly module

This commit is contained in:
fredrik 2018-08-26 19:59:30 +02:00
parent 7739f884de
commit ffa60bf319
19 changed files with 1528 additions and 116 deletions

8
acb.h
View file

@ -644,6 +644,14 @@ acb_submul_arb(acb_t z, const acb_t x, const arb_t y, slong prec)
arb_submul(acb_imagref(z), acb_imagref(x), y, prec);
}
void acb_dot_simple(acb_t res, const acb_t initial, int subtract,
acb_srcptr x, slong xstep, acb_srcptr y, slong ystep, slong len, slong prec);
void acb_dot_precise(acb_t res, const acb_t initial, int subtract,
acb_srcptr x, slong xstep, acb_srcptr y, slong ystep, slong len, slong prec);
void acb_dot(acb_t res, const acb_t initial, int subtract,
acb_srcptr x, slong xstep, acb_srcptr y, slong ystep, slong len, slong prec);
void acb_inv(acb_t z, const acb_t x, slong prec);
void acb_div(acb_t z, const acb_t x, const acb_t y, slong prec);

1008
acb/dot.c Normal file

File diff suppressed because it is too large Load diff

50
acb/dot_precise.c Normal file
View file

@ -0,0 +1,50 @@
/*
Copyright (C) 2018 Fredrik Johansson
This file is part of Arb.
Arb is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version. See <http://www.gnu.org/licenses/>.
*/
#include "acb.h"
void
acb_dot_precise(acb_t res, const acb_t initial, int subtract, acb_srcptr x, slong xstep,
acb_srcptr y, slong ystep, slong len, slong prec)
{
arb_ptr tmp;
slong i;
tmp = flint_malloc(sizeof(arb_struct) * (4 * len));
for (i = 0; i < len; i++)
{
tmp[0 * len + i] = *acb_realref(x + i * xstep);
tmp[1 * len + i] = *acb_imagref(x + i * xstep);
tmp[2 * len + i] = *acb_realref(y + i * ystep);
arb_init(tmp + 3 * len + i);
arb_neg(tmp + 3 * len + i, acb_imagref(y + i * ystep));
}
arb_dot_precise(acb_realref(res), initial == NULL ? NULL : acb_realref(initial), subtract,
tmp, 1, tmp + 2 * len, 1, 2 * len, prec);
for (i = 0; i < len; i++)
arb_clear(tmp + 3 * len + i);
for (i = 0; i < len; i++)
{
tmp[0 * len + i] = *acb_realref(x + i * xstep);
tmp[1 * len + i] = *acb_imagref(x + i * xstep);
tmp[2 * len + i] = *acb_imagref(y + i * ystep);
tmp[3 * len + i] = *acb_realref(y + i * ystep);
}
arb_dot_precise(acb_imagref(res), initial == NULL ? NULL : acb_imagref(initial), subtract,
tmp, 1, tmp + 2 * len, 1, 2 * len, prec);
flint_free(tmp);
}

47
acb/dot_simple.c Normal file
View file

@ -0,0 +1,47 @@
/*
Copyright (C) 2018 Fredrik Johansson
This file is part of Arb.
Arb is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version. See <http://www.gnu.org/licenses/>.
*/
#include "acb.h"
void
acb_dot_simple(acb_t res, const acb_t initial, int subtract,
acb_srcptr x, slong xstep, acb_srcptr y, slong ystep, slong len, slong prec)
{
slong i;
if (len <= 0)
{
if (initial == NULL)
acb_zero(res);
else
acb_set_round(res, initial, prec);
return;
}
if (initial == NULL)
{
acb_mul(res, x, y, prec);
}
else
{
if (subtract)
acb_neg(res, initial);
else
acb_set(res, initial);
acb_addmul(res, x, y, prec);
}
for (i = 1; i < len; i++)
acb_addmul(res, x + i * xstep, y + i * ystep, prec);
if (subtract)
acb_neg(res, res);
}

200
acb/test/t-dot.c Normal file
View file

@ -0,0 +1,200 @@
/*
Copyright (C) 2018 Fredrik Johansson
This file is part of Arb.
Arb is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version. See <http://www.gnu.org/licenses/>.
*/
#include "acb.h"
ARB_DLL extern slong acb_dot_gauss_dot_cutoff;
int main()
{
slong iter;
flint_rand_t state;
flint_printf("dot....");
fflush(stdout);
flint_randinit(state);
for (iter = 0; iter < 1000000 * arb_test_multiplier(); iter++)
{
acb_ptr x, y;
acb_t s1, s2, z;
slong i, len, prec, xbits, ybits, ebits;
int ok, initial, subtract, revx, revy;
if (n_randint(state, 100) == 0)
len = n_randint(state, 50);
else if (n_randint(state, 10) == 0)
len = n_randint(state, 5);
else
len = n_randint(state, 3);
acb_dot_gauss_dot_cutoff = 3 + n_randint(state, 30);
if (n_randint(state, 10) != 0 || len > 10)
{
prec = 2 + n_randint(state, 500);
xbits = 2 + n_randint(state, 500);
ybits = 2 + n_randint(state, 500);
}
else
{
prec = 2 + n_randint(state, 5000);
xbits = 2 + n_randint(state, 5000);
ybits = 2 + n_randint(state, 5000);
}
if (n_randint(state, 100) == 0)
ebits = 2 + n_randint(state, 100);
else
ebits = 2 + n_randint(state, 10);
initial = n_randint(state, 2);
subtract = n_randint(state, 2);
revx = n_randint(state, 2);
revy = n_randint(state, 2);
x = _acb_vec_init(len);
y = _acb_vec_init(len);
acb_init(s1);
acb_init(s2);
acb_init(z);
switch (n_randint(state, 3))
{
case 0:
for (i = 0; i < len; i++)
{
acb_randtest(x + i, state, xbits, ebits);
acb_randtest(y + i, state, ybits, ebits);
}
break;
/* Test with cancellation */
case 1:
for (i = 0; i < len; i++)
{
if (i <= len / 2)
{
acb_randtest(x + i, state, xbits, ebits);
acb_randtest(y + i, state, ybits, ebits);
}
else
{
acb_neg(x + i, x + len - i - 1);
acb_set(y + i, y + len - i - 1);
}
}
break;
default:
for (i = 0; i < len; i++)
{
if (i <= len / 2)
{
acb_randtest(x + i, state, xbits, ebits);
acb_randtest(y + i, state, ybits, ebits);
}
else
{
acb_neg_round(x + i, x + len - i - 1, 2 + n_randint(state, 500));
acb_set_round(y + i, y + len - i - 1, 2 + n_randint(state, 500));
}
}
break;
}
acb_randtest(s1, state, 200, 100);
acb_randtest(s2, state, 200, 100);
acb_randtest(z, state, xbits, ebits);
acb_dot(s1, initial ? z : NULL, subtract,
revx ? (x + len - 1) : x, revx ? -1 : 1,
revy ? (y + len - 1) : y, revy ? -1 : 1,
len, prec);
acb_dot_precise(s2, initial ? z : NULL, subtract,
revx ? (x + len - 1) : x, revx ? -1 : 1,
revy ? (y + len - 1) : y, revy ? -1 : 1,
len, ebits <= 12 ? ARF_PREC_EXACT : 2 * prec + 100);
if (ebits <= 12)
ok = acb_contains(s1, s2);
else
ok = acb_overlaps(s1, s2);
if (!ok)
{
flint_printf("FAIL\n\n");
flint_printf("iter = %wd, len = %wd, prec = %wd, ebits = %wd, subtract = %d\n\n", iter, len, prec, ebits, subtract);
if (initial)
{
flint_printf("z = ", i); acb_printn(z, 100, ARB_STR_MORE); flint_printf(" (%wd)\n\n", acb_bits(z));
}
for (i = 0; i < len; i++)
{
flint_printf("x[%wd] = ", i); acb_printn(x + i, 100, ARB_STR_MORE); flint_printf(" (%wd)\n", acb_bits(x + i));
flint_printf("y[%wd] = ", i); acb_printn(y + i, 100, ARB_STR_MORE); flint_printf(" (%wd)\n", acb_bits(y + i));
}
flint_printf("\n\n");
flint_printf("s1 = "); acb_printn(s1, 100, ARB_STR_MORE); flint_printf("\n\n");
flint_printf("s2 = "); acb_printn(s2, 100, ARB_STR_MORE); flint_printf("\n\n");
flint_abort();
}
/* With the fast algorithm, we expect identical results when
reversing the vectors. */
if (ebits <= 12)
{
revx ^= 1;
revy ^= 1;
acb_dot(s2, initial ? z : NULL, subtract,
revx ? (x + len - 1) : x, revx ? -1 : 1,
revy ? (y + len - 1) : y, revy ? -1 : 1,
len, prec);
if (!acb_equal(s1, s2))
{
flint_printf("FAIL (reversal)\n\n");
flint_printf("iter = %wd, len = %wd, prec = %wd, ebits = %wd\n\n", iter, len, prec, ebits);
if (initial)
{
flint_printf("z = ", i); acb_printn(z, 100, ARB_STR_MORE); flint_printf(" (%wd)\n\n", acb_bits(z));
}
for (i = 0; i < len; i++)
{
flint_printf("x[%wd] = ", i); acb_printn(x + i, 100, ARB_STR_MORE); flint_printf(" (%wd)\n", acb_bits(x + i));
flint_printf("y[%wd] = ", i); acb_printn(y + i, 100, ARB_STR_MORE); flint_printf(" (%wd)\n", acb_bits(y + i));
}
flint_printf("\n\n");
flint_printf("s1 = "); acb_printn(s1, 100, ARB_STR_MORE); flint_printf("\n\n");
flint_printf("s2 = "); acb_printn(s2, 100, ARB_STR_MORE); flint_printf("\n\n");
flint_abort();
}
}
acb_clear(s1);
acb_clear(s2);
acb_clear(z);
_acb_vec_clear(x, len);
_acb_vec_clear(y, len);
}
flint_randclear(state);
flint_cleanup();
flint_printf("PASS\n");
return EXIT_SUCCESS;
}

View file

@ -44,7 +44,7 @@ _acb_poly_div_series(acb_ptr Q, acb_srcptr A, slong Alen,
{
/* The basecase algorithm is faster for much larger Blen and n than
this, but unfortunately has worse numerical stability. */
slong i, j;
slong i;
acb_t q;
acb_init(q);
@ -54,16 +54,8 @@ _acb_poly_div_series(acb_ptr Q, acb_srcptr A, slong Alen,
for (i = 1; i < n; i++)
{
acb_mul(Q + i, B + 1, Q + i - 1, prec);
for (j = 2; j < FLINT_MIN(i + 1, Blen); j++)
acb_addmul(Q + i, B + j, Q + i - j, prec);
if (i < Alen)
acb_sub(Q + i, A + i, Q + i, prec);
else
acb_neg(Q + i, Q + i);
acb_dot(Q + i, (i < Alen) ? A + i : NULL, 1,
B + 1, 1, Q + i - 1, -1, FLINT_MIN(i, Blen - 1), prec);
if (!acb_is_one(q))
acb_mul(Q + i, Q + i, q, prec);
}

View file

@ -12,7 +12,7 @@
#include "acb_poly.h"
/* allow changing this from the test code */
ARB_DLL slong acb_poly_newton_exp_cutoff = 120;
ARB_DLL slong acb_poly_newton_exp_cutoff = 0;
/* with inverse=1 simultaneously computes g = exp(-x) to length n
with inverse=0 uses g as scratch space, computing
@ -112,39 +112,51 @@ _acb_poly_exp_series(acb_ptr f, acb_srcptr h, slong hlen, slong n, slong prec)
_acb_vec_zero(f + j - d + 1, n - (j - d + 1));
acb_clear(t);
}
else if (hlen <= acb_poly_newton_exp_cutoff)
{
_acb_poly_exp_series_basecase(f, h, hlen, n, prec);
}
else
{
acb_ptr g, t;
acb_t u;
int fix;
slong cutoff;
g = _acb_vec_init((n + 1) / 2);
fix = (hlen < n || h == f || !acb_is_zero(h));
if (acb_poly_newton_exp_cutoff != 0)
cutoff = acb_poly_newton_exp_cutoff;
else if (prec <= 256)
cutoff = 750;
else
cutoff = 1e5 / pow(log(prec), 3);
if (fix)
if (hlen <= cutoff)
{
t = _acb_vec_init(n);
_acb_vec_set(t + 1, h + 1, hlen - 1);
_acb_poly_exp_series_basecase(f, h, hlen, n, prec);
}
else
t = (acb_ptr) h;
{
acb_ptr g, t;
acb_t u;
int fix;
acb_init(u);
acb_exp(u, h, prec);
g = _acb_vec_init((n + 1) / 2);
fix = (hlen < n || h == f || !acb_is_zero(h));
_acb_poly_exp_series_newton(f, g, t, n, prec, 0, acb_poly_newton_exp_cutoff);
if (fix)
{
t = _acb_vec_init(n);
_acb_vec_set(t + 1, h + 1, hlen - 1);
}
else
t = (acb_ptr) h;
if (!acb_is_one(u))
_acb_vec_scalar_mul(f, f, n, u, prec);
acb_init(u);
acb_exp(u, h, prec);
_acb_vec_clear(g, (n + 1) / 2);
if (fix)
_acb_vec_clear(t, n);
acb_clear(u);
_acb_poly_exp_series_newton(f, g, t, n, prec, 0, cutoff);
if (!acb_is_one(u))
_acb_vec_scalar_mul(f, f, n, u, prec);
_acb_vec_clear(g, (n + 1) / 2);
if (fix)
_acb_vec_clear(t, n);
acb_clear(u);
}
}
}

View file

@ -11,13 +11,11 @@
#include "acb_poly.h"
#define MUL_CUTOFF 24
static void
_acb_poly_exp_series_basecase_rec(acb_ptr f, acb_ptr a,
acb_srcptr h, slong hlen, slong n, slong prec)
{
slong j, k;
slong k;
acb_t s;
acb_init(s);
@ -29,10 +27,7 @@ _acb_poly_exp_series_basecase_rec(acb_ptr f, acb_ptr a,
for (k = 1; k < n; k++)
{
acb_zero(s);
for (j = 1; j < FLINT_MIN(k + 1, hlen); j++)
acb_addmul(s, a + j, f + k - j, prec);
acb_dot(s, NULL, 0, a + 1, 1, f + k - 1, -1, FLINT_MIN(k, hlen - 1), prec);
acb_div_ui(f + k, s, k, prec);
}
@ -45,7 +40,7 @@ _acb_poly_exp_series_basecase(acb_ptr f,
{
hlen = FLINT_MIN(n, hlen);
if (n < MUL_CUTOFF || hlen < 0.9 * n)
if (n < 20 || hlen < 0.9 * n || prec <= 2 * FLINT_BITS || n < 1000.0 / log(prec + 10) - 70)
{
acb_ptr t = _acb_vec_init(hlen);
_acb_poly_exp_series_basecase_rec(f, t, h, hlen, n, prec);

View file

@ -37,7 +37,7 @@ _acb_poly_inv_series(acb_ptr Qinv,
}
else
{
slong i, j, blen;
slong i, blen;
/* The basecase algorithm is faster for much larger Qlen or len than
this, but unfortunately also much less numerically stable. */
@ -48,15 +48,10 @@ _acb_poly_inv_series(acb_ptr Qinv,
for (i = 1; i < blen; i++)
{
acb_mul(Qinv + i, Q + 1, Qinv + i - 1, prec);
for (j = 2; j < FLINT_MIN(i + 1, Qlen); j++)
acb_addmul(Qinv + i, Q + j, Qinv + i - j, prec);
acb_dot(Qinv + i, NULL, 1,
Q + 1, 1, Qinv + i - 1, -1, FLINT_MIN(i, Qlen - 1), prec);
if (!acb_is_one(Qinv))
acb_mul(Qinv + i, Qinv + i, Qinv, prec);
acb_neg(Qinv + i, Qinv + i);
}
if (len > blen)
@ -116,4 +111,3 @@ acb_poly_inv_series(acb_poly_t Qinv, const acb_poly_t Q, slong n, slong prec)
_acb_poly_set_length(Qinv, n);
_acb_poly_normalise(Qinv);
}

View file

@ -11,17 +11,46 @@
#include "acb_poly.h"
#define CUTOFF 4
void
_acb_poly_mullow(acb_ptr res,
acb_srcptr poly1, slong len1,
acb_srcptr poly2, slong len2, slong n, slong prec)
{
if (n < CUTOFF || len1 < CUTOFF || len2 < CUTOFF)
if (n == 1)
{
acb_mul(res, poly1, poly2, prec);
}
else if (n <= 7 || len1 <= 7 || len2 <= 7)
{
_acb_poly_mullow_classical(res, poly1, len1, poly2, len2, n, prec);
}
else
_acb_poly_mullow_transpose(res, poly1, len1, poly2, len2, n, prec);
{
slong cutoff;
double p;
if (prec <= 2 * FLINT_BITS)
{
cutoff = 110;
}
else
{
p = log(prec);
cutoff = 10000.0 / (p * p * p);
cutoff = FLINT_MIN(cutoff, 60);
if (poly1 == poly2 && prec >= 256)
cutoff *= 1.25;
if (poly1 == poly2 && prec >= 4096)
cutoff *= 1.25;
cutoff = FLINT_MAX(cutoff, 8);
}
if (2 * FLINT_MIN(len1, len2) <= cutoff || n <= cutoff)
_acb_poly_mullow_classical(res, poly1, len1, poly2, len2, n, prec);
else
_acb_poly_mullow_transpose(res, poly1, len1, poly2, len2, n, prec);
}
}
void

View file

@ -26,35 +26,55 @@ _acb_poly_mullow_classical(acb_ptr res,
}
else if (poly1 == poly2 && len1 == len2)
{
slong i;
slong i, start, stop;
_acb_vec_scalar_mul(res, poly1, FLINT_MIN(len1, n), poly1, prec);
_acb_vec_scalar_mul(res + len1, poly1 + 1, n - len1, poly1 + len1 - 1, prec);
acb_sqr(res, poly1, prec);
acb_mul(res + 1, poly1, poly1 + 1, prec);
acb_mul_2exp_si(res + 1, res + 1, 1);
for (i = 1; i < len1 - 1; i++)
_acb_vec_scalar_addmul(res + i + 1, poly1 + 1,
FLINT_MIN(i - 1, n - (i + 1)), poly1 + i, prec);
for (i = 2; i < FLINT_MIN(n, 2 * len1 - 3); i++)
{
start = FLINT_MAX(0, i - len1 + 1);
stop = FLINT_MIN(len1 - 1, (i + 1) / 2 - 1);
for (i = 1; i < FLINT_MIN(2 * len1 - 2, n); i++)
acb_dot(res + i, NULL, 0, poly1 + start, 1,
poly1 + i - start, -1, stop - start + 1, prec);
acb_mul_2exp_si(res + i, res + i, 1);
if (i % 2 == 0 && i / 2 < len1)
acb_addmul(res + i, poly1 + i / 2, poly1 + i / 2, prec);
}
for (i = 1; i < FLINT_MIN(len1 - 1, (n + 1) / 2); i++)
acb_addmul(res + 2 * i, poly1 + i, poly1 + i, prec);
if (len1 > 2 && n >= 2 * len1 - 2)
{
acb_mul(res + 2 * len1 - 3, poly1 + len1 - 1, poly1 + len1 - 2, prec);
acb_mul_2exp_si(res + 2 * len1 - 3, res + 2 * len1 - 3, 1);
}
if (n >= 2 * len1 - 1)
acb_sqr(res + 2 * len1 - 2, poly1 + len1 - 1, prec);
}
else if (len1 == 1)
{
_acb_vec_scalar_mul(res, poly2, n, poly1, prec);
}
else if (len2 == 1)
{
_acb_vec_scalar_mul(res, poly1, n, poly2, prec);
}
else
{
slong i;
slong i, top1, top2;
_acb_vec_scalar_mul(res, poly1, FLINT_MIN(len1, n), poly2, prec);
acb_mul(res, poly1, poly2, prec);
if (n > len1)
_acb_vec_scalar_mul(res + len1, poly2 + 1, n - len1,
poly1 + len1 - 1, prec);
for (i = 1; i < n; i++)
{
top1 = FLINT_MIN(len1 - 1, i);
top2 = FLINT_MIN(len2 - 1, i);
for (i = 0; i < FLINT_MIN(len1, n) - 1; i++)
_acb_vec_scalar_addmul(res + i + 1, poly2 + 1,
FLINT_MIN(len2, n - i) - 1,
poly1 + i, prec);
acb_dot(res + i, NULL, 0, poly1 + i - top2, 1,
poly2 + top2, -1, top1 + top2 - i + 1, prec);
}
}
}
@ -94,4 +114,3 @@ acb_poly_mullow_classical(acb_poly_t res, const acb_poly_t poly1,
_acb_poly_set_length(res, n);
_acb_poly_normalise(res);
}

View file

@ -11,10 +11,8 @@
#include "acb_poly.h"
#define TANGENT_CUTOFF 80
void
_acb_poly_sin_cos_pi_series(acb_ptr s, acb_ptr c, const acb_srcptr h, slong hlen, slong n, slong prec)
_acb_poly_sin_cos_pi_series(acb_ptr s, acb_ptr c, acb_srcptr h, slong hlen, slong n, slong prec)
{
hlen = FLINT_MIN(hlen, n);
@ -36,10 +34,25 @@ _acb_poly_sin_cos_pi_series(acb_ptr s, acb_ptr c, const acb_srcptr h, slong hlen
acb_mul(c + 1, s, t, prec);
acb_clear(t);
}
else if (hlen < TANGENT_CUTOFF)
_acb_poly_sin_cos_series_basecase(s, c, h, hlen, n, prec, 1);
else
_acb_poly_sin_cos_series_tangent(s, c, h, hlen, n, prec, 1);
{
slong cutoff;
if (prec <= 128)
{
cutoff = 1400;
}
else
{
cutoff = 100000 / pow(log(prec), 3);
cutoff = FLINT_MIN(cutoff, 700);
}
if (hlen < cutoff)
_acb_poly_sin_cos_series_basecase(s, c, h, hlen, n, prec, 1);
else
_acb_poly_sin_cos_series_tangent(s, c, h, hlen, n, prec, 1);
}
}
void
@ -73,4 +86,3 @@ acb_poly_sin_cos_pi_series(acb_poly_t s, acb_poly_t c,
_acb_poly_set_length(c, n);
_acb_poly_normalise(c);
}

View file

@ -11,10 +11,8 @@
#include "acb_poly.h"
#define TANGENT_CUTOFF 80
void
_acb_poly_sin_cos_series(acb_ptr s, acb_ptr c, const acb_srcptr h, slong hlen, slong n, slong prec)
_acb_poly_sin_cos_series(acb_ptr s, acb_ptr c, acb_srcptr h, slong hlen, slong n, slong prec)
{
hlen = FLINT_MIN(hlen, n);
@ -35,10 +33,25 @@ _acb_poly_sin_cos_series(acb_ptr s, acb_ptr c, const acb_srcptr h, slong hlen, s
acb_mul(c + 1, s, t, prec);
acb_clear(t);
}
else if (hlen < TANGENT_CUTOFF)
_acb_poly_sin_cos_series_basecase(s, c, h, hlen, n, prec, 0);
else
_acb_poly_sin_cos_series_tangent(s, c, h, hlen, n, prec, 0);
{
slong cutoff;
if (prec <= 128)
{
cutoff = 1400;
}
else
{
cutoff = 100000 / pow(log(prec), 3);
cutoff = FLINT_MIN(cutoff, 700);
}
if (hlen < cutoff)
_acb_poly_sin_cos_series_basecase(s, c, h, hlen, n, prec, 0);
else
_acb_poly_sin_cos_series_tangent(s, c, h, hlen, n, prec, 0);
}
}
void

View file

@ -15,7 +15,7 @@ void
_acb_poly_sin_cos_series_basecase(acb_ptr s, acb_ptr c, acb_srcptr h, slong hlen,
slong n, slong prec, int times_pi)
{
slong j, k, alen = FLINT_MIN(n, hlen);
slong k, alen = FLINT_MIN(n, hlen);
acb_ptr a;
acb_t t, u;
@ -46,15 +46,8 @@ _acb_poly_sin_cos_series_basecase(acb_ptr s, acb_ptr c, acb_srcptr h, slong hlen
for (k = 1; k < n; k++)
{
acb_zero(t);
acb_zero(u);
for (j = 1; j < FLINT_MIN(k + 1, hlen); j++)
{
acb_submul(t, a + j, s + k - j, prec);
acb_addmul(u, a + j, c + k - j, prec);
}
acb_dot(t, NULL, 1, a + 1, 1, s + k - 1, -1, FLINT_MIN(k, hlen - 1), prec);
acb_dot(u, NULL, 0, a + 1, 1, c + k - 1, -1, FLINT_MIN(k, hlen - 1), prec);
acb_div_ui(c + k, t, k, prec);
acb_div_ui(s + k, u, k, prec);
}
@ -92,4 +85,3 @@ acb_poly_sin_cos_series_basecase(acb_poly_t s, acb_poly_t c,
_acb_poly_set_length(c, n);
_acb_poly_normalise(c);
}

View file

@ -12,7 +12,7 @@
#include "acb_poly.h"
void
_acb_poly_sinh_cosh_series(acb_ptr s, acb_ptr c, const acb_srcptr h, slong hlen, slong n, slong prec)
_acb_poly_sinh_cosh_series(acb_ptr s, acb_ptr c, acb_srcptr h, slong hlen, slong n, slong prec)
{
hlen = FLINT_MIN(hlen, n);
@ -32,10 +32,20 @@ _acb_poly_sinh_cosh_series(acb_ptr s, acb_ptr c, const acb_srcptr h, slong hlen,
acb_mul(c + 1, s, t, prec);
acb_clear(t);
}
else if (hlen < 60 || n < 120)
_acb_poly_sinh_cosh_series_basecase(s, c, h, hlen, n, prec);
else
_acb_poly_sinh_cosh_series_exponential(s, c, h, hlen, n, prec);
{
slong cutoff;
if (prec <= 128)
cutoff = 400;
else
cutoff = 30000 / pow(log(prec), 3);
if (hlen < cutoff)
_acb_poly_sinh_cosh_series_basecase(s, c, h, hlen, n, prec);
else
_acb_poly_sinh_cosh_series_exponential(s, c, h, hlen, n, prec);
}
}
void
@ -69,4 +79,3 @@ acb_poly_sinh_cosh_series(acb_poly_t s, acb_poly_t c,
_acb_poly_set_length(c, n);
_acb_poly_normalise(c);
}

View file

@ -15,7 +15,7 @@ void
_acb_poly_sinh_cosh_series_basecase(acb_ptr s, acb_ptr c, acb_srcptr h, slong hlen,
slong n, slong prec)
{
slong j, k, alen = FLINT_MIN(n, hlen);
slong k, alen = FLINT_MIN(n, hlen);
acb_ptr a;
acb_t t, u;
@ -37,15 +37,8 @@ _acb_poly_sinh_cosh_series_basecase(acb_ptr s, acb_ptr c, acb_srcptr h, slong hl
for (k = 1; k < n; k++)
{
acb_zero(t);
acb_zero(u);
for (j = 1; j < FLINT_MIN(k + 1, hlen); j++)
{
acb_addmul(t, a + j, s + k - j, prec);
acb_addmul(u, a + j, c + k - j, prec);
}
acb_dot(t, NULL, 0, a + 1, 1, s + k - 1, -1, FLINT_MIN(k, hlen - 1), prec);
acb_dot(u, NULL, 0, a + 1, 1, c + k - 1, -1, FLINT_MIN(k, hlen - 1), prec);
acb_div_ui(c + k, t, k, prec);
acb_div_ui(s + k, u, k, prec);
}
@ -83,4 +76,3 @@ acb_poly_sinh_cosh_series_basecase(acb_poly_t s, acb_poly_t c,
_acb_poly_set_length(c, n);
_acb_poly_normalise(c);
}

View file

@ -35,7 +35,7 @@ int main()
else
len = n_randint(state, 3);
if (n_randint(state, 100) == 0 || len > 10)
if (n_randint(state, 10) != 0 || len > 10)
{
prec = 2 + n_randint(state, 500);
xbits = 2 + n_randint(state, 500);

View file

@ -499,6 +499,46 @@ Arithmetic
Sets *z* to the quotient of *x* and *y*.
Dot product
-------------------------------------------------------------------------------
.. function:: void acb_dot_precise(acb_t res, const acb_t s, int subtract, acb_srcptr x, slong xstep, acb_srcptr y, slong ystep, slong len, slong prec)
.. function:: void acb_dot_simple(acb_t res, const acb_t s, int subtract, acb_srcptr x, slong xstep, acb_srcptr y, slong ystep, slong len, slong prec)
.. function:: void acb_dot(acb_t res, const acb_t s, int subtract, acb_srcptr x, slong xstep, acb_srcptr y, slong ystep, slong len, slong prec)
Computes the dot product of the vectors *x* and *y*, setting
*res* to `s + (-1)^{subtract} \sum_{i=0}^{len-1} x_i y_i`.
The initial term *s* is optional and can be
omitted by passing *NULL* (equivalently, `s = 0`).
The parameter *subtract* must be 0 or 1.
The length *len* is allowed to be negative, which is equivalent
to a length of zero.
The parameters *xstep* or *ystep* specify a step length for
traversing subsequences of the vectors *x* and *y*; either can be
negative to step in the reverse direction starting from
the initial pointer.
Aliasing is allowed between *res* and *s* but not between
*res* and the entries of *x* and *y*.
The default version determines the optimal precision for each term
and performs all internal calculations using mpn arithmetic
with minimal overhead. This is the preferred way to compute a
dot product; it is generally much faster and more precise
than a simple loop.
The *simple* version performs fused multiply-add operations in
a simple loop. This can be used for
testing purposes and is also used as a fallback by the
default version when the exponents are out of range
for the optimized code.
The *precise* version computes the dot product exactly up to the
final rounding. This can be extremely slow and is only intended
for testing.
Mathematical constants
-------------------------------------------------------------------------------

View file

@ -782,7 +782,7 @@ Arithmetic
Sets `z = x / (2^n - 1)`, rounded to *prec* bits.
Sum and dot product
Dot product
-------------------------------------------------------------------------------
.. function:: void arb_dot_precise(arb_t res, const arb_t s, int subtract, arb_srcptr x, slong xstep, arb_srcptr y, slong ystep, slong len, slong prec)