mirror of
https://github.com/vale981/arb
synced 2025-03-05 09:21:38 -05:00
implement acb_dot and use in the acb_poly module
This commit is contained in:
parent
7739f884de
commit
ffa60bf319
19 changed files with 1528 additions and 116 deletions
8
acb.h
8
acb.h
|
@ -644,6 +644,14 @@ acb_submul_arb(acb_t z, const acb_t x, const arb_t y, slong prec)
|
|||
arb_submul(acb_imagref(z), acb_imagref(x), y, prec);
|
||||
}
|
||||
|
||||
void acb_dot_simple(acb_t res, const acb_t initial, int subtract,
|
||||
acb_srcptr x, slong xstep, acb_srcptr y, slong ystep, slong len, slong prec);
|
||||
void acb_dot_precise(acb_t res, const acb_t initial, int subtract,
|
||||
acb_srcptr x, slong xstep, acb_srcptr y, slong ystep, slong len, slong prec);
|
||||
void acb_dot(acb_t res, const acb_t initial, int subtract,
|
||||
acb_srcptr x, slong xstep, acb_srcptr y, slong ystep, slong len, slong prec);
|
||||
|
||||
|
||||
void acb_inv(acb_t z, const acb_t x, slong prec);
|
||||
|
||||
void acb_div(acb_t z, const acb_t x, const acb_t y, slong prec);
|
||||
|
|
50
acb/dot_precise.c
Normal file
50
acb/dot_precise.c
Normal file
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
Copyright (C) 2018 Fredrik Johansson
|
||||
|
||||
This file is part of Arb.
|
||||
|
||||
Arb is free software: you can redistribute it and/or modify it under
|
||||
the terms of the GNU Lesser General Public License (LGPL) as published
|
||||
by the Free Software Foundation; either version 2.1 of the License, or
|
||||
(at your option) any later version. See <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "acb.h"
|
||||
|
||||
void
|
||||
acb_dot_precise(acb_t res, const acb_t initial, int subtract, acb_srcptr x, slong xstep,
|
||||
acb_srcptr y, slong ystep, slong len, slong prec)
|
||||
{
|
||||
arb_ptr tmp;
|
||||
slong i;
|
||||
|
||||
tmp = flint_malloc(sizeof(arb_struct) * (4 * len));
|
||||
|
||||
for (i = 0; i < len; i++)
|
||||
{
|
||||
tmp[0 * len + i] = *acb_realref(x + i * xstep);
|
||||
tmp[1 * len + i] = *acb_imagref(x + i * xstep);
|
||||
tmp[2 * len + i] = *acb_realref(y + i * ystep);
|
||||
arb_init(tmp + 3 * len + i);
|
||||
arb_neg(tmp + 3 * len + i, acb_imagref(y + i * ystep));
|
||||
}
|
||||
|
||||
arb_dot_precise(acb_realref(res), initial == NULL ? NULL : acb_realref(initial), subtract,
|
||||
tmp, 1, tmp + 2 * len, 1, 2 * len, prec);
|
||||
|
||||
for (i = 0; i < len; i++)
|
||||
arb_clear(tmp + 3 * len + i);
|
||||
|
||||
for (i = 0; i < len; i++)
|
||||
{
|
||||
tmp[0 * len + i] = *acb_realref(x + i * xstep);
|
||||
tmp[1 * len + i] = *acb_imagref(x + i * xstep);
|
||||
tmp[2 * len + i] = *acb_imagref(y + i * ystep);
|
||||
tmp[3 * len + i] = *acb_realref(y + i * ystep);
|
||||
}
|
||||
|
||||
arb_dot_precise(acb_imagref(res), initial == NULL ? NULL : acb_imagref(initial), subtract,
|
||||
tmp, 1, tmp + 2 * len, 1, 2 * len, prec);
|
||||
|
||||
flint_free(tmp);
|
||||
}
|
47
acb/dot_simple.c
Normal file
47
acb/dot_simple.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/*
|
||||
Copyright (C) 2018 Fredrik Johansson
|
||||
|
||||
This file is part of Arb.
|
||||
|
||||
Arb is free software: you can redistribute it and/or modify it under
|
||||
the terms of the GNU Lesser General Public License (LGPL) as published
|
||||
by the Free Software Foundation; either version 2.1 of the License, or
|
||||
(at your option) any later version. See <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "acb.h"
|
||||
|
||||
void
|
||||
acb_dot_simple(acb_t res, const acb_t initial, int subtract,
|
||||
acb_srcptr x, slong xstep, acb_srcptr y, slong ystep, slong len, slong prec)
|
||||
{
|
||||
slong i;
|
||||
|
||||
if (len <= 0)
|
||||
{
|
||||
if (initial == NULL)
|
||||
acb_zero(res);
|
||||
else
|
||||
acb_set_round(res, initial, prec);
|
||||
return;
|
||||
}
|
||||
|
||||
if (initial == NULL)
|
||||
{
|
||||
acb_mul(res, x, y, prec);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (subtract)
|
||||
acb_neg(res, initial);
|
||||
else
|
||||
acb_set(res, initial);
|
||||
acb_addmul(res, x, y, prec);
|
||||
}
|
||||
|
||||
for (i = 1; i < len; i++)
|
||||
acb_addmul(res, x + i * xstep, y + i * ystep, prec);
|
||||
|
||||
if (subtract)
|
||||
acb_neg(res, res);
|
||||
}
|
200
acb/test/t-dot.c
Normal file
200
acb/test/t-dot.c
Normal file
|
@ -0,0 +1,200 @@
|
|||
/*
|
||||
Copyright (C) 2018 Fredrik Johansson
|
||||
|
||||
This file is part of Arb.
|
||||
|
||||
Arb is free software: you can redistribute it and/or modify it under
|
||||
the terms of the GNU Lesser General Public License (LGPL) as published
|
||||
by the Free Software Foundation; either version 2.1 of the License, or
|
||||
(at your option) any later version. See <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "acb.h"
|
||||
|
||||
ARB_DLL extern slong acb_dot_gauss_dot_cutoff;
|
||||
|
||||
int main()
|
||||
{
|
||||
slong iter;
|
||||
flint_rand_t state;
|
||||
|
||||
flint_printf("dot....");
|
||||
fflush(stdout);
|
||||
|
||||
flint_randinit(state);
|
||||
|
||||
for (iter = 0; iter < 1000000 * arb_test_multiplier(); iter++)
|
||||
{
|
||||
acb_ptr x, y;
|
||||
acb_t s1, s2, z;
|
||||
slong i, len, prec, xbits, ybits, ebits;
|
||||
int ok, initial, subtract, revx, revy;
|
||||
|
||||
if (n_randint(state, 100) == 0)
|
||||
len = n_randint(state, 50);
|
||||
else if (n_randint(state, 10) == 0)
|
||||
len = n_randint(state, 5);
|
||||
else
|
||||
len = n_randint(state, 3);
|
||||
|
||||
acb_dot_gauss_dot_cutoff = 3 + n_randint(state, 30);
|
||||
|
||||
if (n_randint(state, 10) != 0 || len > 10)
|
||||
{
|
||||
prec = 2 + n_randint(state, 500);
|
||||
xbits = 2 + n_randint(state, 500);
|
||||
ybits = 2 + n_randint(state, 500);
|
||||
}
|
||||
else
|
||||
{
|
||||
prec = 2 + n_randint(state, 5000);
|
||||
xbits = 2 + n_randint(state, 5000);
|
||||
ybits = 2 + n_randint(state, 5000);
|
||||
}
|
||||
|
||||
if (n_randint(state, 100) == 0)
|
||||
ebits = 2 + n_randint(state, 100);
|
||||
else
|
||||
ebits = 2 + n_randint(state, 10);
|
||||
|
||||
initial = n_randint(state, 2);
|
||||
subtract = n_randint(state, 2);
|
||||
revx = n_randint(state, 2);
|
||||
revy = n_randint(state, 2);
|
||||
|
||||
x = _acb_vec_init(len);
|
||||
y = _acb_vec_init(len);
|
||||
acb_init(s1);
|
||||
acb_init(s2);
|
||||
acb_init(z);
|
||||
|
||||
switch (n_randint(state, 3))
|
||||
{
|
||||
case 0:
|
||||
for (i = 0; i < len; i++)
|
||||
{
|
||||
acb_randtest(x + i, state, xbits, ebits);
|
||||
acb_randtest(y + i, state, ybits, ebits);
|
||||
}
|
||||
break;
|
||||
|
||||
/* Test with cancellation */
|
||||
case 1:
|
||||
for (i = 0; i < len; i++)
|
||||
{
|
||||
if (i <= len / 2)
|
||||
{
|
||||
acb_randtest(x + i, state, xbits, ebits);
|
||||
acb_randtest(y + i, state, ybits, ebits);
|
||||
}
|
||||
else
|
||||
{
|
||||
acb_neg(x + i, x + len - i - 1);
|
||||
acb_set(y + i, y + len - i - 1);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
for (i = 0; i < len; i++)
|
||||
{
|
||||
if (i <= len / 2)
|
||||
{
|
||||
acb_randtest(x + i, state, xbits, ebits);
|
||||
acb_randtest(y + i, state, ybits, ebits);
|
||||
}
|
||||
else
|
||||
{
|
||||
acb_neg_round(x + i, x + len - i - 1, 2 + n_randint(state, 500));
|
||||
acb_set_round(y + i, y + len - i - 1, 2 + n_randint(state, 500));
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
acb_randtest(s1, state, 200, 100);
|
||||
acb_randtest(s2, state, 200, 100);
|
||||
acb_randtest(z, state, xbits, ebits);
|
||||
|
||||
acb_dot(s1, initial ? z : NULL, subtract,
|
||||
revx ? (x + len - 1) : x, revx ? -1 : 1,
|
||||
revy ? (y + len - 1) : y, revy ? -1 : 1,
|
||||
len, prec);
|
||||
|
||||
acb_dot_precise(s2, initial ? z : NULL, subtract,
|
||||
revx ? (x + len - 1) : x, revx ? -1 : 1,
|
||||
revy ? (y + len - 1) : y, revy ? -1 : 1,
|
||||
len, ebits <= 12 ? ARF_PREC_EXACT : 2 * prec + 100);
|
||||
|
||||
if (ebits <= 12)
|
||||
ok = acb_contains(s1, s2);
|
||||
else
|
||||
ok = acb_overlaps(s1, s2);
|
||||
|
||||
if (!ok)
|
||||
{
|
||||
flint_printf("FAIL\n\n");
|
||||
flint_printf("iter = %wd, len = %wd, prec = %wd, ebits = %wd, subtract = %d\n\n", iter, len, prec, ebits, subtract);
|
||||
|
||||
if (initial)
|
||||
{
|
||||
flint_printf("z = ", i); acb_printn(z, 100, ARB_STR_MORE); flint_printf(" (%wd)\n\n", acb_bits(z));
|
||||
}
|
||||
|
||||
for (i = 0; i < len; i++)
|
||||
{
|
||||
flint_printf("x[%wd] = ", i); acb_printn(x + i, 100, ARB_STR_MORE); flint_printf(" (%wd)\n", acb_bits(x + i));
|
||||
flint_printf("y[%wd] = ", i); acb_printn(y + i, 100, ARB_STR_MORE); flint_printf(" (%wd)\n", acb_bits(y + i));
|
||||
}
|
||||
flint_printf("\n\n");
|
||||
flint_printf("s1 = "); acb_printn(s1, 100, ARB_STR_MORE); flint_printf("\n\n");
|
||||
flint_printf("s2 = "); acb_printn(s2, 100, ARB_STR_MORE); flint_printf("\n\n");
|
||||
flint_abort();
|
||||
}
|
||||
|
||||
/* With the fast algorithm, we expect identical results when
|
||||
reversing the vectors. */
|
||||
if (ebits <= 12)
|
||||
{
|
||||
revx ^= 1;
|
||||
revy ^= 1;
|
||||
|
||||
acb_dot(s2, initial ? z : NULL, subtract,
|
||||
revx ? (x + len - 1) : x, revx ? -1 : 1,
|
||||
revy ? (y + len - 1) : y, revy ? -1 : 1,
|
||||
len, prec);
|
||||
|
||||
if (!acb_equal(s1, s2))
|
||||
{
|
||||
flint_printf("FAIL (reversal)\n\n");
|
||||
flint_printf("iter = %wd, len = %wd, prec = %wd, ebits = %wd\n\n", iter, len, prec, ebits);
|
||||
|
||||
if (initial)
|
||||
{
|
||||
flint_printf("z = ", i); acb_printn(z, 100, ARB_STR_MORE); flint_printf(" (%wd)\n\n", acb_bits(z));
|
||||
}
|
||||
|
||||
for (i = 0; i < len; i++)
|
||||
{
|
||||
flint_printf("x[%wd] = ", i); acb_printn(x + i, 100, ARB_STR_MORE); flint_printf(" (%wd)\n", acb_bits(x + i));
|
||||
flint_printf("y[%wd] = ", i); acb_printn(y + i, 100, ARB_STR_MORE); flint_printf(" (%wd)\n", acb_bits(y + i));
|
||||
}
|
||||
flint_printf("\n\n");
|
||||
flint_printf("s1 = "); acb_printn(s1, 100, ARB_STR_MORE); flint_printf("\n\n");
|
||||
flint_printf("s2 = "); acb_printn(s2, 100, ARB_STR_MORE); flint_printf("\n\n");
|
||||
flint_abort();
|
||||
}
|
||||
}
|
||||
|
||||
acb_clear(s1);
|
||||
acb_clear(s2);
|
||||
acb_clear(z);
|
||||
_acb_vec_clear(x, len);
|
||||
_acb_vec_clear(y, len);
|
||||
}
|
||||
|
||||
flint_randclear(state);
|
||||
flint_cleanup();
|
||||
flint_printf("PASS\n");
|
||||
return EXIT_SUCCESS;
|
||||
}
|
|
@ -44,7 +44,7 @@ _acb_poly_div_series(acb_ptr Q, acb_srcptr A, slong Alen,
|
|||
{
|
||||
/* The basecase algorithm is faster for much larger Blen and n than
|
||||
this, but unfortunately has worse numerical stability. */
|
||||
slong i, j;
|
||||
slong i;
|
||||
acb_t q;
|
||||
|
||||
acb_init(q);
|
||||
|
@ -54,16 +54,8 @@ _acb_poly_div_series(acb_ptr Q, acb_srcptr A, slong Alen,
|
|||
|
||||
for (i = 1; i < n; i++)
|
||||
{
|
||||
acb_mul(Q + i, B + 1, Q + i - 1, prec);
|
||||
|
||||
for (j = 2; j < FLINT_MIN(i + 1, Blen); j++)
|
||||
acb_addmul(Q + i, B + j, Q + i - j, prec);
|
||||
|
||||
if (i < Alen)
|
||||
acb_sub(Q + i, A + i, Q + i, prec);
|
||||
else
|
||||
acb_neg(Q + i, Q + i);
|
||||
|
||||
acb_dot(Q + i, (i < Alen) ? A + i : NULL, 1,
|
||||
B + 1, 1, Q + i - 1, -1, FLINT_MIN(i, Blen - 1), prec);
|
||||
if (!acb_is_one(q))
|
||||
acb_mul(Q + i, Q + i, q, prec);
|
||||
}
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
#include "acb_poly.h"
|
||||
|
||||
/* allow changing this from the test code */
|
||||
ARB_DLL slong acb_poly_newton_exp_cutoff = 120;
|
||||
ARB_DLL slong acb_poly_newton_exp_cutoff = 0;
|
||||
|
||||
/* with inverse=1 simultaneously computes g = exp(-x) to length n
|
||||
with inverse=0 uses g as scratch space, computing
|
||||
|
@ -112,39 +112,51 @@ _acb_poly_exp_series(acb_ptr f, acb_srcptr h, slong hlen, slong n, slong prec)
|
|||
_acb_vec_zero(f + j - d + 1, n - (j - d + 1));
|
||||
acb_clear(t);
|
||||
}
|
||||
else if (hlen <= acb_poly_newton_exp_cutoff)
|
||||
{
|
||||
_acb_poly_exp_series_basecase(f, h, hlen, n, prec);
|
||||
}
|
||||
else
|
||||
{
|
||||
acb_ptr g, t;
|
||||
acb_t u;
|
||||
int fix;
|
||||
slong cutoff;
|
||||
|
||||
g = _acb_vec_init((n + 1) / 2);
|
||||
fix = (hlen < n || h == f || !acb_is_zero(h));
|
||||
if (acb_poly_newton_exp_cutoff != 0)
|
||||
cutoff = acb_poly_newton_exp_cutoff;
|
||||
else if (prec <= 256)
|
||||
cutoff = 750;
|
||||
else
|
||||
cutoff = 1e5 / pow(log(prec), 3);
|
||||
|
||||
if (fix)
|
||||
if (hlen <= cutoff)
|
||||
{
|
||||
t = _acb_vec_init(n);
|
||||
_acb_vec_set(t + 1, h + 1, hlen - 1);
|
||||
_acb_poly_exp_series_basecase(f, h, hlen, n, prec);
|
||||
}
|
||||
else
|
||||
t = (acb_ptr) h;
|
||||
{
|
||||
acb_ptr g, t;
|
||||
acb_t u;
|
||||
int fix;
|
||||
|
||||
acb_init(u);
|
||||
acb_exp(u, h, prec);
|
||||
g = _acb_vec_init((n + 1) / 2);
|
||||
fix = (hlen < n || h == f || !acb_is_zero(h));
|
||||
|
||||
_acb_poly_exp_series_newton(f, g, t, n, prec, 0, acb_poly_newton_exp_cutoff);
|
||||
if (fix)
|
||||
{
|
||||
t = _acb_vec_init(n);
|
||||
_acb_vec_set(t + 1, h + 1, hlen - 1);
|
||||
}
|
||||
else
|
||||
t = (acb_ptr) h;
|
||||
|
||||
if (!acb_is_one(u))
|
||||
_acb_vec_scalar_mul(f, f, n, u, prec);
|
||||
acb_init(u);
|
||||
acb_exp(u, h, prec);
|
||||
|
||||
_acb_vec_clear(g, (n + 1) / 2);
|
||||
if (fix)
|
||||
_acb_vec_clear(t, n);
|
||||
acb_clear(u);
|
||||
_acb_poly_exp_series_newton(f, g, t, n, prec, 0, cutoff);
|
||||
|
||||
if (!acb_is_one(u))
|
||||
_acb_vec_scalar_mul(f, f, n, u, prec);
|
||||
|
||||
_acb_vec_clear(g, (n + 1) / 2);
|
||||
if (fix)
|
||||
_acb_vec_clear(t, n);
|
||||
acb_clear(u);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -11,13 +11,11 @@
|
|||
|
||||
#include "acb_poly.h"
|
||||
|
||||
#define MUL_CUTOFF 24
|
||||
|
||||
static void
|
||||
_acb_poly_exp_series_basecase_rec(acb_ptr f, acb_ptr a,
|
||||
acb_srcptr h, slong hlen, slong n, slong prec)
|
||||
{
|
||||
slong j, k;
|
||||
slong k;
|
||||
|
||||
acb_t s;
|
||||
acb_init(s);
|
||||
|
@ -29,10 +27,7 @@ _acb_poly_exp_series_basecase_rec(acb_ptr f, acb_ptr a,
|
|||
|
||||
for (k = 1; k < n; k++)
|
||||
{
|
||||
acb_zero(s);
|
||||
for (j = 1; j < FLINT_MIN(k + 1, hlen); j++)
|
||||
acb_addmul(s, a + j, f + k - j, prec);
|
||||
|
||||
acb_dot(s, NULL, 0, a + 1, 1, f + k - 1, -1, FLINT_MIN(k, hlen - 1), prec);
|
||||
acb_div_ui(f + k, s, k, prec);
|
||||
}
|
||||
|
||||
|
@ -45,7 +40,7 @@ _acb_poly_exp_series_basecase(acb_ptr f,
|
|||
{
|
||||
hlen = FLINT_MIN(n, hlen);
|
||||
|
||||
if (n < MUL_CUTOFF || hlen < 0.9 * n)
|
||||
if (n < 20 || hlen < 0.9 * n || prec <= 2 * FLINT_BITS || n < 1000.0 / log(prec + 10) - 70)
|
||||
{
|
||||
acb_ptr t = _acb_vec_init(hlen);
|
||||
_acb_poly_exp_series_basecase_rec(f, t, h, hlen, n, prec);
|
||||
|
|
|
@ -37,7 +37,7 @@ _acb_poly_inv_series(acb_ptr Qinv,
|
|||
}
|
||||
else
|
||||
{
|
||||
slong i, j, blen;
|
||||
slong i, blen;
|
||||
|
||||
/* The basecase algorithm is faster for much larger Qlen or len than
|
||||
this, but unfortunately also much less numerically stable. */
|
||||
|
@ -48,15 +48,10 @@ _acb_poly_inv_series(acb_ptr Qinv,
|
|||
|
||||
for (i = 1; i < blen; i++)
|
||||
{
|
||||
acb_mul(Qinv + i, Q + 1, Qinv + i - 1, prec);
|
||||
|
||||
for (j = 2; j < FLINT_MIN(i + 1, Qlen); j++)
|
||||
acb_addmul(Qinv + i, Q + j, Qinv + i - j, prec);
|
||||
|
||||
acb_dot(Qinv + i, NULL, 1,
|
||||
Q + 1, 1, Qinv + i - 1, -1, FLINT_MIN(i, Qlen - 1), prec);
|
||||
if (!acb_is_one(Qinv))
|
||||
acb_mul(Qinv + i, Qinv + i, Qinv, prec);
|
||||
|
||||
acb_neg(Qinv + i, Qinv + i);
|
||||
}
|
||||
|
||||
if (len > blen)
|
||||
|
@ -116,4 +111,3 @@ acb_poly_inv_series(acb_poly_t Qinv, const acb_poly_t Q, slong n, slong prec)
|
|||
_acb_poly_set_length(Qinv, n);
|
||||
_acb_poly_normalise(Qinv);
|
||||
}
|
||||
|
||||
|
|
|
@ -11,17 +11,46 @@
|
|||
|
||||
#include "acb_poly.h"
|
||||
|
||||
#define CUTOFF 4
|
||||
|
||||
void
|
||||
_acb_poly_mullow(acb_ptr res,
|
||||
acb_srcptr poly1, slong len1,
|
||||
acb_srcptr poly2, slong len2, slong n, slong prec)
|
||||
{
|
||||
if (n < CUTOFF || len1 < CUTOFF || len2 < CUTOFF)
|
||||
if (n == 1)
|
||||
{
|
||||
acb_mul(res, poly1, poly2, prec);
|
||||
}
|
||||
else if (n <= 7 || len1 <= 7 || len2 <= 7)
|
||||
{
|
||||
_acb_poly_mullow_classical(res, poly1, len1, poly2, len2, n, prec);
|
||||
}
|
||||
else
|
||||
_acb_poly_mullow_transpose(res, poly1, len1, poly2, len2, n, prec);
|
||||
{
|
||||
slong cutoff;
|
||||
double p;
|
||||
|
||||
if (prec <= 2 * FLINT_BITS)
|
||||
{
|
||||
cutoff = 110;
|
||||
}
|
||||
else
|
||||
{
|
||||
p = log(prec);
|
||||
|
||||
cutoff = 10000.0 / (p * p * p);
|
||||
cutoff = FLINT_MIN(cutoff, 60);
|
||||
if (poly1 == poly2 && prec >= 256)
|
||||
cutoff *= 1.25;
|
||||
if (poly1 == poly2 && prec >= 4096)
|
||||
cutoff *= 1.25;
|
||||
cutoff = FLINT_MAX(cutoff, 8);
|
||||
}
|
||||
|
||||
if (2 * FLINT_MIN(len1, len2) <= cutoff || n <= cutoff)
|
||||
_acb_poly_mullow_classical(res, poly1, len1, poly2, len2, n, prec);
|
||||
else
|
||||
_acb_poly_mullow_transpose(res, poly1, len1, poly2, len2, n, prec);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -26,35 +26,55 @@ _acb_poly_mullow_classical(acb_ptr res,
|
|||
}
|
||||
else if (poly1 == poly2 && len1 == len2)
|
||||
{
|
||||
slong i;
|
||||
slong i, start, stop;
|
||||
|
||||
_acb_vec_scalar_mul(res, poly1, FLINT_MIN(len1, n), poly1, prec);
|
||||
_acb_vec_scalar_mul(res + len1, poly1 + 1, n - len1, poly1 + len1 - 1, prec);
|
||||
acb_sqr(res, poly1, prec);
|
||||
acb_mul(res + 1, poly1, poly1 + 1, prec);
|
||||
acb_mul_2exp_si(res + 1, res + 1, 1);
|
||||
|
||||
for (i = 1; i < len1 - 1; i++)
|
||||
_acb_vec_scalar_addmul(res + i + 1, poly1 + 1,
|
||||
FLINT_MIN(i - 1, n - (i + 1)), poly1 + i, prec);
|
||||
for (i = 2; i < FLINT_MIN(n, 2 * len1 - 3); i++)
|
||||
{
|
||||
start = FLINT_MAX(0, i - len1 + 1);
|
||||
stop = FLINT_MIN(len1 - 1, (i + 1) / 2 - 1);
|
||||
|
||||
for (i = 1; i < FLINT_MIN(2 * len1 - 2, n); i++)
|
||||
acb_dot(res + i, NULL, 0, poly1 + start, 1,
|
||||
poly1 + i - start, -1, stop - start + 1, prec);
|
||||
acb_mul_2exp_si(res + i, res + i, 1);
|
||||
if (i % 2 == 0 && i / 2 < len1)
|
||||
acb_addmul(res + i, poly1 + i / 2, poly1 + i / 2, prec);
|
||||
}
|
||||
|
||||
for (i = 1; i < FLINT_MIN(len1 - 1, (n + 1) / 2); i++)
|
||||
acb_addmul(res + 2 * i, poly1 + i, poly1 + i, prec);
|
||||
if (len1 > 2 && n >= 2 * len1 - 2)
|
||||
{
|
||||
acb_mul(res + 2 * len1 - 3, poly1 + len1 - 1, poly1 + len1 - 2, prec);
|
||||
acb_mul_2exp_si(res + 2 * len1 - 3, res + 2 * len1 - 3, 1);
|
||||
}
|
||||
|
||||
if (n >= 2 * len1 - 1)
|
||||
acb_sqr(res + 2 * len1 - 2, poly1 + len1 - 1, prec);
|
||||
}
|
||||
else if (len1 == 1)
|
||||
{
|
||||
_acb_vec_scalar_mul(res, poly2, n, poly1, prec);
|
||||
}
|
||||
else if (len2 == 1)
|
||||
{
|
||||
_acb_vec_scalar_mul(res, poly1, n, poly2, prec);
|
||||
}
|
||||
else
|
||||
{
|
||||
slong i;
|
||||
slong i, top1, top2;
|
||||
|
||||
_acb_vec_scalar_mul(res, poly1, FLINT_MIN(len1, n), poly2, prec);
|
||||
acb_mul(res, poly1, poly2, prec);
|
||||
|
||||
if (n > len1)
|
||||
_acb_vec_scalar_mul(res + len1, poly2 + 1, n - len1,
|
||||
poly1 + len1 - 1, prec);
|
||||
for (i = 1; i < n; i++)
|
||||
{
|
||||
top1 = FLINT_MIN(len1 - 1, i);
|
||||
top2 = FLINT_MIN(len2 - 1, i);
|
||||
|
||||
for (i = 0; i < FLINT_MIN(len1, n) - 1; i++)
|
||||
_acb_vec_scalar_addmul(res + i + 1, poly2 + 1,
|
||||
FLINT_MIN(len2, n - i) - 1,
|
||||
poly1 + i, prec);
|
||||
acb_dot(res + i, NULL, 0, poly1 + i - top2, 1,
|
||||
poly2 + top2, -1, top1 + top2 - i + 1, prec);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -94,4 +114,3 @@ acb_poly_mullow_classical(acb_poly_t res, const acb_poly_t poly1,
|
|||
_acb_poly_set_length(res, n);
|
||||
_acb_poly_normalise(res);
|
||||
}
|
||||
|
||||
|
|
|
@ -11,10 +11,8 @@
|
|||
|
||||
#include "acb_poly.h"
|
||||
|
||||
#define TANGENT_CUTOFF 80
|
||||
|
||||
void
|
||||
_acb_poly_sin_cos_pi_series(acb_ptr s, acb_ptr c, const acb_srcptr h, slong hlen, slong n, slong prec)
|
||||
_acb_poly_sin_cos_pi_series(acb_ptr s, acb_ptr c, acb_srcptr h, slong hlen, slong n, slong prec)
|
||||
{
|
||||
hlen = FLINT_MIN(hlen, n);
|
||||
|
||||
|
@ -36,10 +34,25 @@ _acb_poly_sin_cos_pi_series(acb_ptr s, acb_ptr c, const acb_srcptr h, slong hlen
|
|||
acb_mul(c + 1, s, t, prec);
|
||||
acb_clear(t);
|
||||
}
|
||||
else if (hlen < TANGENT_CUTOFF)
|
||||
_acb_poly_sin_cos_series_basecase(s, c, h, hlen, n, prec, 1);
|
||||
else
|
||||
_acb_poly_sin_cos_series_tangent(s, c, h, hlen, n, prec, 1);
|
||||
{
|
||||
slong cutoff;
|
||||
|
||||
if (prec <= 128)
|
||||
{
|
||||
cutoff = 1400;
|
||||
}
|
||||
else
|
||||
{
|
||||
cutoff = 100000 / pow(log(prec), 3);
|
||||
cutoff = FLINT_MIN(cutoff, 700);
|
||||
}
|
||||
|
||||
if (hlen < cutoff)
|
||||
_acb_poly_sin_cos_series_basecase(s, c, h, hlen, n, prec, 1);
|
||||
else
|
||||
_acb_poly_sin_cos_series_tangent(s, c, h, hlen, n, prec, 1);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -73,4 +86,3 @@ acb_poly_sin_cos_pi_series(acb_poly_t s, acb_poly_t c,
|
|||
_acb_poly_set_length(c, n);
|
||||
_acb_poly_normalise(c);
|
||||
}
|
||||
|
||||
|
|
|
@ -11,10 +11,8 @@
|
|||
|
||||
#include "acb_poly.h"
|
||||
|
||||
#define TANGENT_CUTOFF 80
|
||||
|
||||
void
|
||||
_acb_poly_sin_cos_series(acb_ptr s, acb_ptr c, const acb_srcptr h, slong hlen, slong n, slong prec)
|
||||
_acb_poly_sin_cos_series(acb_ptr s, acb_ptr c, acb_srcptr h, slong hlen, slong n, slong prec)
|
||||
{
|
||||
hlen = FLINT_MIN(hlen, n);
|
||||
|
||||
|
@ -35,10 +33,25 @@ _acb_poly_sin_cos_series(acb_ptr s, acb_ptr c, const acb_srcptr h, slong hlen, s
|
|||
acb_mul(c + 1, s, t, prec);
|
||||
acb_clear(t);
|
||||
}
|
||||
else if (hlen < TANGENT_CUTOFF)
|
||||
_acb_poly_sin_cos_series_basecase(s, c, h, hlen, n, prec, 0);
|
||||
else
|
||||
_acb_poly_sin_cos_series_tangent(s, c, h, hlen, n, prec, 0);
|
||||
{
|
||||
slong cutoff;
|
||||
|
||||
if (prec <= 128)
|
||||
{
|
||||
cutoff = 1400;
|
||||
}
|
||||
else
|
||||
{
|
||||
cutoff = 100000 / pow(log(prec), 3);
|
||||
cutoff = FLINT_MIN(cutoff, 700);
|
||||
}
|
||||
|
||||
if (hlen < cutoff)
|
||||
_acb_poly_sin_cos_series_basecase(s, c, h, hlen, n, prec, 0);
|
||||
else
|
||||
_acb_poly_sin_cos_series_tangent(s, c, h, hlen, n, prec, 0);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -15,7 +15,7 @@ void
|
|||
_acb_poly_sin_cos_series_basecase(acb_ptr s, acb_ptr c, acb_srcptr h, slong hlen,
|
||||
slong n, slong prec, int times_pi)
|
||||
{
|
||||
slong j, k, alen = FLINT_MIN(n, hlen);
|
||||
slong k, alen = FLINT_MIN(n, hlen);
|
||||
acb_ptr a;
|
||||
acb_t t, u;
|
||||
|
||||
|
@ -46,15 +46,8 @@ _acb_poly_sin_cos_series_basecase(acb_ptr s, acb_ptr c, acb_srcptr h, slong hlen
|
|||
|
||||
for (k = 1; k < n; k++)
|
||||
{
|
||||
acb_zero(t);
|
||||
acb_zero(u);
|
||||
|
||||
for (j = 1; j < FLINT_MIN(k + 1, hlen); j++)
|
||||
{
|
||||
acb_submul(t, a + j, s + k - j, prec);
|
||||
acb_addmul(u, a + j, c + k - j, prec);
|
||||
}
|
||||
|
||||
acb_dot(t, NULL, 1, a + 1, 1, s + k - 1, -1, FLINT_MIN(k, hlen - 1), prec);
|
||||
acb_dot(u, NULL, 0, a + 1, 1, c + k - 1, -1, FLINT_MIN(k, hlen - 1), prec);
|
||||
acb_div_ui(c + k, t, k, prec);
|
||||
acb_div_ui(s + k, u, k, prec);
|
||||
}
|
||||
|
@ -92,4 +85,3 @@ acb_poly_sin_cos_series_basecase(acb_poly_t s, acb_poly_t c,
|
|||
_acb_poly_set_length(c, n);
|
||||
_acb_poly_normalise(c);
|
||||
}
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
#include "acb_poly.h"
|
||||
|
||||
void
|
||||
_acb_poly_sinh_cosh_series(acb_ptr s, acb_ptr c, const acb_srcptr h, slong hlen, slong n, slong prec)
|
||||
_acb_poly_sinh_cosh_series(acb_ptr s, acb_ptr c, acb_srcptr h, slong hlen, slong n, slong prec)
|
||||
{
|
||||
hlen = FLINT_MIN(hlen, n);
|
||||
|
||||
|
@ -32,10 +32,20 @@ _acb_poly_sinh_cosh_series(acb_ptr s, acb_ptr c, const acb_srcptr h, slong hlen,
|
|||
acb_mul(c + 1, s, t, prec);
|
||||
acb_clear(t);
|
||||
}
|
||||
else if (hlen < 60 || n < 120)
|
||||
_acb_poly_sinh_cosh_series_basecase(s, c, h, hlen, n, prec);
|
||||
else
|
||||
_acb_poly_sinh_cosh_series_exponential(s, c, h, hlen, n, prec);
|
||||
{
|
||||
slong cutoff;
|
||||
|
||||
if (prec <= 128)
|
||||
cutoff = 400;
|
||||
else
|
||||
cutoff = 30000 / pow(log(prec), 3);
|
||||
|
||||
if (hlen < cutoff)
|
||||
_acb_poly_sinh_cosh_series_basecase(s, c, h, hlen, n, prec);
|
||||
else
|
||||
_acb_poly_sinh_cosh_series_exponential(s, c, h, hlen, n, prec);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -69,4 +79,3 @@ acb_poly_sinh_cosh_series(acb_poly_t s, acb_poly_t c,
|
|||
_acb_poly_set_length(c, n);
|
||||
_acb_poly_normalise(c);
|
||||
}
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ void
|
|||
_acb_poly_sinh_cosh_series_basecase(acb_ptr s, acb_ptr c, acb_srcptr h, slong hlen,
|
||||
slong n, slong prec)
|
||||
{
|
||||
slong j, k, alen = FLINT_MIN(n, hlen);
|
||||
slong k, alen = FLINT_MIN(n, hlen);
|
||||
acb_ptr a;
|
||||
acb_t t, u;
|
||||
|
||||
|
@ -37,15 +37,8 @@ _acb_poly_sinh_cosh_series_basecase(acb_ptr s, acb_ptr c, acb_srcptr h, slong hl
|
|||
|
||||
for (k = 1; k < n; k++)
|
||||
{
|
||||
acb_zero(t);
|
||||
acb_zero(u);
|
||||
|
||||
for (j = 1; j < FLINT_MIN(k + 1, hlen); j++)
|
||||
{
|
||||
acb_addmul(t, a + j, s + k - j, prec);
|
||||
acb_addmul(u, a + j, c + k - j, prec);
|
||||
}
|
||||
|
||||
acb_dot(t, NULL, 0, a + 1, 1, s + k - 1, -1, FLINT_MIN(k, hlen - 1), prec);
|
||||
acb_dot(u, NULL, 0, a + 1, 1, c + k - 1, -1, FLINT_MIN(k, hlen - 1), prec);
|
||||
acb_div_ui(c + k, t, k, prec);
|
||||
acb_div_ui(s + k, u, k, prec);
|
||||
}
|
||||
|
@ -83,4 +76,3 @@ acb_poly_sinh_cosh_series_basecase(acb_poly_t s, acb_poly_t c,
|
|||
_acb_poly_set_length(c, n);
|
||||
_acb_poly_normalise(c);
|
||||
}
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@ int main()
|
|||
else
|
||||
len = n_randint(state, 3);
|
||||
|
||||
if (n_randint(state, 100) == 0 || len > 10)
|
||||
if (n_randint(state, 10) != 0 || len > 10)
|
||||
{
|
||||
prec = 2 + n_randint(state, 500);
|
||||
xbits = 2 + n_randint(state, 500);
|
||||
|
|
|
@ -499,6 +499,46 @@ Arithmetic
|
|||
|
||||
Sets *z* to the quotient of *x* and *y*.
|
||||
|
||||
Dot product
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
.. function:: void acb_dot_precise(acb_t res, const acb_t s, int subtract, acb_srcptr x, slong xstep, acb_srcptr y, slong ystep, slong len, slong prec)
|
||||
|
||||
.. function:: void acb_dot_simple(acb_t res, const acb_t s, int subtract, acb_srcptr x, slong xstep, acb_srcptr y, slong ystep, slong len, slong prec)
|
||||
|
||||
.. function:: void acb_dot(acb_t res, const acb_t s, int subtract, acb_srcptr x, slong xstep, acb_srcptr y, slong ystep, slong len, slong prec)
|
||||
|
||||
Computes the dot product of the vectors *x* and *y*, setting
|
||||
*res* to `s + (-1)^{subtract} \sum_{i=0}^{len-1} x_i y_i`.
|
||||
|
||||
The initial term *s* is optional and can be
|
||||
omitted by passing *NULL* (equivalently, `s = 0`).
|
||||
The parameter *subtract* must be 0 or 1.
|
||||
The length *len* is allowed to be negative, which is equivalent
|
||||
to a length of zero.
|
||||
The parameters *xstep* or *ystep* specify a step length for
|
||||
traversing subsequences of the vectors *x* and *y*; either can be
|
||||
negative to step in the reverse direction starting from
|
||||
the initial pointer.
|
||||
Aliasing is allowed between *res* and *s* but not between
|
||||
*res* and the entries of *x* and *y*.
|
||||
|
||||
The default version determines the optimal precision for each term
|
||||
and performs all internal calculations using mpn arithmetic
|
||||
with minimal overhead. This is the preferred way to compute a
|
||||
dot product; it is generally much faster and more precise
|
||||
than a simple loop.
|
||||
|
||||
The *simple* version performs fused multiply-add operations in
|
||||
a simple loop. This can be used for
|
||||
testing purposes and is also used as a fallback by the
|
||||
default version when the exponents are out of range
|
||||
for the optimized code.
|
||||
|
||||
The *precise* version computes the dot product exactly up to the
|
||||
final rounding. This can be extremely slow and is only intended
|
||||
for testing.
|
||||
|
||||
Mathematical constants
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
|
|
|
@ -782,7 +782,7 @@ Arithmetic
|
|||
|
||||
Sets `z = x / (2^n - 1)`, rounded to *prec* bits.
|
||||
|
||||
Sum and dot product
|
||||
Dot product
|
||||
-------------------------------------------------------------------------------
|
||||
|
||||
.. function:: void arb_dot_precise(arb_t res, const arb_t s, int subtract, arb_srcptr x, slong xstep, arb_srcptr y, slong ystep, slong len, slong prec)
|
||||
|
|
Loading…
Add table
Reference in a new issue