use arb_dot in arb_poly multiplication

This commit is contained in:
fredrik 2018-08-20 21:52:08 +02:00
parent a5aaa9f2a5
commit 10ebab1b70
3 changed files with 67 additions and 21 deletions

View file

@ -461,6 +461,7 @@ arb_dot(arb_t res, const arb_t initial, int subtract, arb_srcptr x, slong xstep,
mp_ptr tmp, sum; /* Workspace */
ARF_ADD_TMP_DECL;
/* todo: fast fma and fmma (len=2) code */
if (len <= 1)
{
if (initial == NULL)

View file

@ -11,8 +11,6 @@
#include "arb_poly.h"
#define BLOCK_CUTOFF 16
void
_arb_poly_mullow(arb_ptr res,
arb_srcptr poly1, slong len1,
@ -22,9 +20,34 @@ _arb_poly_mullow(arb_ptr res,
{
arb_mul(res, poly1, poly2, prec);
}
else if (n <= 8 || len1 <= 8 || len2 <= 8)
{
_arb_poly_mullow_classical(res, poly1, len1, poly2, len2, n, prec);
}
else
{
if (n < BLOCK_CUTOFF || len1 < BLOCK_CUTOFF || len2 < BLOCK_CUTOFF)
slong m, cutoff;
len1 = FLINT_MIN(len1, n);
len2 = FLINT_MIN(len2, n);
m = FLINT_MAX(len1, len2);
m = FLINT_MAX(m, n);
if (prec <= 128) cutoff = 100;
else if (prec <= 192) cutoff = 55;
else if (prec <= 256) cutoff = 70;
else if (prec <= 512) cutoff = 50;
else if (prec <= 1024) cutoff = 35;
else if (prec <= 2048) cutoff = 25;
else if (prec <= 4096) cutoff = 35;
else if (prec <= 8192) cutoff = 25;
else if (prec <= 16384) cutoff = 20;
else cutoff = 15;
if (poly1 == poly2 && prec >= 256)
cutoff *= 1.25;
if (m < cutoff)
_arb_poly_mullow_classical(res, poly1, len1, poly2, len2, n, prec);
else
_arb_poly_mullow_block(res, poly1, len1, poly2, len2, n, prec);
@ -71,6 +94,8 @@ arb_poly_mullow(arb_poly_t res, const arb_poly_t poly1,
}
else
{
abort();
if (res == poly1 || res == poly2)
{
arb_t t;

View file

@ -27,35 +27,55 @@ _arb_poly_mullow_classical(arb_ptr res,
}
else if (poly1 == poly2 && len1 == len2)
{
slong i;
slong i, start, stop;
_arb_vec_scalar_mul(res, poly1, FLINT_MIN(len1, n), poly1, prec);
_arb_vec_scalar_mul(res + len1, poly1 + 1, n - len1, poly1 + len1 - 1, prec);
arb_sqr(res, poly1, prec);
arb_mul(res + 1, poly1, poly1 + 1, prec);
arb_mul_2exp_si(res + 1, res + 1, 1);
for (i = 1; i < len1 - 1; i++)
_arb_vec_scalar_addmul(res + i + 1, poly1 + 1,
FLINT_MIN(i - 1, n - (i + 1)), poly1 + i, prec);
for (i = 2; i < FLINT_MIN(n, 2 * len1 - 3); i++)
{
start = FLINT_MAX(0, i - len1 + 1);
stop = FLINT_MIN(len1 - 1, (i + 1) / 2 - 1);
for (i = 1; i < FLINT_MIN(2 * len1 - 2, n); i++)
arb_dot(res + i, NULL, 0, poly1 + start, 1,
poly1 + i - start, -1, stop - start + 1, prec);
arb_mul_2exp_si(res + i, res + i, 1);
if (i % 2 == 0 && i / 2 < len1)
arb_addmul(res + i, poly1 + i / 2, poly1 + i / 2, prec);
}
for (i = 1; i < FLINT_MIN(len1 - 1, (n + 1) / 2); i++)
arb_addmul(res + 2 * i, poly1 + i, poly1 + i, prec);
if (len1 > 2 && n >= 2 * len1 - 2)
{
arb_mul(res + 2 * len1 - 3, poly1 + len1 - 1, poly1 + len1 - 2, prec);
arb_mul_2exp_si(res + 2 * len1 - 3, res + 2 * len1 - 3, 1);
}
if (n >= 2 * len1 - 1)
arb_sqr(res + 2 * len1 - 2, poly1 + len1 - 1, prec);
}
else if (len1 == 1)
{
_arb_vec_scalar_mul(res, poly2, n, poly1, prec);
}
else if (len2 == 1)
{
_arb_vec_scalar_mul(res, poly1, n, poly2, prec);
}
else
{
slong i;
slong i, top1, top2;
_arb_vec_scalar_mul(res, poly1, FLINT_MIN(len1, n), poly2, prec);
arb_mul(res, poly1, poly2, prec);
if (n > len1)
_arb_vec_scalar_mul(res + len1, poly2 + 1, n - len1,
poly1 + len1 - 1, prec);
for (i = 1; i < n; i++)
{
top1 = FLINT_MIN(len1 - 1, i);
top2 = FLINT_MIN(len2 - 1, i);
for (i = 0; i < FLINT_MIN(len1, n) - 1; i++)
_arb_vec_scalar_addmul(res + i + 1, poly2 + 1,
FLINT_MIN(len2, n - i) - 1,
poly1 + i, prec);
arb_dot(res + i, NULL, 0, poly1 + i - top2, 1,
poly2 + top2, -1, top1 + top2 - i + 1, prec);
}
}
}