From 10ebab1b7026189e239785d5876605c57077acce Mon Sep 17 00:00:00 2001 From: fredrik Date: Mon, 20 Aug 2018 21:52:08 +0200 Subject: [PATCH] use arb_dot in arb_poly multiplication --- arb/dot.c | 1 + arb_poly/mullow.c | 31 ++++++++++++++++++-- arb_poly/mullow_classical.c | 56 +++++++++++++++++++++++++------------ 3 files changed, 67 insertions(+), 21 deletions(-) diff --git a/arb/dot.c b/arb/dot.c index 15f33d83..04598f3a 100644 --- a/arb/dot.c +++ b/arb/dot.c @@ -461,6 +461,7 @@ arb_dot(arb_t res, const arb_t initial, int subtract, arb_srcptr x, slong xstep, mp_ptr tmp, sum; /* Workspace */ ARF_ADD_TMP_DECL; + /* todo: fast fma and fmma (len=2) code */ if (len <= 1) { if (initial == NULL) diff --git a/arb_poly/mullow.c b/arb_poly/mullow.c index e981d3d8..8f596838 100644 --- a/arb_poly/mullow.c +++ b/arb_poly/mullow.c @@ -11,8 +11,6 @@ #include "arb_poly.h" -#define BLOCK_CUTOFF 16 - void _arb_poly_mullow(arb_ptr res, arb_srcptr poly1, slong len1, @@ -22,9 +20,34 @@ _arb_poly_mullow(arb_ptr res, { arb_mul(res, poly1, poly2, prec); } + else if (n <= 8 || len1 <= 8 || len2 <= 8) + { + _arb_poly_mullow_classical(res, poly1, len1, poly2, len2, n, prec); + } else { - if (n < BLOCK_CUTOFF || len1 < BLOCK_CUTOFF || len2 < BLOCK_CUTOFF) + slong m, cutoff; + + len1 = FLINT_MIN(len1, n); + len2 = FLINT_MIN(len2, n); + m = FLINT_MAX(len1, len2); + m = FLINT_MAX(m, n); + + if (prec <= 128) cutoff = 100; + else if (prec <= 192) cutoff = 55; + else if (prec <= 256) cutoff = 70; + else if (prec <= 512) cutoff = 50; + else if (prec <= 1024) cutoff = 35; + else if (prec <= 2048) cutoff = 25; + else if (prec <= 4096) cutoff = 35; + else if (prec <= 8192) cutoff = 25; + else if (prec <= 16384) cutoff = 20; + else cutoff = 15; + + if (poly1 == poly2 && prec >= 256) + cutoff *= 1.25; + + if (m < cutoff) _arb_poly_mullow_classical(res, poly1, len1, poly2, len2, n, prec); else _arb_poly_mullow_block(res, poly1, len1, poly2, len2, n, prec); @@ -71,6 +94,8 @@ arb_poly_mullow(arb_poly_t res, const arb_poly_t poly1, } else { + abort(); + if (res == poly1 || res == poly2) { arb_t t; diff --git a/arb_poly/mullow_classical.c b/arb_poly/mullow_classical.c index ea2dbebc..42b64dcd 100644 --- a/arb_poly/mullow_classical.c +++ b/arb_poly/mullow_classical.c @@ -27,35 +27,55 @@ _arb_poly_mullow_classical(arb_ptr res, } else if (poly1 == poly2 && len1 == len2) { - slong i; + slong i, start, stop; - _arb_vec_scalar_mul(res, poly1, FLINT_MIN(len1, n), poly1, prec); - _arb_vec_scalar_mul(res + len1, poly1 + 1, n - len1, poly1 + len1 - 1, prec); + arb_sqr(res, poly1, prec); + arb_mul(res + 1, poly1, poly1 + 1, prec); + arb_mul_2exp_si(res + 1, res + 1, 1); - for (i = 1; i < len1 - 1; i++) - _arb_vec_scalar_addmul(res + i + 1, poly1 + 1, - FLINT_MIN(i - 1, n - (i + 1)), poly1 + i, prec); + for (i = 2; i < FLINT_MIN(n, 2 * len1 - 3); i++) + { + start = FLINT_MAX(0, i - len1 + 1); + stop = FLINT_MIN(len1 - 1, (i + 1) / 2 - 1); - for (i = 1; i < FLINT_MIN(2 * len1 - 2, n); i++) + arb_dot(res + i, NULL, 0, poly1 + start, 1, + poly1 + i - start, -1, stop - start + 1, prec); arb_mul_2exp_si(res + i, res + i, 1); + if (i % 2 == 0 && i / 2 < len1) + arb_addmul(res + i, poly1 + i / 2, poly1 + i / 2, prec); + } - for (i = 1; i < FLINT_MIN(len1 - 1, (n + 1) / 2); i++) - arb_addmul(res + 2 * i, poly1 + i, poly1 + i, prec); + if (len1 > 2 && n >= 2 * len1 - 2) + { + arb_mul(res + 2 * len1 - 3, poly1 + len1 - 1, poly1 + len1 - 2, prec); + arb_mul_2exp_si(res + 2 * len1 - 3, res + 2 * len1 - 3, 1); + } + + if (n >= 2 * len1 - 1) + arb_sqr(res + 2 * len1 - 2, poly1 + len1 - 1, prec); + } + else if (len1 == 1) + { + _arb_vec_scalar_mul(res, poly2, n, poly1, prec); + } + else if (len2 == 1) + { + _arb_vec_scalar_mul(res, poly1, n, poly2, prec); } else { - slong i; + slong i, top1, top2; - _arb_vec_scalar_mul(res, poly1, FLINT_MIN(len1, n), poly2, prec); + arb_mul(res, poly1, poly2, prec); - if (n > len1) - _arb_vec_scalar_mul(res + len1, poly2 + 1, n - len1, - poly1 + len1 - 1, prec); + for (i = 1; i < n; i++) + { + top1 = FLINT_MIN(len1 - 1, i); + top2 = FLINT_MIN(len2 - 1, i); - for (i = 0; i < FLINT_MIN(len1, n) - 1; i++) - _arb_vec_scalar_addmul(res + i + 1, poly2 + 1, - FLINT_MIN(len2, n - i) - 1, - poly1 + i, prec); + arb_dot(res + i, NULL, 0, poly1 + i - top2, 1, + poly2 + top2, -1, top1 + top2 - i + 1, prec); + } } }