From 8c71a3c8dc208cd1c1ef6b464e5d25d10a2ce067 Mon Sep 17 00:00:00 2001 From: Fredrik Johansson Date: Tue, 15 Apr 2014 17:07:02 +0200 Subject: [PATCH] more accurate version of fmprb_poly_mullow_block (two full poly muls for error bounds) --- fmprb_poly.h | 6 + fmprb_poly/mullow.c | 9 + fmprb_poly/mullow_block2.c | 503 ++++++++++++++++++++++++ fmprb_poly/mullow_scaled.c | 5 +- fmprb_poly/test/t-mullow_block.c | 4 +- fmprb_poly/test/t-mullow_block2.c | 223 +++++++++++ fmprb_poly/test/t-mullow_block_scaled.c | 4 +- 7 files changed, 748 insertions(+), 6 deletions(-) create mode 100644 fmprb_poly/mullow_block2.c create mode 100644 fmprb_poly/test/t-mullow_block2.c diff --git a/fmprb_poly.h b/fmprb_poly.h index 12da3867..fff19e85 100644 --- a/fmprb_poly.h +++ b/fmprb_poly.h @@ -223,6 +223,12 @@ void _fmprb_poly_mullow_block_scaled(fmprb_ptr z, fmprb_srcptr x, long xlen, fmp void fmprb_poly_mullow_block_scaled(fmprb_poly_t res, const fmprb_poly_t poly1, const fmprb_poly_t poly2, long len, long prec); +void _fmprb_poly_mullow_block2(fmprb_ptr z, fmprb_srcptr x, long xlen, + fmprb_srcptr y, long ylen, long n, long prec); + +void fmprb_poly_mullow_block2(fmprb_poly_t res, const fmprb_poly_t poly1, + const fmprb_poly_t poly2, long n, long prec); + void _fmprb_poly_mullow(fmprb_ptr C, fmprb_srcptr A, long lenA, fmprb_srcptr B, long lenB, long n, long prec); diff --git a/fmprb_poly/mullow.c b/fmprb_poly/mullow.c index 211244b8..22225eb8 100644 --- a/fmprb_poly/mullow.c +++ b/fmprb_poly/mullow.c @@ -28,6 +28,8 @@ #define BLOCK_CUTOFF 5 #define SCALE_CUTOFF 50 +#define BLOCK2_CUTOFF 7 + void _fmprb_poly_mullow(fmprb_ptr res, fmprb_srcptr poly1, long len1, @@ -39,12 +41,19 @@ _fmprb_poly_mullow(fmprb_ptr res, } else { +#if 0 if (n < BLOCK_CUTOFF || len1 < BLOCK_CUTOFF || len2 < BLOCK_CUTOFF) _fmprb_poly_mullow_classical(res, poly1, len1, poly2, len2, n, prec); else if (n < SCALE_CUTOFF || len1 < SCALE_CUTOFF || len2 < SCALE_CUTOFF) _fmprb_poly_mullow_block(res, poly1, len1, poly2, len2, n, prec); else _fmprb_poly_mullow_block_scaled(res, poly1, len1, poly2, len2, n, prec); +#else + if (n < BLOCK2_CUTOFF || len1 < BLOCK2_CUTOFF || len2 < BLOCK2_CUTOFF) + _fmprb_poly_mullow_classical(res, poly1, len1, poly2, len2, n, prec); + else + _fmprb_poly_mullow_block2(res, poly1, len1, poly2, len2, n, prec); +#endif } } diff --git a/fmprb_poly/mullow_block2.c b/fmprb_poly/mullow_block2.c new file mode 100644 index 00000000..f452de5a --- /dev/null +++ b/fmprb_poly/mullow_block2.c @@ -0,0 +1,503 @@ +/*============================================================================= + + This file is part of ARB. + + ARB is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + ARB is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ARB; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +=============================================================================*/ +/****************************************************************************** + + Copyright (C) 2014 Fredrik Johansson + +******************************************************************************/ + +#include "fmprb_poly.h" + +void _fmprb_poly_get_scale(fmpz_t scale, fmprb_srcptr x, long xlen, + fmprb_srcptr y, long ylen); + +/*static __inline__ */ void +fmpr_add_abs_ubound(fmpr_t z, const fmpr_t x, const fmpr_t y, long prec) +{ + if (fmpr_sgn(x) >= 0) + { + if (fmpr_sgn(y) >= 0) + fmpr_add(z, x, y, prec, FMPR_RND_UP); + else + fmpr_sub(z, x, y, prec, FMPR_RND_UP); + } + else + { + if (fmpr_sgn(y) >= 0) + fmpr_sub(z, y, x, prec, FMPR_RND_UP); + else + { + fmpr_add(z, x, y, prec, FMPR_RND_UP); + fmpr_neg(z, z); + } + } +} + +static __inline__ void +fmpr_abs_round(fmpr_t z, const fmpr_t x, long prec, fmpr_rnd_t rnd) +{ + if (fmpr_sgn(x) >= 0) + fmpr_set_round(z, x, prec, rnd); + else + fmpr_neg_round(z, x, prec, rnd); +} + +/* Break fmpr vector into same-exponent blocks where the largest block + has a height of at most ALPHA*prec + BETA bits. */ +#define ALPHA 3.0 +#define BETA 512 + +void +_fmpr_vec_get_fmpz_2exp_blocks(fmpz * coeffs, fmpz * exps, long * blocks, + const fmpz_t scale, fmpr_srcptr x, long len, long step, long prec) +{ + fmpz_t top, bot, t, b, v, block_top, block_bot; + long i, j, s, block; + int in_zero; + + fmpz_init(top); + fmpz_init(bot); + fmpz_init(t); + fmpz_init(b); + fmpz_init(v); + fmpz_init(block_top); + fmpz_init(block_bot); + + blocks[0] = 0; + block = 0; + in_zero = 1; + + for (i = 0; i < len; i++) + { + /* Skip (must be zero, since we assume there are no Infs/NaNs) */ + if (fmpr_is_special(x + i * step)) + continue; + + /* Bottom and top exponent of current number */ + fmpz_set(bot, fmpr_expref(x + i * step)); + /* Divide coefficient by 2^(scale * i) */ + fmpz_submul_ui(bot, scale, i); + fmpz_add_ui(top, bot, fmpz_bits(fmpr_manref(x + i * step))); + + /* Extend current block. */ + if (in_zero) + { + fmpz_swap(block_top, top); + fmpz_swap(block_bot, bot); + } + else + { + fmpz_max(t, top, block_top); + fmpz_min(b, bot, block_bot); + fmpz_sub(v, t, b); + + /* extend current block */ + if (prec == FMPR_PREC_EXACT || fmpz_cmp_ui(v, ALPHA * prec + BETA) < 0) + { + fmpz_swap(block_top, t); + fmpz_swap(block_bot, b); + } + else /* start new block */ + { + /* write exponent for previous block */ + fmpz_set(exps + block, block_bot); + + block++; + blocks[block] = i; + + fmpz_swap(block_top, top); + fmpz_swap(block_bot, bot); + } + } + + in_zero = 0; + } + + /* write exponent for last block */ + fmpz_set(exps + block, block_bot); + + /* end marker */ + blocks[block + 1] = len; + + /* write the block data */ + for (i = 0; blocks[i] != len; i++) + { + for (j = blocks[i]; j < blocks[i + 1]; j++) + { + if (fmpr_is_zero(x + j * step)) + { + fmpz_zero(coeffs + j); + } + else + { + /* Divide coefficient by 2^(scale * j) */ + fmpz_mul_ui(t, scale, j); + fmpz_sub(t, fmpr_expref(x + j * step), t); + s = _fmpz_sub_small(t, exps + i); + + if (s < 0) abort(); /* Bug catcher */ + + fmpz_mul_2exp(coeffs + j, fmpr_manref(x + j * step), s); + } + } + } + + fmpz_clear(top); + fmpz_clear(bot); + fmpz_clear(t); + fmpz_clear(b); + fmpz_clear(v); + fmpz_clear(block_top); + fmpz_clear(block_bot); +} + +static int +has_infnan(fmprb_srcptr x, long len) +{ + long i; + + for (i = 0; i < len; i++) + { + if (fmpr_is_nan(fmprb_midref(x + i)) || fmpr_is_inf(fmprb_midref(x + i)) + || fmpr_is_nan(fmprb_radref(x + i)) || fmpr_is_inf(fmprb_radref(x + i))) + { + return 1; + } + } + + return 0; +} + +void +_fmprb_poly_addmullow_rad(fmprb_ptr z, fmpz * zz, + const fmpz * xz, const fmpz * xexps, const long * xblocks, long xlen, + const fmpz * yz, const fmpz * yexps, const long * yblocks, long ylen, + long n) +{ + long i, j, k, xp, yp, xl, yl, bn; + fmpz_t zexp; + fmpr_t t; + + fmpz_init(zexp); + fmpr_init(t); + + for (i = 0; (xp = xblocks[i]) != xlen; i++) + { + for (j = 0; (yp = yblocks[j]) != ylen; j++) + { + if (xp + yp >= n) + continue; + + xl = xblocks[i + 1] - xp; + yl = yblocks[j + 1] - yp; + bn = FLINT_MIN(xl + yl - 1, n - xp - yp); + xl = FLINT_MIN(xl, bn); + yl = FLINT_MIN(yl, bn); + + if (xl >= yl) + _fmpz_poly_mullow(zz, xz + xp, xl, yz + yp, yl, bn); + else + _fmpz_poly_mullow(zz, yz + yp, yl, xz + xp, xl, bn); + + fmpz_add_inline(zexp, xexps + i, yexps + j); + + for (k = 0; k < bn; k++) + { + fmpr_set_round_fmpz_2exp(t, zz + k, zexp, + FMPRB_RAD_PREC, FMPR_RND_UP); + fmpr_add(fmprb_radref(z + xp + yp + k), + fmprb_radref(z + xp + yp + k), t, + FMPRB_RAD_PREC, FMPR_RND_UP); + } + } + } + + fmpz_clear(zexp); + fmpr_clear(t); +} + +void +_fmprb_poly_addmullow_block(fmprb_ptr z, fmpz * zz, + const fmpz * xz, const fmpz * xexps, const long * xblocks, long xlen, + const fmpz * yz, const fmpz * yexps, const long * yblocks, long ylen, + long n, long prec, int squaring) +{ + long i, j, k, xp, yp, xl, yl, bn; + fmpz_t zexp; + fmpr_t t; + + fmpz_init(zexp); + fmpr_init(t); + + if (squaring) + { + for (i = 0; (xp = xblocks[i]) != xlen; i++) + { + if (2 * xp >= n) + continue; + + xl = xblocks[i + 1] - xp; + bn = FLINT_MIN(2 * xl - 1, n - 2 * xp); + xl = FLINT_MIN(xl, bn); + + _fmpz_poly_sqrlow(zz, xz + xp, xl, bn); + fmpz_add_inline(zexp, xexps + i, xexps + i); + + for (k = 0; k < bn; k++) + { + fmpr_set_fmpz_2exp(t, zz + k, zexp); + fmprb_add_fmpr(z + 2 * xp + k, z + 2 * xp + k, t, prec); + } + } + } + + for (i = 0; (xp = xblocks[i]) != xlen; i++) + { + for (j = squaring ? i + 1 : 0; (yp = yblocks[j]) != ylen; j++) + { + if (xp + yp >= n) + continue; + + xl = xblocks[i + 1] - xp; + yl = yblocks[j + 1] - yp; + bn = FLINT_MIN(xl + yl - 1, n - xp - yp); + xl = FLINT_MIN(xl, bn); + yl = FLINT_MIN(yl, bn); + + if (xl >= yl) + _fmpz_poly_mullow(zz, xz + xp, xl, yz + yp, yl, bn); + else + _fmpz_poly_mullow(zz, yz + yp, yl, xz + xp, xl, bn); + + fmpz_add2_fmpz_si_inline(zexp, xexps + i, yexps + j, squaring); + + for (k = 0; k < bn; k++) + { + fmpr_set_fmpz_2exp(t, zz + k, zexp); + fmprb_add_fmpr(z + xp + yp + k, z + xp + yp + k, t, prec); + } + } + } + + fmpz_clear(zexp); + fmpr_clear(t); +} + +void +_fmprb_poly_mullow_block2(fmprb_ptr z, fmprb_srcptr x, long xlen, + fmprb_srcptr y, long ylen, long n, long prec) +{ + long xmlen, xrlen, ymlen, yrlen, i; + fmpz *xz, *yz, *zz; + fmpz *xe, *ye; + long *xblocks, *yblocks; + int squaring; + fmpz_t scale, t; + + xlen = FLINT_MIN(xlen, n); + ylen = FLINT_MIN(ylen, n); + + squaring = (x == y) && (xlen == ylen); + + /* Strip trailing zeros */ + xmlen = xrlen = xlen; + while (xmlen > 0 && fmpr_is_zero(fmprb_midref(x + xmlen - 1))) xmlen--; + while (xrlen > 0 && fmpr_is_zero(fmprb_radref(x + xrlen - 1))) xrlen--; + + if (squaring) + { + ymlen = xmlen; + yrlen = xrlen; + } + else + { + ymlen = yrlen = ylen; + while (ymlen > 0 && fmpr_is_zero(fmprb_midref(y + ymlen - 1))) ymlen--; + while (yrlen > 0 && fmpr_is_zero(fmprb_radref(y + yrlen - 1))) yrlen--; + } + + /* We don't know how to deal with infinities or NaNs */ + if (has_infnan(x, xlen) || (!squaring && has_infnan(y, ylen))) + { + _fmprb_poly_mullow_classical(z, x, xlen, y, ylen, n, prec); + return; + } + + xlen = FLINT_MAX(xmlen, xrlen); + ylen = FLINT_MAX(ymlen, yrlen); + + /* Start with the zero polynomial */ + _fmprb_vec_zero(z, n); + + /* Nothing to do */ + if (xlen == 0 || ylen == 0) + return; + + n = FLINT_MIN(n, xlen + ylen - 1); + + fmpz_init(scale); + fmpz_init(t); + xz = _fmpz_vec_init(xlen); + yz = _fmpz_vec_init(ylen); + zz = _fmpz_vec_init(n); + xe = _fmpz_vec_init(xlen); + ye = _fmpz_vec_init(ylen); + xblocks = flint_malloc(sizeof(long) * (xlen + 1)); + yblocks = flint_malloc(sizeof(long) * (ylen + 1)); + + _fmprb_poly_get_scale(scale, x, xlen, y, ylen); + + /* Error propagation */ + /* (xm + xr)*(ym + yr) = (xm*ym) + (xr*ym + xm*yr + xr*yr) + = (xm*ym) + (xm*yr + xr*(ym + yr)) */ + if (xrlen != 0 || yrlen != 0) + { + fmpr_ptr tmp = _fmpr_vec_init(FLINT_MAX(xlen, ylen)); + + /* (xm + xr)^2 = (xm*ym) + (xr^2 + 2 xm xr) + = (xm*ym) + xr*(2 xm + xr) */ + if (squaring) + { + _fmpr_vec_get_fmpz_2exp_blocks(xz, xe, xblocks, scale, fmprb_radref(x), xrlen, 2, FMPRB_RAD_PREC); + + for (i = 0; i < xlen; i++) + { + fmpr_abs_round(tmp + i, fmprb_midref(x + i), FMPRB_RAD_PREC, FMPR_RND_UP); + fmpr_mul_2exp_si(tmp + i, tmp + i, 1); + fmpr_add(tmp + i, tmp + i, fmprb_radref(x + i), FMPRB_RAD_PREC, FMPR_RND_UP); + } + _fmpr_vec_get_fmpz_2exp_blocks(yz, ye, yblocks, scale, tmp, xlen, 1, FMPRB_RAD_PREC); + + _fmprb_poly_addmullow_rad(z, zz, xz, xe, xblocks, xrlen, yz, ye, yblocks, xlen, n); + } + else if (yrlen == 0) + { + /* xr * |ym| */ + _fmpr_vec_get_fmpz_2exp_blocks(xz, xe, xblocks, scale, fmprb_radref(x), xrlen, 2, FMPRB_RAD_PREC); + for (i = 0; i < ymlen; i++) + fmpr_abs_round(tmp + i, fmprb_midref(y + i), FMPRB_RAD_PREC, FMPR_RND_UP); + _fmpr_vec_get_fmpz_2exp_blocks(yz, ye, yblocks, scale, tmp, ymlen, 1, FMPRB_RAD_PREC); + _fmprb_poly_addmullow_rad(z, zz, xz, xe, xblocks, xrlen, yz, ye, yblocks, ymlen, n); + } + else + { + /* |xm| * yr */ + for (i = 0; i < xmlen; i++) + fmpr_abs_round(tmp + i, fmprb_midref(x + i), FMPRB_RAD_PREC, FMPR_RND_UP); + + _fmpr_vec_get_fmpz_2exp_blocks(xz, xe, xblocks, scale, tmp, xmlen, 1, FMPRB_RAD_PREC); + + _fmpr_vec_get_fmpz_2exp_blocks(yz, ye, yblocks, scale, fmprb_radref(y), yrlen, 2, FMPRB_RAD_PREC); + + _fmprb_poly_addmullow_rad(z, zz, xz, xe, xblocks, xmlen, yz, ye, yblocks, yrlen, n); + + /* xr*(|ym| + yr) */ + if (xrlen != 0) + { + _fmpr_vec_get_fmpz_2exp_blocks(xz, xe, xblocks, scale, fmprb_radref(x), xrlen, 2, FMPRB_RAD_PREC); + + for (i = 0; i < ylen; i++) + fmpr_add_abs_ubound(tmp + i, fmprb_midref(y + i), fmprb_radref(y + i), FMPRB_RAD_PREC); + + _fmpr_vec_get_fmpz_2exp_blocks(yz, ye, yblocks, scale, tmp, ylen, 1, FMPRB_RAD_PREC); + _fmprb_poly_addmullow_rad(z, zz, xz, xe, xblocks, xrlen, yz, ye, yblocks, ylen, n); + } + } + + _fmpr_vec_clear(tmp, FLINT_MAX(xlen, ylen)); + } + + /* multiply midpoints */ + if (xmlen != 0 && ymlen != 0) + { + _fmpr_vec_get_fmpz_2exp_blocks(xz, xe, xblocks, scale, fmprb_midref(x), xmlen, 2, prec); + + if (squaring) + { + _fmprb_poly_addmullow_block(z, zz, xz, xe, xblocks, xmlen, xz, xe, xblocks, xmlen, n, prec, 1); + } + else + { + _fmpr_vec_get_fmpz_2exp_blocks(yz, ye, yblocks, scale, fmprb_midref(y), ymlen, 2, prec); + _fmprb_poly_addmullow_block(z, zz, xz, xe, xblocks, xmlen, yz, ye, yblocks, ymlen, n, prec, 0); + } + } + + /* Unscale. */ + if (!fmpz_is_zero(scale)) + { + fmpz_zero(t); + for (i = 0; i < n; i++) + { + fmprb_mul_2exp_fmpz(z + i, z + i, t); + fmpz_add(t, t, scale); + } + } + + _fmpz_vec_clear(xz, xlen); + _fmpz_vec_clear(yz, ylen); + _fmpz_vec_clear(zz, n); + _fmpz_vec_clear(xe, xlen); + _fmpz_vec_clear(ye, ylen); + flint_free(xblocks); + flint_free(yblocks); + fmpz_clear(scale); + fmpz_clear(t); +} + +void +fmprb_poly_mullow_block2(fmprb_poly_t res, const fmprb_poly_t poly1, + const fmprb_poly_t poly2, long n, long prec) +{ + long xlen, ylen, zlen; + + xlen = poly1->length; + ylen = poly2->length; + + if (xlen == 0 || ylen == 0 || n == 0) + { + fmprb_poly_zero(res); + return; + } + + xlen = FLINT_MIN(xlen, n); + ylen = FLINT_MIN(ylen, n); + zlen = FLINT_MIN(xlen + ylen - 1, n); + + if (res == poly1 || res == poly2) + { + fmprb_poly_t tmp; + fmprb_poly_init2(tmp, zlen); + _fmprb_poly_mullow_block2(tmp->coeffs, poly1->coeffs, xlen, + poly2->coeffs, ylen, zlen, prec); + fmprb_poly_swap(res, tmp); + fmprb_poly_clear(tmp); + } + else + { + fmprb_poly_fit_length(res, zlen); + _fmprb_poly_mullow_block2(res->coeffs, poly1->coeffs, xlen, + poly2->coeffs, ylen, zlen, prec); + } + + _fmprb_poly_set_length(res, zlen); + _fmprb_poly_normalise(res); +} + diff --git a/fmprb_poly/mullow_scaled.c b/fmprb_poly/mullow_scaled.c index 3c97a66c..02dd60d5 100644 --- a/fmprb_poly/mullow_scaled.c +++ b/fmprb_poly/mullow_scaled.c @@ -25,8 +25,9 @@ #include "fmprb_poly.h" -static __inline__ void -_fmprb_poly_get_scale(fmpz_t scale, fmprb_srcptr x, long xlen, fmprb_srcptr y, long ylen) +void +_fmprb_poly_get_scale(fmpz_t scale, fmprb_srcptr x, long xlen, + fmprb_srcptr y, long ylen) { long xa, xb, ya, yb, den; diff --git a/fmprb_poly/test/t-mullow_block.c b/fmprb_poly/test/t-mullow_block.c index c5a86a1a..bbf2a9ce 100644 --- a/fmprb_poly/test/t-mullow_block.c +++ b/fmprb_poly/test/t-mullow_block.c @@ -36,7 +36,7 @@ int main() flint_randinit(state); /* compare with fmpq_poly */ - for (iter = 0; iter < 10000; iter++) + for (iter = 0; iter < 1000; iter++) { long qbits1, qbits2, rbits1, rbits2, rbits3, trunc; fmpq_poly_t A, B, C; @@ -136,7 +136,7 @@ int main() fmprb_poly_clear(d); } - for (iter = 0; iter < 3000; iter++) + for (iter = 0; iter < 1000; iter++) { long rbits1, rbits2, rbits3, trunc; fmprb_poly_t a, b, c, ab, ac, bc, abc, abc2; diff --git a/fmprb_poly/test/t-mullow_block2.c b/fmprb_poly/test/t-mullow_block2.c new file mode 100644 index 00000000..cc27dbdb --- /dev/null +++ b/fmprb_poly/test/t-mullow_block2.c @@ -0,0 +1,223 @@ +/*============================================================================= + + This file is part of ARB. + + ARB is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + ARB is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ARB; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +=============================================================================*/ +/****************************************************************************** + + Copyright (C) 2012, 2013 Fredrik Johansson + +******************************************************************************/ + +#include "fmprb_poly.h" + +int main() +{ + long iter; + flint_rand_t state; + + printf("mullow_block2...."); + fflush(stdout); + + flint_randinit(state); + + /* compare with fmpq_poly */ + for (iter = 0; iter < 10000; iter++) + { + long qbits1, qbits2, rbits1, rbits2, rbits3, trunc; + fmpq_poly_t A, B, C; + fmprb_poly_t a, b, c, d; + + qbits1 = 2 + n_randint(state, 1000); + qbits2 = 2 + n_randint(state, 1000); + rbits1 = 2 + n_randint(state, 1000); + rbits2 = 2 + n_randint(state, 1000); + rbits3 = 2 + n_randint(state, 1000); + trunc = n_randint(state, 100); + + fmpq_poly_init(A); + fmpq_poly_init(B); + fmpq_poly_init(C); + + fmprb_poly_init(a); + fmprb_poly_init(b); + fmprb_poly_init(c); + fmprb_poly_init(d); + + fmpq_poly_randtest(A, state, 1 + n_randint(state, 100), qbits1); + fmpq_poly_randtest(B, state, 1 + n_randint(state, 100), qbits2); + fmpq_poly_mullow(C, A, B, trunc); + + fmprb_poly_set_fmpq_poly(a, A, rbits1); + fmprb_poly_set_fmpq_poly(b, B, rbits2); + fmprb_poly_mullow_block2(c, a, b, trunc, rbits3); + + if (!fmprb_poly_contains_fmpq_poly(c, C)) + { + printf("FAIL\n\n"); + printf("bits3 = %ld\n", rbits3); + printf("trunc = %ld\n", trunc); + + printf("A = "); fmpq_poly_print(A); printf("\n\n"); + printf("B = "); fmpq_poly_print(B); printf("\n\n"); + printf("C = "); fmpq_poly_print(C); printf("\n\n"); + + printf("a = "); fmprb_poly_printd(a, 15); printf("\n\n"); + printf("b = "); fmprb_poly_printd(b, 15); printf("\n\n"); + printf("c = "); fmprb_poly_printd(c, 15); printf("\n\n"); + + abort(); + } + + fmprb_poly_set(d, a); + fmprb_poly_mullow_block2(d, d, b, trunc, rbits3); + if (!fmprb_poly_equal(d, c)) + { + printf("FAIL (aliasing 1)\n\n"); + abort(); + } + + fmprb_poly_set(d, b); + fmprb_poly_mullow_block2(d, a, d, trunc, rbits3); + if (!fmprb_poly_equal(d, c)) + { + printf("FAIL (aliasing 2)\n\n"); + abort(); + } + + /* test squaring */ + fmprb_poly_set(b, a); + fmprb_poly_mullow_block2(c, a, b, trunc, rbits3); + fmprb_poly_mullow_block2(d, a, a, trunc, rbits3); + if (!fmprb_poly_overlaps(c, d)) /* not guaranteed to be identical */ + { + printf("FAIL (squaring)\n\n"); + + printf("a = "); fmprb_poly_printd(a, 15); printf("\n\n"); + printf("b = "); fmprb_poly_printd(b, 15); printf("\n\n"); + printf("c = "); fmprb_poly_printd(c, 15); printf("\n\n"); + + abort(); + } + + fmprb_poly_mullow_block2(a, a, a, trunc, rbits3); + if (!fmprb_poly_equal(d, a)) + { + printf("FAIL (aliasing, squaring)\n\n"); + + printf("a = "); fmprb_poly_printd(a, 15); printf("\n\n"); + printf("b = "); fmprb_poly_printd(b, 15); printf("\n\n"); + printf("d = "); fmprb_poly_printd(d, 15); printf("\n\n"); + + abort(); + } + + fmpq_poly_clear(A); + fmpq_poly_clear(B); + fmpq_poly_clear(C); + + fmprb_poly_clear(a); + fmprb_poly_clear(b); + fmprb_poly_clear(c); + fmprb_poly_clear(d); + } + + for (iter = 0; iter < 3000; iter++) + { + long rbits1, rbits2, rbits3, trunc; + fmprb_poly_t a, b, c, ab, ac, bc, abc, abc2; + + rbits1 = 2 + n_randint(state, 300); + rbits2 = 2 + n_randint(state, 300); + rbits3 = 2 + n_randint(state, 300); + trunc = n_randint(state, 100); + + fmprb_poly_init(a); + fmprb_poly_init(b); + fmprb_poly_init(c); + fmprb_poly_init(ab); + fmprb_poly_init(ac); + fmprb_poly_init(bc); + fmprb_poly_init(abc); + fmprb_poly_init(abc2); + + fmprb_poly_randtest(a, state, 1 + n_randint(state, 100), rbits1, 1 + n_randint(state, 100)); + fmprb_poly_randtest(b, state, 1 + n_randint(state, 100), rbits2, 1 + n_randint(state, 100)); + fmprb_poly_randtest(c, state, 1 + n_randint(state, 100), rbits2, 1 + n_randint(state, 100)); + + /* check a*(b+c) = a*b + a*c */ + fmprb_poly_mullow_block2(ab, a, b, trunc, rbits3); + fmprb_poly_mullow_block2(ac, a, c, trunc, rbits3); + fmprb_poly_add(abc, ab, ac, rbits3); + + fmprb_poly_add(bc, b, c, rbits3); + fmprb_poly_mullow_block2(abc2, a, bc, trunc, rbits3); + + if (!fmprb_poly_overlaps(abc, abc2)) + { + printf("FAIL (a*(b+c) = a*b + a*c) \n\n"); + printf("bits3 = %ld\n", rbits3); + printf("trunc = %ld\n", trunc); + + printf("a = "); fmprb_poly_printd(a, 15); printf("\n\n"); + printf("b = "); fmprb_poly_printd(b, 15); printf("\n\n"); + printf("c = "); fmprb_poly_printd(c, 15); printf("\n\n"); + + abort(); + } + + /* check (b+c)^2 = b^2 + 2bc + c^2 */ + fmprb_poly_mullow_block2(a, b, c, trunc, rbits3); + fmprb_poly_scalar_mul_2exp_si(a, a, 1); + fmprb_poly_mullow_block2(abc, b, b, trunc, rbits3); + fmprb_poly_mullow_block2(abc2, c, c, trunc, rbits3); + fmprb_poly_add(abc, abc, a, rbits3); + fmprb_poly_add(abc, abc, abc2, rbits3); + + fmprb_poly_mullow_block2(abc2, bc, bc, trunc, rbits3); + + if (!fmprb_poly_overlaps(abc, abc2)) + { + printf("FAIL ((b+c)^2 = b^2 + 2bc + c^2) \n\n"); + printf("bits3 = %ld\n", rbits3); + printf("trunc = %ld\n", trunc); + + printf("b = "); fmprb_poly_printd(b, 15); printf("\n\n"); + printf("c = "); fmprb_poly_printd(c, 15); printf("\n\n"); + + printf("abc = "); fmprb_poly_printd(abc, 15); printf("\n\n"); + printf("abc2 = "); fmprb_poly_printd(abc2, 15); printf("\n\n"); + + abort(); + } + + fmprb_poly_clear(a); + fmprb_poly_clear(b); + fmprb_poly_clear(c); + fmprb_poly_clear(ab); + fmprb_poly_clear(ac); + fmprb_poly_clear(bc); + fmprb_poly_clear(abc); + fmprb_poly_clear(abc2); + } + + flint_randclear(state); + flint_cleanup(); + printf("PASS\n"); + return EXIT_SUCCESS; +} + diff --git a/fmprb_poly/test/t-mullow_block_scaled.c b/fmprb_poly/test/t-mullow_block_scaled.c index 6fe933dd..58182600 100644 --- a/fmprb_poly/test/t-mullow_block_scaled.c +++ b/fmprb_poly/test/t-mullow_block_scaled.c @@ -36,7 +36,7 @@ int main() flint_randinit(state); /* compare with fmpq_poly */ - for (iter = 0; iter < 10000; iter++) + for (iter = 0; iter < 1000; iter++) { long qbits1, qbits2, rbits1, rbits2, rbits3, trunc; fmpq_poly_t A, B, C; @@ -136,7 +136,7 @@ int main() fmprb_poly_clear(d); } - for (iter = 0; iter < 3000; iter++) + for (iter = 0; iter < 1000; iter++) { long rbits1, rbits2, rbits3, trunc; fmprb_poly_t a, b, c, ab, ac, bc, abc, abc2;