arb/fmprb_poly/mullow_block.c

/*=============================================================================

    This file is part of ARB.

    ARB is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    ARB is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with ARB; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

=============================================================================*/
/******************************************************************************

    Copyright (C) 2012 Fredrik Johansson

******************************************************************************/

#include "fmprb_poly.h"

void _fmprb_poly_get_fmpz_poly_2exp(fmpr_t error, fmpz_t exp, fmpz  * coeffs,
                            fmprb_srcptr A, long lenA, long prec);

void
_fmprb_vec_add_fmpz_vec_2exp(fmprb_ptr z,
    fmprb_srcptr x, const fmpz * y, const fmpz_t exp, long len, long prec)
{
    fmpr_t t;
    long i;

    fmpr_init(t);

    for (i = 0; i < len; i++)
    {
        fmpr_set_fmpz_2exp(t, y + i, exp);
        fmprb_add_fmpr(z + i, x + i, t, prec);
    }

    fmpr_clear(t);
}


/*
First a bunch of ugly code for bounding the errors.
This code should be refactored, and we should switch
to some kind of proper fmpr polynomial multiplication
when the lengths get extremely large

*/

#define ZERO      (LONG_MIN / 2)
#define MIN_SMALL (LONG_MIN / 8)
#define MAX_SMALL (-MIN_SMALL)

int read_exps(long * xmag, const fmpr_struct * x, long xlen)
{
    long i, mag;
    fmpz exp;

    for (i = 0; i < xlen; i++)
    {
        /* steps of two to read midpoints / radii */
        if (fmpr_is_zero(x + 2 * i))
        {
            xmag[i] = ZERO;
        }
        else
        {
            exp = *fmpr_expref(x + 2 * i);

            if (COEFF_IS_MPZ(exp))
                return 0;

            mag = exp + fmpz_bits(fmpr_manref(x + 2 * i));

            if (mag < MIN_SMALL || mag > MAX_SMALL)
                return 0;

            xmag[i] = mag;
        }
    }

    return 1;  /* all are small */
}

void
add_mulbound(fmprb_ptr z,
        const fmpr_struct * x, long xlen,
        const fmpr_struct * y, long ylen, long n)
{
    long i, j, k, a, b, count;
    long *xmag, *ymag;
    long mag, zmag;
    int xsmall, ysmall;
    fmpr_t err;

    xmag = flint_calloc(xlen + ylen, sizeof(long));
    ymag = xmag + xlen;

    fmpr_init(err);

    xsmall = read_exps(xmag, x, xlen);
    ysmall = read_exps(ymag, y, ylen);

    for (i = 0; i < n; i++)
    {
        count = 0;
        a = FLINT_MAX(0, i - ylen + 1);
        b = FLINT_MIN(i + 1, xlen);
        zmag = ZERO;
        count = b - a;

        if (xsmall && ysmall)
        {
            for (j = a; j < b; j++)
            {
                k = i - j;
                mag = xmag[j] + ymag[k];
                zmag = FLINT_MAX(zmag, mag);
            }

            if (zmag < MIN_SMALL + MIN_SMALL)
                count = 0;

            if (count != 0)
            {
                fmpr_set_ui(err, count);
                fmpr_mul_2exp_si(err, err, zmag);
                fmpr_add(fmprb_radref(z + i), fmprb_radref(z + i), err, FMPRB_RAD_PREC, FMPR_RND_UP);
            }
        }
        else
        {
            for (j = a; j < b; j++)
            {
                k = i - j;
                fmpr_mul(err, x + 2*j, y + 2*k, FMPRB_RAD_PREC, FMPR_RND_UP);
                fmpr_abs(err, err);
                fmpr_add(fmprb_radref(z + i), fmprb_radref(z + i), err, FMPRB_RAD_PREC, FMPR_RND_UP);
            }
        }
    }

    fmpr_clear(err);
    flint_free(xmag);
}


static int
is_exact(fmprb_srcptr x, long len)
{
    long i;

    for (i = 0; i < len; i++)
        if (!fmprb_is_exact(x + i))
            return 0;

    return 1;
}

/* TODO: distribute to get rid of one polynomial multiplication */
static void
add_errors(fmprb_ptr z, fmprb_srcptr x, long xlen, fmprb_srcptr y, long ylen, long n, int squaring)
{
    int xexact, yexact;
    const fmpr_struct * xmid = ((fmpr_struct *) x);
    const fmpr_struct * xrad = ((fmpr_struct *) x) + 1;
    const fmpr_struct * ymid = ((fmpr_struct *) y);
    const fmpr_struct * yrad = ((fmpr_struct *) y) + 1;

    if (squaring)
    {
        long i;
        xexact = is_exact(x, xlen);

        if (!xexact)
        {
            add_mulbound(z, xmid, xlen, xrad, xlen, n);

            /* XXX: for tightness, assumes that z is zero to begin with */
            for (i = 0; i < n; i++)
                fmpr_mul_2exp_si(fmprb_radref(z + i), fmprb_radref(z + i), 1);

            add_mulbound(z, xrad, xlen, xrad, xlen, n);
        }

    }
    else
    {
        xexact = is_exact(x, xlen);
        yexact = is_exact(y, ylen);

        if (!yexact)            add_mulbound(z, xmid, xlen, yrad, ylen, n);
        if (!xexact)            add_mulbound(z, xrad, xlen, ymid, ylen, n);
        if (!xexact && !yexact) add_mulbound(z, xrad, xlen, yrad, ylen, n);
    }
}

/* largest block height = ALPHA*prec + BETA */
#define ALPHA 3.0
#define BETA 512
#define CLASSICAL_CUTOFF 6

static void
get_blocks(long * xblocks, fmprb_srcptr x, long xlen, long prec)
{
    fmpz_t top, bot, t, b, v, block_top, block_bot;
    long i, block;
    int in_zero;

    fmpz_init(top);
    fmpz_init(bot);
    fmpz_init(t);
    fmpz_init(b);
    fmpz_init(v);
    fmpz_init(block_top);
    fmpz_init(block_bot);

    xblocks[0] = 0;
    block = 0;
    in_zero = 1;

    for (i = 0; i < xlen; i++)
    {
        if (fmpr_is_special(fmprb_midref(x + i)))
            continue;

        fmpz_set(bot, fmpr_expref(fmprb_midref(x + i)));
        fmpz_add_ui(top, bot, fmpz_bits(fmpr_manref(fmprb_midref(x + i))));

        /* extend current block */
        if (in_zero)
        {
            fmpz_swap(block_top, top);
            fmpz_swap(block_bot, bot);
        }
        else
        {
            fmpz_max(t, top, block_top);
            fmpz_min(b, bot, block_bot);

            fmpz_sub(v, t, b);

            /* extend current block */
            if (prec == FMPR_PREC_EXACT || fmpz_cmp_ui(v, ALPHA * prec + BETA) < 0)
            {
                fmpz_swap(block_top, t);
                fmpz_swap(block_bot, b);
            }
            else  /* start new block */
            {
                block++;
                xblocks[block] = i;

                fmpz_swap(block_top, top);
                fmpz_swap(block_bot, bot);
            }
        }

        in_zero = 0;
    }

    xblocks[block + 1] = xlen;

    fmpz_clear(top);
    fmpz_clear(bot);
    fmpz_clear(t);
    fmpz_clear(b);
    fmpz_clear(v);
    fmpz_clear(block_top);
    fmpz_clear(block_bot);
}

static int
has_infnan(fmprb_srcptr x, long len)
{
    long i;

    for (i = 0; i < len; i++)
    {
        if (fmpr_is_nan(fmprb_midref(x + i)) || fmpr_is_inf(fmprb_midref(x + i))
            || fmpr_is_nan(fmprb_radref(x + i)) || fmpr_is_inf(fmprb_radref(x + i)))
        {
            return 1;
        }
    }

    return 0;
}

void
_fmprb_poly_mullow_block(fmprb_ptr z,
    fmprb_srcptr x, long xlen,
    fmprb_srcptr y, long ylen,
    long n, long prec)
{
    long *xblocks, *yblocks;
    long i, j, xp, yp, xl, yl, bn, r, s;
    fmpr_t t, error;
    fmpz_t xexp, yexp, zexp;
    fmpz *xz, *yz, *zz;
    int squaring;

    xlen = FLINT_MIN(xlen, n);
    ylen = FLINT_MIN(ylen, n);

    squaring = (x == y) && (xlen == ylen);

    if (has_infnan(x, xlen) || (!squaring && has_infnan(y, ylen)))
    {
        _fmprb_poly_mullow_classical(z, x, xlen, y, ylen, n, prec);
        return;
    }

    /* Zero polynomials can appear for example in complex multiplication.
       TODO: we should also throw away zero leading and trailing
       parts of the blocks computed below. */
    if (_fmprb_vec_is_zero(x, xlen) || (!squaring && _fmprb_vec_is_zero(y, ylen)))
    {
        _fmprb_vec_zero(z, n);
        return;
    }

    xblocks = flint_malloc(sizeof(long) * (xlen + 1));
    yblocks = flint_malloc(sizeof(long) * (ylen + 1));

    fmpz_init(xexp);
    fmpz_init(yexp);
    fmpz_init(zexp);

    xz = _fmpz_vec_init(xlen);
    yz = _fmpz_vec_init(ylen);
    zz = _fmpz_vec_init(n);

    fmpr_init(error);
    fmpr_init(t);

    /* split input polynomials into blocks with similar-size exponents */
    get_blocks(xblocks, x, xlen, prec);

    if (squaring)
        for (i = 0; i < xlen + 1; i++)
            yblocks[i] = xblocks[i];
    else
        get_blocks(yblocks, y, ylen, prec);

    /* start with the zero polynomial plus the error */
    _fmprb_vec_zero(z, n);

    add_errors(z, x, xlen, y, ylen, n, squaring);

    if (squaring)
    {
        /* sum of squares of blocks */
        for (i = 0; (xp = xblocks[i]) != xlen; i++)
        {
            if (2 * xp < n)
            {
                xl = xblocks[i + 1] - xp;
                bn = FLINT_MIN(2 * xl - 1, n - 2 * xp);
                xl = FLINT_MIN(xl, bn);

                _fmprb_poly_get_fmpz_poly_2exp(error, xexp, xz, x + xp, xl, FMPR_PREC_EXACT);
                _fmpz_poly_sqrlow(zz, xz, xl, bn);
                fmpz_add(zexp, xexp, xexp);
                _fmprb_vec_add_fmpz_vec_2exp(z + 2 * xp, z + 2 * xp, zz, zexp, bn, prec);
            }
        }
    }

    for (i = 0; (xp = xblocks[i]) != xlen; i++)
    {
        for (j = squaring ? i + 1 : 0; (yp = yblocks[j]) != ylen; j++)
        {
            if (xp + yp < n)
            {
                xl = xblocks[i + 1] - xp;
                yl = yblocks[j + 1] - yp;
                bn = FLINT_MIN(xl + yl - 1, n - xp - yp);
                xl = FLINT_MIN(xl, bn);
                yl = FLINT_MIN(yl, bn);

                /* classical multiplication; we just need the midpoints */
                if (bn < CLASSICAL_CUTOFF || xl < CLASSICAL_CUTOFF || yl < CLASSICAL_CUTOFF)
                {
                    for (r = 0; r < xl; r++)
                    {
                        for (s = 0; s < yl && xp + yp + r + s < n; s++)
                        {
                            long zi, ret;
                            zi = xp + yp + r + s;

                            fmpr_mul(t, fmprb_midref(x + xp + r), fmprb_midref(y + yp + s), FMPR_PREC_EXACT, FMPR_RND_DOWN);
                            if (squaring)
                                fmpr_mul_2exp_si(t, t, 1);

                            ret = fmpr_add(fmprb_midref(z + zi), fmprb_midref(z + zi), t, prec, FMPR_RND_DOWN);
                            fmpr_add_error_result(fmprb_radref(z + zi), fmprb_radref(z + zi),
                                fmprb_midref(z + zi), ret, prec, FMPR_RND_UP);
                        }
                    }
                }
                else
                {
                    /* exact multiplication over Z[x]
                       todo: might want to verify that error==0 */

                    _fmprb_poly_get_fmpz_poly_2exp(error, xexp, xz, x + xp, xl, FMPR_PREC_EXACT);
                    _fmprb_poly_get_fmpz_poly_2exp(error, yexp, yz, y + yp, yl, FMPR_PREC_EXACT);
                    _fmpz_poly_mullow(zz, xz, xl, yz, yl, bn);
                    fmpz_add(zexp, xexp, yexp);
                    if (squaring)
                        fmpz_add_ui(zexp, zexp, 1);
                    _fmprb_vec_add_fmpz_vec_2exp(z + xp + yp, z + xp + yp, zz, zexp, bn, prec);
                }
            }
        }
    }

    fmpz_clear(xexp);
    fmpz_clear(yexp);
    fmpz_clear(zexp);

    _fmpz_vec_clear(xz, xlen);
    _fmpz_vec_clear(yz, ylen);
    _fmpz_vec_clear(zz, n);

    fmpr_clear(error);
    fmpr_clear(t);

    flint_free(xblocks);
    flint_free(yblocks);
}

void
fmprb_poly_mullow_block(fmprb_poly_t res, const fmprb_poly_t poly1,
              const fmprb_poly_t poly2, long n, long prec)
{
    long xlen, ylen, zlen;

    xlen = poly1->length;
    ylen = poly2->length;

    if (xlen == 0 || ylen == 0 || n == 0)
    {
        fmprb_poly_zero(res);
        return;
    }

    xlen = FLINT_MIN(xlen, n);
    ylen = FLINT_MIN(ylen, n);
    zlen = FLINT_MIN(xlen + ylen - 1, n);

    if (res == poly1 || res == poly2)
    {
        fmprb_poly_t tmp;
        fmprb_poly_init2(tmp, zlen);
        _fmprb_poly_mullow_block(tmp->coeffs, poly1->coeffs, xlen,
            poly2->coeffs, ylen, zlen, prec);
        fmprb_poly_swap(res, tmp);
        fmprb_poly_clear(tmp);
    }
    else
    {
        fmprb_poly_fit_length(res, zlen);
        _fmprb_poly_mullow_block(res->coeffs, poly1->coeffs, xlen,
            poly2->coeffs, ylen, zlen, prec);
    }

    _fmprb_poly_set_length(res, zlen);
    _fmprb_poly_normalise(res);
}
numerically stable blockwise polynomial multiplication (should be working; more testing necessary) 2013-04-02 19:13:27 +02:00			`/*=============================================================================`

			`This file is part of ARB.`

			`ARB is free software; you can redistribute it and/or modify`
			`it under the terms of the GNU General Public License as published by`
			`the Free Software Foundation; either version 2 of the License, or`
			`(at your option) any later version.`

			`ARB is distributed in the hope that it will be useful,`
			`but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`GNU General Public License for more details.`

			`You should have received a copy of the GNU General Public License`
			`along with ARB; if not, write to the Free Software`
			`Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA`

			`=============================================================================*/`
			`/******************************************************************************`

			`Copyright (C) 2012 Fredrik Johansson`

			`******************************************************************************/`

			`#include "fmprb_poly.h"`

			`void _fmprb_poly_get_fmpz_poly_2exp(fmpr_t error, fmpz_t exp, fmpz * coeffs,`
introduce fmprb_ptr, fmprb_srcptr, fmpcb_ptr, fmpcb_srcptr typedefs 2013-07-17 20:18:15 +02:00			`fmprb_srcptr A, long lenA, long prec);`
numerically stable blockwise polynomial multiplication (should be working; more testing necessary) 2013-04-02 19:13:27 +02:00
			`void`
introduce fmprb_ptr, fmprb_srcptr, fmpcb_ptr, fmpcb_srcptr typedefs 2013-07-17 20:18:15 +02:00			`_fmprb_vec_add_fmpz_vec_2exp(fmprb_ptr z,`
			`fmprb_srcptr x, const fmpz * y, const fmpz_t exp, long len, long prec)`
numerically stable blockwise polynomial multiplication (should be working; more testing necessary) 2013-04-02 19:13:27 +02:00			`{`
			`fmpr_t t;`
			`long i;`

			`fmpr_init(t);`

			`for (i = 0; i < len; i++)`
			`{`
			`fmpr_set_fmpz_2exp(t, y + i, exp);`
			`fmprb_add_fmpr(z + i, x + i, t, prec);`
			`}`

			`fmpr_clear(t);`
			`}`


			`/*`
			`First a bunch of ugly code for bounding the errors.`
			`This code should be refactored, and we should switch`
			`to some kind of proper fmpr polynomial multiplication`
			`when the lengths get extremely large`

			`*/`

			`#define ZERO (LONG_MIN / 2)`
			`#define MIN_SMALL (LONG_MIN / 8)`
			`#define MAX_SMALL (-MIN_SMALL)`

			`int read_exps(long * xmag, const fmpr_struct * x, long xlen)`
			`{`
			`long i, mag;`
			`fmpz exp;`

			`for (i = 0; i < xlen; i++)`
			`{`
			`/* steps of two to read midpoints / radii */`
			`if (fmpr_is_zero(x + 2 * i))`
			`{`
			`xmag[i] = ZERO;`
			`}`
			`else`
			`{`
			`exp = fmpr_expref(x + 2 i);`

			`if (COEFF_IS_MPZ(exp))`
			`return 0;`

			`mag = exp + fmpz_bits(fmpr_manref(x + 2 * i));`

			`if (mag < MIN_SMALL \|\| mag > MAX_SMALL)`
			`return 0;`

			`xmag[i] = mag;`
			`}`
			`}`

			`return 1; /* all are small */`
			`}`

			`void`
introduce fmprb_ptr, fmprb_srcptr, fmpcb_ptr, fmpcb_srcptr typedefs 2013-07-17 20:18:15 +02:00			`add_mulbound(fmprb_ptr z,`
numerically stable blockwise polynomial multiplication (should be working; more testing necessary) 2013-04-02 19:13:27 +02:00			`const fmpr_struct * x, long xlen,`
			`const fmpr_struct * y, long ylen, long n)`
			`{`
			`long i, j, k, a, b, count;`
			`long xmag, ymag;`
			`long mag, zmag;`
			`int xsmall, ysmall;`
			`fmpr_t err;`

			`xmag = flint_calloc(xlen + ylen, sizeof(long));`
			`ymag = xmag + xlen;`

			`fmpr_init(err);`

			`xsmall = read_exps(xmag, x, xlen);`
			`ysmall = read_exps(ymag, y, ylen);`

			`for (i = 0; i < n; i++)`
			`{`
			`count = 0;`
			`a = FLINT_MAX(0, i - ylen + 1);`
			`b = FLINT_MIN(i + 1, xlen);`
			`zmag = ZERO;`
			`count = b - a;`

			`if (xsmall && ysmall)`
			`{`
			`for (j = a; j < b; j++)`
			`{`
			`k = i - j;`
			`mag = xmag[j] + ymag[k];`
			`zmag = FLINT_MAX(zmag, mag);`
			`}`

			`if (zmag < MIN_SMALL + MIN_SMALL)`
			`count = 0;`

			`if (count != 0)`
			`{`
			`fmpr_set_ui(err, count);`
			`fmpr_mul_2exp_si(err, err, zmag);`
			`fmpr_add(fmprb_radref(z + i), fmprb_radref(z + i), err, FMPRB_RAD_PREC, FMPR_RND_UP);`
			`}`
			`}`
			`else`
			`{`
			`for (j = a; j < b; j++)`
			`{`
			`k = i - j;`
			`fmpr_mul(err, x + 2j, y + 2k, FMPRB_RAD_PREC, FMPR_RND_UP);`
			`fmpr_abs(err, err);`
			`fmpr_add(fmprb_radref(z + i), fmprb_radref(z + i), err, FMPRB_RAD_PREC, FMPR_RND_UP);`
			`}`
			`}`
			`}`

			`fmpr_clear(err);`
			`flint_free(xmag);`
			`}`


			`static int`
introduce fmprb_ptr, fmprb_srcptr, fmpcb_ptr, fmpcb_srcptr typedefs 2013-07-17 20:18:15 +02:00			`is_exact(fmprb_srcptr x, long len)`
numerically stable blockwise polynomial multiplication (should be working; more testing necessary) 2013-04-02 19:13:27 +02:00			`{`
			`long i;`

			`for (i = 0; i < len; i++)`
			`if (!fmprb_is_exact(x + i))`
			`return 0;`

			`return 1;`
			`}`

speed up squaring in polynomial multiplication 2013-07-25 16:38:42 +02:00			`/* TODO: distribute to get rid of one polynomial multiplication */`
numerically stable blockwise polynomial multiplication (should be working; more testing necessary) 2013-04-02 19:13:27 +02:00			`static void`
speed up squaring in polynomial multiplication 2013-07-25 16:38:42 +02:00			`add_errors(fmprb_ptr z, fmprb_srcptr x, long xlen, fmprb_srcptr y, long ylen, long n, int squaring)`
numerically stable blockwise polynomial multiplication (should be working; more testing necessary) 2013-04-02 19:13:27 +02:00			`{`
			`int xexact, yexact;`
speed up squaring in polynomial multiplication 2013-07-25 16:38:42 +02:00			`const fmpr_struct * xmid = ((fmpr_struct *) x);`
			`const fmpr_struct * xrad = ((fmpr_struct *) x) + 1;`
			`const fmpr_struct * ymid = ((fmpr_struct *) y);`
			`const fmpr_struct * yrad = ((fmpr_struct *) y) + 1;`
numerically stable blockwise polynomial multiplication (should be working; more testing necessary) 2013-04-02 19:13:27 +02:00
speed up squaring in polynomial multiplication 2013-07-25 16:38:42 +02:00			`if (squaring)`
			`{`
			`long i;`
			`xexact = is_exact(x, xlen);`

			`if (!xexact)`
			`{`
			`add_mulbound(z, xmid, xlen, xrad, xlen, n);`

			`/* XXX: for tightness, assumes that z is zero to begin with */`
			`for (i = 0; i < n; i++)`
			`fmpr_mul_2exp_si(fmprb_radref(z + i), fmprb_radref(z + i), 1);`
numerically stable blockwise polynomial multiplication (should be working; more testing necessary) 2013-04-02 19:13:27 +02:00
speed up squaring in polynomial multiplication 2013-07-25 16:38:42 +02:00			`add_mulbound(z, xrad, xlen, xrad, xlen, n);`
			`}`

			`}`
			`else`
			`{`
			`xexact = is_exact(x, xlen);`
			`yexact = is_exact(y, ylen);`

			`if (!yexact) add_mulbound(z, xmid, xlen, yrad, ylen, n);`
			`if (!xexact) add_mulbound(z, xrad, xlen, ymid, ylen, n);`
			`if (!xexact && !yexact) add_mulbound(z, xrad, xlen, yrad, ylen, n);`
			`}`
numerically stable blockwise polynomial multiplication (should be working; more testing necessary) 2013-04-02 19:13:27 +02:00			`}`

			`/* largest block height = ALPHAprec + BETA /`
			`#define ALPHA 3.0`
			`#define BETA 512`
			`#define CLASSICAL_CUTOFF 6`

			`static void`
introduce fmprb_ptr, fmprb_srcptr, fmpcb_ptr, fmpcb_srcptr typedefs 2013-07-17 20:18:15 +02:00			`get_blocks(long * xblocks, fmprb_srcptr x, long xlen, long prec)`
numerically stable blockwise polynomial multiplication (should be working; more testing necessary) 2013-04-02 19:13:27 +02:00			`{`
			`fmpz_t top, bot, t, b, v, block_top, block_bot;`
			`long i, block;`
			`int in_zero;`

			`fmpz_init(top);`
			`fmpz_init(bot);`
			`fmpz_init(t);`
			`fmpz_init(b);`
			`fmpz_init(v);`
			`fmpz_init(block_top);`
			`fmpz_init(block_bot);`

			`xblocks[0] = 0;`
			`block = 0;`
			`in_zero = 1;`

			`for (i = 0; i < xlen; i++)`
			`{`
			`if (fmpr_is_special(fmprb_midref(x + i)))`
			`continue;`

			`fmpz_set(bot, fmpr_expref(fmprb_midref(x + i)));`
			`fmpz_add_ui(top, bot, fmpz_bits(fmpr_manref(fmprb_midref(x + i))));`

			`/* extend current block */`
			`if (in_zero)`
			`{`
			`fmpz_swap(block_top, top);`
			`fmpz_swap(block_bot, bot);`
			`}`
			`else`
			`{`
			`fmpz_max(t, top, block_top);`
			`fmpz_min(b, bot, block_bot);`

			`fmpz_sub(v, t, b);`

			`/* extend current block */`
			`if (prec == FMPR_PREC_EXACT \|\| fmpz_cmp_ui(v, ALPHA * prec + BETA) < 0)`
			`{`
			`fmpz_swap(block_top, t);`
			`fmpz_swap(block_bot, b);`
			`}`
			`else /* start new block */`
			`{`
			`block++;`
			`xblocks[block] = i;`

			`fmpz_swap(block_top, top);`
			`fmpz_swap(block_bot, bot);`
			`}`
			`}`

			`in_zero = 0;`
			`}`

			`xblocks[block + 1] = xlen;`

			`fmpz_clear(top);`
			`fmpz_clear(bot);`
			`fmpz_clear(t);`
			`fmpz_clear(b);`
			`fmpz_clear(v);`
			`fmpz_clear(block_top);`
			`fmpz_clear(block_bot);`
			`}`

			`static int`
introduce fmprb_ptr, fmprb_srcptr, fmpcb_ptr, fmpcb_srcptr typedefs 2013-07-17 20:18:15 +02:00			`has_infnan(fmprb_srcptr x, long len)`
numerically stable blockwise polynomial multiplication (should be working; more testing necessary) 2013-04-02 19:13:27 +02:00			`{`
			`long i;`

			`for (i = 0; i < len; i++)`
			`{`
			`if (fmpr_is_nan(fmprb_midref(x + i)) \|\| fmpr_is_inf(fmprb_midref(x + i))`
			`\|\| fmpr_is_nan(fmprb_radref(x + i)) \|\| fmpr_is_inf(fmprb_radref(x + i)))`
			`{`
			`return 1;`
			`}`
			`}`

			`return 0;`
			`}`

			`void`
introduce fmprb_ptr, fmprb_srcptr, fmpcb_ptr, fmpcb_srcptr typedefs 2013-07-17 20:18:15 +02:00			`_fmprb_poly_mullow_block(fmprb_ptr z,`
			`fmprb_srcptr x, long xlen,`
			`fmprb_srcptr y, long ylen,`
numerically stable blockwise polynomial multiplication (should be working; more testing necessary) 2013-04-02 19:13:27 +02:00			`long n, long prec)`
			`{`
			`long xblocks, yblocks;`
			`long i, j, xp, yp, xl, yl, bn, r, s;`
			`fmpr_t t, error;`
			`fmpz_t xexp, yexp, zexp;`
			`fmpz xz, yz, *zz;`
speed up squaring in polynomial multiplication 2013-07-25 16:38:42 +02:00			`int squaring;`
numerically stable blockwise polynomial multiplication (should be working; more testing necessary) 2013-04-02 19:13:27 +02:00
speed up squaring in polynomial multiplication 2013-07-25 16:38:42 +02:00			`xlen = FLINT_MIN(xlen, n);`
			`ylen = FLINT_MIN(ylen, n);`

			`squaring = (x == y) && (xlen == ylen);`

			`if (has_infnan(x, xlen) \|\| (!squaring && has_infnan(y, ylen)))`
numerically stable blockwise polynomial multiplication (should be working; more testing necessary) 2013-04-02 19:13:27 +02:00			`{`
			`_fmprb_poly_mullow_classical(z, x, xlen, y, ylen, n, prec);`
			`return;`
			`}`

implement block multiplication with prescaling 2013-08-24 14:12:40 +02:00			`/* Zero polynomials can appear for example in complex multiplication.`
			`TODO: we should also throw away zero leading and trailing`
			`parts of the blocks computed below. */`
			`if (_fmprb_vec_is_zero(x, xlen) \|\| (!squaring && _fmprb_vec_is_zero(y, ylen)))`
			`{`
			`_fmprb_vec_zero(z, n);`
			`return;`
			`}`

numerically stable blockwise polynomial multiplication (should be working; more testing necessary) 2013-04-02 19:13:27 +02:00			`xblocks = flint_malloc(sizeof(long) * (xlen + 1));`
			`yblocks = flint_malloc(sizeof(long) * (ylen + 1));`

			`fmpz_init(xexp);`
			`fmpz_init(yexp);`
			`fmpz_init(zexp);`

			`xz = _fmpz_vec_init(xlen);`
			`yz = _fmpz_vec_init(ylen);`
			`zz = _fmpz_vec_init(n);`

			`fmpr_init(error);`
			`fmpr_init(t);`

			`/* split input polynomials into blocks with similar-size exponents */`
			`get_blocks(xblocks, x, xlen, prec);`
speed up squaring in polynomial multiplication 2013-07-25 16:38:42 +02:00
			`if (squaring)`
			`for (i = 0; i < xlen + 1; i++)`
			`yblocks[i] = xblocks[i];`
			`else`
			`get_blocks(yblocks, y, ylen, prec);`
numerically stable blockwise polynomial multiplication (should be working; more testing necessary) 2013-04-02 19:13:27 +02:00
			`/* start with the zero polynomial plus the error */`
			`_fmprb_vec_zero(z, n);`
speed up squaring in polynomial multiplication 2013-07-25 16:38:42 +02:00
			`add_errors(z, x, xlen, y, ylen, n, squaring);`

			`if (squaring)`
			`{`
			`/* sum of squares of blocks */`
			`for (i = 0; (xp = xblocks[i]) != xlen; i++)`
			`{`
			`if (2 * xp < n)`
			`{`
			`xl = xblocks[i + 1] - xp;`
			`bn = FLINT_MIN(2 * xl - 1, n - 2 * xp);`
			`xl = FLINT_MIN(xl, bn);`

			`_fmprb_poly_get_fmpz_poly_2exp(error, xexp, xz, x + xp, xl, FMPR_PREC_EXACT);`
			`_fmpz_poly_sqrlow(zz, xz, xl, bn);`
			`fmpz_add(zexp, xexp, xexp);`
			`_fmprb_vec_add_fmpz_vec_2exp(z + 2 * xp, z + 2 * xp, zz, zexp, bn, prec);`
			`}`
			`}`
			`}`
numerically stable blockwise polynomial multiplication (should be working; more testing necessary) 2013-04-02 19:13:27 +02:00
			`for (i = 0; (xp = xblocks[i]) != xlen; i++)`
			`{`
speed up squaring in polynomial multiplication 2013-07-25 16:38:42 +02:00			`for (j = squaring ? i + 1 : 0; (yp = yblocks[j]) != ylen; j++)`
numerically stable blockwise polynomial multiplication (should be working; more testing necessary) 2013-04-02 19:13:27 +02:00			`{`
			`if (xp + yp < n)`
			`{`
			`xl = xblocks[i + 1] - xp;`
			`yl = yblocks[j + 1] - yp;`
			`bn = FLINT_MIN(xl + yl - 1, n - xp - yp);`
			`xl = FLINT_MIN(xl, bn);`
			`yl = FLINT_MIN(yl, bn);`

			`/* classical multiplication; we just need the midpoints */`
			`if (bn < CLASSICAL_CUTOFF \|\| xl < CLASSICAL_CUTOFF \|\| yl < CLASSICAL_CUTOFF)`
			`{`
			`for (r = 0; r < xl; r++)`
			`{`
			`for (s = 0; s < yl && xp + yp + r + s < n; s++)`
			`{`
			`long zi, ret;`
			`zi = xp + yp + r + s;`

			`fmpr_mul(t, fmprb_midref(x + xp + r), fmprb_midref(y + yp + s), FMPR_PREC_EXACT, FMPR_RND_DOWN);`
speed up squaring in polynomial multiplication 2013-07-25 16:38:42 +02:00			`if (squaring)`
			`fmpr_mul_2exp_si(t, t, 1);`
numerically stable blockwise polynomial multiplication (should be working; more testing necessary) 2013-04-02 19:13:27 +02:00
speed up squaring in polynomial multiplication 2013-07-25 16:38:42 +02:00			`ret = fmpr_add(fmprb_midref(z + zi), fmprb_midref(z + zi), t, prec, FMPR_RND_DOWN);`
numerically stable blockwise polynomial multiplication (should be working; more testing necessary) 2013-04-02 19:13:27 +02:00			`fmpr_add_error_result(fmprb_radref(z + zi), fmprb_radref(z + zi),`
			`fmprb_midref(z + zi), ret, prec, FMPR_RND_UP);`
			`}`
			`}`
			`}`
			`else`
			`{`
			`/* exact multiplication over Z[x]`
			`todo: might want to verify that error==0 */`

			`_fmprb_poly_get_fmpz_poly_2exp(error, xexp, xz, x + xp, xl, FMPR_PREC_EXACT);`
			`_fmprb_poly_get_fmpz_poly_2exp(error, yexp, yz, y + yp, yl, FMPR_PREC_EXACT);`
			`_fmpz_poly_mullow(zz, xz, xl, yz, yl, bn);`
			`fmpz_add(zexp, xexp, yexp);`
speed up squaring in polynomial multiplication 2013-07-25 16:38:42 +02:00			`if (squaring)`
			`fmpz_add_ui(zexp, zexp, 1);`
numerically stable blockwise polynomial multiplication (should be working; more testing necessary) 2013-04-02 19:13:27 +02:00			`_fmprb_vec_add_fmpz_vec_2exp(z + xp + yp, z + xp + yp, zz, zexp, bn, prec);`
			`}`
			`}`
			`}`
			`}`

			`fmpz_clear(xexp);`
			`fmpz_clear(yexp);`
			`fmpz_clear(zexp);`

			`_fmpz_vec_clear(xz, xlen);`
			`_fmpz_vec_clear(yz, ylen);`
			`_fmpz_vec_clear(zz, n);`

			`fmpr_clear(error);`
			`fmpr_clear(t);`

			`flint_free(xblocks);`
			`flint_free(yblocks);`
			`}`

			`void`
			`fmprb_poly_mullow_block(fmprb_poly_t res, const fmprb_poly_t poly1,`
			`const fmprb_poly_t poly2, long n, long prec)`
			`{`
			`long xlen, ylen, zlen;`

			`xlen = poly1->length;`
			`ylen = poly2->length;`

			`if (xlen == 0 \|\| ylen == 0 \|\| n == 0)`
			`{`
			`fmprb_poly_zero(res);`
			`return;`
			`}`

			`xlen = FLINT_MIN(xlen, n);`
			`ylen = FLINT_MIN(ylen, n);`
			`zlen = FLINT_MIN(xlen + ylen - 1, n);`

			`if (res == poly1 \|\| res == poly2)`
			`{`
			`fmprb_poly_t tmp;`
			`fmprb_poly_init2(tmp, zlen);`
			`_fmprb_poly_mullow_block(tmp->coeffs, poly1->coeffs, xlen,`
			`poly2->coeffs, ylen, zlen, prec);`
			`fmprb_poly_swap(res, tmp);`
			`fmprb_poly_clear(tmp);`
			`}`
			`else`
			`{`
			`fmprb_poly_fit_length(res, zlen);`
			`_fmprb_poly_mullow_block(res->coeffs, poly1->coeffs, xlen,`
			`poly2->coeffs, ylen, zlen, prec);`
			`}`

			`_fmprb_poly_set_length(res, zlen);`
			`_fmprb_poly_normalise(res);`
			`}`