low-level fmprb_mul

This commit is contained in:
Fredrik Johansson 2013-01-10 19:19:18 +01:00
parent f376100ac2
commit 96733b7083
5 changed files with 608 additions and 63 deletions

View file

@ -228,6 +228,11 @@ void fmprb_ui_div(fmprb_t z, ulong x, const fmprb_t y, long prec);
void fmprb_div_2expm1_ui(fmprb_t y, const fmprb_t x, ulong n, long prec);
void fmprb_mul_fmpr_naive(fmprb_t z, const fmprb_t x, const fmpr_t y, long prec);
void fmprb_mul_main_naive(fmprb_t z, const fmprb_t x, const fmprb_t y, long prec);
void fmprb_mul_naive(fmprb_t z, const fmprb_t x, const fmprb_t y, long prec);
void fmprb_mul(fmprb_t z, const fmprb_t x, const fmprb_t y, long prec);
void fmprb_mul_ui(fmprb_t z, const fmprb_t x, ulong y, long prec);
void fmprb_mul_si(fmprb_t z, const fmprb_t x, long y, long prec);

View file

@ -25,95 +25,263 @@
#include "fmprb.h"
void
fmprb_mul(fmprb_t z, const fmprb_t x, const fmprb_t y, long prec)
static __inline__ void
_fmpz_add_si(fmpz_t z, const fmpz_t x, long y)
{
long r;
fmpz f;
if (fmprb_is_exact(x))
f = *x;
if (!COEFF_IS_MPZ(f) && !COEFF_IS_MPZ(y))
fmpz_set_si(z, f + y);
else if (y >= 0)
fmpz_add_ui(z, x, y);
else
fmpz_sub_ui(z, x, -y);
}
static __inline__ void
_fmpz_add(fmpz_t z, const fmpz_t x, const fmpz_t y)
{
fmpz f, g;
f = *x;
g = *y;
if (!COEFF_IS_MPZ(f) && !COEFF_IS_MPZ(g))
fmpz_set_si(z, f + g);
else
fmpz_add(z, x, y);
}
/* we speed up the radius operations by working with mantissas aligned to
FMPRB_RAD_PREC bits (possibly one less bit after multiplying)
TODO: write down proof that the additions don't cause overflow
*/
#define _RAD_MUL(a, ae, b, be) \
do { \
mp_limb_t hi, lo; \
umul_ppmm(hi, lo, a, b); \
a = ((hi << (FLINT_BITS - FMPRB_RAD_PREC)) | (lo >> FMPRB_RAD_PREC)) + 1; \
_fmpz_add(ae, ae, be); \
} while (0); \
static __inline__ void
_rad_bound(mp_limb_t * m, fmpz_t exp, const fmpr_t t)
{
long e;
*m = fmpz_abs_ubound_ui_2exp(&e, fmpr_manref(t), FMPRB_RAD_PREC);
_fmpz_add_si(exp, fmpr_expref(t), e);
}
static __inline__ mp_limb_t
_rad_add(mp_limb_t a, fmpz_t aexp, mp_limb_t b, fmpz_t bexp)
{
long shift;
shift = _fmpz_sub_small(aexp, bexp);
if (shift == 0)
{
if (fmprb_is_exact(y))
{
r = fmpr_mul(fmprb_midref(z), fmprb_midref(x), fmprb_midref(y), prec, FMPR_RND_DOWN);
fmpr_set_error_result(fmprb_radref(z), fmprb_midref(z), r);
}
return a + b;
}
else if (shift > 0)
{
if (shift <= FMPRB_RAD_PREC)
return a + (b >> shift) + 1;
else
{
/* x * (y+b) = x*y + x*b */
fmpr_mul(fmprb_radref(z), fmprb_midref(x), fmprb_radref(y), FMPRB_RAD_PREC, FMPR_RND_UP);
fmpr_abs(fmprb_radref(z), fmprb_radref(z));
r = fmpr_mul(fmprb_midref(z), fmprb_midref(x), fmprb_midref(y), prec, FMPR_RND_DOWN);
fmpr_add_error_result(fmprb_radref(z), fmprb_radref(z),
fmprb_midref(z), r, FMPRB_RAD_PREC, FMPR_RND_UP);
}
return a + 1;
}
else
{
if (fmprb_is_exact(x))
{
/* (x+a) * y = x*y + y*a */
fmpr_mul(fmprb_radref(z), fmprb_midref(y), fmprb_radref(x), FMPRB_RAD_PREC, FMPR_RND_UP);
fmpr_abs(fmprb_radref(z), fmprb_radref(z));
fmpz_swap(aexp, bexp);
r = fmpr_mul(fmprb_midref(z), fmprb_midref(x), fmprb_midref(y), prec, FMPR_RND_DOWN);
fmpr_add_error_result(fmprb_radref(z), fmprb_radref(z),
fmprb_midref(z), r, FMPRB_RAD_PREC, FMPR_RND_UP);
if ((-shift) <= FMPRB_RAD_PREC)
return b + (a >> (-shift)) + 1;
else
return b + 1;
}
}
void _fmprb_mul_main(fmpr_t z, fmpr_t c,
const fmpr_t x, const fmpr_t a,
const fmpr_t y, const fmpr_t b, long prec)
{
mp_limb_t xm, am, ym, bm;
fmpz_t xe, ae, ye, be;
long r, shift;
fmpz_init(xe);
fmpz_init(ae);
fmpz_init(ye);
fmpz_init(be);
_rad_bound(&xm, xe, x);
_rad_bound(&ym, ye, y);
_rad_bound(&am, ae, a);
_rad_bound(&bm, be, b);
_RAD_MUL(xm, xe, bm, be);
_RAD_MUL(ym, ye, am, ae);
xm = _rad_add(xm, xe, ym, ye);
_RAD_MUL(am, ae, bm, be);
xm = _rad_add(xm, xe, am, ae);
r = fmpr_mul(z, x, y, prec, FMPR_RND_DOWN);
if (r != FMPR_RESULT_EXACT)
{
am = 1UL << FMPRB_RAD_PREC;
_fmpz_add_si(ae, fmpr_expref(z), -r - 2*FMPRB_RAD_PREC);
xm = _rad_add(xm, xe, am, ae);
}
shift = FMPRB_RAD_PREC;
/* make the radius mantissa odd and small */
xm += !(xm & 1);
while (xm >= (1UL << FMPRB_RAD_PREC))
{
xm = (xm >> 1) + 1;
xm += !(xm & 1);
shift++;
}
fmpz_set_ui(fmpr_manref(c), xm);
_fmpz_add_si(fmpr_expref(c), xe, shift);
fmpz_clear(xe);
fmpz_clear(ae);
fmpz_clear(ye);
fmpz_clear(be);
}
void _fmprb_mul_fmpr_main(fmpr_t z, fmpr_t c,
const fmpr_t x, const fmpr_t a,
const fmpr_t y, long prec)
{
mp_limb_t ym, am;
fmpz_t ye, ae;
long r, shift;
fmpz_init(ye);
fmpz_init(ae);
_rad_bound(&ym, ye, y);
_rad_bound(&am, ae, a);
_RAD_MUL(ym, ye, am, ae);
r = fmpr_mul(z, x, y, prec, FMPR_RND_DOWN);
if (r != FMPR_RESULT_EXACT)
{
am = 1UL << FMPRB_RAD_PREC;
_fmpz_add_si(ae, fmpr_expref(z), -r - 2*FMPRB_RAD_PREC);
ym = _rad_add(ym, ye, am, ae);
}
shift = FMPRB_RAD_PREC;
/* make the radius mantissa odd and small */
ym += !(ym & 1);
while (ym >= (1UL << FMPRB_RAD_PREC))
{
ym = (ym >> 1) + 1;
ym += !(ym & 1);
shift++;
}
fmpz_set_ui(fmpr_manref(c), ym);
_fmpz_add_si(fmpr_expref(c), ye, shift);
fmpz_clear(ye);
fmpz_clear(ae);
}
void
fmprb_mul_fmpr(fmprb_t z, const fmprb_t x, const fmpr_t y, long prec)
{
if (fmprb_is_exact(x))
{
long r;
r = fmpr_mul(fmprb_midref(z), fmprb_midref(x), y, prec, FMPR_RND_DOWN);
fmpr_set_error_result(fmprb_radref(z), fmprb_midref(z), r);
}
else
{
if (fmpr_is_special(fmprb_midref(x)) ||
fmpr_is_special(fmprb_radref(x)) || fmpr_is_special(y))
{
fmprb_mul_fmpr_naive(z, x, y, prec);
}
else
{
/* (x+a)*(y+b) = x*y + x*b + y*a + a*b*/
fmpr_t t, u;
fmpr_init(t);
fmpr_init(u);
fmpr_mul(t, fmprb_midref(x), fmprb_radref(y), FMPRB_RAD_PREC, FMPR_RND_UP);
fmpr_abs(t, t);
fmpr_mul(u, fmprb_midref(y), fmprb_radref(x), FMPRB_RAD_PREC, FMPR_RND_UP);
fmpr_abs(u, u);
fmpr_add(t, t, u, FMPRB_RAD_PREC, FMPR_RND_UP);
fmpr_addmul(t, fmprb_radref(x), fmprb_radref(y), FMPRB_RAD_PREC, FMPR_RND_UP);
r = fmpr_mul(fmprb_midref(z), fmprb_midref(x), fmprb_midref(y), prec, FMPR_RND_DOWN);
fmpr_add_error_result(fmprb_radref(z), t,
fmprb_midref(z), r, FMPRB_RAD_PREC, FMPR_RND_UP);
fmpr_clear(t);
fmpr_clear(u);
_fmprb_mul_fmpr_main(fmprb_midref(z), fmprb_radref(z),
fmprb_midref(x), fmprb_radref(x), y, prec);
}
}
}
fmprb_adjust(z);
void
fmprb_mul(fmprb_t z, const fmprb_t x, const fmprb_t y, long prec)
{
if (fmprb_is_exact(x))
{
fmprb_mul_fmpr(z, y, fmprb_midref(x), prec);
}
else if (fmprb_is_exact(y))
{
fmprb_mul_fmpr(z, x, fmprb_midref(y), prec);
}
else
{
if (fmpr_is_special(fmprb_midref(x)) ||
fmpr_is_special(fmprb_radref(x)) ||
fmpr_is_special(fmprb_midref(y)) ||
fmpr_is_special(fmprb_radref(y)))
{
fmprb_mul_main_naive(z, x, y, prec);
}
else
{
_fmprb_mul_main(fmprb_midref(z), fmprb_radref(z),
fmprb_midref(x), fmprb_radref(x),
fmprb_midref(y), fmprb_radref(y), prec);
}
}
}
void
fmprb_mul_ui(fmprb_t z, const fmprb_t x, ulong y, long prec)
{
fmprb_t t;
fmprb_init(t);
fmprb_set_ui(t, y);
fmprb_mul(z, x, t, prec);
fmprb_clear(t);
fmpr_t t;
fmpr_init(t);
fmpr_set_ui(t, y);
fmprb_mul_fmpr(z, x, t, prec);
fmpr_clear(t);
}
void
fmprb_mul_si(fmprb_t z, const fmprb_t x, long y, long prec)
{
fmprb_t t;
fmprb_init(t);
fmprb_set_si(t, y);
fmprb_mul(z, x, t, prec);
fmprb_clear(t);
fmpr_t t;
fmpr_init(t);
fmpr_set_si(t, y);
fmprb_mul_fmpr(z, x, t, prec);
fmpr_clear(t);
}
void
fmprb_mul_fmpz(fmprb_t z, const fmprb_t x, const fmpz_t y, long prec)
{
fmprb_t t;
fmprb_init(t);
fmprb_set_fmpz(t, y);
fmprb_mul(z, x, t, prec);
fmprb_clear(t);
fmpr_t t;
fmpr_init(t);
fmpr_set_fmpz(t, y);
fmprb_mul_fmpr(z, x, t, prec);
fmpr_clear(t);
}

83
fmprb/mul_naive.c Normal file
View file

@ -0,0 +1,83 @@
/*=============================================================================
This file is part of ARB.
ARB is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
ARB is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ARB; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
=============================================================================*/
/******************************************************************************
Copyright (C) 2012 Fredrik Johansson
******************************************************************************/
#include "fmprb.h"
void
fmprb_mul_fmpr_naive(fmprb_t z, const fmprb_t x, const fmpr_t y, long prec)
{
long r;
/* (x+a) * y = x*y + y*a */
fmpr_mul(fmprb_radref(z), fmprb_radref(x), y, FMPRB_RAD_PREC, FMPR_RND_UP);
fmpr_abs(fmprb_radref(z), fmprb_radref(z));
r = fmpr_mul(fmprb_midref(z), fmprb_midref(x), y, prec, FMPR_RND_DOWN);
fmpr_add_error_result(fmprb_radref(z), fmprb_radref(z),
fmprb_midref(z), r, FMPRB_RAD_PREC, FMPR_RND_UP);
}
void
fmprb_mul_main_naive(fmprb_t z, const fmprb_t x, const fmprb_t y, long prec)
{
fmpr_t t, u;
long r;
fmpr_init(t);
fmpr_init(u);
/* (x+a)*(y+b) = x*y + x*b + y*a + a*b*/
fmpr_mul(t, fmprb_midref(x), fmprb_radref(y), FMPRB_RAD_PREC, FMPR_RND_UP);
fmpr_abs(t, t);
fmpr_mul(u, fmprb_midref(y), fmprb_radref(x), FMPRB_RAD_PREC, FMPR_RND_UP);
fmpr_abs(u, u);
fmpr_add(t, t, u, FMPRB_RAD_PREC, FMPR_RND_UP);
fmpr_addmul(t, fmprb_radref(x), fmprb_radref(y), FMPRB_RAD_PREC, FMPR_RND_UP);
r = fmpr_mul(fmprb_midref(z), fmprb_midref(x), fmprb_midref(y), prec, FMPR_RND_DOWN);
fmpr_add_error_result(fmprb_radref(z), t,
fmprb_midref(z), r, FMPRB_RAD_PREC, FMPR_RND_UP);
fmpr_clear(t);
fmpr_clear(u);
}
void
fmprb_mul_naive(fmprb_t z, const fmprb_t x, const fmprb_t y, long prec)
{
if (fmprb_is_exact(x))
fmprb_mul_fmpr_naive(z, y, fmprb_midref(x), prec);
else if (fmprb_is_exact(y))
fmprb_mul_fmpr_naive(z, x, fmprb_midref(y), prec);
else
fmprb_mul_main_naive(z, x, y, prec);
fmprb_adjust(z);
}

View file

@ -25,9 +25,28 @@
#include "fmprb.h"
int
fmpr_close(const fmpr_t a, const fmpr_t b)
{
fmpr_t t;
int res1, res2;
fmpr_mul_ui(t, b, 257, FMPRB_RAD_PREC, FMPR_RND_UP);
fmpr_mul_2exp_si(t, t, -8);
res1 = fmpr_cmp(a, t) <= 0;
fmpr_mul_ui(t, a, 257, FMPRB_RAD_PREC, FMPR_RND_UP);
fmpr_mul_2exp_si(t, t, -8);
res2 = fmpr_cmp(b, t) <= 0;
fmpr_clear(t);
return res1 && res2;
}
int main()
{
long iter;
long iter, iter2;
flint_rand_t state;
printf("mul....");
@ -161,6 +180,108 @@ int main()
fmpq_clear(z);
}
/* main test */
for (iter = 0; iter < 10000; iter++)
{
fmprb_t x, y, z, v;
long prec;
fmprb_init(x);
fmprb_init(y);
fmprb_init(z);
fmprb_init(v);
for (iter2 = 0; iter2 < 100; iter2++)
{
fmpr_randtest_special(fmprb_midref(x), state, 2000, 200);
fmpr_randtest_special(fmprb_radref(x), state, 200, 200);
fmpr_abs(fmprb_radref(x), fmprb_radref(x));
fmpr_randtest_special(fmprb_midref(y), state, 2000, 200);
fmpr_randtest_special(fmprb_radref(y), state, 200, 200);
fmpr_abs(fmprb_radref(y), fmprb_radref(y));
prec = 2 + n_randint(state, 2000);
switch (n_randint(state, 5))
{
case 0:
fmprb_mul(z, x, y, prec);
fmprb_mul_naive(v, x, y, prec);
if (!fmpr_equal(fmprb_midref(z), fmprb_midref(v))
|| !fmpr_close(fmprb_radref(z), fmprb_radref(v)))
{
printf("FAIL!\n");
printf("x = "); fmprb_print(x); printf("\n\n");
printf("y = "); fmprb_print(y); printf("\n\n");
printf("z = "); fmprb_print(z); printf("\n\n");
printf("v = "); fmprb_print(v); printf("\n\n");
abort();
}
break;
case 1:
fmprb_set(y, x);
fmprb_mul(z, x, y, prec);
fmprb_mul(v, x, x, prec);
if (!fmprb_equal(z, v))
{
printf("FAIL (aliasing 1)!\n");
printf("x = "); fmprb_print(x); printf("\n\n");
printf("z = "); fmprb_print(z); printf("\n\n");
printf("v = "); fmprb_print(v); printf("\n\n");
abort();
}
break;
case 2:
fmprb_mul(v, x, x, prec);
fmprb_mul(x, x, x, prec);
if (!fmprb_equal(v, x))
{
printf("FAIL (aliasing 2)!\n");
printf("x = "); fmprb_print(x); printf("\n\n");
printf("z = "); fmprb_print(z); printf("\n\n");
printf("v = "); fmprb_print(v); printf("\n\n");
abort();
}
break;
case 3:
fmprb_mul(v, x, y, prec);
fmprb_mul(x, x, y, prec);
if (!fmprb_equal(x, v))
{
printf("FAIL (aliasing 3)!\n");
printf("x = "); fmprb_print(x); printf("\n\n");
printf("y = "); fmprb_print(y); printf("\n\n");
printf("v = "); fmprb_print(v); printf("\n\n");
abort();
}
break;
default:
fmprb_mul(v, x, y, prec);
fmprb_mul(x, y, x, prec);
if (!fmprb_equal(x, v))
{
printf("FAIL (aliasing 4)!\n");
printf("x = "); fmprb_print(x); printf("\n\n");
printf("y = "); fmprb_print(y); printf("\n\n");
printf("v = "); fmprb_print(v); printf("\n\n");
abort();
}
break;
}
}
fmprb_clear(x);
fmprb_clear(y);
fmprb_clear(z);
fmprb_clear(v);
}
flint_randclear(state);
_fmpz_cleanup();
printf("PASS\n");

168
fmprb/test/t-mul_naive.c Normal file
View file

@ -0,0 +1,168 @@
/*=============================================================================
This file is part of ARB.
ARB is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
ARB is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ARB; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
=============================================================================*/
/******************************************************************************
Copyright (C) 2012 Fredrik Johansson
******************************************************************************/
#include "fmprb.h"
int main()
{
long iter;
flint_rand_t state;
printf("mul_naive....");
fflush(stdout);
flint_randinit(state);
for (iter = 0; iter < 100000; iter++)
{
fmprb_t a, b, c;
fmpq_t x, y, z;
fmprb_init(a);
fmprb_init(b);
fmprb_init(c);
fmpq_init(x);
fmpq_init(y);
fmpq_init(z);
fmprb_randtest(a, state, 1 + n_randint(state, 200), 10);
fmprb_randtest(b, state, 1 + n_randint(state, 200), 10);
fmprb_randtest(c, state, 1 + n_randint(state, 200), 10);
fmprb_get_rand_fmpq(x, state, a, 1 + n_randint(state, 200));
fmprb_get_rand_fmpq(y, state, b, 1 + n_randint(state, 200));
fmprb_mul_naive(c, a, b, 2 + n_randint(state, 200));
fmpq_mul(z, x, y);
if (!fmprb_contains_fmpq(c, z))
{
printf("FAIL: containment\n\n");
printf("a = "); fmprb_print(a); printf("\n\n");
printf("x = "); fmpq_print(x); printf("\n\n");
printf("b = "); fmprb_print(b); printf("\n\n");
printf("y = "); fmpq_print(y); printf("\n\n");
printf("c = "); fmprb_print(c); printf("\n\n");
printf("z = "); fmpq_print(z); printf("\n\n");
abort();
}
fmprb_clear(a);
fmprb_clear(b);
fmprb_clear(c);
fmpq_clear(x);
fmpq_clear(y);
fmpq_clear(z);
}
/* aliasing of c and a */
for (iter = 0; iter < 10000; iter++)
{
fmprb_t a, b;
fmpq_t x, y, z;
fmprb_init(a);
fmprb_init(b);
fmpq_init(x);
fmpq_init(y);
fmpq_init(z);
fmprb_randtest(a, state, 1 + n_randint(state, 200), 10);
fmprb_randtest(b, state, 1 + n_randint(state, 200), 10);
fmprb_get_rand_fmpq(x, state, a, 1 + n_randint(state, 200));
fmprb_get_rand_fmpq(y, state, b, 1 + n_randint(state, 200));
fmprb_mul_naive(a, a, b, 2 + n_randint(state, 200));
fmpq_mul(z, x, y);
if (!fmprb_contains_fmpq(a, z))
{
printf("FAIL: aliasing (c, a)\n\n");
printf("a = "); fmprb_print(a); printf("\n\n");
printf("x = "); fmpq_print(x); printf("\n\n");
printf("b = "); fmprb_print(b); printf("\n\n");
printf("y = "); fmpq_print(y); printf("\n\n");
printf("z = "); fmpq_print(z); printf("\n\n");
abort();
}
fmprb_clear(a);
fmprb_clear(b);
fmpq_clear(x);
fmpq_clear(y);
fmpq_clear(z);
}
/* aliasing of c and b */
for (iter = 0; iter < 10000; iter++)
{
fmprb_t a, b;
fmpq_t x, y, z;
fmprb_init(a);
fmprb_init(b);
fmpq_init(x);
fmpq_init(y);
fmpq_init(z);
fmprb_randtest(a, state, 1 + n_randint(state, 200), 10);
fmprb_randtest(b, state, 1 + n_randint(state, 200), 10);
fmprb_get_rand_fmpq(x, state, a, 1 + n_randint(state, 200));
fmprb_get_rand_fmpq(y, state, b, 1 + n_randint(state, 200));
fmprb_mul_naive(b, a, b, 2 + n_randint(state, 200));
fmpq_mul(z, x, y);
if (!fmprb_contains_fmpq(b, z))
{
printf("FAIL: aliasing (c, b)\n\n");
printf("a = "); fmprb_print(a); printf("\n\n");
printf("x = "); fmpq_print(x); printf("\n\n");
printf("b = "); fmprb_print(b); printf("\n\n");
printf("y = "); fmpq_print(y); printf("\n\n");
printf("z = "); fmpq_print(z); printf("\n\n");
abort();
}
fmprb_clear(a);
fmprb_clear(b);
fmpq_clear(x);
fmpq_clear(y);
fmpq_clear(z);
}
flint_randclear(state);
_fmpz_cleanup();
printf("PASS\n");
return EXIT_SUCCESS;
}