add arf_fma, arb_fma, arb_fma_ui

This commit is contained in:
fredrik 2021-07-25 12:41:17 +02:00
parent e7bcb28c0f
commit 7748f0230c
8 changed files with 487 additions and 0 deletions

4
arb.h
View file

@ -425,6 +425,10 @@ void arb_submul_si(arb_t z, const arb_t x, slong y, slong prec);
void arb_submul_ui(arb_t z, const arb_t x, ulong y, slong prec);
void arb_submul_fmpz(arb_t z, const arb_t x, const fmpz_t y, slong prec);
void arb_fma(arb_t res, const arb_t x, const arb_t y, const arb_t z, slong prec);
void arb_fma_arf(arb_t res, const arb_t x, const arf_t y, const arb_t z, slong prec);
void arb_fma_ui(arb_t res, const arb_t x, ulong y, const arb_t z, slong prec);
void arb_dot_simple(arb_t res, const arb_t initial, int subtract,
arb_srcptr x, slong xstep, arb_srcptr y, slong ystep, slong len, slong prec);
void arb_dot_precise(arb_t res, const arb_t initial, int subtract,

125
arb/fma.c Normal file
View file

@ -0,0 +1,125 @@
/*
Copyright (C) 2021 Fredrik Johansson
This file is part of Arb.
Arb is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version. See <http://www.gnu.org/licenses/>.
*/
#include "arb.h"
void
arb_fma_arf(arb_t res, const arb_t x, const arf_t y, const arb_t z, slong prec)
{
mag_t ym;
int inexact;
if (arb_is_exact(x))
{
inexact = arf_fma(arb_midref(res), arb_midref(x), y, arb_midref(z), prec, ARB_RND);
if (inexact)
arf_mag_add_ulp(arb_radref(res), arb_radref(z), arb_midref(res), prec);
else
mag_set(arb_radref(res), arb_radref(z));
}
else if (ARB_IS_LAGOM(res) && ARB_IS_LAGOM(x) && ARF_IS_LAGOM(y) && ARB_IS_LAGOM(z))
{
mag_t tm;
mag_fast_init_set_arf(ym, y);
*tm = *arb_radref(z);
mag_fast_addmul(tm, ym, arb_radref(x));
*arb_radref(res) = *tm;
inexact = arf_fma(arb_midref(res), arb_midref(x), y, arb_midref(z), prec, ARB_RND);
if (inexact)
arf_mag_fast_add_ulp(arb_radref(res), arb_radref(res), arb_midref(res), prec);
}
else
{
mag_t tm;
mag_init(tm);
mag_init_set_arf(ym, y);
mag_set(tm, arb_radref(z));
mag_addmul(tm, ym, arb_radref(x));
mag_set(arb_radref(res), tm);
inexact = arf_fma(arb_midref(res), arb_midref(x), y, arb_midref(z), prec, ARB_RND);
if (inexact)
arf_mag_add_ulp(arb_radref(res), arb_radref(res), arb_midref(res), prec);
mag_clear(tm);
mag_clear(ym);
}
}
void
arb_fma(arb_t res, const arb_t x, const arb_t y, const arb_t z, slong prec)
{
mag_t zr, xm, ym;
int inexact;
if (arb_is_exact(y))
{
arb_fma_arf(res, x, arb_midref(y), z, prec);
}
else if (arb_is_exact(x))
{
arb_fma_arf(res, y, arb_midref(x), z, prec);
}
else if (ARB_IS_LAGOM(res) && ARB_IS_LAGOM(x) && ARB_IS_LAGOM(y) && ARB_IS_LAGOM(z))
{
mag_fast_init_set_arf(xm, arb_midref(x));
mag_fast_init_set_arf(ym, arb_midref(y));
mag_fast_init_set(zr, arb_radref(z));
mag_fast_addmul(zr, xm, arb_radref(y));
mag_fast_addmul(zr, ym, arb_radref(x));
mag_fast_addmul(zr, arb_radref(x), arb_radref(y));
inexact = arf_fma(arb_midref(res), arb_midref(x), arb_midref(y), arb_midref(z),
prec, ARF_RND_DOWN);
if (inexact)
arf_mag_fast_add_ulp(zr, zr, arb_midref(res), prec);
*arb_radref(res) = *zr;
}
else
{
mag_init_set_arf(xm, arb_midref(x));
mag_init_set_arf(ym, arb_midref(y));
mag_init_set(zr, arb_radref(z));
mag_addmul(zr, xm, arb_radref(y));
mag_addmul(zr, ym, arb_radref(x));
mag_addmul(zr, arb_radref(x), arb_radref(y));
inexact = arf_fma(arb_midref(res), arb_midref(x), arb_midref(y), arb_midref(z),
prec, ARF_RND_DOWN);
if (inexact)
arf_mag_add_ulp(arb_radref(res), zr, arb_midref(res), prec);
else
mag_set(arb_radref(res), zr);
mag_clear(zr);
mag_clear(xm);
mag_clear(ym);
}
}
void
arb_fma_ui(arb_t res, const arb_t x, ulong y, const arb_t z, slong prec)
{
arf_t t;
arf_init_set_ui(t, y); /* no need to free */
arb_fma_arf(res, x, t, z, prec);
}

125
arb/test/t-fma.c Normal file
View file

@ -0,0 +1,125 @@
/*
Copyright (C) 2012 Fredrik Johansson
This file is part of Arb.
Arb is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version. See <http://www.gnu.org/licenses/>.
*/
#include "arb.h"
void
arb_fma_naive(arb_t res, const arb_t x, const arb_t y, const arb_t z, slong prec)
{
arb_t t;
arb_init(t);
arb_set(t, z);
arb_addmul(t, x, y, prec);
arb_set(res, t);
arb_clear(t);
}
int main()
{
slong iter;
flint_rand_t state;
flint_printf("fma....");
fflush(stdout);
flint_randinit(state);
for (iter = 0; iter < 10000 * arb_test_multiplier(); iter++)
{
arb_t x, y, z, res1, res2;
slong prec;
int aliasing;
arb_init(x);
arb_init(y);
arb_init(z);
arb_init(res1);
arb_init(res2);
prec = 2 + n_randint(state, 200);
arb_randtest_special(x, state, 200, 100);
arb_randtest_special(y, state, 200, 100);
arb_randtest_special(z, state, 200, 100);
arb_randtest_special(res1, state, 200, 100);
arb_randtest_special(res2, state, 200, 100);
if (n_randint(state, 10) == 0 &&
fmpz_bits(ARF_EXPREF(arb_midref(x))) < 10 &&
fmpz_bits(ARF_EXPREF(arb_midref(y))) < 10 &&
fmpz_bits(ARF_EXPREF(arb_midref(z))) < 10)
{
prec = ARF_PREC_EXACT;
}
aliasing = n_randint(state, 7);
switch (aliasing)
{
case 0:
arb_fma(res1, x, y, z, prec);
arb_fma_naive(res2, x, y, z, prec);
break;
case 1:
arb_set(res1, z);
arb_fma(res1, x, y, res1, prec);
arb_fma_naive(res2, x, y, z, prec);
break;
case 2:
arb_set(res1, x);
arb_fma(res1, res1, y, z, prec);
arb_fma_naive(res2, x, y, z, prec);
break;
case 3:
arb_set(res1, y);
arb_fma(res1, x, res1, z, prec);
arb_fma_naive(res2, x, y, z, prec);
break;
case 4:
arb_fma(res1, x, x, z, prec);
arb_fma_naive(res2, x, x, z, prec);
break;
case 5:
arb_set(res1, x);
arb_fma(res1, res1, res1, z, prec);
arb_fma_naive(res2, x, x, z, prec);
break;
default:
arb_set(res1, x);
arb_fma(res1, res1, res1, res1, prec);
arb_fma_naive(res2, x, x, x, prec);
break;
}
if (!arb_equal(res1, res2))
{
flint_printf("FAIL!\n");
flint_printf("prec = %wd, aliasing = %d\n\n", prec, aliasing);
flint_printf("x = "); arb_printd(x, 30); flint_printf("\n\n");
flint_printf("y = "); arb_printd(y, 30); flint_printf("\n\n");
flint_printf("z = "); arb_printd(z, 30); flint_printf("\n\n");
flint_printf("res1 = "); arb_printd(res1, 30); flint_printf("\n\n");
flint_printf("res2 = "); arb_printd(res2, 30); flint_printf("\n\n");
flint_abort();
}
arb_clear(x);
arb_clear(y);
arb_clear(z);
arb_clear(res1);
arb_clear(res2);
}
flint_randclear(state);
flint_cleanup();
flint_printf("PASS\n");
return EXIT_SUCCESS;
}

2
arf.h
View file

@ -1071,6 +1071,8 @@ arf_submul_fmpz(arf_ptr z, arf_srcptr x, const fmpz_t y, slong prec, arf_rnd_t r
return arf_submul_mpz(z, x, COEFF_TO_PTR(*y), prec, rnd);
}
int arf_fma(arf_ptr res, arf_srcptr x, arf_srcptr y, arf_srcptr z, slong prec, arf_rnd_t rnd);
int arf_sosq(arf_t z, const arf_t x, const arf_t y, slong prec, arf_rnd_t rnd);
int arf_div(arf_ptr z, arf_srcptr x, arf_srcptr y, slong prec, arf_rnd_t rnd);

78
arf/fma.c Normal file
View file

@ -0,0 +1,78 @@
/*
Copyright (C) 2021 Fredrik Johansson
This file is part of Arb.
Arb is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version. See <http://www.gnu.org/licenses/>.
*/
#include "arf.h"
int
arf_fma(arf_ptr res, arf_srcptr x, arf_srcptr y, arf_srcptr z, slong prec, arf_rnd_t rnd)
{
mp_size_t xn, yn, zn, tn, alloc;
mp_srcptr xptr, yptr, zptr;
mp_ptr tptr, tptr2;
fmpz_t texp;
slong shift;
int tsgnbit, inexact;
ARF_MUL_TMP_DECL
if (arf_is_special(x) || arf_is_special(y) || arf_is_special(z))
{
if (arf_is_zero(z))
{
return arf_mul(res, x, y, prec, rnd);
}
else if (arf_is_finite(x) && arf_is_finite(y))
{
return arf_set_round(res, z, prec, rnd);
}
else
{
/* todo: speed up */
arf_t t;
arf_init(t);
arf_mul(t, x, y, ARF_PREC_EXACT, ARF_RND_DOWN);
inexact = arf_add(res, z, t, prec, rnd);
arf_clear(t);
return inexact;
}
}
tsgnbit = ARF_SGNBIT(x) ^ ARF_SGNBIT(y);
ARF_GET_MPN_READONLY(xptr, xn, x);
ARF_GET_MPN_READONLY(yptr, yn, y);
ARF_GET_MPN_READONLY(zptr, zn, z);
fmpz_init(texp);
_fmpz_add2_fast(texp, ARF_EXPREF(x), ARF_EXPREF(y), 0);
shift = _fmpz_sub_small(ARF_EXPREF(z), texp);
alloc = tn = xn + yn;
ARF_MUL_TMP_ALLOC(tptr2, alloc)
tptr = tptr2;
ARF_MPN_MUL(tptr, xptr, xn, yptr, yn);
tn -= (tptr[0] == 0);
tptr += (tptr[0] == 0);
if (shift >= 0)
inexact = _arf_add_mpn(res, zptr, zn, ARF_SGNBIT(z), ARF_EXPREF(z),
tptr, tn, tsgnbit, shift, prec, rnd);
else
inexact = _arf_add_mpn(res, tptr, tn, tsgnbit, texp,
zptr, zn, ARF_SGNBIT(z), -shift, prec, rnd);
ARF_MUL_TMP_FREE(tptr2, alloc)
fmpz_clear(texp);
return inexact;
}

141
arf/test/t-fma.c Normal file
View file

@ -0,0 +1,141 @@
/*
Copyright (C) 2012 Fredrik Johansson
This file is part of Arb.
Arb is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version. See <http://www.gnu.org/licenses/>.
*/
#include "arf.h"
int
arf_fma_naive(arf_t res, const arf_t x, const arf_t y, const arf_t z, slong prec, arf_rnd_t rnd)
{
arf_t t;
int inexact;
arf_init(t);
arf_mul(t, x, y, ARF_PREC_EXACT, ARF_RND_DOWN);
inexact = arf_add(res, z, t, prec, rnd);
arf_clear(t);
return inexact;
}
int main()
{
slong iter;
flint_rand_t state;
flint_printf("fma....");
fflush(stdout);
flint_randinit(state);
for (iter = 0; iter < 10000 * arb_test_multiplier(); iter++)
{
arf_t x, y, z, res1, res2;
slong prec, r1, r2;
arf_rnd_t rnd;
int aliasing;
arf_init(x);
arf_init(y);
arf_init(z);
arf_init(res1);
arf_init(res2);
prec = 2 + n_randint(state, 200);
arf_randtest_special(x, state, 200, 100);
arf_randtest_special(y, state, 200, 100);
arf_randtest_special(z, state, 200, 100);
arf_randtest_special(res1, state, 200, 100);
arf_randtest_special(res2, state, 200, 100);
if (n_randint(state, 10) == 0 &&
fmpz_bits(ARF_EXPREF(x)) < 10 &&
fmpz_bits(ARF_EXPREF(y)) < 10 &&
fmpz_bits(ARF_EXPREF(z)) < 10)
{
prec = ARF_PREC_EXACT;
}
switch (n_randint(state, 5))
{
case 0: rnd = ARF_RND_DOWN; break;
case 1: rnd = ARF_RND_UP; break;
case 2: rnd = ARF_RND_FLOOR; break;
case 3: rnd = ARF_RND_CEIL; break;
default: rnd = ARF_RND_NEAR; break;
}
aliasing = n_randint(state, 7);
switch (aliasing)
{
case 0:
r1 = arf_fma(res1, x, y, z, prec, rnd);
r2 = arf_fma_naive(res2, x, y, z, prec, rnd);
break;
case 1:
arf_set(res1, z);
r1 = arf_fma(res1, x, y, res1, prec, rnd);
r2 = arf_fma_naive(res2, x, y, z, prec, rnd);
break;
case 2:
arf_set(res1, x);
r1 = arf_fma(res1, res1, y, z, prec, rnd);
r2 = arf_fma_naive(res2, x, y, z, prec, rnd);
break;
case 3:
arf_set(res1, y);
r1 = arf_fma(res1, x, res1, z, prec, rnd);
r2 = arf_fma_naive(res2, x, y, z, prec, rnd);
break;
case 4:
r1 = arf_fma(res1, x, x, z, prec, rnd);
r2 = arf_fma_naive(res2, x, x, z, prec, rnd);
break;
case 5:
arf_set(res1, x);
r1 = arf_fma(res1, res1, res1, z, prec, rnd);
r2 = arf_fma_naive(res2, x, x, z, prec, rnd);
break;
default:
arf_set(res1, x);
r1 = arf_fma(res1, res1, res1, res1, prec, rnd);
r2 = arf_fma_naive(res2, x, x, x, prec, rnd);
break;
}
if (!arf_equal(res1, res2) || r1 != r2)
{
flint_printf("FAIL!\n");
flint_printf("prec = %wd, rnd = %d, aliasing = %d\n\n", prec, rnd, aliasing);
flint_printf("x = "); arf_print(x); flint_printf("\n\n");
flint_printf("y = "); arf_print(y); flint_printf("\n\n");
flint_printf("z = "); arf_print(z); flint_printf("\n\n");
flint_printf("res1 = "); arf_print(res1); flint_printf("\n\n");
flint_printf("res2 = "); arf_print(res2); flint_printf("\n\n");
flint_printf("r1 = %wd, r2 = %wd\n", r1, r2);
flint_abort();
}
arf_clear(x);
arf_clear(y);
arf_clear(z);
arf_clear(res1);
arf_clear(res2);
}
flint_randclear(state);
flint_cleanup();
flint_printf("PASS\n");
return EXIT_SUCCESS;
}

View file

@ -828,6 +828,13 @@ Arithmetic
Sets `z = z - x \cdot y`, rounded to prec bits. The precision can be
*ARF_PREC_EXACT* provided that the result fits in memory.
.. function:: void arb_fma(arb_t res, const arb_t x, const arb_t y, const arb_t z, slong prec)
void arb_fma_arf(arb_t res, const arb_t x, const arf_t y, const arb_t z, slong prec)
void arb_fma_ui(arb_t res, const arb_t x, ulong y, const arb_t z, slong prec)
Sets *res* to `x \cdot y + z`. This is equivalent to an *addmul* except
that *res* and *z* can be separate variables.
.. function:: void arb_inv(arb_t z, const arb_t x, slong prec)
Sets *z* to `1 / x`.

View file

@ -621,6 +621,11 @@ Addition and multiplication
Performs a fused multiply-subtract `z = z - x \cdot y`, updating *z* in-place.
.. function:: int arf_fma(arf_t res, const arf_t x, const arf_t y, const arf_t z, slong prec, arf_rnd_t rnd)
Sets *res* to `x \cdot y + z`. This is equivalent to an *addmul* except
that *res* and *z* can be separate variables.
.. function:: int arf_sosq(arf_t res, const arf_t x, const arf_t y, slong prec, arf_rnd_t rnd)
Sets *res* to `x^2 + y^2`, rounded to *prec* bits in the direction specified by *rnd*.