add fmpr_divappr_abs_ubound; slightly speed up a few radius operations

This commit is contained in:
Fredrik Johansson 2013-06-09 19:53:01 +02:00
parent c7745d8725
commit 82fd23e0da
9 changed files with 235 additions and 36 deletions

View file

@ -453,6 +453,11 @@ Arithmetic
Sets `z = x / y`, rounded according to *prec* and *rnd*. If *y* is zero, Sets `z = x / y`, rounded according to *prec* and *rnd*. If *y* is zero,
*z* is set to NaN. *z* is set to NaN.
.. function:: void fmpr_divappr_abs_ubound(fmpr_t z, const fmpr_t x, const fmpr_t y, long prec)
Sets `z` to an upper bound for `|x| / |y|`, computed to a precision
of approximately *prec* bits. The error can be a few ulp.
.. function:: long fmpr_addmul(fmpr_t z, const fmpr_t x, const fmpr_t y, long prec, fmpr_rnd_t rnd) .. function:: long fmpr_addmul(fmpr_t z, const fmpr_t x, const fmpr_t y, long prec, fmpr_rnd_t rnd)
.. function:: long fmpr_addmul_ui(fmpr_t z, const fmpr_t x, ulong y, long prec, fmpr_rnd_t rnd) .. function:: long fmpr_addmul_ui(fmpr_t z, const fmpr_t x, ulong y, long prec, fmpr_rnd_t rnd)

2
fmpr.h
View file

@ -411,6 +411,8 @@ long fmpr_div_fmpz(fmpr_t z, const fmpr_t x, const fmpz_t y, long prec, fmpr_rnd
long fmpr_fmpz_div(fmpr_t z, const fmpz_t x, const fmpr_t y, long prec, fmpr_rnd_t rnd); long fmpr_fmpz_div(fmpr_t z, const fmpz_t x, const fmpr_t y, long prec, fmpr_rnd_t rnd);
long fmpr_fmpz_div_fmpz(fmpr_t z, const fmpz_t x, const fmpz_t y, long prec, fmpr_rnd_t rnd); long fmpr_fmpz_div_fmpz(fmpr_t z, const fmpz_t x, const fmpz_t y, long prec, fmpr_rnd_t rnd);
void fmpr_divappr_abs_ubound(fmpr_t z, const fmpr_t x, const fmpr_t y, long prec);
long fmpr_addmul(fmpr_t z, const fmpr_t x, const fmpr_t y, long prec, fmpr_rnd_t rnd); long fmpr_addmul(fmpr_t z, const fmpr_t x, const fmpr_t y, long prec, fmpr_rnd_t rnd);
long fmpr_addmul_ui(fmpr_t z, const fmpr_t x, ulong y, long prec, fmpr_rnd_t rnd); long fmpr_addmul_ui(fmpr_t z, const fmpr_t x, ulong y, long prec, fmpr_rnd_t rnd);
long fmpr_addmul_si(fmpr_t z, const fmpr_t x, long y, long prec, fmpr_rnd_t rnd); long fmpr_addmul_si(fmpr_t z, const fmpr_t x, long y, long prec, fmpr_rnd_t rnd);

88
fmpr/divappr_abs_ubound.c Normal file
View file

@ -0,0 +1,88 @@
/*=============================================================================
This file is part of ARB.
ARB is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
ARB is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ARB; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
=============================================================================*/
/******************************************************************************
Copyright (C) 2013 Fredrik Johansson
******************************************************************************/
#include "fmpr.h"
void
fmpr_divappr_abs_ubound(fmpr_t z, const fmpr_t x, const fmpr_t y, long prec)
{
if (fmpr_is_special(x) || fmpr_is_special(y) || fmpz_is_pm1(fmpr_manref(y)))
{
fmpr_div(z, x, y, prec, FMPR_RND_UP);
fmpr_abs(z, z);
}
else
{
fmpz_t t, u;
long xbits, ybits, tbits, ubits, shift;
xbits = fmpz_bits(fmpr_manref(x));
ybits = fmpz_bits(fmpr_manref(y));
fmpz_init(t);
fmpz_init(u);
ubits = FLINT_MIN(ybits, prec);
tbits = prec + ubits + 1;
/* upper bound for |x|, shifted */
if (xbits <= tbits)
{
fmpz_mul_2exp(t, fmpr_manref(x), tbits - xbits);
fmpz_abs(t, t);
}
else if (fmpz_sgn(fmpr_manref(x)) > 0)
{
fmpz_cdiv_q_2exp(t, fmpr_manref(x), xbits - tbits);
}
else
{
fmpz_fdiv_q_2exp(t, fmpr_manref(x), xbits - tbits);
fmpz_neg(t, t);
}
/* lower bound for |y|, shifted */
if (ybits <= ubits)
fmpz_mul_2exp(u, fmpr_manref(y), ubits - ybits);
else
fmpz_tdiv_q_2exp(u, fmpr_manref(y), ybits - ubits);
fmpz_abs(u, u);
fmpz_cdiv_q(fmpr_manref(z), t, u);
shift = (ubits - ybits) - (tbits - xbits);
fmpz_sub(fmpr_expref(z), fmpr_expref(x), fmpr_expref(y));
if (shift >= 0)
fmpz_add_ui(fmpr_expref(z), fmpr_expref(z), shift);
else
fmpz_sub_ui(fmpr_expref(z), fmpr_expref(z), -shift);
_fmpr_normalise(fmpr_manref(z), fmpr_expref(z), prec, FMPR_RND_UP);
fmpz_clear(t);
fmpz_clear(u);
}
}

View file

@ -0,0 +1,80 @@
/*=============================================================================
This file is part of ARB.
ARB is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
ARB is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with ARB; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
=============================================================================*/
/******************************************************************************
Copyright (C) 2013 Fredrik Johansson
******************************************************************************/
#include "fmpr.h"
int main()
{
long iter;
flint_rand_t state;
printf("divappr_abs_ubound....");
fflush(stdout);
flint_randinit(state);
for (iter = 0; iter < 10000; iter++)
{
fmpr_t a, b, c, d;
long prec;
fmpr_init(a);
fmpr_init(b);
fmpr_init(c);
fmpr_init(d);
fmpr_randtest_special(a, state, 2 + n_randint(state, 200), 100);
fmpr_randtest_special(b, state, 2 + n_randint(state, 200), 100);
fmpr_randtest_special(c, state, 2 + n_randint(state, 200), 100);
fmpr_randtest_special(d, state, 2 + n_randint(state, 200), 100);
prec = 2 + n_randint(state, 200);
fmpr_div(c, a, b, prec, FMPR_RND_UP);
fmpr_abs(c, c);
fmpr_divappr_abs_ubound(d, a, b, prec);
if (fmpr_cmp(c, d) > 0)
{
printf("FAIL:\n");
fmpr_printd(a, prec / 3.32); printf("\n");
fmpr_printd(b, prec / 3.32); printf("\n");
fmpr_printd(c, prec / 3.32); printf("\n");
fmpr_printd(d, prec / 3.32); printf("\n");
abort();
}
fmpr_clear(a);
fmpr_clear(b);
fmpr_clear(c);
fmpr_clear(d);
}
flint_randclear(state);
_fmpz_cleanup();
printf("PASS\n");
return EXIT_SUCCESS;
}

View file

@ -133,7 +133,7 @@ fmprb_atan(fmprb_t z, const fmprb_t x, long prec)
{ {
fmpr_mul(t, t, t, FMPRB_RAD_PREC, FMPR_RND_DOWN); fmpr_mul(t, t, t, FMPRB_RAD_PREC, FMPR_RND_DOWN);
fmpr_add_ui(t, t, 1UL, FMPRB_RAD_PREC, FMPR_RND_DOWN); fmpr_add_ui(t, t, 1UL, FMPRB_RAD_PREC, FMPR_RND_DOWN);
fmpr_div(t, fmprb_radref(x), t, FMPRB_RAD_PREC, FMPR_RND_UP); fmpr_divappr_abs_ubound(t, fmprb_radref(x), t, FMPRB_RAD_PREC);
} }
else else
{ {

View file

@ -25,40 +25,54 @@
#include "fmprb.h" #include "fmprb.h"
static __inline__ void
fmprb_div_zero(fmprb_t z)
{
fmpr_zero(fmprb_midref(z));
fmpr_pos_inf(fmprb_radref(z));
return;
}
void
fmprb_div_fmpr(fmprb_t z, const fmprb_t x, const fmpr_t y, long prec)
{
long r;
if (fmpr_is_zero(y))
{
fmprb_div_zero(z);
}
else if (fmprb_is_exact(x))
{
r = fmpr_div(fmprb_midref(z), fmprb_midref(x), y, prec, FMPR_RND_DOWN);
fmpr_set_error_result(fmprb_radref(z), fmprb_midref(z), r);
}
else
{
/* (x + a) / y = x/y + a/y */
fmpr_divappr_abs_ubound(fmprb_radref(z), fmprb_radref(x), y, FMPRB_RAD_PREC);
fmpr_abs(fmprb_radref(z), fmprb_radref(z));
r = fmpr_div(fmprb_midref(z), fmprb_midref(x), y, prec, FMPR_RND_DOWN);
fmpr_add_error_result(fmprb_radref(z), fmprb_radref(z),
fmprb_midref(z), r, FMPRB_RAD_PREC, FMPR_RND_UP);
}
}
void void
fmprb_div(fmprb_t z, const fmprb_t x, const fmprb_t y, long prec) fmprb_div(fmprb_t z, const fmprb_t x, const fmprb_t y, long prec)
{ {
long r; long r;
if (fmprb_contains_zero(y))
{
fmpr_zero(fmprb_midref(z));
fmpr_pos_inf(fmprb_radref(z));
return;
}
if (fmprb_is_exact(y)) if (fmprb_is_exact(y))
{ {
if (fmprb_is_exact(x)) fmprb_div_fmpr(z, x, fmprb_midref(y), prec);
{
r = fmpr_div(fmprb_midref(z), fmprb_midref(x), fmprb_midref(y), prec, FMPR_RND_DOWN);
fmpr_set_error_result(fmprb_radref(z), fmprb_midref(z), r);
}
else
{
/* (x + a) / y = x/y + a/y */
fmpr_div(fmprb_radref(z), fmprb_radref(x), fmprb_midref(y), FMPRB_RAD_PREC, FMPR_RND_UP);
fmpr_abs(fmprb_radref(z), fmprb_radref(z));
r = fmpr_div(fmprb_midref(z), fmprb_midref(x), fmprb_midref(y), prec, FMPR_RND_DOWN);
fmpr_add_error_result(fmprb_radref(z), fmprb_radref(z),
fmprb_midref(z), r, FMPRB_RAD_PREC, FMPR_RND_UP);
}
} }
else else
{ {
fmpr_t t, u; fmpr_t t, u;
fmpr_init(t); fmpr_init(t);
fmpr_init(u); fmpr_init(u);
@ -71,18 +85,28 @@ fmprb_div(fmprb_t z, const fmprb_t x, const fmprb_t y, long prec)
/* denominator of error bound: |y|(|y|-b), rounded down */ /* denominator of error bound: |y|(|y|-b), rounded down */
if (fmpr_sgn(fmprb_midref(y)) > 0) if (fmpr_sgn(fmprb_midref(y)) > 0)
fmpr_sub(u, fmprb_radref(y), fmprb_midref(y), FMPRB_RAD_PREC, FMPR_RND_DOWN); {
fmpr_sub(u, fmprb_midref(y), fmprb_radref(y), FMPRB_RAD_PREC, FMPR_RND_DOWN);
}
else else
fmpr_add(u, fmprb_radref(y), fmprb_midref(y), FMPRB_RAD_PREC, FMPR_RND_DOWN); {
fmpr_mul(u, u, fmprb_midref(y), FMPRB_RAD_PREC, FMPR_RND_DOWN); fmpr_add(u, fmprb_midref(y), fmprb_radref(y), FMPRB_RAD_PREC, FMPR_RND_DOWN);
fmpr_abs(u, u); fmpr_neg(u, u);
}
/* error bound */ if (fmpr_sgn(u) <= 0 || fmpr_is_nan(u))
fmpr_div(t, t, u, FMPRB_RAD_PREC, FMPR_RND_UP); {
fmprb_div_zero(z);
}
else
{
fmpr_mul(u, u, fmprb_midref(y), FMPRB_RAD_PREC, FMPR_RND_DOWN);
fmpr_divappr_abs_ubound(t, t, u, FMPRB_RAD_PREC);
r = fmpr_div(fmprb_midref(z), fmprb_midref(x), fmprb_midref(y), prec, FMPR_RND_DOWN); r = fmpr_div(fmprb_midref(z), fmprb_midref(x), fmprb_midref(y), prec, FMPR_RND_DOWN);
fmpr_add_error_result(fmprb_radref(z), t, fmpr_add_error_result(fmprb_radref(z), t,
fmprb_midref(z), r, FMPRB_RAD_PREC, FMPR_RND_UP); fmprb_midref(z), r, FMPRB_RAD_PREC, FMPR_RND_UP);
}
fmpr_clear(t); fmpr_clear(t);
fmpr_clear(u); fmpr_clear(u);

View file

@ -176,7 +176,7 @@ fmprb_log(fmprb_t y, const fmprb_t x, long prec)
} }
else else
{ {
fmpr_div(err, fmprb_radref(x), err, FMPRB_RAD_PREC, FMPR_RND_UP); fmpr_divappr_abs_ubound(err, fmprb_radref(x), err, FMPRB_RAD_PREC);
fmpr_log1p_ubound(err, err); fmpr_log1p_ubound(err, err);
} }

View file

@ -74,7 +74,7 @@ fmprb_root(fmprb_t z, const fmprb_t x, ulong k, long prec)
/* derivative x^(1/k) / (x k) at lower point */ /* derivative x^(1/k) / (x k) at lower point */
fmpr_root(t, err, k, FMPRB_RAD_PREC, FMPR_RND_UP); fmpr_root(t, err, k, FMPRB_RAD_PREC, FMPR_RND_UP);
fmpr_mul_ui(u, err, k, FMPRB_RAD_PREC, FMPR_RND_DOWN); fmpr_mul_ui(u, err, k, FMPRB_RAD_PREC, FMPR_RND_DOWN);
fmpr_div(t, t, u, FMPRB_RAD_PREC, FMPR_RND_UP); fmpr_divappr_abs_ubound(t, t, u, FMPRB_RAD_PREC);
/* multiplied by distance */ /* multiplied by distance */
fmpr_mul(err, t, fmprb_radref(x), FMPRB_RAD_PREC, FMPR_RND_UP); fmpr_mul(err, t, fmprb_radref(x), FMPRB_RAD_PREC, FMPR_RND_UP);

View file

@ -54,7 +54,7 @@ fmprb_rsqrt(fmprb_t z, const fmprb_t x, long prec)
/* error bound: (1/2) t^(-3/2) * rad */ /* error bound: (1/2) t^(-3/2) * rad */
fmpr_rsqrt(err, t, FMPRB_RAD_PREC, FMPR_RND_UP); fmpr_rsqrt(err, t, FMPRB_RAD_PREC, FMPR_RND_UP);
fmpr_div(err, err, t, FMPRB_RAD_PREC, FMPR_RND_UP); fmpr_divappr_abs_ubound(err, err, t, FMPRB_RAD_PREC);
fmpr_mul(err, err, fmprb_radref(x), FMPRB_RAD_PREC, FMPR_RND_UP); fmpr_mul(err, err, fmprb_radref(x), FMPRB_RAD_PREC, FMPR_RND_UP);
fmpr_mul_2exp_si(err, err, -1); fmpr_mul_2exp_si(err, err, -1);