From e72abb11efcce2b0a68134714af851870a051e2e Mon Sep 17 00:00:00 2001 From: Fredrik Johansson Date: Sun, 4 May 2014 02:01:18 +0200 Subject: [PATCH] division code --- arf.h | 120 ++++++++++++++++++++++++++++++++ arf/div.c | 140 +++++++++++++++++++++++++++++++++++++ arf/test/t-div.c | 178 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 438 insertions(+) create mode 100644 arf/div.c create mode 100644 arf/test/t-div.c diff --git a/arf.h b/arf.h index 8945fca4..6971746b 100644 --- a/arf.h +++ b/arf.h @@ -409,6 +409,32 @@ arf_neg(arf_t y, const arf_t x) } } +static __inline__ void +arf_init_set_ui(arf_t x, ulong v) +{ + if (v == 0) + { + ARF_EXP(x) = ARF_EXP_ZERO; + ARF_XSIZE(x) = 0; + } + else + { + unsigned int c; + count_leading_zeros(c, v); + ARF_EXP(x) = FLINT_BITS - c; + ARF_NOPTR_D(x)[0] = v << c; + ARF_XSIZE(x) = ARF_MAKE_XSIZE(1, 0); + } +} + +static __inline__ void +arf_init_set_si(arf_t x, long v) +{ + arf_init_set_ui(x, FLINT_ABS(v)); + if (v < 0) + ARF_NEG(x); +} + static __inline__ void arf_set_ui(arf_t x, ulong v) { @@ -753,6 +779,27 @@ _fmpz_add2_fast(fmpz_t z, const fmpz_t x, const fmpz_t y, long c) } } +static __inline__ void +_fmpz_sub2_fast(fmpz_t z, const fmpz_t x, const fmpz_t y, long c) +{ + fmpz ze, xe, ye; + + ze = *z; + xe = *x; + ye = *y; + + if (!COEFF_IS_MPZ(ze) && (xe > ADD2_FAST_MIN && xe < ADD2_FAST_MAX) && + (ye > ADD2_FAST_MIN && ye < ADD2_FAST_MAX)) + { + *z = xe - ye + c; + } + else + { + fmpz_sub(z, x, y); + fmpz_add_si(z, z, c); + } +} + #define MUL_MPFR_MIN_LIMBS 25 #define MUL_MPFR_MAX_LIMBS 10000 @@ -920,6 +967,79 @@ int arf_sub_si(arf_ptr z, arf_srcptr x, long y, long prec, arf_rnd_t rnd); int arf_sub_ui(arf_ptr z, arf_srcptr x, ulong y, long prec, arf_rnd_t rnd); int arf_sub_fmpz(arf_ptr z, arf_srcptr x, const fmpz_t y, long prec, arf_rnd_t rnd); +int arf_div(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec, arf_rnd_t rnd); + +static __inline__ int +arf_div_ui(arf_ptr z, arf_srcptr x, ulong y, long prec, arf_rnd_t rnd) +{ + arf_t t; + arf_init_set_ui(t, y); /* no need to free */ + return arf_div(z, x, t, prec, rnd); +} + +static __inline__ int +arf_ui_div(arf_ptr z, ulong x, arf_srcptr y, long prec, arf_rnd_t rnd) +{ + arf_t t; + arf_init_set_ui(t, x); /* no need to free */ + return arf_div(z, t, y, prec, rnd); +} + +static __inline__ int +arf_div_si(arf_ptr z, arf_srcptr x, long y, long prec, arf_rnd_t rnd) +{ + arf_t t; + arf_init_set_si(t, y); /* no need to free */ + return arf_div(z, x, t, prec, rnd); +} + +static __inline__ int +arf_si_div(arf_ptr z, long x, arf_srcptr y, long prec, arf_rnd_t rnd) +{ + arf_t t; + arf_init_set_si(t, x); /* no need to free */ + return arf_div(z, t, y, prec, rnd); +} + +static __inline__ int +arf_div_fmpz(arf_ptr z, arf_srcptr x, const fmpz_t y, long prec, arf_rnd_t rnd) +{ + arf_t t; + int r; + arf_init(t); + arf_set_fmpz(t, y); + r = arf_div(z, x, t, prec, rnd); + arf_clear(t); + return r; +} + +static __inline__ int +arf_fmpz_div(arf_ptr z, const fmpz_t x, arf_srcptr y, long prec, arf_rnd_t rnd) +{ + arf_t t; + int r; + arf_init(t); + arf_set_fmpz(t, x); + r = arf_div(z, t, y, prec, rnd); + arf_clear(t); + return r; +} + +static __inline__ int +arf_fmpz_div_fmpz(arf_ptr z, const fmpz_t x, const fmpz_t y, long prec, arf_rnd_t rnd) +{ + arf_t t, u; + int r; + arf_init(t); + arf_init(u); + arf_set_fmpz(t, x); + arf_set_fmpz(u, y); + r = arf_div(z, t, u, prec, rnd); + arf_clear(t); + arf_clear(u); + return r; +} + #ifdef __cplusplus } #endif diff --git a/arf/div.c b/arf/div.c new file mode 100644 index 00000000..b3a99f92 --- /dev/null +++ b/arf/div.c @@ -0,0 +1,140 @@ +/*============================================================================= + + This file is part of ARB. + + ARB is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + ARB is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ARB; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +=============================================================================*/ +/****************************************************************************** + + Copyright (C) 2014 Fredrik Johansson + +******************************************************************************/ + +#include "arf.h" +#include "mpn_extras.h" + +void +arf_div_special(arf_t z, const arf_t x, const arf_t y) +{ + if ((arf_is_zero(x) && !arf_is_zero(y) && !arf_is_nan(y)) || + (arf_is_inf(y) && !arf_is_special(x))) + { + arf_zero(z); + } + else if (arf_is_zero(y) || (arf_is_special(x) && arf_is_special(y)) || + arf_is_nan(x) || arf_is_nan(y)) + { + arf_nan(z); + } + else if (arf_sgn(x) == arf_sgn(y)) + arf_pos_inf(z); + else + arf_neg_inf(z); +} + +int +arf_div(arf_ptr z, arf_srcptr x, arf_srcptr y, long prec, arf_rnd_t rnd) +{ + mp_size_t xn, yn, zn, sn, tn, alloc; + mp_srcptr xptr, yptr; + mp_ptr tmp; + mp_ptr tptr, zptr; + int inexact; + long fix, fix2; + ARF_MUL_TMP_DECL + + if (arf_is_special(x) || arf_is_special(y)) + { + arf_div_special(z, x, y); + return 0; + } + + ARF_GET_MPN_READONLY(xptr, xn, x); + ARF_GET_MPN_READONLY(yptr, yn, y); + + /* Division by a power of two */ + if (yn == 1 && yptr[0] == LIMB_TOP) + { + fmpz_t t; + fmpz_init_set(t, ARF_EXPREF(y)); + + if (ARF_SGNBIT(y)) + inexact = arf_neg_round(z, x, prec, rnd); + else + inexact = arf_set_round(z, x, prec, rnd); + + _fmpz_sub2_fast(ARF_EXPREF(z), ARF_EXPREF(z), t, 1); + return inexact; + } + + sn = FLINT_MAX(prec - xn * FLINT_BITS + yn * FLINT_BITS, 0) + 32; + sn = (sn + FLINT_BITS - 1) / FLINT_BITS; + + /* Need space for numerator (tn), quotient (zn), and one extra limb at the + top of tptr for the multiplication to check the remainder. */ + tn = xn + sn; + zn = tn - yn + 1; + alloc = zn + (tn + 1); + + ARF_MUL_TMP_ALLOC(tmp, alloc) + + zptr = tmp; + tptr = tmp + zn; + + flint_mpn_zero(tptr, sn); + flint_mpn_copyi(tptr + sn, xptr, xn); + mpn_tdiv_q(zptr, tptr, tn, yptr, yn); + + if (zptr[zn - 1] == 0) + { + zn--; + fix2 = 0; + } + else + { + fix2 = FLINT_BITS; + } + + /* The remainder can only be zero if the last several bits of the + extended quotient are zero. */ + if ((zptr[0] & ((LIMB_ONE << 24) - 1)) == 0) + { + /* The quotient is exact iff multiplying by y gives back the + input. Note: the multiplication may write sn + xn + 1 limbs, but + tptr[sn + xn] is guaranteed to be zero in that case since the + approximate quotient cannot be larger than the true quotient. */ + if (zn >= yn) + mpn_mul(tptr, zptr, zn, yptr, yn); + else + mpn_mul(tptr, yptr, yn, zptr, zn); + + /* The quotient is not exact. Perturbing the approximate quotient + and rounding gives the correct the result. */ + if (!flint_mpn_zero_p(tptr, sn) || + mpn_cmp(tptr + sn, xptr, xn) != 0) + { + zptr[0]++; + } + } + + inexact = _arf_set_round_mpn(z, &fix, zptr, zn, + ARF_SGNBIT(x) ^ ARF_SGNBIT(y), prec, rnd); + _fmpz_sub2_fast(ARF_EXPREF(z), ARF_EXPREF(x), ARF_EXPREF(y), fix + fix2); + ARF_MUL_TMP_FREE(tmp, alloc) + + return inexact; +} + diff --git a/arf/test/t-div.c b/arf/test/t-div.c new file mode 100644 index 00000000..553337a7 --- /dev/null +++ b/arf/test/t-div.c @@ -0,0 +1,178 @@ +/*============================================================================= + + This file is part of ARB. + + ARB is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + ARB is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ARB; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +=============================================================================*/ +/****************************************************************************** + + Copyright (C) 2012 Fredrik Johansson + +******************************************************************************/ + +#include "arf.h" + +int +arf_div_naive(arf_t z, const arf_t x, const arf_t y, long prec, arf_rnd_t rnd) +{ + fmpr_t a, b; + long r; + + fmpr_init(a); + fmpr_init(b); + + arf_get_fmpr(a, x); + arf_get_fmpr(b, y); + + r = fmpr_div(a, a, b, prec, rnd); + + arf_set_fmpr(z, a); + + fmpr_clear(a); + fmpr_clear(b); + + return (r == FMPR_RESULT_EXACT) ? 0 : 1; +} + +int main() +{ + long iter, iter2; + flint_rand_t state; + + printf("div...."); + fflush(stdout); + + flint_randinit(state); + + for (iter = 0; iter < 10000; iter++) + { + arf_t x, y, z, v; + long prec, r1, r2; + arf_rnd_t rnd; + + arf_init(x); + arf_init(y); + arf_init(z); + arf_init(v); + + for (iter2 = 0; iter2 < 100; iter2++) + { + arf_randtest_special(x, state, 2000, 100); + arf_randtest_special(y, state, 2000, 100); + prec = 2 + n_randint(state, 2000); + + if (n_randint(state, 20) == 0) + arf_mul(x, x, y, 2 + n_randint(state, 3000), ARF_RND_FLOOR); + + switch (n_randint(state, 4)) + { + case 0: rnd = ARF_RND_DOWN; break; + case 1: rnd = ARF_RND_UP; break; + case 2: rnd = ARF_RND_FLOOR; break; + default: rnd = ARF_RND_CEIL; break; + } + + switch (n_randint(state, 5)) + { + case 0: + r1 = arf_div(z, x, y, prec, rnd); + r2 = arf_div_naive(v, x, y, prec, rnd); + if (!arf_equal(z, v) || r1 != r2) + { + printf("FAIL!\n"); + printf("prec = %ld, rnd = %d\n\n", prec, rnd); + printf("x = "); arf_print(x); printf("\n\n"); + printf("y = "); arf_print(y); printf("\n\n"); + printf("z = "); arf_print(z); printf("\n\n"); + printf("v = "); arf_print(v); printf("\n\n"); + printf("r1 = %ld, r2 = %ld\n", r1, r2); + abort(); + } + break; + + case 1: + r1 = arf_div(z, x, x, prec, rnd); + r2 = arf_div_naive(v, x, x, prec, rnd); + if (!arf_equal(z, v) || r1 != r2) + { + printf("FAIL (aliasing 1)!\n"); + printf("prec = %ld, rnd = %d\n\n", prec, rnd); + printf("x = "); arf_print(x); printf("\n\n"); + printf("z = "); arf_print(z); printf("\n\n"); + printf("v = "); arf_print(v); printf("\n\n"); + printf("r1 = %ld, r2 = %ld\n", r1, r2); + abort(); + } + break; + + case 2: + r2 = arf_div_naive(v, x, x, prec, rnd); + r1 = arf_div(x, x, x, prec, rnd); + if (!arf_equal(v, x) || r1 != r2) + { + printf("FAIL (aliasing 2)!\n"); + printf("prec = %ld, rnd = %d\n\n", prec, rnd); + printf("x = "); arf_print(x); printf("\n\n"); + printf("z = "); arf_print(z); printf("\n\n"); + printf("v = "); arf_print(v); printf("\n\n"); + printf("r1 = %ld, r2 = %ld\n", r1, r2); + abort(); + } + break; + + case 3: + r2 = arf_div_naive(v, x, y, prec, rnd); + r1 = arf_div(x, x, y, prec, rnd); + if (!arf_equal(x, v) || r1 != r2) + { + printf("FAIL (aliasing 3)!\n"); + printf("prec = %ld, rnd = %d\n\n", prec, rnd); + printf("x = "); arf_print(x); printf("\n\n"); + printf("y = "); arf_print(y); printf("\n\n"); + printf("v = "); arf_print(v); printf("\n\n"); + printf("r1 = %ld, r2 = %ld\n", r1, r2); + abort(); + } + break; + + default: + r2 = arf_div_naive(v, y, x, prec, rnd); + r1 = arf_div(x, y, x, prec, rnd); + if (!arf_equal(x, v) || r1 != r2) + { + printf("FAIL (aliasing 4)!\n"); + printf("prec = %ld, rnd = %d\n\n", prec, rnd); + printf("x = "); arf_print(x); printf("\n\n"); + printf("y = "); arf_print(y); printf("\n\n"); + printf("v = "); arf_print(v); printf("\n\n"); + printf("r1 = %ld, r2 = %ld\n", r1, r2); + abort(); + } + break; + } + } + + arf_clear(x); + arf_clear(y); + arf_clear(z); + arf_clear(v); + } + + flint_randclear(state); + flint_cleanup(); + printf("PASS\n"); + return EXIT_SUCCESS; +}