mirror of
https://github.com/vale981/arb
synced 2025-03-06 01:41:39 -05:00
418 lines
8.6 KiB
C
418 lines
8.6 KiB
C
/*=============================================================================
|
|
|
|
This file is part of ARB.
|
|
|
|
ARB is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
ARB is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with ARB; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
=============================================================================*/
|
|
/******************************************************************************
|
|
|
|
Copyright (C) 2012 Fredrik Johansson
|
|
|
|
******************************************************************************/
|
|
|
|
#ifndef UFLOAT_H
|
|
#define UFLOAT_H
|
|
|
|
#include <stdio.h>
|
|
#include <mpir.h>
|
|
#include "flint.h"
|
|
#include "fmpz.h"
|
|
#include "ulong_extras.h"
|
|
#include "math.h"
|
|
|
|
static __inline__ mp_limb_t
|
|
n_rshift_ceil(mp_limb_t a, int k)
|
|
{
|
|
return (a >> k) + (((a >> k) << k) != a);
|
|
}
|
|
|
|
/*
|
|
A ufloat holds a nonzero bound for the magnitude of a number.
|
|
The mantissa has exactly UFLOAT_PREC bits, which must be at most
|
|
FLINT_BITS / 2.
|
|
*/
|
|
|
|
#define UFLOAT_PREC 15
|
|
#define UFLOAT_MAN_MIN (1UL << (UFLOAT_PREC - 1))
|
|
#define UFLOAT_MAN_MAX ((1UL << UFLOAT_PREC) - 1)
|
|
|
|
typedef struct
|
|
{
|
|
mp_limb_t man;
|
|
long exp;
|
|
}
|
|
ufloat_struct;
|
|
|
|
typedef ufloat_struct ufloat_t[1];
|
|
|
|
static __inline__ void
|
|
ufloat_print(const ufloat_t x)
|
|
{
|
|
double t = ldexp(x->man, x->exp - UFLOAT_PREC);
|
|
|
|
printf("ufloat{man=%lu, exp=%ld, %.20g}\n", x->man, x->exp, t);
|
|
}
|
|
|
|
static __inline__ void
|
|
ufloat_normalise(ufloat_t z)
|
|
{
|
|
int b = FLINT_BIT_COUNT(z->man);
|
|
|
|
if (b < UFLOAT_PREC)
|
|
{
|
|
z->man <<= (UFLOAT_PREC - b);
|
|
z->exp -= (UFLOAT_PREC - b);
|
|
}
|
|
else
|
|
{
|
|
int adjust;
|
|
|
|
z->man = n_rshift_ceil(z->man, (b - UFLOAT_PREC));
|
|
z->exp += (b - UFLOAT_PREC);
|
|
|
|
/* possible overflow to power of two */
|
|
adjust = z->man >> UFLOAT_PREC;
|
|
z->man = n_rshift_ceil(z->man, adjust);
|
|
z->exp += adjust;
|
|
}
|
|
}
|
|
|
|
|
|
static __inline__ int
|
|
ufloat_cmp_one(const ufloat_t u)
|
|
{
|
|
if (u->exp == 1L)
|
|
return u->man == (1UL << (UFLOAT_PREC - 1)) ? 0 : 1;
|
|
else
|
|
return u->exp < 1L ? -1 : 1;
|
|
}
|
|
|
|
static __inline__ void
|
|
ufloat_set(ufloat_t u, const ufloat_t v)
|
|
{
|
|
u->man = v->man;
|
|
u->exp = v->exp;
|
|
}
|
|
|
|
|
|
static __inline__ void
|
|
ufloat_zero(ufloat_t u)
|
|
{
|
|
u->man = 0UL;
|
|
u->exp = 0L;
|
|
}
|
|
|
|
/* TODO: add constant_p handling; overflow */
|
|
static __inline__ void
|
|
ufloat_set_ui_2exp(ufloat_t u, mp_limb_t a, long exp)
|
|
{
|
|
u->man = a;
|
|
u->exp = exp + UFLOAT_PREC;
|
|
ufloat_normalise(u);
|
|
}
|
|
|
|
/* TODO: overflow */
|
|
static __inline__ void
|
|
ufloat_set_ll_2exp(ufloat_t u, mp_limb_t hi, mp_limb_t lo, long exp)
|
|
{
|
|
if (hi == 0UL)
|
|
{
|
|
ufloat_set_ui_2exp(u, lo, exp);
|
|
}
|
|
else
|
|
{
|
|
unsigned int c;
|
|
count_leading_zeros(c, hi);
|
|
|
|
if (c != 0)
|
|
hi = (hi << c) | (lo >> (FLINT_BITS - c));
|
|
|
|
if (hi + 1UL != 0UL)
|
|
ufloat_set_ui_2exp(u, hi + 1UL, exp + FLINT_BITS - c);
|
|
else
|
|
ufloat_set_ui_2exp(u, 1UL, exp + 2 * FLINT_BITS - c);
|
|
}
|
|
}
|
|
|
|
static __inline__ void
|
|
ufloat_one(ufloat_t u)
|
|
{
|
|
u->man = (1UL << (UFLOAT_PREC - 1));
|
|
u->exp = 1L;
|
|
}
|
|
|
|
static __inline__ void
|
|
ufloat_add(ufloat_t z, const ufloat_t x, const ufloat_t y)
|
|
{
|
|
long shift;
|
|
int adjust;
|
|
|
|
shift = x->exp - y->exp;
|
|
|
|
if (shift >= 0)
|
|
{
|
|
z->exp = x->exp;
|
|
if (shift >= UFLOAT_PREC)
|
|
z->man = x->man + 1;
|
|
else
|
|
z->man = x->man + n_rshift_ceil(y->man, shift);
|
|
}
|
|
else
|
|
{
|
|
shift = -shift;
|
|
z->exp = y->exp;
|
|
if (shift >= UFLOAT_PREC)
|
|
z->man = y->man + 1;
|
|
else
|
|
z->man = y->man + n_rshift_ceil(x->man, shift);
|
|
}
|
|
|
|
/* adjust for carry */
|
|
adjust = z->man >> UFLOAT_PREC;
|
|
z->man = n_rshift_ceil(z->man, adjust);
|
|
z->exp += adjust;
|
|
}
|
|
|
|
/* bound |x-y| */
|
|
static __inline__ void
|
|
ufloat_sub(ufloat_t z, const ufloat_t x, const ufloat_t y)
|
|
{
|
|
long shift;
|
|
|
|
mp_limb_t a, b;
|
|
|
|
shift = x->exp - y->exp;
|
|
|
|
if (shift == 0)
|
|
{
|
|
if (x->man >= y->man)
|
|
z->man = x->man - y->man;
|
|
else
|
|
z->man = y->man - x->man;
|
|
z->exp = x->exp;
|
|
ufloat_normalise(z);
|
|
return;
|
|
}
|
|
|
|
if (shift > 0)
|
|
{
|
|
z->exp = x->exp;
|
|
a = x->man;
|
|
b = y->man;
|
|
}
|
|
else
|
|
{
|
|
shift = -shift;
|
|
z->exp = y->exp;
|
|
a = y->man;
|
|
b = x->man;
|
|
}
|
|
|
|
if (shift >= UFLOAT_PREC)
|
|
{
|
|
z->man = a;
|
|
}
|
|
else
|
|
{
|
|
z->man = (a << shift) - b;
|
|
z->exp -= shift;
|
|
}
|
|
|
|
ufloat_normalise(z);
|
|
}
|
|
|
|
static __inline__ void
|
|
ufloat_set_2exp(ufloat_t y, const ufloat_t x, long exp)
|
|
{
|
|
|
|
}
|
|
|
|
static __inline__ void
|
|
ufloat_add_2exp(ufloat_t y, const ufloat_t x, long exp)
|
|
{
|
|
long s, shift = x->exp - exp;
|
|
|
|
/* x is larger than 2^exp */
|
|
if (shift > 0)
|
|
{
|
|
s = FLINT_MAX(0L, UFLOAT_PREC - shift);
|
|
y->man = x->man + (1UL << s);
|
|
y->exp = x->exp;
|
|
}
|
|
/* 2^exp is larger than x */
|
|
else
|
|
{
|
|
s = FLINT_MIN(UFLOAT_PREC - 1L, 1L - shift);
|
|
y->man = (1UL << (UFLOAT_PREC - 1)) + (x->man >> s) + 1UL;
|
|
y->exp = exp + 1L;
|
|
}
|
|
|
|
/* unlikely */
|
|
if (y->man > UFLOAT_MAN_MAX)
|
|
{
|
|
y->man = n_rshift_ceil(y->man, 1);
|
|
y->exp++;
|
|
}
|
|
}
|
|
|
|
|
|
static __inline__ void
|
|
ufloat_mul(ufloat_t z, const ufloat_t x, const ufloat_t y)
|
|
{
|
|
mp_limb_t m;
|
|
int adjust, shift;
|
|
|
|
z->exp = x->exp + y->exp;
|
|
m = x->man * y->man;
|
|
|
|
/* product has one bit too little */
|
|
adjust = m >> (2 * UFLOAT_PREC - 1);
|
|
shift = UFLOAT_PREC - 1 + adjust;
|
|
z->exp += shift;
|
|
|
|
m = n_rshift_ceil(m, shift);
|
|
|
|
/* power of two */
|
|
adjust = m >> UFLOAT_PREC;
|
|
m >>= adjust;
|
|
z->exp += adjust;
|
|
|
|
z->man = m;
|
|
}
|
|
|
|
static __inline__ void
|
|
ufloat_addmul(ufloat_t z, const ufloat_t x, const ufloat_t y)
|
|
{
|
|
ufloat_t t;
|
|
ufloat_mul(t, x, y);
|
|
ufloat_add(z, z, t);
|
|
}
|
|
|
|
static __inline__ void
|
|
ufloat_div(ufloat_t z, const ufloat_t x, const ufloat_t y)
|
|
{
|
|
int adjust;
|
|
|
|
z->man = (x->man << UFLOAT_PREC) / y->man + 1;
|
|
z->exp = x->exp - y->exp;
|
|
|
|
/* quotient can be one bit too large */
|
|
adjust = z->man >> UFLOAT_PREC;
|
|
z->man = n_rshift_ceil(z->man, adjust);
|
|
z->exp += adjust;
|
|
|
|
/* power of two */
|
|
adjust = z->man >> UFLOAT_PREC;
|
|
z->man >>= adjust;
|
|
z->exp += adjust;
|
|
}
|
|
|
|
static __inline__ void
|
|
ufloat_randtest(ufloat_t u, flint_rand_t state, long erange)
|
|
{
|
|
u->man = n_randbits(state, UFLOAT_PREC);
|
|
u->man |= (1UL << (UFLOAT_PREC - 1));
|
|
u->exp = n_randint(state, erange) - (erange / 2) + 1;
|
|
}
|
|
|
|
static __inline__ void
|
|
ufloat_set_fmpz(ufloat_t u, const fmpz_t z)
|
|
{
|
|
u->man = fmpz_abs_ubound_ui_2exp(&u->exp, z, UFLOAT_PREC);
|
|
u->exp += UFLOAT_PREC;
|
|
}
|
|
|
|
static __inline__ void
|
|
ufloat_set_fmpz_lower(ufloat_t u, const fmpz_t z)
|
|
{
|
|
u->man = fmpz_abs_lbound_ui_2exp(&u->exp, z, UFLOAT_PREC);
|
|
u->exp += UFLOAT_PREC;
|
|
}
|
|
|
|
static __inline__ void
|
|
ufloat_get_fmpz(fmpz_t z, const ufloat_t u)
|
|
{
|
|
long exp = u->exp - UFLOAT_PREC;
|
|
|
|
_fmpz_demote(z);
|
|
*z = u->man;
|
|
|
|
if (exp >= 0)
|
|
{
|
|
if (exp < FLINT_BITS - UFLOAT_PREC - 2)
|
|
*z = (u->man << exp);
|
|
else
|
|
fmpz_mul_2exp(z, z, exp);
|
|
}
|
|
else
|
|
{
|
|
if (exp > -FLINT_BITS)
|
|
*z = n_rshift_ceil(u->man, -exp);
|
|
else
|
|
*z = 1;
|
|
}
|
|
}
|
|
|
|
static __inline__ int
|
|
ufloat_equal(const ufloat_t u, const ufloat_t v)
|
|
{
|
|
return u->man == v->man && u->exp == v->exp;
|
|
}
|
|
|
|
static __inline__ void
|
|
ufloat_get_mpfr(mpfr_t x, const ufloat_t u)
|
|
{
|
|
mpfr_set_ui_2exp(x, u->man, u->exp - UFLOAT_PREC, MPFR_RNDU);
|
|
}
|
|
|
|
static __inline__ void
|
|
ufloat_set_mpfr(ufloat_t u, const mpfr_t x)
|
|
{
|
|
if (mpfr_zero_p(x))
|
|
{
|
|
u->man = 0;
|
|
u->exp = 0;
|
|
}
|
|
else
|
|
{
|
|
mpz_t t;
|
|
long exp, bits;
|
|
|
|
mpz_init(t);
|
|
exp = mpfr_get_z_2exp(t, x);
|
|
mpz_abs(t, t);
|
|
|
|
bits = mpz_sizeinbase(t, 2);
|
|
|
|
if (bits > UFLOAT_PREC)
|
|
{
|
|
mpz_cdiv_q_2exp(t, t, bits - UFLOAT_PREC);
|
|
exp += bits - UFLOAT_PREC;
|
|
}
|
|
|
|
u->exp = exp + UFLOAT_PREC;
|
|
u->man = mpz_get_ui(t);
|
|
ufloat_normalise(u);
|
|
|
|
mpz_clear(t);
|
|
}
|
|
}
|
|
|
|
void ufloat_log(ufloat_t z, const ufloat_t x);
|
|
|
|
void ufloat_log1p(ufloat_t z, const ufloat_t x);
|
|
|
|
#endif
|