arb/arb_hypgeom/gamma_taylor.c

/*
    Copyright (C) 2021 Fredrik Johansson

    This file is part of Arb.

    Arb is free software: you can redistribute it and/or modify it under
    the terms of the GNU Lesser General Public License (LGPL) as published
    by the Free Software Foundation; either version 2.1 of the License, or
    (at your option) any later version.  See <http://www.gnu.org/licenses/>.
*/

#include "flint/double_extras.h"
#include "arb_hypgeom.h"

#define DEBUG 0


const double arb_hypgeom_rgamma_d_tab[128] = {
    1.0,
    0.57721566490153286061,
    -0.65587807152025388108,
    -0.042002635034095235529,
    0.1665386113822914895,
    -0.042197734555544336748,
    -0.0096219715278769735621,
    0.0072189432466630995424,
    -0.0011651675918590651121,
    -0.00021524167411495097282,
    0.00012805028238811618615,
    -0.000020134854780788238656,
    -1.2504934821426706573e-6,
    1.1330272319816958824e-6,
    -2.0563384169776071035e-7,
    6.1160951044814158179e-9,
    5.0020076444692229301e-9,
    -1.1812745704870201446e-9,
    1.0434267116911005105e-10,
    7.782263439905071254e-12,
    -3.6968056186422057082e-12,
    5.100370287454475979e-13,
    -2.0583260535665067832e-14,
    -5.3481225394230179824e-15,
    1.2267786282382607902e-15,
    -1.1812593016974587695e-16,
    1.1866922547516003326e-18,
    1.4123806553180317816e-18,
    -2.2987456844353702066e-19,
    1.7144063219273374334e-20,
    1.3373517304936931149e-22,
    -2.0542335517666727893e-22,
    2.7360300486079998448e-23,
    -1.7323564459105166391e-24,
    -2.3606190244992872873e-26,
    1.8649829417172944307e-26,
    -2.2180956242071972044e-27,
    1.2977819749479936688e-28,
    1.1806974749665284062e-30,
    -1.1245843492770880903e-30,
    1.277085175140866204e-31,
    -7.3914511696151408235e-33,
    1.134750257554215761e-35,
    4.6391346410587220299e-35,
    -5.3473368184391988751e-36,
    3.2079959236133526229e-37,
    -4.4458297365507568821e-39,
    -1.3111745188819887129e-39,
    1.6470333525438138868e-40,
    -1.0562331785035812186e-41,
    2.6784429826430494784e-43,
    2.4247154948517826897e-44,
    -3.736587834535612554e-45,
    2.6283329809401954491e-46,
    -9.2981759953768862996e-48,
    -2.3279424186994705986e-49,
    6.1696208352443874204e-50,
    -4.9282955867709899305e-51,
    2.1835131834145106973e-52,
    -1.2187221891475165553e-54,
    -7.1171088416628746319e-55,
    6.9205040543286892535e-56,
    -3.6764384683566763277e-57,
    8.563098056275654328e-59,
    4.9630454283668443848e-60,
    -7.1542945770816152182e-61,
    4.5517276890885041177e-62,
    -1.6183993053202944344e-63,
    -3.8180434243999502464e-66,
    5.1850524119058482295e-66,
    -4.1671368092239208861e-67,
    1.9162906929373887193e-68,
    -3.8089281324683658733e-70,
    -2.2063861055924121016e-71,
    2.7722310960098954165e-72,
    -1.5987660478100181057e-73,
    5.3197307804174034028e-75,
    -8.0517461416842390432e-78,
    -1.2484629810263795113e-77,
    9.6431887683992238428e-79,
    -4.2827980483017479213e-80,
    9.5087142369030441861e-82,
    2.7131392138694383464e-83,
    -4.0968779415069156659e-84,
    2.3742980019740160598e-85,
    -8.2770890210072789764e-87,
    9.072497609426645865e-89,
    1.0645558195026985633e-89,
    -9.285335619603754493e-91,
    4.3333135927203670323e-92,
    -1.1745606334673315984e-93,
    -2.6908010752365215433e-96,
    2.3898952892036810357e-96,
    -1.5569361182789167325e-97,
    6.0488748201074133757e-99,
    -1.2273370571029378615e-100,
    -2.540738850916238751e-102,
    3.7708800953170816508e-103,
    -2.0089261677502892352e-104,
    6.6158100911447349361e-106,
    -9.2404702022121568081e-108,
    -4.82072018655246532e-109,
    4.4938898756858357188e-110,
    -2.0497789059725778416e-111,
    5.7862770569866937508e-113,
    -4.5696744624334387424e-115,
    -5.8267365553303743945e-116,
    4.2025380699297338056e-117,
    -1.6889318527713702846e-118,
    4.1226213324018604871e-120,
    -8.2451196593745569675e-123,
    -5.2036993784470216679e-123,
    3.1616685922306712047e-124,
    -1.1432359131094236326e-125,
    2.4359648735131490197e-127,
    8.8701584767164321698e-130,
    -3.6328610892429035156e-130,
    1.9485148907440212068e-131,
    -6.450096583602651512e-133,
    1.215186561728963791e-134,
    1.0637863819629713691e-136,
    -2.0430980587447135517e-137,
    9.9760876002985183681e-139,
    -3.0707428945789381066e-140,
    5.2091832948433107534e-142,
    6.7131589510935005823e-144,
    -9.434301219575868381e-145,
    4.2908149482548296582e-146,
};

#define GAMMA_MIN_X 1.4616321449683623413
#define GAMMA_MIN_Y 0.88560319441088870028

/* Crude upper bound for psi(x) for x > 0, adequate for perturbation bounds
   for gamma. */
double
d_abs_digamma_ubound(double x)
{
    if (x <= 1.0)
    {
        return (1.0 + 1e-14) / x + 0.57721566490153286061 - x + 1e-14;
    }
    else if (x <= GAMMA_MIN_X)
    {
        return -1.250380137503405359*x + 1.8275958024049382196 + 1e-14;
    }
    else if (x <= 8.0)
    {
        return (x - GAMMA_MIN_X) * (1.7581621716802087234 +
            x * (-0.74622516195984912595 + x * (0.17009872711678924164 +
            x * (-0.018637559864260712285 + x * 0.00077747045691426195132)))) + 1e-12;
    }
    else if (x <= 128.0)
    {
        return 0.75334126757115431475 + x * (0.21045131598436795981 +
            x * (-0.0075387469533717503617 + x * (0.00017308475161765275722 +
                x * (-2.4025446500822043239e-6 + x * (1.9547402969088507111e-8 +
            x * (-8.5654894222045481692e-11 + x * 1.5584520745423393038e-13)))))) + 1e-12;
    }
    else
    {
        return (mag_d_log_upper_bound(x) + 1.0 / x) * (1.0 + 1e-14);
    }
}

/* Upper or lower bound (depending on direction) for gamma(x),
   assuming x > 0, no overflow. */
double
_arb_hypgeom_d_gamma(double x, int direction)
{
    double s, t, p;
    int i, r;

    if (direction == 1)
        p = 1 + 1e-14;
    else
        p = 1 - 1e-14;

    if (x < 0.5)
    {
        s = d_polyval(arb_hypgeom_rgamma_d_tab, 19, x);
        s = 1.0 / (s * x);
    }
    else if (x <= 1.5)
    {
        s = 1.0 / d_polyval(arb_hypgeom_rgamma_d_tab, 19, x - 1.0);
    }
    else
    {
        r = (int) (x + 0.5);

        s = d_polyval(arb_hypgeom_rgamma_d_tab, 19, x - r);

        t = 1.0;
        for (i = 0; i < r - 1; i++)
            t *= (x - i - 1) * p;

        s = t / s;
    }

    return s * p;
}

/* Set res = [a, b]; not checking overflow or underflow. */
void arb_set_interval_d_fast(arb_t res, double a, double b, slong prec)
{
    double mid, rad;

    if (a > b)
    {
        flint_printf("arb_set_interval_d_fast: expected a < b\n");
        flint_abort();
    }

    mid = a + 0.5 * (b - a);
    rad = (0.5 * (b - a) + (mid * 1e-15)) * (1 + 1e-15);
    arf_set_d(arb_midref(res), mid);
    mag_set_d(arb_radref(res), rad);
    arb_set_round(res, res, prec);
}

int _arf_increment_fast(arf_t x, slong prec);


/* Try to compute gamma(x) using Taylor series. Returns 1 on success, 0 on
   failure (x too large or precision too large). */
int
arb_hypgeom_gamma_taylor(arb_t res, const arb_t x, int reciprocal, slong prec)
{
    double dx, dxerr, log2u, ds, du;
    slong i, n, wp, r, tail_bound, rad_exp, mid_exp;
    arf_t s, u, v;
    short term_prec[ARB_HYPGEOM_GAMMA_TAB_NUM];
    int success;

#if DEBUG
    printf("INPUT: "); arb_printd(x, 200); printf("\n");
    printf("INPUT prec: %ld\n", prec);
#endif

    /* We don't want to deal with infinities or huge/tiny exponents here. */
    if (!ARB_IS_LAGOM(x))
        return 0;

    /* 2^e bounds for the midpoint and radius. */
    mid_exp = arf_is_zero(arb_midref(x)) ? WORD_MIN : ARF_EXP(arb_midref(x));
    rad_exp = mag_is_zero(arb_radref(x)) ? WORD_MIN : MAG_EXP(arb_radref(x));

    /* Containing zero. */
    if (rad_exp >= mid_exp && arb_contains_zero(x))
    {
        if (reciprocal)
        {
            arb_t t;
            arb_init(t);
            arb_add_ui(t, x, 1, prec + 10);

            if (!arb_contains_zero(t))
            {
                success = arb_hypgeom_gamma_taylor(t, t, reciprocal, prec + 10);
                if (success)
                    arb_mul(res, x, t, prec);
            }
            else
            {
                /* todo: accurate wide interval */
                success = 0;
            }

            arb_clear(t);
            return success;
        }
        else
        {
            arb_indeterminate(res);
            return 1;
        }
    }

    /* Quick exclusion of too large numbers. */
    if (mid_exp > 8 || rad_exp > 8)
        return 0;

    /* Adjust precision if the input is not precise. */
    if (rad_exp != WORD_MIN)
        prec = FLINT_MIN(prec, -rad_exp + MAG_BITS);
    prec = FLINT_MAX(prec, 2);

    /* Midpoint and radius as doubles. */
    dx = arf_get_d(arb_midref(x), ARF_RND_NEAR);
    dxerr = mag_get_d(arb_radref(x));

    /* Too large to be efficient (high precision), or gamma(x) may overflow
       doubles (wide case). */
    if (dx + dxerr > 160.0 || dx - dxerr < -160.0)
        return 0;

    /* Very close to 0, reduce to gamma(x) = gamma(x + 1) / x. */
    if (mid_exp < -32 || (dx - dxerr >= -0.5 && dx - dxerr < ldexp(1.0, -6)))
    {
        arb_t t;
        arb_init(t);
        arb_add_ui(t, x, 1, prec + 10);

#if DEBUG
    printf("DIVIDING NEAR 0\n");
#endif

        success = arb_hypgeom_gamma_taylor(t, t, reciprocal, prec + 10);
        if (success)
        {
            if (reciprocal)
                arb_mul(res, x, t, prec);
            else
                arb_div(res, t, x, prec);
        }

        arb_clear(t);
        return success;
    }

    /* Nearest (roughly) integer to x, to use as shift for argument reduction
       to move to the interval [-0.5,0.5]. It's OK that dx is approximate so
       that the reduced argument will actually lie in [-0.5-eps,0.5+eps]. */
    if (dx >= 0.0)
        r = (slong) (dx + 0.5);
    else
        r = -(slong) (-dx + 0.5);

    /* Tuning cutoff. */
    if (prec >= 40)
    {
        if (r < -(40 + (prec - 40) / 4))
            return 0;

        if (r > 70 + (prec - 40) / 8)
            return 0;
    }

    /* For negative numbers, reduce to the positive case. */
    /* gamma(x) = (-1)^r * gamma(1+x-r) / (rf(1+r-x,-r)*(x-r)) */
    /* 1/gamma(x) = (-1)^r * rgamma(1+x-r) * rf(1+r-x,-r) * (x-r) */
    if (dx < 0.0)
    {
        arb_t t, u, v;

        arb_init(t);
        arb_init(u);
        arb_init(v);

        arb_sub_si(t, x, r, prec + 10);

        /* Pole. */
        if (!reciprocal && arb_contains_zero(t))
        {
            arb_indeterminate(res);
            success = 1;
        }
        else
        {
            arb_add_si(u, x, 1 - r, prec + 10);

            success = 1;
            if (reciprocal && !arb_is_positive(u))
            {
                /* todo: accurate wide interval */
                success = 0;
            }

            success = arb_hypgeom_gamma_taylor(u, u, reciprocal, prec + 10);

            if (success)
            {
                /* Wide bounds for rising factorial. */
                if (prec < 44)
                {
                    double a, b, c, d;

                    c = (-dx + r + 1 - dxerr) * (1 - 1e-14);
                    d = (-dx + r + 1 + dxerr) * (1 + 1e-14);
                    a = b = 1.0;

                    for (i = 0; i < -r; i++)
                    {
                        a = a * ((c + i) * (1 - 1e-15));
                        b = b * ((d + i) * (1 + 1e-15));
                    }

                    arb_set_interval_d_fast(v, a, b, 53);

                    if (reciprocal)
                    {
                        arb_mul(res, u, v, prec + 10);
                        arb_mul(res, res, t, prec);
                    }
                    else
                    {
                        arb_div(res, u, v, prec + 10);
                        arb_div(res, res, t, prec);
                    }
                }
                else
                {
                    arb_neg(v, x);
                    arb_add_si(v, v, 1 + r, prec + 10);
                    arb_hypgeom_rising_ui_rec(v, v, -r, prec + 10);
                    arb_mul(v, v, t, prec + 10);

                    if (reciprocal)
                        arb_mul(res, u, v, prec);
                    else
                        arb_div(res, u, v, prec);
                }

                if (r % 2)
                    arb_neg(res, res);
            }
        }

        arb_clear(t);
        arb_clear(u);
        arb_clear(v);
        return success;
    }

    /* Wide enclosure. */
    if (prec < 40 || rad_exp > -16)
    {
        double a, b, c;

#if DEBUG
    printf("WIDE CASE\n");
#endif

        dxerr += ldexp(1.0, mid_exp - 51);
        dxerr *= (1 + 1e-15);

        a = (dx - dxerr) * (1 - 1e-15);
        b = (dx + dxerr) * (1 + 1e-15);

        if (a >= GAMMA_MIN_X)
        {
            a = _arb_hypgeom_d_gamma(a, -1);
            b = _arb_hypgeom_d_gamma(b, 1);
        }
        else if (b <= GAMMA_MIN_X)
        {
            c = _arb_hypgeom_d_gamma(a, 1);
            a = _arb_hypgeom_d_gamma(b, -1);
            b = c;
        }
        else
        {
            a = _arb_hypgeom_d_gamma(a, 1);
            b = _arb_hypgeom_d_gamma(b, 1);
            b = FLINT_MAX(a, b);
            a = GAMMA_MIN_Y * (1 - 1e-15);
        }

        if (reciprocal)
        {
            c = (1.0 / b) * (1 - 1e-15);
            b = (1.0 / a) * (1 + 1e-15);
            a = c;
        }

        arb_set_interval_d_fast(res, a, b, prec);
        return 1;
    }

    /* Propagated error. */
    if (rad_exp == WORD_MIN)
    {
        dxerr = 0.0;
        rad_exp = WORD_MIN;
    }
    else
    {
        /* First-order relative error estimate plus safety factor to guarantee
           an upper bound. */
        dxerr = MAG_MAN(arb_radref(x)) * ldexp(1.0, -MAG_BITS);
        dxerr = dxerr * d_abs_digamma_ubound(dx) * 1.001;
    }

#if DEBUG
    flint_printf("propagated error = %g x 2^%wd\n", dxerr, rad_exp);
#endif

    wp = prec + 6 + FLINT_BIT_COUNT(FLINT_ABS(r));

    if (wp > ARB_HYPGEOM_GAMMA_TAB_PREC)
        return 0;

    success = 0;

    arf_init(s);
    arf_init(u);
    arf_init(v);

    /* u = x - r */
    arf_sub_si(u, arb_midref(x), r, wp, ARF_RND_DOWN);

    /* du = dx - r; */
    du = arf_get_d(u, ARF_RND_NEAR);

    /* bound log2(u) */
    if (-0.0001 < du && du < 0.0001)
        log2u = arf_is_zero(u) ? -wp : ARF_EXP(u);
    else
        log2u = mag_d_log_upper_bound(du < 0 ? -du : du) * 1.4426950408889634074 * (1 + 1e-14);

    term_prec[0] = wp;
    n = 0;

    for (i = 1; i < ARB_HYPGEOM_GAMMA_TAB_NUM; i++)
    {
        tail_bound = arb_hypgeom_gamma_coeffs[i].exp + i * log2u + 5;

        if (tail_bound <= -wp)
        {
            n = i;
            break;
        }

        term_prec[i] = FLINT_MIN(FLINT_MAX(wp + tail_bound, 2), wp);
    }

    if (n == 0)
    {
        flint_printf("warning: gamma_taylor: unexpected failure\n");
        success = 0;
        goto cleanup;
    }

#if DEBUG
    printf("COMPUTATION: wp = %ld, du = %g, log2u = %g, n = %ld\n", wp, du, log2u, n);
#endif

    if (wp <= 512 && n <= 128)
    {
        ds = 0.0;
        for (i = n - 1; i >= 1 && term_prec[i] <= 53; i--)
        {
#if DEBUG
            flint_printf("add term %wd with precision %wd (doubles)\n", i, term_prec[i]);
#endif

            ds = du * ds + arb_hypgeom_rgamma_d_tab[i];
        }

        arf_set_d(s, ds);
    }
    else
    {
        i = n - 1;
    }

    for ( ; i >= 1; i--)
    {
        arf_t c;

#if DEBUG
        flint_printf("add term %wd with precision %wd\n", i, term_prec[i]);
#endif

        if (!_arb_hypgeom_gamma_coeff_shallow(c, NULL, i, term_prec[i]))
        {
            flint_printf("arb_hypgeom_gamma_taylor: prec = %wd, du = %g, log2u = %d, term_prec[%wd] = %wd",
                prec, du, log2u, i, term_prec[i]);
            flint_abort();
        }

        if (term_prec[i] < wp - 128)
        {
            arf_set_round(v, u, term_prec[i], ARF_RND_DOWN);
            arf_mul(s, s, v, term_prec[i], ARF_RND_DOWN);
            arf_add(s, s, c, term_prec[i], ARF_RND_DOWN);
        }
        else
        {
            arf_mul(s, s, u, term_prec[i], ARF_RND_DOWN);
            arf_add(s, s, c, term_prec[i], ARF_RND_DOWN);
        }
    }

    if (i == 0)
    {
#if DEBUG
        flint_printf("add term %wd with precision %wd\n", i, term_prec[i]);
#endif

        arf_mul(s, s, u, wp, ARF_RND_DOWN);
        arf_add_ui(s, s, 1, wp, ARF_RND_DOWN);
    }

    if (r == 0 || r == 1)
    {
        if (r == 0)
            arf_mul(s, s, u, wp, ARF_RND_DOWN);

        if (reciprocal)
        {
            arf_set_round(arb_midref(res), s, prec, ARF_RND_DOWN);
        }
        else
        {
            arf_one(u);
            arf_div(arb_midref(res), u, s, prec, ARF_RND_DOWN);
        }
        arf_mag_set_ulp(arb_radref(res), arb_midref(res), prec - 1);
    }
    else if (wp <= 320 || r <= 3)
    {
        _arf_increment_fast(u, wp);
        arf_set(v, u);

        for (i = 2; i < r; i++)
        {
            _arf_increment_fast(u, wp);
            arf_mul(v, v, u, wp, ARF_RND_DOWN);
        }

        if (reciprocal)
            arf_div(arb_midref(res), s, v, prec, ARF_RND_DOWN);
        else
            arf_div(arb_midref(res), v, s, prec, ARF_RND_DOWN);
        arf_mag_set_ulp(arb_radref(res), arb_midref(res), prec - 1);
    }
    else
    {
        arb_t t;
        arb_init(t);
        _arf_increment_fast(u, wp);
        arb_set_arf(t, u);
        arb_hypgeom_rising_ui_rec(t, t, r - 1, wp);

        if (reciprocal)
        {
            arb_set_arf(res, s);
            arb_div(res, res, t, prec);
        }
        else
            arb_div_arf(res, t, s, prec);

        arf_mag_add_ulp(arb_radref(res), arb_radref(res), arb_midref(res), prec - 1);
        arb_clear(t);
    }

    /* Add propagated error. */
    if (dxerr != 0)
    {
        mag_t err;
        double dy;
        dy = arf_get_d(arb_midref(res), ARF_RND_UP);
        dxerr = dxerr * dy * (1 + 1e-15);
        MAG_SET_D_2EXP(MAG_MAN(err), MAG_EXP(err), dxerr, rad_exp);
        mag_add(arb_radref(res), arb_radref(res), err);
    }

    success = 1;

#if DEBUG
    printf("OUTPUT: "); arb_printd(res, 200); printf("\n");
#endif

cleanup:
    arf_clear(s);
    arf_clear(u);
    arf_clear(v);

    return success;
}