parallel binary splitting evaluation of various constants

This commit is contained in:
Fredrik Johansson 2022-05-16 14:25:49 +02:00
parent 1c618abfae
commit 5c838ec33d
5 changed files with 424 additions and 101 deletions

View file

@ -1,5 +1,5 @@
/*
Copyright (C) 2012, 2013 Fredrik Johansson
Copyright (C) 2012, 2013, 2022 Fredrik Johansson
This file is part of Arb.
@ -21,11 +21,13 @@ typedef struct
arb_t C;
arb_t D;
arb_t V;
} euler_bsplit_struct;
slong a;
slong b;
} euler_bsplit_1_struct;
typedef euler_bsplit_struct euler_bsplit_t[1];
typedef euler_bsplit_1_struct euler_bsplit_1_t[1];
static void euler_bsplit_init(euler_bsplit_t s)
static void euler_bsplit_1_init(euler_bsplit_1_t s, void * args)
{
arb_init(s->P);
arb_init(s->Q);
@ -35,7 +37,7 @@ static void euler_bsplit_init(euler_bsplit_t s)
arb_init(s->V);
}
static void euler_bsplit_clear(euler_bsplit_t s)
static void euler_bsplit_1_clear(euler_bsplit_1_t s, void * args)
{
arb_clear(s->P);
arb_clear(s->Q);
@ -45,12 +47,26 @@ static void euler_bsplit_clear(euler_bsplit_t s)
arb_clear(s->V);
}
typedef struct
{
slong N;
slong prec;
slong a;
slong b;
}
bsplit_args_t;
static void
euler_bsplit_1_merge(euler_bsplit_t s, euler_bsplit_t L, euler_bsplit_t R,
slong wp, int cont)
euler_bsplit_1_merge(euler_bsplit_1_t s, euler_bsplit_1_t L, euler_bsplit_1_t R, bsplit_args_t * args)
{
arb_t t, u, v;
slong wp = args->prec;
slong b = R->b;
int cont = (b != args->b);
arb_init(t);
arb_init(u);
arb_init(v);
@ -84,14 +100,19 @@ euler_bsplit_1_merge(euler_bsplit_t s, euler_bsplit_t L, euler_bsplit_t R,
arb_clear(t);
arb_clear(u);
arb_clear(v);
s->a = L->a;
s->b = R->b;
}
static void
euler_bsplit_1(euler_bsplit_t s, slong n1, slong n2, slong N, slong wp, int cont)
euler_bsplit_1_basecase(euler_bsplit_1_t s, slong n1, slong n2, bsplit_args_t * args)
{
if (n2 - n1 == 1)
{
arb_set_si(s->P, N); /* p = N^2 */
slong wp = args->prec;
arb_set_si(s->P, args->N); /* p = N^2 */
arb_mul(s->P, s->P, s->P, wp);
arb_set_si(s->Q, n1 + 1); /* q = (k + 1)^2 */
arb_mul(s->Q, s->Q, s->Q, wp);
@ -99,28 +120,114 @@ euler_bsplit_1(euler_bsplit_t s, slong n1, slong n2, slong N, slong wp, int cont
arb_set_si(s->D, n1 + 1);
arb_set(s->T, s->P);
arb_set(s->V, s->P);
s->a = n1;
s->b = n2;
}
else
{
euler_bsplit_t L, R;
slong m = (n1 + n2) / 2;
euler_bsplit_1_t L, R;
slong m = n1 + (n2 - n1) / 2;
euler_bsplit_init(L);
euler_bsplit_init(R);
euler_bsplit_1(L, n1, m, N, wp, 1);
euler_bsplit_1(R, m, n2, N, wp, 1);
euler_bsplit_1_merge(s, L, R, wp, cont);
euler_bsplit_clear(L);
euler_bsplit_clear(R);
euler_bsplit_1_init(L, args);
euler_bsplit_1_init(R, args);
euler_bsplit_1_basecase(L, n1, m, args);
euler_bsplit_1_basecase(R, m, n2, args);
euler_bsplit_1_merge(s, L, R, args);
euler_bsplit_1_clear(L, args);
euler_bsplit_1_clear(R, args);
}
}
static void
euler_bsplit_2(arb_t P, arb_t Q, arb_t T, slong n1, slong n2,
slong N, slong wp, int cont)
euler_bsplit_1(euler_bsplit_1_t s, slong n1, slong n2, slong N, slong wp, int cont)
{
bsplit_args_t args;
args.N = N;
args.prec = wp;
args.a = n1;
args.b = n2;
flint_parallel_binary_splitting(s,
(bsplit_basecase_func_t) euler_bsplit_1_basecase,
(bsplit_merge_func_t) euler_bsplit_1_merge,
sizeof(euler_bsplit_1_struct),
(bsplit_init_func_t) euler_bsplit_1_init,
(bsplit_clear_func_t) euler_bsplit_1_clear,
&args, n1, n2, 4, -1, 0);
}
typedef struct
{
arb_t P;
arb_t Q;
arb_t T;
slong a;
slong b;
} euler_bsplit_2_struct;
typedef euler_bsplit_2_struct euler_bsplit_2_t[1];
static void euler_bsplit_2_init(euler_bsplit_2_t s, void * args)
{
arb_init(s->P);
arb_init(s->Q);
arb_init(s->T);
}
static void euler_bsplit_2_clear(euler_bsplit_2_t s, void * args)
{
arb_clear(s->P);
arb_clear(s->Q);
arb_clear(s->T);
}
static void
euler_bsplit_2_merge(euler_bsplit_2_t s, euler_bsplit_2_t L, euler_bsplit_2_t R, bsplit_args_t * args)
{
arb_ptr P = s->P;
arb_ptr Q = s->Q;
arb_ptr T = s->T;
arb_ptr P2 = R->P;
arb_ptr Q2 = R->Q;
arb_ptr T2 = R->T;
slong wp = args->prec;
slong b = R->b;
int cont = (b != args->b);
arb_mul(T, T, Q2, wp);
arb_mul(T2, T2, P, wp);
arb_add(T, T, T2, wp);
if (cont)
arb_mul(P, P, P2, wp);
arb_mul(Q, Q, Q2, wp);
s->a = L->a;
s->b = R->b;
}
static void
euler_bsplit_2_basecase(euler_bsplit_2_t s, slong n1, slong n2, bsplit_args_t * args)
{
if (n2 - n1 == 1)
{
slong wp = args->prec;
slong N = args->N;
arb_ptr P = s->P;
arb_ptr Q = s->Q;
arb_ptr T = s->T;
if (n2 - n1 != 1)
flint_abort();
if (n1 == 0)
{
arb_set_si(P, 1);
@ -138,34 +245,52 @@ euler_bsplit_2(arb_t P, arb_t Q, arb_t T, slong n1, slong n2,
}
arb_set(T, P);
s->a = n1;
s->b = n2;
}
else
{
arb_t P2, Q2, T2;
slong m = (n1 + n2) / 2;
euler_bsplit_2_t R;
slong m = n1 + (n2 - n1) / 2;
arb_init(P2);
arb_init(Q2);
arb_init(T2);
euler_bsplit_2(P, Q, T, n1, m, N, wp, 1);
euler_bsplit_2(P2, Q2, T2, m, n2, N, wp, 1);
arb_mul(T, T, Q2, wp);
arb_mul(T2, T2, P, wp);
arb_add(T, T, T2, wp);
if (cont)
arb_mul(P, P, P2, wp);
arb_mul(Q, Q, Q2, wp);
arb_clear(P2);
arb_clear(Q2);
arb_clear(T2);
euler_bsplit_2_init(R, args);
euler_bsplit_2_basecase(s, n1, m, args);
euler_bsplit_2_basecase(R, m, n2, args);
euler_bsplit_2_merge(s, s, R, args);
euler_bsplit_2_clear(R, args);
}
}
static void
euler_bsplit_2(arb_t P, arb_t Q, arb_t T, slong n1, slong n2,
slong N, slong wp, int cont)
{
euler_bsplit_2_t s;
bsplit_args_t args;
args.N = N;
args.prec = wp;
args.a = n1;
args.b = n2;
*s->P = *P;
*s->Q = *Q;
*s->T = *T;
flint_parallel_binary_splitting(s,
(bsplit_basecase_func_t) euler_bsplit_2_basecase,
(bsplit_merge_func_t) euler_bsplit_2_merge,
sizeof(euler_bsplit_2_struct),
(bsplit_init_func_t) euler_bsplit_2_init,
(bsplit_clear_func_t) euler_bsplit_2_clear,
&args, n1, n2, 4, -1, FLINT_PARALLEL_BSPLIT_LEFT_INPLACE);
*P = *s->P;
*Q = *s->Q;
*T = *s->T;
}
static void
atanh_bsplit(arb_t s, ulong c, slong a, slong prec)
{
@ -236,7 +361,7 @@ arb_log_ui_smooth(arb_t s, ulong n, slong prec)
void
arb_const_euler_eval(arb_t res, slong prec)
{
euler_bsplit_t sum;
euler_bsplit_1_t sum;
arb_t t, u, v, P2, T2, Q2;
slong bits, wp, wp2, n, N, M;
@ -273,7 +398,7 @@ arb_const_euler_eval(arb_t res, slong prec)
wp = bits + 2 * FLINT_BIT_COUNT(n);
wp2 = bits/2 + 2 * FLINT_BIT_COUNT(n);
euler_bsplit_init(sum);
euler_bsplit_1_init(sum, NULL);
arb_init(P2);
arb_init(T2);
arb_init(Q2);
@ -331,7 +456,7 @@ arb_const_euler_eval(arb_t res, slong prec)
arb_clear(u);
arb_clear(v);
euler_bsplit_clear(sum);
euler_bsplit_1_clear(sum, NULL);
}
ARB_DEF_CACHED_CONSTANT(arb_const_euler_brent_mcmillan, arb_const_euler_eval)

View file

@ -26,6 +26,8 @@ int main()
mpfr_t s;
slong accuracy, prec;
flint_set_num_threads(1 + n_randint(state, 3));
prec = 2 + n_randint(state, 1 << n_randint(state, 16));
arb_init(r);
@ -57,7 +59,7 @@ int main()
}
flint_randclear(state);
flint_cleanup();
flint_cleanup_master();
flint_printf("PASS\n");
return EXIT_SUCCESS;
}

View file

@ -28,6 +28,8 @@ int main()
prec = 2 + n_randint(state, 1 << n_randint(state, 18));
flint_set_num_threads(1 + n_randint(state, 3));
arb_init(r);
mpfr_init2(s, prec + 1000);
@ -57,7 +59,7 @@ int main()
}
flint_randclear(state);
flint_cleanup();
flint_cleanup_master();
flint_printf("PASS\n");
return EXIT_SUCCESS;
}

View file

@ -1,34 +1,83 @@
/* This file is public domain. Author: Fredrik Johansson. */
#include <string.h>
#include "arb.h"
#include "flint/profiler.h"
int main(int argc, char *argv[])
{
arb_t x;
slong prec, digits, condense;
slong i, prec, digits, condense, num_threads;
int constant;
if (argc < 2)
{
flint_printf("usage: build/examples/pi digits [condense = 20]\n");
flint_printf("usage: build/examples/pi digits [-condense n] [-threads n] [-constant name]\n");
flint_printf("compute digits of pi (or other constants)\n");
flint_printf("supported: pi, e, log2, euler, catalan, khinchin, zeta3\n");
return 1;
}
digits = atol(argv[1]);
if (argc > 2)
condense = atol(argv[2]);
else
condense = 20;
num_threads = 1;
condense = 20;
constant = 0;
for (i = 2; i < argc; i++)
{
if (!strcmp(argv[i], "-condense"))
condense = atol(argv[i+1]);
else if (!strcmp(argv[i], "-threads"))
num_threads = atol(argv[i+1]);
else if (!strcmp(argv[i], "-constant"))
{
if (!strcmp(argv[i+1], "pi"))
constant = 0;
else if (!strcmp(argv[i+1], "e"))
constant = 1;
else if (!strcmp(argv[i+1], "log2"))
constant = 2;
else if (!strcmp(argv[i+1], "euler"))
constant = 3;
else if (!strcmp(argv[i+1], "catalan"))
constant = 4;
else if (!strcmp(argv[i+1], "khinchin"))
constant = 5;
else if (!strcmp(argv[i+1], "zeta3"))
constant = 6;
else
{
flint_printf("unknown constant\n");
flint_abort();
}
}
}
flint_set_num_threads(num_threads);
arb_init(x);
prec = digits * 3.3219280948873623 + 5;
flint_printf("computing pi with a precision of %wd bits... ", prec);
flint_printf("precision = %wd bits... ", prec);
fflush(stdout);
TIMEIT_ONCE_START
arb_const_pi(x, prec);
if (constant == 0)
arb_const_pi(x, prec);
else if (constant == 1)
arb_const_e(x, prec);
else if (constant == 2)
arb_const_log2(x, prec);
else if (constant == 3)
arb_const_euler(x, prec);
else if (constant == 4)
arb_const_catalan(x, prec);
else if (constant == 5)
arb_const_khinchin(x, prec);
else if (constant == 6)
arb_zeta_ui(x, 3, prec);
TIMEIT_ONCE_STOP
SHOW_MEMORY_USAGE
@ -38,7 +87,7 @@ int main(int argc, char *argv[])
flint_printf("\n");
arb_clear(x);
flint_cleanup();
flint_cleanup_master();
return 0;
}

View file

@ -1,5 +1,5 @@
/*
Copyright (C) 2012 Fredrik Johansson
Copyright (C) 2012, 2022 Fredrik Johansson
This file is part of Arb.
@ -9,6 +9,7 @@
(at your option) any later version. See <http://www.gnu.org/licenses/>.
*/
#include "flint/thread_support.h"
#include "hypgeom.h"
static __inline__ void
@ -79,68 +80,212 @@ bsplit_recursive_fmpz(fmpz_t P, fmpz_t Q, fmpz_t B, fmpz_t T,
}
}
typedef struct
{
arb_struct P;
arb_struct Q;
arb_struct B;
arb_struct T;
slong a;
slong b;
}
bsplit_res_t;
typedef struct
{
const hypgeom_struct * hyp;
slong prec;
slong a;
slong b;
}
bsplit_args_t;
static void
bsplit_init(bsplit_res_t * x, void * args)
{
arb_init(&x->P);
arb_init(&x->Q);
arb_init(&x->B);
arb_init(&x->T);
}
static void
bsplit_clear(bsplit_res_t * x, void * args)
{
arb_clear(&x->P);
arb_clear(&x->Q);
arb_clear(&x->B);
arb_clear(&x->T);
}
static void
bsplit_basecase(bsplit_res_t * res, slong a, slong b, bsplit_args_t * args)
{
fmpz_t PP, QQ, BB, TT;
int cont;
fmpz_init(PP);
fmpz_init(QQ);
fmpz_init(BB);
fmpz_init(TT);
cont = (b != args->b);
bsplit_recursive_fmpz(PP, QQ, BB, TT, args->hyp, a, b, cont);
arb_set_fmpz(&res->P, PP);
arb_set_fmpz(&res->Q, QQ);
arb_set_fmpz(&res->B, BB);
arb_set_fmpz(&res->T, TT);
res->a = a;
res->b = b;
fmpz_clear(PP);
fmpz_clear(QQ);
fmpz_clear(BB);
fmpz_clear(TT);
}
/* res = left */
static void
bsplit_merge(bsplit_res_t * res, bsplit_res_t * left, bsplit_res_t * right, bsplit_args_t * args)
{
arb_ptr P = &res->P;
arb_ptr Q = &res->Q;
arb_ptr B = &res->B;
arb_ptr T = &res->T;
arb_ptr P2 = &right->P;
arb_ptr Q2 = &right->Q;
arb_ptr B2 = &right->B;
arb_ptr T2 = &right->T;
slong prec = args->prec;
slong b = right->b;
int cont = b != args->b;
if (res != left)
flint_abort();
if (arb_is_one(B) && arb_is_one(B2))
{
arb_mul(T, T, Q2, prec);
arb_addmul(T, P, T2, prec);
}
else
{
arb_mul(T, T, B2, prec);
arb_mul(T, T, Q2, prec);
arb_mul(T2, T2, B, prec);
arb_addmul(T, P, T2, prec);
}
arb_mul(B, B, B2, prec);
arb_mul(Q, Q, Q2, prec);
if (cont)
arb_mul(P, P, P2, prec);
res->b = right->b;
}
#define WANT_PARALLEL 1
static void
bsplit_recursive_arb(arb_t P, arb_t Q, arb_t B, arb_t T,
const hypgeom_t hyp, slong a, slong b, int cont, slong prec)
{
if (b - a < 4)
if (WANT_PARALLEL)
{
fmpz_t PP, QQ, BB, TT;
bsplit_res_t res;
bsplit_args_t args;
fmpz_init(PP);
fmpz_init(QQ);
fmpz_init(BB);
fmpz_init(TT);
res.P = *P;
res.Q = *Q;
res.B = *B;
res.T = *T;
bsplit_recursive_fmpz(PP, QQ, BB, TT, hyp, a, b, cont);
args.hyp = hyp;
args.prec = prec;
args.a = a;
args.b = b;
arb_set_fmpz(P, PP);
arb_set_fmpz(Q, QQ);
arb_set_fmpz(B, BB);
arb_set_fmpz(T, TT);
flint_parallel_binary_splitting(&res,
(bsplit_basecase_func_t) bsplit_basecase,
(bsplit_merge_func_t) bsplit_merge,
sizeof(bsplit_res_t),
(bsplit_init_func_t) bsplit_init,
(bsplit_clear_func_t) bsplit_clear,
&args, a, b, 4, -1, FLINT_PARALLEL_BSPLIT_LEFT_INPLACE);
fmpz_clear(PP);
fmpz_clear(QQ);
fmpz_clear(BB);
fmpz_clear(TT);
*P = res.P;
*Q = res.Q;
*B = res.B;
*T = res.T;
}
else
{
slong m;
arb_t P2, Q2, B2, T2;
m = (a + b) / 2;
arb_init(P2);
arb_init(Q2);
arb_init(B2);
arb_init(T2);
bsplit_recursive_arb(P, Q, B, T, hyp, a, m, 1, prec);
bsplit_recursive_arb(P2, Q2, B2, T2, hyp, m, b, 1, prec);
if (arb_is_one(B) && arb_is_one(B2))
if (b - a < 4)
{
arb_mul(T, T, Q2, prec);
arb_addmul(T, P, T2, prec);
fmpz_t PP, QQ, BB, TT;
fmpz_init(PP);
fmpz_init(QQ);
fmpz_init(BB);
fmpz_init(TT);
bsplit_recursive_fmpz(PP, QQ, BB, TT, hyp, a, b, cont);
arb_set_fmpz(P, PP);
arb_set_fmpz(Q, QQ);
arb_set_fmpz(B, BB);
arb_set_fmpz(T, TT);
fmpz_clear(PP);
fmpz_clear(QQ);
fmpz_clear(BB);
fmpz_clear(TT);
}
else
{
arb_mul(T, T, B2, prec);
arb_mul(T, T, Q2, prec);
arb_mul(T2, T2, B, prec);
arb_addmul(T, P, T2, prec);
slong m;
arb_t P2, Q2, B2, T2;
m = (a + b) / 2;
arb_init(P2);
arb_init(Q2);
arb_init(B2);
arb_init(T2);
bsplit_recursive_arb(P, Q, B, T, hyp, a, m, 1, prec);
bsplit_recursive_arb(P2, Q2, B2, T2, hyp, m, b, 1, prec);
if (arb_is_one(B) && arb_is_one(B2))
{
arb_mul(T, T, Q2, prec);
arb_addmul(T, P, T2, prec);
}
else
{
arb_mul(T, T, B2, prec);
arb_mul(T, T, Q2, prec);
arb_mul(T2, T2, B, prec);
arb_addmul(T, P, T2, prec);
}
arb_mul(B, B, B2, prec);
arb_mul(Q, Q, Q2, prec);
if (cont)
arb_mul(P, P, P2, prec);
arb_clear(P2);
arb_clear(Q2);
arb_clear(B2);
arb_clear(T2);
}
arb_mul(B, B, B2, prec);
arb_mul(Q, Q, Q2, prec);
if (cont)
arb_mul(P, P, P2, prec);
arb_clear(P2);
arb_clear(Q2);
arb_clear(B2);
arb_clear(T2);
}
}