diff --git a/arb/const_euler.c b/arb/const_euler.c index 90fa4279..f760690b 100644 --- a/arb/const_euler.c +++ b/arb/const_euler.c @@ -1,5 +1,5 @@ /* - Copyright (C) 2012, 2013 Fredrik Johansson + Copyright (C) 2012, 2013, 2022 Fredrik Johansson This file is part of Arb. @@ -21,11 +21,13 @@ typedef struct arb_t C; arb_t D; arb_t V; -} euler_bsplit_struct; + slong a; + slong b; +} euler_bsplit_1_struct; -typedef euler_bsplit_struct euler_bsplit_t[1]; +typedef euler_bsplit_1_struct euler_bsplit_1_t[1]; -static void euler_bsplit_init(euler_bsplit_t s) +static void euler_bsplit_1_init(euler_bsplit_1_t s, void * args) { arb_init(s->P); arb_init(s->Q); @@ -35,7 +37,7 @@ static void euler_bsplit_init(euler_bsplit_t s) arb_init(s->V); } -static void euler_bsplit_clear(euler_bsplit_t s) +static void euler_bsplit_1_clear(euler_bsplit_1_t s, void * args) { arb_clear(s->P); arb_clear(s->Q); @@ -45,12 +47,26 @@ static void euler_bsplit_clear(euler_bsplit_t s) arb_clear(s->V); } +typedef struct +{ + slong N; + slong prec; + slong a; + slong b; +} +bsplit_args_t; + static void -euler_bsplit_1_merge(euler_bsplit_t s, euler_bsplit_t L, euler_bsplit_t R, - slong wp, int cont) +euler_bsplit_1_merge(euler_bsplit_1_t s, euler_bsplit_1_t L, euler_bsplit_1_t R, bsplit_args_t * args) { arb_t t, u, v; + slong wp = args->prec; + + slong b = R->b; + + int cont = (b != args->b); + arb_init(t); arb_init(u); arb_init(v); @@ -84,14 +100,19 @@ euler_bsplit_1_merge(euler_bsplit_t s, euler_bsplit_t L, euler_bsplit_t R, arb_clear(t); arb_clear(u); arb_clear(v); + + s->a = L->a; + s->b = R->b; } static void -euler_bsplit_1(euler_bsplit_t s, slong n1, slong n2, slong N, slong wp, int cont) +euler_bsplit_1_basecase(euler_bsplit_1_t s, slong n1, slong n2, bsplit_args_t * args) { if (n2 - n1 == 1) { - arb_set_si(s->P, N); /* p = N^2 */ + slong wp = args->prec; + + arb_set_si(s->P, args->N); /* p = N^2 */ arb_mul(s->P, s->P, s->P, wp); arb_set_si(s->Q, n1 + 1); /* q = (k + 1)^2 */ arb_mul(s->Q, s->Q, s->Q, wp); @@ -99,28 +120,114 @@ euler_bsplit_1(euler_bsplit_t s, slong n1, slong n2, slong N, slong wp, int cont arb_set_si(s->D, n1 + 1); arb_set(s->T, s->P); arb_set(s->V, s->P); + + s->a = n1; + s->b = n2; } else { - euler_bsplit_t L, R; - slong m = (n1 + n2) / 2; + euler_bsplit_1_t L, R; + slong m = n1 + (n2 - n1) / 2; - euler_bsplit_init(L); - euler_bsplit_init(R); - euler_bsplit_1(L, n1, m, N, wp, 1); - euler_bsplit_1(R, m, n2, N, wp, 1); - euler_bsplit_1_merge(s, L, R, wp, cont); - euler_bsplit_clear(L); - euler_bsplit_clear(R); + euler_bsplit_1_init(L, args); + euler_bsplit_1_init(R, args); + euler_bsplit_1_basecase(L, n1, m, args); + euler_bsplit_1_basecase(R, m, n2, args); + euler_bsplit_1_merge(s, L, R, args); + euler_bsplit_1_clear(L, args); + euler_bsplit_1_clear(R, args); } } static void -euler_bsplit_2(arb_t P, arb_t Q, arb_t T, slong n1, slong n2, - slong N, slong wp, int cont) +euler_bsplit_1(euler_bsplit_1_t s, slong n1, slong n2, slong N, slong wp, int cont) +{ + bsplit_args_t args; + + args.N = N; + args.prec = wp; + args.a = n1; + args.b = n2; + + flint_parallel_binary_splitting(s, + (bsplit_basecase_func_t) euler_bsplit_1_basecase, + (bsplit_merge_func_t) euler_bsplit_1_merge, + sizeof(euler_bsplit_1_struct), + (bsplit_init_func_t) euler_bsplit_1_init, + (bsplit_clear_func_t) euler_bsplit_1_clear, + &args, n1, n2, 4, -1, 0); +} + +typedef struct +{ + arb_t P; + arb_t Q; + arb_t T; + slong a; + slong b; +} euler_bsplit_2_struct; + +typedef euler_bsplit_2_struct euler_bsplit_2_t[1]; + +static void euler_bsplit_2_init(euler_bsplit_2_t s, void * args) +{ + arb_init(s->P); + arb_init(s->Q); + arb_init(s->T); +} + +static void euler_bsplit_2_clear(euler_bsplit_2_t s, void * args) +{ + arb_clear(s->P); + arb_clear(s->Q); + arb_clear(s->T); +} + +static void +euler_bsplit_2_merge(euler_bsplit_2_t s, euler_bsplit_2_t L, euler_bsplit_2_t R, bsplit_args_t * args) +{ + arb_ptr P = s->P; + arb_ptr Q = s->Q; + arb_ptr T = s->T; + + arb_ptr P2 = R->P; + arb_ptr Q2 = R->Q; + arb_ptr T2 = R->T; + + slong wp = args->prec; + + slong b = R->b; + + int cont = (b != args->b); + + arb_mul(T, T, Q2, wp); + arb_mul(T2, T2, P, wp); + arb_add(T, T, T2, wp); + + if (cont) + arb_mul(P, P, P2, wp); + + arb_mul(Q, Q, Q2, wp); + + s->a = L->a; + s->b = R->b; +} + +static void +euler_bsplit_2_basecase(euler_bsplit_2_t s, slong n1, slong n2, bsplit_args_t * args) { if (n2 - n1 == 1) { + slong wp = args->prec; + slong N = args->N; + + arb_ptr P = s->P; + arb_ptr Q = s->Q; + arb_ptr T = s->T; + + if (n2 - n1 != 1) + flint_abort(); + if (n1 == 0) { arb_set_si(P, 1); @@ -138,34 +245,52 @@ euler_bsplit_2(arb_t P, arb_t Q, arb_t T, slong n1, slong n2, } arb_set(T, P); + + s->a = n1; + s->b = n2; } else { - arb_t P2, Q2, T2; - slong m = (n1 + n2) / 2; + euler_bsplit_2_t R; + slong m = n1 + (n2 - n1) / 2; - arb_init(P2); - arb_init(Q2); - arb_init(T2); - - euler_bsplit_2(P, Q, T, n1, m, N, wp, 1); - euler_bsplit_2(P2, Q2, T2, m, n2, N, wp, 1); - - arb_mul(T, T, Q2, wp); - arb_mul(T2, T2, P, wp); - arb_add(T, T, T2, wp); - - if (cont) - arb_mul(P, P, P2, wp); - - arb_mul(Q, Q, Q2, wp); - - arb_clear(P2); - arb_clear(Q2); - arb_clear(T2); + euler_bsplit_2_init(R, args); + euler_bsplit_2_basecase(s, n1, m, args); + euler_bsplit_2_basecase(R, m, n2, args); + euler_bsplit_2_merge(s, s, R, args); + euler_bsplit_2_clear(R, args); } } +static void +euler_bsplit_2(arb_t P, arb_t Q, arb_t T, slong n1, slong n2, + slong N, slong wp, int cont) +{ + euler_bsplit_2_t s; + bsplit_args_t args; + + args.N = N; + args.prec = wp; + args.a = n1; + args.b = n2; + + *s->P = *P; + *s->Q = *Q; + *s->T = *T; + + flint_parallel_binary_splitting(s, + (bsplit_basecase_func_t) euler_bsplit_2_basecase, + (bsplit_merge_func_t) euler_bsplit_2_merge, + sizeof(euler_bsplit_2_struct), + (bsplit_init_func_t) euler_bsplit_2_init, + (bsplit_clear_func_t) euler_bsplit_2_clear, + &args, n1, n2, 4, -1, FLINT_PARALLEL_BSPLIT_LEFT_INPLACE); + + *P = *s->P; + *Q = *s->Q; + *T = *s->T; +} + static void atanh_bsplit(arb_t s, ulong c, slong a, slong prec) { @@ -236,7 +361,7 @@ arb_log_ui_smooth(arb_t s, ulong n, slong prec) void arb_const_euler_eval(arb_t res, slong prec) { - euler_bsplit_t sum; + euler_bsplit_1_t sum; arb_t t, u, v, P2, T2, Q2; slong bits, wp, wp2, n, N, M; @@ -273,7 +398,7 @@ arb_const_euler_eval(arb_t res, slong prec) wp = bits + 2 * FLINT_BIT_COUNT(n); wp2 = bits/2 + 2 * FLINT_BIT_COUNT(n); - euler_bsplit_init(sum); + euler_bsplit_1_init(sum, NULL); arb_init(P2); arb_init(T2); arb_init(Q2); @@ -331,7 +456,7 @@ arb_const_euler_eval(arb_t res, slong prec) arb_clear(u); arb_clear(v); - euler_bsplit_clear(sum); + euler_bsplit_1_clear(sum, NULL); } ARB_DEF_CACHED_CONSTANT(arb_const_euler_brent_mcmillan, arb_const_euler_eval) diff --git a/arb/test/t-const_euler.c b/arb/test/t-const_euler.c index 5d3130da..183010df 100644 --- a/arb/test/t-const_euler.c +++ b/arb/test/t-const_euler.c @@ -26,6 +26,8 @@ int main() mpfr_t s; slong accuracy, prec; + flint_set_num_threads(1 + n_randint(state, 3)); + prec = 2 + n_randint(state, 1 << n_randint(state, 16)); arb_init(r); @@ -57,7 +59,7 @@ int main() } flint_randclear(state); - flint_cleanup(); + flint_cleanup_master(); flint_printf("PASS\n"); return EXIT_SUCCESS; } diff --git a/arb/test/t-const_pi.c b/arb/test/t-const_pi.c index 4fa14fcb..d94af9c6 100644 --- a/arb/test/t-const_pi.c +++ b/arb/test/t-const_pi.c @@ -28,6 +28,8 @@ int main() prec = 2 + n_randint(state, 1 << n_randint(state, 18)); + flint_set_num_threads(1 + n_randint(state, 3)); + arb_init(r); mpfr_init2(s, prec + 1000); @@ -57,7 +59,7 @@ int main() } flint_randclear(state); - flint_cleanup(); + flint_cleanup_master(); flint_printf("PASS\n"); return EXIT_SUCCESS; } diff --git a/examples/pi.c b/examples/pi.c index ccbe02ab..122ba891 100644 --- a/examples/pi.c +++ b/examples/pi.c @@ -1,34 +1,83 @@ /* This file is public domain. Author: Fredrik Johansson. */ +#include #include "arb.h" #include "flint/profiler.h" int main(int argc, char *argv[]) { arb_t x; - slong prec, digits, condense; + slong i, prec, digits, condense, num_threads; + int constant; if (argc < 2) { - flint_printf("usage: build/examples/pi digits [condense = 20]\n"); + flint_printf("usage: build/examples/pi digits [-condense n] [-threads n] [-constant name]\n"); + flint_printf("compute digits of pi (or other constants)\n"); + flint_printf("supported: pi, e, log2, euler, catalan, khinchin, zeta3\n"); return 1; } digits = atol(argv[1]); - if (argc > 2) - condense = atol(argv[2]); - else - condense = 20; + num_threads = 1; + condense = 20; + constant = 0; + + for (i = 2; i < argc; i++) + { + if (!strcmp(argv[i], "-condense")) + condense = atol(argv[i+1]); + else if (!strcmp(argv[i], "-threads")) + num_threads = atol(argv[i+1]); + else if (!strcmp(argv[i], "-constant")) + { + if (!strcmp(argv[i+1], "pi")) + constant = 0; + else if (!strcmp(argv[i+1], "e")) + constant = 1; + else if (!strcmp(argv[i+1], "log2")) + constant = 2; + else if (!strcmp(argv[i+1], "euler")) + constant = 3; + else if (!strcmp(argv[i+1], "catalan")) + constant = 4; + else if (!strcmp(argv[i+1], "khinchin")) + constant = 5; + else if (!strcmp(argv[i+1], "zeta3")) + constant = 6; + else + { + flint_printf("unknown constant\n"); + flint_abort(); + } + } + } + + flint_set_num_threads(num_threads); arb_init(x); prec = digits * 3.3219280948873623 + 5; - flint_printf("computing pi with a precision of %wd bits... ", prec); + flint_printf("precision = %wd bits... ", prec); + fflush(stdout); TIMEIT_ONCE_START - arb_const_pi(x, prec); + if (constant == 0) + arb_const_pi(x, prec); + else if (constant == 1) + arb_const_e(x, prec); + else if (constant == 2) + arb_const_log2(x, prec); + else if (constant == 3) + arb_const_euler(x, prec); + else if (constant == 4) + arb_const_catalan(x, prec); + else if (constant == 5) + arb_const_khinchin(x, prec); + else if (constant == 6) + arb_zeta_ui(x, 3, prec); TIMEIT_ONCE_STOP SHOW_MEMORY_USAGE @@ -38,7 +87,7 @@ int main(int argc, char *argv[]) flint_printf("\n"); arb_clear(x); - flint_cleanup(); + flint_cleanup_master(); return 0; } diff --git a/hypgeom/sum.c b/hypgeom/sum.c index 0885fabf..b5efb430 100644 --- a/hypgeom/sum.c +++ b/hypgeom/sum.c @@ -1,5 +1,5 @@ /* - Copyright (C) 2012 Fredrik Johansson + Copyright (C) 2012, 2022 Fredrik Johansson This file is part of Arb. @@ -9,6 +9,7 @@ (at your option) any later version. See . */ +#include "flint/thread_support.h" #include "hypgeom.h" static __inline__ void @@ -79,68 +80,212 @@ bsplit_recursive_fmpz(fmpz_t P, fmpz_t Q, fmpz_t B, fmpz_t T, } } +typedef struct +{ + arb_struct P; + arb_struct Q; + arb_struct B; + arb_struct T; + slong a; + slong b; +} +bsplit_res_t; + +typedef struct +{ + const hypgeom_struct * hyp; + slong prec; + slong a; + slong b; +} +bsplit_args_t; + +static void +bsplit_init(bsplit_res_t * x, void * args) +{ + arb_init(&x->P); + arb_init(&x->Q); + arb_init(&x->B); + arb_init(&x->T); +} + +static void +bsplit_clear(bsplit_res_t * x, void * args) +{ + arb_clear(&x->P); + arb_clear(&x->Q); + arb_clear(&x->B); + arb_clear(&x->T); +} + +static void +bsplit_basecase(bsplit_res_t * res, slong a, slong b, bsplit_args_t * args) +{ + fmpz_t PP, QQ, BB, TT; + int cont; + + fmpz_init(PP); + fmpz_init(QQ); + fmpz_init(BB); + fmpz_init(TT); + + cont = (b != args->b); + + bsplit_recursive_fmpz(PP, QQ, BB, TT, args->hyp, a, b, cont); + + arb_set_fmpz(&res->P, PP); + arb_set_fmpz(&res->Q, QQ); + arb_set_fmpz(&res->B, BB); + arb_set_fmpz(&res->T, TT); + + res->a = a; + res->b = b; + + fmpz_clear(PP); + fmpz_clear(QQ); + fmpz_clear(BB); + fmpz_clear(TT); +} + +/* res = left */ +static void +bsplit_merge(bsplit_res_t * res, bsplit_res_t * left, bsplit_res_t * right, bsplit_args_t * args) +{ + arb_ptr P = &res->P; + arb_ptr Q = &res->Q; + arb_ptr B = &res->B; + arb_ptr T = &res->T; + + arb_ptr P2 = &right->P; + arb_ptr Q2 = &right->Q; + arb_ptr B2 = &right->B; + arb_ptr T2 = &right->T; + + slong prec = args->prec; + + slong b = right->b; + + int cont = b != args->b; + + if (res != left) + flint_abort(); + + if (arb_is_one(B) && arb_is_one(B2)) + { + arb_mul(T, T, Q2, prec); + arb_addmul(T, P, T2, prec); + } + else + { + arb_mul(T, T, B2, prec); + arb_mul(T, T, Q2, prec); + arb_mul(T2, T2, B, prec); + arb_addmul(T, P, T2, prec); + } + + arb_mul(B, B, B2, prec); + arb_mul(Q, Q, Q2, prec); + if (cont) + arb_mul(P, P, P2, prec); + + res->b = right->b; +} + +#define WANT_PARALLEL 1 + static void bsplit_recursive_arb(arb_t P, arb_t Q, arb_t B, arb_t T, const hypgeom_t hyp, slong a, slong b, int cont, slong prec) { - if (b - a < 4) + if (WANT_PARALLEL) { - fmpz_t PP, QQ, BB, TT; + bsplit_res_t res; + bsplit_args_t args; - fmpz_init(PP); - fmpz_init(QQ); - fmpz_init(BB); - fmpz_init(TT); + res.P = *P; + res.Q = *Q; + res.B = *B; + res.T = *T; - bsplit_recursive_fmpz(PP, QQ, BB, TT, hyp, a, b, cont); + args.hyp = hyp; + args.prec = prec; + args.a = a; + args.b = b; - arb_set_fmpz(P, PP); - arb_set_fmpz(Q, QQ); - arb_set_fmpz(B, BB); - arb_set_fmpz(T, TT); + flint_parallel_binary_splitting(&res, + (bsplit_basecase_func_t) bsplit_basecase, + (bsplit_merge_func_t) bsplit_merge, + sizeof(bsplit_res_t), + (bsplit_init_func_t) bsplit_init, + (bsplit_clear_func_t) bsplit_clear, + &args, a, b, 4, -1, FLINT_PARALLEL_BSPLIT_LEFT_INPLACE); - fmpz_clear(PP); - fmpz_clear(QQ); - fmpz_clear(BB); - fmpz_clear(TT); + *P = res.P; + *Q = res.Q; + *B = res.B; + *T = res.T; } else { - slong m; - arb_t P2, Q2, B2, T2; - - m = (a + b) / 2; - - arb_init(P2); - arb_init(Q2); - arb_init(B2); - arb_init(T2); - - bsplit_recursive_arb(P, Q, B, T, hyp, a, m, 1, prec); - bsplit_recursive_arb(P2, Q2, B2, T2, hyp, m, b, 1, prec); - - if (arb_is_one(B) && arb_is_one(B2)) + if (b - a < 4) { - arb_mul(T, T, Q2, prec); - arb_addmul(T, P, T2, prec); + fmpz_t PP, QQ, BB, TT; + + fmpz_init(PP); + fmpz_init(QQ); + fmpz_init(BB); + fmpz_init(TT); + + bsplit_recursive_fmpz(PP, QQ, BB, TT, hyp, a, b, cont); + + arb_set_fmpz(P, PP); + arb_set_fmpz(Q, QQ); + arb_set_fmpz(B, BB); + arb_set_fmpz(T, TT); + + fmpz_clear(PP); + fmpz_clear(QQ); + fmpz_clear(BB); + fmpz_clear(TT); } else { - arb_mul(T, T, B2, prec); - arb_mul(T, T, Q2, prec); - arb_mul(T2, T2, B, prec); - arb_addmul(T, P, T2, prec); + slong m; + arb_t P2, Q2, B2, T2; + + m = (a + b) / 2; + + arb_init(P2); + arb_init(Q2); + arb_init(B2); + arb_init(T2); + + bsplit_recursive_arb(P, Q, B, T, hyp, a, m, 1, prec); + bsplit_recursive_arb(P2, Q2, B2, T2, hyp, m, b, 1, prec); + + if (arb_is_one(B) && arb_is_one(B2)) + { + arb_mul(T, T, Q2, prec); + arb_addmul(T, P, T2, prec); + } + else + { + arb_mul(T, T, B2, prec); + arb_mul(T, T, Q2, prec); + arb_mul(T2, T2, B, prec); + arb_addmul(T, P, T2, prec); + } + + arb_mul(B, B, B2, prec); + arb_mul(Q, Q, Q2, prec); + if (cont) + arb_mul(P, P, P2, prec); + + arb_clear(P2); + arb_clear(Q2); + arb_clear(B2); + arb_clear(T2); } - - arb_mul(B, B, B2, prec); - arb_mul(Q, Q, Q2, prec); - if (cont) - arb_mul(P, P, P2, prec); - - arb_clear(P2); - arb_clear(Q2); - arb_clear(B2); - arb_clear(T2); } }