diff --git a/acb_dirichlet.h b/acb_dirichlet.h
index a28d3756..4ce3ab8d 100644
--- a/acb_dirichlet.h
+++ b/acb_dirichlet.h
@@ -298,8 +298,12 @@ void acb_dirichlet_dft_pol(acb_ptr w, acb_srcptr v, slong len, slong prec);
 void acb_dirichlet_dft_crt(acb_ptr w, acb_srcptr v, slong len, slong prec);
 void acb_dirichlet_dft_cyc(acb_ptr w, acb_srcptr v, slong len, slong prec);
 void acb_dirichlet_dft_rad2(acb_ptr v, int e, slong prec);
+void acb_dirichlet_dft_bluestein(acb_ptr w, acb_srcptr v, slong len, slong prec);
 void acb_dirichlet_dft_prod(acb_ptr w, acb_srcptr v, slong * cyc, slong num, slong prec);
 
+void acb_dirichlet_dft_convol_naive(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len, slong prec);
+void acb_dirichlet_dft_convol_rad2(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len, slong prec);
+
 void acb_dirichlet_dft_conrey(acb_ptr w, acb_srcptr v, const acb_dirichlet_group_t G, slong prec);
 void acb_dirichlet_dft(acb_ptr w, acb_srcptr v, const acb_dirichlet_group_t G, slong prec);
 
@@ -347,6 +351,17 @@ acb_dirichlet_dft_rad2_struct;
 
 typedef acb_dirichlet_dft_rad2_struct acb_dirichlet_dft_rad2_t[1];
 
+/* Bluestein scheme: n factors z[0..n-1] and the radix-2 scheme used for the convolution */
+typedef struct
+{
+    slong n;
+    acb_ptr z;
+    acb_dirichlet_dft_rad2_t rad2;
+}
+acb_dirichlet_dft_bluestein_struct;
+
+typedef acb_dirichlet_dft_bluestein_struct acb_dirichlet_dft_bluestein_t[1];
+
 typedef struct
 {
     slong n;
@@ -438,11 +453,13 @@ acb_dirichlet_dft_cyc_precomp(acb_ptr w, acb_srcptr v, const acb_dirichlet_dft_c
 {
     acb_dirichlet_dft_step(w, v, cyc->cyc, cyc->num, prec);
 }
-void acb_dirichlet_dft_rad2_precomp(acb_ptr v, acb_dirichlet_dft_rad2_t rad2, slong prec);
+void acb_dirichlet_dft_rad2_precomp(acb_ptr v, const acb_dirichlet_dft_rad2_t rad2, slong prec);
 void acb_dirichlet_dft_crt_precomp(acb_ptr w, acb_srcptr v, const acb_dirichlet_dft_crt_t crt, slong prec);
 void acb_dirichlet_dft_prod_precomp(acb_ptr w, acb_srcptr v, const acb_dirichlet_dft_prod_t prod, slong prec);
 
-void acb_dirichlet_dft_inverse_rad2_precomp(acb_ptr v, acb_dirichlet_dft_rad2_t rad2, slong prec);
+void acb_dirichlet_dft_inverse_rad2_precomp(acb_ptr v, const acb_dirichlet_dft_rad2_t rad2, slong prec);
+void acb_dirichlet_dft_convol_rad2_precomp(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len, const acb_dirichlet_dft_rad2_t rad2, slong prec);
+void acb_dirichlet_dft_bluestein_precomp(acb_ptr w, acb_srcptr v, const acb_dirichlet_dft_bluestein_t t, slong prec);
 
 void _acb_dirichlet_dft_precomp_init(acb_dirichlet_dft_pre_t pre, slong dv, acb_ptr z, slong dz, slong len, slong prec);
 void acb_dirichlet_dft_precomp_init(acb_dirichlet_dft_pre_t pre, slong len, slong prec);
@@ -502,6 +519,16 @@ acb_dirichlet_dft_rad2_clear(acb_dirichlet_dft_rad2_t t)
     _acb_vec_clear(t->z, t->nz);
 }
 
+void acb_dirichlet_dft_bluestein_init(acb_dirichlet_dft_bluestein_t t, slong n, slong prec);
+
+/* frees the n entries of t->z and the embedded radix-2 scheme */
+ACB_DIRICHLET_INLINE void
+acb_dirichlet_dft_bluestein_clear(acb_dirichlet_dft_bluestein_t t)
+{
+    _acb_vec_clear(t->z, t->n);
+    acb_dirichlet_dft_rad2_clear(t->rad2);
+}
+
 void _acb_dirichlet_dft_crt_init(acb_dirichlet_dft_crt_t crt, slong dv, slong len, slong prec);
 
 ACB_DIRICHLET_INLINE void
@@ -543,6 +570,18 @@ _acb_vec_kronecker_mul(acb_ptr z, acb_srcptr x, acb_srcptr y, slong len, slong p
         acb_mul(z + k, x + k, y + k, prec);
 }
 
+/* z[k] = conj(x[k])*y[k]; z must not alias y (y[k] would be overwritten before it is read) */
+ACB_DIRICHLET_INLINE void
+acb_vec_kronecker_mul_conj(acb_ptr z, acb_srcptr x, acb_srcptr y, slong len, slong prec)
+{
+    slong k;
+    for (k = 0; k < len; k++)
+    {
+        acb_conj(z + k, x + k);
+        acb_mul(z + k, z + k, y + k, prec);
+    }
+}
+
 ACB_DIRICHLET_INLINE void
 acb_vec_printd(acb_srcptr vec, slong len, slong digits)
 {
diff --git a/acb_dirichlet/dft_bluestein.c b/acb_dirichlet/dft_bluestein.c
new file mode 100644
index 00000000..a73294ec
--- /dev/null
+++ b/acb_dirichlet/dft_bluestein.c
@@ -0,0 +1,65 @@
+/*
+    Copyright (C) 2016 Pascal Molin
+
+    This file is part of Arb.
+
+    Arb is free software: you can redistribute it and/or modify it under
+    the terms of the GNU Lesser General Public License (LGPL) as published
+    by the Free Software Foundation; either version 2.1 of the License, or
+    (at your option) any later version.  See <http://www.gnu.org/licenses/>.
+*/
+
+#include "acb_dirichlet.h"
+
+/* Set up a length-n Bluestein scheme: a radix-2 scheme of exponent
+   e = n_clog(2 * n - 1, 2) for the convolution, and the n factors
+   z[k] = conj(r[k^2 mod 2n]), r being filled by acb_dirichlet_vec_nth_roots. */
+void
+acb_dirichlet_dft_bluestein_init(acb_dirichlet_dft_bluestein_t t, slong n, slong prec)
+{
+    nmod_t n2;
+    slong k, k2;
+    acb_ptr z2n;
+    int e = n_clog(2 * n - 1, 2);
+    acb_dirichlet_dft_rad2_init(t->rad2, e, prec);
+    z2n = _acb_vec_init(2 * n);
+    acb_dirichlet_vec_nth_roots(z2n, 2 * n, prec);
+    nmod_init(&n2, 2 * n);
+    t->n = n;
+    t->z = _acb_vec_init(n);
+    /* k2 = k^2 mod 2n, updated through (k + 1)^2 = k^2 + 2k + 1 */
+    for (k = 0, k2 = 0; k < n; k++)
+    {
+        acb_conj(t->z + k, z2n + k2);
+        k2 = nmod_add(k2, 2 * k + 1, n2);
+    }
+    _acb_vec_clear(z2n, 2 * n);
+}
+
+/* Bluestein evaluation of the length t->n transform of v into w (t-dft.c checks
+   it against dft_pol): scale by conj(z), convolve with z, scale by conj(z). */
+void
+acb_dirichlet_dft_bluestein_precomp(acb_ptr w, acb_srcptr v, const acb_dirichlet_dft_bluestein_t t, slong prec)
+{
+    slong n = t->n;
+    acb_ptr vz, wz, z;
+    z = t->z;
+    /* TODO: allocate directly length 2^e and pad */
+    vz = _acb_vec_init(n);
+    acb_vec_kronecker_mul_conj(vz, z, v, n, prec);
+    wz = _acb_vec_init(n);
+    acb_dirichlet_dft_convol_rad2_precomp(wz, vz, z, n, t->rad2, prec);
+    acb_vec_kronecker_mul_conj(w, z, wz, n, prec);
+    _acb_vec_clear(wz, n);
+    _acb_vec_clear(vz, n);
+}
+
+/* same, building and releasing a temporary scheme for this length */
+void
+acb_dirichlet_dft_bluestein(acb_ptr w, acb_srcptr v, slong len, slong prec)
+{
+    acb_dirichlet_dft_bluestein_t t;
+    acb_dirichlet_dft_bluestein_init(t, len, prec);
+    acb_dirichlet_dft_bluestein_precomp(w, v, t, prec);
+    acb_dirichlet_dft_bluestein_clear(t);
+}
diff --git a/acb_dirichlet/dft_convol_fft.c b/acb_dirichlet/dft_convol_fft.c
index 23a3b72d..07a836bc 100644
--- a/acb_dirichlet/dft_convol_fft.c
+++ b/acb_dirichlet/dft_convol_fft.c
@@ -16,14 +16,26 @@ void
 acb_dirichlet_dft_convol_pad(acb_ptr fp, acb_ptr gp, acb_srcptr f, acb_srcptr g, slong n, slong np)
 {
     slong k;
+
+    /* the wrapped copy of f below needs np >= 2n - 1 entries not to overlap */
+    if (np < 2 * n - 1)
+    {
+        flint_printf("dft_convol_pad: overlapping padding %wd < 2*%wd-1\n", np, n);
+        abort();
+    }
+
     for (k = 0; k < n; k++)
         acb_set(gp + k, g + k);
+    /* zero-pad gp beyond the n entries copied from g */
+    for (; k < np; k++)
+        acb_zero(gp + k);
+
     for (k = 0; k < n; k++)
         acb_set(fp + k, f + k);
     for (k = 1; k < n; k++)
         acb_set(fp + np - k, f + n - k);
     for (k = n; k <= np - n; k++)
         acb_zero(fp + k);
 }
 
 void
@@ -37,22 +49,36 @@ acb_dirichlet_dft_inverse_cyc(acb_ptr w, acb_srcptr v, slong len, slong prec)
 }
 
+/* w[0..len-1] = first len entries of the product obtained through padded
+   radix-2 transforms of size rad2->n; convol_pad aborts if rad2->n < 2 * len - 1 */
 void
-acb_dirichlet_dft_convol_fft(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len, slong prec)
+acb_dirichlet_dft_convol_rad2_precomp(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len, const acb_dirichlet_dft_rad2_t rad2, slong prec)
 {
-    int e;
-    slong k, np;
+    slong np;
     acb_ptr fp, gp;
-    acb_dirichlet_dft_rad2_t dft;
-    e = n_clog(2 * len + 1, 2);
-    acb_dirichlet_dft_rad2_init(dft, e, prec);
-    np = dft->n;
+    np = rad2->n;
+
     fp = _acb_vec_init(np);
     gp = _acb_vec_init(np);
     acb_dirichlet_dft_convol_pad(fp, gp, f, g, len, np);
-    acb_dirichlet_dft_rad2_precomp(fp, dft, prec);
-    acb_dirichlet_dft_rad2_precomp(gp, dft, prec);
+
+    acb_dirichlet_dft_rad2_precomp(fp, rad2, prec);
+    acb_dirichlet_dft_rad2_precomp(gp, rad2, prec);
     _acb_vec_kronecker_mul(gp, gp, fp, np, prec);
-    acb_dirichlet_dft_inverse_rad2_precomp(gp, dft, prec);
-    for (k = 0; k < len; k++)
-        acb_set(w + k, gp + k);
+    acb_dirichlet_dft_inverse_rad2_precomp(gp, rad2, prec);
+
+    _acb_vec_set(w, gp, len);
+    _acb_vec_clear(fp, np);
+    _acb_vec_clear(gp, np);
+}
+
+/* same computation with a temporary radix-2 scheme of exponent n_clog(2 * len - 1, 2) */
+void
+acb_dirichlet_dft_convol_rad2(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len, slong prec)
+{
+    int e;
+    acb_dirichlet_dft_rad2_t dft;
+    e = n_clog(2 * len - 1, 2);
+    acb_dirichlet_dft_rad2_init(dft, e, prec);
+    acb_dirichlet_dft_convol_rad2_precomp(w, f, g, len, dft, prec);
+    acb_dirichlet_dft_rad2_clear(dft);
 }
diff --git a/acb_dirichlet/dft_convol_naive.c b/acb_dirichlet/dft_convol_naive.c
index be7f2c8e..a11db829 100644
--- a/acb_dirichlet/dft_convol_naive.c
+++ b/acb_dirichlet/dft_convol_naive.c
@@ -24,8 +24,8 @@ acb_dirichlet_dft_convol_naive(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len,
         gy = g;
         acb_zero(wx);
         for (y = 0; y <= x; y++)
-            acb_addmul(wx, fx--, gy++, prec);
+            acb_addmul(wx, fx - y, g + y, prec);
         for (; y < len; y++)
-            acb_addmul(wx, f + x - y, g + y, prec);
+            acb_addmul(wx, fx + (len - y), g + y, prec);
     }
 }
diff --git a/acb_dirichlet/dft_rad2.c b/acb_dirichlet/dft_rad2.c
index bfd99ff1..061a6441 100644
--- a/acb_dirichlet/dft_rad2.c
+++ b/acb_dirichlet/dft_rad2.c
@@ -38,7 +38,7 @@ acb_dirichlet_dft_rad2_reorder(acb_ptr v, slong n)
 
 /* remark: can use same rad2 with smaller power of 2 */
 void
-acb_dirichlet_dft_rad2_precomp(acb_ptr v, acb_dirichlet_dft_rad2_t rad2, slong prec)
+acb_dirichlet_dft_rad2_precomp(acb_ptr v, const acb_dirichlet_dft_rad2_t rad2, slong prec)
 {
     slong j, k, l;
     slong n = rad2->n, nz = rad2->nz;
@@ -46,6 +46,8 @@ acb_dirichlet_dft_rad2_precomp(acb_ptr v, acb_dirichlet_dft_rad2_t rad2, slong p
     acb_t tmp;
     acb_init(tmp);
 
+    acb_dirichlet_dft_rad2_reorder(v, n);
+
     for (k = 1, l = nz; k < n; k <<= 1, l >>= 1)
         for (p = v; p < vend; p += k)
             for (j = 0; j < nz; j += l, p++)
@@ -59,7 +61,7 @@ acb_dirichlet_dft_rad2_precomp(acb_ptr v, acb_dirichlet_dft_rad2_t rad2, slong p
 }
 
 void
-acb_dirichlet_dft_inverse_rad2_precomp(acb_ptr v, acb_dirichlet_dft_rad2_t rad2, slong prec)
+acb_dirichlet_dft_inverse_rad2_precomp(acb_ptr v, const acb_dirichlet_dft_rad2_t rad2, slong prec)
 {
     slong k, n = rad2->n;
     acb_dirichlet_dft_rad2_precomp(v, rad2, prec);
diff --git a/acb_dirichlet/test/t-convol.c b/acb_dirichlet/test/t-convol.c
new file mode 100644
index 00000000..df2a00fa
--- /dev/null
+++ b/acb_dirichlet/test/t-convol.c
@@ -0,0 +1,116 @@
+/*
+    Copyright (C) 2016 Pascal Molin
+
+    This file is part of Arb.
+
+    Arb is free software: you can redistribute it and/or modify it under
+    the terms of the GNU Lesser General Public License (LGPL) as published
+    by the Free Software Foundation; either version 2.1 of the License, or
+    (at your option) any later version.  See <http://www.gnu.org/licenses/>.
+*/
+
+#include "acb_dirichlet.h"
+
+/* common signature of the convolution routines under test */
+typedef void (*do_f) (acb_ptr z, acb_srcptr x, acb_srcptr y, slong len, slong prec);
+
+/* Abort with a report unless w1[i] and w2[i] overlap for all i < len and, for
+   every nonzero w1[i], both entries carry at least 30 bits of relative accuracy */
+void
+check_vec_eq_prec(acb_srcptr w1, acb_srcptr w2, slong len, slong prec, slong digits, ulong q, char f1[], char f2[])
+{
+    slong i;
+
+    for (i = 0; i < len; i++)
+    {
+        if (!acb_overlaps(w1 + i, w2 + i))
+        {
+            flint_printf("FAIL\n\n");
+            flint_printf("q = %wu, size = %wd\n", q, len);
+            flint_printf("\nDFT differ from index %wd / %wd \n", i, len);
+            flint_printf("\n%s =\n", f1);
+            acb_vec_printd_index(w1, len, digits);
+            flint_printf("\n%s =\n", f2);
+            acb_vec_printd_index(w2, len, digits);
+            flint_printf("\n\n");
+            abort();
+        }
+        else if (!acb_is_zero(w1+i) && (acb_rel_accuracy_bits(w1 + i) < 30
+                || acb_rel_accuracy_bits(w2 + i) < 30))
+        {
+            flint_printf("FAIL\n\n");
+            flint_printf("q = %wu\n", q);
+            flint_printf("\nDFT inaccurate from index %wd / %wd \n", i, len);
+            flint_printf("\nnaive =\n");
+            acb_printd(w1 + i, digits);
+            flint_printf("\nfast =\n");
+            acb_printd(w2 + i, digits);
+            flint_printf("\nerrors %wd & %wd [prec = %wd]\n",
+                    acb_rel_accuracy_bits(w1 + i),
+                    acb_rel_accuracy_bits(w2 + i), prec);
+            abort();
+        }
+    }
+}
+
+/* compares convol_rad2 against convol_naive on fixed inputs, for each length in q */
+int main()
+{
+    slong k;
+    slong prec = 100, digits = 30;
+    slong nq = 13;
+    ulong q[13] = { 2, 3, 4, 5, 6, 23, 10, 15, 30, 59, 308, 335, 961};
+    flint_rand_t state;
+
+    slong f, nf = 2;
+    do_f func[2] = { acb_dirichlet_dft_convol_naive, acb_dirichlet_dft_convol_rad2 };
+    char * name[2] = { "naive", "rad2" };
+
+    flint_printf("convol....");
+    fflush(stdout);
+
+    flint_randinit(state);
+
+    for (k = 0; k < nq; k++)
+    {
+        slong i;
+        acb_ptr z1, z2, x, y;
+
+        z1 = _acb_vec_init(q[k]);
+        z2 = _acb_vec_init(q[k]);
+        x = _acb_vec_init(q[k]);
+        y = _acb_vec_init(q[k]);
+
+        for (i = 0; i < q[k]; i++)
+        {
+            acb_set_si(x + i, q[k] - i);
+            acb_set_si(y + i, i * i);
+            /*
+            acb_set_si(x + i, n_randint(state, q[k]));
+            acb_set_si(y + i, n_randint(state, q[k]));
+            */
+        }
+
+        for (f = 0; f < nf; f++)
+        {
+            acb_ptr z = (f == 0) ? z1 : z2;
+
+            func[f](z, x, y, q[k], prec);
+
+            if (f == 0)
+                continue;
+
+            check_vec_eq_prec(z1, z2, q[k], prec, digits, q[k], name[0], name[f]);
+        }
+
+        _acb_vec_clear(x, q[k]);
+        _acb_vec_clear(y, q[k]);
+        _acb_vec_clear(z1, q[k]);
+        _acb_vec_clear(z2, q[k]);
+    }
+
+    flint_randclear(state);
+    flint_cleanup();
+    flint_printf("PASS\n");
+    return EXIT_SUCCESS;
+}
diff --git a/acb_dirichlet/test/t-dft.c b/acb_dirichlet/test/t-dft.c
index 02fa7bb6..1e783277 100644
--- a/acb_dirichlet/test/t-dft.c
+++ b/acb_dirichlet/test/t-dft.c
@@ -60,9 +60,9 @@ int main()
     ulong q[13] = { 2, 3, 4, 5, 6, 23, 10, 15, 30, 59, 308, 335, 961};
     flint_rand_t state;
 
-    slong f, nf = 3;
-    do_f func[3] = { acb_dirichlet_dft_pol, acb_dirichlet_dft_cyc, acb_dirichlet_dft_crt };
-    char * name[3] = { "pol", "cyc", "crt" };
+    slong f, nf = 4;
+    do_f func[4] = { acb_dirichlet_dft_pol, acb_dirichlet_dft_cyc, acb_dirichlet_dft_crt , acb_dirichlet_dft_bluestein };
+    char * name[4] = { "pol", "cyc", "crt", "bluestein" };
 
     flint_printf("dft....");
     fflush(stdout);
@@ -104,11 +104,14 @@ int main()
     /* radix2 dft */
     for (k = 1; k < 12; k++)
     {
-        slong n = 1 << k;
+        slong n = 1 << k, j;
         acb_ptr v, w1, w2;
         v = w2 = _acb_vec_init(n);
         w1 = _acb_vec_init(n);
 
+        for (j = 0; j < n; j++)
+            acb_set_si(v + j, j);
+
         acb_dirichlet_dft_pol(w1, v, n, prec);
         acb_dirichlet_dft_rad2(v, k, prec);
 